after first overfit test

This commit is contained in:
hofee 2024-09-18 06:49:59 +00:00
parent d80d0ea79d
commit 0280dc7292
6 changed files with 193 additions and 42 deletions

View File

@ -14,8 +14,8 @@ runner:
voxel_threshold: 0.01 voxel_threshold: 0.01
overlap_threshold: 0.5 overlap_threshold: 0.5
filter_degree: 75 filter_degree: 75
to_specified_dir: True # if True, output_dir is used, otherwise, root_dir is used to_specified_dir: False # if True, output_dir is used, otherwise, root_dir is used
save_points: False save_points: True
save_best_combined_points: True save_best_combined_points: True
save_mesh: True save_mesh: True
overwrite: False overwrite: False

View File

@ -1,18 +1,18 @@
runner: runner:
general: general:
seed: 0 seed: 1
device: cuda device: cuda
cuda_visible_devices: "0,1,2,3,4,5,6,7" cuda_visible_devices: "0,1,2,3,4,5,6,7"
parallel: False parallel: False
experiment: experiment:
name: test_overfit name: new_test_overfit_2
root_dir: "experiments" root_dir: "experiments"
use_checkpoint: False use_checkpoint: False
epoch: -1 # -1 stands for last epoch epoch: -1 # -1 stands for last epoch
max_epochs: 5000 max_epochs: 5000
save_checkpoint_interval: 1 save_checkpoint_interval: 3
test_first: False test_first: False
train: train:
@ -32,22 +32,29 @@ runner:
dataset: dataset:
OmniObject3d_train: OmniObject3d_train:
root_dir: "../data/sample_for_training/scenes" root_dir: "../data/sample_for_training/scenes"
model_dir: "../data/scaled_object_meshes"
source: nbv_reconstruction_dataset source: nbv_reconstruction_dataset
split_file: "../data/sample_for_training/OmniObject3d_train.txt" split_file: "../data/sample_for_training/OmniObject3d_train.txt"
ratio: 1.0 type: train
batch_size: 1 cache: True
ratio: 1
batch_size: 128
num_workers: 12 num_workers: 12
pts_num: 4096 pts_num: 4096
OmniObject3d_test: OmniObject3d_test:
root_dir: "../data/sample_for_training/scenes" root_dir: "../data/sample_for_training/scenes"
model_dir: "../data/scaled_object_meshes"
source: nbv_reconstruction_dataset source: nbv_reconstruction_dataset
split_file: "../data/sample_for_training/OmniObject3d_train.txt" split_file: "../data/sample_for_training/OmniObject3d_train.txt"
type: test
cache: True
filter_degree: 75
eval_list: eval_list:
- pose_diff - pose_diff
ratio: 0.1 ratio: 0.1
batch_size: 1 batch_size: 1
num_workers: 1 num_workers: 12
pts_num: 4096 pts_num: 4096
pipeline: pipeline:
@ -93,3 +100,5 @@ loss_function:
evaluation_method: evaluation_method:
pose_diff: pose_diff:
coverage_rate_increase:
renderer_path: "../blender/data_renderer.py"

View File

@ -1,11 +1,19 @@
import numpy as np import numpy as np
from PytorchBoot.dataset import BaseDataset from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype import PytorchBoot.stereotype as stereotype
from PytorchBoot.config import ConfigManager
from PytorchBoot.utils.log_util import Log
import torch import torch
import os
import sys
sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction")
from utils.data_load import DataLoadUtil from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil from utils.pose import PoseUtil
from utils.pts import PtsUtil from utils.pts import PtsUtil
from utils.reconstruction import ReconstructionUtil
@stereotype.dataset("nbv_reconstruction_dataset") @stereotype.dataset("nbv_reconstruction_dataset")
class NBVReconstructionDataset(BaseDataset): class NBVReconstructionDataset(BaseDataset):
@ -16,7 +24,20 @@ class NBVReconstructionDataset(BaseDataset):
self.split_file_path = config["split_file"] self.split_file_path = config["split_file"]
self.scene_name_list = self.load_scene_name_list() self.scene_name_list = self.load_scene_name_list()
self.datalist = self.get_datalist() self.datalist = self.get_datalist()
self.pts_num = config["pts_num"] self.pts_num = config["pts_num"]
self.type = config["type"]
self.cache = config["cache"]
if self.type == namespace.Mode.TEST:
self.model_dir = config["model_dir"]
self.filter_degree = config["filter_degree"]
if self.type == namespace.Mode.TRAIN:
self.datalist = self.datalist*100
if self.cache:
expr_root = ConfigManager.get("runner", "experiment", "root_dir")
expr_name = ConfigManager.get("runner", "experiment", "name")
self.cache_dir = os.path.join(expr_root, expr_name, "cache")
def load_scene_name_list(self): def load_scene_name_list(self):
scene_name_list = [] scene_name_list = []
@ -45,6 +66,26 @@ class NBVReconstructionDataset(BaseDataset):
) )
return datalist return datalist
def load_from_cache(self, scene_name, first_frame_idx, curr_frame_idx):
    """Return the cached point cloud for one view, or None on a cache miss.

    The cache file is named ``{scene_name}_{first_frame_idx}_{curr_frame_idx}.txt``
    and lives in ``self.cache_dir``.

    Args:
        scene_name: Scene identifier used as the first part of the file name.
        first_frame_idx: Index of the reference (first) frame.
        curr_frame_idx: Index of the frame whose points were cached.

    Returns:
        The array parsed by ``np.loadtxt`` from the cache file, or ``None``
        when caching is not configured or the file does not exist.
    """
    # cache_dir is only assigned conditionally in __init__, while __getitem__
    # calls this method for every view; a missing directory is a plain miss
    # instead of an AttributeError.
    cache_dir = getattr(self, "cache_dir", None)
    if cache_dir is None:
        return None
    cache_name = f"{scene_name}_{first_frame_idx}_{curr_frame_idx}.txt"
    cache_path = os.path.join(cache_dir, cache_name)
    if not os.path.exists(cache_path):
        return None
    return np.loadtxt(cache_path)
def save_to_cache(self, scene_name, first_frame_idx, curr_frame_idx, data):
    """Write one view's point cloud to the text cache (best effort).

    The file is named ``{scene_name}_{first_frame_idx}_{curr_frame_idx}.txt``
    inside ``self.cache_dir``; the directory is created on demand.

    Args:
        scene_name: Scene identifier used as the first part of the file name.
        first_frame_idx: Index of the reference (first) frame.
        curr_frame_idx: Index of the frame the points belong to.
        data: Array-like accepted by ``np.savetxt``.

    Returns:
        None. Failures are logged and swallowed so that a cache problem never
        aborts data loading.
    """
    # cache_dir is only assigned conditionally in __init__; without it there
    # is nowhere to write, so skip silently.
    cache_dir = getattr(self, "cache_dir", None)
    if cache_dir is None:
        return
    cache_name = f"{scene_name}_{first_frame_idx}_{curr_frame_idx}.txt"
    cache_path = os.path.join(cache_dir, cache_name)
    try:
        # np.savetxt does not create parent directories.
        os.makedirs(cache_dir, exist_ok=True)
        np.savetxt(cache_path, data)
    except Exception as e:
        # Deliberately broad: caching is an optimisation, so log and carry on.
        # No interactive breakpoint here -- this runs inside DataLoader
        # workers, where a debugger prompt would stall the whole job.
        Log.error(f"Save cache failed: {e}")
def __getitem__(self, index): def __getitem__(self, index):
data_item_info = self.datalist[index] data_item_info = self.datalist[index]
scanned_views = data_item_info["scanned_views"] scanned_views = data_item_info["scanned_views"]
@ -64,14 +105,21 @@ class NBVReconstructionDataset(BaseDataset):
nR_to_world_pose = cam_info["cam_to_world_R"] nR_to_world_pose = cam_info["cam_to_world_R"]
n_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), n_to_world_pose) n_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), n_to_world_pose)
nR_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), nR_to_world_pose) nR_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), nR_to_world_pose)
depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True) cached_data = self.load_from_cache(scene_name, first_frame_idx, frame_idx)
point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_info['cam_intrinsic'], n_to_1_pose)['points_world']
point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_info['cam_intrinsic'], nR_to_1_pose)['points_world'] if cached_data is None:
depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True)
point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_info['cam_intrinsic'], n_to_1_pose)['points_world']
point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_info['cam_intrinsic'], nR_to_1_pose)['points_world']
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num)
self.save_to_cache(scene_name, first_frame_idx, frame_idx, downsampled_target_point_cloud)
else:
downsampled_target_point_cloud = cached_data
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num)
scanned_views_pts.append(downsampled_target_point_cloud) scanned_views_pts.append(downsampled_target_point_cloud)
scanned_coverages_rate.append(coverage_rate) scanned_coverages_rate.append(coverage_rate)
n_to_1_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(n_to_1_pose[:3,:3])) n_to_1_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(n_to_1_pose[:3,:3]))
@ -97,7 +145,28 @@ class NBVReconstructionDataset(BaseDataset):
"max_coverage_rate": max_coverage_rate, "max_coverage_rate": max_coverage_rate,
"scene_name": scene_name "scene_name": scene_name
} }
# if self.type == namespace.Mode.TEST:
# diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name)
# voxel_threshold = diag*0.02
# model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name)
# pts_list = []
# for view in scanned_views:
# frame_idx = view[0]
# view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
# point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(view_path, binocular=True)
# cam_params = DataLoadUtil.load_cam_info(view_path, binocular=True)
# sampled_point_cloud = ReconstructionUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=self.filter_degree)
# pts_list.append(sampled_point_cloud)
# nL_to_world_pose = cam_params["cam_to_world"]
# nO_to_world_pose = cam_params["cam_to_world_O"]
# nO_to_nL_pose = np.dot(np.linalg.inv(nL_to_world_pose), nO_to_world_pose)
# data_item["scanned_target_pts_list"] = pts_list
# data_item["model_points_normals"] = model_points_normals
# data_item["voxel_threshold"] = voxel_threshold
# data_item["filter_degree"] = self.filter_degree
# data_item["scene_path"] = os.path.join(self.root_dir, scene_name)
# data_item["first_frame_to_world"] = np.asarray(first_frame_to_world, dtype=np.float32)
# data_item["nO_to_nL_pose"] = np.asarray(nO_to_nL_pose, dtype=np.float32)
return data_item return data_item
def __len__(self): def __len__(self):
@ -109,8 +178,10 @@ class NBVReconstructionDataset(BaseDataset):
collate_data["scanned_pts"] = [torch.tensor(item['scanned_pts']) for item in batch] collate_data["scanned_pts"] = [torch.tensor(item['scanned_pts']) for item in batch]
collate_data["scanned_n_to_1_pose_9d"] = [torch.tensor(item['scanned_n_to_1_pose_9d']) for item in batch] collate_data["scanned_n_to_1_pose_9d"] = [torch.tensor(item['scanned_n_to_1_pose_9d']) for item in batch]
collate_data["best_to_1_pose_9d"] = torch.stack([torch.tensor(item['best_to_1_pose_9d']) for item in batch]) collate_data["best_to_1_pose_9d"] = torch.stack([torch.tensor(item['best_to_1_pose_9d']) for item in batch])
if "first_frame_to_world" in batch[0]:
collate_data["first_frame_to_world"] = torch.stack([torch.tensor(item["first_frame_to_world"]) for item in batch])
for key in batch[0].keys(): for key in batch[0].keys():
if key not in ["scanned_pts", "scanned_n_to_1_pose_9d", "best_to_1_pose_9d"]: if key not in ["scanned_pts", "scanned_n_to_1_pose_9d", "best_to_1_pose_9d", "first_frame_to_world"]:
collate_data[key] = [item[key] for item in batch] collate_data[key] = [item[key] for item in batch]
return collate_data return collate_data
return collate_fn return collate_fn
@ -123,10 +194,13 @@ if __name__ == "__main__":
config = { config = {
"root_dir": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes", "root_dir": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes",
"split_file": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt", "split_file": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt",
"model_dir": "/media/hofee/data/data/scaled_object_meshes",
"ratio": 0.5, "ratio": 0.5,
"batch_size": 2, "batch_size": 2,
"filter_degree": 75,
"num_workers": 0, "num_workers": 0,
"pts_num": 32684 "pts_num": 32684,
"type": namespace.Mode.TEST,
} }
ds = NBVReconstructionDataset(config) ds = NBVReconstructionDataset(config)
print(len(ds)) print(len(ds))
@ -135,7 +209,9 @@ if __name__ == "__main__":
for idx, data in enumerate(dl): for idx, data in enumerate(dl):
data = ds.process_batch(data, "cuda:0") data = ds.process_batch(data, "cuda:0")
print(data) print(data)
break # ------ Debug Start ------
import ipdb;ipdb.set_trace()
# ------ Debug End ------
# #
# for idx, data in enumerate(dl): # for idx, data in enumerate(dl):
# cnt=0 # cnt=0

View File

@ -1,10 +1,43 @@
import torch import torch
import os
import json
import numpy as np
import subprocess
import tempfile
from utils.data_load import DataLoadUtil
from utils.reconstruction import ReconstructionUtil
from utils.pose import PoseUtil from utils.pose import PoseUtil
from utils.pts import PtsUtil
import PytorchBoot.stereotype as stereotype import PytorchBoot.stereotype as stereotype
import PytorchBoot.namespace as namespace import PytorchBoot.namespace as namespace
from PytorchBoot.utils.log_util import Log
def get_view_data(cam_pose, scene_name): def render_pts(cam_pose, scene_path,script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None):
pass nO_to_world_pose = cam_pose.cpu().numpy() @ nO_to_nL_pose
nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
with tempfile.TemporaryDirectory() as temp_dir:
params = {
"cam_pose": nO_to_world_pose.tolist(),
"scene_path": scene_path
}
params_data_path = os.path.join(temp_dir, "params.json")
with open(params_data_path, 'w') as f:
json.dump(params, f)
result = subprocess.run([
'blender', '-b', '-P', script_path, '--', temp_dir
], capture_output=True, text=True)
if result.returncode != 0:
print("Blender script failed:")
print(result.stderr)
return None
path = os.path.join(temp_dir, "tmp")
point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
sampled_point_cloud = ReconstructionUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
return sampled_point_cloud
@stereotype.evaluation_method("pose_diff") @stereotype.evaluation_method("pose_diff")
class PoseDiff: class PoseDiff:
@ -36,11 +69,11 @@ class PoseDiff:
@stereotype.evaluation_method("coverage_rate_increase",comment="unfinished") @stereotype.evaluation_method("coverage_rate_increase")
class ConverageRateIncrease: class ConverageRateIncrease:
def __init__(self, config): def __init__(self, config):
self.config = config self.config = config
self.renderer_path = config["renderer_path"]
def evaluate(self, output_list, data_list): def evaluate(self, output_list, data_list):
results = {namespace.TensorBoard.SCALAR: {}} results = {namespace.TensorBoard.SCALAR: {}}
@ -48,31 +81,57 @@ class ConverageRateIncrease:
pred_coverate_increase_list = [] pred_coverate_increase_list = []
cr_diff_list = [] cr_diff_list = []
for output, data in zip(output_list, data_list): for output, data in zip(output_list, data_list):
scanned_cr = data['scanned_coverages_rate'] scanned_cr = data['scanned_coverage_rate']
gt_cr = data["best_coverage_rate"] gt_cr = data["best_coverage_rate"]
scene_name_list = data['scene_name'] scene_path_list = data['scene_path']
scanned_view_pts_list = data['scanned_pts'] model_points_normals_list = data['model_points_normals']
scanned_view_pts_list = data['scanned_target_pts_list']
pred_pose_9ds = output['pred_pose_9d'] pred_pose_9ds = output['pred_pose_9d']
pred_rot_mats = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_pose_9ds[:, :6]) nO_to_nL_pose_batch = data["nO_to_nL_pose"]
pred_pose_mats = torch.cat([pred_rot_mats, pred_pose_9ds[:, 6:]], dim=-1) voxel_threshold_list = data["voxel_threshold"]
filter_degree_list = data["filter_degree"]
first_frame_to_world = data["first_frame_to_world"]
pred_n_to_1_pose_mats = torch.eye(4, device=pred_pose_9ds.device).unsqueeze(0).repeat(pred_pose_9ds.shape[0], 1, 1)
pred_n_to_1_pose_mats[:,:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_pose_9ds[:, :6])
pred_n_to_1_pose_mats[:,:3,3] = pred_pose_9ds[:, 6:]
pred_n_to_world_pose_mats = torch.matmul(first_frame_to_world, pred_n_to_1_pose_mats)
for idx in range(len(scanned_cr)): for idx in range(len(scanned_cr)):
gt_coverate_increase_list.append(gt_cr-scanned_cr[idx]) model_points_normals = model_points_normals_list[idx]
scene_name = scene_name_list[idx]
pred_pose = pred_pose_mats[idx]
scanned_view_pts = scanned_view_pts_list[idx] scanned_view_pts = scanned_view_pts_list[idx]
view_data = get_view_data(pred_pose, scene_name) voxel_threshold = voxel_threshold_list[idx]
pred_cr = self.compute_coverage_rate(pred_pose, scanned_view_pts, view_data) model_pts = model_points_normals[:,:3]
pred_coverate_increase_list.append(pred_cr-scanned_cr[idx]) down_sampled_model_pts = PtsUtil.voxel_downsample_point_cloud(model_pts, voxel_threshold)
cr_diff_list.append(gt_cr-pred_cr) old_scanned_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
gt_coverate_increase_list.append(gt_cr[idx]-old_scanned_cr)
scene_path = scene_path_list[idx]
pred_pose = pred_n_to_world_pose_mats[idx]
filter_degree = filter_degree_list[idx]
nO_to_nL_pose = nO_to_nL_pose_batch[idx]
try:
new_pts = render_pts(pred_pose, scene_path, self.renderer_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=nO_to_nL_pose)
pred_cr = self.compute_coverage_rate(scanned_view_pts, new_pts, down_sampled_model_pts, threshold=voxel_threshold)
except Exception as e:
Log.warning(f"Error in scene {scene_path}, {e}")
pred_cr = old_scanned_cr
pred_coverate_increase_list.append(pred_cr-old_scanned_cr)
cr_diff_list.append(gt_cr[idx]-pred_cr)
results[namespace.TensorBoard.SCALAR]["gt_cr_increase"] = float(sum(gt_coverate_increase_list) / len(gt_coverate_increase_list)) results[namespace.TensorBoard.SCALAR]["gt_cr_increase"] = float(sum(gt_coverate_increase_list) / len(gt_coverate_increase_list))
results[namespace.TensorBoard.SCALAR]["pred_cr_increase"] = float(sum(pred_coverate_increase_list) / len(pred_coverate_increase_list)) results[namespace.TensorBoard.SCALAR]["pred_cr_increase"] = float(sum(pred_coverate_increase_list) / len(pred_coverate_increase_list))
results[namespace.TensorBoard.SCALAR]["cr_diff"] = float(sum(cr_diff_list) / len(cr_diff_list)) results[namespace.TensorBoard.SCALAR]["cr_diff"] = float(sum(cr_diff_list) / len(cr_diff_list))
return results return results
def compute_coverage_rate(self, pred_pose, scanned_view_pts, view_data): def compute_coverage_rate(self, scanned_view_pts, new_pts, model_pts, threshold=0.005):
pass if new_pts is not None:
new_scanned_view_pts = scanned_view_pts + [new_pts]
else:
new_scanned_view_pts = scanned_view_pts
combined_point_cloud = np.vstack(new_scanned_view_pts)
down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud,threshold)
return ReconstructionUtil.compute_coverage_rate(model_pts, down_sampled_combined_point_cloud, threshold)

View File

@ -5,7 +5,7 @@ import PytorchBoot.stereotype as stereotype
from PytorchBoot.factory.component_factory import ComponentFactory from PytorchBoot.factory.component_factory import ComponentFactory
from PytorchBoot.utils import Log from PytorchBoot.utils import Log
@stereotype.pipeline("nbv_reconstruction_pipeline") @stereotype.pipeline("nbv_reconstruction_pipeline", comment="should be tested")
class NBVReconstructionPipeline(nn.Module): class NBVReconstructionPipeline(nn.Module):
def __init__(self, config): def __init__(self, config):
super(NBVReconstructionPipeline, self).__init__() super(NBVReconstructionPipeline, self).__init__()
@ -72,10 +72,14 @@ class NBVReconstructionPipeline(nn.Module):
pose_feat_seq_list = [] pose_feat_seq_list = []
for scanned_pts,scanned_n_to_1_pose_9d in zip(scanned_pts_batch,scanned_n_to_1_pose_9d_batch): for scanned_pts,scanned_n_to_1_pose_9d in zip(scanned_pts_batch,scanned_n_to_1_pose_9d_batch):
scanned_pts = scanned_pts.to(best_to_1_pose_9d_batch.device) scanned_pts = scanned_pts.to(best_to_1_pose_9d_batch.device)
scanned_n_to_1_pose_9d = scanned_n_to_1_pose_9d.to(best_to_1_pose_9d_batch.device) scanned_n_to_1_pose_9d = scanned_n_to_1_pose_9d.to(best_to_1_pose_9d_batch.device)
pts_feat_seq_list.append(self.pts_encoder.encode_points(scanned_pts)) pts_feat_seq_list.append(self.pts_encoder.encode_points(scanned_pts))
pose_feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_1_pose_9d)) pose_feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_1_pose_9d))
seq_feat = self.seq_encoder.encode_sequence(pts_feat_seq_list, pose_feat_seq_list) seq_feat = self.seq_encoder.encode_sequence(pts_feat_seq_list, pose_feat_seq_list)
if torch.isnan(seq_feat).any():
Log.error("nan in seq_feat", True)
return seq_feat return seq_feat

View File

@ -177,6 +177,9 @@ class DataLoadUtil:
cam_to_world_R = np.asarray(label_data["extrinsic_R"]) cam_to_world_R = np.asarray(label_data["extrinsic_R"])
cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R) cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
cam_info["cam_to_world_R"] = cam_to_world_R cam_info["cam_to_world_R"] = cam_to_world_R
cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
cam_info["cam_to_world_O"] = cam_to_world_O
return cam_info return cam_info
@staticmethod @staticmethod