diff --git a/configs/server/train_config.yaml b/configs/server/train_config.yaml
index 0074d75..6c47be4 100644
--- a/configs/server/train_config.yaml
+++ b/configs/server/train_config.yaml
@@ -7,7 +7,7 @@ runner:
   parallel: False
 
   experiment:
-    name: new_test_overfit_2
+    name: new_test_overfit_to_world
     root_dir: "experiments"
     use_checkpoint: False
     epoch: -1 # -1 stands for last epoch
@@ -38,8 +38,8 @@ dataset:
     type: train
     cache: True
    ratio: 1
-    batch_size: 128
-    num_workers: 12
+    batch_size: 160
+    num_workers: 16
     pts_num: 4096
 
   OmniObject3d_test:
diff --git a/core/dataset.py b/core/dataset.py
index 9abea77..ec8d94d 100644
--- a/core/dataset.py
+++ b/core/dataset.py
@@ -17,6 +17,7 @@ from utils.reconstruction import ReconstructionUtil
 
 @stereotype.dataset("nbv_reconstruction_dataset")
 class NBVReconstructionDataset(BaseDataset):
+    DISPLAY_TABLE_POSITION = np.asarray([0,0,0.85])
     def __init__(self, config):
         super(NBVReconstructionDataset, self).__init__(config)
         self.config = config
@@ -37,6 +38,8 @@ class NBVReconstructionDataset(BaseDataset):
         expr_root = ConfigManager.get("runner", "experiment", "root_dir")
         expr_name = ConfigManager.get("runner", "experiment", "name")
         self.cache_dir = os.path.join(expr_root, expr_name, "cache")
+        #self.preprocess_cache()
+
 
 
     def load_scene_name_list(self):
@@ -65,9 +68,15 @@ class NBVReconstructionDataset(BaseDataset):
                 }
             )
         return datalist
+
+    def preprocess_cache(self):
+        Log.info("preprocessing cache...")
+        for item_idx in range(len(self.datalist)):
+            self.__getitem__(item_idx)
+        Log.success("finish preprocessing cache.")
 
-    def load_from_cache(self, scene_name, first_frame_idx, curr_frame_idx):
-        cache_name = f"{scene_name}_{first_frame_idx}_{curr_frame_idx}.txt"
+    def load_from_cache(self, scene_name, curr_frame_idx):
+        cache_name = f"{scene_name}_{curr_frame_idx}.txt"
         cache_path = os.path.join(self.cache_dir, cache_name)
         if os.path.exists(cache_path):
             data = np.loadtxt(cache_path)
@@ -75,8 +84,8 @@ class NBVReconstructionDataset(BaseDataset):
         else:
             return None
 
-    def save_to_cache(self, scene_name, first_frame_idx, curr_frame_idx, data):
-        cache_name = f"{scene_name}_{first_frame_idx}_{curr_frame_idx}.txt"
+    def save_to_cache(self, scene_name, curr_frame_idx, data):
+        cache_name = f"{scene_name}_{curr_frame_idx}.txt"
         cache_path = os.path.join(self.cache_dir, cache_name)
         try:
             np.savetxt(cache_path, data)
@@ -106,7 +115,7 @@ class NBVReconstructionDataset(BaseDataset):
 
             cached_data = None
             if self.cache:
-                cached_data = self.load_from_cache(scene_name, first_frame_idx, frame_idx)
+                cached_data = self.load_from_cache(scene_name, frame_idx)
 
             if cached_data is None:
                 depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True)
@@ -118,7 +127,7 @@ class NBVReconstructionDataset(BaseDataset):
                 overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
                 downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num)
                 if self.cache:
-                    self.save_to_cache(scene_name, first_frame_idx, frame_idx, downsampled_target_point_cloud)
+                    self.save_to_cache(scene_name, frame_idx, downsampled_target_point_cloud)
             else:
                 downsampled_target_point_cloud = cached_data
 
@@ -137,7 +146,6 @@ class NBVReconstructionDataset(BaseDataset):
         best_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(best_frame_to_world[:3,:3]))
         best_to_world_trans = best_frame_to_world[:3,3]
         best_to_world_9d = np.concatenate([best_to_world_6d, best_to_world_trans], axis=0)
-
         data_item = {
             "scanned_pts": np.asarray(scanned_views_pts,dtype=np.float32),
             "scanned_coverage_rate": scanned_coverages_rate,
@@ -147,6 +155,8 @@ class NBVReconstructionDataset(BaseDataset):
             "max_coverage_rate": max_coverage_rate,
             "scene_name": scene_name
         }
+
+
         if self.type == namespace.Mode.TEST:
             diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name)
             voxel_threshold = diag*0.02
diff --git a/runners/inferencer.py b/runners/inferencer.py
index bb17bde..aca0600 100644
--- a/runners/inferencer.py
+++ b/runners/inferencer.py
@@ -98,7 +98,7 @@ class Inferencer(Runner):
         ''' data for inference '''
         input_data = {}
         input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)]
-        input_data["scanned_n_to_1_pose_9d"] = [data["first_to_first_9d"][0].to(self.device)]
+        input_data["scanned_n_to_world_pose_9d"] = [data["first_to_first_9d"][0].to(self.device)]
         input_data["mode"] = namespace.Mode.TEST
         input_pts_N = input_data["scanned_pts"][0].shape[1]
 
@@ -141,7 +141,7 @@ class Inferencer(Runner):
 
             new_pts_tensor = torch.tensor(new_pts, dtype=torch.float32).unsqueeze(0).to(self.device)
             input_data["scanned_pts"] = [torch.cat([input_data["scanned_pts"][0] , new_pts_tensor], dim=0)]
-            input_data["scanned_n_to_1_pose_9d"] = [torch.cat([input_data["scanned_n_to_1_pose_9d"][0], next_pose_9d], dim=0)]
+            input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], next_pose_9d], dim=0)]
             last_pred_cr = pred_cr
 
             # ------ Debug Start ------
@@ -150,9 +150,9 @@ class Inferencer(Runner):
 
         input_data["scanned_pts"] = input_data["scanned_pts"][0].cpu().numpy().tolist()
-        input_data["scanned_n_to_1_pose_9d"] = input_data["scanned_n_to_1_pose_9d"][0].cpu().numpy().tolist()
+        input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist()
 
         result = {
-            "pred_pose_9d_seq": input_data["scanned_n_to_1_pose_9d"],
+            "pred_pose_9d_seq": input_data["scanned_n_to_world_pose_9d"],
             "pts_seq": input_data["scanned_pts"],
             "target_pts_seq": scanned_view_pts,
             "coverage_rate_seq": pred_cr_seq,
diff --git a/utils/data_load.py b/utils/data_load.py
index 4705ac0..e9fcc54 100644
--- a/utils/data_load.py
+++ b/utils/data_load.py
@@ -6,7 +6,7 @@ import trimesh
 from utils.pts import PtsUtil
 
 class DataLoadUtil:
-
+    DISPLAY_TABLE_POSITION = np.asarray([0,0,0.85])
     @staticmethod
     def get_path(root, scene_name, frame_idx):
         path = os.path.join(root, scene_name, f"{frame_idx}")
@@ -160,12 +160,16 @@ class DataLoadUtil:
         return cam_pose_after
 
     @staticmethod
-    def load_cam_info(path, binocular=False):
+    def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
         camera_params_path = os.path.join(os.path.dirname(path), "camera_params", os.path.basename(path) + ".json")
         with open(camera_params_path, 'r') as f:
             label_data = json.load(f)
         cam_to_world = np.asarray(label_data["extrinsic"])
         cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
+        world_to_display_table = np.eye(4)
+        world_to_display_table[:3, 3] = - DataLoadUtil.DISPLAY_TABLE_POSITION
+        if display_table_as_world_space_origin:
+            cam_to_world = np.dot(world_to_display_table, cam_to_world)
         cam_intrinsic = np.asarray(label_data["intrinsic"])
         cam_info = {
             "cam_to_world": cam_to_world,
@@ -176,10 +180,13 @@ class DataLoadUtil:
         if binocular:
             cam_to_world_R = np.asarray(label_data["extrinsic_R"])
             cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
-            cam_info["cam_to_world_R"] = cam_to_world_R
             cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
             cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
+            if display_table_as_world_space_origin:
+                cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O)
+                cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R)
             cam_info["cam_to_world_O"] = cam_to_world_O
+            cam_info["cam_to_world_R"] = cam_to_world_R
         return cam_info
 
     @staticmethod
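A minimal sketch of the display-table re-centring introduced in `DataLoadUtil.load_cam_info`: each camera extrinsic is left-multiplied by a pure-translation matrix so that the display table at `DISPLAY_TABLE_POSITION = [0, 0, 0.85]` becomes the world-frame origin. The camera pose below is an assumed example for illustration; only `numpy` is required.

```python
import numpy as np

DISPLAY_TABLE_POSITION = np.asarray([0, 0, 0.85])  # same constant the patch adds

# Assumed camera extrinsic: identity rotation, camera 1.2 m above the original world origin.
cam_to_world = np.eye(4)
cam_to_world[:3, 3] = [0.3, 0.0, 1.2]

# Pure-translation transform that moves the world origin onto the display table,
# mirroring the world_to_display_table matrix built inside load_cam_info.
world_to_display_table = np.eye(4)
world_to_display_table[:3, 3] = -DISPLAY_TABLE_POSITION

# Same left-multiplication as in the patch: the rotation block is unchanged,
# the translation becomes relative to the table centre.
cam_to_display_table = np.dot(world_to_display_table, cam_to_world)
print(cam_to_display_table[:3, 3])  # -> approximately [0.3, 0.0, 0.35]
```

Because the transform is a pure translation, only the translation components of `cam_to_world`, `cam_to_world_R` and `cam_to_world_O` change while the rotation blocks stay the same, which is consistent with renaming the pose keys from `scanned_n_to_1_pose_9d` to `scanned_n_to_world_pose_9d`.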