define display table as world space origin

parent 8d5d6d5df4
commit bb75372f7e
@@ -7,7 +7,7 @@ runner:
     parallel: False

   experiment:
-    name: new_test_overfit_2
+    name: new_test_overfit_to_world
     root_dir: "experiments"
     use_checkpoint: False
     epoch: -1 # -1 stands for last epoch
@@ -38,8 +38,8 @@ dataset:
     type: train
     cache: True
     ratio: 1
-    batch_size: 128
-    num_workers: 12
+    batch_size: 160
+    num_workers: 16
     pts_num: 4096

   OmniObject3d_test:
@@ -17,6 +17,7 @@ from utils.reconstruction import ReconstructionUtil

 @stereotype.dataset("nbv_reconstruction_dataset")
 class NBVReconstructionDataset(BaseDataset):
+    DISPLAY_TABLE_POSITION = np.asarray([0,0,0.85])
     def __init__(self, config):
         super(NBVReconstructionDataset, self).__init__(config)
         self.config = config
@@ -37,6 +38,8 @@ class NBVReconstructionDataset(BaseDataset):
         expr_root = ConfigManager.get("runner", "experiment", "root_dir")
         expr_name = ConfigManager.get("runner", "experiment", "name")
         self.cache_dir = os.path.join(expr_root, expr_name, "cache")
+        #self.preprocess_cache()
+


     def load_scene_name_list(self):
@@ -66,8 +69,14 @@ class NBVReconstructionDataset(BaseDataset):
             )
         return datalist

-    def load_from_cache(self, scene_name, first_frame_idx, curr_frame_idx):
-        cache_name = f"{scene_name}_{first_frame_idx}_{curr_frame_idx}.txt"
+    def preprocess_cache(self):
+        Log.info("preprocessing cache...")
+        for item_idx in range(len(self.datalist)):
+            self.__getitem__(item_idx)
+        Log.success("finish preprocessing cache.")
+
+    def load_from_cache(self, scene_name, curr_frame_idx):
+        cache_name = f"{scene_name}_{curr_frame_idx}.txt"
         cache_path = os.path.join(self.cache_dir, cache_name)
         if os.path.exists(cache_path):
             data = np.loadtxt(cache_path)
@@ -75,8 +84,8 @@ class NBVReconstructionDataset(BaseDataset):
         else:
             return None

-    def save_to_cache(self, scene_name, first_frame_idx, curr_frame_idx, data):
-        cache_name = f"{scene_name}_{first_frame_idx}_{curr_frame_idx}.txt"
+    def save_to_cache(self, scene_name, curr_frame_idx, data):
+        cache_name = f"{scene_name}_{curr_frame_idx}.txt"
         cache_path = os.path.join(self.cache_dir, cache_name)
         try:
             np.savetxt(cache_path, data)
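With first_frame_idx dropped from the key, each cached point cloud is now addressed by (scene_name, curr_frame_idx) alone. For reference, a minimal standalone sketch of the same np.savetxt/np.loadtxt round-trip; the cache_dir value and scene name here are hypothetical, not taken from the repo:

import os
import numpy as np

cache_dir = "experiments/new_test_overfit_to_world/cache"  # hypothetical location
os.makedirs(cache_dir, exist_ok=True)

def save_to_cache(scene_name, curr_frame_idx, data):
    # np.savetxt stores the (N, 3) float array as plain text, one point per row
    np.savetxt(os.path.join(cache_dir, f"{scene_name}_{curr_frame_idx}.txt"), data)

def load_from_cache(scene_name, curr_frame_idx):
    path = os.path.join(cache_dir, f"{scene_name}_{curr_frame_idx}.txt")
    return np.loadtxt(path) if os.path.exists(path) else None

pts = np.random.rand(4096, 3)
save_to_cache("scene_0", 3, pts)
assert np.allclose(load_from_cache("scene_0", 3), pts)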
@@ -106,7 +115,7 @@ class NBVReconstructionDataset(BaseDataset):

         cached_data = None
         if self.cache:
-            cached_data = self.load_from_cache(scene_name, first_frame_idx, frame_idx)
+            cached_data = self.load_from_cache(scene_name, frame_idx)

         if cached_data is None:
             depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True)
@@ -118,7 +127,7 @@ class NBVReconstructionDataset(BaseDataset):
             overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
             downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num)
             if self.cache:
-                self.save_to_cache(scene_name, first_frame_idx, frame_idx, downsampled_target_point_cloud)
+                self.save_to_cache(scene_name, frame_idx, downsampled_target_point_cloud)
         else:
             downsampled_target_point_cloud = cached_data

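PtsUtil.random_downsample_point_cloud itself is not part of this diff; a plausible sketch, assuming it draws pts_num random points and falls back to sampling with replacement when the cloud is too small:

import numpy as np

def random_downsample_point_cloud(point_cloud, num_points):
    # sample indices uniformly; replacement only if there are too few points
    replace = point_cloud.shape[0] < num_points
    idx = np.random.choice(point_cloud.shape[0], num_points, replace=replace)
    return point_cloud[idx]

overlap_points = np.random.rand(10000, 3)
downsampled = random_downsample_point_cloud(overlap_points, 4096)
assert downsampled.shape == (4096, 3)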
@@ -137,7 +146,6 @@ class NBVReconstructionDataset(BaseDataset):
         best_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(best_frame_to_world[:3,:3]))
         best_to_world_trans = best_frame_to_world[:3,3]
         best_to_world_9d = np.concatenate([best_to_world_6d, best_to_world_trans], axis=0)

         data_item = {
             "scanned_pts": np.asarray(scanned_views_pts,dtype=np.float32),
             "scanned_coverage_rate": scanned_coverages_rate,
@@ -147,6 +155,8 @@ class NBVReconstructionDataset(BaseDataset):
             "max_coverage_rate": max_coverage_rate,
             "scene_name": scene_name
         }


         if self.type == namespace.Mode.TEST:
             diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name)
             voxel_threshold = diag*0.02
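best_to_world_9d packs a continuous 6d rotation representation together with the 3d translation. PoseUtil.matrix_to_rotation_6d_numpy is not shown in this diff; a minimal sketch under the common convention (pytorch3d-style: flatten the first two rows of the rotation matrix), with the Gram-Schmidt inverse for reference:

import numpy as np

def matrix_to_rotation_6d_numpy(rot):
    # drop the last row; the first two rows identify the rotation continuously
    return rot[:2, :].reshape(-1)

def rotation_6d_to_matrix_numpy(d6):
    # Gram-Schmidt re-orthonormalization recovers a valid rotation matrix
    a1, a2 = d6[:3], d6[3:]
    b1 = a1 / np.linalg.norm(a1)
    b2 = a2 - np.dot(b1, a2) * b1
    b2 = b2 / np.linalg.norm(b2)
    b3 = np.cross(b1, b2)
    return np.stack([b1, b2, b3], axis=0)

# pack rotation and translation into the 9d vector stored in data_item
best_frame_to_world = np.eye(4)
rot_6d = matrix_to_rotation_6d_numpy(best_frame_to_world[:3, :3])
pose_9d = np.concatenate([rot_6d, best_frame_to_world[:3, 3]], axis=0)
assert pose_9d.shape == (9,)
assert np.allclose(rotation_6d_to_matrix_numpy(rot_6d), np.eye(3))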
|
@ -98,7 +98,7 @@ class Inferencer(Runner):
|
||||
''' data for inference '''
|
||||
input_data = {}
|
||||
input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)]
|
||||
input_data["scanned_n_to_1_pose_9d"] = [data["first_to_first_9d"][0].to(self.device)]
|
||||
input_data["scanned_n_to_world_pose_9d"] = [data["first_to_first_9d"][0].to(self.device)]
|
||||
input_data["mode"] = namespace.Mode.TEST
|
||||
input_pts_N = input_data["scanned_pts"][0].shape[1]
|
||||
|
||||
@@ -141,7 +141,7 @@ class Inferencer(Runner):
                 new_pts_tensor = torch.tensor(new_pts, dtype=torch.float32).unsqueeze(0).to(self.device)

                 input_data["scanned_pts"] = [torch.cat([input_data["scanned_pts"][0] , new_pts_tensor], dim=0)]
-                input_data["scanned_n_to_1_pose_9d"] = [torch.cat([input_data["scanned_n_to_1_pose_9d"][0], next_pose_9d], dim=0)]
+                input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], next_pose_9d], dim=0)]

                 last_pred_cr = pred_cr
                 # ------ Debug Start ------
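The inference loop grows the scanned-view history by concatenating each new view along dim=0. A standalone sketch of that accumulation pattern; the shapes are illustrative, not taken from the repo:

import torch

pts_seq = torch.rand(1, 4096, 3)   # (n_views, pts_num, 3), first view only
pose_seq = torch.rand(1, 9)        # (n_views, 9), matching 9d poses

new_pts = torch.rand(1, 4096, 3)   # next predicted view
new_pose = torch.rand(1, 9)

# torch.cat along dim=0 appends the new view to the history
pts_seq = torch.cat([pts_seq, new_pts], dim=0)
pose_seq = torch.cat([pose_seq, new_pose], dim=0)
assert pts_seq.shape[0] == pose_seq.shape[0] == 2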
@@ -150,9 +150,9 @@ class Inferencer(Runner):


         input_data["scanned_pts"] = input_data["scanned_pts"][0].cpu().numpy().tolist()
-        input_data["scanned_n_to_1_pose_9d"] = input_data["scanned_n_to_1_pose_9d"][0].cpu().numpy().tolist()
+        input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist()
         result = {
-            "pred_pose_9d_seq": input_data["scanned_n_to_1_pose_9d"],
+            "pred_pose_9d_seq": input_data["scanned_n_to_world_pose_9d"],
             "pts_seq": input_data["scanned_pts"],
             "target_pts_seq": scanned_view_pts,
             "coverage_rate_seq": pred_cr_seq,
@@ -6,7 +6,7 @@ import trimesh
 from utils.pts import PtsUtil

 class DataLoadUtil:
-
+    DISPLAY_TABLE_POSITION = np.asarray([0,0,0.85])
     @staticmethod
     def get_path(root, scene_name, frame_idx):
         path = os.path.join(root, scene_name, f"{frame_idx}")
@@ -160,12 +160,16 @@ class DataLoadUtil:
         return cam_pose_after

     @staticmethod
-    def load_cam_info(path, binocular=False):
+    def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
         camera_params_path = os.path.join(os.path.dirname(path), "camera_params", os.path.basename(path) + ".json")
         with open(camera_params_path, 'r') as f:
             label_data = json.load(f)
         cam_to_world = np.asarray(label_data["extrinsic"])
         cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
+        world_to_display_table = np.eye(4)
+        world_to_display_table[:3, 3] = - DataLoadUtil.DISPLAY_TABLE_POSITION
+        if display_table_as_world_space_origin:
+            cam_to_world = np.dot(world_to_display_table, cam_to_world)
         cam_intrinsic = np.asarray(label_data["intrinsic"])
         cam_info = {
             "cam_to_world": cam_to_world,
@@ -176,10 +180,13 @@ class DataLoadUtil:
         if binocular:
             cam_to_world_R = np.asarray(label_data["extrinsic_R"])
             cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
-            cam_info["cam_to_world_R"] = cam_to_world_R
             cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
             cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
+            if display_table_as_world_space_origin:
+                cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O)
+                cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R)
             cam_info["cam_to_world_O"] = cam_to_world_O
+            cam_info["cam_to_world_R"] = cam_to_world_R
         return cam_info

     @staticmethod
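This is the substantive change of the commit: when display_table_as_world_space_origin is set, every extrinsic (left camera, right camera, and the object camera) is left-multiplied by a pure translation that moves the display table centre (0, 0, 0.85) to the origin, so all poses are expressed in a table-centred world frame. A standalone sketch with a hypothetical camera pose:

import numpy as np

DISPLAY_TABLE_POSITION = np.asarray([0, 0, 0.85])

# pure translation sending the table centre to the origin
world_to_display_table = np.eye(4)
world_to_display_table[:3, 3] = -DISPLAY_TABLE_POSITION

# hypothetical camera pose: 1 m above the table centre in the old world frame
cam_to_world = np.eye(4)
cam_to_world[:3, 3] = [0.0, 0.0, 1.85]

# left-multiplication re-expresses the pose in the table-centred frame;
# the rotation block is unchanged, the translation shifts by -(0, 0, 0.85)
cam_to_table = world_to_display_table @ cam_to_world
assert np.allclose(cam_to_table[:3, 3], [0.0, 0.0, 1.0])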