From 4c69ed777b46573d2ee265666edfc9bb8610424b Mon Sep 17 00:00:00 2001
From: hofee
Date: Thu, 12 Sep 2024 15:11:09 +0800
Subject: [PATCH] fix bug for training

---
 app_split.py                          |   2 +-
 app_train.py                          |   4 +-
 configs/split_dataset_config.yaml     |  10 +--
 configs/strategy_generate_config.yaml |   4 +-
 configs/train_config.yaml             |  56 ++++++++-----
 configs/view_generate_config.yaml     |   4 +-
 core/dataset.py                       | 112 ++++++++++++++------
 core/pipeline.py                      |  51 ++++++------
 modules/gf_view_finder.py             |  17 ++--
 modules/pointnet_encoder.py           |  20 ++++-
 modules/transformer_seq_encoder.py    |   2 +-
 runners/strategy_generator.py         |   9 +++
 utils/data_load.py                    |  27 +++++++
 utils/pts.py                          |   1 -
 utils/reconstruction.py               |   1 -
 15 files changed, 200 insertions(+), 120 deletions(-)

diff --git a/app_split.py b/app_split.py
index 900aacc..35e803b 100644
--- a/app_split.py
+++ b/app_split.py
@@ -5,5 +5,5 @@ from runners.data_spliter import DataSpliter
 class DataSplitApp:
     @staticmethod
     def start():
-        DataSpliter(r"configs\split_dataset_config.yaml").run()
+        DataSpliter("configs/split_dataset_config.yaml").run()
\ No newline at end of file
diff --git a/app_train.py b/app_train.py
index f8af38d..8980dcf 100644
--- a/app_train.py
+++ b/app_train.py
@@ -1,8 +1,8 @@
 from PytorchBoot.application import PytorchBootApplication
-from runners.strategy_generator import StrategyGenerator
+from PytorchBoot.runners.trainer import DefaultTrainer

 @PytorchBootApplication("train")
 class TrainApp:
     @staticmethod
     def start():
-        StrategyGenerator(r"configs\train_config.yaml").run()
\ No newline at end of file
+        DefaultTrainer("configs/train_config.yaml").run()
\ No newline at end of file
diff --git a/configs/split_dataset_config.yaml b/configs/split_dataset_config.yaml
index f2f2805..7ef58e9 100644
--- a/configs/split_dataset_config.yaml
+++ b/configs/split_dataset_config.yaml
@@ -10,13 +10,13 @@ runner:
     root_dir: "experiments"

   split:
-    root_dir: "C:\\Document\\Local Project\\nbv_rec\\data\\sample"
+    root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
     type: "unseen_instance" # "unseen_category"
     datasets:
       OmniObject3d_train:
-        path: "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_train.txt"
-        ratio: 0.5
+        path: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt"
+        ratio: 0.9
       OmniObject3d_test:
-        path: "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_test.txt"
-        ratio: 0.5
\ No newline at end of file
+        path: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_test.txt"
+        ratio: 0.1
\ No newline at end of file
diff --git a/configs/strategy_generate_config.yaml b/configs/strategy_generate_config.yaml
index aadf271..73fae50 100644
--- a/configs/strategy_generate_config.yaml
+++ b/configs/strategy_generate_config.yaml
@@ -18,12 +18,14 @@ runner:
     save_points: False
     save_best_combined_points: True
     save_mesh: True
+    overwrite: False
     dataset_list:
       - OmniObject3d

 datasets:
   OmniObject3d:
-    root_dir: "/media/hofee/data/project/python/nbv_reconstruction/nbv_rec_visualize/data/sample"
+    #"/media/hofee/data/data/temp_output"
+    root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
     model_dir: "/media/hofee/data/data/scaled_object_meshes"
     #output_dir: "/media/hofee/data/data/label_output"
diff --git a/configs/train_config.yaml b/configs/train_config.yaml
index 39ffc69..b8d9c0b 100644
--- a/configs/train_config.yaml
+++ b/configs/train_config.yaml
@@ -2,15 +2,16 @@ runner:

   general:
     seed: 0
-    device: cpu
+    device: cuda
     cuda_visible_devices: "0,1,2,3,4,5,6,7"
+    parallel: False

   experiment:
-    name: debug
+    name: test_overfit
     root_dir: "experiments"
     use_checkpoint: False
     epoch: -1 # -1 stands for last epoch
-    max_epochs: 5
+    max_epochs: 5000
     save_checkpoint_interval: 1
     test_first: False

@@ -19,34 +20,43 @@ runner:
       type: Adam
       lr: 0.0001
     losses:
-      - mse_loss
+      - gf_loss
     dataset: OmniObject3d_train
   test:
     frequency: 3 # test frequency
     dataset_list:
-      - OmniObject3d_train
+      - OmniObject3d_test

   pipeline: nbv_reconstruction_pipeline

-datasets:
+dataset:
   OmniObject3d_train:
-    root_dir: "C:\\Document\\Local Project\\nbv_rec\\data\\sample"
-    split_file: "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_train.txt"
+    root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
+    source: nbv_reconstruction_dataset
+    split_file: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt"
     ratio: 1.0
     batch_size: 1
     num_workers: 12
-    pts_num: 2048
+    pts_num: 4096

   OmniObject3d_test:
-    root_dir: "C:\\Document\\Local Project\\nbv_rec\\data\\sample"
-    split_file: "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_test.txt"
+    root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
+    source: nbv_reconstruction_dataset
+    split_file: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt"
     eval_list:
       - pose_diff
-    ratio: 1.0
+    ratio: 0.1
     batch_size: 1
     num_workers: 1
-    pts_num: 2048
+    pts_num: 4096

+pipeline:
+  nbv_reconstruction_pipeline:
+    pts_encoder: pointnet_encoder
+    seq_encoder: transformer_seq_encoder
+    pose_encoder: pose_encoder
+    view_finder: gf_view_finder
+

 module:

   pointnet_encoder:
@@ -58,13 +68,15 @@ module:
   transformer_seq_encoder:
     pts_embed_dim: 1024
     pose_embed_dim: 256
-    num_heads: 4
-    ffn_dim: 256
-    num_layers: 3
-    max_seq_len: 30
-    output_dim: 2048
+    num_heads: 2 # 4
+    ffn_dim: 128 # 256
+    num_layers: 2 # 3
+    output_dim: 1024 # 2048

   gf_view_finder:
+    t_feat_dim: 128
+    pose_feat_dim: 256
+    main_feat_dim: 1024 # 2048
     regression_head: Rx_Ry_and_T
     pose_mode: rot_matrix
     per_point_feature: False
@@ -74,4 +86,10 @@ module:

   pose_encoder:
     pose_dim: 9
-    output_dim: 256
\ No newline at end of file
+    out_dim: 256
+
+loss_function:
+  gf_loss:
+
+evaluation_method:
+  pose_diff:
\ No newline at end of file
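Each name in the new pipeline block above is resolved to a registered component when the pipeline is constructed (see the ComponentFactory.create calls in core/pipeline.py further down). A minimal sketch of that name-to-class lookup pattern; the registry, decorator, and stub class here are illustrative stand-ins, not the actual PytorchBoot API:

import_note = None  # pure-Python sketch, no external dependencies

# Illustrative stand-in for PytorchBoot's ComponentFactory: config values such
# as "pts_encoder: pointnet_encoder" are plain strings looked up in a registry.
MODULE_REGISTRY = {}

def register_module(name):
    def wrap(cls):
        MODULE_REGISTRY[name] = cls   # config key -> class
        return cls
    return wrap

def create_module(name, config=None):
    return MODULE_REGISTRY[name](config or {})

@register_module("pointnet_encoder")
class PointNetEncoderStub:
    def __init__(self, config):
        self.out_dim = config.get("out_dim", 1024)

pts_encoder = create_module("pointnet_encoder", {"out_dim": 1024})
print(pts_encoder.out_dim)  # 1024
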
diff --git a/configs/view_generate_config.yaml b/configs/view_generate_config.yaml
index a7a8089..cb5b8a8 100644
--- a/configs/view_generate_config.yaml
+++ b/configs/view_generate_config.yaml
@@ -9,10 +9,10 @@ runner:
   generate:
     object_dir: /media/hofee/data/data/scaled_object_meshes
     table_model_path: /media/hofee/data/data/others/table.obj
-    output_dir: /media/hofee/data/data/temp_output
+    output_dir: /media/hofee/repository/nbv_reconstruction_data_512
     binocular_vision: true
     plane_size: 10
-    max_views: 256
+    max_views: 512
     min_views: 64
     max_diag: 0.7
     min_diag: 0.1
diff --git a/core/dataset.py b/core/dataset.py
index 8756563..b4d810a 100644
--- a/core/dataset.py
+++ b/core/dataset.py
@@ -1,10 +1,10 @@
 import numpy as np
 from PytorchBoot.dataset import BaseDataset
 import PytorchBoot.stereotype as stereotype
-from torch.nn.utils.rnn import pad_sequence
+import torch
 import sys

-sys.path.append(r"C:\Document\Local Project\nbv_rec\nbv_reconstruction")
+sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction")

 from utils.data_load import DataLoadUtil
 from utils.pose import PoseUtil
@@ -56,18 +56,25 @@ class NBVReconstructionDataset(BaseDataset):
         scene_name = data_item_info["scene_name"]
         scanned_views_pts, scanned_coverages_rate, scanned_n_to_1_pose = [], [], []
         first_frame_idx = scanned_views[0][0]
-        first_frame_to_world = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx))["cam_to_world"]
+        first_cam_info = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx), binocular=True)
+        first_frame_to_world = first_cam_info["cam_to_world"]
         for view in scanned_views:
             frame_idx = view[0]
             coverage_rate = view[1]
             view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
-            depth = DataLoadUtil.load_depth(view_path)
-            cam_info = DataLoadUtil.load_cam_info(view_path)
-            mask = DataLoadUtil.load_seg(view_path)
-            frame_curr_to_world = cam_info["cam_to_world"]
-            n_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), frame_curr_to_world)
-            target_point_cloud = DataLoadUtil.get_target_point_cloud(depth, cam_info["cam_intrinsic"], n_to_1_pose, mask)["points_world"]
-            downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(target_point_cloud, self.pts_num)
+            cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True)
+            n_to_world_pose = cam_info["cam_to_world"]
+            nR_to_world_pose = cam_info["cam_to_world_R"]
+            n_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), n_to_world_pose)
+            nR_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), nR_to_world_pose)
+            depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True)
+            point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_info['cam_intrinsic'], n_to_1_pose)['points_world']
+            point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_info['cam_intrinsic'], nR_to_1_pose)['points_world']
+
+            point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
+            point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
+            overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
+            downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num)
             scanned_views_pts.append(downsampled_target_point_cloud)
             scanned_coverages_rate.append(coverage_rate)
             n_to_1_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(n_to_1_pose[:3,:3]))
@@ -86,10 +93,10 @@ class NBVReconstructionDataset(BaseDataset):

         data_item = {
             "scanned_pts": np.asarray(scanned_views_pts,dtype=np.float32),
-            "scanned_coverage_rate": np.asarray(scanned_coverages_rate,dtype=np.float32),
+            "scanned_coverage_rate": scanned_coverages_rate,
             "scanned_n_to_1_pose_9d": np.asarray(scanned_n_to_1_pose,dtype=np.float32),
             "best_coverage_rate": nbv_coverage_rate,
-            "best_to_1_pose_9d": best_to_1_9d,
+            "best_to_1_pose_9d": np.asarray(best_to_1_9d,dtype=np.float32),
             "max_coverage_rate": max_coverage_rate,
             "scene_name": scene_name
         }
@@ -101,23 +108,14 @@ class NBVReconstructionDataset(BaseDataset):

     def get_collate_fn(self):
         def collate_fn(batch):
-            scanned_pts = [item['scanned_pts'] for item in batch]
-            scanned_n_to_1_pose_9d = [item['scanned_n_to_1_pose_9d'] for item in batch]
-            rest = {}
+            collate_data = {}
+            collate_data["scanned_pts"] = [torch.tensor(item['scanned_pts']) for item in batch]
+            collate_data["scanned_n_to_1_pose_9d"] = [torch.tensor(item['scanned_n_to_1_pose_9d']) for item in batch]
+            collate_data["best_to_1_pose_9d"] = torch.stack([torch.tensor(item['best_to_1_pose_9d']) for item in batch])
             for key in batch[0].keys():
-                if key in ['scanned_pts', 'scanned_n_to_1_pose_9d']:
-                    continue
-                if isinstance(batch[0][key], torch.Tensor):
-                    rest[key] = torch.stack([item[key] for item in batch])
-                elif isinstance(batch[0][key], str):
-                    rest[key] = [item[key] for item in batch]
-                else:
-                    rest[key] = [item[key] for item in batch]
-            return {
-                'scanned_pts': scanned_pts,
-                'scanned_n_to_1_pose_9d': scanned_n_to_1_pose_9d,
-                **rest
-            }
+                if key not in ["scanned_pts", "scanned_n_to_1_pose_9d", "best_to_1_pose_9d"]:
+                    collate_data[key] = [item[key] for item in batch]
+            return collate_data
         return collate_fn

 if __name__ == "__main__":
@@ -126,36 +124,48 @@ if __name__ == "__main__":
     torch.manual_seed(seed)
     np.random.seed(seed)
     config = {
-        "root_dir": "C:\\Document\\Local Project\\nbv_rec\\data\\sample",
-        "split_file": "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_train.txt",
+        "root_dir": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes",
+        "split_file": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt",
         "ratio": 0.5,
         "batch_size": 2,
         "num_workers": 0,
-        "pts_num": 2048
+        "pts_num": 32684
     }
     ds = NBVReconstructionDataset(config)
     print(len(ds))
+    #ds.__getitem__(10)
     dl = ds.get_loader(shuffle=True)
     for idx, data in enumerate(dl):
-        cnt=0
-        print(data["scene_name"])
-        print(data["scanned_coverage_rate"])
-        print(data["best_coverage_rate"])
-        for pts in data["scanned_pts"][0]:
-            #np.savetxt(f"pts_{cnt}.txt", pts)
-            cnt+=1
-        #np.savetxt("best_pts.txt", best_pts)
-        for key, value in data.items():
-            if isinstance(value, torch.Tensor):
-                print(key, ":" ,value.shape)
-            else:
-                print(key, ":" ,len(value))
-            if key == "scanned_n_to_1_pose_9d":
-                for val in value:
-                    print(val.shape)
-            if key == "scanned_pts":
-                for val in value:
-                    print(val.shape)
+        data = ds.process_batch(data, "cuda:0")
+        print(data)
+        break
+    #
+    # for idx, data in enumerate(dl):
+    #     cnt=0
+    #     print(data["scene_name"])
+    #     print(data["scanned_coverage_rate"])
+    #     print(data["best_coverage_rate"])
+    #     for pts in data["scanned_pts"][0]:
+    #         #np.savetxt(f"pts_{cnt}.txt", pts)
+    #         cnt+=1
+    #     #np.savetxt("best_pts.txt", best_pts)
+    #     for key, value in data.items():
+    #         if isinstance(value, torch.Tensor):
+    #             print(key, ":" ,value.shape)
+    #         else:
+    #             print(key, ":" ,len(value))
+    #         if key == "scanned_n_to_1_pose_9d":
+    #             for val in value:
+    #                 print(val.shape)
+    #         if key == "scanned_pts":
+    #             print("scanned_pts")
+    #             for val in value:
+    #                 print(val.shape)
+    #                 cnt = 0
+    #                 for v in val:
+    #                     import ipdb;ipdb.set_trace()
+    #                     np.savetxt(f"pts_{cnt}.txt", v)
+    #                     cnt+=1

-        print()
\ No newline at end of file
+    # print()
\ No newline at end of file
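The rewritten collate_fn keeps scanned_pts and scanned_n_to_1_pose_9d as lists of per-scene tensors, because each scene contributes a different number of scanned views and they cannot be stacked into one rectangular tensor; only the fixed-size best_to_1_pose_9d target is stacked. A runnable toy version of the same convention (shapes are illustrative):

import numpy as np
import torch

def collate_fn(batch):
    # Per-scene view sequences have different lengths, so they stay as lists
    # of tensors; only the fixed-size NBV target pose is stacked.
    return {
        "scanned_pts": [torch.tensor(item["scanned_pts"]) for item in batch],
        "best_to_1_pose_9d": torch.stack(
            [torch.tensor(item["best_to_1_pose_9d"]) for item in batch]),
    }

# Toy batch: scene A has 2 scanned views, scene B has 3.
batch = [
    {"scanned_pts": np.zeros((2, 4096, 3), np.float32),
     "best_to_1_pose_9d": np.zeros(9, np.float32)},
    {"scanned_pts": np.zeros((3, 4096, 3), np.float32),
     "best_to_1_pose_9d": np.zeros(9, np.float32)},
]
out = collate_fn(batch)
print([tuple(t.shape) for t in out["scanned_pts"]])  # [(2, 4096, 3), (3, 4096, 3)]
print(tuple(out["best_to_1_pose_9d"].shape))         # (2, 9)
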
diff --git a/core/pipeline.py b/core/pipeline.py
index 223f83b..f4dba95 100644
--- a/core/pipeline.py
+++ b/core/pipeline.py
@@ -14,12 +14,11 @@ class NBVReconstructionPipeline(nn.Module):
         self.pose_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["pose_encoder"])
         self.seq_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["seq_encoder"])
         self.view_finder = ComponentFactory.create(namespace.Stereotype.MODULE, config["view_finder"])
+        self.eps = 1e-5

     def forward(self, data):
         mode = data["mode"]
-        # ----- Debug Trace ----- #
-        import ipdb; ipdb.set_trace()
-        # ------------------------ #
+
         if mode == namespace.Mode.TRAIN:
             return self.forward_train(data)
         elif mode == namespace.Mode.TEST:
@@ -27,29 +26,22 @@ class NBVReconstructionPipeline(nn.Module):
         else:
             Log.error("Unknown mode: {}".format(mode), True)

-    def pertube_data(self, gt_delta_rot_6d):
-        bs = gt_delta_rot_6d.shape[0]
-        random_t = torch.rand(bs, device=self.device) * (1. - self.eps) + self.eps
+    def perturb_data(self, gt_delta_9d):
+        bs = gt_delta_9d.shape[0]
+        random_t = torch.rand(bs, device=gt_delta_9d.device) * (1. - self.eps) + self.eps
         random_t = random_t.unsqueeze(-1)
-        mu, std = self.view_finder.marginal_prob(gt_delta_rot_6d, random_t)
+        mu, std = self.view_finder.marginal_prob(gt_delta_9d, random_t)
         std = std.view(-1, 1)
-        z = torch.randn_like(gt_delta_rot_6d)
+        z = torch.randn_like(gt_delta_9d)
         perturbed_x = mu + z * std
         target_score = - z * std / (std ** 2)
         return perturbed_x, random_t, target_score, std

     def forward_train(self, data):
-        pts_list = data['pts_list']
-        pose_list = data['pose_list']
-        gt_rot_6d = data["nbv_cam_pose"]
-        pts_feat_list = []
-        pose_feat_list = []
-        for pts,pose in zip(pts_list,pose_list):
-            pts_feat_list.append(self.pts_encoder.encode_points(pts))
-            pose_feat_list.append(self.pose_encoder.encode_pose(pose))
-        seq_feat = self.seq_encoder.encode_sequence(pts_feat_list, pose_feat_list)
+        seq_feat = self.get_seq_feat(data)
         ''' get std '''
-        perturbed_x, random_t, target_score, std = self.pertube_data(gt_rot_6d)
+        best_to_1_pose_9d_batch = data["best_to_1_pose_9d"]
+        perturbed_x, random_t, target_score, std = self.perturb_data(best_to_1_pose_9d_batch)
         input_data = {
             "sampled_pose": perturbed_x,
             "t": random_t,
@@ -64,14 +56,7 @@ class NBVReconstructionPipeline(nn.Module):
         return output

     def forward_test(self,data):
-        pts_list = data['pts_list']
-        pose_list = data['pose_list']
-        pts_feat_list = []
-        pose_feat_list = []
-        for pts,pose in zip(pts_list,pose_list):
-            pts_feat_list.append(self.pts_encoder.encode_points(pts))
-            pose_feat_list.append(self.pose_encoder.encode_pose(pose))
-        seq_feat = self.seq_encoder.encode_sequence(pts_feat_list, pose_feat_list)
+        seq_feat = self.get_seq_feat(data)
         estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(seq_feat)
         result = {
             "pred_pose_9d": estimated_delta_rot_9d,
@@ -79,4 +64,18 @@ class NBVReconstructionPipeline(nn.Module):
         }
         return result

+    def get_seq_feat(self, data):
+        scanned_pts_batch = data['scanned_pts']
+        scanned_n_to_1_pose_9d_batch = data['scanned_n_to_1_pose_9d']
+        best_to_1_pose_9d_batch = data["best_to_1_pose_9d"]
+        pts_feat_seq_list = []
+        pose_feat_seq_list = []
+
+        for scanned_pts,scanned_n_to_1_pose_9d in zip(scanned_pts_batch,scanned_n_to_1_pose_9d_batch):
+            scanned_pts = scanned_pts.to(best_to_1_pose_9d_batch.device)
+            scanned_n_to_1_pose_9d = scanned_n_to_1_pose_9d.to(best_to_1_pose_9d_batch.device)
+            pts_feat_seq_list.append(self.pts_encoder.encode_points(scanned_pts))
+            pose_feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_1_pose_9d))
+        seq_feat = self.seq_encoder.encode_sequence(pts_feat_seq_list, pose_feat_seq_list)
+        return seq_feat
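perturb_data above is standard denoising score matching: the ground-truth 9D pose is perturbed as x_t = mu + std * z, and the regression target -z * std / std^2 = -z / std is exactly the score grad_x log p(x_t | x_0) of the Gaussian perturbation kernel. A self-contained numeric check, with a toy VE-SDE marginal standing in for the one flib.init_sde provides:

import torch

def marginal_prob(x0, t, sigma=25.0):
    # Toy VE-SDE marginal (stand-in for the one flib.init_sde returns):
    # the mean stays x0 while the noise std grows with t.
    std = torch.sqrt((sigma ** (2 * t) - 1.0) / (2.0 * torch.log(torch.tensor(sigma))))
    return x0, std

def perturb_data(x0, eps=1e-5):
    bs = x0.shape[0]
    random_t = torch.rand(bs) * (1.0 - eps) + eps
    mu, std = marginal_prob(x0, random_t.unsqueeze(-1))
    z = torch.randn_like(x0)
    perturbed_x = mu + z * std
    target_score = -z * std / (std ** 2)  # simplifies to -z / std
    return perturbed_x, random_t, target_score, std

x0 = torch.randn(4, 9)  # batch of 9D pose targets
perturbed_x, t, target_score, std = perturb_data(x0)
# Analytic score of the Gaussian kernel: grad_x log N(x; mu, std^2) = -(x - mu) / std^2
analytic = -(perturbed_x - x0) / std ** 2
print(torch.allclose(target_score, analytic, atol=1e-5))  # True
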
diff --git a/modules/gf_view_finder.py b/modules/gf_view_finder.py
index 1e1cceb..bc20586 100644
--- a/modules/gf_view_finder.py
+++ b/modules/gf_view_finder.py
@@ -33,19 +33,22 @@ class GradientFieldViewFinder(nn.Module):
         pose_dim = PoseUtil.get_pose_dim(self.pose_mode)
         self.prior_fn, self.marginal_prob_fn, self.sde_fn, self.sampling_eps, self.T = flib.init_sde(config["sde_mode"])
         self.sampling_steps = config["sampling_steps"]
+        self.t_feat_dim = config["t_feat_dim"]
+        self.pose_feat_dim = config["pose_feat_dim"]
+        self.main_feat_dim = config["main_feat_dim"]

         ''' encode pose '''
         self.pose_encoder = nn.Sequential(
-            nn.Linear(pose_dim, 256),
+            nn.Linear(pose_dim, self.pose_feat_dim),
             self.act,
-            nn.Linear(256, 256),
+            nn.Linear(self.pose_feat_dim, self.pose_feat_dim),
             self.act,
         )

         ''' encode t '''
         self.t_encoder = nn.Sequential(
-            mlib.GaussianFourierProjection(embed_dim=128),
-            nn.Linear(128, 128),
+            mlib.GaussianFourierProjection(embed_dim=self.t_feat_dim),
+            nn.Linear(self.t_feat_dim, self.t_feat_dim),
             self.act,
         )

@@ -56,18 +59,18 @@ class GradientFieldViewFinder(nn.Module):
         if not self.per_point_feature:
             ''' rotation_x_axis regress head '''
             self.fusion_tail_rot_x = nn.Sequential(
-                nn.Linear(128 + 256 + 2048, 256),
+                nn.Linear(self.t_feat_dim + self.pose_feat_dim + self.main_feat_dim, 256),
                 self.act,
                 zero_module(nn.Linear(256, 3)),
             )
             self.fusion_tail_rot_y = nn.Sequential(
-                nn.Linear(128 + 256 + 2048, 256),
+                nn.Linear(self.t_feat_dim + self.pose_feat_dim + self.main_feat_dim, 256),
                 self.act,
                 zero_module(nn.Linear(256, 3)),
             )
             ''' tranalation regress head '''
             self.fusion_tail_trans = nn.Sequential(
-                nn.Linear(128 + 256 + 2048, 256),
+                nn.Linear(self.t_feat_dim + self.pose_feat_dim + self.main_feat_dim, 256),
                 self.act,
                 zero_module(nn.Linear(256, 3)),
             )
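The hard-coded fusion width 128 + 256 + 2048 becomes config-driven; with the values now in configs/train_config.yaml the regression heads expect 128 + 256 + 1024 = 1408 input features, matching the seq encoder's reduced output_dim. A quick shape check (plain ReLU and default init stand in for the module's configured activation and zero_module output layer):

import torch
import torch.nn as nn

# Feature widths from configs/train_config.yaml above.
t_feat_dim, pose_feat_dim, main_feat_dim = 128, 256, 1024

fusion_tail = nn.Sequential(
    nn.Linear(t_feat_dim + pose_feat_dim + main_feat_dim, 256),
    nn.ReLU(),
    nn.Linear(256, 3),
)

# [t_feat | pose_feat | seq_feat] concatenates to 1408 features; the old
# hard-coded 128 + 256 + 2048 no longer matches the 1024-dim sequence feature.
feat = torch.cat([torch.zeros(2, t_feat_dim),
                  torch.zeros(2, pose_feat_dim),
                  torch.zeros(2, main_feat_dim)], dim=-1)
print(fusion_tail(feat).shape)  # torch.Size([2, 3])
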
diff --git a/modules/pointnet_encoder.py b/modules/pointnet_encoder.py
index b669a4c..6483709 100644
--- a/modules/pointnet_encoder.py
+++ b/modules/pointnet_encoder.py
@@ -54,6 +54,7 @@ class PointNetEncoder(nn.Module):

     def encode_points(self, pts):
         pts = pts.transpose(2, 1)
+
         if not self.global_feat:
             pts_feature = self(pts).transpose(2, 1)
         else:
@@ -98,11 +99,24 @@ class STNkd(nn.Module):

 if __name__ == "__main__":
     sim_data = Variable(torch.rand(32, 2500, 3))
-
-    pointnet_global = PointNetEncoder(global_feat=True)
+    config = {
+        "in_dim": 3,
+        "out_dim": 1024,
+        "global_feat": True,
+        "feature_transform": False
+    }
+    pointnet_global = PointNetEncoder(config)
     out = pointnet_global.encode_points(sim_data)
+
     print("global feat", out.size())

-    pointnet = PointNetEncoder(global_feat=False)
+    config = {
+        "in_dim": 3,
+        "out_dim": 1024,
+        "global_feat": False,
+        "feature_transform": False
+    }
+
+    pointnet = PointNetEncoder(config)
     out = pointnet.encode_points(sim_data)
     print("point feat", out.size())
diff --git a/modules/transformer_seq_encoder.py b/modules/transformer_seq_encoder.py
index 79f151c..8b22b4f 100644
--- a/modules/transformer_seq_encoder.py
+++ b/modules/transformer_seq_encoder.py
@@ -38,7 +38,7 @@ class TransformerSequenceEncoder(nn.Module):

         # Prepare mask for padding
         max_len = max(lengths)
-        padding_mask = torch.tensor([([0] * length + [1] * (max_len - length)) for length in lengths], dtype=torch.bool)
+        padding_mask = torch.tensor([([0] * length + [1] * (max_len - length)) for length in lengths], dtype=torch.bool).to(combined_tensor.device)

         # Transformer encoding
         transformer_output = self.transformer_encoder(combined_tensor, src_key_padding_mask=padding_mask)
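The one-line change in transformer_seq_encoder.py moves the padding mask onto the same device as the encoded sequence: torch.tensor allocates on the CPU by default, so on a CUDA run the old code would fail inside the attention call. A minimal sketch of building the mask next to its data:

import torch

def make_padding_mask(lengths, device):
    # True marks padded positions, matching src_key_padding_mask semantics.
    max_len = max(lengths)
    return torch.tensor(
        [[0] * n + [1] * (max_len - n) for n in lengths],
        dtype=torch.bool, device=device)

seq = torch.zeros(2, 3, 16)  # (batch, max_seq_len, feat), possibly on CUDA
mask = make_padding_mask([2, 3], device=seq.device)
print(mask)
# tensor([[False, False,  True],
#         [False, False, False]])
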
diff --git a/runners/strategy_generator.py b/runners/strategy_generator.py
index e9c3679..af90ea3 100644
--- a/runners/strategy_generator.py
+++ b/runners/strategy_generator.py
@@ -26,6 +26,7 @@ class StrategyGenerator(Runner):
         self.save_best_combined_pts = ConfigManager.get("runner", "generate", "save_best_combined_points")
         self.save_mesh = ConfigManager.get("runner", "generate", "save_mesh")
         self.filter_degree = ConfigManager.get("runner", "generate", "filter_degree")
+        self.overwrite = ConfigManager.get("runner", "generate", "overwrite")



@@ -44,6 +45,14 @@ class StrategyGenerator(Runner):
         for scene_name in scene_name_list:
             Log.info(f"({dataset_name})Processing [{cnt}/{total}]: {scene_name}")
             status_manager.set_progress("generate", "strategy_generator", "scene", cnt, total)
+            diag = DataLoadUtil.get_bbox_diag(model_dir, scene_name)
+            voxel_threshold = diag*0.02
+            status_manager.set_status("generate", "strategy_generator", "voxel_threshold", voxel_threshold)
+            output_label_path = DataLoadUtil.get_label_path(root_dir, scene_name)
+            if os.path.exists(output_label_path) and not self.overwrite:
+                Log.info(f"Scene <{scene_name}> Already Exists, Skip")
+                cnt += 1
+                continue
             self.generate_sequence(root_dir, model_dir, scene_name,voxel_threshold, overlap_threshold)
             cnt += 1
         status_manager.set_progress("generate", "strategy_generator", "scene", total, total)
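The generator now derives its voxel threshold from each mesh's bounding-box diagonal (2% of it, via the new get_bbox_diag helper below) and skips scenes whose label file already exists unless overwrite is set. For scale, a short worked example of the threshold; the extents are made up:

import numpy as np

# Hypothetical mesh bounding-box extents, in meters.
extents = np.array([0.3, 0.2, 0.4])
diag = np.linalg.norm(extents)   # ~0.539
voxel_threshold = diag * 0.02    # ~0.011, i.e. roughly 1 cm voxels
print(round(diag, 3), round(voxel_threshold, 3))
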
diff --git a/utils/data_load.py b/utils/data_load.py
index 48628d9..62c644b 100644
--- a/utils/data_load.py
+++ b/utils/data_load.py
@@ -45,6 +45,15 @@ class DataLoadUtil:
         mesh.apply_transform(world_object_pose)
         return mesh

+    @staticmethod
+    def get_bbox_diag(model_dir, object_name):
+        model_path = os.path.join(model_dir, object_name, "mesh.obj")
+        mesh = trimesh.load(model_path)
+        bbox = mesh.bounding_box.extents
+        diagonal_length = np.linalg.norm(bbox)
+        return diagonal_length
+
+
     @staticmethod
     def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
         mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
@@ -192,6 +201,24 @@ class DataLoadUtil:
             "points_world": target_points_world,
             "points_camera": target_points_camera
         }
+
+    @staticmethod
+    def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
+        h, w = depth.shape
+        i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy')
+
+        z = depth
+        x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        points_camera_aug = np.concatenate([points_camera, np.ones((points_camera.shape[0], 1))], axis=-1)
+
+        points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
+        return {
+            "points_world": points_world,
+            "points_camera": points_camera
+        }

     @staticmethod
     def get_target_point_cloud_world_from_path(path, binocular=False, random_downsample_N=65536, voxel_size = 0.005, target_mask_label=(0,255,0,255)):
diff --git a/utils/pts.py b/utils/pts.py
index 8c44f5d..19d6e2a 100644
--- a/utils/pts.py
+++ b/utils/pts.py
@@ -5,7 +5,6 @@ class PtsUtil:

     @staticmethod
     def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005):
-        print("voxel_size: ", voxel_size)
         o3d_pc = o3d.geometry.PointCloud()
         o3d_pc.points = o3d.utility.Vector3dVector(point_cloud)
         downsampled_pc = o3d_pc.voxel_down_sample(voxel_size)
diff --git a/utils/reconstruction.py b/utils/reconstruction.py
index 4d5955f..1bc3436 100644
--- a/utils/reconstruction.py
+++ b/utils/reconstruction.py
@@ -6,7 +6,6 @@ class ReconstructionUtil:

     @staticmethod
     def compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold=0.01):
-        print("threshold", threshold)
         kdtree = cKDTree(combined_point_cloud)
         distances, _ = kdtree.query(target_point_cloud)
         covered_points = np.sum(distances < threshold)
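The new DataLoadUtil.get_point_cloud is a standard pinhole back-projection, x = (u - cx) * z / fx and y = (v - cy) * z / fy, followed by a homogeneous transform into the world frame. A self-contained round-trip check of the same math, re-implemented inline with made-up intrinsics:

import numpy as np

def get_point_cloud(depth, K, cam_extrinsic):
    # Same pinhole back-projection as DataLoadUtil.get_point_cloud above.
    h, w = depth.shape
    i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy')
    z = depth
    x = (i - K[0, 2]) * z / K[0, 0]
    y = (j - K[1, 2]) * z / K[1, 1]
    points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
    points_aug = np.concatenate(
        [points_camera, np.ones((points_camera.shape[0], 1))], axis=-1)
    return (cam_extrinsic @ points_aug.T).T[:, :3]

K = np.array([[500.0, 0.0, 320.0],
              [0.0, 500.0, 240.0],
              [0.0, 0.0, 1.0]])
depth = np.full((480, 640), 2.0)      # a flat wall 2 m in front of the camera
points_world = get_point_cloud(depth, K, np.eye(4))
# The pixel at the principal point (u=320, v=240) back-projects to (0, 0, 2):
print(points_world[240 * 640 + 320])  # [0. 0. 2.]
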