commit 3ab046b134 (parent d9d2716ba7)
hofee, 2024-10-08 21:28:30 +08:00

11 changed files with 773 additions and 354 deletions

.gitignore

@@ -3,7 +3,7 @@
__pycache__/
*.py[cod]
*$py.class
test_output/
# C extensions
*.so


@@ -10,7 +10,8 @@ runner:
root_dir: "experiments"
generate:
model_dir: "/home/yan20/nbv_rec/data/test_CAD/test_model"
model_dir: "/home/user/nbv_rec/data/models"
table_model_path: "/home/user/nbv_rec/data/table.obj"
model_start_idx: 0
voxel_size: 0.005
max_view: 512
@@ -19,6 +20,19 @@
min_diag: 0.01
random_view_ratio: 0
min_cam_table_included_degree: 20
obj_name: "bear"
light_and_camera_config:
Camera:
near_plane: 0.01
far_plane: 5
fov_vertical: 25
resolution: [1280,800]
eye_distance: 0.15
eye_angle: 25
Light:
location: [0,0,3.5]
orientation: [0,0,0]
power: 150
reconstruct:
soft_overlap_threshold: 0.3
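Note: these new values are consumed through PytorchBoot's ConfigManager, the same accessor the runners below use for the reconstruct block. A minimal sketch of reading the camera/light settings back (the "generate" key path is an assumption based on that pattern):

    from PytorchBoot.config import ConfigManager

    generate_config = ConfigManager.get("runner", "generate")  # assumed key path
    camera = generate_config["light_and_camera_config"]["Camera"]
    light = generate_config["light_and_camera_config"]["Light"]
    near, far = camera["near_plane"], camera["far_plane"]      # 0.01, 5
    width, height = camera["resolution"]                       # 1280 x 800
    power = light["power"]                                     # 150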


@@ -1,5 +1,7 @@
import os
import trimesh
import tempfile
import subprocess
import numpy as np
from PytorchBoot.runners.runner import Runner
from PytorchBoot.config import ConfigManager
@@ -7,16 +9,12 @@ import PytorchBoot.stereotype as stereotype
from PytorchBoot.utils.log_util import Log
from PytorchBoot.status import status_manager
import sys
sys.path.append(r"C:\Document\Local Project\nbv_rec_control")
#from utils.control_util import ControlUtil
from utils.control_util import ControlUtil
from utils.communicate_util import CommunicateUtil
from utils.pts_util import PtsUtil
from utils.view_sample_util import ViewSampleUtil
from utils.reconstruction_util import ReconstructionUtil
from utils.preprocess_util import save_scene_data
from utils.data_load import DataLoadUtil
@stereotype.runner("CAD_strategy_runner")
class CADStrategyRunner(Runner):
@@ -50,79 +48,111 @@ class CADStrategyRunner(Runner):
def load_experiment(self, backup_name=None):
super().load_experiment(backup_name)
def get_pts_from_view_data(self, view_data):
depth = view_data["depth_image"]
depth_intrinsics = view_data["depth_intrinsics"]
depth_extrinsics = view_data["depth_extrinsics"]
cam_pts = PtsUtil.get_pts_from_depth(depth, depth_intrinsics, depth_extrinsics)
return cam_pts
def split_scan_pts_and_obj_pts(self, world_pts, scan_pts_z, z_threshold = 0.003):
scan_pts = world_pts[scan_pts_z < z_threshold]
obj_pts = world_pts[scan_pts_z >= z_threshold]
return scan_pts, obj_pts
def run_one_model(self, model_name):
''' init robot '''
ControlUtil.init()
#ControlUtil.init()
''' load CAD model '''
model_path = os.path.join(self.model_dir, model_name)
model_path = os.path.join(self.model_dir, model_name,"mesh.obj")
cad_model = trimesh.load(model_path)
''' take first view '''
view_data = CommunicateUtil.get_view_data()
first_cam_pts = None
#view_data = CommunicateUtil.get_view_data(init=True)
#first_cam_pts = self.get_pts_from_view_data(view_data)
''' register '''
cad_to_cam = PtsUtil.register_icp(first_cam_pts, cad_model)
cam_to_world = ControlUtil.get_pose()
cad_to_world = cam_to_world @ cad_to_cam
cad_model:trimesh.Trimesh = cad_model.apply_transform(cad_to_world)
''' sample view '''
min_corner = cad_model.bounds[0]
max_corner = cad_model.bounds[1]
diag = np.linalg.norm(max_corner - min_corner)
view_num = int(self.min_view + (diag - self.min_diag)/(self.max_diag - self.min_diag) * (self.max_view - self.min_view))
sampled_view_data = ViewSampleUtil.sample_view_data_world_space(
cad_model, cad_to_world,
voxel_size = self.voxel_size,
max_views = view_num,
min_cam_table_included_degree= self.min_cam_table_included_degree,
random_view_ratio = self.random_view_ratio
)
cam_to_world_poses = sampled_view_data["cam_to_world_poses"]
world_model_points = sampled_view_data["voxel_down_sampled_points"]
#cad_to_cam = PtsUtil.register(first_cam_pts, cad_model)
#cam_to_world = ControlUtil.get_pose()
cad_to_world = np.eye(4) #cam_to_world @ cad_to_cam
world_to_blender_world = np.eye(4)
world_to_blender_world[:3, 3] = np.asarray([0, 0, 0.9215])
cad_to_blender_world = np.dot(world_to_blender_world, cad_to_world)
cad_model:trimesh.Trimesh = cad_model.apply_transform(cad_to_blender_world)
with tempfile.TemporaryDirectory() as temp_dir:
name = "cad_model_world"
cad_model.export(os.path.join(temp_dir, f"{name}.obj"))
temp_dir = "/home/user/nbv_rec/nbv_rec_control/test_output"
scene_dir = os.path.join(temp_dir, name)
script_path = "/home/user/nbv_rec/blender_app/data_generator.py"
''' sample view '''
# import ipdb; ipdb.set_trace()
# print("start running renderer")
# result = subprocess.run([
# 'blender', '-b', '-P', script_path, '--', temp_dir
# ], capture_output=True, text=True)
# print("finish running renderer")
#
world_model_points = np.loadtxt(os.path.join(scene_dir, "points_and_normals.txt"))[:,:3]
''' preprocess '''
# save_scene_data(temp_dir, name)
pts_dir = os.path.join(temp_dir,name,"pts")
sample_view_pts_list = []
scan_points_idx_list = []
frame_num = len(os.listdir(pts_dir))
for frame_idx in range(frame_num):
pts_path = os.path.join(temp_dir,name, "pts", f"{frame_idx}.txt")
idx_path = os.path.join(temp_dir,name, "scan_points_indices", f"{frame_idx}.txt")
point_cloud = np.loadtxt(pts_path)
sampled_point_cloud = PtsUtil.voxel_downsample_point_cloud(point_cloud, self.voxel_size)
indices = np.loadtxt(idx_path, dtype=np.int32)
try:
len(indices)
except:
indices = np.array([indices])
sample_view_pts_list.append(sampled_point_cloud)
scan_points_idx_list.append(indices)
''' take sample view '''
scan_points_idx_list = []
sample_view_pts_list = []
for cam_to_world in cam_to_world_poses:
ControlUtil.move_to(cam_to_world)
''' get world pts '''
view_data = CommunicateUtil.get_view_data()
cam_pts = None
scan_points_idx = None
world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
sample_view_pts_list.append(world_pts)
scan_points_idx_list.append(scan_points_idx)
''' generate strategy '''
''' generate strategy '''
limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_overlap(
world_model_points, sample_view_pts_list,
scan_points_indices_list = scan_points_idx_list,
init_view=0,
threshold=self.voxel_size,
soft_overlap_threshold = self.soft_overlap_threshold,
hard_overlap_threshold = self.hard_overlap_threshold,
scan_points_threshold = self.scan_points_threshold,
status_info=self.status_info
)
''' extract cam_to_world sequence '''
cam_to_world_seq = []
coverage_rate_seq = []
for idx, coverage_rate in limited_useful_view:
cam_to_world_seq.append(cam_to_world_poses[idx])
coverage_rate_seq.append(coverage_rate)
limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_overlap(
world_model_points, sample_view_pts_list,
scan_points_indices_list = scan_points_idx_list,
init_view=0,
threshold=self.voxel_size,
soft_overlap_threshold = self.soft_overlap_threshold,
hard_overlap_threshold = self.hard_overlap_threshold,
scan_points_threshold = self.scan_points_threshold,
status_info=self.status_info
)
''' extract cam_to_world sequence '''
cam_to_world_seq = []
coverage_rate_seq = []
''' take best seq view '''
for cam_to_world in cam_to_world_seq:
ControlUtil.move_to(cam_to_world)
''' get world pts '''
view_data = CommunicateUtil.get_view_data()
cam_pts = None
scan_points_idx = None
world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
sample_view_pts_list.append(world_pts)
scan_points_idx_list.append(scan_points_idx)
from ipdb import set_trace; set_trace()
for idx, coverage_rate in limited_useful_view:
path = DataLoadUtil.get_path(temp_dir, name, idx)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
cam_to_world_seq.append(cam_info["cam_to_world"])
coverage_rate_seq.append(coverage_rate)
# ''' take best seq view '''
# for cam_to_world in cam_to_world_seq:
# ControlUtil.move_to(cam_to_world)
# ''' get world pts '''
# view_data = CommunicateUtil.get_view_data()
# cam_pts = self.get_pts_from_view_data(view_data)
# scan_points_idx = None
# world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
# sample_view_pts_list.append(world_pts)
# scan_points_idx_list.append(scan_points_idx)
def run(self):
@@ -159,32 +189,5 @@ if __name__ == "__main__":
# cad_pts_L = PtsUtil.transform_point_cloud(noisy_pts_L, cad_to_cam_L)
# np.savetxt(r"test.txt", cad_pts_L)
# np.savetxt(r"src.txt", noisy_pts_L)
''' test view sample '''
sampled_view_data = ViewSampleUtil.sample_view_data_world_space(
model, np.eye(4),
voxel_size = 0.005,
max_views = 20,
min_cam_table_included_degree= 20,
random_view_ratio = 0
)
cam_poses = sampled_view_data["cam_to_world_poses"]
cam_poses = np.array(cam_poses)
print(cam_poses.shape)
def sample_camera_direction(cam_pose, num_samples, sample_distance):
cam_position = cam_pose[:3, 3]
cam_direction = cam_pose[:3, 2]
sampled_points = np.array([cam_position - (i + 1) * sample_distance * cam_direction for i in range(num_samples)])
return sampled_points
all_sampled_points = []
for i in range(cam_poses.shape[0]):
sampled_points = sample_camera_direction(cam_poses[i], 10, 0.02)
all_sampled_points.append(sampled_points)
all_sampled_points = np.concatenate(all_sampled_points, axis=0)
np.savetxt(r"cam_poses.txt", cam_poses[:, :3, 3])
np.savetxt(r"cam_directions.txt", all_sampled_points)


@@ -1,87 +0,0 @@
import os
import trimesh
import numpy as np
from PytorchBoot.runners.runner import Runner
from PytorchBoot.config import ConfigManager
import PytorchBoot.stereotype as stereotype
from PytorchBoot.utils.log_util import Log
import PytorchBoot.namespace as namespace
from PytorchBoot.status import status_manager
from utils.control_util import ControlUtil
from utils.communicate_util import CommunicateUtil
from utils.pts_util import PtsUtil
from utils.view_sample_util import ViewSampleUtil
from utils.reconstruction_util import ReconstructionUtil
@stereotype.runner("inferencer")
class Inferencer(Runner):
def __init__(self, config_path: str):
super().__init__(config_path)
self.load_experiment("inferencer")
self.reconstruct_config = ConfigManager.get("runner", "reconstruct")
self.voxel_size = self.reconstruct_config["voxel_size"]
self.max_iter = self.reconstruct_config["max_iter"]
def create_experiment(self, backup_name=None):
super().create_experiment(backup_name)
def load_experiment(self, backup_name=None):
super().load_experiment(backup_name)
def run_inference(self, model_name):
''' init robot '''
ControlUtil.init()
''' take first view '''
view_data = CommunicateUtil.get_view_data()
first_cam_pts = None
first_cam_pose = None
combined_pts = first_cam_pts
input_data = {
"scanned_target_points_num": [first_cam_pts.shape[0]],
"scanned_n_to_world_pose_9d": [first_cam_pose],
"combined_scanned_pts": combined_pts
}
''' enter loop '''
iter = 0
while True:
''' inference '''
inference_result = CommunicateUtil.get_inference_data(input_data)
cam_to_world = inference_result["cam_to_world"]
''' set pose '''
ControlUtil.set_pose(cam_to_world)
''' take view '''
view_data = CommunicateUtil.get_view_data()
curr_cam_pts = None
curr_cam_pose = None
''' update combined pts '''
combined_pts = np.concatenate([combined_pts, curr_cam_pts], axis=0)
combined_pts = PtsUtil.voxel_downsample_point_cloud(combined_pts, voxel_size=self.voxel_size)
''' update input data '''
input_data["combined_scanned_pts"] = combined_pts
input_data["scanned_target_points_num"].append(curr_cam_pts.shape[0])
input_data["scanned_n_to_world_pose_9d"].append(curr_cam_pose)
''' check stop condition '''
if iter >= self.max_iter:
break
def run(self):
self.run_inference()
if __name__ == "__main__":
model_path = "/home/yan20/nbv_rec/data/test_CAD/test_model/bear_scaled.ply"
model = trimesh.load(model_path)
test_pts_L = np.loadtxt("/home/yan20/nbv_rec/data/test_CAD/cam_pts_0_L.txt")
test_pts_R = np.loadtxt("/home/yan20/nbv_rec/data/test_CAD/cam_pts_0_R.txt")
cam_to_world_L = PtsUtil.register_icp(test_pts_L, model)
cam_to_world_R = PtsUtil.register_icp(test_pts_R, model)
print(cam_to_world_L)
print("================================")
print(cam_to_world_R)
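Note: the removed loop above sets iter = 0 but never increments it, so the max_iter stop condition can never trigger. A minimal corrected skeleton (hypothetical, not part of this commit):

    def run_capture_loop(max_iter: int) -> None:
        iter_count = 0
        while True:
            # inference / set pose / take view / merge points, as in the removed code
            iter_count += 1
            if iter_count >= max_iter:  # reachable now that the counter advances
                break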

utils/communicate_util.py

@@ -1,10 +1,20 @@
import requests
import numpy as np
import cv2
class CommunicateUtil:
VIEW_HOST = "127.0.0.1:5000"
INFERENCE_HOST = "127.0.0.1:5000"
def get_view_data() -> dict:
data = {}
def get_view_data(init = False) -> dict:
params = {}
params["create_scanner"] = init
response = requests.get(f"http://{CommunicateUtil.VIEW_HOST}/api/data", json=params)
data = response.json()
if not data["success"]:
print(f"Failed to get view data")
return None
return data
def get_inference_data(view_data:dict) -> dict:
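Typical call pattern for the updated endpoint, assuming the view server is reachable at VIEW_HOST; the depth_* keys follow the runner's get_pts_from_view_data above:

    view_data = CommunicateUtil.get_view_data(init=True)  # first call asks the server to create the scanner
    if view_data is not None:
        depth = view_data["depth_image"]
        depth_intrinsics = view_data["depth_intrinsics"]
        depth_extrinsics = view_data["depth_extrinsics"]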

utils/data_load.py (new file, +410)

@@ -0,0 +1,410 @@
import os
import numpy as np
import json
import cv2
import trimesh
import torch
from utils.pts_util import PtsUtil
class DataLoadUtil:
TABLE_POSITION = np.asarray([0, 0, 0.8215])
@staticmethod
def get_display_table_info(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
display_table_info = scene_info["display_table"]
return display_table_info
@staticmethod
def get_display_table_top(root, scene_name):
display_table_height = DataLoadUtil.get_display_table_info(root, scene_name)[
"height"
]
display_table_top = DataLoadUtil.TABLE_POSITION + np.asarray(
[0, 0, display_table_height]
)
return display_table_top
@staticmethod
def get_path(root, scene_name, frame_idx):
path = os.path.join(root, scene_name, f"{frame_idx}")
return path
@staticmethod
def get_label_num(root, scene_name):
label_dir = os.path.join(root, scene_name, "label")
return len(os.listdir(label_dir))
@staticmethod
def get_label_path(root, scene_name, seq_idx):
label_dir = os.path.join(root, scene_name, "label")
if not os.path.exists(label_dir):
os.makedirs(label_dir)
path = os.path.join(label_dir, f"{seq_idx}.json")
return path
@staticmethod
def get_label_path_old(root, scene_name):
path = os.path.join(root, scene_name, "label.json")
return path
@staticmethod
def get_scene_seq_length(root, scene_name):
camera_params_path = os.path.join(root, scene_name, "camera_params")
return len(os.listdir(camera_params_path))
@staticmethod
def load_mesh_at(model_dir, object_name, world_object_pose):
model_path = os.path.join(model_dir, object_name, "mesh.obj")
mesh = trimesh.load(model_path)
mesh.apply_transform(world_object_pose)
return mesh
@staticmethod
def get_bbox_diag(model_dir, object_name):
model_path = os.path.join(model_dir, object_name, "mesh.obj")
mesh = trimesh.load(model_path)
bbox = mesh.bounding_box.extents
diagonal_length = np.linalg.norm(bbox)
return diagonal_length
@staticmethod
def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
model_path = os.path.join(output_dir, scene_name, "world_mesh.obj")
mesh.export(model_path)
@staticmethod
def save_target_mesh_at_world_space(
root, model_dir, scene_name, display_table_as_world_space_origin=True
):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
if display_table_as_world_space_origin:
location = transformation["location"] - DataLoadUtil.get_display_table_top(
root, scene_name
)
else:
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
mesh_dir = os.path.join(root, scene_name, "mesh")
if not os.path.exists(mesh_dir):
os.makedirs(mesh_dir)
model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
mesh.export(model_path)
@staticmethod
def load_scene_info(root, scene_name):
scene_info_path = os.path.join(root, scene_name, "scene_info.json")
with open(scene_info_path, "r") as f:
scene_info = json.load(f)
return scene_info
@staticmethod
def load_target_pts_num_dict(root, scene_name):
target_pts_num_path = os.path.join(root, scene_name, "target_pts_num.json")
with open(target_pts_num_path, "r") as f:
target_pts_num_dict = json.load(f)
return target_pts_num_dict
@staticmethod
def load_target_object_pose(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
return pose_mat
@staticmethod
def load_depth(path, min_depth=0.01, max_depth=5.0, binocular=False):
def load_depth_from_real_path(real_path, min_depth, max_depth):
depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED)
depth = depth.astype(np.float32) / 65535.0
depth_meters = min_depth + (max_depth - min_depth) * depth
return depth_meters
if binocular:
depth_path_L = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + "_L.png"
)
depth_path_R = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + "_R.png"
)
depth_meters_L = load_depth_from_real_path(
depth_path_L, min_depth, max_depth
)
depth_meters_R = load_depth_from_real_path(
depth_path_R, min_depth, max_depth
)
return depth_meters_L, depth_meters_R
else:
depth_path = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + ".png"
)
depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth)
return depth_meters
@staticmethod
def load_seg(path, binocular=False, left_only=False):
if binocular and not left_only:
def clean_mask(mask_image):
green = [0, 255, 0, 255]
red = [255, 0, 0, 255]
threshold = 2
mask_image = np.where(
np.abs(mask_image - green) <= threshold, green, mask_image
)
mask_image = np.where(
np.abs(mask_image - red) <= threshold, red, mask_image
)
return mask_image
mask_path_L = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
)
mask_image_L = clean_mask(cv2.imread(mask_path_L, cv2.IMREAD_UNCHANGED))
mask_path_R = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_R.png"
)
mask_image_R = clean_mask(cv2.imread(mask_path_R, cv2.IMREAD_UNCHANGED))
return mask_image_L, mask_image_R
else:
if binocular and left_only:
mask_path = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
)
else:
mask_path = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + ".png"
)
mask_image = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
return mask_image
@staticmethod
def load_normal(path, binocular=False, left_only=False):
if binocular and not left_only:
normal_path_L = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
)
normal_image_L = cv2.imread(normal_path_L, cv2.IMREAD_COLOR)
normal_path_R = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + "_R.png"
)
normal_image_R = cv2.imread(normal_path_R, cv2.IMREAD_COLOR)
normalized_normal_image_L = normal_image_L / 255.0 * 2.0 - 1.0
normalized_normal_image_R = normal_image_R / 255.0 * 2.0 - 1.0
return normalized_normal_image_L, normalized_normal_image_R
else:
if binocular and left_only:
normal_path = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
)
else:
normal_path = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + ".png"
)
normal_image = cv2.imread(normal_path, cv2.IMREAD_COLOR)
normalized_normal_image = normal_image / 255.0 * 2.0 - 1.0
return normalized_normal_image
@staticmethod
def load_label(path):
with open(path, "r") as f:
label_data = json.load(f)
return label_data
@staticmethod
def load_rgb(path):
rgb_path = os.path.join(
os.path.dirname(path), "rgb", os.path.basename(path) + ".png"
)
rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR)
return rgb_image
@staticmethod
def load_from_preprocessed_pts(path):
npy_path = os.path.join(
os.path.dirname(path), "pts", os.path.basename(path) + ".npy"
)
pts = np.load(npy_path)
return pts
@staticmethod
def cam_pose_transformation(cam_pose_before):
offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
cam_pose_after = cam_pose_before @ offset
return cam_pose_after
@staticmethod
def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
scene_dir = os.path.dirname(path)
root_dir = os.path.dirname(scene_dir)
scene_name = os.path.basename(scene_dir)
camera_params_path = os.path.join(
os.path.dirname(path), "camera_params", os.path.basename(path) + ".json"
)
with open(camera_params_path, "r") as f:
label_data = json.load(f)
cam_to_world = np.asarray(label_data["extrinsic"])
cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
world_to_display_table = np.eye(4)
world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
if display_table_as_world_space_origin:
cam_to_world = np.dot(world_to_display_table, cam_to_world)
cam_intrinsic = np.asarray(label_data["intrinsic"])
cam_info = {
"cam_to_world": cam_to_world,
"cam_intrinsic": cam_intrinsic,
"far_plane": label_data["far_plane"],
"near_plane": label_data["near_plane"],
}
if binocular:
cam_to_world_R = np.asarray(label_data["extrinsic_R"])
cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
if display_table_as_world_space_origin:
cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O)
cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R)
cam_info["cam_to_world_O"] = cam_to_world_O
cam_info["cam_to_world_R"] = cam_to_world_R
return cam_info
@staticmethod
def get_real_cam_O_from_cam_L(
cam_L, cam_O_to_cam_L, scene_path, display_table_as_world_space_origin=True
):
root_dir = os.path.dirname(scene_path)
scene_name = os.path.basename(scene_path)
if isinstance(cam_L, torch.Tensor):
cam_L = cam_L.cpu().numpy()
nO_to_display_table_pose = cam_L @ cam_O_to_cam_L
if display_table_as_world_space_origin:
display_table_to_world = np.eye(4)
display_table_to_world[:3, 3] = DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
nO_to_world_pose = np.dot(display_table_to_world, nO_to_display_table_pose)
nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
return nO_to_world_pose
@staticmethod
def get_target_point_cloud(
depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0, 255, 0, 255), require_full_points=False
):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
mask = mask.reshape(-1, 4)
target_mask = (mask == target_mask_label).all(axis=-1)
target_points_camera = points_camera[target_mask]
target_points_camera_aug = np.concatenate(
[target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1
)
target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3]
data = {
"points_world": target_points_world,
"points_camera": target_points_camera,
}
return data
@staticmethod
def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
points_camera_aug = np.concatenate(
[points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
)
points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return {"points_world": points_world, "points_camera": points_camera}
@staticmethod
def get_target_point_cloud_world_from_path(
path,
binocular=False,
random_downsample_N=65536,
voxel_size=0.005,
target_mask_label=(0, 255, 0, 255),
display_table_mask_label=(0, 0, 255, 255),
get_display_table_pts=False,
require_normal=False,
):
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
if binocular:
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"], cam_info["far_plane"], binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
point_cloud_L = DataLoadUtil.get_target_point_cloud(
depth_L,
cam_info["cam_intrinsic"],
cam_info["cam_to_world"],
mask_L,
target_mask_label,
)["points_world"]
point_cloud_R = DataLoadUtil.get_target_point_cloud(
depth_R,
cam_info["cam_intrinsic"],
cam_info["cam_to_world_R"],
mask_R,
target_mask_label,
)["points_world"]
point_cloud_L = PtsUtil.random_downsample_point_cloud(
point_cloud_L, random_downsample_N
)
point_cloud_R = PtsUtil.random_downsample_point_cloud(
point_cloud_R, random_downsample_N
)
overlap_points = PtsUtil.get_overlapping_points(
point_cloud_L, point_cloud_R, voxel_size
)
return overlap_points
else:
depth = DataLoadUtil.load_depth(
path, cam_info["near_plane"], cam_info["far_plane"]
)
mask = DataLoadUtil.load_seg(path)
point_cloud = DataLoadUtil.get_target_point_cloud(
depth, cam_info["cam_intrinsic"], cam_info["cam_to_world"], mask
)["points_world"]
return point_cloud
@staticmethod
def load_points_normals(root, scene_name, display_table_as_world_space_origin=True):
points_path = os.path.join(root, scene_name, "points_and_normals.txt")
points_normals = np.loadtxt(points_path)
if display_table_as_world_space_origin:
points_normals[:, :3] = points_normals[
:, :3
] - DataLoadUtil.get_display_table_top(root, scene_name)
return points_normals
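A sketch of how these loaders chain together for one binocular frame, using only functions defined above (root and scene are placeholders):

    root, scene = "/path/to/dataset", "scene_0"
    path = DataLoadUtil.get_path(root, scene, 0)  # frame 0
    cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
    depth_L, depth_R = DataLoadUtil.load_depth(
        path, cam_info["near_plane"], cam_info["far_plane"], binocular=True
    )
    target_pts = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)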

utils/preprocess_util.py (new file, +158)

@@ -0,0 +1,158 @@
import os
import numpy as np
import time
import sys
np.random.seed(0)
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.reconstruction_util import ReconstructionUtil
from utils.data_load import DataLoadUtil
from utils.pts_util import PtsUtil
def save_np_pts(path, pts: np.ndarray, file_type="txt"):
if file_type == "txt":
np.savetxt(path, pts)
else:
np.save(path, pts)
def save_target_points(root, scene, frame_idx, target_points: np.ndarray, file_type="txt"):
pts_path = os.path.join(root,scene, "pts", f"{frame_idx}.{file_type}")
if not os.path.exists(os.path.join(root,scene, "pts")):
os.makedirs(os.path.join(root,scene, "pts"))
save_np_pts(pts_path, target_points, file_type)
def save_scan_points_indices(root, scene, frame_idx, scan_points_indices: np.ndarray, file_type="txt"):
indices_path = os.path.join(root,scene, "scan_points_indices", f"{frame_idx}.{file_type}")
if not os.path.exists(os.path.join(root,scene, "scan_points_indices")):
os.makedirs(os.path.join(root,scene, "scan_points_indices"))
save_np_pts(indices_path, scan_points_indices, file_type)
def save_scan_points(root, scene, scan_points: np.ndarray):
scan_points_path = os.path.join(root,scene, "scan_points.txt")
save_np_pts(scan_points_path, scan_points)
def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
points_camera_aug = np.concatenate((points_camera, np.ones((points_camera.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world
def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
points_image_homogeneous /= points_image_homogeneous[:, 2:]
pixel_x = points_image_homogeneous[:, 0].astype(int)
pixel_y = points_image_homogeneous[:, 1].astype(int)
h, w = mask.shape[:2]
valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
selected_points_indices = np.where((mask_colors == display_table_mask_label).all(axis=-1))[0]
selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
return selected_points_indices
def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
''' configuration '''
target_mask_label = (0, 255, 0, 255)
display_table_mask_label=(0, 0, 255, 255)
random_downsample_N = 32768
voxel_size=0.002
filter_degree = 75
min_z = 0.2
max_z = 0.5
''' scan points '''
scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0,display_table_radius=0.25))
''' read frame data(depth|mask|normal) '''
frame_num = DataLoadUtil.get_scene_seq_length(root, scene)
for frame_id in range(frame_num):
print(f"[scene({scene_idx}/{scene_total})|frame({frame_id}/{frame_num})]Processing {scene} frame {frame_id}")
path = DataLoadUtil.get_path(root, scene, frame_id)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
''' target points '''
mask_img_L = mask_L
mask_img_R = mask_R
target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
target_mask_img_R = (mask_R == target_mask_label).all(axis=-1)
target_points_L = get_world_points(depth_L, target_mask_img_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
target_points_R = get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
sampled_target_points_L = PtsUtil.random_downsample_point_cloud(
target_points_L, random_downsample_N
)
sampled_target_points_R = PtsUtil.random_downsample_point_cloud(
target_points_R, random_downsample_N
)
has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
if has_points:
target_points = PtsUtil.get_overlapping_points(
sampled_target_points_L, sampled_target_points_R, voxel_size
)
if has_points:
has_points = target_points.shape[0] > 0
if has_points:
points_normals = DataLoadUtil.load_points_normals(root, scene, display_table_as_world_space_origin=True)
target_points = PtsUtil.filter_points(
target_points, points_normals, cam_info["cam_to_world"],voxel_size=0.002, theta = filter_degree, z_range=(min_z, max_z)
)
''' scan points indices '''
scan_points_indices_L = get_scan_points_indices(scan_points, mask_img_L, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
scan_points_indices_R = get_scan_points_indices(scan_points, mask_img_R, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
if not has_points:
target_points = np.zeros((0, 3))
save_target_points(root, scene, frame_id, target_points, file_type=file_type)
save_scan_points_indices(root, scene, frame_id, scan_points_indices, file_type=file_type)
save_scan_points(root, scene, scan_points) # The "done" flag of scene preprocess
if __name__ == "__main__":
#root = "/media/hofee/repository/new_data_with_normal"
root = r"/media/hofee/data/tempdir/test_real_output"
# list_path = r"/media/hofee/repository/full_list.txt"
# scene_list = []
# with open(list_path, "r") as f:
# for line in f:
# scene_list.append(line.strip())
scene_list = os.listdir(root)
from_idx = 0 # 1000
to_idx = 1 # 1500
cnt = 0
import time
total = to_idx - from_idx
for scene in scene_list[from_idx:to_idx]:
start = time.time()
save_scene_data(root, scene, cnt, total, file_type="npy")
cnt+=1
end = time.time()
print(f"Time cost: {end-start}")

utils/pts_util.py

@@ -169,3 +169,25 @@ class PtsUtil:
)
return reg_icp.transformation
@staticmethod
def get_pts_from_depth(depth, cam_intrinsic, cam_extrinsic):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
# no segmentation mask here: every depth pixel is back-projected
points_camera = np.concatenate(
[points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
)
points_world = np.dot(cam_extrinsic, points_camera.T).T[:, :3]
data = {
"points_world": points_world,
"points_camera": points_camera,
}
return data
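Usage sketch: the helper returns a dict, not a bare array; note that points_camera is homogeneous (N x 4) after the ones-column append above:

    result = PtsUtil.get_pts_from_depth(depth, depth_intrinsics, depth_extrinsics)
    world_pts = result["points_world"]   # N x 3, extrinsic frame
    cam_pts_h = result["points_camera"]  # N x 4, homogeneous camera frame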

utils/reconstruction_util.py

@@ -152,9 +152,15 @@ class ReconstructionUtil:
@staticmethod
def check_scan_points_overlap(history_indices, indices2, threshold=5):
for indices1 in history_indices:
if len(set(indices1).intersection(set(indices2))) >= threshold:
return True
try:
if len(indices2) == 0:
return False
for indices1 in history_indices:
if len(set(indices1).intersection(set(indices2))) >= threshold:
return True
except Exception as e:
print(e)
import ipdb; ipdb.set_trace()
return False
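Illustrative call (values made up): two views count as overlapping once they share at least threshold scan-point indices:

    history = [np.array([3, 7, 9, 12, 15])]
    new_view = np.array([7, 9, 12, 15, 20])
    ReconstructionUtil.check_scan_points_overlap(history, new_view, threshold=4)  # True: 4 shared indices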

utils/render_util.py (new file, +45)

@@ -0,0 +1,45 @@
import os
import json
import subprocess
import tempfile
import shutil
from utils.data_load import DataLoadUtil
from utils.pts_util import PtsUtil
class RenderUtil:
@staticmethod
def render_pts(cam_pose, object_name, scene_path, script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
with tempfile.TemporaryDirectory() as temp_dir:
params = {
"cam_pose": nO_to_world_pose.tolist(),
"object_name": scene_path
}
params_data_path = os.path.join(temp_dir, "params.json")
with open(params_data_path, 'w') as f:
json.dump(params, f)
result = subprocess.run([
'blender', '-b', '-P', script_path, '--', temp_dir
], capture_output=True, text=True)
if result.returncode != 0:
print("Blender script failed:")
print(result.stderr)
return None
path = os.path.join(temp_dir, "tmp")
point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
full_scene_point_cloud = None
if require_full_scene:
depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)
point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_params['cam_intrinsic'], cam_params['cam_to_world'])['points_world']
point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_params['cam_intrinsic'], cam_params['cam_to_world_R'])['points_world']
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
full_scene_point_cloud = PtsUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
return filtered_point_cloud, full_scene_point_cloud

utils/view_sample_util.py

@@ -1,162 +0,0 @@
import numpy as np
from utils.pose_util import PoseUtil
import trimesh
from collections import defaultdict
from scipy.spatial.transform import Rotation as R
import random
class ViewSampleUtil:
@staticmethod
def farthest_point_sampling(points, num_samples):
num_points = points.shape[0]
if num_samples >= num_points:
return points, np.arange(num_points)
sampled_indices = np.zeros(num_samples, dtype=int)
sampled_indices[0] = np.random.randint(num_points)
min_distances = np.full(num_points, np.inf)
for i in range(1, num_samples):
current_point = points[sampled_indices[i - 1]]
dist_to_current_point = np.linalg.norm(points - current_point, axis=1)
min_distances = np.minimum(min_distances, dist_to_current_point)
sampled_indices[i] = np.argmax(min_distances)
downsampled_points = points[sampled_indices]
return downsampled_points, sampled_indices
@staticmethod
def voxel_downsample(points, voxel_size):
voxel_grid = defaultdict(list)
for i, point in enumerate(points):
voxel_index = tuple((point // voxel_size).astype(int))
voxel_grid[voxel_index].append(i)
downsampled_points = []
downsampled_indices = []
for indices in voxel_grid.values():
selected_index = indices[0]
downsampled_points.append(points[selected_index])
downsampled_indices.append(selected_index)
return np.array(downsampled_points), downsampled_indices
@staticmethod
def sample_view_data(mesh: trimesh.Trimesh, distance_range: tuple = (0.25, 0.5), voxel_size: float = 0.005, max_views: int = 1, pertube_repeat: int = 1) -> dict:
view_data = {
"look_at_points": [],
"cam_positions": [],
}
vertices = mesh.vertices
look_at_points = []
cam_positions = []
normals = []
vertex_normals = mesh.vertex_normals
for i, vertex in enumerate(vertices):
look_at_point = vertex
view_data["look_at_points"].append(look_at_point)
normal = vertex_normals[i]
if np.isnan(normal).any():
continue
if np.dot(normal, look_at_point) < 0:
normal = -normal
normals.append(normal)
for _ in range(pertube_repeat):
perturb_angle = np.radians(np.random.uniform(0, 30))
perturb_axis = np.random.normal(size=3)
perturb_axis /= np.linalg.norm(perturb_axis)
rotation_matrix = R.from_rotvec(perturb_angle * perturb_axis).as_matrix()
perturbed_normal = np.dot(rotation_matrix, normal)
distance = np.random.uniform(*distance_range)
cam_position = look_at_point + distance * perturbed_normal
look_at_points.append(look_at_point)
cam_positions.append(cam_position)
look_at_points = np.array(look_at_points)
cam_positions = np.array(cam_positions)
voxel_downsampled_look_at_points, selected_indices = ViewSampleUtil.voxel_downsample(look_at_points, voxel_size)
voxel_downsampled_cam_positions = cam_positions[selected_indices]
voxel_downsampled_normals = np.array(normals)[selected_indices]
fps_downsampled_look_at_points, selected_indices = ViewSampleUtil.farthest_point_sampling(voxel_downsampled_look_at_points, max_views * 2)
fps_downsampled_cam_positions = voxel_downsampled_cam_positions[selected_indices]
view_data["look_at_points"] = fps_downsampled_look_at_points.tolist()
view_data["cam_positions"] = fps_downsampled_cam_positions.tolist()
view_data["normals"] = voxel_downsampled_normals.tolist()
view_data["voxel_down_sampled_points"] = voxel_downsampled_look_at_points
return view_data
@staticmethod
def get_world_points_and_normals(view_data: dict, obj_world_pose: np.ndarray) -> tuple:
world_points = []
world_normals = []
for voxel_down_sampled_points, normal in zip(view_data["voxel_down_sampled_points"], view_data["normals"]):
voxel_down_sampled_points_world = obj_world_pose @ np.append(voxel_down_sampled_points, 1.0)
normal_world = obj_world_pose[:3, :3] @ normal
world_points.append(voxel_down_sampled_points_world[:3])
world_normals.append(normal_world)
return np.array(world_points), np.array(world_normals)
@staticmethod
def get_cam_pose(view_data: dict, obj_world_pose: np.ndarray, max_views: int, min_cam_table_included_degree: int, random_view_ratio: float) -> np.ndarray:
cam_poses = []
min_height_z = 1000
for look_at_point, cam_position in zip(view_data["look_at_points"], view_data["cam_positions"]):
look_at_point_world = obj_world_pose @ np.append(look_at_point, 1.0)
cam_position_world = obj_world_pose @ np.append(cam_position, 1.0)
if look_at_point_world[2] < min_height_z:
min_height_z = look_at_point_world[2]
look_at_point_world = look_at_point_world[:3]
cam_position_world = cam_position_world[:3]
forward_vector = cam_position_world - look_at_point_world
forward_vector /= np.linalg.norm(forward_vector)
up_vector = np.array([0, 0, 1])
right_vector = np.cross(up_vector, forward_vector)
right_vector /= np.linalg.norm(right_vector)
corrected_up_vector = np.cross(forward_vector, right_vector)
rotation_matrix = np.array([right_vector, corrected_up_vector, forward_vector]).T
cam_pose = np.eye(4)
cam_pose[:3, :3] = rotation_matrix
cam_pose[:3, 3] = cam_position_world
cam_poses.append(cam_pose)
filtered_cam_poses = []
for cam_pose in cam_poses:
if cam_pose[2, 3] > min_height_z:
direction_vector = cam_pose[:3, 2]
horizontal_normal = np.array([0, 0, 1])
cos_angle = np.dot(direction_vector, horizontal_normal) / (np.linalg.norm(direction_vector) * np.linalg.norm(horizontal_normal))
angle = np.arccos(np.clip(cos_angle, -1.0, 1.0))
angle_degree = np.degrees(angle)
if angle_degree < 90 - min_cam_table_included_degree:
filtered_cam_poses.append(cam_pose)
if random.random() < random_view_ratio:
pertube_pose = PoseUtil.get_uniform_pose([0.1, 0.1, 0.1], [3, 3, 3], 0, 180, "cm")
filtered_cam_poses.append(pertube_pose @ cam_pose)
if len(filtered_cam_poses) > max_views:
indices = np.random.choice(len(filtered_cam_poses), max_views, replace=False)
filtered_cam_poses = [filtered_cam_poses[i] for i in indices]
return np.array(filtered_cam_poses)
@staticmethod
def sample_view_data_world_space(mesh: trimesh.Trimesh, cad_to_world: np.ndarray, distance_range:tuple = (0.25,0.5), voxel_size:float = 0.005, max_views: int=1, min_cam_table_included_degree:int=20, random_view_ratio:float = 0.2) -> dict:
view_data = ViewSampleUtil.sample_view_data(mesh, distance_range, voxel_size, max_views)
view_data["cam_to_world_poses"] = ViewSampleUtil.get_cam_pose(view_data, cad_to_world, max_views, min_cam_table_included_degree, random_view_ratio)
view_data["voxel_down_sampled_points"], view_data["normals"] = ViewSampleUtil.get_world_points_and_normals(view_data, cad_to_world)
return view_data