diff --git a/.gitignore b/.gitignore
index 5d381cc..6dfd75d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,7 +3,7 @@
 __pycache__/
 *.py[cod]
 *$py.class
-
+test_output/
 # C extensions
 *.so
diff --git a/configs/cad_config.yaml b/configs/cad_config.yaml
index d528e99..e962b93 100644
--- a/configs/cad_config.yaml
+++ b/configs/cad_config.yaml
@@ -10,7 +10,8 @@ runner:
     root_dir: "experiments"
 
   generate:
-    model_dir: "/home/yan20/nbv_rec/data/test_CAD/test_model"
+    model_dir: "/home/user/nbv_rec/data/models"
+    table_model_path: "/home/user/nbv_rec/data/table.obj"
     model_start_idx: 0
     voxel_size: 0.005
     max_view: 512
@@ -19,6 +20,19 @@ runner:
     min_diag: 0.01
     random_view_ratio: 0
     min_cam_table_included_degree: 20
+    obj_name: "bear"
+    light_and_camera_config:
+      Camera:
+        near_plane: 0.01
+        far_plane: 5
+        fov_vertical: 25
+        resolution: [1280,800]
+        eye_distance: 0.15
+        eye_angle: 25
+      Light:
+        location: [0,0,3.5]
+        orientation: [0,0,0]
+        power: 150
 
   reconstruct:
     soft_overlap_threshold: 0.3
diff --git a/runners/cad_strategy.py b/runners/cad_strategy.py
index c87f17e..2a5c402 100644
--- a/runners/cad_strategy.py
+++ b/runners/cad_strategy.py
@@ -1,5 +1,7 @@
 import os
 import trimesh
+import tempfile
+import subprocess
 import numpy as np
 from PytorchBoot.runners.runner import Runner
 from PytorchBoot.config import ConfigManager
@@ -7,16 +9,12 @@ import PytorchBoot.stereotype as stereotype
 from PytorchBoot.utils.log_util import Log
 from PytorchBoot.status import status_manager
 
-
-import sys
-
-sys.path.append(r"C:\Document\Local Project\nbv_rec_control")
-#from utils.control_util import ControlUtil
+from utils.control_util import ControlUtil
 from utils.communicate_util import CommunicateUtil
 from utils.pts_util import PtsUtil
-from utils.view_sample_util import ViewSampleUtil
 from utils.reconstruction_util import ReconstructionUtil
-
+from utils.preprocess_util import save_scene_data
+from utils.data_load import DataLoadUtil
 
 @stereotype.runner("CAD_strategy_runner")
 class CADStrategyRunner(Runner):
@@ -50,79 +48,111 @@ class CADStrategyRunner(Runner):
 
     def load_experiment(self, backup_name=None):
         super().load_experiment(backup_name)
 
+    def get_pts_from_view_data(self, view_data):
+        depth = view_data["depth_image"]
+        depth_intrinsics = view_data["depth_intrinsics"]
+        depth_extrinsics = view_data["depth_extrinsics"]
+        cam_pts = PtsUtil.get_pts_from_depth(depth, depth_intrinsics, depth_extrinsics)
+        # get_pts_from_depth returns both frames; callers expect camera-frame points
+        return cam_pts["points_camera"]
+
+    def split_scan_pts_and_obj_pts(self, world_pts, scan_pts_z, z_threshold = 0.003):
+        scan_pts = world_pts[scan_pts_z < z_threshold]
+        obj_pts = world_pts[scan_pts_z >= z_threshold]
+        return scan_pts, obj_pts
+
     def run_one_model(self, model_name):
         ''' init robot '''
-        ControlUtil.init()
+        #ControlUtil.init()
         ''' load CAD model '''
-        model_path = os.path.join(self.model_dir, model_name)
+        model_path = os.path.join(self.model_dir, model_name, "mesh.obj")
         cad_model = trimesh.load(model_path)
         ''' take first view '''
-        view_data = CommunicateUtil.get_view_data()
-        first_cam_pts = None
+        #view_data = CommunicateUtil.get_view_data(init=True)
+        #first_cam_pts = self.get_pts_from_view_data(view_data)
         ''' register '''
-        cad_to_cam = PtsUtil.register_icp(first_cam_pts, cad_model)
-        cam_to_world = ControlUtil.get_pose()
-        cad_to_world = cam_to_world @ cad_to_cam
-        cad_model:trimesh.Trimesh = cad_model.apply_transform(cad_to_world)
-        ''' sample view '''
-        min_corner = cad_model.bounds[0]
-        max_corner = cad_model.bounds[1]
-        diag = np.linalg.norm(max_corner - min_corner)
-        view_num = int(self.min_view + (diag - self.min_diag)/(self.max_diag - self.min_diag) * (self.max_view - self.min_view))
-        sampled_view_data = ViewSampleUtil.sample_view_data_world_space(
-            cad_model, cad_to_world,
-            voxel_size = self.voxel_size,
-            max_views = view_num,
-            min_cam_table_included_degree= self.min_cam_table_included_degree,
-            random_view_ratio = self.random_view_ratio
-        )
-        cam_to_world_poses = sampled_view_data["cam_to_world_poses"]
-        world_model_points = sampled_view_data["voxel_down_sampled_points"]
+        #cad_to_cam = PtsUtil.register(first_cam_pts, cad_model)
+        #cam_to_world = ControlUtil.get_pose()
+        cad_to_world = np.eye(4) #cam_to_world @ cad_to_cam
+
+        # lift the CAD pose into the renderer's frame; the 0.9215 m z-offset is
+        # the table offset baked into the Blender scene
+        world_to_blender_world = np.eye(4)
+        world_to_blender_world[:3, 3] = np.asarray([0, 0, 0.9215])
+        cad_to_blender_world = np.dot(world_to_blender_world, cad_to_world)
+        cad_model:trimesh.Trimesh = cad_model.apply_transform(cad_to_blender_world)
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            name = "cad_model_world"
+
+            cad_model.export(os.path.join(temp_dir, f"{name}.obj"))
+            # NOTE: temp_dir is overridden with a fixed folder of pre-rendered
+            # output while the Blender call below stays commented out
+            temp_dir = "/home/user/nbv_rec/nbv_rec_control/test_output"
+            scene_dir = os.path.join(temp_dir, name)
+            script_path = "/home/user/nbv_rec/blender_app/data_generator.py"
+            ''' sample view '''
+            # print("start running renderer")
+            # result = subprocess.run([
+            #     'blender', '-b', '-P', script_path, '--', temp_dir
+            # ], capture_output=True, text=True)
+            # print("finish running renderer")
+
+            world_model_points = np.loadtxt(os.path.join(scene_dir, "points_and_normals.txt"))[:,:3]
+            ''' preprocess '''
+            # save_scene_data(temp_dir, name)
+
+            pts_dir = os.path.join(temp_dir, name, "pts")
+            sample_view_pts_list = []
+            scan_points_idx_list = []
+            frame_num = len(os.listdir(pts_dir))
+
+            for frame_idx in range(frame_num):
+                pts_path = os.path.join(temp_dir, name, "pts", f"{frame_idx}.txt")
+                idx_path = os.path.join(temp_dir, name, "scan_points_indices", f"{frame_idx}.txt")
+                point_cloud = np.loadtxt(pts_path)
+                sampled_point_cloud = PtsUtil.voxel_downsample_point_cloud(point_cloud, self.voxel_size)
+                # np.loadtxt yields a 0-d array for a single index; normalize to 1-d
+                indices = np.atleast_1d(np.loadtxt(idx_path, dtype=np.int32))
+                sample_view_pts_list.append(sampled_point_cloud)
+                scan_points_idx_list.append(indices)
 
-        ''' take sample view '''
-        scan_points_idx_list = []
-        sample_view_pts_list = []
-        for cam_to_world in cam_to_world_poses:
-            ControlUtil.move_to(cam_to_world)
-            ''' get world pts '''
-            view_data = CommunicateUtil.get_view_data()
-            cam_pts = None
-            scan_points_idx = None
-            world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
-            sample_view_pts_list.append(world_pts)
-            scan_points_idx_list.append(scan_points_idx)
+            ''' generate strategy '''
-        ''' generate strategy '''
-        limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_overlap(
-            world_model_points, sample_view_pts_list,
-            scan_points_indices_list = scan_points_idx_list,
-            init_view=0,
-            threshold=self.voxel_size,
-            soft_overlap_threshold = self.soft_overlap_threshold,
-            hard_overlap_threshold = self.hard_overlap_threshold,
-            scan_points_threshold = self.scan_points_threshold,
-            status_info=self.status_info
-        )
-
-        ''' extract cam_to_world sequence '''
-        cam_to_world_seq = []
-        coveraget_rate_seq = []
-
-        for idx, coverage_rate in limited_useful_view:
-            cam_to_world_seq.append(cam_to_world_poses[idx])
-            coveraget_rate_seq.append(coverage_rate)
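+            # The next call is the strategy core. Assumed contract (inferred
+            # from this diff, not from ReconstructionUtil's docs): it greedily
+            # orders views by coverage gain over the model points and returns
+            # (view_idx, coverage_rate) pairs in acquisition order, e.g.
+            #   limited_useful_view == [(0, 0.12), (37, 0.45), (102, 0.78), ...]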
+            limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_overlap(
+                world_model_points, sample_view_pts_list,
+                scan_points_indices_list = scan_points_idx_list,
+                init_view=0,
+                threshold=self.voxel_size,
+                soft_overlap_threshold = self.soft_overlap_threshold,
+                hard_overlap_threshold = self.hard_overlap_threshold,
+                scan_points_threshold = self.scan_points_threshold,
+                status_info=self.status_info
+            )
+
+            ''' extract cam_to_world sequence '''
+            cam_to_world_seq = []
+            coverage_rate_seq = []
 
-        ''' take best seq view '''
-        for cam_to_world in cam_to_world_seq:
-            ControlUtil.move_to(cam_to_world)
-            ''' get world pts '''
-            view_data = CommunicateUtil.get_view_data()
-            cam_pts = None
-            scan_points_idx = None
-            world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
-            sample_view_pts_list.append(world_pts)
-            scan_points_idx_list.append(scan_points_idx)
+            for idx, coverage_rate in limited_useful_view:
+                path = DataLoadUtil.get_path(temp_dir, name, idx)
+                cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+                cam_to_world_seq.append(cam_info["cam_to_world"])
+                coverage_rate_seq.append(coverage_rate)
+
+            # ''' take best seq view '''
+            # for cam_to_world in cam_to_world_seq:
+            #     ControlUtil.move_to(cam_to_world)
+            #     ''' get world pts '''
+            #     view_data = CommunicateUtil.get_view_data()
+            #     cam_pts = self.get_pts_from_view_data(view_data)
+            #     scan_points_idx = None
+            #     world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
+            #     sample_view_pts_list.append(world_pts)
+            #     scan_points_idx_list.append(scan_points_idx)
 
 
     def run(self):
@@ -159,32 +189,5 @@ if __name__ == "__main__":
     # cad_pts_L = PtsUtil.transform_point_cloud(noisy_pts_L, cad_to_cam_L)
     # np.savetxt(r"test.txt", cad_pts_L)
     # np.savetxt(r"src.txt", noisy_pts_L)
-
-    ''' test view sample '''
-
-    sampled_view_data = ViewSampleUtil.sample_view_data_world_space(
-        model, np.eye(4),
-        voxel_size = 0.005,
-        max_views = 20,
-        min_cam_table_included_degree= 20,
-        random_view_ratio = 0
-    )
-    cam_poses = sampled_view_data["cam_to_world_poses"]
-    cam_poses = np.array(cam_poses)
-    print(cam_poses.shape)
-    def sample_camera_direction(cam_pose, num_samples, sample_distance):
-        cam_position = cam_pose[:3, 3]
-        cam_direction = cam_pose[:3, 2]
-        sampled_points = np.array([cam_position - (i + 1) * sample_distance * cam_direction for i in range(num_samples)])
-        return sampled_points
-
-    all_sampled_points = []
-    for i in range(cam_poses.shape[0]):
-        samped_points = sample_camera_direction(cam_poses[i], 10, 0.02)
-        all_sampled_points.append(samped_points)
-
-    all_sampled_points = np.concatenate(all_sampled_points, axis=0)
-    np.savetxt(r"cam_poses.txt", cam_poses[:, :3, 3])
-    np.savetxt(r"cam_directions.txt", all_sampled_points)
-
+    
\ No newline at end of file
diff --git a/runners/inference.py b/runners/inference.py
index d57ce9a..e69de29 100644
--- a/runners/inference.py
+++ b/runners/inference.py
@@ -1,87 +0,0 @@
-import os
-import trimesh
-import numpy as np
-from PytorchBoot.runners.runner import Runner
-from PytorchBoot.config import ConfigManager
-import PytorchBoot.stereotype as stereotype
-from PytorchBoot.utils.log_util import Log
-import PytorchBoot.namespace as namespace
-from PytorchBoot.status import status_manager
-
-from utils.control_util import ControlUtil
-from utils.communicate_util import CommunicateUtil
-from utils.pts_util import PtsUtil
-from utils.view_sample_util import ViewSampleUtil
-from utils.reconstruction_util import ReconstructionUtil
-
-
-@stereotype.runner("inferencer")
-class Inferencer(Runner):
-
-    def __init__(self, config_path: str):
-        super().__init__(config_path)
-        self.load_experiment("inferencer")
-        self.reconstruct_config = ConfigManager.get("runner", "reconstruct")
-        self.voxel_size = self.reconstruct_config["voxel_size"]
-        self.max_iter = self.reconstruct_config["max_iter"]
-
-    def create_experiment(self, backup_name=None):
-        super().create_experiment(backup_name)
-
-    def load_experiment(self, backup_name=None):
-        super().load_experiment(backup_name)
-
-    def run_inference(self, model_name):
-
-        ''' init robot '''
-        ControlUtil.init()
-        ''' take first view '''
-        view_data = CommunicateUtil.get_view_data()
-        first_cam_pts = None
-        first_cam_pose = None
-        combined_pts = first_cam_pts
-        input_data = {
-            "scanned_target_points_num": [first_cam_pts.shape[0]],
-            "scanned_n_to_world_pose_9d": [first_cam_pose],
-            "combined_scanned_pts": combined_pts
-        }
-        ''' enter loop '''
-        iter = 0
-        while True:
-            ''' inference '''
-            inference_result = CommunicateUtil.get_inference_data(input_data)
-            cam_to_world = inference_result["cam_to_world"]
-            ''' set pose '''
-            ControlUtil.set_pose(cam_to_world)
-            ''' take view '''
-            view_data = CommunicateUtil.get_view_data()
-            curr_cam_pts = None
-            curr_cam_pose = None
-            ''' update combined pts '''
-            combined_pts = np.concatenate([combined_pts, curr_cam_pts], axis=0)
-            combined_pts = PtsUtil.voxel_downsample_point_cloud(combined_pts, voxel_size=self.voxel_size)
-            ''' update input data '''
-            input_data["combined_scanned_pts"] = combined_pts
-            input_data["scanned_target_points_num"].append(curr_cam_pts.shape[0])
-            input_data["scanned_n_to_world_pose_9d"].append(curr_cam_pose)
-
-            ''' check stop condition '''
-            if iter >= self.max_iter:
-                break
-
-
-    def run(self):
-        self.run_inference()
-
-
-if __name__ == "__main__":
-    model_path = "/home/yan20/nbv_rec/data/test_CAD/test_model/bear_scaled.ply"
-    model = trimesh.load(model_path)
-    test_pts_L = np.loadtxt("/home/yan20/nbv_rec/data/test_CAD/cam_pts_0_L.txt")
-    test_pts_R = np.loadtxt("/home/yan20/nbv_rec/data/test_CAD/cam_pts_0_R.txt")
-    cam_to_world_L = PtsUtil.register_icp(test_pts_L, model)
-    cam_to_world_R = PtsUtil.register_icp(test_pts_R, model)
-    print(cam_to_world_L)
-    print("================================")
-    print(cam_to_world_R)
-    
\ No newline at end of file
diff --git a/utils/communicate_util.py b/utils/communicate_util.py
index 636bf26..6abe5dc 100644
--- a/utils/communicate_util.py
+++ b/utils/communicate_util.py
@@ -1,10 +1,20 @@
+import requests
+import numpy as np
+import cv2
+
 class CommunicateUtil:
     VIEW_HOST = "127.0.0.1:5000"
     INFERENCE_HOST = "127.0.0.1:5000"
 
-    def get_view_data() -> dict:
-        data = {}
+    @staticmethod
+    def get_view_data(init = False) -> dict:
+        params = {}
+        params["create_scanner"] = init
+        response = requests.get(f"http://{CommunicateUtil.VIEW_HOST}/api/data", json=params)
+        data = response.json()
+        if not data["success"]:
+            print("Failed to get view data")
+            return None
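+        # Assumed payload (inferred from how the runner consumes it, not from
+        # the scanner service's documentation): on success the JSON carries at
+        # least
+        #   {"success": true, "depth_image": ..., "depth_intrinsics": ...,
+        #    "depth_extrinsics": ...}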
         return data
 
     def get_inference_data(view_data:dict) -> dict:
diff --git a/utils/data_load.py b/utils/data_load.py
new file mode 100644
index 0000000..3bd73e0
--- /dev/null
+++ b/utils/data_load.py
@@ -0,0 +1,410 @@
+import os
+import numpy as np
+import json
+import cv2
+import trimesh
+import torch
+from utils.pts_util import PtsUtil
+
+
+class DataLoadUtil:
+    TABLE_POSITION = np.asarray([0, 0, 0.8215])
+
+    @staticmethod
+    def get_display_table_info(root, scene_name):
+        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
+        display_table_info = scene_info["display_table"]
+        return display_table_info
+
+    @staticmethod
+    def get_display_table_top(root, scene_name):
+        display_table_height = DataLoadUtil.get_display_table_info(root, scene_name)[
+            "height"
+        ]
+        display_table_top = DataLoadUtil.TABLE_POSITION + np.asarray(
+            [0, 0, display_table_height]
+        )
+        return display_table_top
+
+    @staticmethod
+    def get_path(root, scene_name, frame_idx):
+        path = os.path.join(root, scene_name, f"{frame_idx}")
+        return path
+
+    @staticmethod
+    def get_label_num(root, scene_name):
+        label_dir = os.path.join(root, scene_name, "label")
+        return len(os.listdir(label_dir))
+
+    @staticmethod
+    def get_label_path(root, scene_name, seq_idx):
+        label_dir = os.path.join(root, scene_name, "label")
+        if not os.path.exists(label_dir):
+            os.makedirs(label_dir)
+        path = os.path.join(label_dir, f"{seq_idx}.json")
+        return path
+
+    @staticmethod
+    def get_label_path_old(root, scene_name):
+        path = os.path.join(root, scene_name, "label.json")
+        return path
+
+    @staticmethod
+    def get_scene_seq_length(root, scene_name):
+        camera_params_path = os.path.join(root, scene_name, "camera_params")
+        return len(os.listdir(camera_params_path))
+
+    @staticmethod
+    def load_mesh_at(model_dir, object_name, world_object_pose):
+        model_path = os.path.join(model_dir, object_name, "mesh.obj")
+        mesh = trimesh.load(model_path)
+        mesh.apply_transform(world_object_pose)
+        return mesh
+
+    @staticmethod
+    def get_bbox_diag(model_dir, object_name):
+        model_path = os.path.join(model_dir, object_name, "mesh.obj")
+        mesh = trimesh.load(model_path)
+        bbox = mesh.bounding_box.extents
+        diagonal_length = np.linalg.norm(bbox)
+        return diagonal_length
+
+    @staticmethod
+    def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
+        mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
+        model_path = os.path.join(output_dir, scene_name, "world_mesh.obj")
+        mesh.export(model_path)
+
+    @staticmethod
+    def save_target_mesh_at_world_space(
+        root, model_dir, scene_name, display_table_as_world_space_origin=True
+    ):
+        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
+        target_name = scene_info["target_name"]
+        transformation = scene_info[target_name]
+        if display_table_as_world_space_origin:
+            location = transformation["location"] - DataLoadUtil.get_display_table_top(
+                root, scene_name
+            )
+        else:
+            location = transformation["location"]
+        rotation_euler = transformation["rotation_euler"]
+        pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
+        pose_mat[:3, 3] = location
+
+        mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
+        mesh_dir = os.path.join(root, scene_name, "mesh")
+        if not os.path.exists(mesh_dir):
+            os.makedirs(mesh_dir)
+        model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
+        mesh.export(model_path)
+
+    @staticmethod
+    def load_scene_info(root, scene_name):
+        scene_info_path = os.path.join(root, scene_name, "scene_info.json")
+        with open(scene_info_path, "r") as f:
+            scene_info = json.load(f)
+        return scene_info
+
+    @staticmethod
+    def load_target_pts_num_dict(root, scene_name):
+        target_pts_num_path = os.path.join(root, scene_name, "target_pts_num.json")
+        with open(target_pts_num_path, "r") as f:
+            target_pts_num_dict = json.load(f)
+        return target_pts_num_dict
+
+    @staticmethod
+    def load_target_object_pose(root, scene_name):
+        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
+        target_name = scene_info["target_name"]
+        transformation = scene_info[target_name]
+        location = transformation["location"]
+        rotation_euler = transformation["rotation_euler"]
+        pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
+        pose_mat[:3, 3] = location
+        return pose_mat
+
+    @staticmethod
+    def load_depth(path, min_depth=0.01, max_depth=5.0, binocular=False):
+
+        def load_depth_from_real_path(real_path, min_depth, max_depth):
+            depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED)
+            depth = depth.astype(np.float32) / 65535.0
+            depth_meters = min_depth + (max_depth - min_depth) * depth
+            return depth_meters
+
+        if binocular:
+            depth_path_L = os.path.join(
+                os.path.dirname(path), "depth", os.path.basename(path) + "_L.png"
+            )
+            depth_path_R = os.path.join(
+                os.path.dirname(path), "depth", os.path.basename(path) + "_R.png"
+            )
+            depth_meters_L = load_depth_from_real_path(
+                depth_path_L, min_depth, max_depth
+            )
+            depth_meters_R = load_depth_from_real_path(
+                depth_path_R, min_depth, max_depth
+            )
+            return depth_meters_L, depth_meters_R
+        else:
+            depth_path = os.path.join(
+                os.path.dirname(path), "depth", os.path.basename(path) + ".png"
+            )
+            depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth)
+            return depth_meters
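+
+    # Depth encoding assumed by the loader above: 16-bit PNGs normalized by
+    # 65535 and mapped linearly onto [min_depth, max_depth] meters, i.e.
+    #   meters = min_depth + (max_depth - min_depth) * raw / 65535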
+
+    @staticmethod
+    def load_seg(path, binocular=False, left_only=False):
+        if binocular and not left_only:
+
+            def clean_mask(mask_image):
+                green = [0, 255, 0, 255]
+                red = [255, 0, 0, 255]
+                threshold = 2
+                mask_image = np.where(
+                    np.abs(mask_image - green) <= threshold, green, mask_image
+                )
+                mask_image = np.where(
+                    np.abs(mask_image - red) <= threshold, red, mask_image
+                )
+                return mask_image
+
+            mask_path_L = os.path.join(
+                os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
+            )
+            mask_image_L = clean_mask(cv2.imread(mask_path_L, cv2.IMREAD_UNCHANGED))
+            mask_path_R = os.path.join(
+                os.path.dirname(path), "mask", os.path.basename(path) + "_R.png"
+            )
+            mask_image_R = clean_mask(cv2.imread(mask_path_R, cv2.IMREAD_UNCHANGED))
+            return mask_image_L, mask_image_R
+        else:
+            if binocular and left_only:
+                mask_path = os.path.join(
+                    os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
+                )
+            else:
+                mask_path = os.path.join(
+                    os.path.dirname(path), "mask", os.path.basename(path) + ".png"
+                )
+            mask_image = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
+            return mask_image
+
+    @staticmethod
+    def load_normal(path, binocular=False, left_only=False):
+        if binocular and not left_only:
+            normal_path_L = os.path.join(
+                os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
+            )
+            normal_image_L = cv2.imread(normal_path_L, cv2.IMREAD_COLOR)
+            normal_path_R = os.path.join(
+                os.path.dirname(path), "normal", os.path.basename(path) + "_R.png"
+            )
+            normal_image_R = cv2.imread(normal_path_R, cv2.IMREAD_COLOR)
+            normalized_normal_image_L = normal_image_L / 255.0 * 2.0 - 1.0
+            normalized_normal_image_R = normal_image_R / 255.0 * 2.0 - 1.0
+            return normalized_normal_image_L, normalized_normal_image_R
+        else:
+            if binocular and left_only:
+                normal_path = os.path.join(
+                    os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
+                )
+            else:
+                normal_path = os.path.join(
+                    os.path.dirname(path), "normal", os.path.basename(path) + ".png"
+                )
+            normal_image = cv2.imread(normal_path, cv2.IMREAD_COLOR)
+            normalized_normal_image = normal_image / 255.0 * 2.0 - 1.0
+            return normalized_normal_image
+
+    @staticmethod
+    def load_label(path):
+        with open(path, "r") as f:
+            label_data = json.load(f)
+        return label_data
+
+    @staticmethod
+    def load_rgb(path):
+        rgb_path = os.path.join(
+            os.path.dirname(path), "rgb", os.path.basename(path) + ".png"
+        )
+        rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR)
+        return rgb_image
+
+    @staticmethod
+    def load_from_preprocessed_pts(path):
+        npy_path = os.path.join(
+            os.path.dirname(path), "pts", os.path.basename(path) + ".npy"
+        )
+        pts = np.load(npy_path)
+        return pts
+
+    @staticmethod
+    def cam_pose_transformation(cam_pose_before):
+        offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
+        cam_pose_after = cam_pose_before @ offset
+        return cam_pose_after
+
+    @staticmethod
+    def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
+        scene_dir = os.path.dirname(path)
+        root_dir = os.path.dirname(scene_dir)
+        scene_name = os.path.basename(scene_dir)
+        camera_params_path = os.path.join(
+            os.path.dirname(path), "camera_params", os.path.basename(path) + ".json"
+        )
+        with open(camera_params_path, "r") as f:
+            label_data = json.load(f)
+        cam_to_world = np.asarray(label_data["extrinsic"])
+        cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
+        world_to_display_table = np.eye(4)
+        world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
+            root_dir, scene_name
+        )
+        if display_table_as_world_space_origin:
+            cam_to_world = np.dot(world_to_display_table, cam_to_world)
+        cam_intrinsic = np.asarray(label_data["intrinsic"])
+        cam_info = {
+            "cam_to_world": cam_to_world,
+            "cam_intrinsic": cam_intrinsic,
+            "far_plane": label_data["far_plane"],
+            "near_plane": label_data["near_plane"],
+        }
+        if binocular:
+            cam_to_world_R = np.asarray(label_data["extrinsic_R"])
+            cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
+            cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
+            cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
+            if display_table_as_world_space_origin:
+                cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O)
+                cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R)
+            cam_info["cam_to_world_O"] = cam_to_world_O
+            cam_info["cam_to_world_R"] = cam_to_world_R
+        return cam_info
+
+    @staticmethod
+    def get_real_cam_O_from_cam_L(
+        cam_L, cam_O_to_cam_L, scene_path, display_table_as_world_space_origin=True
+    ):
+        root_dir = os.path.dirname(scene_path)
+        scene_name = os.path.basename(scene_path)
+        if isinstance(cam_L, torch.Tensor):
+            cam_L = cam_L.cpu().numpy()
+        nO_to_display_table_pose = cam_L @ cam_O_to_cam_L
+        if display_table_as_world_space_origin:
+            display_table_to_world = np.eye(4)
+            display_table_to_world[:3, 3] = DataLoadUtil.get_display_table_top(
+                root_dir, scene_name
+            )
+            nO_to_world_pose = np.dot(display_table_to_world, nO_to_display_table_pose)
+        nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
+        return nO_to_world_pose
+
+    @staticmethod
+    def get_target_point_cloud(
+        depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0, 255, 0, 255), require_full_points=False
+    ):
+        h, w = depth.shape
+        i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
+
+        z = depth
+        x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        mask = mask.reshape(-1, 4)
+
+        target_mask = (mask == target_mask_label).all(axis=-1)
+
+        target_points_camera = points_camera[target_mask]
+        target_points_camera_aug = np.concatenate(
+            [target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1
+        )
+
+        target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3]
+        data = {
"points_world": target_points_world, + "points_camera": target_points_camera, + } + return data + + @staticmethod + def get_point_cloud(depth, cam_intrinsic, cam_extrinsic): + h, w = depth.shape + i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy") + + z = depth + x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0] + y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1] + + points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3) + points_camera_aug = np.concatenate( + [points_camera, np.ones((points_camera.shape[0], 1))], axis=-1 + ) + + points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3] + return {"points_world": points_world, "points_camera": points_camera} + + @staticmethod + def get_target_point_cloud_world_from_path( + path, + binocular=False, + random_downsample_N=65536, + voxel_size=0.005, + target_mask_label=(0, 255, 0, 255), + display_table_mask_label=(0, 0, 255, 255), + get_display_table_pts=False, + require_normal=False, + ): + cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular) + if binocular: + depth_L, depth_R = DataLoadUtil.load_depth( + path, cam_info["near_plane"], cam_info["far_plane"], binocular=True + ) + mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True) + point_cloud_L = DataLoadUtil.get_target_point_cloud( + depth_L, + cam_info["cam_intrinsic"], + cam_info["cam_to_world"], + mask_L, + target_mask_label, + )["points_world"] + point_cloud_R = DataLoadUtil.get_target_point_cloud( + depth_R, + cam_info["cam_intrinsic"], + cam_info["cam_to_world_R"], + mask_R, + target_mask_label, + )["points_world"] + point_cloud_L = PtsUtil.random_downsample_point_cloud( + point_cloud_L, random_downsample_N + ) + point_cloud_R = PtsUtil.random_downsample_point_cloud( + point_cloud_R, random_downsample_N + ) + overlap_points = PtsUtil.get_overlapping_points( + point_cloud_L, point_cloud_R, voxel_size + ) + return overlap_points + else: + depth = DataLoadUtil.load_depth( + path, cam_info["near_plane"], cam_info["far_plane"] + ) + mask = DataLoadUtil.load_seg(path) + point_cloud = DataLoadUtil.get_target_point_cloud( + depth, cam_info["cam_intrinsic"], cam_info["cam_to_world"], mask + )["points_world"] + return point_cloud + + @staticmethod + def load_points_normals(root, scene_name, display_table_as_world_space_origin=True): + points_path = os.path.join(root, scene_name, "points_and_normals.txt") + points_normals = np.loadtxt(points_path) + if display_table_as_world_space_origin: + points_normals[:, :3] = points_normals[ + :, :3 + ] - DataLoadUtil.get_display_table_top(root, scene_name) + return points_normals diff --git a/utils/preprocess_util.py b/utils/preprocess_util.py new file mode 100644 index 0000000..83b1163 --- /dev/null +++ b/utils/preprocess_util.py @@ -0,0 +1,158 @@ +import os +import numpy as np +import time +import sys +np.random.seed(0) +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from utils.reconstruction_util import ReconstructionUtil +from utils.data_load import DataLoadUtil +from utils.pts_util import PtsUtil + +def save_np_pts(path, pts: np.ndarray, file_type="txt"): + if file_type == "txt": + np.savetxt(path, pts) + else: + np.save(path, pts) + +def save_target_points(root, scene, frame_idx, target_points: np.ndarray, file_type="txt"): + pts_path = os.path.join(root,scene, "pts", f"{frame_idx}.{file_type}") + if not os.path.exists(os.path.join(root,scene, "pts")): + os.makedirs(os.path.join(root,scene, "pts")) + save_np_pts(pts_path, target_points, file_type) + +def 
+def save_scan_points_indices(root, scene, frame_idx, scan_points_indices: np.ndarray, file_type="txt"):
+    indices_path = os.path.join(root, scene, "scan_points_indices", f"{frame_idx}.{file_type}")
+    if not os.path.exists(os.path.join(root, scene, "scan_points_indices")):
+        os.makedirs(os.path.join(root, scene, "scan_points_indices"))
+    save_np_pts(indices_path, scan_points_indices, file_type)
+
+def save_scan_points(root, scene, scan_points: np.ndarray):
+    scan_points_path = os.path.join(root, scene, "scan_points.txt")
+    save_np_pts(scan_points_path, scan_points)
+
+def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic):
+    z = depth[mask]
+    i, j = np.nonzero(mask)
+    x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+    y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+    points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+    points_camera_aug = np.concatenate((points_camera, np.ones((points_camera.shape[0], 1))), axis=-1)
+    points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
+
+    return points_camera_world
+
+def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
+    scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
+    points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
+    points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
+    points_image_homogeneous /= points_image_homogeneous[:, 2:]
+    pixel_x = points_image_homogeneous[:, 0].astype(int)
+    pixel_y = points_image_homogeneous[:, 1].astype(int)
+    h, w = mask.shape[:2]
+    valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
+    mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
+    selected_points_indices = np.where((mask_colors == display_table_mask_label).all(axis=-1))[0]
+    selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
+    return selected_points_indices
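+
+# Projection used above, spelled out (standard pinhole model; the notation is
+# ours, not from an external spec): for a world-frame scan point p,
+#   p_cam = inv(T_cam_to_world) @ [p; 1]
+#   [u, v, w] = K @ p_cam[:3];  pixel = (u / w, v / w)
+# A scan point counts as visible when its pixel lies inside the image and hits
+# the display-table mask color.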
+
+def save_scene_data(root, scene, scene_idx=0, scene_total=1, file_type="txt"):
+
+    ''' configuration '''
+    target_mask_label = (0, 255, 0, 255)
+    display_table_mask_label = (0, 0, 255, 255)
+    random_downsample_N = 32768
+    voxel_size = 0.002
+    filter_degree = 75
+    min_z = 0.2
+    max_z = 0.5
+
+    ''' scan points '''
+    scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0, display_table_radius=0.25))
+
+    ''' read frame data(depth|mask|normal) '''
+    frame_num = DataLoadUtil.get_scene_seq_length(root, scene)
+    for frame_id in range(frame_num):
+        print(f"[scene({scene_idx}/{scene_total})|frame({frame_id}/{frame_num})]Processing {scene} frame {frame_id}")
+        path = DataLoadUtil.get_path(root, scene, frame_id)
+        cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+        depth_L, depth_R = DataLoadUtil.load_depth(
+            path, cam_info["near_plane"],
+            cam_info["far_plane"],
+            binocular=True
+        )
+        mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
+
+        ''' target points '''
+        mask_img_L = mask_L
+        mask_img_R = mask_R
+
+        target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
+        target_mask_img_R = (mask_R == target_mask_label).all(axis=-1)
+
+        target_points_L = get_world_points(depth_L, target_mask_img_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
+        target_points_R = get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
+
+        sampled_target_points_L = PtsUtil.random_downsample_point_cloud(
+            target_points_L, random_downsample_N
+        )
+        sampled_target_points_R = PtsUtil.random_downsample_point_cloud(
+            target_points_R, random_downsample_N
+        )
+
+        has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
+        if has_points:
+            target_points = PtsUtil.get_overlapping_points(
+                sampled_target_points_L, sampled_target_points_R, voxel_size
+            )
+
+        if has_points:
+            has_points = target_points.shape[0] > 0
+
+        if has_points:
+            points_normals = DataLoadUtil.load_points_normals(root, scene, display_table_as_world_space_origin=True)
+            target_points = PtsUtil.filter_points(
+                target_points, points_normals, cam_info["cam_to_world"], voxel_size=0.002, theta=filter_degree, z_range=(min_z, max_z)
+            )
+
+        ''' scan points indices '''
+        scan_points_indices_L = get_scan_points_indices(scan_points, mask_img_L, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
+        scan_points_indices_R = get_scan_points_indices(scan_points, mask_img_R, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
+        scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
+
+        if not has_points:
+            target_points = np.zeros((0, 3))
+
+        save_target_points(root, scene, frame_id, target_points, file_type=file_type)
+        save_scan_points_indices(root, scene, frame_id, scan_points_indices, file_type=file_type)
+
+    save_scan_points(root, scene, scan_points)  # The "done" flag of scene preprocess
+
+
+if __name__ == "__main__":
+    #root = "/media/hofee/repository/new_data_with_normal"
+    root = r"/media/hofee/data/tempdir/test_real_output"
+    # list_path = r"/media/hofee/repository/full_list.txt"
+    # scene_list = []
+
+    # with open(list_path, "r") as f:
+    #     for line in f:
+    #         scene_list.append(line.strip())
+    scene_list = os.listdir(root)
+    from_idx = 0 # 1000
+    to_idx = 1 # 1500
+
+    cnt = 0
+    total = to_idx - from_idx
+    for scene in scene_list[from_idx:to_idx]:
+        start = time.time()
+        save_scene_data(root, scene, cnt, total, file_type="npy")
+        cnt += 1
+        end = time.time()
+        print(f"Time cost: {end-start}")
diff --git a/utils/pts_util.py b/utils/pts_util.py
index 47dc938..da29e93 100644
--- a/utils/pts_util.py
+++ b/utils/pts_util.py
@@ -169,3 +169,25 @@ class PtsUtil:
         )
 
         return reg_icp.transformation
+
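+    # Usage sketch for the helper below (shapes follow this codebase's
+    # conventions; nothing here is an external API): depth is an (H, W) array
+    # in meters, cam_intrinsic a 3x3 K matrix, cam_extrinsic a 4x4 cam-to-world
+    # pose, and both returned point sets are (N, 3):
+    #   pts = PtsUtil.get_pts_from_depth(depth, K, cam_to_world)
+    #   cam_pts, world_pts = pts["points_camera"], pts["points_world"]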
+    @staticmethod
+    def get_pts_from_depth(depth, cam_intrinsic, cam_extrinsic):
+        h, w = depth.shape
+        i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
+
+        z = depth
+        x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        points_camera_aug = np.concatenate(
+            [points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
+        )
+
+        points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
+        data = {
+            "points_world": points_world,
+            "points_camera": points_camera,
+        }
+        return data
\ No newline at end of file
diff --git a/utils/reconstruction_util.py b/utils/reconstruction_util.py
index 0e7082d..84b196d 100644
--- a/utils/reconstruction_util.py
+++ b/utils/reconstruction_util.py
@@ -152,9 +152,15 @@ class ReconstructionUtil:
 
     @staticmethod
     def check_scan_points_overlap(history_indices, indices2, threshold=5):
-        for indices1 in history_indices:
-            if len(set(indices1).intersection(set(indices2))) >= threshold:
-                return True
+        # an empty index set can never overlap; the explicit guard replaces
+        # the previous try/except-and-debugger scaffolding
+        if len(indices2) == 0:
+            return False
+        for indices1 in history_indices:
+            if len(set(indices1).intersection(set(indices2))) >= threshold:
+                return True
         return False
\ No newline at end of file
diff --git a/utils/render_util.py b/utils/render_util.py
new file mode 100644
index 0000000..c17814c
--- /dev/null
+++ b/utils/render_util.py
@@ -0,0 +1,45 @@
+
+import os
+import json
+import subprocess
+import tempfile
+import shutil
+from utils.data_load import DataLoadUtil
+from utils.pts_util import PtsUtil
+
+class RenderUtil:
+
+    @staticmethod
+    def render_pts(cam_pose, scene_path, script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
+        # NOTE: this parameter was previously named object_name but used as an
+        # undefined scene_path throughout the body; it is a scene path, and the
+        # Blender script's "object_name" field receives it unchanged
+        nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
+        with tempfile.TemporaryDirectory() as temp_dir:
+            params = {
+                "cam_pose": nO_to_world_pose.tolist(),
+                "object_name": scene_path
+            }
+            params_data_path = os.path.join(temp_dir, "params.json")
+            with open(params_data_path, 'w') as f:
+                json.dump(params, f)
+            result = subprocess.run([
+                'blender', '-b', '-P', script_path, '--', temp_dir
+            ], capture_output=True, text=True)
+            if result.returncode != 0:
+                print("Blender script failed:")
+                print(result.stderr)
+                return None
+            path = os.path.join(temp_dir, "tmp")
+            point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
+            cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
+
+            filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
+            full_scene_point_cloud = None
+            if require_full_scene:
+                depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)
+                point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_params['cam_intrinsic'], cam_params['cam_to_world'])['points_world']
+                point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_params['cam_intrinsic'], cam_params['cam_to_world_R'])['points_world']
+
+                point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
+                point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
+                full_scene_point_cloud = PtsUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
+
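+            # Hypothetical call (names illustrative): on success this returns
+            # (filtered_point_cloud, full_scene_point_cloud); on a failed
+            # Blender run it returns None, so callers must check before
+            # unpacking:
+            #   out = RenderUtil.render_pts(cam_L, scene_dir, script, pn)
+            #   if out is not None: pts, scene = out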
+            return filtered_point_cloud, full_scene_point_cloud
\ No newline at end of file
diff --git a/utils/view_sample_util.py b/utils/view_sample_util.py
deleted file mode 100644
index ce70ff1..0000000
--- a/utils/view_sample_util.py
+++ /dev/null
@@ -1,162 +0,0 @@
-
-import numpy as np
-from utils.pose_util import PoseUtil
-import trimesh
-from collections import defaultdict
-from scipy.spatial.transform import Rotation as R
-import random
-
-class ViewSampleUtil:
-    @staticmethod
-    def farthest_point_sampling(points, num_samples):
-        num_points = points.shape[0]
-        if num_samples >= num_points:
-            return points, np.arange(num_points)
-        sampled_indices = np.zeros(num_samples, dtype=int)
-        sampled_indices[0] = np.random.randint(num_points)
-        min_distances = np.full(num_points, np.inf)
-        for i in range(1, num_samples):
-            current_point = points[sampled_indices[i - 1]]
-            dist_to_current_point = np.linalg.norm(points - current_point, axis=1)
-            min_distances = np.minimum(min_distances, dist_to_current_point)
-            sampled_indices[i] = np.argmax(min_distances)
-        downsampled_points = points[sampled_indices]
-        return downsampled_points, sampled_indices
-
-    @staticmethod
-    def voxel_downsample(points, voxel_size):
-        voxel_grid = defaultdict(list)
-        for i, point in enumerate(points):
-            voxel_index = tuple((point // voxel_size).astype(int))
-            voxel_grid[voxel_index].append(i)
-
-        downsampled_points = []
-        downsampled_indices = []
-        for indices in voxel_grid.values():
-            selected_index = indices[0]
-            downsampled_points.append(points[selected_index])
-            downsampled_indices.append(selected_index)
-
-        return np.array(downsampled_points), downsampled_indices
-
-    @staticmethod
-    def sample_view_data(mesh: trimesh.Trimesh, distance_range: tuple = (0.25, 0.5), voxel_size: float = 0.005, max_views: int = 1, pertube_repeat: int = 1) -> dict:
-        view_data = {
-            "look_at_points": [],
-            "cam_positions": [],
-        }
-
-        vertices = mesh.vertices
-        look_at_points = []
-        cam_positions = []
-        normals = []
-        vertex_normals = mesh.vertex_normals
-
-        for i, vertex in enumerate(vertices):
-            look_at_point = vertex
-
-            view_data["look_at_points"].append(look_at_point)
-
-            normal = vertex_normals[i]
-            if np.isnan(normal).any():
-                continue
-            if np.dot(normal, look_at_point) < 0:
-                normal = -normal
-
-            normals.append(normal)
-
-            for _ in range(pertube_repeat):
-                perturb_angle = np.radians(np.random.uniform(0, 30))
-                perturb_axis = np.random.normal(size=3)
-                perturb_axis /= np.linalg.norm(perturb_axis)
-                rotation_matrix = R.from_rotvec(perturb_angle * perturb_axis).as_matrix()
-                perturbed_normal = np.dot(rotation_matrix, normal)
-
-                distance = np.random.uniform(*distance_range)
-                cam_position = look_at_point + distance * perturbed_normal
-                look_at_points.append(look_at_point)
-                cam_positions.append(cam_position)
-
-        look_at_points = np.array(look_at_points)
-        cam_positions = np.array(cam_positions)
-
-        voxel_downsampled_look_at_points, selected_indices = ViewSampleUtil.voxel_downsample(look_at_points, voxel_size)
-        voxel_downsampled_cam_positions = cam_positions[selected_indices]
-        voxel_downsampled_normals = np.array(normals)[selected_indices]
-
-        fps_downsampled_look_at_points, selected_indices = ViewSampleUtil.farthest_point_sampling(voxel_downsampled_look_at_points, max_views * 2)
-        fps_downsampled_cam_positions = voxel_downsampled_cam_positions[selected_indices]
-
-        view_data["look_at_points"] = fps_downsampled_look_at_points.tolist()
-        view_data["cam_positions"] = fps_downsampled_cam_positions.tolist()
-        view_data["normals"] = voxel_downsampled_normals.tolist()
-        view_data["voxel_down_sampled_points"] = voxel_downsampled_look_at_points
-
-        return view_data
-
-    @staticmethod
-    def get_world_points_and_normals(view_data: dict, obj_world_pose: np.ndarray) -> tuple:
-        world_points = []
-        world_normals = []
-        for voxel_down_sampled_points, normal in zip(view_data["voxel_down_sampled_points"], view_data["normals"]):
-            voxel_down_sampled_points_world = obj_world_pose @ np.append(voxel_down_sampled_points, 1.0)
-            normal_world = obj_world_pose[:3, :3] @ normal
-            world_points.append(voxel_down_sampled_points_world[:3])
-            world_normals.append(normal_world)
-        return np.array(world_points), np.array(world_normals)
-
-    @staticmethod
-    def get_cam_pose(view_data: dict, obj_world_pose: np.ndarray, max_views: int, min_cam_table_included_degree: int, random_view_ratio: float) -> np.ndarray:
-        cam_poses = []
-        min_height_z = 1000
-        for look_at_point, cam_position in zip(view_data["look_at_points"], view_data["cam_positions"]):
-            look_at_point_world = obj_world_pose @ np.append(look_at_point, 1.0)
-            cam_position_world = obj_world_pose @ np.append(cam_position, 1.0)
-            if look_at_point_world[2] < min_height_z:
-                min_height_z = look_at_point_world[2]
-            look_at_point_world = look_at_point_world[:3]
-            cam_position_world = cam_position_world[:3]
-
-            forward_vector = cam_position_world - look_at_point_world
-            forward_vector /= np.linalg.norm(forward_vector)
-
-            up_vector = np.array([0, 0, 1])
-
-            right_vector = np.cross(up_vector, forward_vector)
-            right_vector /= np.linalg.norm(right_vector)
-
-            corrected_up_vector = np.cross(forward_vector, right_vector)
-            rotation_matrix = np.array([right_vector, corrected_up_vector, forward_vector]).T
-
-            cam_pose = np.eye(4)
-            cam_pose[:3, :3] = rotation_matrix
-            cam_pose[:3, 3] = cam_position_world
-            cam_poses.append(cam_pose)
-
-        filtered_cam_poses = []
-        for cam_pose in cam_poses:
-            if cam_pose[2, 3] > min_height_z:
-                direction_vector = cam_pose[:3, 2]
-                horizontal_normal = np.array([0, 0, 1])
-                cos_angle = np.dot(direction_vector, horizontal_normal) / (np.linalg.norm(direction_vector) * np.linalg.norm(horizontal_normal))
-                angle = np.arccos(np.clip(cos_angle, -1.0, 1.0))
-                angle_degree = np.degrees(angle)
-                if angle_degree < 90 - min_cam_table_included_degree:
-                    filtered_cam_poses.append(cam_pose)
-                    if random.random() < random_view_ratio:
-                        pertube_pose = PoseUtil.get_uniform_pose([0.1, 0.1, 0.1], [3, 3, 3], 0, 180, "cm")
-                        filtered_cam_poses.append(pertube_pose @ cam_pose)
-
-        if len(filtered_cam_poses) > max_views:
-            indices = np.random.choice(len(filtered_cam_poses), max_views, replace=False)
-            filtered_cam_poses = [filtered_cam_poses[i] for i in indices]
-
-        return np.array(filtered_cam_poses)
-
-    @staticmethod
-    def sample_view_data_world_space(mesh: trimesh.Trimesh, cad_to_world: np.ndarray, distance_range:tuple = (0.25,0.5), voxel_size:float = 0.005, max_views: int=1, min_cam_table_included_degree:int=20, random_view_ratio:float = 0.2) -> dict:
-        view_data = ViewSampleUtil.sample_view_data(mesh, distance_range, voxel_size, max_views)
-        view_data["cam_to_world_poses"] = ViewSampleUtil.get_cam_pose(view_data, cad_to_world, max_views, min_cam_table_included_degree, random_view_ratio)
-        view_data["voxel_down_sampled_points"], view_data["normals"] = ViewSampleUtil.get_world_points_and_normals(view_data, cad_to_world)
-        return view_data