diff --git a/configs/local/train_config.yaml b/configs/local/train_config.yaml index 5645517..226c302 100644 --- a/configs/local/train_config.yaml +++ b/configs/local/train_config.yaml @@ -7,9 +7,9 @@ runner: parallel: False experiment: - name: local_eval + name: debug root_dir: "experiments" - use_checkpoint: True + use_checkpoint: False epoch: 600 # -1 stands for last epoch max_epochs: 5000 save_checkpoint_interval: 1 @@ -40,6 +40,7 @@ dataset: batch_size: 1 num_workers: 12 pts_num: 4096 + load_from_preprocess: True OmniObject3d_test: root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes" @@ -55,6 +56,7 @@ dataset: batch_size: 1 num_workers: 12 pts_num: 4096 + load_from_preprocess: True pipeline: nbv_reconstruction_pipeline: diff --git a/configs/server/train_config.yaml b/configs/server/train_config.yaml index 3881eb9..13aa48e 100644 --- a/configs/server/train_config.yaml +++ b/configs/server/train_config.yaml @@ -41,6 +41,7 @@ dataset: batch_size: 160 num_workers: 16 pts_num: 4096 + load_from_preprocess: True OmniObject3d_test: root_dir: "../data/sample_for_training/scenes" @@ -56,6 +57,7 @@ dataset: batch_size: 1 num_workers: 12 pts_num: 4096 + load_from_preprocess: True pipeline: nbv_reconstruction_pipeline: diff --git a/core/nbv_dataset.py b/core/nbv_dataset.py index acac8ba..72bd5db 100644 --- a/core/nbv_dataset.py +++ b/core/nbv_dataset.py @@ -7,7 +7,7 @@ from PytorchBoot.utils.log_util import Log import torch import os import sys -sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction") +sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction") from utils.data_load import DataLoadUtil from utils.pose import PoseUtil @@ -28,6 +28,7 @@ class NBVReconstructionDataset(BaseDataset): self.pts_num = config["pts_num"] self.type = config["type"] self.cache = config.get("cache") + self.load_from_preprocess = config.get("load_from_preprocess", False) if self.type == namespace.Mode.TEST: self.model_dir = config["model_dir"] self.filter_degree = config["filter_degree"] @@ -111,24 +112,28 @@ class NBVReconstructionDataset(BaseDataset): cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True) n_to_world_pose = cam_info["cam_to_world"] nR_to_world_pose = cam_info["cam_to_world_R"] - - cached_data = None - if self.cache: - cached_data = self.load_from_cache(scene_name, frame_idx) - - if cached_data is None: - depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True) - point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_info['cam_intrinsic'], n_to_world_pose)['points_world'] - point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_info['cam_intrinsic'], nR_to_world_pose)['points_world'] - - point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536) - point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536) - overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R) - downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num) - if self.cache: - self.save_to_cache(scene_name, frame_idx, downsampled_target_point_cloud) + + if self.load_from_preprocess: + downsampled_target_point_cloud = DataLoadUtil.load_from_preprocessed_pts(view_path) else: - downsampled_target_point_cloud = cached_data + cached_data = None + if self.cache: + cached_data = self.load_from_cache(scene_name, frame_idx) + + if cached_data is None: + print("load depth") + depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True) + point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_info['cam_intrinsic'], n_to_world_pose)['points_world'] + point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_info['cam_intrinsic'], nR_to_world_pose)['points_world'] + + point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536) + point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536) + overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R) + downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num) + if self.cache: + self.save_to_cache(scene_name, frame_idx, downsampled_target_point_cloud) + else: + downsampled_target_point_cloud = cached_data scanned_views_pts.append(downsampled_target_point_cloud) scanned_coverages_rate.append(coverage_rate) @@ -205,10 +210,11 @@ if __name__ == "__main__": torch.manual_seed(seed) np.random.seed(seed) config = { - "root_dir": "../data/sample_for_training/scenes", - "model_dir": "../data/scaled_object_meshes", + "root_dir": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/preprocessed_scenes/", + "model_dir": "/media/hofee/data/data/scaled_object_meshes", "source": "nbv_reconstruction_dataset", - "split_file": "../data/sample_for_training/OmniObject3d_train.txt", + "split_file": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt", + "load_from_preprocess": True, "ratio": 0.5, "batch_size": 2, "filter_degree": 75, diff --git a/core/seq_dataset.py b/core/seq_dataset.py index b34746b..4cc4043 100644 --- a/core/seq_dataset.py +++ b/core/seq_dataset.py @@ -46,10 +46,12 @@ class SeqNBVReconstructionDataset(BaseDataset): best_seq = label_data["best_sequence"] max_coverage_rate = label_data["max_coverage_rate"] first_frame = best_seq[0] + best_seq_len = len(best_seq) datalist.append({ "scene_name": scene_name, "first_frame": first_frame, - "max_coverage_rate": max_coverage_rate + "max_coverage_rate": max_coverage_rate, + "best_seq_len": best_seq_len, }) return datalist[5:] @@ -98,6 +100,7 @@ class SeqNBVReconstructionDataset(BaseDataset): "first_frame_coverage": first_frame_coverage, "scene_path": scene_path, "model_points_normals": model_points_normals, + "best_seq_len": data_item_info["best_seq_len"], } return data_item diff --git a/runners/inferencer.py b/runners/inferencer.py index e0db9a9..aad0bc3 100644 --- a/runners/inferencer.py +++ b/runners/inferencer.py @@ -79,8 +79,7 @@ class Inferencer(Runner): status_manager.set_progress("inference", "inferencer", f"dataset", len(self.test_set_list), len(self.test_set_list)) - def predict_sequence(self, data, cr_increase_threshold=0, max_iter=100): - pred_cr_seq = [] + def predict_sequence(self, data, cr_increase_threshold=0, max_iter=50, max_retry=5): scene_name = data["scene_name"][0] Log.info(f"Processing scene: {scene_name}") status_manager.set_status("inference", "inferencer", "scene", scene_name) @@ -98,7 +97,7 @@ class Inferencer(Runner): ''' data for inference ''' input_data = {} input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)] - input_data["scanned_n_to_world_pose_9d"] = [data["first_to_first_9d"][0].to(self.device)] + input_data["scanned_n_to_world_pose_9d"] = [data["first_frame_to_world"][0].to(self.device)] input_data["mode"] = namespace.Mode.TEST input_pts_N = input_data["scanned_pts"][0].shape[1] @@ -107,9 +106,11 @@ class Inferencer(Runner): scanned_view_pts = [first_frame_target_pts] last_pred_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold) - - - while len(pred_cr_seq) < max_iter: + retry_duplication_pose = [] + retry_no_pts_pose = [] + retry = 0 + pred_cr_seq = [last_pred_cr] + while len(pred_cr_seq) < max_iter and retry < max_retry: output = self.pipeline(input_data) next_pose_9d = output["pred_pose_9d"] @@ -118,22 +119,30 @@ class Inferencer(Runner): pred_pose[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(next_pose_9d[:,:6])[0] pred_pose[:3,3] = next_pose_9d[0,6:] pred_n_to_world_pose_mat = torch.matmul(first_frame_to_world, pred_pose) + try: new_target_pts_world, new_pts_world = RenderUtil.render_pts(pred_n_to_world_pose_mat, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose, require_full_scene=True) except Exception as e: Log.warning(f"Error in scene {scene_path}, {e}") print("current pose: ", pred_pose) print("curr_pred_cr: ", last_pred_cr) + retry_no_pts_pose.append(pred_n_to_world_pose_mat.cpu().numpy().tolist()) + retry += 1 continue - + pred_cr = self.compute_coverage_rate(scanned_view_pts, new_target_pts_world, down_sampled_model_pts, threshold=voxel_threshold) - pred_cr_seq.append(pred_cr) - print(pred_cr, last_pred_cr) + + print(pred_cr, last_pred_cr, " max: ", data["max_coverage_rate"]) if pred_cr >= data["max_coverage_rate"]: break if pred_cr <= last_pred_cr + cr_increase_threshold: - break + retry += 1 + retry_duplication_pose.append(pred_n_to_world_pose_mat.cpu().numpy().tolist()) + continue + + retry = 0 + pred_cr_seq.append(pred_cr) scanned_view_pts.append(new_target_pts_world) down_sampled_new_pts_world = PtsUtil.random_downsample_point_cloud(new_pts_world, input_pts_N) new_pts_world_aug = np.hstack([down_sampled_new_pts_world, np.ones((down_sampled_new_pts_world.shape[0], 1))]) @@ -145,7 +154,7 @@ class Inferencer(Runner): input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], next_pose_9d], dim=0)] last_pred_cr = pred_cr - print(last_pred_cr) + input_data["scanned_pts"] = input_data["scanned_pts"][0].cpu().numpy().tolist() input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist() @@ -154,8 +163,12 @@ class Inferencer(Runner): "pts_seq": input_data["scanned_pts"], "target_pts_seq": scanned_view_pts, "coverage_rate_seq": pred_cr_seq, - "max_coverage_rate": data["max_coverage_rate"], - "pred_max_coverage_rate": max(pred_cr_seq) + "max_coverage_rate": data["max_coverage_rate"][0], + "pred_max_coverage_rate": max(pred_cr_seq), + "scene_name": scene_name, + "retry_no_pts_pose": retry_no_pts_pose, + "retry_duplication_pose": retry_duplication_pose, + "best_seq_len": data["best_seq_len"][0], } return result diff --git a/utils/data_load.py b/utils/data_load.py index bbc7298..06523c3 100644 --- a/utils/data_load.py +++ b/utils/data_load.py @@ -133,6 +133,12 @@ class DataLoadUtil: rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR) return rgb_image + @staticmethod + def load_from_preprocessed_pts(path): + npy_path = os.path.join(os.path.dirname(path), "points", os.path.basename(path) + ".npy") + pts = np.load(npy_path) + return pts + @staticmethod def cam_pose_transformation(cam_pose_before): offset = np.asarray([ diff --git a/utils/render.py b/utils/render.py index 5022c7b..46ea97d 100644 --- a/utils/render.py +++ b/utils/render.py @@ -34,9 +34,6 @@ class RenderUtil: point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True) cam_params = DataLoadUtil.load_cam_info(path, binocular=True) filtered_point_cloud = ReconstructionUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree) - # ------ Debug Start ------ - import ipdb;ipdb.set_trace() - # ------ Debug End ------ full_scene_point_cloud = None if require_full_scene: depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)