diff --git a/configs/local/inference_config.yaml b/configs/local/inference_config.yaml
index 73a73de..1d1ea81 100644
--- a/configs/local/inference_config.yaml
+++ b/configs/local/inference_config.yaml
@@ -6,24 +6,24 @@ runner:
   cuda_visible_devices: "0,1,2,3,4,5,6,7"
 
   experiment:
-    name: local_full_eval
+    name: w_gf_wo_lf_full
     root_dir: "experiments"
-    epoch: 20 # -1 stands for last epoch
+    epoch: 1 # -1 stands for last epoch
 
   test:
     dataset_list:
       - OmniObject3d_train
 
     blender_script_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py"
-    output_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/inference_result_full"
-    pipeline: nbv_reconstruction_pipeline
+    output_dir: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/inference_global_full_on_testset"
+    pipeline: nbv_reconstruction_global_pts_pipeline
 
 dataset:
   OmniObject3d_train:
     root_dir: "/media/hofee/repository/nbv_reconstruction_data_512"
     model_dir: "/media/hofee/data/data/scaled_object_meshes"
     source: seq_nbv_reconstruction_dataset
-    split_file: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt"
+    split_file: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/test_set_list.txt"
     type: test
     filter_degree: 75
     ratio: 1
@@ -33,11 +33,25 @@ dataset:
     load_from_preprocess: False
 
 pipeline:
-  nbv_reconstruction_pipeline:
-    pts_encoder: pointnet_encoder
-    seq_encoder: transformer_seq_encoder
-    pose_encoder: pose_encoder
-    view_finder: gf_view_finder
+  nbv_reconstruction_local_pts_pipeline:
+    modules:
+      pts_encoder: pointnet_encoder
+      seq_encoder: transformer_seq_encoder
+      pose_encoder: pose_encoder
+      view_finder: gf_view_finder
+    eps: 1e-5
+    global_scanned_feat: False
+
+  nbv_reconstruction_global_pts_pipeline:
+    modules:
+      pts_encoder: pointnet_encoder
+      pose_seq_encoder: transformer_pose_seq_encoder
+      pose_encoder: pose_encoder
+      view_finder: gf_view_finder
+    eps: 1e-5
+    global_scanned_feat: True
+
+
 
 module:
@@ -55,6 +69,13 @@ module:
     num_layers: 3
     output_dim: 2048
 
+  transformer_pose_seq_encoder:
+    pose_embed_dim: 256
+    num_heads: 4
+    ffn_dim: 256
+    num_layers: 3
+    output_dim: 1024
+
   gf_view_finder:
     t_feat_dim: 128
     pose_feat_dim: 256

diff --git a/core/global_pts_pipeline.py b/core/global_pts_pipeline.py
index ed2ea42..31b8ad4 100644
--- a/core/global_pts_pipeline.py
+++ b/core/global_pts_pipeline.py
@@ -73,7 +73,6 @@ class NBVReconstructionGlobalPointsPipeline(nn.Module):
         device = next(self.parameters()).device
 
-        pts_feat_seq_list = []
         pose_feat_seq_list = []
 
         for scanned_n_to_world_pose_9d in scanned_n_to_world_pose_9d_batch:
@@ -82,10 +81,10 @@ class NBVReconstructionGlobalPointsPipeline(nn.Module):
 
         main_feat = self.pose_seq_encoder.encode_sequence(pose_feat_seq_list)
 
-        if self.enable_global_scanned_feat:
-            combined_scanned_pts_batch = data['combined_scanned_pts']
-            global_scanned_feat = self.pts_encoder.encode_points(combined_scanned_pts_batch)
-            main_feat = torch.cat([main_feat, global_scanned_feat], dim=-1)
+
+        combined_scanned_pts_batch = data['combined_scanned_pts']
+        global_scanned_feat = self.pts_encoder.encode_points(combined_scanned_pts_batch)
+        main_feat = torch.cat([main_feat, global_scanned_feat], dim=-1)
 
         if torch.isnan(main_feat).any():
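Note: with the enable_global_scanned_feat branch removed, the global pipeline now always concatenates the encoded pose sequence with a global feature of the combined scanned points. A minimal shape sketch of that concatenation follows; the config above fixes only the pose side (output_dim: 1024), so the 1024-wide PointNet global feature here is an assumption, not read from this diff.

import torch

# Feature widths implied by the config above; the PointNet global feature
# width (1024) is assumed for illustration.
B = 2
pose_seq_feat = torch.randn(B, 1024)    # pose_seq_encoder.encode_sequence(...)
global_pts_feat = torch.randn(B, 1024)  # pts_encoder.encode_points(combined_scanned_pts)
main_feat = torch.cat([pose_seq_feat, global_pts_feat], dim=-1)
assert main_feat.shape == (B, 2048)     # the view finder must accept this width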
diff --git a/core/seq_dataset.py b/core/seq_dataset.py
index 12aa763..f2f7bc3 100644
--- a/core/seq_dataset.py
+++ b/core/seq_dataset.py
@@ -39,42 +39,32 @@ class SeqNBVReconstructionDataset(BaseDataset):
             scene_name_list.append(scene_name)
         return scene_name_list
 
-    def get_datalist_new(self):
-        datalist = []
-        for scene_name in self.scene_name_list:
-            label_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
-            for i in range(label_num):
-                label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, i)
-                label_data = DataLoadUtil.load_label(label_path)
-                best_seq = label_data["best_sequence"]
-                max_coverage_rate = label_data["max_coverage_rate"]
-                first_frame = best_seq[0]
-                best_seq_len = len(best_seq)
-                datalist.append({
-                    "scene_name": scene_name,
-                    "first_frame": first_frame,
-                    "max_coverage_rate": max_coverage_rate,
-                    "best_seq_len": best_seq_len,
-                    "label_idx": i,
-                })
-        return datalist
-
     def get_datalist(self):
         datalist = []
         for scene_name in self.scene_name_list:
-            label_path = DataLoadUtil.get_label_path_old(self.root_dir, scene_name)
+            seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
+            scene_max_coverage_rate = 0
+            scene_max_cr_idx = 0
+
+            for seq_idx in range(seq_num):
+                label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx)
+                label_data = DataLoadUtil.load_label(label_path)
+                max_coverage_rate = label_data["max_coverage_rate"]
+                if max_coverage_rate > scene_max_coverage_rate:
+                    scene_max_coverage_rate = max_coverage_rate
+                    scene_max_cr_idx = seq_idx
+
+            label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx)
             label_data = DataLoadUtil.load_label(label_path)
-            best_seq = label_data["best_sequence"]
-            max_coverage_rate = label_data["max_coverage_rate"]
-            first_frame = best_seq[0]
-            best_seq_len = len(best_seq)
+            first_frame = label_data["best_sequence"][0]
+            best_seq_len = len(label_data["best_sequence"])
             datalist.append({
-                "scene_name": scene_name,
-                "first_frame": first_frame,
-                "max_coverage_rate": max_coverage_rate,
-                "best_seq_len": best_seq_len,
-                "best_seq": best_seq,
-            })
+                "scene_name": scene_name,
+                "first_frame": first_frame,
+                "max_coverage_rate": scene_max_coverage_rate,
+                "best_seq_len": best_seq_len,
+                "label_idx": scene_max_cr_idx,
+            })
         return datalist
 
     def __getitem__(self, index):
@@ -110,8 +100,10 @@ class SeqNBVReconstructionDataset(BaseDataset):
         first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
         scene_path = os.path.join(self.root_dir, scene_name)
         model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name)
+
         data_item = {
             "first_pts": np.asarray([first_downsampled_target_point_cloud], dtype=np.float32),
+            "combined_scanned_pts": np.asarray(first_downsampled_target_point_cloud, dtype=np.float32),
             "first_to_world_9d": np.asarray([first_to_world_9d], dtype=np.float32),
             "scene_name": scene_name,
             "max_coverage_rate": max_coverage_rate,
@@ -134,8 +126,9 @@ class SeqNBVReconstructionDataset(BaseDataset):
         collate_data = {}
         collate_data["first_pts"] = [torch.tensor(item['first_pts']) for item in batch]
         collate_data["first_to_world_9d"] = [torch.tensor(item['first_to_world_9d']) for item in batch]
+        collate_data["combined_scanned_pts"] = torch.stack([torch.tensor(item['combined_scanned_pts']) for item in batch])
        for key in batch[0].keys():
-            if key not in ["first_pts", "first_to_world_9d"]:
+            if key not in ["first_pts", "first_to_world_9d", "combined_scanned_pts"]:
                 collate_data[key] = [item[key] for item in batch]
         return collate_data
     return collate_fn
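Note: the reworked get_datalist now scans every label sequence of a scene and keeps only the one with the highest max_coverage_rate. A standalone sketch of that selection rule, with made-up coverage values:

# Same result as the running-max loop in the diff above: keep the index of
# the label sequence whose max_coverage_rate is highest (first one on ties).
label_data_per_seq = [
    {"max_coverage_rate": 0.71},
    {"max_coverage_rate": 0.83},
    {"max_coverage_rate": 0.79},
]
scene_max_cr_idx = max(
    range(len(label_data_per_seq)),
    key=lambda i: label_data_per_seq[i]["max_coverage_rate"],
)
assert scene_max_cr_idx == 1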
diff --git a/runners/inferencer.py b/runners/inferencer.py
index bfd20f1..60284d3 100644
--- a/runners/inferencer.py
+++ b/runners/inferencer.py
@@ -20,7 +20,7 @@ from PytorchBoot.runners.runner import Runner
 from PytorchBoot.utils import Log
 from PytorchBoot.status import status_manager
 
-@stereotype.runner("inferencer", comment="not tested")
+@stereotype.runner("inferencer")
 class Inferencer(Runner):
     def __init__(self, config_path):
         super().__init__(config_path)
@@ -34,6 +34,7 @@ class Inferencer(Runner):
 
         ''' Experiment '''
         self.load_experiment("nbv_evaluator")
+        self.stat_result = {}
 
         ''' Test '''
         self.test_config = ConfigManager.get(namespace.Stereotype.RUNNER, namespace.Mode.TEST)
@@ -103,9 +104,9 @@ class Inferencer(Runner):
         input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)]
         input_data["scanned_n_to_world_pose_9d"] = [data["first_to_world_9d"][0].to(self.device)]
         input_data["mode"] = namespace.Mode.TEST
+        input_data["combined_scanned_pts"] = data["combined_scanned_pts"]
         input_pts_N = input_data["scanned_pts"][0].shape[1]
-
         first_frame_target_pts, _ = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
         scanned_view_pts = [first_frame_target_pts]
         last_pred_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
@@ -138,7 +139,7 @@ class Inferencer(Runner):
             print(pred_cr, last_pred_cr, " max: ", data["max_coverage_rate"])
 
             if pred_cr >= data["max_coverage_rate"]:
-                break
+                print("max coverage rate reached!")
             if pred_cr <= last_pred_cr + cr_increase_threshold:
                 retry += 1
                 retry_duplication_pose.append(pred_pose.cpu().numpy().tolist())
@@ -155,6 +156,11 @@ class Inferencer(Runner):
             input_data["scanned_pts"] = [torch.cat([input_data["scanned_pts"][0], new_pts_tensor], dim=0)]
             input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], pred_pose_9d], dim=0)]
 
+            combined_scanned_views_pts = np.concatenate(input_data["scanned_pts"][0].tolist(), axis=0)
+            voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002)
+            random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, input_pts_N)
+            input_data["combined_scanned_pts"] = torch.tensor(random_downsampled_combined_scanned_pts_np, dtype=torch.float32).unsqueeze(0).to(self.device)
+
             last_pred_cr = pred_cr
@@ -173,6 +179,15 @@ class Inferencer(Runner):
             "retry_duplication_pose": retry_duplication_pose,
             "best_seq_len": data["best_seq_len"][0],
         }
+        self.stat_result[scene_name] = {
+            "max_coverage_rate": data["max_coverage_rate"][0],
+            "success_rate": max(pred_cr_seq) / data["max_coverage_rate"][0],
+            "coverage_rate_seq": pred_cr_seq,
+            "pred_max_coverage_rate": max(pred_cr_seq),
+            "pred_seq_len": len(pred_cr_seq),
+        }
+        print('success rate: ', max(pred_cr_seq) / data["max_coverage_rate"][0])
+
         return result
 
     def compute_coverage_rate(self, scanned_view_pts, new_pts, model_pts, threshold=0.005):
@@ -191,6 +206,8 @@ class Inferencer(Runner):
             os.makedirs(dataset_dir)
         output_path = os.path.join(dataset_dir, f"{scene_name}.pkl")
         pickle.dump(output, open(output_path, "wb"))
+        with open(os.path.join(dataset_dir, "stat.json"), "w") as f:
+            json.dump(self.stat_result, f)
 
     def get_checkpoint_path(self, is_last=False):
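Note: each inference step now rebuilds combined_scanned_pts from all scanned views: concatenate, voxel-downsample at 0.002 to merge overlapping surface points, then randomly downsample back to the fixed network input size input_pts_N. A pure-NumPy sketch of the two stages; the helper names and the 8192-point budget are illustrative, and PtsUtil's actual implementations may differ:

import numpy as np

def voxel_downsample(pts: np.ndarray, voxel_size: float) -> np.ndarray:
    # Keep one point per occupied voxel; one common way to implement
    # PtsUtil.voxel_downsample_point_cloud (the project's version may differ).
    voxel_keys = np.floor(pts / voxel_size).astype(np.int64)
    _, unique_idx = np.unique(voxel_keys, axis=0, return_index=True)
    return pts[unique_idx]

def random_downsample(pts: np.ndarray, n: int) -> np.ndarray:
    # Resample to exactly n points; sample with replacement if too few remain.
    idx = np.random.choice(len(pts), n, replace=len(pts) < n)
    return pts[idx]

merged = np.random.rand(50000, 3).astype(np.float32)  # all scanned views, concatenated
fixed_size = random_downsample(voxel_downsample(merged, 0.002), 8192)
assert fixed_size.shape == (8192, 3)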
diff --git a/utils/render.py b/utils/render.py
index bede42d..286e04c 100644
--- a/utils/render.py
+++ b/utils/render.py
@@ -34,7 +34,7 @@ class RenderUtil:
             return None
         path = os.path.join(temp_dir, "tmp")
         # ------ Debug Start ------
-        import ipdb;ipdb.set_trace()
+        # import ipdb;ipdb.set_trace()
         # ------ Debug End ------
         point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
         cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
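Note: the inferencer now also dumps a per-scene stat.json alongside the .pkl results; the json.dump call relies on json being imported at the top of runners/inferencer.py, which this diff does not show. A quick post-hoc aggregation sketch, assuming the file is read from the dataset output directory:

import json

# Aggregate the per-scene statistics written by save_inference_result;
# the key names follow the stat_result dict in the diff above.
with open("stat.json") as f:
    stats = json.load(f)

mean_success = sum(s["success_rate"] for s in stats.values()) / len(stats)
mean_seq_len = sum(s["pred_seq_len"] for s in stats.values()) / len(stats)
print(f"{len(stats)} scenes | mean success rate {mean_success:.3f} | mean pred seq len {mean_seq_len:.1f}")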