diff --git a/configs/server/strategy_generate_config.yaml b/configs/server/strategy_generate_config.yaml index 34a0693..fae57ef 100644 --- a/configs/server/strategy_generate_config.yaml +++ b/configs/server/strategy_generate_config.yaml @@ -14,8 +14,8 @@ runner: voxel_threshold: 0.01 overlap_threshold: 0.5 filter_degree: 75 - to_specified_dir: True # if True, output_dir is used, otherwise, root_dir is used - save_points: False + to_specified_dir: False # if True, output_dir is used, otherwise, root_dir is used + save_points: True save_best_combined_points: True save_mesh: True overwrite: False diff --git a/configs/server/train_config.yaml b/configs/server/train_config.yaml index 59e0c29..0074d75 100644 --- a/configs/server/train_config.yaml +++ b/configs/server/train_config.yaml @@ -1,18 +1,18 @@ runner: general: - seed: 0 + seed: 1 device: cuda cuda_visible_devices: "0,1,2,3,4,5,6,7" parallel: False experiment: - name: test_overfit + name: new_test_overfit_2 root_dir: "experiments" use_checkpoint: False epoch: -1 # -1 stands for last epoch max_epochs: 5000 - save_checkpoint_interval: 1 + save_checkpoint_interval: 3 test_first: False train: @@ -32,22 +32,29 @@ runner: dataset: OmniObject3d_train: root_dir: "../data/sample_for_training/scenes" + model_dir: "../data/scaled_object_meshes" source: nbv_reconstruction_dataset split_file: "../data/sample_for_training/OmniObject3d_train.txt" - ratio: 1.0 - batch_size: 1 + type: train + cache: True + ratio: 1 + batch_size: 128 num_workers: 12 pts_num: 4096 OmniObject3d_test: root_dir: "../data/sample_for_training/scenes" + model_dir: "../data/scaled_object_meshes" source: nbv_reconstruction_dataset split_file: "../data/sample_for_training/OmniObject3d_train.txt" + type: test + cache: True + filter_degree: 75 eval_list: - pose_diff ratio: 0.1 batch_size: 1 - num_workers: 1 + num_workers: 12 pts_num: 4096 pipeline: @@ -92,4 +99,6 @@ loss_function: gf_loss: evaluation_method: - pose_diff: \ No newline at end of file + pose_diff: + coverage_rate_increase: + renderer_path: "../blender/data_renderer.py" \ No newline at end of file diff --git a/core/dataset.py b/core/dataset.py index 4cfc6d9..1e8859f 100644 --- a/core/dataset.py +++ b/core/dataset.py @@ -1,11 +1,19 @@ import numpy as np from PytorchBoot.dataset import BaseDataset +import PytorchBoot.namespace as namespace import PytorchBoot.stereotype as stereotype +from PytorchBoot.config import ConfigManager +from PytorchBoot.utils.log_util import Log import torch +import os +import sys +sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction") from utils.data_load import DataLoadUtil from utils.pose import PoseUtil from utils.pts import PtsUtil +from utils.reconstruction import ReconstructionUtil + @stereotype.dataset("nbv_reconstruction_dataset") class NBVReconstructionDataset(BaseDataset): @@ -16,7 +24,20 @@ class NBVReconstructionDataset(BaseDataset): self.split_file_path = config["split_file"] self.scene_name_list = self.load_scene_name_list() self.datalist = self.get_datalist() + self.pts_num = config["pts_num"] + self.type = config["type"] + self.cache = config["cache"] + if self.type == namespace.Mode.TEST: + self.model_dir = config["model_dir"] + self.filter_degree = config["filter_degree"] + if self.type == namespace.Mode.TRAIN: + self.datalist = self.datalist*100 + if self.cache: + expr_root = ConfigManager.get("runner", "experiment", "root_dir") + expr_name = ConfigManager.get("runner", "experiment", "name") + self.cache_dir = os.path.join(expr_root, expr_name, "cache") + def load_scene_name_list(self): scene_name_list = [] @@ -44,7 +65,27 @@ class NBVReconstructionDataset(BaseDataset): } ) return datalist - + + def load_from_cache(self, scene_name, first_frame_idx, curr_frame_idx): + cache_name = f"{scene_name}_{first_frame_idx}_{curr_frame_idx}.txt" + cache_path = os.path.join(self.cache_dir, cache_name) + if os.path.exists(cache_path): + data = np.loadtxt(cache_path) + return data + else: + return None + + def save_to_cache(self, scene_name, first_frame_idx, curr_frame_idx, data): + cache_name = f"{scene_name}_{first_frame_idx}_{curr_frame_idx}.txt" + cache_path = os.path.join(self.cache_dir, cache_name) + try: + np.savetxt(cache_path, data) + except Exception as e: + Log.error(f"Save cache failed: {e}") + # ----- Debug Trace ----- # + import ipdb; ipdb.set_trace() + # ------------------------ # + def __getitem__(self, index): data_item_info = self.datalist[index] scanned_views = data_item_info["scanned_views"] @@ -64,14 +105,21 @@ class NBVReconstructionDataset(BaseDataset): nR_to_world_pose = cam_info["cam_to_world_R"] n_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), n_to_world_pose) nR_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), nR_to_world_pose) - depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True) - point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_info['cam_intrinsic'], n_to_1_pose)['points_world'] - point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_info['cam_intrinsic'], nR_to_1_pose)['points_world'] - - point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536) - point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536) - overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R) - downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num) + cached_data = self.load_from_cache(scene_name, first_frame_idx, frame_idx) + + if cached_data is None: + depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True) + point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_info['cam_intrinsic'], n_to_1_pose)['points_world'] + point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_info['cam_intrinsic'], nR_to_1_pose)['points_world'] + + point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536) + point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536) + overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R) + downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num) + self.save_to_cache(scene_name, first_frame_idx, frame_idx, downsampled_target_point_cloud) + else: + downsampled_target_point_cloud = cached_data + scanned_views_pts.append(downsampled_target_point_cloud) scanned_coverages_rate.append(coverage_rate) n_to_1_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(n_to_1_pose[:3,:3])) @@ -97,7 +145,28 @@ class NBVReconstructionDataset(BaseDataset): "max_coverage_rate": max_coverage_rate, "scene_name": scene_name } - + # if self.type == namespace.Mode.TEST: + # diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name) + # voxel_threshold = diag*0.02 + # model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name) + # pts_list = [] + # for view in scanned_views: + # frame_idx = view[0] + # view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx) + # point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(view_path, binocular=True) + # cam_params = DataLoadUtil.load_cam_info(view_path, binocular=True) + # sampled_point_cloud = ReconstructionUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=self.filter_degree) + # pts_list.append(sampled_point_cloud) + # nL_to_world_pose = cam_params["cam_to_world"] + # nO_to_world_pose = cam_params["cam_to_world_O"] + # nO_to_nL_pose = np.dot(np.linalg.inv(nL_to_world_pose), nO_to_world_pose) + # data_item["scanned_target_pts_list"] = pts_list + # data_item["model_points_normals"] = model_points_normals + # data_item["voxel_threshold"] = voxel_threshold + # data_item["filter_degree"] = self.filter_degree + # data_item["scene_path"] = os.path.join(self.root_dir, scene_name) + # data_item["first_frame_to_world"] = np.asarray(first_frame_to_world, dtype=np.float32) + # data_item["nO_to_nL_pose"] = np.asarray(nO_to_nL_pose, dtype=np.float32) return data_item def __len__(self): @@ -109,8 +178,10 @@ class NBVReconstructionDataset(BaseDataset): collate_data["scanned_pts"] = [torch.tensor(item['scanned_pts']) for item in batch] collate_data["scanned_n_to_1_pose_9d"] = [torch.tensor(item['scanned_n_to_1_pose_9d']) for item in batch] collate_data["best_to_1_pose_9d"] = torch.stack([torch.tensor(item['best_to_1_pose_9d']) for item in batch]) + if "first_frame_to_world" in batch[0]: + collate_data["first_frame_to_world"] = torch.stack([torch.tensor(item["first_frame_to_world"]) for item in batch]) for key in batch[0].keys(): - if key not in ["scanned_pts", "scanned_n_to_1_pose_9d", "best_to_1_pose_9d"]: + if key not in ["scanned_pts", "scanned_n_to_1_pose_9d", "best_to_1_pose_9d", "first_frame_to_world"]: collate_data[key] = [item[key] for item in batch] return collate_data return collate_fn @@ -123,10 +194,13 @@ if __name__ == "__main__": config = { "root_dir": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes", "split_file": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt", + "model_dir": "/media/hofee/data/data/scaled_object_meshes", "ratio": 0.5, "batch_size": 2, + "filter_degree": 75, "num_workers": 0, - "pts_num": 32684 + "pts_num": 32684, + "type": namespace.Mode.TEST, } ds = NBVReconstructionDataset(config) print(len(ds)) @@ -135,7 +209,9 @@ if __name__ == "__main__": for idx, data in enumerate(dl): data = ds.process_batch(data, "cuda:0") print(data) - break + # ------ Debug Start ------ + import ipdb;ipdb.set_trace() + # ------ Debug End ------ # # for idx, data in enumerate(dl): # cnt=0 diff --git a/core/evaluation.py b/core/evaluation.py index d5b20e2..047df1c 100644 --- a/core/evaluation.py +++ b/core/evaluation.py @@ -1,10 +1,43 @@ import torch +import os +import json +import numpy as np +import subprocess +import tempfile +from utils.data_load import DataLoadUtil +from utils.reconstruction import ReconstructionUtil from utils.pose import PoseUtil +from utils.pts import PtsUtil import PytorchBoot.stereotype as stereotype import PytorchBoot.namespace as namespace +from PytorchBoot.utils.log_util import Log -def get_view_data(cam_pose, scene_name): - pass +def render_pts(cam_pose, scene_path,script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None): + nO_to_world_pose = cam_pose.cpu().numpy() @ nO_to_nL_pose + nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose) + + + with tempfile.TemporaryDirectory() as temp_dir: + params = { + "cam_pose": nO_to_world_pose.tolist(), + "scene_path": scene_path + } + params_data_path = os.path.join(temp_dir, "params.json") + with open(params_data_path, 'w') as f: + json.dump(params, f) + result = subprocess.run([ + 'blender', '-b', '-P', script_path, '--', temp_dir + ], capture_output=True, text=True) + if result.returncode != 0: + print("Blender script failed:") + print(result.stderr) + return None + path = os.path.join(temp_dir, "tmp") + + point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True) + cam_params = DataLoadUtil.load_cam_info(path, binocular=True) + sampled_point_cloud = ReconstructionUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree) + return sampled_point_cloud @stereotype.evaluation_method("pose_diff") class PoseDiff: @@ -36,11 +69,11 @@ class PoseDiff: -@stereotype.evaluation_method("coverage_rate_increase",comment="unfinished") +@stereotype.evaluation_method("coverage_rate_increase") class ConverageRateIncrease: def __init__(self, config): self.config = config - + self.renderer_path = config["renderer_path"] def evaluate(self, output_list, data_list): results = {namespace.TensorBoard.SCALAR: {}} @@ -48,31 +81,57 @@ class ConverageRateIncrease: pred_coverate_increase_list = [] cr_diff_list = [] for output, data in zip(output_list, data_list): - scanned_cr = data['scanned_coverages_rate'] + scanned_cr = data['scanned_coverage_rate'] gt_cr = data["best_coverage_rate"] - scene_name_list = data['scene_name'] - scanned_view_pts_list = data['scanned_pts'] + scene_path_list = data['scene_path'] + model_points_normals_list = data['model_points_normals'] + scanned_view_pts_list = data['scanned_target_pts_list'] pred_pose_9ds = output['pred_pose_9d'] - pred_rot_mats = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_pose_9ds[:, :6]) - pred_pose_mats = torch.cat([pred_rot_mats, pred_pose_9ds[:, 6:]], dim=-1) - + nO_to_nL_pose_batch = data["nO_to_nL_pose"] + voxel_threshold_list = data["voxel_threshold"] + filter_degree_list = data["filter_degree"] + first_frame_to_world = data["first_frame_to_world"] + pred_n_to_1_pose_mats = torch.eye(4, device=pred_pose_9ds.device).unsqueeze(0).repeat(pred_pose_9ds.shape[0], 1, 1) + pred_n_to_1_pose_mats[:,:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_pose_9ds[:, :6]) + pred_n_to_1_pose_mats[:,:3,3] = pred_pose_9ds[:, 6:] + pred_n_to_world_pose_mats = torch.matmul(first_frame_to_world, pred_n_to_1_pose_mats) for idx in range(len(scanned_cr)): - gt_coverate_increase_list.append(gt_cr-scanned_cr[idx]) - scene_name = scene_name_list[idx] - pred_pose = pred_pose_mats[idx] + model_points_normals = model_points_normals_list[idx] scanned_view_pts = scanned_view_pts_list[idx] - view_data = get_view_data(pred_pose, scene_name) - pred_cr = self.compute_coverage_rate(pred_pose, scanned_view_pts, view_data) - pred_coverate_increase_list.append(pred_cr-scanned_cr[idx]) - cr_diff_list.append(gt_cr-pred_cr) + voxel_threshold = voxel_threshold_list[idx] + model_pts = model_points_normals[:,:3] + down_sampled_model_pts = PtsUtil.voxel_downsample_point_cloud(model_pts, voxel_threshold) + old_scanned_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold) + gt_coverate_increase_list.append(gt_cr[idx]-old_scanned_cr) + + scene_path = scene_path_list[idx] + pred_pose = pred_n_to_world_pose_mats[idx] + + filter_degree = filter_degree_list[idx] + nO_to_nL_pose = nO_to_nL_pose_batch[idx] + try: + new_pts = render_pts(pred_pose, scene_path, self.renderer_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=nO_to_nL_pose) + pred_cr = self.compute_coverage_rate(scanned_view_pts, new_pts, down_sampled_model_pts, threshold=voxel_threshold) + except Exception as e: + Log.warning(f"Error in scene {scene_path}, {e}") + pred_cr = old_scanned_cr + pred_coverate_increase_list.append(pred_cr-old_scanned_cr) + cr_diff_list.append(gt_cr[idx]-pred_cr) results[namespace.TensorBoard.SCALAR]["gt_cr_increase"] = float(sum(gt_coverate_increase_list) / len(gt_coverate_increase_list)) results[namespace.TensorBoard.SCALAR]["pred_cr_increase"] = float(sum(pred_coverate_increase_list) / len(pred_coverate_increase_list)) results[namespace.TensorBoard.SCALAR]["cr_diff"] = float(sum(cr_diff_list) / len(cr_diff_list)) return results - def compute_coverage_rate(self, pred_pose, scanned_view_pts, view_data): - pass + def compute_coverage_rate(self, scanned_view_pts, new_pts, model_pts, threshold=0.005): + if new_pts is not None: + new_scanned_view_pts = scanned_view_pts + [new_pts] + else: + new_scanned_view_pts = scanned_view_pts + combined_point_cloud = np.vstack(new_scanned_view_pts) + down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud,threshold) + return ReconstructionUtil.compute_coverage_rate(model_pts, down_sampled_combined_point_cloud, threshold) + \ No newline at end of file diff --git a/core/pipeline.py b/core/pipeline.py index 2203c30..d1a9cd9 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -5,7 +5,7 @@ import PytorchBoot.stereotype as stereotype from PytorchBoot.factory.component_factory import ComponentFactory from PytorchBoot.utils import Log -@stereotype.pipeline("nbv_reconstruction_pipeline") +@stereotype.pipeline("nbv_reconstruction_pipeline", comment="should be tested") class NBVReconstructionPipeline(nn.Module): def __init__(self, config): super(NBVReconstructionPipeline, self).__init__() @@ -72,10 +72,14 @@ class NBVReconstructionPipeline(nn.Module): pose_feat_seq_list = [] for scanned_pts,scanned_n_to_1_pose_9d in zip(scanned_pts_batch,scanned_n_to_1_pose_9d_batch): + scanned_pts = scanned_pts.to(best_to_1_pose_9d_batch.device) scanned_n_to_1_pose_9d = scanned_n_to_1_pose_9d.to(best_to_1_pose_9d_batch.device) pts_feat_seq_list.append(self.pts_encoder.encode_points(scanned_pts)) pose_feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_1_pose_9d)) + seq_feat = self.seq_encoder.encode_sequence(pts_feat_seq_list, pose_feat_seq_list) + if torch.isnan(seq_feat).any(): + Log.error("nan in seq_feat", True) return seq_feat diff --git a/utils/data_load.py b/utils/data_load.py index 62c644b..4705ac0 100644 --- a/utils/data_load.py +++ b/utils/data_load.py @@ -177,6 +177,9 @@ class DataLoadUtil: cam_to_world_R = np.asarray(label_data["extrinsic_R"]) cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R) cam_info["cam_to_world_R"] = cam_to_world_R + cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"]) + cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O) + cam_info["cam_to_world_O"] = cam_to_world_O return cam_info @staticmethod