From 4c69ed777b46573d2ee265666edfc9bb8610424b Mon Sep 17 00:00:00 2001
From: hofee
Date: Thu, 12 Sep 2024 15:11:09 +0800
Subject: [PATCH] fix bug for training

---
 app_split.py                          |   2 +-
 app_train.py                          |   4 +-
 configs/split_dataset_config.yaml     |  10 +--
 configs/strategy_generate_config.yaml |   4 +-
 configs/train_config.yaml             |  56 ++++++++-----
 configs/view_generate_config.yaml     |   4 +-
 core/dataset.py                       | 112 ++++++++++++++------
 core/pipeline.py                      |  51 ++++++------
 modules/gf_view_finder.py             |  17 ++--
 modules/pointnet_encoder.py           |  20 ++++-
 modules/transformer_seq_encoder.py    |   2 +-
 runners/strategy_generator.py         |   9 +++
 utils/data_load.py                    |  27 +++++++
 utils/pts.py                          |   1 -
 utils/reconstruction.py               |   1 -
 15 files changed, 200 insertions(+), 120 deletions(-)

diff --git a/app_split.py b/app_split.py
index 900aacc..35e803b 100644
--- a/app_split.py
+++ b/app_split.py
@@ -5,5 +5,5 @@ from runners.data_spliter import DataSpliter
 class DataSplitApp:
     @staticmethod
     def start():
-        DataSpliter(r"configs\split_dataset_config.yaml").run()
+        DataSpliter("configs/split_dataset_config.yaml").run()
\ No newline at end of file
diff --git a/app_train.py b/app_train.py
index f8af38d..8980dcf 100644
--- a/app_train.py
+++ b/app_train.py
@@ -1,8 +1,8 @@
 from PytorchBoot.application import PytorchBootApplication
-from runners.strategy_generator import StrategyGenerator
+from PytorchBoot.runners.trainer import DefaultTrainer

 @PytorchBootApplication("train")
 class TrainApp:
     @staticmethod
     def start():
-        StrategyGenerator(r"configs\train_config.yaml").run()
\ No newline at end of file
+        DefaultTrainer("configs/train_config.yaml").run()
\ No newline at end of file
diff --git a/configs/split_dataset_config.yaml b/configs/split_dataset_config.yaml
index f2f2805..7ef58e9 100644
--- a/configs/split_dataset_config.yaml
+++ b/configs/split_dataset_config.yaml
@@ -10,13 +10,13 @@ runner:
     root_dir: "experiments"

   split:
-    root_dir: "C:\\Document\\Local Project\\nbv_rec\\data\\sample"
+    root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
     type: "unseen_instance" # "unseen_category"
     datasets:
       OmniObject3d_train:
-        path: "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_train.txt"
-        ratio: 0.5
+        path: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt"
+        ratio: 0.9
       OmniObject3d_test:
-        path: "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_test.txt"
-        ratio: 0.5
\ No newline at end of file
+        path: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_test.txt"
+        ratio: 0.1
\ No newline at end of file
diff --git a/configs/strategy_generate_config.yaml b/configs/strategy_generate_config.yaml
index aadf271..73fae50 100644
--- a/configs/strategy_generate_config.yaml
+++ b/configs/strategy_generate_config.yaml
@@ -18,12 +18,14 @@ runner:
     save_points: False
     save_best_combined_points: True
     save_mesh: True
+    overwrite: False
     dataset_list:
       - OmniObject3d

 datasets:
   OmniObject3d:
-    root_dir: "/media/hofee/data/project/python/nbv_reconstruction/nbv_rec_visualize/data/sample"
+    #"/media/hofee/data/data/temp_output"
+    root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
     model_dir: "/media/hofee/data/data/scaled_object_meshes"
     #output_dir: "/media/hofee/data/data/label_output"
diff --git a/configs/train_config.yaml b/configs/train_config.yaml
index 39ffc69..b8d9c0b 100644
--- a/configs/train_config.yaml
+++ b/configs/train_config.yaml
@@ -2,15 +2,16 @@ runner:

   general:
     seed: 0
-    device: cpu
+    device: cuda
     cuda_visible_devices: "0,1,2,3,4,5,6,7"
+    parallel: False

   experiment:
-    name: debug
+    name: test_overfit
     root_dir: "experiments"
     use_checkpoint: False
     epoch: -1 # -1 stands for last epoch
-    max_epochs: 5
+    max_epochs: 5000
     save_checkpoint_interval: 1
     test_first: False

@@ -19,34 +20,43 @@ runner:
       type: Adam
       lr: 0.0001
     losses:
-      - mse_loss
+      - gf_loss
     dataset: OmniObject3d_train
   test:
     frequency: 3 # test frequency
     dataset_list:
-      - OmniObject3d_train
+      - OmniObject3d_test

   pipeline: nbv_reconstruction_pipeline

-datasets:
+dataset:
   OmniObject3d_train:
-    root_dir: "C:\\Document\\Local Project\\nbv_rec\\data\\sample"
-    split_file: "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_train.txt"
+    root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
+    source: nbv_reconstruction_dataset
+    split_file: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt"
     ratio: 1.0
     batch_size: 1
     num_workers: 12
-    pts_num: 2048
+    pts_num: 4096

   OmniObject3d_test:
-    root_dir: "C:\\Document\\Local Project\\nbv_rec\\data\\sample"
-    split_file: "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_test.txt"
+    root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
+    source: nbv_reconstruction_dataset
+    split_file: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt"
     eval_list:
       - pose_diff
-    ratio: 1.0
+    ratio: 0.1
     batch_size: 1
     num_workers: 1
-    pts_num: 2048
+    pts_num: 4096

+pipeline:
+  nbv_reconstruction_pipeline:
+    pts_encoder: pointnet_encoder
+    seq_encoder: transformer_seq_encoder
+    pose_encoder: pose_encoder
+    view_finder: gf_view_finder
+

 module:

   pointnet_encoder:
@@ -58,13 +68,15 @@ module:
   transformer_seq_encoder:
     pts_embed_dim: 1024
     pose_embed_dim: 256
-    num_heads: 4
-    ffn_dim: 256
-    num_layers: 3
-    max_seq_len: 30
-    output_dim: 2048
+    num_heads: 2 # 4
+    ffn_dim: 128 # 256
+    num_layers: 2 # 3
+    output_dim: 1024 # 2048

   gf_view_finder:
+    t_feat_dim: 128
+    pose_feat_dim: 256
+    main_feat_dim: 1024 # 2048
     regression_head: Rx_Ry_and_T
     pose_mode: rot_matrix
     per_point_feature: False
@@ -74,4 +86,10 @@ module:

   pose_encoder:
     pose_dim: 9
-    output_dim: 256
\ No newline at end of file
+    out_dim: 256
+
+loss_function:
+  gf_loss:
+
+evaluation_method:
+  pose_diff:
\ No newline at end of file
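Each name in the new pipeline block above is resolved to a registered component when the pipeline is constructed (see the ComponentFactory.create calls in core/pipeline.py further down). A minimal sketch of that name-to-class lookup pattern; the registry, decorator, and stub class here are illustrative stand-ins, not the actual PytorchBoot API:

import_note = None  # pure-Python sketch, no external dependencies

# Illustrative stand-in for PytorchBoot's ComponentFactory: config values such
# as "pts_encoder: pointnet_encoder" are plain strings looked up in a registry.
MODULE_REGISTRY = {}

def register_module(name):
    def wrap(cls):
        MODULE_REGISTRY[name] = cls   # config key -> class
        return cls
    return wrap

def create_module(name, config=None):
    return MODULE_REGISTRY[name](config or {})

@register_module("pointnet_encoder")
class PointNetEncoderStub:
    def __init__(self, config):
        self.out_dim = config.get("out_dim", 1024)

pts_encoder = create_module("pointnet_encoder", {"out_dim": 1024})
print(pts_encoder.out_dim)  # 1024
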
diff --git a/configs/view_generate_config.yaml b/configs/view_generate_config.yaml
index a7a8089..cb5b8a8 100644
--- a/configs/view_generate_config.yaml
+++ b/configs/view_generate_config.yaml
@@ -9,10 +9,10 @@ runner:
   generate:
     object_dir: /media/hofee/data/data/scaled_object_meshes
     table_model_path: /media/hofee/data/data/others/table.obj
-    output_dir: /media/hofee/data/data/temp_output
+    output_dir: /media/hofee/repository/nbv_reconstruction_data_512
     binocular_vision: true
     plane_size: 10
-    max_views: 256
+    max_views: 512
     min_views: 64
     max_diag: 0.7
     min_diag: 0.1
diff --git a/core/dataset.py b/core/dataset.py
index 8756563..b4d810a 100644
--- a/core/dataset.py
+++ b/core/dataset.py
@@ -1,10 +1,10 @@
 import numpy as np
 from PytorchBoot.dataset import BaseDataset
 import PytorchBoot.stereotype as stereotype
-from torch.nn.utils.rnn import pad_sequence
+import torch
 import sys

-sys.path.append(r"C:\Document\Local Project\nbv_rec\nbv_reconstruction")
+sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction")

 from utils.data_load import DataLoadUtil
 from utils.pose import PoseUtil
@@ -56,18 +56,25 @@ class NBVReconstructionDataset(BaseDataset):
         scene_name = data_item_info["scene_name"]
         scanned_views_pts, scanned_coverages_rate, scanned_n_to_1_pose = [], [], []
         first_frame_idx = scanned_views[0][0]
-        first_frame_to_world = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx))["cam_to_world"]
+        first_cam_info = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx), binocular=True)
+        first_frame_to_world = first_cam_info["cam_to_world"]
         for view in scanned_views:
             frame_idx = view[0]
             coverage_rate = view[1]
             view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
-            depth = DataLoadUtil.load_depth(view_path)
-            cam_info = DataLoadUtil.load_cam_info(view_path)
-            mask = DataLoadUtil.load_seg(view_path)
-            frame_curr_to_world = cam_info["cam_to_world"]
-            n_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), frame_curr_to_world)
-            target_point_cloud = DataLoadUtil.get_target_point_cloud(depth, cam_info["cam_intrinsic"], n_to_1_pose, mask)["points_world"]
-            downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(target_point_cloud, self.pts_num)
+            cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True)
+            n_to_world_pose = cam_info["cam_to_world"]
+            nR_to_world_pose = cam_info["cam_to_world_R"]
+            n_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), n_to_world_pose)
+            nR_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), nR_to_world_pose)
+            depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True)
+            point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_info['cam_intrinsic'], n_to_1_pose)['points_world']
+            point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_info['cam_intrinsic'], nR_to_1_pose)['points_world']
+
+            point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
+            point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
+            overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
+            downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num)
             scanned_views_pts.append(downsampled_target_point_cloud)
             scanned_coverages_rate.append(coverage_rate)
             n_to_1_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(n_to_1_pose[:3,:3]))
@@ -86,10 +93,10 @@ class NBVReconstructionDataset(BaseDataset):

         data_item = {
             "scanned_pts": np.asarray(scanned_views_pts,dtype=np.float32),
-            "scanned_coverage_rate": np.asarray(scanned_coverages_rate,dtype=np.float32),
+            "scanned_coverage_rate": scanned_coverages_rate,
             "scanned_n_to_1_pose_9d": np.asarray(scanned_n_to_1_pose,dtype=np.float32),
             "best_coverage_rate": nbv_coverage_rate,
-            "best_to_1_pose_9d": best_to_1_9d,
+            "best_to_1_pose_9d": np.asarray(best_to_1_9d,dtype=np.float32),
             "max_coverage_rate": max_coverage_rate,
             "scene_name": scene_name
         }
@@ -101,23 +108,14 @@ class NBVReconstructionDataset(BaseDataset):

     def get_collate_fn(self):
         def collate_fn(batch):
-            scanned_pts = [item['scanned_pts'] for item in batch]
-            scanned_n_to_1_pose_9d = [item['scanned_n_to_1_pose_9d'] for item in batch]
-            rest = {}
+            collate_data = {}
+            collate_data["scanned_pts"] = [torch.tensor(item['scanned_pts']) for item in batch]
+            collate_data["scanned_n_to_1_pose_9d"] = [torch.tensor(item['scanned_n_to_1_pose_9d']) for item in batch]
+            collate_data["best_to_1_pose_9d"] = torch.stack([torch.tensor(item['best_to_1_pose_9d']) for item in batch])
             for key in batch[0].keys():
-                if key in ['scanned_pts', 'scanned_n_to_1_pose_9d']:
-                    continue
-                if isinstance(batch[0][key], torch.Tensor):
-                    rest[key] = torch.stack([item[key] for item in batch])
-                elif isinstance(batch[0][key], str):
-                    rest[key] = [item[key] for item in batch]
-                else:
-                    rest[key] = [item[key] for item in batch]
-            return {
-                'scanned_pts': scanned_pts,
-                'scanned_n_to_1_pose_9d': scanned_n_to_1_pose_9d,
-                **rest
-            }
+                if key not in ["scanned_pts", "scanned_n_to_1_pose_9d", "best_to_1_pose_9d"]:
+                    collate_data[key] = [item[key] for item in batch]
+            return collate_data
         return collate_fn

 if __name__ == "__main__":
@@ -126,36 +124,48 @@ if __name__ == "__main__":
     torch.manual_seed(seed)
     np.random.seed(seed)
     config = {
-        "root_dir": "C:\\Document\\Local Project\\nbv_rec\\data\\sample",
-        "split_file": "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_train.txt",
+        "root_dir": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes",
+        "split_file": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt",
         "ratio": 0.5,
         "batch_size": 2,
         "num_workers": 0,
-        "pts_num": 2048
+        "pts_num": 32684
     }
     ds = NBVReconstructionDataset(config)
     print(len(ds))
+    #ds.__getitem__(10)
     dl = ds.get_loader(shuffle=True)
     for idx, data in enumerate(dl):
-        cnt=0
-        print(data["scene_name"])
-        print(data["scanned_coverage_rate"])
-        print(data["best_coverage_rate"])
-        for pts in data["scanned_pts"][0]:
-            #np.savetxt(f"pts_{cnt}.txt", pts)
-            cnt+=1
-        #np.savetxt("best_pts.txt", best_pts)
-        for key, value in data.items():
-            if isinstance(value, torch.Tensor):
-                print(key, ":" ,value.shape)
-            else:
-                print(key, ":" ,len(value))
-            if key == "scanned_n_to_1_pose_9d":
-                for val in value:
-                    print(val.shape)
-            if key == "scanned_pts":
-                for val in value:
-                    print(val.shape)
+        data = ds.process_batch(data, "cuda:0")
+        print(data)
+        break
+    #
+    # for idx, data in enumerate(dl):
+    #     cnt=0
+    #     print(data["scene_name"])
+    #     print(data["scanned_coverage_rate"])
+    #     print(data["best_coverage_rate"])
+    #     for pts in data["scanned_pts"][0]:
+    #         #np.savetxt(f"pts_{cnt}.txt", pts)
+    #         cnt+=1
+    #     #np.savetxt("best_pts.txt", best_pts)
+    #     for key, value in data.items():
+    #         if isinstance(value, torch.Tensor):
+    #             print(key, ":" ,value.shape)
+    #         else:
+    #             print(key, ":" ,len(value))
+    #         if key == "scanned_n_to_1_pose_9d":
+    #             for val in value:
+    #                 print(val.shape)
+    #         if key == "scanned_pts":
+    #             print("scanned_pts")
+    #             for val in value:
+    #                 print(val.shape)
+    #                 cnt = 0
+    #                 for v in val:
+    #                     import ipdb;ipdb.set_trace()
+    #                     np.savetxt(f"pts_{cnt}.txt", v)
+    #                     cnt+=1

-        print()
\ No newline at end of file
+    # print()
\ No newline at end of file
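The rewritten collate_fn keeps scanned_pts and scanned_n_to_1_pose_9d as lists of per-scene tensors, because each scene contributes a different number of scanned views and they cannot be stacked into one rectangular tensor; only the fixed-size best_to_1_pose_9d target is stacked. A runnable toy version of the same convention (shapes are illustrative):

import numpy as np
import torch

def collate_fn(batch):
    # Per-scene view sequences have different lengths, so they stay as lists
    # of tensors; only the fixed-size NBV target pose is stacked.
    return {
        "scanned_pts": [torch.tensor(item["scanned_pts"]) for item in batch],
        "best_to_1_pose_9d": torch.stack(
            [torch.tensor(item["best_to_1_pose_9d"]) for item in batch]),
    }

# Toy batch: scene A has 2 scanned views, scene B has 3.
batch = [
    {"scanned_pts": np.zeros((2, 4096, 3), np.float32),
     "best_to_1_pose_9d": np.zeros(9, np.float32)},
    {"scanned_pts": np.zeros((3, 4096, 3), np.float32),
     "best_to_1_pose_9d": np.zeros(9, np.float32)},
]
out = collate_fn(batch)
print([tuple(t.shape) for t in out["scanned_pts"]])  # [(2, 4096, 3), (3, 4096, 3)]
print(tuple(out["best_to_1_pose_9d"].shape))         # (2, 9)
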
diff --git a/core/pipeline.py b/core/pipeline.py
index 223f83b..f4dba95 100644
--- a/core/pipeline.py
+++ b/core/pipeline.py
@@ -14,12 +14,11 @@ class NBVReconstructionPipeline(nn.Module):
         self.pose_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["pose_encoder"])
         self.seq_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["seq_encoder"])
         self.view_finder = ComponentFactory.create(namespace.Stereotype.MODULE, config["view_finder"])
+        self.eps = 1e-5

     def forward(self, data):
         mode = data["mode"]
-        # ----- Debug Trace ----- #
-        import ipdb; ipdb.set_trace()
-        # ------------------------ #
+
         if mode == namespace.Mode.TRAIN:
             return self.forward_train(data)
         elif mode == namespace.Mode.TEST:
@@ -27,29 +26,22 @@ class NBVReconstructionPipeline(nn.Module):
         else:
             Log.error("Unknown mode: {}".format(mode), True)

-    def pertube_data(self, gt_delta_rot_6d):
-        bs = gt_delta_rot_6d.shape[0]
-        random_t = torch.rand(bs, device=self.device) * (1. - self.eps) + self.eps
+    def perturb_data(self, gt_delta_9d):
+        bs = gt_delta_9d.shape[0]
+        random_t = torch.rand(bs, device=gt_delta_9d.device) * (1. - self.eps) + self.eps
         random_t = random_t.unsqueeze(-1)
-        mu, std = self.view_finder.marginal_prob(gt_delta_rot_6d, random_t)
+        mu, std = self.view_finder.marginal_prob(gt_delta_9d, random_t)
         std = std.view(-1, 1)
-        z = torch.randn_like(gt_delta_rot_6d)
+        z = torch.randn_like(gt_delta_9d)
         perturbed_x = mu + z * std
         target_score = - z * std / (std ** 2)
         return perturbed_x, random_t, target_score, std

     def forward_train(self, data):
-        pts_list = data['pts_list']
-        pose_list = data['pose_list']
-        gt_rot_6d = data["nbv_cam_pose"]
-        pts_feat_list = []
-        pose_feat_list = []
-        for pts,pose in zip(pts_list,pose_list):
-            pts_feat_list.append(self.pts_encoder.encode_points(pts))
-            pose_feat_list.append(self.pose_encoder.encode_pose(pose))
-        seq_feat = self.seq_encoder.encode_sequence(pts_feat_list, pose_feat_list)
+        seq_feat = self.get_seq_feat(data)
         ''' get std '''
-        perturbed_x, random_t, target_score, std = self.pertube_data(gt_rot_6d)
+        best_to_1_pose_9d_batch = data["best_to_1_pose_9d"]
+        perturbed_x, random_t, target_score, std = self.perturb_data(best_to_1_pose_9d_batch)
         input_data = {
             "sampled_pose": perturbed_x,
             "t": random_t,
@@ -64,14 +56,7 @@ class NBVReconstructionPipeline(nn.Module):
         return output

     def forward_test(self,data):
-        pts_list = data['pts_list']
-        pose_list = data['pose_list']
-        pts_feat_list = []
-        pose_feat_list = []
-        for pts,pose in zip(pts_list,pose_list):
-            pts_feat_list.append(self.pts_encoder.encode_points(pts))
-            pose_feat_list.append(self.pose_encoder.encode_pose(pose))
-        seq_feat = self.seq_encoder.encode_sequence(pts_feat_list, pose_feat_list)
+        seq_feat = self.get_seq_feat(data)
         estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(seq_feat)
         result = {
             "pred_pose_9d": estimated_delta_rot_9d,
@@ -79,4 +64,18 @@ class NBVReconstructionPipeline(nn.Module):
         }
         return result

+    def get_seq_feat(self, data):
+        scanned_pts_batch = data['scanned_pts']
+        scanned_n_to_1_pose_9d_batch = data['scanned_n_to_1_pose_9d']
+        best_to_1_pose_9d_batch = data["best_to_1_pose_9d"]
+        pts_feat_seq_list = []
+        pose_feat_seq_list = []
+
+        for scanned_pts,scanned_n_to_1_pose_9d in zip(scanned_pts_batch,scanned_n_to_1_pose_9d_batch):
+            scanned_pts = scanned_pts.to(best_to_1_pose_9d_batch.device)
+            scanned_n_to_1_pose_9d = scanned_n_to_1_pose_9d.to(best_to_1_pose_9d_batch.device)
+            pts_feat_seq_list.append(self.pts_encoder.encode_points(scanned_pts))
+            pose_feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_1_pose_9d))
+        seq_feat = self.seq_encoder.encode_sequence(pts_feat_seq_list, pose_feat_seq_list)
+        return seq_feat
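perturb_data above is standard denoising score matching: the ground-truth 9D pose is perturbed as x_t = mu + std * z, and the regression target -z * std / std^2 = -z / std is exactly the score grad_x log p(x_t | x_0) of the Gaussian perturbation kernel. A self-contained numeric check, with a toy VE-SDE marginal standing in for the one flib.init_sde provides:

import torch

def marginal_prob(x0, t, sigma=25.0):
    # Toy VE-SDE marginal (stand-in for the one flib.init_sde returns):
    # the mean stays x0 while the noise std grows with t.
    std = torch.sqrt((sigma ** (2 * t) - 1.0) / (2.0 * torch.log(torch.tensor(sigma))))
    return x0, std

def perturb_data(x0, eps=1e-5):
    bs = x0.shape[0]
    random_t = torch.rand(bs) * (1.0 - eps) + eps
    mu, std = marginal_prob(x0, random_t.unsqueeze(-1))
    z = torch.randn_like(x0)
    perturbed_x = mu + z * std
    target_score = -z * std / (std ** 2)  # simplifies to -z / std
    return perturbed_x, random_t, target_score, std

x0 = torch.randn(4, 9)  # batch of 9D pose targets
perturbed_x, t, target_score, std = perturb_data(x0)
# Analytic score of the Gaussian kernel: grad_x log N(x; mu, std^2) = -(x - mu) / std^2
analytic = -(perturbed_x - x0) / std ** 2
print(torch.allclose(target_score, analytic, atol=1e-5))  # True
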
diff --git a/modules/gf_view_finder.py b/modules/gf_view_finder.py
index 1e1cceb..bc20586 100644
--- a/modules/gf_view_finder.py
+++ b/modules/gf_view_finder.py
@@ -33,19 +33,22 @@ class GradientFieldViewFinder(nn.Module):
         pose_dim = PoseUtil.get_pose_dim(self.pose_mode)
         self.prior_fn, self.marginal_prob_fn, self.sde_fn, self.sampling_eps, self.T = flib.init_sde(config["sde_mode"])
         self.sampling_steps = config["sampling_steps"]
+        self.t_feat_dim = config["t_feat_dim"]
+        self.pose_feat_dim = config["pose_feat_dim"]
+        self.main_feat_dim = config["main_feat_dim"]

         ''' encode pose '''
         self.pose_encoder = nn.Sequential(
-            nn.Linear(pose_dim, 256),
+            nn.Linear(pose_dim, self.pose_feat_dim),
             self.act,
-            nn.Linear(256, 256),
+            nn.Linear(self.pose_feat_dim, self.pose_feat_dim),
             self.act,
         )

         ''' encode t '''
         self.t_encoder = nn.Sequential(
-            mlib.GaussianFourierProjection(embed_dim=128),
-            nn.Linear(128, 128),
+            mlib.GaussianFourierProjection(embed_dim=self.t_feat_dim),
+            nn.Linear(self.t_feat_dim, self.t_feat_dim),
             self.act,
         )

@@ -56,18 +59,18 @@ class GradientFieldViewFinder(nn.Module):
         if not self.per_point_feature:
             ''' rotation_x_axis regress head '''
             self.fusion_tail_rot_x = nn.Sequential(
-                nn.Linear(128 + 256 + 2048, 256),
+                nn.Linear(self.t_feat_dim + self.pose_feat_dim + self.main_feat_dim, 256),
                 self.act,
                 zero_module(nn.Linear(256, 3)),
             )
             self.fusion_tail_rot_y = nn.Sequential(
-                nn.Linear(128 + 256 + 2048, 256),
+                nn.Linear(self.t_feat_dim + self.pose_feat_dim + self.main_feat_dim, 256),
                 self.act,
                 zero_module(nn.Linear(256, 3)),
             )
             ''' tranalation regress head '''
             self.fusion_tail_trans = nn.Sequential(
-                nn.Linear(128 + 256 + 2048, 256),
+                nn.Linear(self.t_feat_dim + self.pose_feat_dim + self.main_feat_dim, 256),
                 self.act,
                 zero_module(nn.Linear(256, 3)),
             )
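The hard-coded fusion width 128 + 256 + 2048 becomes config-driven; with the values now in configs/train_config.yaml the regression heads expect 128 + 256 + 1024 = 1408 input features, matching the seq encoder's reduced output_dim. A quick shape check (plain ReLU and default init stand in for the module's configured activation and zero_module output layer):

import torch
import torch.nn as nn

# Feature widths from configs/train_config.yaml above.
t_feat_dim, pose_feat_dim, main_feat_dim = 128, 256, 1024

fusion_tail = nn.Sequential(
    nn.Linear(t_feat_dim + pose_feat_dim + main_feat_dim, 256),
    nn.ReLU(),
    nn.Linear(256, 3),
)

# [t_feat | pose_feat | seq_feat] concatenates to 1408 features; the old
# hard-coded 128 + 256 + 2048 no longer matches the 1024-dim sequence feature.
feat = torch.cat([torch.zeros(2, t_feat_dim),
                  torch.zeros(2, pose_feat_dim),
                  torch.zeros(2, main_feat_dim)], dim=-1)
print(fusion_tail(feat).shape)  # torch.Size([2, 3])
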
diff --git a/modules/pointnet_encoder.py b/modules/pointnet_encoder.py
index b669a4c..6483709 100644
--- a/modules/pointnet_encoder.py
+++ b/modules/pointnet_encoder.py
@@ -54,6 +54,7 @@ class PointNetEncoder(nn.Module):

     def encode_points(self, pts):
         pts = pts.transpose(2, 1)
+
         if not self.global_feat:
             pts_feature = self(pts).transpose(2, 1)
         else:
@@ -98,11 +99,24 @@ class STNkd(nn.Module):

 if __name__ == "__main__":
     sim_data = Variable(torch.rand(32, 2500, 3))
-
-    pointnet_global = PointNetEncoder(global_feat=True)
+    config = {
+        "in_dim": 3,
+        "out_dim": 1024,
+        "global_feat": True,
+        "feature_transform": False
+    }
+    pointnet_global = PointNetEncoder(config)
     out = pointnet_global.encode_points(sim_data)
+
     print("global feat", out.size())

-    pointnet = PointNetEncoder(global_feat=False)
+    config = {
+        "in_dim": 3,
+        "out_dim": 1024,
+        "global_feat": False,
+        "feature_transform": False
+    }
+
+    pointnet = PointNetEncoder(config)
     out = pointnet.encode_points(sim_data)
     print("point feat", out.size())
diff --git a/modules/transformer_seq_encoder.py b/modules/transformer_seq_encoder.py
index 79f151c..8b22b4f 100644
--- a/modules/transformer_seq_encoder.py
+++ b/modules/transformer_seq_encoder.py
@@ -38,7 +38,7 @@ class TransformerSequenceEncoder(nn.Module):

         # Prepare mask for padding
         max_len = max(lengths)
-        padding_mask = torch.tensor([([0] * length + [1] * (max_len - length)) for length in lengths], dtype=torch.bool)
+        padding_mask = torch.tensor([([0] * length + [1] * (max_len - length)) for length in lengths], dtype=torch.bool).to(combined_tensor.device)

         # Transformer encoding
         transformer_output = self.transformer_encoder(combined_tensor, src_key_padding_mask=padding_mask)
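The one-line change in transformer_seq_encoder.py moves the padding mask onto the same device as the encoded sequence: torch.tensor allocates on the CPU by default, so on a CUDA run the old code would fail inside the attention call. A minimal sketch of building the mask next to its data:

import torch

def make_padding_mask(lengths, device):
    # True marks padded positions, matching src_key_padding_mask semantics.
    max_len = max(lengths)
    return torch.tensor(
        [[0] * n + [1] * (max_len - n) for n in lengths],
        dtype=torch.bool, device=device)

seq = torch.zeros(2, 3, 16)  # (batch, max_seq_len, feat), possibly on CUDA
mask = make_padding_mask([2, 3], device=seq.device)
print(mask)
# tensor([[False, False,  True],
#         [False, False, False]])
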
diff --git a/runners/strategy_generator.py b/runners/strategy_generator.py
index e9c3679..af90ea3 100644
--- a/runners/strategy_generator.py
+++ b/runners/strategy_generator.py
@@ -26,6 +26,7 @@ class StrategyGenerator(Runner):
         self.save_best_combined_pts = ConfigManager.get("runner", "generate", "save_best_combined_points")
         self.save_mesh = ConfigManager.get("runner", "generate", "save_mesh")
         self.filter_degree = ConfigManager.get("runner", "generate", "filter_degree")
+        self.overwrite = ConfigManager.get("runner", "generate", "overwrite")



@@ -44,6 +45,14 @@ class StrategyGenerator(Runner):
         for scene_name in scene_name_list:
             Log.info(f"({dataset_name})Processing [{cnt}/{total}]: {scene_name}")
             status_manager.set_progress("generate", "strategy_generator", "scene", cnt, total)
+            diag = DataLoadUtil.get_bbox_diag(model_dir, scene_name)
+            voxel_threshold = diag*0.02
+            status_manager.set_status("generate", "strategy_generator", "voxel_threshold", voxel_threshold)
+            output_label_path = DataLoadUtil.get_label_path(root_dir, scene_name)
+            if os.path.exists(output_label_path) and not self.overwrite:
+                Log.info(f"Scene <{scene_name}> Already Exists, Skip")
+                cnt += 1
+                continue
             self.generate_sequence(root_dir, model_dir, scene_name,voxel_threshold, overlap_threshold)
             cnt += 1
         status_manager.set_progress("generate", "strategy_generator", "scene", total, total)
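The generator now derives its voxel threshold from each mesh's bounding-box diagonal (2% of it, via the new get_bbox_diag helper below) and skips scenes whose label file already exists unless overwrite is set. For scale, a short worked example of the threshold; the extents are made up:

import numpy as np

# Hypothetical mesh bounding-box extents, in meters.
extents = np.array([0.3, 0.2, 0.4])
diag = np.linalg.norm(extents)   # ~0.539
voxel_threshold = diag * 0.02    # ~0.011, i.e. roughly 1 cm voxels
print(round(diag, 3), round(voxel_threshold, 3))
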
diff --git a/utils/data_load.py b/utils/data_load.py
index 48628d9..62c644b 100644
--- a/utils/data_load.py
+++ b/utils/data_load.py
@@ -45,6 +45,15 @@ class DataLoadUtil:
         mesh.apply_transform(world_object_pose)
         return mesh

+    @staticmethod
+    def get_bbox_diag(model_dir, object_name):
+        model_path = os.path.join(model_dir, object_name, "mesh.obj")
+        mesh = trimesh.load(model_path)
+        bbox = mesh.bounding_box.extents
+        diagonal_length = np.linalg.norm(bbox)
+        return diagonal_length
+
+
     @staticmethod
     def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
         mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
@@ -192,6 +201,24 @@ class DataLoadUtil:
             "points_world": target_points_world,
             "points_camera": target_points_camera
         }
+
+    @staticmethod
+    def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
+        h, w = depth.shape
+        i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy')
+
+        z = depth
+        x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        points_camera_aug = np.concatenate([points_camera, np.ones((points_camera.shape[0], 1))], axis=-1)
+
+        points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
+        return {
+            "points_world": points_world,
+            "points_camera": points_camera
+        }

     @staticmethod
     def get_target_point_cloud_world_from_path(path, binocular=False, random_downsample_N=65536, voxel_size = 0.005, target_mask_label=(0,255,0,255)):
diff --git a/utils/pts.py b/utils/pts.py
index 8c44f5d..19d6e2a 100644
--- a/utils/pts.py
+++ b/utils/pts.py
@@ -5,7 +5,6 @@ class PtsUtil:

     @staticmethod
     def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005):
-        print("voxel_size: ", voxel_size)
         o3d_pc = o3d.geometry.PointCloud()
         o3d_pc.points = o3d.utility.Vector3dVector(point_cloud)
         downsampled_pc = o3d_pc.voxel_down_sample(voxel_size)
diff --git a/utils/reconstruction.py b/utils/reconstruction.py
index 4d5955f..1bc3436 100644
--- a/utils/reconstruction.py
+++ b/utils/reconstruction.py
@@ -6,7 +6,6 @@ class ReconstructionUtil:

     @staticmethod
     def compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold=0.01):
-        print("threshold", threshold)
         kdtree = cKDTree(combined_point_cloud)
         distances, _ = kdtree.query(target_point_cloud)
         covered_points = np.sum(distances < threshold)
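The new DataLoadUtil.get_point_cloud is a standard pinhole back-projection, x = (u - cx) * z / fx and y = (v - cy) * z / fy, followed by a homogeneous transform into the world frame. A self-contained round-trip check of the same math, re-implemented inline with made-up intrinsics:

import numpy as np

def get_point_cloud(depth, K, cam_extrinsic):
    # Same pinhole back-projection as DataLoadUtil.get_point_cloud above.
    h, w = depth.shape
    i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy')
    z = depth
    x = (i - K[0, 2]) * z / K[0, 0]
    y = (j - K[1, 2]) * z / K[1, 1]
    points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
    points_aug = np.concatenate(
        [points_camera, np.ones((points_camera.shape[0], 1))], axis=-1)
    return (cam_extrinsic @ points_aug.T).T[:, :3]

K = np.array([[500.0, 0.0, 320.0],
              [0.0, 500.0, 240.0],
              [0.0, 0.0, 1.0]])
depth = np.full((480, 640), 2.0)      # a flat wall 2 m in front of the camera
points_world = get_point_cloud(depth, K, np.eye(4))
# The pixel at the principal point (u=320, v=240) back-projects to (0, 0, 2):
print(points_world[240 * 640 + 320])  # [0. 0. 2.]
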