From 80cd4aba9dfc9d8d1bfbbdebbeef8313f5829d5f Mon Sep 17 00:00:00 2001
From: hofee <64160135+GitHofee@users.noreply.github.com>
Date: Thu, 29 Aug 2024 13:54:13 -0500
Subject: [PATCH] change DataLoadUtil and Dataset to blender version

---
 app_generate.py    |   2 +-
 app_train.py       |   8 +++
 core/dataset.py    |  36 ++++++++----
 core/evaluation.py |  66 +++++++++++++++++----
 core/pipeline.py   |   8 +--
 utils/data_load.py | 144 +++++++++++++--------------------------------
 6 files changed, 135 insertions(+), 129 deletions(-)
 create mode 100644 app_train.py

diff --git a/app_generate.py b/app_generate.py
index a74a2e8..f023d41 100644
--- a/app_generate.py
+++ b/app_generate.py
@@ -2,7 +2,7 @@ from PytorchBoot.application import PytorchBootApplication
 from runners.strategy_generator import StrategyGenerator
 
 @PytorchBootApplication("generate")
-class Generator:
+class GenerateApp:
     @staticmethod
     def start():
         StrategyGenerator("configs\generate_config.yaml").run()
\ No newline at end of file
diff --git a/app_train.py b/app_train.py
new file mode 100644
index 0000000..f8af38d
--- /dev/null
+++ b/app_train.py
@@ -0,0 +1,8 @@
+from PytorchBoot.application import PytorchBootApplication
+from runners.strategy_generator import StrategyGenerator
+
+@PytorchBootApplication("train")
+class TrainApp:
+    @staticmethod
+    def start():
+        StrategyGenerator(r"configs\train_config.yaml").run()
\ No newline at end of file
diff --git a/core/dataset.py b/core/dataset.py
index c4dd9a3..3cc8e7a 100644
--- a/core/dataset.py
+++ b/core/dataset.py
@@ -1,10 +1,15 @@
+import os
 import numpy as np
 from PytorchBoot.dataset import BaseDataset
 import PytorchBoot.stereotype as stereotype
+
+import sys
+sys.path.append(r"C:\Document\Local Project\nbv_rec\nbv_reconstruction")
+
 from utils.data_load import DataLoadUtil
+from utils.pose import PoseUtil
 
-
-@stereotype.dataset("nbv_reconstruction_dataset")
+@stereotype.dataset("nbv_reconstruction_dataset", comment="to be modified")
 class NBVReconstructionDataset(BaseDataset):
     def __init__(self, config):
         super(NBVReconstructionDataset, self).__init__(config)
@@ -15,9 +20,9 @@ class NBVReconstructionDataset(BaseDataset):
 
     def get_datalist(self):
         datalist = []
-        scene_idx_list = DataLoadUtil.get_scene_idx_list(self.root_dir)
-        for scene_idx in scene_idx_list:
-            label_path = DataLoadUtil.get_label_path(self.label_dir, scene_idx)
+        scene_name_list = os.listdir(self.root_dir)
+        for scene_name in scene_name_list:
+            label_path = DataLoadUtil.get_label_path(self.label_dir, scene_name)
             label_data = DataLoadUtil.load_label(label_path)
             for data_pair in label_data["data_pairs"]:
                 scanned_views = data_pair[0]
@@ -28,7 +33,7 @@ class NBVReconstructionDataset(BaseDataset):
                         "scanned_views": scanned_views,
                         "next_best_view": next_best_view,
                         "max_coverage_rate": max_coverage_rate,
-                        "scene_idx": scene_idx,
+                        "scene_name": scene_name,
                     }
                 )
         return datalist
@@ -38,32 +43,39 @@ class NBVReconstructionDataset(BaseDataset):
         scanned_views = data_item_info["scanned_views"]
         nbv = data_item_info["next_best_view"]
         max_coverage_rate = data_item_info["max_coverage_rate"]
-        scene_idx = data_item_info["scene_idx"]
+        scene_name = data_item_info["scene_name"]
         scanned_views_pts, scanned_coverages_rate, scanned_cam_pose = [], [], []
         for view in scanned_views:
             frame_idx = view[0]
             coverage_rate = view[1]
-            view_path = DataLoadUtil.get_path(self.root_dir, scene_idx, frame_idx)
+            view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
             pts = DataLoadUtil.load_depth(view_path)
             scanned_views_pts.append(pts)
             scanned_coverages_rate.append(coverage_rate)
             cam_pose = DataLoadUtil.load_cam_info(view_path)["cam_to_world"]
-            scanned_cam_pose.append(cam_pose)
+
+            cam_pose_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(cam_pose[:3, :3]))
+            translation = cam_pose[:3, 3]
+            cam_pose_9d = np.concatenate([cam_pose_6d, translation], axis=0)
+            scanned_cam_pose.append(cam_pose_9d)
 
         nbv_idx, nbv_coverage_rate = nbv[0], nbv[1]
-        nbv_path = DataLoadUtil.get_path(self.root_dir, scene_idx, nbv_idx)
+        nbv_path = DataLoadUtil.get_path(self.root_dir, scene_name, nbv_idx)
         nbv_pts = DataLoadUtil.load_depth(nbv_path)
         cam_info = DataLoadUtil.load_cam_info(nbv_path)
         nbv_cam_pose = cam_info["cam_to_world"]
-
+        nbv_cam_pose_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(nbv_cam_pose[:3, :3]))
+        translation = nbv_cam_pose[:3, 3]
+        nbv_cam_pose_9d = np.concatenate([nbv_cam_pose_6d, translation], axis=0)
         data_item = {
             "scanned_views_pts": np.asarray(scanned_views_pts, dtype=np.float32),
             "scanned_coverages_rate": np.asarray(scanned_coverages_rate, dtype=np.float32),
             "scanned_cam_pose": np.asarray(scanned_cam_pose, dtype=np.float32),
             "nbv_pts": np.asarray(nbv_pts, dtype=np.float32),
             "nbv_coverage_rate": nbv_coverage_rate,
-            "nbv_cam_pose": nbv_cam_pose,
+            "nbv_cam_pose": nbv_cam_pose_9d,
             "max_coverage_rate": max_coverage_rate,
+            "scene_name": scene_name
         }
 
         return data_item
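Note: the dataset now encodes each camera pose as a 9D vector, the continuous 6D rotation
representation (Zhou et al.) concatenated with the translation. PoseUtil's internals are not
part of this patch; below is a minimal numpy sketch of the round trip, assuming the
first-two-rows convention used by pytorch3d (PoseUtil's actual convention may differ):

    import numpy as np

    def matrix_to_rotation_6d_numpy(R):
        # assumption: 6D = first two rows of the 3x3 rotation, flattened
        return R[:2, :].reshape(6)

    def rotation_6d_to_matrix_numpy(d6):
        a1, a2 = d6[:3], d6[3:]
        b1 = a1 / np.linalg.norm(a1)           # Gram-Schmidt re-orthonormalization
        b2 = a2 - np.dot(b1, a2) * b1
        b2 = b2 / np.linalg.norm(b2)
        b3 = np.cross(b1, b2)                  # third row completes the right-handed basis
        return np.stack([b1, b2, b3], axis=0)

    cam_to_world = np.eye(4)                   # identity pose for illustration
    pose_9d = np.concatenate([matrix_to_rotation_6d_numpy(cam_to_world[:3, :3]),
                              cam_to_world[:3, 3]], axis=0)   # shape (9,)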
diff --git a/core/evaluation.py b/core/evaluation.py
index 385559c..c304bee 100644
--- a/core/evaluation.py
+++ b/core/evaluation.py
@@ -1,23 +1,37 @@
+import torch
 from utils.pose import PoseUtil
 import PytorchBoot.stereotype as stereotype
 import PytorchBoot.namespace as namespace
 
-@stereotype.evaluation_method("delta_pose_diff")
-class DeltaPoseDiff:
+def get_view_data(cam_pose, scene_name):
+    pass
+
+@stereotype.evaluation_method("pose_diff", comment="not tested")
+class PoseDiff:
     def __init__(self, _):
         pass
+
     def evaluate(self, output_list, data_list):
         results = {namespace.TensorBoard.SCALAR: {}}
         rot_angle_list = []
+        trans_dist_list = []
         for output, data in zip(output_list, data_list):
-            gt_delta_rot_6d = data['delta_rot_6d']
-            est_delta_rot_6d = output['estimated_delta_rot_6d']
-            gt_delta_rot_mat = PoseUtil.rotation_6d_to_matrix_tensor_batch(gt_delta_rot_6d)
-            est_delta_rot_mat = PoseUtil.rotation_6d_to_matrix_tensor_batch(est_delta_rot_6d)
-            rotation_angles = PoseUtil.rotation_angle_distance(gt_delta_rot_mat, est_delta_rot_mat)
+            gt_pose_9d = data['nbv_cam_pose']
+            pred_pose_9d = output['pred_pose_9d']
+            gt_rot_6d = gt_pose_9d[:, :6]
+            gt_trans = gt_pose_9d[:, 6:]
+            pred_rot_6d = pred_pose_9d[:, :6]
+            pred_trans = pred_pose_9d[:, 6:]
+            gt_rot_mat = PoseUtil.rotation_6d_to_matrix_tensor_batch(gt_rot_6d)
+            pred_rot_mat = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_rot_6d)
+            rotation_angles = PoseUtil.rotation_angle_distance(gt_rot_mat, pred_rot_mat)
             rot_angle_list.extend(list(rotation_angles))
+            trans_dist = torch.norm(gt_trans - pred_trans, dim=-1)  # per-sample distance
+            trans_dist_list.extend(list(trans_dist))
 
-        results[namespace.TensorBoard.SCALAR]["delta_rotation"] = float(sum(rot_angle_list) / len(rot_angle_list))
+        results[namespace.TensorBoard.SCALAR]["rot_diff"] = float(sum(rot_angle_list) / len(rot_angle_list))
+        results[namespace.TensorBoard.SCALAR]["trans_diff"] = float(sum(trans_dist_list) / len(trans_dist_list))
         return results
 
 
@@ -25,8 +39,40 @@
 @stereotype.evaluation_method("coverage_rate_increase",comment="unfinished")
 class ConverageRateIncrease:
     def __init__(self, config):
-        pass
+        self.config = config
+
     def evaluate(self, output_list, data_list):
-        return
+        results = {namespace.TensorBoard.SCALAR: {}}
+        gt_coverage_increase_list = []
+        pred_coverage_increase_list = []
+        cr_diff_list = []
+        for output, data in zip(output_list, data_list):
+            scanned_cr = data['scanned_coverages_rate']
+            gt_cr = data["nbv_coverage_rate"]
+            scene_name_list = data['scene_name']
+            scanned_view_pts_list = data['scanned_views_pts']
+            pred_pose_9ds = output['pred_pose_9d']
+            pred_rot_mats = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_pose_9ds[:, :6])
+            pred_pose_mats = torch.cat([pred_rot_mats, pred_pose_9ds[:, 6:].unsqueeze(-1)], dim=-1)
+
+            for idx in range(len(scanned_cr)):
+                gt_coverage_increase_list.append(gt_cr - scanned_cr[idx])
+                scene_name = scene_name_list[idx]
+                pred_pose = pred_pose_mats[idx]
+                scanned_view_pts = scanned_view_pts_list[idx]
+                view_data = get_view_data(pred_pose, scene_name)
+                pred_cr = self.compute_coverage_rate(pred_pose, scanned_view_pts, view_data)
+                pred_coverage_increase_list.append(pred_cr - scanned_cr[idx])
+                cr_diff_list.append(gt_cr - pred_cr)
+
+        results[namespace.TensorBoard.SCALAR]["gt_cr_increase"] = float(sum(gt_coverage_increase_list) / len(gt_coverage_increase_list))
+        results[namespace.TensorBoard.SCALAR]["pred_cr_increase"] = float(sum(pred_coverage_increase_list) / len(pred_coverage_increase_list))
+        results[namespace.TensorBoard.SCALAR]["cr_diff"] = float(sum(cr_diff_list) / len(cr_diff_list))
+        return results
+
+    def compute_coverage_rate(self, pred_pose, scanned_view_pts, view_data):
+        pass
\ No newline at end of file
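Note: rot_diff relies on PoseUtil.rotation_angle_distance, which is also outside this patch.
A sketch of the geodesic distance it presumably computes (the function name matches the call
site above, but the degrees-vs-radians choice is an assumption):

    import torch

    def rotation_angle_distance(R_gt, R_pred):
        # R_gt, R_pred: (B, 3, 3) rotation matrices
        rel = torch.matmul(R_gt.transpose(-1, -2), R_pred)
        trace = rel.diagonal(dim1=-2, dim2=-1).sum(-1)
        cos = ((trace - 1.0) / 2.0).clamp(-1.0, 1.0)   # clamp guards float drift
        return torch.rad2deg(torch.acos(cos))          # per-sample angle in degrees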
diff --git a/core/pipeline.py b/core/pipeline.py
index 033598d..9adc68a 100644
--- a/core/pipeline.py
+++ b/core/pipeline.py
@@ -38,7 +38,7 @@ class NBVReconstructionPipeline(nn.Module):
     def forward_train(self, data):
         pts_list = data['pts_list']
         pose_list = data['pose_list']
-        gt_delta_rot_6d = data["delta_rot_6d"]
+        gt_pose_9d = data["nbv_cam_pose"]
         pts_feat_list = []
         pose_feat_list = []
         for pts,pose in zip(pts_list,pose_list):
@@ -46,7 +46,7 @@
             pose_feat_list.append(self.pose_encoder.encode_pose(pose))
         seq_feat = self.seq_encoder.encode_sequence(pts_feat_list, pose_feat_list)
         ''' get std '''
-        perturbed_x, random_t, target_score, std = self.pertube_data(gt_delta_rot_6d)
+        perturbed_x, random_t, target_score, std = self.pertube_data(gt_pose_9d)
         input_data = {
             "sampled_pose": perturbed_x,
             "t": random_t,
@@ -69,9 +69,9 @@
             pts_feat_list.append(self.pts_encoder.encode_points(pts))
             pose_feat_list.append(self.pose_encoder.encode_pose(pose))
         seq_feat = self.seq_encoder.encode_sequence(pts_feat_list, pose_feat_list)
-        estimated_delta_rot_6d, in_process_sample = self.view_finder.next_best_view(seq_feat)
+        pred_pose_9d, in_process_sample = self.view_finder.next_best_view(seq_feat)
         result = {
-            "estimated_delta_rot_6d": estimated_delta_rot_6d,
+            "pred_pose_9d": pred_pose_9d,
             "in_process_sample": in_process_sample
         }
         return result
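Note: consumers of pred_pose_9d (e.g. the unfinished coverage-rate evaluator above) need the
9D vector back as a homogeneous transform. A minimal sketch under the same assumed 6D
convention; pose_9d_to_matrix is an illustrative helper, not part of the codebase:

    import torch

    def pose_9d_to_matrix(pose_9d, rotation_6d_to_matrix_tensor_batch):
        # pose_9d: (B, 9) = 6D rotation followed by translation
        B = pose_9d.shape[0]
        mat = torch.eye(4, device=pose_9d.device).repeat(B, 1, 1)
        mat[:, :3, :3] = rotation_6d_to_matrix_tensor_batch(pose_9d[:, :6])
        mat[:, :3, 3] = pose_9d[:, 6:]
        return mat   # (B, 4, 4) cam-to-world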
path = os.path.join(root, f"sequence.{scene_idx}_label.json") + def get_label_path(root, scene_name): + path = os.path.join(root,scene_name, f"label.json") return path @staticmethod - def get_scene_idx_list(root): - scene_dir = os.listdir(root) - scene_idx_list = [] - for scene in scene_dir: - if "sequence" in scene: - scene_idx = int(re.search(r'\d+', scene).group()) - scene_idx_list.append(scene_idx) - return scene_idx_list - - @staticmethod - def get_frame_idx_list(root, scene_idx): - scene_path = os.path.join(root, f"sequence.{scene_idx}") - view_dir = os.listdir(scene_path) - seen_frame_idx = set() - for view in view_dir: - if "step" in view: - frame_idx = int(re.search(r'\d+', view).group()) - seen_frame_idx.add(frame_idx) - return list(seen_frame_idx) - - @staticmethod - def load_model_points(root,scene_idx): - model_path = os.path.join(root, f"sequence.{scene_idx}", "world_points.txt") - model_pts = np.loadtxt(model_path) - return model_pts - - @staticmethod - def read_exr_depth(depth_path): - file = OpenEXR.InputFile(depth_path) - - dw = file.header()['dataWindow'] - width = dw.max.x - dw.min.x + 1 - height = dw.max.y - dw.min.y + 1 - - pix_type = Imath.PixelType(Imath.PixelType.FLOAT) - depth_map = np.frombuffer(file.channel('R', pix_type), dtype=np.float32) - - depth_map.shape = (height, width) - - return depth_map + def load_model_points(root, scene_name): + model_path = os.path.join(root, scene_name, "sampled_model_points.txt") + mesh = trimesh.load(model_path) + return mesh.vertices @staticmethod def load_depth(path): - depth_path = path + ".camera.Depth.exr" - depth_map = DataLoadUtil.read_exr_depth(depth_path) - return depth_map + depth_path = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + ".png") + depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED) + depth = depth.astype(np.float32) / 65535.0 + min_depth = 0.01 + max_depth = 5.0 + depth_meters = min_depth + (max_depth - min_depth) * depth + return depth_meters @staticmethod def load_label(path): @@ -74,49 +40,41 @@ class DataLoadUtil: @staticmethod def load_rgb(path): - rgb_path = path + ".camera.png" + rgb_path = os.path.join(os.path.dirname(path), "rgb", os.path.basename(path) + ".png") rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR) return rgb_image @staticmethod def load_seg(path): - seg_path = path + ".camera.semantic segmentation.png" - seg_image = cv2.imread(seg_path, cv2.IMREAD_COLOR) - return seg_image + mask_path = os.path.join(os.path.dirname(path), "mask", os.path.basename(path) + ".png") + mask_image = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE) + return mask_image @staticmethod - def load_cam_info(path): - label_path = path + ".camera_params.json" - with open(label_path, 'r') as f: - label_data = json.load(f) - cam_transform = np.asarray(label_data['cam_to_world']).reshape( - (4, 4) - ).T - + def cam_pose_transformation(cam_pose_before): offset = np.asarray([ [1, 0, 0, 0], [0, -1, 0, 0], - [0, 0, 1, 0], + [0, 0, -1, 0], [0, 0, 0, 1]]) - - cam_to_world = cam_transform @ offset - - - - f_x = label_data['f_x'] - f_y = label_data['f_y'] - c_x = label_data['c_x'] - c_y = label_data['c_y'] - cam_intrinsic = np.array([[f_x, 0, c_x], [0, f_y, c_y], [0, 0, 1]]) - + cam_pose_after = cam_pose_before @ offset + return cam_pose_after + + @staticmethod + def load_cam_info(path): + camera_params_path = os.path.join(os.path.dirname(path), "camera_params", os.path.basename(path) + ".json") + with open(camera_params_path, 'r') as f: + label_data = json.load(f) + cam_to_world = 
np.asarray(label_data["extrinsic"]) + cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world) + cam_intrinsic = np.asarray(label_data["intrinsic"]) return { "cam_to_world": cam_to_world, "cam_intrinsic": cam_intrinsic } - @staticmethod - def get_target_point_cloud(depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(255,255,255)): + def get_target_point_cloud(depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=255): h, w = depth.shape i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy') @@ -125,34 +83,16 @@ class DataLoadUtil: y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1] points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3) - points_camera_aug = np.concatenate([points_camera, np.ones((points_camera.shape[0], 1))], axis=-1) - - points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3] mask = mask.reshape(-1, 3) - target_mask = np.all(mask == target_mask_label, axis=-1) + target_mask = np.all(mask == target_mask_label) + target_points_camera = points_camera[target_mask] + target_points_camera_aug = np.concatenate([target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1) + + target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3] + return { - "points_world": points_world[target_mask], - "points_camera": points_camera[target_mask] - } - - @staticmethod - def get_target_point_cloud(depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(255,255,255)): - h, w = depth.shape - i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy') - - z = depth - x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0] - y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1] - - points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3) - points_camera_aug = np.concatenate([points_camera, np.ones((points_camera.shape[0], 1))], axis=-1) - - points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3] - mask = mask.reshape(-1, 3) - target_mask = np.all(mask == target_mask_label, axis=-1) - return { - "points_world": points_world[target_mask], - "points_camera": points_camera[target_mask] + "points_world": target_points_world, + "points_camera": target_points_camera } @staticmethod