Define the display table as the world-space origin

This commit is contained in:
hofee 2024-09-19 11:29:43 +00:00
parent 8d5d6d5df4
commit bb75372f7e
4 changed files with 34 additions and 17 deletions

View File

@ -7,7 +7,7 @@ runner:
parallel: False parallel: False
experiment: experiment:
name: new_test_overfit_2 name: new_test_overfit_to_world
root_dir: "experiments" root_dir: "experiments"
use_checkpoint: False use_checkpoint: False
epoch: -1 # -1 stands for last epoch epoch: -1 # -1 stands for last epoch
@ -38,8 +38,8 @@ dataset:
type: train type: train
cache: True cache: True
ratio: 1 ratio: 1
batch_size: 128 batch_size: 160
num_workers: 12 num_workers: 16
pts_num: 4096 pts_num: 4096
OmniObject3d_test: OmniObject3d_test:

View File

@ -17,6 +17,7 @@ from utils.reconstruction import ReconstructionUtil
@stereotype.dataset("nbv_reconstruction_dataset") @stereotype.dataset("nbv_reconstruction_dataset")
class NBVReconstructionDataset(BaseDataset): class NBVReconstructionDataset(BaseDataset):
DISPLAY_TABLE_POSITION = np.asarray([0,0,0.85])
def __init__(self, config): def __init__(self, config):
super(NBVReconstructionDataset, self).__init__(config) super(NBVReconstructionDataset, self).__init__(config)
self.config = config self.config = config
@ -37,6 +38,8 @@ class NBVReconstructionDataset(BaseDataset):
expr_root = ConfigManager.get("runner", "experiment", "root_dir") expr_root = ConfigManager.get("runner", "experiment", "root_dir")
expr_name = ConfigManager.get("runner", "experiment", "name") expr_name = ConfigManager.get("runner", "experiment", "name")
self.cache_dir = os.path.join(expr_root, expr_name, "cache") self.cache_dir = os.path.join(expr_root, expr_name, "cache")
#self.preprocess_cache()
def load_scene_name_list(self): def load_scene_name_list(self):
@ -65,9 +68,15 @@ class NBVReconstructionDataset(BaseDataset):
} }
) )
return datalist return datalist
def preprocess_cache(self):
Log.info("preprocessing cache...")
for item_idx in range(len(self.datalist)):
self.__getitem__(item_idx)
Log.success("finish preprocessing cache.")
def load_from_cache(self, scene_name, first_frame_idx, curr_frame_idx): def load_from_cache(self, scene_name, curr_frame_idx):
cache_name = f"{scene_name}_{first_frame_idx}_{curr_frame_idx}.txt" cache_name = f"{scene_name}_{curr_frame_idx}.txt"
cache_path = os.path.join(self.cache_dir, cache_name) cache_path = os.path.join(self.cache_dir, cache_name)
if os.path.exists(cache_path): if os.path.exists(cache_path):
data = np.loadtxt(cache_path) data = np.loadtxt(cache_path)
@ -75,8 +84,8 @@ class NBVReconstructionDataset(BaseDataset):
else: else:
return None return None
def save_to_cache(self, scene_name, first_frame_idx, curr_frame_idx, data): def save_to_cache(self, scene_name, curr_frame_idx, data):
cache_name = f"{scene_name}_{first_frame_idx}_{curr_frame_idx}.txt" cache_name = f"{scene_name}_{curr_frame_idx}.txt"
cache_path = os.path.join(self.cache_dir, cache_name) cache_path = os.path.join(self.cache_dir, cache_name)
try: try:
np.savetxt(cache_path, data) np.savetxt(cache_path, data)
@ -106,7 +115,7 @@ class NBVReconstructionDataset(BaseDataset):
cached_data = None cached_data = None
if self.cache: if self.cache:
cached_data = self.load_from_cache(scene_name, first_frame_idx, frame_idx) cached_data = self.load_from_cache(scene_name, frame_idx)
if cached_data is None: if cached_data is None:
depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True) depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True)
@ -118,7 +127,7 @@ class NBVReconstructionDataset(BaseDataset):
overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R) overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num) downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num)
if self.cache: if self.cache:
self.save_to_cache(scene_name, first_frame_idx, frame_idx, downsampled_target_point_cloud) self.save_to_cache(scene_name, frame_idx, downsampled_target_point_cloud)
else: else:
downsampled_target_point_cloud = cached_data downsampled_target_point_cloud = cached_data
@ -137,7 +146,6 @@ class NBVReconstructionDataset(BaseDataset):
best_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(best_frame_to_world[:3,:3])) best_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(best_frame_to_world[:3,:3]))
best_to_world_trans = best_frame_to_world[:3,3] best_to_world_trans = best_frame_to_world[:3,3]
best_to_world_9d = np.concatenate([best_to_world_6d, best_to_world_trans], axis=0) best_to_world_9d = np.concatenate([best_to_world_6d, best_to_world_trans], axis=0)
data_item = { data_item = {
"scanned_pts": np.asarray(scanned_views_pts,dtype=np.float32), "scanned_pts": np.asarray(scanned_views_pts,dtype=np.float32),
"scanned_coverage_rate": scanned_coverages_rate, "scanned_coverage_rate": scanned_coverages_rate,
@ -147,6 +155,8 @@ class NBVReconstructionDataset(BaseDataset):
"max_coverage_rate": max_coverage_rate, "max_coverage_rate": max_coverage_rate,
"scene_name": scene_name "scene_name": scene_name
} }
if self.type == namespace.Mode.TEST: if self.type == namespace.Mode.TEST:
diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name) diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name)
voxel_threshold = diag*0.02 voxel_threshold = diag*0.02

View File

@ -98,7 +98,7 @@ class Inferencer(Runner):
''' data for inference ''' ''' data for inference '''
input_data = {} input_data = {}
input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)] input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)]
input_data["scanned_n_to_1_pose_9d"] = [data["first_to_first_9d"][0].to(self.device)] input_data["scanned_n_to_world_pose_9d"] = [data["first_to_first_9d"][0].to(self.device)]
input_data["mode"] = namespace.Mode.TEST input_data["mode"] = namespace.Mode.TEST
input_pts_N = input_data["scanned_pts"][0].shape[1] input_pts_N = input_data["scanned_pts"][0].shape[1]
@ -141,7 +141,7 @@ class Inferencer(Runner):
new_pts_tensor = torch.tensor(new_pts, dtype=torch.float32).unsqueeze(0).to(self.device) new_pts_tensor = torch.tensor(new_pts, dtype=torch.float32).unsqueeze(0).to(self.device)
input_data["scanned_pts"] = [torch.cat([input_data["scanned_pts"][0] , new_pts_tensor], dim=0)] input_data["scanned_pts"] = [torch.cat([input_data["scanned_pts"][0] , new_pts_tensor], dim=0)]
input_data["scanned_n_to_1_pose_9d"] = [torch.cat([input_data["scanned_n_to_1_pose_9d"][0], next_pose_9d], dim=0)] input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], next_pose_9d], dim=0)]
last_pred_cr = pred_cr last_pred_cr = pred_cr
# ------ Debug Start ------ # ------ Debug Start ------
@ -150,9 +150,9 @@ class Inferencer(Runner):
input_data["scanned_pts"] = input_data["scanned_pts"][0].cpu().numpy().tolist() input_data["scanned_pts"] = input_data["scanned_pts"][0].cpu().numpy().tolist()
input_data["scanned_n_to_1_pose_9d"] = input_data["scanned_n_to_1_pose_9d"][0].cpu().numpy().tolist() input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist()
result = { result = {
"pred_pose_9d_seq": input_data["scanned_n_to_1_pose_9d"], "pred_pose_9d_seq": input_data["scanned_n_to_world_pose_9d"],
"pts_seq": input_data["scanned_pts"], "pts_seq": input_data["scanned_pts"],
"target_pts_seq": scanned_view_pts, "target_pts_seq": scanned_view_pts,
"coverage_rate_seq": pred_cr_seq, "coverage_rate_seq": pred_cr_seq,

View File

@ -6,7 +6,7 @@ import trimesh
from utils.pts import PtsUtil from utils.pts import PtsUtil
class DataLoadUtil: class DataLoadUtil:
DISPLAY_TABLE_POSITION = np.asarray([0,0,0.85])
@staticmethod @staticmethod
def get_path(root, scene_name, frame_idx): def get_path(root, scene_name, frame_idx):
path = os.path.join(root, scene_name, f"{frame_idx}") path = os.path.join(root, scene_name, f"{frame_idx}")
@ -160,12 +160,16 @@ class DataLoadUtil:
return cam_pose_after return cam_pose_after
@staticmethod @staticmethod
def load_cam_info(path, binocular=False): def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
camera_params_path = os.path.join(os.path.dirname(path), "camera_params", os.path.basename(path) + ".json") camera_params_path = os.path.join(os.path.dirname(path), "camera_params", os.path.basename(path) + ".json")
with open(camera_params_path, 'r') as f: with open(camera_params_path, 'r') as f:
label_data = json.load(f) label_data = json.load(f)
cam_to_world = np.asarray(label_data["extrinsic"]) cam_to_world = np.asarray(label_data["extrinsic"])
cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world) cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
world_to_display_table = np.eye(4)
world_to_display_table[:3, 3] = - DataLoadUtil.DISPLAY_TABLE_POSITION
if display_table_as_world_space_origin:
cam_to_world = np.dot(world_to_display_table, cam_to_world)
cam_intrinsic = np.asarray(label_data["intrinsic"]) cam_intrinsic = np.asarray(label_data["intrinsic"])
cam_info = { cam_info = {
"cam_to_world": cam_to_world, "cam_to_world": cam_to_world,
@ -176,10 +180,13 @@ class DataLoadUtil:
if binocular: if binocular:
cam_to_world_R = np.asarray(label_data["extrinsic_R"]) cam_to_world_R = np.asarray(label_data["extrinsic_R"])
cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R) cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
cam_info["cam_to_world_R"] = cam_to_world_R
cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"]) cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O) cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
if display_table_as_world_space_origin:
cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O)
cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R)
cam_info["cam_to_world_O"] = cam_to_world_O cam_info["cam_to_world_O"] = cam_to_world_O
cam_info["cam_to_world_R"] = cam_to_world_R
return cam_info return cam_info
@staticmethod @staticmethod