commit a358dd98a9
Author: hofee
Date:   2024-09-27 08:06:55 +00:00

5 changed files with 81 additions and 51 deletions

===== Changed file 1/5: evaluation config (YAML) =====

@@ -6,24 +6,24 @@ runner:
   cuda_visible_devices: "0,1,2,3,4,5,6,7"
   experiment:
-    name: local_full_eval
+    name: w_gf_wo_lf_full
     root_dir: "experiments"
-    epoch: 20 # -1 stands for last epoch
+    epoch: 1 # -1 stands for last epoch
   test:
     dataset_list:
       - OmniObject3d_train
     blender_script_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py"
-    output_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/inference_result_full"
-    pipeline: nbv_reconstruction_pipeline
+    output_dir: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/inference_global_full_on_testset"
+    pipeline: nbv_reconstruction_global_pts_pipeline
 dataset:
   OmniObject3d_train:
     root_dir: "/media/hofee/repository/nbv_reconstruction_data_512"
     model_dir: "/media/hofee/data/data/scaled_object_meshes"
     source: seq_nbv_reconstruction_dataset
-    split_file: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt"
+    split_file: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/test_set_list.txt"
     type: test
     filter_degree: 75
     ratio: 1
@@ -33,11 +33,25 @@ dataset:
     load_from_preprocess: False
 pipeline:
-  nbv_reconstruction_pipeline:
-    pts_encoder: pointnet_encoder
-    seq_encoder: transformer_seq_encoder
-    pose_encoder: pose_encoder
-    view_finder: gf_view_finder
+  nbv_reconstruction_local_pts_pipeline:
+    modules:
+      pts_encoder: pointnet_encoder
+      seq_encoder: transformer_seq_encoder
+      pose_encoder: pose_encoder
+      view_finder: gf_view_finder
+    eps: 1e-5
+    global_scanned_feat: False
+  nbv_reconstruction_global_pts_pipeline:
+    modules:
+      pts_encoder: pointnet_encoder
+      pose_seq_encoder: transformer_pose_seq_encoder
+      pose_encoder: pose_encoder
+      view_finder: gf_view_finder
+    eps: 1e-5
+    global_scanned_feat: True
 module:
@@ -55,6 +69,13 @@ module:
     num_layers: 3
     output_dim: 2048
+  transformer_pose_seq_encoder:
+    pose_embed_dim: 256
+    num_heads: 4
+    ffn_dim: 256
+    num_layers: 3
+    output_dim: 1024
   gf_view_finder:
     t_feat_dim: 128
     pose_feat_dim: 256
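
A note on the new layout above: each pipeline entry now nests its component names under `modules` and carries its own `eps` and `global_scanned_feat` flag. Below is a minimal sketch of resolving such a block from this YAML; the loader function is hypothetical (the project itself resolves config through PytorchBoot's ConfigManager, whose API this diff does not show):

import yaml

def resolve_pipeline(config_path: str) -> dict:
    """Pick the pipeline block named by runner.test.pipeline (hypothetical helper)."""
    with open(config_path) as f:
        cfg = yaml.safe_load(f)
    name = cfg["runner"]["test"]["pipeline"]  # e.g. "nbv_reconstruction_global_pts_pipeline"
    block = cfg["pipeline"][name]
    return {
        "modules": block["modules"],                 # registry names of the sub-modules
        "eps": float(block["eps"]),                  # PyYAML parses a bare "1e-5" as a str, hence float()
        "use_global_feat": block["global_scanned_feat"],
    }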

===== Changed file 2/5: NBVReconstructionGlobalPointsPipeline =====

@@ -73,7 +73,6 @@ class NBVReconstructionGlobalPointsPipeline(nn.Module):
         device = next(self.parameters()).device
-        pts_feat_seq_list = []
         pose_feat_seq_list = []
         for scanned_n_to_world_pose_9d in scanned_n_to_world_pose_9d_batch:
@@ -82,10 +81,10 @@ class NBVReconstructionGlobalPointsPipeline(nn.Module):
         main_feat = self.pose_seq_encoder.encode_sequence(pose_feat_seq_list)
-        if self.enable_global_scanned_feat:
-            combined_scanned_pts_batch = data['combined_scanned_pts']
-            global_scanned_feat = self.pts_encoder.encode_points(combined_scanned_pts_batch)
-            main_feat = torch.cat([main_feat, global_scanned_feat], dim=-1)
+        combined_scanned_pts_batch = data['combined_scanned_pts']
+        global_scanned_feat = self.pts_encoder.encode_points(combined_scanned_pts_batch)
+        main_feat = torch.cat([main_feat, global_scanned_feat], dim=-1)
         if torch.isnan(main_feat).any():
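
The effect of these two hunks: the global-points pipeline drops its unused per-view point-feature list and no longer gates the global scanned-point feature behind `enable_global_scanned_feat`; it always concatenates that feature onto the pose-sequence feature. A toy sketch of the resulting feature flow (the linear layers are stand-ins for the real pointnet/transformer encoders, which are defined elsewhere in the repo):

import torch
import torch.nn as nn

class GlobalPtsForwardSketch(nn.Module):
    """Encode per-view poses as a sequence, encode the combined scanned
    point cloud globally, and concatenate both feature vectors."""
    def __init__(self, pose_dim=9, seq_dim=1024, pts_dim=1024):
        super().__init__()
        self.pose_encoder = nn.Linear(pose_dim, 256)  # stand-in for pose_encoder
        self.seq_encoder = nn.Linear(256, seq_dim)    # stand-in for transformer_pose_seq_encoder
        self.pts_encoder = nn.Linear(3, pts_dim)      # stand-in for pointnet_encoder

    def forward(self, pose_9d_seq, combined_scanned_pts):
        pose_feat = self.pose_encoder(pose_9d_seq)            # (B, S, 256)
        main_feat = self.seq_encoder(pose_feat).mean(dim=1)   # (B, seq_dim), crude pooling
        global_feat = self.pts_encoder(combined_scanned_pts).max(dim=1).values  # (B, pts_dim)
        return torch.cat([main_feat, global_feat], dim=-1)    # (B, seq_dim + pts_dim)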

===== Changed file 3/5: SeqNBVReconstructionDataset =====

@@ -39,42 +39,32 @@ class SeqNBVReconstructionDataset(BaseDataset):
             scene_name_list.append(scene_name)
         return scene_name_list

-    def get_datalist_new(self):
-        datalist = []
-        for scene_name in self.scene_name_list:
-            label_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
-            for i in range(label_num):
-                label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, i)
-                label_data = DataLoadUtil.load_label(label_path)
-                best_seq = label_data["best_sequence"]
-                max_coverage_rate = label_data["max_coverage_rate"]
-                first_frame = best_seq[0]
-                best_seq_len = len(best_seq)
-                datalist.append({
-                    "scene_name": scene_name,
-                    "first_frame": first_frame,
-                    "max_coverage_rate": max_coverage_rate,
-                    "best_seq_len": best_seq_len,
-                    "label_idx": i,
-                })
-        return datalist

     def get_datalist(self):
         datalist = []
         for scene_name in self.scene_name_list:
-            label_path = DataLoadUtil.get_label_path_old(self.root_dir, scene_name)
+            seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
+            scene_max_coverage_rate = 0
+            scene_max_cr_idx = 0
+            for seq_idx in range(seq_num):
+                label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx)
+                label_data = DataLoadUtil.load_label(label_path)
+                max_coverage_rate = label_data["max_coverage_rate"]
+                if max_coverage_rate > scene_max_coverage_rate:
+                    scene_max_coverage_rate = max_coverage_rate
+                    scene_max_cr_idx = seq_idx
+            label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx)
             label_data = DataLoadUtil.load_label(label_path)
-            best_seq = label_data["best_sequence"]
-            max_coverage_rate = label_data["max_coverage_rate"]
-            first_frame = best_seq[0]
-            best_seq_len = len(best_seq)
+            first_frame = label_data["best_sequence"][0]
+            best_seq_len = len(label_data["best_sequence"])
             datalist.append({
                 "scene_name": scene_name,
                 "first_frame": first_frame,
-                "max_coverage_rate": max_coverage_rate,
+                "max_coverage_rate": scene_max_coverage_rate,
                 "best_seq_len": best_seq_len,
-                "best_seq": best_seq,
+                "label_idx": scene_max_cr_idx,
             })
         return datalist

     def __getitem__(self, index):
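
The rewritten `get_datalist` scans every label file of a scene and keeps the one with the highest `max_coverage_rate`. For reference, the same selection written as an argmax over label indices (a sketch assuming `DataLoadUtil` behaves as shown in the hunk above):

def best_label_idx(root_dir, scene_name):
    # Index of the label file whose best sequence covers the most of the model.
    seq_num = DataLoadUtil.get_label_num(root_dir, scene_name)
    return max(
        range(seq_num),
        key=lambda i: DataLoadUtil.load_label(
            DataLoadUtil.get_label_path(root_dir, scene_name, i)
        )["max_coverage_rate"],
    )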
@@ -110,8 +100,10 @@ class SeqNBVReconstructionDataset(BaseDataset):
         first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
         scene_path = os.path.join(self.root_dir, scene_name)
         model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name)
         data_item = {
             "first_pts": np.asarray([first_downsampled_target_point_cloud],dtype=np.float32),
+            "combined_scanned_pts": np.asarray(first_downsampled_target_point_cloud,dtype=np.float32),
             "first_to_world_9d": np.asarray([first_to_world_9d],dtype=np.float32),
             "scene_name": scene_name,
             "max_coverage_rate": max_coverage_rate,
@@ -134,8 +126,9 @@ class SeqNBVReconstructionDataset(BaseDataset):
         collate_data = {}
         collate_data["first_pts"] = [torch.tensor(item['first_pts']) for item in batch]
         collate_data["first_to_world_9d"] = [torch.tensor(item['first_to_world_9d']) for item in batch]
+        collate_data["combined_scanned_pts"] = torch.stack([torch.tensor(item['combined_scanned_pts']) for item in batch])
         for key in batch[0].keys():
-            if key not in ["first_pts", "first_to_world_9d"]:
+            if key not in ["first_pts", "first_to_world_9d", "combined_scanned_pts"]:
                 collate_data[key] = [item[key] for item in batch]
         return collate_data
     return collate_fn
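
The collate function stacks only the keys whose per-item shapes match (here `combined_scanned_pts`) and leaves ragged keys as Python lists. A self-contained illustration of the same pattern, with invented field names:

import torch
from torch.utils.data import DataLoader, Dataset

class ToyDataset(Dataset):
    """Items mix a fixed-size array with variable-length metadata."""
    def __len__(self):
        return 4

    def __getitem__(self, i):
        return {
            "fixed_pts": torch.randn(1024, 3),  # same shape for every item -> stackable
            "seq": list(range(i + 1)),          # ragged -> keep as a list
            "name": f"scene_{i}",
        }

def collate_fn(batch):
    out = {"fixed_pts": torch.stack([b["fixed_pts"] for b in batch])}
    for key in batch[0]:
        if key != "fixed_pts":
            out[key] = [b[key] for b in batch]
    return out

loader = DataLoader(ToyDataset(), batch_size=2, collate_fn=collate_fn)
batch = next(iter(loader))  # batch["fixed_pts"].shape == (2, 1024, 3)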

===== Changed file 4/5: Inferencer runner =====

@@ -20,7 +20,7 @@ from PytorchBoot.runners.runner import Runner
 from PytorchBoot.utils import Log
 from PytorchBoot.status import status_manager

-@stereotype.runner("inferencer", comment="not tested")
+@stereotype.runner("inferencer")
 class Inferencer(Runner):
     def __init__(self, config_path):
         super().__init__(config_path)
@@ -34,6 +34,7 @@ class Inferencer(Runner):
         ''' Experiment '''
         self.load_experiment("nbv_evaluator")
+        self.stat_result = {}

         ''' Test '''
         self.test_config = ConfigManager.get(namespace.Stereotype.RUNNER, namespace.Mode.TEST)
@@ -103,9 +104,9 @@ class Inferencer(Runner):
         input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)]
         input_data["scanned_n_to_world_pose_9d"] = [data["first_to_world_9d"][0].to(self.device)]
         input_data["mode"] = namespace.Mode.TEST
+        input_data["combined_scanned_pts"] = data["combined_scanned_pts"]
         input_pts_N = input_data["scanned_pts"][0].shape[1]
         first_frame_target_pts, _ = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
         scanned_view_pts = [first_frame_target_pts]
         last_pred_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
@@ -138,7 +139,7 @@ class Inferencer(Runner):
             print(pred_cr, last_pred_cr, " max: ", data["max_coverage_rate"])
             if pred_cr >= data["max_coverage_rate"]:
-                break
+                print("max coverage rate reached!")
             if pred_cr <= last_pred_cr + cr_increase_threshold:
                 retry += 1
                 retry_duplication_pose.append(pred_pose.cpu().numpy().tolist())
@@ -155,6 +156,11 @@ class Inferencer(Runner):
             input_data["scanned_pts"] = [torch.cat([input_data["scanned_pts"][0], new_pts_tensor], dim=0)]
             input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], pred_pose_9d], dim=0)]
+            combined_scanned_views_pts = np.concatenate(input_data["scanned_pts"][0].tolist(), axis=0)
+            voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002)
+            random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, input_pts_N)
+            input_data["combined_scanned_pts"] = torch.tensor(random_downsampled_combined_scanned_pts_np, dtype=torch.float32).unsqueeze(0).to(self.device)
             last_pred_cr = pred_cr
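
`PtsUtil`'s downsampling helpers are not part of this diff; the sketch below shows what voxel downsampling followed by random downsampling to a fixed point count typically looks like in numpy. This is assumed behavior for illustration, not the project's exact implementation:

import numpy as np

def voxel_downsample(points: np.ndarray, voxel_size: float) -> np.ndarray:
    # Keep one point per occupied voxel (first point in each voxel wins).
    keys = np.floor(points / voxel_size).astype(np.int64)
    _, idx = np.unique(keys, axis=0, return_index=True)
    return points[np.sort(idx)]

def random_downsample(points: np.ndarray, n: int) -> np.ndarray:
    # Sample exactly n points; sample with replacement if fewer remain.
    replace = points.shape[0] < n
    choice = np.random.choice(points.shape[0], n, replace=replace)
    return points[choice]

# e.g.: pts = random_downsample(voxel_downsample(pts, 0.002), input_pts_N)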
@@ -173,6 +179,15 @@ class Inferencer(Runner):
             "retry_duplication_pose": retry_duplication_pose,
             "best_seq_len": data["best_seq_len"][0],
         }
+        self.stat_result[scene_name] = {
+            "max_coverage_rate": data["max_coverage_rate"][0],
+            "success_rate": max(pred_cr_seq) / data["max_coverage_rate"][0],
+            "coverage_rate_seq": pred_cr_seq,
+            "pred_max_coverage_rate": max(pred_cr_seq),
+            "pred_seq_len": len(pred_cr_seq),
+        }
+        print('success rate: ', max(pred_cr_seq) / data["max_coverage_rate"][0])
         return result

     def compute_coverage_rate(self, scanned_view_pts, new_pts, model_pts, threshold=0.005):
@@ -191,6 +206,8 @@ class Inferencer(Runner):
             os.makedirs(dataset_dir)
         output_path = os.path.join(dataset_dir, f"{scene_name}.pkl")
         pickle.dump(output, open(output_path, "wb"))
+        with open(os.path.join(dataset_dir, "stat.json"), "w") as f:
+            json.dump(self.stat_result, f)

     def get_checkpoint_path(self, is_last=False):
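
Note that this hunk assumes `json` is already imported in the module. Since stat.json keeps one entry per scene, a run can be summarized afterwards with a few lines (hypothetical consumer script, not part of the commit):

import json

with open("stat.json") as f:
    stat = json.load(f)

# success_rate is max predicted coverage / ground-truth max coverage,
# so 1.0 means the policy matched the best known sequence for that scene.
rates = [entry["success_rate"] for entry in stat.values()]
print(f"scenes: {len(rates)}, mean success rate: {sum(rates) / len(rates):.3f}")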

===== Changed file 5/5: RenderUtil =====

@@ -34,7 +34,7 @@ class RenderUtil:
             return None
         path = os.path.join(temp_dir, "tmp")
         # ------ Debug Start ------
-        import ipdb;ipdb.set_trace()
+        # import ipdb;ipdb.set_trace()
         # ------ Debug End ------
         point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
         cam_params = DataLoadUtil.load_cam_info(path, binocular=True)