fix bug for training
This commit is contained in:
parent
a79ca7749d
commit
4c69ed777b
@ -5,5 +5,5 @@ from runners.data_spliter import DataSpliter
|
|||||||
class DataSplitApp:
|
class DataSplitApp:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def start():
|
def start():
|
||||||
DataSpliter(r"configs\split_dataset_config.yaml").run()
|
DataSpliter("configs/split_dataset_config.yaml").run()
|
||||||
|
|
@ -1,8 +1,8 @@
|
|||||||
from PytorchBoot.application import PytorchBootApplication
|
from PytorchBoot.application import PytorchBootApplication
|
||||||
from runners.strategy_generator import StrategyGenerator
|
from PytorchBoot.runners.trainer import DefaultTrainer
|
||||||
|
|
||||||
@PytorchBootApplication("train")
|
@PytorchBootApplication("train")
|
||||||
class TrainApp:
|
class TrainApp:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def start():
|
def start():
|
||||||
StrategyGenerator(r"configs\train_config.yaml").run()
|
DefaultTrainer("configs/train_config.yaml").run()
|
@ -10,13 +10,13 @@ runner:
|
|||||||
root_dir: "experiments"
|
root_dir: "experiments"
|
||||||
|
|
||||||
split:
|
split:
|
||||||
root_dir: "C:\\Document\\Local Project\\nbv_rec\\data\\sample"
|
root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
|
||||||
type: "unseen_instance" # "unseen_category"
|
type: "unseen_instance" # "unseen_category"
|
||||||
datasets:
|
datasets:
|
||||||
OmniObject3d_train:
|
OmniObject3d_train:
|
||||||
path: "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_train.txt"
|
path: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt"
|
||||||
ratio: 0.5
|
ratio: 0.9
|
||||||
|
|
||||||
OmniObject3d_test:
|
OmniObject3d_test:
|
||||||
path: "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_test.txt"
|
path: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_test.txt"
|
||||||
ratio: 0.5
|
ratio: 0.1
|
@ -18,12 +18,14 @@ runner:
|
|||||||
save_points: False
|
save_points: False
|
||||||
save_best_combined_points: True
|
save_best_combined_points: True
|
||||||
save_mesh: True
|
save_mesh: True
|
||||||
|
overwrite: False
|
||||||
dataset_list:
|
dataset_list:
|
||||||
- OmniObject3d
|
- OmniObject3d
|
||||||
|
|
||||||
datasets:
|
datasets:
|
||||||
OmniObject3d:
|
OmniObject3d:
|
||||||
root_dir: "/media/hofee/data/project/python/nbv_reconstruction/nbv_rec_visualize/data/sample"
|
#"/media/hofee/data/data/temp_output"
|
||||||
|
root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
|
||||||
model_dir: "/media/hofee/data/data/scaled_object_meshes"
|
model_dir: "/media/hofee/data/data/scaled_object_meshes"
|
||||||
#output_dir: "/media/hofee/data/data/label_output"
|
#output_dir: "/media/hofee/data/data/label_output"
|
||||||
|
|
||||||
|
@ -2,15 +2,16 @@
|
|||||||
runner:
|
runner:
|
||||||
general:
|
general:
|
||||||
seed: 0
|
seed: 0
|
||||||
device: cpu
|
device: cuda
|
||||||
cuda_visible_devices: "0,1,2,3,4,5,6,7"
|
cuda_visible_devices: "0,1,2,3,4,5,6,7"
|
||||||
|
parallel: False
|
||||||
|
|
||||||
experiment:
|
experiment:
|
||||||
name: debug
|
name: test_overfit
|
||||||
root_dir: "experiments"
|
root_dir: "experiments"
|
||||||
use_checkpoint: False
|
use_checkpoint: False
|
||||||
epoch: -1 # -1 stands for last epoch
|
epoch: -1 # -1 stands for last epoch
|
||||||
max_epochs: 5
|
max_epochs: 5000
|
||||||
save_checkpoint_interval: 1
|
save_checkpoint_interval: 1
|
||||||
test_first: False
|
test_first: False
|
||||||
|
|
||||||
@ -19,33 +20,42 @@ runner:
|
|||||||
type: Adam
|
type: Adam
|
||||||
lr: 0.0001
|
lr: 0.0001
|
||||||
losses:
|
losses:
|
||||||
- mse_loss
|
- gf_loss
|
||||||
dataset: OmniObject3d_train
|
dataset: OmniObject3d_train
|
||||||
test:
|
test:
|
||||||
frequency: 3 # test frequency
|
frequency: 3 # test frequency
|
||||||
dataset_list:
|
dataset_list:
|
||||||
- OmniObject3d_train
|
- OmniObject3d_test
|
||||||
|
|
||||||
pipeline: nbv_reconstruction_pipeline
|
pipeline: nbv_reconstruction_pipeline
|
||||||
|
|
||||||
datasets:
|
dataset:
|
||||||
OmniObject3d_train:
|
OmniObject3d_train:
|
||||||
root_dir: "C:\\Document\\Local Project\\nbv_rec\\data\\sample"
|
root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
|
||||||
split_file: "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_train.txt"
|
source: nbv_reconstruction_dataset
|
||||||
|
split_file: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt"
|
||||||
ratio: 1.0
|
ratio: 1.0
|
||||||
batch_size: 1
|
batch_size: 1
|
||||||
num_workers: 12
|
num_workers: 12
|
||||||
pts_num: 2048
|
pts_num: 4096
|
||||||
|
|
||||||
OmniObject3d_test:
|
OmniObject3d_test:
|
||||||
root_dir: "C:\\Document\\Local Project\\nbv_rec\\data\\sample"
|
root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
|
||||||
split_file: "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_test.txt"
|
source: nbv_reconstruction_dataset
|
||||||
|
split_file: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt"
|
||||||
eval_list:
|
eval_list:
|
||||||
- pose_diff
|
- pose_diff
|
||||||
ratio: 1.0
|
ratio: 0.1
|
||||||
batch_size: 1
|
batch_size: 1
|
||||||
num_workers: 1
|
num_workers: 1
|
||||||
pts_num: 2048
|
pts_num: 4096
|
||||||
|
|
||||||
|
pipeline:
|
||||||
|
nbv_reconstruction_pipeline:
|
||||||
|
pts_encoder: pointnet_encoder
|
||||||
|
seq_encoder: transformer_seq_encoder
|
||||||
|
pose_encoder: pose_encoder
|
||||||
|
view_finder: gf_view_finder
|
||||||
|
|
||||||
module:
|
module:
|
||||||
|
|
||||||
@ -58,13 +68,15 @@ module:
|
|||||||
transformer_seq_encoder:
|
transformer_seq_encoder:
|
||||||
pts_embed_dim: 1024
|
pts_embed_dim: 1024
|
||||||
pose_embed_dim: 256
|
pose_embed_dim: 256
|
||||||
num_heads: 4
|
num_heads: 2 # 4
|
||||||
ffn_dim: 256
|
ffn_dim: 128 # 256
|
||||||
num_layers: 3
|
num_layers: 2 # 3
|
||||||
max_seq_len: 30
|
output_dim: 1024 # 2048
|
||||||
output_dim: 2048
|
|
||||||
|
|
||||||
gf_view_finder:
|
gf_view_finder:
|
||||||
|
t_feat_dim: 128
|
||||||
|
pose_feat_dim: 256
|
||||||
|
main_feat_dim: 1024 # 2048
|
||||||
regression_head: Rx_Ry_and_T
|
regression_head: Rx_Ry_and_T
|
||||||
pose_mode: rot_matrix
|
pose_mode: rot_matrix
|
||||||
per_point_feature: False
|
per_point_feature: False
|
||||||
@ -74,4 +86,10 @@ module:
|
|||||||
|
|
||||||
pose_encoder:
|
pose_encoder:
|
||||||
pose_dim: 9
|
pose_dim: 9
|
||||||
output_dim: 256
|
out_dim: 256
|
||||||
|
|
||||||
|
loss_function:
|
||||||
|
gf_loss:
|
||||||
|
|
||||||
|
evaluation_method:
|
||||||
|
pose_diff:
|
@ -9,10 +9,10 @@ runner:
|
|||||||
generate:
|
generate:
|
||||||
object_dir: /media/hofee/data/data/scaled_object_meshes
|
object_dir: /media/hofee/data/data/scaled_object_meshes
|
||||||
table_model_path: /media/hofee/data/data/others/table.obj
|
table_model_path: /media/hofee/data/data/others/table.obj
|
||||||
output_dir: /media/hofee/data/data/temp_output
|
output_dir: /media/hofee/repository/nbv_reconstruction_data_512
|
||||||
binocular_vision: true
|
binocular_vision: true
|
||||||
plane_size: 10
|
plane_size: 10
|
||||||
max_views: 256
|
max_views: 512
|
||||||
min_views: 64
|
min_views: 64
|
||||||
max_diag: 0.7
|
max_diag: 0.7
|
||||||
min_diag: 0.1
|
min_diag: 0.1
|
||||||
|
112
core/dataset.py
112
core/dataset.py
@ -1,10 +1,10 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from PytorchBoot.dataset import BaseDataset
|
from PytorchBoot.dataset import BaseDataset
|
||||||
import PytorchBoot.stereotype as stereotype
|
import PytorchBoot.stereotype as stereotype
|
||||||
from torch.nn.utils.rnn import pad_sequence
|
import torch
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
sys.path.append(r"C:\Document\Local Project\nbv_rec\nbv_reconstruction")
|
sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction")
|
||||||
|
|
||||||
from utils.data_load import DataLoadUtil
|
from utils.data_load import DataLoadUtil
|
||||||
from utils.pose import PoseUtil
|
from utils.pose import PoseUtil
|
||||||
@ -56,18 +56,25 @@ class NBVReconstructionDataset(BaseDataset):
|
|||||||
scene_name = data_item_info["scene_name"]
|
scene_name = data_item_info["scene_name"]
|
||||||
scanned_views_pts, scanned_coverages_rate, scanned_n_to_1_pose = [], [], []
|
scanned_views_pts, scanned_coverages_rate, scanned_n_to_1_pose = [], [], []
|
||||||
first_frame_idx = scanned_views[0][0]
|
first_frame_idx = scanned_views[0][0]
|
||||||
first_frame_to_world = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx))["cam_to_world"]
|
first_cam_info = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx), binocular=True)
|
||||||
|
first_frame_to_world = first_cam_info["cam_to_world"]
|
||||||
for view in scanned_views:
|
for view in scanned_views:
|
||||||
frame_idx = view[0]
|
frame_idx = view[0]
|
||||||
coverage_rate = view[1]
|
coverage_rate = view[1]
|
||||||
view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
|
view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
|
||||||
depth = DataLoadUtil.load_depth(view_path)
|
cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True)
|
||||||
cam_info = DataLoadUtil.load_cam_info(view_path)
|
n_to_world_pose = cam_info["cam_to_world"]
|
||||||
mask = DataLoadUtil.load_seg(view_path)
|
nR_to_world_pose = cam_info["cam_to_world_R"]
|
||||||
frame_curr_to_world = cam_info["cam_to_world"]
|
n_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), n_to_world_pose)
|
||||||
n_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), frame_curr_to_world)
|
nR_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), nR_to_world_pose)
|
||||||
target_point_cloud = DataLoadUtil.get_target_point_cloud(depth, cam_info["cam_intrinsic"], n_to_1_pose, mask)["points_world"]
|
depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True)
|
||||||
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(target_point_cloud, self.pts_num)
|
point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_info['cam_intrinsic'], n_to_1_pose)['points_world']
|
||||||
|
point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_info['cam_intrinsic'], nR_to_1_pose)['points_world']
|
||||||
|
|
||||||
|
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
|
||||||
|
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
|
||||||
|
overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
|
||||||
|
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num)
|
||||||
scanned_views_pts.append(downsampled_target_point_cloud)
|
scanned_views_pts.append(downsampled_target_point_cloud)
|
||||||
scanned_coverages_rate.append(coverage_rate)
|
scanned_coverages_rate.append(coverage_rate)
|
||||||
n_to_1_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(n_to_1_pose[:3,:3]))
|
n_to_1_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(n_to_1_pose[:3,:3]))
|
||||||
@ -86,10 +93,10 @@ class NBVReconstructionDataset(BaseDataset):
|
|||||||
|
|
||||||
data_item = {
|
data_item = {
|
||||||
"scanned_pts": np.asarray(scanned_views_pts,dtype=np.float32),
|
"scanned_pts": np.asarray(scanned_views_pts,dtype=np.float32),
|
||||||
"scanned_coverage_rate": np.asarray(scanned_coverages_rate,dtype=np.float32),
|
"scanned_coverage_rate": scanned_coverages_rate,
|
||||||
"scanned_n_to_1_pose_9d": np.asarray(scanned_n_to_1_pose,dtype=np.float32),
|
"scanned_n_to_1_pose_9d": np.asarray(scanned_n_to_1_pose,dtype=np.float32),
|
||||||
"best_coverage_rate": nbv_coverage_rate,
|
"best_coverage_rate": nbv_coverage_rate,
|
||||||
"best_to_1_pose_9d": best_to_1_9d,
|
"best_to_1_pose_9d": np.asarray(best_to_1_9d,dtype=np.float32),
|
||||||
"max_coverage_rate": max_coverage_rate,
|
"max_coverage_rate": max_coverage_rate,
|
||||||
"scene_name": scene_name
|
"scene_name": scene_name
|
||||||
}
|
}
|
||||||
@ -101,23 +108,14 @@ class NBVReconstructionDataset(BaseDataset):
|
|||||||
|
|
||||||
def get_collate_fn(self):
|
def get_collate_fn(self):
|
||||||
def collate_fn(batch):
|
def collate_fn(batch):
|
||||||
scanned_pts = [item['scanned_pts'] for item in batch]
|
collate_data = {}
|
||||||
scanned_n_to_1_pose_9d = [item['scanned_n_to_1_pose_9d'] for item in batch]
|
collate_data["scanned_pts"] = [torch.tensor(item['scanned_pts']) for item in batch]
|
||||||
rest = {}
|
collate_data["scanned_n_to_1_pose_9d"] = [torch.tensor(item['scanned_n_to_1_pose_9d']) for item in batch]
|
||||||
|
collate_data["best_to_1_pose_9d"] = torch.stack([torch.tensor(item['best_to_1_pose_9d']) for item in batch])
|
||||||
for key in batch[0].keys():
|
for key in batch[0].keys():
|
||||||
if key in ['scanned_pts', 'scanned_n_to_1_pose_9d']:
|
if key not in ["scanned_pts", "scanned_n_to_1_pose_9d", "best_to_1_pose_9d"]:
|
||||||
continue
|
collate_data[key] = [item[key] for item in batch]
|
||||||
if isinstance(batch[0][key], torch.Tensor):
|
return collate_data
|
||||||
rest[key] = torch.stack([item[key] for item in batch])
|
|
||||||
elif isinstance(batch[0][key], str):
|
|
||||||
rest[key] = [item[key] for item in batch]
|
|
||||||
else:
|
|
||||||
rest[key] = [item[key] for item in batch]
|
|
||||||
return {
|
|
||||||
'scanned_pts': scanned_pts,
|
|
||||||
'scanned_n_to_1_pose_9d': scanned_n_to_1_pose_9d,
|
|
||||||
**rest
|
|
||||||
}
|
|
||||||
return collate_fn
|
return collate_fn
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
@ -126,36 +124,48 @@ if __name__ == "__main__":
|
|||||||
torch.manual_seed(seed)
|
torch.manual_seed(seed)
|
||||||
np.random.seed(seed)
|
np.random.seed(seed)
|
||||||
config = {
|
config = {
|
||||||
"root_dir": "C:\\Document\\Local Project\\nbv_rec\\data\\sample",
|
"root_dir": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes",
|
||||||
"split_file": "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_train.txt",
|
"split_file": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt",
|
||||||
"ratio": 0.5,
|
"ratio": 0.5,
|
||||||
"batch_size": 2,
|
"batch_size": 2,
|
||||||
"num_workers": 0,
|
"num_workers": 0,
|
||||||
"pts_num": 2048
|
"pts_num": 32684
|
||||||
}
|
}
|
||||||
ds = NBVReconstructionDataset(config)
|
ds = NBVReconstructionDataset(config)
|
||||||
print(len(ds))
|
print(len(ds))
|
||||||
|
#ds.__getitem__(10)
|
||||||
dl = ds.get_loader(shuffle=True)
|
dl = ds.get_loader(shuffle=True)
|
||||||
for idx, data in enumerate(dl):
|
for idx, data in enumerate(dl):
|
||||||
cnt=0
|
data = ds.process_batch(data, "cuda:0")
|
||||||
print(data["scene_name"])
|
print(data)
|
||||||
print(data["scanned_coverage_rate"])
|
break
|
||||||
print(data["best_coverage_rate"])
|
#
|
||||||
for pts in data["scanned_pts"][0]:
|
# for idx, data in enumerate(dl):
|
||||||
#np.savetxt(f"pts_{cnt}.txt", pts)
|
# cnt=0
|
||||||
cnt+=1
|
# print(data["scene_name"])
|
||||||
#np.savetxt("best_pts.txt", best_pts)
|
# print(data["scanned_coverage_rate"])
|
||||||
for key, value in data.items():
|
# print(data["best_coverage_rate"])
|
||||||
if isinstance(value, torch.Tensor):
|
# for pts in data["scanned_pts"][0]:
|
||||||
print(key, ":" ,value.shape)
|
# #np.savetxt(f"pts_{cnt}.txt", pts)
|
||||||
else:
|
# cnt+=1
|
||||||
print(key, ":" ,len(value))
|
# #np.savetxt("best_pts.txt", best_pts)
|
||||||
if key == "scanned_n_to_1_pose_9d":
|
# for key, value in data.items():
|
||||||
for val in value:
|
# if isinstance(value, torch.Tensor):
|
||||||
print(val.shape)
|
# print(key, ":" ,value.shape)
|
||||||
if key == "scanned_pts":
|
# else:
|
||||||
for val in value:
|
# print(key, ":" ,len(value))
|
||||||
print(val.shape)
|
# if key == "scanned_n_to_1_pose_9d":
|
||||||
|
# for val in value:
|
||||||
|
# print(val.shape)
|
||||||
|
# if key == "scanned_pts":
|
||||||
|
# print("scanned_pts")
|
||||||
|
# for val in value:
|
||||||
|
# print(val.shape)
|
||||||
|
# cnt = 0
|
||||||
|
# for v in val:
|
||||||
|
# import ipdb;ipdb.set_trace()
|
||||||
|
# np.savetxt(f"pts_{cnt}.txt", v)
|
||||||
|
# cnt+=1
|
||||||
|
|
||||||
|
|
||||||
print()
|
# print()
|
@ -14,12 +14,11 @@ class NBVReconstructionPipeline(nn.Module):
|
|||||||
self.pose_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["pose_encoder"])
|
self.pose_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["pose_encoder"])
|
||||||
self.seq_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["seq_encoder"])
|
self.seq_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["seq_encoder"])
|
||||||
self.view_finder = ComponentFactory.create(namespace.Stereotype.MODULE, config["view_finder"])
|
self.view_finder = ComponentFactory.create(namespace.Stereotype.MODULE, config["view_finder"])
|
||||||
|
self.eps = 1e-5
|
||||||
|
|
||||||
def forward(self, data):
|
def forward(self, data):
|
||||||
mode = data["mode"]
|
mode = data["mode"]
|
||||||
# ----- Debug Trace ----- #
|
|
||||||
import ipdb; ipdb.set_trace()
|
|
||||||
# ------------------------ #
|
|
||||||
if mode == namespace.Mode.TRAIN:
|
if mode == namespace.Mode.TRAIN:
|
||||||
return self.forward_train(data)
|
return self.forward_train(data)
|
||||||
elif mode == namespace.Mode.TEST:
|
elif mode == namespace.Mode.TEST:
|
||||||
@ -27,29 +26,22 @@ class NBVReconstructionPipeline(nn.Module):
|
|||||||
else:
|
else:
|
||||||
Log.error("Unknown mode: {}".format(mode), True)
|
Log.error("Unknown mode: {}".format(mode), True)
|
||||||
|
|
||||||
def pertube_data(self, gt_delta_rot_6d):
|
def pertube_data(self, gt_delta_9d):
|
||||||
bs = gt_delta_rot_6d.shape[0]
|
bs = gt_delta_9d.shape[0]
|
||||||
random_t = torch.rand(bs, device=self.device) * (1. - self.eps) + self.eps
|
random_t = torch.rand(bs, device=gt_delta_9d.device) * (1. - self.eps) + self.eps
|
||||||
random_t = random_t.unsqueeze(-1)
|
random_t = random_t.unsqueeze(-1)
|
||||||
mu, std = self.view_finder.marginal_prob(gt_delta_rot_6d, random_t)
|
mu, std = self.view_finder.marginal_prob(gt_delta_9d, random_t)
|
||||||
std = std.view(-1, 1)
|
std = std.view(-1, 1)
|
||||||
z = torch.randn_like(gt_delta_rot_6d)
|
z = torch.randn_like(gt_delta_9d)
|
||||||
perturbed_x = mu + z * std
|
perturbed_x = mu + z * std
|
||||||
target_score = - z * std / (std ** 2)
|
target_score = - z * std / (std ** 2)
|
||||||
return perturbed_x, random_t, target_score, std
|
return perturbed_x, random_t, target_score, std
|
||||||
|
|
||||||
def forward_train(self, data):
|
def forward_train(self, data):
|
||||||
pts_list = data['pts_list']
|
seq_feat = self.get_seq_feat(data)
|
||||||
pose_list = data['pose_list']
|
|
||||||
gt_rot_6d = data["nbv_cam_pose"]
|
|
||||||
pts_feat_list = []
|
|
||||||
pose_feat_list = []
|
|
||||||
for pts,pose in zip(pts_list,pose_list):
|
|
||||||
pts_feat_list.append(self.pts_encoder.encode_points(pts))
|
|
||||||
pose_feat_list.append(self.pose_encoder.encode_pose(pose))
|
|
||||||
seq_feat = self.seq_encoder.encode_sequence(pts_feat_list, pose_feat_list)
|
|
||||||
''' get std '''
|
''' get std '''
|
||||||
perturbed_x, random_t, target_score, std = self.pertube_data(gt_rot_6d)
|
best_to_1_pose_9d_batch = data["best_to_1_pose_9d"]
|
||||||
|
perturbed_x, random_t, target_score, std = self.pertube_data(best_to_1_pose_9d_batch)
|
||||||
input_data = {
|
input_data = {
|
||||||
"sampled_pose": perturbed_x,
|
"sampled_pose": perturbed_x,
|
||||||
"t": random_t,
|
"t": random_t,
|
||||||
@ -64,14 +56,7 @@ class NBVReconstructionPipeline(nn.Module):
|
|||||||
return output
|
return output
|
||||||
|
|
||||||
def forward_test(self,data):
|
def forward_test(self,data):
|
||||||
pts_list = data['pts_list']
|
seq_feat = self.get_seq_feat(data)
|
||||||
pose_list = data['pose_list']
|
|
||||||
pts_feat_list = []
|
|
||||||
pose_feat_list = []
|
|
||||||
for pts,pose in zip(pts_list,pose_list):
|
|
||||||
pts_feat_list.append(self.pts_encoder.encode_points(pts))
|
|
||||||
pose_feat_list.append(self.pose_encoder.encode_pose(pose))
|
|
||||||
seq_feat = self.seq_encoder.encode_sequence(pts_feat_list, pose_feat_list)
|
|
||||||
estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(seq_feat)
|
estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(seq_feat)
|
||||||
result = {
|
result = {
|
||||||
"pred_pose_9d": estimated_delta_rot_9d,
|
"pred_pose_9d": estimated_delta_rot_9d,
|
||||||
@ -79,4 +64,19 @@ class NBVReconstructionPipeline(nn.Module):
|
|||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def get_seq_feat(self, data):
    """Encode every scanned-view sequence in the batch into a sequence feature.

    Args:
        data: batch mapping holding
            ``'scanned_pts'`` - iterable of per-sample point tensors,
            ``'scanned_n_to_1_pose_9d'`` - iterable of per-sample pose tensors,
            and optionally ``'best_to_1_pose_9d'`` - a tensor whose device is
            used as the target device for the per-sample tensors.

    Returns:
        Whatever ``self.seq_encoder.encode_sequence`` returns for the lists of
        per-sample point features and pose features.
    """
    scanned_pts_batch = data['scanned_pts']
    scanned_n_to_1_pose_9d_batch = data['scanned_n_to_1_pose_9d']

    # The per-sample tensors arrive as plain lists, so each one is moved to the
    # compute device here. Prefer the device of the label tensor when present
    # (original behaviour); otherwise fall back to the device of the module's
    # own parameters so that unlabeled batches do not raise KeyError.
    if "best_to_1_pose_9d" in data:
        device = data["best_to_1_pose_9d"].device
    else:
        device = next(self.parameters()).device

    pts_feat_seq_list = []
    pose_feat_seq_list = []
    for scanned_pts, scanned_n_to_1_pose_9d in zip(scanned_pts_batch, scanned_n_to_1_pose_9d_batch):
        scanned_pts = scanned_pts.to(device)
        scanned_n_to_1_pose_9d = scanned_n_to_1_pose_9d.to(device)
        pts_feat_seq_list.append(self.pts_encoder.encode_points(scanned_pts))
        pose_feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_1_pose_9d))

    seq_feat = self.seq_encoder.encode_sequence(pts_feat_seq_list, pose_feat_seq_list)
    return seq_feat
|
||||||
|
|
||||||
|
@ -33,19 +33,22 @@ class GradientFieldViewFinder(nn.Module):
|
|||||||
pose_dim = PoseUtil.get_pose_dim(self.pose_mode)
|
pose_dim = PoseUtil.get_pose_dim(self.pose_mode)
|
||||||
self.prior_fn, self.marginal_prob_fn, self.sde_fn, self.sampling_eps, self.T = flib.init_sde(config["sde_mode"])
|
self.prior_fn, self.marginal_prob_fn, self.sde_fn, self.sampling_eps, self.T = flib.init_sde(config["sde_mode"])
|
||||||
self.sampling_steps = config["sampling_steps"]
|
self.sampling_steps = config["sampling_steps"]
|
||||||
|
self.t_feat_dim = config["t_feat_dim"]
|
||||||
|
self.pose_feat_dim = config["pose_feat_dim"]
|
||||||
|
self.main_feat_dim = config["main_feat_dim"]
|
||||||
|
|
||||||
''' encode pose '''
|
''' encode pose '''
|
||||||
self.pose_encoder = nn.Sequential(
|
self.pose_encoder = nn.Sequential(
|
||||||
nn.Linear(pose_dim, 256),
|
nn.Linear(pose_dim, self.pose_feat_dim ),
|
||||||
self.act,
|
self.act,
|
||||||
nn.Linear(256, 256),
|
nn.Linear(self.pose_feat_dim , self.pose_feat_dim ),
|
||||||
self.act,
|
self.act,
|
||||||
)
|
)
|
||||||
|
|
||||||
''' encode t '''
|
''' encode t '''
|
||||||
self.t_encoder = nn.Sequential(
|
self.t_encoder = nn.Sequential(
|
||||||
mlib.GaussianFourierProjection(embed_dim=128),
|
mlib.GaussianFourierProjection(embed_dim=self.t_feat_dim ),
|
||||||
nn.Linear(128, 128),
|
nn.Linear(self.t_feat_dim , self.t_feat_dim ),
|
||||||
self.act,
|
self.act,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -56,18 +59,18 @@ class GradientFieldViewFinder(nn.Module):
|
|||||||
if not self.per_point_feature:
|
if not self.per_point_feature:
|
||||||
''' rotation_x_axis regress head '''
|
''' rotation_x_axis regress head '''
|
||||||
self.fusion_tail_rot_x = nn.Sequential(
|
self.fusion_tail_rot_x = nn.Sequential(
|
||||||
nn.Linear(128 + 256 + 2048, 256),
|
nn.Linear(self.t_feat_dim + self.pose_feat_dim + self.main_feat_dim, 256),
|
||||||
self.act,
|
self.act,
|
||||||
zero_module(nn.Linear(256, 3)),
|
zero_module(nn.Linear(256, 3)),
|
||||||
)
|
)
|
||||||
self.fusion_tail_rot_y = nn.Sequential(
|
self.fusion_tail_rot_y = nn.Sequential(
|
||||||
nn.Linear(128 + 256 + 2048, 256),
|
nn.Linear(self.t_feat_dim + self.pose_feat_dim + self.main_feat_dim, 256),
|
||||||
self.act,
|
self.act,
|
||||||
zero_module(nn.Linear(256, 3)),
|
zero_module(nn.Linear(256, 3)),
|
||||||
)
|
)
|
||||||
''' tranalation regress head '''
|
''' tranalation regress head '''
|
||||||
self.fusion_tail_trans = nn.Sequential(
|
self.fusion_tail_trans = nn.Sequential(
|
||||||
nn.Linear(128 + 256 + 2048, 256),
|
nn.Linear(self.t_feat_dim + self.pose_feat_dim + self.main_feat_dim, 256),
|
||||||
self.act,
|
self.act,
|
||||||
zero_module(nn.Linear(256, 3)),
|
zero_module(nn.Linear(256, 3)),
|
||||||
)
|
)
|
||||||
|
@ -54,6 +54,7 @@ class PointNetEncoder(nn.Module):
|
|||||||
|
|
||||||
def encode_points(self, pts):
|
def encode_points(self, pts):
|
||||||
pts = pts.transpose(2, 1)
|
pts = pts.transpose(2, 1)
|
||||||
|
|
||||||
if not self.global_feat:
|
if not self.global_feat:
|
||||||
pts_feature = self(pts).transpose(2, 1)
|
pts_feature = self(pts).transpose(2, 1)
|
||||||
else:
|
else:
|
||||||
@ -98,11 +99,24 @@ class STNkd(nn.Module):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
sim_data = Variable(torch.rand(32, 2500, 3))
|
sim_data = Variable(torch.rand(32, 2500, 3))
|
||||||
|
config = {
|
||||||
pointnet_global = PointNetEncoder(global_feat=True)
|
"in_dim": 3,
|
||||||
|
"out_dim": 1024,
|
||||||
|
"global_feat": True,
|
||||||
|
"feature_transform": False
|
||||||
|
}
|
||||||
|
pointnet_global = PointNetEncoder(config)
|
||||||
out = pointnet_global.encode_points(sim_data)
|
out = pointnet_global.encode_points(sim_data)
|
||||||
|
|
||||||
print("global feat", out.size())
|
print("global feat", out.size())
|
||||||
|
|
||||||
pointnet = PointNetEncoder(global_feat=False)
|
config = {
|
||||||
|
"in_dim": 3,
|
||||||
|
"out_dim": 1024,
|
||||||
|
"global_feat": False,
|
||||||
|
"feature_transform": False
|
||||||
|
}
|
||||||
|
|
||||||
|
pointnet = PointNetEncoder(config)
|
||||||
out = pointnet.encode_points(sim_data)
|
out = pointnet.encode_points(sim_data)
|
||||||
print("point feat", out.size())
|
print("point feat", out.size())
|
||||||
|
@ -38,7 +38,7 @@ class TransformerSequenceEncoder(nn.Module):
|
|||||||
|
|
||||||
# Prepare mask for padding
|
# Prepare mask for padding
|
||||||
max_len = max(lengths)
|
max_len = max(lengths)
|
||||||
padding_mask = torch.tensor([([0] * length + [1] * (max_len - length)) for length in lengths], dtype=torch.bool)
|
padding_mask = torch.tensor([([0] * length + [1] * (max_len - length)) for length in lengths], dtype=torch.bool).to(combined_tensor.device)
|
||||||
# Transformer encoding
|
# Transformer encoding
|
||||||
transformer_output = self.transformer_encoder(combined_tensor, src_key_padding_mask=padding_mask)
|
transformer_output = self.transformer_encoder(combined_tensor, src_key_padding_mask=padding_mask)
|
||||||
|
|
||||||
|
@ -26,6 +26,7 @@ class StrategyGenerator(Runner):
|
|||||||
self.save_best_combined_pts = ConfigManager.get("runner", "generate", "save_best_combined_points")
|
self.save_best_combined_pts = ConfigManager.get("runner", "generate", "save_best_combined_points")
|
||||||
self.save_mesh = ConfigManager.get("runner", "generate", "save_mesh")
|
self.save_mesh = ConfigManager.get("runner", "generate", "save_mesh")
|
||||||
self.filter_degree = ConfigManager.get("runner", "generate", "filter_degree")
|
self.filter_degree = ConfigManager.get("runner", "generate", "filter_degree")
|
||||||
|
self.overwrite = ConfigManager.get("runner", "generate", "overwrite")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -44,6 +45,14 @@ class StrategyGenerator(Runner):
|
|||||||
for scene_name in scene_name_list:
|
for scene_name in scene_name_list:
|
||||||
Log.info(f"({dataset_name})Processing [{cnt}/{total}]: {scene_name}")
|
Log.info(f"({dataset_name})Processing [{cnt}/{total}]: {scene_name}")
|
||||||
status_manager.set_progress("generate", "strategy_generator", "scene", cnt, total)
|
status_manager.set_progress("generate", "strategy_generator", "scene", cnt, total)
|
||||||
|
diag = DataLoadUtil.get_bbox_diag(model_dir, scene_name)
|
||||||
|
voxel_threshold = diag*0.02
|
||||||
|
status_manager.set_status("generate", "strategy_generator", "voxel_threshold", voxel_threshold)
|
||||||
|
output_label_path = DataLoadUtil.get_label_path(root_dir, scene_name)
|
||||||
|
if os.path.exists(output_label_path) and not self.overwrite:
|
||||||
|
Log.info(f"Scene <{scene_name}> Already Exists, Skip")
|
||||||
|
cnt += 1
|
||||||
|
continue
|
||||||
self.generate_sequence(root_dir, model_dir, scene_name,voxel_threshold, overlap_threshold)
|
self.generate_sequence(root_dir, model_dir, scene_name,voxel_threshold, overlap_threshold)
|
||||||
cnt += 1
|
cnt += 1
|
||||||
status_manager.set_progress("generate", "strategy_generator", "scene", total, total)
|
status_manager.set_progress("generate", "strategy_generator", "scene", total, total)
|
||||||
|
@ -45,6 +45,15 @@ class DataLoadUtil:
|
|||||||
mesh.apply_transform(world_object_pose)
|
mesh.apply_transform(world_object_pose)
|
||||||
return mesh
|
return mesh
|
||||||
|
|
||||||
|
@staticmethod
def get_bbox_diag(model_dir, object_name):
    """Return the bounding-box diagonal length of an object's mesh.

    The mesh is read from ``<model_dir>/<object_name>/mesh.obj`` and the
    result is the Euclidean norm of its bounding-box extents.
    """
    mesh_file = os.path.join(model_dir, object_name, "mesh.obj")
    extents = trimesh.load(mesh_file).bounding_box.extents
    return np.linalg.norm(extents)
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
|
def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
|
||||||
mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
|
mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
|
||||||
@ -193,6 +202,24 @@ class DataLoadUtil:
|
|||||||
"points_camera": target_points_camera
|
"points_camera": target_points_camera
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@staticmethod
def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
    """Back-project a depth image into 3D points.

    Every pixel is lifted with the pinhole model
    ``x = (u - cx) * z / fx``, ``y = (v - cy) * z / fy`` and then transformed
    by ``cam_extrinsic``. No pixels are filtered out.

    Args:
        depth: 2-D array-like of per-pixel depth values, shape ``(h, w)``.
        cam_intrinsic: array-like whose ``[0, 0]``, ``[1, 1]``, ``[0, 2]`` and
            ``[1, 2]`` entries are fx, fy, cx and cy.
        cam_extrinsic: array-like transform applied to the homogeneous camera
            points (4 columns; only the first three output rows are kept).

    Returns:
        dict with ``"points_world"`` (transformed points) and
        ``"points_camera"`` (camera-frame points), both ``(h * w, 3)`` in
        row-major pixel order.
    """
    # Accept nested lists (e.g. matrices parsed from JSON) as well as ndarrays;
    # plain lists do not support the [row, col] indexing used below.
    depth = np.asarray(depth)
    cam_intrinsic = np.asarray(cam_intrinsic)
    cam_extrinsic = np.asarray(cam_extrinsic)

    h, w = depth.shape
    # i holds the column (u) index and j the row (v) index of each pixel.
    i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy')

    z = depth
    x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
    y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]

    points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
    # Homogeneous coordinates so the extrinsic applies rotation and translation.
    points_camera_aug = np.concatenate([points_camera, np.ones((points_camera.shape[0], 1))], axis=-1)

    points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
    return {
        "points_world": points_world,
        "points_camera": points_camera
    }
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_target_point_cloud_world_from_path(path, binocular=False, random_downsample_N=65536, voxel_size = 0.005, target_mask_label=(0,255,0,255)):
|
def get_target_point_cloud_world_from_path(path, binocular=False, random_downsample_N=65536, voxel_size = 0.005, target_mask_label=(0,255,0,255)):
|
||||||
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
|
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
|
||||||
|
@ -5,7 +5,6 @@ class PtsUtil:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005):
|
def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005):
|
||||||
print("voxel_size: ", voxel_size)
|
|
||||||
o3d_pc = o3d.geometry.PointCloud()
|
o3d_pc = o3d.geometry.PointCloud()
|
||||||
o3d_pc.points = o3d.utility.Vector3dVector(point_cloud)
|
o3d_pc.points = o3d.utility.Vector3dVector(point_cloud)
|
||||||
downsampled_pc = o3d_pc.voxel_down_sample(voxel_size)
|
downsampled_pc = o3d_pc.voxel_down_sample(voxel_size)
|
||||||
|
@ -6,7 +6,6 @@ class ReconstructionUtil:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold=0.01):
|
def compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold=0.01):
|
||||||
print("threshold", threshold)
|
|
||||||
kdtree = cKDTree(combined_point_cloud)
|
kdtree = cKDTree(combined_point_cloud)
|
||||||
distances, _ = kdtree.query(target_point_cloud)
|
distances, _ = kdtree.query(target_point_cloud)
|
||||||
covered_points = np.sum(distances < threshold)
|
covered_points = np.sum(distances < threshold)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user