update
This commit is contained in:
parent
d9d2716ba7
commit
3ab046b134
2
.gitignore
vendored
2
.gitignore
vendored
@ -3,7 +3,7 @@
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
test_output/
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
|
@ -10,7 +10,8 @@ runner:
|
||||
root_dir: "experiments"
|
||||
|
||||
generate:
|
||||
model_dir: "/home/yan20/nbv_rec/data/test_CAD/test_model"
|
||||
model_dir: "/home/user/nbv_rec/data/models"
|
||||
table_model_path: "/home/user/nbv_rec/data/table.obj"
|
||||
model_start_idx: 0
|
||||
voxel_size: 0.005
|
||||
max_view: 512
|
||||
@ -19,6 +20,19 @@ runner:
|
||||
min_diag: 0.01
|
||||
random_view_ratio: 0
|
||||
min_cam_table_included_degree: 20
|
||||
obj_name: "bear"
|
||||
light_and_camera_config:
|
||||
Camera:
|
||||
near_plane: 0.01
|
||||
far_plane: 5
|
||||
fov_vertical: 25
|
||||
resolution: [1280,800]
|
||||
eye_distance: 0.15
|
||||
eye_angle: 25
|
||||
Light:
|
||||
location: [0,0,3.5]
|
||||
orientation: [0,0,0]
|
||||
power: 150
|
||||
|
||||
reconstruct:
|
||||
soft_overlap_threshold: 0.3
|
||||
|
@ -1,5 +1,7 @@
|
||||
import os
|
||||
import trimesh
|
||||
import tempfile
|
||||
import subprocess
|
||||
import numpy as np
|
||||
from PytorchBoot.runners.runner import Runner
|
||||
from PytorchBoot.config import ConfigManager
|
||||
@ -7,16 +9,12 @@ import PytorchBoot.stereotype as stereotype
|
||||
from PytorchBoot.utils.log_util import Log
|
||||
from PytorchBoot.status import status_manager
|
||||
|
||||
|
||||
import sys
|
||||
|
||||
sys.path.append(r"C:\Document\Local Project\nbv_rec_control")
|
||||
#from utils.control_util import ControlUtil
|
||||
from utils.control_util import ControlUtil
|
||||
from utils.communicate_util import CommunicateUtil
|
||||
from utils.pts_util import PtsUtil
|
||||
from utils.view_sample_util import ViewSampleUtil
|
||||
from utils.reconstruction_util import ReconstructionUtil
|
||||
|
||||
from utils.preprocess_util import save_scene_data
|
||||
from utils.data_load import DataLoadUtil
|
||||
|
||||
@stereotype.runner("CAD_strategy_runner")
|
||||
class CADStrategyRunner(Runner):
|
||||
@ -50,79 +48,111 @@ class CADStrategyRunner(Runner):
|
||||
def load_experiment(self, backup_name=None):
|
||||
super().load_experiment(backup_name)
|
||||
|
||||
def get_pts_from_view_data(self, view_data):
|
||||
depth = view_data["depth_image"]
|
||||
depth_intrinsics = view_data["depth_intrinsics"]
|
||||
depth_extrinsics = view_data["depth_extrinsics"]
|
||||
cam_pts = PtsUtil.get_pts_from_depth(depth, depth_intrinsics, depth_extrinsics)
|
||||
return cam_pts
|
||||
|
||||
def split_scan_pts_and_obj_pts(self, world_pts, scan_pts_z, z_threshold = 0.003):
|
||||
scan_pts = world_pts[scan_pts_z < z_threshold]
|
||||
obj_pts = world_pts[scan_pts_z >= z_threshold]
|
||||
return scan_pts, obj_pts
|
||||
|
||||
def run_one_model(self, model_name):
|
||||
|
||||
''' init robot '''
|
||||
ControlUtil.init()
|
||||
#ControlUtil.init()
|
||||
''' load CAD model '''
|
||||
model_path = os.path.join(self.model_dir, model_name)
|
||||
model_path = os.path.join(self.model_dir, model_name,"mesh.obj")
|
||||
cad_model = trimesh.load(model_path)
|
||||
''' take first view '''
|
||||
view_data = CommunicateUtil.get_view_data()
|
||||
first_cam_pts = None
|
||||
#view_data = CommunicateUtil.get_view_data(init=True)
|
||||
#first_cam_pts = self.get_pts_from_view_data(view_data)
|
||||
''' register '''
|
||||
cad_to_cam = PtsUtil.register_icp(first_cam_pts, cad_model)
|
||||
cam_to_world = ControlUtil.get_pose()
|
||||
cad_to_world = cam_to_world @ cad_to_cam
|
||||
cad_model:trimesh.Trimesh = cad_model.apply_transform(cad_to_world)
|
||||
''' sample view '''
|
||||
min_corner = cad_model.bounds[0]
|
||||
max_corner = cad_model.bounds[1]
|
||||
diag = np.linalg.norm(max_corner - min_corner)
|
||||
view_num = int(self.min_view + (diag - self.min_diag)/(self.max_diag - self.min_diag) * (self.max_view - self.min_view))
|
||||
sampled_view_data = ViewSampleUtil.sample_view_data_world_space(
|
||||
cad_model, cad_to_world,
|
||||
voxel_size = self.voxel_size,
|
||||
max_views = view_num,
|
||||
min_cam_table_included_degree= self.min_cam_table_included_degree,
|
||||
random_view_ratio = self.random_view_ratio
|
||||
)
|
||||
cam_to_world_poses = sampled_view_data["cam_to_world_poses"]
|
||||
world_model_points = sampled_view_data["voxel_down_sampled_points"]
|
||||
#cad_to_cam = PtsUtil.register(first_cam_pts, cad_model)
|
||||
#cam_to_world = ControlUtil.get_pose()
|
||||
cad_to_world = np.eye(4) #cam_to_world @ cad_to_cam
|
||||
|
||||
|
||||
world_to_blender_world = np.eye(4)
|
||||
world_to_blender_world[:3, 3] = np.asarray([0, 0, 0.9215])
|
||||
cad_to_blender_world = np.dot(world_to_blender_world, cad_to_world)
|
||||
cad_model:trimesh.Trimesh = cad_model.apply_transform(cad_to_blender_world)
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
name = "cad_model_world"
|
||||
|
||||
cad_model.export(os.path.join(temp_dir, f"{name}.obj"))
|
||||
temp_dir = "/home/user/nbv_rec/nbv_rec_control/test_output"
|
||||
scene_dir = os.path.join(temp_dir, name)
|
||||
script_path = "/home/user/nbv_rec/blender_app/data_generator.py"
|
||||
''' sample view '''
|
||||
# import ipdb; ipdb.set_trace()
|
||||
# print("start running renderer")
|
||||
# result = subprocess.run([
|
||||
# 'blender', '-b', '-P', script_path, '--', temp_dir
|
||||
# ], capture_output=True, text=True)
|
||||
# print("finish running renderer")
|
||||
#
|
||||
|
||||
world_model_points = np.loadtxt(os.path.join(scene_dir, "points_and_normals.txt"))[:,:3]
|
||||
''' preprocess '''
|
||||
# save_scene_data(temp_dir, name)
|
||||
|
||||
pts_dir = os.path.join(temp_dir,name,"pts")
|
||||
sample_view_pts_list = []
|
||||
scan_points_idx_list = []
|
||||
frame_num = len(os.listdir(pts_dir))
|
||||
|
||||
for frame_idx in range(frame_num):
|
||||
pts_path = os.path.join(temp_dir,name, "pts", f"{frame_idx}.txt")
|
||||
idx_path = os.path.join(temp_dir,name, "scan_points_indices", f"{frame_idx}.txt")
|
||||
point_cloud = np.loadtxt(pts_path)
|
||||
sampled_point_cloud = PtsUtil.voxel_downsample_point_cloud(point_cloud, self.voxel_size)
|
||||
indices = np.loadtxt(idx_path, dtype=np.int32)
|
||||
try:
|
||||
len(indices)
|
||||
except:
|
||||
indices = np.array([indices])
|
||||
sample_view_pts_list.append(sampled_point_cloud)
|
||||
scan_points_idx_list.append(indices)
|
||||
|
||||
''' take sample view '''
|
||||
scan_points_idx_list = []
|
||||
sample_view_pts_list = []
|
||||
for cam_to_world in cam_to_world_poses:
|
||||
ControlUtil.move_to(cam_to_world)
|
||||
''' get world pts '''
|
||||
view_data = CommunicateUtil.get_view_data()
|
||||
cam_pts = None
|
||||
scan_points_idx = None
|
||||
world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
|
||||
sample_view_pts_list.append(world_pts)
|
||||
scan_points_idx_list.append(scan_points_idx)
|
||||
''' generate strategy '''
|
||||
|
||||
''' generate strategy '''
|
||||
limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_overlap(
|
||||
world_model_points, sample_view_pts_list,
|
||||
scan_points_indices_list = scan_points_idx_list,
|
||||
init_view=0,
|
||||
threshold=self.voxel_size,
|
||||
soft_overlap_threshold = self.soft_overlap_threshold,
|
||||
hard_overlap_threshold = self.hard_overlap_threshold,
|
||||
scan_points_threshold = self.scan_points_threshold,
|
||||
status_info=self.status_info
|
||||
)
|
||||
|
||||
''' extract cam_to_world sequence '''
|
||||
cam_to_world_seq = []
|
||||
coveraget_rate_seq = []
|
||||
|
||||
for idx, coverage_rate in limited_useful_view:
|
||||
cam_to_world_seq.append(cam_to_world_poses[idx])
|
||||
coveraget_rate_seq.append(coverage_rate)
|
||||
limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_overlap(
|
||||
world_model_points, sample_view_pts_list,
|
||||
scan_points_indices_list = scan_points_idx_list,
|
||||
init_view=0,
|
||||
threshold=self.voxel_size,
|
||||
soft_overlap_threshold = self.soft_overlap_threshold,
|
||||
hard_overlap_threshold = self.hard_overlap_threshold,
|
||||
scan_points_threshold = self.scan_points_threshold,
|
||||
status_info=self.status_info
|
||||
)
|
||||
|
||||
''' extract cam_to_world sequence '''
|
||||
cam_to_world_seq = []
|
||||
coveraget_rate_seq = []
|
||||
|
||||
''' take best seq view '''
|
||||
for cam_to_world in cam_to_world_seq:
|
||||
ControlUtil.move_to(cam_to_world)
|
||||
''' get world pts '''
|
||||
view_data = CommunicateUtil.get_view_data()
|
||||
cam_pts = None
|
||||
scan_points_idx = None
|
||||
world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
|
||||
sample_view_pts_list.append(world_pts)
|
||||
scan_points_idx_list.append(scan_points_idx)
|
||||
from ipdb import set_trace; set_trace()
|
||||
for idx, coverage_rate in limited_useful_view:
|
||||
path = DataLoadUtil.get_path(temp_dir, name, idx)
|
||||
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
|
||||
cam_to_world_seq.append(cam_info["cam_to_world"])
|
||||
coveraget_rate_seq.append(coverage_rate)
|
||||
|
||||
# ''' take best seq view '''
|
||||
# for cam_to_world in cam_to_world_seq:
|
||||
# ControlUtil.move_to(cam_to_world)
|
||||
# ''' get world pts '''
|
||||
# view_data = CommunicateUtil.get_view_data()
|
||||
# cam_pts = self.get_pts_from_view_data(view_data)
|
||||
# scan_points_idx = None
|
||||
# world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
|
||||
# sample_view_pts_list.append(world_pts)
|
||||
# scan_points_idx_list.append(scan_points_idx)
|
||||
|
||||
|
||||
def run(self):
|
||||
@ -159,32 +189,5 @@ if __name__ == "__main__":
|
||||
# cad_pts_L = PtsUtil.transform_point_cloud(noisy_pts_L, cad_to_cam_L)
|
||||
# np.savetxt(r"test.txt", cad_pts_L)
|
||||
# np.savetxt(r"src.txt", noisy_pts_L)
|
||||
|
||||
''' test view sample '''
|
||||
|
||||
sampled_view_data = ViewSampleUtil.sample_view_data_world_space(
|
||||
model, np.eye(4),
|
||||
voxel_size = 0.005,
|
||||
max_views = 20,
|
||||
min_cam_table_included_degree= 20,
|
||||
random_view_ratio = 0
|
||||
)
|
||||
cam_poses = sampled_view_data["cam_to_world_poses"]
|
||||
cam_poses = np.array(cam_poses)
|
||||
print(cam_poses.shape)
|
||||
def sample_camera_direction(cam_pose, num_samples, sample_distance):
|
||||
cam_position = cam_pose[:3, 3]
|
||||
cam_direction = cam_pose[:3, 2]
|
||||
sampled_points = np.array([cam_position - (i + 1) * sample_distance * cam_direction for i in range(num_samples)])
|
||||
return sampled_points
|
||||
|
||||
all_sampled_points = []
|
||||
for i in range(cam_poses.shape[0]):
|
||||
samped_points = sample_camera_direction(cam_poses[i], 10, 0.02)
|
||||
all_sampled_points.append(samped_points)
|
||||
|
||||
all_sampled_points = np.concatenate(all_sampled_points, axis=0)
|
||||
np.savetxt(r"cam_poses.txt", cam_poses[:, :3, 3])
|
||||
np.savetxt(r"cam_directions.txt", all_sampled_points)
|
||||
|
||||
|
||||
|
@ -1,87 +0,0 @@
|
||||
import os
|
||||
import trimesh
|
||||
import numpy as np
|
||||
from PytorchBoot.runners.runner import Runner
|
||||
from PytorchBoot.config import ConfigManager
|
||||
import PytorchBoot.stereotype as stereotype
|
||||
from PytorchBoot.utils.log_util import Log
|
||||
import PytorchBoot.namespace as namespace
|
||||
from PytorchBoot.status import status_manager
|
||||
|
||||
from utils.control_util import ControlUtil
|
||||
from utils.communicate_util import CommunicateUtil
|
||||
from utils.pts_util import PtsUtil
|
||||
from utils.view_sample_util import ViewSampleUtil
|
||||
from utils.reconstruction_util import ReconstructionUtil
|
||||
|
||||
|
||||
@stereotype.runner("inferencer")
|
||||
class Inferencer(Runner):
|
||||
|
||||
def __init__(self, config_path: str):
|
||||
super().__init__(config_path)
|
||||
self.load_experiment("inferencer")
|
||||
self.reconstruct_config = ConfigManager.get("runner", "reconstruct")
|
||||
self.voxel_size = self.reconstruct_config["voxel_size"]
|
||||
self.max_iter = self.reconstruct_config["max_iter"]
|
||||
|
||||
def create_experiment(self, backup_name=None):
|
||||
super().create_experiment(backup_name)
|
||||
|
||||
def load_experiment(self, backup_name=None):
|
||||
super().load_experiment(backup_name)
|
||||
|
||||
def run_inference(self, model_name):
|
||||
|
||||
''' init robot '''
|
||||
ControlUtil.init()
|
||||
''' take first view '''
|
||||
view_data = CommunicateUtil.get_view_data()
|
||||
first_cam_pts = None
|
||||
first_cam_pose = None
|
||||
combined_pts = first_cam_pts
|
||||
input_data = {
|
||||
"scanned_target_points_num": [first_cam_pts.shape[0]],
|
||||
"scanned_n_to_world_pose_9d": [first_cam_pose],
|
||||
"combined_scanned_pts": combined_pts
|
||||
}
|
||||
''' enter loop '''
|
||||
iter = 0
|
||||
while True:
|
||||
''' inference '''
|
||||
inference_result = CommunicateUtil.get_inference_data(input_data)
|
||||
cam_to_world = inference_result["cam_to_world"]
|
||||
''' set pose '''
|
||||
ControlUtil.set_pose(cam_to_world)
|
||||
''' take view '''
|
||||
view_data = CommunicateUtil.get_view_data()
|
||||
curr_cam_pts = None
|
||||
curr_cam_pose = None
|
||||
''' update combined pts '''
|
||||
combined_pts = np.concatenate([combined_pts, curr_cam_pts], axis=0)
|
||||
combined_pts = PtsUtil.voxel_downsample_point_cloud(combined_pts, voxel_size=self.voxel_size)
|
||||
''' update input data '''
|
||||
input_data["combined_scanned_pts"] = combined_pts
|
||||
input_data["scanned_target_points_num"].append(curr_cam_pts.shape[0])
|
||||
input_data["scanned_n_to_world_pose_9d"].append(curr_cam_pose)
|
||||
|
||||
''' check stop condition '''
|
||||
if iter >= self.max_iter:
|
||||
break
|
||||
|
||||
|
||||
def run(self):
|
||||
self.run_inference()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
model_path = "/home/yan20/nbv_rec/data/test_CAD/test_model/bear_scaled.ply"
|
||||
model = trimesh.load(model_path)
|
||||
test_pts_L = np.loadtxt("/home/yan20/nbv_rec/data/test_CAD/cam_pts_0_L.txt")
|
||||
test_pts_R = np.loadtxt("/home/yan20/nbv_rec/data/test_CAD/cam_pts_0_R.txt")
|
||||
cam_to_world_L = PtsUtil.register_icp(test_pts_L, model)
|
||||
cam_to_world_R = PtsUtil.register_icp(test_pts_R, model)
|
||||
print(cam_to_world_L)
|
||||
print("================================")
|
||||
print(cam_to_world_R)
|
||||
|
@ -1,10 +1,20 @@
|
||||
import requests
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
|
||||
class CommunicateUtil:
|
||||
VIEW_HOST = "127.0.0.1:5000"
|
||||
INFERENCE_HOST = "127.0.0.1:5000"
|
||||
|
||||
def get_view_data() -> dict:
|
||||
data = {}
|
||||
def get_view_data(init = False) -> dict:
|
||||
params = {}
|
||||
params["create_scanner"] = init
|
||||
response = requests.get(f"http://{CommunicateUtil.VIEW_HOST}/api/data", json=params)
|
||||
data = response.json()
|
||||
if not data["success"]:
|
||||
print(f"Failed to get view data")
|
||||
return None
|
||||
return data
|
||||
|
||||
def get_inference_data(view_data:dict) -> dict:
|
||||
|
410
utils/data_load.py
Normal file
410
utils/data_load.py
Normal file
@ -0,0 +1,410 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import json
|
||||
import cv2
|
||||
import trimesh
|
||||
import torch
|
||||
from utils.pts_util import PtsUtil
|
||||
|
||||
|
||||
class DataLoadUtil:
|
||||
TABLE_POSITION = np.asarray([0, 0, 0.8215])
|
||||
|
||||
@staticmethod
|
||||
def get_display_table_info(root, scene_name):
|
||||
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
|
||||
display_table_info = scene_info["display_table"]
|
||||
return display_table_info
|
||||
|
||||
@staticmethod
|
||||
def get_display_table_top(root, scene_name):
|
||||
display_table_height = DataLoadUtil.get_display_table_info(root, scene_name)[
|
||||
"height"
|
||||
]
|
||||
display_table_top = DataLoadUtil.TABLE_POSITION + np.asarray(
|
||||
[0, 0, display_table_height]
|
||||
)
|
||||
return display_table_top
|
||||
|
||||
@staticmethod
|
||||
def get_path(root, scene_name, frame_idx):
|
||||
path = os.path.join(root, scene_name, f"{frame_idx}")
|
||||
return path
|
||||
|
||||
@staticmethod
|
||||
def get_label_num(root, scene_name):
|
||||
label_dir = os.path.join(root, scene_name, "label")
|
||||
return len(os.listdir(label_dir))
|
||||
|
||||
@staticmethod
|
||||
def get_label_path(root, scene_name, seq_idx):
|
||||
label_dir = os.path.join(root, scene_name, "label")
|
||||
if not os.path.exists(label_dir):
|
||||
os.makedirs(label_dir)
|
||||
path = os.path.join(label_dir, f"{seq_idx}.json")
|
||||
return path
|
||||
|
||||
@staticmethod
|
||||
def get_label_path_old(root, scene_name):
|
||||
path = os.path.join(root, scene_name, "label.json")
|
||||
return path
|
||||
|
||||
@staticmethod
|
||||
def get_scene_seq_length(root, scene_name):
|
||||
camera_params_path = os.path.join(root, scene_name, "camera_params")
|
||||
return len(os.listdir(camera_params_path))
|
||||
|
||||
@staticmethod
|
||||
def load_mesh_at(model_dir, object_name, world_object_pose):
|
||||
model_path = os.path.join(model_dir, object_name, "mesh.obj")
|
||||
mesh = trimesh.load(model_path)
|
||||
mesh.apply_transform(world_object_pose)
|
||||
return mesh
|
||||
|
||||
@staticmethod
|
||||
def get_bbox_diag(model_dir, object_name):
|
||||
model_path = os.path.join(model_dir, object_name, "mesh.obj")
|
||||
mesh = trimesh.load(model_path)
|
||||
bbox = mesh.bounding_box.extents
|
||||
diagonal_length = np.linalg.norm(bbox)
|
||||
return diagonal_length
|
||||
|
||||
@staticmethod
|
||||
def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
|
||||
mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
|
||||
model_path = os.path.join(output_dir, scene_name, "world_mesh.obj")
|
||||
mesh.export(model_path)
|
||||
|
||||
@staticmethod
|
||||
def save_target_mesh_at_world_space(
|
||||
root, model_dir, scene_name, display_table_as_world_space_origin=True
|
||||
):
|
||||
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
|
||||
target_name = scene_info["target_name"]
|
||||
transformation = scene_info[target_name]
|
||||
if display_table_as_world_space_origin:
|
||||
location = transformation["location"] - DataLoadUtil.get_display_table_top(
|
||||
root, scene_name
|
||||
)
|
||||
else:
|
||||
location = transformation["location"]
|
||||
rotation_euler = transformation["rotation_euler"]
|
||||
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
|
||||
pose_mat[:3, 3] = location
|
||||
|
||||
mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
|
||||
mesh_dir = os.path.join(root, scene_name, "mesh")
|
||||
if not os.path.exists(mesh_dir):
|
||||
os.makedirs(mesh_dir)
|
||||
model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
|
||||
mesh.export(model_path)
|
||||
|
||||
@staticmethod
|
||||
def load_scene_info(root, scene_name):
|
||||
scene_info_path = os.path.join(root, scene_name, "scene_info.json")
|
||||
with open(scene_info_path, "r") as f:
|
||||
scene_info = json.load(f)
|
||||
return scene_info
|
||||
|
||||
@staticmethod
|
||||
def load_target_pts_num_dict(root, scene_name):
|
||||
target_pts_num_path = os.path.join(root, scene_name, "target_pts_num.json")
|
||||
with open(target_pts_num_path, "r") as f:
|
||||
target_pts_num_dict = json.load(f)
|
||||
return target_pts_num_dict
|
||||
|
||||
@staticmethod
|
||||
def load_target_object_pose(root, scene_name):
|
||||
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
|
||||
target_name = scene_info["target_name"]
|
||||
transformation = scene_info[target_name]
|
||||
location = transformation["location"]
|
||||
rotation_euler = transformation["rotation_euler"]
|
||||
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
|
||||
pose_mat[:3, 3] = location
|
||||
return pose_mat
|
||||
|
||||
@staticmethod
|
||||
def load_depth(path, min_depth=0.01, max_depth=5.0, binocular=False):
|
||||
|
||||
def load_depth_from_real_path(real_path, min_depth, max_depth):
|
||||
depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED)
|
||||
depth = depth.astype(np.float32) / 65535.0
|
||||
min_depth = min_depth
|
||||
max_depth = max_depth
|
||||
depth_meters = min_depth + (max_depth - min_depth) * depth
|
||||
return depth_meters
|
||||
|
||||
if binocular:
|
||||
depth_path_L = os.path.join(
|
||||
os.path.dirname(path), "depth", os.path.basename(path) + "_L.png"
|
||||
)
|
||||
depth_path_R = os.path.join(
|
||||
os.path.dirname(path), "depth", os.path.basename(path) + "_R.png"
|
||||
)
|
||||
depth_meters_L = load_depth_from_real_path(
|
||||
depth_path_L, min_depth, max_depth
|
||||
)
|
||||
depth_meters_R = load_depth_from_real_path(
|
||||
depth_path_R, min_depth, max_depth
|
||||
)
|
||||
return depth_meters_L, depth_meters_R
|
||||
else:
|
||||
depth_path = os.path.join(
|
||||
os.path.dirname(path), "depth", os.path.basename(path) + ".png"
|
||||
)
|
||||
depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth)
|
||||
return depth_meters
|
||||
|
||||
@staticmethod
|
||||
def load_seg(path, binocular=False, left_only=False):
|
||||
if binocular and not left_only:
|
||||
|
||||
def clean_mask(mask_image):
|
||||
green = [0, 255, 0, 255]
|
||||
red = [255, 0, 0, 255]
|
||||
threshold = 2
|
||||
mask_image = np.where(
|
||||
np.abs(mask_image - green) <= threshold, green, mask_image
|
||||
)
|
||||
mask_image = np.where(
|
||||
np.abs(mask_image - red) <= threshold, red, mask_image
|
||||
)
|
||||
return mask_image
|
||||
|
||||
mask_path_L = os.path.join(
|
||||
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
|
||||
)
|
||||
mask_image_L = clean_mask(cv2.imread(mask_path_L, cv2.IMREAD_UNCHANGED))
|
||||
mask_path_R = os.path.join(
|
||||
os.path.dirname(path), "mask", os.path.basename(path) + "_R.png"
|
||||
)
|
||||
mask_image_R = clean_mask(cv2.imread(mask_path_R, cv2.IMREAD_UNCHANGED))
|
||||
return mask_image_L, mask_image_R
|
||||
else:
|
||||
if binocular and left_only:
|
||||
mask_path = os.path.join(
|
||||
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
|
||||
)
|
||||
else:
|
||||
mask_path = os.path.join(
|
||||
os.path.dirname(path), "mask", os.path.basename(path) + ".png"
|
||||
)
|
||||
mask_image = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
|
||||
return mask_image
|
||||
|
||||
@staticmethod
|
||||
def load_normal(path, binocular=False, left_only=False):
|
||||
if binocular and not left_only:
|
||||
normal_path_L = os.path.join(
|
||||
os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
|
||||
)
|
||||
normal_image_L = cv2.imread(normal_path_L, cv2.IMREAD_COLOR)
|
||||
normal_path_R = os.path.join(
|
||||
os.path.dirname(path), "normal", os.path.basename(path) + "_R.png"
|
||||
)
|
||||
normal_image_R = cv2.imread(normal_path_R, cv2.IMREAD_COLOR)
|
||||
normalized_normal_image_L = normal_image_L / 255.0 * 2.0 - 1.0
|
||||
normalized_normal_image_R = normal_image_R / 255.0 * 2.0 - 1.0
|
||||
return normalized_normal_image_L, normalized_normal_image_R
|
||||
else:
|
||||
if binocular and left_only:
|
||||
normal_path = os.path.join(
|
||||
os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
|
||||
)
|
||||
else:
|
||||
normal_path = os.path.join(
|
||||
os.path.dirname(path), "normal", os.path.basename(path) + ".png"
|
||||
)
|
||||
normal_image = cv2.imread(normal_path, cv2.IMREAD_COLOR)
|
||||
normalized_normal_image = normal_image / 255.0 * 2.0 - 1.0
|
||||
return normalized_normal_image
|
||||
|
||||
@staticmethod
|
||||
def load_label(path):
|
||||
with open(path, "r") as f:
|
||||
label_data = json.load(f)
|
||||
return label_data
|
||||
|
||||
@staticmethod
|
||||
def load_rgb(path):
|
||||
rgb_path = os.path.join(
|
||||
os.path.dirname(path), "rgb", os.path.basename(path) + ".png"
|
||||
)
|
||||
rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR)
|
||||
return rgb_image
|
||||
|
||||
@staticmethod
|
||||
def load_from_preprocessed_pts(path):
|
||||
npy_path = os.path.join(
|
||||
os.path.dirname(path), "pts", os.path.basename(path) + ".npy"
|
||||
)
|
||||
pts = np.load(npy_path)
|
||||
return pts
|
||||
|
||||
@staticmethod
|
||||
def cam_pose_transformation(cam_pose_before):
|
||||
offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
|
||||
cam_pose_after = cam_pose_before @ offset
|
||||
return cam_pose_after
|
||||
|
||||
@staticmethod
|
||||
def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
|
||||
scene_dir = os.path.dirname(path)
|
||||
root_dir = os.path.dirname(scene_dir)
|
||||
scene_name = os.path.basename(scene_dir)
|
||||
camera_params_path = os.path.join(
|
||||
os.path.dirname(path), "camera_params", os.path.basename(path) + ".json"
|
||||
)
|
||||
with open(camera_params_path, "r") as f:
|
||||
label_data = json.load(f)
|
||||
cam_to_world = np.asarray(label_data["extrinsic"])
|
||||
cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
|
||||
world_to_display_table = np.eye(4)
|
||||
world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
|
||||
root_dir, scene_name
|
||||
)
|
||||
if display_table_as_world_space_origin:
|
||||
cam_to_world = np.dot(world_to_display_table, cam_to_world)
|
||||
cam_intrinsic = np.asarray(label_data["intrinsic"])
|
||||
cam_info = {
|
||||
"cam_to_world": cam_to_world,
|
||||
"cam_intrinsic": cam_intrinsic,
|
||||
"far_plane": label_data["far_plane"],
|
||||
"near_plane": label_data["near_plane"],
|
||||
}
|
||||
if binocular:
|
||||
cam_to_world_R = np.asarray(label_data["extrinsic_R"])
|
||||
cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
|
||||
cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
|
||||
cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
|
||||
if display_table_as_world_space_origin:
|
||||
cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O)
|
||||
cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R)
|
||||
cam_info["cam_to_world_O"] = cam_to_world_O
|
||||
cam_info["cam_to_world_R"] = cam_to_world_R
|
||||
return cam_info
|
||||
|
||||
@staticmethod
|
||||
def get_real_cam_O_from_cam_L(
|
||||
cam_L, cam_O_to_cam_L, scene_path, display_table_as_world_space_origin=True
|
||||
):
|
||||
root_dir = os.path.dirname(scene_path)
|
||||
scene_name = os.path.basename(scene_path)
|
||||
if isinstance(cam_L, torch.Tensor):
|
||||
cam_L = cam_L.cpu().numpy()
|
||||
nO_to_display_table_pose = cam_L @ cam_O_to_cam_L
|
||||
if display_table_as_world_space_origin:
|
||||
display_table_to_world = np.eye(4)
|
||||
display_table_to_world[:3, 3] = DataLoadUtil.get_display_table_top(
|
||||
root_dir, scene_name
|
||||
)
|
||||
nO_to_world_pose = np.dot(display_table_to_world, nO_to_display_table_pose)
|
||||
nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
|
||||
return nO_to_world_pose
|
||||
|
||||
@staticmethod
|
||||
def get_target_point_cloud(
|
||||
depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0, 255, 0, 255), require_full_points=False
|
||||
):
|
||||
h, w = depth.shape
|
||||
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
|
||||
|
||||
z = depth
|
||||
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
|
||||
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
|
||||
|
||||
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
|
||||
mask = mask.reshape(-1, 4)
|
||||
|
||||
target_mask = (mask == target_mask_label).all(axis=-1)
|
||||
|
||||
target_points_camera = points_camera[target_mask]
|
||||
target_points_camera_aug = np.concatenate(
|
||||
[target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1
|
||||
)
|
||||
|
||||
target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3]
|
||||
data = {
|
||||
"points_world": target_points_world,
|
||||
"points_camera": target_points_camera,
|
||||
}
|
||||
return data
|
||||
|
||||
@staticmethod
|
||||
def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
|
||||
h, w = depth.shape
|
||||
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
|
||||
|
||||
z = depth
|
||||
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
|
||||
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
|
||||
|
||||
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
|
||||
points_camera_aug = np.concatenate(
|
||||
[points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
|
||||
)
|
||||
|
||||
points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
|
||||
return {"points_world": points_world, "points_camera": points_camera}
|
||||
|
||||
@staticmethod
|
||||
def get_target_point_cloud_world_from_path(
|
||||
path,
|
||||
binocular=False,
|
||||
random_downsample_N=65536,
|
||||
voxel_size=0.005,
|
||||
target_mask_label=(0, 255, 0, 255),
|
||||
display_table_mask_label=(0, 0, 255, 255),
|
||||
get_display_table_pts=False,
|
||||
require_normal=False,
|
||||
):
|
||||
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
|
||||
if binocular:
|
||||
depth_L, depth_R = DataLoadUtil.load_depth(
|
||||
path, cam_info["near_plane"], cam_info["far_plane"], binocular=True
|
||||
)
|
||||
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
|
||||
point_cloud_L = DataLoadUtil.get_target_point_cloud(
|
||||
depth_L,
|
||||
cam_info["cam_intrinsic"],
|
||||
cam_info["cam_to_world"],
|
||||
mask_L,
|
||||
target_mask_label,
|
||||
)["points_world"]
|
||||
point_cloud_R = DataLoadUtil.get_target_point_cloud(
|
||||
depth_R,
|
||||
cam_info["cam_intrinsic"],
|
||||
cam_info["cam_to_world_R"],
|
||||
mask_R,
|
||||
target_mask_label,
|
||||
)["points_world"]
|
||||
point_cloud_L = PtsUtil.random_downsample_point_cloud(
|
||||
point_cloud_L, random_downsample_N
|
||||
)
|
||||
point_cloud_R = PtsUtil.random_downsample_point_cloud(
|
||||
point_cloud_R, random_downsample_N
|
||||
)
|
||||
overlap_points = PtsUtil.get_overlapping_points(
|
||||
point_cloud_L, point_cloud_R, voxel_size
|
||||
)
|
||||
return overlap_points
|
||||
else:
|
||||
depth = DataLoadUtil.load_depth(
|
||||
path, cam_info["near_plane"], cam_info["far_plane"]
|
||||
)
|
||||
mask = DataLoadUtil.load_seg(path)
|
||||
point_cloud = DataLoadUtil.get_target_point_cloud(
|
||||
depth, cam_info["cam_intrinsic"], cam_info["cam_to_world"], mask
|
||||
)["points_world"]
|
||||
return point_cloud
|
||||
|
||||
@staticmethod
|
||||
def load_points_normals(root, scene_name, display_table_as_world_space_origin=True):
|
||||
points_path = os.path.join(root, scene_name, "points_and_normals.txt")
|
||||
points_normals = np.loadtxt(points_path)
|
||||
if display_table_as_world_space_origin:
|
||||
points_normals[:, :3] = points_normals[
|
||||
:, :3
|
||||
] - DataLoadUtil.get_display_table_top(root, scene_name)
|
||||
return points_normals
|
158
utils/preprocess_util.py
Normal file
158
utils/preprocess_util.py
Normal file
@ -0,0 +1,158 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import time
|
||||
import sys
|
||||
np.random.seed(0)
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from utils.reconstruction_util import ReconstructionUtil
|
||||
from utils.data_load import DataLoadUtil
|
||||
from utils.pts_util import PtsUtil
|
||||
|
||||
def save_np_pts(path, pts: np.ndarray, file_type="txt"):
|
||||
if file_type == "txt":
|
||||
np.savetxt(path, pts)
|
||||
else:
|
||||
np.save(path, pts)
|
||||
|
||||
def save_target_points(root, scene, frame_idx, target_points: np.ndarray, file_type="txt"):
|
||||
pts_path = os.path.join(root,scene, "pts", f"{frame_idx}.{file_type}")
|
||||
if not os.path.exists(os.path.join(root,scene, "pts")):
|
||||
os.makedirs(os.path.join(root,scene, "pts"))
|
||||
save_np_pts(pts_path, target_points, file_type)
|
||||
|
||||
def save_scan_points_indices(root, scene, frame_idx, scan_points_indices: np.ndarray, file_type="txt"):
|
||||
indices_path = os.path.join(root,scene, "scan_points_indices", f"{frame_idx}.{file_type}")
|
||||
if not os.path.exists(os.path.join(root,scene, "scan_points_indices")):
|
||||
os.makedirs(os.path.join(root,scene, "scan_points_indices"))
|
||||
save_np_pts(indices_path, scan_points_indices, file_type)
|
||||
|
||||
def save_scan_points(root, scene, scan_points: np.ndarray):
|
||||
scan_points_path = os.path.join(root,scene, "scan_points.txt")
|
||||
save_np_pts(scan_points_path, scan_points)
|
||||
|
||||
def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic):
|
||||
z = depth[mask]
|
||||
i, j = np.nonzero(mask)
|
||||
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
|
||||
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
|
||||
|
||||
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
|
||||
points_camera_aug = np.concatenate((points_camera, np.ones((points_camera.shape[0], 1))), axis=-1)
|
||||
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
|
||||
|
||||
return points_camera_world
|
||||
|
||||
def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
|
||||
scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
|
||||
points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
|
||||
points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
|
||||
points_image_homogeneous /= points_image_homogeneous[:, 2:]
|
||||
pixel_x = points_image_homogeneous[:, 0].astype(int)
|
||||
pixel_y = points_image_homogeneous[:, 1].astype(int)
|
||||
h, w = mask.shape[:2]
|
||||
valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
|
||||
mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
|
||||
selected_points_indices = np.where((mask_colors == display_table_mask_label).all(axis=-1))[0]
|
||||
selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
|
||||
return selected_points_indices
|
||||
|
||||
|
||||
def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
|
||||
|
||||
''' configuration '''
|
||||
target_mask_label = (0, 255, 0, 255)
|
||||
display_table_mask_label=(0, 0, 255, 255)
|
||||
random_downsample_N = 32768
|
||||
voxel_size=0.002
|
||||
filter_degree = 75
|
||||
min_z = 0.2
|
||||
max_z = 0.5
|
||||
|
||||
''' scan points '''
|
||||
scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0,display_table_radius=0.25))
|
||||
|
||||
''' read frame data(depth|mask|normal) '''
|
||||
frame_num = DataLoadUtil.get_scene_seq_length(root, scene)
|
||||
for frame_id in range(frame_num):
|
||||
print(f"[scene({scene_idx}/{scene_total})|frame({frame_id}/{frame_num})]Processing {scene} frame {frame_id}")
|
||||
path = DataLoadUtil.get_path(root, scene, frame_id)
|
||||
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
|
||||
depth_L, depth_R = DataLoadUtil.load_depth(
|
||||
path, cam_info["near_plane"],
|
||||
cam_info["far_plane"],
|
||||
binocular=True
|
||||
)
|
||||
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
|
||||
|
||||
''' target points '''
|
||||
mask_img_L = mask_L
|
||||
mask_img_R = mask_R
|
||||
|
||||
target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
|
||||
target_mask_img_R = (mask_R == target_mask_label).all(axis=-1)
|
||||
|
||||
|
||||
target_points_L = get_world_points(depth_L, target_mask_img_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
|
||||
target_points_R = get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
|
||||
|
||||
sampled_target_points_L = PtsUtil.random_downsample_point_cloud(
|
||||
target_points_L, random_downsample_N
|
||||
)
|
||||
sampled_target_points_R = PtsUtil.random_downsample_point_cloud(
|
||||
target_points_R, random_downsample_N
|
||||
)
|
||||
|
||||
has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
|
||||
if has_points:
|
||||
target_points = PtsUtil.get_overlapping_points(
|
||||
sampled_target_points_L, sampled_target_points_R, voxel_size
|
||||
)
|
||||
|
||||
if has_points:
|
||||
has_points = target_points.shape[0] > 0
|
||||
|
||||
if has_points:
|
||||
points_normals = DataLoadUtil.load_points_normals(root, scene, display_table_as_world_space_origin=True)
|
||||
target_points = PtsUtil.filter_points(
|
||||
target_points, points_normals, cam_info["cam_to_world"],voxel_size=0.002, theta = filter_degree, z_range=(min_z, max_z)
|
||||
)
|
||||
|
||||
|
||||
''' scan points indices '''
|
||||
scan_points_indices_L = get_scan_points_indices(scan_points, mask_img_L, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
|
||||
scan_points_indices_R = get_scan_points_indices(scan_points, mask_img_R, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
|
||||
scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
|
||||
|
||||
if not has_points:
|
||||
target_points = np.zeros((0, 3))
|
||||
|
||||
save_target_points(root, scene, frame_id, target_points, file_type=file_type)
|
||||
save_scan_points_indices(root, scene, frame_id, scan_points_indices, file_type=file_type)
|
||||
|
||||
save_scan_points(root, scene, scan_points) # The "done" flag of scene preprocess
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
#root = "/media/hofee/repository/new_data_with_normal"
|
||||
root = r"/media/hofee/data/tempdir/test_real_output"
|
||||
# list_path = r"/media/hofee/repository/full_list.txt"
|
||||
# scene_list = []
|
||||
|
||||
# with open(list_path, "r") as f:
|
||||
# for line in f:
|
||||
# scene_list.append(line.strip())
|
||||
scene_list = os.listdir(root)
|
||||
from_idx = 0 # 1000
|
||||
to_idx = 1 # 1500
|
||||
|
||||
|
||||
cnt = 0
|
||||
import time
|
||||
total = to_idx - from_idx
|
||||
for scene in scene_list[from_idx:to_idx]:
|
||||
start = time.time()
|
||||
save_scene_data(root, scene, cnt, total, file_type="npy")
|
||||
cnt+=1
|
||||
end = time.time()
|
||||
print(f"Time cost: {end-start}")
|
@ -169,3 +169,25 @@ class PtsUtil:
|
||||
)
|
||||
|
||||
return reg_icp.transformation
|
||||
|
||||
@staticmethod
|
||||
def get_pts_from_depth(depth, cam_intrinsic, cam_extrinsic):
|
||||
h, w = depth.shape
|
||||
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
|
||||
|
||||
z = depth
|
||||
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
|
||||
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
|
||||
|
||||
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
|
||||
mask = mask.reshape(-1, 4)
|
||||
points_camera = np.concatenate(
|
||||
[points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
|
||||
)
|
||||
|
||||
points_world = np.dot(cam_extrinsic, points_camera.T).T[:, :3]
|
||||
data = {
|
||||
"points_world": points_world,
|
||||
"points_camera": points_camera,
|
||||
}
|
||||
return data
|
@ -152,9 +152,15 @@ class ReconstructionUtil:
|
||||
|
||||
@staticmethod
|
||||
def check_scan_points_overlap(history_indices, indices2, threshold=5):
|
||||
for indices1 in history_indices:
|
||||
if len(set(indices1).intersection(set(indices2))) >= threshold:
|
||||
return True
|
||||
try:
|
||||
if len(indices2) == 0:
|
||||
return False
|
||||
for indices1 in history_indices:
|
||||
if len(set(indices1).intersection(set(indices2))) >= threshold:
|
||||
return True
|
||||
except Exception as e:
|
||||
print(e)
|
||||
import ipdb; ipdb.set_trace()
|
||||
return False
|
||||
|
||||
|
45
utils/render_util.py
Normal file
45
utils/render_util.py
Normal file
@ -0,0 +1,45 @@
|
||||
|
||||
import os
|
||||
import json
|
||||
import subprocess
|
||||
import tempfile
|
||||
import shutil
|
||||
from utils.data_load import DataLoadUtil
|
||||
from utils.pts_util import PtsUtil
|
||||
class RenderUtil:
|
||||
|
||||
@staticmethod
|
||||
def render_pts(cam_pose, object_name, script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
|
||||
nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
params = {
|
||||
"cam_pose": nO_to_world_pose.tolist(),
|
||||
"object_name": scene_path
|
||||
}
|
||||
params_data_path = os.path.join(temp_dir, "params.json")
|
||||
with open(params_data_path, 'w') as f:
|
||||
json.dump(params, f)
|
||||
result = subprocess.run([
|
||||
'blender', '-b', '-P', script_path, '--', temp_dir
|
||||
], capture_output=True, text=True)
|
||||
if result.returncode != 0:
|
||||
print("Blender script failed:")
|
||||
print(result.stderr)
|
||||
return None
|
||||
path = os.path.join(temp_dir, "tmp")
|
||||
point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
|
||||
cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
|
||||
|
||||
filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
|
||||
full_scene_point_cloud = None
|
||||
if require_full_scene:
|
||||
depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)
|
||||
point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_params['cam_intrinsic'], cam_params['cam_to_world'])['points_world']
|
||||
point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_params['cam_intrinsic'], cam_params['cam_to_world_R'])['points_world']
|
||||
|
||||
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
|
||||
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
|
||||
full_scene_point_cloud = PtsUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
|
||||
|
||||
|
||||
return filtered_point_cloud, full_scene_point_cloud
|
@ -1,162 +0,0 @@
|
||||
|
||||
import numpy as np
|
||||
from utils.pose_util import PoseUtil
|
||||
import trimesh
|
||||
from collections import defaultdict
|
||||
from scipy.spatial.transform import Rotation as R
|
||||
import random
|
||||
|
||||
class ViewSampleUtil:
|
||||
@staticmethod
|
||||
def farthest_point_sampling(points, num_samples):
|
||||
num_points = points.shape[0]
|
||||
if num_samples >= num_points:
|
||||
return points, np.arange(num_points)
|
||||
sampled_indices = np.zeros(num_samples, dtype=int)
|
||||
sampled_indices[0] = np.random.randint(num_points)
|
||||
min_distances = np.full(num_points, np.inf)
|
||||
for i in range(1, num_samples):
|
||||
current_point = points[sampled_indices[i - 1]]
|
||||
dist_to_current_point = np.linalg.norm(points - current_point, axis=1)
|
||||
min_distances = np.minimum(min_distances, dist_to_current_point)
|
||||
sampled_indices[i] = np.argmax(min_distances)
|
||||
downsampled_points = points[sampled_indices]
|
||||
return downsampled_points, sampled_indices
|
||||
|
||||
@staticmethod
|
||||
def voxel_downsample(points, voxel_size):
|
||||
voxel_grid = defaultdict(list)
|
||||
for i, point in enumerate(points):
|
||||
voxel_index = tuple((point // voxel_size).astype(int))
|
||||
voxel_grid[voxel_index].append(i)
|
||||
|
||||
downsampled_points = []
|
||||
downsampled_indices = []
|
||||
for indices in voxel_grid.values():
|
||||
selected_index = indices[0]
|
||||
downsampled_points.append(points[selected_index])
|
||||
downsampled_indices.append(selected_index)
|
||||
|
||||
return np.array(downsampled_points), downsampled_indices
|
||||
|
||||
@staticmethod
|
||||
def sample_view_data(mesh: trimesh.Trimesh, distance_range: tuple = (0.25, 0.5), voxel_size: float = 0.005, max_views: int = 1, pertube_repeat: int = 1) -> dict:
|
||||
view_data = {
|
||||
"look_at_points": [],
|
||||
"cam_positions": [],
|
||||
}
|
||||
|
||||
vertices = mesh.vertices
|
||||
look_at_points = []
|
||||
cam_positions = []
|
||||
normals = []
|
||||
vertex_normals = mesh.vertex_normals
|
||||
|
||||
for i, vertex in enumerate(vertices):
|
||||
look_at_point = vertex
|
||||
|
||||
view_data["look_at_points"].append(look_at_point)
|
||||
|
||||
normal = vertex_normals[i]
|
||||
if np.isnan(normal).any():
|
||||
continue
|
||||
if np.dot(normal, look_at_point) < 0:
|
||||
normal = -normal
|
||||
|
||||
normals.append(normal)
|
||||
|
||||
for _ in range(pertube_repeat):
|
||||
perturb_angle = np.radians(np.random.uniform(0, 30))
|
||||
perturb_axis = np.random.normal(size=3)
|
||||
perturb_axis /= np.linalg.norm(perturb_axis)
|
||||
rotation_matrix = R.from_rotvec(perturb_angle * perturb_axis).as_matrix()
|
||||
perturbed_normal = np.dot(rotation_matrix, normal)
|
||||
|
||||
distance = np.random.uniform(*distance_range)
|
||||
cam_position = look_at_point + distance * perturbed_normal
|
||||
look_at_points.append(look_at_point)
|
||||
cam_positions.append(cam_position)
|
||||
|
||||
look_at_points = np.array(look_at_points)
|
||||
cam_positions = np.array(cam_positions)
|
||||
|
||||
voxel_downsampled_look_at_points, selected_indices = ViewSampleUtil.voxel_downsample(look_at_points, voxel_size)
|
||||
voxel_downsampled_cam_positions = cam_positions[selected_indices]
|
||||
voxel_downsampled_normals = np.array(normals)[selected_indices]
|
||||
|
||||
fps_downsampled_look_at_points, selected_indices = ViewSampleUtil.farthest_point_sampling(voxel_downsampled_look_at_points, max_views * 2)
|
||||
fps_downsampled_cam_positions = voxel_downsampled_cam_positions[selected_indices]
|
||||
|
||||
view_data["look_at_points"] = fps_downsampled_look_at_points.tolist()
|
||||
view_data["cam_positions"] = fps_downsampled_cam_positions.tolist()
|
||||
view_data["normals"] = voxel_downsampled_normals.tolist()
|
||||
view_data["voxel_down_sampled_points"] = voxel_downsampled_look_at_points
|
||||
|
||||
return view_data
|
||||
|
||||
@staticmethod
|
||||
def get_world_points_and_normals(view_data: dict, obj_world_pose: np.ndarray) -> tuple:
|
||||
world_points = []
|
||||
world_normals = []
|
||||
for voxel_down_sampled_points, normal in zip(view_data["voxel_down_sampled_points"], view_data["normals"]):
|
||||
voxel_down_sampled_points_world = obj_world_pose @ np.append(voxel_down_sampled_points, 1.0)
|
||||
normal_world = obj_world_pose[:3, :3] @ normal
|
||||
world_points.append(voxel_down_sampled_points_world[:3])
|
||||
world_normals.append(normal_world)
|
||||
return np.array(world_points), np.array(world_normals)
|
||||
|
||||
@staticmethod
|
||||
def get_cam_pose(view_data: dict, obj_world_pose: np.ndarray, max_views: int, min_cam_table_included_degree: int, random_view_ratio: float) -> np.ndarray:
|
||||
cam_poses = []
|
||||
min_height_z = 1000
|
||||
for look_at_point, cam_position in zip(view_data["look_at_points"], view_data["cam_positions"]):
|
||||
look_at_point_world = obj_world_pose @ np.append(look_at_point, 1.0)
|
||||
cam_position_world = obj_world_pose @ np.append(cam_position, 1.0)
|
||||
if look_at_point_world[2] < min_height_z:
|
||||
min_height_z = look_at_point_world[2]
|
||||
look_at_point_world = look_at_point_world[:3]
|
||||
cam_position_world = cam_position_world[:3]
|
||||
|
||||
forward_vector = cam_position_world - look_at_point_world
|
||||
forward_vector /= np.linalg.norm(forward_vector)
|
||||
|
||||
up_vector = np.array([0, 0, 1])
|
||||
|
||||
right_vector = np.cross(up_vector, forward_vector)
|
||||
right_vector /= np.linalg.norm(right_vector)
|
||||
|
||||
corrected_up_vector = np.cross(forward_vector, right_vector)
|
||||
rotation_matrix = np.array([right_vector, corrected_up_vector, forward_vector]).T
|
||||
|
||||
cam_pose = np.eye(4)
|
||||
cam_pose[:3, :3] = rotation_matrix
|
||||
cam_pose[:3, 3] = cam_position_world
|
||||
cam_poses.append(cam_pose)
|
||||
|
||||
filtered_cam_poses = []
|
||||
for cam_pose in cam_poses:
|
||||
if cam_pose[2, 3] > min_height_z:
|
||||
direction_vector = cam_pose[:3, 2]
|
||||
horizontal_normal = np.array([0, 0, 1])
|
||||
cos_angle = np.dot(direction_vector, horizontal_normal) / (np.linalg.norm(direction_vector) * np.linalg.norm(horizontal_normal))
|
||||
angle = np.arccos(np.clip(cos_angle, -1.0, 1.0))
|
||||
angle_degree = np.degrees(angle)
|
||||
if angle_degree < 90 - min_cam_table_included_degree:
|
||||
filtered_cam_poses.append(cam_pose)
|
||||
if random.random() < random_view_ratio:
|
||||
pertube_pose = PoseUtil.get_uniform_pose([0.1, 0.1, 0.1], [3, 3, 3], 0, 180, "cm")
|
||||
filtered_cam_poses.append(pertube_pose @ cam_pose)
|
||||
|
||||
if len(filtered_cam_poses) > max_views:
|
||||
indices = np.random.choice(len(filtered_cam_poses), max_views, replace=False)
|
||||
filtered_cam_poses = [filtered_cam_poses[i] for i in indices]
|
||||
|
||||
return np.array(filtered_cam_poses)
|
||||
|
||||
@staticmethod
|
||||
def sample_view_data_world_space(mesh: trimesh.Trimesh, cad_to_world: np.ndarray, distance_range:tuple = (0.25,0.5), voxel_size:float = 0.005, max_views: int=1, min_cam_table_included_degree:int=20, random_view_ratio:float = 0.2) -> dict:
|
||||
view_data = ViewSampleUtil.sample_view_data(mesh, distance_range, voxel_size, max_views)
|
||||
view_data["cam_to_world_poses"] = ViewSampleUtil.get_cam_pose(view_data, cad_to_world, max_views, min_cam_table_included_degree, random_view_ratio)
|
||||
view_data["voxel_down_sampled_points"], view_data["normals"] = ViewSampleUtil.get_world_points_and_normals(view_data, cad_to_world)
|
||||
return view_data
|
||||
|
Loading…
x
Reference in New Issue
Block a user