hofee 2024-10-08 21:28:30 +08:00
parent d9d2716ba7
commit 3ab046b134
11 changed files with 773 additions and 354 deletions

.gitignore (vendored)

@@ -3,7 +3,7 @@
 __pycache__/
 *.py[cod]
 *$py.class
+test_output/
 # C extensions
 *.so

[runner config YAML]

@@ -10,7 +10,8 @@ runner:
   root_dir: "experiments"
   generate:
-    model_dir: "/home/yan20/nbv_rec/data/test_CAD/test_model"
+    model_dir: "/home/user/nbv_rec/data/models"
+    table_model_path: "/home/user/nbv_rec/data/table.obj"
     model_start_idx: 0
     voxel_size: 0.005
     max_view: 512
@@ -19,6 +20,19 @@ runner:
     min_diag: 0.01
     random_view_ratio: 0
     min_cam_table_included_degree: 20
+    obj_name: "bear"
+    light_and_camera_config:
+      Camera:
+        near_plane: 0.01
+        far_plane: 5
+        fov_vertical: 25
+        resolution: [1280,800]
+        eye_distance: 0.15
+        eye_angle: 25
+      Light:
+        location: [0,0,3.5]
+        orientation: [0,0,0]
+        power: 150
   reconstruct:
     soft_overlap_threshold: 0.3
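For reference, the Camera block above implies a simple pinhole model. A minimal sketch (ours, not part of the commit) of turning fov_vertical and resolution into an intrinsic matrix, assuming square pixels and a centered principal point:

import numpy as np

def intrinsics_from_config(fov_vertical_deg, resolution):
    # resolution is [width, height]; focal length follows from the vertical FOV
    w, h = resolution
    fy = (h / 2) / np.tan(np.radians(fov_vertical_deg) / 2)
    fx = fy  # square pixels assumed
    return np.array([[fx, 0, w / 2],
                     [0, fy, h / 2],
                     [0,  0,     1]])

K = intrinsics_from_config(25, [1280, 800])  # values from the config above

eye_distance and eye_angle would presumably fix the baseline between the two rendered stereo views.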

[CAD strategy runner]

@@ -1,5 +1,7 @@
 import os
 import trimesh
+import tempfile
+import subprocess
 import numpy as np
 from PytorchBoot.runners.runner import Runner
 from PytorchBoot.config import ConfigManager
@@ -7,16 +9,12 @@ import PytorchBoot.stereotype as stereotype
 from PytorchBoot.utils.log_util import Log
 from PytorchBoot.status import status_manager
-from utils.control_util import ControlUtil
-import sys
-sys.path.append(r"C:\Document\Local Project\nbv_rec_control")
-#from utils.control_util import ControlUtil
 from utils.communicate_util import CommunicateUtil
 from utils.pts_util import PtsUtil
-from utils.view_sample_util import ViewSampleUtil
 from utils.reconstruction_util import ReconstructionUtil
+from utils.preprocess_util import save_scene_data
+from utils.data_load import DataLoadUtil

 @stereotype.runner("CAD_strategy_runner")
 class CADStrategyRunner(Runner):
@@ -50,79 +48,111 @@ class CADStrategyRunner(Runner):
     def load_experiment(self, backup_name=None):
         super().load_experiment(backup_name)

+    def get_pts_from_view_data(self, view_data):
+        depth = view_data["depth_image"]
+        depth_intrinsics = view_data["depth_intrinsics"]
+        depth_extrinsics = view_data["depth_extrinsics"]
+        cam_pts = PtsUtil.get_pts_from_depth(depth, depth_intrinsics, depth_extrinsics)
+        return cam_pts
+
+    def split_scan_pts_and_obj_pts(self, world_pts, scan_pts_z, z_threshold=0.003):
+        scan_pts = world_pts[scan_pts_z < z_threshold]
+        obj_pts = world_pts[scan_pts_z >= z_threshold]
+        return scan_pts, obj_pts
+
     def run_one_model(self, model_name):
         ''' init robot '''
-        ControlUtil.init()
+        #ControlUtil.init()
         ''' load CAD model '''
-        model_path = os.path.join(self.model_dir, model_name)
+        model_path = os.path.join(self.model_dir, model_name, "mesh.obj")
         cad_model = trimesh.load(model_path)
         ''' take first view '''
-        view_data = CommunicateUtil.get_view_data()
-        first_cam_pts = None
+        #view_data = CommunicateUtil.get_view_data(init=True)
+        #first_cam_pts = self.get_pts_from_view_data(view_data)
         ''' register '''
-        cad_to_cam = PtsUtil.register_icp(first_cam_pts, cad_model)
-        cam_to_world = ControlUtil.get_pose()
-        cad_to_world = cam_to_world @ cad_to_cam
-        cad_model: trimesh.Trimesh = cad_model.apply_transform(cad_to_world)
-        ''' sample view '''
-        min_corner = cad_model.bounds[0]
-        max_corner = cad_model.bounds[1]
-        diag = np.linalg.norm(max_corner - min_corner)
-        view_num = int(self.min_view + (diag - self.min_diag)/(self.max_diag - self.min_diag) * (self.max_view - self.min_view))
-        sampled_view_data = ViewSampleUtil.sample_view_data_world_space(
-            cad_model, cad_to_world,
-            voxel_size = self.voxel_size,
-            max_views = view_num,
-            min_cam_table_included_degree = self.min_cam_table_included_degree,
-            random_view_ratio = self.random_view_ratio
-        )
-        cam_to_world_poses = sampled_view_data["cam_to_world_poses"]
-        world_model_points = sampled_view_data["voxel_down_sampled_points"]
+        #cad_to_cam = PtsUtil.register(first_cam_pts, cad_model)
+        #cam_to_world = ControlUtil.get_pose()
+        cad_to_world = np.eye(4) #cam_to_world @ cad_to_cam
+
+        world_to_blender_world = np.eye(4)
+        world_to_blender_world[:3, 3] = np.asarray([0, 0, 0.9215])
+        cad_to_blender_world = np.dot(world_to_blender_world, cad_to_world)
+        cad_model: trimesh.Trimesh = cad_model.apply_transform(cad_to_blender_world)
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            name = "cad_model_world"
+            cad_model.export(os.path.join(temp_dir, f"{name}.obj"))
+            temp_dir = "/home/user/nbv_rec/nbv_rec_control/test_output"
+            scene_dir = os.path.join(temp_dir, name)
+            script_path = "/home/user/nbv_rec/blender_app/data_generator.py"
+            ''' sample view '''
+            # import ipdb; ipdb.set_trace()
+            # print("start running renderer")
+            # result = subprocess.run([
+            #     'blender', '-b', '-P', script_path, '--', temp_dir
+            # ], capture_output=True, text=True)
+            # print("finish running renderer")
+            #
+            world_model_points = np.loadtxt(os.path.join(scene_dir, "points_and_normals.txt"))[:, :3]
+
+            ''' preprocess '''
+            # save_scene_data(temp_dir, name)
+            pts_dir = os.path.join(temp_dir, name, "pts")
+            sample_view_pts_list = []
+            scan_points_idx_list = []
+            frame_num = len(os.listdir(pts_dir))
+            for frame_idx in range(frame_num):
+                pts_path = os.path.join(temp_dir, name, "pts", f"{frame_idx}.txt")
+                idx_path = os.path.join(temp_dir, name, "scan_points_indices", f"{frame_idx}.txt")
+                point_cloud = np.loadtxt(pts_path)
+                sampled_point_cloud = PtsUtil.voxel_downsample_point_cloud(point_cloud, self.voxel_size)
+                indices = np.loadtxt(idx_path, dtype=np.int32)
+                if indices.ndim == 0:
+                    # a file holding a single index loads as a 0-d array
+                    indices = np.array([indices])
+                sample_view_pts_list.append(sampled_point_cloud)
+                scan_points_idx_list.append(indices)

-        ''' take sample view '''
-        scan_points_idx_list = []
-        sample_view_pts_list = []
-        for cam_to_world in cam_to_world_poses:
-            ControlUtil.move_to(cam_to_world)
-            ''' get world pts '''
-            view_data = CommunicateUtil.get_view_data()
-            cam_pts = None
-            scan_points_idx = None
-            world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
-            sample_view_pts_list.append(world_pts)
-            scan_points_idx_list.append(scan_points_idx)
-
-        ''' generate strategy '''
-        limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_overlap(
-            world_model_points, sample_view_pts_list,
-            scan_points_indices_list=scan_points_idx_list,
-            init_view=0,
-            threshold=self.voxel_size,
-            soft_overlap_threshold=self.soft_overlap_threshold,
-            hard_overlap_threshold=self.hard_overlap_threshold,
-            scan_points_threshold=self.scan_points_threshold,
-            status_info=self.status_info
-        )
-
-        ''' extract cam_to_world sequence '''
-        cam_to_world_seq = []
-        coverage_rate_seq = []
-        for idx, coverage_rate in limited_useful_view:
-            cam_to_world_seq.append(cam_to_world_poses[idx])
-            coverage_rate_seq.append(coverage_rate)
-
-        ''' take best seq view '''
-        for cam_to_world in cam_to_world_seq:
-            ControlUtil.move_to(cam_to_world)
-            ''' get world pts '''
-            view_data = CommunicateUtil.get_view_data()
-            cam_pts = None
-            scan_points_idx = None
-            world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
-            sample_view_pts_list.append(world_pts)
-            scan_points_idx_list.append(scan_points_idx)
+            ''' generate strategy '''
+            limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_overlap(
+                world_model_points, sample_view_pts_list,
+                scan_points_indices_list=scan_points_idx_list,
+                init_view=0,
+                threshold=self.voxel_size,
+                soft_overlap_threshold=self.soft_overlap_threshold,
+                hard_overlap_threshold=self.hard_overlap_threshold,
+                scan_points_threshold=self.scan_points_threshold,
+                status_info=self.status_info
+            )
+
+            ''' extract cam_to_world sequence '''
+            cam_to_world_seq = []
+            coverage_rate_seq = []
+
+            from ipdb import set_trace; set_trace()
+            for idx, coverage_rate in limited_useful_view:
+                path = DataLoadUtil.get_path(temp_dir, name, idx)
+                cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+                cam_to_world_seq.append(cam_info["cam_to_world"])
+                coverage_rate_seq.append(coverage_rate)
+
+            # ''' take best seq view '''
+            # for cam_to_world in cam_to_world_seq:
+            #     ControlUtil.move_to(cam_to_world)
+            #     ''' get world pts '''
+            #     view_data = CommunicateUtil.get_view_data()
+            #     cam_pts = self.get_pts_from_view_data(view_data)
+            #     scan_points_idx = None
+            #     world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
+            #     sample_view_pts_list.append(world_pts)
+            #     scan_points_idx_list.append(scan_points_idx)

     def run(self):
@@ -159,32 +189,5 @@ if __name__ == "__main__":
     # cad_pts_L = PtsUtil.transform_point_cloud(noisy_pts_L, cad_to_cam_L)
     # np.savetxt(r"test.txt", cad_pts_L)
     # np.savetxt(r"src.txt", noisy_pts_L)
-
-    ''' test view sample '''
-    sampled_view_data = ViewSampleUtil.sample_view_data_world_space(
-        model, np.eye(4),
-        voxel_size = 0.005,
-        max_views = 20,
-        min_cam_table_included_degree = 20,
-        random_view_ratio = 0
-    )
-    cam_poses = sampled_view_data["cam_to_world_poses"]
-    cam_poses = np.array(cam_poses)
-    print(cam_poses.shape)
-
-    def sample_camera_direction(cam_pose, num_samples, sample_distance):
-        cam_position = cam_pose[:3, 3]
-        cam_direction = cam_pose[:3, 2]
-        sampled_points = np.array([cam_position - (i + 1) * sample_distance * cam_direction for i in range(num_samples)])
-        return sampled_points
-
-    all_sampled_points = []
-    for i in range(cam_poses.shape[0]):
-        sampled_points = sample_camera_direction(cam_poses[i], 10, 0.02)
-        all_sampled_points.append(sampled_points)
-    all_sampled_points = np.concatenate(all_sampled_points, axis=0)
-    np.savetxt(r"cam_poses.txt", cam_poses[:, :3, 3])
-    np.savetxt(r"cam_directions.txt", all_sampled_points)

[inferencer runner] (deleted)

@@ -1,87 +0,0 @@
import os
import trimesh
import numpy as np
from PytorchBoot.runners.runner import Runner
from PytorchBoot.config import ConfigManager
import PytorchBoot.stereotype as stereotype
from PytorchBoot.utils.log_util import Log
import PytorchBoot.namespace as namespace
from PytorchBoot.status import status_manager
from utils.control_util import ControlUtil
from utils.communicate_util import CommunicateUtil
from utils.pts_util import PtsUtil
from utils.view_sample_util import ViewSampleUtil
from utils.reconstruction_util import ReconstructionUtil
@stereotype.runner("inferencer")
class Inferencer(Runner):
def __init__(self, config_path: str):
super().__init__(config_path)
self.load_experiment("inferencer")
self.reconstruct_config = ConfigManager.get("runner", "reconstruct")
self.voxel_size = self.reconstruct_config["voxel_size"]
self.max_iter = self.reconstruct_config["max_iter"]
def create_experiment(self, backup_name=None):
super().create_experiment(backup_name)
def load_experiment(self, backup_name=None):
super().load_experiment(backup_name)
def run_inference(self, model_name):
''' init robot '''
ControlUtil.init()
''' take first view '''
view_data = CommunicateUtil.get_view_data()
first_cam_pts = None
first_cam_pose = None
combined_pts = first_cam_pts
input_data = {
"scanned_target_points_num": [first_cam_pts.shape[0]],
"scanned_n_to_world_pose_9d": [first_cam_pose],
"combined_scanned_pts": combined_pts
}
''' enter loop '''
iter = 0
while True:
''' inference '''
inference_result = CommunicateUtil.get_inference_data(input_data)
cam_to_world = inference_result["cam_to_world"]
''' set pose '''
ControlUtil.set_pose(cam_to_world)
''' take view '''
view_data = CommunicateUtil.get_view_data()
curr_cam_pts = None
curr_cam_pose = None
''' update combined pts '''
combined_pts = np.concatenate([combined_pts, curr_cam_pts], axis=0)
combined_pts = PtsUtil.voxel_downsample_point_cloud(combined_pts, voxel_size=self.voxel_size)
''' update input data '''
input_data["combined_scanned_pts"] = combined_pts
input_data["scanned_target_points_num"].append(curr_cam_pts.shape[0])
input_data["scanned_n_to_world_pose_9d"].append(curr_cam_pose)
''' check stop condition '''
iter += 1
if iter >= self.max_iter:
break
def run(self):
self.run_inference()
if __name__ == "__main__":
model_path = "/home/yan20/nbv_rec/data/test_CAD/test_model/bear_scaled.ply"
model = trimesh.load(model_path)
test_pts_L = np.loadtxt("/home/yan20/nbv_rec/data/test_CAD/cam_pts_0_L.txt")
test_pts_R = np.loadtxt("/home/yan20/nbv_rec/data/test_CAD/cam_pts_0_R.txt")
cam_to_world_L = PtsUtil.register_icp(test_pts_L, model)
cam_to_world_R = PtsUtil.register_icp(test_pts_R, model)
print(cam_to_world_L)
print("================================")
print(cam_to_world_R)

utils/communicate_util.py

@@ -1,10 +1,20 @@
+import requests
+import numpy as np
+import cv2
+
 class CommunicateUtil:
     VIEW_HOST = "127.0.0.1:5000"
     INFERENCE_HOST = "127.0.0.1:5000"

-    def get_view_data() -> dict:
-        data = {}
+    def get_view_data(init = False) -> dict:
+        params = {}
+        params["create_scanner"] = init
+        response = requests.get(f"http://{CommunicateUtil.VIEW_HOST}/api/data", json=params)
+        data = response.json()
+        if not data["success"]:
+            print("Failed to get view data")
+            return None
         return data

     def get_inference_data(view_data: dict) -> dict:
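Hypothetical usage of the updated API: the first call asks the view server to create the scanner, later calls reuse it. Assumes a server is listening on VIEW_HOST:

view_data = CommunicateUtil.get_view_data(init=True)   # creates the scanner once
if view_data is not None:
    frame = CommunicateUtil.get_view_data()            # subsequent frames
    depth = frame["depth_image"]                       # field name as read by get_pts_from_view_data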

utils/data_load.py (new file, 410 lines)

@@ -0,0 +1,410 @@
import os
import numpy as np
import json
import cv2
import trimesh
import torch
from utils.pts_util import PtsUtil
class DataLoadUtil:
TABLE_POSITION = np.asarray([0, 0, 0.8215])
@staticmethod
def get_display_table_info(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
display_table_info = scene_info["display_table"]
return display_table_info
@staticmethod
def get_display_table_top(root, scene_name):
display_table_height = DataLoadUtil.get_display_table_info(root, scene_name)[
"height"
]
display_table_top = DataLoadUtil.TABLE_POSITION + np.asarray(
[0, 0, display_table_height]
)
return display_table_top
@staticmethod
def get_path(root, scene_name, frame_idx):
path = os.path.join(root, scene_name, f"{frame_idx}")
return path
@staticmethod
def get_label_num(root, scene_name):
label_dir = os.path.join(root, scene_name, "label")
return len(os.listdir(label_dir))
@staticmethod
def get_label_path(root, scene_name, seq_idx):
label_dir = os.path.join(root, scene_name, "label")
if not os.path.exists(label_dir):
os.makedirs(label_dir)
path = os.path.join(label_dir, f"{seq_idx}.json")
return path
@staticmethod
def get_label_path_old(root, scene_name):
path = os.path.join(root, scene_name, "label.json")
return path
@staticmethod
def get_scene_seq_length(root, scene_name):
camera_params_path = os.path.join(root, scene_name, "camera_params")
return len(os.listdir(camera_params_path))
@staticmethod
def load_mesh_at(model_dir, object_name, world_object_pose):
model_path = os.path.join(model_dir, object_name, "mesh.obj")
mesh = trimesh.load(model_path)
mesh.apply_transform(world_object_pose)
return mesh
@staticmethod
def get_bbox_diag(model_dir, object_name):
model_path = os.path.join(model_dir, object_name, "mesh.obj")
mesh = trimesh.load(model_path)
bbox = mesh.bounding_box.extents
diagonal_length = np.linalg.norm(bbox)
return diagonal_length
@staticmethod
def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
model_path = os.path.join(output_dir, scene_name, "world_mesh.obj")
mesh.export(model_path)
@staticmethod
def save_target_mesh_at_world_space(
root, model_dir, scene_name, display_table_as_world_space_origin=True
):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
if display_table_as_world_space_origin:
location = transformation["location"] - DataLoadUtil.get_display_table_top(
root, scene_name
)
else:
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
mesh_dir = os.path.join(root, scene_name, "mesh")
if not os.path.exists(mesh_dir):
os.makedirs(mesh_dir)
model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
mesh.export(model_path)
@staticmethod
def load_scene_info(root, scene_name):
scene_info_path = os.path.join(root, scene_name, "scene_info.json")
with open(scene_info_path, "r") as f:
scene_info = json.load(f)
return scene_info
@staticmethod
def load_target_pts_num_dict(root, scene_name):
target_pts_num_path = os.path.join(root, scene_name, "target_pts_num.json")
with open(target_pts_num_path, "r") as f:
target_pts_num_dict = json.load(f)
return target_pts_num_dict
@staticmethod
def load_target_object_pose(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
return pose_mat
@staticmethod
def load_depth(path, min_depth=0.01, max_depth=5.0, binocular=False):
def load_depth_from_real_path(real_path, min_depth, max_depth):
depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED)
depth = depth.astype(np.float32) / 65535.0
depth_meters = min_depth + (max_depth - min_depth) * depth
return depth_meters
if binocular:
depth_path_L = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + "_L.png"
)
depth_path_R = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + "_R.png"
)
depth_meters_L = load_depth_from_real_path(
depth_path_L, min_depth, max_depth
)
depth_meters_R = load_depth_from_real_path(
depth_path_R, min_depth, max_depth
)
return depth_meters_L, depth_meters_R
else:
depth_path = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + ".png"
)
depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth)
return depth_meters
@staticmethod
def load_seg(path, binocular=False, left_only=False):
if binocular and not left_only:
def clean_mask(mask_image):
green = [0, 255, 0, 255]
red = [255, 0, 0, 255]
threshold = 2
mask_image = np.where(
np.abs(mask_image - green) <= threshold, green, mask_image
)
mask_image = np.where(
np.abs(mask_image - red) <= threshold, red, mask_image
)
return mask_image
mask_path_L = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
)
mask_image_L = clean_mask(cv2.imread(mask_path_L, cv2.IMREAD_UNCHANGED))
mask_path_R = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_R.png"
)
mask_image_R = clean_mask(cv2.imread(mask_path_R, cv2.IMREAD_UNCHANGED))
return mask_image_L, mask_image_R
else:
if binocular and left_only:
mask_path = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
)
else:
mask_path = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + ".png"
)
mask_image = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
return mask_image
@staticmethod
def load_normal(path, binocular=False, left_only=False):
if binocular and not left_only:
normal_path_L = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
)
normal_image_L = cv2.imread(normal_path_L, cv2.IMREAD_COLOR)
normal_path_R = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + "_R.png"
)
normal_image_R = cv2.imread(normal_path_R, cv2.IMREAD_COLOR)
normalized_normal_image_L = normal_image_L / 255.0 * 2.0 - 1.0
normalized_normal_image_R = normal_image_R / 255.0 * 2.0 - 1.0
return normalized_normal_image_L, normalized_normal_image_R
else:
if binocular and left_only:
normal_path = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
)
else:
normal_path = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + ".png"
)
normal_image = cv2.imread(normal_path, cv2.IMREAD_COLOR)
normalized_normal_image = normal_image / 255.0 * 2.0 - 1.0
return normalized_normal_image
@staticmethod
def load_label(path):
with open(path, "r") as f:
label_data = json.load(f)
return label_data
@staticmethod
def load_rgb(path):
rgb_path = os.path.join(
os.path.dirname(path), "rgb", os.path.basename(path) + ".png"
)
rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR)
return rgb_image
@staticmethod
def load_from_preprocessed_pts(path):
npy_path = os.path.join(
os.path.dirname(path), "pts", os.path.basename(path) + ".npy"
)
pts = np.load(npy_path)
return pts
@staticmethod
def cam_pose_transformation(cam_pose_before):
offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
cam_pose_after = cam_pose_before @ offset
return cam_pose_after
@staticmethod
def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
scene_dir = os.path.dirname(path)
root_dir = os.path.dirname(scene_dir)
scene_name = os.path.basename(scene_dir)
camera_params_path = os.path.join(
os.path.dirname(path), "camera_params", os.path.basename(path) + ".json"
)
with open(camera_params_path, "r") as f:
label_data = json.load(f)
cam_to_world = np.asarray(label_data["extrinsic"])
cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
world_to_display_table = np.eye(4)
world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
if display_table_as_world_space_origin:
cam_to_world = np.dot(world_to_display_table, cam_to_world)
cam_intrinsic = np.asarray(label_data["intrinsic"])
cam_info = {
"cam_to_world": cam_to_world,
"cam_intrinsic": cam_intrinsic,
"far_plane": label_data["far_plane"],
"near_plane": label_data["near_plane"],
}
if binocular:
cam_to_world_R = np.asarray(label_data["extrinsic_R"])
cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
if display_table_as_world_space_origin:
cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O)
cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R)
cam_info["cam_to_world_O"] = cam_to_world_O
cam_info["cam_to_world_R"] = cam_to_world_R
return cam_info
@staticmethod
def get_real_cam_O_from_cam_L(
cam_L, cam_O_to_cam_L, scene_path, display_table_as_world_space_origin=True
):
root_dir = os.path.dirname(scene_path)
scene_name = os.path.basename(scene_path)
if isinstance(cam_L, torch.Tensor):
cam_L = cam_L.cpu().numpy()
nO_to_display_table_pose = cam_L @ cam_O_to_cam_L
if display_table_as_world_space_origin:
display_table_to_world = np.eye(4)
display_table_to_world[:3, 3] = DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
nO_to_world_pose = np.dot(display_table_to_world, nO_to_display_table_pose)
nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
return nO_to_world_pose
@staticmethod
def get_target_point_cloud(
depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0, 255, 0, 255), require_full_points=False
):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
mask = mask.reshape(-1, 4)
target_mask = (mask == target_mask_label).all(axis=-1)
target_points_camera = points_camera[target_mask]
target_points_camera_aug = np.concatenate(
[target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1
)
target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3]
data = {
"points_world": target_points_world,
"points_camera": target_points_camera,
}
return data
@staticmethod
def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
points_camera_aug = np.concatenate(
[points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
)
points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return {"points_world": points_world, "points_camera": points_camera}
@staticmethod
def get_target_point_cloud_world_from_path(
path,
binocular=False,
random_downsample_N=65536,
voxel_size=0.005,
target_mask_label=(0, 255, 0, 255),
display_table_mask_label=(0, 0, 255, 255),
get_display_table_pts=False,
require_normal=False,
):
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
if binocular:
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"], cam_info["far_plane"], binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
point_cloud_L = DataLoadUtil.get_target_point_cloud(
depth_L,
cam_info["cam_intrinsic"],
cam_info["cam_to_world"],
mask_L,
target_mask_label,
)["points_world"]
point_cloud_R = DataLoadUtil.get_target_point_cloud(
depth_R,
cam_info["cam_intrinsic"],
cam_info["cam_to_world_R"],
mask_R,
target_mask_label,
)["points_world"]
point_cloud_L = PtsUtil.random_downsample_point_cloud(
point_cloud_L, random_downsample_N
)
point_cloud_R = PtsUtil.random_downsample_point_cloud(
point_cloud_R, random_downsample_N
)
overlap_points = PtsUtil.get_overlapping_points(
point_cloud_L, point_cloud_R, voxel_size
)
return overlap_points
else:
depth = DataLoadUtil.load_depth(
path, cam_info["near_plane"], cam_info["far_plane"]
)
mask = DataLoadUtil.load_seg(path)
point_cloud = DataLoadUtil.get_target_point_cloud(
depth, cam_info["cam_intrinsic"], cam_info["cam_to_world"], mask
)["points_world"]
return point_cloud
@staticmethod
def load_points_normals(root, scene_name, display_table_as_world_space_origin=True):
points_path = os.path.join(root, scene_name, "points_and_normals.txt")
points_normals = np.loadtxt(points_path)
if display_table_as_world_space_origin:
points_normals[:, :3] = points_normals[
:, :3
] - DataLoadUtil.get_display_table_top(root, scene_name)
return points_normals
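load_depth above inverts a linear 16-bit encoding: a raw PNG value v maps to near + (far - near) * v / 65535. A quick worked example (ours) with the near/far planes from the runner config:

import numpy as np

raw = np.array([0, 32768, 65535], dtype=np.uint16)
near, far = 0.01, 5.0
depth_m = near + (far - near) * (raw.astype(np.float32) / 65535.0)
print(depth_m)  # ~[0.01, 2.505, 5.0] meters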

utils/preprocess_util.py (new file, 158 lines)

@@ -0,0 +1,158 @@
import os
import numpy as np
import time
import sys
np.random.seed(0)
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.reconstruction_util import ReconstructionUtil
from utils.data_load import DataLoadUtil
from utils.pts_util import PtsUtil
def save_np_pts(path, pts: np.ndarray, file_type="txt"):
if file_type == "txt":
np.savetxt(path, pts)
else:
np.save(path, pts)
def save_target_points(root, scene, frame_idx, target_points: np.ndarray, file_type="txt"):
pts_path = os.path.join(root,scene, "pts", f"{frame_idx}.{file_type}")
if not os.path.exists(os.path.join(root,scene, "pts")):
os.makedirs(os.path.join(root,scene, "pts"))
save_np_pts(pts_path, target_points, file_type)
def save_scan_points_indices(root, scene, frame_idx, scan_points_indices: np.ndarray, file_type="txt"):
indices_path = os.path.join(root,scene, "scan_points_indices", f"{frame_idx}.{file_type}")
if not os.path.exists(os.path.join(root,scene, "scan_points_indices")):
os.makedirs(os.path.join(root,scene, "scan_points_indices"))
save_np_pts(indices_path, scan_points_indices, file_type)
def save_scan_points(root, scene, scan_points: np.ndarray):
scan_points_path = os.path.join(root,scene, "scan_points.txt")
save_np_pts(scan_points_path, scan_points)
def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
points_camera_aug = np.concatenate((points_camera, np.ones((points_camera.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world
def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
points_image_homogeneous /= points_image_homogeneous[:, 2:]
pixel_x = points_image_homogeneous[:, 0].astype(int)
pixel_y = points_image_homogeneous[:, 1].astype(int)
h, w = mask.shape[:2]
valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
selected_points_indices = np.where((mask_colors == display_table_mask_label).all(axis=-1))[0]
selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
return selected_points_indices
def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
''' configuration '''
target_mask_label = (0, 255, 0, 255)
display_table_mask_label=(0, 0, 255, 255)
random_downsample_N = 32768
voxel_size=0.002
filter_degree = 75
min_z = 0.2
max_z = 0.5
''' scan points '''
scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0,display_table_radius=0.25))
''' read frame data(depth|mask|normal) '''
frame_num = DataLoadUtil.get_scene_seq_length(root, scene)
for frame_id in range(frame_num):
print(f"[scene({scene_idx}/{scene_total})|frame({frame_id}/{frame_num})]Processing {scene} frame {frame_id}")
path = DataLoadUtil.get_path(root, scene, frame_id)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
''' target points '''
mask_img_L = mask_L
mask_img_R = mask_R
target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
target_mask_img_R = (mask_R == target_mask_label).all(axis=-1)
target_points_L = get_world_points(depth_L, target_mask_img_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
target_points_R = get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
sampled_target_points_L = PtsUtil.random_downsample_point_cloud(
target_points_L, random_downsample_N
)
sampled_target_points_R = PtsUtil.random_downsample_point_cloud(
target_points_R, random_downsample_N
)
has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
if has_points:
target_points = PtsUtil.get_overlapping_points(
sampled_target_points_L, sampled_target_points_R, voxel_size
)
if has_points:
has_points = target_points.shape[0] > 0
if has_points:
points_normals = DataLoadUtil.load_points_normals(root, scene, display_table_as_world_space_origin=True)
target_points = PtsUtil.filter_points(
target_points, points_normals, cam_info["cam_to_world"],voxel_size=0.002, theta = filter_degree, z_range=(min_z, max_z)
)
''' scan points indices '''
scan_points_indices_L = get_scan_points_indices(scan_points, mask_img_L, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
scan_points_indices_R = get_scan_points_indices(scan_points, mask_img_R, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
if not has_points:
target_points = np.zeros((0, 3))
save_target_points(root, scene, frame_id, target_points, file_type=file_type)
save_scan_points_indices(root, scene, frame_id, scan_points_indices, file_type=file_type)
save_scan_points(root, scene, scan_points) # The "done" flag of scene preprocess
if __name__ == "__main__":
#root = "/media/hofee/repository/new_data_with_normal"
root = r"/media/hofee/data/tempdir/test_real_output"
# list_path = r"/media/hofee/repository/full_list.txt"
# scene_list = []
# with open(list_path, "r") as f:
# for line in f:
# scene_list.append(line.strip())
scene_list = os.listdir(root)
from_idx = 0 # 1000
to_idx = 1 # 1500
cnt = 0
total = to_idx - from_idx
for scene in scene_list[from_idx:to_idx]:
start = time.time()
save_scene_data(root, scene, cnt, total, file_type="npy")
cnt+=1
end = time.time()
print(f"Time cost: {end-start}")

utils/pts_util.py

@@ -169,3 +169,25 @@ class PtsUtil:
         )
         return reg_icp.transformation

+    @staticmethod
+    def get_pts_from_depth(depth, cam_intrinsic, cam_extrinsic):
+        h, w = depth.shape
+        i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
+        z = depth
+        x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        # lift to homogeneous coordinates for the extrinsic transform
+        points_camera_aug = np.concatenate(
+            [points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
+        )
+        points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
+        data = {
+            "points_world": points_world,
+            "points_camera": points_camera,
+        }
+        return data
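A quick sanity check for the new helper (our test, not the repo's): a constant depth map with identity extrinsics should come back as a flat sheet of points at z = 1.

import numpy as np

depth = np.full((2, 2), 1.0)
K = np.array([[500.0, 0, 1.0], [0, 500.0, 1.0], [0, 0, 1]])  # toy intrinsics
out = PtsUtil.get_pts_from_depth(depth, K, np.eye(4))
print(out["points_world"].shape)   # (4, 3), every point at z = 1.0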

utils/reconstruction_util.py

@@ -152,9 +152,15 @@ class ReconstructionUtil:
     @staticmethod
     def check_scan_points_overlap(history_indices, indices2, threshold=5):
-        for indices1 in history_indices:
-            if len(set(indices1).intersection(set(indices2))) >= threshold:
-                return True
+        try:
+            if len(indices2) == 0:
+                return False
+            for indices1 in history_indices:
+                if len(set(indices1).intersection(set(indices2))) >= threshold:
+                    return True
+        except Exception as e:
+            print(e)
+            import ipdb; ipdb.set_trace()
         return False
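Behavior sketch for the guarded overlap test: a view counts as overlapping when it shares at least threshold scan-point indices with any earlier view, and an empty index set now short-circuits to False. The values below are illustrative:

import numpy as np

history = [np.array([1, 2, 3, 4, 5])]
print(ReconstructionUtil.check_scan_points_overlap(history, np.array([3, 4, 5, 6]), threshold=3))  # True: shares {3, 4, 5}
print(ReconstructionUtil.check_scan_points_overlap(history, np.array([], dtype=int)))              # False: empty indices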

utils/render_util.py (new file, 45 lines)

@@ -0,0 +1,45 @@
import os
import json
import subprocess
import tempfile
import shutil
from utils.data_load import DataLoadUtil
from utils.pts_util import PtsUtil
class RenderUtil:
@staticmethod
def render_pts(cam_pose, object_name, script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=object_name)
with tempfile.TemporaryDirectory() as temp_dir:
params = {
"cam_pose": nO_to_world_pose.tolist(),
"object_name": scene_path
}
params_data_path = os.path.join(temp_dir, "params.json")
with open(params_data_path, 'w') as f:
json.dump(params, f)
result = subprocess.run([
'blender', '-b', '-P', script_path, '--', temp_dir
], capture_output=True, text=True)
if result.returncode != 0:
print("Blender script failed:")
print(result.stderr)
return None
path = os.path.join(temp_dir, "tmp")
point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
full_scene_point_cloud = None
if require_full_scene:
depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)
point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_params['cam_intrinsic'], cam_params['cam_to_world'])['points_world']
point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_params['cam_intrinsic'], cam_params['cam_to_world_R'])['points_world']
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
full_scene_point_cloud = PtsUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
return filtered_point_cloud, full_scene_point_cloud
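Hypothetical invocation of render_pts: the script path mirrors the one used in the CAD strategy runner, while the pose, object name, and model points are placeholders. It needs a Blender install and a scene laid out the way data_load.py expects.

import numpy as np

cam_pose = np.eye(4)                          # placeholder camera pose
model_points_normals = np.zeros((100, 6))     # xyz + normal per row, placeholder
pts, full_scene = RenderUtil.render_pts(
    cam_pose, "cad_model_world",
    script_path="/home/user/nbv_rec/blender_app/data_generator.py",
    model_points_normals=model_points_normals,
    nO_to_nL_pose=np.eye(4),
    require_full_scene=False,
)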

utils/view_sample_util.py (deleted)

@@ -1,162 +0,0 @@
import numpy as np
from utils.pose_util import PoseUtil
import trimesh
from collections import defaultdict
from scipy.spatial.transform import Rotation as R
import random
class ViewSampleUtil:
@staticmethod
def farthest_point_sampling(points, num_samples):
num_points = points.shape[0]
if num_samples >= num_points:
return points, np.arange(num_points)
sampled_indices = np.zeros(num_samples, dtype=int)
sampled_indices[0] = np.random.randint(num_points)
min_distances = np.full(num_points, np.inf)
for i in range(1, num_samples):
current_point = points[sampled_indices[i - 1]]
dist_to_current_point = np.linalg.norm(points - current_point, axis=1)
min_distances = np.minimum(min_distances, dist_to_current_point)
sampled_indices[i] = np.argmax(min_distances)
downsampled_points = points[sampled_indices]
return downsampled_points, sampled_indices
@staticmethod
def voxel_downsample(points, voxel_size):
voxel_grid = defaultdict(list)
for i, point in enumerate(points):
voxel_index = tuple((point // voxel_size).astype(int))
voxel_grid[voxel_index].append(i)
downsampled_points = []
downsampled_indices = []
for indices in voxel_grid.values():
selected_index = indices[0]
downsampled_points.append(points[selected_index])
downsampled_indices.append(selected_index)
return np.array(downsampled_points), downsampled_indices
@staticmethod
def sample_view_data(mesh: trimesh.Trimesh, distance_range: tuple = (0.25, 0.5), voxel_size: float = 0.005, max_views: int = 1, pertube_repeat: int = 1) -> dict:
view_data = {
"look_at_points": [],
"cam_positions": [],
}
vertices = mesh.vertices
look_at_points = []
cam_positions = []
normals = []
vertex_normals = mesh.vertex_normals
for i, vertex in enumerate(vertices):
look_at_point = vertex
view_data["look_at_points"].append(look_at_point)
normal = vertex_normals[i]
if np.isnan(normal).any():
continue
if np.dot(normal, look_at_point) < 0:
normal = -normal
normals.append(normal)
for _ in range(pertube_repeat):
perturb_angle = np.radians(np.random.uniform(0, 30))
perturb_axis = np.random.normal(size=3)
perturb_axis /= np.linalg.norm(perturb_axis)
rotation_matrix = R.from_rotvec(perturb_angle * perturb_axis).as_matrix()
perturbed_normal = np.dot(rotation_matrix, normal)
distance = np.random.uniform(*distance_range)
cam_position = look_at_point + distance * perturbed_normal
look_at_points.append(look_at_point)
cam_positions.append(cam_position)
look_at_points = np.array(look_at_points)
cam_positions = np.array(cam_positions)
voxel_downsampled_look_at_points, selected_indices = ViewSampleUtil.voxel_downsample(look_at_points, voxel_size)
voxel_downsampled_cam_positions = cam_positions[selected_indices]
voxel_downsampled_normals = np.array(normals)[selected_indices]
fps_downsampled_look_at_points, selected_indices = ViewSampleUtil.farthest_point_sampling(voxel_downsampled_look_at_points, max_views * 2)
fps_downsampled_cam_positions = voxel_downsampled_cam_positions[selected_indices]
view_data["look_at_points"] = fps_downsampled_look_at_points.tolist()
view_data["cam_positions"] = fps_downsampled_cam_positions.tolist()
view_data["normals"] = voxel_downsampled_normals.tolist()
view_data["voxel_down_sampled_points"] = voxel_downsampled_look_at_points
return view_data
@staticmethod
def get_world_points_and_normals(view_data: dict, obj_world_pose: np.ndarray) -> tuple:
world_points = []
world_normals = []
for voxel_down_sampled_points, normal in zip(view_data["voxel_down_sampled_points"], view_data["normals"]):
voxel_down_sampled_points_world = obj_world_pose @ np.append(voxel_down_sampled_points, 1.0)
normal_world = obj_world_pose[:3, :3] @ normal
world_points.append(voxel_down_sampled_points_world[:3])
world_normals.append(normal_world)
return np.array(world_points), np.array(world_normals)
@staticmethod
def get_cam_pose(view_data: dict, obj_world_pose: np.ndarray, max_views: int, min_cam_table_included_degree: int, random_view_ratio: float) -> np.ndarray:
cam_poses = []
min_height_z = 1000
for look_at_point, cam_position in zip(view_data["look_at_points"], view_data["cam_positions"]):
look_at_point_world = obj_world_pose @ np.append(look_at_point, 1.0)
cam_position_world = obj_world_pose @ np.append(cam_position, 1.0)
if look_at_point_world[2] < min_height_z:
min_height_z = look_at_point_world[2]
look_at_point_world = look_at_point_world[:3]
cam_position_world = cam_position_world[:3]
forward_vector = cam_position_world - look_at_point_world
forward_vector /= np.linalg.norm(forward_vector)
up_vector = np.array([0, 0, 1])
right_vector = np.cross(up_vector, forward_vector)
right_vector /= np.linalg.norm(right_vector)
corrected_up_vector = np.cross(forward_vector, right_vector)
rotation_matrix = np.array([right_vector, corrected_up_vector, forward_vector]).T
cam_pose = np.eye(4)
cam_pose[:3, :3] = rotation_matrix
cam_pose[:3, 3] = cam_position_world
cam_poses.append(cam_pose)
filtered_cam_poses = []
for cam_pose in cam_poses:
if cam_pose[2, 3] > min_height_z:
direction_vector = cam_pose[:3, 2]
horizontal_normal = np.array([0, 0, 1])
cos_angle = np.dot(direction_vector, horizontal_normal) / (np.linalg.norm(direction_vector) * np.linalg.norm(horizontal_normal))
angle = np.arccos(np.clip(cos_angle, -1.0, 1.0))
angle_degree = np.degrees(angle)
if angle_degree < 90 - min_cam_table_included_degree:
filtered_cam_poses.append(cam_pose)
if random.random() < random_view_ratio:
pertube_pose = PoseUtil.get_uniform_pose([0.1, 0.1, 0.1], [3, 3, 3], 0, 180, "cm")
filtered_cam_poses.append(pertube_pose @ cam_pose)
if len(filtered_cam_poses) > max_views:
indices = np.random.choice(len(filtered_cam_poses), max_views, replace=False)
filtered_cam_poses = [filtered_cam_poses[i] for i in indices]
return np.array(filtered_cam_poses)
@staticmethod
def sample_view_data_world_space(mesh: trimesh.Trimesh, cad_to_world: np.ndarray, distance_range:tuple = (0.25,0.5), voxel_size:float = 0.005, max_views: int=1, min_cam_table_included_degree:int=20, random_view_ratio:float = 0.2) -> dict:
view_data = ViewSampleUtil.sample_view_data(mesh, distance_range, voxel_size, max_views)
view_data["cam_to_world_poses"] = ViewSampleUtil.get_cam_pose(view_data, cad_to_world, max_views, min_cam_table_included_degree, random_view_ratio)
view_data["voxel_down_sampled_points"], view_data["normals"] = ViewSampleUtil.get_world_points_and_normals(view_data, cad_to_world)
return view_data