commit 3ab046b134 (parent d9d2716ba7)
hofee, 2024-10-08 21:28:30 +08:00

11 changed files with 773 additions and 354 deletions

.gitignore

@@ -3,7 +3,7 @@
__pycache__/
*.py[cod]
*$py.class
test_output/
# C extensions
*.so


@@ -10,7 +10,8 @@ runner:
root_dir: "experiments"
generate:
model_dir: "/home/yan20/nbv_rec/data/test_CAD/test_model"
model_dir: "/home/user/nbv_rec/data/models"
table_model_path: "/home/user/nbv_rec/data/table.obj"
model_start_idx: 0
voxel_size: 0.005
max_view: 512
@@ -19,6 +20,19 @@
min_diag: 0.01
random_view_ratio: 0
min_cam_table_included_degree: 20
obj_name: "bear"
light_and_camera_config:
Camera:
near_plane: 0.01
far_plane: 5
fov_vertical: 25
resolution: [1280,800]
eye_distance: 0.15
eye_angle: 25
Light:
location: [0,0,3.5]
orientation: [0,0,0]
power: 150
reconstruct:
soft_overlap_threshold: 0.3
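Note: these new values are consumed through PytorchBoot's ConfigManager, the same accessor the runners below use for the reconstruct block. A minimal sketch of reading the camera/light settings back (the "generate" key path is an assumption based on that pattern):

    from PytorchBoot.config import ConfigManager

    generate_config = ConfigManager.get("runner", "generate")  # assumed key path
    camera = generate_config["light_and_camera_config"]["Camera"]
    light = generate_config["light_and_camera_config"]["Light"]
    near, far = camera["near_plane"], camera["far_plane"]      # 0.01, 5
    width, height = camera["resolution"]                       # 1280 x 800
    power = light["power"]                                     # 150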


@@ -1,5 +1,7 @@
import os
import trimesh
import tempfile
import subprocess
import numpy as np
from PytorchBoot.runners.runner import Runner
from PytorchBoot.config import ConfigManager
@@ -7,16 +9,12 @@ import PytorchBoot.stereotype as stereotype
from PytorchBoot.utils.log_util import Log
from PytorchBoot.status import status_manager
import sys
sys.path.append(r"C:\Document\Local Project\nbv_rec_control")
#from utils.control_util import ControlUtil
from utils.control_util import ControlUtil
from utils.communicate_util import CommunicateUtil
from utils.pts_util import PtsUtil
from utils.view_sample_util import ViewSampleUtil
from utils.reconstruction_util import ReconstructionUtil
from utils.preprocess_util import save_scene_data
from utils.data_load import DataLoadUtil
@stereotype.runner("CAD_strategy_runner")
class CADStrategyRunner(Runner):
@@ -50,79 +48,111 @@ class CADStrategyRunner(Runner):
def load_experiment(self, backup_name=None):
super().load_experiment(backup_name)
def get_pts_from_view_data(self, view_data):
depth = view_data["depth_image"]
depth_intrinsics = view_data["depth_intrinsics"]
depth_extrinsics = view_data["depth_extrinsics"]
cam_pts = PtsUtil.get_pts_from_depth(depth, depth_intrinsics, depth_extrinsics)
return cam_pts
def split_scan_pts_and_obj_pts(self, world_pts, scan_pts_z, z_threshold = 0.003):
scan_pts = world_pts[scan_pts_z < z_threshold]
obj_pts = world_pts[scan_pts_z >= z_threshold]
return scan_pts, obj_pts
def run_one_model(self, model_name):
''' init robot '''
ControlUtil.init()
#ControlUtil.init()
''' load CAD model '''
model_path = os.path.join(self.model_dir, model_name)
model_path = os.path.join(self.model_dir, model_name,"mesh.obj")
cad_model = trimesh.load(model_path)
''' take first view '''
view_data = CommunicateUtil.get_view_data()
first_cam_pts = None
#view_data = CommunicateUtil.get_view_data(init=True)
#first_cam_pts = self.get_pts_from_view_data(view_data)
''' register '''
cad_to_cam = PtsUtil.register_icp(first_cam_pts, cad_model)
cam_to_world = ControlUtil.get_pose()
cad_to_world = cam_to_world @ cad_to_cam
cad_model:trimesh.Trimesh = cad_model.apply_transform(cad_to_world)
''' sample view '''
min_corner = cad_model.bounds[0]
max_corner = cad_model.bounds[1]
diag = np.linalg.norm(max_corner - min_corner)
view_num = int(self.min_view + (diag - self.min_diag)/(self.max_diag - self.min_diag) * (self.max_view - self.min_view))
sampled_view_data = ViewSampleUtil.sample_view_data_world_space(
cad_model, cad_to_world,
voxel_size = self.voxel_size,
max_views = view_num,
min_cam_table_included_degree= self.min_cam_table_included_degree,
random_view_ratio = self.random_view_ratio
)
cam_to_world_poses = sampled_view_data["cam_to_world_poses"]
world_model_points = sampled_view_data["voxel_down_sampled_points"]
#cad_to_cam = PtsUtil.register(first_cam_pts, cad_model)
#cam_to_world = ControlUtil.get_pose()
cad_to_world = np.eye(4) #cam_to_world @ cad_to_cam
world_to_blender_world = np.eye(4)
world_to_blender_world[:3, 3] = np.asarray([0, 0, 0.9215])
cad_to_blender_world = np.dot(world_to_blender_world, cad_to_world)
cad_model:trimesh.Trimesh = cad_model.apply_transform(cad_to_blender_world)
with tempfile.TemporaryDirectory() as temp_dir:
name = "cad_model_world"
cad_model.export(os.path.join(temp_dir, f"{name}.obj"))
temp_dir = "/home/user/nbv_rec/nbv_rec_control/test_output"
scene_dir = os.path.join(temp_dir, name)
script_path = "/home/user/nbv_rec/blender_app/data_generator.py"
''' sample view '''
# import ipdb; ipdb.set_trace()
# print("start running renderer")
# result = subprocess.run([
# 'blender', '-b', '-P', script_path, '--', temp_dir
# ], capture_output=True, text=True)
# print("finish running renderer")
#
world_model_points = np.loadtxt(os.path.join(scene_dir, "points_and_normals.txt"))[:,:3]
''' preprocess '''
# save_scene_data(temp_dir, name)
pts_dir = os.path.join(temp_dir,name,"pts")
sample_view_pts_list = []
scan_points_idx_list = []
frame_num = len(os.listdir(pts_dir))
for frame_idx in range(frame_num):
pts_path = os.path.join(temp_dir,name, "pts", f"{frame_idx}.txt")
idx_path = os.path.join(temp_dir,name, "scan_points_indices", f"{frame_idx}.txt")
point_cloud = np.loadtxt(pts_path)
sampled_point_cloud = PtsUtil.voxel_downsample_point_cloud(point_cloud, self.voxel_size)
indices = np.loadtxt(idx_path, dtype=np.int32)
try:
len(indices)
except:
indices = np.array([indices])
sample_view_pts_list.append(sampled_point_cloud)
scan_points_idx_list.append(indices)
''' take sample view '''
scan_points_idx_list = []
sample_view_pts_list = []
for cam_to_world in cam_to_world_poses:
ControlUtil.move_to(cam_to_world)
''' get world pts '''
view_data = CommunicateUtil.get_view_data()
cam_pts = None
scan_points_idx = None
world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
sample_view_pts_list.append(world_pts)
scan_points_idx_list.append(scan_points_idx)
''' generate strategy '''
''' generate strategy '''
limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_overlap(
world_model_points, sample_view_pts_list,
scan_points_indices_list = scan_points_idx_list,
init_view=0,
threshold=self.voxel_size,
soft_overlap_threshold = self.soft_overlap_threshold,
hard_overlap_threshold = self.hard_overlap_threshold,
scan_points_threshold = self.scan_points_threshold,
status_info=self.status_info
)
''' extract cam_to_world sequence '''
cam_to_world_seq = []
coverage_rate_seq = []
for idx, coverage_rate in limited_useful_view:
cam_to_world_seq.append(cam_to_world_poses[idx])
coverage_rate_seq.append(coverage_rate)
limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_overlap(
world_model_points, sample_view_pts_list,
scan_points_indices_list = scan_points_idx_list,
init_view=0,
threshold=self.voxel_size,
soft_overlap_threshold = self.soft_overlap_threshold,
hard_overlap_threshold = self.hard_overlap_threshold,
scan_points_threshold = self.scan_points_threshold,
status_info=self.status_info
)
''' extract cam_to_world sequence '''
cam_to_world_seq = []
coverage_rate_seq = []
''' take best seq view '''
for cam_to_world in cam_to_world_seq:
ControlUtil.move_to(cam_to_world)
''' get world pts '''
view_data = CommunicateUtil.get_view_data()
cam_pts = None
scan_points_idx = None
world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
sample_view_pts_list.append(world_pts)
scan_points_idx_list.append(scan_points_idx)
from ipdb import set_trace; set_trace()
for idx, coverage_rate in limited_useful_view:
path = DataLoadUtil.get_path(temp_dir, name, idx)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
cam_to_world_seq.append(cam_info["cam_to_world"])
coverage_rate_seq.append(coverage_rate)
# ''' take best seq view '''
# for cam_to_world in cam_to_world_seq:
# ControlUtil.move_to(cam_to_world)
# ''' get world pts '''
# view_data = CommunicateUtil.get_view_data()
# cam_pts = self.get_pts_from_view_data(view_data)
# scan_points_idx = None
# world_pts = PtsUtil.transform_point_cloud(cam_pts, cam_to_world)
# sample_view_pts_list.append(world_pts)
# scan_points_idx_list.append(scan_points_idx)
def run(self):
@@ -159,32 +189,5 @@ if __name__ == "__main__":
# cad_pts_L = PtsUtil.transform_point_cloud(noisy_pts_L, cad_to_cam_L)
# np.savetxt(r"test.txt", cad_pts_L)
# np.savetxt(r"src.txt", noisy_pts_L)
''' test view sample '''
sampled_view_data = ViewSampleUtil.sample_view_data_world_space(
model, np.eye(4),
voxel_size = 0.005,
max_views = 20,
min_cam_table_included_degree= 20,
random_view_ratio = 0
)
cam_poses = sampled_view_data["cam_to_world_poses"]
cam_poses = np.array(cam_poses)
print(cam_poses.shape)
def sample_camera_direction(cam_pose, num_samples, sample_distance):
cam_position = cam_pose[:3, 3]
cam_direction = cam_pose[:3, 2]
sampled_points = np.array([cam_position - (i + 1) * sample_distance * cam_direction for i in range(num_samples)])
return sampled_points
all_sampled_points = []
for i in range(cam_poses.shape[0]):
sampled_points = sample_camera_direction(cam_poses[i], 10, 0.02)
all_sampled_points.append(sampled_points)
all_sampled_points = np.concatenate(all_sampled_points, axis=0)
np.savetxt(r"cam_poses.txt", cam_poses[:, :3, 3])
np.savetxt(r"cam_directions.txt", all_sampled_points)


@@ -1,87 +0,0 @@
import os
import trimesh
import numpy as np
from PytorchBoot.runners.runner import Runner
from PytorchBoot.config import ConfigManager
import PytorchBoot.stereotype as stereotype
from PytorchBoot.utils.log_util import Log
import PytorchBoot.namespace as namespace
from PytorchBoot.status import status_manager
from utils.control_util import ControlUtil
from utils.communicate_util import CommunicateUtil
from utils.pts_util import PtsUtil
from utils.view_sample_util import ViewSampleUtil
from utils.reconstruction_util import ReconstructionUtil
@stereotype.runner("inferencer")
class Inferencer(Runner):
def __init__(self, config_path: str):
super().__init__(config_path)
self.load_experiment("inferencer")
self.reconstruct_config = ConfigManager.get("runner", "reconstruct")
self.voxel_size = self.reconstruct_config["voxel_size"]
self.max_iter = self.reconstruct_config["max_iter"]
def create_experiment(self, backup_name=None):
super().create_experiment(backup_name)
def load_experiment(self, backup_name=None):
super().load_experiment(backup_name)
def run_inference(self, model_name):
''' init robot '''
ControlUtil.init()
''' take first view '''
view_data = CommunicateUtil.get_view_data()
first_cam_pts = None
first_cam_pose = None
combined_pts = first_cam_pts
input_data = {
"scanned_target_points_num": [first_cam_pts.shape[0]],
"scanned_n_to_world_pose_9d": [first_cam_pose],
"combined_scanned_pts": combined_pts
}
''' enter loop '''
iter = 0
while True:
''' inference '''
inference_result = CommunicateUtil.get_inference_data(input_data)
cam_to_world = inference_result["cam_to_world"]
''' set pose '''
ControlUtil.set_pose(cam_to_world)
''' take view '''
view_data = CommunicateUtil.get_view_data()
curr_cam_pts = None
curr_cam_pose = None
''' update combined pts '''
combined_pts = np.concatenate([combined_pts, curr_cam_pts], axis=0)
combined_pts = PtsUtil.voxel_downsample_point_cloud(combined_pts, voxel_size=self.voxel_size)
''' update input data '''
input_data["combined_scanned_pts"] = combined_pts
input_data["scanned_target_points_num"].append(curr_cam_pts.shape[0])
input_data["scanned_n_to_world_pose_9d"].append(curr_cam_pose)
''' check stop condition '''
if iter >= self.max_iter:
break
def run(self):
self.run_inference()
if __name__ == "__main__":
model_path = "/home/yan20/nbv_rec/data/test_CAD/test_model/bear_scaled.ply"
model = trimesh.load(model_path)
test_pts_L = np.loadtxt("/home/yan20/nbv_rec/data/test_CAD/cam_pts_0_L.txt")
test_pts_R = np.loadtxt("/home/yan20/nbv_rec/data/test_CAD/cam_pts_0_R.txt")
cam_to_world_L = PtsUtil.register_icp(test_pts_L, model)
cam_to_world_R = PtsUtil.register_icp(test_pts_R, model)
print(cam_to_world_L)
print("================================")
print(cam_to_world_R)
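Note: the removed loop above sets iter = 0 but never increments it, so the max_iter stop condition can never trigger. A minimal corrected skeleton (hypothetical, not part of this commit):

    def run_capture_loop(max_iter: int) -> None:
        iter_count = 0
        while True:
            # inference / set pose / take view / merge points, as in the removed code
            iter_count += 1
            if iter_count >= max_iter:  # reachable now that the counter advances
                break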

utils/communicate_util.py

@@ -1,10 +1,20 @@
import requests
import numpy as np
import cv2
class CommunicateUtil:
VIEW_HOST = "127.0.0.1:5000"
INFERENCE_HOST = "127.0.0.1:5000"
def get_view_data() -> dict:
data = {}
def get_view_data(init = False) -> dict:
params = {}
params["create_scanner"] = init
response = requests.get(f"http://{CommunicateUtil.VIEW_HOST}/api/data", json=params)
data = response.json()
if not data["success"]:
print(f"Failed to get view data")
return None
return data
def get_inference_data(view_data:dict) -> dict:
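Typical call pattern for the updated endpoint, assuming the view server is reachable at VIEW_HOST; the depth_* keys follow the runner's get_pts_from_view_data above:

    view_data = CommunicateUtil.get_view_data(init=True)  # first call asks the server to create the scanner
    if view_data is not None:
        depth = view_data["depth_image"]
        depth_intrinsics = view_data["depth_intrinsics"]
        depth_extrinsics = view_data["depth_extrinsics"]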

utils/data_load.py (new file, +410)

@@ -0,0 +1,410 @@
import os
import numpy as np
import json
import cv2
import trimesh
import torch
from utils.pts_util import PtsUtil
class DataLoadUtil:
TABLE_POSITION = np.asarray([0, 0, 0.8215])
@staticmethod
def get_display_table_info(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
display_table_info = scene_info["display_table"]
return display_table_info
@staticmethod
def get_display_table_top(root, scene_name):
display_table_height = DataLoadUtil.get_display_table_info(root, scene_name)[
"height"
]
display_table_top = DataLoadUtil.TABLE_POSITION + np.asarray(
[0, 0, display_table_height]
)
return display_table_top
@staticmethod
def get_path(root, scene_name, frame_idx):
path = os.path.join(root, scene_name, f"{frame_idx}")
return path
@staticmethod
def get_label_num(root, scene_name):
label_dir = os.path.join(root, scene_name, "label")
return len(os.listdir(label_dir))
@staticmethod
def get_label_path(root, scene_name, seq_idx):
label_dir = os.path.join(root, scene_name, "label")
if not os.path.exists(label_dir):
os.makedirs(label_dir)
path = os.path.join(label_dir, f"{seq_idx}.json")
return path
@staticmethod
def get_label_path_old(root, scene_name):
path = os.path.join(root, scene_name, "label.json")
return path
@staticmethod
def get_scene_seq_length(root, scene_name):
camera_params_path = os.path.join(root, scene_name, "camera_params")
return len(os.listdir(camera_params_path))
@staticmethod
def load_mesh_at(model_dir, object_name, world_object_pose):
model_path = os.path.join(model_dir, object_name, "mesh.obj")
mesh = trimesh.load(model_path)
mesh.apply_transform(world_object_pose)
return mesh
@staticmethod
def get_bbox_diag(model_dir, object_name):
model_path = os.path.join(model_dir, object_name, "mesh.obj")
mesh = trimesh.load(model_path)
bbox = mesh.bounding_box.extents
diagonal_length = np.linalg.norm(bbox)
return diagonal_length
@staticmethod
def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
model_path = os.path.join(output_dir, scene_name, "world_mesh.obj")
mesh.export(model_path)
@staticmethod
def save_target_mesh_at_world_space(
root, model_dir, scene_name, display_table_as_world_space_origin=True
):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
if display_table_as_world_space_origin:
location = transformation["location"] - DataLoadUtil.get_display_table_top(
root, scene_name
)
else:
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
mesh_dir = os.path.join(root, scene_name, "mesh")
if not os.path.exists(mesh_dir):
os.makedirs(mesh_dir)
model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
mesh.export(model_path)
@staticmethod
def load_scene_info(root, scene_name):
scene_info_path = os.path.join(root, scene_name, "scene_info.json")
with open(scene_info_path, "r") as f:
scene_info = json.load(f)
return scene_info
@staticmethod
def load_target_pts_num_dict(root, scene_name):
target_pts_num_path = os.path.join(root, scene_name, "target_pts_num.json")
with open(target_pts_num_path, "r") as f:
target_pts_num_dict = json.load(f)
return target_pts_num_dict
@staticmethod
def load_target_object_pose(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
return pose_mat
@staticmethod
def load_depth(path, min_depth=0.01, max_depth=5.0, binocular=False):
def load_depth_from_real_path(real_path, min_depth, max_depth):
depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED)
depth = depth.astype(np.float32) / 65535.0
depth_meters = min_depth + (max_depth - min_depth) * depth
return depth_meters
if binocular:
depth_path_L = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + "_L.png"
)
depth_path_R = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + "_R.png"
)
depth_meters_L = load_depth_from_real_path(
depth_path_L, min_depth, max_depth
)
depth_meters_R = load_depth_from_real_path(
depth_path_R, min_depth, max_depth
)
return depth_meters_L, depth_meters_R
else:
depth_path = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + ".png"
)
depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth)
return depth_meters
@staticmethod
def load_seg(path, binocular=False, left_only=False):
if binocular and not left_only:
def clean_mask(mask_image):
green = [0, 255, 0, 255]
red = [255, 0, 0, 255]
threshold = 2
mask_image = np.where(
np.abs(mask_image - green) <= threshold, green, mask_image
)
mask_image = np.where(
np.abs(mask_image - red) <= threshold, red, mask_image
)
return mask_image
mask_path_L = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
)
mask_image_L = clean_mask(cv2.imread(mask_path_L, cv2.IMREAD_UNCHANGED))
mask_path_R = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_R.png"
)
mask_image_R = clean_mask(cv2.imread(mask_path_R, cv2.IMREAD_UNCHANGED))
return mask_image_L, mask_image_R
else:
if binocular and left_only:
mask_path = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
)
else:
mask_path = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + ".png"
)
mask_image = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
return mask_image
@staticmethod
def load_normal(path, binocular=False, left_only=False):
if binocular and not left_only:
normal_path_L = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
)
normal_image_L = cv2.imread(normal_path_L, cv2.IMREAD_COLOR)
normal_path_R = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + "_R.png"
)
normal_image_R = cv2.imread(normal_path_R, cv2.IMREAD_COLOR)
normalized_normal_image_L = normal_image_L / 255.0 * 2.0 - 1.0
normalized_normal_image_R = normal_image_R / 255.0 * 2.0 - 1.0
return normalized_normal_image_L, normalized_normal_image_R
else:
if binocular and left_only:
normal_path = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
)
else:
normal_path = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + ".png"
)
normal_image = cv2.imread(normal_path, cv2.IMREAD_COLOR)
normalized_normal_image = normal_image / 255.0 * 2.0 - 1.0
return normalized_normal_image
@staticmethod
def load_label(path):
with open(path, "r") as f:
label_data = json.load(f)
return label_data
@staticmethod
def load_rgb(path):
rgb_path = os.path.join(
os.path.dirname(path), "rgb", os.path.basename(path) + ".png"
)
rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR)
return rgb_image
@staticmethod
def load_from_preprocessed_pts(path):
npy_path = os.path.join(
os.path.dirname(path), "pts", os.path.basename(path) + ".npy"
)
pts = np.load(npy_path)
return pts
@staticmethod
def cam_pose_transformation(cam_pose_before):
offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
cam_pose_after = cam_pose_before @ offset
return cam_pose_after
@staticmethod
def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
scene_dir = os.path.dirname(path)
root_dir = os.path.dirname(scene_dir)
scene_name = os.path.basename(scene_dir)
camera_params_path = os.path.join(
os.path.dirname(path), "camera_params", os.path.basename(path) + ".json"
)
with open(camera_params_path, "r") as f:
label_data = json.load(f)
cam_to_world = np.asarray(label_data["extrinsic"])
cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
world_to_display_table = np.eye(4)
world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
if display_table_as_world_space_origin:
cam_to_world = np.dot(world_to_display_table, cam_to_world)
cam_intrinsic = np.asarray(label_data["intrinsic"])
cam_info = {
"cam_to_world": cam_to_world,
"cam_intrinsic": cam_intrinsic,
"far_plane": label_data["far_plane"],
"near_plane": label_data["near_plane"],
}
if binocular:
cam_to_world_R = np.asarray(label_data["extrinsic_R"])
cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
if display_table_as_world_space_origin:
cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O)
cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R)
cam_info["cam_to_world_O"] = cam_to_world_O
cam_info["cam_to_world_R"] = cam_to_world_R
return cam_info
@staticmethod
def get_real_cam_O_from_cam_L(
cam_L, cam_O_to_cam_L, scene_path, display_table_as_world_space_origin=True
):
root_dir = os.path.dirname(scene_path)
scene_name = os.path.basename(scene_path)
if isinstance(cam_L, torch.Tensor):
cam_L = cam_L.cpu().numpy()
nO_to_display_table_pose = cam_L @ cam_O_to_cam_L
if display_table_as_world_space_origin:
display_table_to_world = np.eye(4)
display_table_to_world[:3, 3] = DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
nO_to_world_pose = np.dot(display_table_to_world, nO_to_display_table_pose)
nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
return nO_to_world_pose
@staticmethod
def get_target_point_cloud(
depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0, 255, 0, 255), require_full_points=False
):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
mask = mask.reshape(-1, 4)
target_mask = (mask == target_mask_label).all(axis=-1)
target_points_camera = points_camera[target_mask]
target_points_camera_aug = np.concatenate(
[target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1
)
target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3]
data = {
"points_world": target_points_world,
"points_camera": target_points_camera,
}
return data
@staticmethod
def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
points_camera_aug = np.concatenate(
[points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
)
points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return {"points_world": points_world, "points_camera": points_camera}
@staticmethod
def get_target_point_cloud_world_from_path(
path,
binocular=False,
random_downsample_N=65536,
voxel_size=0.005,
target_mask_label=(0, 255, 0, 255),
display_table_mask_label=(0, 0, 255, 255),
get_display_table_pts=False,
require_normal=False,
):
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
if binocular:
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"], cam_info["far_plane"], binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
point_cloud_L = DataLoadUtil.get_target_point_cloud(
depth_L,
cam_info["cam_intrinsic"],
cam_info["cam_to_world"],
mask_L,
target_mask_label,
)["points_world"]
point_cloud_R = DataLoadUtil.get_target_point_cloud(
depth_R,
cam_info["cam_intrinsic"],
cam_info["cam_to_world_R"],
mask_R,
target_mask_label,
)["points_world"]
point_cloud_L = PtsUtil.random_downsample_point_cloud(
point_cloud_L, random_downsample_N
)
point_cloud_R = PtsUtil.random_downsample_point_cloud(
point_cloud_R, random_downsample_N
)
overlap_points = PtsUtil.get_overlapping_points(
point_cloud_L, point_cloud_R, voxel_size
)
return overlap_points
else:
depth = DataLoadUtil.load_depth(
path, cam_info["near_plane"], cam_info["far_plane"]
)
mask = DataLoadUtil.load_seg(path)
point_cloud = DataLoadUtil.get_target_point_cloud(
depth, cam_info["cam_intrinsic"], cam_info["cam_to_world"], mask
)["points_world"]
return point_cloud
@staticmethod
def load_points_normals(root, scene_name, display_table_as_world_space_origin=True):
points_path = os.path.join(root, scene_name, "points_and_normals.txt")
points_normals = np.loadtxt(points_path)
if display_table_as_world_space_origin:
points_normals[:, :3] = points_normals[
:, :3
] - DataLoadUtil.get_display_table_top(root, scene_name)
return points_normals
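A sketch of how these loaders chain together for one binocular frame, using only functions defined above (root and scene are placeholders):

    root, scene = "/path/to/dataset", "scene_0"
    path = DataLoadUtil.get_path(root, scene, 0)  # frame 0
    cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
    depth_L, depth_R = DataLoadUtil.load_depth(
        path, cam_info["near_plane"], cam_info["far_plane"], binocular=True
    )
    target_pts = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)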

utils/preprocess_util.py (new file, +158)

@@ -0,0 +1,158 @@
import os
import numpy as np
import time
import sys
np.random.seed(0)
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.reconstruction_util import ReconstructionUtil
from utils.data_load import DataLoadUtil
from utils.pts_util import PtsUtil
def save_np_pts(path, pts: np.ndarray, file_type="txt"):
if file_type == "txt":
np.savetxt(path, pts)
else:
np.save(path, pts)
def save_target_points(root, scene, frame_idx, target_points: np.ndarray, file_type="txt"):
pts_path = os.path.join(root,scene, "pts", f"{frame_idx}.{file_type}")
if not os.path.exists(os.path.join(root,scene, "pts")):
os.makedirs(os.path.join(root,scene, "pts"))
save_np_pts(pts_path, target_points, file_type)
def save_scan_points_indices(root, scene, frame_idx, scan_points_indices: np.ndarray, file_type="txt"):
indices_path = os.path.join(root,scene, "scan_points_indices", f"{frame_idx}.{file_type}")
if not os.path.exists(os.path.join(root,scene, "scan_points_indices")):
os.makedirs(os.path.join(root,scene, "scan_points_indices"))
save_np_pts(indices_path, scan_points_indices, file_type)
def save_scan_points(root, scene, scan_points: np.ndarray):
scan_points_path = os.path.join(root,scene, "scan_points.txt")
save_np_pts(scan_points_path, scan_points)
def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
points_camera_aug = np.concatenate((points_camera, np.ones((points_camera.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world
def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
points_image_homogeneous /= points_image_homogeneous[:, 2:]
pixel_x = points_image_homogeneous[:, 0].astype(int)
pixel_y = points_image_homogeneous[:, 1].astype(int)
h, w = mask.shape[:2]
valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
selected_points_indices = np.where((mask_colors == display_table_mask_label).all(axis=-1))[0]
selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
return selected_points_indices
def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
''' configuration '''
target_mask_label = (0, 255, 0, 255)
display_table_mask_label=(0, 0, 255, 255)
random_downsample_N = 32768
voxel_size=0.002
filter_degree = 75
min_z = 0.2
max_z = 0.5
''' scan points '''
scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0,display_table_radius=0.25))
''' read frame data(depth|mask|normal) '''
frame_num = DataLoadUtil.get_scene_seq_length(root, scene)
for frame_id in range(frame_num):
print(f"[scene({scene_idx}/{scene_total})|frame({frame_id}/{frame_num})]Processing {scene} frame {frame_id}")
path = DataLoadUtil.get_path(root, scene, frame_id)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
''' target points '''
mask_img_L = mask_L
mask_img_R = mask_R
target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
target_mask_img_R = (mask_R == target_mask_label).all(axis=-1)
target_points_L = get_world_points(depth_L, target_mask_img_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
target_points_R = get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
sampled_target_points_L = PtsUtil.random_downsample_point_cloud(
target_points_L, random_downsample_N
)
sampled_target_points_R = PtsUtil.random_downsample_point_cloud(
target_points_R, random_downsample_N
)
has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
if has_points:
target_points = PtsUtil.get_overlapping_points(
sampled_target_points_L, sampled_target_points_R, voxel_size
)
if has_points:
has_points = target_points.shape[0] > 0
if has_points:
points_normals = DataLoadUtil.load_points_normals(root, scene, display_table_as_world_space_origin=True)
target_points = PtsUtil.filter_points(
target_points, points_normals, cam_info["cam_to_world"],voxel_size=0.002, theta = filter_degree, z_range=(min_z, max_z)
)
''' scan points indices '''
scan_points_indices_L = get_scan_points_indices(scan_points, mask_img_L, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
scan_points_indices_R = get_scan_points_indices(scan_points, mask_img_R, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
if not has_points:
target_points = np.zeros((0, 3))
save_target_points(root, scene, frame_id, target_points, file_type=file_type)
save_scan_points_indices(root, scene, frame_id, scan_points_indices, file_type=file_type)
save_scan_points(root, scene, scan_points) # The "done" flag of scene preprocess
if __name__ == "__main__":
#root = "/media/hofee/repository/new_data_with_normal"
root = r"/media/hofee/data/tempdir/test_real_output"
# list_path = r"/media/hofee/repository/full_list.txt"
# scene_list = []
# with open(list_path, "r") as f:
# for line in f:
# scene_list.append(line.strip())
scene_list = os.listdir(root)
from_idx = 0 # 1000
to_idx = 1 # 1500
cnt = 0
import time
total = to_idx - from_idx
for scene in scene_list[from_idx:to_idx]:
start = time.time()
save_scene_data(root, scene, cnt, total, file_type="npy")
cnt+=1
end = time.time()
print(f"Time cost: {end-start}")

utils/pts_util.py

@@ -169,3 +169,25 @@ class PtsUtil:
)
return reg_icp.transformation
@staticmethod
def get_pts_from_depth(depth, cam_intrinsic, cam_extrinsic):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
# no segmentation mask here: every depth pixel is back-projected
points_camera = np.concatenate(
[points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
)
points_world = np.dot(cam_extrinsic, points_camera.T).T[:, :3]
data = {
"points_world": points_world,
"points_camera": points_camera,
}
return data
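Usage sketch: the helper returns a dict, not a bare array; note that points_camera is homogeneous (N x 4) after the ones-column append above:

    result = PtsUtil.get_pts_from_depth(depth, depth_intrinsics, depth_extrinsics)
    world_pts = result["points_world"]   # N x 3, extrinsic frame
    cam_pts_h = result["points_camera"]  # N x 4, homogeneous camera frame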

utils/reconstruction_util.py

@@ -152,9 +152,15 @@ class ReconstructionUtil:
@staticmethod
def check_scan_points_overlap(history_indices, indices2, threshold=5):
for indices1 in history_indices:
if len(set(indices1).intersection(set(indices2))) >= threshold:
return True
try:
if len(indices2) == 0:
return False
for indices1 in history_indices:
if len(set(indices1).intersection(set(indices2))) >= threshold:
return True
except Exception as e:
print(e)
import ipdb; ipdb.set_trace()
return False
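Illustrative call (values made up): two views count as overlapping once they share at least threshold scan-point indices:

    history = [np.array([3, 7, 9, 12, 15])]
    new_view = np.array([7, 9, 12, 15, 20])
    ReconstructionUtil.check_scan_points_overlap(history, new_view, threshold=4)  # True: 4 shared indices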

utils/render_util.py (new file, +45)

@@ -0,0 +1,45 @@
import os
import json
import subprocess
import tempfile
import shutil
from utils.data_load import DataLoadUtil
from utils.pts_util import PtsUtil
class RenderUtil:
@staticmethod
def render_pts(cam_pose, object_name, scene_path, script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
with tempfile.TemporaryDirectory() as temp_dir:
params = {
"cam_pose": nO_to_world_pose.tolist(),
"object_name": scene_path
}
params_data_path = os.path.join(temp_dir, "params.json")
with open(params_data_path, 'w') as f:
json.dump(params, f)
result = subprocess.run([
'blender', '-b', '-P', script_path, '--', temp_dir
], capture_output=True, text=True)
if result.returncode != 0:
print("Blender script failed:")
print(result.stderr)
return None
path = os.path.join(temp_dir, "tmp")
point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
full_scene_point_cloud = None
if require_full_scene:
depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)
point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_params['cam_intrinsic'], cam_params['cam_to_world'])['points_world']
point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_params['cam_intrinsic'], cam_params['cam_to_world_R'])['points_world']
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
full_scene_point_cloud = PtsUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
return filtered_point_cloud, full_scene_point_cloud

utils/view_sample_util.py

@@ -1,162 +0,0 @@
import numpy as np
from utils.pose_util import PoseUtil
import trimesh
from collections import defaultdict
from scipy.spatial.transform import Rotation as R
import random
class ViewSampleUtil:
@staticmethod
def farthest_point_sampling(points, num_samples):
num_points = points.shape[0]
if num_samples >= num_points:
return points, np.arange(num_points)
sampled_indices = np.zeros(num_samples, dtype=int)
sampled_indices[0] = np.random.randint(num_points)
min_distances = np.full(num_points, np.inf)
for i in range(1, num_samples):
current_point = points[sampled_indices[i - 1]]
dist_to_current_point = np.linalg.norm(points - current_point, axis=1)
min_distances = np.minimum(min_distances, dist_to_current_point)
sampled_indices[i] = np.argmax(min_distances)
downsampled_points = points[sampled_indices]
return downsampled_points, sampled_indices
@staticmethod
def voxel_downsample(points, voxel_size):
voxel_grid = defaultdict(list)
for i, point in enumerate(points):
voxel_index = tuple((point // voxel_size).astype(int))
voxel_grid[voxel_index].append(i)
downsampled_points = []
downsampled_indices = []
for indices in voxel_grid.values():
selected_index = indices[0]
downsampled_points.append(points[selected_index])
downsampled_indices.append(selected_index)
return np.array(downsampled_points), downsampled_indices
@staticmethod
def sample_view_data(mesh: trimesh.Trimesh, distance_range: tuple = (0.25, 0.5), voxel_size: float = 0.005, max_views: int = 1, pertube_repeat: int = 1) -> dict:
view_data = {
"look_at_points": [],
"cam_positions": [],
}
vertices = mesh.vertices
look_at_points = []
cam_positions = []
normals = []
vertex_normals = mesh.vertex_normals
for i, vertex in enumerate(vertices):
look_at_point = vertex
view_data["look_at_points"].append(look_at_point)
normal = vertex_normals[i]
if np.isnan(normal).any():
continue
if np.dot(normal, look_at_point) < 0:
normal = -normal
normals.append(normal)
for _ in range(pertube_repeat):
perturb_angle = np.radians(np.random.uniform(0, 30))
perturb_axis = np.random.normal(size=3)
perturb_axis /= np.linalg.norm(perturb_axis)
rotation_matrix = R.from_rotvec(perturb_angle * perturb_axis).as_matrix()
perturbed_normal = np.dot(rotation_matrix, normal)
distance = np.random.uniform(*distance_range)
cam_position = look_at_point + distance * perturbed_normal
look_at_points.append(look_at_point)
cam_positions.append(cam_position)
look_at_points = np.array(look_at_points)
cam_positions = np.array(cam_positions)
voxel_downsampled_look_at_points, selected_indices = ViewSampleUtil.voxel_downsample(look_at_points, voxel_size)
voxel_downsampled_cam_positions = cam_positions[selected_indices]
voxel_downsampled_normals = np.array(normals)[selected_indices]
fps_downsampled_look_at_points, selected_indices = ViewSampleUtil.farthest_point_sampling(voxel_downsampled_look_at_points, max_views * 2)
fps_downsampled_cam_positions = voxel_downsampled_cam_positions[selected_indices]
view_data["look_at_points"] = fps_downsampled_look_at_points.tolist()
view_data["cam_positions"] = fps_downsampled_cam_positions.tolist()
view_data["normals"] = voxel_downsampled_normals.tolist()
view_data["voxel_down_sampled_points"] = voxel_downsampled_look_at_points
return view_data
@staticmethod
def get_world_points_and_normals(view_data: dict, obj_world_pose: np.ndarray) -> tuple:
world_points = []
world_normals = []
for voxel_down_sampled_points, normal in zip(view_data["voxel_down_sampled_points"], view_data["normals"]):
voxel_down_sampled_points_world = obj_world_pose @ np.append(voxel_down_sampled_points, 1.0)
normal_world = obj_world_pose[:3, :3] @ normal
world_points.append(voxel_down_sampled_points_world[:3])
world_normals.append(normal_world)
return np.array(world_points), np.array(world_normals)
@staticmethod
def get_cam_pose(view_data: dict, obj_world_pose: np.ndarray, max_views: int, min_cam_table_included_degree: int, random_view_ratio: float) -> np.ndarray:
cam_poses = []
min_height_z = 1000
for look_at_point, cam_position in zip(view_data["look_at_points"], view_data["cam_positions"]):
look_at_point_world = obj_world_pose @ np.append(look_at_point, 1.0)
cam_position_world = obj_world_pose @ np.append(cam_position, 1.0)
if look_at_point_world[2] < min_height_z:
min_height_z = look_at_point_world[2]
look_at_point_world = look_at_point_world[:3]
cam_position_world = cam_position_world[:3]
forward_vector = cam_position_world - look_at_point_world
forward_vector /= np.linalg.norm(forward_vector)
up_vector = np.array([0, 0, 1])
right_vector = np.cross(up_vector, forward_vector)
right_vector /= np.linalg.norm(right_vector)
corrected_up_vector = np.cross(forward_vector, right_vector)
rotation_matrix = np.array([right_vector, corrected_up_vector, forward_vector]).T
cam_pose = np.eye(4)
cam_pose[:3, :3] = rotation_matrix
cam_pose[:3, 3] = cam_position_world
cam_poses.append(cam_pose)
filtered_cam_poses = []
for cam_pose in cam_poses:
if cam_pose[2, 3] > min_height_z:
direction_vector = cam_pose[:3, 2]
horizontal_normal = np.array([0, 0, 1])
cos_angle = np.dot(direction_vector, horizontal_normal) / (np.linalg.norm(direction_vector) * np.linalg.norm(horizontal_normal))
angle = np.arccos(np.clip(cos_angle, -1.0, 1.0))
angle_degree = np.degrees(angle)
if angle_degree < 90 - min_cam_table_included_degree:
filtered_cam_poses.append(cam_pose)
if random.random() < random_view_ratio:
pertube_pose = PoseUtil.get_uniform_pose([0.1, 0.1, 0.1], [3, 3, 3], 0, 180, "cm")
filtered_cam_poses.append(pertube_pose @ cam_pose)
if len(filtered_cam_poses) > max_views:
indices = np.random.choice(len(filtered_cam_poses), max_views, replace=False)
filtered_cam_poses = [filtered_cam_poses[i] for i in indices]
return np.array(filtered_cam_poses)
@staticmethod
def sample_view_data_world_space(mesh: trimesh.Trimesh, cad_to_world: np.ndarray, distance_range:tuple = (0.25,0.5), voxel_size:float = 0.005, max_views: int=1, min_cam_table_included_degree:int=20, random_view_ratio:float = 0.2) -> dict:
view_data = ViewSampleUtil.sample_view_data(mesh, distance_range, voxel_size, max_views)
view_data["cam_to_world_poses"] = ViewSampleUtil.get_cam_pose(view_data, cad_to_world, max_views, min_cam_table_included_degree, random_view_ratio)
view_data["voxel_down_sampled_points"], view_data["normals"] = ViewSampleUtil.get_world_points_and_normals(view_data, cad_to_world)
return view_data