add normal and visualize util

This commit is contained in:
hofee 2024-10-17 06:13:18 -05:00
parent 8d92676c34
commit 0267aed6e5
6 changed files with 191 additions and 88 deletions

3
.gitignore vendored
View File

@ -11,4 +11,5 @@ test/
*.log
/data_generation/data/*
/data_generation/output/*
test/
test/
temp*

View File

@ -8,16 +8,16 @@ runner:
root_dir: experiments
generate:
port: 5004
from: 590
to: 2000 # -1 means all
object_dir: /media/hofee/data/data/scaled_object_meshes
table_model_path: /media/hofee/data/data/others/table.obj
output_dir: /media/hofee/repository/full_data_output
from: 0
to: 2 # -1 means all
object_dir: H:\\AI\\Datasets\\scaled_object_box_meshes
table_model_path: "H:\\AI\\Datasets\\table.obj"
output_dir: C:\\Document\\Local Project\\nbv_rec\\nbv_reconstruction\\temp
binocular_vision: true
plane_size: 10
max_views: 512
min_views: 128
random_view_ratio: 0.1
random_view_ratio: 0.02
min_cam_table_included_degree: 20
max_diag: 0.7
min_diag: 0.01

View File

@ -46,6 +46,26 @@ def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic, random_downsampl
return points_camera_world
def get_world_points_and_normal(depth, mask, normal, cam_intrinsic, cam_extrinsic, random_downsample_N):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
normal_camera = normal[mask].reshape(-1, 3)
sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N, require_idx=True
)
if len(sampled_target_points) == 0:
return np.zeros((0, 3)), np.zeros((0, 3))
sampled_normal_camera = normal_camera[idx]
points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world, sampled_normal_camera
def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
@ -90,7 +110,7 @@ def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
''' target points '''
mask_img_L = mask_L
mask_img_R = mask_R
@ -99,23 +119,23 @@ def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
target_mask_img_R = (mask_R == target_mask_label).all(axis=-1)
sampled_target_points_L = get_world_points(depth_L, target_mask_img_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], random_downsample_N)
sampled_target_points_L, sampled_target_normal_L = get_world_points_and_normal(depth_L,target_mask_img_L,normal_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], random_downsample_N)
sampled_target_points_R = get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"], random_downsample_N)
has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
if has_points:
target_points = PtsUtil.get_overlapping_points(
sampled_target_points_L, sampled_target_points_R, voxel_size
target_points, overlap_idx = PtsUtil.get_overlapping_points(
sampled_target_points_L, sampled_target_points_R, voxel_size, require_idx=True
)
sampled_target_normal_L = sampled_target_normal_L[overlap_idx]
if has_points:
has_points = target_points.shape[0] > 0
if has_points:
points_normals = DataLoadUtil.load_points_normals(root, scene, display_table_as_world_space_origin=True)
target_points = PtsUtil.filter_points(
target_points, points_normals, cam_info["cam_to_world"],voxel_size=0.002, theta = filter_degree, z_range=(min_z, max_z)
target_points, sampled_target_normal_L, cam_info["cam_to_world"], theta_limit = filter_degree, z_range=(min_z, max_z)
)
@ -135,7 +155,7 @@ def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
if __name__ == "__main__":
#root = "/media/hofee/repository/new_data_with_normal"
root = r"/media/hofee/data/data/box_output"
root = r"C:\\Document\\Local Project\\nbv_rec\\nbv_reconstruction\\temp"
# list_path = r"/media/hofee/repository/full_list.txt"
# scene_list = []

View File

@ -44,11 +44,6 @@ class DataLoadUtil:
path = os.path.join(label_dir, f"{seq_idx}.json")
return path
@staticmethod
def get_label_path_old(root, scene_name):
path = os.path.join(root, scene_name, "label.json")
return path
@staticmethod
def get_scene_seq_length(root, scene_name):
camera_params_path = os.path.join(root, scene_name, "camera_params")
@ -69,36 +64,6 @@ class DataLoadUtil:
diagonal_length = np.linalg.norm(bbox)
return diagonal_length
@staticmethod
def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
model_path = os.path.join(output_dir, scene_name, "world_mesh.obj")
mesh.export(model_path)
@staticmethod
def save_target_mesh_at_world_space(
root, model_dir, scene_name, display_table_as_world_space_origin=True
):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
if display_table_as_world_space_origin:
location = transformation["location"] - DataLoadUtil.get_display_table_top(
root, scene_name
)
else:
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
mesh_dir = os.path.join(root, scene_name, "mesh")
if not os.path.exists(mesh_dir):
os.makedirs(mesh_dir)
model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
mesh.export(model_path)
@staticmethod
def load_scene_info(root, scene_name):
scene_info_path = os.path.join(root, scene_name, "scene_info.json")
@ -113,17 +78,6 @@ class DataLoadUtil:
target_pts_num_dict = json.load(f)
return target_pts_num_dict
@staticmethod
def load_target_object_pose(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
return pose_mat
@staticmethod
def load_depth(path, min_depth=0.01, max_depth=5.0, binocular=False):
@ -200,10 +154,13 @@ class DataLoadUtil:
os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
)
normal_image_L = cv2.imread(normal_path_L, cv2.IMREAD_COLOR)
normal_image_L = cv2.cvtColor(normal_image_L, cv2.COLOR_BGR2RGB)
normal_path_R = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + "_R.png"
)
normal_image_R = cv2.imread(normal_path_R, cv2.IMREAD_COLOR)
normal_image_R = cv2.cvtColor(normal_image_R, cv2.COLOR_BGR2RGB)
normalized_normal_image_L = normal_image_L / 255.0 * 2.0 - 1.0
normalized_normal_image_R = normal_image_R / 255.0 * 2.0 - 1.0
return normalized_normal_image_L, normalized_normal_image_R
@ -217,6 +174,7 @@ class DataLoadUtil:
os.path.dirname(path), "normal", os.path.basename(path) + ".png"
)
normal_image = cv2.imread(normal_path, cv2.IMREAD_COLOR)
normal_image = cv2.cvtColor(normal_image, cv2.COLOR_BGR2RGB)
normalized_normal_image = normal_image / 255.0 * 2.0 - 1.0
return normalized_normal_image
@ -227,19 +185,14 @@ class DataLoadUtil:
return label_data
@staticmethod
def load_rgb(path):
rgb_path = os.path.join(
os.path.dirname(path), "rgb", os.path.basename(path) + ".png"
)
rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR)
return rgb_image
@staticmethod
def load_from_preprocessed_pts(path):
def load_from_preprocessed_pts(path, file_type="npy"):
npy_path = os.path.join(
os.path.dirname(path), "pts", os.path.basename(path) + ".npy"
os.path.dirname(path), "pts", os.path.basename(path) + "." + file_type
)
pts = np.load(npy_path)
if file_type == "txt":
pts = np.loadtxt(npy_path)
else:
pts = np.load(npy_path)
return pts
@staticmethod

View File

@ -78,24 +78,21 @@ class PtsUtil:
return overlapping_points
@staticmethod
def filter_points(points, points_normals, cam_pose, voxel_size=0.002, theta=45, z_range=(0.2, 0.45)):
def filter_points(points, normals, cam_pose, theta_limit=45, z_range=(0.2, 0.45)):
""" filter with normal """
normals_normalized = normals / np.linalg.norm(normals, axis=1, keepdims=True)
cos_theta = np.dot(normals_normalized, np.array([0, 0, 1]))
theta = np.arccos(cos_theta) * 180 / np.pi
idx = theta < theta_limit
filtered_sampled_points = points[idx]
""" filter with z range """
points_cam = PtsUtil.transform_point_cloud(points, np.linalg.inv(cam_pose))
points_cam = PtsUtil.transform_point_cloud(filtered_sampled_points, np.linalg.inv(cam_pose))
idx = (points_cam[:, 2] > z_range[0]) & (points_cam[:, 2] < z_range[1])
z_filtered_points = points[idx]
z_filtered_points = filtered_sampled_points[idx]
""" filter with normal """
sampled_points = PtsUtil.voxel_downsample_point_cloud(z_filtered_points, voxel_size)
kdtree = cKDTree(points_normals[:,:3])
_, indices = kdtree.query(sampled_points)
nearest_points = points_normals[indices]
normals = nearest_points[:, 3:]
camera_axis = -cam_pose[:3, 2]
normals_normalized = normals / np.linalg.norm(normals, axis=1, keepdims=True)
cos_theta = np.dot(normals_normalized, camera_axis)
theta_rad = np.deg2rad(theta)
idx = cos_theta > np.cos(theta_rad)
filtered_sampled_points= sampled_points[idx]
return filtered_sampled_points[:, :3]
return z_filtered_points[:, :3]

132
utils/vis.py Normal file
View File

@ -0,0 +1,132 @@
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import trimesh
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.data_load import DataLoadUtil
from utils.pts import PtsUtil
class visualizeUtil:
@staticmethod
def save_all_cam_pos_and_cam_axis(root, scene, output_dir):
length = DataLoadUtil.get_scene_seq_length(root, scene)
all_cam_pos = []
all_cam_axis = []
for i in range(length):
path = DataLoadUtil.get_path(root, scene, i)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
cam_pose = cam_info["cam_to_world"]
cam_pos = cam_pose[:3, 3]
cam_axis = cam_pose[:3, 2]
num_samples = 10
sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
sample_points = np.array(sample_points)
all_cam_pos.append(cam_pos)
all_cam_axis.append(sample_points)
all_cam_pos = np.array(all_cam_pos)
all_cam_axis = np.array(all_cam_axis).reshape(-1, 3)
np.savetxt(os.path.join(output_dir, "all_cam_pos.txt"), all_cam_pos)
np.savetxt(os.path.join(output_dir, "all_cam_axis.txt"), all_cam_axis)
@staticmethod
def save_all_combined_pts(root, scene, output_dir):
length = DataLoadUtil.get_scene_seq_length(root, scene)
all_combined_pts = []
for i in range(length):
path = DataLoadUtil.get_path(root, scene, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path,"txt")
if pts.shape[0] == 0:
continue
all_combined_pts.append(pts)
all_combined_pts = np.vstack(all_combined_pts)
downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.001)
np.savetxt(os.path.join(output_dir, "all_combined_pts.txt"), downsampled_all_pts)
@staticmethod
def save_target_mesh_at_world_space(
root, model_dir, scene_name, display_table_as_world_space_origin=True
):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
if display_table_as_world_space_origin:
location = transformation["location"] - DataLoadUtil.get_display_table_top(
root, scene_name
)
else:
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
mesh_dir = os.path.join(root, scene_name, "mesh")
if not os.path.exists(mesh_dir):
os.makedirs(mesh_dir)
model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
mesh.export(model_path)
@staticmethod
def save_points_and_normals(root, scene, frame_idx, output_dir):
target_mask_label = (0, 255, 0, 255)
path = DataLoadUtil.get_path(root, scene, frame_idx)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
depth_L,_ = DataLoadUtil.load_depth(
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=True,
)
mask_L = DataLoadUtil.load_seg(path, binocular=True, left_only=True)
normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
''' target points '''
target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
cam_intrinsic = cam_info["cam_intrinsic"]
z = depth_L[target_mask_img_L]
i, j = np.nonzero(target_mask_img_L)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
random_downsample_N = 1000
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
normal_camera = normal_L[target_mask_img_L].reshape(-1, 3)
sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N, require_idx=True
)
if len(sampled_target_points) == 0:
print("No target points")
offset = np.asarray([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
sampled_normal_camera = normal_camera[idx]
sampled_normal_camera = np.dot(sampled_normal_camera, offset)
sampled_visualized_normal = []
num_samples = 10
for i in range(len(sampled_target_points)):
sampled_visualized_normal.append([sampled_target_points[i] + 0.02*t * sampled_normal_camera[i] for t in range(num_samples)])
sampled_visualized_normal = np.array(sampled_visualized_normal).reshape(-1, 3)
np.savetxt(os.path.join(output_dir, "target_pts.txt"), sampled_target_points)
np.savetxt(os.path.join(output_dir, "target_normal.txt"), sampled_visualized_normal)
# ------ Debug ------
if __name__ == "__main__":
root = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\temp"
model_dir = r"H:\\AI\\Datasets\\scaled_object_box_meshes"
scene = "omniobject3d-box_030"
output_dir = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\test"
# visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
# visualizeUtil.save_all_combined_pts(root, scene, output_dir)
# visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
visualizeUtil.save_points_and_normals(root, scene, 0, output_dir)