import numpy as np
from collections import defaultdict


class ViewSampleUtil:
    """Utilities for sampling camera viewpoints around a Blender mesh object.

    Only :meth:`sample_view_data` (and its wrapper
    :meth:`sample_view_data_world_space`) require Blender's ``bmesh`` module;
    it is imported lazily so the numpy-only helpers remain usable outside
    Blender.
    """

    @staticmethod
    def voxel_downsample(points, voxel_size):
        """Downsample *points* by keeping one representative per voxel.

        Args:
            points: (N, 3) array-like of 3D points.
            voxel_size: edge length of the cubic voxels.

        Returns:
            Tuple ``(downsampled_points, downsampled_indices)`` where the
            indices refer back into the original *points* sequence. The
            representative of each voxel is the first point inserted into it
            (insertion order of the dict is deterministic).
        """
        voxel_grid = defaultdict(list)
        for i, point in enumerate(points):
            # Floor-divide to get the integer voxel coordinate of the point.
            voxel_index = tuple((point // voxel_size).astype(int))
            voxel_grid[voxel_index].append(i)

        downsampled_points = []
        downsampled_indices = []
        for indices in voxel_grid.values():
            selected_index = indices[0]
            downsampled_points.append(points[selected_index])
            downsampled_indices.append(selected_index)

        return np.array(downsampled_points), downsampled_indices

    @staticmethod
    def sample_view_data(obj, distance_range: tuple = (0.3, 0.5),
                         voxel_size: float = 0.005, max_views: int = 1) -> dict:
        """Sample look-at points and camera positions in *obj*'s local frame.

        For every mesh vertex a candidate camera is placed along the
        (origin-outward flipped) averaged vertex normal, at a random distance
        within *distance_range*. Candidates are voxel-downsampled and then
        randomly reduced to at most ``2 * max_views`` entries
        (:meth:`get_cam_pose` filters further down to *max_views*).

        Args:
            obj: Blender mesh object (``obj.data`` is the mesh).
            distance_range: (min, max) camera distance from the surface.
            voxel_size: voxel edge used for spatial downsampling.
            max_views: target number of views downstream.

        Returns:
            Dict with keys ``"look_at_points"`` and ``"cam_positions"``
            (lists, at most ``2 * max_views`` paired entries) plus
            ``"normals"`` and ``"voxel_down_sampled_points"`` (the *full*
            voxel-downsampled arrays, intentionally denser — consumed by
            :meth:`get_world_points_and_normals`).
        """
        # Imported lazily so this module stays importable outside Blender.
        import bmesh

        mesh = obj.data
        bm = bmesh.new()
        bm.from_mesh(mesh)
        bm.verts.ensure_lookup_table()
        bm.faces.ensure_lookup_table()
        bm.normal_update()

        look_at_points = []
        cam_positions = []
        normals = []
        for v in bm.verts:
            look_at_point = np.array(v.co)

            # Average the per-loop normals around this vertex.
            normal = np.zeros(3)
            for loop in v.link_loops:
                normal += np.array(loop.calc_normal())
            normal /= len(v.link_loops)

            # Guard against degenerate (zero-length) normals before
            # normalizing — the original divided by zero here.
            norm = np.linalg.norm(normal)
            if norm == 0 or np.isnan(normal).any():
                continue
            normal = normal / norm

            # Flip so the normal points away from the object origin.
            if np.dot(normal, look_at_point) < 0:
                normal = -normal

            distance = np.random.uniform(*distance_range)
            look_at_points.append(look_at_point)
            cam_positions.append(look_at_point + distance * normal)
            normals.append(normal)
        bm.free()

        look_at_points = np.array(look_at_points)
        cam_positions = np.array(cam_positions)

        down_points, selected_indices = ViewSampleUtil.voxel_downsample(
            look_at_points, voxel_size)
        down_cam_positions = cam_positions[selected_indices]
        down_normals = np.array(normals)[selected_indices]

        # Randomly reduce the camera candidates to at most 2 * max_views.
        # Bug fix vs. the original: when no reduction is needed, the dict is
        # still populated consistently (previously "cam_positions" stayed
        # empty and "look_at_points" held the raw unfiltered vertex list).
        if len(down_points) > max_views * 2:
            keep = np.random.choice(len(down_points), max_views * 2,
                                    replace=False)
            final_look_at = down_points[keep]
            final_cam_positions = down_cam_positions[keep]
        else:
            final_look_at = down_points
            final_cam_positions = down_cam_positions

        return {
            "look_at_points": final_look_at.tolist(),
            "cam_positions": final_cam_positions.tolist(),
            "normals": down_normals,
            "voxel_down_sampled_points": down_points,
        }

    @staticmethod
    def get_world_points_and_normals(view_data: dict,
                                     obj_world_pose: np.ndarray) -> tuple:
        """Transform the sampled surface points and normals to world space.

        Args:
            view_data: dict with ``"voxel_down_sampled_points"`` and
                ``"normals"`` (paired (3,) vectors in the object frame).
            obj_world_pose: (4, 4) homogeneous object-to-world transform.

        Returns:
            ``(world_points, world_normals)`` as (N, 3) arrays.
        """
        world_points = []
        world_normals = []
        for point, normal in zip(view_data["voxel_down_sampled_points"],
                                 view_data["normals"]):
            # Points use the full homogeneous transform; normals only rotate.
            point_world = obj_world_pose @ np.append(point, 1.0)
            normal_world = obj_world_pose[:3, :3] @ normal
            world_points.append(point_world[:3])
            world_normals.append(normal_world)
        return np.array(world_points), np.array(world_normals)

    @staticmethod
    def get_cam_pose(view_data: dict, obj_world_pose: np.ndarray,
                     max_views: int) -> np.ndarray:
        """Build world-space camera poses looking at the sampled points.

        Each pose is a (4, 4) homogeneous matrix whose columns are
        (right, up, forward) with ``forward`` pointing from the look-at point
        toward the camera, using world-Z as the up reference. Poses whose
        camera sits below the lowest look-at point are discarded, and at most
        *max_views* poses are returned (random subset if more survive).

        Args:
            view_data: dict with paired ``"look_at_points"`` and
                ``"cam_positions"`` in the object frame.
            obj_world_pose: (4, 4) object-to-world transform.
            max_views: maximum number of poses to return.

        Returns:
            (K, 4, 4) array of camera poses, ``K <= max_views``.
        """
        cam_poses = []
        min_height_z = 1000
        for look_at_point, cam_position in zip(view_data["look_at_points"],
                                               view_data["cam_positions"]):
            look_at_point_world = obj_world_pose @ np.append(look_at_point, 1.0)
            cam_position_world = obj_world_pose @ np.append(cam_position, 1.0)

            # Track the lowest look-at height; used to filter cameras below.
            if look_at_point_world[2] < min_height_z:
                min_height_z = look_at_point_world[2]

            look_at_point_world = look_at_point_world[:3]
            cam_position_world = cam_position_world[:3]

            forward_vector = cam_position_world - look_at_point_world
            forward_vector /= np.linalg.norm(forward_vector)

            up_vector = np.array([0, 0, 1])
            right_vector = np.cross(up_vector, forward_vector)
            right_norm = np.linalg.norm(right_vector)
            # Bug fix vs. the original: skip views looking straight up/down,
            # where the right vector degenerates and normalizing yields NaNs.
            if right_norm == 0:
                continue
            right_vector /= right_norm
            corrected_up_vector = np.cross(forward_vector, right_vector)

            # Columns: right, up, forward (camera looks along -forward
            # toward the look-at point).
            rotation_matrix = np.array(
                [right_vector, corrected_up_vector, forward_vector]).T

            cam_pose = np.eye(4)
            cam_pose[:3, :3] = rotation_matrix
            cam_pose[:3, 3] = cam_position_world
            cam_poses.append(cam_pose)

        # Drop cameras sitting below the lowest surface point.
        filtered_cam_poses = [p for p in cam_poses if p[2, 3] > min_height_z]

        if len(filtered_cam_poses) > max_views:
            indices = np.random.choice(len(filtered_cam_poses), max_views,
                                       replace=False)
            filtered_cam_poses = [filtered_cam_poses[i] for i in indices]

        return np.array(filtered_cam_poses)

    @staticmethod
    def sample_view_data_world_space(obj, distance_range: tuple = (0.3, 0.5),
                                     voxel_size: float = 0.005,
                                     max_views: int = 1) -> dict:
        """Sample views for *obj* and express everything in world space.

        Convenience wrapper: samples object-frame view data, then adds
        ``"cam_poses"`` and rewrites ``"voxel_down_sampled_points"`` /
        ``"normals"`` in world coordinates using ``obj.matrix_world``.
        """
        obj_world_pose = np.asarray(obj.matrix_world)
        view_data = ViewSampleUtil.sample_view_data(
            obj, distance_range, voxel_size, max_views)
        view_data["cam_poses"] = ViewSampleUtil.get_cam_pose(
            view_data, obj_world_pose, max_views)
        view_data["voxel_down_sampled_points"], view_data["normals"] = \
            ViewSampleUtil.get_world_points_and_normals(view_data, obj_world_pose)
        return view_data