import numpy as np
from scipy.spatial.transform import Rotation as R
from dataclasses import dataclass


@dataclass
class CameraIntrinsics:
    """Pinhole camera intrinsics in pixel units."""

    width: int
    height: int
    fx: float
    fy: float
    cx: float
    cy: float

    @property
    def intrinsic_matrix(self):
        """3x3 camera matrix K = [[fx, 0, cx], [0, fy, cy], [0, 0, 1]]."""
        return np.array([[self.fx, 0, self.cx],
                         [0, self.fy, self.cy],
                         [0, 0, 1]])


class CameraExtrinsics:
    """Camera pose stored as a scipy Rotation plus a translation vector.

    Not a @dataclass: the original decorated this class with @dataclass
    while also defining __init__ by hand, which left the decorator inert
    and misleading, so it has been removed.
    """

    def __init__(self, rotation: np.ndarray, translation: np.ndarray, rot_type: str):
        """
        rotation: 3x3 rotation matrix or 1x3 euler angles or 1x4 quaternion
        translation: 1x3 or 3x1 translation vector
        rot_type: "mat", "euler_xyz", "quat_xyzw"

        Raises ValueError for an unknown rot_type (the original used a bare
        assert, which is stripped when Python runs with -O).
        """
        if rot_type == "mat":
            self._rot = R.from_matrix(rotation)
        elif rot_type == "euler_xyz":
            # Euler angles are interpreted as intrinsic xyz, in degrees.
            self._rot = R.from_euler('xyz', rotation, degrees=True)
        elif rot_type == "quat_xyzw":
            # scipy uses scalar-last (x, y, z, w) order, matching the name.
            self._rot = R.from_quat(rotation)
        else:
            raise ValueError(f"unknown rot_type: {rot_type!r}")
        self._translation = translation

    @property
    def extrinsic_matrix(self):
        """4x4 homogeneous transform [[R, t], [0, 0, 0, 1]]."""
        return np.vstack([
            np.hstack([self._rot.as_matrix(), self._translation.reshape(3, 1)]),
            [0, 0, 0, 1],
        ])

    @property
    def rotation_euler_xyz(self):
        """Rotation as intrinsic-xyz Euler angles, in degrees."""
        return self._rot.as_euler('xyz', degrees=True)

    @property
    def rotation_quat_xyzw(self):
        """Rotation as a quaternion in scalar-last (x, y, z, w) order."""
        return self._rot.as_quat()

    @property
    def rotation_matrix(self):
        """Rotation as a 3x3 matrix."""
        return self._rot.as_matrix()

    @property
    def translation(self):
        """Translation vector exactly as passed to __init__."""
        return self._translation


class CameraData:
    """A depth image together with the camera that captured it.

    Not a @dataclass for the same reason as CameraExtrinsics: the original
    combined the decorator with a hand-written __init__, making it inert.
    """

    def __init__(self, depth_image: np.ndarray, image_id: int,
                 intrinsics: CameraIntrinsics, extrinsics: CameraExtrinsics):
        self._depth_image = depth_image
        self._image_id = image_id
        self._intrinsics = intrinsics
        self._extrinsics = extrinsics

    @property
    def depth_image(self):
        return self._depth_image

    @property
    def image_id(self):
        return self._image_id

    @property
    def intrinsics(self):
        """3x3 intrinsic matrix K of the wrapped CameraIntrinsics."""
        return self._intrinsics.intrinsic_matrix

    @property
    def extrinsics(self):
        """4x4 extrinsic matrix; pts_world applies it as camera->world."""
        return self._extrinsics.extrinsic_matrix

    @property
    def projection_matrix(self):
        """3x4 projection matrix P = K @ [R | t]."""
        return self.intrinsics @ self.extrinsics[:3, :4]

    @property
    def pts_camera(self):
        """Back-project the depth image into an Nx3 camera-frame point cloud.

        Points with non-positive depth (e.g. depth 0 = no measurement) are
        dropped. Units are those of the depth image.
        """
        height, width = self.depth_image.shape
        # Pixel grid: u = column index, v = row index.
        v, u = np.indices((height, width))
        pixels = np.vstack([u.flatten(), v.flatten(), np.ones_like(u.flatten())])  # 3xN
        # Unproject pixels to normalized rays, then scale each ray by its depth.
        rays = np.linalg.inv(self.intrinsics) @ pixels  # 3xN
        points = rays.T * self.depth_image.flatten()[:, np.newaxis]  # Nx3
        return points[points[:, 2] > 0]

    @property
    def pts_world(self):
        """pts_camera transformed into the world frame (Nx3)."""
        # Hoisted into a local: the original evaluated the pts_camera
        # property twice, redoing the whole unprojection.
        pts = self.pts_camera
        homogeneous = np.hstack([pts, np.ones((pts.shape[0], 1))])  # Nx4
        return (self.extrinsics @ homogeneous.T)[:3, :].T


class ViewUtil:
    @staticmethod
    def get_pts(view_data):
        """Build a world-frame point cloud, in meters, from a view dict.

        The original defined this without `self` and without @staticmethod,
        which only worked when called on the class itself; the decorator
        makes instance calls safe too.

        view_data keys: "image_id", "depth_intrinsics" (dict with
        width/height/fx/fy/cx/cy), "depth_extrinsics" (4x4 array),
        "depth_image" (2D, presumably millimeters given the /1000 below --
        TODO confirm against the data producer).

        Returns None when image_id is None.
        """
        image_id = view_data["image_id"]
        if image_id is None:
            # Early out before touching the other keys.
            return None
        depth_intrinsics = view_data["depth_intrinsics"]
        depth_extrinsics = view_data["depth_extrinsics"]
        depth_image = np.array(view_data["depth_image"], dtype=np.uint16)
        camera_intrinsics = CameraIntrinsics(
            depth_intrinsics['width'], depth_intrinsics['height'],
            depth_intrinsics['fx'], depth_intrinsics['fy'],
            depth_intrinsics['cx'], depth_intrinsics['cy']
        )
        camera_extrinsics = CameraExtrinsics(
            depth_extrinsics[:3, :3], depth_extrinsics[:3, 3], rot_type="mat"
        )
        camera_data = CameraData(depth_image, image_id,
                                 camera_intrinsics, camera_extrinsics)
        # Convert from depth-image units (mm) to meters.
        return camera_data.pts_world / 1000