import os
import pickle
import pybullet as p
import pybullet_data
import numpy as np
from PIL import Image
import open3d as o3d
import cv2
import math
import json


class GenerateScene:
    def __init__(self, dataset_path, scene_path, output_path, camera_params) -> None:
        self._init_variables()
        self._load_object_dataset(dataset_path)
        self._load_scene(scene_path)
        self._set_output_path(output_path)
        self._set_camera_params(camera_params)

    def _init_variables(self):
        self.object_paths = {}
        self.scene = {}
        self.object_model_scale = [0.001, 0.001, 0.001]
        self.output_path = None
        self.camera_params = None
        self.segmentation_labels = {}

    def _extract_model_name(self, path):
        # The model name is the directory component just above the trailing
        # "Scan" folder, i.e. ".../<model_name>/Scan". Adjust for other layouts.
        return path.split("/")[-2]

    def _load_object_dataset(self, dataset_path):
        dataset_path = dataset_path.rstrip("/")
        for root, dirs, files in os.walk(dataset_path, topdown=False):
            for name in dirs:
                path = os.path.join(root, name)
                if path.endswith("Scan"):
                    model_name = self._extract_model_name(path)
                    self.object_paths[model_name] = path + "/Simp.obj"

    def _load_scene(self, scene_path):
        scene_path = scene_path.rstrip("/") + "/scene.pickle"
        with open(scene_path, "rb") as f:
            self.scene = pickle.load(f)

    def _set_output_path(self, output_path):
        self.output_path = output_path.rstrip("/")

    def _set_camera_params(self, camera_params):
        self.camera_params = camera_params

    def load_camera_pose_from_frame(self, camera_params_path):
        with open(camera_params_path, "r") as f:
            camera_params = json.load(f)
        view_transform = np.asarray(camera_params["cameraViewTransform"]).reshape(4, 4)
        # The stored matrix is column-major, so the transpose of its inverse is
        # the camera-to-world transform; the offset then flips the Y and Z axes
        # of the OpenGL camera convention.
        view_transform = np.linalg.inv(view_transform).T
        offset = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
        return view_transform.dot(offset)

    def _load_obj_to_pybullet(self, obj_file_path, position, orientation, scale):
        visual_ind = p.createVisualShape(
            shapeType=p.GEOM_MESH,
            fileName=obj_file_path,
            rgbaColor=[1, 1, 1, 1],
            specularColor=[0.4, 0.4, 0],
            visualFramePosition=[0, 0, 0],
            meshScale=scale)
        p.createMultiBody(
            baseMass=1,
            baseVisualShapeIndex=visual_ind,
            basePosition=position,
            baseOrientation=orientation,
            useMaximalCoordinates=True)

    def _render_image(self, camera_pose):
        width = self.camera_params["width"]
        height = self.camera_params["height"]
        fov = self.camera_params["fov"]
        aspect = width / height
        near = self.camera_params["near"]
        far = self.camera_params["far"]

        # The look-at target sits one unit along the camera's +Z axis; the up
        # vector is the camera's -Y axis.
        T = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 1], [0, 0, 0, 1]])
        look_at_T = camera_pose.dot(T)
        view_matrix = p.computeViewMatrix(
            [camera_pose[0, 3], camera_pose[1, 3], camera_pose[2, 3]],
            [look_at_T[0, 3], look_at_T[1, 3], look_at_T[2, 3]],
            [-camera_pose[0, 1], -camera_pose[1, 1], -camera_pose[2, 1]])
        projection_matrix = p.computeProjectionMatrixFOV(fov, aspect, near, far)

        # Render RGB, depth and segmentation with the OpenGL renderer.
        images = p.getCameraImage(width, height, view_matrix, projection_matrix,
                                  renderer=p.ER_BULLET_HARDWARE_OPENGL)
        rgb = np.reshape(images[2], (height, width, 4)).astype(np.uint8)
        depth = np.reshape(images[3], (height, width))
        seg = np.reshape(images[4], (height, width))

        # pybullet returns RGBA; convert to BGR before writing with OpenCV.
        rgb_image = cv2.cvtColor(rgb, cv2.COLOR_RGBA2BGR)
        cv2.imwrite(self.output_path + '/rgb.jpg', rgb_image)
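        # The OpenGL depth buffer is nonlinear: a buffer value d in [0, 1]
        # relates to metric depth z via d = (far / (far - near)) * (1 - near / z).
        # Solving for z gives the linearization below; scaling by 1000 stores
        # the result as 16-bit millimeters.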
        depth_image = far * near / (far - (far - near) * depth)
        depth_image = np.asanyarray(depth_image).astype(np.float32) * 1000.0
        depth_image = depth_image.astype(np.uint16)
        Image.fromarray(depth_image).save(self.output_path + '/depth.png')

        # Save the integer label mask losslessly (JPEG would corrupt the IDs).
        cv2.imwrite(self.output_path + '/seg.png', seg.astype(np.uint16))

        # Body unique IDs start at 1 for the scene objects (the plane loaded
        # first has ID 0), in the same order the objects were added.
        for i, object_name in enumerate(self.scene.keys()):
            self.segmentation_labels[str(i + 1)] = object_name
        with open(self.output_path + "/seg_labels.json", 'w') as seg_labels:
            json.dump(self.segmentation_labels, seg_labels)
        with open(self.output_path + "/cam_intrinsics.json", 'w') as cam_intrinsics:
            json.dump(self.camera_params, cam_intrinsics)

    def generate_images(self, camera_pose):
        p.connect(p.GUI)
        p.setAdditionalSearchPath(pybullet_data.getDataPath())
        p.setGravity(0, 0, 0)
        p.loadURDF("plane100.urdf")

        for obj_name in self.scene.keys():
            self._load_obj_to_pybullet(obj_file_path=self.object_paths[obj_name],
                                       position=self.scene[obj_name]["position"],
                                       orientation=self.scene[obj_name]["rotation"],
                                       scale=self.object_model_scale)
        self._render_image(camera_pose)
        p.stepSimulation()
        p.disconnect()

    def visualize_pcd(self):
        color_image = o3d.io.read_image(self.output_path + '/rgb.jpg')
        depth_image = o3d.io.read_image(self.output_path + '/depth.png')
        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
            color_image, depth_image, convert_rgb_to_intensity=False)
        intrinsic = o3d.camera.PinholeCameraIntrinsic(
            o3d.camera.PinholeCameraIntrinsicParameters.Kinect2DepthCameraDefault)
        intrinsic.set_intrinsics(width=self.camera_params["width"],
                                 height=self.camera_params["height"],
                                 fx=self.camera_params["fx"],
                                 fy=self.camera_params["fy"],
                                 cx=self.camera_params["cx"],
                                 cy=self.camera_params["cy"])
        point_cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, intrinsic)
        o3d.visualization.draw_geometries([point_cloud])

    def print_scene(self):
        print("================= Scene Objects: =================")
        print(self.scene.keys())
        print("==================================================")


DATASET_PATH = "/home/hzx/Downloads/OmniObject3d-simplified/output"
SCENE_PATH = "/home/hzx/Projects/ActivePerception/data_generation/output/scene_0/"
OUTPUT_PATH = "/home/hzx/Projects/ActivePerception/data_generation/output/"
FRAME_PATH = "/home/hzx/Projects/ActivePerception/data_generation/output/scene_0/camera_params_0119.json"

ISAAC_SIM_CAM_H_APERTURE = 20.955   # read from Isaac Sim
ISAAC_SIM_CAM_V_APERTURE = 15.2908  # read from Isaac Sim
ISAAC_SIM_FOCAL_LENGTH = 39         # found by trial and error; Isaac Sim itself reports 24
ISAAC_SIM_CAM_D_APERTURE = math.sqrt(ISAAC_SIM_CAM_H_APERTURE**2 + ISAAC_SIM_CAM_V_APERTURE**2)

CAM_WIDTH = 640
CAM_HEIGHT = 480
CAM_FOV = 2 * math.atan(ISAAC_SIM_CAM_D_APERTURE / (2 * ISAAC_SIM_FOCAL_LENGTH)) / math.pi * 180
CAM_NEAR = 0.001  # near clipping distance
CAM_FAR = 10      # far clipping distance
CAM_CX = CAM_WIDTH / 2
CAM_CY = CAM_HEIGHT / 2
CAM_FX = 1 / math.tan(CAM_FOV * math.pi / 180.0 / 2.0) * CAM_WIDTH / 2
CAM_FY = 1 / (CAM_HEIGHT / CAM_WIDTH * math.tan(CAM_FOV * math.pi / 180.0 / 2.0)) * CAM_HEIGHT / 2

CAMERA_PARAMS = {
    "width": CAM_WIDTH,
    "height": CAM_HEIGHT,
    "fov": CAM_FOV,
    "near": CAM_NEAR,
    "far": CAM_FAR,
    "cx": CAM_CX,
    "cy": CAM_CY,
    "fx": CAM_FX,
    "fy": CAM_FY,
}

if __name__ == "__main__":
    gs = GenerateScene(DATASET_PATH, SCENE_PATH, OUTPUT_PATH, CAMERA_PARAMS)
    gs.print_scene()
    cam_pose = gs.load_camera_pose_from_frame(FRAME_PATH)
    gs.generate_images(cam_pose)  # writes the rgb, depth and segmentation images under OUTPUT_PATH
    # gs.visualize_pcd()
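
# A minimal consumer sketch (illustration only, not part of the original
# pipeline): it reloads the depth image and segmentation labels written by
# generate_images() and prints per-label pixel counts. The file names match
# the ones written above; the function itself is hypothetical and is never
# called in this script.
def inspect_outputs(output_path):
    # depth.png holds 16-bit depth in millimeters (see _render_image).
    depth = np.asarray(Image.open(output_path + "/depth.png"), dtype=np.uint16)
    print("depth range (mm): %d - %d" % (depth.min(), depth.max()))
    # seg.png holds pybullet body unique IDs; seg_labels.json maps ID -> name.
    seg = cv2.imread(output_path + "/seg.png", cv2.IMREAD_UNCHANGED)
    with open(output_path + "/seg_labels.json") as f:
        labels = json.load(f)
    for seg_id, name in labels.items():
        print("%s: %d px" % (name, np.count_nonzero(seg == int(seg_id))))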