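"""Render RGB, depth and segmentation images of a saved scene with PyBullet.

Object meshes are loaded from an OmniObject3D-style dataset, placed according
to a pickled scene description, and rendered from a camera pose exported by
Isaac Sim. The result can optionally be visualized as an Open3D point cloud.
"""
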
import os
import json
import math
import pickle

import cv2
import numpy as np
import open3d as o3d
import pybullet as p
import pybullet_data
from PIL import Image

class GenerateScene:
    def __init__(self, dataset_path, scene_path, output_path, camera_params) -> None:
        self._init_variables()
        self._load_object_dataset(dataset_path)
        self._load_scene(scene_path)
        self._set_output_path(output_path)
        self._set_camera_params(camera_params)

    def _init_variables(self):
        self.object_paths = {}
        self.scene = {}
        # Uniform mesh scale; 0.001 suggests the source meshes are in millimeters.
        self.object_model_scale = [0.001, 0.001, 0.001]
        self.output_path = None
        self.camera_params = None
        self.segmentation_labels = {}

    def _extract_model_name(self, path):
        # The model name is the path component NUM_SLASH_BEFORE_NAME slashes
        # before the end (here: the parent directory of the "Scan" folder).
        # Adjust the constant if the dataset layout differs.
        NUM_SLASH_BEFORE_NAME = 1
        return path.split("/")[-(NUM_SLASH_BEFORE_NAME + 1)]
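
    # Illustrative example (hypothetical dataset path):
    #   _extract_model_name(".../output/toy_car/Scan") -> "toy_car"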

    def _load_object_dataset(self, dataset_path):
        if dataset_path.endswith("/"):
            dataset_path = dataset_path[:-1]
        # Each object contributes a simplified mesh at <model name>/Scan/Simp.obj.
        for root, dirs, files in os.walk(dataset_path, topdown=False):
            for name in dirs:
                path = os.path.join(root, name)
                if path.endswith("Scan"):
                    model_name = self._extract_model_name(path)
                    self.object_paths[model_name] = path + "/Simp.obj"

    def _load_scene(self, scene_path):
        if scene_path.endswith("/"):
            scene_path = scene_path[:-1]
        with open(scene_path + "/scene.pickle", "rb") as f:
            self.scene = pickle.load(f)

    def _set_output_path(self, output_path):
        self.output_path = output_path
        if self.output_path.endswith("/"):
            self.output_path = self.output_path[:-1]

    def _set_camera_params(self, camera_params):
        self.camera_params = camera_params

    def load_camera_pose_from_frame(self, camera_params_path):
        with open(camera_params_path, "r") as f:
            camera_params = json.load(f)
        # cameraViewTransform is a flat 16-element, row-major world-to-camera
        # matrix in USD's row-vector convention; inverting and transposing it
        # yields the camera-to-world pose in the column-vector convention
        # used by _render_image.
        view_transform = np.array(camera_params["cameraViewTransform"]).reshape(4, 4)
        view_transform = np.linalg.inv(view_transform).T
        # Flip the Y and Z axes: USD cameras look down -Z with +Y up, while
        # the look-at computation in _render_image treats +Z as forward.
        offset = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
        return view_transform.dot(offset)
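
    # The frame JSON is assumed to look roughly like (abridged):
    #   {"cameraViewTransform": [m00, m01, ..., m33], ...}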

    def _load_obj_to_pybullet(self, obj_file_path, position, orientation, scale):
        # Visual shape only: no collision shape is created, so objects are
        # rendered but never collide (fine here, since gravity is disabled).
        visual_ind = p.createVisualShape(
            shapeType=p.GEOM_MESH,
            fileName=obj_file_path,
            rgbaColor=[1, 1, 1, 1],
            specularColor=[0.4, 0.4, 0],
            visualFramePosition=[0, 0, 0],
            meshScale=scale)
        p.createMultiBody(
            baseMass=1,
            baseVisualShapeIndex=visual_ind,
            basePosition=position,
            baseOrientation=orientation,
            useMaximalCoordinates=True)

    def _render_image(self, camera_pose):
        width = self.camera_params["width"]
        height = self.camera_params["height"]
        fov = self.camera_params["fov"]
        aspect = width / height
        near = self.camera_params["near"]
        far = self.camera_params["far"]

        # The look-at target is one unit along the camera's +Z axis; the
        # camera's -Y axis serves as the up vector.
        T = np.array([[1, 0, 0, 0],
                      [0, 1, 0, 0],
                      [0, 0, 1, 1],
                      [0, 0, 0, 1]])
        look_at_T = camera_pose.dot(T)
        view_matrix = p.computeViewMatrix(
            [camera_pose[0, 3], camera_pose[1, 3], camera_pose[2, 3]],
            [look_at_T[0, 3], look_at_T[1, 3], look_at_T[2, 3]],
            [-camera_pose[0, 1], -camera_pose[1, 1], -camera_pose[2, 1]])
        projection_matrix = p.computeProjectionMatrixFOV(fov, aspect, near, far)

        # Render RGB, depth and segmentation with the hardware OpenGL renderer.
        images = p.getCameraImage(width, height, view_matrix, projection_matrix,
                                  renderer=p.ER_BULLET_HARDWARE_OPENGL)
        rgb = np.asarray(images[2], dtype=np.uint8).reshape(height, width, 4)
        depth = np.asarray(images[3]).reshape(height, width)
        seg = np.asarray(images[4]).reshape(height, width)

        # PyBullet returns RGBA, OpenCV writes BGR.
        cv2.imwrite(self.output_path + '/rgb.jpg', cv2.cvtColor(rgb, cv2.COLOR_RGBA2BGR))

        # Linearize the non-linear OpenGL depth buffer, convert to millimeters
        # and save as a 16-bit PNG.
        depth_image = far * near / (far - (far - near) * depth)
        depth_image = depth_image.astype(np.float32) * 1000.0
        Image.fromarray(depth_image.astype(np.uint16)).save(self.output_path + '/depth.png')

        # Segmentation values are pybullet body unique ids (small integers).
        cv2.imwrite(self.output_path + '/seg.jpg', seg.astype(np.uint8))

        # Body ids follow load order: the ground plane is 0, so the i-th
        # scene object gets id i + 1.
        for i, object_name in enumerate(self.scene.keys()):
            self.segmentation_labels[str(i + 1)] = object_name
        with open(self.output_path + "/seg_labels.json", 'w') as seg_labels:
            json.dump(self.segmentation_labels, seg_labels)
        with open(self.output_path + "/cam_intrinsics.json", 'w') as cam_intrinsics:
            json.dump(self.camera_params, cam_intrinsics)
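
    # Example seg_labels.json produced above (hypothetical object names):
    #   {"1": "toy_car", "2": "mug"}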

    def generate_images(self, camera_pose):
        p.connect(p.GUI)
        p.setAdditionalSearchPath(pybullet_data.getDataPath())
        p.setGravity(0, 0, 0)  # zero gravity keeps objects at their stored poses
        p.loadURDF("plane100.urdf")
        for obj_name in self.scene.keys():
            orientation = self.scene[obj_name]["rotation"]
            position = self.scene[obj_name]["position"]
            self._load_obj_to_pybullet(obj_file_path=self.object_paths[obj_name],
                                       position=position,
                                       orientation=orientation,
                                       scale=self.object_model_scale)
        self._render_image(camera_pose)
        p.stepSimulation()
        p.disconnect()

    def visualize_pcd(self):
        color_image = o3d.io.read_image(self.output_path + '/rgb.jpg')
        depth_image = o3d.io.read_image(self.output_path + '/depth.png')
        # Open3D's default depth_scale of 1000.0 matches the millimeter depth PNG.
        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
            color_image, depth_image, convert_rgb_to_intensity=False)
        intrinsic = o3d.camera.PinholeCameraIntrinsic(
            o3d.camera.PinholeCameraIntrinsicParameters.Kinect2DepthCameraDefault)
        intrinsic.set_intrinsics(width=self.camera_params["width"],
                                 height=self.camera_params["height"],
                                 fx=self.camera_params["fx"],
                                 fy=self.camera_params["fy"],
                                 cx=self.camera_params["cx"],
                                 cy=self.camera_params["cy"])
        point_cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, intrinsic)
        o3d.visualization.draw_geometries([point_cloud])

    def print_scene(self):
        print("================= Scene Objects: =================")
        print(self.scene.keys())
        print("==================================================")


DATASET_PATH = "/home/hzx/Downloads/OmniObject3d-simplified/output"
SCENE_PATH = "/home/hzx/Projects/ActivePerception/data_generation/output/scene_0/"
OUTPUT_PATH = "/home/hzx/Projects/ActivePerception/data_generation/output/"
FRAME_PATH = "/home/hzx/Projects/ActivePerception/data_generation/output/scene_0/camera_params_0119.json"

ISAAC_SIM_CAM_H_APERTURE = 20.955  # read from Isaac Sim
ISAAC_SIM_CAM_V_APERTURE = 15.2908  # read from Isaac Sim
ISAAC_SIM_FOCAL_LENGTH = 39  # found by trial and error; Isaac Sim itself reports 24
ISAAC_SIM_CAM_D_APERTURE = math.sqrt(ISAAC_SIM_CAM_H_APERTURE**2 + ISAAC_SIM_CAM_V_APERTURE**2)

CAM_WIDTH = 640
CAM_HEIGHT = 480
CAM_FOV = 2 * math.atan(ISAAC_SIM_CAM_D_APERTURE / (2 * ISAAC_SIM_FOCAL_LENGTH)) / math.pi * 180  # diagonal FOV, degrees
CAM_NEAR = 0.001  # near clipping distance
CAM_FAR = 10  # far clipping distance
CAM_CX = CAM_WIDTH / 2
CAM_CY = CAM_HEIGHT / 2
CAM_FX = 1 / math.tan(CAM_FOV * math.pi / 180.0 / 2.0) * CAM_WIDTH / 2
CAM_FY = 1 / (CAM_HEIGHT / CAM_WIDTH * math.tan(CAM_FOV * math.pi / 180.0 / 2.0)) * CAM_HEIGHT / 2
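
# Both focal-length expressions reduce to the standard pinhole relation
# f = (W / 2) / tan(FOV / 2), so CAM_FX == CAM_FY; note that the diagonal
# FOV computed above is reused here for both image axes.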

CAMERA_PARAMS = {
    "width": CAM_WIDTH,
    "height": CAM_HEIGHT,
    "fov": CAM_FOV,
    "near": CAM_NEAR,
    "far": CAM_FAR,
    "cx": CAM_CX,
    "cy": CAM_CY,
    "fx": CAM_FX,
    "fy": CAM_FY,
}


if __name__ == "__main__":
    gs = GenerateScene(DATASET_PATH, SCENE_PATH, OUTPUT_PATH, CAMERA_PARAMS)
    gs.print_scene()
    cam_pose = gs.load_camera_pose_from_frame(FRAME_PATH)
    gs.generate_images(cam_pose)  # writes rgb, depth and segmentation images to OUTPUT_PATH
    # gs.visualize_pcd()