# nbv_grasping/runners/view_generator.py
import os
import sys
import math
import pickle

import numpy as np
import pybullet as p
import pybullet_data
from flask import Flask, request, jsonify

# Walk two directory levels up from this file to locate the project root and
# make the project's packages importable before the local imports below.
path = os.path.abspath(__file__)
for i in range(2):
    path = os.path.dirname(path)
PROJECT_ROOT = path
sys.path.append(PROJECT_ROOT)

from runners.runner import Runner
from configs.config import ConfigManager


class ViewGenerator(Runner):
    def __init__(self, config_path, camera_params) -> None:
        super().__init__(config_path)
        self.data_dir = ConfigManager.get("settings", "dataset", "data_dir")
        self.port = ConfigManager.get("settings", "web_api", "port")
        self.camera_params = camera_params
        # Scale factor applied to every mesh; the models are presumably
        # authored in millimeters and converted to meters here.
        self.object_model_scale = [0.001, 0.001, 0.001]
        self.segmentation_labels = {}
        self.app = Flask(__name__)
        self._init_routes()

    def create_experiment(self, backup_name=None):
        return super().create_experiment(backup_name)

    def load_experiment(self, backup_name=None):
        return super().load_experiment(backup_name)

    def _init_routes(self):
        @self.app.route("/get_images", methods=["POST"])
        def get_images_api():
            data = request.get_json()
            camera_pose = data["camera_pose"]
            scene = data["scene"]
            data_type = data["data_type"]
            source = data["source"]
            scene_path = os.path.join(self.data_dir, source, data_type, scene)
            model_dir = os.path.join(self.data_dir, source, "objects")
            self.load_scene(scene_path, model_dir)
            result = self.generate_images(camera_pose)
            # Numpy arrays are not JSON-serializable, so convert them to lists.
            result = {
                "rgb": result["rgb"].tolist(),
                "depth": result["depth"].tolist(),
                "segmentation": result["segmentation"].tolist(),
                "segmentation_labels": result["segmentation_labels"],
                "camera_params": result["camera_params"],
            }
            return jsonify(result)
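
    # A minimal client sketch for this endpoint (assumptions: the server is
    # reachable on localhost, the port and scene/source names below are
    # placeholders, and `requests` is installed):
    #
    #   import requests
    #   resp = requests.post(
    #       "http://localhost:8888/get_images",
    #       json={
    #           "camera_pose": np.eye(4).tolist(),  # 4x4 camera-to-world pose
    #           "scene": "scene_0000",
    #           "data_type": "sample",
    #           "source": "my_dataset",
    #       },
    #   )
    #   images = resp.json()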

    def load_scene(self, scene_path, model_dir):
        scene_path = os.path.join(scene_path, "scene.pickle")
        # Use a context manager so the pickle file handle is closed promptly.
        with open(scene_path, "rb") as f:
            self.scene = pickle.load(f)
        self._initialize_pybullet_scene(model_dir)
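
    # Layout of scene.pickle as inferred from the usage below (a sketch, not
    # a verified spec): a dict keyed by object name, each entry holding a
    # position and what appears to be a pybullet [x, y, z, w] quaternion.
    #
    #   {
    #       "<object_name>": {
    #           "position": [x, y, z],
    #           "rotation": [qx, qy, qz, qw],
    #       },
    #       ...
    #   }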

    def _initialize_pybullet_scene(self, model_dir):
        if p.isConnected():
            p.resetSimulation()
        else:
            p.connect(p.DIRECT)
        p.setAdditionalSearchPath(pybullet_data.getDataPath())
        p.setGravity(0, 0, 0)
        p.loadURDF("plane100.urdf")
        for obj_name in self.scene.keys():
            orientation = self.scene[obj_name]["rotation"]
            position = self.scene[obj_name]["position"]
            # The last four characters look like an instance suffix; strip
            # them to recover the class (category) directory name.
            class_name = obj_name[:-4]
            obj_path = os.path.join(model_dir, class_name, obj_name, "Scan", "Simp.obj")
            self._load_obj_to_pybullet(
                obj_file_path=obj_path,
                position=position,
                orientation=orientation,
                scale=self.object_model_scale,
            )

    def _load_obj_to_pybullet(self, obj_file_path, position, orientation, scale):
        visual_ind = p.createVisualShape(
            shapeType=p.GEOM_MESH,
            fileName=obj_file_path,
            rgbaColor=[1, 1, 1, 1],
            specularColor=[0.4, 0.4, 0],
            visualFramePosition=[0, 0, 0],
            meshScale=scale,
        )
        # No collision shape is attached, so the bodies are render-only;
        # that is consistent with gravity being disabled above.
        p.createMultiBody(
            baseMass=1,
            baseVisualShapeIndex=visual_ind,
            basePosition=position,
            baseOrientation=orientation,
            useMaximalCoordinates=True,
        )

    def _render_image(self, camera_pose):
        width = self.camera_params["width"]
        height = self.camera_params["height"]
        fov = self.camera_params["fov"]
        aspect = width / height
        near = self.camera_params["near"]
        far = self.camera_params["far"]
        # The look-at target is a point 1 m ahead of the camera along its
        # +Z axis; the up vector is the negated Y column of the pose.
        T = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 1], [0, 0, 0, 1]])
        look_at_T = camera_pose.dot(T)
        view_matrix = p.computeViewMatrix(
            [camera_pose[0, 3], camera_pose[1, 3], camera_pose[2, 3]],
            [look_at_T[0, 3], look_at_T[1, 3], look_at_T[2, 3]],
            [-camera_pose[0, 1], -camera_pose[1, 1], -camera_pose[2, 1]],
        )
        projection_matrix = p.computeProjectionMatrixFOV(fov, aspect, near, far)
        # Note: with a DIRECT connection, ER_BULLET_HARDWARE_OPENGL is only
        # honored when the EGL plugin is loaded; otherwise pybullet falls
        # back to the software TinyRenderer.
        images = p.getCameraImage(
            width,
            height,
            view_matrix,
            projection_matrix,
            renderer=p.ER_BULLET_HARDWARE_OPENGL,
        )
        rgb = np.reshape(images[2], (height, width, 4))
        depth = np.reshape(images[3], (height, width))
        seg = np.reshape(images[4], (height, width))
        rgb_image = rgb[..., :3]
        # Linearize the non-linear OpenGL depth buffer to metric depth, then
        # store it as uint16 millimeters.
        depth_image = far * near / (far - (far - near) * depth)
        depth_image = (np.asanyarray(depth_image).astype(np.float32) * 1000.0).astype(np.uint16)
        # Segmentation pixel values are pybullet body ids; the ground plane
        # is body 0, so the scene objects start at id 1 in load order.
        for idx, object_name in enumerate(self.scene.keys()):
            self.segmentation_labels[str(idx + 1)] = object_name
        return {
            "rgb": rgb_image,
            "depth": depth_image,
            "segmentation": seg,
            "segmentation_labels": self.segmentation_labels,
            "camera_params": self.camera_params,
        }
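
    # Sketch: back-projecting the returned depth map into a camera-frame
    # point cloud with the pinhole intrinsics from camera_params
    # (depth_to_points is a hypothetical helper, not part of this module;
    # depth_mm is assumed to be the uint16 millimeter image returned above):
    #
    #   def depth_to_points(depth_mm, params):
    #       z = depth_mm.astype(np.float32) / 1000.0
    #       v, u = np.indices(z.shape)
    #       x = (u - params["cx"]) * z / params["fx"]
    #       y = (v - params["cy"]) * z / params["fy"]
    #       return np.stack([x, y, z], axis=-1)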

    def generate_images(self, camera_pose):
        results = self._render_image(np.asarray(camera_pose))
        p.stepSimulation()
        return results

    def run(self):
        self.app.run(host="0.0.0.0", port=self.port)


# Camera model derived from Isaac Sim's default camera: the apertures and
# focal length appear to be in millimeters, and the field of view is taken
# from the diagonal aperture. Note that p.computeProjectionMatrixFOV treats
# its fov argument as the *vertical* field of view, and that CAM_FX and
# CAM_FY below work out to the same value.
ISAAC_SIM_CAM_H_APERTURE = 20.955
ISAAC_SIM_CAM_V_APERTURE = 15.2908
ISAAC_SIM_FOCAL_LENGTH = 39
ISAAC_SIM_CAM_D_APERTURE = math.sqrt(ISAAC_SIM_CAM_H_APERTURE**2 + ISAAC_SIM_CAM_V_APERTURE**2)

CAM_WIDTH = 640
CAM_HEIGHT = 480
CAM_FOV = 2 * math.atan(ISAAC_SIM_CAM_D_APERTURE / (2 * ISAAC_SIM_FOCAL_LENGTH)) / math.pi * 180
CAM_NEAR = 0.001
CAM_FAR = 10
CAM_CX = CAM_WIDTH / 2
CAM_CY = CAM_HEIGHT / 2
CAM_FX = 1 / math.tan(CAM_FOV * math.pi / 180.0 / 2.0) * CAM_WIDTH / 2
CAM_FY = 1 / (CAM_HEIGHT / CAM_WIDTH * math.tan(CAM_FOV * math.pi / 180.0 / 2.0)) * CAM_HEIGHT / 2

CAMERA_PARAMS = {
    "width": CAM_WIDTH,
    "height": CAM_HEIGHT,
    "fov": CAM_FOV,
    "near": CAM_NEAR,
    "far": CAM_FAR,
    "cx": CAM_CX,
    "cy": CAM_CY,
    "fx": CAM_FX,
    "fy": CAM_FY,
}
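
# Rough sanity values for the constants above (computed by hand, worth
# re-checking): the diagonal aperture is ~25.94 mm, so CAM_FOV ~= 36.8
# degrees and CAM_FX == CAM_FY ~= 962 px at 640x480.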


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, default="configs/server_view_generator.yaml")
    args = parser.parse_args()

    vg = ViewGenerator(args.config, CAMERA_PARAMS)
    vg.run()