# nbv_grasping/runners/view_generator.py
import os
import sys
import math
import pickle

import numpy as np
import pybullet as p
import pybullet_data
from flask import Flask, request, jsonify

# Walk two directory levels up from this file to locate the project root and
# make the project's packages importable before the local imports below.
path = os.path.abspath(__file__)
for i in range(2):
    path = os.path.dirname(path)
PROJECT_ROOT = path
sys.path.append(PROJECT_ROOT)

from runners.runner import Runner
from configs.config import ConfigManager


class ViewGenerator(Runner):
    def __init__(self, config_path, camera_params) -> None:
        super().__init__(config_path)
        self.data_dir = ConfigManager.get("settings", "dataset", "data_dir")
        self.port = ConfigManager.get("settings", "web_api", "port")
        self.camera_params = camera_params
        # Scale factor applied to every mesh; the models are presumably
        # authored in millimeters and converted to meters here.
        self.object_model_scale = [0.001, 0.001, 0.001]
        self.segmentation_labels = {}
        self.app = Flask(__name__)
        self._init_routes()

    def create_experiment(self, backup_name=None):
        return super().create_experiment(backup_name)

    def load_experiment(self, backup_name=None):
        return super().load_experiment(backup_name)

    def _init_routes(self):
        @self.app.route("/get_images", methods=["POST"])
        def get_images_api():
            data = request.get_json()
            camera_pose = data["camera_pose"]
            scene = data["scene"]
            data_type = data["data_type"]
            source = data["source"]
            scene_path = os.path.join(self.data_dir, source, data_type, scene)
            model_dir = os.path.join(self.data_dir, source, "objects")
            self.load_scene(scene_path, model_dir)
            result = self.generate_images(camera_pose)
            # Numpy arrays are not JSON-serializable, so convert them to lists.
            result = {
                "rgb": result["rgb"].tolist(),
                "depth": result["depth"].tolist(),
                "segmentation": result["segmentation"].tolist(),
                "segmentation_labels": result["segmentation_labels"],
                "camera_params": result["camera_params"],
            }
            return jsonify(result)
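
    # A minimal client sketch for this endpoint (assumptions: the server is
    # reachable on localhost, the port and scene/source names below are
    # placeholders, and `requests` is installed):
    #
    #   import requests
    #   resp = requests.post(
    #       "http://localhost:8888/get_images",
    #       json={
    #           "camera_pose": np.eye(4).tolist(),  # 4x4 camera-to-world pose
    #           "scene": "scene_0000",
    #           "data_type": "sample",
    #           "source": "my_dataset",
    #       },
    #   )
    #   images = resp.json()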

    def load_scene(self, scene_path, model_dir):
        scene_path = os.path.join(scene_path, "scene.pickle")
        # Use a context manager so the pickle file handle is closed promptly.
        with open(scene_path, "rb") as f:
            self.scene = pickle.load(f)
        self._initialize_pybullet_scene(model_dir)
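
    # Layout of scene.pickle as inferred from the usage below (a sketch, not
    # a verified spec): a dict keyed by object name, each entry holding a
    # position and what appears to be a pybullet [x, y, z, w] quaternion.
    #
    #   {
    #       "<object_name>": {
    #           "position": [x, y, z],
    #           "rotation": [qx, qy, qz, qw],
    #       },
    #       ...
    #   }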

    def _initialize_pybullet_scene(self, model_dir):
        if p.isConnected():
            p.resetSimulation()
        else:
            p.connect(p.DIRECT)
        p.setAdditionalSearchPath(pybullet_data.getDataPath())
        p.setGravity(0, 0, 0)
        p.loadURDF("plane100.urdf")
        for obj_name in self.scene.keys():
            orientation = self.scene[obj_name]["rotation"]
            position = self.scene[obj_name]["position"]
            # The last four characters look like an instance suffix; strip
            # them to recover the class (category) directory name.
            class_name = obj_name[:-4]
            obj_path = os.path.join(model_dir, class_name, obj_name, "Scan", "Simp.obj")
            self._load_obj_to_pybullet(
                obj_file_path=obj_path,
                position=position,
                orientation=orientation,
                scale=self.object_model_scale,
            )

    def _load_obj_to_pybullet(self, obj_file_path, position, orientation, scale):
        visual_ind = p.createVisualShape(
            shapeType=p.GEOM_MESH,
            fileName=obj_file_path,
            rgbaColor=[1, 1, 1, 1],
            specularColor=[0.4, 0.4, 0],
            visualFramePosition=[0, 0, 0],
            meshScale=scale,
        )
        # No collision shape is attached, so the bodies are render-only;
        # that is consistent with gravity being disabled above.
        p.createMultiBody(
            baseMass=1,
            baseVisualShapeIndex=visual_ind,
            basePosition=position,
            baseOrientation=orientation,
            useMaximalCoordinates=True,
        )

    def _render_image(self, camera_pose):
        width = self.camera_params["width"]
        height = self.camera_params["height"]
        fov = self.camera_params["fov"]
        aspect = width / height
        near = self.camera_params["near"]
        far = self.camera_params["far"]
        # The look-at target is a point 1 m ahead of the camera along its
        # +Z axis; the up vector is the negated Y column of the pose.
        T = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 1], [0, 0, 0, 1]])
        look_at_T = camera_pose.dot(T)
        view_matrix = p.computeViewMatrix(
            [camera_pose[0, 3], camera_pose[1, 3], camera_pose[2, 3]],
            [look_at_T[0, 3], look_at_T[1, 3], look_at_T[2, 3]],
            [-camera_pose[0, 1], -camera_pose[1, 1], -camera_pose[2, 1]],
        )
        projection_matrix = p.computeProjectionMatrixFOV(fov, aspect, near, far)
        # Note: with a DIRECT connection, ER_BULLET_HARDWARE_OPENGL is only
        # honored when the EGL plugin is loaded; otherwise pybullet falls
        # back to the software TinyRenderer.
        images = p.getCameraImage(
            width,
            height,
            view_matrix,
            projection_matrix,
            renderer=p.ER_BULLET_HARDWARE_OPENGL,
        )
        rgb = np.reshape(images[2], (height, width, 4))
        depth = np.reshape(images[3], (height, width))
        seg = np.reshape(images[4], (height, width))
        rgb_image = rgb[..., :3]
        # Linearize the non-linear OpenGL depth buffer to metric depth, then
        # store it as uint16 millimeters.
        depth_image = far * near / (far - (far - near) * depth)
        depth_image = (np.asanyarray(depth_image).astype(np.float32) * 1000.0).astype(np.uint16)
        # Segmentation pixel values are pybullet body ids; the ground plane
        # is body 0, so the scene objects start at id 1 in load order.
        for idx, object_name in enumerate(self.scene.keys()):
            self.segmentation_labels[str(idx + 1)] = object_name
        return {
            "rgb": rgb_image,
            "depth": depth_image,
            "segmentation": seg,
            "segmentation_labels": self.segmentation_labels,
            "camera_params": self.camera_params,
        }
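
    # Sketch: back-projecting the returned depth map into a camera-frame
    # point cloud with the pinhole intrinsics from camera_params
    # (depth_to_points is a hypothetical helper, not part of this module;
    # depth_mm is assumed to be the uint16 millimeter image returned above):
    #
    #   def depth_to_points(depth_mm, params):
    #       z = depth_mm.astype(np.float32) / 1000.0
    #       v, u = np.indices(z.shape)
    #       x = (u - params["cx"]) * z / params["fx"]
    #       y = (v - params["cy"]) * z / params["fy"]
    #       return np.stack([x, y, z], axis=-1)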

    def generate_images(self, camera_pose):
        results = self._render_image(np.asarray(camera_pose))
        p.stepSimulation()
        return results

    def run(self):
        self.app.run(host="0.0.0.0", port=self.port)


# Camera model derived from Isaac Sim's default camera: the apertures and
# focal length appear to be in millimeters, and the field of view is taken
# from the diagonal aperture. Note that p.computeProjectionMatrixFOV treats
# its fov argument as the *vertical* field of view, and that CAM_FX and
# CAM_FY below work out to the same value.
ISAAC_SIM_CAM_H_APERTURE = 20.955
ISAAC_SIM_CAM_V_APERTURE = 15.2908
ISAAC_SIM_FOCAL_LENGTH = 39
ISAAC_SIM_CAM_D_APERTURE = math.sqrt(ISAAC_SIM_CAM_H_APERTURE**2 + ISAAC_SIM_CAM_V_APERTURE**2)

CAM_WIDTH = 640
CAM_HEIGHT = 480
CAM_FOV = 2 * math.atan(ISAAC_SIM_CAM_D_APERTURE / (2 * ISAAC_SIM_FOCAL_LENGTH)) / math.pi * 180
CAM_NEAR = 0.001
CAM_FAR = 10
CAM_CX = CAM_WIDTH / 2
CAM_CY = CAM_HEIGHT / 2
CAM_FX = 1 / math.tan(CAM_FOV * math.pi / 180.0 / 2.0) * CAM_WIDTH / 2
CAM_FY = 1 / (CAM_HEIGHT / CAM_WIDTH * math.tan(CAM_FOV * math.pi / 180.0 / 2.0)) * CAM_HEIGHT / 2

CAMERA_PARAMS = {
    "width": CAM_WIDTH,
    "height": CAM_HEIGHT,
    "fov": CAM_FOV,
    "near": CAM_NEAR,
    "far": CAM_FAR,
    "cx": CAM_CX,
    "cy": CAM_CY,
    "fx": CAM_FX,
    "fy": CAM_FY,
}
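
# Rough sanity values for the constants above (computed by hand, worth
# re-checking): the diagonal aperture is ~25.94 mm, so CAM_FOV ~= 36.8
# degrees and CAM_FX == CAM_FY ~= 962 px at 640x480.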


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, default="configs/server_view_generator.yaml")
    args = parser.parse_args()

    vg = ViewGenerator(args.config, CAMERA_PARAMS)
    vg.run()