diff --git a/app.py b/app.py index 96dbdfb..fb94638 100644 --- a/app.py +++ b/app.py @@ -37,15 +37,59 @@ def get_scene_list(): scene_list = [d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))] return jsonify({"scene_list": scene_list, "success": True}) +@app.route('/get_label_list', methods=['POST']) +def get_label_list(): + data = request.json + dataset_name = data.get('dataset_name') + scene_name = data.get("scene_name") + + scene_dir = os.path.join(ROOT, dataset_name, scene_name) + label_dir = os.path.join(scene_dir, "label") + + if not os.path.exists(scene_dir): + print(f"Scene not found: {scene_dir}") + return jsonify({"error": "Scene not found"}), 404 + label_list = [] + global_min_coverage_rate = 1 + global_max_coverage_rate = 0 + global_total_coverage_rate = 0 + for label_file in os.listdir(label_dir): + if label_file.endswith(".json"): + label_path = os.path.join(label_dir, label_file) + with open(label_path, 'r') as f: + label_data = json.load(f) + max_coveraget_rate = label_data.get('max_coverage_rate') + if max_coveraget_rate > global_max_coverage_rate: + global_max_coverage_rate = max_coveraget_rate + if max_coveraget_rate < global_min_coverage_rate: + global_min_coverage_rate = max_coveraget_rate + label_list.append({ + "label_name": label_file, + "max_coverage_rate": round(max_coveraget_rate*100,3) + }) + global_total_coverage_rate += max_coveraget_rate + if len(label_list) == 0: + global_mean_coverage_rate = 0 + else: + global_mean_coverage_rate = global_total_coverage_rate / len(label_list) + + return jsonify({"label_list": label_list, + "total_max_coverage_rate": round(global_max_coverage_rate*100, 3), + "total_min_coverage_rate": round(global_min_coverage_rate*100, 3), + "total_mean_coverage_rate": round(global_mean_coverage_rate*100, 3), + "success": True}) + + @app.route('/get_scene_info', methods=['POST']) def get_scene_info(): data = request.json dataset_name = data.get('dataset_name') scene_name = data.get('scene_name') + label_name = data.get('label_name') scene_path = os.path.join(ROOT, dataset_name, scene_name) camera_params_path = os.path.join(scene_path, 'camera_params') - label_json_path = os.path.join(scene_path, 'label.json') + label_json_path = os.path.join(scene_path, "label", label_name) if not os.path.exists(scene_path) or not os.path.exists(label_json_path): @@ -228,4 +272,4 @@ def analysis_inference_result(): return jsonify(res) if __name__ == '__main__': - app.run(debug=True, port=13333) + app.run(debug=True, port=13333,host="0.0.0.0") diff --git a/data_load.py b/data_load.py index 2e440a0..e9797a1 100644 --- a/data_load.py +++ b/data_load.py @@ -3,23 +3,32 @@ import numpy as np import json import cv2 import trimesh +import torch from pts import PtsUtil class DataLoadUtil: - + DISPLAY_TABLE_POSITION = np.asarray([0,0,0.895]) @staticmethod def get_path(root, scene_name, frame_idx): path = os.path.join(root, scene_name, f"{frame_idx}") return path @staticmethod - def get_label_path(root, scene_name): - path = os.path.join(root,scene_name, f"label.json") + def get_label_num(root, scene_name): + label_dir = os.path.join(root,scene_name,"label") + return len(os.listdir(label_dir)) + + @staticmethod + def get_label_path(root, scene_name, seq_idx): + label_dir = os.path.join(root,scene_name,"label") + if not os.path.exists(label_dir): + os.makedirs(label_dir) + path = os.path.join(label_dir,f"{seq_idx}.json") return path @staticmethod - def get_sampled_model_points_path(root, scene_name): - path = os.path.join(root,scene_name, f"sampled_model_points.txt") + def get_label_path_old(root, scene_name): + path = os.path.join(root,scene_name,"label.json") return path @staticmethod @@ -27,17 +36,6 @@ class DataLoadUtil: camera_params_path = os.path.join(root, scene_name, "camera_params") return len(os.listdir(camera_params_path)) - @staticmethod - def load_downsampled_world_model_points(root, scene_name): - model_path = DataLoadUtil.get_sampled_model_points_path(root, scene_name) - model_points = np.loadtxt(model_path) - return model_points - - @staticmethod - def save_downsampled_world_model_points(root, scene_name, model_points): - model_path = DataLoadUtil.get_sampled_model_points_path(root, scene_name) - np.savetxt(model_path, model_points) - @staticmethod def load_mesh_at(model_dir, object_name, world_object_pose): model_path = os.path.join(model_dir, object_name, "mesh.obj") @@ -45,6 +43,15 @@ class DataLoadUtil: mesh.apply_transform(world_object_pose) return mesh + @staticmethod + def get_bbox_diag(model_dir, object_name): + model_path = os.path.join(model_dir, object_name, "mesh.obj") + mesh = trimesh.load(model_path) + bbox = mesh.bounding_box.extents + diagonal_length = np.linalg.norm(bbox) + return diagonal_length + + @staticmethod def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose): mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose) @@ -52,11 +59,14 @@ class DataLoadUtil: mesh.export(model_path) @staticmethod - def save_target_mesh_at_world_space(root, model_dir, scene_name): + def save_target_mesh_at_world_space(root, model_dir, scene_name, display_table_as_world_space_origin=True): scene_info = DataLoadUtil.load_scene_info(root, scene_name) target_name = scene_info["target_name"] transformation = scene_info[target_name] - location = transformation["location"] + if display_table_as_world_space_origin: + location = transformation["location"] - DataLoadUtil.DISPLAY_TABLE_POSITION + else: + location = transformation["location"] rotation_euler = transformation["rotation_euler"] pose_mat = trimesh.transformations.euler_matrix(*rotation_euler) pose_mat[:3, 3] = location @@ -140,6 +150,12 @@ class DataLoadUtil: rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR) return rgb_image + @staticmethod + def load_from_preprocessed_pts(path): + npy_path = os.path.join(os.path.dirname(path), "points", os.path.basename(path) + ".npy") + pts = np.load(npy_path) + return pts + @staticmethod def cam_pose_transformation(cam_pose_before): offset = np.asarray([ @@ -151,12 +167,16 @@ class DataLoadUtil: return cam_pose_after @staticmethod - def load_cam_info(path, binocular=False): + def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True): camera_params_path = os.path.join(os.path.dirname(path), "camera_params", os.path.basename(path) + ".json") with open(camera_params_path, 'r') as f: label_data = json.load(f) cam_to_world = np.asarray(label_data["extrinsic"]) cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world) + world_to_display_table = np.eye(4) + world_to_display_table[:3, 3] = - DataLoadUtil.DISPLAY_TABLE_POSITION + if display_table_as_world_space_origin: + cam_to_world = np.dot(world_to_display_table, cam_to_world) cam_intrinsic = np.asarray(label_data["intrinsic"]) cam_info = { "cam_to_world": cam_to_world, @@ -167,9 +187,27 @@ class DataLoadUtil: if binocular: cam_to_world_R = np.asarray(label_data["extrinsic_R"]) cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R) + cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"]) + cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O) + if display_table_as_world_space_origin: + cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O) + cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R) + cam_info["cam_to_world_O"] = cam_to_world_O cam_info["cam_to_world_R"] = cam_to_world_R return cam_info + @staticmethod + def get_real_cam_O_from_cam_L(cam_L, cam_O_to_cam_L, display_table_as_world_space_origin=True): + if isinstance(cam_L, torch.Tensor): + cam_L = cam_L.cpu().numpy() + nO_to_display_table_pose = cam_L @ cam_O_to_cam_L + if display_table_as_world_space_origin: + display_table_to_world = np.eye(4) + display_table_to_world[:3, 3] = DataLoadUtil.DISPLAY_TABLE_POSITION + nO_to_world_pose = np.dot(display_table_to_world, nO_to_display_table_pose) + nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose) + return nO_to_world_pose + @staticmethod def get_target_point_cloud(depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0,255,0,255)): h, w = depth.shape @@ -192,6 +230,24 @@ class DataLoadUtil: "points_world": target_points_world, "points_camera": target_points_camera } + + @staticmethod + def get_point_cloud(depth, cam_intrinsic, cam_extrinsic): + h, w = depth.shape + i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy') + + z = depth + x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0] + y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1] + + points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3) + points_camera_aug = np.concatenate([points_camera, np.ones((points_camera.shape[0], 1))], axis=-1) + + points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3] + return { + "points_world": points_world, + "points_camera": points_camera + } @staticmethod def get_target_point_cloud_world_from_path(path, binocular=False, random_downsample_N=65536, voxel_size = 0.005, target_mask_label=(0,255,0,255)): @@ -232,7 +288,9 @@ class DataLoadUtil: return overlapping_points @staticmethod - def load_points_normals(root, scene_name): + def load_points_normals(root, scene_name, display_table_as_world_space_origin=True): points_path = os.path.join(root, scene_name, "points_and_normals.txt") points_normals = np.loadtxt(points_path) + if display_table_as_world_space_origin: + points_normals[:,:3] = points_normals[:,:3] - DataLoadUtil.DISPLAY_TABLE_POSITION return points_normals \ No newline at end of file diff --git a/pts.py b/pts.py index f41ad7b..c15554a 100644 --- a/pts.py +++ b/pts.py @@ -1,5 +1,6 @@ import numpy as np import open3d as o3d +import torch class PtsUtil: @@ -18,5 +19,10 @@ class PtsUtil: @staticmethod def random_downsample_point_cloud(point_cloud, num_points): - idx = np.random.choice(len(point_cloud), num_points, replace=False) + idx = np.random.choice(len(point_cloud), num_points, replace=True) + return point_cloud[idx] + + @staticmethod + def random_downsample_point_cloud_tensor(point_cloud, num_points): + idx = torch.randint(0, len(point_cloud), (num_points,)) return point_cloud[idx] \ No newline at end of file