diff --git a/bop_toolkit_lib/config.py b/bop_toolkit_lib/config.py
index 30a4022..54448cb 100644
--- a/bop_toolkit_lib/config.py
+++ b/bop_toolkit_lib/config.py
@@ -23,7 +23,7 @@
 ######## Extended ########
 
 # Folder for outputs (e.g. visualizations).
-output_path = r"/path/to/output/folder"
+output_path = r"/tmp"
 
 # For offscreen C++ rendering: Path to the build folder of bop_renderer (github.com/thodan/bop_renderer).
 bop_renderer_path = r"/path/to/bop_renderer/build"
diff --git a/bop_toolkit_lib/dataset_params.py b/bop_toolkit_lib/dataset_params.py
index ea68dcd..e7306ef 100644
--- a/bop_toolkit_lib/dataset_params.py
+++ b/bop_toolkit_lib/dataset_params.py
@@ -89,6 +89,8 @@ def get_model_params(datasets_path, dataset_name, model_type=None):
         "hopev2": list(range(1, 29)),
         "hot3d": list(range(1, 34)),
         "handal": list(range(1, 41)),
+        "ipd": list(range(0, 21)),
+        "xyz": list(range(1, 18)),
     }[dataset_name]
 
     # ID's of objects with ambiguous views evaluated using the ADI pose error
@@ -110,6 +112,8 @@ def get_model_params(datasets_path, dataset_name, model_type=None):
         "hopev2": [],
         "hot3d": [1, 2, 3, 5, 22, 24, 25, 29, 30, 32],
         "handal": [26, 35, 36, 37, 38, 39, 40],
+        "ipd": [],
+        "xyz": [],
     }[dataset_name]
 
     # T-LESS includes two types of object models, CAD and reconstructed.
@@ -433,6 +437,86 @@ def hot3d_eval_modality(scene_id):
             "gray2": "jpg",
         }
 
+        if split == "test":
+            p["depth_range"] = None  # Not calculated yet.
+            p["azimuth_range"] = None  # Not calculated yet.
+            p["elev_range"] = None  # Not calculated yet.
+
+        supported_error_types = ["ad", "add", "adi", "mssd", "mspd"]
+
+    elif dataset_name == "ipd":
+        modalities_have_separate_annotations = True
+        p["im_modalities"] = ["rgb_photoneo", "depth_photoneo"]
+        p["test_scene_ids"] = list(range(0, 1))
+        p["scene_ids"] = {
+            "test": p["test_scene_ids"],
+            "train": p["test_scene_ids"],
+            "train_pbr": p["test_scene_ids"],
+        }[split]
+        # p["im_size"] = (2400, 2400)
+        # p["im_size"] = (1936, 1216)
+
+        p["photoneo_im_size"] = (2064, 1544)
+        p["im_size"] = p["photoneo_im_size"]
+
+        def ipd_eval_modality(scene_id):
+            return "rgb_photoneo"
+
+        p["eval_modality"] = ipd_eval_modality
+
+        exts = {
+            "rgb_photoneo": ".png",
+            "depth_photoneo": ".png",
+        }
+
+        if split == "test":
+            p["depth_range"] = None  # Not calculated yet.
+            p["azimuth_range"] = None  # Not calculated yet.
+            p["elev_range"] = None  # Not calculated yet.
+
+        supported_error_types = ["ad", "add", "adi", "mssd", "mspd"]
+
+    elif dataset_name == "xyz":
+        modalities_have_separate_annotations = True
+        p["im_modalities"] = [
+            "gray_photoneo", "depth_photoneo", "gray_xyz", "depth_xyz",
+            "rgb_realsense", "depth_realsense",
+        ]
+        p["test_scene_ids"] = list(range(1, 87))
+        p["scene_ids"] = {
+            "test": p["test_scene_ids"],
+            "train": p["test_scene_ids"],
+            "train_pbr": list(range(50)),
+        }[split]
+
+        # Note: these image sizes are probably mixed up in the real data.
+        p["photoneo_im_size"] = (1440, 1080)
+        p["realsense_im_size"] = (1280, 720)
+        p["xyz_im_size"] = (2064, 1544)
+        # PBR image size.
+        p["im_size"] = p["photoneo_im_size"]
+
+        def xyz_eval_modality(scene_id):
+            return "gray_xyz"
+
+        p["eval_modality"] = xyz_eval_modality
+
+        if "pbr" == split_type:
+            # The PBR data is in classical BOP format without sensor names.
+ p["eval_modality"] = None + modalities_have_separate_annotations = False + + exts = { + "gray_photoneo": ".png", + "depth_photoneo": ".png", + "gray_xyz": ".png", + "depth_xyz": ".png", + "rgb_realsense": ".png", + "depth_realsense": ".png", + } + rgb_ext = ".png" + if split == "test": p["depth_range"] = None # Not calculated yet. p["azimuth_range"] = None # Not calculated yet. @@ -500,7 +584,14 @@ def hot3d_eval_modality(scene_id): else: assert exts is not None, "Need to set 'exts' for dataset {}".format() + present_scene_id = get_present_scene_ids(p)[0] for moda in p["im_modalities"]: + sensor_moda = moda + if not os.path.exists(join( + split_path, "{present_scene_id:06d}", "scene_gt_{}.json".format(moda) + )): + # If modalities have aligned extrinsics/intrinsics they are combined in one file + sensor_moda = moda[(moda.find("_") + 1):] p.update( { # Path template to modality image. @@ -509,33 +600,34 @@ def hot3d_eval_modality(scene_id): ), # Path template to a file with per-image camera parameters. "scene_camera_{}_tpath".format(moda): join( - split_path, "{scene_id:06d}", "scene_camera_{}.json".format(moda) + split_path, "{scene_id:06d}", "scene_camera_{}.json".format(sensor_moda) ), # Path template to a file with GT annotations. "scene_gt_{}_tpath".format(moda): join( - split_path, "{scene_id:06d}", "scene_gt_{}.json".format(moda) + split_path, "{scene_id:06d}", "scene_gt_{}.json".format(sensor_moda) ), # Path template to a file with meta information about the GT annotations. "scene_gt_info_{}_tpath".format(moda): join( - split_path, "{scene_id:06d}", "scene_gt_info_{}.json".format(moda) + split_path, "{scene_id:06d}", "scene_gt_info_{}.json".format(sensor_moda) ), # Path template to a file with the coco GT annotations. "scene_gt_coco_{}_tpath".format(moda): join( - split_path, "{scene_id:06d}", "scene_gt_coco_{}.json".format(moda) + split_path, "{scene_id:06d}", "scene_gt_coco_{}.json".format(sensor_moda) ), # Path template to a mask of the full object silhouette. "mask_{}_tpath".format(moda): join( - split_path, "{scene_id:06d}", "mask_{}".format(moda), "{im_id:06d}_{gt_id:06d}.png" + split_path, "{scene_id:06d}", "mask_{}".format(sensor_moda), "{im_id:06d}_{gt_id:06d}.png" ), # Path template to a mask of the visible part of an object silhouette. "mask_visib_{}_tpath".format(moda): join( split_path, "{scene_id:06d}", - "mask_visib_{}".format(moda), + "mask_visib_{}".format(sensor_moda), "{im_id:06d}_{gt_id:06d}.png", ), } ) + print(p) return p @@ -559,11 +651,13 @@ def scene_tpaths_keys(eval_modality, scene_id=None): tpath_keys = [ "scene_gt_tpath", "scene_gt_info_tpath", "scene_camera_tpath", - "scene_gt_coco_tpath", "mask_tpath", "mask_visib_tpath" + "scene_gt_coco_tpath", "mask_tpath", "mask_visib_tpath", "rgb_tpath", + "gray_tpath", "depth_tpath" ] tpath_keys_multi = [ "scene_gt_{}_tpath", "scene_gt_info_{}_tpath", "scene_camera_{}_tpath", - "scene_gt_coco_{}_tpath", "mask_{}_tpath", "mask_visib_{}_tpath" + "scene_gt_coco_{}_tpath", "mask_{}_tpath", "mask_visib_{}_tpath", "{}_tpath", + "{}_tpath", "depth_{}_tpath" ] assert len(tpath_keys) == len(tpath_keys_multi) @@ -580,7 +674,10 @@ def scene_tpaths_keys(eval_modality, scene_id=None): tpath_keys_dic[key] = key_multi.format(eval_modality[scene_id]) else: raise ValueError("eval_modality type not supported, either None, str, callable or dictionary") - + # TODO: Find a nicer solution. e.g. split modality and sensor throughout the bop toolkit. 
+        parts = tpath_keys_dic["depth_tpath"].split("_")
+        parts.pop(1)
+        tpath_keys_dic["depth_tpath"] = "_".join(parts)
     return tpath_keys_dic
diff --git a/bop_toolkit_lib/inout.py b/bop_toolkit_lib/inout.py
index 772b5e7..464d70d 100644
--- a/bop_toolkit_lib/inout.py
+++ b/bop_toolkit_lib/inout.py
@@ -621,6 +621,7 @@ def load_ply(path):
         "float": ("f", 4),
         "double": ("d", 8),
         "int": ("i", 4),
+        "uint": ("I", 4),
         "uchar": ("B", 1),
     }
diff --git a/bop_toolkit_lib/visualization.py b/bop_toolkit_lib/visualization.py
index e322b84..3ffbedb 100644
--- a/bop_toolkit_lib/visualization.py
+++ b/bop_toolkit_lib/visualization.py
@@ -268,6 +268,8 @@ def vis_object_poses(
             {"name": "min diff", "fmt": ":.3f", "val": np.min(depth_diff_valid)},
             {"name": "max diff", "fmt": ":.3f", "val": np.max(depth_diff_valid)},
             {"name": "mean diff", "fmt": ":.3f", "val": np.mean(depth_diff_valid)},
+            {"name": "median abs diff", "fmt": ":.3f", "val": np.median(np.abs(depth_diff_valid))},
+            {"name": "25th percentile abs diff", "fmt": ":.3f", "val": np.percentile(np.abs(depth_diff_valid), 25)},
         ]
         depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info)
         inout.save_im(vis_depth_diff_path, depth_diff_vis)
diff --git a/requirements.txt b/requirements.txt
index deeafb4..0f82ac9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ kiwisolver==1.3.1
 matplotlib==2.2.4
 imageio==2.5.0
 pypng==0.0.19
-Cython==0.29.24
+Cython>=0.29.24
 PyOpenGL==3.1.0
 triangle>=20190115.2
 glumpy==1.1.0
diff --git a/scripts/vis_gt_poses.py b/scripts/vis_gt_poses.py
index ac8aa60..a3dc727 100644
--- a/scripts/vis_gt_poses.py
+++ b/scripts/vis_gt_poses.py
@@ -35,7 +35,7 @@
 ################################################################################
 p = {
     # See dataset_params.py for options.
-    "dataset": "lm",
+    "dataset": "ipd",
     # Dataset split. Options: 'train', 'val', 'test'.
     "dataset_split": "test",
     # Dataset split type. None = default. See dataset_params.py for options.
@@ -50,6 +50,10 @@
     "scene_ids": [],
     "im_ids": [],
     "gt_ids": [],
+    # Which sensor to visualize. By default the evaluation modality set in
+    # dataset_params.py is used. Leave empty ("") for PBR images and classical BOP
+    # core datasets; set to a sensor name for the new multi-sensor sets, e.g. "photoneo".
+    "sensor": "",
     # ---------------------------------------------------------------------------------
     # Next parameters apply only to classical BOP19 datasets (not the H3 BOP24 format)
@@ -91,7 +95,7 @@
     raise ImportError("Missing hand_tracking_toolkit dependency, mandatory for HOT3D dataset.")
 
 # if HOT3D dataset is used, next parameters are set
-if p["dataset"] == "hot3d":
+if p["dataset"] in ["hot3d"]:
     p["vis_rgb"] = True
     p["vis_rgb_resolve_visib"] = False
     p["vis_depth_diff"] = False
@@ -104,6 +108,11 @@
 model_type = "eval"  # None = default.
 dp_model = dataset_params.get_model_params(p["datasets_path"], p["dataset"], model_type)
 
+# Find the color modality of the specified sensor.
+if p["sensor"]:
+    sensor_mods = [mod.split("_")[0] for mod in dp_split["im_modalities"] if p["sensor"] in mod]
+    p["modality"] = [mod for mod in sensor_mods if any(col in mod for col in ["rgb", "gray"])][0]
+
 # Load colors.
 colors_path = os.path.join(os.path.dirname(visualization.__file__), "colors.json")
 colors = inout.load_json(colors_path)
@@ -142,11 +151,15 @@
     aria_im_size = dp_split["aria_im_size"][dp_split["aria_eval_modality"]]
     quest3_ren = renderer_htt.RendererHtt(quest3_im_size, p["renderer_type"], shading="flat")
     aria_ren = renderer_htt.RendererHtt(aria_im_size, p["renderer_type"], shading="flat")
-else:  # classical BOP format
+elif p["sensor"]:  # classical BOP format, image size of the selected sensor
+    width, height = dp_split["{}_im_size".format(p["sensor"])]
+else:  # classical BOP format
     width, height = dp_split["im_size"]
-    ren = renderer.create_renderer(
-        width, height, p["renderer_type"], mode=renderer_mode, shading="flat"
-    )
+
+if p["dataset"] != "hot3d":
+    ren = renderer.create_renderer(
+        width, height, p["renderer_type"], mode=renderer_mode, shading="flat"
+    )
 
 # Load object models.
 models = {}
@@ -164,7 +177,11 @@
 scene_ids = dataset_params.get_present_scene_ids(dp_split)
 for scene_id in scene_ids:
-    tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id)
+    if p["sensor"]:
+        tpath_keys = dataset_params.scene_tpaths_keys("{}_{}".format(p["modality"], p["sensor"]))
+    else:
+        tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id)
+
     if p["dataset"] == "hot3d":  # for other dataset the renderer does not change
         # find which renderer to use (quest3 or aria)
         if scene_id in dp_split["test_quest3_scene_ids"] or scene_id in dp_split["train_quest3_scene_ids"]:
@@ -224,10 +241,16 @@
                 }
             )
 
-        if p["dataset"] == "hot3d":
+        if p["dataset"] in ["hot3d", "ipd", "xyz"]:
             # load the image of the eval modality
+            img_path = dp_split[tpath_keys["rgb_tpath"]].format(scene_id=scene_id, im_id=im_id)
+            if not os.path.exists(img_path):
+                print("rgb path {} does not exist, looking for gray images".format(img_path))
+                img_path = dp_split[tpath_keys["gray_tpath"]].format(scene_id=scene_id, im_id=im_id)
             rgb = inout.load_im(
-                dp_split[dp_split["eval_modality"](scene_id) + "_tpath"].format(scene_id=scene_id, im_id=im_id)
+                img_path
             )
             # if image is grayscale (quest3), convert it to 3 channels
             if rgb.ndim == 2:
@@ -249,20 +272,26 @@
                 raise ValueError("RGB nor gray images are available.")
 
         depth = None
-        if p["dataset"] != "hot3d":
+        if p["dataset"] not in ["hot3d"]:
            if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]):
                 depth = inout.load_depth(
-                    dp_split["depth_tpath"].format(scene_id=scene_id, im_id=im_id)
+                    dp_split[tpath_keys["depth_tpath"]].format(scene_id=scene_id, im_id=im_id)
                 )
                 depth *= scene_camera[im_id]["depth_scale"]  # Convert to [mm].
 
         # Path to the output RGB visualization.
         vis_rgb_path = None
         if p["vis_rgb"]:
+            split = p["dataset_split"] if not p["sensor"] else p["dataset_split"] + "_{}".format(p["sensor"])
             vis_rgb_path = p["vis_rgb_tpath"].format(
                 vis_path=p["vis_path"],
                 dataset=p["dataset"],
-                split=p["dataset_split"],
+                split=split,
                 scene_id=scene_id,
                 im_id=im_id,
             )
@@ -270,11 +299,12 @@
         # Path to the output depth difference visualization.
         vis_depth_diff_path = None
         if p["dataset"] != "hot3d":
+            split = p["dataset_split"] if not p["sensor"] else p["dataset_split"] + "_{}".format(p["sensor"])
             if p["vis_depth_diff"]:
                 vis_depth_diff_path = p["vis_depth_diff_tpath"].format(
                     vis_path=p["vis_path"],
                     dataset=p["dataset"],
-                    split=p["dataset_split"],
+                    split=split,
                     scene_id=scene_id,
                     im_id=im_id,
                 )
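
Usage sketch (not part of the patch): the snippet below illustrates how the sensor-aware template keys introduced above are expected to resolve for the new "ipd" split, assuming the patched toolkit and an "ipd" test split present under the placeholder datasets path; the commented key names reflect my reading of the patched scene_tpaths_keys logic.

from bop_toolkit_lib import dataset_params

# Split parameters for the new "ipd" dataset ("/path/to/bop/datasets" is a placeholder;
# the split must exist on disk because the patched code probes the first present scene).
dp_split = dataset_params.get_split_params("/path/to/bop/datasets", "ipd", "test")

# The evaluation modality is a callable returning "rgb_photoneo" for every scene.
tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id=0)

# Expected key resolution with this patch:
#   tpath_keys["rgb_tpath"]   -> "rgb_photoneo_tpath"
#   tpath_keys["depth_tpath"] -> "depth_photoneo_tpath"  (color modality stripped)
rgb_tpath = dp_split[tpath_keys["rgb_tpath"]]
print(rgb_tpath.format(scene_id=0, im_id=0))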