Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add xyz and ipd dataset for BOP robotics track #165

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bop_toolkit_lib/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
######## Extended ########

# Folder for outputs (e.g. visualizations).
output_path = r"/path/to/output/folder"
output_path = r"/tmp"

# For offscreen C++ rendering: Path to the build folder of bop_renderer (github.com/thodan/bop_renderer).
bop_renderer_path = r"/path/to/bop_renderer/build"
Expand Down
115 changes: 106 additions & 9 deletions bop_toolkit_lib/dataset_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ def get_model_params(datasets_path, dataset_name, model_type=None):
"hopev2": list(range(1, 29)),
"hot3d": list(range(1, 34)),
"handal": list(range(1, 41)),
"ipd": list(range(0,21)),
"xyz": list(range(1,18))
}[dataset_name]

# IDs of objects with ambiguous views evaluated using the ADI pose error
Expand All @@ -110,6 +112,8 @@ def get_model_params(datasets_path, dataset_name, model_type=None):
"hopev2": [],
"hot3d": [1, 2, 3, 5, 22, 24, 25, 29, 30, 32],
"handal": [26, 35, 36, 37, 38, 39, 40],
"ipd": [],
"xyz": []
}[dataset_name]

# T-LESS includes two types of object models, CAD and reconstructed.
Expand Down Expand Up @@ -433,6 +437,86 @@ def hot3d_eval_modality(scene_id):
"gray2": "jpg",
}

if split == "test":
p["depth_range"] = None # Not calculated yet.
p["azimuth_range"] = None # Not calculated yet.
p["elev_range"] = None # Not calculated yet.

supported_error_types = ["ad", "add", "adi", "mssd", "mspd"]
elif dataset_name == "ipd":
modalities_have_separate_annotations = True
p["im_modalities"] = ["rgb_photoneo", "depth_photoneo"]
p["test_scene_ids"] = list(range(0,1))
# p["test_aria_scene_ids"] = list(range(3365, 3832))
p["scene_ids"] = {
"test": p["test_scene_ids"],
"train": p["test_scene_ids"], # NOTE(review): reuses the test scene IDs, presumably as a placeholder — confirm.
"train_pbr": p["test_scene_ids"], # NOTE(review): reuses the test scene IDs, presumably as a placeholder — confirm.
}[split]
# p["im_size"] = (2400, 2400)
# p["im_size"] = (1936, 1216)

p["photoneo_im_size"] = (2064, 1544)
p["im_size"] = p["photoneo_im_size"]



# Always returns "rgb_photoneo", independent of scene_id.
def ipd_eval_modality(scene_id):
return "rgb_photoneo"

p["eval_modality"] = ipd_eval_modality

exts = {
"rgb_photoneo": ".png",
"depth_photoneo": ".png",
}


if split == "test":
p["depth_range"] = None # Not calculated yet.
p["azimuth_range"] = None # Not calculated yet.
p["elev_range"] = None # Not calculated yet.

supported_error_types = ["ad", "add", "adi", "mssd", "mspd"]

elif dataset_name == "xyz":
modalities_have_separate_annotations = True
p["im_modalities"] = ["gray_photoneo", "depth_photoneo", "gray_xyz", "depth_xyz", "rgb_realsense", "depth_realsense"]
p["test_scene_ids"] = list(range(1,87))
# p["test_aria_scene_ids"] = list(range(3365, 3832))
p["scene_ids"] = {
"test": p["test_scene_ids"],
"train": p["test_scene_ids"], # NOTE(review): reuses the test scene IDs, presumably as a placeholder — confirm.
"train_pbr": list(range(50)), # PBR scenes 0-49.
}[split]

# These are probably mixed up in the real data!
p["photoneo_im_size"] = (1440, 1080)
p["realsense_im_size"] = (1280, 720)
p["xyz_im_size"] = (2064, 1544)
# pbr im size
p["im_size"] = p["photoneo_im_size"]

# Always returns "gray_xyz", independent of scene_id.
def xyz_eval_modality(scene_id):
return "gray_xyz"

p["eval_modality"] = xyz_eval_modality

if "pbr" == split_type:
# The PBR data is in classical BOP format without sensor names.
p["eval_modality"] = None
modalities_have_separate_annotations = False

exts = {
"gray_photoneo": ".png",
"depth_photoneo": ".png",
"gray_xyz": ".png",
"depth_xyz": ".png",
"rgb_realsense": ".png",
"depth_realsense": ".png",
}
rgb_ext = ".png"

if split == "test":
p["depth_range"] = None # Not calculated yet.
p["azimuth_range"] = None # Not calculated yet.
Expand Down Expand Up @@ -500,7 +584,14 @@ def hot3d_eval_modality(scene_id):

else:
assert exts is not None, "Need to set 'exts' for dataset {}".format()
present_scene_id = get_present_scene_ids(p)[0]
for moda in p["im_modalities"]:
sensor_moda = moda
if not os.path.exists(join(
split_path, "{present_scene_id:06d}", "scene_gt_{}.json".format(moda)
)):
# If the modalities of a sensor have aligned extrinsics/intrinsics, their annotations are
# combined in one file named by the sensor only (e.g. "rgb_photoneo" -> "photoneo").
sensor_moda = moda[(moda.find("_") + 1):]
p.update(
{
# Path template to modality image.
Expand All @@ -509,33 +600,34 @@ def hot3d_eval_modality(scene_id):
),
# Path template to a file with per-image camera parameters.
"scene_camera_{}_tpath".format(moda): join(
split_path, "{scene_id:06d}", "scene_camera_{}.json".format(moda)
split_path, "{scene_id:06d}", "scene_camera_{}.json".format(sensor_moda)
),
# Path template to a file with GT annotations.
"scene_gt_{}_tpath".format(moda): join(
split_path, "{scene_id:06d}", "scene_gt_{}.json".format(moda)
split_path, "{scene_id:06d}", "scene_gt_{}.json".format(sensor_moda)
),
# Path template to a file with meta information about the GT annotations.
"scene_gt_info_{}_tpath".format(moda): join(
split_path, "{scene_id:06d}", "scene_gt_info_{}.json".format(moda)
split_path, "{scene_id:06d}", "scene_gt_info_{}.json".format(sensor_moda)
),
# Path template to a file with the coco GT annotations.
"scene_gt_coco_{}_tpath".format(moda): join(
split_path, "{scene_id:06d}", "scene_gt_coco_{}.json".format(moda)
split_path, "{scene_id:06d}", "scene_gt_coco_{}.json".format(sensor_moda)
),
# Path template to a mask of the full object silhouette.
"mask_{}_tpath".format(moda): join(
split_path, "{scene_id:06d}", "mask_{}".format(moda), "{im_id:06d}_{gt_id:06d}.png"
split_path, "{scene_id:06d}", "mask_{}".format(sensor_moda), "{im_id:06d}_{gt_id:06d}.png"
),
# Path template to a mask of the visible part of an object silhouette.
"mask_visib_{}_tpath".format(moda): join(
split_path,
"{scene_id:06d}",
"mask_visib_{}".format(moda),
"mask_visib_{}".format(sensor_moda),
"{im_id:06d}_{gt_id:06d}.png",
),
}
)
print(p)

return p

Expand All @@ -559,11 +651,13 @@ def scene_tpaths_keys(eval_modality, scene_id=None):

tpath_keys = [
"scene_gt_tpath", "scene_gt_info_tpath", "scene_camera_tpath",
"scene_gt_coco_tpath", "mask_tpath", "mask_visib_tpath"
"scene_gt_coco_tpath", "mask_tpath", "mask_visib_tpath", "rgb_tpath",
"gray_tpath", "depth_tpath"
]
tpath_keys_multi = [
"scene_gt_{}_tpath", "scene_gt_info_{}_tpath", "scene_camera_{}_tpath",
"scene_gt_coco_{}_tpath", "mask_{}_tpath", "mask_visib_{}_tpath"
"scene_gt_coco_{}_tpath", "mask_{}_tpath", "mask_visib_{}_tpath", "{}_tpath",
"{}_tpath", "depth_{}_tpath"
]

assert len(tpath_keys) == len(tpath_keys_multi)
Expand All @@ -580,7 +674,10 @@ def scene_tpaths_keys(eval_modality, scene_id=None):
tpath_keys_dic[key] = key_multi.format(eval_modality[scene_id])
else:
raise ValueError("eval_modality type not supported, either None, str, callable or dictionary")

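# TODO: Find a nicer solution, e.g. split modality and sensor throughout the BOP toolkit.
# The depth key is built from the evaluation modality (e.g. "depth_rgb_photoneo_tpath"), so the
# second "_"-separated part is dropped to obtain the sensor's depth key ("depth_photoneo_tpath").
# NOTE(review): for a key without a sensor part (e.g. "depth_tpath") this would drop "tpath" — confirm.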
parts = tpath_keys_dic["depth_tpath"].split("_")
parts.pop(1)
tpath_keys_dic["depth_tpath"] = "_".join(parts)
return tpath_keys_dic


Expand Down
1 change: 1 addition & 0 deletions bop_toolkit_lib/inout.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,7 @@ def load_ply(path):
"float": ("f", 4),
"double": ("d", 8),
"int": ("i", 4),
"uint": ("I", 4),
"uchar": ("B", 1),
}

Expand Down
2 changes: 2 additions & 0 deletions bop_toolkit_lib/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,8 @@ def vis_object_poses(
{"name": "min diff", "fmt": ":.3f", "val": np.min(depth_diff_valid)},
{"name": "max diff", "fmt": ":.3f", "val": np.max(depth_diff_valid)},
{"name": "mean diff", "fmt": ":.3f", "val": np.mean(depth_diff_valid)},
{"name": "median diff", "fmt": ":.3f", "val": np.median(np.abs(depth_diff_valid))},
{"name": "25 percentile", "fmt": ":.3f", "val": np.percentile(np.abs(depth_diff_valid), 25)},
]
depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info)
inout.save_im(vis_depth_diff_path, depth_diff_vis)
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kiwisolver==1.3.1
matplotlib==2.2.4
imageio==2.5.0
pypng==0.0.19
Cython==0.29.24
Cython>=0.29.24
PyOpenGL==3.1.0
triangle>=20190115.2
glumpy==1.1.0
Expand Down
56 changes: 43 additions & 13 deletions scripts/vis_gt_poses.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
################################################################################
p = {
# See dataset_params.py for options.
"dataset": "lm",
"dataset": "ipd",
# Dataset split. Options: 'train', 'val', 'test'.
"dataset_split": "test",
# Dataset split type. None = default. See dataset_params.py for options.
Expand All @@ -50,6 +50,10 @@
"scene_ids": [],
"im_ids": [],
"gt_ids": [],
# Which sensor to visualize. If left empty (or None), the evaluation modality set
# in dataset_params.py is used; keep it empty for PBR images or classical BOP core datasets.
# Set it to the sensor name for the new BOP core datasets, e.g. "photoneo".
"sensor": "",

# ---------------------------------------------------------------------------------
# Next parameters apply only to classical BOP19 datasets (not the H3 BOP24 format)
Expand Down Expand Up @@ -91,7 +95,7 @@
raise ImportError("Missing hand_tracking_toolkit dependency, mandatory for HOT3D dataset.")

# if HOT3D dataset is used, next parameters are set
if p["dataset"] == "hot3d":
if p["dataset"] in ["hot3d"]:
p["vis_rgb"] = True
p["vis_rgb_resolve_visib"] = False
p["vis_depth_diff"] = False
Expand All @@ -104,6 +108,11 @@
model_type = "eval" # None = default.
dp_model = dataset_params.get_model_params(p["datasets_path"], p["dataset"], model_type)

# Find color modality of specified sensor.
if p["sensor"]:
sensor_mods = [mod.split("_")[0] for mod in dp_split["im_modalities"] if p["sensor"] in mod]
p["modality"] = [mod for mod in sensor_mods if any(col in mod for col in ["rgb","gray"])][0]

# Load colors.
colors_path = os.path.join(os.path.dirname(visualization.__file__), "colors.json")
colors = inout.load_json(colors_path)
Expand Down Expand Up @@ -142,11 +151,15 @@
aria_im_size = dp_split["aria_im_size"][dp_split["aria_eval_modality"]]
quest3_ren = renderer_htt.RendererHtt(quest3_im_size, p["renderer_type"], shading="flat")
aria_ren = renderer_htt.RendererHtt(aria_im_size, p["renderer_type"], shading="flat")
else: # classical BOP format
elif p["sensor"]: # classical BOP format
width, height = dp_split["{}_im_size".format(p["sensor"])]
else:
width, height = dp_split["im_size"]
ren = renderer.create_renderer(
width, height, p["renderer_type"], mode=renderer_mode, shading="flat"
)

ren = renderer.create_renderer(
width, height, p["renderer_type"], mode=renderer_mode, shading="flat"
)
# ren = renderer_htt.RendererHtt(dp_split["im_size"], p["renderer_type"], shading="flat")

# Load object models.
models = {}
Expand All @@ -164,7 +177,11 @@

scene_ids = dataset_params.get_present_scene_ids(dp_split)
for scene_id in scene_ids:
tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id)
if p["sensor"]:
tpath_keys = dataset_params.scene_tpaths_keys("{}_{}".format(p["modality"], p["sensor"]))
else:
tpath_keys = dataset_params.scene_tpaths_keys(dp_split["eval_modality"], scene_id)

if p["dataset"] == "hot3d": # for other dataset the renderer does not change
# find which renderer to use (quest3 or aria)
if scene_id in dp_split["test_quest3_scene_ids"] or scene_id in dp_split["train_quest3_scene_ids"]:
Expand Down Expand Up @@ -224,10 +241,16 @@
}
)

if p["dataset"] == "hot3d":
if p["dataset"] in ["hot3d", "ipd", "xyz"]:
# load the image of the eval modality

img_path = dp_split[tpath_keys["rgb_tpath"]].format(scene_id=scene_id, im_id=im_id)
if not os.path.exists(img_path):
print("rbg path {} does not exist, looking for gray images".format(img_path))
img_path = dp_split[tpath_keys["gray_tpath"]].format(scene_id=scene_id, im_id=im_id)
rgb = inout.load_im(
dp_split[dp_split["eval_modality"](scene_id) + "_tpath"].format(scene_id=scene_id, im_id=im_id)
# dp_split[dp_split["eval_modality"](scene_id) + "_tpath"].format(scene_id=scene_id, im_id=im_id)
img_path
)
# if image is grayscale (quest3), convert it to 3 channels
if rgb.ndim == 2:
Expand All @@ -249,32 +272,39 @@
raise ValueError("RGB nor gray images are available.")

depth = None
if p["dataset"] != "hot3d":
if p["dataset"] not in ["hot3d"]:
if p["vis_depth_diff"] or (p["vis_rgb"] and p["vis_rgb_resolve_visib"]):
depth = inout.load_depth(
dp_split["depth_tpath"].format(scene_id=scene_id, im_id=im_id)
dp_split[tpath_keys["depth_tpath"]].format(scene_id=scene_id, im_id=im_id)
)
depth *= scene_camera[im_id]["depth_scale"] # Convert to [mm].

# if depth.ndim == 2:
# depth = np.dstack([depth, depth, depth])
# breakpoint()
# depth = depth[:,:,0]

# Path to the output RGB visualization.
vis_rgb_path = None
if p["vis_rgb"]:
split = p["dataset_split"] if not p["sensor"] else p["dataset_split"] + "_{}".format(p["sensor"])
vis_rgb_path = p["vis_rgb_tpath"].format(
vis_path=p["vis_path"],
dataset=p["dataset"],
split=p["dataset_split"],
split=split,
scene_id=scene_id,
im_id=im_id,
)

# Path to the output depth difference visualization.
vis_depth_diff_path = None
if p["dataset"] != "hot3d":
split = p["dataset_split"] if not p["sensor"] else p["dataset_split"] + "_{}".format(p["sensor"])
if p["vis_depth_diff"]:
vis_depth_diff_path = p["vis_depth_diff_tpath"].format(
vis_path=p["vis_path"],
dataset=p["dataset"],
split=p["dataset_split"],
split=split,
scene_id=scene_id,
im_id=im_id,
)
Expand Down