HumanCompatibleAI · mschweizer · Dec 15, 2022 · Dec 15, 2022 · Dec 15, 2022 · Dec 15, 2022
diff --git a/ci/clean_notebooks.py b/ci/clean_notebooks.py
@@ -18,6 +18,7 @@ class UncleanNotebookError(Exception):
     "metadata": {"do": "constant", "value": dict()},
     "source": {"do": "keep"},
     "id": {"do": "keep"},
+    "attachments": {"do": "constant", "value": {}},
 }
 
 code_structure: Dict[str, Dict[str, Any]] = {
@@ -76,7 +77,8 @@ def clean_notebook(file: pathlib.Path, check_only=False) -> None:
             if key not in structure[cell["cell_type"]]:
                 if check_only:
                     raise UncleanNotebookError(
-                        f"Notebook {file} has unknown cell key {key}",
+                        f"Notebook {file} has unknown cell key {key} for cell type "
+                        + f"{cell['cell_type']}",
                     )
                 del cell[key]
                 was_dirty = True
@@ -108,7 +110,12 @@ def clean_notebook(file: pathlib.Path, check_only=False) -> None:
 
 
 def parse_args():
-    """Parse command-line arguments."""
+    """Parse command-line arguments.
+
+    Returns:
+        parser: The configured ``argparse.ArgumentParser``.
+        args: The parsed arguments.
+    """
     # if the argument --check has been passed, check if the notebooks are clean
     # otherwise, clean them in-place
     parser = argparse.ArgumentParser()
@@ -125,7 +132,14 @@ def parse_args():
 
 
 def get_files(input_paths: List):
-    """Build list of files to scan from list of paths and files."""
+    """Build list of files to scan from list of paths and files.
+
+    Args:
+        input_paths: List of directory and file paths to scan.
+
+    Returns:
+        files: List of files to scan.
+    """
     files = []
     for file in input_paths:
         if file.is_dir():

diff --git a/docs/algorithms/airl.rst b/docs/algorithms/airl.rst
@@ -42,7 +42,7 @@ Detailed example notebook: :doc:`../tutorials/4_train_airl`
         "seals:seals/CartPole-v0",
         rng=np.random.default_rng(SEED),
         n_envs=8,
-        post_wrappers=[lambda env, _: RolloutInfoWrapper(env)],  # to compute rollouts
+        post_wrappers={"RolloutInfoWrapper": lambda env, _: RolloutInfoWrapper(env)},
     )
     expert = load_policy(
         "ppo-huggingface",

diff --git a/docs/algorithms/gail.rst b/docs/algorithms/gail.rst
@@ -39,7 +39,7 @@ Detailed example notebook: :doc:`../tutorials/3_train_gail`
         "seals:seals/CartPole-v0",
         rng=np.random.default_rng(SEED),
         n_envs=8,
-        post_wrappers=[lambda env, _: RolloutInfoWrapper(env)],  # to compute rollouts
+        post_wrappers={"RolloutInfoWrapper": lambda env, _: RolloutInfoWrapper(env)},
     )
     expert = load_policy(
         "ppo-huggingface",

diff --git a/docs/algorithms/preference_comparisons.rst b/docs/algorithms/preference_comparisons.rst
@@ -47,6 +47,7 @@ For a more detailed example, refer to :doc:`../tutorials/5_train_preference_comp
 
     fragmenter = preference_comparisons.RandomFragmenter(warning_threshold=0, rng=rng)
     gatherer = preference_comparisons.SyntheticGatherer(rng=rng)
+    querent = preference_comparisons.PreferenceQuerent()
     preference_model = preference_comparisons.PreferenceModel(reward_net)
     reward_trainer = preference_comparisons.BasicRewardTrainer(
         preference_model=preference_model,
@@ -84,6 +85,7 @@ For a more detailed example, refer to :doc:`../tutorials/5_train_preference_comp
         reward_net,
         num_iterations=5, # Set to 60 for better performance
         fragmenter=fragmenter,
+        preference_querent=querent,
         preference_gatherer=gatherer,
         reward_trainer=reward_trainer,
         initial_epoch_multiplier=4,

diff --git a/docs/index.rst b/docs/index.rst
@@ -84,6 +84,7 @@ If you use ``imitation`` in your research project, please cite our paper to help
    tutorials/4_train_airl
    tutorials/5_train_preference_comparisons
    tutorials/5a_train_preference_comparisons_with_cnn
+   tutorials/5b_train_preference_comparisons_with_synchronous_human_feedback
    tutorials/6_train_mce
    tutorials/7_train_density
    tutorials/8_train_sqil

diff --git a/docs/tutorials/10_train_custom_env.ipynb b/docs/tutorials/10_train_custom_env.ipynb
@@ -136,12 +136,12 @@
     "\n",
     "# Create a vectorized environment for training with `imitation`\n",
     "\n",
-    "# Option A: use the `make_vec_env` helper function - make sure to pass `post_wrappers=[lambda env, _: RolloutInfoWrapper(env)]`\n",
+    "# Option A: use the `make_vec_env` helper function - make sure to pass `post_wrappers={\"RolloutInfoWrapper\": lambda env, _: RolloutInfoWrapper(env)}`\n",
     "venv = make_vec_env(\n",
     "    \"custom/ObservationMatching-v0\",\n",
     "    rng=np.random.default_rng(),\n",
     "    n_envs=4,\n",
-    "    post_wrappers=[lambda env, _: RolloutInfoWrapper(env)],\n",
+    "    post_wrappers={\"RolloutInfoWrapper\": lambda env, _: RolloutInfoWrapper(env)},\n",
     ")\n",
     "\n",
     "\n",

diff --git a/docs/tutorials/3_train_gail.ipynb b/docs/tutorials/3_train_gail.ipynb
@@ -37,9 +37,7 @@
     "    \"seals:seals/CartPole-v0\",\n",
     "    rng=np.random.default_rng(SEED),\n",
     "    n_envs=8,\n",
-    "    post_wrappers=[\n",
-    "        lambda env, _: RolloutInfoWrapper(env)\n",
-    "    ],  # needed for computing rollouts later\n",
+    "    post_wrappers={\"RolloutInfoWrapper\": lambda env, _: RolloutInfoWrapper(env)},  # needed for computing rollouts later\n",
     ")\n",
     "expert = load_policy(\n",
     "    \"ppo-huggingface\",\n",

diff --git a/docs/tutorials/4_train_airl.ipynb b/docs/tutorials/4_train_airl.ipynb
@@ -41,9 +41,7 @@
     "    \"seals:seals/CartPole-v0\",\n",
     "    rng=np.random.default_rng(SEED),\n",
     "    n_envs=8,\n",
-    "    post_wrappers=[\n",
-    "        lambda env, _: RolloutInfoWrapper(env)\n",
-    "    ],  # needed for computing rollouts later\n",
+    "    post_wrappers={\"RolloutInfoWrapper\": lambda env, _: RolloutInfoWrapper(env)},  # needed for computing rollouts later\n",
     ")\n",
     "expert = load_policy(\n",
     "    \"ppo-huggingface\",\n",