From d4f40a9bc7e423a579c5f8f1a0d4c74b65181359 Mon Sep 17 00:00:00 2001
From: pseudo-rnd-thoughts
Date: Sun, 12 Jan 2025 11:10:29 +0000
Subject: [PATCH] Add wrappers to website

---
 docs/_scripts/gen_envs_display.py |  2 +-
 docs/conf.py                      |  4 ++--
 docs/content/wrappers.md          |  9 +++++++++
 docs/index.md                     |  1 +
 miniworld/envs/collecthealth.py   | 11 +++++------
 miniworld/envs/fourrooms.py       |  6 +++---
 miniworld/envs/hallway.py         | 10 ++++++----
 miniworld/envs/maze.py            | 13 ++++++-------
 miniworld/envs/oneroom.py         | 11 +++++------
 miniworld/envs/pickupobjects.py   | 14 ++++++--------
 miniworld/envs/putnext.py         | 10 ++++++----
 miniworld/envs/roomobjects.py     | 11 +++++------
 miniworld/envs/sidewalk.py        |  7 +++----
 miniworld/envs/sign.py            | 20 ++++++++------------
 miniworld/envs/threerooms.py      |  7 +++----
 miniworld/envs/tmaze.py           |  8 ++++----
 miniworld/envs/wallgap.py         |  6 +++---
 miniworld/envs/ymaze.py           |  9 ++++-----
 miniworld/manual_control.py       |  8 ++------
 miniworld/wrappers.py             | 19 +++++++++++--------
 20 files changed, 93 insertions(+), 93 deletions(-)
 create mode 100644 docs/content/wrappers.md

diff --git a/docs/_scripts/gen_envs_display.py b/docs/_scripts/gen_envs_display.py
index acf04cdc..235fd851 100644
--- a/docs/_scripts/gen_envs_display.py
+++ b/docs/_scripts/gen_envs_display.py
@@ -10,7 +10,7 @@ def create_grid_cell(env_id):
     env_name = env_id.split("-")[-2]
     return f"""
-
+
diff --git a/docs/conf.py b/docs/conf.py
index 6dd14521..03366cd5 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -16,11 +16,12 @@
 # -- Project information -----------------------------------------------------
 
 import os
+import time
 
 import miniworld
 
 project = "Miniworld"
-copyright = "2023 Farama Foundation"
+copyright = f"{time.localtime().tm_year} Farama Foundation"
 author = "Farama Foundation"
 
 # The full version, including alpha/beta/rc tags
@@ -34,7 +35,6 @@
 # ones.
 extensions = [
     "sphinx.ext.napoleon",
-    "sphinx.ext.doctest",
     "sphinx.ext.autodoc",
     "sphinx.ext.githubpages",
    "sphinx.ext.viewcode",
diff --git a/docs/content/wrappers.md b/docs/content/wrappers.md
new file mode 100644
index 00000000..70f74de8
--- /dev/null
+++ b/docs/content/wrappers.md
@@ -0,0 +1,9 @@
+# Wrappers
+
+Miniworld includes several built-in wrappers, available to researchers in `miniworld.wrappers`.
+
+```{eval-rst}
+.. autoclass:: miniworld.wrappers.PyTorchObsWrapper
+.. autoclass:: miniworld.wrappers.GreyscaleWrapper
+.. autoclass:: miniworld.wrappers.StochasticActionWrapper
+```
diff --git a/docs/index.md b/docs/index.md
index bb2ec8d8..dfcb7fe7 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -10,6 +10,7 @@ lastpage:
 
 content/design
 environments
+content/wrappers
 content/create_env
 content/troubleshooting
 content/installation
diff --git a/miniworld/envs/collecthealth.py b/miniworld/envs/collecthealth.py
index f69611b3..543262b3 100644
--- a/miniworld/envs/collecthealth.py
+++ b/miniworld/envs/collecthealth.py
@@ -30,21 +30,20 @@ class CollectHealth(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     +2 for each time step
     -100 for dying
 
     ## Arguments
 
+    * `size`: size of the room
+
     ```python
-    CollectHealth(size=16)
+    env = gymnasium.make("MiniWorld-CollectHealth-v0", size=16)
     ```
-
-    - `size`: size of the room
-
     """
 
     def __init__(self, size=16, **kwargs):
diff --git a/miniworld/envs/fourrooms.py b/miniworld/envs/fourrooms.py
index 25769d0e..b3213809 100644
--- a/miniworld/envs/fourrooms.py
+++ b/miniworld/envs/fourrooms.py
@@ -22,16 +22,16 @@ class FourRooms(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     +(1 - 0.2 * (step_count / max_episode_steps)) when red box reached and zero otherwise.
 
     ## Arguments
 
     ```python
-    env = gym.make("MiniWorld-FourRooms-v0")
+    env = gymnasium.make("MiniWorld-FourRooms-v0")
     ```
 
     """
diff --git a/miniworld/envs/hallway.py b/miniworld/envs/hallway.py
index 1887c384..c835dd0c 100644
--- a/miniworld/envs/hallway.py
+++ b/miniworld/envs/hallway.py
@@ -24,19 +24,21 @@ class Hallway(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     +(1 - 0.2 * (step_count / max_episode_steps)) when red box reached
 
     ## Arguments
 
+    * `length`: length of the entire space
+
     ```python
-    Hallway(length=12)
+    env = gymnasium.make("MiniWorld-Hallway-v0", length=12)
     ```
 
-    `length`: length of the entire space
+
     """
diff --git a/miniworld/envs/maze.py b/miniworld/envs/maze.py
index 10df92fa..29ca9805 100644
--- a/miniworld/envs/maze.py
+++ b/miniworld/envs/maze.py
@@ -26,24 +26,23 @@ class Maze(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     +(1 - 0.2 * (step_count / max_episode_steps)) when red box reached and zero otherwise.
 
     ## Arguments
 
     ```python
-    env = gym.make("MiniWorld-Maze-v0")
+    env = gymnasium.make("MiniWorld-Maze-v0")
     # or
-    env = gym.make("MiniWorld-MazeS2-v0")
+    env = gymnasium.make("MiniWorld-MazeS2-v0")
     # or
-    env = gym.make("MiniWorld-MazeS3-v0")
+    env = gymnasium.make("MiniWorld-MazeS3-v0")
     # or
-    env = gym.make("MiniWorld-MazeS3Fast-v0")
+    env = gymnasium.make("MiniWorld-MazeS3Fast-v0")
     ```
-
     """
 
     def __init__(
diff --git a/miniworld/envs/oneroom.py b/miniworld/envs/oneroom.py
index 2e60e7b1..3fcbf244 100644
--- a/miniworld/envs/oneroom.py
+++ b/miniworld/envs/oneroom.py
@@ -26,22 +26,21 @@ class OneRoom(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     +(1 - 0.2 * (step_count / max_episode_steps)) when red box reached and zero otherwise.
 
     ## Arguments
 
     ```python
-    env = gym.make("MiniWorld-OneRoom-v0")
+    env = gymnasium.make("MiniWorld-OneRoom-v0")
     # or
-    env = gym.make("MiniWorld-OneRoomS6-v0")
+    env = gymnasium.make("MiniWorld-OneRoomS6-v0")
     # or
-    env = gym.make("MiniWorld-OneRoomS6Fast-v0")
+    env = gymnasium.make("MiniWorld-OneRoomS6Fast-v0")
     ```
-
     """
 
     def __init__(self, size=10, max_episode_steps=180, **kwargs):
diff --git a/miniworld/envs/pickupobjects.py b/miniworld/envs/pickupobjects.py
index 8dcd4d48..21df78e7 100644
--- a/miniworld/envs/pickupobjects.py
+++ b/miniworld/envs/pickupobjects.py
@@ -24,22 +24,20 @@ class PickupObjects(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     +1 when agent picked up object
 
     ## Arguments
 
+    * `size`: size of world
+    * `num_objs`: number of objects
+
     ```python
-    PickupObjects(size=12, num_objs=5)
+    env = gymnasium.make("MiniWorld-PickupObjects-v0", size=12, num_objs=5)
     ```
-
-    - `size`: size of world
-
-    - `num_objs`: number of objects
-
     """
 
     def __init__(self, size=12, num_objs=5, **kwargs):
diff --git a/miniworld/envs/putnext.py b/miniworld/envs/putnext.py
index 52f119eb..e6e2ca1a 100644
--- a/miniworld/envs/putnext.py
+++ b/miniworld/envs/putnext.py
@@ -28,19 +28,21 @@ class PutNext(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     +(1 - 0.2 * (step_count / max_episode_steps)) when red box is next to yellow box
 
     ## Arguments
 
+    * `size`: size of world
+
     ```python
-    PutNext(size=12)
+    env = gymnasium.make("MiniWorld-PutNext-v0", size=12)
     ```
 
-    `size`: size of world
+
     """
diff --git a/miniworld/envs/roomobjects.py b/miniworld/envs/roomobjects.py
index b90cf48d..56c6cc01 100644
--- a/miniworld/envs/roomobjects.py
+++ b/miniworld/envs/roomobjects.py
@@ -30,20 +30,19 @@ class RoomObjects(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     None
 
     ## Arguments
 
+    * `size`: size of world
+
     ```python
-    RoomObjects(size=16)
+    env = gymnasium.make("MiniWorld-RoomObjects-v0", size=16)
     ```
-
-    - `size`: size of world
-
     """
 
     def __init__(self, size=10, **kwargs):
diff --git a/miniworld/envs/sidewalk.py b/miniworld/envs/sidewalk.py
index a58aeea0..db84c1cc 100644
--- a/miniworld/envs/sidewalk.py
+++ b/miniworld/envs/sidewalk.py
@@ -25,18 +25,17 @@ class Sidewalk(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     +(1 - 0.2 * (step_count / max_episode_steps)) when object reached
 
     ## Arguments
 
     ```python
-    env = gym.make("MiniWorld-Sidewalk-v0")
+    env = gymnasium.make("MiniWorld-Sidewalk-v0")
     ```
-
     """
 
     def __init__(self, **kwargs):
diff --git a/miniworld/envs/sign.py b/miniworld/envs/sign.py
index 5b648b01..9be835c6 100644
--- a/miniworld/envs/sign.py
+++ b/miniworld/envs/sign.py
@@ -52,27 +52,23 @@ class Sign(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     +1 for touching the object where the color matches the sign and the shape matches the goal
     -1 for touching any other object
 
     ## Arguments
 
+    * `size`: size of the square room.
+    * `max_episode_steps`: number of steps before the episode ends.
+    * `color_index`: specifies whether the sign says blue (0), green (1), or red (2).
+    * `goal`: specifies box (0) or key (1).
+
     ```python
-    Sign(size=10, max_episode_steps=20, color_index=0, goal=0)
+    env = gymnasium.make("MiniWorld-Sign-v0", size=10, max_episode_steps=20, color_index=0, goal=0)
     ```
-
-    - `size`: size of the square room.
-
-    - `max_episode_steps`: number of steps before the episode ends.
-
-    - `color_index`: specifies whether the sign says blue (0), green (1), or red (2).
-
-    - `goal`: specifies box (0) or key (1).
-
     """
 
     def __init__(self, size=10, max_episode_steps=20, color_index=0, goal=0, **kwargs):
diff --git a/miniworld/envs/threerooms.py b/miniworld/envs/threerooms.py
index 0e43a5b8..71a28567 100644
--- a/miniworld/envs/threerooms.py
+++ b/miniworld/envs/threerooms.py
@@ -25,18 +25,17 @@ class ThreeRooms(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     None
 
     ## Arguments
 
     ```python
-    env = gym.make("MiniWorld-ThreeRooms-v0")
+    env = gymnasium.make("MiniWorld-ThreeRooms-v0")
     ```
-
     """
 
     def __init__(self, **kwargs):
diff --git a/miniworld/envs/tmaze.py b/miniworld/envs/tmaze.py
index 28b08fb2..b8e1e222 100644
--- a/miniworld/envs/tmaze.py
+++ b/miniworld/envs/tmaze.py
@@ -27,18 +27,18 @@ class TMaze(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     +(1 - 0.2 * (step_count / max_episode_steps)) when box reached and zero otherwise.
 
     ## Arguments
 
     ```python
-    env = gym.make("MiniWorld-TMazeLeft-v0")
+    env = gymnasium.make("MiniWorld-TMazeLeft-v0")
     # or
-    env = gym.make("MiniWorld-TMazeRight-v0")
+    env = gymnasium.make("MiniWorld-TMazeRight-v0")
     ```
 
     """
diff --git a/miniworld/envs/wallgap.py b/miniworld/envs/wallgap.py
index 589a86a8..f0e99843 100644
--- a/miniworld/envs/wallgap.py
+++ b/miniworld/envs/wallgap.py
@@ -25,16 +25,16 @@ class WallGap(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     +(1 - 0.2 * (step_count / max_episode_steps)) when box reached
 
     ## Arguments
 
     ```python
-    env = gym.make("MiniWorld-WallGap-v0")
+    env = gymnasium.make("MiniWorld-WallGap-v0")
     ```
 
     """
diff --git a/miniworld/envs/ymaze.py b/miniworld/envs/ymaze.py
index dd2cf651..9ced3d25 100644
--- a/miniworld/envs/ymaze.py
+++ b/miniworld/envs/ymaze.py
@@ -29,20 +29,19 @@ class YMaze(MiniWorldEnv, utils.EzPickle):
     ## Observation Space
 
     The observation space is an `ndarray` with shape `(obs_height, obs_width, 3)`
-    representing a RGB image of what the agents sees.
+    representing an RGB image of what the agents see.
 
-    ## Rewards:
+    ## Rewards
 
     +(1 - 0.2 * (step_count / max_episode_steps)) when box reached
 
     ## Arguments
 
     ```python
-    env = gym.make("MiniWorld-YMazeLeft-v0")
+    env = gymnasium.make("MiniWorld-YMazeLeft-v0")
     # or
-    env = gym.make("MiniWorld-YMazeRight-v0")
+    env = gymnasium.make("MiniWorld-YMazeRight-v0")
     ```
-
     """
 
     def __init__(self, goal_pos=None, **kwargs):
diff --git a/miniworld/manual_control.py b/miniworld/manual_control.py
index bb7c12a1..f625ff49 100644
--- a/miniworld/manual_control.py
+++ b/miniworld/manual_control.py
@@ -5,7 +5,7 @@
 
 
 class ManualControl:
-    def __init__(self, env, no_time_limit, domain_rand):
+    def __init__(self, env, no_time_limit: bool, domain_rand: bool):
         self.env = env.unwrapped
 
         if no_time_limit:
@@ -77,11 +77,7 @@ def on_close():
 
     def step(self, action):
         print(
-            "step {}/{}: {}".format(
-                self.env.unwrapped.step_count + 1,
-                self.env.unwrapped.max_episode_steps,
-                self.env.unwrapped.actions(action).name,
-            )
+            f"step {self.env.unwrapped.step_count + 1}/{self.env.unwrapped.max_episode_steps}: {self.env.unwrapped.actions(action).name}"
         )
 
         obs, reward, termination, truncation, info = self.env.step(action)
diff --git a/miniworld/wrappers.py b/miniworld/wrappers.py
index 9b228207..6ad8e3f5 100644
--- a/miniworld/wrappers.py
+++ b/miniworld/wrappers.py
@@ -1,4 +1,4 @@
-import random
+from typing import Optional
 
 import gymnasium as gym
 import numpy as np
@@ -9,8 +9,9 @@ class PyTorchObsWrapper(gym.ObservationWrapper):
     Transpose the observation image tensors for PyTorch
     """
 
-    def __init__(self, env=None):
+    def __init__(self, env):
         super().__init__(env)
+
         obs_shape = self.observation_space.shape
         self.observation_space = gym.spaces.Box(
             self.observation_space.low[0, 0, 0],
@@ -25,16 +26,17 @@ def observation(self, observation):
 
 
 class GreyscaleWrapper(gym.ObservationWrapper):
     """
-    Convert image obserations from RGB to greyscale
+    Convert image observations from RGB to greyscale
     """
 
-    def __init__(self, env=None):
+    def __init__(self, env):
         super().__init__(env)
+
         obs_shape = self.observation_space.shape
         self.observation_space = gym.spaces.Box(
             self.observation_space.low[0, 0, 0],
             self.observation_space.high[0, 0, 0],
-            [obs_shape[0], obs_shape[1], 1],
+            (obs_shape[0], obs_shape[1], 1),
             dtype=self.observation_space.dtype,
         )
@@ -52,17 +54,18 @@ class StochasticActionWrapper(gym.ActionWrapper):
     Else, a random action is sampled from the action space.
     """
 
-    def __init__(self, env=None, prob=0.9, random_action=None):
+    def __init__(self, env, prob: float = 0.9, random_action: Optional[int] = None):
         super().__init__(env)
+
         self.prob = prob
         self.random_action = random_action
 
     def action(self, action):
         """ """
-        if np.random.uniform() < self.prob:
+        if self.np_random.uniform() < self.prob:
             return action
         else:
             if self.random_action is None:
-                return random.randint(0, 6)
+                return self.np_random.integers(0, 6)
             else:
                 return self.random_action
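A minimal usage sketch for the wrappers documented above (hypothetical, not part of the diff): it assumes the registered `MiniWorld-Hallway-v0` environment id and the constructor signatures shown in `miniworld/wrappers.py`; the wrapper order is one reasonable choice, not mandated by the code.

```python
import gymnasium

# Importing the package registers the MiniWorld-* environment ids.
import miniworld  # noqa: F401
from miniworld.wrappers import (
    GreyscaleWrapper,
    PyTorchObsWrapper,
    StochasticActionWrapper,
)

env = gymnasium.make("MiniWorld-Hallway-v0")
env = GreyscaleWrapper(env)  # (H, W, 3) RGB -> (H, W, 1) greyscale
env = PyTorchObsWrapper(env)  # (H, W, C) -> (C, H, W) channel-first for PyTorch
env = StochasticActionWrapper(env, prob=0.9)  # intended action taken with probability 0.9

obs, info = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
env.close()
```

`GreyscaleWrapper` is applied before `PyTorchObsWrapper` so the channel reduction runs on the `(H, W, C)` layout it expects; the transpose to channel-first happens last.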