Commit: + flake8
markub3327 committed Jan 10, 2025
1 parent 48e6468 commit 7aed7bb
Showing 12 changed files with 41 additions and 54 deletions.
2 changes: 1 addition & 1 deletion rl_toolkit/__init__.py
@@ -1 +1 @@
from .__main__ import main
from .__main__ import main # noqa
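
The re-export in __init__.py is what flake8 flags as F401 (imported but unused), since main is imported here only to expose it at the package level; the bare # noqa added in this commit silences every check on that line. A minimal sketch of the narrower form (the explicit rule code is my addition, not part of the commit):

    # Re-export the CLI entry point at package level.
    from .__main__ import main  # noqa: F401
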
10 changes: 7 additions & 3 deletions rl_toolkit/__main__.py
@@ -119,7 +119,9 @@ def main():
clip_mean_max=config["Model"]["Actor"]["clip_mean_max"],
n_quantiles=config["Model"]["Critic"]["n_quantiles"],
merge_index=config["Model"]["Critic"]["merge_index"],
top_quantiles_to_drop=config["Model"]["Critic"]["top_quantiles_to_drop"],
top_quantiles_to_drop=config["Model"]["Critic"][
"top_quantiles_to_drop"
],
n_critics=config["Model"]["Critic"]["count"],
gamma=config["Learner"]["gamma"],
tau=config["Learner"]["tau"],
@@ -217,7 +219,9 @@ def main():
critic_learning_rate=config["Model"]["Critic"]["learning_rate"],
alpha_learning_rate=config["Model"]["Alpha"]["learning_rate"],
n_quantiles=config["Model"]["Critic"]["n_quantiles"],
top_quantiles_to_drop=config["Model"]["Critic"]["top_quantiles_to_drop"],
top_quantiles_to_drop=config["Model"]["Critic"][
"top_quantiles_to_drop"
],
n_critics=config["Model"]["Critic"]["count"],
clip_mean_min=config["Model"]["Actor"]["clip_mean_min"],
clip_mean_max=config["Model"]["Actor"]["clip_mean_max"],
@@ -303,4 +307,4 @@ def main():


if __name__ == "__main__":
main()
main()
41 changes: 15 additions & 26 deletions rl_toolkit/agents/dueling_dqn/agent.py
@@ -3,8 +3,8 @@
import numpy as np
import reverb
import tensorflow as tf
import wandb

import wandb
from rl_toolkit.networks.models import DuelingDQN
from rl_toolkit.utils import VariableContainer

@@ -200,22 +200,24 @@ def collect(self, writer, policy):
# Block until all the items have been sent to the server
writer.end_episode()

# save the checkpoint
if self._total_episodes > 0:
if self._episode_reward > self._best_episode_reward:
self._best_episode_reward = self._episode_reward
self.save()
print(
f"Model is saved at {self._total_episodes} episode with score {self._best_episode_reward}"
)
wandb.log({"best_score": self._best_episode_reward}, commit=False)
else:
# Store best weights
if self._episode_reward > self._best_episode_reward:
self._best_episode_reward = self._episode_reward
self._best_episode = self._total_episodes
if self._save_path:
os.makedirs(self._save_path, exist_ok=True)
# Save model
self.model.save_weights(
os.path.join(self._save_path, "best_actor.h5")
)

# Logging
print("=============================================")
print(f"Epoch: {self._total_episodes}")
print(f"Score: {self._episode_reward}")
print(
f"Best score: {self._best_episode_reward} (at epoch {self._best_episode})"
)
print(f"Steps: {self._episode_steps}")
print(f"TotalInteractions: {self._total_steps}")
print(f"Train step: {self._train_step.numpy()}")
@@ -250,6 +252,8 @@ def collect(self, writer, policy):
def run(self):
# Init environment
self._episode_reward = 0.0
self._best_episode_reward = float("-inf")
self._best_episode = 0
self._episode_steps = 0
self._total_episodes = 0
self._total_steps = 0
@@ -267,18 +271,3 @@ def run(self):
# Main loop
while not self._stop_agents:
self.collect(writer, self.collect_policy)

def save(self, path=""):
if self._save_path:
try:
os.makedirs(os.path.join(os.path.join(self._save_path, path)))
except OSError:
print("The path already exist ❗❗❗")
finally:
# Save model
self.model.save_weights(
os.path.join(
os.path.join(self._save_path, path),
f"dqn_{self._total_episodes}.h5",
)
)
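
For context on the agent.py change above: the periodic self.save() checkpoint helper is removed and replaced by saving only the best-scoring weights inline. A minimal sketch of that pattern, using placeholder names (model, save_path) rather than the class's real attributes:

    import os

    best_reward = float("-inf")

    def maybe_save_best(model, episode_reward, save_path):
        # Keep only the weights of the best-scoring episode seen so far.
        global best_reward
        if episode_reward > best_reward:
            best_reward = episode_reward
            if save_path:
                os.makedirs(save_path, exist_ok=True)
                model.save_weights(os.path.join(save_path, "best_actor.h5"))
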
2 changes: 1 addition & 1 deletion rl_toolkit/agents/dueling_dqn/learner.py
@@ -1,9 +1,9 @@
import reverb
import tensorflow as tf
import wandb
from tensorflow.keras.callbacks import LearningRateScheduler
from wandb.integration.keras import WandbMetricsLogger

import wandb
from rl_toolkit.networks.callbacks import DQNAgentCallback, PrintLR, cosine_schedule
from rl_toolkit.networks.models import DuelingDQN
from rl_toolkit.utils import make_reverb_dataset
2 changes: 1 addition & 1 deletion rl_toolkit/agents/dueling_dqn/tester.py
@@ -1,7 +1,7 @@
import numpy as np
import tensorflow as tf
import wandb

import wandb
from rl_toolkit.networks.models import DuelingDQN

from ...core.process import Process
8 changes: 5 additions & 3 deletions rl_toolkit/agents/sac/agent.py
@@ -3,8 +3,8 @@
import numpy as np
import reverb
import tensorflow as tf
import wandb

import wandb
from rl_toolkit.networks.models import Actor
from rl_toolkit.utils import VariableContainer

@@ -199,7 +199,9 @@ def collect(self, writer, max_steps, policy):
print("=============================================")
print(f"Epoch: {self._total_episodes}")
print(f"Score: {self._episode_reward}")
print(f"Best score: {self._best_episode_reward} (at epoch {self._best_episode})")
print(
f"Best score: {self._best_episode_reward} (at epoch {self._best_episode})"
)
print(f"Steps: {self._episode_steps}")
print(f"TotalInteractions: {self._total_steps}")
print(f"Train step: {self._train_step.numpy()}")
@@ -233,7 +235,7 @@ def collect(self, writer, max_steps, policy):
def run(self):
# Init environment
self._episode_reward = 0.0
self._best_episode_reward = float('-inf')
self._best_episode_reward = float("-inf")
self._best_episode = None
self._episode_steps = 0
self._total_episodes = 0
6 changes: 2 additions & 4 deletions rl_toolkit/agents/sac/learner.py
@@ -3,10 +3,10 @@
import numpy as np
import reverb
import tensorflow as tf
import wandb
from tensorflow.keras.optimizers import Adam
from wandb.integration.keras import WandbMetricsLogger

import wandb
from rl_toolkit.networks.callbacks import SACAgentCallback
from rl_toolkit.networks.models import ActorCritic
from rl_toolkit.utils import make_reverb_dataset
@@ -159,9 +159,7 @@ def save(self):
if self._save_path:
os.makedirs(self._save_path, exist_ok=True)
# Save model
self.model.save_weights(
os.path.join(self._save_path, "actor_critic.h5")
)
self.model.save_weights(os.path.join(self._save_path, "actor_critic.h5"))

def close(self):
super(Learner, self).close()
2 changes: 1 addition & 1 deletion rl_toolkit/agents/sac/tester.py
@@ -1,8 +1,8 @@
import numpy as np
import tensorflow as tf
import wandb
from dm_control import viewer

import wandb
from rl_toolkit.networks.models import Actor

from ...core.process import Process
3 changes: 2 additions & 1 deletion rl_toolkit/networks/callbacks/lr.py
@@ -1,8 +1,9 @@
import numpy as np
import tensorflow as tf
import wandb
from tensorflow.keras.callbacks import Callback

import wandb


def cosine_schedule(base_lr, total_steps, warmup_steps):
def step_fn(epoch):
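
lr.py defines cosine_schedule(base_lr, total_steps, warmup_steps) with an inner step_fn(epoch), imported alongside LearningRateScheduler by the DQN learner. Its body sits outside this diff; a minimal sketch of what a warmup-plus-cosine-decay schedule with that signature typically looks like (an assumption, not the repository's exact code):

    import numpy as np

    def cosine_schedule(base_lr, total_steps, warmup_steps):
        def step_fn(epoch):
            # Linear warmup, then cosine decay from base_lr towards zero.
            if epoch < warmup_steps:
                return base_lr * (epoch + 1) / warmup_steps
            progress = (epoch - warmup_steps) / max(1, total_steps - warmup_steps)
            return base_lr * 0.5 * (1.0 + np.cos(np.pi * progress))

        return step_fn
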
4 changes: 1 addition & 3 deletions rl_toolkit/networks/models/actor.py
@@ -44,9 +44,7 @@ def __init__(
self.fc_layers = []

for m in units:
self.fc_layers.append(
Dense(units=m, activation="elu")
)
self.fc_layers.append(Dense(units=m, activation="elu"))

# Deterministicke akcie
self.mean = Dense(
4 changes: 1 addition & 3 deletions rl_toolkit/networks/models/critic.py
@@ -29,9 +29,7 @@ def __init__(self, units: list, n_quantiles: int, merge_index: int, **kwargs):

for i, m in enumerate(units):
if i != self.merge_index:
self.fc_layers.append(
Dense(units=m, activation="elu")
)
self.fc_layers.append(Dense(units=m, activation="elu"))
else:
self.fc_layers.append(None) # add empty layer instead of merge layer

11 changes: 4 additions & 7 deletions rl_toolkit/networks/models/dueling.py
@@ -1,16 +1,14 @@
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.initializers import Orthogonal, TruncatedNormal
from tensorflow.keras.layers import (
from tensorflow.keras.layers import ( # GlobalMaxPooling1D,; Lambda,
Add,
Dense,
Dropout,
GlobalAveragePooling1D,
Layer,
LayerNormalization,
MultiHeadAttention,
GlobalAveragePooling1D,
GlobalMaxPooling1D,
Lambda,
)


@@ -142,11 +140,11 @@ def __init__(
for _ in range(num_layers)
]

# Reduce
# Reduce
# self.flatten = Lambda(lambda x: x[:, -1])
# self.flatten = GlobalMaxPooling1D()
self.flatten = GlobalAveragePooling1D()

# Output
self.V = Dense(
1,
@@ -165,7 +163,6 @@ def call(self, inputs, training=None):
for layer in self.e_layers:
x = layer(x, training=training)


# Reduce block
x = self.flatten(x, training=training)
# x = self.drop_out(x, training=training)
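
The commented-out lines in the reduce block above correspond to three common ways of collapsing the transformer's token dimension before the dueling output heads (GlobalAveragePooling1D is the variant left active). A short, self-contained sketch assuming a (batch, tokens, features) tensor:

    import tensorflow as tf
    from tensorflow.keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, Lambda

    x = tf.random.normal((4, 16, 64))        # (batch, tokens, features)
    avg = GlobalAveragePooling1D()(x)        # mean over tokens     -> shape (4, 64)
    mx = GlobalMaxPooling1D()(x)             # max over tokens      -> shape (4, 64)
    last = Lambda(lambda t: t[:, -1])(x)     # keep last token only -> shape (4, 64)
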
