Skip to content

Commit

Permalink
Feature: add tests and linting (#1)
Browse files Browse the repository at this point in the history
* Sets up an environment.yml and prepares for testing

* Adds tests for the PMO benchmark

* Moves tox testing to the relevant folder

* adds PR to the event triggers

* Moves the action to the relevant folder

* Wrong version of black was running. Lints

* Removes conda dependency in tox

* Cleans up tox to only run a single env

* Removes old dependencies to molopt's VAE

* Removes old dependencies to molopt in experiment run
  • Loading branch information
miguelgondu authored Jun 10, 2024
1 parent 8c6a4ab commit 06ea1d0
Show file tree
Hide file tree
Showing 14 changed files with 120 additions and 216 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/tox-lint-and-pytest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# CI workflow: run linting and the test suite through tox.
name: Tests on hdbo (conda, python 3.10)

# NOTE(review): the commit message states PRs were added to the event
# triggers ("adds PR to the event triggers"), but only push was listed.
# Include pull_request so checks actually run on PRs as intended.
on: [push, pull_request]

jobs:
  build-linux:
    runs-on: ubuntu-latest
    strategy:
      max-parallel: 5

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: '3.10'
      - name: Add conda to system path
        run: |
          # $CONDA is an environment variable pointing to the root of the miniconda directory
          echo $CONDA/bin >> $GITHUB_PATH
      - name: Install dependencies
        run: |
          # tox drives both the lint check and pytest (see tox.ini).
          python -m pip install tox
      - name: Check linting and tests with tox
        run: |
          tox
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,14 @@ After implementing a solver in `poli-baselines`, you can **register it** in `src
The scripts used to run the benchmarks can be found in `src/hdbo_benchmark/experiments`. To run e.g. `albuterol_similarity` [of the PMO benchmark](https://openreview.net/forum?id=yCZRdI0Y7G) you can run:

```bash
conda run -n hdbo python src/hdbo_benchmark/experiments/benchmark_on_pmo/run.py \
--function_name=albuterol_similarity \
--solver_name=your_solver_name \
--latent_dim=128 \
conda run -n hdbo_benchmark python src/hdbo_benchmark/experiments/benchmark_on_pmo/run.py \
--function-name=albuterol_similarity \
--solver-name=line_bo \
--latent-dim=128 \
  --max-iter=300
```

assuming `hdbo` is an environment in which you can run your solver, and in which this package is installed. Examples of environments where solvers have been tested to run can be found in `poli-baselines`.
assuming `hdbo_benchmark` is an environment in which you can run your solver, and in which this package is installed. Examples of environments where solvers have been tested to run can be found in `poli-baselines`.

## Replicating the data preprocessing for downloading zinc250k

Expand Down
15 changes: 15 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Conda environment for the hdbo_benchmark package (Python 3.10).
# Create with: conda env create -f environment.yml
name: hdbo_benchmark
channels:
  - defaults
dependencies:
  - python=3.10
  - pip
  - pip:
      - botorch
      - seaborn
      - CairoSVG
      - wandb
      - click
      # NOTE(review): these track moving branch heads (dev/main), so the
      # environment is not reproducible — consider pinning to a tag/commit.
      - "git+https://github.com/MachineLearningLifeScience/poli.git@dev"
      - "git+https://github.com/MachineLearningLifeScience/poli-baselines.git@main"
      # Editable install of this repository itself.
      - -e .
9 changes: 7 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@ build-backend = "setuptools.build_meta"
name = "hdbo_benchmark"
version = "0.0.1"
dependencies = [
"numpy",
"torch",
"botorch",
"seaborn",
"CairoSVG",
"wandb",
"click",
"poli@git+https://github.com/MachineLearningLifeScience/poli.git@dev",
"poli-baselines@git+https://github.com/MachineLearningLifeScience/poli-baselines.git@main"
]

[tool.mypy]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Loads the processed datasets, and saves them as a csv."""

import pickle
from pathlib import Path

Expand Down
3 changes: 0 additions & 3 deletions src/hdbo_benchmark/experiments/benchmark_on_pmo/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

import hdbo_benchmark
from hdbo_benchmark.generative_models.vae_factory import VAEFactory, VAESelfies, VAE
from hdbo_benchmark.generative_models.vae_molopt import VAEMolOpt
from hdbo_benchmark.utils.experiments.load_solvers import load_solver, SOLVER_NAMES
from hdbo_benchmark.utils.experiments.load_metadata_for_vaes import (
load_alphabet_for_pmo,
Expand All @@ -39,8 +38,6 @@ def in_latent_space(
) -> Callable[[np.ndarray], np.ndarray]:
def _latent_f(z: np.ndarray) -> np.ndarray:
selfies_strings = vae.decode_to_string_array(z)
if isinstance(vae, VAEMolOpt):
selfies_strings = np.array(["".join(selfies_strings)])
val: np.ndarray = f(np.array(selfies_strings))
return val

Expand Down
3 changes: 1 addition & 2 deletions src/hdbo_benchmark/generative_models/vae_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import torch

from hdbo_benchmark.generative_models.vae import VAE
from hdbo_benchmark.generative_models.vae_molopt import VAEMolOpt
from hdbo_benchmark.generative_models.vae_selfies import VAESelfies
from hdbo_benchmark.generative_models.vae_rnn_selfies import VAERNNSelfies
from hdbo_benchmark.generative_models.vae_mario import VAEMario
Expand Down Expand Up @@ -77,7 +76,7 @@ def _create_vae_on_mario(self, latent_dim: int) -> VAEMario:
opt_vae.load_state_dict(torch.load(weights_path, map_location=DEVICE))
return opt_vae

def _create_vae_on_molecules(self, latent_dim: int) -> VAESelfies | VAEMolOpt:
def _create_vae_on_molecules(self, latent_dim: int) -> VAESelfies:
match latent_dim:
case 2:
weights_path = (
Expand Down
9 changes: 4 additions & 5 deletions src/hdbo_benchmark/generative_models/vae_mario.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,7 @@ def _from_level_to_onehot(self, level: str):

return onehot

def decode_to_string_array(self, z: np.ndarray) -> np.ndarray:
...
def decode_to_string_array(self, z: np.ndarray) -> np.ndarray: ...

def plot_grid(
self,
Expand Down Expand Up @@ -180,9 +179,9 @@ def plot_grid(
pixels = 16 * 14
final_img = np.zeros((n_cols * pixels, n_rows * pixels, 3))
for z, (i, j) in positions.items():
final_img[
i * pixels : (i + 1) * pixels, j * pixels : (j + 1) * pixels
] = img_dict[z]
final_img[i * pixels : (i + 1) * pixels, j * pixels : (j + 1) * pixels] = (
img_dict[z]
)

final_img = final_img.astype(int)

Expand Down
198 changes: 0 additions & 198 deletions src/hdbo_benchmark/generative_models/vae_molopt.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ def summary_per_function(


def plot_heatmap(df, normalized: bool = True):

summary_avg, _ = summary_per_function(df, normalized_per_row=normalized)

# We keep the columns in solver_name_but_pretty order
Expand Down
Empty file.
Empty file.
35 changes: 35 additions & 0 deletions src/hdbo_benchmark/tests/benchmark_on_pmo/test_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import os

import pytest

from click.testing import CliRunner
from hdbo_benchmark.experiments.benchmark_on_pmo.run import main


@pytest.mark.parametrize("function_name", ["albuterol_similarity", "valsartan_smarts"])
@pytest.mark.parametrize("solver_name", ["random_mutation", "line_bo", "turbo"])
@pytest.mark.parametrize("latent_dim", [2, 128])
def test_main_run(function_name, solver_name, latent_dim):
    """Smoke-test the PMO benchmark CLI for a grid of function/solver/latent-dim
    combinations, using a tiny budget (3 iterations, 2 initial points)."""
    # Keep wandb offline so the test makes no network calls.
    os.environ["WANDB_MODE"] = "disabled"

    runner = CliRunner()
    result = runner.invoke(
        main,
        [
            "--solver-name",
            solver_name,
            "--function-name",
            function_name,
            "--latent-dim",
            str(latent_dim),
            "--max-iter",
            "3",
            "--n-initial-points",
            "2",
            "--no-strict-on-hash",
            "--force-run",
            "--solve-in-discrete-space",
            "--tag",
            "test",
        ],
    )
    # CliRunner.invoke catches exceptions raised inside the command and
    # records them on `result`; without this assertion the test passes
    # silently even when the benchmark run crashes.
    assert result.exit_code == 0, result.output
Loading

0 comments on commit 06ea1d0

Please sign in to comment.