Skip to content

Commit

Permalink
Feature: add tests and linting (#1)
Browse files Browse the repository at this point in the history
* Sets up an environment.yml and prepares for testing

* Adds tests for the PMO benchmark

* Moves tox testing to the relevant folder

* adds PR to the event triggers

* Moves the action to the relevant folder

* Wrong version of black was running. Lints

* Removes conda dependency in tox

* Cleans up tox to only run a single env

* Removes old dependencies to molopt's VAE

* Removes old dependencies to molopt in experiment run
  • Loading branch information
miguelgondu authored Jun 10, 2024
1 parent 8c6a4ab commit 06ea1d0
Show file tree
Hide file tree
Showing 14 changed files with 120 additions and 216 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/tox-lint-and-pytest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# CI workflow: run linting and the test suite through tox.
name: Tests on hdbo (conda, python 3.10)

# NOTE(review): the commit message states PRs were added to the event
# triggers ("adds PR to the event triggers"), but only push was listed.
# Include pull_request so checks actually run on PRs as intended.
on: [push, pull_request]

jobs:
  build-linux:
    runs-on: ubuntu-latest
    strategy:
      max-parallel: 5

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: '3.10'
      - name: Add conda to system path
        run: |
          # $CONDA is an environment variable pointing to the root of the miniconda directory
          echo $CONDA/bin >> $GITHUB_PATH
      - name: Install dependencies
        run: |
          # tox drives both the lint check and pytest (see tox.ini).
          python -m pip install tox
      - name: Check linting and tests with tox
        run: |
          tox
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,14 @@ After implementing a solver in `poli-baselines`, you can **register it** in `src
The scripts used to run the benchmarks can be found in `src/hdbo_benchmark/experiments`. To run e.g. `albuterol_similarity` [of the PMO benchmark](https://openreview.net/forum?id=yCZRdI0Y7G) you can run:

```bash
conda run -n hdbo python src/hdbo_benchmark/experiments/benchmark_on_pmo/run.py \
--function_name=albuterol_similarity \
--solver_name=your_solver_name \
--latent_dim=128 \
conda run -n hdbo_benchmark python src/hdbo_benchmark/experiments/benchmark_on_pmo/run.py \
--function-name=albuterol_similarity \
--solver-name=line_bo \
--latent-dim=128 \
  --max-iter=300
```

assuming `hdbo` is an environment in which you can run your solver, and in which this package is installed. Examples of environments where solvers have been tested to run can be found in `poli-baselines`.
assuming `hdbo_benchmark` is an environment in which you can run your solver, and in which this package is installed. Examples of environments where solvers have been tested to run can be found in `poli-baselines`.

## Replicating the data preprocessing for downloading zinc250k

Expand Down
15 changes: 15 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Conda environment for the hdbo_benchmark package (Python 3.10).
# Create with: conda env create -f environment.yml
name: hdbo_benchmark
channels:
  - defaults
dependencies:
  - python=3.10
  - pip
  - pip:
      - botorch
      - seaborn
      - CairoSVG
      - wandb
      - click
      # NOTE(review): these track moving branch heads (dev/main), so the
      # environment is not reproducible — consider pinning to a tag/commit.
      - "git+https://github.com/MachineLearningLifeScience/poli.git@dev"
      - "git+https://github.com/MachineLearningLifeScience/poli-baselines.git@main"
      # Editable install of this repository itself.
      - -e .
9 changes: 7 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@ build-backend = "setuptools.build_meta"
name = "hdbo_benchmark"
version = "0.0.1"
dependencies = [
"numpy",
"torch",
"botorch",
"seaborn",
"CairoSVG",
"wandb",
"click",
"poli@git+https://github.com/MachineLearningLifeScience/poli.git@dev",
"poli-baselines@git+https://github.com/MachineLearningLifeScience/poli-baselines.git@main"
]

[tool.mypy]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Loads the processed datasets, and saves them as a csv."""

import pickle
from pathlib import Path

Expand Down
3 changes: 0 additions & 3 deletions src/hdbo_benchmark/experiments/benchmark_on_pmo/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

import hdbo_benchmark
from hdbo_benchmark.generative_models.vae_factory import VAEFactory, VAESelfies, VAE
from hdbo_benchmark.generative_models.vae_molopt import VAEMolOpt
from hdbo_benchmark.utils.experiments.load_solvers import load_solver, SOLVER_NAMES
from hdbo_benchmark.utils.experiments.load_metadata_for_vaes import (
load_alphabet_for_pmo,
Expand All @@ -39,8 +38,6 @@ def in_latent_space(
) -> Callable[[np.ndarray], np.ndarray]:
def _latent_f(z: np.ndarray) -> np.ndarray:
selfies_strings = vae.decode_to_string_array(z)
if isinstance(vae, VAEMolOpt):
selfies_strings = np.array(["".join(selfies_strings)])
val: np.ndarray = f(np.array(selfies_strings))
return val

Expand Down
3 changes: 1 addition & 2 deletions src/hdbo_benchmark/generative_models/vae_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import torch

from hdbo_benchmark.generative_models.vae import VAE
from hdbo_benchmark.generative_models.vae_molopt import VAEMolOpt
from hdbo_benchmark.generative_models.vae_selfies import VAESelfies
from hdbo_benchmark.generative_models.vae_rnn_selfies import VAERNNSelfies
from hdbo_benchmark.generative_models.vae_mario import VAEMario
Expand Down Expand Up @@ -77,7 +76,7 @@ def _create_vae_on_mario(self, latent_dim: int) -> VAEMario:
opt_vae.load_state_dict(torch.load(weights_path, map_location=DEVICE))
return opt_vae

def _create_vae_on_molecules(self, latent_dim: int) -> VAESelfies | VAEMolOpt:
def _create_vae_on_molecules(self, latent_dim: int) -> VAESelfies:
match latent_dim:
case 2:
weights_path = (
Expand Down
9 changes: 4 additions & 5 deletions src/hdbo_benchmark/generative_models/vae_mario.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,7 @@ def _from_level_to_onehot(self, level: str):

return onehot

def decode_to_string_array(self, z: np.ndarray) -> np.ndarray:
...
def decode_to_string_array(self, z: np.ndarray) -> np.ndarray: ...

def plot_grid(
self,
Expand Down Expand Up @@ -180,9 +179,9 @@ def plot_grid(
pixels = 16 * 14
final_img = np.zeros((n_cols * pixels, n_rows * pixels, 3))
for z, (i, j) in positions.items():
final_img[
i * pixels : (i + 1) * pixels, j * pixels : (j + 1) * pixels
] = img_dict[z]
final_img[i * pixels : (i + 1) * pixels, j * pixels : (j + 1) * pixels] = (
img_dict[z]
)

final_img = final_img.astype(int)

Expand Down
198 changes: 0 additions & 198 deletions src/hdbo_benchmark/generative_models/vae_molopt.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ def summary_per_function(


def plot_heatmap(df, normalized: bool = True):

summary_avg, _ = summary_per_function(df, normalized_per_row=normalized)

# We keep the columns in solver_name_but_pretty order
Expand Down
Empty file.
Empty file.
35 changes: 35 additions & 0 deletions src/hdbo_benchmark/tests/benchmark_on_pmo/test_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import os

import pytest

from click.testing import CliRunner
from hdbo_benchmark.experiments.benchmark_on_pmo.run import main


@pytest.mark.parametrize("function_name", ["albuterol_similarity", "valsartan_smarts"])
@pytest.mark.parametrize("solver_name", ["random_mutation", "line_bo", "turbo"])
@pytest.mark.parametrize("latent_dim", [2, 128])
def test_main_run(function_name, solver_name, latent_dim):
    """Smoke-test the PMO benchmark CLI for a grid of function/solver/latent-dim
    combinations, using a tiny budget (3 iterations, 2 initial points)."""
    # Keep wandb offline so the test makes no network calls.
    os.environ["WANDB_MODE"] = "disabled"

    runner = CliRunner()
    result = runner.invoke(
        main,
        [
            "--solver-name",
            solver_name,
            "--function-name",
            function_name,
            "--latent-dim",
            str(latent_dim),
            "--max-iter",
            "3",
            "--n-initial-points",
            "2",
            "--no-strict-on-hash",
            "--force-run",
            "--solve-in-discrete-space",
            "--tag",
            "test",
        ],
    )
    # CliRunner.invoke catches exceptions raised inside the command and
    # records them on `result`; without this assertion the test passes
    # silently even when the benchmark run crashes.
    assert result.exit_code == 0, result.output
Loading

0 comments on commit 06ea1d0

Please sign in to comment.