From f35a77cf8f460587fd8b8b7ac17bd707a202876c Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Fri, 27 Oct 2023 21:19:37 -0400 Subject: [PATCH 1/4] rdkit compat --- .github/workflows/test.yml | 10 ++-------- README.md | 1 + datamol/cluster.py | 4 +++- datamol/mol.py | 5 +---- docs/index.md | 1 + pyproject.toml | 11 ++++++++--- tests/test_mcs.py | 2 +- tests/test_mol.py | 2 +- tests/test_utils_fs.py | 8 ++++---- 9 files changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2ee31e0f..3628512b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,15 +16,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10"] + python-version: ["3.10", "3.11"] os: ["ubuntu-latest", "macos-latest", "windows-latest"] - rdkit-version: ["2022.09", "2023.03"] - - # just enable python 3.11 on ubuntu to not blow up the CI time. - include: - - os: ubuntu-latest - python-version: "3.11" - rdkit-version: "2023.03" + rdkit-version: ["2023.03", "2023.09"] runs-on: ${{ matrix.os }} timeout-minutes: 30 diff --git a/README.md b/README.md index 037ccf9e..37f22323 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,7 @@ See below the associated versions of Python and RDKit, for which a minor version | `datamol` | `python` | `rdkit` | | --------- | ------------------- | ----------------------------- | +| `0.12.x` | `[3.10, 3.11]` | `[2023.03, 2023.09]` | | `0.11.x` | `[3.9, 3.10, 3.11]` | `[2022.09, 2023.03]` | | `0.10.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` | | `0.9.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` | diff --git a/datamol/cluster.py b/datamol/cluster.py index 6c5a63ef..9b80bdcb 100644 --- a/datamol/cluster.py +++ b/datamol/cluster.py @@ -242,7 +242,9 @@ def assign_to_centroids( features = dm.parallelized(feature_fn, all_mols, n_jobs=n_jobs) def distij(i, j, features=features): - return 1.0 - DataStructs.cDataStructs.TanimotoSimilarity(features[int(i)], 
features[int(j)]) + return 1.0 - DataStructs.cDataStructs.TanimotoSimilarity( + features[int(i.item())], features[int(j.item())] + ) if dist_fn is None: dist_fn = distij diff --git a/datamol/mol.py b/datamol/mol.py index e0cf6cab..ac373481 100644 --- a/datamol/mol.py +++ b/datamol/mol.py @@ -25,7 +25,6 @@ from rdkit.Chem.Scaffolds import MurckoScaffold from rdkit.Chem.MolStandardize import rdMolStandardize -from rdkit.Chem.MolStandardize import canonicalize_tautomer_smiles from rdkit.Chem.SaltRemover import SaltRemover import datamol @@ -395,7 +394,7 @@ def sanitize_first(mols: List[Mol], charge_neutral: bool = False, sanifix: bool return None -def standardize_smiles(smiles: str, tautomer: bool = False) -> str: +def standardize_smiles(smiles: str) -> str: r""" Apply smile standardization procedure. This is a convenient function wrapped arrounf RDKit smiles standardizer and tautomeric canonicalization. @@ -409,8 +408,6 @@ def standardize_smiles(smiles: str, tautomer: bool = False) -> str: """ smiles = rdMolStandardize.StandardizeSmiles(smiles) - if tautomer: - smiles = canonicalize_tautomer_smiles(smiles) return smiles diff --git a/docs/index.md b/docs/index.md index 0a0cc235..bb7d165f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -75,6 +75,7 @@ See below the associated versions of Python and RDKit, for which a minor version | `datamol` | `python` | `rdkit` | | --------- | ------------------- | ----------------------------- | +| `0.12.x` | `[3.10, 3.11]` | `[2023.03, 2023.09]` | | `0.11.x` | `[3.9, 3.10, 3.11]` | `[2022.09, 2023.03]` | | `0.10.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` | | `0.9.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` | diff --git a/pyproject.toml b/pyproject.toml index 0b3dc530..c2ae64dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,9 +83,14 @@ minversion = "6.0" addopts = "--verbose --cov=datamol --cov-fail-under=85 --cov-report xml --cov-report term --durations=10 -n auto" testpaths = ["tests"] filterwarnings = [ - 
"ignore::DeprecationWarning:rdkit.*:", - "ignore::DeprecationWarning:jupyter_client.*:", - "ignore::DeprecationWarning:pkg_resources.*:", + "ignore::DeprecationWarning:rdkit.Chem.MolStandardize", + "ignore::DeprecationWarning:jupyter_client", + "ignore::DeprecationWarning:pkg_resources", + "ignore::DeprecationWarning:joblib.externals.loky.backend", + "ignore::DeprecationWarning:dateutil.tz.tz", + "ignore::DeprecationWarning:joblib._utils", + "ignore::DeprecationWarning:openpyxl.packaging.core", + "ignore::DeprecationWarning:tqdm.std", ] [tool.coverage.run] diff --git a/tests/test_mcs.py b/tests/test_mcs.py index a79684ef..7e46fb35 100644 --- a/tests/test_mcs.py +++ b/tests/test_mcs.py @@ -13,7 +13,7 @@ def test_find_mcs(): smarts = dm.find_mcs(mols=mols, timeout=2) # Load/export SMARTS to check RDKit versions compatibility. - excepted_smarts = "[#6&!R]-&!@[#6&!R]-&!@[#8&!R]-&!@[#6&R]1:&@[#6&R]:&@[#6&R]2:&@[#7&R]:&@[#6&R]:&@[#7&R]:&@[#6&R](:&@[#6&R]:&@2:&@[#6&R]:&@[#6&R]:&@1-&!@[#7&!R]-&!@[#6&!R](=&!@[#8&!R])-&!@[#6&!R]=&!@[#6&!R])-&!@[#7&!R]-&!@[#6&R]1:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@1" + excepted_smarts = "[#6&!R]-&!@[#6&!R](=&!@[#8&!R])-&!@[#7&!R]-&!@[#6&!R]-&!@[#6&!R]-&!@[#8&!R]-&!@[#6&R]1:&@[#6&R]:&@[#6&R]2:&@[#7&R]:&@[#6&R]:&@[#7&R]:&@[#6&R](:&@[#6&R]:&@2:&@[#6&R]:&@[#6&R]:&@1-&!@[#7&!R]-&!@[#6&!R](=&!@[#8&!R])-&!@[#6&!R]=&!@[#6&!R])-&!@[#7&!R]-&!@[#6&R]1:&@[#6&R]:&@[#6&R]:&@[#6&R](:&@[#6&R]:&@[#6&R]:&@1-&!@[#9&!R])-&!@[#35&!R]" excepted_smarts_mol = dm.from_smarts(excepted_smarts) excepted_smarts = dm.to_smarts(excepted_smarts_mol) diff --git a/tests/test_mol.py b/tests/test_mol.py index 0b8bc511..ef77af8d 100644 --- a/tests/test_mol.py +++ b/tests/test_mol.py @@ -302,7 +302,7 @@ def test_sanitize_smiles_none(): def test_standardize_smiles_tautomer(): smiles = "C1=CC=CN=C1" - std_smiles = dm.standardize_smiles(smiles, tautomer=True) + std_smiles = dm.standardize_smiles(smiles) assert "c1ccncc1" == std_smiles diff --git a/tests/test_utils_fs.py 
b/tests/test_utils_fs.py index f17b142d..9573a7e8 100644 --- a/tests/test_utils_fs.py +++ b/tests/test_utils_fs.py @@ -17,7 +17,7 @@ def test_copy_files(tmp_path): dm.utils.fs.copy_file(source_path, destination_path) with open(destination_path) as f: - f.read() == content + assert f.read() == content def test_copy_dir(tmp_path): @@ -49,10 +49,10 @@ def test_copy_dir(tmp_path): assert dm.utils.fs.is_file(file2_path) with open(file1_path) as f: - f.read() == content + assert f.read() == content with open(file2_path) as f: - f.read() == content + assert f.read() == content def test_mkdir(tmp_path): @@ -91,7 +91,7 @@ def test_cache_dir(): def test_get_mapper(tmp_path): fsmapper = dm.utils.fs.get_mapper(str(tmp_path / "test.txt")) - assert fsmapper.fs.protocol == "file" + assert fsmapper.fs.protocol == ("file", "local") @pytest.mark.skip_platform("win") From ef4f5f832f46d2d76734f25070cd72424b0accf9 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Fri, 27 Oct 2023 21:28:49 -0400 Subject: [PATCH 2/4] more robust mcs tests --- tests/test_mcs.py | 9 ++------- tests/test_utils_fs.py | 4 +++- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/test_mcs.py b/tests/test_mcs.py index 7e46fb35..c5addf43 100644 --- a/tests/test_mcs.py +++ b/tests/test_mcs.py @@ -12,11 +12,6 @@ def test_find_mcs(): mols = [dm.to_mol(s) for s in smiles_list] smarts = dm.find_mcs(mols=mols, timeout=2) - # Load/export SMARTS to check RDKit versions compatibility. 
- excepted_smarts = "[#6&!R]-&!@[#6&!R](=&!@[#8&!R])-&!@[#7&!R]-&!@[#6&!R]-&!@[#6&!R]-&!@[#8&!R]-&!@[#6&R]1:&@[#6&R]:&@[#6&R]2:&@[#7&R]:&@[#6&R]:&@[#7&R]:&@[#6&R](:&@[#6&R]:&@2:&@[#6&R]:&@[#6&R]:&@1-&!@[#7&!R]-&!@[#6&!R](=&!@[#8&!R])-&!@[#6&!R]=&!@[#6&!R])-&!@[#7&!R]-&!@[#6&R]1:&@[#6&R]:&@[#6&R]:&@[#6&R](:&@[#6&R]:&@[#6&R]:&@1-&!@[#9&!R])-&!@[#35&!R]" - excepted_smarts_mol = dm.from_smarts(excepted_smarts) - excepted_smarts = dm.to_smarts(excepted_smarts_mol) + excepted_hash = "762f483ac10cc0f45c5aa2c790f9ef52f8dfb337" - print(smarts) - - assert smarts == excepted_smarts + assert dm.hash_mol(dm.from_smarts(smarts)) == excepted_hash diff --git a/tests/test_utils_fs.py b/tests/test_utils_fs.py index 9573a7e8..db8bf340 100644 --- a/tests/test_utils_fs.py +++ b/tests/test_utils_fs.py @@ -91,7 +91,9 @@ def test_cache_dir(): def test_get_mapper(tmp_path): fsmapper = dm.utils.fs.get_mapper(str(tmp_path / "test.txt")) - assert fsmapper.fs.protocol == ("file", "local") + + # NOTE(hadim): depends on the fsspec version + assert fsmapper.fs.protocol in ["file", ("file", "local")] @pytest.mark.skip_platform("win") From 1e12f27b92abfe130c94bfd4bc9a6d9d5b2d410e Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Fri, 27 Oct 2023 21:32:35 -0400 Subject: [PATCH 3/4] more robust mcs tests --- tests/test_mcs.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_mcs.py b/tests/test_mcs.py index c5addf43..8a4e8ef8 100644 --- a/tests/test_mcs.py +++ b/tests/test_mcs.py @@ -12,6 +12,12 @@ def test_find_mcs(): mols = [dm.to_mol(s) for s in smiles_list] smarts = dm.find_mcs(mols=mols, timeout=2) - excepted_hash = "762f483ac10cc0f45c5aa2c790f9ef52f8dfb337" + # NOTE(hadim): hashes differ across RDKit versions + expected_hashes = [ + # RDKit >= 2023.09 + "762f483ac10cc0f45c5aa2c790f9ef52f8dfb337", + # RDKit <= 2023.03 + "49eff32e405d17980fad428cf4063ec52e2c5fda", + ] - assert 
dm.hash_mol(dm.from_smarts(smarts)) in expected_hashes From e8533f91decc5617ad8f65e14657cf787a3ebecd Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Fri, 27 Oct 2023 21:35:32 -0400 Subject: [PATCH 4/4] more robust mcs tests --- datamol/mol.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datamol/mol.py b/datamol/mol.py index ac373481..a2e0efae 100644 --- a/datamol/mol.py +++ b/datamol/mol.py @@ -401,7 +401,6 @@ def standardize_smiles(smiles: str) -> str: Args: smiles: Smiles to standardize - tautomer: Whether to canonicalize tautomers Returns: standard_smiles: the standardized smiles