diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2ee31e0f..3628512b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,15 +16,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10"] + python-version: ["3.10", "3.11"] os: ["ubuntu-latest", "macos-latest", "windows-latest"] - rdkit-version: ["2022.09", "2023.03"] - - # just enable python 3.11 on ubuntu to not blow up the CI time. - include: - - os: ubuntu-latest - python-version: "3.11" - rdkit-version: "2023.03" + rdkit-version: ["2023.03", "2023.09"] runs-on: ${{ matrix.os }} timeout-minutes: 30 diff --git a/README.md b/README.md index 037ccf9e..37f22323 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,7 @@ See below the associated versions of Python and RDKit, for which a minor version | `datamol` | `python` | `rdkit` | | --------- | ------------------- | ----------------------------- | +| `0.12.x` | `[3.10, 3.11]` | `[2023.03, 2023.09]` | | `0.11.x` | `[3.9, 3.10, 3.11]` | `[2022.09, 2023.03]` | | `0.10.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` | | `0.9.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` | diff --git a/datamol/cluster.py b/datamol/cluster.py index 6c5a63ef..9b80bdcb 100644 --- a/datamol/cluster.py +++ b/datamol/cluster.py @@ -242,7 +242,9 @@ def assign_to_centroids( features = dm.parallelized(feature_fn, all_mols, n_jobs=n_jobs) def distij(i, j, features=features): - return 1.0 - DataStructs.cDataStructs.TanimotoSimilarity(features[int(i)], features[int(j)]) + return 1.0 - DataStructs.cDataStructs.TanimotoSimilarity( + features[int(i.item())], features[int(j.item())] + ) if dist_fn is None: dist_fn = distij diff --git a/datamol/mol.py b/datamol/mol.py index e0cf6cab..a2e0efae 100644 --- a/datamol/mol.py +++ b/datamol/mol.py @@ -25,7 +25,6 @@ from rdkit.Chem.Scaffolds import MurckoScaffold from rdkit.Chem.MolStandardize import rdMolStandardize -from rdkit.Chem.MolStandardize import canonicalize_tautomer_smiles 
from rdkit.Chem.SaltRemover import SaltRemover import datamol @@ -395,22 +394,19 @@ def sanitize_first(mols: List[Mol], charge_neutral: bool = False, sanifix: bool return None -def standardize_smiles(smiles: str, tautomer: bool = False) -> str: +def standardize_smiles(smiles: str) -> str: r""" Apply smile standardization procedure. This is a convenient function wrapped arrounf RDKit smiles standardizer and tautomeric canonicalization. Args: smiles: Smiles to standardize - tautomer: Whether to canonicalize tautomers Returns: standard_smiles: the standardized smiles """ smiles = rdMolStandardize.StandardizeSmiles(smiles) - if tautomer: - smiles = canonicalize_tautomer_smiles(smiles) return smiles diff --git a/docs/index.md b/docs/index.md index 0a0cc235..bb7d165f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -75,6 +75,7 @@ See below the associated versions of Python and RDKit, for which a minor version | `datamol` | `python` | `rdkit` | | --------- | ------------------- | ----------------------------- | +| `0.12.x` | `[3.10, 3.11]` | `[2023.03, 2023.09]` | | `0.11.x` | `[3.9, 3.10, 3.11]` | `[2022.09, 2023.03]` | | `0.10.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` | | `0.9.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` | diff --git a/pyproject.toml b/pyproject.toml index 0b3dc530..c2ae64dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,9 +83,14 @@ minversion = "6.0" addopts = "--verbose --cov=datamol --cov-fail-under=85 --cov-report xml --cov-report term --durations=10 -n auto" testpaths = ["tests"] filterwarnings = [ - "ignore::DeprecationWarning:rdkit.*:", - "ignore::DeprecationWarning:jupyter_client.*:", - "ignore::DeprecationWarning:pkg_resources.*:", + "ignore::DeprecationWarning:rdkit.Chem.MolStandardize", + "ignore::DeprecationWarning:jupyter_client", + "ignore::DeprecationWarning:pkg_resources", + "ignore::DeprecationWarning:joblib.externals.loky.backend", + "ignore::DeprecationWarning:dateutil.tz.tz", + 
"ignore::DeprecationWarning:joblib._utils", "ignore::DeprecationWarning:openpyxl.packaging.core", "ignore::DeprecationWarning:tqdm.std", ] [tool.coverage.run] diff --git a/tests/test_mcs.py b/tests/test_mcs.py index a79684ef..8a4e8ef8 100644 --- a/tests/test_mcs.py +++ b/tests/test_mcs.py @@ -12,11 +12,12 @@ def test_find_mcs(): mols = [dm.to_mol(s) for s in smiles_list] smarts = dm.find_mcs(mols=mols, timeout=2) - # Load/export SMARTS to check RDKit versions compatibility. - excepted_smarts = "[#6&!R]-&!@[#6&!R]-&!@[#8&!R]-&!@[#6&R]1:&@[#6&R]:&@[#6&R]2:&@[#7&R]:&@[#6&R]:&@[#7&R]:&@[#6&R](:&@[#6&R]:&@2:&@[#6&R]:&@[#6&R]:&@1-&!@[#7&!R]-&!@[#6&!R](=&!@[#8&!R])-&!@[#6&!R]=&!@[#6&!R])-&!@[#7&!R]-&!@[#6&R]1:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@1" - excepted_smarts_mol = dm.from_smarts(excepted_smarts) - excepted_smarts = dm.to_smarts(excepted_smarts_mol) - - print(smarts) + # NOTE(hadim): hashes differ between RDKit versions + expected_hashes = [ + # RDKit >= 2023.09 + "762f483ac10cc0f45c5aa2c790f9ef52f8dfb337", + # RDKit <= 2023.03 + "49eff32e405d17980fad428cf4063ec52e2c5fda", + ] - assert smarts == excepted_smarts + assert dm.hash_mol(dm.from_smarts(smarts)) in expected_hashes diff --git a/tests/test_mol.py b/tests/test_mol.py index 0b8bc511..ef77af8d 100644 --- a/tests/test_mol.py +++ b/tests/test_mol.py @@ -302,7 +302,7 @@ def test_sanitize_smiles_none(): def test_standardize_smiles_tautomer(): smiles = "C1=CC=CN=C1" - std_smiles = dm.standardize_smiles(smiles, tautomer=True) + std_smiles = dm.standardize_smiles(smiles) assert "c1ccncc1" == std_smiles diff --git a/tests/test_utils_fs.py b/tests/test_utils_fs.py index f17b142d..db8bf340 100644 --- a/tests/test_utils_fs.py +++ b/tests/test_utils_fs.py @@ -17,7 +17,7 @@ def test_copy_files(tmp_path): dm.utils.fs.copy_file(source_path, destination_path) with open(destination_path) as f: - f.read() == content + assert f.read() == content def test_copy_dir(tmp_path): @@ -49,10 +49,10 @@ def 
test_copy_dir(tmp_path): assert dm.utils.fs.is_file(file2_path) with open(file1_path) as f: - f.read() == content + assert f.read() == content with open(file2_path) as f: - f.read() == content + assert f.read() == content def test_mkdir(tmp_path): @@ -91,7 +91,9 @@ def test_cache_dir(): def test_get_mapper(tmp_path): fsmapper = dm.utils.fs.get_mapper(str(tmp_path / "test.txt")) - assert fsmapper.fs.protocol == "file" + + # NOTE(hadim): depends on the fsspec version + assert fsmapper.fs.protocol in ["file", ("file", "local")] @pytest.mark.skip_platform("win")