Skip to content

Commit

Permalink
Merge pull request #216 from datamol-io/new_rdkit
Browse files Browse the repository at this point in the history
Compat with latest RDKit 2023.09
  • Loading branch information
hadim authored Oct 28, 2023
2 parents 3939c12 + e8533f9 commit 9e94d02
Show file tree
Hide file tree
Showing 9 changed files with 31 additions and 29 deletions.
10 changes: 2 additions & 8 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,9 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10"]
python-version: ["3.10", "3.11"]
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
rdkit-version: ["2022.09", "2023.03"]

# just enable python 3.11 on ubuntu to not blow up the CI time.
include:
- os: ubuntu-latest
python-version: "3.11"
rdkit-version: "2023.03"
rdkit-version: ["2023.03", "2023.09"]

runs-on: ${{ matrix.os }}
timeout-minutes: 30
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ See below the associated versions of Python and RDKit, for which a minor version

| `datamol` | `python` | `rdkit` |
| --------- | ------------------- | ----------------------------- |
| `0.12.x` | `[3.10, 3.11]` | `[2023.03, 2023.09]` |
| `0.11.x` | `[3.9, 3.10, 3.11]` | `[2022.09, 2023.03]` |
| `0.10.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` |
| `0.9.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` |
Expand Down
4 changes: 3 additions & 1 deletion datamol/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,9 @@ def assign_to_centroids(
features = dm.parallelized(feature_fn, all_mols, n_jobs=n_jobs)

def distij(i, j, features=features):
return 1.0 - DataStructs.cDataStructs.TanimotoSimilarity(features[int(i)], features[int(j)])
return 1.0 - DataStructs.cDataStructs.TanimotoSimilarity(
features[int(i.item())], features[int(j.item())]
)

if dist_fn is None:
dist_fn = distij
Expand Down
6 changes: 1 addition & 5 deletions datamol/mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from rdkit.Chem.Scaffolds import MurckoScaffold

from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem.MolStandardize import canonicalize_tautomer_smiles
from rdkit.Chem.SaltRemover import SaltRemover

import datamol
Expand Down Expand Up @@ -395,22 +394,19 @@ def sanitize_first(mols: List[Mol], charge_neutral: bool = False, sanifix: bool
return None


def standardize_smiles(smiles: str, tautomer: bool = False) -> str:
def standardize_smiles(smiles: str) -> str:
r"""
Apply SMILES standardization procedure. This is a convenient function wrapped around RDKit
smiles standardizer and tautomeric canonicalization.
Args:
smiles: Smiles to standardize
tautomer: Whether to canonicalize tautomers
Returns:
standard_smiles: the standardized smiles
"""

smiles = rdMolStandardize.StandardizeSmiles(smiles)
if tautomer:
smiles = canonicalize_tautomer_smiles(smiles)
return smiles


Expand Down
1 change: 1 addition & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ See below the associated versions of Python and RDKit, for which a minor version

| `datamol` | `python` | `rdkit` |
| --------- | ------------------- | ----------------------------- |
| `0.12.x` | `[3.10, 3.11]` | `[2023.03, 2023.09]` |
| `0.11.x` | `[3.9, 3.10, 3.11]` | `[2022.09, 2023.03]` |
| `0.10.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` |
| `0.9.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` |
Expand Down
11 changes: 8 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,14 @@ minversion = "6.0"
addopts = "--verbose --cov=datamol --cov-fail-under=85 --cov-report xml --cov-report term --durations=10 -n auto"
testpaths = ["tests"]
filterwarnings = [
"ignore::DeprecationWarning:rdkit.*:",
"ignore::DeprecationWarning:jupyter_client.*:",
"ignore::DeprecationWarning:pkg_resources.*:",
"ignore::DeprecationWarning:rdkit.Chem.MolStandardize",
"ignore::DeprecationWarning:jupyter_client",
"ignore::DeprecationWarning:pkg_resources",
"ignore::DeprecationWarning:joblib.externals.loky.backend",
"ignore::DeprecationWarning:dateutil.tz.tz",
"ignore::DeprecationWarning:joblib._utils",
"ignore::DeprecationWarning:openpyxl.packaging.core",
"ignore::DeprecationWarning:tqdm.std",
]

[tool.coverage.run]
Expand Down
15 changes: 8 additions & 7 deletions tests/test_mcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ def test_find_mcs():
mols = [dm.to_mol(s) for s in smiles_list]
smarts = dm.find_mcs(mols=mols, timeout=2)

# Load/export SMARTS to check RDKit versions compatibility.
excepted_smarts = "[#6&!R]-&!@[#6&!R]-&!@[#8&!R]-&!@[#6&R]1:&@[#6&R]:&@[#6&R]2:&@[#7&R]:&@[#6&R]:&@[#7&R]:&@[#6&R](:&@[#6&R]:&@2:&@[#6&R]:&@[#6&R]:&@1-&!@[#7&!R]-&!@[#6&!R](=&!@[#8&!R])-&!@[#6&!R]=&!@[#6&!R])-&!@[#7&!R]-&!@[#6&R]1:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@1"
excepted_smarts_mol = dm.from_smarts(excepted_smarts)
excepted_smarts = dm.to_smarts(excepted_smarts_mol)

print(smarts)
# NOTE(hadim): the hashes differ depending on the RDKit version
expected_hashes = [
# RDKit >= 2023.09
"762f483ac10cc0f45c5aa2c790f9ef52f8dfb337",
# RDKit <= 2023.03
"49eff32e405d17980fad428cf4063ec52e2c5fda",
]

assert smarts == excepted_smarts
assert dm.hash_mol(dm.from_smarts(smarts)) in expected_hashes
2 changes: 1 addition & 1 deletion tests/test_mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def test_sanitize_smiles_none():

def test_standardize_smiles_tautomer():
smiles = "C1=CC=CN=C1"
std_smiles = dm.standardize_smiles(smiles, tautomer=True)
std_smiles = dm.standardize_smiles(smiles)
assert "c1ccncc1" == std_smiles


Expand Down
10 changes: 6 additions & 4 deletions tests/test_utils_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_copy_files(tmp_path):
dm.utils.fs.copy_file(source_path, destination_path)

with open(destination_path) as f:
f.read() == content
assert f.read() == content


def test_copy_dir(tmp_path):
Expand Down Expand Up @@ -49,10 +49,10 @@ def test_copy_dir(tmp_path):
assert dm.utils.fs.is_file(file2_path)

with open(file1_path) as f:
f.read() == content
assert f.read() == content

with open(file2_path) as f:
f.read() == content
assert f.read() == content


def test_mkdir(tmp_path):
Expand Down Expand Up @@ -91,7 +91,9 @@ def test_cache_dir():

def test_get_mapper(tmp_path):
fsmapper = dm.utils.fs.get_mapper(str(tmp_path / "test.txt"))
assert fsmapper.fs.protocol == "file"

# NOTE(hadim): depends on the fsspec version
assert fsmapper.fs.protocol in ["file", ("file", "local")]


@pytest.mark.skip_platform("win")
Expand Down

0 comments on commit 9e94d02

Please sign in to comment.