From c9a1b7613920dcd98c7d47c634f8d7b58359f88a Mon Sep 17 00:00:00 2001 From: sfluegel Date: Wed, 29 Nov 2023 10:02:11 +0100 Subject: [PATCH] add support for SELFIES --- chebai/preprocessing/datasets/chebi.py | 8 ++++++++ chebai/preprocessing/datasets/pubchem.py | 5 +++++ configs/data/chebi100_SELFIES.yml | 1 + configs/data/pubchem_SELFIES.yml | 1 + 4 files changed, 15 insertions(+) create mode 100644 configs/data/chebi100_SELFIES.yml create mode 100644 configs/data/pubchem_SELFIES.yml diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index 349a3824..22d7623a 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -425,6 +425,10 @@ class ChEBIOverXDeepSMILES(ChEBIOverX): READER = dr.DeepChemDataReader +class ChEBIOverXSELFIES(ChEBIOverX): + READER = dr.SelfiesReader + + class ChEBIOver100(ChEBIOverX): THRESHOLD = 100 @@ -443,6 +447,10 @@ class ChEBIOver100DeepSMILES(ChEBIOverXDeepSMILES, ChEBIOver100): pass +class ChEBIOver100SELFIES(ChEBIOverXSELFIES, ChEBIOver100): + pass + + class JCIExtendedBPEData(JCIExtendedBase): READER = dr.ChemBPEReader diff --git a/chebai/preprocessing/datasets/pubchem.py b/chebai/preprocessing/datasets/pubchem.py index b3bb0423..ed30bc94 100644 --- a/chebai/preprocessing/datasets/pubchem.py +++ b/chebai/preprocessing/datasets/pubchem.py @@ -271,5 +271,10 @@ class PubToxAndChebi100(PubToxAndChebiX): class PubToxAndChebi50(PubToxAndChebiX): CHEBI_X = ChEBIOver50 + class PubChemDeepSMILES(PubChem): READER = dr.DeepChemDataReader + + +class PubChemSELFIES(PubChem): + READER = dr.SelfiesReader diff --git a/configs/data/chebi100_SELFIES.yml b/configs/data/chebi100_SELFIES.yml new file mode 100644 index 00000000..fbdfeafa --- /dev/null +++ b/configs/data/chebi100_SELFIES.yml @@ -0,0 +1 @@ +class_path: chebai.preprocessing.datasets.chebi.ChEBIOver100SELFIES \ No newline at end of file diff --git a/configs/data/pubchem_SELFIES.yml b/configs/data/pubchem_SELFIES.yml new file mode 100644 index 00000000..5595043f --- /dev/null +++ b/configs/data/pubchem_SELFIES.yml @@ -0,0 +1 @@ +class_path: chebai.preprocessing.datasets.pubchem.PubChemSELFIES