From fe6c3f096376f5c718b270724550ad3711eff0c4 Mon Sep 17 00:00:00 2001
From: sfluegel
Date: Wed, 29 Nov 2023 11:10:06 +0100
Subject: [PATCH] fix selfies reader file creation

---
 chebai/preprocessing/reader.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/chebai/preprocessing/reader.py b/chebai/preprocessing/reader.py
index 4bb5887a..0db6d1a7 100644
--- a/chebai/preprocessing/reader.py
+++ b/chebai/preprocessing/reader.py
@@ -88,8 +88,9 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         dirname = os.path.dirname(__file__)
         os.makedirs(os.path.join(dirname, "bin", self.name()), exist_ok=True)
+        self.tokens_path = os.path.join(dirname, "bin", self.name(), "tokens.txt")
 
-        with open(self.tokens_path, "r+") as pk:
+        with open(self.tokens_path, "a+") as pk:
             self.cache = [x.strip() for x in pk]
 
     def _get_token_index(self, token):
@@ -183,7 +184,7 @@ def _read_data(self, raw_data):
             tokenized = sf.split_selfies(sf.encoder(raw_data, strict=True))
             tokenized = [self._get_token_index(v) for v in tokenized]
         except Exception as e:
-            print(f'could not process {raw_data}')
+            print(f'could not process {raw_data} (type: {type(raw_data)})')
            print(f'\t{e}')
             self.error_count += 1
             print(f'\terror count: {self.error_count}')
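
Note: the switch from "r+" to "a+" relies on open() creating tokens.txt when it does not exist yet ("r+" raises FileNotFoundError for a missing file). Below is a minimal, self-contained sketch of that read-or-create pattern, not the repository's code; the path and variable names are illustrative. One caveat worth keeping in mind: in CPython, "a+" positions the stream at the end of the file, so an explicit seek(0) is needed before iterating over any lines already present.

import os

# illustrative path, not the one used in chebai
tokens_path = os.path.join("bin", "selfies", "tokens.txt")
os.makedirs(os.path.dirname(tokens_path), exist_ok=True)

# "a+" creates the file if missing and still allows reading;
# the stream starts at end-of-file, so rewind before reading.
with open(tokens_path, "a+") as pk:
    pk.seek(0)
    cache = [x.strip() for x in pk]

print(f"loaded {len(cache)} cached tokens")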