Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

only argparse #27

Open
wants to merge 47 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
e376b7e
update options and saving for encoders
soulios Jan 1, 2024
6f7611e
only argparse
soulios Mar 6, 2024
16a24f4
flaked and fixed predictgnn arg
soulios Mar 6, 2024
774b0a1
add json
soulios Mar 6, 2024
11fb829
remove comma
soulios Mar 6, 2024
add3993
final fix
soulios Mar 6, 2024
fa33f2f
final fix
soulios Mar 6, 2024
1f59fe9
final fix
soulios Mar 6, 2024
00fa012
convert fix
soulios Mar 7, 2024
ace62d3
Update dfpl/options.py
soulios Mar 8, 2024
8a1b334
Apply suggestions from code review
soulios Mar 8, 2024
3c92b98
edited help in args
soulios Mar 8, 2024
40e6b0b
flaked and blacked
soulios Mar 8, 2024
c03a32e
removed metavar from args with choices
soulios Mar 11, 2024
ebaaaca
make literals optionals for None
soulios Mar 11, 2024
d6090a9
applied black
soulios Mar 11, 2024
cb3fa01
rename some variables
bernt-matthias Apr 9, 2024
460c482
Merge branch 'argparse' of https://github.com/soulios/deepFPlearn int…
bernt-matthias Apr 9, 2024
e87be1b
fixup
bernt-matthias Apr 9, 2024
8b0af64
removed paths from default args and fixed creating args from json and…
soulios Jul 11, 2024
31f48a4
Merge branch 'master' of https://github.com/yigbt/deepFPlearn
soulios Jul 11, 2024
efef88a
only argparse
soulios Mar 6, 2024
ac0db5d
rename some variables
bernt-matthias Apr 9, 2024
13a1626
flaked and fixed predictgnn arg
soulios Mar 6, 2024
c3a5da2
add json
soulios Mar 6, 2024
2577f10
remove comma
soulios Mar 6, 2024
96f59b4
final fix
soulios Mar 6, 2024
01942ba
final fix
soulios Mar 6, 2024
056110e
final fix
soulios Mar 6, 2024
630f6d1
convert fix
soulios Mar 7, 2024
4f418cc
Update dfpl/options.py
soulios Mar 8, 2024
dd34bca
Apply suggestions from code review
soulios Mar 8, 2024
83361ee
edited help in args
soulios Mar 8, 2024
d165644
removed metavar from args with choices
soulios Mar 11, 2024
3d5ae80
make literals optionals for None
soulios Mar 11, 2024
e2ceb28
applied black
soulios Mar 11, 2024
f748c2f
removed paths from default args and fixed creating args from json and…
soulios Jul 11, 2024
a03642a
Merge branch 'master' into argparse
soulios Jul 11, 2024
51453b5
rebased argparse
soulios Jul 11, 2024
4cdd5a8
merged argparse
soulios Jul 11, 2024
7348fd0
blacked and flaked
soulios Jul 11, 2024
40be7bb
trying fix for cmd and json args
soulios Jul 11, 2024
35a63ee
changed path for input file
soulios Jul 11, 2024
1ba017c
changed path for input file
soulios Jul 11, 2024
11e808c
changed path for input file
soulios Jul 11, 2024
4989421
changed path for input file
soulios Jul 11, 2024
8cc13b4
changed path for test file in pr.yml
soulios Jul 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,14 @@ jobs:
exit 1
fi
echo "result lines "$(wc -l preds_dmpnn/DMPNN_preds.csv)
if [ "$(cat preds_dmpnn/DMPNN_preds.csv | wc -l)" -lt "6" ]; then
echo "predict result should have at least 5 lines. But had only $(cat preds_dmpnn/DMPNN_preds.csv | wc -l)" >&2
if [ "$(cat preds_dmpnn/preds.csv | wc -l)" -lt "6" ]; then
echo "predict result should have at least 5 lines. But had only $(cat preds_dmpnn/preds.csv | wc -l)" >&2
exit 1
fi

soulios marked this conversation as resolved.
Show resolved Hide resolved
dfpl convert -f tests/data
if [ "$(find tests/data \( -name '*.csv' -o -name '*.tsv' \) | wc -l)" -ne "$(find tests/data -name '*.pkl' | wc -l)" ]; then
echo "not all csv files are converted to pickle ones" >&2
exit 1
fi
echo "All tests passed!"
165 changes: 59 additions & 106 deletions dfpl/__main__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import dataclasses
import logging
import os.path
import pathlib
import os
from argparse import Namespace
from os import path

import chemprop as cp
import pandas as pd
import chemprop
from keras.models import load_model

from dfpl import autoencoder as ac
Expand All @@ -17,108 +15,60 @@
from dfpl import vae as vae
from dfpl.utils import createArgsFromJson, createDirectory, makePathAbsolute

project_directory = pathlib.Path(".").parent.parent.absolute()
test_train_opts = options.Options(
inputFile=f"{project_directory}/input_datasets/S_dataset.pkl",
outputDir=f"{project_directory}/output_data/console_test",
ecWeightsFile=f"{project_directory}/output_data/case_00/AE_S/ae_S.encoder.hdf5",
ecModelDir=f"{project_directory}/output_data/case_00/AE_S/saved_model",
type="smiles",
fpType="topological",
epochs=100,
batchSize=1024,
fpSize=2048,
encFPSize=256,
enableMultiLabel=False,
testSize=0.2,
kFolds=2,
verbose=2,
trainAC=False,
trainFNN=True,
compressFeatures=True,
activationFunction="selu",
lossFunction="bce",
optimizer="Adam",
fnnType="FNN",
)

test_pred_opts = options.Options(
inputFile=f"{project_directory}/input_datasets/S_dataset.pkl",
outputDir=f"{project_directory}/output_data/console_test",
outputFile=f"{project_directory}/output_data/console_test/S_dataset.predictions_ER.csv",
ecModelDir=f"{project_directory}/output_data/case_00/AE_S/saved_model",
fnnModelDir=f"{project_directory}/output_data/console_test/ER_saved_model",
type="smiles",
fpType="topological",
)


def traindmpnn(opts: options.GnnOptions):

def traindmpnn(opts: options.GnnOptions) -> None:
"""
Train a D-MPNN model using the given options.
Args:
- opts: options.GnnOptions instance containing the details of the training
Returns:
- None
"""
os.environ["CUDA_VISIBLE_DEVICES"] = f"{opts.gpu}"
ignore_elements = ["py/object"]
# Load options from a JSON file and replace the relevant attributes in `opts`
arguments = createArgsFromJson(
opts.configFile, ignore_elements, return_json_object=False
)
opts = cp.args.TrainArgs().parse_args(arguments)
arguments = createArgsFromJson(jsonFile=opts.configFile)
opts = chemprop.args.TrainArgs().parse_args(arguments)
logging.info("Training DMPNN...")
# Train the model and get the mean and standard deviation of AUC score from cross-validation
mean_score, std_score = cp.train.cross_validate(
args=opts, train_func=cp.train.run_training
mean_score, std_score = chemprop.train.cross_validate(
args=opts, train_func=chemprop.train.run_training
)
logging.info(f"Results: {mean_score:.5f} +/- {std_score:.5f}")


def predictdmpnn(opts: options.GnnOptions, json_arg_path: str) -> None:
def predictdmpnn(opts: options.GnnOptions) -> None:
"""
Predict the values using a trained D-MPNN model with the given options.
Args:
- opts: options.GnnOptions instance containing the details of the prediction
- JSON_ARG_PATH: path to a JSON file containing additional arguments for prediction
Returns:
- None
"""
ignore_elements = [
"py/object",
"checkpoint_paths",
"save_dir",
"saving_name",
]
# Load options and additional arguments from a JSON file
arguments, data = createArgsFromJson(
json_arg_path, ignore_elements, return_json_object=True
)
arguments.append("--preds_path")
arguments.append("")
save_dir = data.get("save_dir")
name = data.get("saving_name")
# Replace relevant attributes in `opts` with loaded options
opts = cp.args.PredictArgs().parse_args(arguments)
opts.preds_path = save_dir + "/" + name
df = pd.read_csv(opts.test_path)
smiles = []
for index, rows in df.iterrows():
my_list = [rows.smiles]
smiles.append(my_list)
# Make predictions and return the result
cp.train.make_predictions(args=opts, smiles=smiles)
arguments = createArgsFromJson(jsonFile=opts.configFile)
opts = chemprop.args.PredictArgs().parse_args(arguments)

chemprop.train.make_predictions(args=opts)


def interpretdmpnn(opts: options.GnnOptions) -> None:
"""
Run the interpretation of a trained D-MPNN model with the given options.
Args:
- opts: options.GnnOptions instance; only its `configFile` attribute is read
here, the interpretation settings themselves come from that JSON file
Returns:
- None
"""
# Build the chemprop argument list from the JSON file named by opts.configFile
arguments = createArgsFromJson(jsonFile=opts.configFile)
# NOTE: `opts` is rebound here to the parsed chemprop InterpretArgs object
opts = chemprop.args.InterpretArgs().parse_args(arguments)

# Run the interpretation; called with save_to_csv=True
# NOTE(review): the output location is not visible in this function — confirm against chemprop.interpret
chemprop.interpret.interpret(args=opts, save_to_csv=True)


def train(opts: options.Options):
"""
Run the main training procedure
:param opts: Options defining the details of the training
"""

os.environ["CUDA_VISIBLE_DEVICES"] = f"{opts.gpu}"

# import data from file and create DataFrame
if "tsv" in opts.inputFile:
df = fp.importDataFile(
Expand All @@ -128,7 +78,7 @@ def train(opts: options.Options):
df = fp.importDataFile(
opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize
)
# initialize encoders to None
# initialize (auto)encoders to None
encoder = None
autoencoder = None
if opts.trainAC:
Expand All @@ -142,26 +92,31 @@ def train(opts: options.Options):
# if feature compression is enabled
if opts.compressFeatures:
if not opts.trainAC:
if opts.aeType == "deterministic":
(autoencoder, encoder) = ac.define_ac_model(opts=options.Options())
elif opts.aeType == "variational":
if opts.aeType == "variational":
(autoencoder, encoder) = vae.define_vae_model(opts=options.Options())
elif opts.ecWeightsFile == "":
else:
(autoencoder, encoder) = ac.define_ac_model(opts=options.Options())

if opts.ecWeightsFile == "":
encoder = load_model(opts.ecModelDir)
else:
autoencoder.load_weights(
os.path.join(opts.ecModelDir, opts.ecWeightsFile)
)
# compress the fingerprints using the autoencoder
df = ac.compress_fingerprints(df, encoder)
# ac.visualize_fingerprints(
# df,
# before_col="fp",
# after_col="fpcompressed",
# train_indices=train_indices,
# test_indices=test_indices,
# save_as=f"UMAP_{opts.aeSplitType}.png",
# )
if opts.visualizeLatent and opts.trainAC:
ac.visualize_fingerprints(
df,
train_indices=train_indices,
test_indices=test_indices,
save_as=f"{opts.ecModelDir}/UMAP_{opts.aeSplitType}.png",
)
elif opts.visualizeLatent:
logging.info(
"Visualizing latent space is only available if you train the autoencoder. Skipping visualization."
)

# train single label models if requested
if opts.trainFNN and not opts.enableMultiLabel:
sl.train_single_label_models(df=df, opts=opts)
Expand Down Expand Up @@ -257,24 +212,22 @@ def main():
raise ValueError("Input directory is not a directory")
elif prog_args.method == "traingnn":
traingnn_opts = options.GnnOptions.fromCmdArgs(prog_args)

createLogger("traingnn.log")
traindmpnn(traingnn_opts)

elif prog_args.method == "predictgnn":
predictgnn_opts = options.GnnOptions.fromCmdArgs(prog_args)
fixed_opts = dataclasses.replace(
predictgnn_opts,
test_path=makePathAbsolute(predictgnn_opts.test_path),
preds_path=makePathAbsolute(predictgnn_opts.preds_path),
)

logging.info(
f"The following arguments are received or filled with default values:\n{prog_args}"
)

predictdmpnn(fixed_opts, prog_args.configFile)
predictgnn_opts = options.PredictGnnOptions.fromCmdArgs(prog_args)
createLogger("predictgnn.log")
predictdmpnn(predictgnn_opts)
elif prog_args.method == "interpretgnn":
interpretgnn_opts = options.InterpretGNNoptions.fromCmdArgs(prog_args)
createLogger("interpretgnn.log")
interpretdmpnn(interpretgnn_opts)

elif prog_args.method == "train":
if prog_args.configFile is None and prog_args.inputFile is None:
parser.error("Either --configFile or --inputFile must be provided.")

train_opts = options.Options.fromCmdArgs(prog_args)
fixed_opts = dataclasses.replace(
train_opts,
Expand All @@ -288,6 +241,8 @@ def main():
)
train(fixed_opts)
elif prog_args.method == "predict":
if prog_args.configFile is None and prog_args.inputFile is None:
parser.error("Either --configFile or --inputFile must be provided.")
predict_opts = options.Options.fromCmdArgs(prog_args)
fixed_opts = dataclasses.replace(
predict_opts,
Expand All @@ -298,8 +253,6 @@ def main():
),
ecModelDir=makePathAbsolute(predict_opts.ecModelDir),
fnnModelDir=makePathAbsolute(predict_opts.fnnModelDir),
trainAC=False,
trainFNN=False,
)
createDirectory(fixed_opts.outputDir)
createLogger(path.join(fixed_opts.outputDir, "predict.log"))
Expand Down
Loading
Loading