Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support openfe 0.14 #13

Merged
merged 8 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion devtools/conda-envs/test_env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ dependencies:
# Base depends
- gufe
- openfe # TODO: Remove once we don't depend on openfe
- python
- pip
- pydantic=1 # TODO: Modify when we support pydantic 2
- python

# Testing
- pytest
Expand Down
61 changes: 37 additions & 24 deletions feflow/protocols/nonequilibrium_cycling.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Adapted from perses: https://github.com/choderalab/perses/blob/protocol-neqcyc/perses/protocols/nonequilibrium_cycling.py

from typing import Optional, Iterable, List, Dict, Any
from itertools import chain

import datetime
import logging
Expand Down Expand Up @@ -132,11 +133,16 @@ def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs):

phase = self._detect_phase(state_a, state_b) # infer phase from systems and components

# Get components from systems if found (None otherwise) -- NOTE: Uses hardcoded keys!
# Get receptor components from systems if found (None otherwise) -- NOTE: Uses hardcoded keys!
receptor_a = state_a.components.get("protein")
# receptor_b = state_b.components.get("protein") # Should not be needed
ligand_a = mapping.get("ligand").componentA
ligand_b = mapping.get("ligand").componentB

# Get ligand/small-mol components
ligand_mapping = mapping["ligand"]
ligand_a = ligand_mapping.componentA
ligand_b = ligand_mapping.componentB

# Get solvent components
solvent_a = state_a.components.get("solvent")
# solvent_b = state_b.components.get("solvent") # Should not be needed

Expand Down Expand Up @@ -165,26 +171,37 @@ def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs):
# Note: by default this is cached to ctx.shared/db.json so shouldn't
# incur too large a cost
self.logger.info("Parameterizing molecules")
small_mols_a = []
# The following creates a dictionary with all the small molecules in the states, with the structure:
# Dict[SmallMoleculeComponent, openff.toolkit.Molecule]
# Alchemical small mols
alchemical_small_mols_a = {ligand_a: ligand_a.to_openff()}
alchemical_small_mols_b = {ligand_b: ligand_b.to_openff()}
all_alchemical_mols = alchemical_small_mols_a | alchemical_small_mols_b
# non-alchemical common small mols
common_small_mols = {}
for comp in state_a.components.values():
if isinstance(comp, SmallMoleculeComponent):
small_mols_a.append(comp)

for comp in small_mols_a:
offmol = comp.to_openff()
system_generator.create_system(offmol.to_topology().to_openmm(),
molecules=[offmol])
if comp == ligand_a:
mol_b = ligand_b.to_openff()
system_generator.create_system(mol_b.to_topology().to_openmm(),
molecules=[mol_b])
# TODO: Refactor if/when gufe provides the functionality https://github.com/OpenFreeEnergy/gufe/issues/251
if isinstance(comp, SmallMoleculeComponent) and comp not in all_alchemical_mols:
common_small_mols[comp] = comp.to_openff()

# Assign charges to ALL small mols, if unassigned -- more info: Openfe issue #576
for off_mol in chain(all_alchemical_mols.values(), common_small_mols.values()):
# skip if we already have user charges
if not (off_mol.partial_charges is not None and np.any(off_mol.partial_charges)):
# due to issues with partial charge generation in ambertools
# we default to using the input conformer for charge generation
off_mol.assign_partial_charges(
'am1bcc', use_conformers=off_mol.conformers
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll raise an issue, once we've got OpenFreeEnergy/openfe#598 sorted out, we should switch this to using a settings-selected backend.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah that sounds great, having this option is indeed useful. Thanks

)
system_generator.create_system(off_mol.to_topology().to_openmm(),
molecules=[off_mol])
Comment on lines +196 to +197
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clarification question: are we just running create_system here to make sure that the charged molecules can be parameterized by OpenMM?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for the solvation step a bit later on, if you don't do this here you don't have necessary topology info registered to solvate your system.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where does the topology info get registered? Looking at the definition of SystemGenerator.create_system, it's not clear to me that this method has any side effects or stores any state on the SystemGenerator...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry wrong word, not topology but forcefield - which you need to pass to addSolvent.

It's been a long while, but my rough recollection is that by calling create_system you update the TemplateGenerator which is hooked up to the app.ForceField object, so that it remains in "template memory" the next time around.

Note: we could probably reduce the footprint to a single system_generator here by moving this out of the loop and passing list(chain(all_alchemical_mols.values(), common_small_mols.values())) as the input to molecules. I don't think it'll have significant performance differences, but it might be a tad bit cleaner?


# c. get OpenMM Modeller + a dictionary of resids for each component
solvation_settings = settings.solvation_settings
state_a_modeller, comp_resids = system_creation.get_omm_modeller(
protein_comp=receptor_a,
solvent_comp=solvent_a,
small_mols=small_mols_a,
small_mols=alchemical_small_mols_a | common_small_mols,
omm_forcefield=system_generator.forcefield,
solvent_settings=solvation_settings,
)
Expand All @@ -199,7 +216,8 @@ def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs):
# e. create the stateA System
state_a_system = system_generator.create_system(
state_a_modeller.topology,
molecules=[s.to_openff() for s in small_mols_a],
molecules=list(chain(alchemical_small_mols_a.values(),
common_small_mols.values())),
)

# 2. Get stateB system
Expand All @@ -210,15 +228,10 @@ def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs):
exclude_resids=comp_resids[ligand_a],
)

# b. get a list of small molecules for stateB
off_mols_state_b = [ligand_b.to_openff(), ]
for comp in small_mols_a:
if comp != ligand_a:
off_mols_state_b.append(comp.to_openff())

state_b_system = system_generator.create_system(
state_b_topology,
molecules=off_mols_state_b,
molecules=list(chain(alchemical_small_mols_b.values(),
common_small_mols.values())),
)

# c. Define correspondence mappings between the two systems
Expand Down
7 changes: 3 additions & 4 deletions feflow/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
# fixtures for chemicalcomponents and chemicalsystems to test protocols with
import gufe
import pytest
import importlib.resources
from importlib.resources import files, as_file
from rdkit import Chem
from gufe.mapping import LigandAtomMapping


@pytest.fixture
def benzene_modifications():
with importlib.resources.path('gufe.tests.data',
'benzene_modifications.sdf') as f:
source = files("gufe.tests.data").joinpath("benzene_modifications.sdf")
with as_file(source) as f:
supp = Chem.SDMolSupplier(str(f), removeHs=False)

mols = list(supp)

return {m.GetProp('_Name'): m for m in mols}
Expand Down