-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmd.py
333 lines (217 loc) · 11.6 KB
/
md.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
#!/usr/bin/env python
# coding: utf-8
# # Molecular dynamics simulation
# python md.py /path/to/directory cutoff_protein.pdb
# ### 1. Import modules
# In[47]:
import copy
import sys
import argparse
from pathlib import Path
import requests
from IPython.display import display
import numpy as np
import openmm as mm
import openmm.app as app
from openmm import unit
import mdtraj as md
import pdbfixer
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import AllChem
from openff.toolkit.topology import Molecule, Topology
from openmmforcefields.generators import GAFFTemplateGenerator
# ### 2. Input Files
# In[46]:
# Set up argument parser
parser = argparse.ArgumentParser(description="Process a directory and PDB file name.")
parser.add_argument("directory", type=str, help="Directory path")
parser.add_argument("filename", type=str, help="PDB file name")
# Parse the arguments
args = parser.parse_args()
# Create the path
HERE = Path(args.directory)
DATA = HERE
pdb_path = HERE / args.filename
# Print the resulting PDB path
print("PDB Path:")
print(pdb_path)
# ### Force field
# In[ ]:
"""
I plan to use the amber99sb-ildn force field for the protein, TIP3P for water molecules,
and the Joung-Cheatham (JC) model for ions. Since the "amber99sbildn.xml" file already includes ion parameters,
I have commented out the ion potentials (Na+ and Cl-, since I will use only them anyway) in this file to avoid conflicts. The JC parameters are provided in the "amber14/tip3p.xml" file.
The path to the "amber99sbildn.xml" file on the cluster (under my account) is:
/Users/jaeohshin/miniconda3/envs/md/lib/python3.12/site-packages/openmm/app/data/amber99sbildn.xml
I have removed the parameters for Na+ and Cl- from "amber99sbildn.xml" to ensure that the system uses the ion parameters defined in
"amber14/tip3p.xml" (as of October 1st, 2024, by Jaeoh Shin).
---
Below is comment from OpenMM website for your infomation.
The solvent model XML files included under the amber14/ directory include both water and ions compatible with that water model,
so if you mistakenly specify tip3p.xml instead of amber14/tip3p.xml, you run the risk of having ForceField throw an exception
since tip3p.xml will be missing parameters for ions in your system.
---
"""
protein_ff="amber99sbildn.xml"
solvent_ff="amber14/tip3p.xml"
forcefield = app.ForceField(protein_ff, solvent_ff)
# ### 3. System Configuration
# In[23]:
# Water, hydrogen mass, PME options
nonbondedMethod = app.PME
rigidWater = True
#hydrogenMass = 2*unit.amu
# Integration Options
dt = 2.0 * unit.femtoseconds
temperature = 300 * unit.kelvin
friction = 1.0 / unit.picoseconds
pressure = 1.0 * unit.atmosphere
barostatInterval = 25 # default value
# Simulation Options
steps = 5e9 ## 1e8= 200 ns, 1e6=2ns
write_interval = 50000 ## 25,000 = 50 ps --> for 1us run, there will be 20,000 frames
equilibrationSteps = 1e5 ## 1e5 = 0.2 ns.
log_interval = 5e5
platform = mm.Platform.getPlatformByName('CUDA')
platformProperties = {'Precision': 'mixed'}
dataReporter= app.StateDataReporter(
sys.stdout, log_interval, step=True,
potentialEnergy=True, kineticEnergy=True,
temperature=True, volume=True, density=True,
progress=True, remainingTime=True, speed=True,
totalSteps=steps, separator="\t"
)
# ### 3. Prepare the protein
# #### Protein preparation
#
# A crucial part for successful simulation is a correct and complete system. Crystallographic structures retrieved from the Protein Data Bank often miss atoms, mainly hydrogens, and may contain non-standard residues. In this talktorial, we will use the Python package [PDBFixer](https://github.com/openmm/pdbfixer) to prepare the protein structure. However, co-crystallized ligands are not handled well by [PDBFixer](https://github.com/openmm/pdbfixer) and will thus be prepared separately.
# In[24]:
def prepare_protein(
pdb_file, ignore_missing_residues=True, ignore_terminal_missing_residues=True, ph=7.0
):
"""
Use pdbfixer to prepare the protein from a PDB file. Hetero atoms such as ligands are
removed and non-standard residues replaced. Missing atoms to existing residues are added.
Missing residues are ignored by default, but can be included.
Parameters
----------
pdb_file: pathlib.Path or str
PDB file containing the system to simulate.
ignore_missing_residues: bool, optional
If missing residues should be ignored or built.
ignore_terminal_missing_residues: bool, optional
If missing residues at the beginning and the end of a chain should be ignored or built.
ph: float, optional
pH value used to determine protonation state of residues
Returns
-------
fixer: pdbfixer.pdbfixer.PDBFixer
Prepared protein system.
"""
fixer = pdbfixer.PDBFixer(str(pdb_file))
fixer.removeHeterogens() # co-crystallized ligands are unknown to PDBFixer
fixer.findMissingResidues() # identify missing residues, needed for identification of missing atoms
# if missing terminal residues shall be ignored, remove them from the dictionary
if ignore_terminal_missing_residues:
chains = list(fixer.topology.chains())
keys = fixer.missingResidues.keys()
for key in list(keys):
chain = chains[key[0]]
if key[1] == 0 or key[1] == len(list(chain.residues())):
del fixer.missingResidues[key]
# if all missing residues shall be ignored ignored, clear the dictionary
if ignore_missing_residues:
fixer.missingResidues = {}
fixer.findNonstandardResidues() # find non-standard residue
fixer.replaceNonstandardResidues() # replace non-standard residues with standard one
fixer.findMissingAtoms() # find missing heavy atoms
fixer.addMissingAtoms() # add missing atoms and residues
fixer.addMissingHydrogens(ph) # add missing hydrogens
return fixer
# In[25]:
# prepare protein and build only missing non-terminal residues
prepared_protein = prepare_protein(pdb_path, ignore_missing_residues=False)
# ### 4. Merge protein and ligand
#
# In the next step, we want to merge the prepared protein and ligand structures using the Python package [MDTraj](https://github.com/mdtraj/mdtraj). [MDTraj](https://github.com/mdtraj/mdtraj) can handle the prepared protein, which is currently a [PDBFixer](https://github.com/openmm/pdbfixer) molecule, a format that has a topology and atom positions similar to and usually interchangeable with [OpenMM Modeller](http://docs.openmm.org/latest/userguide/application.html#model-building-and-editing) topologies and positions. For the ligand however, we need to do several conversions, since it is currently an [RDKit](https://github.com/rdkit/rdkit) molecule.
# Now protein and ligand are both in [OpenMM](https://github.com/openmm/openmm) like formats and can be merged with [MDTraj](https://github.com/mdtraj/mdtraj).
# In[26]:
def merge_protein(protein):
"""
Merge two OpenMM objects.
Parameters
----------
protein: pdbfixer.pdbfixer.PDBFixer
Protein to merge.
ligand: openmm.app.Modeller
Ligand to merge.
Returns
-------
complex_topology: openmm.app.topology.Topology
The merged topology.
complex_positions: openmm.unit.quantity.Quantity
The merged positions.
"""
# combine topologies
md_protein_topology = md.Topology.from_openmm(protein.topology) # using mdtraj for protein top
complex_topology = md_protein_topology.to_openmm()
# combine positions
#total_atoms = len(protein.positions) + len(ligand.positions)
total_atoms = len(protein.positions)
# create an array for storing all atom positions as tupels containing a value and a unit
# called OpenMM Quantities
complex_positions = unit.Quantity(np.zeros([total_atoms, 3]), unit=unit.nanometers)
complex_positions = protein.positions # add protein positions
return complex_topology, complex_positions
# In[27]:
complex_topology, complex_positions = merge_protein(prepared_protein)
# In[28]:
print("Complex topology has", complex_topology.getNumAtoms(), "atoms.")
# ### 7. System setup
# With our configured force field we can now use the [OpenMM Modeller](http://docs.openmm.org/latest/userguide/application.html#model-building-and-editing) class to create the MD environment, a simulation box which contains the complex and is filled with a solvent. The standard solvent is water with a specified amount of ions. The size of the box can be determined in various ways. We define it with a padding, which results in a cubic box with dimensions dependent on the largest dimension of the complex.
#
# > Note this step can take a long time, in the order of minutes, depending on your hardware.
# In[29]:
modeller = app.Modeller(complex_topology, complex_positions)
modeller.addSolvent(forcefield, padding=1.0 * unit.nanometers, boxShape='dodecahedron', ionicStrength=0.15 * unit.molar)
# ### Building system
# With our solvated system and force field, we can finally create an [OpenMM System](http://docs.openmm.org/development/api-python/generated/openmm.openmm.System.html#openmm.openmm.System) and set up the simulation.
# Additionally to the system the simulation needs an integrator. An [OpenMM Integrator](http://docs.openmm.org/development/api-python/library.html#integrators) defines a method for simulating a system by integrating the equations of motion. The chosen **Langevin Integrator** uses Langevin equations. A list of all different kinds of integrators can be found in the [OpenMM Docs](http://docs.openmm.org/development/api-python/library.html#integrators). For further insight into the **Langevin Integrator**, we recommend reading about Langevin equations, e.g. on [Wikipedia](https://en.wikipedia.org/wiki/Langevin_equation).
# In[30]:
print('Building system...')
system = forcefield.createSystem(modeller.topology, nonbondedMethod=nonbondedMethod, rigidWater=rigidWater)
system.addForce(mm.MonteCarloBarostat(pressure,temperature, barostatInterval))
integrator = mm.LangevinMiddleIntegrator(temperature, friction, dt)
simulation = app.Simulation(modeller.topology, system, integrator, platform, platformProperties)
simulation.context.setPositions(modeller.positions)
simulation.context.reinitialize(True)
# ### 8. Minimize and Equilibrate
# Now that everything is set up, we can perform the simulation. We need to set starting positions and minimize the energy of the system to get a low energy starting configuration, which is important to decrease the chance of simulation failures due to severe atom clashes. The energy minimized system is saved.
# In[31]:
print("Minimizing energy...")
simulation.minimizeEnergy()
print("Equilibrating...")
simulation.context.setVelocitiesToTemperature(temperature)
simulation.step(equilibrationSteps)
# In[34]:
# Energy minimized system is saved.
with open(DATA / "top.pdb", "w") as pdb_file:
positions = simulation.context.getState(getPositions=True, enforcePeriodicBox=True).getPositions()
app.PDBFile.writeFile(simulation.topology, positions=positions, file=pdb_file, keepIds=True)
# In[35]:
# Save only protein system
traj = md.load(DATA / "top.pdb")
protein_atoms = traj.topology.select("protein")
protein_traj = traj.atom_slice(protein_atoms)
protein_traj.save_pdb(DATA / "top_protein.pdb")
# In[37]:
xtcReporter = md.reporters.XTCReporter(file=str(DATA / "traj_protein.xtc"),
reportInterval=write_interval, atomSubset=protein_atoms)
# #### Simulating
# In[40]:
print('Simulating...')
simulation.reporters.append(xtcReporter)
simulation.reporters.append(dataReporter)
simulation.currentStep = 0
simulation.step(steps)