-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathevaluate_calc.py
737 lines (678 loc) · 27.2 KB
/
evaluate_calc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
"""
Evaluate the output of the QM calculations based on the conformer ensemble directory structure.
"""
from __future__ import annotations
import json
import os
import shutil
import subprocess as sp
import warnings
import numpy as np
from scipy.stats import spearmanr
from .miscelleanous import bcolors, chdir, create_directory
# define a general parameter for the evaluation of the conformer ensemble
HARTREE2KCAL = 627.5094740631 # Hartree
MINDIFF = 0.01 # kcal/mol
def get_calc_ensemble(
desired_charge: int,
) -> dict[int, dict[str, list[str | int | float]]]:
"""
Evaluates a given conformer ensemble.
"""
pwd = os.getcwd()
# check if the directory structure is correct
if not os.path.exists("compounds.conformers.txt"):
print(
f"{bcolors.FAIL}Error: Directory structure is not correct. \
File 'compounds.conformers.txt' not found.{bcolors.ENDC}"
)
raise SystemExit(1)
mols: dict[int, str] = {}
try:
with open("compounds.conformers.txt", encoding="UTF-8") as f:
lines = f.readlines()
for i in lines:
mols[int(i.split()[0])] = str(i.split()[1])
except FileNotFoundError as e:
print(f"{bcolors.FAIL}Error: {e}{bcolors.ENDC}")
print(f"{bcolors.FAIL}File 'compounds.conformers.txt' not found.{bcolors.ENDC}")
raise SystemExit(1) from e
# catch index error and start again with not reading the name of the molecule in the
# second column
except IndexError:
print(
f"{bcolors.FAIL}File 'compounds.conformers.txt' does not have \
the correct format. Trying again with not reading the names...{bcolors.ENDC}"
)
try:
with open("compounds.conformers.txt", encoding="UTF-8") as f:
lines = f.readlines()
for i in lines:
mols[int(i.split()[0])] = ""
except IndexError as eindex2:
print(
f"{bcolors.FAIL}File 'compounds.conformers.txt' does not have \
the correct format. Stopping here.{bcolors.ENDC}"
)
raise SystemExit(1) from eindex2
energies: dict[int, dict[str, list[str | int | float]]] = {}
for cid, name in mols.items():
chdir(str(cid))
try:
with open("index.conformers", encoding="UTF-8") as f:
lines = f.readlines()
conformer = [int(j.strip()) for j in lines]
except FileNotFoundError as e:
print(f"{bcolors.FAIL}Error: {e}{bcolors.ENDC}")
print(
f"{bcolors.FAIL}File 'index.conformers' not found \
in directory {cid}.{bcolors.ENDC}"
)
chdir(pwd)
raise SystemExit(1) from e
# read the conformer.json file in the directory
try:
with open("conformer.json", encoding="UTF-8") as f:
conformer_prop = json.load(f)
except FileNotFoundError as e:
print(f"{bcolors.FAIL}Error: {e}{bcolors.ENDC}")
print(
f"{bcolors.FAIL}File 'conformer.json' not found \
in directory {cid}.{bcolors.ENDC}"
)
chdir(pwd)
raise SystemExit(1) from e
# check if the conformer ensemble matches to the constraints
if len(conformer) == 0:
print(
f"{bcolors.WARNING}Warning: \
Number of conformers ({len(conformer)}) in {cid} is zero. Skipping...{bcolors.ENDC}"
)
chdir(pwd)
continue
else:
print(
f"{bcolors.OKGREEN}Number of effective conformers \
in {cid} is {len(conformer)}{bcolors.ENDC}"
)
# check for charge constraints
if conformer_prop["charge"] != desired_charge:
print(
f"{bcolors.WARNING}Warning: \
Charge of molecule {cid} is not equal to {desired_charge}. Skipping...{bcolors.ENDC}"
)
chdir(pwd)
continue
tmpgfn2: list[float] = []
tmpgp3: list[float] = []
tmpwb97xd4: list[float] = []
tmpindex: list[int] = []
indices: list[int] = []
moldir = os.getcwd()
# go through the conformers and read the energies for gfn2, gp3, and TZ
continuewithmol = False
for j in conformer:
chdir(str(j))
# check if the calculation directories exist
if not os.path.exists("gfn2"):
print(
f"{bcolors.FAIL}Error: Directory structure is not correct. \
Directory 'gfn2' not found in {j}.{bcolors.ENDC}"
)
continuewithmol = True
break
if not os.path.exists("gp3"):
print(
f"{bcolors.FAIL}Error: Directory structure is not correct. \
Directory 'gp3' not found in {j}.{bcolors.ENDC}"
)
continuewithmol = True
break
if not os.path.exists("TZ"):
print(
f"{bcolors.FAIL}Error: Directory structure is not correct. \
Directory 'TZ' not found in {j}.{bcolors.ENDC}"
)
continuewithmol = True
break
# read the energies from the output files
tmpgfn2.append(read_energy_file("gfn2/energy"))
tmpgp3.append(read_energy_file("gp3/energy"))
tmpwb97xd4.append(read_energy_file("TZ/energy"))
tmpindex.append(j)
chdir(moldir)
if continuewithmol:
chdir(pwd)
continue
# sort the energies in ascending order with the TZ/wB97X-D4 energies as reference
# the GFN2 and GP3 energies should be sorted with the same indices as the TZ energies.
energies[cid] = {}
energies[cid]["GFN2"] = []
energies[cid]["GP3"] = []
energies[cid]["wB97X-D4"] = []
energies[cid]["conformer_index"] = []
energies[cid]["conformer_lowest"] = []
indices = sorted(range(len(tmpwb97xd4)), key=lambda k: tmpwb97xd4[k])
tmpwb97xd4 = [tmpwb97xd4[k] for k in indices]
tmpgfn2 = [tmpgfn2[k] for k in indices]
tmpgp3 = [tmpgp3[k] for k in indices]
tmpindex = [tmpindex[k] for k in indices]
# check if energy values in the reference are closer to each other than
# the minimum difference. For that, go through all combinations of energies
# and check if the difference is smaller than the minimum difference.
conformer_deleted = (
False # Initialize a flag tracking whether a conformer was deleted
)
if len(tmpwb97xd4) > 2:
while True:
for k in range(1, len(tmpwb97xd4)):
conformer_deleted = False # Initialize a flag tracking
# whether a conformer was deleted
if len(tmpwb97xd4) <= 3:
break
if (
abs((tmpwb97xd4[k] - tmpwb97xd4[k - 1]) * HARTREE2KCAL)
< MINDIFF
):
# delete the conformer with the higher energy
if tmpwb97xd4[k] > tmpwb97xd4[k - 1]:
del tmpwb97xd4[k]
del tmpgfn2[k]
del tmpgp3[k]
del tmpindex[k]
del conformer[k]
else:
del tmpwb97xd4[k - 1]
del tmpgfn2[k - 1]
del tmpgp3[k - 1]
del tmpindex[k - 1]
del conformer[k - 1]
conformer_deleted = True # Set the flag to True
print(
f"{bcolors.WARNING}Warning: \
conformer {k} was deleted in {cid} due to too close-lying energies.{bcolors.ENDC}"
)
break # Break out of the for loop
if not conformer_deleted:
# If no conformers were deleted in the current iteration,
# break out of the while loop
break
### DEV OUTPUT ###
# print the tmp arrays next to each other for comparison
for j in range(len(conformer)):
print(
f"{tmpindex[j]:4d} {tmpwb97xd4[j]:10.6f} {tmpgfn2[j]:10.6f} \
{tmpgp3[j]:10.6f}"
)
for j in range(len(conformer)):
if j == 0:
energies[cid]["conformer_lowest"].append(tmpindex[j])
else:
energies[cid]["wB97X-D4"].append(
(tmpwb97xd4[j] - tmpwb97xd4[0]) * HARTREE2KCAL
)
energies[cid]["GFN2"].append((tmpgfn2[j] - tmpgfn2[0]) * HARTREE2KCAL)
energies[cid]["GP3"].append((tmpgp3[j] - tmpgp3[0]) * HARTREE2KCAL)
energies[cid]["conformer_index"].append(tmpindex[j])
# add the conformer_prop infos to the energies dict
energies[cid]["natoms"] = conformer_prop["natoms"]
energies[cid]["charge"] = conformer_prop["charge"]
energies[cid]["nconf"] = conformer_prop["nconf"]
energies[cid]["name"] = [name]
chdir(pwd)
# check if the energies dict is empty
if len(energies) == 0:
print(
f"{bcolors.FAIL}Error: No conformer ensemble found. \
Stopping here.{bcolors.ENDC}"
)
raise SystemExit(1)
# write the old and new indices to a JSON file
with open("energies.json", "w", encoding="UTF-8") as f:
json.dump(energies, f, indent=4)
return energies
def eval_calc_ensemble(
confe: dict[int, dict[str, list[str | int | float]]]
) -> list[int]:
"""
Evaluates a given conformer ensemble in dictionary format.
"""
# total number of data points and total number of molecules
ndatapoints = 0
nmolecules = len(confe.keys())
# calculate the Spearman rank correlation coefficient for the GFN2 and GP3 energies
# the order of the wB97X-D4 energies is used as reference
spearmanrcc: dict[str, list[float]] = {}
spearmanrcc["GFN2"] = []
spearmanrcc["GP3"] = []
# 1st: iterate over the molecules via the keys of the dictionary items
print(
f"\n{bcolors.OKCYAN}Spearman rank correlation coefficients \
for the energy ranking in each selected conformer ensemble:{bcolors.ENDC}"
)
print(
5 * " "
+ f"{bcolors.OKCYAN}Mol:"
+ 5 * " "
+ "GFN2"
+ 4 * " "
+ f"GP3{bcolors.ENDC}"
)
for i in confe.keys():
# 2nd: calculate the Spearman rank correlation coefficient
gfn2scc = 0.0
gp3scc = 0.0
try:
with warnings.catch_warnings():
warnings.filterwarnings("error")
gfn2scc = spearmanr(confe[i]["wB97X-D4"], confe[i]["GFN2"])[0]
except ValueError as e:
print(f"{bcolors.FAIL}Error: {e}{bcolors.ENDC}")
print(
f"{bcolors.FAIL}Error in molecule {i}: \
Spearman rank correlation coefficient could not be calculated.{bcolors.ENDC}"
)
raise SystemExit(1) from e
except RuntimeWarning as e:
print(f"{bcolors.WARNING}ERROR in {i}: {e}\nSkipping...{bcolors.ENDC}")
continue
try:
with warnings.catch_warnings():
warnings.filterwarnings("error")
gp3scc = spearmanr(confe[i]["wB97X-D4"], confe[i]["GP3"])[0]
except ValueError as e:
print(f"{bcolors.FAIL}Error: {e}{bcolors.ENDC}")
print(
f"{bcolors.FAIL}Error in molecule {i}: \
Spearman rank correlation coefficient could not be calculated.{bcolors.ENDC}"
)
raise SystemExit(1) from e
except RuntimeWarning as e:
print(f"{bcolors.WARNING}ERROR in {i}: {e}\nSkipping...{bcolors.ENDC}")
continue
if np.isnan(gfn2scc) or np.isnan(gp3scc):
print("Skipping...")
continue
spearmanrcc["GFN2"].append(gfn2scc)
spearmanrcc["GP3"].append(gp3scc)
print(
3 * " "
+ f"{i:8d}: {spearmanrcc['GFN2'][-1]:6.3f} {spearmanrcc['GP3'][-1]:6.3f}"
)
# 3rd: add the counter for the number of data points
ndatapoints += len(confe[i]["wB97X-D4"])
# calculate the mean and standard deviation of the Spearman rank correlation coefficient
print(
f"{bcolors.OKCYAN} Mean and \
StdDev of the Spearman rank correlation coefficient:{bcolors.ENDC}"
)
print(
f"{bcolors.BOLD}GFN2: {np.mean(spearmanrcc['GFN2']):6.3f} \
{np.std(spearmanrcc['GFN2']):6.3f}{bcolors.ENDC}"
)
print(
f"{bcolors.BOLD}GP3: {np.mean(spearmanrcc['GP3']):6.3f} \
{np.std(spearmanrcc['GP3']):6.3f}{bcolors.ENDC}"
)
# calculate the number of cases for which the Spearman rank correlation coefficient
# is better with GP3 than with GFN2 and vice versa
better_scc: dict[str, int] = {}
better_scc["GFN2"] = 0
better_scc["GP3"] = 0
better_scc["equal"] = 0
for i in range(len(spearmanrcc["GFN2"])):
if spearmanrcc["GFN2"][i] > spearmanrcc["GP3"][i]:
better_scc["GFN2"] += 1
elif spearmanrcc["GFN2"][i] < spearmanrcc["GP3"][i]:
better_scc["GP3"] += 1
else:
better_scc["equal"] += 1
print(
f"{bcolors.OKCYAN} Number of cases for which \
the Spearman rank correlation coefficient is better:{bcolors.ENDC}"
)
print(f"{bcolors.BOLD}GFN2: {better_scc['GFN2']:6d}{bcolors.ENDC}")
print(f"{bcolors.BOLD}GP3: {better_scc['GP3']:6d}{bcolors.ENDC}")
print(f"{bcolors.BOLD}equal: {better_scc['equal']:6d}{bcolors.ENDC}")
# calculate the RMSD of the GFN2 and GP3 energies with respect to the wB97X-D4 energies
# for the whole data set
rmsd: dict[str, list[float]] = {}
rmsd["GFN2"] = []
rmsd["GP3"] = []
print(
f"\n{bcolors.OKCYAN}Root mean square deviations \
of the energies in each selected conformer ensemble:{bcolors.ENDC}"
)
print(
5 * " "
+ f"{bcolors.OKCYAN}Mol:"
+ 5 * " "
+ "GFN2"
+ 4 * " "
+ f"GP3{bcolors.ENDC}"
)
for i in confe.keys():
rmsd["GFN2"].append(
np.sqrt(
np.mean(np.square(np.subtract(confe[i]["wB97X-D4"], confe[i]["GFN2"])))
)
)
rmsd["GP3"].append(
np.sqrt(
np.mean(np.square(np.subtract(confe[i]["wB97X-D4"], confe[i]["GP3"])))
)
)
print(3 * " " + f"{i:8d}: {rmsd['GFN2'][-1]:6.3f} {rmsd['GP3'][-1]:6.3f}")
# calculate the mean and standard deviation of the RMSD
print(
f"{bcolors.OKCYAN} Mean and \
StdDev of the RMSD:{bcolors.ENDC}"
)
print(
f"{bcolors.BOLD}GFN2: {np.mean(rmsd['GFN2']):6.3f} \
{np.std(rmsd['GFN2']):6.3f}{bcolors.ENDC}"
)
print(
f"{bcolors.BOLD}GP3: {np.mean(rmsd['GP3']):6.3f} \
{np.std(rmsd['GP3']):6.3f}{bcolors.ENDC}"
)
# calculate the number of cases
# for which the RMSD is better with GP3 than with GFN2 and vice versa
better_rmsd: dict[str, int] = {}
better_rmsd["GFN2"] = 0
better_rmsd["GP3"] = 0
better_rmsd["equal"] = 0
for i in range(len(rmsd["GFN2"])):
if rmsd["GFN2"][i] > rmsd["GP3"][i]:
better_rmsd["GP3"] += 1
elif rmsd["GFN2"][i] < rmsd["GP3"][i]:
better_rmsd["GFN2"] += 1
else:
better_rmsd["equal"] += 1
print(
f"{bcolors.OKCYAN} Number of cases for which \
the RMSD is better:{bcolors.ENDC}"
)
print(f"{bcolors.BOLD}GFN2: {better_rmsd['GFN2']:6d}{bcolors.ENDC}")
print(f"{bcolors.BOLD}GP3: {better_rmsd['GP3']:6d}{bcolors.ENDC}")
print(f"{bcolors.BOLD}equal: {better_rmsd['equal']:6d}{bcolors.ENDC}")
# FIRST: create a list with the differences in RMSD between GFN2 and GP3
# and sort it in ascending order. Keep the sorting indices to refer to the
# original molecule numbering
# THEN: print the RMSD differences for the 10 best and 10 worst cases
# PRINT: the molecule CID number, the name, the RMSD difference, the GFN2 RMSD,
# the GP3 RMSD
# create a list with the differences in RMSD between GFN2 and GP3
rmsd_diff: list[float] = []
for i in range(len(rmsd["GFN2"])):
rmsd_diff.append(rmsd["GFN2"][i] - rmsd["GP3"][i])
# sort the list in ascending order
indices = sorted(range(len(rmsd_diff)), key=lambda k: rmsd_diff[k])
rmsd_diff = [rmsd_diff[k] for k in indices]
rmsd_gfn2 = [rmsd["GFN2"][k] for k in indices]
rmsd_gp3 = [rmsd["GP3"][k] for k in indices]
rmsd_cid = [list(confe.keys())[k] for k in indices]
# rmsd_name = [confe[list(confe.keys())[k]]["name"][0] for k in indices]
# print the RMSD differences for the 10 best and 10 worst cases
print(
f"\n{bcolors.OKCYAN}10 best and 10 worst cases \
for the RMSD difference between GFN2 and GP3:{bcolors.ENDC}"
)
print(
5 * " "
+ f"{bcolors.OKCYAN}Mol:"
+ 5 * " "
+ "dRMSD"
+ 2 * " "
+ f"GFN2"
+ 3 * " "
+ f"GP3{bcolors.ENDC}"
)
print(f"{bcolors.BOLD}Best cases:{bcolors.ENDC}")
for i in range(len(rmsd_diff) - 10, len(rmsd_diff)):
print(
3 * " "
+ f"{rmsd_cid[i]:8d}: \
{rmsd_diff[i]:6.3f} {rmsd_gfn2[i]:6.3f} {rmsd_gp3[i]:6.3f}"
)
print(f"{bcolors.BOLD}Worst cases:{bcolors.ENDC}")
for i in range(10):
print(
3 * " "
+ f"{rmsd_cid[i]:8d}: \
{rmsd_diff[i]:6.3f} {rmsd_gfn2[i]:6.3f} {rmsd_gp3[i]:6.3f}"
)
print(
f"\n{bcolors.BOLD}Total number of data points: \
{ndatapoints}{bcolors.ENDC}"
)
print(
f"{bcolors.BOLD}Total number of molecules: \
{nmolecules}{bcolors.ENDC}"
)
# return the cids of the ten worst cases
return rmsd_cid[:10]
def create_res_dir(
energy_db: dict[int, dict[str, list[str | int | float]]],
wipe: bool,
desired_charge: int,
worst_cids: list[int] = [],
) -> None:
"""
Creates the result directory if it does not exist already.
"""
prefix = "RNDCONF"
tzfilestocopy = ["energy", "control", "coord", "ridft.out", "gradient", "basis"]
resfile = "res.sh"
# if worst_cids not empty; remove all entries from energy_db that are NOT in worst_cids
if len(worst_cids) > 0:
energy_db = {k: v for k, v in energy_db.items() if k in worst_cids}
# check the file system for the existence of the first entry of the energy_db
# if it exists, the directory structure is assumed to be correct
# 1: get the first entry of the energy_db
tmpkey = list(energy_db.keys())[0]
if not os.path.exists(f"{tmpkey}"):
print(
f"{bcolors.FAIL}Error: Directory structure is not correct. \
Directory {tmpkey} not found.{bcolors.ENDC}"
)
raise SystemExit(1)
exist = create_directory("res_archive")
if exist and not wipe:
print(
f"{bcolors.FAIL}Error: Directory 'res_archive' already exists. \
Stopping here.{bcolors.ENDC}"
)
raise SystemExit(1)
elif exist and wipe:
print(
f"{bcolors.WARNING}Warning: Directory 'res_archive' already exists. \
Wiping it...{bcolors.ENDC}"
)
# remove the directory and create it again
shutil.rmtree("res_archive")
create_directory("res_archive")
# write the header of the .res file to a file called .res
with open(f"res_archive/{resfile}", "w", encoding="UTF-8") as f:
print("#!/bin/bash\n", file=f)
print("# MM 11/23", file=f)
print(
"# wB97X-D4/def2-TZVPPD // wB97X-3c at CREST(GFN2-xTB) conformers\n", file=f
)
print('if [ "$TMER" == "" ]', file=f)
print("then", file=f)
print(" tmer=tmer2++", file=f)
print("else", file=f)
print(" tmer=$TMER", file=f)
print("fi", file=f)
print("f=$1", file=f)
print("if [ -z $2 ]", file=f)
print("then", file=f)
print(" w=0", file=f)
print("else", file=f)
print(" w=$2", file=f)
print("fi", file=f)
# iteratate over all conformer ensembles in the energy_db
for mol in energy_db.keys():
# check if it is a charged species
if energy_db[mol]["charge"] != desired_charge:
print(
f"{bcolors.WARNING}Warning: \
Charge of molecule {mol} is not equal to {desired_charge}.{bcolors.ENDC}"
)
if energy_db[mol]["charge"] != 0:
tzfilestocopy.append(".CHRG")
# create a directory for each conformer
if not len(energy_db[mol]["conformer_lowest"]) == 1:
raise ValueError(f"Error: More than one lowest conformer in {mol}.")
else:
create_directory(f"res_archive/{prefix}_{str(mol)}_1")
create_directory(f"res_archive/{prefix}_{str(mol)}_1/TZ")
for file in tzfilestocopy:
if not os.path.exists(
f"{mol}/{energy_db[mol]['conformer_lowest'][0]}/TZ/{file}"
):
raise FileNotFoundError(
f"Error: \
Directory {mol}/{energy_db[mol]['conformer_lowest'][0]}/TZ/{file} not found."
)
else:
shutil.copy2(
f"{mol}/{energy_db[mol]['conformer_lowest'][0]}/TZ/{file}",
f"res_archive/{prefix}_{mol}_1/TZ/{file}",
)
# copy 'coord' also to main directory of conformer
shutil.copy2(
f"{mol}/{energy_db[mol]['conformer_lowest'][0]}/TZ/coord",
f"res_archive/{prefix}_{mol}_1/coord",
)
if energy_db[mol]["charge"] != 0:
# copy the .CHRG file to the new directory
shutil.copy2(
f"{mol}/{energy_db[mol]['conformer_lowest'][0]}/TZ/.CHRG",
f"res_archive/{prefix}_{mol}_1/",
)
print(
f"{bcolors.OKGREEN}Copied files for molecule {mol} to \
res_archive/{prefix}_{mol}_1.{bcolors.ENDC}"
)
struclocationfirst = str(f"{prefix}_{mol}_1/")
dirpathfirst = "res_archive/" + struclocationfirst
dirpathin = dirpathfirst + "coord"
dirpathout = dirpathfirst + "struc.xyz"
try:
pgout = sp.run(
[
"mctc-convert",
dirpathin,
dirpathout,
"--normalize",
],
check=True,
capture_output=True,
timeout=120,
)
except sp.TimeoutExpired as exc:
error = " " * 3 + f"Process timed out.\n{exc}"
print(error)
raise SystemExit(1) from exc
except sp.CalledProcessError as exc:
print(" " * 3 + "Status : FAIL", exc.returncode, exc.output)
# write the error output to a file
with open("mctc-convert_error.err", "w", encoding="UTF-8") as f:
f.write(exc.stderr.decode("utf-8"))
error = f"{bcolors.WARNING}mctc-convert failed.{bcolors.ENDC}"
print(error)
raise SystemExit(1) from exc
with open(f"res_archive/{resfile}", "a", encoding="UTF-8") as f:
print(f"\n# CID: {mol}", file=f)
k = 1
for conf in energy_db[mol]["conformer_index"]:
k += 1
# create a directory for each molecule
create_directory(f"res_archive/{prefix}_{mol}_{k}")
create_directory(f"res_archive/{prefix}_{mol}_{k}/TZ")
for file in tzfilestocopy:
if not os.path.exists(f"{mol}/{conf}/TZ/{file}"):
raise FileNotFoundError(
f"Error: \
Directory {mol}/{conf}/TZ/{file} not found."
)
else:
shutil.copy2(
f"{mol}/{conf}/TZ/{file}",
f"res_archive/{prefix}_{mol}_{k}/TZ/{file}",
)
# copy 'coord' also to main directory of conformer
shutil.copy2(
f"{mol}/{conf}/TZ/coord",
f"res_archive/{prefix}_{mol}_{k}/coord",
)
if energy_db[mol]["charge"] != 0:
# copy the .CHRG file to the new directory
shutil.copy2(
f"{mol}/{conf}/TZ/.CHRG",
f"res_archive/{prefix}_{mol}_{k}/",
)
struclocation = str(f"{prefix}_{mol}_{k}/")
dirpath = "res_archive/" + struclocation
dirpathin = dirpath + "coord"
dirpathout = dirpath + "struc.xyz"
try:
pgout = sp.run(
[
"mctc-convert",
dirpathin,
dirpathout,
"--normalize",
],
check=True,
capture_output=True,
timeout=120,
)
except sp.TimeoutExpired as exc:
error = " " * 3 + f"Process timed out.\n{exc}"
print(error)
raise SystemExit(1) from exc
except sp.CalledProcessError as exc:
print(" " * 3 + "Status : FAIL", exc.returncode, exc.output)
# write the error output to a file
with open("mctc-convert_error.err", "w", encoding="UTF-8") as f:
f.write(exc.stderr.decode("utf-8"))
error = f"{bcolors.WARNING}mctc-convert failed.{bcolors.ENDC}"
print(error)
raise SystemExit(1) from exc
with open(f"res_archive/{resfile}", "a", encoding="UTF-8") as f:
resline = (
f"$tmer {struclocationfirst:>25s}$f {struclocation:>25s}$f"
+ " x -1 1 $w"
+ f"{energy_db[mol]['wB97X-D4'][k-2]:10.6f}"
# 'k-2' because the first conformer is
# the lowest one and the array starts at 0 instead of 1
)
print(resline, file=f)
# if it exists, remove the entry ".CHRG" from the tzfilestocopy list
if ".CHRG" in tzfilestocopy:
tzfilestocopy.remove(".CHRG")
def read_energy_file(file: str) -> float:
"""
Reads the energy from the 'energy' output file of a QM calculation.
"""
try:
with open(file, encoding="UTF-8") as f:
lines = f.readlines()
energy = float(lines[1].strip().split()[1])
return energy
except FileNotFoundError as e:
print(f"{bcolors.FAIL}Error: File {file} not found.{bcolors.ENDC}")
raise SystemExit(1) from e
except ValueError as e:
print(f"{bcolors.FAIL}Error: {e} not a float.{bcolors.ENDC}")
raise SystemExit(1) from e
# except for the case that the file is empty
except IndexError as e:
print(f"{bcolors.FAIL}Error: File {file} is empty.{bcolors.ENDC}")
raise SystemExit(1) from e
except Exception as e:
print(f"{bcolors.FAIL}Error: {e} - general error.{bcolors.ENDC}")
raise SystemExit(1) from e