-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_regression.py
89 lines (76 loc) · 5.32 KB
/
run_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import argparse
from ccl import CCLMethod
def main():
parser = argparse.ArgumentParser()
files = parser.add_argument_group('Files')
files.add_argument('--ccl-code', type=str, required=True, help='CCL code skeleton')
files.add_argument('--molecules', type=str, required=True, help='Input set of molecules')
files.add_argument('--ref-charges', type=str, required=True, help='Reference charges to compare to')
files.add_argument('--parameters', type=str, required=True, help='Parameters required for the method')
files.add_argument('--seeded-individuals', type=str, default=None, help='File with individuals to start with')
files.add_argument('--wanted-individuals', type=str, default=None, help='File with individuals to search for')
files.add_argument('--save-best', type=str, default=None, help='File to store the best individuals')
files.add_argument('--results', type=str, default=None, help='File to store results')
options = parser.add_argument_group('Regression options')
options.add_argument('--population-size', type=int, default=500, help='Size of the initial population')
options.add_argument('--crossover-probability', type=float, default=0.8, help='Crossover probability')
options.add_argument('--mutation-probability', type=float, default=0.3, help='Mutation probability')
options.add_argument('--generations', type=int, default=50, help='Number of generations')
options.add_argument('--no-unique-population', action='store_false', dest='unique_population',
help='Don\'t require unique initial population')
options.add_argument('--seed', type=int, default=None, help='Seed for the random number generator')
options.add_argument('--require-symmetry', action='store_true', default=False,
help='Require symmetry in atom/bond objects in the initial population')
options.add_argument('--metric', type=str, choices=['R2', 'RMSD', 'COMBINED'], default='RMSD',
help='Metric to evaluate fitness')
options.add_argument('--ncpus', type=int, default=None, help='Number of CPUs to use')
options.add_argument('--early-exit', action='store_true', default=False,
help='Stop the computation if the individual has RMSD < 0.0001 or R2 > 0.9999 depending'
' on the metric used')
options.add_argument('--initial-seed-mutations', type=int, default=10,
help='Number of mutations to generate from each seeded individual')
options.add_argument('--top-results', type=int, default=20, help='Number of results to report/save at the end')
options.add_argument('--max-constant-allowed', type=int, default=None,
help='Max constant that can appear in the initial population')
options.add_argument('--use-math-functions', action='store_true', default=False,
help='Allow additional math functions like sin or cos')
options.add_argument('--allow-random-constants', action='store_true', default=False,
help='Allow to generate random constants in range [0; 2]')
options.add_argument('--only-multiplicative-constants', action='store_true', default=False,
help='Allow to use only two constants in a form of 2 * x or 0.5 * x')
options.add_argument('--symbol-counts', type=str, nargs='+', default=[],
help="Specify the occurrence limits of symbols in the expression")
options.add_argument('--max-tree-height', type=int, default=17, help='Maximum height of the expression tree')
sys_dirs = parser.add_argument_group('System directories')
sys_dirs.add_argument('--eigen-include', type=str, default='/usr/include/eigen3',
help='Directory with Eigen3 include files')
sys_dirs.add_argument('--chargefw2-dir', type=str, default='/opt/chargefw2',
help='ChargeFW2 installation directory')
parsed_options = vars(parser.parse_args())
symbol_counts = list(parsed_options['symbol_counts'])
if len(symbol_counts) % 2:
raise RuntimeError('symbols-count argument error')
parsed_options['symbol_counts'] = {}
for s, c in zip(symbol_counts[::2], symbol_counts[1::2]):
parts = c.split(':')
try:
if len(parts) == 1:
low = high = int(parts[0])
elif len(parts) == 2:
low, high = parts
low = int(low) if low else None
high = int(high) if high else None
if low is not None and high is not None and low > high:
raise RuntimeError('Options error: symbol-counts has lower bound higher than upper')
else:
raise RuntimeError('Options error: symbol-counts has bad count specifier')
except ValueError:
raise RuntimeError('Options error: symbol-counts is unable to convert a value to integer')
parsed_options['symbol_counts'][s] = (low, high)
method = CCLMethod.from_file(parsed_options['ccl_code'])
method.run_regression(molecules_file=parsed_options['molecules'],
ref_chg_file=parsed_options['ref_charges'],
parameters_file=parsed_options['parameters'],
regression_options=parsed_options)
if __name__ == '__main__':
main()