-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathworkload_runner.py
executable file
·151 lines (137 loc) · 5.87 KB
/
workload_runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python3
"""Starts the randomized workload generator."""
# pylint: disable=duplicate-code
# The disable above does not take effect, so the imports below are
# deliberately kept out of alphabetical order to avoid the warning.
# https://github.com/PyCQA/pylint/issues/214
import logging
import argparse
import os
import random
import time
import event
import log_gather
import log_gather_ocs
import osio
import kube
import util
# Parsed command-line arguments; only declared here, assigned in main().
CLI_ARGS: argparse.Namespace
# Random identifier for this run; main() uses it to name the log directory.
RUN_ID = random.randrange(999_999_999)
def set_health(healthy: bool) -> None:
    """Set the state of the health indicator file.

    The indicator is the file ``/tmp/healthy_runner``: it is created when
    ``healthy`` is true and removed when ``healthy`` is false. Both
    directions are idempotent, so repeating a call is harmless.

    Args:
        healthy: True to create the health file, False to delete it.
    """
    filename = "/tmp/healthy_runner"
    if healthy:
        logging.info("creating health file: %s", filename)
        # O_CREAT without O_TRUNC/O_EXCL: creates the file if missing and
        # leaves an existing one untouched.
        fd = os.open(filename, os.O_CREAT | os.O_WRONLY)
        os.close(fd)
    else:
        logging.info("deleting health file: %s", filename)
        try:
            os.unlink(filename)
        except FileNotFoundError:
            # The file is already gone, which is the state we want.
            pass
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser for the workload runner."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log-dir",
                        default=os.getcwd(),
                        type=str,
                        help="Path to use for log files")
    parser.add_argument("-m", "--accessmode",
                        default="ReadWriteOnce",
                        type=str, choices=["ReadWriteOnce", "ReadWriteMany"],
                        help="Access mode for the workload's PVCs")
    parser.add_argument("-n", "--namespace",
                        default="ocs-monkey",
                        type=str,
                        help="Namespace to use for the workload")
    parser.add_argument("--oc",
                        default="oc",
                        type=str,
                        help="Path/executable for the oc command")
    parser.add_argument("--ocs-namespace",
                        default="openshift-storage",
                        type=str,
                        help="Namespace where the OCS components are running")
    parser.add_argument("-s", "--storageclasses",
                        default=["ocs-storagecluster-ceph-rbd",
                                 "ocs-storagecluster-cephfs"],
                        nargs="+",
                        help="List of StorageClass names for the workload's "
                             "PVCs")
    parser.add_argument("-z", "--sleep-on-error",
                        action="store_true",
                        help="On error, sleep forever instead of exit")
    parser.add_argument("--osio-interarrival",
                        default=20,
                        type=float,
                        help="OSIO workload mean interarrival time (s)")
    parser.add_argument("--osio-lifetime",
                        default=3600,
                        type=float,
                        help="OSIO workload mean lifetime (s)")
    parser.add_argument("--osio-active-time",
                        default=300,
                        type=float,
                        help="OSIO workload mean active period (s)")
    parser.add_argument("--osio-idle-time",
                        default=60,
                        type=float,
                        help="OSIO workload mean idle period (s)")
    parser.add_argument("--osio-kernel-slots",
                        default=3,
                        type=int,
                        help="OSIO workload slots for kernel untar")
    parser.add_argument("--osio-kernel-untar",
                        default=10,
                        type=float,
                        help="OSIO workload kernel untar rate (#/hr)")
    parser.add_argument("--osio-kernel-rm",
                        default=10,
                        type=float,
                        help="OSIO workload kernel rm rate (#/hr)")
    parser.add_argument("--osio-image",
                        default="quay.io/ocsci/osio-workload",
                        type=str,
                        help="Container image for OSIO worker pods")
    parser.add_argument("-t", "--runtime",
                        default=7200,
                        type=int,
                        help="Run time in seconds")
    return parser


def main() -> None:
    """Run the workload.

    Parses the command line into the module-level ``CLI_ARGS``, sets up
    logging under ``<log-dir>/ocs-monkey-<RUN_ID>``, registers the log
    collectors, ensures the workload namespace exists, and then runs the
    event dispatcher for ``--runtime`` seconds.

    Raises:
        osio.UnhealthyDeployment: re-raised after log collection when
            ``--sleep-on-error`` is not set. With ``--sleep-on-error`` the
            function sleeps forever instead of returning or raising.
    """
    global CLI_ARGS  # pylint: disable=global-statement
    CLI_ARGS = _build_parser().parse_args()

    log_dir = os.path.join(CLI_ARGS.log_dir, f'ocs-monkey-{RUN_ID}')
    util.setup_logging(log_dir)

    logging.info("starting execution-- run id: %d", RUN_ID)
    logging.info("program arguments: %s", CLI_ARGS)
    logging.info("log directory: %s", log_dir)

    # register log collector(s)
    log_gather.add(log_gather_ocs.OcsMustGather(CLI_ARGS.oc))
    log_gather.add(log_gather_ocs.MustGather(CLI_ARGS.oc))
    log_gather.add(log_gather_ocs.OcsImageVersions(CLI_ARGS.oc,
                                                   CLI_ARGS.ocs_namespace))

    kube.create_namespace(CLI_ARGS.namespace, existing_ok=True)

    # The health file is only maintained in sleep-on-error mode.
    if CLI_ARGS.sleep_on_error:
        set_health(True)

    dispatch = event.Dispatcher()
    # Queue whatever osio.resume() returns for this namespace before the
    # newly started workload's events.
    dispatch.add(*osio.resume(CLI_ARGS.namespace))
    events = osio.start(namespace=CLI_ARGS.namespace,
                        storage_classes=CLI_ARGS.storageclasses,
                        access_mode=CLI_ARGS.accessmode,
                        interarrival=CLI_ARGS.osio_interarrival,
                        lifetime=CLI_ARGS.osio_lifetime,
                        active=CLI_ARGS.osio_active_time,
                        idle=CLI_ARGS.osio_idle_time,
                        kernel_slots=CLI_ARGS.osio_kernel_slots,
                        kernel_untar=CLI_ARGS.osio_kernel_untar,
                        kernel_rm=CLI_ARGS.osio_kernel_rm,
                        workload_image=CLI_ARGS.osio_image)
    dispatch.add(*events)

    try:
        dispatch.run(runtime=CLI_ARGS.runtime)
    except osio.UnhealthyDeployment:
        if CLI_ARGS.sleep_on_error:
            set_health(False)
        logging.info("starting log collection")
        log_gather.gather(log_dir)
        logging.error("Controller stopped due to detected error")
        # In sleep-on-error mode, block forever instead of exiting.
        while CLI_ARGS.sleep_on_error:
            time.sleep(9999)
        raise
# Start the workload only when executed as a script, not when imported.
if __name__ == "__main__":
    main()