diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index aefc4b8a..d2cf74e3 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -25,6 +25,7 @@ dependencies: - dask >=2.7.0 - distributed >=2.7.0 - dask-jobqueue >=0.8.0 + - dask-kubernetes - uncertainties - openmmtools - pyyaml diff --git a/docs/backends/daskbackends.rst b/docs/backends/daskbackends.rst index 721f845d..5d8c7f5e 100644 --- a/docs/backends/daskbackends.rst +++ b/docs/backends/daskbackends.rst @@ -103,7 +103,7 @@ See the `dask documentation Selecting GPU Platform ---------------------- -The calculation backends alos allows the user to specify the GPU platform and precision level. Users can specify +The calculation backends also allow the user to specify the GPU platform and precision level. Users can specify either ``auto``, ``CUDA`` or ``OpenCL`` as the `preferred_gpu_toolkit` using the ``GPUToolkit`` enum class. The default precision level is set to ``mixed`` but can be overridden by specifying `preferred_gpu_precision` using the ``GPUPrecision`` enum class:: diff --git a/docs/backends/daskkubernetesbackend.rst b/docs/backends/daskkubernetesbackend.rst new file mode 100644 index 00000000..674ce6ed --- /dev/null +++ b/docs/backends/daskkubernetesbackend.rst @@ -0,0 +1,260 @@ +.. |dask_kubernetes_backend| replace:: :py:class:`~openff.evaluator.backends.dask_kubernetes.DaskKubernetesBackend` +.. |dask_kubernetes_existing_backend| replace:: :py:class:`~openff.evaluator.backends.dask_kubernetes.DaskKubernetesExistingBackend` +.. |evaluator_server| replace:: :py:class:`~openff.evaluator.server.EvaluatorServer` +.. |evaluator_client| replace:: :py:class:`~openff.evaluator.client.EvaluatorClient` +.. |pod_resources| replace:: :py:class:`~openff.evaluator.backends.backends.PodResources` +.. |compute_resources| replace:: :py:class:`~openff.evaluator.backends.ComputeResources` +.. 
|kubernetes_persistent_volume_claim| replace:: :py:class:`~openff.evaluator.backends.dask_kubernetes.KubernetesPersistentVolumeClaim` +.. |kubernetes_secret| replace:: :py:class:`~openff.evaluator.backends.dask_kubernetes.KubernetesSecret` +.. |openmm_simulation| replace:: :py:class:`~openff.evaluator.protocols.openmm.OpenMMSimulation` + + +Dask Kubernetes Backend +======================== + +The framework implements a special set of calculation backends which integrate with the ``dask`` `distributed `_ and `dask-kubernetes `_ libraries. +These backends are designed to run on the `National Research Platform `_ +(NRP) and have not been otherwise tested. + + +Several separate components are required for executing Evaluator on NRP due to the limited user permissions we have: + +* a shared filesystem that is accessible by the |evaluator_server| and the |dask_kubernetes_backend|. + Typically this is constructed with a `PersistentVolumeClaim `_. +* a |dask_kubernetes_backend| that can submit tasks to the Kubernetes cluster. This must be initiated locally with NRP. + The backend must have the PVC mounted. +* an |evaluator_server|, running remotely on a deployment on NRP, that can receive tasks from the local |evaluator_client|. + This needs to connect to the |dask_kubernetes_backend| to submit tasks to the Kubernetes cluster. + If permissions are limited as they are on NRP, you may not be able to create the |dask_kubernetes_backend| remotely. + In that case, you will need a |dask_kubernetes_existing_backend| to connect to an existing KubeCluster. +* the |evaluator_server| port forwarded so a local |evaluator_client| can communicate with the |evaluator_server|. 
+ + +PersistentVolumeClaims in Python +-------------------------------- + +A PVC can be constructed with `this tutorial `_, +or dynamically through Python using the Kubernetes client:: + + import time + from kubernetes import client, config + from openff.units import unit + + core_v1 = client.CoreV1Api() + + # from https://ucsd-prp.gitlab.io/userdocs/storage/ceph/#currently-available-storageclasses + storage_class_name = "rook-cephfs-central" + + # required space to request + storage_space = 1 * unit.gigabytes + + pvc_spec = client.V1PersistentVolumeClaimSpec( + access_modes=["ReadWriteMany"], + storage_class_name=storage_class_name, + resources=client.V1ResourceRequirements( + requests={ + "storage": f"{storage_space.to(unit.gigabytes).m}Gi", + } + ), + ) + + + pvc_name = f"evaluator-storage-{job_name}" + metadata = client.V1ObjectMeta(name=pvc_name) + pvc = client.V1PersistentVolumeClaim( + api_version="v1", + kind="PersistentVolumeClaim", + metadata=metadata, + spec=pvc_spec, + ) + api_response = core_v1.create_namespaced_persistent_volume_claim( + namespace=namespace, + body=pvc + ) + logger.info( + f"Created PVC {pvc.metadata.name}. State={api_response.status.phase}" + ) + + # wait for PVC to bind + timeout = 1000 + end_time = time.time() + timeout + while time.time() < end_time: + pvc = core_v1.read_namespaced_persistent_volume_claim(name=pvc_name, namespace=namespace) + if pvc.status.phase == "Bound": + logger.info(f"PVC '{pvc_name}' is Bound.") + return pvc_name + logger.info(f"Waiting for PVC '{pvc_name}' to become Bound. 
Current phase: {pvc.status.phase}") + time.sleep(5) + + +Dask Kubernetes Cluster +----------------------- + +The |dask_kubernetes_backend| backend wraps around the dask `Dask KubeCluster `_ +class to distribute tasks on Kubernetes:: + + # replace with own docker image + docker_image = "ghcr.io/lilyminium/openff-images:tmp-evaluator-dask-v2" + cluster_name = "evaluator-cluster" + namespace = "openforcefield" # namespace on NRP + + backend = DaskKubernetesBackend( + cluster_name=cluster_name, + gpu_resources_per_worker=gpu_resources_per_worker, # see below + cpu_resources_per_worker=cpu_resources_per_worker, # see below + image=image, + namespace=namespace, + env={ + "OE_LICENSE": "/secrets/oe_license.txt", + # daemonic processes are not allowed to have children + "DASK_DISTRIBUTED__WORKER__DAEMON": "False", + "DASK_LOGGING__DISTRIBUTED": "debug", + "DASK__TEMPORARY_DIRECTORY": "/evaluator-storage", + "STORAGE_DIRECTORY": "/evaluator-storage", + "EXTRA_PIP_PACKAGES": "jupyterlab" + }, + volumes=[volume], # see below + secrets=[secret], # see below + annotate_resources=True, # see below + cluster_kwargs={"resource_timeout": 300} + ) + + +Specifying pod resources +~~~~~~~~~~~~~~~~~~~~~~~~ + +Pod resources should be specified using |pod_resources|, which works analogously to |compute_resources|, +but encodes settings for Kubernetes pods. 
For example:: + + from openff.units import unit + + ephemeral_storage = 20 * unit.gigabytes + memory = 8 * unit.gigabytes + + gpu_resources_per_worker=PodResources( + minimum_number_of_workers=0, + maximum_number_of_workers=10, + number_of_threads=1, + memory_limit=memory, + ephemeral_storage_limit=ephemeral_storage, + number_of_gpus=1, + preferred_gpu_toolkit=ComputeResources.GPUToolkit.CUDA, + ) + cpu_resources_per_worker=PodResources( + minimum_number_of_workers=0, + maximum_number_of_workers=40, + number_of_threads=1, + memory_limit=memory, + ephemeral_storage_limit=ephemeral_storage, + number_of_gpus=0, + ) + + +Specifying volumes +~~~~~~~~~~~~~~~~~~ + +Volumes should be specified as a list of |kubernetes_persistent_volume_claim| objects. For example:: + + volume = KubernetesPersistentVolumeClaim( + name="evaluator-storage", # `pvc_name`, the name of the PVC + mount_path="/evaluator-storage", # where to mount the PVC + ) + + +Specifying secrets +~~~~~~~~~~~~~~~~~~ + +Secrets should be specified as a list of |kubernetes_secret| objects. For example:: + + secret = KubernetesSecret( + name="openeye-license", + secret_name="oe-license", + mount_path="/secrets/oe_license.txt", + sub_path="oe_license.txt", + read_only=True, + ) + + +This example of mounting an OpenEye license mounts the ``secret_name`` secret +at the ``mount_path`` path in the pod, at the ``sub_path`` path. + +.. note:: + + A secret should first be created in Kubernetes as following + `the documentation `_. + + +Annotating resources +~~~~~~~~~~~~~~~~~~~~ + +Dask allows you to specify whether tasks require particular +`resources `_ to be available on the worker used +to execute them. Setting ``annotate_resources=True`` will split tasks into those that can only be +executed on GPU workers, and those that can only be executed on CPU workers. +Simulation protocols such as |openmm_simulation| are executed on GPUs, whereas tasks such as packing boxes +are executed on CPUs. 
Splitting tasks this way will increase the GPU utilization of GPU workers. + +The resources specified are 'GPU' (set to 0.5 per protocol to encourage multiple protocols to run on the same worker), +and 'notGPU' (set to 1 per protocol). Workers are run with either the 'GPU' or 'notGPU' resource, and tasks are +allocated to workers based on the resources they require. + +Setting ``annotate_resources=False`` will allow tasks to be executed on any worker. + + + +Dask Kubernetes Existing Backend +-------------------------------- + +If you are unable to create a |dask_kubernetes_backend| remotely, you can connect to an existing KubeCluster +with the |dask_kubernetes_existing_backend| with the same arguments:: + + from openff.evaluator.backends.dask_kubernetes import DaskKubernetesExistingBackend + + backend = DaskKubernetesExistingBackend( + cluster_name=cluster_name, + gpu_resources_per_worker=gpu_resources_per_worker, + cpu_resources_per_worker=cpu_resources_per_worker, + image=image, + namespace=namespace, + env={ + "OE_LICENSE": "/secrets/oe_license.txt", + # daemonic processes are not allowed to have children + "DASK_DISTRIBUTED__WORKER__DAEMON": "False", + "DASK_LOGGING__DISTRIBUTED": "debug", + "DASK__TEMPORARY_DIRECTORY": "/evaluator-storage", + "STORAGE_DIRECTORY": "/evaluator-storage", + "EXTRA_PIP_PACKAGES": "jupyterlab" + }, + volumes=[volume], + secrets=[secret], + annotate_resources=True, + cluster_kwargs={"resource_timeout": 300} + ) + +Not all of these are important to keep the same, as this cluster simply connects to an +already initialized |dask_kubernetes_backend|. 
However, the following are important to keep the same: + +* ``cluster_name`` -- for connection +* ``namespace`` -- for connection +* ``gpu_resources_per_worker`` -- the `preferred_gpu_toolkit` is important here, although not the number of workers +* ``volumes`` -- the PVC must be mounted +* ``secrets`` -- an OpenEye license would ideally be mounted +* ``annotate_resources`` -- this controls whether or not to split tasks between GPU/CPU workers + + +Deployment +~~~~~~~~~~ + +The |evaluator_server| can be deployed remotely on NRP with the following command:: + + with backend: + evaluator_server = EvaluatorServer( + backend=backend, + port=port, + debug=True, + ) + evaluator_server.start(asynchronous=False) + +Ideally this should be done on a Kubernetes deployment to ensure the |evaluator_server| is always running. +The |evaluator_server| should be port forwarded to allow ForceBalance to communicate with it on a ``server_port``. \ No newline at end of file diff --git a/docs/examples/kubernetes-run/README.md b/docs/examples/kubernetes-run/README.md new file mode 100644 index 00000000..6a33d02f --- /dev/null +++ b/docs/examples/kubernetes-run/README.md @@ -0,0 +1,31 @@ +# Example Kubernetes run + +This directory contains files for an example Evaluator run on Kubernetes. +Please see [the documentation](https://docs.openforcefield.org/projects/evaluator/en/stable/backends/daskkubernetesbackend.html) for more. + +The `run.py` script runs the following steps: + +1. Create a PersistentVolumeClaim (PVC) for storage. +2. Create a DaskKubernetesBackend, mounting the PVC. This backend runs pods using a pre-built image with Evaluator installed. The spec of the cluster is written out to ``cluster-spec.yaml``. +3. Copies `server-existing.py` to start an EvaluatorServer with the local filesystem storage mounted. +4. Create a Deployment to run the script in step 3. +5. Forward a port from the Deployment to the local machine. +6. 
Estimates the dataset in `dataset.json` using the EvaluatorClient. + +Output from an example run is captured in `run.log`: + +``` +python run.py > run.log 2>&1 +``` + +## Environments + +An input environment file is provided in ``input-environment.yaml``. +The full environment specification used for the example run is provided in ``full-environment.yaml``. + +## Usage + +Make sure to keep an eye on GPU usage to make sure it's not too low: https://grafana.nrp-nautilus.io/d/dRG9q0Ymz/k8s-compute-resources-namespace-gpus?var-namespace=openforcefield&orgId=1&refresh=30s&from=now-1h&to=now + +Note that the [KubeCluster](https://kubernetes.dask.org/en/latest/operator_kubecluster.html) scales adaptively, so even though a maximum of 10 GPU workers +are requested in the `DaskKubernetesBackend`, only 2 are launched as there are only 2 properties in the dataset. \ No newline at end of file diff --git a/docs/examples/kubernetes-run/cluster-spec.yaml b/docs/examples/kubernetes-run/cluster-spec.yaml new file mode 100644 index 00000000..e068396f --- /dev/null +++ b/docs/examples/kubernetes-run/cluster-spec.yaml @@ -0,0 +1,144 @@ +apiVersion: kubernetes.dask.org/v1 +kind: DaskCluster +metadata: + name: evaluator-lw +spec: + idleTimeout: 0 + scheduler: + service: + ports: + - name: tcp-comm + port: 8786 + protocol: TCP + targetPort: tcp-comm + - name: http-dashboard + port: 8787 + protocol: TCP + targetPort: http-dashboard + selector: + dask.org/cluster-name: evaluator-lw + dask.org/component: scheduler + type: ClusterIP + spec: + containers: + - args: + - dask-scheduler + - --host + - 0.0.0.0 + env: + - name: OE_LICENSE + value: /secrets/oe_license.txt + - name: DASK_DISTRIBUTED__WORKER__DAEMON + value: 'False' + - name: DASK_LOGGING__DISTRIBUTED + value: debug + - name: DASK__TEMPORARY_DIRECTORY + value: /evaluator-storage + - name: STORAGE_DIRECTORY + value: /evaluator-storage + - name: EXTRA_PIP_PACKAGES + value: jupyterlab + image: 
ghcr.io/lilyminium/openff-images:tmp-evaluator-dask-v2 + livenessProbe: + httpGet: + path: /health + port: http-dashboard + initialDelaySeconds: 15 + periodSeconds: 20 + name: scheduler + ports: + - containerPort: 8786 + name: tcp-comm + protocol: TCP + - containerPort: 8787 + name: http-dashboard + protocol: TCP + readinessProbe: + httpGet: + path: /health + port: http-dashboard + initialDelaySeconds: 0 + periodSeconds: 1 + timeoutSeconds: 3600 + resources: + limits: + cpu: '1' + ephemeral-storage: 20.000Gi + memory: 8.000Gi + requests: + cpu: '1' + ephemeral-storage: 20.000Gi + memory: 8.000Gi + volumeMounts: + - mountPath: /evaluator-storage + name: evaluator-storage-lw + readOnly: false + - mountPath: /secrets/oe_license.txt + name: openeye-license + readOnly: true + subPath: oe_license.txt + volumes: + - name: evaluator-storage-lw + persistentVolumeClaim: + claimName: evaluator-storage-lw + - name: openeye-license + secret: + secretName: oe-license-feb-2024 + worker: + replicas: 0 + spec: + containers: + - args: + - dask-worker + - --name + - $(DASK_WORKER_NAME) + - --dashboard + - --dashboard-address + - '8788' + - --resources + - GPU=1,notGPU=0 + env: + - name: OE_LICENSE + value: /secrets/oe_license.txt + - name: DASK_DISTRIBUTED__WORKER__DAEMON + value: 'False' + - name: DASK_LOGGING__DISTRIBUTED + value: debug + - name: DASK__TEMPORARY_DIRECTORY + value: /evaluator-storage + - name: STORAGE_DIRECTORY + value: /evaluator-storage + - name: EXTRA_PIP_PACKAGES + value: jupyterlab + image: ghcr.io/lilyminium/openff-images:tmp-evaluator-dask-v2 + name: worker + ports: + - containerPort: 8788 + name: http-dashboard + protocol: TCP + resources: + limits: + cpu: '1' + ephemeral-storage: 20.000Gi + memory: 8.000Gi + nvidia.com/gpu: '1' + requests: + cpu: '1' + ephemeral-storage: 20.000Gi + memory: 8.000Gi + nvidia.com/gpu: '1' + volumeMounts: + - mountPath: /evaluator-storage + name: evaluator-storage-lw + readOnly: false + - mountPath: /secrets/oe_license.txt + 
name: openeye-license + readOnly: true + subPath: oe_license.txt + volumes: + - name: evaluator-storage-lw + persistentVolumeClaim: + claimName: evaluator-storage-lw + - name: openeye-license + secret: + secretName: oe-license-feb-2024 diff --git a/docs/examples/kubernetes-run/dataset.json b/docs/examples/kubernetes-run/dataset.json new file mode 100644 index 00000000..cf881ce7 --- /dev/null +++ b/docs/examples/kubernetes-run/dataset.json @@ -0,0 +1,133 @@ +{ + "@type": "openff.evaluator.datasets.datasets.PhysicalPropertyDataSet", + "properties": [ + { + "@type": "openff.evaluator.properties.density.Density", + "gradients": [], + "id": "6385", + "phase": 2, + "source": { + "@type": "openff.evaluator.datasets.provenance.MeasurementSource", + "doi": "10.1016/j.fluid.2010.05.001", + "reference": "" + }, + "substance": { + "@type": "openff.evaluator.substances.substances.Substance", + "amounts": { + "CN1CCCCC1{solv}": [ + { + "@type": "openff.evaluator.substances.amounts.MoleFraction", + "value": 0.2452 + } + ], + "O{solv}": [ + { + "@type": "openff.evaluator.substances.amounts.MoleFraction", + "value": 0.7548 + } + ] + }, + "components": [ + { + "@type": "openff.evaluator.substances.components.Component", + "role": { + "@type": "openff.evaluator.substances.components.Component.Role", + "value": "solv" + }, + "smiles": "CN1CCCCC1" + }, + { + "@type": "openff.evaluator.substances.components.Component", + "role": { + "@type": "openff.evaluator.substances.components.Component.Role", + "value": "solv" + }, + "smiles": "O" + } + ] + }, + "thermodynamic_state": { + "@type": "openff.evaluator.thermodynamics.ThermodynamicState", + "pressure": { + "@type": "openff.evaluator.unit.Quantity", + "unit": "kilopascal", + "value": 101.0 + }, + "temperature": { + "@type": "openff.evaluator.unit.Quantity", + "unit": "kelvin", + "value": 298.15 + } + }, + "value": { + "@type": "openff.evaluator.unit.Quantity", + "unit": "gram / milliliter", + "value": 0.9038110000000004 + } + }, + { + 
"@type": "openff.evaluator.properties.density.Density", + "gradients": [], + "id": "6386", + "phase": 2, + "source": { + "@type": "openff.evaluator.datasets.provenance.MeasurementSource", + "doi": "10.1016/j.fluid.2010.05.001", + "reference": "" + }, + "substance": { + "@type": "openff.evaluator.substances.substances.Substance", + "amounts": { + "CN1CCCCC1{solv}": [ + { + "@type": "openff.evaluator.substances.amounts.MoleFraction", + "value": 0.505 + } + ], + "O{solv}": [ + { + "@type": "openff.evaluator.substances.amounts.MoleFraction", + "value": 0.495 + } + ] + }, + "components": [ + { + "@type": "openff.evaluator.substances.components.Component", + "role": { + "@type": "openff.evaluator.substances.components.Component.Role", + "value": "solv" + }, + "smiles": "CN1CCCCC1" + }, + { + "@type": "openff.evaluator.substances.components.Component", + "role": { + "@type": "openff.evaluator.substances.components.Component.Role", + "value": "solv" + }, + "smiles": "O" + } + ] + }, + "thermodynamic_state": { + "@type": "openff.evaluator.thermodynamics.ThermodynamicState", + "pressure": { + "@type": "openff.evaluator.unit.Quantity", + "unit": "kilopascal", + "value": 101.0 + }, + "temperature": { + "@type": "openff.evaluator.unit.Quantity", + "unit": "kelvin", + "value": 298.15 + } + }, + "value": { + "@type": "openff.evaluator.unit.Quantity", + "unit": "gram / milliliter", + "value": 0.8581580000000002 + } + } + ] + } \ No newline at end of file diff --git a/docs/examples/kubernetes-run/full-environment.yaml b/docs/examples/kubernetes-run/full-environment.yaml new file mode 100644 index 00000000..c581dd82 --- /dev/null +++ b/docs/examples/kubernetes-run/full-environment.yaml @@ -0,0 +1,429 @@ +name: evaluator-test-env +channels: +- conda-forge +- openeye +dependencies: +- aiohappyeyeballs=2.4.0=pyhd8ed1ab_0 +- aiohttp=3.10.5=py311h460d6c5_1 +- aiosignal=1.3.1=pyhd8ed1ab_0 +- annotated-types=0.7.0=pyhd8ed1ab_0 +- anyio=4.4.0=pyhd8ed1ab_0 +- appnope=0.1.4=pyhd8ed1ab_0 +- 
argon2-cffi=23.1.0=pyhd8ed1ab_0 +- argon2-cffi-bindings=21.2.0=py311h460d6c5_5 +- arpack=3.9.1=nompi_h593882a_101 +- arrow=1.3.0=pyhd8ed1ab_0 +- asttokens=2.4.1=pyhd8ed1ab_0 +- astunparse=1.6.3=pyhd8ed1ab_0 +- async-lru=2.0.4=pyhd8ed1ab_0 +- asyncache=0.3.1=pyhd8ed1ab_0 +- attrs=24.2.0=pyh71513ae_0 +- aws-c-auth=0.7.26=h1e647a1_2 +- aws-c-cal=0.7.4=h41e72e7_0 +- aws-c-common=0.9.27=h99b78c6_0 +- aws-c-compression=0.2.19=h41e72e7_0 +- aws-c-event-stream=0.4.3=h79ff00d_0 +- aws-c-http=0.8.8=h69517e7_1 +- aws-c-io=0.14.18=h20e6805_7 +- aws-c-mqtt=0.10.4=h3e8bf47_18 +- aws-c-s3=0.6.4=he4d1bc2_11 +- aws-c-sdkutils=0.1.19=h85401af_2 +- aws-checksums=0.1.18=h85401af_10 +- aws-crt-cpp=0.28.2=h46cb957_0 +- aws-sdk-cpp=1.11.379=h8d911dc_8 +- azure-core-cpp=1.13.0=hd01fc5c_0 +- azure-identity-cpp=1.8.0=h13ea094_2 +- azure-storage-blobs-cpp=12.12.0=hfde595f_0 +- azure-storage-common-cpp=12.7.0=hcf3b6fd_1 +- azure-storage-files-datalake-cpp=12.11.0=h082e32e_1 +- babel=2.14.0=pyhd8ed1ab_0 +- beautifulsoup4=4.12.3=pyha770c72_0 +- bleach=6.1.0=pyhd8ed1ab_0 +- blinker=1.8.2=pyhd8ed1ab_0 +- blosc=1.21.6=h5499902_0 +- bokeh=3.5.2=pyhd8ed1ab_0 +- boltons=24.0.0=pyhd8ed1ab_0 +- brotli=1.1.0=hd74edd7_2 +- brotli-bin=1.1.0=hd74edd7_2 +- brotli-python=1.1.0=py311h3f08180_2 +- bson=0.5.9=py_0 +- bzip2=1.0.8=h99b78c6_7 +- c-ares=1.33.1=hd74edd7_0 +- c-blosc2=2.15.1=h5063078_0 +- ca-certificates=2024.8.30=hf0a4a13_0 +- cached-property=1.5.2=hd8ed1ab_1 +- cached_property=1.5.2=pyha770c72_1 +- cachetools=5.5.0=pyhd8ed1ab_0 +- cairo=1.18.0=hb4a6bf7_3 +- certifi=2024.8.30=pyhd8ed1ab_0 +- cffi=1.17.0=py311h3a79f62_1 +- cftime=1.6.4=py311h0f07fe1_1 +- chardet=5.2.0=py311h267d04e_2 +- charset-normalizer=3.3.2=pyhd8ed1ab_0 +- click=8.1.7=unix_pyh707e725_0 +- cloudpickle=3.0.0=pyhd8ed1ab_0 +- colorama=0.4.6=pyhd8ed1ab_0 +- comm=0.2.2=pyhd8ed1ab_0 +- contourpy=1.3.0=py311h2c37856_1 +- coverage=7.6.1=py311h460d6c5_1 +- cryptography=43.0.1=py311h47c44cf_0 +- cycler=0.12.1=pyhd8ed1ab_0 +- 
cytoolz=0.12.3=py311h05b510d_0 +- dask=2024.2.1=pyhd8ed1ab_0 +- dask-core=2024.2.1=pyhd8ed1ab_1 +- dask-jobqueue=0.9.0=pyhd8ed1ab_0 +- dask-kubernetes=2024.9.0=pyhd8ed1ab_0 +- dataclasses=0.8=pyhc8e2a94_3 +- debugpy=1.8.5=py311h3f08180_1 +- decorator=5.1.1=pyhd8ed1ab_0 +- defusedxml=0.7.1=pyhd8ed1ab_0 +- distributed=2024.2.1=pyhd8ed1ab_0 +- ele=0.2.0=pyhd8ed1ab_0 +- entrypoints=0.4=pyhd8ed1ab_0 +- exceptiongroup=1.2.2=pyhd8ed1ab_0 +- execnet=2.1.1=pyhd8ed1ab_0 +- executing=2.1.0=pyhd8ed1ab_0 +- expat=2.6.2=hebf3989_0 +- fftw=3.3.10=nompi_h6637ab6_110 +- font-ttf-dejavu-sans-mono=2.37=hab24e00_0 +- font-ttf-inconsolata=3.000=h77eed37_0 +- font-ttf-source-code-pro=2.038=h77eed37_0 +- font-ttf-ubuntu=0.83=h77eed37_2 +- fontconfig=2.14.2=h82840c6_0 +- fonts-conda-ecosystem=1=0 +- fonts-conda-forge=1=0 +- fonttools=4.53.1=py311h460d6c5_1 +- forcebalance=1.9.6=py311hd6dc194_2 +- forcefield-utilities=0.3.0=pyhd8ed1ab_0 +- foyer=1.0.0=pyhd8ed1ab_0 +- fqdn=1.5.1=pyhd8ed1ab_0 +- freetype=2.12.1=hadb7bae_2 +- freetype-py=2.3.0=pyhd8ed1ab_0 +- frozenlist=1.4.1=py311h460d6c5_1 +- fsspec=2024.6.1=pyhff2d567_0 +- future=1.0.0=pyhd8ed1ab_0 +- gf2x=1.3.0=hdaa854c_2 +- gflags=2.2.2=hc88da5d_1004 +- glog=0.7.1=heb240a5_0 +- gmp=6.3.0=h7bae524_2 +- gmpy2=2.1.5=py311hb5ce3a2_2 +- gmso=0.11.2=pyhd8ed1ab_0 +- google-auth=2.34.0=pyhff2d567_0 +- greenlet=3.0.3=py311h92babd0_0 +- h11=0.14.0=pyhd8ed1ab_0 +- h2=4.1.0=pyhd8ed1ab_0 +- hdf4=4.2.15=h2ee6834_7 +- hdf5=1.14.3=nompi_hec07895_105 +- hpack=4.0.0=pyh9f0ad1d_0 +- httpcore=1.0.5=pyhd8ed1ab_0 +- httpx=0.27.2=pyhd8ed1ab_0 +- httpx-ws=0.6.0=pyhd8ed1ab_0 +- hyperframe=6.0.1=pyhd8ed1ab_0 +- icu=75.1=hfee45f7_0 +- idna=3.8=pyhd8ed1ab_0 +- importlib-metadata=8.4.0=pyha770c72_0 +- importlib-resources=6.4.4=pyhd8ed1ab_0 +- importlib_metadata=8.4.0=hd8ed1ab_0 +- importlib_resources=6.4.4=pyhd8ed1ab_0 +- iniconfig=2.0.0=pyhd8ed1ab_0 +- ipykernel=6.29.5=pyh57ce528_0 +- ipython=8.27.0=pyh707e725_0 +- ipywidgets=8.1.5=pyhd8ed1ab_0 +- 
iso8601=2.1.0=pyhd8ed1ab_0 +- isoduration=20.11.0=pyhd8ed1ab_0 +- jax=0.4.31=pyhd8ed1ab_1 +- jaxlib=0.4.31=cpu_py311hd54497d_1 +- jedi=0.19.1=pyhd8ed1ab_0 +- jinja2=3.1.4=pyhd8ed1ab_0 +- joblib=1.4.2=pyhd8ed1ab_0 +- json5=0.9.25=pyhd8ed1ab_0 +- jsonpath-ng=1.6.1=pyhd8ed1ab_0 +- jsonpointer=3.0.0=py311h267d04e_1 +- jsonschema=4.23.0=pyhd8ed1ab_0 +- jsonschema-specifications=2023.12.1=pyhd8ed1ab_0 +- jsonschema-with-format-nongpl=4.23.0=hd8ed1ab_0 +- jupyter-lsp=2.2.5=pyhd8ed1ab_0 +- jupyter_client=8.6.2=pyhd8ed1ab_0 +- jupyter_core=5.7.2=py311h267d04e_0 +- jupyter_events=0.10.0=pyhd8ed1ab_0 +- jupyter_server=2.14.2=pyhd8ed1ab_0 +- jupyter_server_terminals=0.5.3=pyhd8ed1ab_0 +- jupyterlab=4.2.5=pyhd8ed1ab_0 +- jupyterlab_pygments=0.3.0=pyhd8ed1ab_1 +- jupyterlab_server=2.27.3=pyhd8ed1ab_0 +- jupyterlab_widgets=3.0.13=pyhd8ed1ab_0 +- khronos-opencl-icd-loader=2023.04.17=hf50ae52_1 +- kiwisolver=1.4.5=py311h2c37856_2 +- kopf=1.37.2=pyhd8ed1ab_0 +- kr8s=0.17.4=pyhd8ed1ab_0 +- krb5=1.21.3=h237132a_0 +- kubernetes=1.31.1=hce30654_0 +- kubernetes-client=1.31.1=h36934fe_0 +- kubernetes-node=1.31.1=h36934fe_0 +- kubernetes-server=1.31.1=h36934fe_0 +- kubernetes_asyncio=31.1.0=pyhd8ed1ab_0 +- lark=1.2.2=pyhd8ed1ab_0 +- lcms2=2.16=ha0e7c42_0 +- lerc=4.0.0=h9a09cb3_0 +- libabseil=20240116.2=cxx17_h00cdb27_1 +- libaec=1.1.3=hebf3989_0 +- libarrow=17.0.0=h20538ec_13_cpu +- libarrow-acero=17.0.0=hf9b8971_13_cpu +- libarrow-dataset=17.0.0=hf9b8971_13_cpu +- libarrow-substrait=17.0.0=hbf8b706_13_cpu +- libblas=3.9.0=23_osxarm64_openblas +- libboost=1.84.0=h29978a0_6 +- libboost-python=1.84.0=py311h225995f_6 +- libbrotlicommon=1.1.0=hd74edd7_2 +- libbrotlidec=1.1.0=hd74edd7_2 +- libbrotlienc=1.1.0=hd74edd7_2 +- libcblas=3.9.0=23_osxarm64_openblas +- libcrc32c=1.1.2=hbdafb3b_0 +- libcurl=8.9.1=hfd8ffcc_0 +- libcxx=18.1.8=h3ed4263_7 +- libdeflate=1.21=h99b78c6_0 +- libedit=3.1.20191231=hc8eb9b7_2 +- libev=4.33=h93a5062_2 +- libevent=2.1.12=h2757513_1 +- libexpat=2.6.2=hebf3989_0 +- 
libffi=3.4.2=h3422bc3_5 +- libflint=2.9.0=h587781b_ntl_100 +- libgfortran=5.0.0=13_2_0_hd922786_3 +- libgfortran5=13.2.0=hf226fd6_3 +- libglib=2.80.3=h59d46d9_2 +- libgoogle-cloud=2.28.0=hfe08963_0 +- libgoogle-cloud-storage=2.28.0=h1466eeb_0 +- libgrpc=1.62.2=h9c18a4f_0 +- libiconv=1.17=h0d3ecfb_2 +- libintl=0.22.5=h8414b35_3 +- libjpeg-turbo=3.0.0=hb547adb_1 +- liblapack=3.9.0=23_osxarm64_openblas +- libllvm14=14.0.6=hd1a9a77_4 +- libnetcdf=4.9.2=nompi_he469be0_114 +- libnghttp2=1.58.0=ha4dd798_1 +- libopenblas=0.3.27=openmp_h517c56d_1 +- libparquet=17.0.0=hf0ba9ef_13_cpu +- libpng=1.6.43=h091b4b1_0 +- libpq=16.4=h671472c_1 +- libprotobuf=4.25.3=hbfab5d5_0 +- librdkit=2024.03.5=h54a62e4_3 +- libre2-11=2023.09.01=h7b2c953_2 +- libsodium=1.0.18=h27ca646_1 +- libsqlite=3.46.1=hc14010f_0 +- libssh2=1.11.0=h7a5bd25_0 +- libthrift=0.20.0=h64651cc_1 +- libtiff=4.6.0=hf8409c0_4 +- libutf8proc=2.8.0=h1a8c8d9_0 +- libwebp-base=1.4.0=h93a5062_0 +- libxcb=1.16=hc9fafa5_1 +- libxml2=2.12.7=h01dff8b_4 +- libxslt=1.1.39=h223e5b9_0 +- libzip=1.10.1=ha0bc3c6_3 +- libzlib=1.3.1=hfb2fe0b_1 +- llvm-openmp=18.1.8=hde57baf_1 +- llvmlite=0.43.0=py311hc367efa_1 +- locket=1.0.0=pyhd8ed1ab_0 +- lxml=5.3.0=py311hd29ea9b_0 +- lz4=4.3.3=py311hebe0b09_1 +- lz4-c=1.9.4=hb7217d7_0 +- markdown-it-py=3.0.0=pyhd8ed1ab_0 +- markupsafe=2.1.5=py311h460d6c5_1 +- matplotlib-base=3.9.2=py311hba6b155_0 +- matplotlib-inline=0.1.7=pyhd8ed1ab_0 +- mda-xdrlib=0.2.0=pyhd8ed1ab_0 +- mdtraj=1.10.0=py311habad988_0 +- mdurl=0.1.2=pyhd8ed1ab_0 +- mistune=3.0.2=pyhd8ed1ab_0 +- ml_dtypes=0.4.0=py311h4b4568b_1 +- mpc=1.3.1=h91ba8db_0 +- mpfr=4.2.1=h1cfca0a_2 +- mpiplus=v0.0.2=pyhd8ed1ab_0 +- mpmath=1.3.0=pyhd8ed1ab_0 +- msgpack-python=1.0.8=py311h6bde47b_0 +- multidict=6.1.0=py311h426a4a9_0 +- munkres=1.1.4=pyh9f0ad1d_0 +- nbclient=0.10.0=pyhd8ed1ab_0 +- nbconvert-core=7.16.4=pyhd8ed1ab_1 +- nbformat=5.10.4=pyhd8ed1ab_0 +- nbval=0.11.0=pyhd8ed1ab_0 +- ncurses=6.5=h7bae524_1 +- nest-asyncio=1.6.0=pyhd8ed1ab_0 +- 
netcdf-fortran=4.6.1=nompi_hb210b0d_105 +- netcdf4=1.7.1=nompi_py311h42682c7_101 +- networkx=3.3=pyhd8ed1ab_1 +- nose=1.3.7=py_1006 +- notebook=7.2.2=pyhd8ed1ab_0 +- notebook-shim=0.2.4=pyhd8ed1ab_0 +- ntl=11.4.3=hbb3f309_1 +- numba=0.60.0=py311h9506ed5_0 +- numexpr=2.10.0=py311h4b4568b_0 +- numpy=1.26.4=py311h7125741_0 +- oauthlib=3.2.2=pyhd8ed1ab_0 +- ocl_icd_wrapper_apple=1.0.0=h27ca646_0 +- openeye-toolkits=2024.1.1=py311_0 +- openff-amber-ff-ports=0.0.4=pyhca7485f_0 +- openff-forcefields=2024.07.0=pyhff2d567_0 +- openff-interchange-base=0.3.29=pyhd8ed1ab_0 +- openff-models=0.1.2=pyhca7485f_0 +- openff-toolkit-base=0.16.4=pyhd8ed1ab_0 +- openff-units=0.2.2=pyhca7485f_0 +- openff-utilities=0.1.12=pyhd8ed1ab_0 +- openjpeg=2.5.2=h9f1df11_0 +- openmm=8.1.2=py311h778421a_2_khronos +- openmmtools=0.23.1=pyhd8ed1ab_0 +- openssl=3.3.2=h8359307_0 +- opt-einsum=3.3.0=hd8ed1ab_2 +- opt_einsum=3.3.0=pyhc1e730c_2 +- orc=2.0.2=h75dedd0_0 +- overrides=7.7.0=pyhd8ed1ab_0 +- packaging=24.1=pyhd8ed1ab_0 +- packmol=20.15.1=h4c1867c_1 +- pandas=1.5.3=py311h4eec4a9_1 +- pandocfilters=1.5.0=pyhd8ed1ab_0 +- panedr=0.8.0=pyhd8ed1ab_0 +- paprika=0.0.4=py_0 +- parmed=4.2.2=py311h92babd0_1 +- parso=0.8.4=pyhd8ed1ab_0 +- partd=1.4.2=pyhd8ed1ab_0 +- patsy=1.0.1=pyhd8ed1ab_1 +- pcre2=10.44=h297a79d_2 +- pdbfixer=1.9=pyh1a96a4e_0 +- perl=5.32.1=7_h4614cfb_perl5 +- pexpect=4.9.0=pyhd8ed1ab_0 +- pickleshare=0.7.5=py_1003 +- pillow=10.4.0=py311hd7951ec_0 +- pint=0.23=pyhd8ed1ab_1 +- pip=24.2=pyh8b19718_1 +- pixman=0.43.4=hebf3989_0 +- pkgutil-resolve-name=1.3.10=pyhd8ed1ab_1 +- platformdirs=4.2.2=pyhd8ed1ab_0 +- pluggy=1.5.0=pyhd8ed1ab_0 +- ply=3.11=pyhd8ed1ab_2 +- prometheus_client=0.20.0=pyhd8ed1ab_0 +- prompt-toolkit=3.0.47=pyha770c72_0 +- psutil=6.0.0=py311hd3f4193_0 +- pthread-stubs=0.4=h27ca646_1001 +- ptyprocess=0.7.0=pyhd3deb0d_0 +- pure_eval=0.2.3=pyhd8ed1ab_0 +- py-cpuinfo=9.0.0=pyhd8ed1ab_0 +- pyarrow=17.0.0=py311h35c05fe_1 +- pyarrow-core=17.0.0=py311he42f270_1_cpu +- 
pyarrow-hotfix=0.6=pyhd8ed1ab_0 +- pyasn1=0.6.1=pyhd8ed1ab_0 +- pyasn1-modules=0.4.1=pyhd8ed1ab_0 +- pycairo=1.26.1=py311h84a5a08_1 +- pycparser=2.22=pyhd8ed1ab_0 +- pydantic=2.8.2=pyhd8ed1ab_0 +- pydantic-core=2.20.1=py311h98c6a39_0 +- pyedr=0.8.0=pyhd8ed1ab_0 +- pygments=2.18.0=pyhd8ed1ab_0 +- pyjwt=2.9.0=pyhd8ed1ab_1 +- pykube-ng=23.6.0=pyhd8ed1ab_0 +- pymbar=3.1.1=py311hed53245_3 +- pymbar-core=4.0.3=py311h9ea6feb_1 +- pyobjc-core=10.3.1=py311h5f135c3_0 +- pyobjc-framework-cocoa=10.3.1=py311h5f135c3_0 +- pyopenssl=24.2.1=pyhd8ed1ab_2 +- pyparsing=3.1.4=pyhd8ed1ab_0 +- pysocks=1.7.1=pyha2e5f31_6 +- pytables=3.10.1=py311h95628f6_1 +- pytest=8.3.2=pyhd8ed1ab_0 +- pytest-cov=5.0.0=pyhd8ed1ab_0 +- pytest-randomly=3.15.0=pyhd8ed1ab_0 +- pytest-xdist=3.6.1=pyhd8ed1ab_0 +- python=3.11.9=h932a869_0_cpython +- python-box=7.2.0=py311hd3f4193_0 +- python-constraint=1.4.0=py_0 +- python-dateutil=2.9.0=pyhd8ed1ab_0 +- python-fastjsonschema=2.20.0=pyhd8ed1ab_0 +- python-json-logger=2.0.7=pyhd8ed1ab_0 +- python-jsonpath=1.2.0=pyhd8ed1ab_0 +- python-kubernetes=27.2.0=pyhd8ed1ab_0 +- python-symengine=0.11.0=py311h2031a10_1 +- python_abi=3.11=5_cp311 +- pytz=2024.1=pyhd8ed1ab_0 +- pyu2f=0.1.5=pyhd8ed1ab_0 +- pyyaml=6.0.2=py311hd3f4193_0 +- pyzmq=26.2.0=py311h137d824_0 +- qhull=2020.2=h420ef59_5 +- rdkit=2024.03.5=py311h8a4e316_3 +- re2=2023.09.01=h4cba328_2 +- readline=8.2=h92ec313_1 +- red-molsim=0.1.1=pyhd8ed1ab_0 +- referencing=0.35.1=pyhd8ed1ab_0 +- reportlab=4.2.2=py311hd3f4193_0 +- requests=2.32.3=pyhd8ed1ab_0 +- requests-mock=1.12.1=pyhd8ed1ab_0 +- requests-oauthlib=2.0.0=pyhd8ed1ab_0 +- rfc3339-validator=0.1.4=pyhd8ed1ab_0 +- rfc3986-validator=0.1.1=pyh9f0ad1d_0 +- rich=13.8.1=pyhd8ed1ab_0 +- rlpycairo=0.2.0=pyhd8ed1ab_0 +- rpds-py=0.20.0=py311h481aa64_1 +- rsa=4.9=pyhd8ed1ab_0 +- ruamel.yaml=0.18.6=py311h05b510d_0 +- ruamel.yaml.clib=0.2.8=py311h05b510d_0 +- scipy=1.14.1=py311h2929bc6_0 +- send2trash=1.8.3=pyh31c8845_0 +- setuptools=73.0.1=pyhd8ed1ab_0 +- 
six=1.16.0=pyh6c4a22f_0 +- smirnoff99frosst=1.1.0=pyh44b312d_0 +- snappy=1.2.1=hd02b534_0 +- sniffio=1.3.1=pyhd8ed1ab_0 +- sortedcontainers=2.4.0=pyhd8ed1ab_0 +- soupsieve=2.5=pyhd8ed1ab_1 +- sqlalchemy=2.0.32=py311hd3f4193_0 +- stack_data=0.6.2=pyhd8ed1ab_0 +- statsmodels=0.14.4=py311h0f07fe1_0 +- symengine=0.11.2=h417e0f7_1 +- sympy=1.13.2=pypyh2585a3b_103 +- taproom=0.2=pyh9f0ad1d_0 +- tblib=3.0.0=pyhd8ed1ab_0 +- terminado=0.18.1=pyh31c8845_0 +- tinycss2=1.3.0=pyhd8ed1ab_0 +- tk=8.6.13=h5083fa2_1 +- toml=0.10.2=pyhd8ed1ab_0 +- tomli=2.0.1=pyhd8ed1ab_0 +- toolz=0.12.1=pyhd8ed1ab_0 +- tornado=6.4.1=py311h460d6c5_1 +- tqdm=4.66.5=pyhd8ed1ab_0 +- traitlets=5.14.3=pyhd8ed1ab_0 +- types-python-dateutil=2.9.0.20240821=pyhd8ed1ab_0 +- typing-extensions=4.12.2=hd8ed1ab_0 +- typing_extensions=4.12.2=pyha770c72_0 +- typing_utils=0.1.0=pyhd8ed1ab_0 +- tzdata=2024a=h8827d51_1 +- uncertainties=3.2.2=pyhd8ed1ab_1 +- unyt=2.9.2=pyhd8ed1ab_1 +- uri-template=1.3.0=pyhd8ed1ab_0 +- urllib3=2.2.2=pyhd8ed1ab_1 +- wcwidth=0.2.13=pyhd8ed1ab_0 +- webcolors=24.8.0=pyhd8ed1ab_0 +- webencodings=0.5.1=pyhd8ed1ab_2 +- websocket-client=1.8.0=pyhd8ed1ab_0 +- wheel=0.44.0=pyhd8ed1ab_0 +- widgetsnbextension=4.0.13=pyhd8ed1ab_0 +- wsproto=1.2.0=pyhd8ed1ab_0 +- xmltodict=0.13.0=pyhd8ed1ab_0 +- xorg-kbproto=1.0.7=h27ca646_1002 +- xorg-libice=1.1.1=hb547adb_0 +- xorg-libsm=1.2.4=hb547adb_0 +- xorg-libx11=1.8.9=he5f3e76_1 +- xorg-libxau=1.0.11=hb547adb_0 +- xorg-libxdmcp=1.1.3=h27ca646_0 +- xorg-libxext=1.3.4=h1a8c8d9_2 +- xorg-libxt=1.3.0=hb547adb_1 +- xorg-xextproto=7.3.0=h1a8c8d9_1003 +- xorg-xproto=7.0.31=h27ca646_1007 +- xyzservices=2024.9.0=pyhd8ed1ab_0 +- xz=5.2.6=h57fd34a_0 +- yaml=0.2.5=h3422bc3_2 +- yarl=1.9.4=py311h460d6c5_1 +- zeromq=4.3.5=hcc0f68c_4 +- zict=3.0.0=pyhd8ed1ab_0 +- zipp=3.20.1=pyhd8ed1ab_0 +- zlib=1.3.1=hfb2fe0b_1 +- zlib-ng=2.2.1=h00cdb27_0 +- zstandard=0.18.0=py311he2be06e_1 +- zstd=1.5.6=hb46c0d2_0 + diff --git a/docs/examples/kubernetes-run/input-environment.yaml 
b/docs/examples/kubernetes-run/input-environment.yaml new file mode 100644 index 00000000..23ddf13f --- /dev/null +++ b/docs/examples/kubernetes-run/input-environment.yaml @@ -0,0 +1,40 @@ +name: evaluator-test-env +channels: +- conda-forge +- openeye # optional, for openeye +dependencies: + # Base depends + - python + - pip + + # Testing and development + - pytest + - pytest-cov + - pytest-xdist + - pytest-randomly + - nbval + - requests-mock # For testing http requests. + - foyer + + # Shim + - pint >=0.21,<0.25 + - click + + # Standard dependencies + - openeye-toolkits # optional, for openeye + - openff-toolkit >=0.14.3 + - openmm + - pymbar + - dask >=2.7.0 + - distributed >=2.7.0 + - dask-jobqueue >=0.8.0 + - dask-kubernetes + - uncertainties + - openmmtools + - pyyaml + - requests + - python-dateutil + - pydantic >=1.10.17,<3 + - taproom + - dataclasses + - pandas =1 diff --git a/docs/examples/kubernetes-run/results.json b/docs/examples/kubernetes-run/results.json new file mode 100644 index 00000000..b842304a --- /dev/null +++ b/docs/examples/kubernetes-run/results.json @@ -0,0 +1 @@ +{"queued_properties": {"properties": [], "@type": "openff.evaluator.datasets.datasets.PhysicalPropertyDataSet"}, "estimated_properties": {"properties": [{"id": "6385", "substance": {"components": [{"smiles": "CN1CCCCC1", "role": {"value": "solv", "@type": "openff.evaluator.substances.components.Component.Role"}, "@type": "openff.evaluator.substances.components.Component"}, {"smiles": "O", "role": {"value": "solv", "@type": "openff.evaluator.substances.components.Component.Role"}, "@type": "openff.evaluator.substances.components.Component"}], "amounts": {"CN1CCCCC1{solv}": [{"value": 0.2452, "@type": "openff.evaluator.substances.amounts.MoleFraction"}], "O{solv}": [{"value": 0.7548, "@type": "openff.evaluator.substances.amounts.MoleFraction"}]}, "@type": "openff.evaluator.substances.substances.Substance"}, "phase": 2, "thermodynamic_state": {"temperature": {"value": 298.15, 
"unit": "kelvin", "@type": "openff.evaluator.unit.Quantity"}, "pressure": {"value": 101.0, "unit": "kilopascal", "@type": "openff.evaluator.unit.Quantity"}, "@type": "openff.evaluator.thermodynamics.ThermodynamicState"}, "value": {"value": 0.8915693540345646, "unit": "gram / milliliter", "@type": "openff.evaluator.unit.Quantity"}, "uncertainty": {"value": 0.0006464409320249381, "unit": "gram / milliliter", "@type": "openff.evaluator.unit.Quantity"}, "source": {"fidelity": "SimulationLayer", "provenance": "{\"protocol_schemas\": [{\"id\": \"6385|build_coordinates\", \"type\": \"BuildCoordinatesPackmol\", \"inputs\": {\".allow_merging\": true, \".max_molecules\": 256, \".count_exact_amount\": true, \".mass_density\": {\"value\": 0.95, \"unit\": \"gram / milliliter\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".box_aspect_ratio\": [1.0, 1.0, 1.0], \".substance\": {\"components\": [{\"smiles\": \"CN1CCCCC1\", \"role\": {\"value\": \"solv\", \"@type\": \"openff.evaluator.substances.components.Component.Role\"}, \"@type\": \"openff.evaluator.substances.components.Component\"}, {\"smiles\": \"O\", \"role\": {\"value\": \"solv\", \"@type\": \"openff.evaluator.substances.components.Component.Role\"}, \"@type\": \"openff.evaluator.substances.components.Component\"}], \"amounts\": {\"CN1CCCCC1{solv}\": [{\"value\": 0.2452, \"@type\": \"openff.evaluator.substances.amounts.MoleFraction\"}], \"O{solv}\": [{\"value\": 0.7548, \"@type\": \"openff.evaluator.substances.amounts.MoleFraction\"}]}, \"@type\": \"openff.evaluator.substances.substances.Substance\"}, \".tolerance\": {\"value\": 2.0, \"unit\": \"angstrom\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".verbose_packmol\": false, \".retain_packmol_files\": false}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}, {\"id\": \"6385|energy_minimisation\", \"type\": \"OpenMMEnergyMinimisation\", \"inputs\": {\".allow_merging\": true, \".input_coordinate_file\": {\"full_path\": 
\"6385|build_coordinates.coordinate_file_path\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".parameterized_system\": {\"full_path\": \"6385|assign_parameters.parameterized_system\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".tolerance\": {\"value\": 10.0, \"unit\": \"kilojoules_per_mole / nanometer\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".max_iterations\": 0, \".enable_pbc\": true}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}, {\"id\": \"6385|equilibration_simulation\", \"type\": \"OpenMMSimulation\", \"inputs\": {\".allow_merging\": true, \".steps_per_iteration\": 100000, \".total_number_of_iterations\": 1, \".output_frequency\": 5000, \".checkpoint_frequency\": 10, \".timestep\": {\"value\": 2.0, \"unit\": \"femtosecond\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".thermodynamic_state\": {\"temperature\": {\"value\": 298.15, \"unit\": \"kelvin\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"pressure\": {\"value\": 101.0, \"unit\": \"kilopascal\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"@type\": \"openff.evaluator.thermodynamics.ThermodynamicState\"}, \".ensemble\": {\"value\": \"NPT\", \"@type\": \"openff.evaluator.thermodynamics.Ensemble\"}, \".thermostat_friction\": {\"value\": 1.0, \"unit\": \"1 / picosecond\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".input_coordinate_file\": {\"full_path\": \"6385|energy_minimisation.output_coordinate_file\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".parameterized_system\": {\"full_path\": \"6385|assign_parameters.parameterized_system\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".enable_pbc\": true, \".allow_gpu_platforms\": true, \".high_precision\": false, \".gradient_parameters\": []}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}, {\"id\": \"6385|conditional_group\", \"type\": \"ConditionalGroup\", \"inputs\": {\".allow_merging\": true, 
\".conditions\": [], \".max_iterations\": 100}, \"protocol_schemas\": {\"6385|production_simulation\": {\"id\": \"6385|production_simulation\", \"type\": \"OpenMMSimulation\", \"inputs\": {\".allow_merging\": true, \".steps_per_iteration\": 1000000, \".total_number_of_iterations\": 1, \".output_frequency\": 2000, \".checkpoint_frequency\": 10, \".timestep\": {\"value\": 2.0, \"unit\": \"femtosecond\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".thermodynamic_state\": {\"temperature\": {\"value\": 298.15, \"unit\": \"kelvin\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"pressure\": {\"value\": 101.0, \"unit\": \"kilopascal\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"@type\": \"openff.evaluator.thermodynamics.ThermodynamicState\"}, \".ensemble\": {\"value\": \"NPT\", \"@type\": \"openff.evaluator.thermodynamics.Ensemble\"}, \".thermostat_friction\": {\"value\": 1.0, \"unit\": \"1 / picosecond\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".input_coordinate_file\": {\"full_path\": \"6385|equilibration_simulation.output_coordinate_file\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".parameterized_system\": {\"full_path\": \"6385|assign_parameters.parameterized_system\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".enable_pbc\": true, \".allow_gpu_platforms\": true, \".high_precision\": false, \".gradient_parameters\": []}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}, \"6385|average_density\": {\"id\": \"6385|average_density\", \"type\": \"AverageObservable\", \"inputs\": {\".allow_merging\": true, \".bootstrap_iterations\": 250, \".bootstrap_sample_size\": 1.0, \".thermodynamic_state\": {\"temperature\": {\"value\": 298.15, \"unit\": \"kelvin\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"pressure\": {\"value\": 101.0, \"unit\": \"kilopascal\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"@type\": \"openff.evaluator.thermodynamics.ThermodynamicState\"}, 
\".potential_energies\": {\"full_path\": \"6385|production_simulation.observables[PotentialEnergy]\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".observable\": {\"full_path\": \"6385|production_simulation.observables[Density]\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".divisor\": 1.0}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolGroupSchema\"}, {\"id\": \"6385|decorrelate_trajectory\", \"type\": \"DecorrelateTrajectory\", \"inputs\": {\".allow_merging\": true, \".time_series_statistics\": {\"full_path\": \"6385|conditional_group/6385|average_density.time_series_statistics\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".input_coordinate_file\": {\"full_path\": \"6385|conditional_group/6385|production_simulation.output_coordinate_file\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".input_trajectory_path\": {\"full_path\": \"6385|conditional_group/6385|production_simulation.trajectory_file_path\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}, {\"id\": \"6385|decorrelate_observables\", \"type\": \"DecorrelateObservables\", \"inputs\": {\".allow_merging\": true, \".time_series_statistics\": {\"full_path\": \"6385|conditional_group/6385|average_density.time_series_statistics\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".input_observables\": {\"full_path\": \"6385|conditional_group/6385|production_simulation.observables\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}, {\"id\": \"6385|assign_parameters\", \"type\": \"BuildSmirnoffSystem\", \"inputs\": {\".allow_merging\": true, \".force_field_path\": \"/evaluator-storage/working-directory/SimulationLayer/051b994db4774ef7ac7370eedb29e6dd/60240dc3a7574fd4b292d2938dcbf971\", 
\".coordinate_file_path\": {\"full_path\": \"6385|build_coordinates.coordinate_file_path\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".substance\": {\"full_path\": \"6385|build_coordinates.output_substance\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}], \"final_value_source\": {\"full_path\": \"6385|conditional_group/6385|average_density.value\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"outputs_to_store\": {\"full_system\": {\"substance\": {\"full_path\": \"6385|build_coordinates.output_substance\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"thermodynamic_state\": {\"temperature\": {\"value\": 298.15, \"unit\": \"kelvin\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"pressure\": {\"value\": 101.0, \"unit\": \"kilopascal\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"@type\": \"openff.evaluator.thermodynamics.ThermodynamicState\"}, \"property_phase\": 2, \"source_calculation_id\": {\"@type\": \"openff.evaluator.attributes.attributes.PlaceholderValue\"}, \"force_field_id\": {\"@type\": \"openff.evaluator.attributes.attributes.PlaceholderValue\"}, \"coordinate_file_name\": {\"full_path\": \"6385|conditional_group/6385|production_simulation.output_coordinate_file\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"trajectory_file_name\": {\"full_path\": \"6385|decorrelate_trajectory.output_trajectory_path\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"observables\": {\"full_path\": \"6385|decorrelate_observables.output_observables\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"statistical_inefficiency\": {\"full_path\": \"6385|conditional_group/6385|average_density.time_series_statistics.statistical_inefficiency\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"number_of_molecules\": {\"full_path\": 
\"6385|build_coordinates.output_number_of_molecules\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"@type\": \"openff.evaluator.storage.data.StoredSimulationData\"}}, \"@type\": \"openff.evaluator.workflow.schemas.WorkflowSchema\"}", "@type": "openff.evaluator.datasets.provenance.CalculationSource"}, "gradients": [], "@type": "openff.evaluator.properties.density.Density"}, {"id": "6386", "substance": {"components": [{"smiles": "CN1CCCCC1", "role": {"value": "solv", "@type": "openff.evaluator.substances.components.Component.Role"}, "@type": "openff.evaluator.substances.components.Component"}, {"smiles": "O", "role": {"value": "solv", "@type": "openff.evaluator.substances.components.Component.Role"}, "@type": "openff.evaluator.substances.components.Component"}], "amounts": {"CN1CCCCC1{solv}": [{"value": 0.505, "@type": "openff.evaluator.substances.amounts.MoleFraction"}], "O{solv}": [{"value": 0.495, "@type": "openff.evaluator.substances.amounts.MoleFraction"}]}, "@type": "openff.evaluator.substances.substances.Substance"}, "phase": 2, "thermodynamic_state": {"temperature": {"value": 298.15, "unit": "kelvin", "@type": "openff.evaluator.unit.Quantity"}, "pressure": {"value": 101.0, "unit": "kilopascal", "@type": "openff.evaluator.unit.Quantity"}, "@type": "openff.evaluator.thermodynamics.ThermodynamicState"}, "value": {"value": 0.8602986749775117, "unit": "gram / milliliter", "@type": "openff.evaluator.unit.Quantity"}, "uncertainty": {"value": 0.000721351186383778, "unit": "gram / milliliter", "@type": "openff.evaluator.unit.Quantity"}, "source": {"fidelity": "SimulationLayer", "provenance": "{\"protocol_schemas\": [{\"id\": \"6386|build_coordinates\", \"type\": \"BuildCoordinatesPackmol\", \"inputs\": {\".allow_merging\": true, \".max_molecules\": 256, \".count_exact_amount\": true, \".mass_density\": {\"value\": 0.95, \"unit\": \"gram / milliliter\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".box_aspect_ratio\": [1.0, 1.0, 1.0], 
\".substance\": {\"components\": [{\"smiles\": \"CN1CCCCC1\", \"role\": {\"value\": \"solv\", \"@type\": \"openff.evaluator.substances.components.Component.Role\"}, \"@type\": \"openff.evaluator.substances.components.Component\"}, {\"smiles\": \"O\", \"role\": {\"value\": \"solv\", \"@type\": \"openff.evaluator.substances.components.Component.Role\"}, \"@type\": \"openff.evaluator.substances.components.Component\"}], \"amounts\": {\"CN1CCCCC1{solv}\": [{\"value\": 0.505, \"@type\": \"openff.evaluator.substances.amounts.MoleFraction\"}], \"O{solv}\": [{\"value\": 0.495, \"@type\": \"openff.evaluator.substances.amounts.MoleFraction\"}]}, \"@type\": \"openff.evaluator.substances.substances.Substance\"}, \".tolerance\": {\"value\": 2.0, \"unit\": \"angstrom\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".verbose_packmol\": false, \".retain_packmol_files\": false}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}, {\"id\": \"6386|energy_minimisation\", \"type\": \"OpenMMEnergyMinimisation\", \"inputs\": {\".allow_merging\": true, \".input_coordinate_file\": {\"full_path\": \"6386|build_coordinates.coordinate_file_path\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".parameterized_system\": {\"full_path\": \"6386|assign_parameters.parameterized_system\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".tolerance\": {\"value\": 10.0, \"unit\": \"kilojoules_per_mole / nanometer\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".max_iterations\": 0, \".enable_pbc\": true}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}, {\"id\": \"6386|equilibration_simulation\", \"type\": \"OpenMMSimulation\", \"inputs\": {\".allow_merging\": true, \".steps_per_iteration\": 100000, \".total_number_of_iterations\": 1, \".output_frequency\": 5000, \".checkpoint_frequency\": 10, \".timestep\": {\"value\": 2.0, \"unit\": \"femtosecond\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".thermodynamic_state\": 
{\"temperature\": {\"value\": 298.15, \"unit\": \"kelvin\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"pressure\": {\"value\": 101.0, \"unit\": \"kilopascal\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"@type\": \"openff.evaluator.thermodynamics.ThermodynamicState\"}, \".ensemble\": {\"value\": \"NPT\", \"@type\": \"openff.evaluator.thermodynamics.Ensemble\"}, \".thermostat_friction\": {\"value\": 1.0, \"unit\": \"1 / picosecond\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".input_coordinate_file\": {\"full_path\": \"6386|energy_minimisation.output_coordinate_file\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".parameterized_system\": {\"full_path\": \"6386|assign_parameters.parameterized_system\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".enable_pbc\": true, \".allow_gpu_platforms\": true, \".high_precision\": false, \".gradient_parameters\": []}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}, {\"id\": \"6386|conditional_group\", \"type\": \"ConditionalGroup\", \"inputs\": {\".allow_merging\": true, \".conditions\": [], \".max_iterations\": 100}, \"protocol_schemas\": {\"6386|production_simulation\": {\"id\": \"6386|production_simulation\", \"type\": \"OpenMMSimulation\", \"inputs\": {\".allow_merging\": true, \".steps_per_iteration\": 1000000, \".total_number_of_iterations\": 1, \".output_frequency\": 2000, \".checkpoint_frequency\": 10, \".timestep\": {\"value\": 2.0, \"unit\": \"femtosecond\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".thermodynamic_state\": {\"temperature\": {\"value\": 298.15, \"unit\": \"kelvin\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"pressure\": {\"value\": 101.0, \"unit\": \"kilopascal\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"@type\": \"openff.evaluator.thermodynamics.ThermodynamicState\"}, \".ensemble\": {\"value\": \"NPT\", \"@type\": \"openff.evaluator.thermodynamics.Ensemble\"}, \".thermostat_friction\": {\"value\": 1.0, 
\"unit\": \"1 / picosecond\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \".input_coordinate_file\": {\"full_path\": \"6386|equilibration_simulation.output_coordinate_file\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".parameterized_system\": {\"full_path\": \"6386|assign_parameters.parameterized_system\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".enable_pbc\": true, \".allow_gpu_platforms\": true, \".high_precision\": false, \".gradient_parameters\": []}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}, \"6386|average_density\": {\"id\": \"6386|average_density\", \"type\": \"AverageObservable\", \"inputs\": {\".allow_merging\": true, \".bootstrap_iterations\": 250, \".bootstrap_sample_size\": 1.0, \".thermodynamic_state\": {\"temperature\": {\"value\": 298.15, \"unit\": \"kelvin\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"pressure\": {\"value\": 101.0, \"unit\": \"kilopascal\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"@type\": \"openff.evaluator.thermodynamics.ThermodynamicState\"}, \".potential_energies\": {\"full_path\": \"6386|production_simulation.observables[PotentialEnergy]\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".observable\": {\"full_path\": \"6386|production_simulation.observables[Density]\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".divisor\": 1.0}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolGroupSchema\"}, {\"id\": \"6386|decorrelate_trajectory\", \"type\": \"DecorrelateTrajectory\", \"inputs\": {\".allow_merging\": true, \".time_series_statistics\": {\"full_path\": \"6386|conditional_group/6386|average_density.time_series_statistics\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".input_coordinate_file\": {\"full_path\": \"6386|conditional_group/6386|production_simulation.output_coordinate_file\", \"@type\": 
\"openff.evaluator.workflow.utils.ProtocolPath\"}, \".input_trajectory_path\": {\"full_path\": \"6386|conditional_group/6386|production_simulation.trajectory_file_path\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}, {\"id\": \"6386|decorrelate_observables\", \"type\": \"DecorrelateObservables\", \"inputs\": {\".allow_merging\": true, \".time_series_statistics\": {\"full_path\": \"6386|conditional_group/6386|average_density.time_series_statistics\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".input_observables\": {\"full_path\": \"6386|conditional_group/6386|production_simulation.observables\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}, {\"id\": \"6386|assign_parameters\", \"type\": \"BuildSmirnoffSystem\", \"inputs\": {\".allow_merging\": true, \".force_field_path\": \"/evaluator-storage/working-directory/SimulationLayer/051b994db4774ef7ac7370eedb29e6dd/60240dc3a7574fd4b292d2938dcbf971\", \".coordinate_file_path\": {\"full_path\": \"6386|build_coordinates.coordinate_file_path\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \".substance\": {\"full_path\": \"6386|build_coordinates.output_substance\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}}, \"@type\": \"openff.evaluator.workflow.schemas.ProtocolSchema\"}], \"final_value_source\": {\"full_path\": \"6386|conditional_group/6386|average_density.value\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"outputs_to_store\": {\"full_system\": {\"substance\": {\"full_path\": \"6386|build_coordinates.output_substance\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"thermodynamic_state\": {\"temperature\": {\"value\": 298.15, \"unit\": \"kelvin\", \"@type\": \"openff.evaluator.unit.Quantity\"}, \"pressure\": {\"value\": 101.0, \"unit\": \"kilopascal\", \"@type\": 
\"openff.evaluator.unit.Quantity\"}, \"@type\": \"openff.evaluator.thermodynamics.ThermodynamicState\"}, \"property_phase\": 2, \"source_calculation_id\": {\"@type\": \"openff.evaluator.attributes.attributes.PlaceholderValue\"}, \"force_field_id\": {\"@type\": \"openff.evaluator.attributes.attributes.PlaceholderValue\"}, \"coordinate_file_name\": {\"full_path\": \"6386|conditional_group/6386|production_simulation.output_coordinate_file\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"trajectory_file_name\": {\"full_path\": \"6386|decorrelate_trajectory.output_trajectory_path\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"observables\": {\"full_path\": \"6386|decorrelate_observables.output_observables\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"statistical_inefficiency\": {\"full_path\": \"6386|conditional_group/6386|average_density.time_series_statistics.statistical_inefficiency\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"number_of_molecules\": {\"full_path\": \"6386|build_coordinates.output_number_of_molecules\", \"@type\": \"openff.evaluator.workflow.utils.ProtocolPath\"}, \"@type\": \"openff.evaluator.storage.data.StoredSimulationData\"}}, \"@type\": \"openff.evaluator.workflow.schemas.WorkflowSchema\"}", "@type": "openff.evaluator.datasets.provenance.CalculationSource"}, "gradients": [], "@type": "openff.evaluator.properties.density.Density"}], "@type": "openff.evaluator.datasets.datasets.PhysicalPropertyDataSet"}, "unsuccessful_properties": {"properties": [], "@type": "openff.evaluator.datasets.datasets.PhysicalPropertyDataSet"}, "exceptions": [], "@type": "openff.evaluator.client.client.RequestResult"} \ No newline at end of file diff --git a/docs/examples/kubernetes-run/run.py b/docs/examples/kubernetes-run/run.py new file mode 100644 index 00000000..44221d7e --- /dev/null +++ b/docs/examples/kubernetes-run/run.py @@ -0,0 +1,662 @@ +import contextlib +import logging +import 
copy +import os +import pathlib +import subprocess +import sys +import time + +import click +from kubernetes import client, config +import yaml + +from openff.evaluator.backends.dask_kubernetes import ( + KubernetesPersistentVolumeClaim, KubernetesSecret, + DaskKubernetesBackend, +) +from openff.units import unit +from openff.evaluator.datasets import PhysicalPropertyDataSet +from openff.evaluator.properties import Density, EnthalpyOfMixing +from openff.evaluator.client import RequestOptions +from openff.evaluator.client import EvaluatorClient, RequestOptions, ConnectionOptions +from openff.evaluator.forcefield import SmirnoffForceFieldSource + +logger = logging.getLogger(__name__) +logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + +def _save_script(contents: str, path: str): + """Save a script to a path. + + Parameters + ---------- + contents : str + The contents of the script. + path : str + The path to save the script to. + + """ + with open(path, "w") as f: + f.write(contents) + return path + + +def copy_file_to_storage( + evaluator_backend, + input_file, + output_file, +): + """ + Copy a file to the storage of a Kubernetes cluster. + + Parameters + ---------- + evaluator_backend : DaskKubernetesBackend + The backend to copy the file to. + input_file : str + The path to the file to copy (locally). + output_file : str + The path to save the file to (remotely). + """ + with open(input_file, "r") as f: + data = f.read() + future = evaluator_backend._client.submit(_save_script, data, output_file, resources={"notGPU": 1, "GPU": 0}) + future.result() + logger.info(f"Copied {input_file} to {output_file}") + + +def wait_for_pod( + pod_name: str, + namespace: str, + status: str = "Running", + timeout: int = 1000, + polling_interval: int = 10, +): + """ + Wait for a pod to reach a certain status. + + Parameters + ---------- + pod_name : str + The name of the pod. + namespace : str + The namespace of the pod. + status : str + The status to wait for. 
+ timeout : int + The maximum time to wait. + polling_interval : int + The interval to poll the pod status. + + + Raises + ------ + TimeoutError + If the pod does not reach the desired status within the timeout. + """ + core_v1 = client.CoreV1Api() + + start_time = time.time() + while time.time() - start_time < timeout: + pod = core_v1.read_namespaced_pod(name=pod_name, namespace=namespace) + if pod.status.phase == status: + return pod + time.sleep(polling_interval) + + raise TimeoutError(f"Pod {pod_name} did not reach status {status} within {timeout} seconds.") + + + +def get_pod_name( + deployment_name: str, + namespace: str = "openforcefield", +) -> str: + """ + Get the pod name of a deployment + + + Parameters + ---------- + deployment_name : str + The name of the deployment. + namespace : str + The namespace of the deployment. + + + Returns + ------- + str + The name of a pod in the deployment. + """ + apps_v1 = client.AppsV1Api() + core_v1 = client.CoreV1Api() + + # Get the deployment's labels + deployment = apps_v1.read_namespaced_deployment(name=deployment_name, namespace=namespace) + deployment_labels = deployment.spec.selector.match_labels + + # List pods with the deployment's labels + label_selector = ",".join([f"{key}={value}" for key, value in deployment_labels.items()]) + pods = core_v1.list_namespaced_pod(namespace=namespace, label_selector=label_selector).items + pod_name = pods[0].metadata.name.split("_")[0] + return pod_name + + +@contextlib.contextmanager +def forward_port( + deployment_name, + namespace: str = "openforcefield", + port: int = 8998, +): + """ + Forward a port from a Kubernetes deployment to the local machine. + + This assumes that the deployment has at least one pod. + + Parameters + ---------- + deployment_name : str + The name of the deployment. + namespace : str + The namespace of the deployment. + port : int + The port to forward. 
+ """ + + pod_name = get_pod_name(deployment_name, namespace) + print(f"Pod name: {pod_name}") + + # Wait for the pod to be running + wait_for_pod(pod_name, namespace, status="Running") + command = [ + "kubectl", "port-forward", f"pod/{pod_name}", f"{port}:{port}", + "-n", namespace, + ] + logger.info(f"Forwarding port {port} to pod {pod_name}") + proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + # Wait for the port forward to be established + time.sleep(5) + if proc.poll() is not None: + stdout, stderr = proc.communicate() + raise RuntimeError(f"Port forward failed: {stderr.decode()}") + try: + yield + finally: + proc.terminate() + + + +def create_pvc( + namespace: str = "openforcefield", + job_name: str = "lw", + storage_class_name: str = "rook-cephfs-central", + storage_space: unit.Quantity = 2 * unit.terabytes, + apply_pvc: bool = True, + timeout: int = 1000, +) -> str: + """ + Create a persistent volume claim and deploy it. + + Possibly could be turned into a method of `KubernetesPersistentVolumeClaim`. + + Parameters + ---------- + namespace : str + The namespace to deploy the PVC in. + job_name : str + The name of the job. + storage_class_name : str + The name of the storage class (on NRP) to request a PVC. + storage_space : unit.Quantity + The amount of storage to request. + apply_pvc : bool + Whether to launch the PVC. + timeout : int + The maximum time to wait for the PVC to be bound. + + Returns + ------- + str + The name of the PVC. 
+ """ + core_v1 = client.CoreV1Api() + + pvc_spec = client.V1PersistentVolumeClaimSpec( + access_modes=["ReadWriteMany"], + storage_class_name=storage_class_name, + resources=client.V1ResourceRequirements( + requests={ + "storage": f"{storage_space.to(unit.gigabytes).m}Gi", + } + ), + ) + + + pvc_name = f"evaluator-storage-{job_name}" + metadata = client.V1ObjectMeta(name=pvc_name) + pvc = client.V1PersistentVolumeClaim( + api_version="v1", + kind="PersistentVolumeClaim", + metadata=metadata, + spec=pvc_spec, + ) + if apply_pvc: + api_response = core_v1.create_namespaced_persistent_volume_claim( + namespace=namespace, + body=pvc + ) + logger.info( + f"Created PVC {pvc.metadata.name}. State={api_response.status.phase}" + ) + + # wait + end_time = time.time() + timeout + while time.time() < end_time: + pvc = core_v1.read_namespaced_persistent_volume_claim(name=pvc_name, namespace=namespace) + if pvc.status.phase == "Bound": + logger.info(f"PVC '{pvc_name}' is Bound.") + return pvc_name + logger.info(f"Waiting for PVC '{pvc_name}' to become Bound. Current phase: {pvc.status.phase}") + time.sleep(5) + return pvc_name + + +def create_deployment( + calculation_backend, + remote_script_path: str, + remote_storage_path: str, + env: dict = None, + volumes: list[KubernetesPersistentVolumeClaim] = None, + secrets: list[KubernetesSecret] = None, + namespace: str = "openforcefield", + job_name: str = "lw", + port: int = 8998, + image: str = "ghcr.io/lilyminium/openff-images:tmp-evaluator-dask-v2", +): + """ + Create Kubernetes deployment for Evaluator server. + + Parameters + ---------- + calculation_backend : DaskKubernetesBackend + The backend to use. + remote_script_path : str + The path to the script to coy over and run + remote_storage_path : str + The path to the filesystem storage to mount. + env : dict + Environment variables to set in the container. + volumes : list[KubernetesPersistentVolumeClaim] + Volumes to mount. 
+ secrets : list[KubernetesSecret] + Secrets to mount. + namespace : str + The namespace to deploy the deployment in. + job_name : str + The name of the job. + port : int + The server port to expose. + image : str + The image to use for each container. + """ + server_name = f"evaluator-server-{job_name}-deployment" + apps_v1 = client.AppsV1Api() + + metadata = client.V1ObjectMeta( + name=f"evaluator-server-{job_name}", + labels={"k8s-app": server_name}, + ) + + # generate volume mounts and volumes + k8s_volume_mounts = [] + k8s_volumes = [] + + if volumes is None: + volumes = [] + if secrets is None: + secrets = [] + for volume in volumes + secrets: + k8s_volume_mounts.append(volume._to_volume_mount_k8s()) + k8s_volumes.append(volume._to_volume_k8s()) + + k8s_env = {} + if env is not None: + assert isinstance(env, dict) + k8s_env.update(env) + + k8s_env_objects = [ + client.V1EnvVar(name=key, value=value) + for key, value in k8s_env.items() + ] + resources = calculation_backend._resources_per_worker + + command = [ + "python", + remote_script_path, + "--cluster-name", + calculation_backend._cluster.name, + "--namespace", + calculation_backend._cluster.namespace, + "--memory", + str(resources._memory_limit.m_as(unit.gigabytes)), + "--ephemeral-storage", + str(resources._ephemeral_storage_limit.m_as(unit.gigabytes)), + "--storage-path", + remote_storage_path, + "--port", + str(port) + ] + logger.info(f"Command: {command}") + + container = client.V1Container( + name=server_name, + image=image, + env=k8s_env_objects, + command=command, + resources=client.V1ResourceRequirements( + requests={"cpu": "1", "memory": "4Gi"}, + limits={"cpu": "1", "memory": "4Gi"}, + ), + volume_mounts=k8s_volume_mounts, + ) + + deployment_spec = client.V1DeploymentSpec( + replicas=1, + selector=client.V1LabelSelector( + match_labels={"k8s-app": server_name} + ), + template=client.V1PodTemplateSpec( + metadata=metadata, + spec=client.V1PodSpec( + containers=[container], + volumes=k8s_volumes, 
+ ) + ), + ) + + deployment = client.V1Deployment( + api_version="apps/v1", + kind="Deployment", + metadata=metadata, + spec=deployment_spec, + ) + + # submit + api_response = apps_v1.create_namespaced_deployment( + namespace=namespace, + body=deployment, + ) + logger.info( + f"Created deployment {deployment.metadata.name}. State={api_response.status}" + ) + return deployment.metadata.name + + +def simulate( + dataset_path: str = "dataset.json", + n_molecules: int = 256, + force_field: str = "openff-2.1.0.offxml", + port: int = 8000 +): + """ + Simulate and run a dataset. + + Parameters + ---------- + dataset_path : str + The path to the dataset. + n_molecules : int + The number of molecules to simulate in a liquid box. + force_field : str + The path to the force field. + port : int + The server port to connect to + """ + # load dataset + dataset = PhysicalPropertyDataSet.from_json(dataset_path) + print(f"Loaded {len(dataset.properties)} properties from {dataset_path}") + + error = 50 + + options = RequestOptions() + options.calculation_layers = ["SimulationLayer"] + density_schema = Density.default_simulation_schema(n_molecules=n_molecules) + + dhmix_schema = EnthalpyOfMixing.default_simulation_schema(n_molecules=n_molecules) + + options.add_schema("SimulationLayer", "Density", density_schema) + options.add_schema("SimulationLayer", "EnthalpyOfMixing", dhmix_schema) + + force_field_source = SmirnoffForceFieldSource.from_path( + force_field + ) + + client = EvaluatorClient( + connection_options=ConnectionOptions(server_port=port) + ) + + # we first request the equilibration data + # this can be copied between different runs to avoid re-running + # the data is saved in a directory called "stored_data" + + request, error = client.request_estimate( + dataset, + force_field_source, + options, + ) + assert error is None, error + + # block until computation finished + results, exception = request.results(synchronous=True, polling_interval=30) + assert exception is None, 
exception + + print(f"Simulation complete") + print(f"# estimated: {len(results.estimated_properties)}") + print(f"# unsuccessful: {len(results.unsuccessful_properties)}") + print(f"# exceptions: {len(results.exceptions)}") + + with open("results.json", "w") as f: + f.write(results.json()) + + + +@click.command() +@click.option("--namespace", default="openforcefield", help="The namespace to operate in.") +@click.option("--job-name", default="lw", help="The name of the job.") +@click.option("--storage-class-name", default="rook-cephfs-central", help="The name of the storage class to use for the PVC.") +@click.option("--storage-path", default="/evaluator-storage", help="The path to local filesystem storage for Evaluator.") +@click.option("--script-file", default="server-existing.py", help="The path to the script to copy over and run to execute an EvaluatorServer.") +@click.option("--port", default=8998, help="The port to forward from the deployment.") +@click.option("--image", default="ghcr.io/lilyminium/openff-images:tmp-evaluator-dask-v2", help="The image to use for the deployment.") +def main( + namespace: str = "openforcefield", + job_name: str = "lw", + storage_class_name: str = "rook-cephfs-central", + storage_path: str = "/evaluator-storage", + script_file: str = "server-existing.py", + port: int = 8998, + image: str = "ghcr.io/lilyminium/openff-images:tmp-evaluator-dask-v2", + storage_space: unit.Quantity = 500 * unit.gigabytes, + memory: unit.Quantity = 8 * unit.gigabytes, + ephemeral_storage: unit.Quantity = 20 * unit.gigabytes, +): + """ + Run Evaluator on a Kubernetes cluster. + + This script runs the following steps: + + 1. Create a PersistentVolumeClaim (PVC) for storage. + 2. Create a DaskKubernetesBackend, mounting the PVC. + 3. Copy a script to start an EvaluatorServer to the storage. + 4. Create a Deployment to run the script in step 3. + 5. Forward a port from the Deployment to the local machine. + 6. Run a simulation using the EvaluatorClient. 
+ + Parameters + ---------- + namespace : str + The namespace to operate in. + job_name : str + The name of the job. + storage_class_name : str + The name of the storage class to use for the PVC. + storage_space : unit.Quantity + The amount of storage to request (should be compatible with GB). + memory : unit.Quantity + The amount of memory to request (should be compatible with GB). + ephemeral_storage : unit.Quantity + The amount of ephemeral storage to request (should be compatible with GB). + storage_path : str + The path to local filesystem storage for Evaluator. + script_file : str + The path to the script to copy over and run to execute an EvaluatorServer. + port : int + The port to forward from the deployment. + image : str + The image to use for the deployment. + + """ + config.load_kube_config() + core_v1 = client.CoreV1Api() + + from openff.evaluator.backends.backends import PodResources, ComputeResources + + + results = None + + # run in a try/except to clean up on error + try: + # 1. set up filesystem storage with PVC + pvc_name = create_pvc( + namespace=namespace, + job_name=job_name, + storage_class_name=storage_class_name, + storage_space=storage_space, + apply_pvc=True, + ) + + # 2. 
create and submit KubeCluster + volume = KubernetesPersistentVolumeClaim( + name=pvc_name, + mount_path=storage_path, + ) + secret = KubernetesSecret( + name="openeye-license", + secret_name="oe-license-feb-2024", + mount_path="/secrets/oe_license.txt", + sub_path="oe_license.txt", + read_only=True, + ) + cluster_name = f"evaluator-{job_name}" + calculation_backend = DaskKubernetesBackend( + cluster_name=cluster_name, + gpu_resources_per_worker=PodResources( + minimum_number_of_workers=0, + maximum_number_of_workers=10, + number_of_threads=1, + memory_limit=memory, + ephemeral_storage_limit=ephemeral_storage, + number_of_gpus=1, + preferred_gpu_toolkit=ComputeResources.GPUToolkit.CUDA, + ), + cpu_resources_per_worker=PodResources( + minimum_number_of_workers=0, + maximum_number_of_workers=40, + number_of_threads=1, + memory_limit=memory, + ephemeral_storage_limit=ephemeral_storage, + number_of_gpus=0, + ), + image=image, + namespace=namespace, + env={ + "OE_LICENSE": "/secrets/oe_license.txt", + # daemonic processes are not allowed to have children + "DASK_DISTRIBUTED__WORKER__DAEMON": "False", + "DASK_LOGGING__DISTRIBUTED": "debug", + "DASK__TEMPORARY_DIRECTORY": "/evaluator-storage", + "STORAGE_DIRECTORY": "/evaluator-storage", + "EXTRA_PIP_PACKAGES": "jupyterlab" + }, + volumes=[volume], + secrets=[secret], + annotate_resources=True, + cluster_kwargs={"resource_timeout": 300} + ) + + spec = calculation_backend._generate_cluster_spec() + with open("cluster-spec.yaml", "w") as f: + yaml.safe_dump(spec, f) + calculation_backend.start() + + logger.info(f"Calculating with backend {calculation_backend}") + + # 3. copy script to storage + remote_script_file = os.path.join(storage_path, pathlib.Path(script_file).name) + copy_file_to_storage( + calculation_backend, + script_file, + remote_script_file + ) + + + + # 4. 
create and submit deployment + deployment_name = create_deployment( + calculation_backend, + remote_script_file, + storage_path, + volumes=[volume], + secrets=[secret], + namespace=namespace, + job_name=job_name, + port=port, + env={ + "OE_LICENSE": "/secrets/oe_license.txt", + }, + image=image, + ) + + # 5. forward port + with forward_port( + deployment_name, + namespace=namespace, + port=port, + ): + # 6. run simulation + simulate( + dataset_path="dataset.json", + n_molecules=256, + force_field="openff-2.1.0.offxml", + port=port + ) + + except Exception as e: + print(e) + raise e + + finally: + + + print(f"Cleaning up") + # clean up deployment + apps_v1 = client.AppsV1Api() + apps_v1.delete_namespaced_deployment( + name=deployment_name, + namespace=namespace, + ) + + # clean up pvc + # note this may fail if you have another pod looking at the storage + core_v1.delete_namespaced_persistent_volume_claim( + name=pvc_name, + namespace=namespace, + ) + + + + +if __name__ == "__main__": + main() diff --git a/docs/examples/kubernetes-run/server-existing.py b/docs/examples/kubernetes-run/server-existing.py new file mode 100644 index 00000000..e4865068 --- /dev/null +++ b/docs/examples/kubernetes-run/server-existing.py @@ -0,0 +1,80 @@ +""" +This script is written to run on a remote Kubernetes deployment and connect to an existing DaskKubernetesBackend cluster. + +The script will start an EvaluatorServer instance and listen for incoming requests. 
+""" + +import argparse +import logging +import sys + +import os + +from openff.evaluator.backends.dask_kubernetes import DaskKubernetesExistingBackend, KubernetesPersistentVolumeClaim +from openff.evaluator.backends.backends import ComputeResources, PodResources +from openff.toolkit.utils import OPENEYE_AVAILABLE +from openff.evaluator.server import EvaluatorServer +import openff.evaluator +from openff.units import unit + + +logger = logging.getLogger(__name__) +logging.basicConfig(stream=sys.stdout, level=logging.INFO) + +parser = argparse.ArgumentParser() +parser.add_argument("--cluster-name", type=str, default="evaluator-lw") +parser.add_argument("--namespace", type=str, default="openforcefield") +parser.add_argument("--storage-path", type=str, default="/evaluator-storage") +parser.add_argument("--memory", type=int, default=8, help="Memory limit in GB") +parser.add_argument("--ephemeral-storage", type=int, default=20, help="Ephemeral storage limit in GB") +parser.add_argument("--port", type=int, default=8998) + + + + +if __name__ == "__main__": + args = parser.parse_args() + logger.info(f"OpenEye is available: {OPENEYE_AVAILABLE}") + + logger.info("Evaluator version: " + openff.evaluator.__version__) + + # change directory to storage path + os.chdir(args.storage_path) + + working_directory = os.path.abspath( + os.path.join(args.storage_path, "working-directory") + ) + + volume = KubernetesPersistentVolumeClaim( + name="evaluator-storage-lw", + mount_path=args.storage_path, + ) + + + calculation_backend = DaskKubernetesExistingBackend( + cluster_name=args.cluster_name, + namespace=args.namespace, + cluster_port=8786, + gpu_resources_per_worker=PodResources( + number_of_threads=1, + memory_limit=args.memory * unit.gigabytes, + ephemeral_storage_limit=args.ephemeral_storage * unit.gigabytes, + number_of_gpus=1, + preferred_gpu_toolkit=ComputeResources.GPUToolkit.CUDA, + preferred_gpu_precision=ComputeResources.GPUPrecision.mixed, + ), + annotate_resources=True, + 
volumes=[volume], + ) + + logger.info(f"Calculating with backend {calculation_backend}") + with calculation_backend: + evaluator_server = EvaluatorServer( + calculation_backend, + working_directory=working_directory, + port=args.port, + delete_working_files=True, + ) + logger.info("Starting server") + evaluator_server.start(asynchronous=False) + diff --git a/docs/index.rst b/docs/index.rst index 3a0aaf99..c5556417 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -178,6 +178,7 @@ See the :doc:`physical properties overview page ` for mor Overview backends/daskbackends + backends/daskkubernetesbackend .. toctree:: :maxdepth: 2 diff --git a/openff/evaluator/_tests/test_backends/test_backends.py b/openff/evaluator/_tests/test_backends/test_backends.py new file mode 100644 index 00000000..03fcd76f --- /dev/null +++ b/openff/evaluator/_tests/test_backends/test_backends.py @@ -0,0 +1,117 @@ +import pytest +from openff.units import unit + +from openff.evaluator.backends.backends import PodResources + + +class TestPodResources: + + @pytest.fixture + def gpu_resources(self): + node_affinity = { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "nvidia.com/cuda.runtime.major", + "operator": "In", + "values": ["12"], + }, + { + "key": "nvidia.com/cuda.runtime.minor", + "operator": "In", + "values": ["4"], + }, + ] + } + ] + } + } + } + return PodResources( + number_of_threads=1, + number_of_gpus=1, + affinity_specification=node_affinity, + minimum_number_of_workers=1, + maximum_number_of_workers=1, + ) + + @pytest.fixture + def cpu_resources(self): + return PodResources( + number_of_threads=1, + number_of_gpus=0, + memory_limit=5 * unit.terabyte, + ephemeral_storage_limit=20.0 * unit.megabyte, + affinity_specification=None, + minimum_number_of_workers=1, + maximum_number_of_workers=1, + ) + + def test_podresources_initialization_gpu(self, gpu_resources): + assert 
gpu_resources._number_of_threads == 1
+        assert gpu_resources._number_of_gpus == 1
+        assert gpu_resources._affinity_specification == {
+            "nodeAffinity": {
+                "requiredDuringSchedulingIgnoredDuringExecution": {
+                    "nodeSelectorTerms": [
+                        {
+                            "matchExpressions": [
+                                {
+                                    "key": "nvidia.com/cuda.runtime.major",
+                                    "operator": "In",
+                                    "values": ["12"],
+                                },
+                                {
+                                    "key": "nvidia.com/cuda.runtime.minor",
+                                    "operator": "In",
+                                    "values": ["4"],
+                                },
+                            ]
+                        }
+                    ]
+                }
+            }
+        }
+        assert gpu_resources._minimum_number_of_workers == 1
+        assert gpu_resources._maximum_number_of_workers == 1
+        assert gpu_resources._resources == {"GPU": 1, "notGPU": 0}
+
+    def test_to_kubernetes_resources_limits_gpu(self, gpu_resources):
+        k8s_resources = gpu_resources._to_kubernetes_resource_limits()
+        assert k8s_resources == {
+            "cpu": "1",
+            "memory": "4.000Gi",
+            "ephemeral-storage": "20.000Gi",
+            "nvidia.com/gpu": "1",
+        }
+
+    def test_to_dask_worker_resources_gpu(self, gpu_resources):
+        assert gpu_resources._to_dask_worker_resources() == [
+            "--resources",
+            "GPU=1,notGPU=0",
+        ]
+
+    def test_podresources_initialization_cpu(self, cpu_resources):
+        assert cpu_resources._number_of_threads == 1
+        assert cpu_resources._number_of_gpus == 0
+        assert cpu_resources._affinity_specification == {}
+        assert cpu_resources._minimum_number_of_workers == 1
+        assert cpu_resources._maximum_number_of_workers == 1
+        assert cpu_resources._resources == {"GPU": 0, "notGPU": 1}
+
+    def test_to_kubernetes_resources_limits_cpu(self, cpu_resources):
+        k8s_resources = cpu_resources._to_kubernetes_resource_limits()
+        assert k8s_resources == {
+            "cpu": "1",
+            "memory": "5000.000Gi",
+            "ephemeral-storage": "0.020Gi",
+        }
+
+    def test_to_dask_worker_resources_cpu(self, cpu_resources):
+        assert cpu_resources._to_dask_worker_resources() == [
+            "--resources",
+            "GPU=0,notGPU=1",
+        ]
diff --git a/openff/evaluator/_tests/test_backends/test_dask_kubernetes.py b/openff/evaluator/_tests/test_backends/test_dask_kubernetes.py
new file mode 100644
index 00000000..3b72701a --- /dev/null +++ b/openff/evaluator/_tests/test_backends/test_dask_kubernetes.py @@ -0,0 +1,166 @@ +import json +import pathlib + +import pytest +import yaml +from openff.units import unit +from openff.utilities.utilities import get_data_dir_path + +from openff.evaluator.backends.backends import PodResources +from openff.evaluator.backends.dask_kubernetes import ( + DaskKubernetesBackend, + KubernetesEmptyDirVolume, + KubernetesSecret, +) + + +class TestDaskKubernetesBackend: + @pytest.fixture + def gpu_resources(self): + node_affinity = { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "nvidia.com/cuda.runtime.major", + "operator": "In", + "values": ["12"], + }, + { + "key": "nvidia.com/cuda.runtime.minor", + "operator": "In", + "values": ["4"], + }, + ] + } + ] + } + } + } + return PodResources( + number_of_threads=1, + number_of_gpus=1, + affinity_specification=node_affinity, + minimum_number_of_workers=1, + maximum_number_of_workers=10, + ) + + @pytest.fixture + def cpu_resources(self): + return PodResources( + number_of_threads=1, + number_of_gpus=0, + affinity_specification=None, + maximum_number_of_workers=20, + ) + + @pytest.fixture + def calculation_backend(self, gpu_resources, cpu_resources): + volume = KubernetesEmptyDirVolume( + name="evaluator-storage", + mount_path="/evaluator-storage", + ) + secret = KubernetesSecret( + name="openeye-license", + secret_name="oe-license-feb-2024", + mount_path="/secrets/oe_license.txt", + sub_path="oe_license.txt", + ) + calculation_backend = DaskKubernetesBackend( + gpu_resources_per_worker=gpu_resources, + cpu_resources_per_worker=cpu_resources, + cluster_name="evaluator", + image="ghcr.io/lilyminium/openff-images:evaluator-0.4.10-kubernetes-dask-v0", + namespace="openforcefield", + env={ + "OE_LICENSE": "/secrets/oe_license.txt", + # daemonic processes are not allowed to have children + 
"DASK_DISTRIBUTED__WORKER__DAEMON": "False", + "DASK_LOGGING__DISTRIBUTED": "debug", + "DASK__TEMPORARY_DIRECTORY": "/evaluator-storage", + }, + volumes=[volume], + secrets=[secret], + ) + return calculation_backend + + def test_no_initialization_without_volumes(self, gpu_resources): + with pytest.raises(ValueError, match="No volumes specified"): + DaskKubernetesBackend( + gpu_resources_per_worker=gpu_resources, + cluster_name="evaluator", + image="ghcr.io/lilyminium/openff-images:evaluator-0.4.10-kubernetes-dask-v0", + namespace="openforcefield", + env={ + "OE_LICENSE": "/secrets/oe_license.txt", + # daemonic processes are not allowed to have children + "DASK_DISTRIBUTED__WORKER__DAEMON": "False", + "DASK_LOGGING__DISTRIBUTED": "debug", + "DASK__TEMPORARY_DIRECTORY": "/evaluator-storage", + }, + ) + + def test_no_initialization_without_resources(self): + with pytest.raises(ValueError, match="must be specified"): + DaskKubernetesBackend() + + def test_generate_volume_specifications(self, calculation_backend): + volume_mounts, volumes = calculation_backend._generate_volume_specifications() + assert volume_mounts == [ + { + "name": "evaluator-storage", + "mountPath": "/evaluator-storage", + "readOnly": False, + }, + { + "name": "openeye-license", + "mountPath": "/secrets/oe_license.txt", + "subPath": "oe_license.txt", + "readOnly": True, + }, + ] + + assert volumes == [ + { + "name": "evaluator-storage", + "emptyDir": {}, + }, + { + "name": "openeye-license", + "secret": { + "secretName": "oe-license-feb-2024", + }, + }, + ] + + def test_generate_worker_spec(self, calculation_backend): + data_directory = pathlib.Path( + get_data_dir_path("test/kubernetes", "openff.evaluator") + ) + reference_file = data_directory / "dask_worker_spec.yaml" + + worker_spec = calculation_backend._generate_worker_spec( + calculation_backend._other_resources["cpu"] + ) + with open(reference_file, "r") as file: + reference_spec = yaml.safe_load(file) + + assert worker_spec == 
reference_spec
+
+    def test_generate_cluster_spec(self, calculation_backend):
+        cluster_spec = calculation_backend._generate_cluster_spec()
+
+        data_directory = pathlib.Path(
+            get_data_dir_path("test/kubernetes", "openff.evaluator")
+        )
+        reference_file = data_directory / "dask_cluster_spec.yaml"
+        with open(reference_file, "r") as file:
+            reference_spec = yaml.safe_load(file)
+
+        assert cluster_spec == reference_spec
+
+    @pytest.mark.skip(reason="Currently only works with existing kubectl credentials.")
+    def test_start(self, calculation_backend):
+        calculation_backend.start()
diff --git a/openff/evaluator/backends/backends.py b/openff/evaluator/backends/backends.py
index fc1b11c4..a64285eb 100644
--- a/openff/evaluator/backends/backends.py
+++ b/openff/evaluator/backends/backends.py
@@ -219,6 +219,142 @@ def __ne__(self, other):
         return not self.__eq__(other)
 
 
+class PodResources(ComputeResources):
+    """A class to represent the resources available to a single worker in a Dask Kubernetes cluster."""
+
+    _additional_attrs = (
+        "memory_limit",
+        "ephemeral_storage_limit",
+        "additional_limit_specifications",
+        "affinity_specification",
+        "minimum_number_of_workers",
+        "maximum_number_of_workers",
+    )
+
+    def __init__(
+        self,
+        number_of_threads=1,
+        number_of_gpus=0,
+        preferred_gpu_toolkit=ComputeResources.GPUToolkit.auto,
+        preferred_gpu_precision=None,
+        memory_limit=4 * unit.gigabytes,
+        ephemeral_storage_limit=20 * unit.gigabytes,
+        additional_limit_specifications=None,
+        affinity_specification: dict = None,
+        minimum_number_of_workers: int = 1,
+        maximum_number_of_workers: int = 1,
+    ):
+        """Constructs a new PodResources object.
+
+        Parameters
+        ----------
+        number_of_threads: int
+            The number of threads available to a calculation worker.
+        number_of_gpus: int
+            The number of GPUs available to a calculation worker.
+        preferred_gpu_toolkit: ComputeResources.GPUToolkit, optional
+            The preferred toolkit to use when running on GPUs.
+        preferred_gpu_precision: ComputeResources.GPUPrecision, optional
+            The preferred GPU precision.
+        memory_limit: unit.Quantity
+            The memory limit for each worker.
+        ephemeral_storage_limit: unit.Quantity
+            The storage limit for each worker.
+        additional_limit_specifications: dict, optional
+            Additional limit specifications to pass to Kubernetes.
+        affinity_specification: dict, optional
+            The affinity specification to pass to Kubernetes.
+            Can be used for CUDA.
+        minimum_number_of_workers: int, optional
+            The minimum number of workers to start with.
+        maximum_number_of_workers: int, optional
+            The maximum number of workers to scale up to.
+        """
+        super().__init__(
+            number_of_threads=number_of_threads,
+            number_of_gpus=number_of_gpus,
+            preferred_gpu_toolkit=preferred_gpu_toolkit,
+            preferred_gpu_precision=preferred_gpu_precision,
+        )
+
+        assert minimum_number_of_workers <= maximum_number_of_workers
+
+        self._memory_limit = memory_limit
+        self._ephemeral_storage_limit = ephemeral_storage_limit
+        self._additional_limit_specifications = {}
+        self._affinity_specification = {}
+        self._minimum_number_of_workers = minimum_number_of_workers
+        self._maximum_number_of_workers = maximum_number_of_workers
+        if additional_limit_specifications is not None:
+            assert isinstance(additional_limit_specifications, dict)
+            self._additional_limit_specifications.update(
+                additional_limit_specifications
+            )
+        if affinity_specification is not None:
+            assert isinstance(affinity_specification, dict)
+            self._affinity_specification = affinity_specification
+
+        if number_of_gpus > 0:
+            resources = {"GPU": number_of_gpus, "notGPU": 0}
+        else:
+            resources = {"GPU": 0, "notGPU": 1}
+        self._resources = resources
+
+    def __getstate__(self):
+        state = super().__getstate__()
+
+        for attr in type(self)._additional_attrs:
+            state[attr] = getattr(self, f"_{attr}")
+
+        return state
+
+    def __setstate__(self, state):
+        super().__setstate__(state)
+        for attr in type(self)._additional_attrs:
+            setattr(self,
f"_{attr}", state[attr]) + + def __eq__(self, other): + equals = (type(other) == type(self)) and super().__eq__(other) + for attr in type(self)._additional_attrs: + equals &= getattr(self, f"_{attr}") == getattr(other, f"_{attr}") + return equals + + def __ne__(self, other): + return not self.__eq__(other) + + def _to_kubernetes_resource_limits(self) -> dict[str, str]: + """Converts this object into a dictionary of Kubernetes resource limits.""" + memory_gb = self._memory_limit.to(unit.gigabytes).m + ephemeral_storage_gb = self._ephemeral_storage_limit.to(unit.gigabytes).m + resource_limits = { + "cpu": str(self._number_of_threads), + "memory": f"{memory_gb:.3f}Gi", + "ephemeral-storage": f"{ephemeral_storage_gb:.3f}Gi", + } + + if self._number_of_gpus > 0: + resource_limits["nvidia.com/gpu"] = str(self._number_of_gpus) + + resource_limits.update( + {k: str(v) for k, v in self._additional_limit_specifications.items()} + ) + return resource_limits + + def _to_dask_worker_resources(self) -> list[str]: + """Append the resources to the list of resources for a Dask worker.""" + if not self._resources: + return [] + resources = ",".join([f"{k}={v}" for k, v in self._resources.items()]) + return ["--resources", resources] + + def _update_worker_with_resources(self, worker_spec: dict) -> dict: + worker_container = worker_spec["containers"][0] + worker_command = list(worker_container["args"]) + worker_command.extend(self._to_dask_worker_resources()) + worker_container["args"] = worker_command + return worker_spec + + class CalculationBackend(abc.ABC): """An abstract base representation of an openff-evaluator calculation backend. 
A backend is responsible for coordinating, distributing and running calculations on the diff --git a/openff/evaluator/backends/dask_kubernetes.py b/openff/evaluator/backends/dask_kubernetes.py new file mode 100644 index 00000000..2241c4eb --- /dev/null +++ b/openff/evaluator/backends/dask_kubernetes.py @@ -0,0 +1,486 @@ +import contextlib +import copy +import logging +import os +import pathlib +import subprocess +import time +from enum import Enum + +from openff.units import unit +from openff.utilities.utilities import requires_package + +from openff.evaluator._pydantic import BaseModel, Field +from openff.evaluator.backends.backends import PodResources +from openff.evaluator.backends.dask import BaseDaskBackend, BaseDaskJobQueueBackend + +logger = logging.getLogger(__name__) + + +class AccessMode(Enum): + """An enumeration of the different access modes for a Kubernetes PVC""" + + READ_WRITE_ONCE = "ReadWriteOnce" + READ_WRITE_MANY = "ReadWriteMany" + READ_ONLY_MANY = "ReadOnlyMany" + + +class BaseKubernetesVolume(BaseModel): + """A helper base class for specifying Kubernetes volume-like objects.""" + + name: str = Field( + ..., description="The name assigned to the volume during this run." 
+ ) + mount_path: str = Field(..., description="The path to mount the volume to.") + + def _to_volume_mount_spec(self): + mount_path = self.mount_path + if not mount_path.startswith("/"): + mount_path = f"/{mount_path}" + return { + "name": self.name, + "mountPath": mount_path, + "readOnly": self.read_only, + } + + @requires_package("kubernetes") + def _to_volume_mount_k8s(self): + from kubernetes import client + + return client.V1VolumeMount( + name=self.name, + mount_path=self.mount_path, + read_only=self.read_only, + ) + + +class KubernetesSecret(BaseKubernetesVolume): + """A helper class for specifying Kubernetes secrets.""" + + secret_name: str = Field(..., description="The name of the saved secret to use.") + sub_path: str = Field(None, description="The sub path to mount the secret to.") + read_only: bool = Field(True, description="Whether the volume should be read-only.") + + def _to_volume_spec(self): + return { + "name": self.name, + "secret": {"secretName": self.secret_name}, + } + + def _to_volume_mount_spec(self): + spec = super()._to_volume_mount_spec() + spec["subPath"] = self.sub_path + return spec + + @requires_package("kubernetes") + def _to_volume_mount_k8s(self): + volume_mount = super()._to_volume_mount_k8s() + volume_mount.sub_path = self.sub_path + return volume_mount + + @requires_package("kubernetes") + def _to_volume_k8s(self): + from kubernetes import client + + return client.V1Volume( + name=self.name, + secret=client.V1SecretVolumeSource(secret_name=self.secret_name), + ) + + +class KubernetesPersistentVolumeClaim(BaseKubernetesVolume): + """A helper class for specifying Kubernetes volumes.""" + + read_only: bool = Field( + False, description="Whether the volume should be read-only." 
+ ) + + def _generate_pvc_spec( + self, + storage_class_name: str = "rook-cephfs-central", + access_mode: AccessMode = AccessMode.READ_WRITE_MANY, + storage: unit.Quantity = 5 * unit.terabytes, + ): + storage_tb = storage.to(unit.terabytes).m + spec = { + "apiVersion": "v1", + "kind": "PersistentVolumeClaim", + "metadata": {"name": self.name}, + "spec": { + "storageClassName": storage_class_name, + "accessModes": [access_mode.value], + "resources": {"requests": {"storage": f"{storage_tb}Ti"}}, + }, + } + return spec + + def _to_volume_spec(self): + return { + "name": self.name, + "persistentVolumeClaim": {"claimName": self.name}, + } + + @requires_package("kubernetes") + def _to_volume_k8s(self): + from kubernetes import client + + return client.V1Volume( + name=self.name, + persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource( + claim_name=self.name + ), + ) + + +class KubernetesEmptyDirVolume(BaseKubernetesVolume): + """A helper class for specifying Kubernetes emptyDir volumes.""" + + read_only: bool = Field( + False, description="Whether the volume should be read-only." 
+ ) + + def _to_volume_spec(self): + return { + "name": self.name, + "emptyDir": {}, + } + + @requires_package("kubernetes") + def _to_volume_k8s(self): + from kubernetes import client + + return client.V1Volume( + name=self.name, + empty_dir=client.V1EmptyDirVolumeSource(), + ) + + +class BaseDaskKubernetesBackend(BaseDaskBackend): + + def __init__( + self, + gpu_resources_per_worker=None, + cpu_resources_per_worker=None, + cluster_name="openff-evaluator", + cluster_port=8786, + disable_nanny_process=False, + image: str = "ghcr.io/lilyminium/openff-images:evaluator-0.4.10-kubernetes-dask-v0", + namespace: str = "openforcefield", + env: dict = None, + secrets: list[KubernetesSecret] = None, + volumes: list[KubernetesPersistentVolumeClaim] = None, + cluster_kwargs: dict = None, + annotate_resources: bool = False, + include_jupyter: bool = False, + ): + default_resources = None + other_resources = {} + if gpu_resources_per_worker is not None: + # preference gpu resources over cpu + assert isinstance(gpu_resources_per_worker, PodResources) + default_resources = gpu_resources_per_worker + other_resources["cpu"] = cpu_resources_per_worker + elif cpu_resources_per_worker is not None: + assert isinstance(cpu_resources_per_worker, PodResources) + default_resources = cpu_resources_per_worker + else: + raise ValueError( + "Either gpu_resources_per_worker or cpu_resources_per_worker must be specified." 
+ ) + + super().__init__( + default_resources._minimum_number_of_workers, + default_resources, + ) + + self._cluster_name = cluster_name + self._cluster_port = cluster_port + self._namespace = namespace + self._annotate_resources = annotate_resources + self._image = image + self._other_resources = other_resources + self._include_jupyter = include_jupyter + self._disable_nanny_process = disable_nanny_process + self._env = {} + if env is not None: + assert isinstance(env, dict) + self._env.update(env) + + self._secrets = [] + if secrets is not None: + assert isinstance(secrets, list) + for secret in secrets: + assert isinstance(secret, KubernetesSecret) + self._secrets.append(secret) + + self._volumes = [] + if volumes is not None: + assert isinstance(volumes, list) + for volume in volumes: + assert isinstance(volume, BaseKubernetesVolume) + self._volumes.append(volume) + + # fail if there are no volumes -- we need volumes... + # unless we swap to S3? + if len(self._volumes) == 0: + raise ValueError("No volumes specified. We need at least a filesystem") + + self._cluster_kwargs = {} + if cluster_kwargs is not None: + assert isinstance(cluster_kwargs, dict) + self._cluster_kwargs.update(cluster_kwargs) + + def submit_task(self, function, *args, **kwargs): + from openff.evaluator.workflow.plugins import registered_workflow_protocols + + key = kwargs.pop("key", None) + + protocols_to_import = [ + protocol_class.__module__ + "." 
+ protocol_class.__qualname__ + for protocol_class in registered_workflow_protocols.values() + ] + + # look for simulation protocols + if self._annotate_resources: + resources = kwargs.get("resources", {}) + if len(args) >= 2: + # schema is the second argument + # awful temporary terribad hack + schema_json = args[1] + if ( + '".allow_gpu_platforms": true' in schema_json + or "energy_minimisation" in schema_json + ): + resources["GPU"] = 0.5 + resources["notGPU"] = 0 + else: + resources["GPU"] = 0 + resources["notGPU"] = 1 + kwargs["resources"] = resources + logger.info(f"Annotating resources: {resources}") + + return self._client.submit( + BaseDaskJobQueueBackend._wrapped_function, + function, + *args, + **kwargs, + available_resources=self._resources_per_worker, + registered_workflow_protocols=protocols_to_import, + gpu_assignments={}, + per_worker_logging=True, + key=key, + ) + + +class DaskKubernetesBackend(BaseDaskKubernetesBackend): + """ + A class which defines a Dask backend which runs on a Kubernetes cluster + + This class is a wrapper around the Dask Kubernetes cluster class that + uses the Dask Kubernetes operator. It allows for the creation of a + Dask cluster on a Kubernetes cluster with adaptive scaling. + However, adaptive scaling currently *only applies to the "default" worker group*. + This is preferentially the GPU worker group, but will fall back to the CPU + worker group if no GPU worker resources are specified. + + Parameters + ---------- + gpu_resources_per_worker: PodResources + The resources to allocate to each GPU worker. + cpu_resources_per_worker: PodResources + The resources to allocate to each CPU worker. + cluster_name: str + The name of the Dask cluster. + cluster_port: int + The port to use for the Dask cluster. + disable_nanny_process: bool + Whether to disable the Dask nanny process. + image: str + The Docker image to use for the Dask cluster. + namespace: str + The Kubernetes namespace to use. 
+ env: dict + The environment variables to use for the Dask cluster. + secrets: list[KubernetesSecret] + The Kubernetes secrets to use for the Dask cluster. + volumes: list[KubernetesPersistentVolumeClaim] + The Kubernetes volumes to use for the Dask cluster. + cluster_kwargs: dict + Additional keyword arguments to pass to the Dask KubeCluster + constructor. + annotate_resources: bool + Whether to annotate resources for the Dask cluster. + include_jupyter: bool + Whether to include a Jupyter notebook in the Dask cluster. + """ + + @requires_package("dask_kubernetes") + def _generate_cluster_spec(self) -> dict[str, dict]: + """ + Generate a Dask Kubernetes cluster specification + that can be used to create a Dask cluster on a Kubernetes cluster. + """ + from dask_kubernetes.operator import make_cluster_spec + + resources = self._resources_per_worker._to_kubernetes_resource_limits() + full_resources = { + "requests": copy.deepcopy(resources), + "limits": copy.deepcopy(resources), + } + spec = make_cluster_spec( + name=self._cluster_name, + image=self._image, + n_workers=self._resources_per_worker._minimum_number_of_workers, + resources=full_resources, + jupyter=self._include_jupyter, + env=self._env, + ) + + # remove any gpu specifications from scheduler + scheduler_spec = spec["spec"]["scheduler"]["spec"] + scheduler_container = scheduler_spec["containers"][0] + # need longer than default + scheduler_container["readinessProbe"]["timeoutSeconds"] = 3600 + scheduler_resources = copy.deepcopy(full_resources) + scheduler_resources["requests"].pop("nvidia.com/gpu", None) + scheduler_resources["limits"].pop("nvidia.com/gpu", None) + + # set up port + scheduler_container["resources"] = scheduler_resources + port_list = scheduler_container["ports"] + for port_spec in port_list: + if port_spec["name"] == "tcp-comm": + port_spec["containerPort"] = self._cluster_port + + # update worker spec + worker_spec = spec["spec"]["worker"]["spec"] + if 
self._resources_per_worker._affinity_specification: + worker_spec["affinity"] = copy.deepcopy( + self._resources_per_worker._affinity_specification + ) + + # update worker command with resources + if self._annotate_resources: + self._resources_per_worker._update_worker_with_resources(worker_spec) + worker_container = worker_spec["containers"][0] + + # add volume mounts + volume_mounts, volumes = self._generate_volume_specifications() + + # deepcopy all the dicts in case we write out to yaml + # having references to the same object makes things weird + worker_container["volumeMounts"] = copy.deepcopy(volume_mounts) + scheduler_container["volumeMounts"] = copy.deepcopy(volume_mounts) + worker_spec["volumes"] = copy.deepcopy(volumes) + scheduler_spec["volumes"] = copy.deepcopy(volumes) + + return spec + + def _generate_volume_specifications(self) -> tuple[list[dict], list[dict]]: + """ + Generate the volume mount and volume specifications for the cluster + + Returns + ------- + tuple[list[dict], list[dict]] + A tuple of lists of dictionaries representing the volume mounts + and volumes for the cluster, in that order. 
+ """ + volume_mounts = [] + volumes = [] + + for volume in self._volumes + self._secrets: + volume_spec = volume._to_volume_spec() + volume_mount_spec = volume._to_volume_mount_spec() + + volume_mounts.append(dict(volume_mount_spec)) + volumes.append(dict(volume_spec)) + + return volume_mounts, volumes + + @requires_package("dask_kubernetes") + def _generate_worker_spec(self, pod_resources) -> dict[str, dict]: + """ + Generate a Dask Kubernetes worker specification + """ + from dask_kubernetes.operator import make_worker_spec + + resources = pod_resources._to_kubernetes_resource_limits() + + k8s_resources = { + "limits": copy.deepcopy(resources), + "requests": copy.deepcopy(resources), + } + + worker_spec = make_worker_spec( + resources=k8s_resources, + n_workers=pod_resources._maximum_number_of_workers, + image=self._image, + env=self._env, + ) + + # add volume mounts + worker_container = worker_spec["spec"]["containers"][0] + volume_mounts, volumes = self._generate_volume_specifications() + worker_container["volumeMounts"] = copy.deepcopy(volume_mounts) + worker_spec["spec"]["volumes"] = copy.deepcopy(volumes) + + # update worker spec + if self._resources_per_worker._affinity_specification: + worker_spec["spec"]["affinity"] = copy.deepcopy( + self._resources_per_worker._affinity_specification + ) + + # update worker command with resources + if self._annotate_resources: + pod_resources._update_worker_with_resources(worker_spec["spec"]) + return worker_spec + + @requires_package("dask_kubernetes") + def start(self): + from dask_kubernetes.operator import KubeCluster + from kubernetes import config + + config.load_kube_config() + + spec = self._generate_cluster_spec() + self._cluster = KubeCluster( + namespace=self._namespace, custom_cluster_spec=spec, **self._cluster_kwargs + ) + self._cluster.adapt( + minimum=self._resources_per_worker._minimum_number_of_workers, + maximum=self._resources_per_worker._maximum_number_of_workers, + ) + # add other worker groups + 
for name, resources in self._other_resources.items(): + worker_spec = self._generate_worker_spec(resources) + self._cluster.add_worker_group( + name=name, + n_workers=resources._maximum_number_of_workers, + custom_spec=worker_spec, + ) + + super().start() + + +class DaskKubernetesExistingBackend(BaseDaskKubernetesBackend): + """ + A class which defines a Dask backend which runs on an existing Kubernetes cluster. + + This class simply connects to an existing Dask cluster. + Note that it is still important to define default resources + as some of these get passed onto the protocols themselves, + e.g. GPU availability and the GPUToolkit. + + """ + + def start(self): + self._cluster = ( + f"tcp://{self._cluster_name}-scheduler" + f".{self._namespace}.svc.cluster.local:" + f"{self._cluster_port}" + ) + super().start() + + def stop(self): + logger.warning("Cannot stop an existing Kubernetes cluster.") diff --git a/openff/evaluator/data/test/kubernetes/dask_cluster_spec.yaml b/openff/evaluator/data/test/kubernetes/dask_cluster_spec.yaml new file mode 100644 index 00000000..003aef5a --- /dev/null +++ b/openff/evaluator/data/test/kubernetes/dask_cluster_spec.yaml @@ -0,0 +1,145 @@ +apiVersion: kubernetes.dask.org/v1 +kind: DaskCluster +metadata: + name: evaluator +spec: + idleTimeout: 0 + scheduler: + service: + ports: + - name: tcp-comm + port: 8786 + protocol: TCP + targetPort: tcp-comm + - name: http-dashboard + port: 8787 + protocol: TCP + targetPort: http-dashboard + selector: + dask.org/cluster-name: evaluator + dask.org/component: scheduler + type: ClusterIP + spec: + containers: + - args: + - dask-scheduler + - --host + - 0.0.0.0 + env: + - name: OE_LICENSE + value: /secrets/oe_license.txt + - name: DASK_DISTRIBUTED__WORKER__DAEMON + value: 'False' + - name: DASK_LOGGING__DISTRIBUTED + value: debug + - name: DASK__TEMPORARY_DIRECTORY + value: /evaluator-storage + image: ghcr.io/lilyminium/openff-images:evaluator-0.4.10-kubernetes-dask-v0 + livenessProbe: + httpGet: 
+ path: /health + port: http-dashboard + initialDelaySeconds: 15 + periodSeconds: 20 + name: scheduler + ports: + - containerPort: 8786 + name: tcp-comm + protocol: TCP + - containerPort: 8787 + name: http-dashboard + protocol: TCP + readinessProbe: + httpGet: + path: /health + port: http-dashboard + initialDelaySeconds: 0 + periodSeconds: 1 + timeoutSeconds: 3600 + resources: + limits: + cpu: '1' + ephemeral-storage: 20.000Gi + memory: 4.000Gi + requests: + cpu: '1' + ephemeral-storage: 20.000Gi + memory: 4.000Gi + volumeMounts: + - mountPath: /evaluator-storage + name: evaluator-storage + readOnly: false + - mountPath: /secrets/oe_license.txt + name: openeye-license + readOnly: true + subPath: oe_license.txt + volumes: + - emptyDir: {} + name: evaluator-storage + - name: openeye-license + secret: + secretName: oe-license-feb-2024 + worker: + replicas: 1 + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: nvidia.com/cuda.runtime.major + operator: In + values: + - '12' + - key: nvidia.com/cuda.runtime.minor + operator: In + values: + - '4' + containers: + - args: + - dask-worker + - --name + - $(DASK_WORKER_NAME) + - --dashboard + - --dashboard-address + - '8788' + env: + - name: OE_LICENSE + value: /secrets/oe_license.txt + - name: DASK_DISTRIBUTED__WORKER__DAEMON + value: 'False' + - name: DASK_LOGGING__DISTRIBUTED + value: debug + - name: DASK__TEMPORARY_DIRECTORY + value: /evaluator-storage + image: ghcr.io/lilyminium/openff-images:evaluator-0.4.10-kubernetes-dask-v0 + name: worker + ports: + - containerPort: 8788 + name: http-dashboard + protocol: TCP + resources: + limits: + cpu: '1' + ephemeral-storage: 20.000Gi + memory: 4.000Gi + nvidia.com/gpu: '1' + requests: + cpu: '1' + ephemeral-storage: 20.000Gi + memory: 4.000Gi + nvidia.com/gpu: '1' + volumeMounts: + - mountPath: /evaluator-storage + name: evaluator-storage + readOnly: false + - mountPath: /secrets/oe_license.txt 
+ name: openeye-license + readOnly: true + subPath: oe_license.txt + volumes: + - emptyDir: {} + name: evaluator-storage + - name: openeye-license + secret: + secretName: oe-license-feb-2024 diff --git a/openff/evaluator/data/test/kubernetes/dask_worker_spec.yaml b/openff/evaluator/data/test/kubernetes/dask_worker_spec.yaml new file mode 100644 index 00000000..d744fb3d --- /dev/null +++ b/openff/evaluator/data/test/kubernetes/dask_worker_spec.yaml @@ -0,0 +1,61 @@ +replicas: 20 +spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: nvidia.com/cuda.runtime.major + operator: In + values: + - '12' + - key: nvidia.com/cuda.runtime.minor + operator: In + values: + - '4' + containers: + - args: + - dask-worker + - --name + - $(DASK_WORKER_NAME) + - --dashboard + - --dashboard-address + - '8788' + env: + - name: OE_LICENSE + value: /secrets/oe_license.txt + - name: DASK_DISTRIBUTED__WORKER__DAEMON + value: 'False' + - name: DASK_LOGGING__DISTRIBUTED + value: debug + - name: DASK__TEMPORARY_DIRECTORY + value: /evaluator-storage + image: ghcr.io/lilyminium/openff-images:evaluator-0.4.10-kubernetes-dask-v0 + name: worker + ports: + - containerPort: 8788 + name: http-dashboard + protocol: TCP + resources: + limits: + cpu: '1' + ephemeral-storage: 20.000Gi + memory: 4.000Gi + requests: + cpu: '1' + ephemeral-storage: 20.000Gi + memory: 4.000Gi + volumeMounts: + - mountPath: /evaluator-storage + name: evaluator-storage + readOnly: false + - mountPath: /secrets/oe_license.txt + name: openeye-license + readOnly: true + subPath: oe_license.txt + volumes: + - emptyDir: {} + name: evaluator-storage + - name: openeye-license + secret: + secretName: oe-license-feb-2024