feat: entire python client for rainbow

Problem: we will need to interact with rainbow from flux in a consistent way.

Solution: mirror the current Go client with a Python one, including a client with commands to register, create a config, submit jobs (to the graph database GRPC and rainbow), and load backends. The next step after this should be to prototype what is running in the flux instance, then register from there, and also try messing around with subsystems.

Signed-off-by: vsoch <[email protected]>
converged-computing · Mar 4, 2024 · 6083496 · 6083496
1 parent 0832df1
commit 6083496
Show file tree

Hide file tree

Showing 41 changed files with 1,226 additions and 419 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -1,2 +1,2 @@
 rainbow.db
-env
+env
diff --git a/.github/workflows/build-deploy.yaml b/.github/workflows/build-deploy.yaml
@@ -29,7 +29,7 @@ jobs:
       if: (github.event_name == 'release')
       run: |
         tag=${GITHUB_REF#refs/tags/}
-        echo "Tagging and releasing ${{ env.container}}:${tag}"        
+        echo "Tagging and releasing ${{ env.container}}:${tag}"
         docker tag ${{ env.container }}:latest ${{ env.container }}:${tag}
 
     - name: GHCR Login
@@ -43,7 +43,7 @@ jobs:
     - name: Deploy Container
       if: (github.event_name != 'pull_request')
       run: docker push ${{ env.container }} --all-tags
-      
+
   build-rainbow-scheduler:
     permissions:
       packages: write
@@ -60,7 +60,7 @@ jobs:
       if: (github.event_name == 'release')
       run: |
         tag=${GITHUB_REF#refs/tags/}
-        echo "Tagging and releasing ${{ env.container}}:${tag}"        
+        echo "Tagging and releasing ${{ env.container}}:${tag}"
         docker tag ${{ env.container }}:latest ${{ env.container }}:${tag}
 
     - name: GHCR Login
@@ -73,4 +73,4 @@ jobs:
 
     - name: Deploy Container
       if: (github.event_name != 'pull_request')
-      run: docker push ${{ env.container }} --all-tags
+      run: docker push ${{ env.container }} --all-tags
diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
@@ -0,0 +1,26 @@
+name: test rainbow
+
+on:
+  pull_request: []
+
+jobs:
+  formatting:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Setup black linter
+      run: conda create --quiet --name black pyflakes
+
+    - name: Check Spelling
+      uses: crate-ci/typos@7ad296c72fa8265059cc03d1eda562fbdfcd6df2 # v1.9.0
+      with:
+        files: ./docs/*.md ./README.md ./python/v1/README.md
+
+    - name: Lint and format Python code
+      run: |
+        export PATH="/usr/share/miniconda/bin:$PATH"
+        source activate black
+        pip install -r .github/dev-requirements.txt
+        cd python/v1
+        pre-commit run --all-files
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,4 @@
-# Mac 
+# Mac
 .DS_Store
 
 # Go
@@ -13,7 +13,7 @@ tmp
 __pycache__
 rainbow_scheduler.egg-info
 .eggs
-env 
+env
 python/v1/build
 python/v1/dist
 
@@ -24,11 +24,11 @@ python/v1/dist
 bin
 dist
 
-# Keys 
+# Keys
 cosign.*
 *.pem
 
-# ko 
+# ko
 /.digest
 
 # Terraform

diff --git a/Dockerfile b/Dockerfile
@@ -28,4 +28,4 @@ EXPOSE 443
 
 # Recommended to add --secret here! If you need to persist the database, mount that
 # or we can add support for non sqlite.
-CMD ["--address", "0.0.0.0:8080", "--name", "rainbow"]
+CMD ["--address", "0.0.0.0:8080", "--name", "rainbow"]
diff --git a/Makefile b/Makefile
@@ -35,7 +35,7 @@ docker-flux:
 	docker build --build-arg base=fluxrm/flux-sched:jammy -t $(REGISTRY)/rainbow-flux:latest .
 
 .PHONY: docker-ubuntu
-docker-ubuntu: 
+docker-ubuntu:
 	docker build -t $(REGISTRY)/rainbow-scheduler:latest .
 
 .PHONY: proto
@@ -46,12 +46,15 @@ proto: protoc ## Generates the API code and documentation
 
 .PHONY: python
 python: python ## Generate python proto files in python
+	# pip install grpcio-tools
 	# pip freeze | grep grpcio-tools
+    # We will put rainbow plus the memory protos here
 	mkdir -p python/v1/rainbow/protos
 	cd python/v1/rainbow/protos
 	python -m grpc_tools.protoc -I./api/v1 --python_out=./python/v1/rainbow/protos --pyi_out=./python/v1/rainbow/protos --grpc_python_out=./python/v1/rainbow/protos ./api/v1/rainbow.proto
-	# Not great, but gets the job done
 	sed -i 's/import rainbow_pb2 as rainbow__pb2/from . import rainbow_pb2 as rainbow__pb2/' ./python/v1/rainbow/protos/rainbow_pb2_grpc.py
+	python -m grpc_tools.protoc -I./backends/memory/service --python_out=./python/v1/rainbow/protos --pyi_out=./python/v1/rainbow/protos --grpc_python_out=./python/v1/rainbow/protos ./backends/memory/service/memory.proto
+	sed -i 's/import memory_pb2 as memory__pb2/from . import memory_pb2 as memory__pb2/' ./python/v1/rainbow/protos/memory_pb2_grpc.py
 
 .PHONY: version
 version: ## Prints the current version
@@ -85,12 +88,12 @@ register: ## Run mock registration
 	go run cmd/rainbow/rainbow.go register --cluster-name keebler --cluster-nodes ./docs/examples/scheduler/cluster-nodes.json
 
 .PHONY: tag
-tag: ## Creates release tag 
+tag: ## Creates release tag
 	git tag -s -m "version bump to $(VERSION)" $(VERSION)
 	git push origin $(VERSION)
 
 .PHONY: tagless
-tagless: ## Delete the current release tag 
+tagless: ## Delete the current release tag
 	git tag -d $(VERSION)
 	git push --delete origin $(VERSION)
 

diff --git a/README.md b/README.md
@@ -1,26 +1,26 @@
 # rainbow
 
-> 🌈️ Where keebler elves and schedulers live, somewhere in the clouds, and with marshmallows 
+> 🌈️ Where keebler elves and schedulers live, somewhere in the clouds, and with marshmallows
 
 [![PyPI version](https://badge.fury.io/py/rainbow-scheduler.svg)](https://badge.fury.io/py/rainbow-scheduler)
 ![docs/img/rainbow.png](docs/img/rainbow.png)
 
-This is a prototype that will use a Go [gRPC](https://grpc.io/) server/client to demonstrate multi-cluster scheduling. 
+This is a prototype that will use a Go [gRPC](https://grpc.io/) server/client to demonstrate multi-cluster scheduling.
 For more information:
 
  - ⭐️ [Documentation](https://converged-computing.github.io/rainbow) ⭐️
 
 
 ## TODO
 
-- satifies
+- satisfies
  - the function needs to actually do DFS (look at what fluxion does) and then address each resource
  - add print statements to debug checks at different levels / types
 - clusters
  - implement function to add a subsystem to an existing cluster (e.g., add I/O)
 - subsystems
   - a satisfies request will need to have a representation of subsystems. E.g., what are we asking of each?
-    - right now we assume a node resouces request going to the dominant subsystem
+    - right now we assume a node resources request going to the dominant subsystem
   - we will want a function to add a new subsystem, right now we have one dominant for nodes
   - make also a function to delete subsystems
 - we can have top level metrics for quick assessment if cluster is OK

diff --git a/api/v1/rainbow.proto b/api/v1/rainbow.proto
@@ -22,7 +22,7 @@ service RainbowScheduler {
    rpc AcceptJobs(AcceptJobsRequest) returns (AcceptJobsResponse);
 }
 
-// RegisterRequest registers a cluster to the scheduler service 
+// RegisterRequest registers a cluster to the scheduler service
 // The shared secret is required to validate the request
 message RegisterRequest {
   string name = 1;
@@ -57,7 +57,7 @@ message SubmitJobRequest {
 message RequestJobsRequest {
   string cluster = 1;
 
-  // cluster secret given on registration 
+  // cluster secret given on registration
   // No other cluster or user can take a cluster's jobs!
   string secret = 2;
 
@@ -137,4 +137,4 @@ message AcceptJobsResponse {
     RESULT_TYPE_ERROR = 3;
   }
   ResultType status = 1;
-}
+}
diff --git a/backends/memory/service/memory.proto b/backends/memory/service/memory.proto
@@ -10,13 +10,13 @@ service MemoryGraph {
 }
 
 message RegisterRequest {
-    string name = 1; 
-    string payload = 2; 
+    string name = 1;
+    string payload = 2;
     string subsystem = 3;
 }
 
 message SatisfyRequest {
-  string payload = 1; 
+  string payload = 1;
 }
 
 message SatisfyResponse {
@@ -27,7 +27,7 @@ message SatisfyResponse {
     RESULT_TYPE_ERROR = 2;
   }
 
-  repeated string clusters = 1; 
+  repeated string clusters = 1;
   ResultType status = 2;
 }
 
@@ -40,6 +40,6 @@ message Response {
       RESULT_TYPE_UNSPECIFIED = 0;
       RESULT_TYPE_SUCCESS = 1;
       RESULT_TYPE_ERROR = 2;
-    }  
+    }
     ResultType status = 1;
-}
+}
diff --git a/cmd/rainbow/rainbow.go b/cmd/rainbow/rainbow.go
@@ -17,10 +17,10 @@ import (
 )
 
 var (
-	Header = `              
-    •  ┓      
+	Header = `
+    •  ┓
 ┏┓┏┓┓┏┓┣┓┏┓┓┏┏
-┛ ┗┻┗┛┗┗┛┗┛┗┻┛              
+┛ ┗┻┗┛┗┗┛┗┛┗┻┛
 `
 
 	defaultSecret = "chocolate-cookies"

diff --git a/docs/README.md b/docs/README.md
@@ -38,12 +38,12 @@ go run cmd/server/server.go
 2024/02/12 19:38:58 server listening: [::]:50051
 ```
 
-Note that we also provide [containers](https://github.com/orgs/converged-computing/packages?repo_name=rainbow) for running the scheduler, or a client with Flux. For more advanced examples, continue reading commands below or check out our [examples](https://github.com/converged-computing/rainbow/tree/main/docs/examples). 
+Note that we also provide [containers](https://github.com/orgs/converged-computing/packages?repo_name=rainbow) for running the scheduler, or a client with Flux. For more advanced examples, continue reading commands below or check out our [examples](https://github.com/converged-computing/rainbow/tree/main/docs/examples).
 
 ## Commands
 
 Read more about the commands shown above [here](commands.md#commands).
 
 ## Development
 
-Read our [developer guide](#developer.md)
+Read our [developer guide](#developer.md)
diff --git a/docs/commands.md b/docs/commands.md
@@ -47,7 +47,7 @@ graphdatabase:
 clusters: []
 ```
 
-Note that the name of the database corresponds to your choice of graph database. For each, you should read about [databases](databases.md) to 
+Note that the name of the database corresponds to your choice of graph database. For each, you should read about [databases](databases.md) to
 run a corresponding database that your application can interact with.
 
 ## Register
@@ -74,7 +74,7 @@ rainbow register --cluster-name keebler --cluster-nodes ./docs/examples/schedule
 ```
 
 If you are watching the server, you'll see that the registration happens (token, secret, etc) and then the nodes are sent over
-to rainbow. 
+to rainbow.
 
 ```console
 2024/02/28 23:26:17 creating 🌈️ server...
@@ -210,15 +210,15 @@ We are instead going to use a config file provided in the examples directory tha
 know where the work will best run, and are querying rainbow. Note that for a more final design, we would want the interaction to go through another service
 that connects to the same database (to check the clusters you have access to) and then to the graph database directly without touching rainbow.
 However for development, we are going to still interact with the in-memory database grpc to keep things simple, since the authentication (token)
-is known there (and we have not [sent it to a truly external graph database](https://dgraph.io/docs/v21.03/graphql/authorization/authorization-overview/)). 
+is known there (and we have not [sent it to a truly external graph database](https://dgraph.io/docs/v21.03/graphql/authorization/authorization-overview/)).
 Note that the flow (for searching the cluster graph) is going to go directly from the client to the graph, e.g.,:
 
 ```bash
 rainbow submit -> graph database GRPC or query -> response
 ```
 
 And where the middle step is provided from will depend on the graph - the in-memory database will be GRPC from rainbow, for example.
-Assuming that rainbow is running with the in-memory database and we've registered (and our config file has the correct token), 
+Assuming that rainbow is running with the in-memory database and we've registered (and our config file has the correct token),
 here is how we ask for a simple job:
 
 ```bash
@@ -228,7 +228,7 @@ go run ./cmd/rainbow/rainbow.go submit --config-path ./docs/examples/scheduler/r
 2024/02/29 21:04:11 🌈️ starting client (localhost:50051)...
 2024/02/29 21:04:11 submit job: echo hello world
 2024/02/29 21:04:11 🎯️ We found 1 matches! [keebler]
-2024/02/29 21:04:11 
+2024/02/29 21:04:11
 ```
 
 On the server side, we see that it also registers a match! Note that this is coming from rainbow because the in-memory database GRPC hits there, but doesn't necessarily have to.
@@ -247,7 +247,7 @@ go run ./cmd/rainbow/rainbow.go submit --config-path ./docs/examples/scheduler/r
 2024/02/29 21:05:44 🌈️ starting client (localhost:50051)...
 2024/02/29 21:05:44 submit job: echo hello world
 2024/02/29 21:05:44 😥️ There were no matches for this job
-2024/02/29 21:05:44 
+2024/02/29 21:05:44
 ```
 On the server side, we see it cannot be satisfied. We just don't have that many nodes!
 
@@ -263,11 +263,11 @@ Note that the above has a two step process:
 - A quick check against clusters in the graph database if total resources can be satisfied.
 - For that set, a (Vanessa written and janky) "DFS" that likely has bugs that traverses the graph
 
-This will be improved upon with Fluxion and actual graph databases, but this is OK for the prototype. 
+This will be improved upon with Fluxion and actual graph databases, but this is OK for the prototype.
 
 ### 2. Pre-Assignment
 
-When the initial satisfy resquest is done (the step above) and we have a list of clusters, we can then tell rainbow about them.
+When the initial satisfy request is done (the step above) and we have a list of clusters, we can then tell rainbow about them.
 This means that a list of clusters is returned that is passed from the same client request to rainbow
 to do assignment, and logically, if there are no clusters that can satisfy, that response is returned to the client.
 

diff --git a/docs/design.md b/docs/design.md
@@ -26,7 +26,7 @@ We next want to add a simple scheduler, meaning that the new user interaction wo
 
 1. The user submits a job or application specification (e.g., run a container with compatibility information, or an application with the same) to the rainbow scheduler.
 2. The rainbow scheduler then authenticates the user, and can select a best match from a subset of clusters for which the user has access
-  - This requires the user tokens, and eventually something more robust like accounts in a database). - This also requires (finally) a graph in rainbow, making it more of a scheduler 
+  - This requires the user tokens, and eventually something more robust like accounts in a database). - This also requires (finally) a graph in rainbow, making it more of a scheduler
 3. The rainbow scheduler then filters down clusters to those that might match.
   - This requires sending over cluster metadata on the register step
 4. The clusters respond with Yes/No and ETA or cost to choose from.
@@ -53,7 +53,7 @@ What does it mean to request work to run on rainbow?
 1. The first step is seeing if the work can be run on a cluster. E.g., if the total sum of nodes / resources isn't enough, we immediately filter it. We can prune out an entire cluster as a heuristic.
   - High level idea: look for opportunities to prune.
   - There should be a table that summarizes resources.
-2. The next step is figuring out when it can be run on each cluster. 
+2. The next step is figuring out when it can be run on each cluster.
 
 I think the best thing to do here would be to add a graph interface, and then allow the user to select which graph to use.
 This will allow me to experiment / prototype and switch to Fluxion if needed.
@@ -99,4 +99,4 @@ This is currently a prototype that demonstrates we can do a basic interaction fr
 We can run the client alongside any flux instance that has access to this service (and is given some shared secret).
 
 
-[home](/README.md#rainbow-scheduler)
+[home](/README.md#rainbow-scheduler)
diff --git a/docs/developer.md b/docs/developer.md
@@ -1,6 +1,6 @@
 # Developer Guide
 
-This is a short guide to help with development. 
+This is a short guide to help with development.
 
 ### Documentation
 
@@ -9,7 +9,7 @@ javascript. You can edit the markdown files there to update the documentation.
 
 ### Protobuf
 
-We are using [Protocol Buffers](https://developers.google.com/protocol-buffers/)  "Protobuf" to define the API (how the payloads are shared and the methods for communication between client and server). These are defined in [api/v1/sample.proto](api/v1/sample.proto). 
+We are using [Protocol Buffers](https://developers.google.com/protocol-buffers/)  "Protobuf" to define the API (how the payloads are shared and the methods for communication between client and server). These are defined in [api/v1/sample.proto](api/v1/sample.proto).
 You can read more about Protobuf [here](https://github.com/golang/protobuf), I first saw / used them with fluence and am still pretty new.
 
 ```shell
@@ -82,4 +82,4 @@ REGISTRY=vanessa make docker
 
 Further instructions will be added for running these containers in the next round of work - likely we will have a basic kind setup that demonstrates the orchestration.
 
-[home](/README.md#rainbow-scheduler)
+[home](/README.md#rainbow-scheduler)