From 9d2fef13b16da382760d41d3b5bd4e52a0d5d5e5 Mon Sep 17 00:00:00 2001 From: vsoch Date: Sat, 30 Mar 2024 15:09:19 -0600 Subject: [PATCH 1/2] feat: support for selection and match algorithms Problem: we currently have an interface to selection but not match algorithms Solution: improve the algorithm interface by removing the match logic from the graph, and creating an interface that the graph uses. In practice I wanted to use this when I realized I needed a new match algorithm to match in a range of strings. Signed-off-by: vsoch --- cmd/rainbow/config/config.go | 2 +- cmd/rainbow/rainbow.go | 12 ++- cmd/rainbow/receive/receive.go | 2 +- cmd/rainbow/register/register.go | 2 + cmd/rainbow/register/subsystem.go | 2 +- cmd/rainbow/submit/submit.go | 4 +- cmd/server/server.go | 11 ++- docs/algorithms.md | 60 +++++++++++--- docs/commands.md | 46 +++++++++-- docs/examples/scheduler/jobspec-io.yaml | 4 +- docs/examples/scheduler/rainbow-config.yaml | 9 +- pkg/client/endpoint.go | 10 ++- pkg/config/config.go | 45 ++++++---- pkg/graph/algorithm/algorithm.go | 34 ++++---- pkg/graph/backend/backend.go | 5 +- pkg/graph/selection/selection.go | 54 ++++++++++++ pkg/server/endpoint.go | 2 +- pkg/server/server.go | 30 +++---- pkg/types/backend.go | 52 ++++++++++++ plugins/README.md | 6 ++ .../match/match.go} | 47 +++++++++-- plugins/backends/memory/cluster.go | 4 +- plugins/backends/memory/dfs.go | 49 +++++++---- plugins/backends/memory/graph.go | 33 +++++--- plugins/backends/memory/memory.go | 31 +++++-- plugins/backends/memory/resource.go | 9 +- plugins/backends/memory/server.go | 12 ++- plugins/backends/memory/service/memory.pb.go | 82 +++++++++++-------- plugins/backends/memory/service/memory.proto | 1 + plugins/backends/memory/subsystem.go | 17 ++-- plugins/backends/memory/types.go | 51 +----------- .../random/random.go | 4 +- python/v1/rainbow/protos/memory_pb2.py | 24 +++--- python/v1/rainbow/protos/memory_pb2.pyi | 6 +- python/v1/setup.py | 2 +- 35 files changed, 524 insertions(+), 240 deletions(-) create mode 100644 pkg/graph/selection/selection.go create mode 100644 pkg/types/backend.go create mode 100644 plugins/README.md rename plugins/{backends/memory/algorithm.go => algorithms/match/match.go} (75%) rename plugins/{algorithms => selection}/random/random.go (92%) diff --git a/cmd/rainbow/config/config.go b/cmd/rainbow/config/config.go index b084787..0bdb5a2 100644 --- a/cmd/rainbow/config/config.go +++ b/cmd/rainbow/config/config.go @@ -21,7 +21,7 @@ func RunInit(path string) error { // Generate an empty config - providing an empty filename ensures we don't read an existing one // This defaults to an in-memory vanilla database - cfg, err := config.NewRainbowClientConfig("", "rainbow-cluster", "chocolate-cookies", "", "random") + cfg, err := config.NewRainbowClientConfig("", "rainbow-cluster", "chocolate-cookies", "", "random", "match") if err != nil { return err } diff --git a/cmd/rainbow/rainbow.go b/cmd/rainbow/rainbow.go index 5a2ef8e..2dd4eeb 100644 --- a/cmd/rainbow/rainbow.go +++ b/cmd/rainbow/rainbow.go @@ -13,8 +13,9 @@ import ( "github.com/converged-computing/rainbow/pkg/types" // Register database backends and selection algorithms - _ "github.com/converged-computing/rainbow/plugins/algorithms/random" + _ "github.com/converged-computing/rainbow/plugins/algorithms/match" _ "github.com/converged-computing/rainbow/plugins/backends/memory" + _ "github.com/converged-computing/rainbow/plugins/selection/random" ) var ( @@ -49,7 +50,8 @@ func main() { host := parser.String("", "host", 
&argparse.Options{Default: "localhost:50051", Help: "Scheduler server address (host:port)"}) clusterName := parser.String("", "cluster-name", &argparse.Options{Help: "Name of cluster to register"}) graphDatabase := parser.String("", "graph-database", &argparse.Options{Help: "Graph database backend to use"}) - selectionAlgorithm := parser.String("", "select-algorithm", &argparse.Options{Default: "random", Help: "Selection algorithm for graph database (defaults to random)"}) + selectAlgo := parser.String("", "select-algorithm", &argparse.Options{Default: "random", Help: "Selection algorithm for final cluster selection (defaults to random)"}) + matchAlgo := parser.String("", "match-algorithm", &argparse.Options{Default: "match", Help: "Match algorithm for graph database (defaults to match)"}) // Receive Jobs clusterSecret := receiveCmd.String("", "request-secret", &argparse.Options{Help: "Cluster 'secret' to retrieve jobs"}) @@ -111,7 +113,8 @@ func main() { *cfg, *graphDatabase, *subsystem, - *selectionAlgorithm, + *selectAlgo, + *matchAlgo, ) if err != nil { log.Fatalf("Issue with register: %s\n", err) @@ -143,7 +146,8 @@ func main() { *clusterName, *graphDatabase, *cfg, - *selectionAlgorithm, + *selectAlgo, + *matchAlgo, ) if err != nil { log.Fatal(err.Error()) diff --git a/cmd/rainbow/receive/receive.go b/cmd/rainbow/receive/receive.go index 2c89d64..11068f8 100644 --- a/cmd/rainbow/receive/receive.go +++ b/cmd/rainbow/receive/receive.go @@ -27,7 +27,7 @@ func Run( } // Read in the config, if provided, TODO we need a set of tokens here? - cfg, err := config.NewRainbowClientConfig(cfgFile, cluster, secret, "", "") + cfg, err := config.NewRainbowClientConfig(cfgFile, cluster, secret, "", "", "") if err != nil { return err } diff --git a/cmd/rainbow/register/register.go b/cmd/rainbow/register/register.go index 2633705..ec35b46 100644 --- a/cmd/rainbow/register/register.go +++ b/cmd/rainbow/register/register.go @@ -20,6 +20,7 @@ func Run( graphDatabase, subsystem, selectionAlgorithm string, + matchAlgorithm string, ) error { c, err := client.NewClient(host) @@ -34,6 +35,7 @@ func Run( secret, graphDatabase, selectionAlgorithm, + matchAlgorithm, ) if err != nil { return err diff --git a/cmd/rainbow/register/subsystem.go b/cmd/rainbow/register/subsystem.go index 1540d76..608cd66 100644 --- a/cmd/rainbow/register/subsystem.go +++ b/cmd/rainbow/register/subsystem.go @@ -31,7 +31,7 @@ func RegisterSubsystem( return fmt.Errorf("a subsystem name is required to register") } // Read in the config, if provided, command line takes preference - cfg, err := config.NewRainbowClientConfig(cfgFile, "", "", "", "") + cfg, err := config.NewRainbowClientConfig(cfgFile, "", "", "", "", "") if err != nil { return err } diff --git a/cmd/rainbow/submit/submit.go b/cmd/rainbow/submit/submit.go index d3c566a..080a526 100644 --- a/cmd/rainbow/submit/submit.go +++ b/cmd/rainbow/submit/submit.go @@ -18,7 +18,7 @@ func Run( nodes, tasks int, token, jobspec, clusterName, database, cfgFile string, - selectionAlgorithm string, + selectAlgo, matchAlgo string, ) error { c, err := client.NewClient(host) @@ -49,7 +49,7 @@ func Run( } // Read in the config, if provided, TODO we need a set of tokens here? 
- cfg, err := config.NewRainbowClientConfig(cfgFile, "", "", database, selectionAlgorithm) + cfg, err := config.NewRainbowClientConfig(cfgFile, "", "", database, selectAlgo, matchAlgo) if err != nil { return err } diff --git a/cmd/server/server.go b/cmd/server/server.go index d81da71..69ad3b3 100644 --- a/cmd/server/server.go +++ b/cmd/server/server.go @@ -10,8 +10,9 @@ import ( "github.com/converged-computing/rainbow/pkg/types" // Register database backends - _ "github.com/converged-computing/rainbow/plugins/algorithms/random" + _ "github.com/converged-computing/rainbow/plugins/algorithms/match" _ "github.com/converged-computing/rainbow/plugins/backends/memory" + _ "github.com/converged-computing/rainbow/plugins/selection/random" ) var ( @@ -19,7 +20,8 @@ var ( name = "rainbow" sqliteFile = "rainbow.db" configFile = "" - algorithm = "random" + matchAlgo = "match" + selectAlgo = "random" database = "" cleanup = false secret = "chocolate-cookies" @@ -33,13 +35,14 @@ func main() { flag.StringVar(&globalToken, "global-token", name, "global token for cluster access (not recommended)") flag.StringVar(&secret, "secret", secret, "secret to validate registration (default: chocolate-cookies)") flag.StringVar(&database, "graph-database", database, "graph database backend (defaults to memory)") - flag.StringVar(&algorithm, "select-algorithm", algorithm, "selection algorithm for graph (defaults to random)") + flag.StringVar(&selectAlgo, "select-algorithm", selectAlgo, "selection algorithm for final cluster selection (defaults to random)") + flag.StringVar(&matchAlgo, "match-algorithm", matchAlgo, "match algorithm for graph (defaults to match)") flag.StringVar(&configFile, "config", configFile, "rainbow config file") flag.BoolVar(&cleanup, "cleanup", cleanup, "cleanup previous sqlite database (default: false)") flag.Parse() // Load (or generate a default) config file here, if provided - cfg, err := config.NewRainbowClientConfig(configFile, name, secret, database, algorithm) + cfg, err := config.NewRainbowClientConfig(configFile, name, secret, database, selectAlgo, matchAlgo) if err != nil { log.Fatalf("error while creating server: %v", err) } diff --git a/docs/algorithms.md b/docs/algorithms.md index b06ce05..eec890c 100644 --- a/docs/algorithms.md +++ b/docs/algorithms.md @@ -1,16 +1,26 @@ # Algorithms -This is a brief summary of notes about current algorithms. +This is a brief summary of notes about current interfaces that support algorithms. While algorithms are an important part of rainbow, they are implemented via interfaces. There are currently three kinds of interfaces: -## Memory Graph + - [Backends](#graph-backends) are graph database backends. These backends use match algorithms directly. + - [Match Algorithms](#match-algorithms) are used by the graph databases to determine how to match a subsystem to a slot. Each graph backend can have a default and will (likely) support a subset. + - [Selection](#selection-algorithms) algorithms are the last step: given a set of cluster matches, they decide on a final assignment, usually from stateful data. + +These sections will go through the different interfaces and the algorithms afforded by each. + +## Graph Backends + +We currently only support a custom memory graph backend. It would be good to get fluxion in here soon, when it's ready. + +### Memory Graph + The "memory" graph backend is an in-memory graph database that is a custom implementation (by @vsoch). 
Although it is primarily intended for learning, it serves as a good base for development and prototyping too, and warrants a discussion of algorithms involved. For design, see the [design](design.md) document. This will detail basics about the search. -### Depth First Search +#### Depth First Search While Fluxion uses depth first search and up (to support an adjacency list), since we are just using this graph for prototyping, we instead use recursion, which means we can traverse (depth) and not need to find our way back up, because we can return from a recursive call. -#### 1. Quick Check +##### 1. Quick Check We start with a heuristic that says "if I know the totals that are needed for this Jobspec are not available across the cluster, bail out before doing any search." That works as follows. @@ -21,13 +31,13 @@ We start with a heuristic that says "if I know the totals that are needed for t At the end, we have a summary of the total resources requested by the jobspec, and do a quick check to see if any clusters have less than that amount (the totals we already have cached from registration) OR if the clusters are missing a resource entirely. Note that this is only for the dominant subsystem. If a cluster passes these checks, it proceeds into depth first search. -#### 2. Depth First Search +##### 2. Depth First Search Depth first search is going to do checks from the perspective of a slot, because this (as I understand it) is the level where we are "pinning" the request. Thus, we start our search by creating a lookup of slots, which we do from the "tasks" section of the jobspec. We do this because as we are traversing we are going to be randomly hitting slots defined by the user, and we need to be able to look up details about them. Note that this search is still rooted in the dominant subsystem, and other subsystem resources (e.g., IO) are going to be linked off of vertices here. For each cluster in our matches, we then start at the root, which is generally just a node named by the cluster. We get that vertex because, since this memory database has an object oriented design, all children vertices are going to be edges off of that. -##### findSlots +###### findSlots We then define a recursive function `findSlots` that is going to recurse into a slot resource and recurse into child resources under that to count what it finds. For example, if the Jobspec is saying that it wants some number of cores per slot, the `findSlots` function will start at a vertex where the slot is, and then figure out if we have that number. It returns a number that represents that count. Specifically, the function works as follows: @@ -39,7 +49,7 @@ We then define a recursive function `findSlots` that is going to recurse into a The function `findSlots` will (should) return with the number of matches for a specific resource type below a vertex in the graph, allowing us to determine if a subtree can match a request that is specific to a slot. -##### satisfies +###### satisfies Satisfies is a recursive function that determines if a vertex can satisfy a resource need. Given a resource and a vertex root, it returns the count of vertices under the root that satisfy the request. This function uses `findSlots` because as it is traversing, when it finds a `resource.Type` @@ -51,7 +61,7 @@ of type "slot" it will call that function. Akin to `findSlots`, it works as foll The result of satisfies is returning the count for some resource that is satisfied starting at some root, accounting for slots too. 
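To make the shape of this recursion concrete, here is a minimal, self-contained sketch of the counting idea behind `findSlots` and `satisfies`. The types and names here (`vertex`, `countType`) are simplified stand-ins invented for illustration, not the actual rainbow implementation:

```go
package main

import "fmt"

// vertex is a simplified stand-in for a memory graph vertex:
// a typed resource with a size and edges to child vertices.
type vertex struct {
	Type     string
	Size     int32
	Children []*vertex
}

// countType recurses from root and sums the sizes of all vertices
// of the requested type in the subtree. Because we recurse (and just
// return), there is no need to find a way back up the graph.
func countType(root *vertex, want string) int32 {
	var count int32
	if root.Type == want {
		count += root.Size
	}
	for _, child := range root.Children {
		count += countType(child, want)
	}
	return count
}

func main() {
	// cluster -> rack -> two nodes with four cores each
	root := &vertex{Type: "cluster", Size: 1, Children: []*vertex{
		{Type: "rack", Size: 1, Children: []*vertex{
			{Type: "node", Size: 1, Children: []*vertex{{Type: "core", Size: 4}}},
			{Type: "node", Size: 1, Children: []*vertex{{Type: "core", Size: 4}}},
		}},
	}}
	fmt.Println(countType(root, "core")) // prints 8
}
```

The real functions additionally thread the slot's subsystem needs through the recursion and return early once those needs are satisfied.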
-##### traverseResource +###### traverseResource The traverse resource function is the main (also recursive) function to handle traversing the graph. It starts at the top level resource from the Jobspec, and instead of returning a count, returns a boolean to indicate if the match is a yes or no. It has two cases: @@ -75,7 +85,7 @@ if isMatch is true here, add the cluster to matches At this point, the basic list of clusters is returned to the calling function (the interface in rainbow) and passed on to a selection algorithm, which can take some logic about the clusters (likely state) and make a final decision. We currently just randomly select from the set (random is the only selection algorithm available, mainly for development). -## Jobspec Resources +#### Jobspec Resources While we need to have more [discussion](https://github.com/flux-framework/flux-sched/discussions/1153#discussioncomment-8726678) on what constitutes a request for subsystem resources, I am taking a simple approach that will satisfy an initial need to run experiments with compatibility metadata (relevant to subsystems) that use a scheduler. The approach I am taking is the following. You can read about the [design](design.md) and I'll repeat the high level points here. When we register a subsystem, it is a separate graph that (at the highest level) is still organized by cluster name. However, each node in the graph needs to be attached to another node known to itself, or to a vertex in the dominant subsystem graph. When asking for a subsystem resource, we are asking for a check at a specific vertex (defined by the slot) that is relevant for a specific subsystem and resource type. We do this by way of defining "resources" under a task, as shown below: @@ -91,8 +101,8 @@ resources: with: - count: 2 type: core -tasks: -- command: +task: + command: - ior slot: default count: @@ -107,4 +117,32 @@ In the above, we are saying that when we find a slot, we need to see if the vert I understand this is likely not perfect for what everyone wants, but I believe it to be a reasonable first shot, and within the ability of what I can prototype without having fluxion ready yet. +## Match Algorithms + +### Match + +The explicit "match" type is going to look exactly at the type of a subsystem node, and return true (a match) if it matches what the slot needs. For example, given this task: + +```yaml +task: + command: + - ior + slot: default + count: + per_slot: 1 + resources: + io: + match: + - type: shm +``` + +We would look for a node of type "shm" in the io subsystem that is directly attached (an edge) to a node in the dominant subsystem graph. + + +## Selection Algorithms + +### Random + +This algorithm speaks for itself. Given a listing of contender clusters (where all clusters have a match), we randomly choose. + [home](/README.md#rainbow-scheduler) diff --git a/docs/commands.md b/docs/commands.md index b0ac1de..c571e65 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -2,6 +2,30 @@ The following commands are currently supported. For Python, see the [README](https://github.com/converged-computing/rainbow/tree/main/python/v1) in the Python directory. +## Run the Server + +You can run the server (with defaults) as follows: + +```bash +make server +``` +```console +go run cmd/server/server.go --global-token rainbow +2024/03/30 14:56:26 creating 🌈ī¸ server... +2024/03/30 14:56:26 🧩ī¸ selection algorithm: random +2024/03/30 14:56:26 🧩ī¸ graph database: memory +2024/03/30 14:56:26 ✨ī¸ creating rainbow.db... 
+2024/03/30 14:56:26 rainbow.db file created +2024/03/30 14:56:26 🏓ī¸ creating tables... +2024/03/30 14:56:26 🏓ī¸ tables created +2024/03/30 14:56:26 ⚠ī¸ WARNING: global-token is set, use with caution. +2024/03/30 14:56:26 starting scheduler server: rainbow v0.1.1-draft +2024/03/30 14:56:26 🧠ī¸ Registering memory graph database... +2024/03/30 14:56:26 server listening: [::]:50051 +``` +The console output above shows the go command that is run. You could also build the `rainbow` binary with `make build` and use that instead. +All subsequent commands require a server to be running. + ## Prepare to Register The registration step happens when a cluster joins the rainbow scheduler. The registering cluster submits a [JGF format](https://github.com/converged-computing/jsongraph-go) resource graph. @@ -25,8 +49,8 @@ Then we give that directory to compspec, and used the cluster creation plugin to compspec create nodes --cluster-name cluster-red --node-dir ./docs/rainbow/cluster/ --nodes-output ./cluster-nodes.json ``` -That example is provided in [examples](examples/scheduler/cluster-nodes.json). This is the cluster metadata that we need to send over to the rainbow scheduler on the register step, -discussed next. +That example is provided in [examples](examples/scheduler/cluster-nodes.json) if you want to look. The high level TL;DR of this step is that you need your nodes in JGF format to register, which will +be shown after the config section, next. ## Config @@ -40,10 +64,17 @@ This generates the following file. ```yaml scheduler: - secret: chocolate-cookied + secret: chocolate-cookies name: rainbow-cluster + algorithms: + selection: + name: random + match: + name: match +cluster: {} graphdatabase: name: memory + host: 127.0.0.1:50051 clusters: [] ``` @@ -386,10 +417,11 @@ or more likely is defined in the rainbow cluster configuration file. 
As an example: scheduler: secret: chocolate-cookies name: rainbow-cluster - algorithm: - name: randon - options: - key: value + algorithms: + selection: + name: random + options: + key: value graphdatabase: name: memory diff --git a/docs/examples/scheduler/jobspec-io.yaml b/docs/examples/scheduler/jobspec-io.yaml index 17b1229..c411d95 100644 --- a/docs/examples/scheduler/jobspec-io.yaml +++ b/docs/examples/scheduler/jobspec-io.yaml @@ -9,8 +9,8 @@ resources: with: - count: 2 type: core -tasks: -- command: +task: + command: - ior slot: default count: diff --git a/docs/examples/scheduler/rainbow-config.yaml b/docs/examples/scheduler/rainbow-config.yaml index 40e7157..a99ba56 100644 --- a/docs/examples/scheduler/rainbow-config.yaml +++ b/docs/examples/scheduler/rainbow-config.yaml @@ -1,11 +1,14 @@ scheduler: secret: chocolate-cookies name: keebler - algorithm: - name: random + algorithms: + selection: + name: random + match: + name: match cluster: name: keebler - secret: d8d50175-4d9f-4dc0-8f95-4893d26618ac + secret: a5a8ebd8-6b22-44d4-aafe-6e81f92124ce graphdatabase: name: memory host: 127.0.0.1:50051 diff --git a/pkg/client/endpoint.go b/pkg/client/endpoint.go index eb21ff0..1065a8e 100644 --- a/pkg/client/endpoint.go +++ b/pkg/client/endpoint.go @@ -10,6 +10,7 @@ import ( pb "github.com/converged-computing/rainbow/pkg/api/v1" "github.com/converged-computing/rainbow/pkg/config" "github.com/converged-computing/rainbow/pkg/graph" + "github.com/converged-computing/rainbow/pkg/graph/algorithm" "github.com/converged-computing/rainbow/pkg/graph/backend" "github.com/converged-computing/rainbow/pkg/utils" "github.com/pkg/errors" @@ -44,6 +45,13 @@ func (c *RainbowClient) SubmitJob( return response, err } + // Prepare the subsystem match algorithm + matchAlgo, err := algorithm.Get(cfg.Scheduler.Algorithms.Match.Name) + if err != nil { + log.Fatal(err) + } + matchAlgo.Init(cfg.Scheduler.Algorithms.Match.Options) + // TODO we need to have a check here to see what clusters // the user has permission to do. Either that can be represented in // the graph database (and the call goes directly to it) or it // (but we limit our search). Likely the first is preferable. // Ask the graphDB if the jobspec can be satisfied // TODO what does a match look like? 
- matches, err := graphDB.Satisfies(job) + matches, err := graphDB.Satisfies(job, matchAlgo) if err != nil { return response, err } diff --git a/pkg/config/config.go b/pkg/config/config.go index b1cbe33..24d9b9e 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -9,8 +9,9 @@ import ( ) var ( - defaultSelectionAlgorithm = "random" - defaultGraphDatabase = "memory" + DefaultSelectionAlgorithm = "random" + DefaultMatchAlgorithm = "match" + DefaultGraphDatabase = "memory" ) // RainbowConfig is a static file that holds configuration parameters @@ -36,13 +37,18 @@ type RainbowScheduler struct { // Secret to register with the cluster // Absolutely should come from environment - Secret string `json:"secret" yaml:"secret" envconfig:"RAINBOW_SECRET"` - Name string `json:"name" yaml:"name" envconfig:"RAINBOW_SCHEDULER_NAME"` - Algorithm SelectionAlgorithm `json:"algorithm" yaml:"algorithm"` + Secret string `json:"secret" yaml:"secret" envconfig:"RAINBOW_SECRET"` + Name string `json:"name" yaml:"name" envconfig:"RAINBOW_SCHEDULER_NAME"` + Algorithms Algorithms `json:"algorithms" yaml:"algorithms"` } -type SelectionAlgorithm struct { - Name string `json:"name" yaml:"name" envconfig:"RAINBOW_SCHDULER_ALGORITHM"` +type Algorithms struct { + Selection Algorithm `json:"selection" yaml:"selection"` + Match Algorithm `json:"match" yaml:"match"` +} + +type Algorithm struct { + Name string `json:"name" yaml:"name,omitempty"` Options map[string]string `json:"options,omitempty" yaml:"options,omitempty"` } // When used for a "self" cluster, we have a name and secret // When used for a "submit to" cluster, we have a name and token type ClusterCredential struct { - Name string `json:"name" yaml:"name"` + Name string `json:"name,omitempty" yaml:"name,omitempty"` Token string `json:"token,omitempty" yaml:"token,omitempty"` Secret string `json:"secret,omitempty" yaml:"secret,omitempty"` } @@ -71,6 +77,20 @@ func (c *RainbowConfig) ToYaml() (string, error) { return string(out), nil } +// setAlgorithms sets the algorithms for the rainbow scheduler +func (c *RainbowConfig) setAlgorithms(selectAlgo, matchAlgo string) { + sAlgo := Algorithm{Name: DefaultSelectionAlgorithm, Options: map[string]string{}} + mAlgo := Algorithm{Name: DefaultMatchAlgorithm, Options: map[string]string{}} + c.Scheduler.Algorithms.Selection = sAlgo + c.Scheduler.Algorithms.Match = mAlgo + if selectAlgo != "" { + c.Scheduler.Algorithms.Selection.Name = selectAlgo + } + if matchAlgo != "" { + c.Scheduler.Algorithms.Match.Name = matchAlgo + } +} + // ToJson serializes to json func (c *RainbowConfig) ToJson() (string, error) { out, err := json.MarshalIndent(c, "", " ") @@ -108,6 +128,7 @@ func NewRainbowClientConfig( secret, database, selectionAlgorithm string, + matchAlgorithm string, ) (*RainbowConfig, error) { config := RainbowConfig{} @@ -130,17 +151,13 @@ func NewRainbowClientConfig( } // By default we use the in-memory (vanilla, simple) database - config.GraphDatabase.Name = defaultGraphDatabase + config.GraphDatabase.Name = DefaultGraphDatabase if database != "" { config.GraphDatabase.Name = database } // Scheduling algorithm defaults to random selection - algo := SelectionAlgorithm{Name: defaultSelectionAlgorithm, Options: map[string]string{}} - config.Scheduler.Algorithm = algo - if selectionAlgorithm == "" { - config.Scheduler.Algorithm.Name = selectionAlgorithm - } + config.setAlgorithms(selectionAlgorithm, matchAlgorithm) // Default host, for now is always this if 
config.GraphDatabase.Host == "" { diff --git a/pkg/graph/algorithm/algorithm.go b/pkg/graph/algorithm/algorithm.go index 08bddf7..565d49f 100644 --- a/pkg/graph/algorithm/algorithm.go +++ b/pkg/graph/algorithm/algorithm.go @@ -1,42 +1,48 @@ package algorithm +// An algorithm is used to match a subsystem to a slot + import ( "fmt" "log" + + v1 "github.com/compspec/jobspec-go/pkg/jobspec/experimental" + "github.com/converged-computing/rainbow/pkg/types" ) // Lookup of Algorthms var ( - Algorithms map[string]SelectionAlgorithm + MatchAlgorithms map[string]MatchAlgorithm ) // A SelectionAlgorithm is used by the rainbow scheduler to make // a final decision about assigning work to a group of clusters. -type SelectionAlgorithm interface { +type MatchAlgorithm interface { Name() string Description() string Init(map[string]string) error - // Take a list of contenders and select based on algorithm - Select([]string) (string, error) + // A MatchAlgorithm needs to take a slot and determine if it matches + GetSlotResourceNeeds(slot *v1.Task) *types.SlotResourceNeeds + CheckSubsystemEdge(slotNeeds *types.SlotResourceNeeds, edge *types.Edge, vtx *types.Vertex) } -// List returns known backends -func List() map[string]SelectionAlgorithm { - return Algorithms +// List returns known algorithms +func List() map[string]MatchAlgorithm { + return MatchAlgorithms } // Register a new backend by name -func Register(algorithm SelectionAlgorithm) { - if Algorithms == nil { - Algorithms = make(map[string]SelectionAlgorithm) +func Register(algorithm MatchAlgorithm) { + if MatchAlgorithms == nil { + MatchAlgorithms = make(map[string]MatchAlgorithm) } - Algorithms[algorithm.Name()] = algorithm + MatchAlgorithms[algorithm.Name()] = algorithm } // Get a backend by name -func Get(name string) (SelectionAlgorithm, error) { - for algoName, entry := range Algorithms { +func Get(name string) (MatchAlgorithm, error) { + for algoName, entry := range MatchAlgorithms { if algoName == name { return entry, nil } @@ -45,7 +51,7 @@ func Get(name string) (SelectionAlgorithm, error) { } // GetOrFail ensures we can find the entry -func GetOrFail(name string) SelectionAlgorithm { +func GetOrFail(name string) MatchAlgorithm { algorithm, err := Get(name) if err != nil { log.Fatalf("Failed to get algorithm: %v", err) diff --git a/pkg/graph/backend/backend.go b/pkg/graph/backend/backend.go index 6134942..34124e3 100644 --- a/pkg/graph/backend/backend.go +++ b/pkg/graph/backend/backend.go @@ -7,6 +7,7 @@ import ( js "github.com/compspec/jobspec-go/pkg/jobspec/experimental" "github.com/converged-computing/jsongraph-go/jsongraph/v2/graph" + "github.com/converged-computing/rainbow/pkg/graph/algorithm" "google.golang.org/grpc" ) @@ -29,8 +30,8 @@ type GraphBackend interface { Description() string Init(map[string]string) error - // TODO we need a jobspec for here - Satisfies(*js.Jobspec) ([]string, error) + // Determine if a jobspec can be satified in the graph + Satisfies(*js.Jobspec, algorithm.MatchAlgorithm) ([]string, error) // Register an additional grpc server RegisterService(*grpc.Server) error diff --git a/pkg/graph/selection/selection.go b/pkg/graph/selection/selection.go new file mode 100644 index 0000000..89a0fab --- /dev/null +++ b/pkg/graph/selection/selection.go @@ -0,0 +1,54 @@ +package selection + +import ( + "fmt" + "log" +) + +// Lookup of Algorthms +var ( + SelectionAlgorithms map[string]SelectionAlgorithm +) + +// A SelectionAlgorithm is used by the rainbow scheduler to make +// a final decision about assigning work to a group 
of clusters. +type SelectionAlgorithm interface { + Name() string + Description() string + Init(map[string]string) error + + // Take a list of contenders and select based on algorithm + Select([]string) (string, error) +} + +// List returns known selection algorithms +func List() map[string]SelectionAlgorithm { + return SelectionAlgorithms +} + +// Register a new selection algorithm by name +func Register(algorithm SelectionAlgorithm) { + if SelectionAlgorithms == nil { + SelectionAlgorithms = make(map[string]SelectionAlgorithm) + } + SelectionAlgorithms[algorithm.Name()] = algorithm +} + +// Get a selection algorithm by name +func Get(name string) (SelectionAlgorithm, error) { + for algoName, entry := range SelectionAlgorithms { + if algoName == name { + return entry, nil + } + } + return nil, fmt.Errorf("did not find algorithm named %s", name) +} + +// GetOrFail ensures we can find the entry +func GetOrFail(name string) SelectionAlgorithm { + algorithm, err := Get(name) + if err != nil { + log.Fatalf("Failed to get algorithm: %v", err) + } + return algorithm +} diff --git a/pkg/server/endpoint.go b/pkg/server/endpoint.go index e9fccc6..5672414 100644 --- a/pkg/server/endpoint.go +++ b/pkg/server/endpoint.go @@ -120,7 +120,7 @@ func (s *Server) SubmitJob(_ context.Context, in *pb.SubmitJobRequest) (*pb.Subm log.Printf("📝ī¸ received job %s for %d contender clusters", in.Name, len(clusters)) // Use the algorithm to select a final cluster - selected, err := s.algorithm.Select(clusters) + selected, err := s.selectionAlgorithm.Select(clusters) if err != nil { return nil, err } diff --git a/pkg/server/server.go b/pkg/server/server.go index fd70254..d1ba873 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -10,8 +10,8 @@ import ( pb "github.com/converged-computing/rainbow/pkg/api/v1" "github.com/converged-computing/rainbow/pkg/config" "github.com/converged-computing/rainbow/pkg/database" - "github.com/converged-computing/rainbow/pkg/graph/algorithm" "github.com/converged-computing/rainbow/pkg/graph/backend" + "github.com/converged-computing/rainbow/pkg/graph/selection" "github.com/pkg/errors" "google.golang.org/grpc" @@ -42,8 +42,8 @@ type Server struct { host string // graph database handle - graph backend.GraphBackend - algorithm algorithm.SelectionAlgorithm + graph backend.GraphBackend + selectionAlgorithm selection.SelectionAlgorithm } // NewServer creates a new "scheduler" server @@ -66,12 +66,12 @@ func NewServer( } // Prepare the selection algorithm - algo, err := algorithm.Get(cfg.Scheduler.Algorithm.Name) + selectAlgo, err := selection.Get(cfg.Scheduler.Algorithms.Selection.Name) if err != nil { log.Fatal(err) } - algo.Init(cfg.Scheduler.Algorithm.Options) - log.Printf("🧩ī¸ selection algorithm: %v", algo.Name()) + selectAlgo.Init(cfg.Scheduler.Algorithms.Selection.Options) + log.Printf("🧩ī¸ selection algorithm: %v", selectAlgo.Name()) // Load the graph backend! 
graphDB, err := backend.Get(cfg.GraphDatabase.Name) if err != nil { log.Fatal(err) } - // Run init with any options from the config + // Run init with any options from the config, and the match algorithm graphDB.Init(cfg.GraphDatabase.Options) log.Printf("🧩ī¸ graph database: %v", graphDB.Name()) @@ -90,14 +90,14 @@ } return &Server{ - db: db, - name: cfg.Scheduler.Name, - graph: graphDB, - version: version, - secret: cfg.Scheduler.Secret, - globalToken: globalToken, - algorithm: algo, - host: host, + db: db, + name: cfg.Scheduler.Name, + graph: graphDB, + version: version, + secret: cfg.Scheduler.Secret, + globalToken: globalToken, + selectionAlgorithm: selectAlgo, + host: host, }, nil } diff --git a/pkg/types/backend.go b/pkg/types/backend.go new file mode 100644 index 0000000..9dc14aa --- /dev/null +++ b/pkg/types/backend.go @@ -0,0 +1,52 @@ +package types + +import ( + "github.com/converged-computing/jsongraph-go/jsongraph/metadata" +) + +// A Resource is a collection of attributes we load from a node +// intending to put into the graph, and associated functions +type Resource struct { + Size int32 + Type string + Unit string + // The request coming in can know about the type + Metadata metadata.Metadata +} + +// A vertex is defined by an identifier. We use an int +// instead of a string because it's faster. Edges are other +// vertices (and their identifiers) it's connected to. +type Vertex struct { + Identifier int `json:"identifier"` + Edges map[int]*Edge `json:"edges"` + Size int32 `json:"size"` + Unit string `json:"unit"` + Type string `json:"type"` + + // Link to another subsystem vertex + Subsystems map[string]map[int]*Edge `json:"subsystems"` + + // Less commonly accessed (and standardized) metadata + Metadata metadata.Metadata +} + +// An edge in the graph has a source vertex (where it's defined from) +// and a destination (the Vertex field below) +type Edge struct { + Weight int `json:"weight"` + Vertex *Vertex `json:"vertex"` + Relation string `json:"relation"` + Subsystem string `json:"subsystem"` +} + +// Serialize slot resource needs into a struct that is easier to parse +type SlotResourceNeeds struct { + Satisfied bool + Subsystems []SubsystemNeeds +} + +type SubsystemNeeds struct { + Name string + Attributes map[string]bool +} diff --git a/plugins/README.md b/plugins/README.md new file mode 100644 index 0000000..fa41810 --- /dev/null +++ b/plugins/README.md @@ -0,0 +1,6 @@ +# Plugins + + - [backends](backends): the graph database backends used + - [algorithms](algorithms): the match algorithms used to match a subsystem to a slot (loaded by graph backends) + - Each graph backend can choose a different default, if desired + - [selection](selection): the final cluster selection algorithms (e.g., selecting a cluster from the final matched list) diff --git a/plugins/backends/memory/algorithm.go b/plugins/algorithms/match/match.go similarity index 75% rename from plugins/backends/memory/algorithm.go rename to plugins/algorithms/match/match.go index 1d22954..3c55c3a 100644 --- a/plugins/backends/memory/algorithm.go +++ b/plugins/algorithms/match/match.go @@ -1,11 +1,31 @@ -package memory +package match import ( "fmt" v1 "github.com/compspec/jobspec-go/pkg/jobspec/experimental" + "github.com/converged-computing/rainbow/pkg/graph/algorithm" + "github.com/converged-computing/rainbow/pkg/types" ) +// Match is a simple match algorithm // that matches on the exact type of a subsystem node 
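This match plugin, like the selection plugins, is wired in through a small name-based registry: each plugin registers itself in an `init` function, which runs when the plugin package is imported for side effects (the blank `_` imports in `cmd/rainbow/rainbow.go` and `cmd/server/server.go`). A minimal, hypothetical sketch of that pattern, with names invented for illustration (`Plugin`, `registry`, `register`), not the actual rainbow code:

```go
package main

import "fmt"

// Plugin is a minimal stand-in for the MatchAlgorithm or
// SelectionAlgorithm interfaces: anything with a name.
type Plugin interface {
	Name() string
}

// registry maps plugin names to implementations, playing the role
// of MatchAlgorithms / SelectionAlgorithms in rainbow.
var registry = map[string]Plugin{}

// register adds a plugin by name; in rainbow this is
// called from each plugin package's init function.
func register(p Plugin) {
	registry[p.Name()] = p
}

type random struct{}

func (r random) Name() string { return "random" }

func main() {
	// In rainbow this registration is triggered by a blank import like:
	//   _ "github.com/converged-computing/rainbow/plugins/selection/random"
	register(random{})
	fmt.Println(registry["random"].Name()) // prints random
}
```

Registration by name is what lets the config file select algorithms as plain strings (e.g., `match` and `random`) and have them resolved at runtime.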
+ +type MatchType struct{} + +var ( + description = "match type for a subsystem for job assignment" + selectorName = "match" +) + +func (s MatchType) Name() string { + return selectorName +} + +func (s MatchType) Description() string { + return description +} + // getSlotResourceNeeds assumes a subsystem request as follows: /* tasks: - command: @@ -22,7 +42,7 @@ import ( // available. This can eventually take a count, but right now is a boolean match // and this is done intentionally to satisfy the simplest scheduler experiment // prototype where we are more interested in features -func getSlotResourceNeeds(slot *v1.Task) *SlotResourceNeeds { +func (m MatchType) GetSlotResourceNeeds(slot *v1.Task) *types.SlotResourceNeeds { sNeeds := map[string]map[string]bool{} for subsystem, needs := range slot.Resources { @@ -71,25 +91,25 @@ func getSlotResourceNeeds(slot *v1.Task) *SlotResourceNeeds { } } // Parse into the slot resource needs - needs := []SubsystemNeeds{} + needs := []types.SubsystemNeeds{} for subsystem, sneeds := range sNeeds { - subsystemNeeds := SubsystemNeeds{Name: subsystem, Attributes: sneeds} + subsystemNeeds := types.SubsystemNeeds{Name: subsystem, Attributes: sneeds} needs = append(needs, subsystemNeeds) } // If we don't have any needs, the slot is satisfied for that - slotNeeds := &SlotResourceNeeds{Subsystems: needs} + slotNeeds := &types.SlotResourceNeeds{Subsystems: needs} if len(needs) == 0 { slotNeeds.Satisfied = true } - fmt.Printf(" => Assessing needs for slot: %v\n", slotNeeds) + // fmt.Printf(" => Assessing needs for slot: %v\n", slotNeeds) return slotNeeds } // checkSubsystemEdge evaluates a node edge in the dominant subsystem for a // subsystem attribute. E.g., if the io subsystem provides an attribute the slot needs. // Vertex (from dominant subsystem) is only passed in for informational purposes -func checkSubsystemEdge(slotNeeds *SlotResourceNeeds, edge *Edge, vtx *Vertex) { +func (m MatchType) CheckSubsystemEdge(slotNeeds *types.SlotResourceNeeds, edge *types.Edge, vtx *types.Vertex) { // Return early if we are satisfied if slotNeeds.Satisfied { @@ -138,3 +158,16 @@ func checkSubsystemEdge(slotNeeds *SlotResourceNeeds, edge *Edge, vtx *Vertex) { // is satisfied without needing to parse again slotNeeds.Satisfied = allSatisfied } + +// Init provides extra initialization functionality, if needed +// An algorithm can accept options from the configuration here +func (s MatchType) Init(options map[string]string) error { + // If an algorithm has options, they can be set here + return nil +} + +// Add the match algorithm to be known to rainbow +func init() { + algo := MatchType{} + algorithm.Register(algo) +} diff --git a/plugins/backends/memory/cluster.go b/plugins/backends/memory/cluster.go index 0d65786..a0a63d6 100644 --- a/plugins/backends/memory/cluster.go +++ b/plugins/backends/memory/cluster.go @@ -25,6 +25,7 @@ type ClusterGraph struct { // The dominant subsystem is a lookup in the subsystem map // It defaults to nodes (node resources) dominantSubsystem string + quiet bool } // Dominant subsystem gets the dominant subsystem @@ -96,7 +97,7 @@ func (c *ClusterGraph) validateNodes(nodes *jgf.JsonGraph) (error, int, int) { // NewClusterGraph creates a new cluster graph with a dominant subsystem // We assume the dominant is hard coded to be containment -func NewClusterGraph(name string, domSubsystem string) *ClusterGraph { +func NewClusterGraph(name string, domSubsystem string, quiet bool) *ClusterGraph { // If not defined, set the dominant subsystem if domSubsystem == "" { @@ -111,6 
+112,7 @@ func NewClusterGraph(name string, domSubsystem string) *ClusterGraph { Name: name, subsystem: subsystems, dominantSubsystem: defaultDominantSubsystem, + quiet: quiet, } return g } diff --git a/plugins/backends/memory/dfs.go b/plugins/backends/memory/dfs.go index 84a3f96..79c6758 100644 --- a/plugins/backends/memory/dfs.go +++ b/plugins/backends/memory/dfs.go @@ -4,6 +4,8 @@ import ( "fmt" v1 "github.com/compspec/jobspec-go/pkg/jobspec/experimental" + "github.com/converged-computing/rainbow/pkg/graph/algorithm" + "github.com/converged-computing/rainbow/pkg/types" ) // DFSForMatch is a depth first search to determine if the cluster matches // it starts by looking at total cluster resources on the top level, // and then traverses into those that match the first check // THIS IS EXPERIMENTAL and likely wrong, or missing details, // which is OK as we will only be using it for prototyping. -func (g *ClusterGraph) DFSForMatch(jobspec *v1.Jobspec) (bool, error) { +func (g *ClusterGraph) DFSForMatch( + jobspec *v1.Jobspec, + matcher algorithm.MatchAlgorithm, +) (bool, error) { // Get subsystem (will get dominant, this can eventually take a variable) subsystem := g.getSubsystem("") @@ -72,16 +77,22 @@ func (g *ClusterGraph) DFSForMatch(jobspec *v1.Jobspec) (bool, error) { } // If it's a superficial match, search more deeply if isMatch { - return g.depthFirstSearch(ss, jobspec) + return g.depthFirstSearch(ss, jobspec, matcher) } return false, nil } // depthFirstSearch fully searches the graph, finding a list of matches for a jobspec -func (g *ClusterGraph) depthFirstSearch(dom *Subsystem, jobspec *v1.Jobspec) (bool, error) { +func (g *ClusterGraph) depthFirstSearch( + dom *Subsystem, + jobspec *v1.Jobspec, + matcher algorithm.MatchAlgorithm, +) (bool, error) { // Note that in the experimental version we have one task and thus one slot - fmt.Printf(" 🎰ī¸ Slots that need to be satisfied\n") + if !g.quiet { + fmt.Printf(" 🎰ī¸ Slots that need to be satisfied\n") + } slots := map[string]*v1.Task{} // If a slot isn't defined for the task, assume the slot is at the top level @@ -95,7 +106,9 @@ func (g *ClusterGraph) depthFirstSearch(dom *Subsystem, jobspec *v1.Jobspec) (bo // If we don't have jobspec.Task.Resources, no slot to search for. // Return early based on top level counts if len(jobspec.Task.Resources) == 0 { - fmt.Printf(" 🎰ī¸ No resources defined, top level counts satisfied so cluster is match\n") + if !g.quiet { + fmt.Printf(" 🎰ī¸ No resources defined, top level counts satisfied so cluster is match\n") + } return true, nil } @@ -106,8 +119,9 @@ } // Look through our potential matching clusters - fmt.Printf("\n 🔍ī¸ Exploring cluster %s deeper with depth first search\n", g.Name) - + if !g.quiet { + fmt.Printf("\n 🔍ī¸ Exploring cluster %s deeper with depth first search\n", g.Name) + } // This is the root vertex of the cluster "cluster" we start with it // We can store this instead, but for now we can assume the index 0 // is the root, as it is the first one made / added @@ -119,8 +133,8 @@ // of matches for the slot. 
This returns a count of the matching // slots under a parent level, recursing into child vertices until // we find the right type (and take a count) or keep exploring - var findSlots func(vtx *Vertex, slot *v1.Resource, slotNeeds *SlotResourceNeeds, slotsFound int32) int32 - findSlots = func(vtx *Vertex, resource *v1.Resource, slotNeeds *SlotResourceNeeds, slotsFound int32) int32 { + var findSlots func(vtx *types.Vertex, slot *v1.Resource, slotNeeds *types.SlotResourceNeeds, slotsFound int32) int32 + findSlots = func(vtx *types.Vertex, resource *v1.Resource, slotNeeds *types.SlotResourceNeeds, slotsFound int32) int32 { // This is just for debugging lookingFor := "" @@ -133,15 +147,18 @@ // fmt.Printf(" => Searching for %s and resource type %s in subsystem %v with %d subsystem edges\n", lookingFor, resource.Type, sName, len(edges)) for _, child := range edges { - fmt.Printf(" Found subsystem edge %s with type %s\n", sName, child.Vertex.Type) - + if !g.quiet { + fmt.Printf(" Found subsystem edge %s with type %s\n", sName, child.Vertex.Type) + } // Check if the subsystem edge satisfies the needs of the slot // This will update the slotNeeds.Satisfied - checkSubsystemEdge(slotNeeds, child, vtx) + matcher.CheckSubsystemEdge(slotNeeds, child, vtx) // Return early if minimum needs are satisfied if slotNeeds.Satisfied { - fmt.Printf(" Minimum slot needs are satisfied at %s for %s at %s, returning early.\n", vtx.Type, child.Subsystem, child.Vertex.Type) + if !g.quiet { + fmt.Printf(" Minimum slot needs are satisfied at %s for %s at %s, returning early.\n", vtx.Type, child.Subsystem, child.Vertex.Type) + } return slotsFound + vtx.Size } } @@ -184,7 +201,7 @@ // matching labels, because that's all we need for the early // scheduling experiments. This can eventually be a setting, but right // now is a single algorithm (function) since there is only one. - slotResourceNeeds := getSlotResourceNeeds(slot) + slotResourceNeeds := matcher.GetSlotResourceNeeds(slot) // TODO: how does the slot Count (under tasks) fit in? 
// I don't understand what these counts are, because they seem like MPI tasks @@ -202,7 +219,9 @@ func (g *ClusterGraph) depthFirstSearch(dom *Subsystem, jobspec *v1.Jobspec) (bo if resource.With != nil { for _, subresource := range resource.With { slotsFound += findSlots(vertex, &subresource, slotResourceNeeds, slotsFound) - fmt.Printf("Slots found %d/%d for vertex %s\n", slotsFound, slotsNeeded, vertex.Type) + if !g.quiet { + fmt.Printf("Slots found %d/%d for vertex %s\n", slotsFound, slotsNeeded, vertex.Type) + } } } // The slot is satisfied and we can continue searching resources diff --git a/plugins/backends/memory/graph.go b/plugins/backends/memory/graph.go index bd6bcec..cd35465 100644 --- a/plugins/backends/memory/graph.go +++ b/plugins/backends/memory/graph.go @@ -14,6 +14,7 @@ import ( js "github.com/compspec/jobspec-go/pkg/jobspec/experimental" jgf "github.com/converged-computing/jsongraph-go/jsongraph/v2/graph" "github.com/converged-computing/rainbow/pkg/graph" + "github.com/converged-computing/rainbow/pkg/graph/algorithm" "github.com/converged-computing/rainbow/pkg/utils" "github.com/converged-computing/rainbow/plugins/backends/memory/service" ) @@ -23,6 +24,7 @@ type Graph struct { Clusters map[string]*ClusterGraph lock sync.RWMutex backupFile string + quiet bool // The dominant subsystem for all clusters, if desired to set dominantSubsystem string @@ -97,7 +99,7 @@ func (g *Graph) LoadClusterNodes( } // Create a new ClusterGraph - clusterG := NewClusterGraph(clusterName, subsystem) + clusterG := NewClusterGraph(clusterName, subsystem, g.quiet) err := clusterG.LoadClusterNodes(nodes, subsystem) if err != nil { return err @@ -110,7 +112,10 @@ func (g *Graph) LoadClusterNodes( // 1. Read in and populate the payload into a jobspec // 2. Determine by way of a depth first search if we can satisfy // 3. Return the names of the cluster -func (g *Graph) Satisfies(payload string) (*service.SatisfyResponse, error) { +func (g *Graph) Satisfies( + payload string, + matcher algorithm.MatchAlgorithm, +) (*service.SatisfyResponse, error) { response := service.SatisfyResponse{} // Serialize back into Jobspec @@ -121,13 +126,16 @@ func (g *Graph) Satisfies(payload string) (*service.SatisfyResponse, error) { } // Tell the user /logs we are looking for a match - fmt.Printf("\n🍇ī¸ Satisfy request to Graph 🍇ī¸\n") - fmt.Printf(" jobspec: %s\n", payload) + if !g.quiet { + fmt.Printf("\n🍇ī¸ Satisfy request to Graph 🍇ī¸\n") + fmt.Printf(" jobspec: %s\n", payload) + } matches := []string{} + notMatches := []string{} // Determine if each cluster can match for clusterName, clusterG := range g.Clusters { - isMatch, err := clusterG.DFSForMatch(&jobspec) + isMatch, err := clusterG.DFSForMatch(&jobspec, matcher) // Return early if we hit an error if err != nil { @@ -137,15 +145,20 @@ func (g *Graph) Satisfies(payload string) (*service.SatisfyResponse, error) { if isMatch { matches = append(matches, clusterName) } else { - fmt.Printf(" match: đŸŽ¯ī¸ cluster %s does not have sufficient resources and is NOT a match\n", clusterName) + notMatches = append(notMatches, clusterName) + if !g.quiet { + fmt.Printf(" match: đŸŽ¯ī¸ cluster %s does not have sufficient resources and is NOT a match\n", clusterName) + } } + } if len(matches) == 0 { fmt.Println(" match: đŸ˜Ĩī¸ no clusters could satisfy this request. 
We are sad") - } - // Show all matches at once - for _, match := range matches { - fmt.Printf(" match: ✅ī¸ cluster %s has enough resources and is a match\n", match) + } else { + fmt.Printf(" match: ✅ī¸ there are %d matches with sufficient resources\n", len(matches)) + if len(notMatches) > 0 { + fmt.Printf(" đŸŽ¯ī¸ there are %d clusters that do not match\n", len(notMatches)) + } } // Add the matches to the response response.Clusters = matches diff --git a/plugins/backends/memory/memory.go b/plugins/backends/memory/memory.go index 1f84ed1..a86fc89 100644 --- a/plugins/backends/memory/memory.go +++ b/plugins/backends/memory/memory.go @@ -10,6 +10,7 @@ import ( js "github.com/compspec/jobspec-go/pkg/jobspec/experimental" jgf "github.com/converged-computing/jsongraph-go/jsongraph/v2/graph" + "github.com/converged-computing/rainbow/pkg/graph/algorithm" "github.com/converged-computing/rainbow/pkg/graph/backend" "github.com/converged-computing/rainbow/plugins/backends/memory/service" "google.golang.org/grpc" @@ -72,17 +73,13 @@ func (m MemoryGraph) RegisterService(s *grpc.Server) error { return nil } -// Add the backend to be known to rainbow -func init() { - - graph := MemoryGraph{} - backend.Register(graph) -} - // Satisfies - determine what clusters satisfy a jobspec request // Since this is called from the client function, it's technically // running from the client (not from the server) -func (g MemoryGraph) Satisfies(jobspec *js.Jobspec) ([]string, error) { +func (g MemoryGraph) Satisfies( + jobspec *js.Jobspec, + matcher algorithm.MatchAlgorithm, +) ([]string, error) { matches := []string{} var opts []grpc.DialOption @@ -110,12 +107,21 @@ func (g MemoryGraph) Satisfies(jobspec *js.Jobspec) ([]string, error) { // Init provides extra initialization functionality, if needed // The in memory database can take a backup file if desired -func (g MemoryGraph) Init(options map[string]string) error { +func (g MemoryGraph) Init( + options map[string]string, +) error { backupFile, ok := options["backupFile"] if ok { graphClient.backupFile = backupFile } + quiet, ok := options["quiet"] + if ok { + if quiet == "true" || quiet == "yes" { + graphClient.quiet = true + } + } + // Warning: this assumes one client running with one graph host host, ok := options["host"] if ok { @@ -123,3 +129,10 @@ func (g MemoryGraph) Init(options map[string]string) error { } return nil } + +// Add the backend to be known to rainbow +func init() { + + graph := MemoryGraph{} + backend.Register(graph) +} diff --git a/plugins/backends/memory/resource.go b/plugins/backends/memory/resource.go index baf7022..f18c773 100644 --- a/plugins/backends/memory/resource.go +++ b/plugins/backends/memory/resource.go @@ -2,11 +2,12 @@ package memory import ( jgf "github.com/converged-computing/jsongraph-go/jsongraph/v2/graph" + "github.com/converged-computing/rainbow/pkg/types" ) // Generate a new resource from a JGF node // A resource is associated with a dominant subsystem resource -func NewResource(node jgf.Node) *Resource { +func NewResource(node jgf.Node) *types.Resource { // We assume the node has a type for metadata resourceType := "resource" @@ -27,7 +28,7 @@ func NewResource(node jgf.Node) *Resource { resourceUnit = unit } - return &Resource{ + return &types.Resource{ Size: resourceSize, Unit: resourceUnit, Type: resourceType, @@ -36,7 +37,7 @@ func NewResource(node jgf.Node) *Resource { // New SubsystemResource creates a resource, // but also adds arbitrary metadata -func NewSubsystemResource(node jgf.Node) *Resource { +func 
NewSubsystemResource(node jgf.Node) *types.Resource { resourceType := "resource" typ, err := node.Metadata.GetStringElement("type") if err == nil { @@ -55,7 +56,7 @@ func NewSubsystemResource(node jgf.Node) *Resource { resourceUnit = unit } - return &Resource{ + return &types.Resource{ Size: resourceSize, Unit: resourceUnit, Type: resourceType, diff --git a/plugins/backends/memory/server.go b/plugins/backends/memory/server.go index fb965e1..640b4be 100644 --- a/plugins/backends/memory/server.go +++ b/plugins/backends/memory/server.go @@ -3,6 +3,8 @@ package memory import ( "context" + "github.com/converged-computing/rainbow/pkg/config" + "github.com/converged-computing/rainbow/pkg/graph/algorithm" "github.com/converged-computing/rainbow/plugins/backends/memory/service" ) @@ -21,7 +23,15 @@ func (MemoryServer) Register(c context.Context, req *service.RegisterRequest) (* // Satisfy determines if the graph can satisfy a request func (MemoryServer) Satisfy(c context.Context, req *service.SatisfyRequest) (*service.SatisfyResponse, error) { - response, err := graphClient.Satisfies(req.Payload) + if req.Matcher == "" { + req.Matcher = config.DefaultMatchAlgorithm + } + // Instantiate the matcher + matcher, err := algorithm.Get(req.Matcher) + if err != nil { + return nil, err + } + response, err := graphClient.Satisfies(req.Payload, matcher) if err != nil { return nil, err } diff --git a/plugins/backends/memory/service/memory.pb.go b/plugins/backends/memory/service/memory.pb.go index 2558823..97cb4a1 100644 --- a/plugins/backends/memory/service/memory.pb.go +++ b/plugins/backends/memory/service/memory.pb.go @@ -188,6 +188,7 @@ type SatisfyRequest struct { unknownFields protoimpl.UnknownFields Payload string `protobuf:"bytes,1,opt,name=payload,proto3" json:"payload,omitempty"` + Matcher string `protobuf:"bytes,2,opt,name=matcher,proto3" json:"matcher,omitempty"` } func (x *SatisfyRequest) Reset() { @@ -229,6 +230,13 @@ func (x *SatisfyRequest) GetPayload() string { return "" } +func (x *SatisfyRequest) GetMatcher() string { + if x != nil { + return x.Matcher + } + return "" +} + type SatisfyResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -342,45 +350,47 @@ var file_memory_proto_rawDesc = []byte{ 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x12, 0x1c, 0x0a, 0x09, 0x73, 0x75, 0x62, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x73, 0x75, 0x62, - 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x22, 0x2a, 0x0a, 0x0e, 0x53, 0x61, 0x74, 0x69, 0x73, 0x66, + 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x22, 0x44, 0x0a, 0x0e, 0x53, 0x61, 0x74, 0x69, 0x73, 0x66, 0x79, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, - 0x61, 0x64, 0x22, 0xc5, 0x01, 0x0a, 0x0f, 0x53, 0x61, 0x74, 0x69, 0x73, 0x66, 0x79, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, - 0x72, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, - 0x72, 0x73, 0x12, 0x3b, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x0e, 0x32, 0x23, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x53, 0x61, 0x74, - 0x69, 0x73, 0x66, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x52, 0x65, 0x73, - 0x75, 0x6c, 0x74, 0x54, 0x79, 
0x70, 0x65, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, - 0x59, 0x0a, 0x0a, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1b, 0x0a, - 0x17, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x53, - 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x17, 0x0a, 0x13, 0x52, 0x45, - 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x55, 0x43, 0x43, 0x45, 0x53, - 0x53, 0x10, 0x01, 0x12, 0x15, 0x0a, 0x11, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x54, 0x59, - 0x50, 0x45, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x02, 0x22, 0x9b, 0x01, 0x0a, 0x08, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x34, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1c, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x2e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x59, 0x0a, - 0x0a, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1b, 0x0a, 0x17, 0x52, - 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, - 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x17, 0x0a, 0x13, 0x52, 0x45, 0x53, 0x55, - 0x4c, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x55, 0x43, 0x43, 0x45, 0x53, 0x53, 0x10, - 0x01, 0x12, 0x15, 0x0a, 0x11, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, - 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x02, 0x32, 0x88, 0x01, 0x0a, 0x0b, 0x4d, 0x65, 0x6d, - 0x6f, 0x72, 0x79, 0x47, 0x72, 0x61, 0x70, 0x68, 0x12, 0x3e, 0x0a, 0x07, 0x53, 0x61, 0x74, 0x69, - 0x73, 0x66, 0x79, 0x12, 0x17, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x53, 0x61, - 0x74, 0x69, 0x73, 0x66, 0x79, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x18, 0x2e, 0x73, + 0x61, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x72, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x72, 0x22, 0xc5, 0x01, 0x0a, + 0x0f, 0x53, 0x61, 0x74, 0x69, 0x73, 0x66, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x1a, 0x0a, 0x08, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x18, 0x01, 0x20, 0x03, + 0x28, 0x09, 0x52, 0x08, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x12, 0x3b, 0x0a, 0x06, + 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x23, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x53, 0x61, 0x74, 0x69, 0x73, 0x66, 0x79, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x39, 0x0a, 0x08, 0x52, 0x65, 0x67, 0x69, - 0x73, 0x74, 0x65, 0x72, 0x12, 0x18, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x52, - 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x11, - 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x22, 0x00, 0x42, 0x40, 0x5a, 0x3e, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, - 0x6d, 0x2f, 0x63, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x67, 0x65, 0x64, 0x2d, 0x63, 0x6f, 0x6d, 0x70, - 0x75, 0x74, 0x69, 0x6e, 0x67, 0x2f, 0x72, 0x61, 0x69, 0x6e, 0x62, 0x6f, 0x77, 0x2f, 0x62, 0x61, - 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x73, 0x2f, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x2f, 0x73, 0x65, - 0x72, 0x76, 0x69, 0x63, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 
0x74, 0x54, 0x79, 0x70, + 0x65, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x59, 0x0a, 0x0a, 0x52, 0x65, 0x73, + 0x75, 0x6c, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1b, 0x0a, 0x17, 0x52, 0x45, 0x53, 0x55, 0x4c, + 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, + 0x45, 0x44, 0x10, 0x00, 0x12, 0x17, 0x0a, 0x13, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x54, + 0x59, 0x50, 0x45, 0x5f, 0x53, 0x55, 0x43, 0x43, 0x45, 0x53, 0x53, 0x10, 0x01, 0x12, 0x15, 0x0a, + 0x11, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x45, 0x52, 0x52, + 0x4f, 0x52, 0x10, 0x02, 0x22, 0x9b, 0x01, 0x0a, 0x08, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x12, 0x34, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0e, 0x32, 0x1c, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, + 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x59, 0x0a, 0x0a, 0x52, 0x65, 0x73, 0x75, 0x6c, + 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1b, 0x0a, 0x17, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, + 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, + 0x10, 0x00, 0x12, 0x17, 0x0a, 0x13, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x54, 0x59, 0x50, + 0x45, 0x5f, 0x53, 0x55, 0x43, 0x43, 0x45, 0x53, 0x53, 0x10, 0x01, 0x12, 0x15, 0x0a, 0x11, 0x52, + 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, + 0x10, 0x02, 0x32, 0x88, 0x01, 0x0a, 0x0b, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x47, 0x72, 0x61, + 0x70, 0x68, 0x12, 0x3e, 0x0a, 0x07, 0x53, 0x61, 0x74, 0x69, 0x73, 0x66, 0x79, 0x12, 0x17, 0x2e, + 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x53, 0x61, 0x74, 0x69, 0x73, 0x66, 0x79, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x18, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, + 0x2e, 0x53, 0x61, 0x74, 0x69, 0x73, 0x66, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x22, 0x00, 0x12, 0x39, 0x0a, 0x08, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, 0x12, 0x18, + 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, + 0x72, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x11, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, + 0x63, 0x65, 0x2e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x40, 0x5a, + 0x3e, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x63, 0x6f, 0x6e, 0x76, + 0x65, 0x72, 0x67, 0x65, 0x64, 0x2d, 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x69, 0x6e, 0x67, 0x2f, + 0x72, 0x61, 0x69, 0x6e, 0x62, 0x6f, 0x77, 0x2f, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x73, + 0x2f, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x62, + 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/plugins/backends/memory/service/memory.proto b/plugins/backends/memory/service/memory.proto index 7ee5c82..2d3fda0 100644 --- a/plugins/backends/memory/service/memory.proto +++ b/plugins/backends/memory/service/memory.proto @@ -17,6 +17,7 @@ message RegisterRequest { message SatisfyRequest { string payload = 1; + string matcher = 2; } message SatisfyResponse { diff --git a/plugins/backends/memory/subsystem.go b/plugins/backends/memory/subsystem.go index 99a596f..f5ca6bd 100644 --- a/plugins/backends/memory/subsystem.go +++ b/plugins/backends/memory/subsystem.go @@ -4,11 +4,12 @@ import ( "fmt" 
"github.com/converged-computing/jsongraph-go/jsongraph/metadata" + "github.com/converged-computing/rainbow/pkg/types" ) // NewSubsystem generates a new subsystem graph func NewSubsystem(name string) *Subsystem { - vertices := map[int]*Vertex{} + vertices := map[int]*types.Vertex{} lookup := map[string]int{} metrics := Metrics{ResourceCounts: map[string]int64{}, Name: name} s := Subsystem{ @@ -40,11 +41,11 @@ func (s *Subsystem) AddNode( } id := s.counter - newEdges := map[int]*Edge{} - newSubsystems := map[string]map[int]*Edge{} + newEdges := map[int]*types.Edge{} + newSubsystems := map[string]map[int]*types.Edge{} // Add the subsystem node - s.Vertices[id] = &Vertex{ + s.Vertices[id] = &types.Vertex{ Identifier: id, Edges: newEdges, Size: size, @@ -92,7 +93,7 @@ func (s *Subsystem) AddInternalEdge( // add edge src --> dest // Right now subsystem references the source - newEdge := Edge{ + newEdge := types.Edge{ Weight: weight, Vertex: destVertex, Relation: relation, @@ -107,7 +108,7 @@ func (s *Subsystem) AddInternalEdge( // This would be called by the dominant to add an edge to itself func (s *Subsystem) AddSubsystemEdge( src int, - dest *Vertex, + dest *types.Vertex, weight int, relation string, subsystem string, @@ -122,7 +123,7 @@ func (s *Subsystem) AddSubsystemEdge( // add edge src --> dest // Right now subsystem references the source - newEdge := Edge{ + newEdge := types.Edge{ Weight: weight, Vertex: dest, Relation: relation, @@ -134,7 +135,7 @@ func (s *Subsystem) AddSubsystemEdge( // iterate thrugh subsystem AND dominant subsystem nodes. subsysEdges, ok := srcVertex.Subsystems[subsystem] if !ok { - subsysEdges = map[int]*Edge{} + subsysEdges = map[int]*types.Edge{} srcVertex.Subsystems[subsystem] = subsysEdges } srcVertex.Subsystems[subsystem][dest.Identifier] = &newEdge diff --git a/plugins/backends/memory/types.go b/plugins/backends/memory/types.go index b9af3bd..589fb23 100644 --- a/plugins/backends/memory/types.go +++ b/plugins/backends/memory/types.go @@ -1,7 +1,7 @@ package memory import ( - "github.com/converged-computing/jsongraph-go/jsongraph/metadata" + "github.com/converged-computing/rainbow/pkg/types" ) // A subsystem is a graph with a set of vertices that are connected by edges @@ -13,7 +13,7 @@ type Subsystem struct { Name string // Using a map means O(1) lookup time - Vertices map[int]*Vertex `json:"vertices"` + Vertices map[int]*types.Vertex `json:"vertices"` // There are a small number of vertices we care to lookup by name // Put them here for now until I have a better idea :) @@ -26,42 +26,6 @@ type Subsystem struct { Metrics Metrics } -// A Resource is a collection of attributes we load from a node -// intending to put into the graph, and associated functions -type Resource struct { - Size int32 - Type string - Unit string - // The request coming in can know about the type - Metadata metadata.Metadata -} - -// A vertex is defined by an identifier. We use an int -// instead of a string because it's faster. Edges are other -// vertices (and their identifiers) it's connected to. 
-type Vertex struct { - Identifier int `json:"identifier"` - Edges map[int]*Edge `json:"edges"` - Size int32 `json:"size"` - Unit string `json:"unit"` - Type string `json:"type"` - - // Link to another subsystem vertex - Subsystems map[string]map[int]*Edge `json:"subsystems"` - - // Less commonly accessed (and standardized) metadaa - Metadata metadata.Metadata -} - -// An edge in the graph has a source vertex (where it's defined from) -// and a destination (the Vertex field below) -type Edge struct { - Weight int `json:"weight"` - Vertex *Vertex `json:"vertex"` - Relation string `json:"relation"` - Subsystem string `json:"subsystem"` -} - // Metrics keeps track of counts of things type Metrics struct { // This is across all subsystems @@ -75,14 +39,3 @@ type Metrics struct { // Resource specific metrics ResourceCounts map[string]int64 } - -// Serialize slot resource needs into a struct that is easier to parse -type SlotResourceNeeds struct { - Satisfied bool - Subsystems []SubsystemNeeds -} - -type SubsystemNeeds struct { - Name string - Attributes map[string]bool -} diff --git a/plugins/algorithms/random/random.go b/plugins/selection/random/random.go similarity index 92% rename from plugins/algorithms/random/random.go rename to plugins/selection/random/random.go index 818657c..0f36a58 100644 --- a/plugins/algorithms/random/random.go +++ b/plugins/selection/random/random.go @@ -3,7 +3,7 @@ package random import ( "math/rand" - "github.com/converged-computing/rainbow/pkg/graph/algorithm" + "github.com/converged-computing/rainbow/pkg/graph/selection" ) // Random selection of a cluster @@ -46,5 +46,5 @@ func (s RandomSelection) Init(options map[string]string) error { // Add the selection algorithm to be known to rainbow func init() { algo := RandomSelection{} - algorithm.Register(algo) + selection.Register(algo) } diff --git a/python/v1/rainbow/protos/memory_pb2.py b/python/v1/rainbow/protos/memory_pb2.py index 9582719..4540da6 100644 --- a/python/v1/rainbow/protos/memory_pb2.py +++ b/python/v1/rainbow/protos/memory_pb2.py @@ -14,7 +14,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x0cmemory.proto\x12\x07service"C\n\x0fRegisterRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07payload\x18\x02 \x01(\t\x12\x11\n\tsubsystem\x18\x03 \x01(\t"!\n\x0eSatisfyRequest\x12\x0f\n\x07payload\x18\x01 \x01(\t"\xb3\x01\n\x0fSatisfyResponse\x12\x10\n\x08\x63lusters\x18\x01 \x03(\t\x12\x33\n\x06status\x18\x02 \x01(\x0e\x32#.service.SatisfyResponse.ResultType"Y\n\nResultType\x12\x1b\n\x17RESULT_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13RESULT_TYPE_SUCCESS\x10\x01\x12\x15\n\x11RESULT_TYPE_ERROR\x10\x02"\x93\x01\n\x08Response\x12,\n\x06status\x18\x01 \x01(\x0e\x32\x1c.service.Response.ResultType"Y\n\nResultType\x12\x1b\n\x17RESULT_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13RESULT_TYPE_SUCCESS\x10\x01\x12\x15\n\x11RESULT_TYPE_ERROR\x10\x02\x32\x88\x01\n\x0bMemoryGraph\x12>\n\x07Satisfy\x12\x17.service.SatisfyRequest\x1a\x18.service.SatisfyResponse"\x00\x12\x39\n\x08Register\x12\x18.service.RegisterRequest\x1a\x11.service.Response"\x00\x42@Z>github.com/converged-computing/rainbow/backends/memory/serviceb\x06proto3' + b'\n\x0cmemory.proto\x12\x07service"C\n\x0fRegisterRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07payload\x18\x02 \x01(\t\x12\x11\n\tsubsystem\x18\x03 \x01(\t"2\n\x0eSatisfyRequest\x12\x0f\n\x07payload\x18\x01 \x01(\t\x12\x0f\n\x07matcher\x18\x02 \x01(\t"\xb3\x01\n\x0fSatisfyResponse\x12\x10\n\x08\x63lusters\x18\x01 \x03(\t\x12\x33\n\x06status\x18\x02 
\x01(\x0e\x32#.service.SatisfyResponse.ResultType"Y\n\nResultType\x12\x1b\n\x17RESULT_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13RESULT_TYPE_SUCCESS\x10\x01\x12\x15\n\x11RESULT_TYPE_ERROR\x10\x02"\x93\x01\n\x08Response\x12,\n\x06status\x18\x01 \x01(\x0e\x32\x1c.service.Response.ResultType"Y\n\nResultType\x12\x1b\n\x17RESULT_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13RESULT_TYPE_SUCCESS\x10\x01\x12\x15\n\x11RESULT_TYPE_ERROR\x10\x02\x32\x88\x01\n\x0bMemoryGraph\x12>\n\x07Satisfy\x12\x17.service.SatisfyRequest\x1a\x18.service.SatisfyResponse"\x00\x12\x39\n\x08Register\x12\x18.service.RegisterRequest\x1a\x11.service.Response"\x00\x42@Z>github.com/converged-computing/rainbow/backends/memory/serviceb\x06proto3' ) _globals = globals() @@ -28,15 +28,15 @@ _globals["_REGISTERREQUEST"]._serialized_start = 25 _globals["_REGISTERREQUEST"]._serialized_end = 92 _globals["_SATISFYREQUEST"]._serialized_start = 94 - _globals["_SATISFYREQUEST"]._serialized_end = 127 - _globals["_SATISFYRESPONSE"]._serialized_start = 130 - _globals["_SATISFYRESPONSE"]._serialized_end = 309 - _globals["_SATISFYRESPONSE_RESULTTYPE"]._serialized_start = 220 - _globals["_SATISFYRESPONSE_RESULTTYPE"]._serialized_end = 309 - _globals["_RESPONSE"]._serialized_start = 312 - _globals["_RESPONSE"]._serialized_end = 459 - _globals["_RESPONSE_RESULTTYPE"]._serialized_start = 220 - _globals["_RESPONSE_RESULTTYPE"]._serialized_end = 309 - _globals["_MEMORYGRAPH"]._serialized_start = 462 - _globals["_MEMORYGRAPH"]._serialized_end = 598 + _globals["_SATISFYREQUEST"]._serialized_end = 144 + _globals["_SATISFYRESPONSE"]._serialized_start = 147 + _globals["_SATISFYRESPONSE"]._serialized_end = 326 + _globals["_SATISFYRESPONSE_RESULTTYPE"]._serialized_start = 237 + _globals["_SATISFYRESPONSE_RESULTTYPE"]._serialized_end = 326 + _globals["_RESPONSE"]._serialized_start = 329 + _globals["_RESPONSE"]._serialized_end = 476 + _globals["_RESPONSE_RESULTTYPE"]._serialized_start = 237 + _globals["_RESPONSE_RESULTTYPE"]._serialized_end = 326 + _globals["_MEMORYGRAPH"]._serialized_start = 479 + _globals["_MEMORYGRAPH"]._serialized_end = 615 # @@protoc_insertion_point(module_scope) diff --git a/python/v1/rainbow/protos/memory_pb2.pyi b/python/v1/rainbow/protos/memory_pb2.pyi index 8064eca..75288f3 100644 --- a/python/v1/rainbow/protos/memory_pb2.pyi +++ b/python/v1/rainbow/protos/memory_pb2.pyi @@ -17,10 +17,12 @@ class RegisterRequest(_message.Message): def __init__(self, name: _Optional[str] = ..., payload: _Optional[str] = ..., subsystem: _Optional[str] = ...) -> None: ... class SatisfyRequest(_message.Message): - __slots__ = ("payload",) + __slots__ = ("payload", "matcher") PAYLOAD_FIELD_NUMBER: _ClassVar[int] + MATCHER_FIELD_NUMBER: _ClassVar[int] payload: str - def __init__(self, payload: _Optional[str] = ...) -> None: ... + matcher: str + def __init__(self, payload: _Optional[str] = ..., matcher: _Optional[str] = ...) -> None: ... 
class SatisfyResponse(_message.Message): __slots__ = ("clusters", "status") diff --git a/python/v1/setup.py b/python/v1/setup.py index e53a9af..bf399a1 100644 --- a/python/v1/setup.py +++ b/python/v1/setup.py @@ -18,7 +18,7 @@ if __name__ == "__main__": setup( name="rainbow-scheduler", - version="0.0.14rc0", + version="0.0.14rc1", author="Vanessasaurus", author_email="vsoch@users.noreply.github.com", maintainer="Vanessasaurus", From 4475624d269eda2d8e68b6b1d0442ff5ee396aaa Mon Sep 17 00:00:00 2001 From: vsoch Date: Sat, 30 Mar 2024 17:10:54 -0600 Subject: [PATCH 2/2] feat: add new range match algorithm Signed-off-by: vsoch --- cmd/rainbow/config/config.go | 7 +- cmd/rainbow/rainbow.go | 3 +- cmd/rainbow/register/register.go | 8 + cmd/server/server.go | 1 + docs/algorithms.md | 41 + docs/commands.md | 48 + .../match-algorithms/range/cluster-nodes.json | 1274 +++++++++++++++++ .../range/jobspec-invalid-range.yaml | 23 + .../range/jobspec-valid-range.yaml | 23 + .../range/rainbow-config.yaml | 17 + .../range/spack-subsystem.json | 249 ++++ docs/examples/scheduler/rainbow-config.yaml | 2 +- go.mod | 1 + go.sum | 2 + pkg/config/config.go | 14 +- plugins/algorithms/match/match.go | 29 +- plugins/algorithms/range/range.go | 282 ++++ plugins/backends/memory/dfs.go | 2 +- plugins/backends/memory/memory.go | 3 +- plugins/backends/memory/resource.go | 8 +- 20 files changed, 2005 insertions(+), 32 deletions(-) create mode 100644 docs/examples/match-algorithms/range/cluster-nodes.json create mode 100644 docs/examples/match-algorithms/range/jobspec-invalid-range.yaml create mode 100644 docs/examples/match-algorithms/range/jobspec-valid-range.yaml create mode 100644 docs/examples/match-algorithms/range/rainbow-config.yaml create mode 100644 docs/examples/match-algorithms/range/spack-subsystem.json create mode 100644 plugins/algorithms/range/range.go diff --git a/cmd/rainbow/config/config.go b/cmd/rainbow/config/config.go index 0bdb5a2..93074f3 100644 --- a/cmd/rainbow/config/config.go +++ b/cmd/rainbow/config/config.go @@ -13,7 +13,10 @@ var ( ) // Run will init a new config -func RunInit(path string) error { +func RunInit( + path string, + clusterName, selectAlgo, matchAlgo string, +) error { if path == "" { path = defaultConfigFile @@ -21,7 +24,7 @@ func RunInit(path string) error { // Generate an empty config - providing an empty filename ensures we don't read an existing one // This defaults to an in-memory vanilla database - cfg, err := config.NewRainbowClientConfig("", "rainbow-cluster", "chocolate-cookies", "", "random", "match") + cfg, err := config.NewRainbowClientConfig("", clusterName, "chocolate-cookies", "", selectAlgo, matchAlgo) if err != nil { return err } diff --git a/cmd/rainbow/rainbow.go b/cmd/rainbow/rainbow.go index 2dd4eeb..0d07d2c 100644 --- a/cmd/rainbow/rainbow.go +++ b/cmd/rainbow/rainbow.go @@ -14,6 +14,7 @@ import ( // Register database backends and selection algorithms _ "github.com/converged-computing/rainbow/plugins/algorithms/match" + _ "github.com/converged-computing/rainbow/plugins/algorithms/range" _ "github.com/converged-computing/rainbow/plugins/backends/memory" _ "github.com/converged-computing/rainbow/plugins/selection/random" ) @@ -85,7 +86,7 @@ func main() { } if configCmd.Happened() && configInitCmd.Happened() { - err := config.RunInit(*cfg) + err := config.RunInit(*cfg, *clusterName, *selectAlgo, *matchAlgo) if err != nil { log.Fatalf("Issue with config: %s\n", err) } diff --git a/cmd/rainbow/register/register.go b/cmd/rainbow/register/register.go index 
ec35b46..68deb7d 100644
--- a/cmd/rainbow/register/register.go
+++ b/cmd/rainbow/register/register.go
@@ -2,6 +2,7 @@ package register
 
 import (
 	"context"
+	"fmt"
 	"log"
 	"os"
 
@@ -28,6 +29,9 @@ func Run(
 		return err
 	}
 
+	if clusterName == "" {
+		return fmt.Errorf("--cluster-name is required")
+	}
 	// Read in the config, if provided, command line takes preference
 	cfg, err := config.NewRainbowClientConfig(
 		cfgFile,
@@ -64,6 +68,10 @@ func Run(
 	if saveSecret && cfgFile != "" {
 		log.Printf("Saving cluster secret to %s\n", cfgFile)
 		cfg.Cluster = config.ClusterCredential{Secret: response.Secret, Name: clusterName}
+
+		// Assume we want to submit to our cluster too
+		newCluster := config.ClusterCredential{Token: response.Token, Name: clusterName}
+		cfg.Clusters = []config.ClusterCredential{newCluster}
 		yaml, err := cfg.ToYaml()
 		if err != nil {
 			return err
diff --git a/cmd/server/server.go b/cmd/server/server.go
index 69ad3b3..fd5354f 100644
--- a/cmd/server/server.go
+++ b/cmd/server/server.go
@@ -11,6 +11,7 @@ import (
 
 	// Register database backends
 	_ "github.com/converged-computing/rainbow/plugins/algorithms/match"
+	_ "github.com/converged-computing/rainbow/plugins/algorithms/range"
 	_ "github.com/converged-computing/rainbow/plugins/backends/memory"
 	_ "github.com/converged-computing/rainbow/plugins/selection/random"
 )
diff --git a/docs/algorithms.md b/docs/algorithms.md
index eec890c..34c98c1 100644
--- a/docs/algorithms.md
+++ b/docs/algorithms.md
@@ -138,6 +138,47 @@ task:
 
 We would look for a node of type "shm" in the io subsystem that is directly attached (an edge) to a node in the dominant subsystem graph.
 
+### Range
+
+Range is typically designed to handle package versions. You *must* specify a field that is to be inspected on the subsystem metadata, and you must specify one of "min" or "max" or both. For example:
+
+```yaml
+task:
+  command:
+  - spack
+  slot: default
+  count:
+    per_slot: 1
+  resources:
+    spack:
+      range:
+      - field: version
+        min: "0.5.1"
+        max: "0.5.5"
+```
+
+The above would look for the field `version` defined for a slot, and use semver to determine whether the version falls within that range. Here is what the subsystem node might look like. In this case, the node is saying "the dominant subsystem node that I'm connected to has this package with this metadata":
+
+```json
+"spack1": {
+  "label": "spack1",
+  "metadata": {
+    "basename": "package",
+    "exclusive": true,
+    "id": 1,
+    "name": "package0",
+    "paths": {
+      "containment": "/spack0/package0"
+    },
+    "size": 1,
+    "type": "package",
+    "uniq_id": 1,
+    "version": "0.5.2"
+  }
+},
+```
+
+In the above, the field is "version" and it is an arbitrary metadata field in the "metadata" section of a node. For the time being, the match algorithm determines the types that are allowed there. For example, the range algorithm interface expects to parse a string in a semantic version format. Different plugins might expect different formats.
 
 ## Selection Algorithms
diff --git a/docs/commands.md b/docs/commands.md
index c571e65..667589f 100644
--- a/docs/commands.md
+++ b/docs/commands.md
@@ -444,7 +444,11 @@ Now we will take the same command, but submit with a jobspec directly. 
This is c
 ```bash
 # terminal 1 for server
 rm -f rainbow.db && make server
+```
+
+#### Match Algorithm (default)
+
+```bash
 # terminal 2 to register cluster, subsystem, and submit job
 make register && make subsystem && go run ./cmd/rainbow/rainbow.go submit --config-path ./docs/examples/scheduler/rainbow-config.yaml --jobspec ./docs/examples/scheduler/jobspec-io.yaml
 ```
@@ -474,6 +478,50 @@ The new portion from the above is seeing that the subsystem "io" is satisfied at
 
 And the work is still assigned to the cluster.
 
+#### Range Algorithm
+
+This algorithm is intended to match a range of versions, either a min, a max, or both.
+We have an example subsystem JGF intended for spack, complete with packages, compilers, externals, licenses, and anguish. In one
+window, start the server:
+
+```bash
+make server
+```
+
+In another terminal, register the nodes, register the subsystem, and then submit the job with the range algorithm:
+
+```bash
+# Create your rainbow config
+go run cmd/rainbow/rainbow.go config init --cluster-name spack-builder --config-path ./docs/examples/match-algorithms/range/rainbow-config.yaml --match-algorithm range
+
+# Register your nodes
+go run cmd/rainbow/rainbow.go register cluster --cluster-name spack-builder --nodes-json ./docs/examples/match-algorithms/range/cluster-nodes.json --config-path ./docs/examples/match-algorithms/range/rainbow-config.yaml --save
+
+# Register the subsystem
+go run cmd/rainbow/rainbow.go register subsystem --subsystem spack --nodes-json ./docs/examples/match-algorithms/range/spack-subsystem.json --config-path ./docs/examples/match-algorithms/range/rainbow-config.yaml
+
+# Submit a job that asks for a valid range
+go run ./cmd/rainbow/rainbow.go submit --config-path ./docs/examples/match-algorithms/range/rainbow-config.yaml --jobspec ./docs/examples/match-algorithms/range/jobspec-valid-range.yaml --match-algorithm range
+```
+For the above job, you'll see it's satisfied:
+
+```console
+ match: ✅ī¸ there are 1 matches with sufficient resources
+2024/03/30 17:03:35 📝ī¸ received job ior for 1 contender clusters
+2024/03/30 17:03:35 📝ī¸ job ior is assigned to cluster spack-builder
+```
+
+Try submitting a job with a range that can't be satisfied:
+
+```bash
+# Submit a job that asks for an invalid range
+go run ./cmd/rainbow/rainbow.go submit --config-path ./docs/examples/match-algorithms/range/rainbow-config.yaml --jobspec ./docs/examples/match-algorithms/range/jobspec-invalid-range.yaml --match-algorithm range
+```
+```console
+Slots found 0/1 for vertex cluster
+ match: đŸŽ¯ī¸ cluster spack-builder does not have sufficient resources and is NOT a match
+ match: đŸ˜Ĩī¸ no clusters could satisfy this request. 
We are sad +``` ## Receive Jobs diff --git a/docs/examples/match-algorithms/range/cluster-nodes.json b/docs/examples/match-algorithms/range/cluster-nodes.json new file mode 100644 index 0000000..2c1e515 --- /dev/null +++ b/docs/examples/match-algorithms/range/cluster-nodes.json @@ -0,0 +1,1274 @@ +{ + "graph": { + "directed": true, + "nodes": { + "0": { + "label": "0", + "metadata": { + "basename": "cluster-red", + "exclusive": false, + "id": 0, + "name": "cluster-red0", + "paths": { + "containment": "/cluster-red0" + }, + "rank": -1, + "size": 1, + "type": "cluster", + "uniq_id": 0, + "unit": "" + } + }, + "1": { + "label": "1", + "metadata": { + "basename": "rack", + "exclusive": false, + "id": "0", + "name": "rack0", + "paths": { + "containment": "/cluster-red0/rack0" + }, + "rank": -1, + "size": 1, + "type": "rack", + "uniq_id": 1, + "unit": "" + } + }, + "10": { + "label": "10", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "6", + "name": "core6", + "paths": { + "containment": "/cluster-red0/rack0/node0/socket0/core6" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 10, + "unit": "" + } + }, + "11": { + "label": "11", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "7", + "name": "core7", + "paths": { + "containment": "/cluster-red0/rack0/node0/socket0/core7" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 11, + "unit": "" + } + }, + "12": { + "label": "12", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "8", + "name": "core8", + "paths": { + "containment": "/cluster-red0/rack0/node0/socket0/core8" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 12, + "unit": "" + } + }, + "13": { + "label": "13", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "9", + "name": "core9", + "paths": { + "containment": "/cluster-red0/rack0/node0/socket0/core9" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 13, + "unit": "" + } + }, + "14": { + "label": "14", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "10", + "name": "core10", + "paths": { + "containment": "/cluster-red0/rack0/node0/socket0/core10" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 14, + "unit": "" + } + }, + "15": { + "label": "15", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "11", + "name": "core11", + "paths": { + "containment": "/cluster-red0/rack0/node0/socket0/core11" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 15, + "unit": "" + } + }, + "16": { + "label": "16", + "metadata": { + "basename": "node", + "exclusive": false, + "id": "1", + "name": "node1", + "paths": { + "containment": "/cluster-red0/rack0/node1" + }, + "rank": -1, + "size": 1, + "type": "node", + "uniq_id": 16, + "unit": "" + } + }, + "17": { + "label": "17", + "metadata": { + "basename": "socket", + "exclusive": false, + "id": "1", + "name": "socket1", + "paths": { + "containment": "/cluster-red0/rack0/node1/socket1" + }, + "rank": -1, + "size": 1, + "type": "socket", + "uniq_id": 17, + "unit": "" + } + }, + "18": { + "label": "18", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "12", + "name": "core12", + "paths": { + "containment": "/cluster-red0/rack0/node1/socket1/core12" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 18, + "unit": "" + } + }, + "19": { + "label": "19", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "13", + "name": "core13", + "paths": { + "containment": 
"/cluster-red0/rack0/node1/socket1/core13" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 19, + "unit": "" + } + }, + "2": { + "label": "2", + "metadata": { + "basename": "node", + "exclusive": false, + "id": "0", + "name": "node0", + "paths": { + "containment": "/cluster-red0/rack0/node0" + }, + "rank": -1, + "size": 1, + "type": "node", + "uniq_id": 2, + "unit": "" + } + }, + "20": { + "label": "20", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "14", + "name": "core14", + "paths": { + "containment": "/cluster-red0/rack0/node1/socket1/core14" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 20, + "unit": "" + } + }, + "21": { + "label": "21", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "15", + "name": "core15", + "paths": { + "containment": "/cluster-red0/rack0/node1/socket1/core15" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 21, + "unit": "" + } + }, + "22": { + "label": "22", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "16", + "name": "core16", + "paths": { + "containment": "/cluster-red0/rack0/node1/socket1/core16" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 22, + "unit": "" + } + }, + "23": { + "label": "23", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "17", + "name": "core17", + "paths": { + "containment": "/cluster-red0/rack0/node1/socket1/core17" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 23, + "unit": "" + } + }, + "24": { + "label": "24", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "18", + "name": "core18", + "paths": { + "containment": "/cluster-red0/rack0/node1/socket1/core18" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 24, + "unit": "" + } + }, + "25": { + "label": "25", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "19", + "name": "core19", + "paths": { + "containment": "/cluster-red0/rack0/node1/socket1/core19" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 25, + "unit": "" + } + }, + "26": { + "label": "26", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "20", + "name": "core20", + "paths": { + "containment": "/cluster-red0/rack0/node1/socket1/core20" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 26, + "unit": "" + } + }, + "27": { + "label": "27", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "21", + "name": "core21", + "paths": { + "containment": "/cluster-red0/rack0/node1/socket1/core21" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 27, + "unit": "" + } + }, + "28": { + "label": "28", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "22", + "name": "core22", + "paths": { + "containment": "/cluster-red0/rack0/node1/socket1/core22" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 28, + "unit": "" + } + }, + "29": { + "label": "29", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "23", + "name": "core23", + "paths": { + "containment": "/cluster-red0/rack0/node1/socket1/core23" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 29, + "unit": "" + } + }, + "3": { + "label": "3", + "metadata": { + "basename": "socket", + "exclusive": false, + "id": "0", + "name": "socket0", + "paths": { + "containment": "/cluster-red0/rack0/node0/socket0" + }, + "rank": -1, + "size": 1, + "type": "socket", + "uniq_id": 3, + "unit": "" + } + }, + "30": { + "label": "30", + "metadata": { + 
"basename": "node", + "exclusive": false, + "id": "2", + "name": "node2", + "paths": { + "containment": "/cluster-red0/rack0/node2" + }, + "rank": -1, + "size": 1, + "type": "node", + "uniq_id": 30, + "unit": "" + } + }, + "31": { + "label": "31", + "metadata": { + "basename": "socket", + "exclusive": false, + "id": "2", + "name": "socket2", + "paths": { + "containment": "/cluster-red0/rack0/node2/socket2" + }, + "rank": -1, + "size": 1, + "type": "socket", + "uniq_id": 31, + "unit": "" + } + }, + "32": { + "label": "32", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "24", + "name": "core24", + "paths": { + "containment": "/cluster-red0/rack0/node2/socket2/core24" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 32, + "unit": "" + } + }, + "33": { + "label": "33", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "25", + "name": "core25", + "paths": { + "containment": "/cluster-red0/rack0/node2/socket2/core25" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 33, + "unit": "" + } + }, + "34": { + "label": "34", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "26", + "name": "core26", + "paths": { + "containment": "/cluster-red0/rack0/node2/socket2/core26" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 34, + "unit": "" + } + }, + "35": { + "label": "35", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "27", + "name": "core27", + "paths": { + "containment": "/cluster-red0/rack0/node2/socket2/core27" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 35, + "unit": "" + } + }, + "36": { + "label": "36", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "28", + "name": "core28", + "paths": { + "containment": "/cluster-red0/rack0/node2/socket2/core28" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 36, + "unit": "" + } + }, + "37": { + "label": "37", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "29", + "name": "core29", + "paths": { + "containment": "/cluster-red0/rack0/node2/socket2/core29" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 37, + "unit": "" + } + }, + "38": { + "label": "38", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "30", + "name": "core30", + "paths": { + "containment": "/cluster-red0/rack0/node2/socket2/core30" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 38, + "unit": "" + } + }, + "39": { + "label": "39", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "31", + "name": "core31", + "paths": { + "containment": "/cluster-red0/rack0/node2/socket2/core31" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 39, + "unit": "" + } + }, + "4": { + "label": "4", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "0", + "name": "core0", + "paths": { + "containment": "/cluster-red0/rack0/node0/socket0/core0" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 4, + "unit": "" + } + }, + "40": { + "label": "40", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "32", + "name": "core32", + "paths": { + "containment": "/cluster-red0/rack0/node2/socket2/core32" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 40, + "unit": "" + } + }, + "41": { + "label": "41", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "33", + "name": "core33", + "paths": { + "containment": "/cluster-red0/rack0/node2/socket2/core33" + }, + "rank": -1, + "size": 1, + 
"type": "core", + "uniq_id": 41, + "unit": "" + } + }, + "42": { + "label": "42", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "34", + "name": "core34", + "paths": { + "containment": "/cluster-red0/rack0/node2/socket2/core34" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 42, + "unit": "" + } + }, + "43": { + "label": "43", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "35", + "name": "core35", + "paths": { + "containment": "/cluster-red0/rack0/node2/socket2/core35" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 43, + "unit": "" + } + }, + "5": { + "label": "5", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "1", + "name": "core1", + "paths": { + "containment": "/cluster-red0/rack0/node0/socket0/core1" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 5, + "unit": "" + } + }, + "6": { + "label": "6", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "2", + "name": "core2", + "paths": { + "containment": "/cluster-red0/rack0/node0/socket0/core2" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 6, + "unit": "" + } + }, + "7": { + "label": "7", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "3", + "name": "core3", + "paths": { + "containment": "/cluster-red0/rack0/node0/socket0/core3" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 7, + "unit": "" + } + }, + "8": { + "label": "8", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "4", + "name": "core4", + "paths": { + "containment": "/cluster-red0/rack0/node0/socket0/core4" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 8, + "unit": "" + } + }, + "9": { + "label": "9", + "metadata": { + "basename": "core", + "exclusive": false, + "id": "5", + "name": "core5", + "paths": { + "containment": "/cluster-red0/rack0/node0/socket0/core5" + }, + "rank": -1, + "size": 1, + "type": "core", + "uniq_id": 9, + "unit": "" + } + } + }, + "edges": [ + { + "source": "0", + "target": "1", + "relation": "contains", + "metadata": {} + }, + { + "source": "1", + "target": "0", + "relation": "in", + "metadata": {} + }, + { + "source": "1", + "target": "2", + "relation": "contains", + "metadata": {} + }, + { + "source": "2", + "target": "1", + "relation": "in", + "metadata": {} + }, + { + "source": "2", + "target": "3", + "relation": "contains", + "metadata": {} + }, + { + "source": "3", + "target": "2", + "relation": "in", + "metadata": {} + }, + { + "source": "3", + "target": "4", + "relation": "contains", + "metadata": {} + }, + { + "source": "4", + "target": "3", + "relation": "in", + "metadata": {} + }, + { + "source": "3", + "target": "5", + "relation": "contains", + "metadata": {} + }, + { + "source": "5", + "target": "3", + "relation": "in", + "metadata": {} + }, + { + "source": "3", + "target": "6", + "relation": "contains", + "metadata": {} + }, + { + "source": "6", + "target": "3", + "relation": "in", + "metadata": {} + }, + { + "source": "3", + "target": "7", + "relation": "contains", + "metadata": {} + }, + { + "source": "7", + "target": "3", + "relation": "in", + "metadata": {} + }, + { + "source": "3", + "target": "8", + "relation": "contains", + "metadata": {} + }, + { + "source": "8", + "target": "3", + "relation": "in", + "metadata": {} + }, + { + "source": "3", + "target": "9", + "relation": "contains", + "metadata": {} + }, + { + "source": "9", + "target": "3", + "relation": "in", + "metadata": {} + }, + { + "source": "3", + "target": 
"10", + "relation": "contains", + "metadata": {} + }, + { + "source": "10", + "target": "3", + "relation": "in", + "metadata": {} + }, + { + "source": "3", + "target": "11", + "relation": "contains", + "metadata": {} + }, + { + "source": "11", + "target": "3", + "relation": "in", + "metadata": {} + }, + { + "source": "3", + "target": "12", + "relation": "contains", + "metadata": {} + }, + { + "source": "12", + "target": "3", + "relation": "in", + "metadata": {} + }, + { + "source": "3", + "target": "13", + "relation": "contains", + "metadata": {} + }, + { + "source": "13", + "target": "3", + "relation": "in", + "metadata": {} + }, + { + "source": "3", + "target": "14", + "relation": "contains", + "metadata": {} + }, + { + "source": "14", + "target": "3", + "relation": "in", + "metadata": {} + }, + { + "source": "3", + "target": "15", + "relation": "contains", + "metadata": {} + }, + { + "source": "15", + "target": "3", + "relation": "in", + "metadata": {} + }, + { + "source": "1", + "target": "16", + "relation": "contains", + "metadata": {} + }, + { + "source": "16", + "target": "1", + "relation": "in", + "metadata": {} + }, + { + "source": "16", + "target": "17", + "relation": "contains", + "metadata": {} + }, + { + "source": "17", + "target": "16", + "relation": "in", + "metadata": {} + }, + { + "source": "17", + "target": "18", + "relation": "contains", + "metadata": {} + }, + { + "source": "18", + "target": "17", + "relation": "in", + "metadata": {} + }, + { + "source": "17", + "target": "19", + "relation": "contains", + "metadata": {} + }, + { + "source": "19", + "target": "17", + "relation": "in", + "metadata": {} + }, + { + "source": "17", + "target": "20", + "relation": "contains", + "metadata": {} + }, + { + "source": "20", + "target": "17", + "relation": "in", + "metadata": {} + }, + { + "source": "17", + "target": "21", + "relation": "contains", + "metadata": {} + }, + { + "source": "21", + "target": "17", + "relation": "in", + "metadata": {} + }, + { + "source": "17", + "target": "22", + "relation": "contains", + "metadata": {} + }, + { + "source": "22", + "target": "17", + "relation": "in", + "metadata": {} + }, + { + "source": "17", + "target": "23", + "relation": "contains", + "metadata": {} + }, + { + "source": "23", + "target": "17", + "relation": "in", + "metadata": {} + }, + { + "source": "17", + "target": "24", + "relation": "contains", + "metadata": {} + }, + { + "source": "24", + "target": "17", + "relation": "in", + "metadata": {} + }, + { + "source": "17", + "target": "25", + "relation": "contains", + "metadata": {} + }, + { + "source": "25", + "target": "17", + "relation": "in", + "metadata": {} + }, + { + "source": "17", + "target": "26", + "relation": "contains", + "metadata": {} + }, + { + "source": "26", + "target": "17", + "relation": "in", + "metadata": {} + }, + { + "source": "17", + "target": "27", + "relation": "contains", + "metadata": {} + }, + { + "source": "27", + "target": "17", + "relation": "in", + "metadata": {} + }, + { + "source": "17", + "target": "28", + "relation": "contains", + "metadata": {} + }, + { + "source": "28", + "target": "17", + "relation": "in", + "metadata": {} + }, + { + "source": "17", + "target": "29", + "relation": "contains", + "metadata": {} + }, + { + "source": "29", + "target": "17", + "relation": "in", + "metadata": {} + }, + { + "source": "1", + "target": "30", + "relation": "contains", + "metadata": {} + }, + { + "source": "30", + "target": "1", + "relation": "in", + "metadata": {} + }, + { + "source": "30", + "target": 
"31", + "relation": "contains", + "metadata": {} + }, + { + "source": "31", + "target": "30", + "relation": "in", + "metadata": {} + }, + { + "source": "31", + "target": "32", + "relation": "contains", + "metadata": {} + }, + { + "source": "32", + "target": "31", + "relation": "in", + "metadata": {} + }, + { + "source": "31", + "target": "33", + "relation": "contains", + "metadata": {} + }, + { + "source": "33", + "target": "31", + "relation": "in", + "metadata": {} + }, + { + "source": "31", + "target": "34", + "relation": "contains", + "metadata": {} + }, + { + "source": "34", + "target": "31", + "relation": "in", + "metadata": {} + }, + { + "source": "31", + "target": "35", + "relation": "contains", + "metadata": {} + }, + { + "source": "35", + "target": "31", + "relation": "in", + "metadata": {} + }, + { + "source": "31", + "target": "36", + "relation": "contains", + "metadata": {} + }, + { + "source": "36", + "target": "31", + "relation": "in", + "metadata": {} + }, + { + "source": "31", + "target": "37", + "relation": "contains", + "metadata": {} + }, + { + "source": "37", + "target": "31", + "relation": "in", + "metadata": {} + }, + { + "source": "31", + "target": "38", + "relation": "contains", + "metadata": {} + }, + { + "source": "38", + "target": "31", + "relation": "in", + "metadata": {} + }, + { + "source": "31", + "target": "39", + "relation": "contains", + "metadata": {} + }, + { + "source": "39", + "target": "31", + "relation": "in", + "metadata": {} + }, + { + "source": "31", + "target": "40", + "relation": "contains", + "metadata": {} + }, + { + "source": "40", + "target": "31", + "relation": "in", + "metadata": {} + }, + { + "source": "31", + "target": "41", + "relation": "contains", + "metadata": {} + }, + { + "source": "41", + "target": "31", + "relation": "in", + "metadata": {} + }, + { + "source": "31", + "target": "42", + "relation": "contains", + "metadata": {} + }, + { + "source": "42", + "target": "31", + "relation": "in", + "metadata": {} + }, + { + "source": "31", + "target": "43", + "relation": "contains", + "metadata": {} + }, + { + "source": "43", + "target": "31", + "relation": "in", + "metadata": {} + } + ] + }, + "name": "cluster-red" +} \ No newline at end of file diff --git a/docs/examples/match-algorithms/range/jobspec-invalid-range.yaml b/docs/examples/match-algorithms/range/jobspec-invalid-range.yaml new file mode 100644 index 0000000..0ab5381 --- /dev/null +++ b/docs/examples/match-algorithms/range/jobspec-invalid-range.yaml @@ -0,0 +1,23 @@ +version: 1 +resources: +- count: 2 + type: node + with: + - count: 1 + label: default + type: slot + with: + - count: 2 + type: core +task: + command: + - ior + slot: default + count: + per_slot: 1 + resources: + spack: + range: + - field: version + min: "0.7.1" + max: "0.7.5" \ No newline at end of file diff --git a/docs/examples/match-algorithms/range/jobspec-valid-range.yaml b/docs/examples/match-algorithms/range/jobspec-valid-range.yaml new file mode 100644 index 0000000..06f29c6 --- /dev/null +++ b/docs/examples/match-algorithms/range/jobspec-valid-range.yaml @@ -0,0 +1,23 @@ +version: 1 +resources: +- count: 2 + type: node + with: + - count: 1 + label: default + type: slot + with: + - count: 2 + type: core +task: + command: + - ior + slot: default + count: + per_slot: 1 + resources: + spack: + range: + - field: version + min: "0.5.1" + max: "0.5.5" \ No newline at end of file diff --git a/docs/examples/match-algorithms/range/rainbow-config.yaml b/docs/examples/match-algorithms/range/rainbow-config.yaml 
new file mode 100644 index 0000000..ec1654a --- /dev/null +++ b/docs/examples/match-algorithms/range/rainbow-config.yaml @@ -0,0 +1,17 @@ +scheduler: + secret: chocolate-cookies + name: spack-builder + algorithms: + selection: + name: random + match: + name: match +cluster: + name: spack-builder + secret: 85e59eea-c427-4f55-9668-4ed418de9be8 +graphdatabase: + name: memory + host: 127.0.0.1:50051 +clusters: + - name: spack-builder + token: rainbow diff --git a/docs/examples/match-algorithms/range/spack-subsystem.json b/docs/examples/match-algorithms/range/spack-subsystem.json new file mode 100644 index 0000000..565af47 --- /dev/null +++ b/docs/examples/match-algorithms/range/spack-subsystem.json @@ -0,0 +1,249 @@ +{ + "graph": { + "directed": true, + "nodes": { + "spack0": { + "label": "spack0", + "metadata": { + "basename": "spack", + "exclusive": false, + "id": 0, + "name": "spack0", + "paths": { + "containment": "/spack0" + }, + "size": 1, + "type": "spack", + "uniq_id": 0 + } + }, + "spack1": { + "label": "spack1", + "metadata": { + "basename": "package", + "exclusive": true, + "id": 1, + "name": "package0", + "paths": { + "containment": "/spack0/package0" + }, + "size": 1, + "type": "package", + "uniq_id": 1, + "version": "0.5.2" + } + }, + "spack2": { + "label": "spack2", + "metadata": { + "basename": "compiler", + "exclusive": true, + "id": 2, + "name": "compiler0", + "paths": { + "containment": "/spack0/compiler0" + }, + "size": 1, + "type": "compiler", + "uniq_id": 2 + } + }, + "spack3": { + "label": "spack3", + "metadata": { + "basename": "external", + "exclusive": true, + "id": 3, + "name": "external0", + "paths": { + "containment": "/spack0/external0" + }, + "size": 1, + "type": "external", + "uniq_id": 3 + } + }, + "spack4": { + "label": "spack4", + "metadata": { + "basename": "anguish", + "exclusive": false, + "id": 4, + "name": "anguish0", + "paths": { + "containment": "/spack0/anguish0" + }, + "size": 4, + "type": "anguish" + } + }, + "spack5": { + "label": "spack5", + "metadata": { + "basename": "license", + "exclusive": false, + "id": 5, + "name": "license0", + "paths": { + "containment": "/spack0/license0" + }, + "size": 16, + "type": "license", + "uniq_id": 5 + } + } + }, + "edges": [ + { + "source": "2", + "target": "spack1", + "relation": "contains" + }, + { + "source": "spack1", + "target": "2", + "relation": "in" + }, + { + "source": "2", + "target": "spack2", + "relation": "contains" + }, + { + "source": "spack2", + "target": "2", + "relation": "in" + }, + { + "source": "2", + "target": "spack3", + "relation": "contains" + }, + { + "source": "spack3", + "target": "2", + "relation": "in" + }, + { + "source": "2", + "target": "spack4", + "relation": "contains" + }, + { + "source": "spack4", + "target": "2", + "relation": "in" + }, + { + "source": "2", + "target": "spack5", + "relation": "contains" + }, + { + "source": "spack5", + "target": "2", + "relation": "in" + }, + { + "source": "16", + "target": "spack1", + "relation": "contains" + }, + { + "source": "spack1", + "target": "16", + "relation": "in" + }, + { + "source": "16", + "target": "spack2", + "relation": "contains" + }, + { + "source": "spack2", + "target": "16", + "relation": "in" + }, + { + "source": "16", + "target": "spack3", + "relation": "contains" + }, + { + "source": "spack3", + "target": "16", + "relation": "in" + }, + { + "source": "16", + "target": "spack4", + "relation": "contains" + }, + { + "source": "spack4", + "target": "16", + "relation": "in" + }, + { + "source": "16", + "target": "spack5", 
+ "relation": "contains" + }, + { + "source": "spack5", + "target": "16", + "relation": "in" + }, + { + "source": "30", + "target": "spack1", + "relation": "contains" + }, + { + "source": "spack1", + "target": "30", + "relation": "in" + }, + { + "source": "30", + "target": "spack2", + "relation": "contains" + }, + { + "source": "spack2", + "target": "30", + "relation": "in" + }, + { + "source": "30", + "target": "spack3", + "relation": "contains" + }, + { + "source": "spack3", + "target": "30", + "relation": "in" + }, + { + "source": "30", + "target": "spack4", + "relation": "contains" + }, + { + "source": "spack4", + "target": "30", + "relation": "in" + }, + { + "source": "30", + "target": "spack5", + "relation": "contains" + }, + { + "source": "spack5", + "target": "30", + "relation": "in" + } + ] + } +} \ No newline at end of file diff --git a/docs/examples/scheduler/rainbow-config.yaml b/docs/examples/scheduler/rainbow-config.yaml index a99ba56..422a875 100644 --- a/docs/examples/scheduler/rainbow-config.yaml +++ b/docs/examples/scheduler/rainbow-config.yaml @@ -8,7 +8,7 @@ scheduler: name: match cluster: name: keebler - secret: a5a8ebd8-6b22-44d4-aafe-6e81f92124ce + secret: 3994c1e7-9cc7-4b81-ab75-3b00128eda16 graphdatabase: name: memory host: 127.0.0.1:50051 diff --git a/go.mod b/go.mod index 8c8513e..4f50281 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/converged-computing/rainbow go 1.20 require ( + github.com/Masterminds/semver/v3 v3.2.1 github.com/akamensky/argparse v1.4.0 github.com/compspec/jobspec-go v0.0.0-20240319000127-8020a01a65da github.com/converged-computing/jsongraph-go v0.0.0-20240229082022-c6887a5a00fe diff --git a/go.sum b/go.sum index ad05baa..411776e 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0= +github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= github.com/akamensky/argparse v1.4.0 h1:YGzvsTqCvbEZhL8zZu2AiA5nq805NZh75JNj4ajn1xc= github.com/akamensky/argparse v1.4.0/go.mod h1:S5kwC7IuDcEr5VeXtGPRVZ5o/FdhcMlQz4IZQuw64xA= github.com/compspec/jobspec-go v0.0.0-20240319000127-8020a01a65da h1:Uvfk4IgWMIiyBse5aIm7STzSmrBlUotKn5AP+9xqJcw= diff --git a/pkg/config/config.go b/pkg/config/config.go index 24d9b9e..660b4c1 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -78,17 +78,17 @@ func (c *RainbowConfig) ToYaml() (string, error) { } // setAlgorithm sets the algorithms for the rainbow scheduler -func (c *RainbowConfig) setAlgorithms(matchAlgo, selectAlgo string) { +func (c *RainbowConfig) setAlgorithms(selectAlgo, matchAlgo string) { sAlgo := Algorithm{Name: DefaultSelectionAlgorithm, Options: map[string]string{}} mAlgo := Algorithm{Name: DefaultMatchAlgorithm, Options: map[string]string{}} - c.Scheduler.Algorithms.Selection = sAlgo - c.Scheduler.Algorithms.Match = mAlgo - if selectAlgo == "" { - c.Scheduler.Algorithms.Selection.Name = selectAlgo + if selectAlgo != "" { + mAlgo.Name = selectAlgo } - if matchAlgo == "" { - c.Scheduler.Algorithms.Match.Name = matchAlgo + if matchAlgo != "" { + mAlgo.Name = matchAlgo } + c.Scheduler.Algorithms.Selection = sAlgo + c.Scheduler.Algorithms.Match = mAlgo } // ToJson serializes to json diff --git a/plugins/algorithms/match/match.go b/plugins/algorithms/match/match.go index 3c55c3a..fd080cc 100644 --- a/plugins/algorithms/match/match.go +++ b/plugins/algorithms/match/match.go @@ -8,18 +8,15 @@ import ( "github.com/converged-computing/rainbow/pkg/types" ) -// 
diff --git a/plugins/algorithms/match/match.go b/plugins/algorithms/match/match.go
index 3c55c3a..fd080cc 100644
--- a/plugins/algorithms/match/match.go
+++ b/plugins/algorithms/match/match.go
@@ -8,18 +8,15 @@ import (
 	"github.com/converged-computing/rainbow/pkg/types"
 )
 
-// Random selection of a cluster
-// It doesn't get simpler than this!
-
 type MatchType struct{}
 
 var (
-	description  = "match type for a subsystem for job assignment"
-	selectorName = "match"
+	description = "match type for a subsystem for job assignment"
+	matcherName = "match"
 )
 
 func (s MatchType) Name() string {
-	return selectorName
+	return matcherName
 }
 
 func (s MatchType) Description() string {
@@ -27,16 +24,16 @@ func (s MatchType) Description() string {
 }
 
 // getSlotResource needs assumes a subsystem request as follows:
-/* tasks:
-- command:
-  - ior
-  slot: default
-  count:
-    per_slot: 1
-  resources:
-    io:
-      match:
-      - type: shm
+/* task:
+  command:
+  - ior
+  slot: default
+  count:
+    per_slot: 1
+  resources:
+    io:
+      match:
+      - type: shm
 */
 // it is an explicit match, so we expect the slot to have that exact resource
 // available. This can eventually take a count, but right now is a boolean match
diff --git a/plugins/algorithms/range/range.go b/plugins/algorithms/range/range.go
new file mode 100644
index 0000000..d0b8eb9
--- /dev/null
+++ b/plugins/algorithms/range/range.go
@@ -0,0 +1,282 @@
+package rangematch
+
+// note: "range" is a reserved keyword in Go, so this package is named rangematch
+
+import (
+	"fmt"
+	"strings"
+
+	semver "github.com/Masterminds/semver/v3"
+	v1 "github.com/compspec/jobspec-go/pkg/jobspec/experimental"
+	"github.com/converged-computing/rainbow/pkg/graph/algorithm"
+	"github.com/converged-computing/rainbow/pkg/types"
+)
+
+type RangeType struct{}
+
+type RangeRequest struct {
+	Min   string
+	Max   string
+	Field string
+}
+
+// Compress into a string to hand off to the graph for later matching
+func (req *RangeRequest) Compress() string {
+
+	value := fmt.Sprintf("range||field=%s", req.Field)
+	if req.Min != "" {
+		value = fmt.Sprintf("%s||min=%s", value, req.Min)
+	}
+	if req.Max != "" {
+		value = fmt.Sprintf("%s||max=%s", value, req.Max)
+	}
+	return value
+}
+
+var (
+	description = "determine subsystem match based on membership in a range"
+	matcherName = "range"
+)
+
+func (s RangeType) Name() string {
+	return matcherName
+}
+
+func (s RangeType) Description() string {
+	return description
+}
+
+func NewRangeRequest(value string) *RangeRequest {
+	req := RangeRequest{}
+	pieces := strings.Split(value, "||")
+	for _, piece := range pieces {
+		if strings.HasPrefix(piece, "min=") {
+			req.Min = strings.ReplaceAll(piece, "min=", "")
+		} else if strings.HasPrefix(piece, "max=") {
+			req.Max = strings.ReplaceAll(piece, "max=", "")
+		} else if strings.HasPrefix(piece, "field=") {
+			req.Field = strings.ReplaceAll(piece, "field=", "")
+		}
+	}
+	return &req
+}
+
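+// Example (added for clarity; not in the original patch): the request
+// {Field: "version", Min: "0.5.1", Max: "0.5.5"} compresses to
+// "range||field=version||min=0.5.1||max=0.5.5", and NewRangeRequest
+// recovers the same field, min, and max from that string.
+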
+// Determine if a range request satisfies the node field
+// If the user specifies a wonky range, this will still work,
+// but not as they expect :)
+func (req *RangeRequest) Satisfies(value string) (bool, error) {
+
+	// We already have the value for the field from the graph, now just use semver to match
+	matchVersion, err := semver.NewVersion(value)
+	if err != nil {
+		// fmt.Printf(" => Error parsing semver for match value %s\n", err)
+		return false, err
+	}
+	if req.Min != "" {
+		// Is the version provided greater than the min requested?
+		c, err := semver.NewConstraint(fmt.Sprintf("> %s", req.Min))
+		if err != nil {
+			// fmt.Printf(" => Error parsing min constraint %s\n", err)
+			return false, err
+		}
+		// Check if the version meets the constraint
+		satisfied := c.Check(matchVersion)
+		if !satisfied {
+			// fmt.Printf(" => Not satisfied\n")
+			return false, nil
+		}
+	}
+	if req.Max != "" {
+		// Is the version provided less than the max requested?
+		c, err := semver.NewConstraint(fmt.Sprintf("< %s", req.Max))
+		if err != nil {
+			// fmt.Printf(" => Error parsing max constraint %s\n", err)
+			return false, err
+		}
+		// Check if the version meets the constraint
+		satisfied := c.Check(matchVersion)
+		if !satisfied {
+			// fmt.Printf(" => Not satisfied\n")
+			return false, nil
+		}
+	}
+	return true, nil
+}
+
+// GetSlotResourceNeeds assumes a subsystem request as follows
+/*
+task:
+  command:
+  - spack
+  slot: default
+  count:
+    per_slot: 1
+  resources:
+    spack:
+      range:
+      - field: version
+        min: "0.5.1"
+        max: "0.5.5"
+*/
+// We look for a field in the subsystem metadata attached to a node,
+// in the example above "version", and then parse either > a min, < a max,
+// or between the range.
+func (m RangeType) GetSlotResourceNeeds(slot *v1.Task) *types.SlotResourceNeeds {
+	sNeeds := map[string]map[string]bool{}
+	for subsystem, needs := range slot.Resources {
+
+		// Needs should be interface{} --> map[string][]map[string]string{}
+		// Assume if we cannot parse, don't consider
+		needs, ok := needs.(map[string]interface{})
+		if !ok {
+			continue
+		}
+
+		// Do we have a range algorithm?
+		request, ok := needs["range"]
+		if !ok {
+			continue
+		}
+
+		// Now "request" goes from interface{} -> []map[string]string{}
+		matches, ok := request.([]interface{})
+		if !ok {
+			continue
+		}
+
+		// Finally, we just parse the list - these should be entries with a field plus a min and/or max
+		for _, entry := range matches {
+			entry, ok := entry.(map[string]interface{})
+			if !ok {
+				continue
+			}
+
+			// Go through each entry and parse into a request
+			req := RangeRequest{}
+			for key, value := range entry {
+				value, ok := value.(string)
+
+				// We only know how to parse these
+				if key == "field" && ok {
+					req.Field = value
+				} else if key == "min" && ok {
+					req.Min = value
+				} else if key == "max" && ok {
+					req.Max = value
+				}
+			}
+			// If we get here and we have a field and at LEAST
+			// one of min or max, we can add it to our needs
+			// This is a bit janky - compressing with || separators
+			if req.Field != "" && (req.Min != "" || req.Max != "") {
+				_, ok := sNeeds[subsystem]
+				if !ok {
+					sNeeds[subsystem] = map[string]bool{}
+				}
+				// This sets the starting state that the range is not satisfied
+				sNeeds[subsystem][req.Compress()] = false
+			}
+		}
+	}
+	// Parse into the slot resource needs
+	needs := []types.SubsystemNeeds{}
+	for subsystem, sneeds := range sNeeds {
+		subsystemNeeds := types.SubsystemNeeds{Name: subsystem, Attributes: sneeds}
+		needs = append(needs, subsystemNeeds)
+	}
+
+	// If we don't have any needs, the slot is satisfied by default
+	slotNeeds := &types.SlotResourceNeeds{Subsystems: needs}
+	if len(needs) == 0 {
+		slotNeeds.Satisfied = true
+	}
+	fmt.Printf(" => Assessing needs for slot: %v\n", slotNeeds)
+	return slotNeeds
+}
+
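+// Note (added for clarity; not in the original patch): the compressed
+// attribute keys produced by GetSlotResourceNeeds above (for example
+// "range||field=version||min=0.5.1||max=0.5.5") are exactly what
+// CheckSubsystemEdge below re-parses with NewRangeRequest, flipping each
+// attribute to true once a subsystem edge satisfies it.
+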
+// The vertex (from the dominant subsystem) is only passed in for informational purposes.
+func (m RangeType) CheckSubsystemEdge(
+	slotNeeds *types.SlotResourceNeeds,
+	edge *types.Edge,
+	vtx *types.Vertex,
+) {
+
+	// Return early if we are satisfied
+	if slotNeeds.Satisfied {
+		return
+	}
+
+	// Determine if our slot needs can be met
+	// fmt.Printf("Looking at edge %s->%s\n", edge.Relation, edge.Vertex.Type)
+
+	// TODO: keep a record if all are satisfied so we stop searching
+	// earlier if this is the case on subsequent calls
+	for i, subsys := range slotNeeds.Subsystems {
+
+		// fmt.Printf("      => Looking in subsystem %s\n", edge.Subsystem)
+
+		// The subsystem has an edge defined here!
+		if subsys.Name == edge.Subsystem {
+			// fmt.Printf("      => Found matching subsystem %s for %s\n", subsys.Name, edge.Subsystem)
+
+			// This would match the top level subsystem name
+			for k := range subsys.Attributes {
+				// fmt.Printf("      => Looking at edge %s '%s' for %s that needs %s\n", edge.Subsystem, edge.Vertex.Type, subsys.Name, k)
+
+				// We care if the attribute is marked as a range
+				if strings.HasPrefix(k, "range") {
+
+					// fmt.Printf("      => Found %s and inspecting edge metadata %v\n", k, edge.Vertex.Metadata.Elements)
+
+					req := NewRangeRequest(k)
+					// Get the field requested by the jobspec
+					toMatch, err := edge.Vertex.Metadata.GetStringElement(req.Field)
+					if err != nil {
+						continue
+					}
+
+					// fmt.Printf("      => Found field requested for range match %s\n", toMatch)
+					satisfied, err := req.Satisfies(toMatch)
+					if err != nil {
+						continue
+					}
+					if satisfied {
+						fmt.Printf("      => Resource '%s' has edge '%s' that satisfies subsystem %s %s\n", vtx.Type, edge.Vertex.Type, subsys.Name, k)
+						subsys.Attributes[k] = true
+					}
+				}
+			}
+		}
+		slotNeeds.Subsystems[i] = subsys
+	}
+
+	// Try to avoid future checking if subsystem needs are addressed
+	allSatisfied := true
+	for _, subsys := range slotNeeds.Subsystems {
+		for _, v := range subsys.Attributes {
+			if !v {
+				allSatisfied = false
+				break
+			}
+		}
+	}
+	// This is going to provide a quick check to determine if the subsystem
+	// is satisfied without needing to parse again
+	slotNeeds.Satisfied = allSatisfied
+}
+
+// Init provides extra initialization functionality, if needed
+func (s RangeType) Init(options map[string]string) error {
+	// If an algorithm has options, they can be set here
+	return nil
+}
+
+// Add the match algorithm to be known to rainbow
+func init() {
+	algo := RangeType{}
+	algorithm.Register(algo)
+}
diff --git a/plugins/backends/memory/dfs.go b/plugins/backends/memory/dfs.go
index 79c6758..7cbc50e 100644
--- a/plugins/backends/memory/dfs.go
+++ b/plugins/backends/memory/dfs.go
@@ -91,7 +91,7 @@ func (g *ClusterGraph) depthFirstSearch(
 
 	// Note that in the experimental version we have one task and thus one slot
 	if !g.quiet {
-		fmt.Printf("  🎰 Slots that need to be satisfied\n")
+		fmt.Printf("  🎰 Slots that need to be satisfied with matcher %s\n", matcher.Name())
 	}
 	slots := map[string]*v1.Task{}
diff --git a/plugins/backends/memory/memory.go b/plugins/backends/memory/memory.go
index a86fc89..b4bb118 100644
--- a/plugins/backends/memory/memory.go
+++ b/plugins/backends/memory/memory.go
@@ -96,7 +96,8 @@ func (g MemoryGraph) Satisfies(
 	if err != nil {
 		return matches, err
 	}
-	request := service.SatisfyRequest{Payload: string(out)}
+	// Make the satisfy request, ensuring we provide the match algorithm by name
+	request := service.SatisfyRequest{Payload: string(out), Matcher: matcher.Name()}
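+	// The matcher name (e.g., "match" or "range") lets the memory service
+	// look up the same registered match algorithm when satisfying the request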
 	ctx := context.Background()
 	response, err := client.Satisfy(ctx, &request)
 	if err != nil {
diff --git a/plugins/backends/memory/resource.go b/plugins/backends/memory/resource.go
index f18c773..3287363 100644
--- a/plugins/backends/memory/resource.go
+++ b/plugins/backends/memory/resource.go
@@ -28,10 +28,12 @@ func NewResource(node jgf.Node) *types.Resource {
 		resourceUnit = unit
 	}
 
+	// Throw in the rest of the metadata for algorithms to parse
 	return &types.Resource{
-		Size: resourceSize,
-		Unit: resourceUnit,
-		Type: resourceType,
+		Size:     resourceSize,
+		Unit:     resourceUnit,
+		Type:     resourceType,
+		Metadata: node.Metadata,
 	}
 }
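As an aside, a minimal sketch (not part of the patch) of how this retained
metadata is consumed: a match algorithm such as range above can read a
subsystem field back out of the resource. The node variable and the
"version" field are assumptions for illustration:

	// Build a resource from a parsed JGF node, keeping its metadata
	resource := NewResource(node)

	// Later, a matcher can pull a field out of that metadata, the same
	// way range.go calls GetStringElement on the edge vertex metadata
	version, err := resource.Metadata.GetStringElement("version")
	if err == nil {
		fmt.Printf("node provides version %s\n", version)
	}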