From f83a85bd5f79274a75c4b9701bafe85b01080455 Mon Sep 17 00:00:00 2001 From: Alex Price Date: Mon, 27 May 2024 17:36:27 +1000 Subject: [PATCH 1/3] add the ability to remove nodes older than a certain age even when the node group has reached the min - adds max_node_age field to node group configuration - max_node_age by default is disabled, and needs to be a positive, greater than zero duration to enable - when the node group has reached the minimum and there are nodes older than the max_node_age, the scale delta will be set to +1 - this will then force escalator to replace the oldest node fixes #239 Signed-off-by: Alex Price --- pkg/controller/controller.go | 33 +++ .../controller_scale_node_group_test.go | 224 +++++++++++++++++- pkg/controller/node_group.go | 27 +++ pkg/controller/node_group_test.go | 54 +++++ pkg/test/builder.go | 3 + 5 files changed, 333 insertions(+), 8 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 90c2aa52..a4e45735 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -373,6 +373,12 @@ func (c *Controller) scaleNodeGroup(nodegroup string, nodeGroup *NodeGroupState) nodesDelta = int(math.Max(float64(nodesDelta), 1)) } + if c.scaleOnMaxNodeAge(nodeGroup, untaintedNodes) { + log.WithField("nodegroup", nodegroup). + Info("Setting scale to minimum of 1 to rotate out a node older than the max node age") + nodesDelta = int(math.Max(float64(nodesDelta), 1)) + } + log.WithField("nodegroup", nodegroup).Debugf("Delta: %v", nodesDelta) scaleOptions := scaleOpts{ @@ -431,6 +437,33 @@ func (c *Controller) isScaleOnStarve( len(untaintedNodes) < nodeGroup.Opts.MaxNodes } +// scaleOnMaxNodeAge returns true if the node group is at the minimum and there is a node in the untaintedNodes older +// than the configured node group's maxNodeAge. The idea here is to constantly rotate out the oldest nodes +// when the node group is at the minimum. This is to ensure the nodes are receiving the most up-to-date configuration +// from the cloud provider. +func (c *Controller) scaleOnMaxNodeAge(nodeGroup *NodeGroupState, untaintedNodes []*v1.Node) bool { + // Only enable this functionality if the node group has the feature enabled + if nodeGroup.Opts.MaxNodeAgeDuration() <= 0 { + return false + } + + // We don't want to attempt to rotate nodes that have reached the max age if we haven't reached the minimum node + // count, as the scaling down of the node group will remove the oldest first anyway. + if len(untaintedNodes) != nodeGroup.Opts.MinNodes || len(untaintedNodes) == 0 { + return false + } + + // Determine if there is an untainted node that exceeds the max age, if so then we should scale up + // to trigger that node to be replaced. + for _, n := range untaintedNodes { + if time.Since(n.CreationTimestamp.Time) > nodeGroup.Opts.MaxNodeAgeDuration() { + return true + } + } + + return false +} + // RunOnce performs the main autoscaler logic once func (c *Controller) RunOnce() error { startTime := time.Now() diff --git a/pkg/controller/controller_scale_node_group_test.go b/pkg/controller/controller_scale_node_group_test.go index 538d4235..3c355f04 100644 --- a/pkg/controller/controller_scale_node_group_test.go +++ b/pkg/controller/controller_scale_node_group_test.go @@ -2,7 +2,7 @@ package controller import ( "testing" - duration "time" + stdtime "time" "github.com/atlassian/escalator/pkg/k8s" "github.com/atlassian/escalator/pkg/k8s/resource" @@ -39,7 +39,7 @@ func buildTestClient(nodes []*v1.Node, pods []*v1.Pod, nodeGroups []NodeGroupOpt opts := Opts{ K8SClient: fakeClient, NodeGroups: nodeGroups, - ScanInterval: 1 * duration.Minute, + ScanInterval: 1 * stdtime.Minute, DryMode: false, } allPodLister, err := test.NewTestPodWatcher(pods, listerOptions.podListerOptions) @@ -853,7 +853,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) { args args scaleUpWithCachedCapacity bool runs int - runInterval duration.Duration + runInterval stdtime.Duration want int err error }{ @@ -879,7 +879,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) { }, false, 1, - duration.Minute, + stdtime.Minute, -4, nil, }, @@ -905,7 +905,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) { }, false, 5, - duration.Minute, + stdtime.Minute, -2, nil, }, @@ -931,7 +931,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) { }, false, 1, - duration.Minute, + stdtime.Minute, -4, nil, }, @@ -958,7 +958,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) { }, false, 1, - duration.Minute, + stdtime.Minute, 1, nil, }, @@ -985,7 +985,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) { }, true, 1, - duration.Minute, + stdtime.Minute, 6, nil, }, @@ -1060,3 +1060,211 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) { }) } } + +func TestScaleNodeGroupNodeMaxAge(t *testing.T) { + buildNode := func(creation stdtime.Time, tainted bool) *v1.Node { + return test.BuildTestNode(test.NodeOpts{ + CPU: int64(1000), + Mem: int64(1000), + Creation: creation, + Tainted: tainted, + }) + } + + type args struct { + nodes []*v1.Node + pods []*v1.Pod + nodeGroupOptions NodeGroupOptions + listerOptions ListerOptions + } + + tests := []struct { + name string + args args + expectedNodeDelta int + err error + }{ + { + "max_node_age disabled", + args{ + nodes: []*v1.Node{ + buildNode(time.Now().Add(-1*stdtime.Hour), false), + buildNode(time.Now().Add(-24*stdtime.Hour), false), + buildNode(time.Now().Add(-36*stdtime.Hour), false), + }, + pods: nil, + nodeGroupOptions: NodeGroupOptions{ + Name: "default", + CloudProviderGroupName: "default", + MinNodes: 3, + MaxNodes: 10, + ScaleUpThresholdPercent: 70, + MaxNodeAge: "0", + }, + listerOptions: ListerOptions{}, + }, + 0, + nil, + }, + { + "max_node_age enabled, max node age 12 hours", + args{ + nodes: []*v1.Node{ + buildNode(time.Now().Add(-1*stdtime.Hour), false), + buildNode(time.Now().Add(-24*stdtime.Hour), false), + buildNode(time.Now().Add(-36*stdtime.Hour), false), + }, + pods: nil, + nodeGroupOptions: NodeGroupOptions{ + Name: "default", + CloudProviderGroupName: "default", + MinNodes: 3, + MaxNodes: 10, + ScaleUpThresholdPercent: 70, + MaxNodeAge: "12h", + }, + listerOptions: ListerOptions{}, + }, + 1, + nil, + }, + { + "max_node_age enabled, max node age 48 hours", + args{ + nodes: []*v1.Node{ + buildNode(time.Now().Add(-1*stdtime.Hour), false), + buildNode(time.Now().Add(-24*stdtime.Hour), false), + buildNode(time.Now().Add(-36*stdtime.Hour), false), + }, + pods: nil, + nodeGroupOptions: NodeGroupOptions{ + Name: "default", + CloudProviderGroupName: "default", + MinNodes: 3, + MaxNodes: 10, + ScaleUpThresholdPercent: 70, + MaxNodeAge: "48h", + }, + listerOptions: ListerOptions{}, + }, + 0, + nil, + }, + { + "max_node_age enabled, but not at node minimum", + args{ + nodes: []*v1.Node{ + buildNode(time.Now().Add(-1*stdtime.Hour), false), + buildNode(time.Now().Add(-24*stdtime.Hour), false), + buildNode(time.Now().Add(-36*stdtime.Hour), false), + }, + pods: nil, + nodeGroupOptions: NodeGroupOptions{ + Name: "default", + CloudProviderGroupName: "default", + MinNodes: 1, + MaxNodes: 10, + ScaleUpThresholdPercent: 70, + MaxNodeAge: "12h", + }, + listerOptions: ListerOptions{}, + }, + 0, + nil, + }, + { + "max_node_age enabled, but no nodes", + args{ + nodes: nil, + pods: nil, + nodeGroupOptions: NodeGroupOptions{ + Name: "default", + CloudProviderGroupName: "default", + MinNodes: 1, + MaxNodes: 10, + ScaleUpThresholdPercent: 70, + MaxNodeAge: "12h", + }, + listerOptions: ListerOptions{}, + }, + 0, + nil, + }, + { + "max_node_age enabled, some nodes are tainted", + args{ + nodes: []*v1.Node{ + buildNode(time.Now().Add(-1*stdtime.Hour), false), + buildNode(time.Now().Add(-24*stdtime.Hour), false), + buildNode(time.Now().Add(-36*stdtime.Hour), true), + }, + pods: nil, + nodeGroupOptions: NodeGroupOptions{ + Name: "default", + CloudProviderGroupName: "default", + MinNodes: 1, + MaxNodes: 10, + ScaleUpThresholdPercent: 70, + MaxNodeAge: "12h", + }, + listerOptions: ListerOptions{}, + }, + 0, + nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + nodeGroups := []NodeGroupOptions{tt.args.nodeGroupOptions} + ngName := tt.args.nodeGroupOptions.Name + client, opts, err := buildTestClient(tt.args.nodes, tt.args.pods, nodeGroups, tt.args.listerOptions) + require.NoError(t, err) + + // For these test cases we only use 1 node group/cloud provider node group + nodeGroupSize := 1 + + // Create a test (mock) cloud provider + testCloudProvider := test.NewCloudProvider(nodeGroupSize) + testNodeGroup := test.NewNodeGroup( + tt.args.nodeGroupOptions.CloudProviderGroupName, + tt.args.nodeGroupOptions.Name, + int64(tt.args.nodeGroupOptions.MinNodes), + int64(tt.args.nodeGroupOptions.MaxNodes), + int64(len(tt.args.nodes)), + ) + testCloudProvider.RegisterNodeGroup(testNodeGroup) + + // Create a node group state with the mapping of node groups to the cloud providers node groups + nodeGroupsState := BuildNodeGroupsState(nodeGroupsStateOpts{ + nodeGroups: nodeGroups, + client: *client, + }) + + controller := &Controller{ + Client: client, + Opts: opts, + stopChan: nil, + nodeGroups: nodeGroupsState, + cloudProvider: testCloudProvider, + } + + nodesDelta, err := controller.scaleNodeGroup(ngName, nodeGroupsState[ngName]) + + // Ensure there were no errors + if tt.err == nil { + require.NoError(t, err) + } else { + require.EqualError(t, tt.err, err.Error()) + } + + assert.Equal(t, tt.expectedNodeDelta, nodesDelta) + if nodesDelta <= 0 { + return + } + + // Ensure the node group on the cloud provider side scales up to the correct amount + assert.Equal(t, int64(len(tt.args.nodes)+nodesDelta), testNodeGroup.TargetSize()) + }) + } +} diff --git a/pkg/controller/node_group.go b/pkg/controller/node_group.go index 073844be..6bd0caf0 100644 --- a/pkg/controller/node_group.go +++ b/pkg/controller/node_group.go @@ -47,10 +47,13 @@ type NodeGroupOptions struct { AWS AWSNodeGroupOptions `json:"aws" yaml:"aws"` + MaxNodeAge string `json:"max_node_age,omitempty" yaml:"max_node_age,omitempty"` + // Private variables for storing the parsed duration from the string softDeleteGracePeriodDuration time.Duration hardDeleteGracePeriodDuration time.Duration scaleUpCoolDownPeriodDuration time.Duration + maxNodeAgeDuration time.Duration } // AWSNodeGroupOptions represents a nodegroup running on a cluster that is @@ -124,6 +127,9 @@ func ValidateNodeGroup(nodegroup NodeGroupOptions) []error { checkThat(validTaintEffect(nodegroup.TaintEffect), "taint_effect must be valid kubernetes taint") checkThat(validAWSLifecycle(nodegroup.AWS.Lifecycle), "aws.lifecycle must be '%v' or '%v' if provided.", aws.LifecycleOnDemand, aws.LifecycleSpot) + + checkThat(validMaxNodeAgeDuration(nodegroup.MaxNodeAge), "max_node_age failed to parse into a time.Duration. Set to '0' or '' to disable, or a positive Go duration to enable.") + return problems } @@ -137,6 +143,15 @@ func validTaintEffect(taintEffect v1.TaintEffect) bool { return len(taintEffect) == 0 || k8s.TaintEffectTypes[taintEffect] } +func validMaxNodeAgeDuration(maxNodeAge string) bool { + // Accept a blank maxNodeAge as valid, which will disable the feature + if maxNodeAge == "" { + return true + } + _, err := time.ParseDuration(maxNodeAge) + return err == nil +} + // SoftDeleteGracePeriodDuration lazily returns/parses the softDeleteGracePeriod string into a duration func (n *NodeGroupOptions) SoftDeleteGracePeriodDuration() time.Duration { if n.softDeleteGracePeriodDuration == 0 { @@ -176,6 +191,18 @@ func (n *NodeGroupOptions) ScaleUpCoolDownPeriodDuration() time.Duration { return n.scaleUpCoolDownPeriodDuration } +func (n *NodeGroupOptions) MaxNodeAgeDuration() time.Duration { + if n.maxNodeAgeDuration == 0 { + duration, err := time.ParseDuration(n.MaxNodeAge) + if err != nil { + return 0 + } + n.maxNodeAgeDuration = duration + } + + return n.maxNodeAgeDuration +} + // autoDiscoverMinMaxNodeOptions returns whether the min_nodes and max_nodes options should be "auto-discovered" from the cloud provider func (n *NodeGroupOptions) autoDiscoverMinMaxNodeOptions() bool { return n.MinNodes == 0 && n.MaxNodes == 0 diff --git a/pkg/controller/node_group_test.go b/pkg/controller/node_group_test.go index 1a71515e..16dd80a7 100644 --- a/pkg/controller/node_group_test.go +++ b/pkg/controller/node_group_test.go @@ -335,6 +335,7 @@ func TestUnmarshalNodeGroupOptions(t *testing.T) { assert.Equal(t, time.Minute*10, opts[0].SoftDeleteGracePeriodDuration()) assert.Equal(t, time.Duration(0), opts[0].HardDeleteGracePeriodDuration()) assert.Equal(t, v1.TaintEffectNoExecute, opts[0].TaintEffect) + assert.Equal(t, time.Duration(0), opts[0].MaxNodeAgeDuration()) assert.NotNil(t, opts[1]) assert.Equal(t, "default", opts[1].Name) @@ -344,6 +345,7 @@ func TestUnmarshalNodeGroupOptions(t *testing.T) { assert.Equal(t, 10, opts[1].MaxNodes) assert.Equal(t, true, opts[1].DryMode) assert.Equal(t, v1.TaintEffectNoSchedule, opts[1].TaintEffect) + assert.Equal(t, 12*time.Hour, opts[1].MaxNodeAgeDuration()) }) t.Run("test yaml unmarshal bad", func(t *testing.T) { @@ -394,6 +396,7 @@ node_groups: hard_delete_grace_period: 42 scale_up_cooldown_period: 1h2m30s taint_effect: NoExecute + max_node_age: 0 - name: "default" label_key: "customer" label_value: "shared" @@ -406,6 +409,7 @@ node_groups: fast_node_removal_rate: 3 scale_up_cooldown_period: 21h taint_effect: NoSchedule + max_node_age: 12h ` var yamlBE = `node_groups: @@ -448,6 +452,7 @@ func TestValidateNodeGroup(t *testing.T) { HardDeleteGracePeriod: "1h10m", ScaleUpCoolDownPeriod: "55m", TaintEffect: "NoExecute", + MaxNodeAge: "12h", }, }, nil, @@ -475,6 +480,53 @@ func TestValidateNodeGroup(t *testing.T) { }, nil, }, + { + "valid nodegroup with empty maxNodeAge", + args{ + NodeGroupOptions{ + Name: "test", + LabelKey: "customer", + LabelValue: "buileng", + CloudProviderGroupName: "somegroup", + TaintUpperCapacityThresholdPercent: 70, + TaintLowerCapacityThresholdPercent: 60, + ScaleUpThresholdPercent: 100, + MinNodes: 1, + MaxNodes: 3, + SlowNodeRemovalRate: 1, + FastNodeRemovalRate: 2, + SoftDeleteGracePeriod: "10m", + HardDeleteGracePeriod: "1h10m", + ScaleUpCoolDownPeriod: "55m", + MaxNodeAge: "", + }, + }, + nil, + }, + { + "valid nodegroup with zero max_node_age", + args{ + NodeGroupOptions{ + Name: "test", + LabelKey: "customer", + LabelValue: "buileng", + CloudProviderGroupName: "somegroup", + TaintUpperCapacityThresholdPercent: 70, + TaintLowerCapacityThresholdPercent: 60, + ScaleUpThresholdPercent: 100, + MinNodes: 1, + MaxNodes: 3, + SlowNodeRemovalRate: 1, + FastNodeRemovalRate: 2, + SoftDeleteGracePeriod: "10m", + HardDeleteGracePeriod: "1h10m", + ScaleUpCoolDownPeriod: "55m", + TaintEffect: "", + MaxNodeAge: "0", + }, + }, + nil, + }, { "invalid nodegroup", args{ @@ -494,6 +546,7 @@ func TestValidateNodeGroup(t *testing.T) { HardDeleteGracePeriod: "1h10m", ScaleUpCoolDownPeriod: "21h21m21s", TaintEffect: "invalid", + MaxNodeAge: "bla", }, }, []string{ @@ -503,6 +556,7 @@ func TestValidateNodeGroup(t *testing.T) { "max_nodes must be larger than 0", "soft_delete_grace_period failed to parse into a time.Duration. check your formatting.", "taint_effect must be valid kubernetes taint", + "max_node_age failed to parse into a time.Duration. Set to '0' or '' to disable, or a positive Go duration to enable.", }, }, } diff --git a/pkg/test/builder.go b/pkg/test/builder.go index 3775fe35..d4ff7ded 100644 --- a/pkg/test/builder.go +++ b/pkg/test/builder.go @@ -102,6 +102,9 @@ func NameFromChan(c <-chan string, timeout time.Duration) string { // BuildTestNode creates a node with specified capacity. func BuildTestNode(opts NodeOpts) *apiv1.Node { + if opts.Name == "" { + opts.Name = uuid.New().String() + } var taints []apiv1.Taint if opts.Tainted { From 6e227c53a476e1e510eaba77bb12179ac4f593ab Mon Sep 17 00:00:00 2001 From: Alex Price Date: Thu, 30 May 2024 16:07:14 +1000 Subject: [PATCH 2/3] add test case for scale down to zero, also fix edge case that tries to rotate nodes when at the ASG min, but there is a tainted node Signed-off-by: Alex Price --- pkg/controller/controller.go | 7 ++-- .../controller_scale_node_group_test.go | 39 +++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index a4e45735..edbdf6db 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -373,7 +373,7 @@ func (c *Controller) scaleNodeGroup(nodegroup string, nodeGroup *NodeGroupState) nodesDelta = int(math.Max(float64(nodesDelta), 1)) } - if c.scaleOnMaxNodeAge(nodeGroup, untaintedNodes) { + if c.scaleOnMaxNodeAge(nodeGroup, untaintedNodes, taintedNodes) { log.WithField("nodegroup", nodegroup). Info("Setting scale to minimum of 1 to rotate out a node older than the max node age") nodesDelta = int(math.Max(float64(nodesDelta), 1)) @@ -441,7 +441,7 @@ func (c *Controller) isScaleOnStarve( // than the configured node group's maxNodeAge. The idea here is to constantly rotate out the oldest nodes // when the node group is at the minimum. This is to ensure the nodes are receiving the most up-to-date configuration // from the cloud provider. -func (c *Controller) scaleOnMaxNodeAge(nodeGroup *NodeGroupState, untaintedNodes []*v1.Node) bool { +func (c *Controller) scaleOnMaxNodeAge(nodeGroup *NodeGroupState, untaintedNodes []*v1.Node, taintedNodes []*v1.Node) bool { // Only enable this functionality if the node group has the feature enabled if nodeGroup.Opts.MaxNodeAgeDuration() <= 0 { return false @@ -449,7 +449,8 @@ func (c *Controller) scaleOnMaxNodeAge(nodeGroup *NodeGroupState, untaintedNodes // We don't want to attempt to rotate nodes that have reached the max age if we haven't reached the minimum node // count, as the scaling down of the node group will remove the oldest first anyway. - if len(untaintedNodes) != nodeGroup.Opts.MinNodes || len(untaintedNodes) == 0 { + // We also don't want to try to rotate out nodes if there are already nodes tainted. + if len(untaintedNodes) != nodeGroup.Opts.MinNodes || len(untaintedNodes) == 0 || len(taintedNodes) > 0 { return false } diff --git a/pkg/controller/controller_scale_node_group_test.go b/pkg/controller/controller_scale_node_group_test.go index 3c355f04..325da2ad 100644 --- a/pkg/controller/controller_scale_node_group_test.go +++ b/pkg/controller/controller_scale_node_group_test.go @@ -1212,6 +1212,45 @@ func TestScaleNodeGroupNodeMaxAge(t *testing.T) { 0, nil, }, + { + "max_node_age enabled, scaled down to zero", + args{ + nodes: []*v1.Node{}, + pods: nil, + nodeGroupOptions: NodeGroupOptions{ + Name: "default", + CloudProviderGroupName: "default", + MinNodes: 0, + MaxNodes: 10, + ScaleUpThresholdPercent: 70, + MaxNodeAge: "12h", + }, + listerOptions: ListerOptions{}, + }, + 0, + nil, + }, + { + "max_node_age enabled, 1 tainted, 1 untainted", + args{ + nodes: []*v1.Node{ + buildNode(time.Now().Add(-1*stdtime.Hour), false), + buildNode(time.Now().Add(-24*stdtime.Hour), true), + }, + pods: nil, + nodeGroupOptions: NodeGroupOptions{ + Name: "default", + CloudProviderGroupName: "default", + MinNodes: 1, + MaxNodes: 10, + ScaleUpThresholdPercent: 70, + MaxNodeAge: "30m", + }, + listerOptions: ListerOptions{}, + }, + 0, + nil, + }, } for _, tt := range tests { From 72d1da338e2fdcccf3d828d4fdfdf20ca0f6efd2 Mon Sep 17 00:00:00 2001 From: Alex Price Date: Thu, 6 Jun 2024 14:56:05 +1000 Subject: [PATCH 3/3] add docs Signed-off-by: Alex Price --- docs/configuration/nodegroup.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/configuration/nodegroup.md b/docs/configuration/nodegroup.md index 888f7676..a1ba2401 100644 --- a/docs/configuration/nodegroup.md +++ b/docs/configuration/nodegroup.md @@ -27,6 +27,7 @@ node_groups: soft_delete_grace_period: 1m hard_delete_grace_period: 10m taint_effect: NoExecute + max_node_age: 24h aws: fleet_instance_ready_timeout: 1m launch_template_id: lt-1a2b3c4d @@ -258,3 +259,17 @@ be taken from the launch template. Tag ASG and Fleet Request resources used by Escalator with the metatdata key-value pair `k8s.io/atlassian-escalator/enabled`:`true`. Tagging doesn't alter the functionality of Escalator. Read more about tagging your AWS resources [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html). + +### `max_node_age` + +`max_node_age` allows the configuration of a maximum age for nodes in the node group when the node group has scaled down +to the minimum node group size. When at the minimum node grop size, Escalator will trigger a scale up by a minimum of 1 +if there are any nodes exceeding this max node age in an effort to have the old node rotated out. + +This is to ensure that nodes in the node group are kept relatively new to pick up any changes to the launch +configuration or launch template. + +When not at the minimum, the natural scaling up and down of the node group will ensure new nodes are introduced to the +node group. + +This is an optional feature and by default is disabled.