add the ability to remove nodes older than a certain age even when the node group has reached the min #240

Merged · 3 commits · Jun 6, 2024
15 changes: 15 additions & 0 deletions docs/configuration/nodegroup.md
@@ -27,6 +27,7 @@ node_groups:
soft_delete_grace_period: 1m
hard_delete_grace_period: 10m
taint_effect: NoExecute
max_node_age: 24h
aws:
fleet_instance_ready_timeout: 1m
launch_template_id: lt-1a2b3c4d
@@ -258,3 +259,17 @@ be taken from the launch template.
Tag ASG and Fleet Request resources used by Escalator with the metadata key-value pair
`k8s.io/atlassian-escalator/enabled`:`true`. Tagging doesn't alter the functionality of Escalator. Read more about
tagging your AWS resources [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html).

### `max_node_age`

`max_node_age` allows the configuration of a maximum age for nodes in the node group when the node group has scaled down
to the minimum node group size. When at the minimum node group size, Escalator will trigger a scale up by a minimum of 1
node if any node exceeds this max node age, so that the old node is rotated out.

This is to ensure that nodes in the node group are kept relatively new to pick up any changes to the launch
configuration or launch template.

When not at the minimum, the natural scaling up and down of the node group will ensure new nodes are introduced to the
node group.

This is an optional feature and is disabled by default.
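
As an illustration, a node group that rotates out nodes older than a day while sitting at its minimum size could be configured as in the sketch below. Only `max_node_age` is the new option; the other keys and all values are illustrative and assumed from the examples elsewhere on this page:

```yaml
node_groups:
  - name: "example"                  # hypothetical node group name
    min_nodes: 3                     # rotation only applies while at this minimum
    max_nodes: 10
    scale_up_threshold_percent: 70
    max_node_age: 24h                # omit (or set to 0) to leave the feature disabled
```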
34 changes: 34 additions & 0 deletions pkg/controller/controller.go
@@ -373,6 +373,12 @@ func (c *Controller) scaleNodeGroup(nodegroup string, nodeGroup *NodeGroupState)
nodesDelta = int(math.Max(float64(nodesDelta), 1))
}

if c.scaleOnMaxNodeAge(nodeGroup, untaintedNodes, taintedNodes) {
log.WithField("nodegroup", nodegroup).
Info("Setting scale to minimum of 1 to rotate out a node older than the max node age")
nodesDelta = int(math.Max(float64(nodesDelta), 1))
Member:
should this also protect against nodesDelta exceeding ASGMax? Or is it protected against in normal scale functions?

Member Author:
This is somewhat related to the other conversation, but I don't think this is relevant for the tests of this feature - this should already be covered by the general scaling tests that check that if nodeDelta would breach the ASG max, Escalator prevents it.

}

log.WithField("nodegroup", nodegroup).Debugf("Delta: %v", nodesDelta)

scaleOptions := scaleOpts{
@@ -431,6 +437,34 @@ func (c *Controller) isScaleOnStarve(
len(untaintedNodes) < nodeGroup.Opts.MaxNodes
}

// scaleOnMaxNodeAge returns true if the node group is at the minimum and there is a node in the untaintedNodes older
// than the configured node group's maxNodeAge. The idea here is to constantly rotate out the oldest nodes
// when the node group is at the minimum. This is to ensure the nodes are receiving the most up-to-date configuration
// from the cloud provider.
func (c *Controller) scaleOnMaxNodeAge(nodeGroup *NodeGroupState, untaintedNodes []*v1.Node, taintedNodes []*v1.Node) bool {
// Only enable this functionality if the node group has the feature enabled
if nodeGroup.Opts.MaxNodeAgeDuration() <= 0 {
return false
}

// We don't want to attempt to rotate nodes that have reached the max age if we haven't reached the minimum node
// count, as the scaling down of the node group will remove the oldest first anyway.
Collaborator:
There's still an edge case here which is a node group that somehow has very consistent load such that it always maintains X number of nodes (e.g. always 2) and thus never gets cycled. I think this is very unlikely with the kinds of workloads that run on Escalator, but worth calling out in case others think it's worth considering here.

Member Author:
I had thought of this edge case whilst writing this feature - I guess it's been an edge case in Escalator since its existence, and won't be fixed or made worse by this feature being added.

Ideally if there is a consistent base load, the minimum nodes in the ASG should really be configured to a value that meets the base load, then this feature can be used to ensure the nodes are cycled out.

I can check if this edge case is documented in Escalator's documentation.

Member Author:
> I can check if this edge case is documented in Escalator's documentation

This isn't documented anywhere - I've raised #241

// We also don't want to try to rotate out nodes if there are already nodes tainted.
if len(untaintedNodes) != nodeGroup.Opts.MinNodes || len(untaintedNodes) == 0 || len(taintedNodes) > 0 {
return false
}

// Determine if there is an untainted node that exceeds the max age, if so then we should scale up
// to trigger that node to be replaced.
for _, n := range untaintedNodes {
if time.Since(n.CreationTimestamp.Time) > nodeGroup.Opts.MaxNodeAgeDuration() {
return true
}
}

return false
}

// RunOnce performs the main autoscaler logic once
func (c *Controller) RunOnce() error {
startTime := time.Now()
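
The new scale-up path calls `nodeGroup.Opts.MaxNodeAgeDuration()`, whose implementation is not part of this diff. As a minimal sketch of how the `max_node_age` string (e.g. `24h`) could map to the duration checked above, assuming standard `time.ParseDuration` semantics and treating an empty or unparsable value as disabled:

```go
package main

import (
	"fmt"
	"time"
)

// maxNodeAgeDuration sketches how a max_node_age string such as "24h" could be
// converted into a time.Duration. An empty or unparsable value yields 0, which
// the controller treats as "feature disabled" (MaxNodeAgeDuration() <= 0).
func maxNodeAgeDuration(maxNodeAge string) time.Duration {
	d, err := time.ParseDuration(maxNodeAge)
	if err != nil {
		return 0
	}
	return d
}

func main() {
	fmt.Println(maxNodeAgeDuration("24h")) // 24h0m0s
	fmt.Println(maxNodeAgeDuration("0"))   // 0s, i.e. disabled
	fmt.Println(maxNodeAgeDuration(""))    // 0s, i.e. disabled (parse error)
}
```

The actual method on `NodeGroupOptions` lives elsewhere in the codebase and may differ; this sketch is only meant to show why the `<= 0` check in `scaleOnMaxNodeAge` corresponds to the feature being disabled.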
263 changes: 255 additions & 8 deletions pkg/controller/controller_scale_node_group_test.go
@@ -2,7 +2,7 @@ package controller

import (
"testing"
duration "time"
stdtime "time"

"github.com/atlassian/escalator/pkg/k8s"
"github.com/atlassian/escalator/pkg/k8s/resource"
@@ -39,7 +39,7 @@ func buildTestClient(nodes []*v1.Node, pods []*v1.Pod, nodeGroups []NodeGroupOpt
opts := Opts{
K8SClient: fakeClient,
NodeGroups: nodeGroups,
ScanInterval: 1 * duration.Minute,
ScanInterval: 1 * stdtime.Minute,
DryMode: false,
}
allPodLister, err := test.NewTestPodWatcher(pods, listerOptions.podListerOptions)
@@ -853,7 +853,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
args args
scaleUpWithCachedCapacity bool
runs int
runInterval duration.Duration
runInterval stdtime.Duration
want int
err error
}{
@@ -879,7 +879,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
},
false,
1,
duration.Minute,
stdtime.Minute,
-4,
nil,
},
@@ -905,7 +905,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
},
false,
5,
duration.Minute,
stdtime.Minute,
-2,
nil,
},
@@ -931,7 +931,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
},
false,
1,
duration.Minute,
stdtime.Minute,
-4,
nil,
},
@@ -958,7 +958,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
},
false,
1,
duration.Minute,
stdtime.Minute,
1,
nil,
},
@@ -985,7 +985,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
},
true,
1,
duration.Minute,
stdtime.Minute,
6,
nil,
},
@@ -1060,3 +1060,250 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
})
}
}

func TestScaleNodeGroupNodeMaxAge(t *testing.T) {
buildNode := func(creation stdtime.Time, tainted bool) *v1.Node {
return test.BuildTestNode(test.NodeOpts{
CPU: int64(1000),
Mem: int64(1000),
Creation: creation,
Tainted: tainted,
})
}

type args struct {
nodes []*v1.Node
pods []*v1.Pod
nodeGroupOptions NodeGroupOptions
listerOptions ListerOptions
}

tests := []struct {
name string
args args
expectedNodeDelta int
err error
}{
Member:
could we add a test for num nodes == maxNodes, and all expired, with scale up enabled?

Collaborator:
What are you worried about here? Are we expecting delta to be 0 because we're at max, or to be positive but unable to go further due to max? Either way I think that particular case isn't related to this feature.

Member:
yeah just the very unlikely case of nodes at max for a long time, and this min of +1 node delta to cycle them trying to push above maxASG.

Member Author:
It's actually perfectly fine for node delta to be +1 when at the max ASG size - we have logic later on when trying to increase the ASG in the cloud provider above the max that will reject the scale up.

I don't think this test case is really relevant for this specific feature and the case of node delta exceeding the ASG max should be tested elsewhere.

Ideally we're really testing if nodeDelta is either +1 or 0 given the criteria for the feature and assuming whatever Escalator does with that nodeDelta is appropriately tested later on.

{
"max_node_age disabled",
args{
nodes: []*v1.Node{
buildNode(time.Now().Add(-1*stdtime.Hour), false),
buildNode(time.Now().Add(-24*stdtime.Hour), false),
buildNode(time.Now().Add(-36*stdtime.Hour), false),
},
pods: nil,
nodeGroupOptions: NodeGroupOptions{
Name: "default",
CloudProviderGroupName: "default",
MinNodes: 3,
MaxNodes: 10,
ScaleUpThresholdPercent: 70,
MaxNodeAge: "0",
},
listerOptions: ListerOptions{},
},
0,
nil,
},
{
"max_node_age enabled, max node age 12 hours",
args{
nodes: []*v1.Node{
buildNode(time.Now().Add(-1*stdtime.Hour), false),
buildNode(time.Now().Add(-24*stdtime.Hour), false),
buildNode(time.Now().Add(-36*stdtime.Hour), false),
},
pods: nil,
nodeGroupOptions: NodeGroupOptions{
Name: "default",
CloudProviderGroupName: "default",
MinNodes: 3,
MaxNodes: 10,
ScaleUpThresholdPercent: 70,
MaxNodeAge: "12h",
},
listerOptions: ListerOptions{},
},
1,
Collaborator:
So in this case, the behaviour is that we scale up one node, it would be replaced and then we would do another scale up to replace the other older node once we made it back to our minimum node count again. Is that correct?

Member Author:
Yep - that's the desired behaviour.

nil,
},
{
"max_node_age enabled, max node age 48 hours",
args{
nodes: []*v1.Node{
buildNode(time.Now().Add(-1*stdtime.Hour), false),
buildNode(time.Now().Add(-24*stdtime.Hour), false),
buildNode(time.Now().Add(-36*stdtime.Hour), false),
},
pods: nil,
nodeGroupOptions: NodeGroupOptions{
Name: "default",
CloudProviderGroupName: "default",
MinNodes: 3,
MaxNodes: 10,
ScaleUpThresholdPercent: 70,
MaxNodeAge: "48h",
},
listerOptions: ListerOptions{},
},
0,
nil,
},
{
"max_node_age enabled, but not at node minimum",
args{
nodes: []*v1.Node{
buildNode(time.Now().Add(-1*stdtime.Hour), false),
buildNode(time.Now().Add(-24*stdtime.Hour), false),
buildNode(time.Now().Add(-36*stdtime.Hour), false),
},
pods: nil,
nodeGroupOptions: NodeGroupOptions{
Name: "default",
CloudProviderGroupName: "default",
MinNodes: 1,
MaxNodes: 10,
ScaleUpThresholdPercent: 70,
MaxNodeAge: "12h",
},
listerOptions: ListerOptions{},
},
0,
nil,
},
{
"max_node_age enabled, but no nodes",
args{
nodes: nil,
pods: nil,
nodeGroupOptions: NodeGroupOptions{
Name: "default",
CloudProviderGroupName: "default",
MinNodes: 1,
MaxNodes: 10,
ScaleUpThresholdPercent: 70,
MaxNodeAge: "12h",
},
listerOptions: ListerOptions{},
},
0,
nil,
},
{
"max_node_age enabled, some nodes are tainted",
Collaborator:
Should there be a test case where we're at the MinNodes but some are tainted? I don't think that should occur in regular operation though, just throwing this out there in case it makes sense.

Member Author:
Had a look into adding this test case - it doesn't make sense to add to this feature's tests, as Escalator should get itself out of this scenario prior to the maxNodeAge code running (it will untaint the tainted nodes in an attempt to get the untainted nodes equal to the min nodes).

Again, this should be tested elsewhere already.

args{
nodes: []*v1.Node{
buildNode(time.Now().Add(-1*stdtime.Hour), false),
buildNode(time.Now().Add(-24*stdtime.Hour), false),
buildNode(time.Now().Add(-36*stdtime.Hour), true),
},
pods: nil,
nodeGroupOptions: NodeGroupOptions{
Name: "default",
CloudProviderGroupName: "default",
MinNodes: 1,
MaxNodes: 10,
ScaleUpThresholdPercent: 70,
MaxNodeAge: "12h",
},
listerOptions: ListerOptions{},
},
0,
nil,
},
{
"max_node_age enabled, scaled down to zero",
args{
nodes: []*v1.Node{},
pods: nil,
nodeGroupOptions: NodeGroupOptions{
Name: "default",
CloudProviderGroupName: "default",
MinNodes: 0,
MaxNodes: 10,
ScaleUpThresholdPercent: 70,
MaxNodeAge: "12h",
},
listerOptions: ListerOptions{},
},
0,
nil,
},
{
"max_node_age enabled, 1 tainted, 1 untainted",
args{
nodes: []*v1.Node{
buildNode(time.Now().Add(-1*stdtime.Hour), false),
buildNode(time.Now().Add(-24*stdtime.Hour), true),
},
pods: nil,
nodeGroupOptions: NodeGroupOptions{
Name: "default",
CloudProviderGroupName: "default",
MinNodes: 1,
MaxNodes: 10,
ScaleUpThresholdPercent: 70,
MaxNodeAge: "30m",
},
listerOptions: ListerOptions{},
},
0,
nil,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
nodeGroups := []NodeGroupOptions{tt.args.nodeGroupOptions}
ngName := tt.args.nodeGroupOptions.Name
client, opts, err := buildTestClient(tt.args.nodes, tt.args.pods, nodeGroups, tt.args.listerOptions)
require.NoError(t, err)

// For these test cases we only use 1 node group/cloud provider node group
nodeGroupSize := 1

// Create a test (mock) cloud provider
testCloudProvider := test.NewCloudProvider(nodeGroupSize)
testNodeGroup := test.NewNodeGroup(
tt.args.nodeGroupOptions.CloudProviderGroupName,
tt.args.nodeGroupOptions.Name,
int64(tt.args.nodeGroupOptions.MinNodes),
int64(tt.args.nodeGroupOptions.MaxNodes),
int64(len(tt.args.nodes)),
)
testCloudProvider.RegisterNodeGroup(testNodeGroup)

// Create a node group state with the mapping of node groups to the cloud providers node groups
nodeGroupsState := BuildNodeGroupsState(nodeGroupsStateOpts{
nodeGroups: nodeGroups,
client: *client,
})

controller := &Controller{
Client: client,
Opts: opts,
stopChan: nil,
nodeGroups: nodeGroupsState,
cloudProvider: testCloudProvider,
}

nodesDelta, err := controller.scaleNodeGroup(ngName, nodeGroupsState[ngName])

// Ensure there were no errors
if tt.err == nil {
require.NoError(t, err)
} else {
require.EqualError(t, tt.err, err.Error())
}

assert.Equal(t, tt.expectedNodeDelta, nodesDelta)
if nodesDelta <= 0 {
return
}

// Ensure the node group on the cloud provider side scales up to the correct amount
assert.Equal(t, int64(len(tt.args.nodes)+nodesDelta), testNodeGroup.TargetSize())
})
}
}
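
To exercise just this new test locally, a standard `go test` invocation scoped to the controller package should work (the flags below are illustrative):

```sh
go test ./pkg/controller -run TestScaleNodeGroupNodeMaxAge -v
```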