add the ability to remove nodes older than a certain age even when the node group has reached the min #240
Changes from all commits
In the controller's scaleNodeGroup, a new check bumps the scale-up delta to at least 1 when a node in the group has exceeded the configured max node age:

@@ -373,6 +373,12 @@ func (c *Controller) scaleNodeGroup(nodegroup string, nodeGroup *NodeGroupState)
 		nodesDelta = int(math.Max(float64(nodesDelta), 1))
 	}
 
+	if c.scaleOnMaxNodeAge(nodeGroup, untaintedNodes, taintedNodes) {
+		log.WithField("nodegroup", nodegroup).
+			Info("Setting scale to minimum of 1 to rotate out a node older than the max node age")
+		nodesDelta = int(math.Max(float64(nodesDelta), 1))
+	}
+
 	log.WithField("nodegroup", nodegroup).Debugf("Delta: %v", nodesDelta)
 
 	scaleOptions := scaleOpts{
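For context, the behaviour is driven by a new per-node-group option. Below is a minimal configuration sketch; the max_node_age key name is inferred from the MaxNodeAge field exercised in the tests further down, and the exact key names should be checked against the node group configuration docs:

    node_groups:
      - name: default
        cloud_provider_group_name: default
        min_nodes: 3
        max_nodes: 10
        scale_up_threshold_percent: 70
        # Assumed key name: rotate nodes older than 12 hours once the group sits at min_nodes.
        # "0" (or leaving the option unset) keeps the feature disabled.
        max_node_age: 12h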
Further down in the same file, the diff adds the helper that decides when to trigger the rotation:

@@ -431,6 +437,34 @@ func (c *Controller) isScaleOnStarve(
 		len(untaintedNodes) < nodeGroup.Opts.MaxNodes
 }
 
+// scaleOnMaxNodeAge returns true if the node group is at the minimum and there is a node in the untaintedNodes older
+// than the configured node group's maxNodeAge. The idea here is to constantly rotate out the oldest nodes
+// when the node group is at the minimum. This is to ensure the nodes are receiving the most up-to-date configuration
+// from the cloud provider.
+func (c *Controller) scaleOnMaxNodeAge(nodeGroup *NodeGroupState, untaintedNodes []*v1.Node, taintedNodes []*v1.Node) bool {
+	// Only enable this functionality if the node group has the feature enabled
+	if nodeGroup.Opts.MaxNodeAgeDuration() <= 0 {
+		return false
+	}
+
+	// We don't want to attempt to rotate nodes that have reached the max age if we haven't reached the minimum node
+	// count, as the scaling down of the node group will remove the oldest first anyway.
+	// We also don't want to try to rotate out nodes if there are already nodes tainted.
+	if len(untaintedNodes) != nodeGroup.Opts.MinNodes || len(untaintedNodes) == 0 || len(taintedNodes) > 0 {
+		return false
+	}
+
+	// Determine if there is an untainted node that exceeds the max age, if so then we should scale up
+	// to trigger that node to be replaced.
+	for _, n := range untaintedNodes {
+		if time.Since(n.CreationTimestamp.Time) > nodeGroup.Opts.MaxNodeAgeDuration() {
+			return true
+		}
+	}
+
+	return false
+}
+
 // RunOnce performs the main autoscaler logic once
 func (c *Controller) RunOnce() error {
 	startTime := time.Now()

[Review thread on the minimum-node-count check]

Comment: There's still an edge case here, which is a node group that somehow has very consistent load such that it always maintains X number of nodes (e.g. always 2) and thus never gets cycled. I think this is very unlikely with the kinds of workloads that run on Escalator, but worth calling out in case others think it's worth considering here.

Reply: I had thought of this edge case whilst writing this feature - I guess it's been an edge case in Escalator since its existence, and won't be fixed or made worse by this feature being added. Ideally, if there is a consistent base load, the minimum nodes in the ASG should be configured to a value that meets the base load; then this feature can be used to ensure the nodes are cycled out. I can check if this edge case is documented in Escalator's documentation.

Reply: This isn't documented anywhere - I've raised #241.
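scaleOnMaxNodeAge above relies on nodeGroup.Opts.MaxNodeAgeDuration(), which is not part of the visible diff. A rough, hypothetical sketch of the conversion it implies, assuming MaxNodeAge is a standard Go duration string such as "12h" or "30m", with empty, zero, or unparseable values disabling the feature:

    package controller

    import "time"

    // maxNodeAgeDuration is a hypothetical stand-in for NodeGroupOptions.MaxNodeAgeDuration().
    // It parses the configured MaxNodeAge string ("12h", "30m", "0", "") and returns 0 when the
    // value is empty or invalid; scaleOnMaxNodeAge treats a non-positive result as "feature disabled".
    func maxNodeAgeDuration(maxNodeAge string) time.Duration {
        if maxNodeAge == "" {
            return 0
        }
        d, err := time.ParseDuration(maxNodeAge)
        if err != nil {
            return 0
        }
        return d
    }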
In the controller tests, the standard library time import alias is renamed from duration to stdtime:

@@ -2,7 +2,7 @@ package controller
 
 import (
 	"testing"
-	duration "time"
+	stdtime "time"
 
 	"github.com/atlassian/escalator/pkg/k8s"
 	"github.com/atlassian/escalator/pkg/k8s/resource"
@@ -39,7 +39,7 @@ func buildTestClient(nodes []*v1.Node, pods []*v1.Pod, nodeGroups []NodeGroupOpt
 	opts := Opts{
 		K8SClient:    fakeClient,
 		NodeGroups:   nodeGroups,
-		ScanInterval: 1 * duration.Minute,
+		ScanInterval: 1 * stdtime.Minute,
 		DryMode:      false,
 	}
 	allPodLister, err := test.NewTestPodWatcher(pods, listerOptions.podListerOptions)

@@ -853,7 +853,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
 		args                      args
 		scaleUpWithCachedCapacity bool
 		runs                      int
-		runInterval               duration.Duration
+		runInterval               stdtime.Duration
 		want                      int
 		err                       error
 	}{

@@ -879,7 +879,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
 		},
 		false,
 		1,
-		duration.Minute,
+		stdtime.Minute,
 		-4,
 		nil,
 	},

@@ -905,7 +905,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
 		},
 		false,
 		5,
-		duration.Minute,
+		stdtime.Minute,
 		-2,
 		nil,
 	},

@@ -931,7 +931,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
 		},
 		false,
 		1,
-		duration.Minute,
+		stdtime.Minute,
 		-4,
 		nil,
 	},

@@ -958,7 +958,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
 		},
 		false,
 		1,
-		duration.Minute,
+		stdtime.Minute,
 		1,
 		nil,
 	},

@@ -985,7 +985,7 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
 		},
 		true,
 		1,
-		duration.Minute,
+		stdtime.Minute,
 		6,
 		nil,
 	},
The tests also gain a new table-driven test for the max node age behaviour (the added lines are shown below without diff markers, for readability):

@@ -1060,3 +1060,250 @@ func TestScaleNodeGroup_MultipleRuns(t *testing.T) {
		})
	}
}

func TestScaleNodeGroupNodeMaxAge(t *testing.T) {
	buildNode := func(creation stdtime.Time, tainted bool) *v1.Node {
		return test.BuildTestNode(test.NodeOpts{
			CPU:      int64(1000),
			Mem:      int64(1000),
			Creation: creation,
			Tainted:  tainted,
		})
	}

	type args struct {
		nodes            []*v1.Node
		pods             []*v1.Pod
		nodeGroupOptions NodeGroupOptions
		listerOptions    ListerOptions
	}

	tests := []struct {
		name              string
		args              args
		expectedNodeDelta int
		err               error
	}{

[Review thread on the set of test cases]

Comment: Could we add a test for num nodes == maxNodes, and all expired, with scale up enabled?

Reply: What are you worried about here? Are we expecting delta to be 0 because we're at max, or to be positive but unable to go further due to max? Either way, I think that particular case isn't related to this feature.

Comment: Yeah, just the very unlikely case of nodes at max for a long time, and this minimum of +1 node delta to cycle them trying to push above the ASG max.

Reply: It's actually perfectly fine for node delta to be +1 when at the max ASG size - we have logic later on, when trying to increase the ASG in the cloud provider above the max, that will reject the scale up. I don't think this test case is really relevant for this specific feature, and the case of node delta exceeding the ASG max should be tested elsewhere. Ideally we're really testing whether nodeDelta is either +1 or 0 given the criteria for the feature, assuming whatever Escalator does with that nodeDelta is appropriately tested later on.
		{
			"max_node_age disabled",
			args{
				nodes: []*v1.Node{
					buildNode(time.Now().Add(-1*stdtime.Hour), false),
					buildNode(time.Now().Add(-24*stdtime.Hour), false),
					buildNode(time.Now().Add(-36*stdtime.Hour), false),
				},
				pods: nil,
				nodeGroupOptions: NodeGroupOptions{
					Name:                    "default",
					CloudProviderGroupName:  "default",
					MinNodes:                3,
					MaxNodes:                10,
					ScaleUpThresholdPercent: 70,
					MaxNodeAge:              "0",
				},
				listerOptions: ListerOptions{},
			},
			0,
			nil,
		},
		{
			"max_node_age enabled, max node age 12 hours",
			args{
				nodes: []*v1.Node{
					buildNode(time.Now().Add(-1*stdtime.Hour), false),
					buildNode(time.Now().Add(-24*stdtime.Hour), false),
					buildNode(time.Now().Add(-36*stdtime.Hour), false),
				},
				pods: nil,
				nodeGroupOptions: NodeGroupOptions{
					Name:                    "default",
					CloudProviderGroupName:  "default",
					MinNodes:                3,
					MaxNodes:                10,
					ScaleUpThresholdPercent: 70,
					MaxNodeAge:              "12h",
				},
				listerOptions: ListerOptions{},
			},
			1,
			nil,
		},

[Review thread on the 12-hour case]

Comment: So in this case, the behaviour is that we scale up one node, it would be replaced, and then we would do another scale up to replace the other older node once we made it back to our minimum node count again. Is that correct?

Reply: Yep - that's the desired behaviour.
		{
			"max_node_age enabled, max node age 48 hours",
			args{
				nodes: []*v1.Node{
					buildNode(time.Now().Add(-1*stdtime.Hour), false),
					buildNode(time.Now().Add(-24*stdtime.Hour), false),
					buildNode(time.Now().Add(-36*stdtime.Hour), false),
				},
				pods: nil,
				nodeGroupOptions: NodeGroupOptions{
					Name:                    "default",
					CloudProviderGroupName:  "default",
					MinNodes:                3,
					MaxNodes:                10,
					ScaleUpThresholdPercent: 70,
					MaxNodeAge:              "48h",
				},
				listerOptions: ListerOptions{},
			},
			0,
			nil,
		},
		{
			"max_node_age enabled, but not at node minimum",
			args{
				nodes: []*v1.Node{
					buildNode(time.Now().Add(-1*stdtime.Hour), false),
					buildNode(time.Now().Add(-24*stdtime.Hour), false),
					buildNode(time.Now().Add(-36*stdtime.Hour), false),
				},
				pods: nil,
				nodeGroupOptions: NodeGroupOptions{
					Name:                    "default",
					CloudProviderGroupName:  "default",
					MinNodes:                1,
					MaxNodes:                10,
					ScaleUpThresholdPercent: 70,
					MaxNodeAge:              "12h",
				},
				listerOptions: ListerOptions{},
			},
			0,
			nil,
		},
		{
			"max_node_age enabled, but no nodes",
			args{
				nodes: nil,
				pods:  nil,
				nodeGroupOptions: NodeGroupOptions{
					Name:                    "default",
					CloudProviderGroupName:  "default",
					MinNodes:                1,
					MaxNodes:                10,
					ScaleUpThresholdPercent: 70,
					MaxNodeAge:              "12h",
				},
				listerOptions: ListerOptions{},
			},
			0,
			nil,
		},
		{
			"max_node_age enabled, some nodes are tainted",
			args{
				nodes: []*v1.Node{
					buildNode(time.Now().Add(-1*stdtime.Hour), false),
					buildNode(time.Now().Add(-24*stdtime.Hour), false),
					buildNode(time.Now().Add(-36*stdtime.Hour), true),
				},
				pods: nil,
				nodeGroupOptions: NodeGroupOptions{
					Name:                    "default",
					CloudProviderGroupName:  "default",
					MinNodes:                1,
					MaxNodes:                10,
					ScaleUpThresholdPercent: 70,
					MaxNodeAge:              "12h",
				},
				listerOptions: ListerOptions{},
			},
			0,
			nil,
		},

[Review thread on the "some nodes are tainted" case]

Comment: Should there be a test case where we're at the MinNodes but some are tainted? I don't think that should occur in regular operation though, just throwing this out there in case it makes sense.

Reply: Had a look into adding this test case - it doesn't make sense to add to this feature's tests, as Escalator should get itself out of this scenario prior to the maxNodeAge code running (it will untaint the tainted nodes in an attempt to get the untainted nodes equal to the min nodes). Again, this should be tested elsewhere already.
		{
			"max_node_age enabled, scaled down to zero",
			args{
				nodes: []*v1.Node{},
				pods:  nil,
				nodeGroupOptions: NodeGroupOptions{
					Name:                    "default",
					CloudProviderGroupName:  "default",
					MinNodes:                0,
					MaxNodes:                10,
					ScaleUpThresholdPercent: 70,
					MaxNodeAge:              "12h",
				},
				listerOptions: ListerOptions{},
			},
			0,
			nil,
		},
		{
			"max_node_age enabled, 1 tainted, 1 untainted",
			args{
				nodes: []*v1.Node{
					buildNode(time.Now().Add(-1*stdtime.Hour), false),
					buildNode(time.Now().Add(-24*stdtime.Hour), true),
				},
				pods: nil,
				nodeGroupOptions: NodeGroupOptions{
					Name:                    "default",
					CloudProviderGroupName:  "default",
					MinNodes:                1,
					MaxNodes:                10,
					ScaleUpThresholdPercent: 70,
					MaxNodeAge:              "30m",
				},
				listerOptions: ListerOptions{},
			},
			0,
			nil,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			nodeGroups := []NodeGroupOptions{tt.args.nodeGroupOptions}
			ngName := tt.args.nodeGroupOptions.Name
			client, opts, err := buildTestClient(tt.args.nodes, tt.args.pods, nodeGroups, tt.args.listerOptions)
			require.NoError(t, err)

			// For these test cases we only use 1 node group/cloud provider node group
			nodeGroupSize := 1

			// Create a test (mock) cloud provider
			testCloudProvider := test.NewCloudProvider(nodeGroupSize)
			testNodeGroup := test.NewNodeGroup(
				tt.args.nodeGroupOptions.CloudProviderGroupName,
				tt.args.nodeGroupOptions.Name,
				int64(tt.args.nodeGroupOptions.MinNodes),
				int64(tt.args.nodeGroupOptions.MaxNodes),
				int64(len(tt.args.nodes)),
			)
			testCloudProvider.RegisterNodeGroup(testNodeGroup)

			// Create a node group state with the mapping of node groups to the cloud providers node groups
			nodeGroupsState := BuildNodeGroupsState(nodeGroupsStateOpts{
				nodeGroups: nodeGroups,
				client:     *client,
			})

			controller := &Controller{
				Client:        client,
				Opts:          opts,
				stopChan:      nil,
				nodeGroups:    nodeGroupsState,
				cloudProvider: testCloudProvider,
			}

			nodesDelta, err := controller.scaleNodeGroup(ngName, nodeGroupsState[ngName])

			// Ensure there were no errors
			if tt.err == nil {
				require.NoError(t, err)
			} else {
				require.EqualError(t, tt.err, err.Error())
			}

			assert.Equal(t, tt.expectedNodeDelta, nodesDelta)
			if nodesDelta <= 0 {
				return
			}

			// Ensure the node group on the cloud provider side scales up to the correct amount
			assert.Equal(t, int64(len(tt.args.nodes)+nodesDelta), testNodeGroup.TargetSize())
		})
	}
}
[Review thread on the overall change]

Comment: Should this also protect against nodesDelta exceeding the ASG max? Or is that protected against in the normal scale functions?

Reply: This is somewhat related to the other conversation, but I don't think this is relevant for the tests of this feature - this should already be covered by the general scaling tests, which check that if nodeDelta would breach the ASG max, Escalator prevents it.
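To run just the new test locally, something like the following should work (the package path is an assumption; the controller tests appear to live in the controller package, e.g. pkg/controller):

    go test ./pkg/controller/... -run TestScaleNodeGroupNodeMaxAge -v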