Merge pull request #11404 from fabriziopandini/refine-v1beta2-condition-messages

🌱 Refine v1beta2 condition messages
k8s-ci-robot authored Nov 13, 2024
2 parents 657b5f0 + 2373832 commit 6d32337
Showing 25 changed files with 772 additions and 457 deletions.
12 changes: 8 additions & 4 deletions controlplane/kubeadm/internal/controllers/controller.go
@@ -639,7 +639,11 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "Waiting for worker nodes to be deleted first")

controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason
controlPlane.DeletingMessage = fmt.Sprintf("KCP deletion blocked because %s still exist", objectsPendingDeleteNames(allMachines, allMachinePools, controlPlane.Cluster))
names := objectsPendingDeleteNames(allMachines, allMachinePools, controlPlane.Cluster)
for i := range names {
names[i] = "* " + names[i]
}
controlPlane.DeletingMessage = fmt.Sprintf("KubeadmControlPlane deletion blocked because following objects still exist:\n%s", strings.Join(names, "\n"))
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
}

@@ -703,7 +707,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
}

// objectsPendingDeleteNames return the names of worker Machines and MachinePools pending delete.
func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools *expv1.MachinePoolList, cluster *clusterv1.Cluster) string {
func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools *expv1.MachinePoolList, cluster *clusterv1.Cluster) []string {
controlPlaneMachines := allMachines.Filter(collections.ControlPlaneMachines(cluster.Name))
workerMachines := allMachines.Difference(controlPlaneMachines)

@@ -725,9 +729,9 @@ func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools
}
if len(workerMachineNames) > 0 {
sort.Strings(workerMachineNames)
descendants = append(descendants, "worker Machines: "+clog.StringListToString(workerMachineNames))
descendants = append(descendants, "Machines: "+clog.StringListToString(workerMachineNames))
}
return strings.Join(descendants, "; ")
return descendants
}

func (r *KubeadmControlPlaneReconciler) removePreTerminateHookAnnotationFromMachine(ctx context.Context, machine *clusterv1.Machine) error {
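The hunk above replaces the single-line "KCP deletion blocked because ... still exist" message with a multi-line, bulleted message built from the per-kind name lists returned by objectsPendingDeleteNames. Below is a minimal, self-contained sketch of how that message is assembled; the hard-coded names slice stands in for the function's real output and the surrounding reconciler context is omitted.

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical output of objectsPendingDeleteNames: one entry per object kind.
	names := []string{"MachinePools: mp1", "Machines: w1, w2, w3"}

	// Prefix every entry with a bullet, as the reconciler now does.
	for i := range names {
		names[i] = "* " + names[i]
	}

	// Join the bullets into the multi-line DeletingMessage.
	msg := fmt.Sprintf("KubeadmControlPlane deletion blocked because following objects still exist:\n%s", strings.Join(names, "\n"))
	fmt.Println(msg)
	// Output:
	// KubeadmControlPlane deletion blocked because following objects still exist:
	// * MachinePools: mp1
	// * Machines: w1, w2, w3
}
```

The resulting string shape matches what the updated reconcileDelete tests below assert.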
48 changes: 29 additions & 19 deletions controlplane/kubeadm/internal/controllers/controller_test.go
@@ -2008,16 +2008,21 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneAndMachinesCondition
Reason: controlplanev1.KubeadmControlPlaneInitializedV1Beta2Reason,
},
{
Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason,
Message: "Following Machines are reporting etcd member unknown: machine1-test",
Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason,
Message: "* Machine machine1-test:\n" +
" * EtcdMemberHealthy: Node does not exist",
},
{
Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason,
Message: "Following Machines are reporting control plane unknown: machine1-test",
Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason,
Message: "* Machine machine1-test:\n" +
" * APIServerPodHealthy: Node does not exist\n" +
" * ControllerManagerPodHealthy: Node does not exist\n" +
" * SchedulerPodHealthy: Node does not exist\n" +
" * EtcdPodHealthy: Node does not exist",
},
},
expectMachineConditions: []metav1.Condition{
@@ -2083,16 +2088,21 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneAndMachinesCondition
Reason: controlplanev1.KubeadmControlPlaneInitializedV1Beta2Reason,
},
{
Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason,
Message: "Following Machines are reporting etcd member unknown: machine1-test",
Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason,
Message: "* Machine machine1-test:\n" +
" * EtcdMemberHealthy: Node does not exist",
},
{
Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason,
Message: "Following Machines are reporting control plane unknown: machine1-test",
Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason,
Message: "* Machine machine1-test:\n" +
" * APIServerPodHealthy: Node does not exist\n" +
" * ControllerManagerPodHealthy: Node does not exist\n" +
" * SchedulerPodHealthy: Node does not exist\n" +
" * EtcdPodHealthy: Node does not exist",
},
},
expectMachineConditions: []metav1.Condition{
@@ -3344,7 +3354,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
g.Expect(err).ToNot(HaveOccurred())
g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason))
g.Expect(controlPlane.DeletingMessage).To(Equal("KCP deletion blocked because worker Machines: worker-0, worker-1, worker-2, worker-3, worker-4, ... (5 more) still exist"))
g.Expect(controlPlane.DeletingMessage).To(Equal("KubeadmControlPlane deletion blocked because following objects still exist:\n* Machines: worker-0, worker-1, worker-2, worker-3, worker-4, ... (5 more)"))

controlPlaneMachines := clusterv1.MachineList{}
labels := map[string]string{
@@ -3405,7 +3415,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
g.Expect(err).ToNot(HaveOccurred())
g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason))
g.Expect(controlPlane.DeletingMessage).To(Equal("KCP deletion blocked because MachinePools: mp-0, mp-1, mp-2, mp-3, mp-4, ... (5 more) still exist"))
g.Expect(controlPlane.DeletingMessage).To(Equal("KubeadmControlPlane deletion blocked because following objects still exist:\n* MachinePools: mp-0, mp-1, mp-2, mp-3, mp-4, ... (5 more)"))

controlPlaneMachines := clusterv1.MachineList{}
labels := map[string]string{
@@ -3489,7 +3499,7 @@ func TestObjectsPendingDelete(t *testing.T) {

g := NewWithT(t)

g.Expect(objectsPendingDeleteNames(allMachines, machinePools, c)).To(Equal("MachinePools: mp1; worker Machines: w1, w2, w3, w4, w5, ... (3 more)"))
g.Expect(objectsPendingDeleteNames(allMachines, machinePools, c)).To(Equal([]string{"MachinePools: mp1", "Machines: w1, w2, w3, w4, w5, ... (3 more)"}))
}

// test utils.
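Several of the updated assertions above expect long name lists to be truncated to the first five entries followed by "... (N more)". In the actual code this comes from the list helpers in sigs.k8s.io/cluster-api/util/log (clog.StringListToString / clog.ListToString); the snippet below is only a hypothetical, standard-library stand-in that illustrates the output shape those assertions rely on, not the real clog implementation.

```go
package main

import (
	"fmt"
	"strings"
)

// truncatedList is a hypothetical helper (not the real clog implementation):
// it joins at most max names and reports how many were left out.
func truncatedList(names []string, max int) string {
	if len(names) <= max {
		return strings.Join(names, ", ")
	}
	return fmt.Sprintf("%s, ... (%d more)", strings.Join(names[:max], ", "), len(names)-max)
}

func main() {
	workers := []string{
		"worker-0", "worker-1", "worker-2", "worker-3", "worker-4",
		"worker-5", "worker-6", "worker-7", "worker-8", "worker-9",
	}
	fmt.Println("Machines: " + truncatedList(workers, 5))
	// Output: Machines: worker-0, worker-1, worker-2, worker-3, worker-4, ... (5 more)
}
```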
6 changes: 3 additions & 3 deletions controlplane/kubeadm/internal/controllers/status_test.go
@@ -482,13 +482,13 @@ func Test_setMachinesReadyAndMachinesUpToDateConditions(t *testing.T) {
Type: controlplanev1.KubeadmControlPlaneMachinesReadyV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: "SomeReason", // There is only one machine reporting issues, using the reason from that machine.
Message: "NotReady from Machine m3",
Message: "* Machine m3: NotReady",
},
expectMachinesUpToDateCondition: metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneMachinesUpToDateV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: v1beta2conditions.MultipleIssuesReportedReason, // There are many machines reporting issues, using a generic reason.
Message: "NotUpToDate from Machines m2, m3",
Message: "* Machines m2, m3: NotUpToDate",
},
},
}
@@ -550,7 +550,7 @@ func Test_setRemediatingCondition(t *testing.T) {
Type: controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Reason,
Message: "Machine deletionTimestamp set from Machine m3",
Message: "* Machine m3: Machine deletionTimestamp set",
},
},
{
62 changes: 56 additions & 6 deletions controlplane/kubeadm/internal/workload_cluster_conditions.go
@@ -19,6 +19,7 @@ package internal
import (
"context"
"fmt"
"sort"
"strings"
"time"

@@ -38,6 +39,7 @@ import (
"sigs.k8s.io/cluster-api/util/collections"
"sigs.k8s.io/cluster-api/util/conditions"
v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
clog "sigs.k8s.io/cluster-api/util/log"
)

// UpdateEtcdConditions is responsible for updating machine conditions reflecting the status of all the etcd members.
@@ -934,33 +936,81 @@ func aggregateV1Beta2ConditionsFromMachinesToKCP(input aggregateV1Beta2Condition
kcpMachinesWithUnknown := sets.Set[string]{}
kcpMachinesWithInfo := sets.Set[string]{}

messageMap := map[string][]string{}
for i := range input.controlPlane.Machines {
machine := input.controlPlane.Machines[i]
machineMessages := []string{}
for _, condition := range input.machineConditions {
if machineCondition := v1beta2conditions.Get(machine, condition); machineCondition != nil {
switch machineCondition.Status {
case metav1.ConditionTrue:
kcpMachinesWithInfo.Insert(machine.Name)
case metav1.ConditionFalse:
kcpMachinesWithErrors.Insert(machine.Name)
m := machineCondition.Message
if m == "" {
m = fmt.Sprintf("condition is %s", machineCondition.Status)
}
machineMessages = append(machineMessages, fmt.Sprintf(" * %s: %s", machineCondition.Type, m))
case metav1.ConditionUnknown:
kcpMachinesWithUnknown.Insert(machine.Name)
m := machineCondition.Message
if m == "" {
m = fmt.Sprintf("condition is %s", machineCondition.Status)
}
machineMessages = append(machineMessages, fmt.Sprintf(" * %s: %s", machineCondition.Type, m))
}
}
}

if len(machineMessages) > 0 {
message := strings.Join(machineMessages, "\n")
messageMap[message] = append(messageMap[message], machine.Name)
}
}

// compute the order of messages according to the number of machines reporting the same message.
// Note: The list of object names is used as a secondary criteria to sort messages with the same number of objects.
messageIndex := make([]string, 0, len(messageMap))
for m := range messageMap {
messageIndex = append(messageIndex, m)
}

sort.SliceStable(messageIndex, func(i, j int) bool {
return len(messageMap[messageIndex[i]]) > len(messageMap[messageIndex[j]]) ||
(len(messageMap[messageIndex[i]]) == len(messageMap[messageIndex[j]]) && strings.Join(messageMap[messageIndex[i]], ",") < strings.Join(messageMap[messageIndex[j]], ","))
})

// Build the message
messages := []string{}
for _, message := range messageIndex {
machines := messageMap[message]
machinesMessage := "Machine"
if len(messageMap[message]) > 1 {
machinesMessage += "s"
}

sort.Strings(machines)
machinesMessage += " " + clog.ListToString(machines, func(s string) string { return s }, 3)

messages = append(messages, fmt.Sprintf("* %s:\n%s", machinesMessage, message))
}

// Append messages impacting KCP as a whole, if any
if len(input.kcpErrors) > 0 {
for _, message := range input.kcpErrors {
messages = append(messages, fmt.Sprintf("* %s", message))
}
}
message := strings.Join(messages, "\n")

// In case of at least one machine with errors or KCP level errors (nodes without machines), report false.
if len(input.kcpErrors) > 0 || len(kcpMachinesWithErrors) > 0 {
messages := input.kcpErrors
if len(kcpMachinesWithErrors) > 0 {
messages = append(messages, fmt.Sprintf("Following Machines are reporting %s errors: %s", input.note, strings.Join(sets.List(kcpMachinesWithErrors), ", ")))
}
v1beta2conditions.Set(input.controlPlane.KCP, metav1.Condition{
Type: input.condition,
Status: metav1.ConditionFalse,
Reason: input.falseReason,
Message: strings.Join(messages, ", "),
Message: message,
})
return
}
@@ -971,7 +1021,7 @@ func aggregateV1Beta2ConditionsFromMachinesToKCP(input aggregateV1Beta2Condition
Type: input.condition,
Status: metav1.ConditionUnknown,
Reason: input.unknownReason,
Message: fmt.Sprintf("Following Machines are reporting %s unknown: %s", input.note, strings.Join(sets.List(kcpMachinesWithUnknown), ", ")),
Message: message,
})
return
}
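The new aggregation logic above groups machines by their (identical) per-machine message, orders the groups by how many machines report the same message (with the sorted machine-name list as a tie-breaker), and renders one "* Machine(s) <names>:" bullet per group with the shared message nested underneath. The standalone sketch below reproduces that grouping and ordering using only the standard library; the hard-coded perMachine map is a hypothetical input, and the clog.ListToString-style name truncation is left out.

```go
package main

import (
	"fmt"
	"sort"
	"strings"
)

func main() {
	// Hypothetical per-machine messages, e.g. built from machine conditions.
	perMachine := map[string]string{
		"machine-1": "  * EtcdMemberHealthy: Node does not exist",
		"machine-2": "  * EtcdMemberHealthy: Node does not exist",
		"machine-3": "  * APIServerPodHealthy: Node does not exist",
	}

	// Group machine names by identical message.
	messageMap := map[string][]string{}
	for name, msg := range perMachine {
		messageMap[msg] = append(messageMap[msg], name)
	}

	// Order messages by number of machines (descending), then by machine names.
	messageIndex := make([]string, 0, len(messageMap))
	for m := range messageMap {
		sort.Strings(messageMap[m])
		messageIndex = append(messageIndex, m)
	}
	sort.SliceStable(messageIndex, func(i, j int) bool {
		ni, nj := len(messageMap[messageIndex[i]]), len(messageMap[messageIndex[j]])
		if ni != nj {
			return ni > nj
		}
		return strings.Join(messageMap[messageIndex[i]], ",") < strings.Join(messageMap[messageIndex[j]], ",")
	})

	// Render one bullet per group, pluralizing "Machine" as needed.
	bullets := []string{}
	for _, msg := range messageIndex {
		machines := messageMap[msg]
		header := "Machine"
		if len(machines) > 1 {
			header += "s"
		}
		bullets = append(bullets, fmt.Sprintf("* %s %s:\n%s", header, strings.Join(machines, ", "), msg))
	}
	fmt.Println(strings.Join(bullets, "\n"))
	// Output:
	// * Machines machine-1, machine-2:
	//   * EtcdMemberHealthy: Node does not exist
	// * Machine machine-3:
	//   * APIServerPodHealthy: Node does not exist
}
```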
