Commit 359128a

Merge pull request #3443 from kinarashah/hstname

snasovich authored Nov 28, 2023
2 parents c56fb8c + a74c388 commit 359128a
Showing 8 changed files with 33 additions and 22 deletions.
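In short, this change threads each node's internal address through k8s.GetNode, k8s.DeleteNode, and k8s.CordonUncordon. When the external AWS cloud provider is configured, the Kubernetes node name can differ from the RKE hostname override, so GetNode now falls back to matching the node's InternalIP rather than its hostname label. A minimal, self-contained sketch of that lookup follows (the standalone helper findNodeByInternalIP and its package name are illustrative only, not part of this PR; the real GetNode also retries and matches by node name first):

package nodeutil

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// findNodeByInternalIP returns the node from list whose InternalIP matches
// nodeAddress. It condenses the fallback added to k8s.GetNode in this PR;
// the retry loop and the match-by-name path are omitted for brevity.
func findNodeByInternalIP(list *v1.NodeList, nodeAddress string) (*v1.Node, error) {
	if nodeAddress == "" {
		return nil, fmt.Errorf("cannot look up a node with an empty nodeAddress")
	}
	for i := range list.Items {
		for _, addr := range list.Items[i].Status.Addresses {
			if addr.Type == v1.NodeInternalIP && addr.Address == nodeAddress {
				return &list.Items[i], nil
			}
		}
	}
	return nil, fmt.Errorf("no node found with internal address %s", nodeAddress)
}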
4 changes: 2 additions & 2 deletions cluster/addons.go
@@ -492,7 +492,7 @@ func (c *Cluster) doAddonDeploy(ctx context.Context, addonYaml, resourceName str
if err != nil {
return &addonError{fmt.Sprintf("%v", err), isCritical}
}
node, err := k8s.GetNode(k8sClient, c.ControlPlaneHosts[0].HostnameOverride, c.CloudProvider.Name)
node, err := k8s.GetNode(k8sClient, c.ControlPlaneHosts[0].HostnameOverride, c.ControlPlaneHosts[0].InternalAddress, c.CloudProvider.Name)
if err != nil {
return &addonError{fmt.Sprintf("Failed to get Node [%s]: %v", c.ControlPlaneHosts[0].HostnameOverride, err), isCritical}
}
@@ -513,7 +513,7 @@ func (c *Cluster) doAddonDelete(ctx context.Context, resourceName string, isCrit
if err != nil {
return &addonError{fmt.Sprintf("%v", err), isCritical}
}
node, err := k8s.GetNode(k8sClient, c.ControlPlaneHosts[0].HostnameOverride, c.CloudProvider.Name)
node, err := k8s.GetNode(k8sClient, c.ControlPlaneHosts[0].HostnameOverride, c.ControlPlaneHosts[0].InternalAddress, c.CloudProvider.Name)
if err != nil {
return &addonError{fmt.Sprintf("Failed to get Node [%s]: %v", c.ControlPlaneHosts[0].HostnameOverride, err), isCritical}
}
3 changes: 1 addition & 2 deletions cluster/cluster.go
@@ -1018,13 +1018,12 @@ func setNodeAnnotationsLabelsTaints(k8sClient *kubernetes.Clientset, host *hosts
node := &v1.Node{}
var err error
for retries := 0; retries <= 5; retries++ {
node, err = k8s.GetNode(k8sClient, host.HostnameOverride, cloudProviderName)
node, err = k8s.GetNode(k8sClient, host.HostnameOverride, host.InternalAddress, cloudProviderName)
if err != nil {
logrus.Debugf("[hosts] Can't find node by name [%s], error: %v", host.HostnameOverride, err)
time.Sleep(2 * time.Second)
continue
}

oldNode := node.DeepCopy()
k8s.SetNodeAddressesAnnotations(node, host.InternalAddress, host.Address)
k8s.SyncNodeLabels(node, host.ToAddLabels, host.ToDelLabels)
9 changes: 8 additions & 1 deletion cluster/remove.go
@@ -11,6 +11,7 @@ import (
v3 "github.com/rancher/rke/types"
"github.com/rancher/rke/util"
"golang.org/x/sync/errgroup"
v1 "k8s.io/api/core/v1"
)

func (c *Cluster) ClusterRemove(ctx context.Context) error {
@@ -92,7 +93,13 @@ func (c *Cluster) RemoveOldNodes(ctx context.Context) error {
host := &hosts.Host{}
host.HostnameOverride = node.Name
if !hosts.IsNodeInList(host, uniqueHosts) {
if err := k8s.DeleteNode(kubeClient, node.Name, c.CloudProvider.Name); err != nil {
nodeAddress := ""
for _, addr := range node.Status.Addresses {
if addr.Type == v1.NodeInternalIP {
nodeAddress = addr.Address
}
}
if err := k8s.DeleteNode(kubeClient, node.Name, nodeAddress, c.CloudProvider.Name); err != nil {
log.Warnf(ctx, "Failed to delete old node [%s] from kubernetes", node.Name)
}
}
6 changes: 3 additions & 3 deletions hosts/hosts.go
@@ -197,19 +197,19 @@ func DeleteNode(ctx context.Context, toDeleteHost *Host, kubeClient *kubernetes.
return nil
}
log.Infof(ctx, "[hosts] Cordoning host [%s]", toDeleteHost.Address)
if _, err := k8s.GetNode(kubeClient, toDeleteHost.HostnameOverride, cloudProviderName); err != nil {
if _, err := k8s.GetNode(kubeClient, toDeleteHost.HostnameOverride, toDeleteHost.InternalAddress, cloudProviderName); err != nil {
if apierrors.IsNotFound(err) {
log.Warnf(ctx, "[hosts] Can't find node by name [%s]", toDeleteHost.Address)
return nil
}
return err

}
if err := k8s.CordonUncordon(kubeClient, toDeleteHost.HostnameOverride, cloudProviderName, true); err != nil {
if err := k8s.CordonUncordon(kubeClient, toDeleteHost.HostnameOverride, toDeleteHost.InternalAddress, cloudProviderName, true); err != nil {
return err
}
log.Infof(ctx, "[hosts] Deleting host [%s] from the cluster", toDeleteHost.Address)
if err := k8s.DeleteNode(kubeClient, toDeleteHost.HostnameOverride, cloudProviderName); err != nil {
if err := k8s.DeleteNode(kubeClient, toDeleteHost.HostnameOverride, toDeleteHost.InternalAddress, cloudProviderName); err != nil {
return err
}
log.Infof(ctx, "[hosts] Successfully deleted host [%s] from the cluster", toDeleteHost.Address)
19 changes: 12 additions & 7 deletions k8s/node.go
@@ -24,10 +24,10 @@ const (
RetryInterval = 5
)

func DeleteNode(k8sClient *kubernetes.Clientset, nodeName, cloudProviderName string) error {
func DeleteNode(k8sClient *kubernetes.Clientset, nodeName string, nodeAddress string, cloudProviderName string) error {
// If cloud provider is configured, the node name can be set by the cloud provider, which can be different from the original node name
if cloudProviderName != "" {
node, err := GetNode(k8sClient, nodeName, cloudProviderName)
node, err := GetNode(k8sClient, nodeName, nodeAddress, cloudProviderName)
if err != nil {
return err
}
@@ -40,7 +40,7 @@ func GetNodeList(k8sClient *kubernetes.Clientset) (*v1.NodeList, error) {
return k8sClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
}

func GetNode(k8sClient *kubernetes.Clientset, nodeName, cloudProviderName string) (*v1.Node, error) {
func GetNode(k8sClient *kubernetes.Clientset, nodeName, nodeAddress, cloudProviderName string) (*v1.Node, error) {
var listErr error
for retries := 0; retries < MaxRetries; retries++ {
logrus.Debugf("Checking node list for node [%v], try #%v", nodeName, retries+1)
@@ -57,9 +57,13 @@ func GetNode(k8sClient *kubernetes.Clientset, nodeName, cloudProviderName string
return &node, nil
}
if cloudProviderName == ExternalAWSCloudProviderName {
logrus.Debugf("Checking hostname address for node [%v], cloud provider: %v", nodeName, cloudProviderName)
if nodeAddress == "" {
return nil, fmt.Errorf("failed to find node [%v] with empty nodeAddress, cloud provider: %v", nodeName, cloudProviderName)
}
logrus.Debugf("Checking internal address for node [%v], cloud provider: %v", nodeName, cloudProviderName)
for _, addr := range node.Status.Addresses {
if addr.Type == v1.NodeHostName && strings.ToLower(node.Labels[HostnameLabel]) == addr.Address {
if addr.Type == v1.NodeInternalIP && nodeAddress == addr.Address {
logrus.Debugf("Found node [%s]: %v", nodeName, nodeAddress)
return &node, nil
}
}
@@ -73,10 +77,10 @@ func GetNode(k8sClient *kubernetes.Clientset, nodeName, cloudProviderName string
return nil, apierrors.NewNotFound(schema.GroupResource{}, nodeName)
}

func CordonUncordon(k8sClient *kubernetes.Clientset, nodeName string, cloudProviderName string, cordoned bool) error {
func CordonUncordon(k8sClient *kubernetes.Clientset, nodeName string, nodeAddress string, cloudProviderName string, cordoned bool) error {
updated := false
for retries := 0; retries < MaxRetries; retries++ {
node, err := GetNode(k8sClient, nodeName, cloudProviderName)
node, err := GetNode(k8sClient, nodeName, nodeAddress, cloudProviderName)
if err != nil {
logrus.Debugf("Error getting node %s: %v", nodeName, err)
// no need to retry here since GetNode already retries
@@ -127,6 +131,7 @@ func SyncNodeLabels(node *v1.Node, toAddLabels, toDelLabels map[string]string) {
delete(node.Labels, key)
}
}

// ADD Labels
for key, value := range toAddLabels {
node.Labels[key] = value
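The call sites updated in the remaining files all follow the same pattern: wherever only HostnameOverride was passed before, the host's InternalAddress now travels with it. A hedged sketch of a caller using the new CordonUncordon signature (the wrapper function and package name below are illustrative, not code from this PR):

package example

import (
	"fmt"

	"k8s.io/client-go/kubernetes"

	"github.com/rancher/rke/hosts"
	"github.com/rancher/rke/k8s"
)

// uncordonHost mirrors the calling pattern used in services/*.go after this
// change: the host's internal address is passed alongside its hostname override
// so the node can still be found when a cloud provider has renamed it.
func uncordonHost(kubeClient *kubernetes.Clientset, host *hosts.Host, cloudProviderName string) error {
	if err := k8s.CordonUncordon(kubeClient, host.HostnameOverride, host.InternalAddress, cloudProviderName, false); err != nil {
		return fmt.Errorf("failed to uncordon node %v: %v", host.HostnameOverride, err)
	}
	return nil
}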
4 changes: 2 additions & 2 deletions services/controlplane.go
@@ -165,7 +165,7 @@ func processControlPlaneForUpgrade(ctx context.Context, kubeClient *kubernetes.C
}
if !controlPlaneUpgradable && !workerPlaneUpgradable {
log.Infof(ctx, "Upgrade not required for controlplane and worker components of host %v", runHost.HostnameOverride)
if err := k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, cloudProviderName, false); err != nil {
if err := k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, runHost.InternalAddress, cloudProviderName, false); err != nil {
// This node didn't undergo an upgrade, so RKE will only log any error after uncordoning it and won't count this in maxUnavailable
logrus.Errorf("[controlplane] Failed to uncordon node %v, error: %v", runHost.HostnameOverride, err)
}
@@ -237,7 +237,7 @@ func upgradeControlHost(ctx context.Context, kubeClient *kubernetes.Clientset, h
if err := CheckNodeReady(kubeClient, host, ControlRole, cloudProviderName); err != nil {
return err
}
return k8s.CordonUncordon(kubeClient, host.HostnameOverride, cloudProviderName, false)
return k8s.CordonUncordon(kubeClient, host.HostnameOverride, host.InternalAddress, cloudProviderName, false)
}

func RemoveControlPlane(ctx context.Context, controlHosts []*hosts.Host, force bool) error {
4 changes: 2 additions & 2 deletions services/node_util.go
@@ -20,7 +20,7 @@ import (
func CheckNodeReady(kubeClient *kubernetes.Clientset, runHost *hosts.Host, component, cloudProviderName string) error {
for retries := 0; retries < k8s.MaxRetries; retries++ {
logrus.Infof("[%s] Now checking status of node %v, try #%v", component, runHost.HostnameOverride, retries+1)
k8sNode, err := k8s.GetNode(kubeClient, runHost.HostnameOverride, cloudProviderName)
k8sNode, err := k8s.GetNode(kubeClient, runHost.HostnameOverride, runHost.InternalAddress, cloudProviderName)
if err != nil {
return fmt.Errorf("[%s] Error getting node %v: %v", component, runHost.HostnameOverride, err)
}
@@ -35,7 +35,7 @@ func CheckNodeReady(kubeClient *kubernetes.Clientset, runHost *hosts.Host, compo

func cordonAndDrainNode(kubeClient *kubernetes.Clientset, host *hosts.Host, drainNode bool, drainHelper drain.Helper, component, cloudProviderName string) error {
logrus.Debugf("[%s] Cordoning node %v", component, host.HostnameOverride)
if err := k8s.CordonUncordon(kubeClient, host.HostnameOverride, cloudProviderName, true); err != nil {
if err := k8s.CordonUncordon(kubeClient, host.HostnameOverride, host.InternalAddress, cloudProviderName, true); err != nil {
return err
}
if !drainNode {
6 changes: 3 additions & 3 deletions services/workerplane.go
@@ -87,7 +87,7 @@ func UpgradeWorkerPlaneForWorkerAndEtcdNodes(ctx context.Context, kubeClient *ku

func updateNewHostsList(kubeClient *kubernetes.Clientset, allHosts []*hosts.Host, newHosts map[string]bool, cloudProviderName string) {
for _, h := range allHosts {
_, err := k8s.GetNode(kubeClient, h.HostnameOverride, cloudProviderName)
_, err := k8s.GetNode(kubeClient, h.HostnameOverride, h.InternalAddress, cloudProviderName)
if err != nil && apierrors.IsNotFound(err) {
// this host could have been added to cluster state upon successful controlplane upgrade but isn't a node yet.
newHosts[h.HostnameOverride] = true
@@ -167,7 +167,7 @@ func processWorkerPlaneForUpgrade(ctx context.Context, kubeClient *kubernetes.Cl
}
if !upgradable {
logrus.Infof("[workerplane] Upgrade not required for worker components of host %v", runHost.HostnameOverride)
if err := k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, cloudProviderName, false); err != nil {
if err := k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, runHost.InternalAddress, cloudProviderName, false); err != nil {
// This node didn't undergo an upgrade, so RKE will only log any error after uncordoning it and won't count this in maxUnavailable
logrus.Errorf("[workerplane] Failed to uncordon node %v, error: %v", runHost.HostnameOverride, err)
}
@@ -211,7 +211,7 @@ func upgradeWorkerHost(ctx context.Context, kubeClient *kubernetes.Clientset, ru
return err
}
// uncordon node
return k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, cloudProviderName, false)
return k8s.CordonUncordon(kubeClient, runHost.HostnameOverride, runHost.InternalAddress, cloudProviderName, false)
}

func doDeployWorkerPlaneHost(ctx context.Context, host *hosts.Host, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, processMap map[string]v3.Process, certMap map[string]pki.CertificatePKI, updateWorkersOnly bool, alpineImage, k8sVersion string) error {
