Skip to content

Commit

Permalink
Cleans up k8s-dqlite state dir and stops it on remove hook (#908)
Browse files Browse the repository at this point in the history
cfg.Datastore.GetType() may return an empty string if the bootstrap
action failed before database.SetClusterConfig has been called. Because
of this, we're not removing the state dir for k8s-dqlite, which will be
wrongfully removed by setup.K8sDqlite on the next bootstrap attempt.

We now opportunistically clean up the k8s-dqlite state directory in the
remove hook, regardless of the datastore type reported by the cluster config.

If a bootstrap attempt fails, the k8s-dqlite service will still be
running, which will cause the next bootstrap attempt to fail because the
k8s-dqlite port is still in use. The remove hook now also stops the
k8s-dqlite service.
  • Loading branch information
claudiubelu authored Jan 10, 2025
1 parent 12e32f0 commit 91f29ec
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 22 deletions.
18 changes: 0 additions & 18 deletions src/k8s/pkg/k8sd/app/cluster_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,24 +29,6 @@ func startControlPlaneServices(ctx context.Context, snap snap.Snap, datastore st
return nil
}

// stopControlPlaneServices stops the services backing a control plane node.
//
// The datastore argument selects the datastore-specific step:
//   - "k8s-dqlite": the k8s-dqlite services are stopped first.
//   - "external": no datastore-specific step is performed.
//   - anything else: an error is returned and nothing is stopped.
//
// After the datastore step, the control plane services are stopped. The first
// failure is wrapped and returned.
func stopControlPlaneServices(ctx context.Context, snap snap.Snap, datastore string) error {
	// Reject unknown datastore types up front, before touching any service.
	if datastore != "k8s-dqlite" && datastore != "external" {
		return fmt.Errorf("unsupported datastore %s, must be one of %v", datastore, setup.SupportedDatastores)
	}

	// Only the k8s-dqlite datastore has a service to stop here.
	if datastore == "k8s-dqlite" {
		if stopErr := snaputil.StopK8sDqliteServices(ctx, snap); stopErr != nil {
			return fmt.Errorf("failed to stop k8s-dqlite service: %w", stopErr)
		}
	}

	stopErr := snaputil.StopControlPlaneServices(ctx, snap)
	if stopErr != nil {
		return fmt.Errorf("failed to stop control plane services: %w", stopErr)
	}
	return nil
}

func waitApiServerReady(ctx context.Context, snap snap.Snap) error {
// Wait for API server to come up
client, err := snap.KubernetesClient("")
Expand Down
13 changes: 9 additions & 4 deletions src/k8s/pkg/k8sd/app/hooks_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,6 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr
log.Error(err, "Failed to create k8s-dqlite client: %w")
}

log.Info("Cleaning up k8s-dqlite directory")
if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil {
return fmt.Errorf("failed to cleanup k8s-dqlite state directory: %w", err)
}
case "external":
log.Info("Cleaning up external datastore certificates")
if _, err := setup.EnsureExtDatastorePKI(snap, &pki.ExternalDatastorePKI{}); err != nil {
Expand All @@ -108,6 +104,10 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr
log.Error(err, "Failed to retrieve cluster config")
}

log.Info("Cleaning up k8s-dqlite directory")
if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil {
log.Error(err, "failed to cleanup k8s-dqlite state directory")
}
for _, dir := range []string{snap.ServiceArgumentsDir()} {
log.WithValues("directory", dir).Info("Cleaning up config files", dir)
if err := os.RemoveAll(dir); err != nil {
Expand Down Expand Up @@ -145,6 +145,11 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr
if err := snaputil.StopControlPlaneServices(ctx, snap); err != nil {
log.Error(err, "Failed to stop control-plane services")
}

log.Info("Stopping k8s-dqlite")
if err := snaputil.StopK8sDqliteServices(ctx, snap); err != nil {
log.Error(err, "Failed to stop k8s-dqlite service")
}
}

tryCleanupContainerdPaths(log, snap)
Expand Down

0 comments on commit 91f29ec

Please sign in to comment.