From 201568ac45057ab1aaabf588f80d6823c769f371 Mon Sep 17 00:00:00 2001 From: Benjamin Wang Date: Tue, 21 Jan 2025 15:46:01 +0000 Subject: [PATCH] add comment to clarify the etcd shutting down workflow Signed-off-by: Benjamin Wang --- server/embed/etcd.go | 37 +++++++++++++++++++++++++++++++++---- server/embed/serve.go | 12 ++++++++++-- 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/server/embed/etcd.go b/server/embed/etcd.go index fe2450401de..fb610e0f42d 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -79,12 +79,23 @@ type Etcd struct { Server *etcdserver.EtcdServer - cfg Config - stopc chan struct{} - errc chan error + cfg Config + // closeOnce is to ensure `stopc` is closed only once, no matter + // how many times the Close() method is called. closeOnce sync.Once - wg sync.WaitGroup + // stopc is used to notify the sub goroutines not to send + // any errors to `errc`. + stopc chan struct{} + // errc is used to receive errors from sub goroutines (including + // client handlers, peer handlers and metrics handlers). It's closed + // after all these sub goroutines exit (checked via `wg`). Writers + // should avoid writing after `stopc` is closed by selecting on + // reading from `stopc`. + errc chan error + + // wg is used to track the lifecycle of all sub goroutines created by `StartEtcd`. + wg sync.WaitGroup } type peerListener struct { @@ -388,6 +399,24 @@ func (e *Etcd) Config() Config { // Close gracefully shuts down all servers/listeners. // Client requests will be terminated with request timeout. // After timeout, enforce remaning requests be closed immediately. +// +// The rough workflow to shut down etcd: +// 1. close the `stopc` channel, so that all error handlers (child +// goroutines) won't send back any errors anymore; +// 2. stop the http and grpc servers gracefully, within request timeout; +// 3. close all client and metrics listeners, so that the etcd server +// stops receiving any new connections; +// 4. 
call the cancel function to close the gateway context, so that +// all gateway connections are closed; +// 5. stop the etcd server gracefully, and ensure the main raft loop +// goroutine is stopped; +// 6. stop all peer listeners, so that etcd stops receiving peer connections +// and messages (waiting up to 1 second); +// 7. wait for all child goroutines (i.e. client handlers, peer handlers +// and metrics handlers) to exit; +// 8. close the `errc` channel to release the resource. Note that it's only +// safe to close `errc` after step 7 above is done, otherwise the +// child goroutines may send errors back to the already closed `errc` channel. func (e *Etcd) Close() { fields := []zap.Field{ zap.String("name", e.cfg.Name), diff --git a/server/embed/serve.go b/server/embed/serve.go index 5615a0c2120..ad7307666b0 100644 --- a/server/embed/serve.go +++ b/server/embed/serve.go @@ -61,14 +61,22 @@ type serveCtx struct { insecure bool httpOnly bool + // ctx is used to control the grpc gateway. Terminate the grpc gateway + // by calling `cancel` when shutting down etcd. ctx context.Context cancel context.CancelFunc userHandlers map[string]http.Handler serviceRegister func(*grpc.Server) - serversC chan *servers - closeOnce sync.Once + // serversC is used to receive the http and grpc server objects (created + // in `serve`), both of which will be closed when shutting down etcd. + // Close it when `serve` returns or when etcd fails to bootstrap. + serversC chan *servers + // closeOnce is to ensure `serversC` is closed only once. + closeOnce sync.Once + + // wg is used to track the lifecycle of all sub goroutines created by `serve`. wg sync.WaitGroup }