
Commit cfdfa18

fix: shutdown order

Signed-off-by: Alessandro Yuichi Okimoto <yuichijpn@gmail.com>

1 parent b66f813

File tree: 10 files changed (+147, -172 lines)


manifests/bucketeer/charts/api/templates/deployment.yaml

Lines changed: 18 additions & 12 deletions
@@ -181,25 +181,31 @@ spec:
     - "/bin/sh"
     - "-c"
     - |
-      # Fail Envoy health check immediately
+      # Step 1: Fail Envoy health check so K8s removes pod from endpoints
       wget -O- --post-data='{}' http://localhost:$ENVOY_ADMIN_PORT/healthcheck/fail

-      # Wait for API to signal ready for shutdown (max 22s)
-      # This is coordinated with the app's 20s shutdown timeout.
-      # Envoy must wait LONGER than the app timeout to ensure it doesn't
-      # start draining while the app is still processing requests.
-      timeout=22
-      while [ $timeout -gt 0 ]; do
-        if wget -q -O- --no-check-certificate https://localhost:9090/internal/shutdown-ready 2>/dev/null | grep -q "ready"; then
-          echo "API ready for shutdown, draining connections..."
+      # Step 2: Stop accepting new inbound connections at Envoy
+      wget -O- --post-data='{}' http://localhost:$ENVOY_ADMIN_PORT/drain_listeners?inboundonly
+
+      # Step 3: Wait for all active connections to drain (max 25s)
+      # Uses Istio pattern: dynamically checks all connections excluding Envoy and TIME-WAIT
+      elapsed=0
+      max_wait=25
+      while [ $elapsed -lt $max_wait ]; do
+        # Count active connections excluding Envoy process and TIME-WAIT states
+        active_conns=$(ss -Htlp state all | grep -vE '(envoy|TIME-WAIT)' | wc -l | xargs)
+        if [ "$active_conns" -eq 0 ]; then
+          echo "All connections drained after ${elapsed}s"
           break
         fi
+        echo "Waiting for $active_conns connections to drain..."
         sleep 1
-        timeout=$((timeout-1))
+        elapsed=$((elapsed + 1))
       done

-      # Additional drain time for remaining connections
-      sleep 3
+      if [ $elapsed -ge $max_wait ]; then
+        echo "Timeout reached, forcing shutdown with $active_conns remaining connections"
+      fi
   command: ["envoy"]
   args:
     - "-c"

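For context on the two admin endpoints the new hook calls: /healthcheck/fail and /drain_listeners?inboundonly are standard Envoy admin POSTs. Below is a minimal Go sketch of the same Step 1/Step 2 sequence; it is illustrative only, not part of this commit, and assumes (like the hook) that ENVOY_ADMIN_PORT is set and the admin listener speaks plain HTTP on localhost.

```go
// Sketch of the preStop hook's first two steps against Envoy's admin API.
// Mirrors `wget -O- --post-data='{}' ...`: both endpoints expect a POST.
package main

import (
	"fmt"
	"net/http"
	"os"
	"strings"
)

func postAdmin(path string) error {
	url := fmt.Sprintf("http://localhost:%s%s", os.Getenv("ENVOY_ADMIN_PORT"), path)
	resp, err := http.Post(url, "application/json", strings.NewReader("{}"))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("POST %s: %s", path, resp.Status)
	}
	return nil
}

func main() {
	// Step 1: fail the health check so Kubernetes removes the pod from endpoints.
	if err := postAdmin("/healthcheck/fail"); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
	// Step 2: stop accepting new inbound connections; outbound stays open
	// so in-flight upstream requests can still complete.
	if err := postAdmin("/drain_listeners?inboundonly"); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
```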
manifests/bucketeer/charts/api/templates/envoy-configmap.yaml

Lines changed: 2 additions & 0 deletions
@@ -201,6 +201,8 @@ data:
       - name: envoy.filters.http.cors
         typed_config:
           "@type": type.googleapis.com/envoy.extensions.filters.http.cors.v3.Cors
+      # DEPRECATED: grpc-web filter for legacy Node.js SDK
+      # TODO: Remove once Node.js SDK migrates to gRPC-Gateway (REST) or pure gRPC
       - name: envoy.filters.http.grpc_web
         typed_config:
           "@type": type.googleapis.com/envoy.extensions.filters.http.grpc_web.v3.GrpcWeb

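Since the new comment points Node.js SDK users at gRPC-Gateway, here is a hedged sketch of what that REST path looks like. The runtime package is the one this commit already imports in pkg/batch/cmd/server/server.go; registerFunc is a hypothetical stand-in for a service's generated RegisterXxxHandlerFromEndpoint, since the actual service is not shown here.

```go
package gatewaysketch

import (
	"context"
	"net/http"

	"github.com/grpc-ecosystem/grpc-gateway/v2/runtime"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

// registerFunc stands in for a service's generated
// RegisterXxxHandlerFromEndpoint function (hypothetical placeholder).
type registerFunc func(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) error

// serveREST exposes a gRPC service over plain HTTP/JSON via grpc-gateway,
// the migration target the TODO names. The gateway dials the gRPC endpoint,
// which is why gateways must drain before the gRPC server stops.
func serveREST(ctx context.Context, register registerFunc, grpcEndpoint, httpAddr string) error {
	mux := runtime.NewServeMux()
	opts := []grpc.DialOption{grpc.WithTransportCredentials(insecure.NewCredentials())}
	if err := register(ctx, mux, grpcEndpoint, opts); err != nil {
		return err
	}
	return (&http.Server{Addr: httpAddr, Handler: mux}).ListenAndServe()
}
```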
manifests/bucketeer/charts/batch/templates/deployment.yaml

Lines changed: 18 additions & 12 deletions
@@ -217,25 +217,31 @@ spec:
     - "/bin/sh"
     - "-c"
     - |
-      # Fail Envoy health check immediately
+      # Step 1: Fail Envoy health check so K8s removes pod from endpoints
       wget -O- --post-data='{}' http://localhost:$ENVOY_ADMIN_PORT/healthcheck/fail

-      # Wait for Batch service to signal ready for shutdown (max 22s)
-      # This is coordinated with the app's 20s shutdown timeout.
-      # Envoy must wait LONGER than the app timeout to ensure it doesn't
-      # start draining while the app is still processing requests.
-      timeout=22
-      while [ $timeout -gt 0 ]; do
-        if wget -q -O- --no-check-certificate https://localhost:9090/internal/shutdown-ready 2>/dev/null | grep -q "ready"; then
-          echo "Batch service ready for shutdown, draining connections..."
+      # Step 2: Stop accepting new inbound connections at Envoy
+      wget -O- --post-data='{}' http://localhost:$ENVOY_ADMIN_PORT/drain_listeners?inboundonly
+
+      # Step 3: Wait for all active connections to drain (max 25s)
+      # Uses Istio pattern: dynamically checks all connections excluding Envoy and TIME-WAIT
+      elapsed=0
+      max_wait=25
+      while [ $elapsed -lt $max_wait ]; do
+        # Count active connections excluding Envoy process and TIME-WAIT states
+        active_conns=$(ss -Htlp state all | grep -vE '(envoy|TIME-WAIT)' | wc -l | xargs)
+        if [ "$active_conns" -eq 0 ]; then
+          echo "All connections drained after ${elapsed}s"
           break
         fi
+        echo "Waiting for $active_conns connections to drain..."
         sleep 1
-        timeout=$((timeout-1))
+        elapsed=$((elapsed + 1))
       done

-      # Additional drain time for remaining connections
-      sleep 3
+      if [ $elapsed -ge $max_wait ]; then
+        echo "Timeout reached, forcing shutdown with $active_conns remaining connections"
+      fi
   command: ["envoy"]
   args:
     - "-c"

manifests/bucketeer/charts/subscriber/templates/deployment.yaml

Lines changed: 18 additions & 14 deletions
@@ -197,27 +197,31 @@ spec:
     - "/bin/sh"
     - "-c"
     - |
-      # Fail Envoy health check immediately
+      # Step 1: Fail Envoy health check so K8s removes pod from endpoints
       wget -O- --post-data='{}' http://localhost:$ENVOY_ADMIN_PORT/healthcheck/fail

-      # For subscriber service, give time for PubSub message processing to complete (max 22s)
-      # This is coordinated with the app's 20s shutdown timeout.
-      # Envoy must wait LONGER than the app timeout to ensure it doesn't exit
-      # while the subscriber is still processing messages.
-      # Note: Subscriber uses process detection instead of /internal/shutdown-ready endpoint
-      timeout=22
-      while [ $timeout -gt 0 ]; do
-        # Check if subscriber main process is still running (processing messages)
-        if ! pgrep -f "subscriber" > /dev/null; then
-          echo "Subscriber process completed, ready for shutdown..."
+      # Step 2: Stop accepting new inbound connections at Envoy
+      wget -O- --post-data='{}' http://localhost:$ENVOY_ADMIN_PORT/drain_listeners?inboundonly
+
+      # Step 3: Wait for all active connections to drain (max 25s)
+      # Uses Istio pattern: dynamically checks all connections excluding Envoy and TIME-WAIT
+      elapsed=0
+      max_wait=25
+      while [ $elapsed -lt $max_wait ]; do
+        # Count active connections excluding Envoy process and TIME-WAIT states
+        active_conns=$(ss -Htlp state all | grep -vE '(envoy|TIME-WAIT)' | wc -l | xargs)
+        if [ "$active_conns" -eq 0 ]; then
+          echo "All connections drained after ${elapsed}s"
           break
         fi
+        echo "Waiting for $active_conns connections to drain..."
         sleep 1
-        timeout=$((timeout-1))
+        elapsed=$((elapsed + 1))
       done

-      # Additional drain time for remaining connections
-      sleep 3
+      if [ $elapsed -ge $max_wait ]; then
+        echo "Timeout reached, forcing shutdown with $active_conns remaining connections"
+      fi
   command: ["envoy"]
   args:
     - "-c"

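The subscriber chart previously gated shutdown on pgrep-based process detection; it now uses the same socket-counting drain loop as the other charts. As a rough illustration of what `ss -Htlp state all | grep -vE '(envoy|TIME-WAIT)'` measures, here is a Go sketch (not repo code) that reads /proc/net/tcp and skips TIME_WAIT entries; unlike the hook it does not exclude Envoy's own sockets, and it ignores IPv6 (/proc/net/tcp6) for brevity.

```go
// Rough Go approximation of the hook's drain check: count /proc/net/tcp
// entries whose state is not TIME_WAIT (hex 06 in the `st` column).
package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

func countActiveConns() (int, error) {
	f, err := os.Open("/proc/net/tcp")
	if err != nil {
		return 0, err
	}
	defer f.Close()

	const timeWait = "06" // TCP_TIME_WAIT in /proc/net/tcp's hex state encoding
	count := 0
	sc := bufio.NewScanner(f)
	sc.Scan() // skip the header line
	for sc.Scan() {
		// Columns: sl, local_address, rem_address, st, ...
		fields := strings.Fields(sc.Text())
		if len(fields) > 3 && fields[3] != timeWait {
			count++
		}
	}
	return count, sc.Err()
}

func main() {
	n, err := countActiveConns()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Printf("%d connections not yet drained\n", n)
}
```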
manifests/bucketeer/charts/web/templates/deployment.yaml

Lines changed: 18 additions & 12 deletions
@@ -267,25 +267,31 @@ spec:
     - "/bin/sh"
     - "-c"
     - |
-      # Fail Envoy health check immediately
+      # Step 1: Fail Envoy health check so K8s removes pod from endpoints
       wget -O- --post-data='{}' http://localhost:$ENVOY_ADMIN_PORT/healthcheck/fail

-      # Wait for Web services to signal ready for shutdown (max 22s)
-      # This is coordinated with the app's 20s shutdown timeout.
-      # Envoy must wait LONGER than the app timeout to ensure it doesn't
-      # start draining while the app is still processing requests.
-      timeout=22
-      while [ $timeout -gt 0 ]; do
-        if wget -q -O- --no-check-certificate https://localhost:9090/internal/shutdown-ready 2>/dev/null | grep -q "ready"; then
-          echo "Web services ready for shutdown, draining connections..."
+      # Step 2: Stop accepting new inbound connections at Envoy
+      wget -O- --post-data='{}' http://localhost:$ENVOY_ADMIN_PORT/drain_listeners?inboundonly
+
+      # Step 3: Wait for all active connections to drain (max 25s)
+      # Uses Istio pattern: dynamically checks all connections excluding Envoy and TIME-WAIT
+      elapsed=0
+      max_wait=25
+      while [ $elapsed -lt $max_wait ]; do
+        # Count active connections excluding Envoy process and TIME-WAIT states
+        active_conns=$(ss -Htlp state all | grep -vE '(envoy|TIME-WAIT)' | wc -l | xargs)
+        if [ "$active_conns" -eq 0 ]; then
+          echo "All connections drained after ${elapsed}s"
           break
         fi
+        echo "Waiting for $active_conns connections to drain..."
         sleep 1
-        timeout=$((timeout-1))
+        elapsed=$((elapsed + 1))
       done

-      # Additional drain time for remaining connections
-      sleep 3
+      if [ $elapsed -ge $max_wait ]; then
+        echo "Timeout reached, forcing shutdown with $active_conns remaining connections"
+      fi
   command: ["envoy"]
   args:
     - "-c"

manifests/bucketeer/charts/web/templates/envoy-configmap.yaml

Lines changed: 2 additions & 0 deletions
@@ -779,6 +779,8 @@ data:
       - name: envoy.filters.http.cors
         typed_config:
           "@type": type.googleapis.com/envoy.extensions.filters.http.cors.v3.Cors
+      # DEPRECATED: grpc-web filter for legacy Node.js SDK
+      # TODO: Remove once Node.js SDK migrates to gRPC-Gateway (REST) or pure gRPC
       - name: envoy.filters.http.grpc_web
         typed_config:
           "@type": type.googleapis.com/envoy.extensions.filters.http.grpc_web.v3.GrpcWeb

pkg/api/cmd/server.go

Lines changed: 12 additions & 11 deletions
@@ -569,8 +569,12 @@ func (s *server) Run(ctx context.Context, metrics metrics.Metrics, logger *zap.L

 	// Graceful shutdown sequence optimized for GCP Spot VM constraints (30s termination window):
 	// 1. Stop health checks immediately to fail Kubernetes readiness probe ASAP
-	// 2. Gracefully drain all servers in parallel (allows in-flight requests to complete)
-	// 3. Close clients
+	// 2. Gracefully drain REST/HTTP servers first (apiGateway + httpServer)
+	// 3. Then stop gRPC server (after REST traffic completes)
+	// 4. Close clients
+	//
+	// Shutdown order is critical because apiGateway forwards requests to server (port 9090).
+	// If server stops while apiGateway is processing, those requests fail.
 	//
 	// This coordinates with Envoy's preStop hook which waits for /internal/shutdown-ready
 	// to return 200 (set by rpc.Server after graceful shutdown completes).
@@ -581,16 +585,10 @@ func (s *server) Run(ctx context.Context, metrics metrics.Metrics, logger *zap.L
 	healthChecker.Stop()
 	restHealthChecker.Stop()

-	// Step 2: Gracefully stop all servers in parallel
-	// Each server will reject new requests and wait for existing requests to complete.
+	// Step 2: Gracefully stop REST/HTTP servers (these call the gRPC server internally)
+	// We run these in parallel since they don't depend on each other
 	var wg sync.WaitGroup

-	wg.Add(1)
-	go func() {
-		defer wg.Done()
-		server.Stop(serverShutDownTimeout)
-	}()
-
 	wg.Add(1)
 	go func() {
 		defer wg.Done()
@@ -603,9 +601,12 @@ func (s *server) Run(ctx context.Context, metrics metrics.Metrics, logger *zap.L
 		httpServer.Stop(serverShutDownTimeout)
 	}()

-	// Wait for all servers to complete shutdown
+	// Wait for REST/HTTP traffic to drain
 	wg.Wait()

+	// Step 3: Stop gRPC server (only pure gRPC connections remain)
+	server.Stop(serverShutDownTimeout)
+
 	// Step 3: Close clients
 	// These are fast cleanup operations that can run asynchronously.
 	go goalPublisher.Stop()

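To make the new ordering concrete outside the diff, here is a self-contained sketch of the same drain-then-stop sequence using stock net/http and google.golang.org/grpc primitives; Bucketeer's rpc.Server wrapper and its Stop(timeout) signature are not reproduced here.

```go
package shutdownsketch

import (
	"context"
	"net/http"
	"sync"
	"time"

	"google.golang.org/grpc"
)

// drainThenStop mirrors the ordering this commit adopts: the REST/HTTP
// front-ends drain in parallel first, and only then does the gRPC backend
// they forward to stop.
func drainThenStop(grpcSrv *grpc.Server, gateways []*http.Server, timeout time.Duration) {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	// Step 2: drain gateways in parallel; they are independent of each
	// other, but both proxy to the gRPC server, so it must stay up here.
	var wg sync.WaitGroup
	for _, gw := range gateways {
		wg.Add(1)
		go func(s *http.Server) {
			defer wg.Done()
			_ = s.Shutdown(ctx) // stop accepting; wait for in-flight requests
		}(gw)
	}
	wg.Wait()

	// Step 3: only now stop the backend; any request a gateway was still
	// proxying has already completed, so nothing fails mid-flight.
	grpcSrv.GracefulStop()
}
```

The order matters only because the gateways dial the gRPC server locally; reversing it would fail exactly the in-flight proxied requests the commit's comment describes.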
pkg/batch/cmd/server/server.go

Lines changed: 10 additions & 20 deletions
@@ -19,7 +19,6 @@ import (
 	"fmt"
 	"os"
 	"strings"
-	"sync"
 	"time"

 	"github.com/grpc-ecosystem/grpc-gateway/v2/runtime"
@@ -621,8 +620,12 @@ func (s *server) Run(ctx context.Context, metrics metrics.Metrics, logger *zap.L

 	// Graceful shutdown sequence optimized for GCP Spot VM constraints (30s termination window):
 	// 1. Stop health check immediately to fail Kubernetes readiness probe ASAP
-	// 2. Gracefully drain all servers in parallel (allows in-flight requests to complete)
-	// 3. Close database/cache/pubsub clients
+	// 2. Gracefully drain REST gateway first (batchGateway)
+	// 3. Then stop gRPC server (after REST traffic completes)
+	// 4. Close database/cache/pubsub clients
+	//
+	// Shutdown order is critical because batchGateway forwards requests to server (port 9000).
+	// If server stops while batchGateway is processing, those requests fail.
 	//
 	// This coordinates with Envoy's preStop hook which waits for /internal/shutdown-ready
 	// to return 200 (set by rpc.Server after graceful shutdown completes).
@@ -632,24 +635,11 @@ func (s *server) Run(ctx context.Context, metrics metrics.Metrics, logger *zap.L
 	// preventing new traffic from being routed to this pod.
 	healthChecker.Stop()

-	// Step 2: Gracefully stop all servers in parallel
-	// Each server will reject new requests and wait for existing requests to complete.
-	var wg sync.WaitGroup
-
-	wg.Add(1)
-	go func() {
-		defer wg.Done()
-		server.Stop(serverShutDownTimeout)
-	}()
-
-	wg.Add(1)
-	go func() {
-		defer wg.Done()
-		batchGateway.Stop(serverShutDownTimeout)
-	}()
+	// Step 2: Gracefully stop REST gateway (calls the gRPC server internally)
+	batchGateway.Stop(serverShutDownTimeout)

-	// Wait for all servers to complete shutdown
-	wg.Wait()
+	// Step 3: Stop gRPC server (only pure gRPC connections remain)
+	server.Stop(serverShutDownTimeout)

 	// Step 3: Close clients
 	// These are fast cleanup operations that can run asynchronously.

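The batch server now stops batchGateway and then server sequentially, each bounded by serverShutDownTimeout. Plain grpc.Server.GracefulStop takes no deadline, so a timeout-taking Stop like the repo's rpc.Server implies a bounding pattern along these lines; this is a sketch of that common pattern under stated assumptions, not the repo's actual implementation.

```go
package shutdownsketch

import (
	"time"

	"google.golang.org/grpc"
)

// stopWithTimeout bounds GracefulStop with a deadline: drain if possible,
// hard-stop if the drain exceeds the budget.
func stopWithTimeout(s *grpc.Server, timeout time.Duration) {
	done := make(chan struct{})
	go func() {
		s.GracefulStop() // blocks until in-flight RPCs finish
		close(done)
	}()
	select {
	case <-done:
		// drained within budget
	case <-time.After(timeout):
		s.Stop() // deadline exceeded: close remaining connections now
	}
}
```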