Commit ba5cb4d

chore: reduce cronjob log verbosity with structured logging
Signed-off-by: Alessandro Yuichi Okimoto <yuichijpn@gmail.com>
1 parent 8b138de commit ba5cb4d

File tree

8 files changed: +22 −21 lines changed

manifests/bucketeer/charts/batch/templates/cronjob.yaml

Lines changed: 15 additions & 3 deletions
@@ -48,11 +48,23 @@ spec:
 args:
   - -c
   - |
-    echo "Start {{ .name }} job."
+    echo '{"severity":"DEBUG","message":"Start {{ .name }} job."}'
     ENDPOINT="${WEB_GATEWAY_ADDRESS}/bucketeer.batch.BatchService/ExecuteBatchJob"
     TOKEN=`cat /usr/local/service-token/token`
-    RES=`curl -X POST -m 3600 --cacert /usr/local/certs/service/tls.crt -d '{"job": "{{ .jobId }}"}' -H "authorization: bearer ${TOKEN}" -H "Content-Type: application/json" -s -o /dev/null -w '%{http_code}\\n' ${ENDPOINT}`
-    echo "{{ .name }} job result: ${RES}"
+    RESPONSE=$(mktemp)
+    RES=`curl -X POST -m 3600 --cacert /usr/local/certs/service/tls.crt -d '{"job": "{{ .jobId }}"}' -H "authorization: bearer ${TOKEN}" -H "Content-Type: application/json" -s -o ${RESPONSE} -w '%{http_code}\\n' ${ENDPOINT}`
+    BODY=$(cat ${RESPONSE})
+    rm -f ${RESPONSE}
+    # API returns empty {} on success, or error details on failure
+    # Log based on response code like the API does (codes.InvalidArgument=400, other errors=500)
+    if [ "$RES" = "200" ]; then
+      # Success - use DEBUG to avoid verbose logs
+      echo '{"severity":"DEBUG","message":"{{ .name }} job completed successfully"}'
+    else
+      # Failure - log as ERROR with response details for debugging
+      BODY_ESCAPED=$(echo "${BODY}" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | tr '\n\r\t' ' ')
+      echo '{"severity":"ERROR","message":"{{ .name }} job failed with status '"${RES}"'","response":"'"${BODY_ESCAPED}"'"}'
+    fi
     if [ "$RES" = 200 ] || [ "$RES" = 503 ] || [ "$RES" = 000 ]
     then
       exit 0
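For reference, a minimal local sketch of what the new error branch emits. The status code and response body below are hypothetical values, not output from the chart or the batch API; only the escaping pipeline is taken from the template above:

RES=500
BODY='{"code":13,"message":"internal: job execution failed"}'   # hypothetical API error body
# Escape backslashes and double quotes, and flatten newlines/tabs,
# so the body can be embedded inside a single JSON string field.
BODY_ESCAPED=$(echo "${BODY}" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | tr '\n\r\t' ' ')
echo '{"severity":"ERROR","message":"example job failed with status '"${RES}"'","response":"'"${BODY_ESCAPED}"'"}'
# Prints one structured line, e.g.:
# {"severity":"ERROR","message":"example job failed with status 500","response":"{\"code\":13,\"message\":\"internal: job execution failed\"}"}

Escaping the body before embedding it keeps the failure log a single valid JSON line, so log collectors can still parse the severity and message fields even when the API returns quotes or newlines in its error details.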

pkg/api/cmd/server.go

Lines changed: 0 additions & 3 deletions
@@ -571,7 +571,6 @@ func (s *server) Run(ctx context.Context, metrics metrics.Metrics, logger *zap.L
 // Wait for K8s endpoint propagation
 // This prevents "context deadline exceeded" errors during high traffic.
 time.Sleep(propagationDelay)
-logger.Info("Starting HTTP/gRPC server shutdown")

 // Mark as unhealthy so readiness probes fail
 // This ensures Kubernetes readiness probe fails on next check,
@@ -600,11 +599,9 @@ func (s *server) Run(ctx context.Context, metrics metrics.Metrics, logger *zap.L

 // Wait for HTTP/REST traffic to fully drain
 wg.Wait()
-logger.Info("gRPC-gateway and HTTP server shutdown completed")

 // Now it's safe to stop the gRPC server (no more HTTP→gRPC calls)
 server.Stop(grpcStopTimeout)
-logger.Info("gRPC server shutdown completed")

 // Close clients
 // These are fast cleanup operations that can run asynchronously.

pkg/batch/cmd/server/server.go

Lines changed: 0 additions & 3 deletions
@@ -636,7 +636,6 @@ func (s *server) Run(ctx context.Context, metrics metrics.Metrics, logger *zap.L
 // Wait for K8s endpoint propagation
 // This prevents "context deadline exceeded" errors during high traffic.
 time.Sleep(propagationDelay)
-logger.Info("Starting HTTP/gRPC server shutdown")

 // Mark as unhealthy so readiness probes fail
 // This ensures Kubernetes readiness probe fails on next check,
@@ -645,11 +644,9 @@ func (s *server) Run(ctx context.Context, metrics metrics.Metrics, logger *zap.L

 // Gracefully stop gRPC Gateway (calls the gRPC server internally)
 batchGateway.Stop(serverShutDownTimeout)
-logger.Info("gRPC-gateway server shutdown completed")

 // Stop gRPC server (only pure gRPC connections remain)
 server.Stop(grpcStopTimeout)
-logger.Info("gRPC server shutdown completed")

 // Close clients
 // These are fast cleanup operations that can run asynchronously.

pkg/batch/jobs/calculator/experiment_calculate.go

Lines changed: 1 addition & 1 deletion
@@ -135,7 +135,7 @@ func (e *experimentCalculate) runCalculation() {
 return
 }
 if experiments == nil {
-e.logger.Info("There are no experiments for calculation in the specified environment",
+e.logger.Debug("There are no experiments for calculation in the specified environment",
 log.FieldsFromIncomingContext(ctxWithTimeout).AddFields(
 zap.String("environmentId", env.Id),
 )...,

pkg/batch/jobs/opsevent/progressove_rollout_watcher.go

Lines changed: 1 addition & 1 deletion
@@ -129,7 +129,7 @@ func (w *progressiveRolloutWatcher) executeProgressiveRollout(
 now := time.Now().Unix()
 for _, s := range schedules {
 if s.TriggeredAt == 0 && s.ExecuteAt <= now {
-w.logger.Info("scheduled time is passed",
+w.logger.Debug("scheduled time is passed",
 zap.String("environmentId", environmentId),
 zap.String("featureId", progressiveRollout.FeatureId),
 zap.String("progressiveRolloutId", progressiveRollout.Id),

pkg/batch/jobs/rediscounter/redis_counter_deleter.go

Lines changed: 5 additions & 6 deletions
@@ -131,7 +131,7 @@ func (r *redisCounterDeleter) deleteKeysByKind(environmentId, kind string) (int,
 return 0, err
 }
 if len(keys) == 0 {
-r.logger.Info("No keys was found",
+r.logger.Debug("No keys was found",
 zap.String("environmentId", environmentId),
 zap.String("kind", kind),
 )
@@ -141,15 +141,14 @@
 if err != nil {
 return 0, err
 }
-r.logger.Info("Filtered keys older than 31 days",
+r.logger.Debug("Filtered keys older than 31 days",
 zap.String("environmentId", environmentId),
 zap.String("kind", kind),
 zap.Int("filteredKeysSize", len(filteredKeys)),
 )
 // To avoid blocking Redis for too much time while deleting all the keys
 // we split the keys in chunks
 chunks := r.chunkSlice(filteredKeys, redisChunkMaxSize)
-r.logger.Info("Chunked the filtered keys", zap.Int("chunkSize", len(chunks)))
 deletedKeys := 0
 for _, chunk := range chunks {
 if err := r.deleteKeys(chunk); err != nil {
@@ -163,7 +162,7 @@ func (r *redisCounterDeleter) deleteKeysByKind(environmentId, kind string) (int,
 return deletedKeys, err
 }
 deletedKeys += len(chunk)
-r.logger.Info("Chunk deleted successfully", zap.Strings("keys", chunk))
+r.logger.Debug("Chunk deleted successfully", zap.Strings("keys", chunk))
 }
 return deletedKeys, nil
 }
@@ -174,7 +173,7 @@ func (r *redisCounterDeleter) newKeyPrefix(environmentId, kind string) string {
 }

 func (r *redisCounterDeleter) scan(environmentId, kind, key string) ([]string, error) {
-r.logger.Info("Starting scan keys from Redis",
+r.logger.Debug("Starting scan keys from Redis",
 zap.String("environmentId", environmentId),
 zap.String("kind", kind),
 )
@@ -196,7 +195,7 @@ func (r *redisCounterDeleter) scan(environmentId, kind, key string) ([]string, e
 if err != nil {
 return nil, err
 }
-r.logger.Info("Finished scanning keys from Redis",
+r.logger.Debug("Finished scanning keys from Redis",
 zap.String("environmentId", environmentId),
 zap.String("kind", kind),
 zap.Duration("elapsedTime", time.Since(startTime)),
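Whether these demoted entries still appear depends on the level the zap logger is built with: Debug calls are dropped when the logger runs at Info or above. A minimal standalone sketch of that behavior (not Bucketeer's actual logger setup):

package main

import (
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
)

func main() {
	// Production-style config defaults to InfoLevel, so Debug calls are discarded.
	cfg := zap.NewProductionConfig()
	logger, _ := cfg.Build()
	logger.Debug("Chunk deleted successfully") // suppressed at InfoLevel
	logger.Info("job finished")                // emitted as structured JSON

	// Lowering the level re-enables the demoted messages when deeper debugging is needed.
	cfg.Level = zap.NewAtomicLevelAt(zapcore.DebugLevel)
	verbose, _ := cfg.Build()
	verbose.Debug("Chunk deleted successfully") // emitted as a DEBUG entry
}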

pkg/subscriber/cmd/server/server.go

Lines changed: 0 additions & 2 deletions
@@ -368,12 +368,10 @@ func (s *server) Run(ctx context.Context, metrics metrics.Metrics, logger *zap.L
 // This ensures Kubernetes readiness probe fails on next check,
 // preventing new traffic from being routed to this pod.
 restHealthChecker.Stop()
-logger.Info("Health check marked as unhealthy (readiness will fail)")

 // Stop PubSub subscription
 // This stops receiving new messages and allows in-flight messages to be processed.
 multiPubSub.Stop()
-logger.Info("PubSub subscription stopped, all messages processed")

 // Close clients
 // These are fast cleanup operations that can run asynchronously.

pkg/web/cmd/server/server.go

Lines changed: 0 additions & 2 deletions
@@ -865,7 +865,6 @@ func (s *server) Run(ctx context.Context, metrics metrics.Metrics, logger *zap.L
 // Wait for K8s endpoint propagation
 // This prevents "context deadline exceeded" errors during high traffic.
 time.Sleep(propagationDelay)
-logger.Info("Starting HTTP/gRPC server shutdown")

 // Mark as unhealthy so readiness probes fail
 // This ensures Kubernetes readiness probe fails on next check,
@@ -891,7 +890,6 @@ func (s *server) Run(ctx context.Context, metrics metrics.Metrics, logger *zap.L

 // Wait for REST traffic to drain
 restWg.Wait()
-logger.Info("REST servers shutdown completed")

 // Gracefully stop all gRPC servers in parallel
 // Now safe to stop since REST traffic has drained