diff --git a/README.md b/README.md index fc41ba5..262eb23 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,7 @@ BLADE_LISTEN_METRICS=":1234" | `BLADE_FAN_SPEED_PERCENT=80` | Set static fan speed | | `BLADE_CRITICAL_TEMPERATURE_THRESHOLD=60` | Set critical temp threshold (°C) | | `BLADE_HAL_RPM_REPORTING_STANDARD_FAN_UNIT=false` | Disable RPM monitoring for lower CPU use | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | Endpoint for the OTLP exporter | ## Exposing the gRPC API for Remote Access diff --git a/cmd/agent/main.go b/cmd/agent/main.go index f91976d..b01ac04 100644 --- a/cmd/agent/main.go +++ b/cmd/agent/main.go @@ -17,6 +17,8 @@ import ( "github.com/compute-blade-community/compute-blade-agent/internal/api" "github.com/compute-blade-community/compute-blade-agent/pkg/log" "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/spechtlabs/go-otel-utils/otelprovider" + "github.com/spechtlabs/go-otel-utils/otelzap" "github.com/spf13/pflag" "github.com/spf13/viper" "go.uber.org/zap" @@ -25,7 +27,6 @@ import ( var ( Version string Commit string - Date string ) var debug = pflag.BoolP("debug", "v", false, "enable verbose logging") @@ -55,13 +56,68 @@ func main() { baseLogger = zap.Must(zap.NewProduction()) } - zapLogger := baseLogger.With(zap.String("app", "compute-blade-agent")) + zapLogger := baseLogger.With( + zap.String("app", "compute-blade-agent"), + zap.String("version", Version), + zap.String("commit", Commit), + ) defer func() { _ = zapLogger.Sync() }() - _ = zap.ReplaceGlobals(zapLogger.With(zap.String("scope", "global"))) - baseCtx := log.IntoContext(context.Background(), zapLogger) + // Replace zap global + undoZapGlobals := zap.ReplaceGlobals(zapLogger) + + // Redirect stdlib log to zap + undoStdLogRedirect := zap.RedirectStdLog(zapLogger) + + // Create OpenTelemetry Log and Trace provider + logProvider := otelprovider.NewLogger( + otelprovider.WithLogAutomaticEnv(), + ) + + traceProvider := otelprovider.NewTracer( + otelprovider.WithTraceAutomaticEnv(), + ) + + // Create otelLogger + otelZapLogger := otelzap.New(zapLogger, + otelzap.WithCaller(true), + otelzap.WithMinLevel(zap.InfoLevel), + otelzap.WithAnnotateLevel(zap.WarnLevel), + otelzap.WithErrorStatusLevel(zap.ErrorLevel), + otelzap.WithStackTrace(false), + otelzap.WithLoggerProvider(logProvider), + ) + + // Replace global otelZap logger + undoOtelZapGlobals := otelzap.ReplaceGlobals(otelZapLogger) + defer undoOtelZapGlobals() + + // Cleanup Logging and Tracing + defer func() { + if err := traceProvider.ForceFlush(context.Background()); err != nil { + otelzap.L().Warn("failed to flush traces") + } + + if err := logProvider.ForceFlush(context.Background()); err != nil { + otelzap.L().Warn("failed to flush logs") + } + + if err := traceProvider.Shutdown(context.Background()); err != nil { + panic(err) + } + + if err := logProvider.Shutdown(context.Background()); err != nil { + panic(err) + } + + undoStdLogRedirect() + undoZapGlobals() + }() + + // Setup context + baseCtx := log.IntoContext(context.Background(), otelZapLogger) ctx, cancelCtx := context.WithCancelCause(baseCtx) defer cancelCtx(context.Canceled) @@ -69,7 +125,7 @@ func main() { var cbAgentConfig agent.ComputeBladeAgentConfig if err := viper.Unmarshal(&cbAgentConfig); err != nil { cancelCtx(err) - log.FromContext(ctx).Fatal("Failed to load configuration", zap.Error(err)) + log.FromContext(ctx).WithError(err).Fatal("Failed to load configuration") } // setup stop signal handlers @@ -97,11 +153,11 @@ func main() { } }() - log.FromContext(ctx).Info("Bootstrapping compute-blade-agent", zap.String("version", Version), zap.String("commit", Commit), zap.String("date", Date)) + log.FromContext(ctx).Info("Bootstrapping compute-blade-agent") computebladeAgent, err := agent.NewComputeBladeAgent(ctx, cbAgentConfig) if err != nil { cancelCtx(err) - log.FromContext(ctx).Fatal("Failed to create agent", zap.Error(err)) + log.FromContext(ctx).WithError(err).Fatal("Failed to create agent") } // Run agent @@ -124,13 +180,17 @@ func main() { // Wait for done <-ctx.Done() + // Since ctx is now done, we can no longer use it to get `log.FromContext(ctx)` + // but we must use otelzap.L() to get a logger + + // Shut down gRPC and Prom Servers async var wg sync.WaitGroup // Shut-Down GRPC Server wg.Add(1) go func() { defer wg.Done() - log.FromContext(ctx).Info("Shutting down grpc server") + otelzap.L().Info("Shutting down grpc server") grpcServer.GracefulStop() }() @@ -142,18 +202,19 @@ func main() { shutdownCtx, shutdownCtxCancel := context.WithTimeout(context.Background(), 5*time.Second) defer shutdownCtxCancel() + otelzap.L().Info("Shutting down prometheus/pprof server") if err := promServer.Shutdown(shutdownCtx); err != nil { - log.FromContext(ctx).Error("Failed to shutdown prometheus/pprof server", zap.Error(err)) + otelzap.L().WithError(err).Error("Failed to shutdown prometheus/pprof server") } }() wg.Wait() - // Wait for context cancel + // Terminate accordingly if err := ctx.Err(); !errors.Is(err, context.Canceled) { - log.FromContext(ctx).Fatal("Exiting", zap.Error(err)) + otelzap.L().WithError(err).Fatal("Exiting") } else { - log.FromContext(ctx).Info("Exiting") + otelzap.L().Info("Exiting") } } @@ -172,7 +233,7 @@ func runPrometheusEndpoint(ctx context.Context, cancel context.CancelCauseFunc, go func() { err := server.ListenAndServe() if err != nil && !errors.Is(err, http.ErrServerClosed) { - log.FromContext(ctx).Error("Failed to start prometheus/pprof server", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Failed to start prometheus/pprof server") cancel(err) } }() diff --git a/cmd/bladectl/cmd_identify.go b/cmd/bladectl/cmd_identify.go index eea142c..436def5 100644 --- a/cmd/bladectl/cmd_identify.go +++ b/cmd/bladectl/cmd_identify.go @@ -2,6 +2,7 @@ package main import ( "errors" + bladeapiv1alpha1 "github.com/compute-blade-community/compute-blade-agent/api/bladeapi/v1alpha1" "github.com/sierrasoftworks/humane-errors-go" "github.com/spf13/cobra" diff --git a/cmd/bladectl/config/config.go b/cmd/bladectl/config/config.go index 3fb9c9f..fc979be 100644 --- a/cmd/bladectl/config/config.go +++ b/cmd/bladectl/config/config.go @@ -6,7 +6,7 @@ import ( "path/filepath" "github.com/sierrasoftworks/humane-errors-go" - "go.uber.org/zap" + "github.com/spechtlabs/go-otel-utils/otelzap" ) type BladectlConfig struct { @@ -58,7 +58,7 @@ func NewAuthenticatedBladectlConfig(server string, caPEM []byte, clientCertDER [ func NewBladectlConfig(server string) *BladectlConfig { hostname, err := os.Hostname() if err != nil { - zap.L().Fatal("Failed to extract hostname", zap.Error(err)) + otelzap.L().WithError(err).Fatal("Failed to extract hostname") } return &BladectlConfig{ diff --git a/go.mod b/go.mod index 699b3be..d9aeebc 100644 --- a/go.mod +++ b/go.mod @@ -6,12 +6,15 @@ require ( github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.2 github.com/prometheus/client_golang v1.22.0 github.com/sierrasoftworks/humane-errors-go v0.0.0-20250507223502-4bb667dc1e16 + github.com/spechtlabs/go-otel-utils/otelprovider v0.0.10 + github.com/spechtlabs/go-otel-utils/otelzap v0.0.10 github.com/spf13/cobra v1.9.1 github.com/spf13/pflag v1.0.6 github.com/spf13/viper v1.20.1 github.com/stretchr/testify v1.10.0 github.com/warthog618/gpiod v0.8.1 go.bug.st/serial v1.6.4 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 go.uber.org/zap v1.27.0 golang.org/x/sync v0.15.0 google.golang.org/grpc v1.73.0 @@ -21,13 +24,19 @@ require ( ) require ( + github.com/aws/smithy-go v1.22.3 // indirect github.com/beorn7/perks v1.0.1 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/creack/goselect v0.1.3 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-viper/mapstructure/v2 v2.2.1 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect @@ -41,9 +50,23 @@ require ( github.com/spf13/cast v1.8.0 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/subosito/gotenv v1.6.0 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/otel v1.36.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.11.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.11.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.35.0 // indirect + go.opentelemetry.io/otel/log v0.11.0 // indirect + go.opentelemetry.io/otel/metric v1.36.0 // indirect + go.opentelemetry.io/otel/sdk v1.36.0 // indirect + go.opentelemetry.io/otel/sdk/log v0.11.0 // indirect + go.opentelemetry.io/otel/trace v1.36.0 // indirect + go.opentelemetry.io/proto/otlp v1.5.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/net v0.40.0 // indirect golang.org/x/sys v0.33.0 // indirect golang.org/x/text v0.25.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250512202823-5a2f75b736a9 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250324211829-b45e905df463 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237 // indirect ) diff --git a/go.sum b/go.sum index f01373d..0a5e083 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,9 @@ +github.com/aws/smithy-go v1.22.3 h1:Z//5NuZCSW6R4PhQ93hShNbyBbn8BWCmCVCt+Q8Io5k= +github.com/aws/smithy-go v1.22.3/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= @@ -11,6 +15,7 @@ github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHk github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -27,6 +32,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.2 h1:sGm2vDRFUrQJO/Veii4h4zG2vvqG6uWNkBHSTqXOZk0= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.2/go.mod h1:wd1YpapPLivG6nQgbf7ZkG1hhSOXDhhn4MLTknx2aAc= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 h1:e9Rjr40Z98/clHv5Yg79Is0NtosR5LXRvdr7o/6NwbA= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1/go.mod h1:tIxuGz/9mpox++sgp9fJjHO0+q1X9/UOWd798aAm22M= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= @@ -53,8 +60,8 @@ github.com/prometheus/common v0.63.0 h1:YR/EIY1o3mEFP/kZCD7iDMnLPlGyuU2Gb3HIcXnA github.com/prometheus/common v0.63.0/go.mod h1:VVFF/fBIoToEnWRVkYoXEkq3R3paCoxG9PXP74SnV18= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= -github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= -github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sagikazarmark/locafero v0.9.0 h1:GbgQGNtTrEmddYDSAH9QLRyfAHY12md+8YFTqyMTC9k= github.com/sagikazarmark/locafero v0.9.0/go.mod h1:UBUyz37V+EdMS3hDF3QWIiVr/2dPrx49OMO0Bn0hJqk= @@ -62,6 +69,10 @@ github.com/sierrasoftworks/humane-errors-go v0.0.0-20250507223502-4bb667dc1e16 h github.com/sierrasoftworks/humane-errors-go v0.0.0-20250507223502-4bb667dc1e16/go.mod h1:CbJLj9L1qHdzLg4YRh2Lzr0noe9pR6QrVEqfLbITRKw= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= +github.com/spechtlabs/go-otel-utils/otelprovider v0.0.10 h1:Q5p+5KGA587GfzR6FdXGje4XBfxhi1u4NSu6lSnWCGA= +github.com/spechtlabs/go-otel-utils/otelprovider v0.0.10/go.mod h1:sFuJXEBbNq/pQx9pP5OnVtx9yGJnH4fXi7x3qihW0ak= +github.com/spechtlabs/go-otel-utils/otelzap v0.0.10 h1:RR/WS4b+ABxNL7xzlK4FTvnuXRbGk3yyggvvLnQ+FeM= +github.com/spechtlabs/go-otel-utils/otelzap v0.0.10/go.mod h1:IhsBuW+sZwLxX1Ww5LmTlIonBP8GiyhsiZkIRq+ySE0= github.com/spf13/afero v1.14.0 h1:9tH6MapGnn/j0eb0yIXiLjERO8RB6xIVZRDCX7PtqWA= github.com/spf13/afero v1.14.0/go.mod h1:acJQ8t0ohCGuMN3O+Pv0V0hgMxNYDlvdk+VTfyZmbYo= github.com/spf13/cast v1.8.0 h1:gEN9K4b8Xws4EX0+a0reLmhq8moKn7ntRlQYgjPeCDk= @@ -84,16 +95,34 @@ go.bug.st/serial v1.6.4 h1:7FmqNPgVp3pu2Jz5PoPtbZ9jJO5gnEnZIvnI1lzve8A= go.bug.st/serial v1.6.4/go.mod h1:nofMJxTeNVny/m6+KaafC6vJGj3miwQZ6vW4BZUGJPI= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY= -go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI= -go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ= -go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE= -go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A= -go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU= -go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk= -go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w= -go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k= -go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 h1:q4XOmH/0opmeuJtPsbFNivyl7bCt7yRBbeEm2sC/XtQ= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0/go.mod h1:snMWehoOh2wsEwnvvwtDyFCxVeDAODenXHtn5vzrKjo= +go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg= +go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.11.0 h1:HMUytBT3uGhPKYY/u/G5MR9itrlSO2SMOsSD3Tk3k7A= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.11.0/go.mod h1:hdDXsiNLmdW/9BF2jQpnHHlhFajpWCEYfM6e5m2OAZg= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.11.0 h1:C/Wi2F8wEmbxJ9Kuzw/nhP+Z9XaHYMkyDmXy6yR2cjw= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.11.0/go.mod h1:0Lr9vmGKzadCTgsiBydxr6GEZ8SsZ7Ks53LzjWG5Ar4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0 h1:1fTNlAIJZGWLP5FVu0fikVry1IsiUnXjf7QFvoNN3Xw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0/go.mod h1:zjPK58DtkqQFn+YUMbx0M2XV3QgKU0gS9LeGohREyK4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0 h1:m639+BofXTvcY1q8CGs4ItwQarYtJPOWmVobfM1HpVI= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0/go.mod h1:LjReUci/F4BUyv+y4dwnq3h/26iNOeC3wAIqgvTIZVo= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.35.0 h1:xJ2qHD0C1BeYVTLLR9sX12+Qb95kfeD/byKj6Ky1pXg= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.35.0/go.mod h1:u5BF1xyjstDowA1R5QAO9JHzqK+ublenEW/dyqTjBVk= +go.opentelemetry.io/otel/log v0.11.0 h1:c24Hrlk5WJ8JWcwbQxdBqxZdOK7PcP/LFtOtwpDTe3Y= +go.opentelemetry.io/otel/log v0.11.0/go.mod h1:U/sxQ83FPmT29trrifhQg+Zj2lo1/IPN1PF6RTFqdwc= +go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE= +go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs= +go.opentelemetry.io/otel/sdk v1.36.0 h1:b6SYIuLRs88ztox4EyrvRti80uXIFy+Sqzoh9kFULbs= +go.opentelemetry.io/otel/sdk v1.36.0/go.mod h1:+lC+mTgD+MUWfjJubi2vvXWcVxyr9rmlshZni72pXeY= +go.opentelemetry.io/otel/sdk/log v0.11.0 h1:7bAOpjpGglWhdEzP8z0VXc4jObOiDEwr3IYbhBnjk2c= +go.opentelemetry.io/otel/sdk/log v0.11.0/go.mod h1:dndLTxZbwBstZoqsJB3kGsRPkpAgaJrWfQg3lhlHFFY= +go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o= +go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w= +go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w= +go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= +go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4= +go.opentelemetry.io/proto/otlp v1.5.0/go.mod h1:keN8WnHxOy8PG0rQZjJJ5A2ebUoafqWp0eVQ4yIXvJ4= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= @@ -102,18 +131,16 @@ go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY= golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds= -golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= -golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250512202823-5a2f75b736a9 h1:IkAfh6J/yllPtpYFU0zZN1hUPYdT0ogkBT/9hMxHjvg= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250512202823-5a2f75b736a9/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= -google.golang.org/grpc v1.72.0 h1:S7UkcVa60b5AAQTaO6ZKamFp1zMZSU0fGDK2WZLbBnM= -google.golang.org/grpc v1.72.0/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM= +google.golang.org/genproto/googleapis/api v0.0.0-20250324211829-b45e905df463 h1:hE3bRWtU6uceqlh4fhrSnUyjKHMKB9KrTLLG+bc0ddM= +google.golang.org/genproto/googleapis/api v0.0.0-20250324211829-b45e905df463/go.mod h1:U90ffi8eUL9MwPcrJylN5+Mk2v3vuPDptd5yyNUiRR8= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237 h1:cJfm9zPbe1e873mHJzmQ1nwVEeRDU/T1wXDK2kUSU34= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= google.golang.org/grpc v1.73.0 h1:VIWSmpI2MegBtTuFt5/JWy2oXxtjJ/e89Z70ImfD2ok= google.golang.org/grpc v1.73.0/go.mod h1:50sbHOUqWoCQGI8V2HQLJM0B+LMlIUjNSZmow7EVBQc= google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= diff --git a/internal/agent/agent.go b/internal/agent/agent.go index f12adcf..e652bb2 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -89,7 +89,7 @@ func (a *computeBladeAgentImpl) RunAsync(ctx context.Context, cancel context.Can log.FromContext(ctx).Info("Starting agent") err := a.Run(ctx) if err != nil && !errors.Is(err, context.Canceled) { - log.FromContext(ctx).Error("Failed to run agent", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Failed to run agent") cancel(err) } }() @@ -117,7 +117,7 @@ func (a *computeBladeAgentImpl) Run(origCtx context.Context) error { defer wg.Done() log.FromContext(ctx).Info("Starting HAL") if err := a.blade.Run(ctx); err != nil && !errors.Is(err, context.Canceled) { - log.FromContext(ctx).Error("HAL failed", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("HAL failed") cancelCtx(err) } }() @@ -130,7 +130,7 @@ func (a *computeBladeAgentImpl) Run(origCtx context.Context) error { for { err := a.blade.WaitForEdgeButtonPress(ctx) if err != nil && !errors.Is(err, context.Canceled) { - log.FromContext(ctx).Error("Edge button event handler failed", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Edge button event handler failed") cancelCtx(err) } else if err != nil { return @@ -151,7 +151,7 @@ func (a *computeBladeAgentImpl) Run(origCtx context.Context) error { log.FromContext(ctx).Info("Starting top LED engine") err := a.runTopLedEngine(ctx) if err != nil && !errors.Is(err, context.Canceled) { - log.FromContext(ctx).Error("Top LED engine failed", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Top LED engine failed") cancelCtx(err) } }() @@ -163,7 +163,7 @@ func (a *computeBladeAgentImpl) Run(origCtx context.Context) error { log.FromContext(ctx).Info("Starting edge LED engine") err := a.runEdgeLedEngine(ctx) if err != nil && !errors.Is(err, context.Canceled) { - log.FromContext(ctx).Error("Edge LED engine failed", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Edge LED engine failed") cancelCtx(err) } }() @@ -175,7 +175,7 @@ func (a *computeBladeAgentImpl) Run(origCtx context.Context) error { log.FromContext(ctx).Info("Starting fan controller") err := a.runFanController(ctx) if err != nil && !errors.Is(err, context.Canceled) { - log.FromContext(ctx).Error("Fan Controller Failed", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Fan Controller Failed") cancelCtx(err) } }() @@ -192,7 +192,7 @@ func (a *computeBladeAgentImpl) Run(origCtx context.Context) error { case event := <-a.eventChan: err := a.handleEvent(ctx, event) if err != nil && !errors.Is(err, context.Canceled) { - log.FromContext(ctx).Error("Event handler failed", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Event handler failed") cancelCtx(err) } } @@ -207,16 +207,16 @@ func (a *computeBladeAgentImpl) Run(origCtx context.Context) error { func (a *computeBladeAgentImpl) cleanup(ctx context.Context) { log.FromContext(ctx).Info("Exiting, restoring safe settings") if err := a.blade.SetFanSpeed(100); err != nil { - log.FromContext(ctx).Error("Failed to set fan speed to 100%", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Failed to set fan speed to 100%") } if err := a.blade.SetLed(hal.LedEdge, led.Color{}); err != nil { - log.FromContext(ctx).Error("Failed to set edge LED to off", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Failed to set edge LED to off") } if err := a.blade.SetLed(hal.LedTop, led.Color{}); err != nil { - log.FromContext(ctx).Error("Failed to set edge LED to off", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Failed to set edge LED to off") } if err := a.Close(); err != nil { - log.FromContext(ctx).Error("Failed to close blade", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Failed to close blade") } } @@ -377,14 +377,14 @@ func (a *computeBladeAgentImpl) runFanController(ctx context.Context) error { // Get temperature temp, err := a.blade.GetTemperature() if err != nil { - log.FromContext(ctx).Error("Failed to get temperature", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Failed to get temperature") temp = 100 // set to a high value to trigger the maximum speed defined by the fan curve } // Derive fan speed from temperature speed := a.fanController.GetFanSpeed(temp) // Set fan speed if err := a.blade.SetFanSpeed(speed); err != nil { - log.FromContext(ctx).Error("Failed to set fan speed", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Failed to set fan speed") } } } diff --git a/internal/api/api.go b/internal/api/api.go index 09c6661..e40d39d 100644 --- a/internal/api/api.go +++ b/internal/api/api.go @@ -12,6 +12,7 @@ import ( "github.com/compute-blade-community/compute-blade-agent/pkg/log" grpczap "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" "github.com/sierrasoftworks/humane-errors-go" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "go.uber.org/zap" "google.golang.org/grpc" "google.golang.org/grpc/codes" @@ -72,10 +73,7 @@ func NewGrpcApiServer(ctx context.Context, options ...GrpcApiServiceOption) *Age // Load server's certificate and private key cert, certPool, err := EnsureServerCertificate(ctx) if err != nil { - log.FromContext(ctx).Fatal("failed to load server key pair", - zap.Error(err), - zap.Strings("advice", err.Advice()), - ) + log.FromContext(ctx).WithError(err).Fatal("failed to load server key pair") } // Create the TLS config that enforces mTLS for client authentication @@ -90,24 +88,19 @@ func NewGrpcApiServer(ctx context.Context, options ...GrpcApiServiceOption) *Age // Make sure we have a local bladectl config with authentication enabled if err := EnsureAuthenticatedBladectlConfig(ctx, service.listenAddr, service.listenMode); err != nil { - log.FromContext(ctx).Fatal("failed to ensure proper local bladectl config", - zap.Error(err), - zap.Strings("advice", err.Advice()), - ) + log.FromContext(ctx).WithError(err).Fatal("failed to ensure proper local bladectl config") } } else { // Make sure we have a local bladectl config with no authentication enabled if err := EnsureUnauthenticatedBladectlConfig(ctx, service.listenAddr, service.listenMode); err != nil { - log.FromContext(ctx).Fatal("failed to ensure proper local bladectl config", - zap.Error(err), - zap.Strings("advice", err.Advice()), - ) + log.FromContext(ctx).WithError(err).Fatal("failed to ensure proper local bladectl config") } } // Add Logging Middleware - grpcOpts = append(grpcOpts, grpc.ChainUnaryInterceptor(grpczap.UnaryServerInterceptor(log.InterceptorLogger(zap.L())))) - grpcOpts = append(grpcOpts, grpc.ChainStreamInterceptor(grpczap.StreamServerInterceptor(log.InterceptorLogger(zap.L())))) + grpcOpts = append(grpcOpts, grpc.ChainUnaryInterceptor(grpczap.UnaryServerInterceptor(log.InterceptorLogger(log.FromContext(ctx))))) + grpcOpts = append(grpcOpts, grpc.ChainStreamInterceptor(grpczap.StreamServerInterceptor(log.InterceptorLogger(log.FromContext(ctx))))) + grpcOpts = append(grpcOpts, grpc.StatsHandler(otelgrpc.NewServerHandler())) // Make server service.server = grpc.NewServer(grpcOpts...) @@ -121,11 +114,7 @@ func (s *AgentGrpcService) ServeAsync(ctx context.Context, cancel context.Cancel go func() { err := s.Serve(ctx) if err != nil { - log.FromContext(ctx).Error("Failed to start grpc server", - zap.Error(err), - zap.String("cause", err.Cause().Error()), - zap.Strings("advice", err.Advice()), - ) + log.FromContext(ctx).WithError(err).Error("Failed to start grpc server") cancel(err.Cause()) } diff --git a/internal/api/options.go b/internal/api/options.go index 359d1b3..f72c96a 100644 --- a/internal/api/options.go +++ b/internal/api/options.go @@ -2,6 +2,7 @@ package api import ( "github.com/compute-blade-community/compute-blade-agent/pkg/agent" + "github.com/spechtlabs/go-otel-utils/otelzap" "go.uber.org/zap" ) @@ -34,7 +35,7 @@ func WithListenMode(mode string) GrpcApiServiceOption { return func(service *AgentGrpcService) { lMode, err := ListenModeFromString(mode) if err != nil { - zap.L().Fatal(err.Error(), + otelzap.L().Fatal(err.Error(), zap.String("mode", mode), zap.Strings("advice", err.Advice()), ) diff --git a/pkg/agent/state.go b/pkg/agent/state.go index 0be8ae8..81e6a24 100644 --- a/pkg/agent/state.go +++ b/pkg/agent/state.go @@ -7,6 +7,7 @@ import ( "github.com/compute-blade-community/compute-blade-agent/pkg/events" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/spechtlabs/go-otel-utils/otelzap" "go.uber.org/zap" ) @@ -63,7 +64,7 @@ func (s *computebladeStateImpl) RegisterEvent(event events.Event) { s.criticalConfirmChan = make(chan struct{}) default: - zap.L().Warn("Unknown event", zap.String("event", event.String())) + otelzap.L().Warn("Unknown event", zap.String("event", event.String())) } // Set identify state metric diff --git a/pkg/hal/hal_bcm2711.go b/pkg/hal/hal_bcm2711.go index f5eaf86..ff66f7e 100644 --- a/pkg/hal/hal_bcm2711.go +++ b/pkg/hal/hal_bcm2711.go @@ -199,7 +199,7 @@ func (bcm *bcm2711) setup(ctx context.Context) error { return err } } else { - log.FromContext(ctx).Info("no smart fan unit detected, assuming standard fan unit", zap.Error(err)) + log.FromContext(ctx).WithError(err).Info("no smart fan unit detected, assuming standard fan unit") // FAN PWM output for standard fan unit (GPIO 12) // -> bcm2711RegGpfsel1 8:6, alt0 bcm.gpioMem[bcm2711RegGpfsel1] = (bcm.gpioMem[bcm2711RegGpfsel1] &^ (0b111 << 6)) | (0b100 << 6) @@ -258,7 +258,7 @@ func (bcm *bcm2711) WaitForEdgeButtonPress(parentCtx context.Context) error { go func() { err := bcm.fanUnit.WaitForButtonPress(ctx) if err != nil && err != context.Canceled { - log.FromContext(ctx).Error("failed to wait for button press", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("failed to wait for button press") } else { close(fanUnitChan) } diff --git a/pkg/hal/hal_bcm2711_simulated.go b/pkg/hal/hal_bcm2711_simulated.go index 70e4533..f8a32c2 100644 --- a/pkg/hal/hal_bcm2711_simulated.go +++ b/pkg/hal/hal_bcm2711_simulated.go @@ -7,6 +7,7 @@ import ( "time" "github.com/compute-blade-community/compute-blade-agent/pkg/hal/led" + "github.com/spechtlabs/go-otel-utils/otelzap" "go.uber.org/zap" ) @@ -19,7 +20,7 @@ type SimulatedHal struct { } func NewCm4Hal(_ context.Context, _ ComputeBladeHalOpts) (ComputeBladeHal, error) { - logger := zap.L().Named("hal").Named("simulated-cm4") + logger := otelzap.L().Named("hal").Named("simulated-cm4") logger.Warn("Using simulated hal") computeModule.WithLabelValues("simulated").Set(1) diff --git a/pkg/hal/hal_bcm2711_standardfanunit.go b/pkg/hal/hal_bcm2711_standardfanunit.go index 67a0f8a..08e26af 100644 --- a/pkg/hal/hal_bcm2711_standardfanunit.go +++ b/pkg/hal/hal_bcm2711_standardfanunit.go @@ -4,9 +4,10 @@ package hal import ( "context" + "math" + "github.com/compute-blade-community/compute-blade-agent/pkg/log" "go.uber.org/zap" - "math" "github.com/compute-blade-community/compute-blade-agent/pkg/hal/led" "github.com/warthog618/gpiod" diff --git a/pkg/hal/smartfanunit.go b/pkg/hal/smartfanunit.go index c0fab8b..e7aa799 100644 --- a/pkg/hal/smartfanunit.go +++ b/pkg/hal/smartfanunit.go @@ -14,7 +14,6 @@ import ( "github.com/compute-blade-community/compute-blade-agent/pkg/smartfanunit" "github.com/compute-blade-community/compute-blade-agent/pkg/smartfanunit/proto" "go.bug.st/serial" - "go.uber.org/zap" "golang.org/x/sync/errgroup" ) @@ -32,7 +31,7 @@ func SmartFanUnitPresent(ctx context.Context, portName string) (bool, error) { defer func(rwc serial.Port) { err := rwc.Close() if err != nil { - log.FromContext(ctx).Warn("Error while closing serial port", zap.Error(err)) + log.FromContext(ctx).WithError(err).Warn("Error while closing serial port") } }(rwc) @@ -42,7 +41,7 @@ func SmartFanUnitPresent(ctx context.Context, portName string) (bool, error) { log.FromContext(ctx).Warn("Closing serial port") err := rwc.Close() if err != nil { - log.FromContext(ctx).Warn("Error while closing serial port", zap.Error(err)) + log.FromContext(ctx).WithError(err).Warn("Error while closing serial port") } }() @@ -117,7 +116,7 @@ func (fuc *smartFanUnit) Run(parentCtx context.Context) error { pkt, err := proto.ReadPacket(ctx, fuc.rwc) if err != nil { - log.FromContext(ctx).Error("Failed to read packet from serial port", zap.Error(err)) + log.FromContext(ctx).WithError(err).Error("Failed to read packet from serial port") continue } fuc.eb.Publish(inboundTopic, pkt) diff --git a/pkg/log/interceptor_logger.go b/pkg/log/interceptor_logger.go index 60f53dc..e1d7a40 100644 --- a/pkg/log/interceptor_logger.go +++ b/pkg/log/interceptor_logger.go @@ -4,12 +4,13 @@ import ( "context" "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" + "github.com/spechtlabs/go-otel-utils/otelzap" "go.uber.org/zap" ) // InterceptorLogger adapts zap logger to interceptor logger. // This code is simple enough to be copied and not imported. -func InterceptorLogger(l *zap.Logger) logging.Logger { +func InterceptorLogger(l *otelzap.Logger) logging.Logger { return logging.LoggerFunc(func(ctx context.Context, lvl logging.Level, msg string, fields ...any) { f := make([]zap.Field, 0, len(fields)/2) @@ -24,12 +25,14 @@ func InterceptorLogger(l *zap.Logger) logging.Logger { f = append(f, zap.Int(key.(string), v)) case bool: f = append(f, zap.Bool(key.(string), v)) + case zap.Field: + f = append(f, v) default: f = append(f, zap.Any(key.(string), v)) } } - logger := zap.L().WithOptions(zap.AddCallerSkip(4)).With(f...) + logger := l.WithOptions(zap.AddCallerSkip(4)).With(f...) switch lvl { case logging.LevelDebug: diff --git a/pkg/log/logger.go b/pkg/log/logger.go index 5b37e52..4916776 100644 --- a/pkg/log/logger.go +++ b/pkg/log/logger.go @@ -3,21 +3,22 @@ package log import ( "context" + "github.com/spechtlabs/go-otel-utils/otelzap" "go.uber.org/zap" ) type logCtxKey int -func IntoContext(ctx context.Context, logger *zap.Logger) context.Context { +func IntoContext(ctx context.Context, logger *otelzap.Logger) context.Context { return context.WithValue(ctx, logCtxKey(0), logger) } -func FromContext(ctx context.Context) *zap.Logger { +func FromContext(ctx context.Context) *otelzap.Logger { val := ctx.Value(logCtxKey(0)) if val != nil { - return val.(*zap.Logger) + return val.(*otelzap.Logger) } - zap.L().WithOptions(zap.AddCallerSkip(1)).Warn("No logger in context, passing default") - return zap.L() + otelzap.L().WithOptions(zap.AddCallerSkip(1)).Warn("No logger in context, passing default") + return otelzap.L() }