Skip to content

Migrate tracing to OpenTelemetry and add support for standard OTel env config #234

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* [ENHANCEMENT] Updated dependencies, including: #227 #231
* `golang.org/x/sync` from `v0.14.0` to `v0.15.0`
* `sigs.k8s.io/controller-runtime` from `v0.20.4` to `v0.21.0`
* [ENHANCEMENT] Migrate to OpenTelemetry tracing library, removing the dependency on OpenTracing. You can now configure tracing using the standard `OTEL_` [environment variables](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#batch-span-processor). Previous configurations using `JAEGER_` environment variables will still work, but are deprecated. #234

## v0.27.0

Expand Down
14 changes: 9 additions & 5 deletions cmd/rollout-operator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,16 +142,20 @@ func main() {
ready := atomic.NewBool(false)
restart := make(chan string)

name := os.Getenv("JAEGER_SERVICE_NAME")
if name == "" {
var name string
if otelEnvName := os.Getenv("OTEL_SERVICE_NAME"); otelEnvName != "" {
name = otelEnvName
} else if jaegerEnvName := os.Getenv("JAEGER_SERVICE_NAME"); jaegerEnvName != "" {
name = jaegerEnvName
} else {
name = "rollout-operator"
}

if trace, err := tracing.NewFromEnv(name); err != nil {
trace, err := tracing.NewOTelOrJaegerFromEnv(name, logger)
if err != nil {
fatal(fmt.Errorf("failed to set up tracing: %w", err))
} else {
defer trace.Close()
}
defer trace.Close()

// Expose HTTP endpoints.
srv := newServer(cfg, logger, metrics)
Expand Down
8 changes: 4 additions & 4 deletions development/jaeger-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ spec:
port: 16686
targetPort: 16686
name: http
- protocol: UDP
port: 6831
targetPort: 6831
name: thrift-compact-udp
- protocol: TCP
port: 4318
targetPort: 4318
name: otlp-http
type: ClusterIP
14 changes: 2 additions & 12 deletions development/rollout-operator-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,6 @@ spec:
port: 8001
path: /ready
env:
- name: JAEGER_AGENT_HOST
value: jaeger.rollout-operator-development.svc.cluster.local
- name: JAEGER_AGENT_PORT
value: "6831"
- name: JAEGER_SAMPLER_TYPE
value: const
- name: JAEGER_SAMPLER_PARAM
value: "1"
- name: JAEGER_TAGS
value: app=rollout-operator
- name: JAEGER_REPORTER_MAX_QUEUE_SIZE
value: "1000"
- name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
value: http://jaeger.rollout-operator-development.svc.cluster.local.:4318/v1/traces
Comment on lines +44 to +45
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note for reviewers: jaeger also supports otlp, and I didn't replace it with something else because having something that has a frontend to query comes in handy in this dev env.

restartPolicy: Always
11 changes: 6 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ require (
github.com/grafana/dskit v0.0.0-20250611075409-46f51e1ce914
github.com/hashicorp/go-multierror v1.1.1
github.com/k3d-io/k3d/v5 v5.8.3
github.com/opentracing-contrib/go-stdlib v1.1.0
github.com/opentracing/opentracing-go v1.2.0
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.22.0
github.com/prometheus/common v0.64.0
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.10.0
go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.60.0
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0
go.opentelemetry.io/otel v1.36.0
go.opentelemetry.io/otel/trace v1.36.0
go.uber.org/atomic v1.11.0
golang.org/x/sync v0.15.0
k8s.io/api v0.33.1
Expand Down Expand Up @@ -99,6 +101,8 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.0 // indirect
github.com/opentracing-contrib/go-stdlib v1.1.0 // indirect
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/pelletier/go-toml/v2 v2.1.0 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
Expand All @@ -124,10 +128,8 @@ require (
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/bridges/prometheus v0.61.0 // indirect
go.opentelemetry.io/contrib/exporters/autoexport v0.61.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect
go.opentelemetry.io/contrib/propagators/jaeger v1.35.0 // indirect
go.opentelemetry.io/contrib/samplers/jaegerremote v0.30.0 // indirect
go.opentelemetry.io/otel v1.36.0 // indirect
go.opentelemetry.io/otel/exporters/jaeger v1.17.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.12.2 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.12.2 // indirect
Expand All @@ -145,7 +147,6 @@ require (
go.opentelemetry.io/otel/sdk v1.36.0 // indirect
go.opentelemetry.io/otel/sdk/log v0.12.2 // indirect
go.opentelemetry.io/otel/sdk/metric v1.36.0 // indirect
go.opentelemetry.io/otel/trace v1.36.0 // indirect
go.opentelemetry.io/proto/otlp v1.6.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go4.org/netipx v0.0.0-20231129151722-fdeea329fbba // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,8 @@ go.opentelemetry.io/contrib/bridges/prometheus v0.61.0 h1:RyrtJzu5MAmIcbRrwg75b+
go.opentelemetry.io/contrib/bridges/prometheus v0.61.0/go.mod h1:tirr4p9NXbzjlbruiRGp53IzlYrDk5CO2fdHj0sSSaY=
go.opentelemetry.io/contrib/exporters/autoexport v0.61.0 h1:XfzKtKSrbtYk9TNCF8dkO0Y9M7IOfb4idCwBOTwGBiI=
go.opentelemetry.io/contrib/exporters/autoexport v0.61.0/go.mod h1:N6otC+qXTD5bAnbK2O1f/1SXq3cX+3KYSWrkBUqG0cw=
go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.60.0 h1:0tY123n7CdWMem7MOVdKOt0YfshufLCwfE5Bob+hQuM=
go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.60.0/go.mod h1:CosX/aS4eHnG9D7nESYpV753l4j9q5j3SL/PUYd2lR8=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ=
go.opentelemetry.io/contrib/propagators/jaeger v1.35.0 h1:UIrZgRBHUrYRlJ4V419lVb4rs2ar0wFzKNAebaP05XU=
Expand Down
18 changes: 11 additions & 7 deletions pkg/admission/no_downscale.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/dskit/spanlogger"
"github.com/opentracing/opentracing-go"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
v1 "k8s.io/api/admission/v1"
appsv1 "k8s.io/api/apps/v1"
autoscalingv1 "k8s.io/api/autoscaling/v1"
Expand All @@ -24,6 +26,8 @@ const (
NoDownscaleWebhookPath = "/admission/no-downscale"
)

var tracer = otel.Tracer("pkg/admission")

func NoDownscale(ctx context.Context, l log.Logger, ar v1.AdmissionReview, api *kubernetes.Clientset) *v1.AdmissionResponse {
logger, ctx := spanlogger.New(ctx, l, "admission.NoDownscale()", tenantResolver)
defer logger.Finish()
Expand Down Expand Up @@ -147,12 +151,12 @@ func allowErr(logger log.Logger, msg string, err error) *v1.AdmissionResponse {
}

func getResourceLabels(ctx context.Context, ar v1.AdmissionReview, api kubernetes.Interface) (map[string]string, error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "admission.getResourceLabels()")
defer span.Finish()

span.SetTag("object.namespace", ar.Request.Namespace)
span.SetTag("object.name", ar.Request.Name)
span.SetTag("object.resource", ar.Request.Resource.Resource)
ctx, span := tracer.Start(ctx, "admission.getResourceLabels()", trace.WithAttributes(
attribute.String("object.namespace", ar.Request.Namespace),
attribute.String("object.name", ar.Request.Name),
attribute.String("object.resource", ar.Request.Resource.Resource),
))
defer span.End()

switch ar.Request.Resource.Resource {
case "statefulsets":
Expand Down
72 changes: 37 additions & 35 deletions pkg/admission/prep_downscale.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@ import (
"fmt"
"io"
"net/http"
"net/http/httptrace"
"time"

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/dskit/spanlogger"
"github.com/opentracing-contrib/go-stdlib/nethttp"
"github.com/opentracing/opentracing-go"
"go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"golang.org/x/sync/errgroup"
admissionv1 "k8s.io/api/admission/v1"
appsv1 "k8s.io/api/apps/v1"
Expand All @@ -37,8 +40,10 @@ const (

func PrepareDownscale(ctx context.Context, rt http.RoundTripper, logger log.Logger, ar admissionv1.AdmissionReview, api *kubernetes.Clientset, useZoneTracker bool, zoneTrackerConfigMapName string) *admissionv1.AdmissionResponse {
client := &http.Client{
Timeout: 5 * time.Second,
Transport: &nethttp.Transport{RoundTripper: rt},
Timeout: 5 * time.Second,
Transport: otelhttp.NewTransport(rt, otelhttp.WithClientTrace(func(ctx context.Context) *httptrace.ClientTrace {
return otelhttptrace.NewClientTrace(ctx)
})),
}

if useZoneTracker {
Expand Down Expand Up @@ -255,11 +260,11 @@ func deny(msg string) *admissionv1.AdmissionResponse {
}

func getStatefulSet(ctx context.Context, ar admissionv1.AdmissionReview, api kubernetes.Interface) (*appsv1.StatefulSet, error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "admission.getStatefulSet()")
defer span.Finish()

span.SetTag("object.namespace", ar.Request.Namespace)
span.SetTag("object.name", ar.Request.Name)
ctx, span := tracer.Start(ctx, "admission.getStatefulSet()", trace.WithAttributes(
attribute.String("object.namespace", ar.Request.Namespace),
attribute.String("object.name", ar.Request.Name),
))
defer span.End()

switch ar.Request.Resource.Resource {
case "statefulsets":
Expand All @@ -273,11 +278,11 @@ func getStatefulSet(ctx context.Context, ar admissionv1.AdmissionReview, api kub
}

func addDownscaledAnnotationToStatefulSet(ctx context.Context, api kubernetes.Interface, namespace, stsName string) error {
span, ctx := opentracing.StartSpanFromContext(ctx, "admission.addDownscaledAnnotationToStatefulSet()")
defer span.Finish()

span.SetTag("object.namespace", namespace)
span.SetTag("object.name", stsName)
ctx, span := tracer.Start(ctx, "admission.addDownscaledAnnotationToStatefulSet()", trace.WithAttributes(
attribute.String("object.namespace", namespace),
attribute.String("object.name", stsName),
))
defer span.End()

client := api.AppsV1().StatefulSets(namespace)
patch := fmt.Sprintf(`{"metadata":{"annotations":{"%v":"%v"}}}`, config.LastDownscaleAnnotationKey, time.Now().UTC().Format(time.RFC3339))
Expand Down Expand Up @@ -344,10 +349,10 @@ func findDownscalesDoneMinTimeAgo(stsList *appsv1.StatefulSetList, excludeStsNam
//
// The StatefulSet whose name matches the input excludeStsName is not checked.
func findStatefulSetWithNonUpdatedReplicas(ctx context.Context, api kubernetes.Interface, namespace string, stsList *appsv1.StatefulSetList, excludeStsName string) (*statefulSetDownscale, error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "admission.findStatefulSetWithNonUpdatedReplicas()")
defer span.Finish()

span.SetTag("object.namespace", namespace)
ctx, span := tracer.Start(ctx, "admission.findStatefulSetWithNonUpdatedReplicas()", trace.WithAttributes(
attribute.String("object.namespace", namespace),
))
defer span.End()

for _, sts := range stsList.Items {
if sts.Name == excludeStsName {
Expand All @@ -371,11 +376,11 @@ func findStatefulSetWithNonUpdatedReplicas(ctx context.Context, api kubernetes.I

// countRunningAndReadyPods counts running and ready pods for a StatefulSet.
func countRunningAndReadyPods(ctx context.Context, api kubernetes.Interface, namespace string, sts *appsv1.StatefulSet) (int, error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "admission.countRunningAndReadyPods()")
defer span.Finish()

span.SetTag("object.namespace", namespace)
span.SetTag("object.name", sts.Name)
ctx, span := tracer.Start(ctx, "admission.countRunningAndReadyPods()", trace.WithAttributes(
attribute.String("object.namespace", namespace),
attribute.String("object.name", sts.Name),
))
defer span.End()

pods, err := findPodsForStatefulSet(ctx, api, namespace, sts)
if err != nil {
Expand All @@ -402,11 +407,11 @@ func findPodsForStatefulSet(ctx context.Context, api kubernetes.Interface, names
}

func findStatefulSetsForRolloutGroup(ctx context.Context, api kubernetes.Interface, namespace, rolloutGroup string) (*appsv1.StatefulSetList, error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "admission.findStatefulSetsForRolloutGroup()")
defer span.Finish()

span.SetTag("object.namespace", namespace)
span.SetTag("rollout_group", rolloutGroup)
ctx, span := tracer.Start(ctx, "admission.findStatefulSetsForRolloutGroup()", trace.WithAttributes(
attribute.String("object.namespace", namespace),
attribute.String("rollout_group", rolloutGroup),
))
defer span.End()

groupReq, err := labels.NewRequirement(config.RolloutGroupLabelKey, selection.Equals, []string{rolloutGroup})
if err != nil {
Expand Down Expand Up @@ -510,9 +515,6 @@ func invokePrepareShutdown(ctx context.Context, method string, parentLogger log.
}

req.Header.Set("Content-Type", "application/json")
req, ht := nethttp.TraceRequest(opentracing.GlobalTracer(), req)
defer ht.Finish()

resp, err := client.Do(req)
if err != nil {
level.Error(logger).Log("msg", fmt.Sprintf("error sending HTTP %s request", method), "err", err)
Expand All @@ -532,8 +534,8 @@ func invokePrepareShutdown(ctx context.Context, method string, parentLogger log.
}

func sendPrepareShutdownRequests(ctx context.Context, logger log.Logger, client httpClient, eps []endpoint) error {
span, ctx := opentracing.StartSpanFromContext(ctx, "admission.sendPrepareShutdownRequests()")
defer span.Finish()
ctx, span := tracer.Start(ctx, "admission.sendPrepareShutdownRequests()", trace.WithAttributes())
defer span.End()

if len(eps) == 0 {
return nil
Expand All @@ -558,8 +560,8 @@ func sendPrepareShutdownRequests(ctx context.Context, logger log.Logger, client

// undoPrepareShutdownRequests sends an HTTP DELETE to each of the given endpoints.
func undoPrepareShutdownRequests(ctx context.Context, logger log.Logger, client httpClient, eps []endpoint) {
span, ctx := opentracing.StartSpanFromContext(ctx, "admission.undoPrepareShutdownRequests()")
defer span.Finish()
ctx, span := tracer.Start(ctx, "admission.undoPrepareShutdownRequests()", trace.WithAttributes())
defer span.End()

if len(eps) == 0 {
return
Expand Down
17 changes: 8 additions & 9 deletions pkg/admission/zone_tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/dskit/spanlogger"
"github.com/opentracing/opentracing-go"
admissionv1 "k8s.io/api/admission/v1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -204,8 +203,8 @@ func (zt *zoneTracker) prepareDownscale(ctx context.Context, l log.Logger, ar ad

// Create the ConfigMap and populate each zone with the current time as a starting point
func (zt *zoneTracker) createConfigMap(ctx context.Context, stsList *appsv1.StatefulSetList) (*corev1.ConfigMap, error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "zoneTracker.createConfigMap()")
defer span.Finish()
ctx, span := tracer.Start(ctx, "zoneTracker.createConfigMap()")
defer span.End()

defaultInfo := &zoneInfo{LastDownscaled: time.Now().UTC().Format(time.RFC3339)}
zones := make(map[string]zoneInfo, len(stsList.Items))
Expand Down Expand Up @@ -237,8 +236,8 @@ func (zt *zoneTracker) createConfigMap(ctx context.Context, stsList *appsv1.Stat

// Get the zoneTracker ConfigMap if it exists, or nil if it does not exist
func (zt *zoneTracker) getConfigMap(ctx context.Context) (*corev1.ConfigMap, error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "zoneTracker.getConfigMap()")
defer span.Finish()
ctx, span := tracer.Start(ctx, "zoneTracker.getConfigMap()")
defer span.End()

cm, err := zt.client.CoreV1().ConfigMaps(zt.namespace).Get(ctx, zt.configMapName, metav1.GetOptions{})
if err != nil {
Expand All @@ -254,8 +253,8 @@ func (zt *zoneTracker) getConfigMap(ctx context.Context) (*corev1.ConfigMap, err

// Get the zoneTracker ConfigMap if it exists, otherwise create it
func (zt *zoneTracker) getOrCreateConfigMap(ctx context.Context, stsList *appsv1.StatefulSetList) (*corev1.ConfigMap, error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "zoneTracker.getOrCreateConfigMap()")
defer span.Finish()
ctx, span := tracer.Start(ctx, "zoneTracker.getOrCreateConfigMap()")
defer span.End()

if cm, err := zt.getConfigMap(ctx); err != nil {
return nil, err
Expand Down Expand Up @@ -288,8 +287,8 @@ func (zt *zoneTracker) loadZones(ctx context.Context, stsList *appsv1.StatefulSe

// Save the zones map to the zoneTracker ConfigMap
func (zt *zoneTracker) saveZones(ctx context.Context) error {
span, ctx := opentracing.StartSpanFromContext(ctx, "zoneTracker.saveZones()")
defer span.Finish()
ctx, span := tracer.Start(ctx, "zoneTracker.saveZones()")
defer span.End()

// Convert the zones map to ConfigMap data
data := make(map[string]string)
Expand Down
8 changes: 5 additions & 3 deletions pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/hashicorp/go-multierror"
"github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"go.opentelemetry.io/otel"
"go.uber.org/atomic"
v1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand All @@ -40,6 +40,8 @@ const (
informerSyncInterval = 5 * time.Minute
)

var tracer = otel.Tracer("pkg/controller")

type httpClient interface {
Do(req *http.Request) (*http.Response, error)
}
Expand Down Expand Up @@ -250,8 +252,8 @@ func (c *RolloutController) enqueueReconcile() {
}

func (c *RolloutController) reconcile(ctx context.Context) error {
span, ctx := opentracing.StartSpanFromContext(ctx, "RolloutController.reconcile()")
defer span.Finish()
ctx, span := tracer.Start(ctx, "RolloutController.reconcile()")
defer span.End()

level.Info(c.logger).Log("msg", "reconcile started")

Expand Down
Loading