Skip to content

Commit 5dacc78

Browse files
Allow additional Prometheus labels based on Kubernetes labels (#109)
1 parent 8fb6254 commit 5dacc78

File tree

2 files changed

+111
-18
lines changed

2 files changed

+111
-18
lines changed

status/controller.go

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828

2929
type Controller[T Object] struct {
3030
gvk schema.GroupVersionKind
31+
additionalMetricLabels []string
3132
kubeClient client.Client
3233
eventRecorder record.EventRecorder
3334
observedConditions sync.Map // map[reconcile.Request]ConditionSet
@@ -51,12 +52,19 @@ type Option struct {
5152
// - operator_termination_current_time_seconds
5253
// - operator_termination_duration_seconds
5354
EmitDeprecatedMetrics bool
55+
MetricLabels []string
5456
}
5557

5658
// EmitDeprecatedMetrics is an option function that turns on
// Option.EmitDeprecatedMetrics. NewController copies that flag onto the
// Controller it builds.
func EmitDeprecatedMetrics(o *Option) {
	o.EmitDeprecatedMetrics = true
}
5961

62+
func WithLabels(labels ...string) func(*Option) {
63+
return func(o *Option) {
64+
o.MetricLabels = append(o.MetricLabels, labels...)
65+
}
66+
}
67+
6068
func NewController[T Object](client client.Client, eventRecorder record.EventRecorder, opts ...option.Function[Option]) *Controller[T] {
6169
options := option.Resolve(opts...)
6270
obj := reflect.New(reflect.TypeOf(*new(T)).Elem()).Interface().(runtime.Object)
@@ -65,15 +73,16 @@ func NewController[T Object](client client.Client, eventRecorder record.EventRec
6573

6674
return &Controller[T]{
6775
gvk: gvk,
76+
additionalMetricLabels: options.MetricLabels,
6877
kubeClient: client,
6978
eventRecorder: eventRecorder,
7079
emitDeprecatedMetrics: options.EmitDeprecatedMetrics,
71-
ConditionDuration: conditionDurationMetric(strings.ToLower(gvk.Kind)),
72-
ConditionCount: conditionCountMetric(strings.ToLower(gvk.Kind)),
73-
ConditionCurrentStatusSeconds: conditionCurrentStatusSecondsMetric(strings.ToLower(gvk.Kind)),
74-
ConditionTransitionsTotal: conditionTransitionsTotalMetric(strings.ToLower(gvk.Kind)),
75-
TerminationCurrentTimeSeconds: terminationCurrentTimeSecondsMetric(strings.ToLower(gvk.Kind)),
76-
TerminationDuration: terminationDurationMetric(strings.ToLower(gvk.Kind)),
80+
ConditionDuration: conditionDurationMetric(strings.ToLower(gvk.Kind), lo.Map(options.MetricLabels, func(k string, _ int) string { return toPrometheusLabel(k) })...),
81+
ConditionCount: conditionCountMetric(strings.ToLower(gvk.Kind), lo.Map(options.MetricLabels, func(k string, _ int) string { return toPrometheusLabel(k) })...),
82+
ConditionCurrentStatusSeconds: conditionCurrentStatusSecondsMetric(strings.ToLower(gvk.Kind), lo.Map(options.MetricLabels, func(k string, _ int) string { return toPrometheusLabel(k) })...),
83+
ConditionTransitionsTotal: conditionTransitionsTotalMetric(strings.ToLower(gvk.Kind), lo.Map(options.MetricLabels, func(k string, _ int) string { return toPrometheusLabel(k) })...),
84+
TerminationCurrentTimeSeconds: terminationCurrentTimeSecondsMetric(strings.ToLower(gvk.Kind), lo.Map(options.MetricLabels, func(k string, _ int) string { return toPrometheusLabel(k) })...),
85+
TerminationDuration: terminationDurationMetric(strings.ToLower(gvk.Kind), lo.Map(options.MetricLabels, func(k string, _ int) string { return toPrometheusLabel(k) })...),
7786
}
7887
}
7988

@@ -111,6 +120,18 @@ func (c *GenericObjectController[T]) Reconcile(ctx context.Context, req reconcil
111120
return c.reconcile(ctx, req, NewUnstructuredAdapter[T](object.New[T]()))
112121
}
113122

123+
func (c *Controller[T]) toAdditionalMetricLabels(obj Object) map[string]string {
124+
return lo.SliceToMap(c.additionalMetricLabels, func(label string) (string, string) { return toPrometheusLabel(label), obj.GetLabels()[label] })
125+
}
126+
127+
// toPrometheusLabel converts a Kubernetes label key into a string that is a
// valid Prometheus label name.
//
// Prometheus label names must match [a-zA-Z_][a-zA-Z0-9_]*, while Kubernetes
// label keys may also contain '/', '.' and '-' and may begin with a digit.
// Every character outside [a-zA-Z0-9_] is therefore replaced with '_', and a
// result that would begin with a digit is prefixed with '_'. Keys that contain
// only '/' and '.' as special characters map exactly as before.
//
// The mapping is not injective: distinct keys such as "a.b" and "a/b" produce
// the same name. An empty key is returned unchanged.
func toPrometheusLabel(k string) string {
	sanitized := strings.Map(func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9', r == '_':
			return r
		default:
			return '_'
		}
	}, k)
	// A label name may not start with a digit; a single leading underscore keeps
	// it valid without entering the reserved "__" prefix.
	if sanitized != "" && sanitized[0] >= '0' && sanitized[0] <= '9' {
		return "_" + sanitized
	}
	return sanitized
}
134+
114135
func (c *Controller[T]) reconcile(ctx context.Context, req reconcile.Request, o Object) (reconcile.Result, error) {
115136
if err := c.kubeClient.Get(ctx, req.NamespacedName, o); err != nil {
116137
if errors.IsNotFound(err) {
@@ -127,7 +148,7 @@ func (c *Controller[T]) reconcile(ctx context.Context, req reconcile.Request, o
127148
MetricLabelName: req.Name,
128149
})
129150
if deletionTS, ok := c.terminatingObjects.Load(req); ok {
130-
c.observeHistogram(c.TerminationDuration, TerminationDuration, time.Since(deletionTS.(*metav1.Time).Time).Seconds(), map[string]string{})
151+
c.observeHistogram(c.TerminationDuration, TerminationDuration, time.Since(deletionTS.(*metav1.Time).Time).Seconds(), c.toAdditionalMetricLabels(o))
131152
}
132153
if finalizers, ok := c.observedFinalizers.LoadAndDelete(req); ok {
133154
for _, finalizer := range finalizers.([]string) {
@@ -148,10 +169,10 @@ func (c *Controller[T]) reconcile(ctx context.Context, req reconcile.Request, o
148169
}
149170

150171
if o.GetDeletionTimestamp() != nil {
151-
c.setGaugeMetric(c.TerminationCurrentTimeSeconds, TerminationCurrentTimeSeconds, time.Since(o.GetDeletionTimestamp().Time).Seconds(), map[string]string{
172+
c.setGaugeMetric(c.TerminationCurrentTimeSeconds, TerminationCurrentTimeSeconds, time.Since(o.GetDeletionTimestamp().Time).Seconds(), lo.Assign(map[string]string{
152173
MetricLabelNamespace: req.Namespace,
153174
MetricLabelName: req.Name,
154-
})
175+
}, c.toAdditionalMetricLabels(o)))
155176
c.terminatingObjects.Store(req, o.GetDeletionTimestamp())
156177
}
157178

@@ -164,20 +185,20 @@ func (c *Controller[T]) reconcile(ctx context.Context, req reconcile.Request, o
164185
c.observedConditions.Store(req, currentConditions)
165186

166187
for _, condition := range o.GetConditions() {
167-
c.setGaugeMetric(c.ConditionCount, ConditionCount, 1, map[string]string{
188+
c.setGaugeMetric(c.ConditionCount, ConditionCount, 1, lo.Assign(map[string]string{
168189
MetricLabelNamespace: req.Namespace,
169190
MetricLabelName: req.Name,
170191
pmetrics.LabelType: condition.Type,
171192
MetricLabelConditionStatus: string(condition.Status),
172193
pmetrics.LabelReason: condition.Reason,
173-
})
174-
c.setGaugeMetric(c.ConditionCurrentStatusSeconds, ConditionCurrentStatusSeconds, time.Since(condition.LastTransitionTime.Time).Seconds(), map[string]string{
194+
}, c.toAdditionalMetricLabels(o)))
195+
c.setGaugeMetric(c.ConditionCurrentStatusSeconds, ConditionCurrentStatusSeconds, time.Since(condition.LastTransitionTime.Time).Seconds(), lo.Assign(map[string]string{
175196
MetricLabelNamespace: req.Namespace,
176197
MetricLabelName: req.Name,
177198
pmetrics.LabelType: condition.Type,
178199
MetricLabelConditionStatus: string(condition.Status),
179200
pmetrics.LabelReason: condition.Reason,
180-
})
201+
}, c.toAdditionalMetricLabels(o)))
181202
}
182203

183204
for _, observedCondition := range observedConditions.List() {
@@ -220,19 +241,19 @@ func (c *Controller[T]) reconcile(ctx context.Context, req reconcile.Request, o
220241
continue
221242
}
222243
// A condition transitions if it either didn't exist before or it has changed
223-
c.incCounterMetric(c.ConditionTransitionsTotal, ConditionTransitionsTotal, map[string]string{
244+
c.incCounterMetric(c.ConditionTransitionsTotal, ConditionTransitionsTotal, lo.Assign(map[string]string{
224245
pmetrics.LabelType: condition.Type,
225246
MetricLabelConditionStatus: string(condition.Status),
226247
pmetrics.LabelReason: condition.Reason,
227-
})
248+
}, c.toAdditionalMetricLabels(o)))
228249
if observedCondition == nil {
229250
continue
230251
}
231252
duration := condition.LastTransitionTime.Time.Sub(observedCondition.LastTransitionTime.Time).Seconds()
232-
c.observeHistogram(c.ConditionDuration, ConditionDuration, duration, map[string]string{
253+
c.observeHistogram(c.ConditionDuration, ConditionDuration, duration, lo.Assign(map[string]string{
233254
pmetrics.LabelType: observedCondition.Type,
234255
MetricLabelConditionStatus: string(observedCondition.Status),
235-
})
256+
}, c.toAdditionalMetricLabels(o)))
236257
c.eventRecorder.Event(o, v1.EventTypeNormal, condition.Type, fmt.Sprintf("Status condition transitioned, Type: %s, Status: %s -> %s, Reason: %s%s",
237258
condition.Type,
238259
observedCondition.Status,
@@ -263,7 +284,7 @@ func (c *Controller[T]) setGaugeMetric(current pmetrics.GaugeMetric, deprecated
263284
}
264285

265286
func (c *Controller[T]) deleteGaugeMetric(current pmetrics.GaugeMetric, deprecated pmetrics.GaugeMetric, labels map[string]string) {
266-
current.Delete(labels)
287+
current.DeletePartialMatch(labels)
267288
if c.emitDeprecatedMetrics {
268289
labels[pmetrics.LabelKind] = c.gvk.Kind
269290
labels[pmetrics.LabelGroup] = c.gvk.Group

status/controller_test.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import (
1212
. "github.com/awslabs/operatorpkg/test/expectations"
1313
. "github.com/onsi/ginkgo/v2"
1414
. "github.com/onsi/gomega"
15+
"github.com/prometheus/client_golang/prometheus"
16+
"github.com/samber/lo"
1517
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1618
"k8s.io/apimachinery/pkg/runtime/schema"
1719
"k8s.io/client-go/kubernetes/scheme"
@@ -25,6 +27,11 @@ import (
2527
var ctx context.Context
2628
var recorder *record.FakeRecorder
2729
var kubeClient client.Client
30+
var registry = metrics.Registry
31+
32+
var _ = BeforeEach(func() {
33+
metrics.Registry = registry // reset the registry to handle cases where the registry is overridden
34+
})
2835

2936
var _ = AfterEach(func() {
3037
status.ConditionDuration.Reset()
@@ -556,6 +563,71 @@ var _ = Describe("Controller", func() {
556563
Expect(GetMetric("operator_customobject_status_condition_transitions_total", conditionLabels(ConditionTypeBar, metav1.ConditionFalse))).To(BeNil())
557564
Expect(GetMetric("operator_customobject_status_condition_transitions_total", conditionLabels(ConditionTypeBar, metav1.ConditionUnknown))).To(BeNil())
558565
})
566+
It("should add labels to metrics when using WithLabels", func() {
567+
metrics.Registry = prometheus.NewRegistry()
568+
controller = status.NewController[*test.CustomObject](kubeClient, recorder, status.WithLabels("operator.pkg/key1", "operator.pkg/key2"))
569+
testObject := test.Object(&test.CustomObject{
570+
ObjectMeta: metav1.ObjectMeta{
571+
Labels: map[string]string{
572+
"operator.pkg/key1": "value1",
573+
"operator.pkg/key2": "value2",
574+
},
575+
},
576+
})
577+
ExpectApplied(ctx, kubeClient, testObject)
578+
ExpectReconciled(ctx, controller, testObject)
579+
580+
// Transition Foo
581+
time.Sleep(time.Second * 1)
582+
testObject.StatusConditions().SetTrue(test.ConditionTypeFoo)
583+
ExpectApplied(ctx, kubeClient, testObject)
584+
ExpectReconciled(ctx, controller, testObject)
585+
ExpectStatusConditions(ctx, kubeClient, FastTimeout, testObject, status.Condition{Type: test.ConditionTypeFoo, Status: metav1.ConditionTrue})
586+
587+
// Ready Condition
588+
Expect(GetMetric("operator_customobject_status_condition_count", lo.Assign(conditionLabels(status.ConditionReady, metav1.ConditionTrue), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
589+
Expect(GetMetric("operator_customobject_status_condition_count", lo.Assign(conditionLabels(status.ConditionReady, metav1.ConditionFalse), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
590+
Expect(GetMetric("operator_customobject_status_condition_count", lo.Assign(conditionLabels(status.ConditionReady, metav1.ConditionUnknown), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"})).GetGauge().GetValue()).To(BeEquivalentTo(1))
591+
Expect(GetMetric("operator_customobject_status_condition_current_status_seconds", lo.Assign(conditionLabels(status.ConditionReady, metav1.ConditionTrue)), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"})).To(BeNil())
592+
Expect(GetMetric("operator_customobject_status_condition_current_status_seconds", lo.Assign(conditionLabels(status.ConditionReady, metav1.ConditionFalse)), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"})).To(BeNil())
593+
Expect(GetMetric("operator_customobject_status_condition_current_status_seconds", lo.Assign(conditionLabels(status.ConditionReady, metav1.ConditionUnknown), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"})).GetGauge().GetValue()).ToNot(BeZero())
594+
595+
// Foo Condition
596+
Expect(GetMetric("operator_customobject_status_condition_count", lo.Assign(conditionLabels(ConditionTypeFoo, metav1.ConditionTrue), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"})).GetGauge().GetValue()).To(BeEquivalentTo(1))
597+
Expect(GetMetric("operator_customobject_status_condition_count", lo.Assign(conditionLabels(ConditionTypeFoo, metav1.ConditionFalse), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
598+
Expect(GetMetric("operator_customobject_status_condition_count", lo.Assign(conditionLabels(ConditionTypeFoo, metav1.ConditionUnknown), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
599+
Expect(GetMetric("operator_customobject_status_condition_current_status_seconds", lo.Assign(conditionLabels(ConditionTypeFoo, metav1.ConditionTrue), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).ToNot(BeZero())
600+
Expect(GetMetric("operator_customobject_status_condition_current_status_seconds", lo.Assign(conditionLabels(ConditionTypeFoo, metav1.ConditionFalse), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
601+
Expect(GetMetric("operator_customobject_status_condition_current_status_seconds", lo.Assign(conditionLabels(ConditionTypeFoo, metav1.ConditionUnknown), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
602+
603+
// Bar Condition
604+
Expect(GetMetric("operator_customobject_status_condition_count", lo.Assign(conditionLabels(ConditionTypeBar, metav1.ConditionTrue), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
605+
Expect(GetMetric("operator_customobject_status_condition_count", lo.Assign(conditionLabels(ConditionTypeBar, metav1.ConditionFalse), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
606+
Expect(GetMetric("operator_customobject_status_condition_count", lo.Assign(conditionLabels(ConditionTypeBar, metav1.ConditionUnknown), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"})).GetGauge().GetValue()).To(BeEquivalentTo(1))
607+
Expect(GetMetric("operator_customobject_status_condition_current_status_seconds", lo.Assign(conditionLabels(ConditionTypeBar, metav1.ConditionTrue), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
608+
Expect(GetMetric("operator_customobject_status_condition_current_status_seconds", lo.Assign(conditionLabels(ConditionTypeBar, metav1.ConditionFalse), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
609+
Expect(GetMetric("operator_customobject_status_condition_current_status_seconds", lo.Assign(conditionLabels(ConditionTypeBar, metav1.ConditionUnknown), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"})).GetGauge().GetValue()).ToNot(BeZero())
610+
611+
Expect(GetMetric("operator_customobject_status_condition_transition_seconds", lo.Assign(conditionLabels(status.ConditionReady, metav1.ConditionTrue), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
612+
Expect(GetMetric("operator_customobject_status_condition_transition_seconds", lo.Assign(conditionLabels(status.ConditionReady, metav1.ConditionFalse), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
613+
Expect(GetMetric("operator_customobject_status_condition_transition_seconds", lo.Assign(conditionLabels(status.ConditionReady, metav1.ConditionUnknown), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
614+
Expect(GetMetric("operator_customobject_status_condition_transition_seconds", lo.Assign(conditionLabels(ConditionTypeFoo, metav1.ConditionTrue), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
615+
Expect(GetMetric("operator_customobject_status_condition_transition_seconds", lo.Assign(conditionLabels(ConditionTypeFoo, metav1.ConditionFalse), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
616+
Expect(GetMetric("operator_customobject_status_condition_transition_seconds", lo.Assign(conditionLabels(ConditionTypeFoo, metav1.ConditionUnknown), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"})).GetHistogram().GetSampleCount()).To(BeNumerically(">", 0))
617+
Expect(GetMetric("operator_customobject_status_condition_transition_seconds", lo.Assign(conditionLabels(ConditionTypeBar, metav1.ConditionTrue), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
618+
Expect(GetMetric("operator_customobject_status_condition_transition_seconds", lo.Assign(conditionLabels(ConditionTypeBar, metav1.ConditionFalse), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
619+
Expect(GetMetric("operator_customobject_status_condition_transition_seconds", lo.Assign(conditionLabels(ConditionTypeBar, metav1.ConditionUnknown), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
620+
621+
Expect(GetMetric("operator_customobject_status_condition_transitions_total", lo.Assign(conditionLabels(status.ConditionReady, metav1.ConditionTrue), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
622+
Expect(GetMetric("operator_customobject_status_condition_transitions_total", lo.Assign(conditionLabels(status.ConditionReady, metav1.ConditionFalse), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
623+
Expect(GetMetric("operator_customobject_status_condition_transitions_total", lo.Assign(conditionLabels(status.ConditionReady, metav1.ConditionUnknown), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
624+
Expect(GetMetric("operator_customobject_status_condition_transitions_total", lo.Assign(conditionLabels(ConditionTypeFoo, metav1.ConditionTrue), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"})).GetCounter().GetValue()).To(BeEquivalentTo(1))
625+
Expect(GetMetric("operator_customobject_status_condition_transitions_total", lo.Assign(conditionLabels(ConditionTypeFoo, metav1.ConditionFalse), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
626+
Expect(GetMetric("operator_customobject_status_condition_transitions_total", lo.Assign(conditionLabels(ConditionTypeFoo, metav1.ConditionUnknown), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
627+
Expect(GetMetric("operator_customobject_status_condition_transitions_total", lo.Assign(conditionLabels(ConditionTypeBar, metav1.ConditionTrue), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
628+
Expect(GetMetric("operator_customobject_status_condition_transitions_total", lo.Assign(conditionLabels(ConditionTypeBar, metav1.ConditionFalse), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
629+
Expect(GetMetric("operator_customobject_status_condition_transitions_total", lo.Assign(conditionLabels(ConditionTypeBar, metav1.ConditionUnknown), map[string]string{"operator_pkg_key1": "value1", "operator_pkg_key2": "value2"}))).To(BeNil())
630+
})
559631
})
560632

561633
var _ = Describe("Generic Controller", func() {

0 commit comments

Comments
 (0)