@@ -3,6 +3,7 @@ package status
3
3
import (
4
4
"context"
5
5
"fmt"
6
+ "time"
6
7
7
8
"github.com/awslabs/operatorpkg/object"
8
9
"github.com/prometheus/client_golang/prometheus"
@@ -12,6 +13,7 @@ import (
12
13
"k8s.io/client-go/tools/record"
13
14
controllerruntime "sigs.k8s.io/controller-runtime"
14
15
"sigs.k8s.io/controller-runtime/pkg/client"
16
+ "sigs.k8s.io/controller-runtime/pkg/controller"
15
17
"sigs.k8s.io/controller-runtime/pkg/manager"
16
18
"sigs.k8s.io/controller-runtime/pkg/metrics"
17
19
"sigs.k8s.io/controller-runtime/pkg/reconcile"
@@ -24,6 +26,7 @@ const (
24
26
MetricLabelName = "name"
25
27
MetricLabelConditionType = "type"
26
28
MetricLabelConditionStatus = "status"
29
+ MetricLabelConditionReason = "reason"
27
30
)
28
31
29
32
const (
@@ -45,9 +48,10 @@ func NewController[T Object](client client.Client, eventRecorder record.EventRec
45
48
}
46
49
}
47
50
48
- func (c * Controller [T ]) Register (ctx context.Context , m manager.Manager ) error {
51
+ func (c * Controller [T ]) Register (_ context.Context , m manager.Manager ) error {
49
52
return controllerruntime .NewControllerManagedBy (m ).
50
53
For (object .New [T ]()).
54
+ WithOptions (controller.Options {MaxConcurrentReconciles : 10 }).
51
55
Named ("status" ).
52
56
Complete (c )
53
57
}
@@ -61,8 +65,14 @@ func (c *Controller[T]) Reconcile(ctx context.Context, req reconcile.Request) (r
61
65
ConditionCount .DeletePartialMatch (prometheus.Labels {
62
66
MetricLabelGroup : gvk .Group ,
63
67
MetricLabelKind : gvk .Kind ,
64
- MetricLabelNamespace : string (req .Namespace ),
65
- MetricLabelName : string (req .Name ),
68
+ MetricLabelNamespace : req .Namespace ,
69
+ MetricLabelName : req .Name ,
70
+ })
71
+ ConditionCurrentStatusSeconds .DeletePartialMatch (prometheus.Labels {
72
+ MetricLabelGroup : gvk .Group ,
73
+ MetricLabelKind : gvk .Kind ,
74
+ MetricLabelNamespace : req .Namespace ,
75
+ MetricLabelName : req .Name ,
66
76
})
67
77
return reconcile.Result {}, nil
68
78
}
@@ -78,21 +88,41 @@ func (c *Controller[T]) Reconcile(ctx context.Context, req reconcile.Request) (r
78
88
ConditionCount .With (prometheus.Labels {
79
89
MetricLabelGroup : gvk .Group ,
80
90
MetricLabelKind : gvk .Kind ,
81
- MetricLabelNamespace : string ( req .Namespace ) ,
82
- MetricLabelName : string ( req .Name ) ,
83
- MetricLabelConditionType : string ( condition .Type ) ,
91
+ MetricLabelNamespace : req .Namespace ,
92
+ MetricLabelName : req .Name ,
93
+ MetricLabelConditionType : condition .Type ,
84
94
MetricLabelConditionStatus : string (condition .Status ),
95
+ MetricLabelConditionReason : condition .Reason ,
85
96
}).Set (1 )
97
+ ConditionCurrentStatusSeconds .With (prometheus.Labels {
98
+ MetricLabelGroup : gvk .Group ,
99
+ MetricLabelKind : gvk .Kind ,
100
+ MetricLabelNamespace : req .Namespace ,
101
+ MetricLabelName : req .Name ,
102
+ MetricLabelConditionType : condition .Type ,
103
+ MetricLabelConditionStatus : string (condition .Status ),
104
+ MetricLabelConditionReason : condition .Reason ,
105
+ }).Set (time .Since (condition .LastTransitionTime .Time ).Seconds ())
86
106
}
87
107
for _ , observedCondition := range observedConditions .List () {
88
108
if currentCondition := currentConditions .Get (observedCondition .Type ); currentCondition == nil || currentCondition .Status != observedCondition .Status {
89
109
ConditionCount .Delete (prometheus.Labels {
90
110
MetricLabelGroup : gvk .Group ,
91
111
MetricLabelKind : gvk .Kind ,
92
- MetricLabelNamespace : string ( req .Namespace ) ,
93
- MetricLabelName : string ( req .Name ) ,
94
- MetricLabelConditionType : string ( observedCondition .Type ) ,
112
+ MetricLabelNamespace : req .Namespace ,
113
+ MetricLabelName : req .Name ,
114
+ MetricLabelConditionType : observedCondition .Type ,
95
115
MetricLabelConditionStatus : string (observedCondition .Status ),
116
+ MetricLabelConditionReason : observedCondition .Reason ,
117
+ })
118
+ ConditionCurrentStatusSeconds .Delete (prometheus.Labels {
119
+ MetricLabelGroup : gvk .Group ,
120
+ MetricLabelKind : gvk .Kind ,
121
+ MetricLabelNamespace : req .Namespace ,
122
+ MetricLabelName : req .Name ,
123
+ MetricLabelConditionType : observedCondition .Type ,
124
+ MetricLabelConditionStatus : string (observedCondition .Status ),
125
+ MetricLabelConditionReason : observedCondition .Reason ,
96
126
})
97
127
}
98
128
}
@@ -114,25 +144,36 @@ func (c *Controller[T]) Reconcile(ctx context.Context, req reconcile.Request) (r
114
144
// time, and our likelihood of observing this is much higher.
115
145
for _ , condition := range currentConditions .List () {
116
146
observedCondition := observedConditions .Get (condition .Type )
117
- if observedCondition == nil || observedCondition .GetStatus () == condition .GetStatus () {
147
+ if observedCondition .GetStatus () == condition .GetStatus () {
148
+ continue
149
+ }
150
+ // A condition transitions if it either didn't exist before or it has changed
151
+ ConditionTransitionsTotal .With (prometheus.Labels {
152
+ MetricLabelGroup : gvk .Group ,
153
+ MetricLabelKind : gvk .Kind ,
154
+ MetricLabelConditionType : condition .Type ,
155
+ MetricLabelConditionStatus : string (condition .Status ),
156
+ MetricLabelConditionReason : condition .Reason ,
157
+ }).Inc ()
158
+ if observedCondition == nil {
118
159
continue
119
160
}
120
161
duration := condition .LastTransitionTime .Time .Sub (observedCondition .LastTransitionTime .Time ).Seconds ()
121
162
ConditionDuration .With (prometheus.Labels {
122
163
MetricLabelGroup : gvk .Group ,
123
164
MetricLabelKind : gvk .Kind ,
124
- MetricLabelConditionType : string ( observedCondition .Type ) ,
165
+ MetricLabelConditionType : observedCondition .Type ,
125
166
MetricLabelConditionStatus : string (observedCondition .Status ),
126
- }).Observe (float64 ( duration ) )
127
- c .eventRecorder .Event (o , v1 .EventTypeNormal , string ( condition .Type ) , fmt .Sprintf ("Status condition transitioned, Type: %s, Status: %s -> %s, Reason: %s%s" ,
167
+ }).Observe (duration )
168
+ c .eventRecorder .Event (o , v1 .EventTypeNormal , condition .Type , fmt .Sprintf ("Status condition transitioned, Type: %s, Status: %s -> %s, Reason: %s%s" ,
128
169
condition .Type ,
129
170
observedCondition .Status ,
130
171
condition .Status ,
131
172
condition .Reason ,
132
173
lo .Ternary (condition .Message != "" , fmt .Sprintf (", Message: %s" , condition .Message ), "" ),
133
174
))
134
175
}
135
- return reconcile.Result {}, nil
176
+ return reconcile.Result {RequeueAfter : time . Second * 10 }, nil
136
177
}
137
178
138
179
// Cardinality is limited to # objects * # conditions * # objectives
@@ -166,12 +207,53 @@ var ConditionCount = prometheus.NewGaugeVec(
166
207
MetricLabelKind ,
167
208
MetricLabelConditionType ,
168
209
MetricLabelConditionStatus ,
210
+ MetricLabelConditionReason ,
211
+ },
212
+ )
213
+
214
+ // Cardinality is limited to # objects * # conditions
215
+ // NOTE: This metric is based on a requeue so it won't show the current status seconds with extremely high accuracy.
216
+ // This metric is useful for aggreations. If you need a high accuracy metric, use operator_status_condition_last_transition_time_seconds
217
+ var ConditionCurrentStatusSeconds = prometheus .NewGaugeVec (
218
+ prometheus.GaugeOpts {
219
+ Namespace : MetricNamespace ,
220
+ Subsystem : MetricSubsystem ,
221
+ Name : "current_status_seconds" ,
222
+ Help : "The current amount of time in seconds that a status condition has been in a specific state. Alarm := P99(Updated=Unknown) > 5 minutes" ,
223
+ },
224
+ []string {
225
+ MetricLabelNamespace ,
226
+ MetricLabelName ,
227
+ MetricLabelGroup ,
228
+ MetricLabelKind ,
229
+ MetricLabelConditionType ,
230
+ MetricLabelConditionStatus ,
231
+ MetricLabelConditionReason ,
232
+ },
233
+ )
234
+
235
+ // Cardinality is limited to # objects * # conditions
236
+ var ConditionTransitionsTotal = prometheus .NewCounterVec (
237
+ prometheus.CounterOpts {
238
+ Namespace : MetricNamespace ,
239
+ Subsystem : MetricSubsystem ,
240
+ Name : "transitions_total" ,
241
+ Help : "The count of transitions of a given object, type and status." ,
242
+ },
243
+ []string {
244
+ MetricLabelGroup ,
245
+ MetricLabelKind ,
246
+ MetricLabelConditionType ,
247
+ MetricLabelConditionStatus ,
248
+ MetricLabelConditionReason ,
169
249
},
170
250
)
171
251
172
252
func init () {
173
253
metrics .Registry .MustRegister (
174
254
ConditionCount ,
175
255
ConditionDuration ,
256
+ ConditionTransitionsTotal ,
257
+ ConditionCurrentStatusSeconds ,
176
258
)
177
259
}
0 commit comments