@@ -75,10 +75,12 @@ type MetricsRegistryImpl struct {
75
75
backupsSucceededCount * prometheus.GaugeVec
76
76
77
77
// schedule metrics
78
- scheduleActionFailedCount * prometheus.CounterVec
79
- scheduleActionSucceededCount * prometheus.CounterVec
80
- scheduleLastBackupTimestamp * prometheus.GaugeVec
81
- scheduleRPOMarginRatio * prometheus.GaugeVec
78
+ scheduleActionFailedCount * prometheus.CounterVec
79
+ scheduleActionSucceededCount * prometheus.CounterVec
80
+ scheduleLastBackupTimestamp * prometheus.GaugeVec
81
+ scheduleRPOMarginRatio * prometheus.GaugeVec
82
+ scheduleElapsedTimeSinceLastBackup * prometheus.GaugeVec
83
+ scheduleRPODuration * prometheus.GaugeVec
82
84
}
83
85
84
86
func (s * MetricsRegistryImpl ) ReportHealthCheck () {
@@ -241,6 +243,13 @@ func (s *MetricsRegistryImpl) IncScheduleCounters(schedule *types.BackupSchedule
241
243
schedule .ID ,
242
244
scheduleNameLabel ,
243
245
).Set (float64 (schedule .RecoveryPoint .Unix ()))
246
+
247
+ s .scheduleElapsedTimeSinceLastBackup .WithLabelValues (
248
+ schedule .ContainerID ,
249
+ schedule .DatabaseName ,
250
+ schedule .ID ,
251
+ scheduleNameLabel ,
252
+ ).Set (s .clock .Since (* schedule .RecoveryPoint ).Seconds ())
244
253
} else if schedule .Audit != nil && schedule .Audit .CreatedAt != nil {
245
254
// Report schedule creation time as last backup time if no backups were made
246
255
s .scheduleLastBackupTimestamp .WithLabelValues (
@@ -249,7 +258,24 @@ func (s *MetricsRegistryImpl) IncScheduleCounters(schedule *types.BackupSchedule
249
258
schedule .ID ,
250
259
scheduleNameLabel ,
251
260
).Set (float64 (schedule .Audit .CreatedAt .AsTime ().Unix ()))
261
+
262
+ s .scheduleElapsedTimeSinceLastBackup .WithLabelValues (
263
+ schedule .ContainerID ,
264
+ schedule .DatabaseName ,
265
+ schedule .ID ,
266
+ scheduleNameLabel ,
267
+ ).Set (s .clock .Since (schedule .Audit .CreatedAt .AsTime ()).Seconds ())
252
268
}
269
+
270
+ if schedule .ScheduleSettings .RecoveryPointObjective != nil {
271
+ s .scheduleRPODuration .WithLabelValues (
272
+ schedule .ContainerID ,
273
+ schedule .DatabaseName ,
274
+ schedule .ID ,
275
+ scheduleNameLabel ,
276
+ ).Set (float64 (schedule .ScheduleSettings .RecoveryPointObjective .Seconds ))
277
+ }
278
+
253
279
info := schedule .GetBackupInfo (s .clock )
254
280
if info != nil {
255
281
s .scheduleRPOMarginRatio .WithLabelValues (
@@ -392,6 +418,18 @@ func newMetricsRegistry(ctx context.Context, wg *sync.WaitGroup, cfg *config.Met
392
418
Help : "if RPO is set for schedule, calculates a ratio to which RPO is satisfied" ,
393
419
}, []string {"container_id" , "database" , "schedule_id" , "schedule_name" })
394
420
421
+ s .scheduleElapsedTimeSinceLastBackup = promauto .With (s .reg ).NewGaugeVec (prometheus.GaugeOpts {
422
+ Subsystem : "schedules" ,
423
+ Name : "elapsed_seconds_since_last_backup" ,
424
+ Help : "Amount of time elapsed since last successful backup for this schedule" ,
425
+ }, []string {"container_id" , "database" , "schedule_id" , "schedule_name" })
426
+
427
+ s .scheduleRPODuration = promauto .With (s .reg ).NewGaugeVec (prometheus.GaugeOpts {
428
+ Subsystem : "schedules" ,
429
+ Name : "rpo_duration_seconds" ,
430
+ Help : "Maximum length of time permitted, that backup can be restored for this schedule" ,
431
+ }, []string {"container_id" , "database" , "schedule_id" , "schedule_name" })
432
+
395
433
mux := http .NewServeMux ()
396
434
mux .Handle ("/metrics" , promhttp .HandlerFor (s .reg , promhttp.HandlerOpts {Registry : s .reg }))
397
435
0 commit comments