Skip to content

Commit b9cd81d

Browse files
feat(metrics): add ElapsedTimeSinceLastBackup and RPODuration gauges for schedules (#133)
1 parent 6252d73 commit b9cd81d

File tree

2 files changed: +46 -4 lines changed

2 files changed: +46 -4 lines changed

internal/metrics/metrics.go

Lines changed: 42 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -75,10 +75,12 @@ type MetricsRegistryImpl struct {
7575
backupsSucceededCount *prometheus.GaugeVec
7676

7777
// schedule metrics
78-
scheduleActionFailedCount *prometheus.CounterVec
79-
scheduleActionSucceededCount *prometheus.CounterVec
80-
scheduleLastBackupTimestamp *prometheus.GaugeVec
81-
scheduleRPOMarginRatio *prometheus.GaugeVec
78+
scheduleActionFailedCount *prometheus.CounterVec
79+
scheduleActionSucceededCount *prometheus.CounterVec
80+
scheduleLastBackupTimestamp *prometheus.GaugeVec
81+
scheduleRPOMarginRatio *prometheus.GaugeVec
82+
scheduleElapsedTimeSinceLastBackup *prometheus.GaugeVec
83+
scheduleRPODuration *prometheus.GaugeVec
8284
}
8385

8486
func (s *MetricsRegistryImpl) ReportHealthCheck() {
@@ -241,6 +243,13 @@ func (s *MetricsRegistryImpl) IncScheduleCounters(schedule *types.BackupSchedule
241243
schedule.ID,
242244
scheduleNameLabel,
243245
).Set(float64(schedule.RecoveryPoint.Unix()))
246+
247+
s.scheduleElapsedTimeSinceLastBackup.WithLabelValues(
248+
schedule.ContainerID,
249+
schedule.DatabaseName,
250+
schedule.ID,
251+
scheduleNameLabel,
252+
).Set(s.clock.Since(*schedule.RecoveryPoint).Seconds())
244253
} else if schedule.Audit != nil && schedule.Audit.CreatedAt != nil {
245254
// Report schedule creation time as last backup time if no backups were made
246255
s.scheduleLastBackupTimestamp.WithLabelValues(
@@ -249,7 +258,24 @@ func (s *MetricsRegistryImpl) IncScheduleCounters(schedule *types.BackupSchedule
249258
schedule.ID,
250259
scheduleNameLabel,
251260
).Set(float64(schedule.Audit.CreatedAt.AsTime().Unix()))
261+
262+
s.scheduleElapsedTimeSinceLastBackup.WithLabelValues(
263+
schedule.ContainerID,
264+
schedule.DatabaseName,
265+
schedule.ID,
266+
scheduleNameLabel,
267+
).Set(s.clock.Since(schedule.Audit.CreatedAt.AsTime()).Seconds())
252268
}
269+
270+
if schedule.ScheduleSettings.RecoveryPointObjective != nil {
271+
s.scheduleRPODuration.WithLabelValues(
272+
schedule.ContainerID,
273+
schedule.DatabaseName,
274+
schedule.ID,
275+
scheduleNameLabel,
276+
).Set(float64(schedule.ScheduleSettings.RecoveryPointObjective.Seconds))
277+
}
278+
253279
info := schedule.GetBackupInfo(s.clock)
254280
if info != nil {
255281
s.scheduleRPOMarginRatio.WithLabelValues(
@@ -392,6 +418,18 @@ func newMetricsRegistry(ctx context.Context, wg *sync.WaitGroup, cfg *config.Met
392418
Help: "if RPO is set for schedule, calculates a ratio to which RPO is satisfied",
393419
}, []string{"container_id", "database", "schedule_id", "schedule_name"})
394420

421+
s.scheduleElapsedTimeSinceLastBackup = promauto.With(s.reg).NewGaugeVec(prometheus.GaugeOpts{
422+
Subsystem: "schedules",
423+
Name: "elapsed_seconds_since_last_backup",
424+
Help: "Amount of time elapsed since last successful backup for this schedule",
425+
}, []string{"container_id", "database", "schedule_id", "schedule_name"})
426+
427+
s.scheduleRPODuration = promauto.With(s.reg).NewGaugeVec(prometheus.GaugeOpts{
428+
Subsystem: "schedules",
429+
Name: "rpo_duration_seconds",
430+
Help: "Maximum length of time permitted, that backup can be restored for this schedule",
431+
}, []string{"container_id", "database", "schedule_id", "schedule_name"})
432+
395433
mux := http.NewServeMux()
396434
mux.Handle("/metrics", promhttp.HandlerFor(s.reg, promhttp.HandlerOpts{Registry: s.reg}))
397435

internal/metrics/metrics_mock.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -113,16 +113,20 @@ func (s *MockMetricsRegistry) IncScheduleCounters(schedule *types.BackupSchedule
113113
}
114114
if schedule.RecoveryPoint != nil {
115115
s.metrics["schedules_last_backup_timestamp"] = float64(schedule.RecoveryPoint.Unix())
116+
s.metrics["schedules_elapsed_seconds_since_last_backup"] = s.clock.Since(*schedule.RecoveryPoint).Seconds()
116117
if schedule.ScheduleSettings.RecoveryPointObjective != nil {
117118
info := schedule.GetBackupInfo(s.clock)
118119
s.metrics["schedules_rpo_margin_ratio"] = info.LastBackupRpoMarginRatio
120+
s.metrics["schedules_rpo_duration_seconds"] = float64(schedule.ScheduleSettings.RecoveryPointObjective.Seconds)
119121
}
120122
} else if schedule.Audit != nil && schedule.Audit.CreatedAt != nil {
121123
s.metrics["schedules_last_backup_timestamp"] = float64(schedule.Audit.CreatedAt.AsTime().Unix())
124+
s.metrics["schedules_elapsed_seconds_since_last_backup"] = s.clock.Since(schedule.Audit.CreatedAt.AsTime()).Seconds()
122125
if schedule.ScheduleSettings.RecoveryPointObjective != nil {
123126
fakeRpoMargin := s.clock.Since(schedule.Audit.CreatedAt.AsTime())
124127
fakeLastBackupRpoMarginRatio := fakeRpoMargin.Seconds() / float64(schedule.ScheduleSettings.RecoveryPointObjective.Seconds)
125128
s.metrics["schedules_rpo_margin_ratio"] = fakeLastBackupRpoMarginRatio
129+
s.metrics["schedules_rpo_duration_seconds"] = float64(schedule.ScheduleSettings.RecoveryPointObjective.Seconds)
126130
}
127131
}
128132
}

0 commit comments

Comments
 (0)