Skip to content

Commit defb431

Browse files
Keep the history of the past 5 triggered runs of DisruptionCron (#817)
* Refactor
* WIP not really working.
* Working stored history.
* Limit history size to 5.
* Fix lint.
* Update controllers/cron_rollout_helpers.go

  Co-authored-by: Philip Thompson <philip.thompson@datadoghq.com>
* Rename struct as DisruptionCronTrigger.
* Address code review comments.

---------

Co-authored-by: Philip Thompson <philip.thompson@datadoghq.com>
1 parent 6fa6ef5 commit defb431

File tree

6 files changed

+82
-26
lines changed

6 files changed

+82
-26
lines changed

api/v1beta1/disruption_cron_types.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,14 @@ type DisruptionCronStatus struct {
8383
// Time when the target resource was previously missing.
8484
// +nullable
8585
TargetResourcePreviouslyMissing *metav1.Time `json:"targetResourcePreviouslyMissing,omitempty"`
86+
87+
History []DisruptionCronTrigger `json:"history,omitempty"`
88+
}
89+
90+
// MaxHistoryLen is the maximum number of DisruptionCronTrigger entries kept in
// DisruptionCronStatus.History; the DisruptionCron reconciler trims the slice to
// its last MaxHistoryLen items after appending a new entry.
const MaxHistoryLen = 5
91+
92+
// DisruptionCronTrigger is one entry of DisruptionCronStatus.History. The
// DisruptionCron reconciler appends one entry each time it records a new
// LastScheduleTime.
type DisruptionCronTrigger struct {
93+
// Name is filled by the reconciler from the DisruptionCron instance's ObjectMeta.Name.
// NOTE(review): presumably meant to identify what was triggered — confirm whether the
// created disruption's name should be stored instead.
Name string `json:"name,omitempty"`
94+
// Kind is filled by the reconciler from the DisruptionCron instance's TypeMeta.Kind.
// NOTE(review): verify TypeMeta.Kind is populated on the fetched instance — TODO confirm.
Kind string `json:"kind,omitempty"`
95+
// CreatedAt is filled by the reconciler with the value of Status.LastScheduleTime
// at the moment the entry is appended.
CreatedAt metav1.Time `json:"createdAt,omitempty"`
8696
}

api/v1beta1/zz_generated.deepcopy.go

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

chart/templates/generated/chaos.datadoghq.com_disruptioncrons.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,18 @@ spec:
611611
status:
612612
description: DisruptionCronStatus defines the observed state of DisruptionCron
613613
properties:
614+
history:
615+
items:
616+
properties:
617+
createdAt:
618+
format: date-time
619+
type: string
620+
kind:
621+
type: string
622+
name:
623+
type: string
624+
type: object
625+
type: array
614626
lastScheduleTime:
615627
description: The last time when the disruption was last successfully scheduled.
616628
format: date-time

controllers/cron_rollout_helpers.go

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"context"
99
"errors"
1010
"fmt"
11+
"sort"
1112
"time"
1213

1314
chaosv1beta1 "github.com/DataDog/chaos-controller/api/v1beta1"
@@ -174,41 +175,36 @@ func CreateDisruptionFromTemplate(ctx context.Context, cl client.Client, scheme
174175
}
175176

176177
// getScheduledTimeForDisruption returns the scheduled time for a particular disruption.
// The time is read from the disruption's ScheduledAtAnnotation annotation and parsed
// as RFC 3339.
//
// Added (+) version in this diff: returns the zero time.Time when the annotation is
// missing or empty, and also when parsing fails (the parse error is logged through log
// rather than returned). Callers detect "no time" with IsZero.
// Removed (-) version: returned (nil, nil) for a missing annotation and (nil, err) on a
// parse failure.
177-
func getScheduledTimeForDisruption(disruption *chaosv1beta1.Disruption) (*time.Time, error) {
178+
func getScheduledTimeForDisruption(log *zap.SugaredLogger, disruption *chaosv1beta1.Disruption) time.Time {
178179
timeRaw := disruption.Annotations[ScheduledAtAnnotation]
179180
if len(timeRaw) == 0 {
180-
return nil, nil
181+
return time.Time{}
181182
}
182183

183184
timeParsed, err := time.Parse(time.RFC3339, timeRaw)
184185
if err != nil {
185-
return nil, err
186+
log.Errorw("unable to parse schedule time for child disruption", "err", err, "disruptionName", disruption.Name)
187+
return time.Time{}
186188
}
187189

188-
return &timeParsed, nil
190+
return timeParsed
189191
}
190192

191-
// getMostRecentScheduleTime returns the most recent scheduled time from a list of disruptions.
192-
func GetMostRecentScheduleTime(log *zap.SugaredLogger, disruptions *chaosv1beta1.DisruptionList) *time.Time {
193-
var mostRecentScheduleTime *time.Time
193+
// GetMostRecentScheduleTime returns the most recent scheduled time from a list of disruptions.
//
// Added (+) version in this diff: returns the zero time.Time when the list is empty.
// Otherwise it sorts disruptions.Items in ascending order of scheduled time and returns
// the scheduled time of the last item. Items whose annotation is missing or unparsable
// compare as the zero time, so the result is the zero time.Time only when no item has a
// parsable scheduled time (callers check IsZero).
// Removed (-) version: scanned the items once, skipped unparsable ones, and returned a
// *time.Time that was nil when no scheduled time was found.
194+
func GetMostRecentScheduleTime(log *zap.SugaredLogger, disruptions *chaosv1beta1.DisruptionList) time.Time {
195+
length := len(disruptions.Items)
196+
if length == 0 {
197+
return time.Time{}
198+
}
194199

195-
for _, disruption := range disruptions.Items {
196-
scheduledTimeForDisruption, err := getScheduledTimeForDisruption(&disruption)
197-
if err != nil {
198-
log.Errorw("unable to parse schedule time for child disruption", "err", err, "disruption", disruption.Name)
199-
continue
200-
}
200+
// NOTE(review): sort.Slice reorders disruptions.Items in place, so the caller's list is
// mutated as a side effect. The annotation is re-parsed on every comparison, so a parse
// error for one item may be logged several times.
sort.Slice(disruptions.Items, func(i, j int) bool {
201+
scheduleTime1 := getScheduledTimeForDisruption(log, &disruptions.Items[i])
202+
scheduleTime2 := getScheduledTimeForDisruption(log, &disruptions.Items[j])
201203

202-
if scheduledTimeForDisruption != nil {
203-
if mostRecentScheduleTime == nil {
204-
mostRecentScheduleTime = scheduledTimeForDisruption
205-
} else if mostRecentScheduleTime.Before(*scheduledTimeForDisruption) {
206-
mostRecentScheduleTime = scheduledTimeForDisruption
207-
}
208-
}
209-
}
204+
return scheduleTime1.Before(scheduleTime2)
205+
})
210206

211-
return mostRecentScheduleTime
207+
return getScheduledTimeForDisruption(log, &disruptions.Items[length-1])
212208
}
213209

214210
// generateDisruptionName produces a disruption name based on the specific CR controller, that's invoking it.

controllers/disruption_cron_controller.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ func (r *DisruptionCronReconciler) Reconcile(ctx context.Context, req ctrl.Reque
5454
return ctrl.Result{}, client.IgnoreNotFound(err)
5555
}
5656

57+
r.log.Infow("fetched last known history", "history", instance.Status.History)
5758
DisruptionCronTags = []string{"disruptionCronName:" + instance.Name, "disruptionCronNamespace:" + instance.Namespace, "targetName:" + instance.Spec.TargetResource.Name}
5859

5960
if !instance.DeletionTimestamp.IsZero() {
@@ -162,6 +163,20 @@ func (r *DisruptionCronReconciler) Reconcile(ctx context.Context, req ctrl.Reque
162163
// Add the start time of the just initiated disruption to the status
163164
instance.Status.LastScheduleTime = &metav1.Time{Time: missedRun}
164165

166+
// Add to history, then ensure only the last MaxHistoryLen items are kept
167+
instance.Status.History = append(instance.Status.History, chaosv1beta1.DisruptionCronTrigger{
168+
Name: instance.ObjectMeta.Name,
169+
Kind: instance.TypeMeta.Kind,
170+
CreatedAt: *instance.Status.LastScheduleTime,
171+
})
172+
173+
if len(instance.Status.History) > chaosv1beta1.MaxHistoryLen {
174+
instance.Status.History = instance.Status.History[len(instance.Status.History)-chaosv1beta1.MaxHistoryLen:]
175+
}
176+
177+
r.log.Debugw("updating instance Status lastScheduleTime and history",
178+
"lastScheduleTime", instance.Status.LastScheduleTime, "history", instance.Status.History)
179+
165180
if err := r.Client.Status().Update(ctx, instance); err != nil {
166181
r.log.Warnw("unable to update LastScheduleTime of DisruptionCron status", "err", err)
167182
return ctrl.Result{}, err
@@ -174,8 +189,8 @@ func (r *DisruptionCronReconciler) Reconcile(ctx context.Context, req ctrl.Reque
174189
// based on the most recent schedule time among the given disruptions.
175190
func (r *DisruptionCronReconciler) updateLastScheduleTime(ctx context.Context, instance *chaosv1beta1.DisruptionCron, disruptions *chaosv1beta1.DisruptionList) error {
176191
mostRecentScheduleTime := GetMostRecentScheduleTime(r.log, disruptions) // find the last run so we can update the status
177-
if mostRecentScheduleTime != nil {
178-
instance.Status.LastScheduleTime = &metav1.Time{Time: *mostRecentScheduleTime}
192+
if !mostRecentScheduleTime.IsZero() {
193+
instance.Status.LastScheduleTime = &metav1.Time{Time: mostRecentScheduleTime}
179194
return r.Client.Status().Update(ctx, instance)
180195
}
181196

controllers/disruption_rollout_controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,8 @@ func (r *DisruptionRolloutReconciler) Reconcile(ctx context.Context, req ctrl.Re
172172
// based on the most recent schedule time among the given disruptions.
173173
func (r *DisruptionRolloutReconciler) updateLastScheduleTime(ctx context.Context, instance *chaosv1beta1.DisruptionRollout, disruptions *chaosv1beta1.DisruptionList) error {
174174
mostRecentScheduleTime := GetMostRecentScheduleTime(r.log, disruptions) // find the last run so we can update the status
175-
if mostRecentScheduleTime != nil {
176-
instance.Status.LastScheduleTime = &metav1.Time{Time: *mostRecentScheduleTime}
175+
if !mostRecentScheduleTime.IsZero() {
176+
instance.Status.LastScheduleTime = &metav1.Time{Time: mostRecentScheduleTime}
177177
return r.Client.Status().Update(ctx, instance)
178178
}
179179

0 commit comments

Comments
 (0)