From 931425981caf5c0622bcdd7c6361722de2fa3998 Mon Sep 17 00:00:00 2001 From: Philip Thompson Date: Fri, 7 Feb 2025 12:55:17 -0500 Subject: [PATCH 1/3] Move cron_rollout_helpers into api/v1beta1 --- .../v1beta1}/cron_rollout_helpers.go | 39 +++++++++---------- controllers/disruption_cron_controller.go | 8 ++-- controllers/disruption_rollout_controller.go | 8 ++-- 3 files changed, 27 insertions(+), 28 deletions(-) rename {controllers => api/v1beta1}/cron_rollout_helpers.go (83%) diff --git a/controllers/cron_rollout_helpers.go b/api/v1beta1/cron_rollout_helpers.go similarity index 83% rename from controllers/cron_rollout_helpers.go rename to api/v1beta1/cron_rollout_helpers.go index cafe62cb2..01a3c2521 100644 --- a/controllers/cron_rollout_helpers.go +++ b/api/v1beta1/cron_rollout_helpers.go @@ -2,7 +2,7 @@ // under the Apache License Version 2.0. // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2025 Datadog, Inc. -package controllers +package v1beta1 import ( "context" @@ -11,7 +11,6 @@ import ( "sort" "time" - chaosv1beta1 "github.com/DataDog/chaos-controller/api/v1beta1" cLog "github.com/DataDog/chaos-controller/log" "go.uber.org/zap" appsv1 "k8s.io/api/apps/v1" @@ -25,14 +24,14 @@ import ( ) const ( - DisruptionCronNameLabel = chaosv1beta1.GroupName + "/disruption-cron-name" - DisruptionRolloutNameLabel = chaosv1beta1.GroupName + "/disruption-rollout-name" + DisruptionCronNameLabel = GroupName + "/disruption-cron-name" + DisruptionRolloutNameLabel = GroupName + "/disruption-rollout-name" ) // GetChildDisruptions retrieves disruptions associated with a resource by its label. // Most of the time, this will return an empty list as disruptions are typically short-lived objects. -func GetChildDisruptions(ctx context.Context, cl client.Client, log *zap.SugaredLogger, namespace, labelKey, labelVal string) (*chaosv1beta1.DisruptionList, error) { - disruptions := &chaosv1beta1.DisruptionList{} +func GetChildDisruptions(ctx context.Context, cl client.Client, log *zap.SugaredLogger, namespace, labelKey, labelVal string) (*DisruptionList, error) { + disruptions := &DisruptionList{} labelSelector := labels.SelectorFromSet(labels.Set{labelKey: labelVal}) if err := cl.List(ctx, disruptions, client.InNamespace(namespace), &client.ListOptions{LabelSelector: labelSelector}); err != nil { @@ -45,7 +44,7 @@ func GetChildDisruptions(ctx context.Context, cl client.Client, log *zap.Sugared // GetTargetResource retrieves the specified target resource (Deployment or StatefulSet). // It returns the target resource object and any error encountered during retrieval. -func GetTargetResource(ctx context.Context, cl client.Client, targetResource *chaosv1beta1.TargetResourceSpec, namespace string) (client.Object, error) { +func GetTargetResource(ctx context.Context, cl client.Client, targetResource *TargetResourceSpec, namespace string) (client.Object, error) { var targetObj client.Object switch targetResource.Kind { @@ -65,7 +64,7 @@ func GetTargetResource(ctx context.Context, cl client.Client, targetResource *ch // CheckTargetResourceExists determines if the target resource exists. // Returns a boolean indicating presence and an error if one occurs. -func CheckTargetResourceExists(ctx context.Context, cl client.Client, targetResource *chaosv1beta1.TargetResourceSpec, namespace string) (bool, error) { +func CheckTargetResourceExists(ctx context.Context, cl client.Client, targetResource *TargetResourceSpec, namespace string) (bool, error) { _, err := GetTargetResource(ctx, cl, targetResource, namespace) if apierrors.IsNotFound(err) { @@ -79,7 +78,7 @@ func CheckTargetResourceExists(ctx context.Context, cl client.Client, targetReso // GetSelectors retrieves the labels of the specified target resource (Deployment or StatefulSet). // Returns a set of labels to be used as Disruption selectors and an error if retrieval fails. -func GetSelectors(ctx context.Context, cl client.Client, targetResource *chaosv1beta1.TargetResourceSpec, namespace string) (labels *metav1.LabelSelector, err error) { +func GetSelectors(ctx context.Context, cl client.Client, targetResource *TargetResourceSpec, namespace string) (labels *metav1.LabelSelector, err error) { targetObj, err := GetTargetResource(ctx, cl, targetResource, namespace) if err != nil { return nil, err @@ -104,10 +103,10 @@ func GetSelectors(ctx context.Context, cl client.Client, targetResource *chaosv1 // createBaseDisruption generates a basic Disruption object using the provided owner and disruptionSpec. // The returned Disruption object has its basic details set, but it's not saved or stored anywhere yet. -func createBaseDisruption(owner metav1.Object, disruptionSpec *chaosv1beta1.DisruptionSpec) *chaosv1beta1.Disruption { +func createBaseDisruption(owner metav1.Object, disruptionSpec *DisruptionSpec) *Disruption { name := generateDisruptionName(owner) - return &chaosv1beta1.Disruption{ + return &Disruption{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: owner.GetNamespace(), @@ -121,7 +120,7 @@ func createBaseDisruption(owner metav1.Object, disruptionSpec *chaosv1beta1.Disr // setDisruptionAnnotations updates the annotations of a given Disruption object with those of its owner. // It sets a scheduled time annotation using the provided scheduledTime. // It parses the UserInfo annotation if it exists and sets user-related annotations. -func setDisruptionAnnotations(disruption *chaosv1beta1.Disruption, owner metav1.Object, scheduledTime time.Time) error { +func setDisruptionAnnotations(disruption *Disruption, owner metav1.Object, scheduledTime time.Time) error { disruption.CopyOwnerAnnotations(owner) disruption.SetScheduledAtAnnotation(scheduledTime) @@ -131,7 +130,7 @@ func setDisruptionAnnotations(disruption *chaosv1beta1.Disruption, owner metav1. // overwriteDisruptionSelectors updates the selectors of a given Disruption object based on the provided targetResource. // Returns an error if fetching selectors from the target resource fails. -func overwriteDisruptionSelectors(ctx context.Context, cl client.Client, disruption *chaosv1beta1.Disruption, targetResource *chaosv1beta1.TargetResourceSpec, namespace string) error { +func overwriteDisruptionSelectors(ctx context.Context, cl client.Client, disruption *Disruption, targetResource *TargetResourceSpec, namespace string) error { // Get selectors from target resource selectors, err := GetSelectors(ctx, cl, targetResource, namespace) if err != nil { @@ -156,7 +155,7 @@ func overwriteDisruptionSelectors(ctx context.Context, cl client.Client, disrupt // CreateDisruptionFromTemplate constructs a Disruption object based on the provided owner, disruptionSpec, and targetResource. // The function sets annotations, overwrites selectors, and associates the Disruption with its owner. // It returns the constructed Disruption or an error if any step fails. -func CreateDisruptionFromTemplate(ctx context.Context, cl client.Client, scheme *runtime.Scheme, owner metav1.Object, targetResource *chaosv1beta1.TargetResourceSpec, disruptionSpec *chaosv1beta1.DisruptionSpec, scheduledTime time.Time, log *zap.SugaredLogger) (*chaosv1beta1.Disruption, error) { +func CreateDisruptionFromTemplate(ctx context.Context, cl client.Client, scheme *runtime.Scheme, owner metav1.Object, targetResource *TargetResourceSpec, disruptionSpec *DisruptionSpec, scheduledTime time.Time, log *zap.SugaredLogger) (*Disruption, error) { disruption := createBaseDisruption(owner, disruptionSpec) ownerNameLabel := getOwnerNameLabel(owner) @@ -178,7 +177,7 @@ func CreateDisruptionFromTemplate(ctx context.Context, cl client.Client, scheme } // getScheduledTimeForDisruption returns the scheduled time for a particular disruption. -func getScheduledTimeForDisruption(log *zap.SugaredLogger, disruption *chaosv1beta1.Disruption) time.Time { +func getScheduledTimeForDisruption(log *zap.SugaredLogger, disruption *Disruption) time.Time { parsedTime, err := disruption.GetScheduledAtAnnotation() if err != nil { log.Errorw("unable to parse schedule time for child disruption", "err", err, cLog.DisruptionNameKey, disruption.Name) @@ -189,7 +188,7 @@ func getScheduledTimeForDisruption(log *zap.SugaredLogger, disruption *chaosv1be } // GetMostRecentScheduleTime returns the most recent scheduled time from a list of disruptions. -func GetMostRecentScheduleTime(log *zap.SugaredLogger, disruptions *chaosv1beta1.DisruptionList) time.Time { +func GetMostRecentScheduleTime(log *zap.SugaredLogger, disruptions *DisruptionList) time.Time { length := len(disruptions.Items) if length == 0 { return time.Time{} @@ -209,9 +208,9 @@ func GetMostRecentScheduleTime(log *zap.SugaredLogger, disruptions *chaosv1beta1 // It returns a formatted string name. func generateDisruptionName(owner metav1.Object) string { switch typedOwner := owner.(type) { - case *chaosv1beta1.DisruptionCron: + case *DisruptionCron: return fmt.Sprintf("disruption-cron-%s", typedOwner.GetName()) - case *chaosv1beta1.DisruptionRollout: + case *DisruptionRollout: return fmt.Sprintf("disruption-rollout-%s", typedOwner.GetName()) } @@ -222,9 +221,9 @@ func generateDisruptionName(owner metav1.Object) string { // It returns the label string. func getOwnerNameLabel(owner metav1.Object) string { switch owner.(type) { - case *chaosv1beta1.DisruptionCron: + case *DisruptionCron: return DisruptionCronNameLabel - case *chaosv1beta1.DisruptionRollout: + case *DisruptionRollout: return DisruptionRolloutNameLabel } diff --git a/controllers/disruption_cron_controller.go b/controllers/disruption_cron_controller.go index 300a1c161..c5c0150df 100644 --- a/controllers/disruption_cron_controller.go +++ b/controllers/disruption_cron_controller.go @@ -111,7 +111,7 @@ func (r *DisruptionCronReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, nil } - disruptions, err := GetChildDisruptions(ctx, r.Client, r.log, instance.Namespace, DisruptionCronNameLabel, instance.Name) + disruptions, err := chaosv1beta1.GetChildDisruptions(ctx, r.Client, r.log, instance.Namespace, chaosv1beta1.DisruptionCronNameLabel, instance.Name) if err != nil { return ctrl.Result{}, nil } @@ -179,7 +179,7 @@ func (r *DisruptionCronReconciler) Reconcile(ctx context.Context, req ctrl.Reque r.log.Infow("processing current run", "currentRun", missedRun.Format(time.UnixDate)) // Create disruption for current run - disruption, err := CreateDisruptionFromTemplate(ctx, r.Client, r.Scheme, instance, &instance.Spec.TargetResource, &instance.Spec.DisruptionTemplate, missedRun, r.log) + disruption, err := chaosv1beta1.CreateDisruptionFromTemplate(ctx, r.Client, r.Scheme, instance, &instance.Spec.TargetResource, &instance.Spec.DisruptionTemplate, missedRun, r.log) if err != nil { r.log.Warnw("unable to construct disruption from template", "err", err) @@ -230,7 +230,7 @@ func (r *DisruptionCronReconciler) Reconcile(ctx context.Context, req ctrl.Reque // updateLastScheduleTime updates the LastScheduleTime in the status of a DisruptionCron instance // based on the most recent schedule time among the given disruptions. func (r *DisruptionCronReconciler) updateLastScheduleTime(ctx context.Context, instance *chaosv1beta1.DisruptionCron, disruptions *chaosv1beta1.DisruptionList) error { - mostRecentScheduleTime := GetMostRecentScheduleTime(r.log, disruptions) // find the last run so we can update the status + mostRecentScheduleTime := chaosv1beta1.GetMostRecentScheduleTime(r.log, disruptions) // find the last run so we can update the status if !mostRecentScheduleTime.IsZero() { instance.Status.LastScheduleTime = &metav1.Time{Time: mostRecentScheduleTime} return r.Client.Status().Update(ctx, instance) @@ -246,7 +246,7 @@ func (r *DisruptionCronReconciler) updateLastScheduleTime(ctx context.Context, i // - error: Represents any error that occurred during the execution of the function. func (r *DisruptionCronReconciler) updateTargetResourcePreviouslyMissing(ctx context.Context, instance *chaosv1beta1.DisruptionCron) (bool, bool, error) { disruptionCronDeleted := false - targetResourceExists, err := CheckTargetResourceExists(ctx, r.Client, &instance.Spec.TargetResource, instance.Namespace) + targetResourceExists, err := chaosv1beta1.CheckTargetResourceExists(ctx, r.Client, &instance.Spec.TargetResource, instance.Namespace) if err != nil { return targetResourceExists, disruptionCronDeleted, err diff --git a/controllers/disruption_rollout_controller.go b/controllers/disruption_rollout_controller.go index f36f562c8..e8042c4df 100644 --- a/controllers/disruption_rollout_controller.go +++ b/controllers/disruption_rollout_controller.go @@ -77,7 +77,7 @@ func (r *DisruptionRolloutReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{}, nil } - disruptions, err := GetChildDisruptions(ctx, r.Client, r.log, instance.Namespace, DisruptionRolloutNameLabel, instance.Name) + disruptions, err := chaosv1beta1.GetChildDisruptions(ctx, r.Client, r.log, instance.Namespace, chaosv1beta1.DisruptionRolloutNameLabel, instance.Name) if err != nil { return ctrl.Result{}, nil } @@ -138,7 +138,7 @@ func (r *DisruptionRolloutReconciler) Reconcile(ctx context.Context, req ctrl.Re // Create disruption scheduledTime := time.Now() - disruption, err := CreateDisruptionFromTemplate(ctx, r.Client, r.Scheme, instance, &instance.Spec.TargetResource, &instance.Spec.DisruptionTemplate, scheduledTime, r.log) + disruption, err := chaosv1beta1.CreateDisruptionFromTemplate(ctx, r.Client, r.Scheme, instance, &instance.Spec.TargetResource, &instance.Spec.DisruptionTemplate, scheduledTime, r.log) if err != nil { r.log.Warnw("unable to construct disruption from template", "err", err) @@ -173,7 +173,7 @@ func (r *DisruptionRolloutReconciler) Reconcile(ctx context.Context, req ctrl.Re // updateLastScheduleTime updates the LastScheduleTime in the status of a DisruptionRollout instance // based on the most recent schedule time among the given disruptions. func (r *DisruptionRolloutReconciler) updateLastScheduleTime(ctx context.Context, instance *chaosv1beta1.DisruptionRollout, disruptions *chaosv1beta1.DisruptionList) error { - mostRecentScheduleTime := GetMostRecentScheduleTime(r.log, disruptions) // find the last run so we can update the status + mostRecentScheduleTime := chaosv1beta1.GetMostRecentScheduleTime(r.log, disruptions) // find the last run so we can update the status if !mostRecentScheduleTime.IsZero() { instance.Status.LastScheduleTime = &metav1.Time{Time: mostRecentScheduleTime} return r.Client.Status().Update(ctx, instance) @@ -189,7 +189,7 @@ func (r *DisruptionRolloutReconciler) updateLastScheduleTime(ctx context.Context // - error: Represents any error that occurred during the execution of the function. func (r *DisruptionRolloutReconciler) updateTargetResourcePreviouslyMissing(ctx context.Context, instance *chaosv1beta1.DisruptionRollout) (bool, bool, error) { disruptionRolloutDeleted := false - targetResourceExists, err := CheckTargetResourceExists(ctx, r.Client, &instance.Spec.TargetResource, instance.Namespace) + targetResourceExists, err := chaosv1beta1.CheckTargetResourceExists(ctx, r.Client, &instance.Spec.TargetResource, instance.Namespace) if err != nil { return targetResourceExists, disruptionRolloutDeleted, err From 73ccb72a7ec89b4493a6252855211d220ef3bb68 Mon Sep 17 00:00:00 2001 From: Philip Thompson Date: Fri, 7 Feb 2025 12:49:32 -0500 Subject: [PATCH 2/3] Block DisruptionCrons from being created if the target does not exist --- api/v1beta1/disruption_cron_webhook.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/api/v1beta1/disruption_cron_webhook.go b/api/v1beta1/disruption_cron_webhook.go index 7f397a5c4..ac751395b 100644 --- a/api/v1beta1/disruption_cron_webhook.go +++ b/api/v1beta1/disruption_cron_webhook.go @@ -6,6 +6,7 @@ package v1beta1 import ( + "context" "encoding/json" "errors" "fmt" @@ -15,6 +16,7 @@ import ( cLog "github.com/DataDog/chaos-controller/log" "github.com/DataDog/chaos-controller/o11y/metrics" "github.com/DataDog/chaos-controller/utils" + "github.com/robfig/cron" "go.uber.org/zap" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -121,6 +123,22 @@ func (d *DisruptionCron) ValidateCreate() (_ admission.Warnings, err error) { return nil, err } + var exists bool + + // CheckTargetResourceExists doesn't return apierrors.NotFound. Which means if there is an error, + // we could not determine if the target existed, and should allow the Create. + if exists, err = CheckTargetResourceExists(context.Background(), k8sClient, &d.Spec.TargetResource, d.Namespace); err != nil { + log.Errorw("error checking if target resource exists", "error", err) + } else if !exists { + log.Warnw("rejecting disruption cron because target does not exist", + "targetName", d.Spec.TargetResource.Name, + "targetKind", d.Spec.TargetResource.Kind, + "error", err) + + return nil, fmt.Errorf("rejecting disruption cron because target %s %s/%s does not exist", + d.Spec.TargetResource.Kind, d.Namespace, d.Spec.TargetResource.Name) + } + // send informative event to disruption cron to broadcast d.emitEvent(EventDisruptionCronCreated) From dbf1997206b373145942977e9d7a350221995fbc Mon Sep 17 00:00:00 2001 From: Philip Thompson Date: Mon, 10 Mar 2025 10:07:03 +0100 Subject: [PATCH 3/3] Add config for requiring dicron target --- api/v1beta1/cron_rollout_helpers.go | 2 ++ api/v1beta1/disruption_cron_webhook.go | 32 +++++++++++++++----------- chart/templates/configmap.yaml | 1 + chart/values.yaml | 1 + config/config.go | 13 ++++++++--- main.go | 1 + utils/utils.go | 1 + 7 files changed, 34 insertions(+), 17 deletions(-) diff --git a/api/v1beta1/cron_rollout_helpers.go b/api/v1beta1/cron_rollout_helpers.go index 01a3c2521..4d56c2cd5 100644 --- a/api/v1beta1/cron_rollout_helpers.go +++ b/api/v1beta1/cron_rollout_helpers.go @@ -2,6 +2,7 @@ // under the Apache License Version 2.0. // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2025 Datadog, Inc. + package v1beta1 import ( @@ -12,6 +13,7 @@ import ( "time" cLog "github.com/DataDog/chaos-controller/log" + "go.uber.org/zap" appsv1 "k8s.io/api/apps/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" diff --git a/api/v1beta1/disruption_cron_webhook.go b/api/v1beta1/disruption_cron_webhook.go index ac751395b..142e22a47 100644 --- a/api/v1beta1/disruption_cron_webhook.go +++ b/api/v1beta1/disruption_cron_webhook.go @@ -41,6 +41,7 @@ var ( disruptionCronPermittedUserGroups map[string]struct{} disruptionCronPermittedUserGroupString string disruptionCronMetricsSink metrics.Sink + requireDisruptionCronTarget bool defaultCronDelayedStartTolerance time.Duration minimumCronFrequency time.Duration ) @@ -64,6 +65,7 @@ func (d *DisruptionCron) SetupWebhookWithManager(setupWebhookConfig utils.SetupW defaultCronDelayedStartTolerance = setupWebhookConfig.DefaultCronDelayedStartTolerance minimumCronFrequency = setupWebhookConfig.MinimumCronFrequency defaultDuration = setupWebhookConfig.DefaultDurationFlag + requireDisruptionCronTarget = setupWebhookConfig.RequireDisruptionCronTarget return ctrl.NewWebhookManagedBy(setupWebhookConfig.Manager). For(d). @@ -123,20 +125,22 @@ func (d *DisruptionCron) ValidateCreate() (_ admission.Warnings, err error) { return nil, err } - var exists bool - - // CheckTargetResourceExists doesn't return apierrors.NotFound. Which means if there is an error, - // we could not determine if the target existed, and should allow the Create. - if exists, err = CheckTargetResourceExists(context.Background(), k8sClient, &d.Spec.TargetResource, d.Namespace); err != nil { - log.Errorw("error checking if target resource exists", "error", err) - } else if !exists { - log.Warnw("rejecting disruption cron because target does not exist", - "targetName", d.Spec.TargetResource.Name, - "targetKind", d.Spec.TargetResource.Kind, - "error", err) - - return nil, fmt.Errorf("rejecting disruption cron because target %s %s/%s does not exist", - d.Spec.TargetResource.Kind, d.Namespace, d.Spec.TargetResource.Name) + if requireDisruptionCronTarget { + var exists bool + + // CheckTargetResourceExists doesn't return apierrors.NotFound. Which means if there is an error, + // we could not determine if the target existed, and should allow the Create. + if exists, err = CheckTargetResourceExists(context.Background(), k8sClient, &d.Spec.TargetResource, d.Namespace); err != nil { + log.Errorw("error checking if target resource exists", "error", err) + } else if !exists { + log.Warnw("rejecting disruption cron because target does not exist", + "targetName", d.Spec.TargetResource.Name, + "targetKind", d.Spec.TargetResource.Kind, + "error", err) + + return nil, fmt.Errorf("rejecting disruption cron because target %s %s/%s does not exist", + d.Spec.TargetResource.Kind, d.Namespace, d.Spec.TargetResource.Name) + } } // send informative event to disruption cron to broadcast diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml index fade27de1..297aaee3a 100644 --- a/chart/templates/configmap.yaml +++ b/chart/templates/configmap.yaml @@ -84,6 +84,7 @@ data: allowNodeLevel: {{ .Values.controller.safeMode.allowNodeLevel }} allowNodeFailure: {{ .Values.controller.safeMode.allowNodeFailure }} disruptionCronEnabled: {{ .Values.controller.disruptionCronEnabled }} + requireDisruptionCronTarget: {{ .Values.controller.requireDisruptionCronTarget | default false }} disruptionRolloutEnabled: {{ .Values.controller.disruptionRolloutEnabled }} disruptionDeletionTimeout: {{ .Values.controller.disruptionDeletionTimeout }} disabledDisruptions: diff --git a/chart/values.yaml b/chart/values.yaml index 4491cdfe7..057898c23 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -94,6 +94,7 @@ controller: memory: 300Mi ephemeralStorage: 1Gi disruptionCronEnabled: true + requireDisruptionCronTarget: false # If set to true, disruption crons will be rejected on create if we cannot find their target disruptionRolloutEnabled: false disruptionDeletionTimeout: 15m # The duration after which a disruption will be marked as "stuck on removal" if its removal process exceeds this duration. aggregateToClusterRole: false # If this is true two aggregated cluster roles are created for viewing and editing (https://kubernetes.io/docs/reference/access-authn-authz/rbac/#aggregated-clusterroles) diff --git a/config/config.go b/config/config.go index db7a57c0e..2253d4d63 100644 --- a/config/config.go +++ b/config/config.go @@ -11,6 +11,9 @@ import ( "os" "time" + cloudtypes "github.com/DataDog/chaos-controller/cloudservice/types" + "github.com/DataDog/chaos-controller/eventnotifier" + "github.com/cenkalti/backoff" "github.com/fsnotify/fsnotify" "github.com/spf13/pflag" @@ -20,9 +23,6 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" corev1client "k8s.io/client-go/kubernetes/typed/core/v1" - - cloudtypes "github.com/DataDog/chaos-controller/cloudservice/types" - "github.com/DataDog/chaos-controller/eventnotifier" ) type config struct { @@ -52,6 +52,7 @@ type controllerConfig struct { ProfilerSink string `json:"profilerSink" yaml:"profilerSink"` TracerSink string `json:"tracerSink" yaml:"tracerSink"` DisruptionCronEnabled bool `json:"disruptionCronEnabled" yaml:"disruptionCronEnabled"` + RequireDisruptionCronTarget bool `json:"requireDisruptionCronTarget" yaml:"requireDisruptionCronTarget"` DisruptionRolloutEnabled bool `json:"disruptionRolloutEnabled" yaml:"disruptionRolloutEnabled"` DisruptionDeletionTimeout time.Duration `json:"disruptionDeletionTimeout" yaml:"disruptionDeletionTimeout"` FinalizerDeletionDelay time.Duration `json:"finalizerDeletionDelay" yaml:"finalizerDeletionDelay"` @@ -552,6 +553,12 @@ func New(client corev1client.ConfigMapInterface, logger *zap.SugaredLogger, osAr return cfg, err } + mainFS.BoolVar(&cfg.Controller.RequireDisruptionCronTarget, "require-disruption-cron-target", false, "Reject disruption crons on create if their target cannot be found") + + if err := viper.BindPFlag("controller.requireDisruptionCronTarget", mainFS.Lookup("require-disruption-cron-target")); err != nil { + return cfg, err + } + mainFS.BoolVar(&cfg.Controller.DisruptionRolloutEnabled, "disruption-rollout-enabled", false, "Enable the DisruptionRollout CRD and its controller") if err := viper.BindPFlag("controller.disruptionRolloutEnabled", mainFS.Lookup("disruption-rollout-enabled")); err != nil { diff --git a/main.go b/main.go index 3553afe72..0a72a3516 100644 --- a/main.go +++ b/main.go @@ -410,6 +410,7 @@ func main() { DefaultCronDelayedStartTolerance: cfg.Controller.DefaultCronDelayedStartTolerance, MinimumCronFrequency: cfg.Controller.MinimumCronFrequency, DefaultDurationFlag: cfg.Controller.DefaultDuration, + RequireDisruptionCronTarget: cfg.Controller.RequireDisruptionCronTarget, MetricsSink: disruptionCronMetricsSink, } diff --git a/utils/utils.go b/utils/utils.go index 635963ee0..078dad0a2 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -50,6 +50,7 @@ type SetupWebhookWithManagerConfig struct { EnableSafemodeFlag bool AllowNodeLevel bool AllowNodeFailure bool + RequireDisruptionCronTarget bool DisabledDisruptions []string DeleteOnlyFlag bool HandlerEnabledFlag bool