Skip to content

Commit e9dd2ce

Browse files
authored
CHAOS-232: Moving chaos pod logic from the chaos controller to a dedicated service (#764)
* Reducing the responsibility of the disruption controller by moving the logic of chaos pods into a dedicated service called ChaosPodService. * Refactoring cloud service manager to be able to mock it. * refactor: use context from the controller * fix: chaos pod service unit test + wrong behavior * refacto: wrong usage of logs labels Jira: CHAOS-232
1 parent 3ca0133 commit e9dd2ce

31 files changed

+4883
-1386
lines changed

.vendor.mockery.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ inpackage: False
1212
# If you wish to mock an interface from the vendor, you need to define both the package and the specific interface you want to mock.
1313

1414
packages:
15+
net/http:
16+
interfaces:
17+
RoundTripper:
18+
config:
19+
mockname: RoundTripperMock
1520
sigs.k8s.io/controller-runtime/pkg/controller:
1621
interfaces:
1722
Controller:
@@ -22,6 +27,9 @@ packages:
2227
Reader:
2328
config:
2429
mockname: ReaderMock
30+
Client:
31+
config:
32+
mockname: K8SClientMock
2533
k8s.io/client-go/tools/record:
2634
interfaces:
2735
EventRecorder:

api/v1beta1/disk_failure_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ var _ = Describe("DiskFailureSpec", func() {
4141

4242
// Assert
4343
Expect(err).To(HaveOccurred())
44-
Expect(err.Error()).Should(Equal(expectedError))
44+
Expect(err).To(MatchError(expectedError))
4545
},
4646
Entry("with a path exceeding 62 characters",
4747
DiskFailureSpec{

api/v1beta1/disruption_types.go

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"github.com/DataDog/chaos-controller/utils"
2525
"github.com/hashicorp/go-multierror"
2626
corev1 "k8s.io/api/core/v1"
27+
apierrors "k8s.io/apimachinery/pkg/api/errors"
2728
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2829
"k8s.io/apimachinery/pkg/labels"
2930
"k8s.io/apimachinery/pkg/selection"
@@ -93,6 +94,14 @@ type DisruptionTriggers struct {
9394
CreatePods DisruptionTrigger `json:"createPods,omitempty"`
9495
}
9596

97+
type TerminationStatus uint8
98+
99+
const (
100+
TSNotTerminated TerminationStatus = iota
101+
TSTemporarilyTerminated
102+
TSDefinitivelyTerminated
103+
)
104+
96105
func (dt DisruptionTriggers) IsZero() bool {
97106
return dt.Inject.IsZero() && dt.CreatePods.IsZero()
98107
}
@@ -248,6 +257,137 @@ type Disruption struct {
248257
Status DisruptionStatus `json:"status,omitempty"`
249258
}
250259

260+
// TimeToInject calculates the time at which the disruption should be injected based on its own creationTimestamp.
261+
// It considers the specified triggers for injection timing in the disruption's specification.
262+
func (r *Disruption) TimeToInject() time.Time {
263+
triggers := r.Spec.Triggers
264+
265+
if triggers.IsZero() {
266+
return r.CreationTimestamp.Time
267+
}
268+
269+
if triggers.Inject.IsZero() {
270+
return r.TimeToCreatePods()
271+
}
272+
273+
var notInjectedBefore time.Time
274+
275+
// validation should have already prevented a situation where both Offset and NotBefore are set
276+
if !triggers.Inject.NotBefore.IsZero() {
277+
notInjectedBefore = triggers.Inject.NotBefore.Time
278+
}
279+
280+
if triggers.Inject.Offset.Duration() > 0 {
281+
// We measure the offset from the latter of two timestamps: creationTimestamp of the disruption, and spec.trigger.createPods
282+
notInjectedBefore = r.TimeToCreatePods().Add(triggers.Inject.Offset.Duration())
283+
}
284+
285+
if r.CreationTimestamp.Time.After(notInjectedBefore) {
286+
return r.CreationTimestamp.Time
287+
}
288+
289+
return notInjectedBefore
290+
}
291+
292+
// TimeToCreatePods takes the DisruptionTriggers field from a Disruption spec, along with the time.Time at which that disruption was created
293+
// It returns the earliest time.Time at which the chaos-controller should begin creating chaos pods, given the specified DisruptionTriggers
294+
func (r *Disruption) TimeToCreatePods() time.Time {
295+
triggers := r.Spec.Triggers
296+
297+
if triggers.IsZero() {
298+
return r.CreationTimestamp.Time
299+
}
300+
301+
if triggers.CreatePods.IsZero() {
302+
return r.CreationTimestamp.Time
303+
}
304+
305+
var noPodsBefore time.Time
306+
307+
// validation should have already prevented a situation where both Offset and NotBefore are set
308+
if !triggers.CreatePods.NotBefore.IsZero() {
309+
noPodsBefore = triggers.CreatePods.NotBefore.Time
310+
}
311+
312+
if triggers.CreatePods.Offset.Duration() > 0 {
313+
noPodsBefore = r.CreationTimestamp.Add(triggers.CreatePods.Offset.Duration())
314+
}
315+
316+
if r.CreationTimestamp.After(noPodsBefore) {
317+
return r.CreationTimestamp.Time
318+
}
319+
320+
return noPodsBefore
321+
}
322+
323+
// RemainingDuration return the remaining duration of the disruption.
324+
func (r *Disruption) RemainingDuration() time.Duration {
325+
return r.calculateDeadline(
326+
r.Spec.Duration.Duration(),
327+
r.TimeToInject(),
328+
)
329+
}
330+
331+
func (r *Disruption) calculateDeadline(duration time.Duration, creationTime time.Time) time.Duration {
332+
// first we must calculate the timeout from when the disruption was created, not from now
333+
timeout := creationTime.Add(duration)
334+
now := time.Now() // rather not take the risk that the time changes by a second during this function
335+
336+
// return the number of seconds between now and the deadline
337+
return timeout.Sub(now)
338+
}
339+
340+
// TerminationStatus determines the termination status of a disruption based on various factors.
341+
func (r *Disruption) TerminationStatus(chaosPods []corev1.Pod) TerminationStatus {
342+
// a not yet created disruption is neither temporarily nor definitively ended
343+
if r.CreationTimestamp.IsZero() {
344+
return TSNotTerminated
345+
}
346+
347+
// a definitive state (expired duration or deletion) imply a definitively deleted injection
348+
// and should be returned prior to a temporarily terminated state
349+
if r.RemainingDuration() <= 0 || !r.DeletionTimestamp.IsZero() {
350+
return TSDefinitivelyTerminated
351+
}
352+
353+
if len(chaosPods) == 0 {
354+
// we were never injected, we are hence not terminated if we reach here
355+
if r.Status.InjectionStatus.NeverInjected() {
356+
return TSNotTerminated
357+
}
358+
359+
// we were injected before hence temporarily not terminated
360+
return TSTemporarilyTerminated
361+
}
362+
363+
// if all pods exited successfully, we can consider the disruption is ended already
364+
// it can be caused by either an appromixative date sync (in a distributed infra it's hard)
365+
// or by deletion of targets leading to deletion of injectors
366+
// injection terminated with an error are considered NOT terminated
367+
for _, chaosPod := range chaosPods {
368+
for _, containerStatuses := range chaosPod.Status.ContainerStatuses {
369+
if containerStatuses.State.Terminated == nil || containerStatuses.State.Terminated.ExitCode != 0 {
370+
return TSNotTerminated
371+
}
372+
}
373+
}
374+
375+
// this MIGHT be a temporary status, that could become definitive once disruption is expired or deleted
376+
return TSTemporarilyTerminated
377+
}
378+
379+
// GetTargetsCountAsInt This function returns a scaled value from the spec.Count IntOrString type. If the count
380+
// // is a percentage string value it's treated as a percentage and scaled appropriately
381+
// // in accordance to the total, if it's an int value it's treated as a simple value and
382+
// // if it is a string value which is either non-numeric or numeric but lacking a trailing '%' it returns an error.
383+
func (r *Disruption) GetTargetsCountAsInt(targetTotal int, roundUp bool) (int, error) {
384+
if r.Spec.Count == nil {
385+
return 0, apierrors.NewBadRequest("nil value for IntOrString")
386+
}
387+
388+
return intstr.GetScaledValueFromIntOrPercent(r.Spec.Count, targetTotal, roundUp)
389+
}
390+
251391
// +kubebuilder:object:root=true
252392

253393
// DisruptionList contains a list of Disruption

0 commit comments

Comments
 (0)