diff --git a/README.md b/README.md index 19581f58..ff56cfb3 100644 --- a/README.md +++ b/README.md @@ -197,6 +197,7 @@ Use `UTC`, `Local` or pick a timezone name from the [(IANA) tz database](https:/ | `--dry-run` | don't kill pods, only log what would have been done | true | | `--log-format` | specify the format of the log messages. Options are text and json | text | | `--log-caller` | include the calling function name and location in the log messages | false | +| `--webhook` | filter pods by a POST webhook, if non HTTP 200 returned, exclude pod | no webhook calls | ## Related work diff --git a/chaoskube/chaoskube.go b/chaoskube/chaoskube.go index eaae805a..5912d307 100644 --- a/chaoskube/chaoskube.go +++ b/chaoskube/chaoskube.go @@ -1,9 +1,13 @@ package chaoskube import ( + "bytes" "context" + "encoding/json" "errors" "fmt" + "net/http" + "net/url" "regexp" "time" @@ -67,6 +71,8 @@ type Chaoskube struct { Now func() time.Time MaxKill int + // Webhook + Webhook url.URL } var ( @@ -90,7 +96,7 @@ var ( // * a logger implementing logrus.FieldLogger to send log output to // * what specific terminator to use to imbue chaos on victim pods // * whether to enable/disable dry-run mode -func New(client kubernetes.Interface, labels, annotations, namespaces, namespaceLabels labels.Selector, includedPodNames, excludedPodNames *regexp.Regexp, excludedWeekdays []time.Weekday, excludedTimesOfDay []util.TimePeriod, excludedDaysOfYear []time.Time, timezone *time.Location, minimumAge time.Duration, logger log.FieldLogger, dryRun bool, terminator terminator.Terminator, maxKill int) *Chaoskube { +func New(client kubernetes.Interface, labels, annotations, namespaces, namespaceLabels labels.Selector, includedPodNames, excludedPodNames *regexp.Regexp, excludedWeekdays []time.Weekday, excludedTimesOfDay []util.TimePeriod, excludedDaysOfYear []time.Time, timezone *time.Location, minimumAge time.Duration, logger log.FieldLogger, dryRun bool, terminator terminator.Terminator, maxKill int, Webhook url.URL) *Chaoskube { broadcaster := record.NewBroadcaster() broadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: client.CoreV1().Events(v1.NamespaceAll)}) recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "chaoskube"}) @@ -114,6 +120,7 @@ func New(client kubernetes.Interface, labels, annotations, namespaces, namespace EventRecorder: recorder, Now: time.Now, MaxKill: maxKill, + Webhook: Webhook, } } @@ -224,6 +231,7 @@ func (c *Chaoskube) Candidates() ([]v1.Pod, error) { pods = filterByMinimumAge(pods, c.MinimumAge, c.Now()) pods = filterByPodName(pods, c.IncludedPodNames, c.ExcludedPodNames) pods = filterByOwnerReference(pods) + pods = filterByWebhook(pods, c.Webhook) return pods, nil } @@ -455,3 +463,32 @@ func filterByOwnerReference(pods []v1.Pod) []v1.Pod { return filteredList } + +// filterByWebhook filters pods by a POST webhook. Only pods where the webhooks returns an +// HTTP 200 are returned +func filterByWebhook(pods []v1.Pod, url url.URL) []v1.Pod { + // return early if url is not given + if url.String() == "" { + return pods + } + + filteredList := []v1.Pod{} + + for _, pod := range pods { + postData := new(bytes.Buffer) + err := json.NewEncoder(postData).Encode(pod) + if err != nil { + continue + } + resp, err := http.Post(url.String(), "application/json", postData) + if err != nil { + continue + } + + if resp.StatusCode == http.StatusOK { + filteredList = append(filteredList, pod) + } + } + + return filteredList +} diff --git a/chaoskube/chaoskube_test.go b/chaoskube/chaoskube_test.go index 970421fb..f355bee2 100644 --- a/chaoskube/chaoskube_test.go +++ b/chaoskube/chaoskube_test.go @@ -3,6 +3,7 @@ package chaoskube import ( "context" "math/rand" + "net/url" "regexp" "testing" "time" @@ -53,7 +54,7 @@ func (suite *Suite) TestNew() { includedPodNames = regexp.MustCompile("foo") excludedPodNames = regexp.MustCompile("bar") excludedWeekdays = []time.Weekday{time.Friday} - excludedTimesOfDay = []util.TimePeriod{util.TimePeriod{}} + excludedTimesOfDay = []util.TimePeriod{{}} excludedDaysOfYear = []time.Time{time.Now()} minimumAge = time.Duration(42) dryRun = true @@ -61,6 +62,7 @@ func (suite *Suite) TestNew() { maxKill = 1 ) + webhook, _ := url.Parse("") chaoskube := New( client, labelSelector, @@ -78,6 +80,7 @@ func (suite *Suite) TestNew() { dryRun, terminator, maxKill, + *webhook, ) suite.Require().NotNil(chaoskube) @@ -115,6 +118,7 @@ func (suite *Suite) TestRunContextCanceled() { false, 10, 1, + url.URL{}, ) ctx, cancel := context.WithCancel(context.Background()) @@ -132,19 +136,24 @@ func (suite *Suite) TestCandidates() { labelSelector string annotationSelector string namespaceSelector string + webhook string pods []map[string]string }{ - {"", "", "", []map[string]string{foo, bar}}, - {"app=foo", "", "", []map[string]string{foo}}, - {"app!=foo", "", "", []map[string]string{bar}}, - {"", "chaos=foo", "", []map[string]string{foo}}, - {"", "chaos!=foo", "", []map[string]string{bar}}, - {"", "", "default", []map[string]string{foo}}, - {"", "", "default,testing", []map[string]string{foo, bar}}, - {"", "", "!testing", []map[string]string{foo}}, - {"", "", "!default,!testing", []map[string]string{}}, - {"", "", "default,!testing", []map[string]string{foo}}, - {"", "", "default,!default", []map[string]string{}}, + {"", "", "", "", []map[string]string{foo, bar}}, + {"app=foo", "", "", "", []map[string]string{foo}}, + {"app!=foo", "", "", "", []map[string]string{bar}}, + {"", "chaos=foo", "", "", []map[string]string{foo}}, + {"", "chaos!=foo", "", "", []map[string]string{bar}}, + {"", "", "default", "", []map[string]string{foo}}, + {"", "", "default,testing", "", []map[string]string{foo, bar}}, + {"", "", "!testing", "", []map[string]string{foo}}, + {"", "", "!default,!testing", "", []map[string]string{}}, + {"", "", "default,!testing", "", []map[string]string{foo}}, + {"", "", "default,!default", "", []map[string]string{}}, + {"", "", "", "https://httpbin.org/status/404", []map[string]string{}}, + {"", "", "", "https://httpbin.org/get", []map[string]string{}}, + {"", "", "", "https://httpbin.org/status/200", []map[string]string{foo, bar}}, + {"", "", "", "https://httpbin.org/post", []map[string]string{foo, bar}}, } { labelSelector, err := labels.Parse(tt.labelSelector) suite.Require().NoError(err) @@ -155,6 +164,9 @@ func (suite *Suite) TestCandidates() { namespaceSelector, err := labels.Parse(tt.namespaceSelector) suite.Require().NoError(err) + webhook, err := url.Parse(tt.webhook) + suite.Require().NoError(err) + chaoskube := suite.setupWithPods( labelSelector, annotationSelector, @@ -169,6 +181,7 @@ func (suite *Suite) TestCandidates() { time.Duration(0), false, 10, + *webhook, ) suite.assertCandidates(chaoskube, tt.pods) @@ -213,6 +226,7 @@ func (suite *Suite) TestCandidatesNamespaceLabels() { time.Duration(0), false, 10, + url.URL{}, ) suite.assertCandidates(chaoskube, tt.pods) @@ -255,6 +269,7 @@ func (suite *Suite) TestCandidatesPodNameRegexp() { time.Duration(0), false, 10, + url.URL{}, ) suite.assertCandidates(chaoskube, tt.pods) @@ -294,6 +309,7 @@ func (suite *Suite) TestVictim() { time.Duration(0), false, 10, + url.URL{}, ) suite.assertVictim(chaoskube, tt.victim) @@ -347,6 +363,7 @@ func (suite *Suite) TestVictims() { false, 10, tt.maxKill, + url.URL{}, ) suite.createPods(chaoskube.Client, podsInfo) @@ -371,6 +388,7 @@ func (suite *Suite) TestNoVictimReturnsError() { false, 10, 1, + url.URL{}, ) _, err := chaoskube.Victims() @@ -404,6 +422,7 @@ func (suite *Suite) TestDeletePod() { time.Duration(0), tt.dryRun, 10, + url.URL{}, ) victim := util.NewPod("default", "foo", v1.PodRunning) @@ -433,6 +452,7 @@ func (suite *Suite) TestDeletePodNotFound() { false, 10, 1, + url.URL{}, ) victim := util.NewPod("default", "foo", v1.PodRunning) @@ -663,6 +683,7 @@ func (suite *Suite) TestTerminateVictim() { time.Duration(0), false, 10, + url.URL{}, ) chaoskube.Now = tt.now @@ -693,6 +714,7 @@ func (suite *Suite) TestTerminateNoVictimLogsInfo() { false, 10, 1, + url.URL{}, ) err := chaoskube.TerminateVictims() @@ -723,7 +745,7 @@ func (suite *Suite) assertVictim(chaoskube *Chaoskube, expected map[string]strin suite.assertVictims(chaoskube, []map[string]string{expected}) } -func (suite *Suite) setupWithPods(labelSelector labels.Selector, annotations labels.Selector, namespaces labels.Selector, namespaceLabels labels.Selector, includedPodNames *regexp.Regexp, excludedPodNames *regexp.Regexp, excludedWeekdays []time.Weekday, excludedTimesOfDay []util.TimePeriod, excludedDaysOfYear []time.Time, timezone *time.Location, minimumAge time.Duration, dryRun bool, gracePeriod time.Duration) *Chaoskube { +func (suite *Suite) setupWithPods(labelSelector labels.Selector, annotations labels.Selector, namespaces labels.Selector, namespaceLabels labels.Selector, includedPodNames *regexp.Regexp, excludedPodNames *regexp.Regexp, excludedWeekdays []time.Weekday, excludedTimesOfDay []util.TimePeriod, excludedDaysOfYear []time.Time, timezone *time.Location, minimumAge time.Duration, dryRun bool, gracePeriod time.Duration, webhook url.URL) *Chaoskube { chaoskube := suite.setup( labelSelector, annotations, @@ -739,6 +761,7 @@ func (suite *Suite) setupWithPods(labelSelector labels.Selector, annotations lab dryRun, gracePeriod, 1, + webhook, ) for _, namespace := range []v1.Namespace{ @@ -774,7 +797,7 @@ func (suite *Suite) createPods(client kubernetes.Interface, podsInfo []podInfo) } } -func (suite *Suite) setup(labelSelector labels.Selector, annotations labels.Selector, namespaces labels.Selector, namespaceLabels labels.Selector, includedPodNames *regexp.Regexp, excludedPodNames *regexp.Regexp, excludedWeekdays []time.Weekday, excludedTimesOfDay []util.TimePeriod, excludedDaysOfYear []time.Time, timezone *time.Location, minimumAge time.Duration, dryRun bool, gracePeriod time.Duration, maxKill int) *Chaoskube { +func (suite *Suite) setup(labelSelector labels.Selector, annotations labels.Selector, namespaces labels.Selector, namespaceLabels labels.Selector, includedPodNames *regexp.Regexp, excludedPodNames *regexp.Regexp, excludedWeekdays []time.Weekday, excludedTimesOfDay []util.TimePeriod, excludedDaysOfYear []time.Time, timezone *time.Location, minimumAge time.Duration, dryRun bool, gracePeriod time.Duration, maxKill int, webhook url.URL) *Chaoskube { logOutput.Reset() client := fake.NewSimpleClientset() @@ -797,6 +820,7 @@ func (suite *Suite) setup(labelSelector labels.Selector, annotations labels.Sele dryRun, terminator.NewDeletePodTerminator(client, nullLogger, gracePeriod), maxKill, + webhook, ) } @@ -901,6 +925,7 @@ func (suite *Suite) TestMinimumAge() { false, 10, 1, + url.URL{}, ) chaoskube.Now = tt.now diff --git a/examples/chaoskube.yaml b/examples/chaoskube.yaml index 25d027ba..0f10f2c7 100644 --- a/examples/chaoskube.yaml +++ b/examples/chaoskube.yaml @@ -38,6 +38,8 @@ spec: - --timezone=UTC # exclude all pods that haven't been running for at least one hour - --minimum-age=1h + # check with a webhook before killing a pod + - --webhook=https://httpbin.org/post # terminate pods for real: this disables dry-run mode which is on by default - --no-dry-run securityContext: diff --git a/main.go b/main.go index 3cf2b45c..3ad523ac 100644 --- a/main.go +++ b/main.go @@ -7,6 +7,7 @@ import ( "math/rand" "net/http" _ "net/http/pprof" + "net/url" "os" "os/signal" "path" @@ -53,6 +54,7 @@ var ( dryRun bool debug bool metricsAddress string + webhook string gracePeriod time.Duration logFormat string logCaller bool @@ -68,6 +70,7 @@ func init() { kingpin.Flag("namespace-labels", "A set of labels to restrict the list of affected namespaces. Defaults to everything.").StringVar(&nsLabelString) kingpin.Flag("included-pod-names", "Regular expression that defines which pods to include. All included by default.").RegexpVar(&includedPodNames) kingpin.Flag("excluded-pod-names", "Regular expression that defines which pods to exclude. None excluded by default.").RegexpVar(&excludedPodNames) + kingpin.Flag("webhook", "An HTTP webhook to execute before killing a pod").StringVar(&webhook) kingpin.Flag("excluded-weekdays", "A list of weekdays when termination is suspended, e.g. Sat,Sun").StringVar(&excludedWeekdays) kingpin.Flag("excluded-times-of-day", "A list of time periods of a day when termination is suspended, e.g. 22:00-08:00").StringVar(&excludedTimesOfDay) kingpin.Flag("excluded-days-of-year", "A list of days of a year when termination is suspended, e.g. Apr1,Dec24").StringVar(&excludedDaysOfYear) @@ -123,6 +126,7 @@ func main() { "metricsAddress": metricsAddress, "gracePeriod": gracePeriod, "logFormat": logFormat, + "webhook": webhook, }).Debug("reading config") log.WithFields(log.Fields{ @@ -185,6 +189,14 @@ func main() { } timezoneName, offset := time.Now().In(parsedTimezone).Zone() + parsedWebhook, err := url.Parse(webhook) + if err != nil { + log.WithFields(log.Fields{ + "webhook": webhook, + "err": err, + }).Fatal("failed to parse webhook") + } + log.WithFields(log.Fields{ "name": timezoneName, "location": parsedTimezone, @@ -208,6 +220,7 @@ func main() { dryRun, terminator.NewDeletePodTerminator(client, log.StandardLogger(), gracePeriod), maxKill, + *parsedWebhook, ) if metricsAddress != "" {