Skip to content

Commit 29becc3

Browse files
committed
fix(chart): default prometheusrules
1 parent e2ed0bc commit 29becc3

File tree

3 files changed

+33
-8
lines changed

3 files changed

+33
-8
lines changed

chart/saml-exporter/README.md

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,24 @@ To install the chart with the release name `saml-exporter`:
1010
helm upgrade saml-exporter --install oci://ghcr.io/doodlescheduling/charts/saml-exporter
1111
```
1212

13-
This command deploys the MongoDB Exporter with the default configuration. The [configuration](#configuration) section lists the parameters that can be configured during installation.
13+
This command deploys the SAML Exporter with the default configuration. The [configuration](#configuration) section lists the parameters that can be configured during installation.
1414

1515
## Using the Chart
1616

1717
The chart comes with a ServiceMonitor (or PodMonitor) for use with the [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator).
18-
If you're not using the Prometheus Operator, you can disable the ServiceMonitor by setting `serviceMonitor.enabled` to `false` and instead
19-
populate the `podAnnotations` as below:
18+
The chart also bundles sane default PrometheusRules that alert on invalid metadata, expiring certificates, and unavailable endpoints.
19+
Prometheus Operator support is disabled by default but may be enabled using:
20+
21+
```yaml
22+
podMonitor:
23+
enabled: true
24+
25+
prometheusRule:
26+
enabled: true
27+
```
28+
29+
30+
If you're not using the Prometheus Operator, you may add pod annotations for scraping:
2031

2132
```yaml
2233
podAnnotations:

chart/saml-exporter/templates/prometheusrule.yaml

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
{{- $rulePrefix:= .Values.prometheusRule.rulePrefix }}
33
apiVersion: monitoring.coreos.com/v1
44
kind: PrometheusRule
5-
metadata:
65
metadata:
76
name: {{ template "saml-exporter.fullname" . }}
87
labels:
@@ -32,7 +31,7 @@ spec:
3231
- name: saml-exporter.rules
3332
rules:
3433
{{- if .Values.prometheusRule.alertOnReadErrors }}
35-
- alert: '{{ printf "%s %s" $rulePrefix "X509CertificateReadErrors" | trim }}'
34+
- alert: '{{ printf "%s%s" $rulePrefix "X509CertificateReadErrors" | trim }}'
3635
expr: delta(saml_x509_read_errors[15m]) > 0
3736
for: 5m
3837
labels:
@@ -48,7 +47,7 @@ spec:
4847
{{- end }}
4948
{{- end }}
5049
{{- if .Values.prometheusRule.alertOnMetadataErrors }}
51-
- alert: '{{ printf "%s %s" $rulePrefix "MetadataError" | trim }}'
50+
- alert: '{{ printf "%s%s" $rulePrefix "MetadataError" | trim }}'
5251
expr: delta(saml_metadata_errors[15m]) > 0
5352
for: 5m
5453
labels:
@@ -63,7 +62,7 @@ spec:
6362
{{- toYaml .Values.prometheusRule.alertExtraAnnotations | nindent 8 }}
6463
{{- end }}
6564
{{- end }}
66-
- alert: '{{ printf "%s %s" $rulePrefix "X509CertificateRenewal" | trim }}'
65+
- alert: '{{ printf "%s%s" $rulePrefix "X509CertificateRenewal" | trim }}'
6766
expr: count(saml_x509_cert_not_after) by (entityid, use) - count(((saml_x509_cert_not_after - time()) / 86400) < {{ .Values.prometheusRule.warningDaysLeft }}) by (entityid, use) == 0
6867
for: 15m
6968
labels:
@@ -77,7 +76,7 @@ spec:
7776
{{- if .Values.prometheusRule.alertExtraAnnotations }}
7877
{{- toYaml .Values.prometheusRule.alertExtraAnnotations | nindent 8 }}
7978
{{- end }}
80-
- alert: '{{ printf "%s %s" $rulePrefix "X509CertificateExpiration" | trim }}'
79+
- alert: '{{ printf "%s%s" $rulePrefix "X509CertificateExpiration" | trim }}'
8180
expr: count(saml_x509_cert_not_after) by (entityid, use) - count(((saml_x509_cert_not_after - time()) / 86400) < {{ .Values.prometheusRule.criticalDaysLeft }}) by (entityid, use) == 0
8281
for: 15m
8382
labels:
@@ -91,6 +90,20 @@ spec:
9190
{{- if .Values.prometheusRule.alertExtraAnnotations }}
9291
{{- toYaml .Values.prometheusRule.alertExtraAnnotations | nindent 8 }}
9392
{{- end }}
93+
- alert: '{{ printf "%s%s" $rulePrefix "UnavailableMetadataEndpoint" | trim }}'
94+
expr: sum(rate(http_client_request{job="{{ include "saml-exporter.fullname" . }}", code!~"^2.*"}[5m])) > 0
95+
for: 3m
96+
labels:
97+
severity: {{ .Values.prometheusRule.unavailableMetadataEndpointSeverity }}
98+
{{- if .Values.prometheusRule.alertExtraLabels }}
99+
{{- toYaml .Values.prometheusRule.alertExtraLabels | nindent 8 }}
100+
{{- end }}
101+
annotations:
102+
summary: A SAML metadata endpoint is unreachable
103+
description: A SAML metadata endpoint "{{ "{{" }} $labels.host {{ "}}" }}" is not responding with a 2xx code for over 5m.
104+
{{- if .Values.prometheusRule.alertExtraAnnotations }}
105+
{{- toYaml .Values.prometheusRule.alertExtraAnnotations | nindent 8 }}
106+
{{- end }}
94107
{{- end }}
95108
{{- range .Values.prometheusRule.extraAlertGroups }}
96109
- {{ tpl (toYaml .) $ | indent 4 | trim }}

chart/saml-exporter/values.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ prometheusRule:
153153
metadataErrorsSeverity: warning
154154
certificateRenewalsSeverity: warning
155155
certificateExpirationsSeverity: critical
156+
unavailableMetadataEndpointSeverity: critical
156157
warningDaysLeft: 28
157158
criticalDaysLeft: 14
158159

0 commit comments

Comments
 (0)