|
2 | 2 | {{- $rulePrefix:= .Values.prometheusRule.rulePrefix }}
|
3 | 3 | apiVersion: monitoring.coreos.com/v1
|
4 | 4 | kind: PrometheusRule
|
5 |
| -metadata: |
6 | 5 | metadata:
|
7 | 6 | name: {{ template "saml-exporter.fullname" . }}
|
8 | 7 | labels:
|
|
32 | 31 | - name: saml-exporter.rules
|
33 | 32 | rules:
|
34 | 33 | {{- if .Values.prometheusRule.alertOnReadErrors }}
|
35 |
| - - alert: '{{ printf "%s %s" $rulePrefix "X509CertificateReadErrors" | trim }}' |
| 34 | + - alert: '{{ printf "%s%s" $rulePrefix "X509CertificateReadErrors" | trim }}' |
36 | 35 | expr: delta(saml_x509_read_errors[15m]) > 0
|
37 | 36 | for: 5m
|
38 | 37 | labels:
|
|
48 | 47 | {{- end }}
|
49 | 48 | {{- end }}
|
50 | 49 | {{- if .Values.prometheusRule.alertOnMetadataErrors }}
|
51 |
| - - alert: '{{ printf "%s %s" $rulePrefix "MetadataError" | trim }}' |
| 50 | + - alert: '{{ printf "%s%s" $rulePrefix "MetadataError" | trim }}' |
52 | 51 | expr: delta(saml_metadata_errors[15m]) > 0
|
53 | 52 | for: 5m
|
54 | 53 | labels:
|
|
63 | 62 | {{- toYaml .Values.prometheusRule.alertExtraAnnotations | nindent 8 }}
|
64 | 63 | {{- end }}
|
65 | 64 | {{- end }}
|
66 |
| - - alert: '{{ printf "%s %s" $rulePrefix "X509CertificateRenewal" | trim }}' |
| 65 | + - alert: '{{ printf "%s%s" $rulePrefix "X509CertificateRenewal" | trim }}' |
67 | 66 | expr: count(saml_x509_cert_not_after) by (entityid, use) - count(((saml_x509_cert_not_after - time()) / 86400) < {{ .Values.prometheusRule.warningDaysLeft }}) by (entityid, use) == 0
|
68 | 67 | for: 15m
|
69 | 68 | labels:
|
|
77 | 76 | {{- if .Values.prometheusRule.alertExtraAnnotations }}
|
78 | 77 | {{- toYaml .Values.prometheusRule.alertExtraAnnotations | nindent 8 }}
|
79 | 78 | {{- end }}
|
80 |
| - - alert: '{{ printf "%s %s" $rulePrefix "X509CertificateExpiration" | trim }}' |
| 79 | + - alert: '{{ printf "%s%s" $rulePrefix "X509CertificateExpiration" | trim }}' |
81 | 80 | expr: count(saml_x509_cert_not_after) by (entityid, use) - count(((saml_x509_cert_not_after - time()) / 86400) < {{ .Values.prometheusRule.criticalDaysLeft }}) by (entityid, use) == 0
|
82 | 81 | for: 15m
|
83 | 82 | labels:
|
|
91 | 90 | {{- if .Values.prometheusRule.alertExtraAnnotations }}
|
92 | 91 | {{- toYaml .Values.prometheusRule.alertExtraAnnotations | nindent 8 }}
|
93 | 92 | {{- end }}
|
| | + # Fires when http_client_request for this release's job reports non-2xx response codes. |
| 93 | + - alert: '{{ printf "%s%s" $rulePrefix "UnavailableMetadataEndpoint" | trim }}' |
| | + # Aggregate by host: a bare sum() drops every label, which leaves $labels.host empty in the description below. |
| | + # NOTE(review): assumes http_client_request carries a "host" label (the description reads it) - confirm against the exporter's metrics. |
| 94 | + expr: sum by (host) (rate(http_client_request{job="{{ include "saml-exporter.fullname" . }}", code!~"^2.*"}[5m])) > 0 |
| 95 | + for: 3m |
| 96 | + labels: |
| 97 | + severity: {{ .Values.prometheusRule.unavailableMetadataEndpointSeverity }} |
| 98 | + {{- if .Values.prometheusRule.alertExtraLabels }} |
| 99 | + {{- toYaml .Values.prometheusRule.alertExtraLabels | nindent 8 }} |
| 100 | + {{- end }} |
| 101 | + annotations: |
| 102 | + summary: A SAML metadata endpoint is unreachable |
| | + # NOTE(review): the text says "5m" while "for" is 3m and the rate window is 5m - confirm the intended wording. |
| 103 | + description: A SAML metadata endpoint "{{ "{{" }} $labels.host {{ "}}" }}" is not responding with a 2xx code for over 5m. |
| 104 | + {{- if .Values.prometheusRule.alertExtraAnnotations }} |
| 105 | + {{- toYaml .Values.prometheusRule.alertExtraAnnotations | nindent 8 }} |
| 106 | + {{- end }} |
94 | 107 | {{- end }}
|
95 | 108 | {{- range .Values.prometheusRule.extraAlertGroups }}
|
96 | 109 | - {{ tpl (toYaml .) $ | indent 4 | trim }}
|
|
0 commit comments