Skip to content

Commit f0aee9f

Browse files
Author: Robert Wikman
Add perfdata support
This commit reworks the condition- and result-handling code to add support for perfdata and multi-line output.
1 parent: 3918448 · commit: f0aee9f

File tree

17 files changed: 370 additions (+370) and 249 deletions (-249).

check_k8s.py

Lines changed: 9 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -5,17 +5,13 @@
55
import traceback
66
import json
77

8-
from collections import namedtuple
98
from urllib.error import URLError, HTTPError
109

1110
from k8s.components import MAPPINGS
1211
from k8s.cli import parse_cmdline
1312
from k8s.http import build_url, request
14-
from k8s.consts import NAGIOS_MSG, Severity
15-
from k8s.exceptions import PluginException
16-
17-
18-
Output = namedtuple("Output", ["state", "message", "channel"])
13+
from k8s.consts import NAGIOS_MSG, NaemonState
14+
from k8s.result import Output
1915

2016

2117
def main():
@@ -38,28 +34,27 @@ def main():
3834
# Request and check health data
3935
try:
4036
response, status = request(url, token=parsed.token, insecure=parsed.insecure)
41-
result = health_check(response)
42-
output = Output(Severity.OK, result, sys.stdout)
43-
except PluginException as e:
44-
output = Output(e.state, e.message, sys.stderr)
37+
output = health_check(response).output
38+
if not isinstance(output, Output):
39+
raise TypeError("Unknown health check format")
4540
except HTTPError as e:
4641
body = json.loads(e.read().decode("utf8"))
4742
output = Output(
48-
Severity.UNKNOWN,
43+
NaemonState.UNKNOWN,
4944
"{0}: {1}".format(e.code, body.get("message")),
5045
sys.stderr
5146
)
5247
except URLError as e:
53-
output = Output(Severity.UNKNOWN, e.reason, sys.stderr)
48+
output = Output(NaemonState.UNKNOWN, e.reason, sys.stderr)
5449
except Exception as e:
5550
if parsed.debug:
5651
exc_type, exc_value, exc_traceback = sys.exc_info()
5752
traceback.print_tb(exc_traceback, file=sys.stdout)
5853

59-
output = Output(Severity.UNKNOWN, e, sys.stderr)
54+
output = Output(NaemonState.UNKNOWN, e, sys.stderr)
6055

6156
msg = NAGIOS_MSG.format(state=output.state.name, message=output.message)
62-
print(msg, file=output.channel)
57+
output.channel.write(msg)
6358
sys.exit(output.state.value)
6459

6560

k8s/components/deployment/check.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from k8s.exceptions import NagiosCritical, NagiosWarning
1+
from k8s.result import Result
22

33
from .resource import Deployment
44

@@ -14,19 +14,4 @@ def check_deployments(items):
1414
:return: Deployments health summary
1515
"""
1616

17-
for item in items:
18-
deployment = Deployment(item)
19-
reps = deployment.replicas
20-
21-
if deployment.alerts_critical:
22-
raise NagiosCritical(deployment.alerts_critical[0])
23-
elif deployment.alerts_warning:
24-
raise NagiosWarning(deployment.alerts_warning[0])
25-
26-
if reps.available < reps.total or reps.updated < reps.total:
27-
if reps.available != 0 and reps.updated != 0:
28-
raise NagiosWarning("Deployment degraded", **deployment.meta)
29-
30-
raise NagiosCritical("Deployment unavailable", **deployment.meta)
31-
32-
return "Found {} healthy Deployments".format(len(items))
17+
return Result(Deployment, items)

k8s/components/deployment/resource.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,40 @@
11
from collections import namedtuple
2+
from enum import Enum
3+
4+
from k8s.consts import NaemonState
5+
6+
from ..resource import Resource, NaemonStatus
27

3-
from k8s.resource import Resource
4-
from k8s.consts import Severity
58

69
Replicas = namedtuple("Replicas", ["total", "ready", "updated", "available"])
710

811

912
class Deployment(Resource):
10-
def __init__(self, data):
11-
super(Deployment, self).__init__(data)
13+
class PerfMap(Enum):
14+
AVAILABLE = "available"
15+
UNAVAILABLE = "unavailable"
16+
DEGRADED = "degraded"
17+
NOREPS = "noreps"
18+
19+
def __init__(self, data, *args, **kwargs):
20+
super(Deployment, self).__init__(data, *args, **kwargs)
1221

1322
self.replicas = Replicas(
14-
self._status["replicas"],
15-
self._status["readyReplicas"],
16-
self._status["updatedReplicas"],
17-
self._status["availableReplicas"]
23+
self._status.get("replicas", 0),
24+
self._status.get("readyReplicas", 0),
25+
self._status.get("updatedReplicas", 0),
26+
self._status.get("availableReplicas", 0)
1827
)
1928

20-
def _condition_severity(self, _type, status):
21-
if _type == "Available" and status != "True":
22-
return Severity.CRITICAL
29+
def _get_status(self, cnd_type, cnd_status):
30+
reps = self.replicas
31+
32+
if cnd_type == "Available":
33+
if cnd_status == "True":
34+
return NaemonStatus(NaemonState.OK, self.perf.AVAILABLE)
35+
else:
36+
return NaemonStatus(NaemonState.CRITICAL, self.perf.UNAVAILABLE)
37+
elif reps.available < reps.total or reps.updated < reps.total:
38+
if reps.available != 0 and reps.updated != 0:
39+
return NaemonStatus(NaemonState.WARNING, self.perf.DEGRADED)
40+
return NaemonStatus(NaemonState.CRITICAL, self.perf.NOREPS)

k8s/components/node/check.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from k8s.exceptions import NagiosCritical, NagiosWarning
1+
from k8s.result import Result
22

33
from .resource import Node
44

@@ -14,15 +14,4 @@ def check_nodes(items):
1414
:return: Nodes health summary
1515
"""
1616

17-
for item in items:
18-
node = Node(item)
19-
20-
if node.alerts_critical:
21-
raise NagiosCritical(node.alerts_critical[0])
22-
elif node.alerts_warning:
23-
raise NagiosWarning(node.alerts_warning[0])
24-
25-
if node.unschedulable:
26-
raise NagiosWarning("Node {} is ready, but unschedulable".format(node.meta["name"]))
27-
28-
return "Found {} healthy Nodes".format(len(items))
17+
return Result(Node, items)

k8s/components/node/resource.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,34 @@
1-
from k8s.resource import Resource
2-
from k8s.consts import Severity
1+
from enum import Enum
2+
3+
from k8s.consts import NaemonState
4+
5+
from ..resource import Resource, NaemonStatus
36

47

58
class Node(Resource):
6-
def __init__(self, data):
7-
super(Node, self).__init__(data)
9+
class PerfMap(Enum):
10+
AVAILABLE = "available"
11+
UNAVAILABLE = "unavailable"
12+
DEGRADED = "degraded"
13+
UNSCHEDULABLE = "unschedulable"
14+
15+
def __init__(self, data, *args, **kwargs):
16+
super(Node, self).__init__(data, *args, **kwargs)
817

918
# https://kubernetes.io/docs/concepts/architecture/nodes/#manual-node-administration
1019
self.unschedulable = data["spec"].get("unschedulable", False)
1120

12-
def _condition_severity(self, _type, status):
13-
if _type == "Ready" and status != "True":
14-
return Severity.CRITICAL
15-
elif _type != "Ready" and status == "True":
16-
return Severity.WARNING
21+
def _get_status(self, cnd_type, cnd_status):
22+
if self.unschedulable:
23+
return NaemonStatus(
24+
NaemonState.WARNING,
25+
self.perf.UNSCHEDULABLE,
26+
"Node {} is ready, but unschedulable".format(self.meta["name"])
27+
)
28+
elif cnd_type == "Ready":
29+
if cnd_status == "True":
30+
return NaemonStatus(NaemonState.OK, self.perf.AVAILABLE)
31+
else:
32+
return NaemonStatus(NaemonState.CRITICAL, self.perf.UNAVAILABLE)
33+
elif cnd_type != "Ready" and cnd_status == "True":
34+
return NaemonStatus(NaemonState.WARNING, self.perf.DEGRADED)

k8s/components/pod/check.py

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
from k8s.exceptions import NagiosCritical, NagiosWarning
1+
from k8s.result import Result
22

33
from .resource import Pod
4-
from .consts import Phase
54

65

76
def check_pods(items):
@@ -15,17 +14,4 @@ def check_pods(items):
1514
:return: Pods health summary
1615
"""
1716

18-
for item in items:
19-
pod = Pod(item)
20-
21-
if pod.phase == Phase.pending:
22-
raise NagiosWarning("{kind} {name} is {0}".format(pod.phase.value, **pod.meta))
23-
elif pod.phase != Phase.running and pod.phase != Phase.succeeded:
24-
raise NagiosCritical("Unexpected Phase for {kind} {name}: {0}".format(pod.phase.value, **pod.meta))
25-
26-
if pod.alerts_critical:
27-
raise NagiosCritical(pod.alerts_critical[0])
28-
elif pod.alerts_warning:
29-
raise NagiosWarning(pod.alerts_warning[0])
30-
31-
return "Found {} healthy Pods".format(len(items))
17+
return Result(Pod, items)

k8s/components/pod/consts.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from enum import Enum
22

3-
CONDITIONS_HEALTHY = ["Ready", "Initialized", "PodScheduled", "ContainersReady"]
3+
STATUSES = ["Ready", "Initialized", "PodScheduled", "ContainersReady"]
44

55

66
class ContainerState(Enum):

k8s/components/pod/resource.py

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1-
from k8s.resource import Resource
2-
from k8s.consts import Severity
1+
from enum import Enum
32

4-
from .consts import ContainerState, Phase, CONDITIONS_HEALTHY
3+
from k8s.consts import NaemonState
4+
5+
from ..resource import Resource, NaemonStatus
6+
7+
from .consts import ContainerState, Phase, STATUSES
58

69

710
class Container:
@@ -12,21 +15,35 @@ def __init__(self, data):
1215
# Container State is a single-item dict, with a nested dict value.
1316
# https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.15/#containerstate-v1-core
1417
state = list(data["state"].keys())
15-
16-
# Ensure state is known
1718
self.state = ContainerState(state[0])
1819

1920

2021
class Pod(Resource):
21-
def __init__(self, data):
22-
super(Pod, self).__init__(data)
22+
class PerfMap(Enum):
23+
AVAILABLE = "available"
24+
UNAVAILABLE = "unavailable"
25+
DEGRADED = "degraded"
26+
PENDING = "pending"
2327

24-
self.containers = [Container(c) for c in self._status["containerStatuses"]]
25-
self.phase = Phase(self._status["phase"])
28+
def __init__(self, data, *args, **kwargs):
29+
super(Pod, self).__init__(data, *args, **kwargs)
2630

27-
def _condition_severity(self, _type, status):
28-
if _type in CONDITIONS_HEALTHY and status != "True":
29-
return Severity.CRITICAL
30-
elif _type not in CONDITIONS_HEALTHY and status == "True":
31-
return Severity.WARNING
31+
self.containers = [Container(c) for c in self._status.get("containerStatuses", [])]
32+
self.phase = Phase(self._status["phase"])
3233

34+
def _get_status(self, cnd_type, cnd_status):
35+
if self.phase != Phase.running and self.phase != Phase.succeeded:
36+
return NaemonStatus(
37+
NaemonState.CRITICAL,
38+
self.perf.UNAVAILABLE,
39+
"Unexpected Phase for {kind} {name}: {0}".format(self.phase.value, **self.meta)
40+
)
41+
elif cnd_type in STATUSES:
42+
if cnd_status == "True":
43+
return NaemonStatus(NaemonState.OK, self.perf.AVAILABLE)
44+
else:
45+
return NaemonStatus(NaemonState.CRITICAL, self.perf.UNAVAILABLE)
46+
elif cnd_type not in STATUSES and cnd_status == "True":
47+
return NaemonStatus(NaemonState.WARNING, self.perf.DEGRADED)
48+
elif self.phase == Phase.pending:
49+
return NaemonStatus(NaemonState.WARNING, self.perf.PENDING)

0 commit comments

Comments
 (0)