Commit 91340d5

test: Check metrics in smoke test
1 parent e9cf012 commit 91340d5

8 files changed: +141, -11 lines

tests/templates/kuttl/smoke/30-install-hdfs.yaml.j2

Lines changed: 25 additions & 1 deletion
@@ -252,6 +252,18 @@ data:
         kind: 'MetricsSystem'
         sub: $2
       type: GAUGE
+    # FSDatasetState with _total suffix (also extracts the FSDataset ID),
+    # e.g. Hadoop:name=FSDatasetState,attribute=EstimatedCapacityLostTotal
+    - pattern: 'Hadoop<service=(.*), name=FSDatasetState-(.*)><>(.*_total): (\d+)'
+      attrNameSnakeCase: true
+      name: hadoop_$1_$3
+      value: $4
+      labels:
+        service: HDFS
+        role: $1
+        fsdatasetid: $2
+        kind: 'FSDatasetState'
+      type: COUNTER
     # FSDatasetState (also extracts the FSDataset ID)
     - pattern: 'Hadoop<service=(.*), name=FSDatasetState-(.*)><>(.*): (\d+)'
       attrNameSnakeCase: true
@@ -263,7 +275,19 @@ data:
         fsdatasetid: $2
         kind: 'FSDatasetState'
      type: GAUGE
-    # DataNodeActivity (also extracts hostname and port)
+    # DataNodeActivity with _info suffix (also extracts hostname and port),
+    # e.g. Hadoop:name=DataNodeActivity-hdfs-datanode-default-0-9866,attribute=BlocksGetLocalPathInfo
+    - pattern: 'Hadoop<service=(.*), name=DataNodeActivity-(.*)-(\d+)><>(.*_info): (\d+)'
+      attrNameSnakeCase: true
+      name: hadoop_$1_$4_
+      value: $5
+      labels:
+        service: HDFS
+        role: $1
+        host: $2
+        port: $3
+        kind: 'DataNodeActivity'
+      type: GAUGE
     - pattern: 'Hadoop<service=(.*), name=DataNodeActivity-(.*)-(\d+)><>(.*): (\d+)'
       attrNameSnakeCase: true
       name: hadoop_$1_$4
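
For context, here is a small standalone sketch (not part of the commit) of what the new _total rule produces. It applies the same regex and substitutions to one made-up DataNode bean reading; the bean string format, the DS-1234 ID, and the value 0 are assumptions used only for illustration, and the real JMX exporter additionally lowercases the metric name.

    import re

    # Rule added above: FSDatasetState attributes ending in "_total" become
    # COUNTER metrics and keep the FSDataset ID as a label.
    pattern = r'Hadoop<service=(.*), name=FSDatasetState-(.*)><>(.*_total): (\d+)'

    # Hypothetical, already snake_cased (attrNameSnakeCase: true) bean reading.
    sample = 'Hadoop<service=DataNode, name=FSDatasetState-DS-1234><>estimated_capacity_lost_total: 0'

    match = re.match(pattern, sample)
    assert match is not None
    role, fsdatasetid, attr, value = match.groups()

    # name: hadoop_$1_$3 (lowercased by the exporter), labels from the rule,
    # type COUNTER; this lines up with the expectation in the metrics test below.
    print(
        f'hadoop_{role.lower()}_{attr}'
        f'{{fsdatasetid="{fsdatasetid}",kind="FSDatasetState",role="{role}",service="HDFS"}} {value}'
    )
    # hadoop_datanode_estimated_capacity_lost_total{fsdatasetid="DS-1234",kind="FSDatasetState",role="DataNode",service="HDFS"} 0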

tests/templates/kuttl/smoke/40-assert.yaml

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ timeout: 300
 apiVersion: apps/v1
 kind: StatefulSet
 metadata:
-  name: webhdfs
+  name: test-runner
 status:
   readyReplicas: 1
   replicas: 1

tests/templates/kuttl/smoke/40-webhdfs.yaml renamed to tests/templates/kuttl/smoke/40-install-test-runner.yaml

Lines changed: 5 additions & 5 deletions
@@ -2,21 +2,21 @@
 apiVersion: apps/v1
 kind: StatefulSet
 metadata:
-  name: webhdfs
+  name: test-runner
   labels:
-    app: webhdfs
+    app: test-runner
 spec:
   replicas: 1
   selector:
     matchLabels:
-      app: webhdfs
+      app: test-runner
   template:
     metadata:
       labels:
-        app: webhdfs
+        app: test-runner
     spec:
       containers:
-        - name: webhdfs
+        - name: test-runner
           image: docker.stackable.tech/stackable/testing-tools:0.2.0-stackable0.0.0-dev
           stdin: true
           tty: true

tests/templates/kuttl/smoke/50-assert.yaml

Lines changed: 1 addition & 1 deletion
@@ -2,4 +2,4 @@
 apiVersion: kuttl.dev/v1beta1
 kind: TestAssert
 commands:
-  - script: kubectl exec -n $NAMESPACE webhdfs-0 -- python /tmp/webhdfs.py $NAMESPACE ls
+  - script: kubectl exec -n $NAMESPACE test-runner-0 -- python /tmp/webhdfs.py $NAMESPACE ls

tests/templates/kuttl/smoke/50-create-file.yaml

Lines changed: 3 additions & 3 deletions
@@ -2,6 +2,6 @@
 apiVersion: kuttl.dev/v1beta1
 kind: TestStep
 commands:
-  - script: kubectl cp -n $NAMESPACE ./webhdfs.py webhdfs-0:/tmp
-  - script: kubectl cp -n $NAMESPACE ./testdata.txt webhdfs-0:/tmp
-  - script: kubectl exec -n $NAMESPACE webhdfs-0 -- python /tmp/webhdfs.py $NAMESPACE create
+  - script: kubectl cp -n $NAMESPACE ./webhdfs.py test-runner-0:/tmp
+  - script: kubectl cp -n $NAMESPACE ./testdata.txt test-runner-0:/tmp
+  - script: kubectl exec -n $NAMESPACE test-runner-0 -- python /tmp/webhdfs.py $NAMESPACE create

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+commands:
+  - script: kubectl exec -n $NAMESPACE test-runner-0 -- python /tmp/metrics-test.py $NAMESPACE

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+commands:
+  - script: kubectl cp -n $NAMESPACE ./metrics-test.py test-runner-0:/tmp

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
+from requests import Response
+import re
+import requests
+import sys
+import logging
+
+
+def check_metrics(
+    namespace: str,
+    role: str,
+    port: int,
+    expected_metrics: list[str]
+) -> None:
+    response: Response = requests.get(
+        f'http://hdfs-{role}-default-0.hdfs-{role}-default.{namespace}.svc.cluster.local:{port}/metrics'
+    )
+    assert response.ok, "Requesting metrics failed"
+
+    for metric in expected_metrics:
+        assert re.search(f'^{metric}', response.text, re.MULTILINE) is not None, \
+            f"Metric '{metric}' not found for {role}"
+
+
+def check_namenode_metrics(
+    namespace: str,
+) -> None:
+    expected_metrics = [
+        # Kind "MetricsSystem"
+        'hadoop_namenode_num_active_sources{kind="MetricsSystem",role="NameNode",service="HDFS",sub="Stats"}',
+        # Attribute "Total"
+        'hadoop_namenode_total{kind="NameNodeInfo",role="NameNode",service="HDFS"}',
+        # Counter suffixed with "_total"
+        'hadoop_namenode_files_total{kind="FSNamesystem",role="NameNode",service="HDFS"}',
+        # Metric suffixed with "_created"
+        'hadoop_namenode_files_created_{kind="NameNodeActivity",role="NameNode",service="HDFS"}',
+        # Metric suffixed with "_info"
+        'hadoop_namenode_log_info_{kind="JvmMetrics",role="NameNode",service="HDFS"}',
+        # Non-special metric
+        'hadoop_namenode_files_deleted{kind="NameNodeActivity",role="NameNode",service="HDFS"}',
+    ]
+    check_metrics(namespace, 'namenode', 8183, expected_metrics)
+
+
+def check_datanode_metrics(
+    namespace: str,
+) -> None:
+    expected_metrics = [
+        # Kind "MetricsSystem"
+        'hadoop_datanode_num_active_sources{kind="MetricsSystem",role="DataNode",service="HDFS",sub="Stats"}',
+        # Kind "FSDatasetState" suffixed with "_total"
+        'hadoop_datanode_estimated_capacity_lost_total{fsdatasetid=".+",kind="FSDatasetState",role="DataNode",service="HDFS"}',
+        # Kind "FSDatasetState"
+        'hadoop_datanode_capacity{fsdatasetid=".+",kind="FSDatasetState",role="DataNode",service="HDFS"}',
+        # Kind "DataNodeActivity" suffixed with "_info"
+        'hadoop_datanode_blocks_get_local_path_info_{host="hdfs-datanode-default-0\\.hdfs-datanode-default\\..+\\.svc\\.cluster\\.local",kind="DataNodeActivity",port="9866",role="DataNode",service="HDFS"}',
+        # Kind "DataNodeActivity"
+        'hadoop_datanode_blocks_read{host="hdfs-datanode-default-0\\.hdfs-datanode-default\\..+\\.svc\\.cluster\\.local",kind="DataNodeActivity",port="9866",role="DataNode",service="HDFS"}',
+        # Counter suffixed with "_total"
+        'hadoop_datanode_estimated_capacity_lost_total{kind="FSDatasetState",role="DataNode",service="HDFS"}',
+        # Metric suffixed with "_info"
+        'hadoop_datanode_log_info_{kind="JvmMetrics",role="DataNode",service="HDFS"}',
+        # Non-special metric
+        'hadoop_datanode_gc_count{kind="JvmMetrics",role="DataNode",service="HDFS"}',
+    ]
+    check_metrics(namespace, 'datanode', 8082, expected_metrics)
+
+
+def check_journalnode_metrics(
+    namespace: str,
+) -> None:
+    expected_metrics = [
+        # Kind "MetricsSystem"
+        'hadoop_journalnode_num_active_sources{kind="MetricsSystem",role="JournalNode",service="HDFS",sub="Stats"}',
+        # Metric suffixed with "_info"
+        'hadoop_journalnode_log_info_{kind="JvmMetrics",role="JournalNode",service="HDFS"}',
+        # Non-special metric
+        'hadoop_journalnode_bytes_written{kind="Journal-hdfs",role="JournalNode",service="HDFS"}',
+    ]
+    check_metrics(namespace, 'journalnode', 8081, expected_metrics)
+
+
+if __name__ == "__main__":
+    namespace: str = sys.argv[1]
+
+    log_level = "DEBUG"
+    logging.basicConfig(
+        level=log_level,
+        format="%(asctime)s %(levelname)s: %(message)s",
+        stream=sys.stdout,
+    )
+
+    check_namenode_metrics(namespace)
+    check_datanode_metrics(namespace)
+    check_journalnode_metrics(namespace)
+
+    print("All expected metrics found")
