Skip to content

Commit a49b429

Browse files
Test metrics in the smoke test (#635)
* test: Fix metrics in smoke test
* test: Update metrics configuration in smoke test
* test: Check metrics in smoke test
* test: Remove the custom JMX exporter config
* test: Fix smoke test for Hadoop 3.4.0
* chore: Format Python code in the smoke test
1 parent 278126b commit a49b429

File tree

7 files changed

+151
-10
lines changed

7 files changed

+151
-10
lines changed

tests/templates/kuttl/smoke/40-assert.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ timeout: 300
66
apiVersion: apps/v1
77
kind: StatefulSet
88
metadata:
9-
name: webhdfs
9+
name: test-runner
1010
status:
1111
readyReplicas: 1
1212
replicas: 1

tests/templates/kuttl/smoke/40-webhdfs.yaml renamed to tests/templates/kuttl/smoke/40-install-test-runner.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,22 @@
22
apiVersion: apps/v1
33
kind: StatefulSet
44
metadata:
5-
name: webhdfs
5+
name: test-runner
66
labels:
7-
app: webhdfs
7+
app: test-runner
88
spec:
99
replicas: 1
1010
selector:
1111
matchLabels:
12-
app: webhdfs
12+
app: test-runner
1313
template:
1414
metadata:
1515
labels:
16-
app: webhdfs
16+
app: test-runner
1717
spec:
1818
shareProcessNamespace: true
1919
containers:
20-
- name: webhdfs
20+
- name: test-runner
2121
image: docker.stackable.tech/stackable/testing-tools:0.2.0-stackable0.0.0-dev
2222
args: [sleep, infinity]
2323
stdin: true

tests/templates/kuttl/smoke/50-assert.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
apiVersion: kuttl.dev/v1beta1
33
kind: TestAssert
44
commands:
5-
- script: kubectl exec -n $NAMESPACE webhdfs-0 -- python /tmp/webhdfs.py $NAMESPACE ls
5+
- script: kubectl exec -n $NAMESPACE test-runner-0 -- python /tmp/webhdfs.py $NAMESPACE ls

tests/templates/kuttl/smoke/50-create-file.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
apiVersion: kuttl.dev/v1beta1
33
kind: TestStep
44
commands:
5-
- script: kubectl cp -n $NAMESPACE ./webhdfs.py webhdfs-0:/tmp
6-
- script: kubectl cp -n $NAMESPACE ./testdata.txt webhdfs-0:/tmp
7-
- script: kubectl exec -n $NAMESPACE webhdfs-0 -- python /tmp/webhdfs.py $NAMESPACE create
5+
- script: kubectl cp -n $NAMESPACE ./webhdfs.py test-runner-0:/tmp
6+
- script: kubectl cp -n $NAMESPACE ./testdata.txt test-runner-0:/tmp
7+
- script: kubectl exec -n $NAMESPACE test-runner-0 -- python /tmp/webhdfs.py $NAMESPACE create
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
apiVersion: kuttl.dev/v1beta1
3+
kind: TestAssert
4+
commands:
5+
- script: |
6+
{% if test_scenario['values']['hadoop'].find(",") > 0 %}
7+
PRODUCT_VERSION={{ test_scenario['values']['hadoop'].split(',')[0] }}
8+
{% else %}
9+
PRODUCT_VERSION={{ test_scenario['values']['hadoop'] }}
10+
{% endif %}
11+
kubectl exec --namespace=$NAMESPACE test-runner-0 -- \
12+
python /tmp/test_metrics.py $NAMESPACE $PRODUCT_VERSION
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
apiVersion: kuttl.dev/v1beta1
3+
kind: TestStep
4+
commands:
5+
- script: kubectl cp -n $NAMESPACE ./test_metrics.py test-runner-0:/tmp
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# Every rule in the JMX configuration is covered by one expected metric.
2+
3+
import re
4+
import sys
5+
import logging
6+
7+
import requests
8+
9+
10+
def check_metrics(
    namespace: str, role: str, port: int, expected_metrics: list[str]
) -> None:
    """Scrape one HDFS role's metrics endpoint and verify expected metrics.

    Requests ``/metrics`` from pod 0 of the role's ``default`` role group
    and checks that every entry of ``expected_metrics`` matches at the
    start of some line of the response body.

    Args:
        namespace: Kubernetes namespace used to build the pod's DNS name.
        role: Role part of the host name, e.g. ``"namenode"``.
        port: Port of the metrics endpoint.
        expected_metrics: Regular-expression patterns (not plain strings);
            each one is anchored to a line start before searching.

    Raises:
        AssertionError: If the response status is not OK or if a pattern
            matches no line of the response.
    """
    response: requests.Response = requests.get(
        f"http://hdfs-{role}-default-0.hdfs-{role}-default.{namespace}.svc.cluster.local:{port}/metrics",
        timeout=10,
    )

    # Raise explicitly instead of using `assert` statements: those are
    # removed when Python runs with -O / PYTHONOPTIMIZE, which would turn
    # this whole check into a no-op that always succeeds.
    if not response.ok:
        raise AssertionError("Requesting metrics failed")

    for metric in expected_metrics:
        # re.MULTILINE lets "^" anchor at the start of every line, so the
        # pattern must begin a metric line rather than match mid-line.
        if re.search(f"^{metric}", response.text, re.MULTILINE) is None:
            raise AssertionError(f"Metric '{metric}' not found for {role}")
23+
24+
25+
def check_namenode_metrics(
    namespace: str,
    product_version: str,
) -> None:
    """Verify the metrics expected from the HDFS NameNode.

    Builds the list of expected metric patterns, adds the log counter
    pattern for product versions that still provide it, and hands the
    list to ``check_metrics`` for the "namenode" role on port 8183.

    Args:
        namespace: Kubernetes namespace, passed through to ``check_metrics``.
        product_version: Hadoop version string; compared verbatim against
            the versions known to expose log counters.
    """
    # Log counters were removed in 3.4.0 (HADOOP-17524), so only these
    # versions are expected to expose them.
    versions_with_log_counters = ("3.3.4", "3.3.6")

    patterns: list[str] = [
        # Kind "MetricsSystem"
        'hadoop_namenode_num_active_sources{kind="MetricsSystem",role="NameNode",service="HDFS",sub="Stats"}',
        # Attribute "Total"
        'hadoop_namenode_total{kind="NameNodeInfo",role="NameNode",service="HDFS"}',
        # Counter suffixed with "_total"
        'hadoop_namenode_files_total{kind="FSNamesystem",role="NameNode",service="HDFS"}',
        # Metric suffixed with "_created"
        'hadoop_namenode_files_created_{kind="NameNodeActivity",role="NameNode",service="HDFS"}',
        # Non-special metric
        'hadoop_namenode_files_deleted{kind="NameNodeActivity",role="NameNode",service="HDFS"}',
    ]

    if product_version in versions_with_log_counters:
        # Metric suffixed with "_info"
        patterns.append(
            'hadoop_namenode_log_info_{kind="JvmMetrics",role="NameNode",service="HDFS"}'
        )

    check_metrics(namespace, "namenode", 8183, patterns)
52+
53+
54+
def check_datanode_metrics(
    namespace: str,
    product_version: str,
) -> None:
    """Verify the metrics expected from the HDFS DataNode.

    Builds the list of expected metric patterns, adds the log counter
    pattern for product versions that still provide it, and hands the
    list to ``check_metrics`` for the "datanode" role on port 8082.

    Args:
        namespace: Kubernetes namespace, passed through to ``check_metrics``.
        product_version: Hadoop version string; compared verbatim against
            the versions known to expose log counters.
    """
    # Log counters were removed in 3.4.0 (HADOOP-17524), so only these
    # versions are expected to expose them.
    versions_with_log_counters = ("3.3.4", "3.3.6")

    # The entries are regular expressions: ".+" stands in for values that
    # vary per deployment, and dots in host names are escaped.
    patterns: list[str] = [
        # Kind "MetricsSystem"
        'hadoop_datanode_num_active_sources{kind="MetricsSystem",role="DataNode",service="HDFS",sub="Stats"}',
        # Kind "FSDatasetState" suffixed with "_total"
        'hadoop_datanode_estimated_capacity_lost_total{fsdatasetid=".+",kind="FSDatasetState",role="DataNode",service="HDFS"}',
        # Kind "FSDatasetState"
        'hadoop_datanode_capacity{fsdatasetid=".+",kind="FSDatasetState",role="DataNode",service="HDFS"}',
        # Kind "DataNodeActivity" suffixed with "_info"
        'hadoop_datanode_blocks_get_local_path_info_{host="hdfs-datanode-default-0\\.hdfs-datanode-default\\..+\\.svc\\.cluster\\.local",kind="DataNodeActivity",port="9866",role="DataNode",service="HDFS"}',
        # Kind "DataNodeActivity"
        'hadoop_datanode_blocks_read{host="hdfs-datanode-default-0\\.hdfs-datanode-default\\..+\\.svc\\.cluster\\.local",kind="DataNodeActivity",port="9866",role="DataNode",service="HDFS"}',
        # Counter suffixed with "_total"
        'hadoop_datanode_estimated_capacity_lost_total{kind="FSDatasetState",role="DataNode",service="HDFS"}',
        # Non-special metric
        'hadoop_datanode_gc_count{kind="JvmMetrics",role="DataNode",service="HDFS"}',
    ]

    if product_version in versions_with_log_counters:
        # Metric suffixed with "_info"
        patterns.append(
            'hadoop_datanode_log_info_{kind="JvmMetrics",role="DataNode",service="HDFS"}'
        )

    check_metrics(namespace, "datanode", 8082, patterns)
85+
86+
87+
def check_journalnode_metrics(
    namespace: str,
    product_version: str,
) -> None:
    """Verify the metrics expected from the HDFS JournalNode.

    Builds the list of expected metric patterns, adds the log counter
    pattern for product versions that still provide it, and hands the
    list to ``check_metrics`` for the "journalnode" role on port 8081.

    Args:
        namespace: Kubernetes namespace, passed through to ``check_metrics``.
        product_version: Hadoop version string; compared verbatim against
            the versions known to expose log counters.
    """
    # Log counters were removed in 3.4.0 (HADOOP-17524), so only these
    # versions are expected to expose them.
    versions_with_log_counters = ("3.3.4", "3.3.6")

    patterns: list[str] = [
        # Kind "MetricsSystem"
        'hadoop_journalnode_num_active_sources{kind="MetricsSystem",role="JournalNode",service="HDFS",sub="Stats"}',
        # Non-special metric
        'hadoop_journalnode_bytes_written{kind="Journal-hdfs",role="JournalNode",service="HDFS"}',
    ]

    if product_version in versions_with_log_counters:
        # Metric suffixed with "_info"
        patterns.append(
            'hadoop_journalnode_log_info_{kind="JvmMetrics",role="JournalNode",service="HDFS"}'
        )

    check_metrics(namespace, "journalnode", 8081, patterns)
108+
109+
110+
if __name__ == "__main__":
    # Positional command-line arguments: <namespace> <product version>.
    namespace_arg: str = sys.argv[1]
    product_version_arg: str = sys.argv[2]

    # Send log output to stdout at DEBUG level.
    logging.basicConfig(
        stream=sys.stdout,
        format="%(asctime)s %(levelname)s: %(message)s",
        level="DEBUG",
    )

    # Run the per-role checks in order; the first failing check raises and
    # aborts the script before the success message is printed.
    for run_check in (
        check_namenode_metrics,
        check_datanode_metrics,
        check_journalnode_metrics,
    ):
        run_check(namespace_arg, product_version_arg)

    print("All expected metrics found")

0 commit comments

Comments
 (0)