Skip to content

Commit a261a76

Browse files
authored
Refactoring parsing of nodes info response from cluster (#11681)
1 parent 16b2be5 commit a261a76

File tree

3 files changed

+75
-46
lines changed

3 files changed

+75
-46
lines changed

ydb/tests/olap/lib/allure_utils.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,8 @@ def _set_results_plot(test_info: dict[str, str], suite: str, test: str, refferen
5252
def _set_logs_command(test_info: dict[str, str], start_time: float, end_time: float):
5353
hosts = []
5454
for node in YdbCluster.get_cluster_nodes():
55-
ss = node.get('SystemState', {})
56-
if 'Storage' in ss.get('Roles', []):
57-
hosts.append(ss.get('Host'))
55+
if node.role == YdbCluster.Node.Role.STORAGE:
56+
hosts.append(node.host)
5857
hosts_cmd = ' '.join([f'-H {h}' for h in hosts])
5958
start = datetime.fromtimestamp(start_time, UTC).isoformat()
6059
end = datetime.fromtimestamp(end_time, UTC).isoformat()

ydb/tests/olap/lib/ydb_cli.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from ydb.tests.olap.lib.ydb_cluster import YdbCluster
88
from ydb.tests.olap.lib.utils import get_external_param
99
from enum import StrEnum
10-
from time import time
1110
from types import TracebackType
1211

1312

@@ -152,13 +151,12 @@ def _load_query_out(self) -> None:
152151
self.result.query_out = r.read()
153152

154153
@staticmethod
155-
def _get_nodes_info() -> dict[str, dict[str, int]]:
156-
nodes = YdbCluster.get_cluster_nodes(db_only=True)
154+
def _get_nodes_info() -> dict[str, dict[str, Any]]:
157155
return {
158-
n['SystemState']['Host']: {
159-
'start_time': int(int(n['SystemState'].get('StartTime', time() * 1000)) / 1000)
156+
n.host: {
157+
'start_time': n.start_time
160158
}
161-
for n in nodes
159+
for n in YdbCluster.get_cluster_nodes(db_only=True)
162160
}
163161

164162
def _check_nodes(self):

ydb/tests/olap/lib/ydb_cluster.py

Lines changed: 69 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from copy import deepcopy
99
from time import sleep, time
1010
from typing import List, Optional
11+
from enum import Enum
1112

1213
LOGGER = logging.getLogger()
1314

@@ -20,6 +21,33 @@ def __init__(self, url: str, caption: str = 'link') -> None:
2021
self.url = f'https://{self.url}'
2122
self.caption = caption
2223

24+
class Node:
    """Typed view over one entry of the ``Nodes`` list in the viewer response.

    The constructor only reads the description dict; it never mutates it.
    Missing keys and explicit JSON ``null`` values both fall back to the
    defaults below, so a partially filled node description still yields
    a usable object instead of raising.
    """

    class Role(Enum):
        """Node role derived from the ``Roles`` list of ``SystemState``."""

        UNKNOWN = 0
        STORAGE = 1
        COMPUTE = 2

    class Tablet:
        """One entry of a node's ``Tablets`` list."""

        def __init__(self, desc: dict):
            # Defaults apply when the corresponding key is absent.
            self.state: str = desc.get('State', 'Red')
            self.type: str = desc.get('Type', 'Unknown')
            self.count: int = desc.get('Count', 0)

    def __init__(self, desc: dict):
        """Build a node from its description dict.

        Args:
            desc: dict with optional ``SystemState``, ``Disconnected`` and
                ``Tablets`` keys.
        """
        # `or` (instead of a .get default) also covers an explicit null.
        ss = desc.get('SystemState') or {}
        roles = ss.get('Roles') or []

        self.host: str = ss.get('Host', '')
        self.disconnected: bool = bool(desc.get('Disconnected', False))
        self.cluster_name: str = ss.get('ClusterName', '')
        self.version: str = ss.get('Version', '')

        # StartTime is treated as milliseconds and stored as seconds;
        # when it is absent the current time is used.
        start_ms = ss.get('StartTime')
        if start_ms is None:
            start_ms = time() * 1000
        self.start_time: float = 0.001 * int(start_ms)

        # 'Storage' takes precedence when both roles are listed.
        # Nested types are reached through the instance so the class does
        # not depend on the name of the enclosing class.
        if 'Storage' in roles:
            self.role = self.Role.STORAGE
        elif 'Tenants' in roles:
            self.role = self.Role.COMPUTE
        else:
            self.role = self.Role.UNKNOWN

        self.tablets = [self.Tablet(t) for t in desc.get('Tablets') or []]
50+
2351
_ydb_driver = None
2452
_results_driver = None
2553
_cluster_info = None
@@ -53,7 +81,7 @@ def _get_service_url(cls):
5381
return f'http://{host}:{port}'
5482

5583
@classmethod
56-
def get_cluster_nodes(cls, path: Optional[str] = None, db_only: bool = False) -> list[dict[str:any]]:
84+
def get_cluster_nodes(cls, path: Optional[str] = None, db_only: bool = False) -> list[YdbCluster.Node]:
5785
try:
5886
url = f'{cls._get_service_url()}/viewer/json/nodes?'
5987
if db_only or path is not None:
@@ -64,27 +92,32 @@ def get_cluster_nodes(cls, path: Optional[str] = None, db_only: bool = False) ->
6492
# token = os.getenv('OLAP_YDB_OAUTH', None)
6593
# if token is not None:
6694
# headers['Authorization'] = token
67-
data = requests.get(url, headers=headers).json()
68-
nodes = data.get('Nodes', [])
69-
return nodes
95+
response = requests.get(url, headers=headers)
96+
response.raise_for_status()
97+
data = response.json()
98+
if not isinstance(data, dict):
99+
raise Exception(f'Incorrect response type: {data}')
100+
return [YdbCluster.Node(n) for n in data.get('Nodes', [])]
101+
except requests.HTTPError as e:
102+
LOGGER.error(f'{e.strerror}: {e.response.content}')
70103
except Exception as e:
71104
LOGGER.error(e)
72-
return [], 0
105+
return []
73106

74107
@classmethod
75108
def get_cluster_info(cls):
76109
if cls._cluster_info is None:
77110
version = ''
78111
cluster_name = ''
79112
nodes_wilcard = ''
80-
nodes = cls.get_cluster_nodes()
113+
nodes = cls.get_cluster_nodes(db_only=True)
81114
for node in nodes:
82-
n = node.get('SystemState', {})
83-
cluster_name = n.get('ClusterName', cluster_name)
84-
version = n.get('Version', version)
85-
for tenant in n.get('Tenants', []):
86-
if tenant.endswith(cls.ydb_database):
87-
nodes_wilcard = n.get('Host', nodes_wilcard).split('.')[0].rstrip('0123456789')
115+
if not cluster_name:
116+
cluster_name = node.cluster_name
117+
if not version:
118+
version = node.version
119+
if not nodes_wilcard and node.role == YdbCluster.Node.Role.COMPUTE:
120+
nodes_wilcard = node.host.split('.')[0].rstrip('0123456789')
88121
cls._cluster_info = {
89122
'database': cls.ydb_database,
90123
'version': version,
@@ -162,6 +195,14 @@ def _get_tables(cls, path):
162195
result.append(full_path)
163196
return result
164197

198+
@staticmethod
def _join_errors(log_level: int, errors: list[str]):
    """Join messages into one string, log it once, and return it.

    Args:
        log_level: level passed to ``LOGGER.log``.
        errors: messages to combine.

    Returns:
        The messages joined with ``', '``, or ``None`` when the list is
        empty (in which case nothing is logged).
    """
    if len(errors) == 0:
        return None
    joined = ', '.join(errors)
    LOGGER.log(log_level, joined)
    return joined
205+
165206
@classmethod
166207
@allure.step('Execute scan query')
167208
def execute_single_result_query(cls, query, timeout=10):
@@ -180,21 +221,14 @@ def execute_single_result_query(cls, query, timeout=10):
180221
@classmethod
181222
@allure.step('Check if YDB alive')
182223
def check_if_ydb_alive(cls, timeout=10, balanced_paths=None) -> tuple[str, str]:
183-
def _check_node(n):
184-
name = 'UnknownNode'
185-
error = None
186-
try:
187-
ss = n.get('SystemState', {})
188-
name = ss.get("Host")
189-
start_time = int(ss.get('StartTime', int(time()) * 1000)) / 1000
190-
uptime = int(time()) - start_time
191-
if uptime < 15:
192-
error = f'Node {name} too yong: {uptime}'
193-
except BaseException as ex:
194-
error = f"Error while process node {name}: {ex}"
195-
if error:
196-
LOGGER.error(error)
197-
return error
224+
def _check_node(n: YdbCluster.Node):
225+
errors = []
226+
if n.disconnected:
227+
errors.append(f'Node {n.host} disconnected')
228+
uptime = time() - n.start_time
229+
if uptime < 15:
230+
errors.append(f'Node {n.host} too yong: {uptime}')
231+
return cls._join_errors(logging.ERROR, errors)
198232

199233
errors = []
200234
warnings = []
@@ -216,7 +250,7 @@ def _check_node(n):
216250
else:
217251
ok_node_count += 1
218252
if ok_node_count < nodes_count:
219-
errors.append(f'Only {ok_node_count} from {ok_node_count} dynnodes are ok: {",".join(node_errors)}')
253+
errors.append(f'Only {ok_node_count} from {nodes_count} dynnodes are ok: {",".join(node_errors)}')
220254
paths_to_balance = []
221255
if isinstance(balanced_paths, str):
222256
paths_to_balance += cls._get_tables(balanced_paths)
@@ -232,11 +266,11 @@ def _check_node(n):
232266
min = 0
233267
for tn in table_nodes:
234268
tablet_count = 0
235-
for tablet in tn.get("Tablets", []):
236-
if tablet.get("State") != "Green":
237-
warnings.append(f'Node {tn.get("SystemState", {}).get("Host")}: {tablet.get("Count")} tablets of type {tablet.get("Type")} in {tablet.get("State")} state')
238-
if tablet.get("Type") in {"ColumnShard", "DataShard"}:
239-
tablet_count += tablet.get("Count")
269+
for tablet in tn.tablets:
270+
if tablet.count > 0 and tablet.state != "Green":
271+
warnings.append(f'Node {tn.host}: {tablet.count} tablets of type {tablet.type} in {tablet.state} state')
272+
if tablet.type in {"ColumnShard", "DataShard"}:
273+
tablet_count += tablet.count
240274
if tablet_count > 0:
241275
if min is None or tablet_count < min:
242276
min = tablet_count
@@ -251,10 +285,8 @@ def _check_node(n):
251285
cls.execute_single_result_query("select 1", timeout)
252286
except BaseException as ex:
253287
errors.append(f"Cannot connect to YDB: {ex}")
254-
error = ', '.join(errors) if len(errors) > 0 else None
255-
warning = ', '.join(warnings) if len(warnings) > 0 else None
256-
LOGGER.error(f'Errors: {error}, warnings: {warning}')
257-
return error, warning
288+
289+
return cls._join_errors(logging.ERROR, errors), cls._join_errors(logging.WARNING, warnings)
258290

259291
@classmethod
260292
@allure.step('Wait YDB alive')

0 commit comments

Comments
 (0)