Skip to content

Commit 28512db

Browse files
authored
nemesis driver and ut (#7937)
1 parent 89826f0 commit 28512db

File tree

16 files changed

+1196
-0
lines changed

16 files changed

+1196
-0
lines changed
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
# -*- coding: utf-8 -*-
2+
import argparse
3+
import logging.config
4+
import subprocess as sp
5+
import os
6+
import tempfile
7+
8+
import logging
9+
10+
from ydb.tests.tools.nemesis.library import monitor
11+
from ydb.tests.tools.nemesis.library import catalog
12+
from ydb.tests.library.harness.kikimr_cluster import ExternalKiKiMRCluster
13+
14+
15+
def setup_logging_config(filename=None):
    """Build the ``logging.config.dictConfig`` schema used by the nemesis driver.

    Args:
        filename: Optional path of a log file. When it is truthy, records are
            written through a ``TimedRotatingFileHandler`` rotating at
            midnight; otherwise a ``StreamHandler`` is used.

    Returns:
        dict: A version-1 logging configuration with a single handler named
        ``'handler'`` attached to the root logger and to the
        ``ydb.tests.library.harness.kikimr_runner`` logger.
    """
    if filename:
        handler = {
            'class': 'logging.handlers.TimedRotatingFileHandler',
            'filename': filename, 'when': 'midnight', 'level': 'DEBUG', 'formatter': 'base',
        }
    else:
        handler = {'class': 'logging.StreamHandler', 'level': 'DEBUG', 'formatter': 'base'}

    return {
        'version': 1,
        'formatters': {
            'base': {
                'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            },
        },
        'handlers': {
            'handler': handler,
        },
        'root': {
            'level': 'DEBUG',
            'handlers': ['handler'],
        },
        # Named loggers must live under 'loggers': dictConfig ignores unknown
        # top-level keys, and with the default disable_existing_loggers=True a
        # logger that already exists but is not listed here gets disabled.
        'loggers': {
            'ydb.tests.library.harness.kikimr_runner': {
                'level': 'DEBUG',
                'handlers': ['handler'],
                # The same handler is attached to root; do not emit twice.
                'propagate': False,
            },
        },
    }
45+
46+
47+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
48+
49+
50+
class SshAgent(object):
    """Run a private ``ssh-agent`` process and manage the keys loaded into it.

    The agent is started from the constructor. Every ``NAME=value;`` pair the
    agent prints is remembered and exported into ``os.environ``; the previous
    values of ``SSH_AUTH_SOCK`` and ``SSH_OPTIONS`` are restored by ``stop()``.
    """

    def __init__(self):
        self._env = {}
        self._env_backup = {}
        self._keys = {}
        self.start()

    @property
    def pid(self):
        """Process id of the agent, taken from the ``SSH_AGENT_PID`` it printed."""
        return int(self._env["SSH_AGENT_PID"])

    def start(self):
        """Launch ``ssh-agent`` and export the variables it prints.

        Raises:
            RuntimeError: If ``ssh-agent`` exits with a non-zero code.
        """
        self._env_backup["SSH_AUTH_SOCK"] = os.environ.get("SSH_AUTH_SOCK")
        self._env_backup["SSH_OPTIONS"] = os.environ.get("SSH_OPTIONS")

        for line in self._run(["ssh-agent"]).splitlines():
            name, separator, value = line.decode('utf-8').partition("=")
            # Lines without "=" carry no variable assignment; skip them.
            if separator == "=":
                value = value.split(";", 1)[0]
                self._env[name] = value
                os.environ[name] = value

        os.environ["SSH_OPTIONS"] = self._merge_ssh_options(os.environ.get("SSH_OPTIONS"))

    def stop(self):
        """Kill the agent and restore the environment saved by ``start()``.

        Raises:
            RuntimeError: If the ``kill`` command fails.
        """
        try:
            self._run(['kill', '-9', str(self.pid)])
        finally:
            # Do not leave the process environment pointing at a dead agent.
            for name, previous in self._env_backup.items():
                if previous is None:
                    os.environ.pop(name, None)
                else:
                    os.environ[name] = previous

    def add(self, key):
        """Load private ``key`` (bytes) into the agent and return its public key."""
        key_pub = self._key_pub(key)
        self._run(["ssh-add", "-"], stdin=key)
        return key_pub

    def remove(self, key_pub):
        """Remove the identity matching public key ``key_pub`` (bytes) from the agent."""
        with tempfile.NamedTemporaryFile() as f:
            f.write(key_pub)
            f.flush()
            self._run(["ssh-add", "-d", f.name])

    def _key_pub(self, key):
        """Derive the public key for private ``key`` (bytes) with ``ssh-keygen -y``."""
        with tempfile.NamedTemporaryFile() as f:
            f.write(key)
            f.flush()
            return self._run(["ssh-keygen", "-y", "-f", f.name])

    @staticmethod
    def _merge_ssh_options(current):
        """Return ``current`` SSH_OPTIONS extended with the host-key overrides.

        ``current`` may be ``None`` or empty, in which case only the overrides
        are returned. Options are comma-separated.
        """
        extra = "UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no"
        return "{},{}".format(current, extra) if current else extra

    @staticmethod
    def _run(cmd, stdin=None):
        """Run ``cmd`` and return its raw stdout.

        Args:
            cmd: Argument list passed to ``subprocess.Popen``.
            stdin: Optional bytes fed to the process' standard input.

        Returns:
            bytes: Captured stdout, or ``b""`` when the agent reports that it
            holds no identities.

        Raises:
            RuntimeError: If the command exits with a non-zero code; the
                message combines stripped stderr and stdout.
        """
        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE, stdin=sp.PIPE if stdin else None)
        stdout, stderr = p.communicate(stdin)

        # Listing keys from empty ssh-agent results in exit code 1
        if stdout.strip() == b"The agent has no identities.":
            return b""

        if p.returncode:
            # Pipes yield bytes; decode before building the text message.
            message = (
                stderr.decode('utf-8', 'replace').strip()
                + "\n"
                + stdout.decode('utf-8', 'replace').strip()
            )
            raise RuntimeError(message.strip())

        return stdout
110+
111+
112+
class Key(object):
    """Context manager that keeps a private key loaded in a dedicated ssh-agent.

    The constructor reads ``key_file`` and starts an ``SshAgent``. Entering the
    context adds the key to that agent; leaving it removes the key and stops
    the agent.
    """

    def __init__(self, key_file):
        self.key_file = key_file
        with open(key_file) as fd:
            self.key = fd.read()
        self._key_pub = None
        self._ssh_agent = SshAgent()

    def __enter__(self):
        """Add the key to the agent and return this object."""
        try:
            self._key_pub = self._ssh_agent.add(self.key.encode('utf-8'))
        except Exception:
            # __exit__ is not called when __enter__ fails, so stop the agent
            # here to avoid leaving the process behind.
            self._ssh_agent.stop()
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Remove the key and stop the agent; exceptions are not suppressed."""
        try:
            self._ssh_agent.remove(self._key_pub)
        finally:
            # Stop the agent even when removing the key failed.
            self._ssh_agent.stop()
126+
127+
128+
def nemesis_logic(arguments):
    """Configure logging, start the nemesis process and set up the monitoring page.

    Args:
        arguments: Parsed command-line namespace. Reads ``log_file``,
            ``ydb_cluster_template``, ``ydb_binary_path``,
            ``enable_nemesis_list_filter_by_hostname``, ``mon_host`` and
            ``mon_port``.
    """
    logging.config.dictConfig(setup_logging_config(arguments.log_file))
    nemesis = catalog.nemesis_factory(
        ExternalKiKiMRCluster(
            arguments.ydb_cluster_template,
            binary_path=arguments.ydb_binary_path,
            output_path=tempfile.gettempdir(),
        ),
        enable_nemesis_list_filter_by_hostname=arguments.enable_nemesis_list_filter_by_hostname,
    )
    nemesis.start()
    try:
        monitor.setup_page(arguments.mon_host, arguments.mon_port)
    finally:
        # Stop the started nemesis even when the monitoring page raises
        # (including KeyboardInterrupt).
        nemesis.stop()
141+
142+
143+
def main():
    """Parse the command line and run the nemesis driver.

    When ``--private-key-file`` is given, the driver runs inside a ``Key``
    context so the key stays loaded in an ssh-agent for the whole run.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--ydb-cluster-template', required=True, help='Path to the Yandex DB cluster template')
    parser.add_argument('--ydb-binary-path', required=True, help='Path to the Yandex DB binary')
    parser.add_argument('--private-key-file', default='')
    parser.add_argument('--log-file', default=None)
    # Plain callables instead of lambdas so argparse reports
    # "invalid int value" rather than "invalid <lambda> value".
    parser.add_argument('--mon-port', default=8666, type=int)
    parser.add_argument('--mon-host', default='::', type=str)
    parser.add_argument('--enable-nemesis-list-filter-by-hostname', action='store_true')
    arguments = parser.parse_args()

    if arguments.private_key_file:
        with Key(arguments.private_key_file):
            nemesis_logic(arguments)
    else:
        nemesis_logic(arguments)


if __name__ == '__main__':
    main()
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Build description: Python 3 program `nemesis`, built from __main__.py and
# depending on the directories listed in PEERDIR.
SUBSCRIBER(g:kikimr)
2+
PY3_PROGRAM(nemesis)
3+
4+
PY_SRCS(
5+
__main__.py
6+
)
7+
8+
PEERDIR(
9+
ydb/tests/library
10+
ydb/tests/tools/nemesis/library
11+
ydb/tools/cfg
12+
)
13+
14+
END()

ydb/tests/tools/nemesis/library/__init__.py

Whitespace-only changes.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# -*- coding: utf-8 -*-
2+
import abc
3+
4+
import six
5+
from ydb.tests.tools.nemesis.library import monitor
6+
7+
8+
@six.add_metaclass(abc.ABCMeta)
class AbstractMonitoredNemesis(object):
    """Base class that tracks fault injection/extraction in monitoring counters.

    Counters are obtained from ``monitor.monitor()`` and labelled with the
    concrete class name and, optionally, a scope.
    """

    def __init__(self, scope=None):
        # Placeholders; the real counters are created by register_counters().
        self.inject_completed = None
        self.inject_in_flight = None
        self.inject_in_flight_value = 0
        self.extract_completed = None
        self.registry = monitor.monitor()
        self.register_counters(scope)

    @property
    def name(self):
        """Name of the concrete nemesis class."""
        return self.__class__.__name__

    def register_counters(self, scope=None):
        """Create the three counters, labelled by nemesis name and optional scope."""
        labels = {'nemesis': self.name}
        if scope is not None:
            labels['scope'] = scope
        registry = self.registry
        self.inject_completed = registry.rate('InjectCompleted', labels)
        self.inject_in_flight = registry.int_gauge('InjectInFlight', labels)
        self.extract_completed = registry.rate('ExtractCompleted', labels)

    def start_inject_fault(self):
        """Record that one more fault injection is in flight."""
        in_flight = self.inject_in_flight_value + 1
        self.inject_in_flight_value = in_flight
        self.inject_in_flight.set(in_flight)

    def on_success_extract_fault(self):
        """Count one completed fault extraction."""
        self.extract_completed.inc()

    def on_success_inject_fault(self):
        """Count one completed injection and lower the in-flight gauge if positive."""
        in_flight = self.inject_in_flight_value
        if in_flight > 0:
            in_flight -= 1
            self.inject_in_flight_value = in_flight
            self.inject_in_flight.set(in_flight)
        self.inject_completed.inc()
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# -*- coding: utf-8 -*-
2+
import socket
3+
4+
from ydb.tests.library.nemesis.nemesis_core import NemesisProcess
5+
from ydb.tests.library.nemesis.nemesis_network import NetworkNemesis
6+
7+
from ydb.tests.library.harness import param_constants
8+
9+
from ydb.tests.tools.nemesis.library.node import nodes_nemesis_list
10+
11+
from ydb.tests.tools.nemesis.library.tablet import change_tablet_group_nemesis_list
12+
from ydb.tests.tools.nemesis.library.tablet import ReBalanceTabletsNemesis
13+
from ydb.tests.tools.nemesis.library.tablet import KillTenantSlotBrokerNemesis
14+
from ydb.tests.tools.nemesis.library.tablet import KillPersQueueNemesis
15+
from ydb.tests.tools.nemesis.library.tablet import KickTabletsFromNode
16+
from ydb.tests.tools.nemesis.library.tablet import KillKeyValueNemesis
17+
from ydb.tests.tools.nemesis.library.tablet import KillHiveNemesis
18+
from ydb.tests.tools.nemesis.library.tablet import KillBsControllerNemesis
19+
from ydb.tests.tools.nemesis.library.tablet import KillCoordinatorNemesis
20+
from ydb.tests.tools.nemesis.library.tablet import KillSchemeShardNemesis
21+
from ydb.tests.tools.nemesis.library.tablet import KillMediatorNemesis
22+
from ydb.tests.tools.nemesis.library.tablet import KillDataShardNemesis
23+
from ydb.tests.tools.nemesis.library.tablet import KillTxAllocatorNemesis
24+
from ydb.tests.tools.nemesis.library.tablet import KillNodeBrokerNemesis
25+
from ydb.tests.tools.nemesis.library.tablet import KillBlocktoreVolume
26+
from ydb.tests.tools.nemesis.library.tablet import KillBlocktorePartition
27+
from ydb.tests.tools.nemesis.library.disk import data_storage_nemesis_list
28+
29+
30+
def is_first_cluster_node(cluster):
    """Return True when the local hostname equals the first cluster hostname.

    An empty ``cluster.hostnames`` yields False.
    """
    if len(cluster.hostnames) <= 0:
        return False
    local_hostname = socket.gethostname().strip()
    return cluster.hostnames[0] == local_hostname
34+
35+
36+
def basic_kikimr_nemesis_list(
        cluster, num_of_pq_nemesis=10, network_nemesis=False,
        enable_nemesis_list_filter_by_hostname=False):
    """Build the list of nemesis actors for ``cluster``.

    Args:
        cluster: Cluster object handed to every nemesis constructor; its
            ``hostnames`` attribute is read when filtering is enabled.
        num_of_pq_nemesis: Number of instances created for each of the
            PersQueue, DataShard, Blockstore volume and Blockstore partition
            killers.
        network_nemesis: When true, a ``NetworkNemesis`` is added to the
            harmful group.
        enable_nemesis_list_filter_by_hostname: When true, the light actors
            are split between hosts by index modulo the host count, and the
            harmful group is returned only on the first cluster node.

    Returns:
        list: Light actors followed by harmful ones, or the per-host subset
        when filtering is enabled.
    """
    # Harmful group first, in the original construction order.
    harmful = []
    harmful.extend(data_storage_nemesis_list(cluster))
    harmful.extend(nodes_nemesis_list(cluster))
    harmful.append(KickTabletsFromNode(cluster))
    harmful.append(ReBalanceTabletsNemesis(cluster))
    if network_nemesis:
        harmful.append(NetworkNemesis(cluster, ssh_username=param_constants.ssh_username))

    light = [
        KillCoordinatorNemesis(cluster),
        KillHiveNemesis(cluster),
        KillBsControllerNemesis(cluster),
        KillNodeBrokerNemesis(cluster),
        KillSchemeShardNemesis(cluster),
        KillMediatorNemesis(cluster),
        KillTxAllocatorNemesis(cluster),
        KillKeyValueNemesis(cluster),
        KillTenantSlotBrokerNemesis(cluster),
    ]
    light.extend(change_tablet_group_nemesis_list(cluster))
    for repeated_nemesis in (
            KillPersQueueNemesis, KillDataShardNemesis,
            KillBlocktoreVolume, KillBlocktorePartition):
        light.extend([repeated_nemesis(cluster) for _ in range(num_of_pq_nemesis)])

    if not enable_nemesis_list_filter_by_hostname:
        return light + harmful

    hostnames = cluster.hostnames
    self_hostname = socket.gethostname()

    # Index of this host in the cluster; the last match wins.
    own_index = None
    for position, hostname in enumerate(hostnames):
        if self_hostname == hostname:
            own_index = position

    selected = []
    if own_index is not None:
        selected = [
            actor for actor_index, actor in enumerate(light)
            if actor_index % len(hostnames) == own_index
        ]

    if is_first_cluster_node(cluster):
        selected.extend(harmful)

    return selected
98+
99+
100+
def nemesis_factory(kikimr_cluster, num_of_pq_nemesis=10, **kwargs):
    """Wrap the nemesis list built for ``kikimr_cluster`` in a ``NemesisProcess``.

    Extra keyword arguments are forwarded to ``basic_kikimr_nemesis_list``.
    """
    nemesis_list = basic_kikimr_nemesis_list(kikimr_cluster, num_of_pq_nemesis, **kwargs)
    return NemesisProcess(nemesis_list)

0 commit comments

Comments
 (0)