Skip to content

Support config v2 for nemesis and stress tool #20688

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 16 additions & 9 deletions ydb/tests/library/harness/kikimr_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,33 @@ def kikimr_cluster_factory(configurator=None, config_path=None):
class ExternalKiKiMRCluster(KiKiMRClusterInterface):
def __init__(
self,
config_path,
cluster_template,
kikimr_configure_binary_path,
kikimr_path,
kikimr_next_path=None,
ssh_username=None,
deploy_cluster=False,
):
self.__config_path = config_path
with open(config_path, 'r') as r:
self.__yaml_config = yaml.safe_load(r.read())
yaml_config=None):
with open(cluster_template, 'r') as r:
self.__cluster_template = yaml.safe_load(r.read())
if yaml_config is not None:
with open(yaml_config, 'r') as r:
self.__yaml_config = yaml.safe_load(r.read())
self.__kikimr_configure_binary_path = kikimr_configure_binary_path
self.__hosts = [host.get('name', host.get('host')) for host in self.__yaml_config.get('hosts', [])]
self._slots = None
self.__kikimr_path = kikimr_path
self.__kikimr_next_path = kikimr_next_path
self.__ssh_username = ssh_username
self.__deploy_cluster = deploy_cluster
self.__slot_count = 0

for domain in self.__yaml_config['domains']:
if yaml_config is not None:
self.__hosts = [host.get('name', host.get('host')) for host in self.__yaml_config.get('config', {}).get('hosts')]
else:
# Backward compatibility for cluster_template
self.__hosts = [host.get('name', host.get('host')) for host in self.__cluster_template.get('hosts')]

self.__slot_count = 0
for domain in self.__cluster_template['domains']:
self.__slot_count = max(self.__slot_count, domain['dynamic_slots'])

super(ExternalKiKiMRCluster, self).__init__()
Expand Down Expand Up @@ -133,7 +140,7 @@ def _prepare_cluster(self):
self._run_on(
inst_set,
lambda x: x.prepare_artifacts(
self.__config_path
self.__cluster_template
)
)

Expand Down
20 changes: 11 additions & 9 deletions ydb/tests/library/harness/kikimr_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1014,8 +1014,9 @@ def switch_version(self):
self.update_binary_links()

def prepare_artifacts(self, cluster_yml):
self.copy_file_or_dir(
self.__kikimr_configure_binary_path, self.kikimr_configure_binary_deploy_path)
if self.__kikimr_configure_binary_path is not None:
self.copy_file_or_dir(
self.__kikimr_configure_binary_path, self.kikimr_configure_binary_deploy_path)

for version, local_driver in zip(self.versions, self.local_drivers_path):
self.ssh_command("sudo rm -rf %s" % version)
Expand All @@ -1025,14 +1026,15 @@ def prepare_artifacts(self, cluster_yml):
self.ssh_command("sudo /sbin/setcap 'CAP_SYS_RAWIO,CAP_SYS_NICE=ep' %s" % version)

self.update_binary_links()
self.ssh_command("sudo mkdir -p %s" % self.kikimr_configuration_deploy_path)
self.copy_file_or_dir(cluster_yml, self.kikimr_cluster_yaml_deploy_path)
self.ssh_command(self.__generate_configs_cmd())
self.ssh_command(
self.__generate_configs_cmd(
"--dynamic"
if self.__kikimr_configure_binary_path is not None:
self.ssh_command("sudo mkdir -p %s" % self.kikimr_configuration_deploy_path)
self.copy_file_or_dir(cluster_yml, self.kikimr_cluster_yaml_deploy_path)
self.ssh_command(self.__generate_configs_cmd())
self.ssh_command(
self.__generate_configs_cmd(
"--dynamic"
)
)
)

def format_pdisk(self, pdisk_id):
pass
Expand Down
62 changes: 46 additions & 16 deletions ydb/tests/stability/tool/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,13 +224,14 @@ class bcolors:


class StabilityCluster:
def __init__(self, ssh_username, cluster_path, ydbd_path=None, ydbd_next_path=None):
def __init__(self, ssh_username, cluster_path, ydbd_path=None, ydbd_next_path=None, yaml_config=None):
self.working_dir = os.path.join(tempfile.gettempdir(), "ydb_stability")
os.makedirs(self.working_dir, exist_ok=True)
self.ssh_username = ssh_username
self.slice_directory = cluster_path
self.ydbd_path = ydbd_path
self.ydbd_next_path = ydbd_next_path
self.yaml_config = yaml_config

self.artifacts = (
self._unpack_resource('nemesis'),
Expand All @@ -244,14 +245,24 @@ def __init__(self, ssh_username, cluster_path, ydbd_path=None, ydbd_next_path=No
self._unpack_resource('ydb_cli'),
)

self.kikimr_cluster = ExternalKiKiMRCluster(
config_path=self.slice_directory,
kikimr_configure_binary_path=self._unpack_resource("cfg"),
kikimr_path=self.ydbd_path,
kikimr_next_path=self.ydbd_next_path,
ssh_username=self.ssh_username,
deploy_cluster=True,
)
if self.yaml_config is None:
self.kikimr_cluster = ExternalKiKiMRCluster(
cluster_template=self.slice_directory,
kikimr_configure_binary_path=self._unpack_resource("cfg"),
kikimr_path=self.ydbd_path,
kikimr_next_path=self.ydbd_next_path,
ssh_username=self.ssh_username,
deploy_cluster=True,
)
else:
self.kikimr_cluster = ExternalKiKiMRCluster(
cluster_template=self.slice_directory,
kikimr_configure_binary_path=None,
kikimr_path=self.ydbd_path,
kikimr_next_path=self.ydbd_next_path,
ssh_username=self.ssh_username,
yaml_config=self.yaml_config,
)

def _unpack_resource(self, name):
res = resource.find(name)
Expand Down Expand Up @@ -401,7 +412,7 @@ def perform_checks(self):
print(f' {node}: {minidumps_search_results[node]}')

def start_nemesis(self):
self.prepare_cluster_yaml()
self.prepare_config_files()
with ThreadPoolExecutor() as pool:
pool.map(lambda node: node.ssh_command(DICT_OF_SERVICES['nemesis']['start_command'], raise_on_error=True), self.kikimr_cluster.nodes.values())

Expand Down Expand Up @@ -603,17 +614,27 @@ def deploy_node_tools(self, node):
)
node.ssh_command(f"sudo chmod 777 {node_artifact_path}", raise_on_error=False)

def prepare_cluster_yaml(self):
def prepare_config_files(self):
with ThreadPoolExecutor() as pool:
pool.map(lambda node: node.copy_file_or_dir(
self.slice_directory,
'/Berkanavt/kikimr/cfg/cluster.yaml'
), self.kikimr_cluster.nodes.values())
if self.yaml_config is None:
pool.map(lambda node: node.copy_file_or_dir(
self.slice_directory,
'/Berkanavt/kikimr/cfg/cluster.yaml'
), self.kikimr_cluster.nodes.values())
else:
pool.map(lambda node: node.copy_file_or_dir(
self.slice_directory,
'/Berkanavt/kikimr/cfg/databases.yaml'
), self.kikimr_cluster.nodes.values())
pool.map(lambda node: node.copy_file_or_dir(
self.yaml_config,
'/Berkanavt/kikimr/cfg/config.yaml'
), self.kikimr_cluster.nodes.values())

def deploy_tools(self):
with ThreadPoolExecutor(len(self.kikimr_cluster.nodes)) as pool:
pool.map(self.deploy_node_tools, self.kikimr_cluster.nodes.values())
self.prepare_cluster_yaml()
self.prepare_config_files()

def get_workload_outputs(self, mode='err', last_n_lines=10):
"""Capture last N lines of output from all running workload screens."""
Expand Down Expand Up @@ -1040,6 +1061,13 @@ def parse_args():
type=path_type,
help="Path to next ydbd version binary (for cross-version testing)",
)
parser.add_argument(
"--yaml-config",
required=False,
default=None,
type=path_type,
help="Path to Yandex DB configuration v2",
)
parser.add_argument(
"--ssh_user",
required=False,
Expand Down Expand Up @@ -1184,12 +1212,14 @@ def parse_args():
def main():
args = parse_args()
ssh_username = args.ssh_user
yaml_config = args.yaml_config
print('Initing cluster info')
stability_cluster = StabilityCluster(
ssh_username=ssh_username,
cluster_path=args.cluster_path,
ydbd_path=args.ydbd_path,
ydbd_next_path=args.next_ydbd_path,
yaml_config=yaml_config,
)

for action in args.actions:
Expand Down
37 changes: 26 additions & 11 deletions ydb/tests/tools/nemesis/driver/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,25 +128,40 @@ def __exit__(self, exc_type, exc_val, exc_tb):
def nemesis_logic(arguments):
logging.config.dictConfig(setup_logging_config(arguments.log_file))
ssh_username = os.getenv('NEMESIS_USER', 'robot-nemesis')
nemesis = catalog.nemesis_factory(
ExternalKiKiMRCluster(
arguments.ydb_cluster_template,
kikimr_configure_binary_path=None,
kikimr_path=arguments.ydb_binary_path,
yaml_config = arguments.yaml_config
if yaml_config is not None:
nemesis = catalog.nemesis_factory(
ExternalKiKiMRCluster(
cluster_template=arguments.ydb_cluster_template,
kikimr_configure_binary_path=None,
kikimr_path=arguments.ydb_binary_path,
ssh_username=ssh_username,
yaml_config=yaml_config,
),
ssh_username=ssh_username,
),
ssh_username=ssh_username,
enable_nemesis_list_filter_by_hostname=arguments.enable_nemesis_list_filter_by_hostname,
)
enable_nemesis_list_filter_by_hostname=arguments.enable_nemesis_list_filter_by_hostname,
)
else:
nemesis = catalog.nemesis_factory(
ExternalKiKiMRCluster(
cluster_template=arguments.ydb_cluster_template,
kikimr_configure_binary_path=None,
kikimr_path=arguments.ydb_binary_path,
ssh_username=ssh_username,
),
ssh_username=ssh_username,
enable_nemesis_list_filter_by_hostname=arguments.enable_nemesis_list_filter_by_hostname,
)
nemesis.start()
monitor.setup_page(arguments.mon_host, arguments.mon_port)
nemesis.stop()


def main():
parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('--ydb-cluster-template', required=True, help='Path to the Yandex DB cluster template')
parser.add_argument('--ydb-binary-path', required=True, help='Path to the Yandex DB binary')
parser.add_argument('--ydb-cluster-template', required=True, help='Path to the YDB cluster template')
parser.add_argument('--ydb-binary-path', required=True, help='Path to the YDB binary')
parser.add_argument('--yaml-config', required=False, default=None, help='Path to the YDB configuration v2')
parser.add_argument('--private-key-file', default='')
parser.add_argument('--log-file', default=None)
parser.add_argument('--mon-port', default=8666, type=lambda x: int(x))
Expand Down
Loading