Skip to content

Commit 1f633c9

Browse files
authored
Improve in-test tpc benchs (#9124)
1 parent 5209b75 commit 1f633c9

File tree

3 files changed: +64 additions, -24 deletions (lines changed)

ydb/library/benchmarks/runner/run_tests/run_tests.py

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ def parse_args():
2323
subparser.add_argument('--datasize', type=int, default=1)
2424
subparser.add_argument('--variant', type=variant, default='h')
2525
subparser.add_argument('--tasks', type=int, default=1)
26+
subparser.add_argument('--perf', action="store_true", default=False)
2627

2728
subparser.add_argument('-o', '--output', default="./results")
2829
subparser.add_argument('--clean-old', action="store_true", default=False)
@@ -49,17 +50,25 @@ def parse_args():
4950

5051
parser.add_argument('--ydb-root', type=lambda path: pathlib.Path(path).resolve(), default="../../../../")
5152

52-
args = parser.parse_args(argv, namespace=args)
53+
args, argv = parser.parse_known_args(argv, namespace=args)
5354

54-
args.dqrun = args.ydb_root / "ydb" / "library" / "yql" / "tools" / "dqrun" / "dqrun"
5555
args.gen_queries = args.ydb_root / "ydb" / "library" / "benchmarks" / "gen_queries" / "gen_queries"
5656
args.downloaders_dir = args.ydb_root / "ydb" / "library" / "benchmarks" / "runner"
57-
args.fs_cfg = args.ydb_root / "ydb" / "library" / "yql" / "tools" / "dqrun" / "examples" / "fs.conf"
5857
args.flame_graph = args.ydb_root / "contrib" / "tools" / "flame-graph"
5958
args.result_compare = args.ydb_root / "ydb" / "library" / "benchmarks" / "runner" / "result_compare" / "result_compare"
60-
args.gateways_cfg = args.ydb_root / "ydb" / "library" / "benchmarks" / "runner" / "runner" / "test-gateways.conf"
6159
args.runner_path = args.ydb_root / "ydb" / "library" / "benchmarks" / "runner" / "runner" / "runner"
6260

61+
def_dqrun = args.ydb_root / "ydb" / "library" / "yql" / "tools" / "dqrun" / "dqrun"
62+
def_fs_cfg = args.ydb_root / "ydb" / "library" / "yql" / "tools" / "dqrun" / "examples" / "fs.conf"
63+
def_gateways_cfg = args.ydb_root / "ydb" / "library" / "benchmarks" / "runner" / "runner" / "test-gateways.conf"
64+
65+
override_parser = argparse.ArgumentParser()
66+
override_parser.add_argument('--dqrun', type=pathlib.Path, default=def_dqrun)
67+
override_parser.add_argument('--fs-cfg', type=pathlib.Path, default=def_fs_cfg)
68+
override_parser.add_argument('--gateways-cfg', type=pathlib.Path, default=def_gateways_cfg)
69+
70+
args = override_parser.parse_args(argv, namespace=args)
71+
6372
udfs_prefix = args.ydb_root / "ydb" / "library" / "yql" / "udfs" / "common"
6473
args.udfs_dir = [udfs_prefix / name for name in ["set", "url_base", "datetime2", "re2", "math", "unicode_base"]]
6574

@@ -102,7 +111,7 @@ def __init__(self, args, enable_spilling):
102111
"dq.EnableSpillingNodes=All",
103112
] if self.enable_spilling else [])
104113
self.tpc_dir = pathlib.Path(f"{self.args.downloaders_dir}/tpc/{self.args.variant}/{self.args.datasize}")
105-
if self.args.clean_old or not self.tpc_dir.exists():
114+
if not self.tpc_dir.exists():
106115
self.prepare_tpc_dir()
107116
if not pathlib.Path("./tpc").exists():
108117
os.symlink(f"{self.args.downloaders_dir}/tpc", f"{pathlib.Path("./tpc")}", target_is_directory=True)
@@ -112,7 +121,7 @@ def __init__(self, args, enable_spilling):
112121

113122
def run(self):
114123
cmd = ["/usr/bin/time", f"{str(self.args.runner_path)}"]
115-
# cmd += ["--perf"]
124+
cmd += ["--perf"] if self.args.perf else []
116125
for it in self.args.query_filter:
117126
cmd += ["--include-q", it]
118127
cmd += ["--query-dir", f"{str(self.queries_dir)}/{self.args.variant}"]

ydb/library/benchmarks/runner/tpc_tests.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,23 +58,28 @@ def wrapped_run(self, variant, datasize, tasks, query_filter):
5858
yatest.common.execute(cmd, stdout=sys.stdout, stderr=sys.stderr)
5959

6060

61-
def upload(result_path, s3_folder):
61+
def upload(result_path, s3_folder, try_num):
6262
uploader = pathlib.Path(yatest.common.source_path("ydb/library/benchmarks/runner/upload_results.py")).resolve()
6363
cmd = ["python3", str(uploader)]
6464
cmd += ["--result-path", str(result_path)]
6565
cmd += ["--s3-folder", str(s3_folder)]
66+
cmd += ["--try-num", str(try_num)] if try_num else []
6667
yatest.common.execute(cmd, stdout=sys.stdout, stderr=sys.stderr)
6768

6869

6970
def test_tpc():
70-
is_ci = os.environ.get("PUBLIC_DIR") is not None
71+
is_ci = os.environ.get("CURRENT_PUBLIC_DIR") is not None
7172

7273
runner = Runner()
7374
runner.wrapped_run("h", 1, 1, None)
7475
result_path = runner.results_path.resolve()
7576
print("Results path: ", result_path, file=sys.stderr)
7677

7778
if is_ci:
78-
s3_folder = pathlib.Path(os.environ["PUBLIC_DIR"]).resolve()
79+
s3_folder = pathlib.Path(os.environ["CURRENT_PUBLIC_DIR"]).resolve()
80+
try:
81+
try_num = int(s3_folder.name.split("try_")[-1])
82+
except Exception:
83+
try_num = None
7984

80-
upload(result_path, s3_folder)
85+
upload(result_path, s3_folder, try_num)

ydb/library/benchmarks/runner/upload_results.py

Lines changed: 40 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,9 @@ def __init__(self):
3535
self.user_time_ms = None
3636
self.system_time = None
3737
self.rss = None
38-
self.output_hash = None
38+
self.result_hash = None
39+
self.stdout_file_path = None
40+
self.stderr_file_path = None
3941
self.perf_file_path = None
4042

4143
def from_json(self, json):
@@ -64,6 +66,8 @@ def pretty_print(value):
6466
return f"Unwrap(DateTime::FromSeconds({int(delt.total_seconds())}))"
6567
if type(value) == datetime.timedelta:
6668
return f"DateTime::IntervalFromMicroseconds({int(value / datetime.timedelta(microseconds=1))})"
69+
if isinstance(value, pathlib.Path):
70+
return f'\"{value}\"'
6771
if type(value) == str:
6872
return f'\"{value}\"'
6973
if type(value) in [int, float]:
@@ -74,7 +78,21 @@ def pretty_print(value):
7478
assert False, f"unrecognized type: {type(value)}"
7579

7680

77-
def upload_results(result_path, s3_folder, test_start):
81+
def upload_file_to_s3(s3_folder, result_path, file):
82+
# copying files to folder that will be synced with s3
83+
dst = file.relative_to(result_path)
84+
s3_file = (s3_folder / dst).resolve()
85+
s3_file.parent.mkdir(parents=True, exist_ok=True)
86+
_ = shutil.copy2(str(file.resolve()), str(s3_file))
87+
return dst
88+
89+
90+
def upload_results(result_path, s3_folder, test_start, try_num):
91+
def add_try_num_to_path(path):
92+
if try_num:
93+
path = f"try_{try_num}" / path
94+
return path
95+
7896
results_map = {}
7997
for entry in result_path.glob("*/*"):
8098
if not entry.is_dir():
@@ -98,17 +116,22 @@ def upload_results(result_path, s3_folder, test_start):
98116
if query_num not in this_result:
99117
this_result[query_num] = RunResults()
100118

119+
# q<num>.svg
101120
if file.suffix == ".svg":
102-
dst = file.relative_to(result_path)
103-
this_result[query_num].perf_file_path = dst
104-
# copying files to folder that will be synced with s3
105-
dst = (s3_folder / dst).resolve()
106-
dst.parent.mkdir(parents=True, exist_ok=True)
107-
_ = shutil.copy2(str(file.resolve()), str(dst))
121+
this_result[query_num].perf_file_path = add_try_num_to_path(upload_file_to_s3(s3_folder, result_path, file))
122+
123+
# q<num>-result.yson
124+
if file.stem == f"q{query_num}-result":
125+
with open(file, "r") as result:
126+
this_result[query_num].result_hash = str(hash(result.read().strip()))
127+
108128
# q<num>-stdout.txt
109129
if file.stem == f"q{query_num}-stdout":
110-
with open(file, "r") as stdout:
111-
this_result[query_num].output_hash = str(hash(stdout.read().strip()))
130+
this_result[query_num].stdout_file_path = add_try_num_to_path(upload_file_to_s3(s3_folder, result_path, file))
131+
132+
# q<num>-stderr.txt
133+
if file.stem == f"q{query_num}-stderr":
134+
this_result[query_num].stderr_file_path = add_try_num_to_path(upload_file_to_s3(s3_folder, result_path, file))
112135

113136
summary_file = entry / "summary.json"
114137

@@ -144,12 +167,14 @@ def upload_results(result_path, s3_folder, test_start):
144167
"WasSpillingInJoin" : None,
145168
"WasSpillingInChannels" : None,
146169
"MaxTasksPerStage" : params.tasks,
147-
"PerfFileLink" : results.perf_file_path,
148170
"ExitCode" : results.exitcode,
149-
"ResultHash" : results.output_hash,
171+
"ResultHash" : results.result_hash,
150172
"SpilledBytes" : results.read_bytes,
151173
"UserTime" : results.user_time,
152-
"SystemTime" : results.system_time
174+
"SystemTime" : results.system_time,
175+
"StdoutFileLink" : results.stdout_file_path,
176+
"StderrFileLink" : results.stderr_file_path,
177+
"PerfFileLink" : results.perf_file_path
153178
}
154179
sql = 'UPSERT INTO `perfomance/olap/dq_spilling_nightly_runs`\n\t({columns})\nVALUES\n\t({values})'.format(
155180
columns=", ".join(map(str, mapping.keys())),
@@ -164,14 +189,15 @@ def main():
164189

165190
parser.add_argument("--result-path", type=pathlib.Path)
166191
parser.add_argument("--s3-folder", type=pathlib.Path)
192+
parser.add_argument("--try-num", default=None)
167193

168194
args = parser.parse_args()
169195

170196
if "CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS" not in os.environ:
171197
raise AttributeError("Env variable CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS is missing, skipping uploading")
172198
os.environ["YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"] = os.environ["CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"]
173199

174-
upload_results(args.result_path, args.s3_folder, upload_time)
200+
upload_results(args.result_path, args.s3_folder, upload_time, args.try_num)
175201

176202

177203
if __name__ == "__main__":

0 commit comments

Comments
 (0)