Skip to content

Commit 1994c6f

Browse files
Gymlib consolidation (#68)
**Summary**: Moved all necessary files into `gymlib`.

**Demo**: <img width="343" alt="Screenshot 2024-12-30 at 17 24 45" src="https://github.com/user-attachments/assets/8cd953f2-28e7-458c-89cc-1a21bd1bb8a5" />

**Details**:
* `gymlib` is independent import-wise: nothing in `gymlib` imports anything outside of it.
* `gymlib` is **not fully independent**, though. It relies on the DBMS and benchmark already being set up by the outer `dbgym`. In fact, some of the tests inside `gymlib` call `task.py`.
* `_run_tests.py` discovers and runs the `gymlib` tests from the base `dbgym` repo, so keep that in mind.
1 parent 34a47c0 commit 1994c6f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+414
-519
lines changed

benchmark/cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
import click
2+
from gymlib.workspace import DBGymWorkspace
23

34
from benchmark.job.cli import job_group
45
from benchmark.tpch.cli import tpch_group
5-
from util.workspace import DBGymWorkspace
66

77

88
@click.group(name="benchmark")

benchmark/job/cli.py

Lines changed: 8 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -2,17 +2,15 @@
22
from typing import Optional
33

44
import click
5-
from gymlib.symlinks_paths import (
5+
from gymlib.infra_paths import (
66
get_tables_dirname,
77
get_workload_dirname,
88
get_workload_suffix,
9-
name_to_linkname,
109
)
10+
from gymlib.workspace import DBGymWorkspace, fully_resolve_path, name_to_linkname
1111

1212
from benchmark.constants import DEFAULT_SCALE_FACTOR
13-
from util.log import DBGYM_LOGGER_NAME
1413
from util.shell import subprocess_run
15-
from util.workspace import DBGymWorkspace, fully_resolve_path
1614

1715
JOB_TABLES_URL = "https://event.cwi.nl/da/job/imdb.tgz"
1816
JOB_QUERIES_URL = "https://event.cwi.nl/da/job/job.tgz"
@@ -213,12 +211,10 @@ def _download_and_untar_dir(
213211
dbgym_workspace.dbgym_cur_symlinks_path / f"{untarred_dname}.link"
214212
)
215213
if expected_symlink_path.exists():
216-
logging.getLogger(DBGYM_LOGGER_NAME).info(
217-
f"Skipping download: {expected_symlink_path}"
218-
)
214+
logging.info(f"Skipping download: {expected_symlink_path}")
219215
return
220216

221-
logging.getLogger(DBGYM_LOGGER_NAME).info(f"Downloading: {expected_symlink_path}")
217+
logging.info(f"Downloading: {expected_symlink_path}")
222218
subprocess_run(f"curl -O {download_url}", cwd=dbgym_workspace.dbgym_this_run_path)
223219
untarred_data_path = dbgym_workspace.dbgym_this_run_path / untarred_dname
224220

@@ -243,7 +239,7 @@ def _download_and_untar_dir(
243239
)
244240
symlink_path = dbgym_workspace.link_result(untarred_data_path)
245241
assert expected_symlink_path.samefile(symlink_path)
246-
logging.getLogger(DBGYM_LOGGER_NAME).info(f"Downloaded: {expected_symlink_path}")
242+
logging.info(f"Downloaded: {expected_symlink_path}")
247243

248244

249245
def _generate_job_workload(
@@ -259,14 +255,10 @@ def _generate_job_workload(
259255
name_to_linkname(workload_name)
260256
)
261257
if expected_workload_symlink_path.exists():
262-
logging.getLogger(DBGYM_LOGGER_NAME).info(
263-
f"Skipping generation: {expected_workload_symlink_path}"
264-
)
258+
logging.info(f"Skipping generation: {expected_workload_symlink_path}")
265259
return
266260

267-
logging.getLogger(DBGYM_LOGGER_NAME).info(
268-
f"Generating: {expected_workload_symlink_path}"
269-
)
261+
logging.info(f"Generating: {expected_workload_symlink_path}")
270262
workload_path = dbgym_workspace.dbgym_this_run_path / workload_name
271263
workload_path.mkdir(parents=False, exist_ok=False)
272264

@@ -291,6 +283,4 @@ def _generate_job_workload(
291283

292284
workload_symlink_path = dbgym_workspace.link_result(workload_path)
293285
assert workload_symlink_path == expected_workload_symlink_path
294-
logging.getLogger(DBGYM_LOGGER_NAME).info(
295-
f"Generated: {expected_workload_symlink_path}"
296-
)
286+
logging.info(f"Generated: {expected_workload_symlink_path}")

benchmark/job/load_info.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
from pathlib import Path
22
from typing import Optional
33

4-
from gymlib.symlinks_paths import get_tables_symlink_path
4+
from gymlib.infra_paths import get_tables_symlink_path
5+
from gymlib.workspace import DBGymWorkspace, fully_resolve_path
56

67
from benchmark.constants import DEFAULT_SCALE_FACTOR
78
from dbms.load_info_base_class import LoadInfoBaseClass
8-
from util.workspace import DBGymWorkspace, fully_resolve_path
99

1010
JOB_SCHEMA_FNAME = "job_schema.sql"
1111

benchmark/tests/integtest_benchmark.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,22 +2,22 @@
22
import unittest
33
from pathlib import Path
44

5-
from gymlib.symlinks_paths import (
5+
from gymlib.infra_paths import (
66
get_tables_symlink_path,
77
get_workload_suffix,
88
get_workload_symlink_path,
99
)
10+
from gymlib.workspace import (
11+
DBGymWorkspace,
12+
fully_resolve_path,
13+
get_workspace_path_from_config,
14+
)
1015

1116
# It's ok to import private functions from the benchmark module because this is an integration test.
1217
from benchmark.constants import DEFAULT_SCALE_FACTOR
1318
from benchmark.job.cli import _job_tables, _job_workload
1419
from benchmark.tpch.cli import _tpch_tables, _tpch_workload
1520
from benchmark.tpch.constants import DEFAULT_TPCH_SEED
16-
from util.workspace import (
17-
DBGymWorkspace,
18-
fully_resolve_path,
19-
get_workspace_path_from_config,
20-
)
2121

2222

2323
class BenchmarkTests(unittest.TestCase):

benchmark/tpch/cli.py

Lines changed: 17 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,24 @@
11
import logging
22

33
import click
4-
from gymlib.symlinks_paths import (
4+
from gymlib.infra_paths import (
55
get_scale_factor_string,
66
get_tables_dirname,
77
get_tables_symlink_path,
88
get_workload_suffix,
99
get_workload_symlink_path,
10+
)
11+
from gymlib.workspace import (
12+
DBGymWorkspace,
13+
fully_resolve_path,
14+
is_fully_resolved,
1015
linkname_to_name,
1116
name_to_linkname,
1217
)
1318

1419
from benchmark.constants import DEFAULT_SCALE_FACTOR
1520
from benchmark.tpch.constants import DEFAULT_TPCH_SEED, NUM_TPCH_QUERIES
16-
from util.log import DBGYM_LOGGER_NAME
1721
from util.shell import subprocess_run
18-
from util.workspace import DBGymWorkspace, fully_resolve_path, is_fully_resolved
1922

2023
TPCH_KIT_DIRNAME = "tpch-kit"
2124

@@ -102,12 +105,10 @@ def _clone_tpch_kit(dbgym_workspace: DBGymWorkspace) -> None:
102105
name_to_linkname(TPCH_KIT_DIRNAME)
103106
)
104107
if expected_symlink_path.exists():
105-
logging.getLogger(DBGYM_LOGGER_NAME).info(
106-
f"Skipping clone: {expected_symlink_path}"
107-
)
108+
logging.info(f"Skipping clone: {expected_symlink_path}")
108109
return
109110

110-
logging.getLogger(DBGYM_LOGGER_NAME).info(f"Cloning: {expected_symlink_path}")
111+
logging.info(f"Cloning: {expected_symlink_path}")
111112
subprocess_run(
112113
f"./clone_tpch_kit.sh {dbgym_workspace.dbgym_this_run_path}",
113114
cwd=dbgym_workspace.base_dbgym_repo_path / "benchmark" / "tpch",
@@ -116,7 +117,7 @@ def _clone_tpch_kit(dbgym_workspace: DBGymWorkspace) -> None:
116117
dbgym_workspace.dbgym_this_run_path / TPCH_KIT_DIRNAME
117118
)
118119
assert expected_symlink_path.samefile(symlink_path)
119-
logging.getLogger(DBGYM_LOGGER_NAME).info(f"Cloned: {expected_symlink_path}")
120+
logging.info(f"Cloned: {expected_symlink_path}")
120121

121122

122123
def _generate_tpch_queries(
@@ -125,9 +126,7 @@ def _generate_tpch_queries(
125126
tpch_kit_path = dbgym_workspace.dbgym_cur_symlinks_path / (
126127
name_to_linkname(TPCH_KIT_DIRNAME)
127128
)
128-
logging.getLogger(DBGYM_LOGGER_NAME).info(
129-
f"Generating queries: [{seed_start}, {seed_end}]"
130-
)
129+
logging.info(f"Generating queries: [{seed_start}, {seed_end}]")
131130
for seed in range(seed_start, seed_end + 1):
132131
expected_queries_symlink_path = dbgym_workspace.dbgym_cur_symlinks_path / (
133132
name_to_linkname(_get_queries_dirname(seed, scale_factor))
@@ -149,9 +148,7 @@ def _generate_tpch_queries(
149148
)
150149
queries_symlink_path = dbgym_workspace.link_result(queries_parent_path)
151150
assert queries_symlink_path.samefile(expected_queries_symlink_path)
152-
logging.getLogger(DBGYM_LOGGER_NAME).info(
153-
f"Generated queries: [{seed_start}, {seed_end}]"
154-
)
151+
logging.info(f"Generated queries: [{seed_start}, {seed_end}]")
155152

156153

157154
def _generate_tpch_tables(dbgym_workspace: DBGymWorkspace, scale_factor: float) -> None:
@@ -162,14 +159,10 @@ def _generate_tpch_tables(dbgym_workspace: DBGymWorkspace, scale_factor: float)
162159
dbgym_workspace.dbgym_workspace_path, "tpch", scale_factor
163160
)
164161
if expected_tables_symlink_path.exists():
165-
logging.getLogger(DBGYM_LOGGER_NAME).info(
166-
f"Skipping generation: {expected_tables_symlink_path}"
167-
)
162+
logging.info(f"Skipping generation: {expected_tables_symlink_path}")
168163
return
169164

170-
logging.getLogger(DBGYM_LOGGER_NAME).info(
171-
f"Generating: {expected_tables_symlink_path}"
172-
)
165+
logging.info(f"Generating: {expected_tables_symlink_path}")
173166
subprocess_run(f"./dbgen -vf -s {scale_factor}", cwd=tpch_kit_path / "dbgen")
174167
tables_parent_path = dbgym_workspace.dbgym_this_run_path / get_tables_dirname(
175168
"tpch", scale_factor
@@ -179,9 +172,7 @@ def _generate_tpch_tables(dbgym_workspace: DBGymWorkspace, scale_factor: float)
179172

180173
tables_symlink_path = dbgym_workspace.link_result(tables_parent_path)
181174
assert tables_symlink_path.samefile(expected_tables_symlink_path)
182-
logging.getLogger(DBGYM_LOGGER_NAME).info(
183-
f"Generated: {expected_tables_symlink_path}"
184-
)
175+
logging.info(f"Generated: {expected_tables_symlink_path}")
185176

186177

187178
def _generate_tpch_workload(
@@ -200,14 +191,10 @@ def _generate_tpch_workload(
200191
),
201192
)
202193
if expected_workload_symlink_path.exists():
203-
logging.getLogger(DBGYM_LOGGER_NAME).info(
204-
f"Skipping generation: {expected_workload_symlink_path}"
205-
)
194+
logging.info(f"Skipping generation: {expected_workload_symlink_path}")
206195
return
207196

208-
logging.getLogger(DBGYM_LOGGER_NAME).info(
209-
f"Generating: {expected_workload_symlink_path}"
210-
)
197+
logging.info(f"Generating: {expected_workload_symlink_path}")
211198
workload_path = dbgym_workspace.dbgym_this_run_path / linkname_to_name(
212199
expected_workload_symlink_path.name
213200
)
@@ -238,6 +225,4 @@ def _generate_tpch_workload(
238225

239226
workload_symlink_path = dbgym_workspace.link_result(workload_path)
240227
assert workload_symlink_path == expected_workload_symlink_path
241-
logging.getLogger(DBGYM_LOGGER_NAME).info(
242-
f"Generated: {expected_workload_symlink_path}"
243-
)
228+
logging.info(f"Generated: {expected_workload_symlink_path}")

benchmark/tpch/load_info.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
from pathlib import Path
22
from typing import Optional
33

4-
from gymlib.symlinks_paths import get_tables_symlink_path
4+
from gymlib.infra_paths import get_tables_symlink_path
5+
from gymlib.workspace import DBGymWorkspace, fully_resolve_path
56

67
from dbms.load_info_base_class import LoadInfoBaseClass
7-
from util.workspace import DBGymWorkspace, fully_resolve_path
88

99
TPCH_SCHEMA_FNAME = "tpch_schema.sql"
1010
TPCH_CONSTRAINTS_FNAME = "tpch_constraints.sql"

dbms/cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import click
2+
from gymlib.workspace import DBGymWorkspace
23

34
from dbms.postgres.cli import postgres_group
4-
from util.workspace import DBGymWorkspace
55

66

77
@click.group(name="dbms")

dbms/postgres/cli.py

Lines changed: 30 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -6,42 +6,39 @@
66
import shutil
77
import subprocess
88
from pathlib import Path
9-
from typing import Optional
9+
from typing import Any, Optional
1010

1111
import click
1212
import sqlalchemy
13-
from gymlib.symlinks_paths import (
13+
from gymlib.infra_paths import (
1414
get_dbdata_tgz_symlink_path,
1515
get_pgbin_symlink_path,
1616
get_repo_symlink_path,
17-
linkname_to_name,
18-
)
19-
20-
from benchmark.constants import DEFAULT_SCALE_FACTOR
21-
from benchmark.job.load_info import JobLoadInfo
22-
from benchmark.tpch.load_info import TpchLoadInfo
23-
from dbms.load_info_base_class import LoadInfoBaseClass
24-
from util.log import DBGYM_LOGGER_NAME
25-
from util.pg import (
26-
DBGYM_POSTGRES_DBNAME,
27-
DBGYM_POSTGRES_PASS,
28-
DBGYM_POSTGRES_USER,
29-
DEFAULT_POSTGRES_DBNAME,
30-
DEFAULT_POSTGRES_PORT,
31-
SHARED_PRELOAD_LIBRARIES,
32-
create_sqlalchemy_conn,
33-
sql_file_execute,
34-
sqlalchemy_conn_execute,
3517
)
36-
from util.shell import subprocess_run
37-
from util.workspace import (
18+
from gymlib.pg import create_sqlalchemy_conn, sql_file_execute
19+
from gymlib.workspace import (
3820
WORKSPACE_PATH_PLACEHOLDER,
3921
DBGymWorkspace,
4022
fully_resolve_path,
4123
get_tmp_path_from_workspace_path,
4224
is_fully_resolved,
4325
is_ssd,
26+
linkname_to_name,
4427
)
28+
from sqlalchemy import text
29+
30+
from benchmark.constants import DEFAULT_SCALE_FACTOR
31+
from benchmark.job.load_info import JobLoadInfo
32+
from benchmark.tpch.load_info import TpchLoadInfo
33+
from dbms.load_info_base_class import LoadInfoBaseClass
34+
from util.shell import subprocess_run
35+
36+
DBGYM_POSTGRES_USER = "dbgym_user"
37+
DBGYM_POSTGRES_PASS = "dbgym_pass"
38+
DBGYM_POSTGRES_DBNAME = "dbgym"
39+
DEFAULT_POSTGRES_DBNAME = "postgres"
40+
DEFAULT_POSTGRES_PORT = 5432
41+
SHARED_PRELOAD_LIBRARIES = "boot,pg_hint_plan,pg_prewarm"
4542

4643

4744
@click.group(name="postgres")
@@ -72,14 +69,10 @@ def _postgres_build(dbgym_workspace: DBGymWorkspace, rebuild: bool) -> None:
7269
dbgym_workspace.dbgym_workspace_path
7370
)
7471
if not rebuild and expected_repo_symlink_path.exists():
75-
logging.getLogger(DBGYM_LOGGER_NAME).info(
76-
f"Skipping _postgres_build: {expected_repo_symlink_path}"
77-
)
72+
logging.info(f"Skipping _postgres_build: {expected_repo_symlink_path}")
7873
return
7974

80-
logging.getLogger(DBGYM_LOGGER_NAME).info(
81-
f"Setting up repo in {expected_repo_symlink_path}"
82-
)
75+
logging.info(f"Setting up repo in {expected_repo_symlink_path}")
8376
repo_real_path = dbgym_workspace.dbgym_this_run_path / "repo"
8477
repo_real_path.mkdir(parents=False, exist_ok=False)
8578
subprocess_run(
@@ -90,9 +83,7 @@ def _postgres_build(dbgym_workspace: DBGymWorkspace, rebuild: bool) -> None:
9083
# only link at the end so that the link only ever points to a complete repo
9184
repo_symlink_path = dbgym_workspace.link_result(repo_real_path)
9285
assert expected_repo_symlink_path.samefile(repo_symlink_path)
93-
logging.getLogger(DBGYM_LOGGER_NAME).info(
94-
f"Set up repo in {expected_repo_symlink_path}"
95-
)
86+
logging.info(f"Set up repo in {expected_repo_symlink_path}")
9687

9788

9889
@postgres_group.command(
@@ -198,9 +189,7 @@ def _create_dbdata(
198189
scale_factor,
199190
)
200191
if expected_dbdata_tgz_symlink_path.exists():
201-
logging.getLogger(DBGYM_LOGGER_NAME).info(
202-
f"Skipping _create_dbdata: {expected_dbdata_tgz_symlink_path}"
203-
)
192+
logging.info(f"Skipping _create_dbdata: {expected_dbdata_tgz_symlink_path}")
204193
return
205194

206195
# It's ok for the dbdata/ directory to be temporary. It just matters that the .tgz is saved in a safe place.
@@ -236,9 +225,7 @@ def _create_dbdata(
236225
# Only link at the end so that the link only ever points to a complete dbdata.
237226
dbdata_tgz_symlink_path = dbgym_workspace.link_result(dbdata_tgz_real_path)
238227
assert expected_dbdata_tgz_symlink_path.samefile(dbdata_tgz_symlink_path)
239-
logging.getLogger(DBGYM_LOGGER_NAME).info(
240-
f"Created dbdata in {dbdata_tgz_symlink_path}"
241-
)
228+
logging.info(f"Created dbdata in {dbdata_tgz_symlink_path}")
242229

243230

244231
def _generic_dbdata_setup(dbgym_workspace: DBGymWorkspace) -> None:
@@ -370,3 +357,9 @@ def _start_or_stop_postgres(
370357
f"./pg_ctl -D \"{dbdata_path}\" -o '-p {pgport}' stop",
371358
cwd=pgbin_path,
372359
)
360+
361+
362+
def sqlalchemy_conn_execute(
363+
conn: sqlalchemy.Connection, sql: str
364+
) -> sqlalchemy.engine.CursorResult[Any]:
365+
return conn.execute(text(sql))

0 commit comments

Comments
 (0)