Skip to content

Integration test CI #37

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 31 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
5e9e19e
added integration test to ci
wangpatrick57 Jul 18, 2024
d7b83d8
fixed bug of result_path var not existing
wangpatrick57 Jul 18, 2024
a472bd3
fixed type is Path checks to use isinstance
wangpatrick57 Jul 18, 2024
d252e41
fixed bug where var wasn't path
wangpatrick57 Jul 18, 2024
62516b4
replaced most uses of os.path.samefile with Path.samefile
wangpatrick57 Jul 18, 2024
6889d1e
fixed bug where link_result didn't add .link to the end of names
wangpatrick57 Jul 18, 2024
42b5727
fixed bug where save_file() was still taking in regular paths
wangpatrick57 Jul 18, 2024
7d1cab4
changed save_file() to what it used to be
wangpatrick57 Jul 18, 2024
837fadc
using parent_dpath_of_path and basename_of_path instead of os.path fu…
wangpatrick57 Jul 18, 2024
05a9497
fixed comment
wangpatrick57 Jul 18, 2024
7dd2bd5
fixed bug of putting dbdata in dbgym_tmp_path during embedding datagen
wangpatrick57 Jul 19, 2024
5b98b6e
fixed bug where we were initializing ray with the boot redis port
wangpatrick57 Jul 19, 2024
979f8ab
moved stuff to integration_test.sh
wangpatrick57 Jul 19, 2024
ca7dc7d
added integtest with its own config
wangpatrick57 Jul 23, 2024
9dddf56
made integration_test.sh export the config path envvar
wangpatrick57 Jul 24, 2024
1ae656f
changed ssh links to http
wangpatrick57 Jul 24, 2024
66ae6dd
testing cargo in gha
wangpatrick57 Jul 24, 2024
c967df7
. cargo env at start of integtest step
wangpatrick57 Jul 24, 2024
72807e3
made ssh into https link
wangpatrick57 Jul 24, 2024
43d4125
added intended dbdata hardware to integtest.sh
wangpatrick57 Jul 24, 2024
dfc521b
now switching to phw2
wangpatrick57 Jul 24, 2024
54053c4
viewing users
wangpatrick57 Jul 30, 2024
91dea10
runner instead of phw2
wangpatrick57 Jul 30, 2024
822c446
whoami
wangpatrick57 Jul 30, 2024
d2f2d29
using sudo -u instead of su to switch to runner
wangpatrick57 Jul 30, 2024
71053ec
del whoami and sudo -u. tried changing user in docker compose itself
wangpatrick57 Jul 30, 2024
de2ac2b
rerun
wangpatrick57 Jul 30, 2024
c8321a6
rerun2
wangpatrick57 Aug 5, 2024
6ab4239
rerun
wangpatrick57 Aug 5, 2024
e845ab9
rerun2
wangpatrick57 Aug 5, 2024
657c870
cleanup
wangpatrick57 Aug 5, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Unit Tests
name: Unit and Integration Tests

on:
push: {}
Expand Down Expand Up @@ -26,10 +26,22 @@ jobs:
# Note that the GHA runners are stateful. Dependencies installed from previous runs will still be on the runner.
# This means this step will usually be pretty fast as most dependencies will already be cached. However, it also
# means that past runs might interfere with the current run, so you sometimes may need to restart the GHA runners.

# We need to do `. "$HOME/.cargo/env"` in each step for it to work.
- name: Install dependencies
run: |
./dependencies/install_dependencies.sh
. "$HOME/.cargo/env"

- name: Run unit tests
run: python scripts/run_unittests.py
run: |
. "$HOME/.cargo/env"
python scripts/run_unit_tests.py

- name: Run integration test
# Delete the workspace. Run once with a clean workspace. Run again from the existing workspace.
# Need to run with a non-root user in order to start Postgres.
run: |
. "$HOME/.cargo/env"
rm -rf ../dbgym_integtest_workspace
./scripts/integration_test.sh ssd
./scripts/integration_test.sh ssd
18 changes: 9 additions & 9 deletions benchmark/tpch/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def _clone(dbgym_cfg: DBGymConfig):
f"./tpch_setup.sh {real_build_path}", cwd=dbgym_cfg.cur_source_path()
)
symlink_dpath = link_result(dbgym_cfg, real_build_path / "tpch-kit")
assert os.path.samefile(expected_symlink_dpath, symlink_dpath)
assert expected_symlink_dpath.samefile(symlink_dpath)
benchmark_tpch_logger.info(f"Cloned: {expected_symlink_dpath}")


Expand Down Expand Up @@ -97,7 +97,7 @@ def _generate_queries(dbgym_cfg: DBGymConfig, seed_start: int, seed_end: int, sc
verbose=False,
)
queries_symlink_dpath = link_result(dbgym_cfg, real_dir)
assert os.path.samefile(queries_symlink_dpath, expected_queries_symlink_dpath)
assert queries_symlink_dpath.samefile(expected_queries_symlink_dpath)
benchmark_tpch_logger.info(
f"Generated queries: {data_path} [{seed_start}, {seed_end}]"
)
Expand All @@ -119,7 +119,7 @@ def _generate_data(dbgym_cfg: DBGymConfig, scale_factor: float):
subprocess_run(f"mv ./*.tbl {real_dir}", cwd=tpch_kit_dpath / "dbgen")

tables_symlink_dpath = link_result(dbgym_cfg, real_dir)
assert os.path.samefile(tables_symlink_dpath, expected_tables_symlink_dpath)
assert tables_symlink_dpath.samefile(expected_tables_symlink_dpath)
benchmark_tpch_logger.info(f"Generated: {expected_tables_symlink_dpath}")


Expand All @@ -130,12 +130,12 @@ def _generate_workload(
query_subset: str,
scale_factor: float,
):
symlink_data_dir = dbgym_cfg.cur_symlinks_data_path(mkdir=True)
symlink_data_dpath = dbgym_cfg.cur_symlinks_data_path(mkdir=True)
workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset)
expected_workload_symlink_dpath = symlink_data_dir / (workload_name + ".link")
expected_workload_symlink_dpath = symlink_data_dpath / (workload_name + ".link")

benchmark_tpch_logger.info(f"Generating: {expected_workload_symlink_dpath}")
real_dir = dbgym_cfg.cur_task_runs_data_path(
real_dpath = dbgym_cfg.cur_task_runs_data_path(
workload_name, mkdir=True
)

Expand All @@ -147,15 +147,15 @@ def _generate_workload(
elif query_subset == "odd":
queries = [f"{i}" for i in range(1, 22 + 1) if i % 2 == 1]

with open(real_dir / "order.txt", "w") as f:
with open(real_dpath / "order.txt", "w") as f:
for seed in range(seed_start, seed_end + 1):
for qnum in queries:
sql_fpath = (symlink_data_dir / (_get_queries_dname(seed, scale_factor) + ".link")).resolve() / f"{qnum}.sql"
sql_fpath = (symlink_data_dpath / (_get_queries_dname(seed, scale_factor) + ".link")).resolve() / f"{qnum}.sql"
assert sql_fpath.exists() and not sql_fpath.is_symlink() and sql_fpath.is_absolute(), "We should only write existent real absolute paths to a file"
output = ",".join([f"S{seed}-Q{qnum}", str(sql_fpath)])
print(output, file=f)
# TODO(WAN): add option to deep-copy the workload.

workload_symlink_dpath = link_result(dbgym_cfg, real_dir)
workload_symlink_dpath = link_result(dbgym_cfg, real_dpath)
assert workload_symlink_dpath == expected_workload_symlink_dpath
benchmark_tpch_logger.info(f"Generated: {expected_workload_symlink_dpath}")
2 changes: 1 addition & 1 deletion benchmark/tpch/tpch_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ TPCH_REPO_ROOT="$1"
if [ ! -d "${TPCH_REPO_ROOT}/tpch-kit" ]; then
mkdir -p "${TPCH_REPO_ROOT}"
cd "${TPCH_REPO_ROOT}"
git clone git@github.com:lmwnshn/tpch-kit.git --single-branch --branch master --depth 1
git clone https://github.com/lmwnshn/tpch-kit.git --single-branch --branch master --depth 1
cd ./tpch-kit/dbgen
make MACHINE=LINUX DATABASE=POSTGRESQL
fi
File renamed without changes.
4 changes: 2 additions & 2 deletions dbms/postgres/build_repo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ REPO_REAL_PARENT_DPATH="$1"
# Download and make postgres from the boot repository.
mkdir -p "${REPO_REAL_PARENT_DPATH}"
cd "${REPO_REAL_PARENT_DPATH}"
git clone git@github.com:lmwnshn/boot.git --single-branch --branch vldb_2024 --depth 1
git clone https://github.com/lmwnshn/boot.git --single-branch --branch vldb_2024 --depth 1
cd ./boot
./cmudb/build/configure.sh release "${REPO_REAL_PARENT_DPATH}/boot/build/postgres"
make clean
Expand All @@ -25,7 +25,7 @@ make install -j
cd "${REPO_REAL_PARENT_DPATH}/boot"

# Download and make hypopg.
git clone git@github.com:HypoPG/hypopg.git
git clone https://github.com/HypoPG/hypopg.git
cd ./hypopg
PG_CONFIG="${REPO_REAL_PARENT_DPATH}/boot/build/postgres/bin/pg_config" make install
cd "${REPO_REAL_PARENT_DPATH}/boot"
Expand Down
2 changes: 1 addition & 1 deletion dbms/postgres/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def _build_repo(dbgym_cfg: DBGymConfig, rebuild):

# only link at the end so that the link only ever points to a complete repo
repo_symlink_dpath = link_result(dbgym_cfg, repo_real_dpath)
assert os.path.samefile(expected_repo_symlink_dpath, repo_symlink_dpath)
assert expected_repo_symlink_dpath.samefile(repo_symlink_dpath)
dbms_postgres_logger.info(f"Set up repo in {expected_repo_symlink_dpath}")


Expand Down
8 changes: 4 additions & 4 deletions manage/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,11 @@ def clean_workspace(dbgym_cfg: DBGymConfig, mode: str="safe", verbose=False) ->
if not is_child_path(real_fordpath, dbgym_cfg.dbgym_runs_path):
continue

assert not os.path.samefile(real_fordpath, dbgym_cfg.dbgym_runs_path)
assert not real_fordpath.samefile(dbgym_cfg.dbgym_runs_path)

# Figure out the task_run_child_fordpath to put into task_run_child_fordpaths_to_keep
task_run_child_fordpath = None
if os.path.samefile(parent_dpath_of_path(real_fordpath), dbgym_cfg.dbgym_runs_path):
if parent_dpath_of_path(real_fordpath).samefile(dbgym_cfg.dbgym_runs_path):
# While it's true that it shouldn't be possible to symlink to a directory directly in task_runs/,
# we'll just not delete it if the user happens to have one like this. Even if the user messed up
# the structure somehow, it's just a good idea not to delete it.
Expand All @@ -183,10 +183,10 @@ def clean_workspace(dbgym_cfg: DBGymConfig, mode: str="safe", verbose=False) ->
# However, as with above, we won't just nuke files if the workspace doesn't follow this rule for
# some reason.
task_run_child_fordpath = real_fordpath
while not os.path.samefile(parent_dpath_of_path(task_run_child_fordpath), dbgym_cfg.dbgym_runs_path):
while not parent_dpath_of_path(task_run_child_fordpath).samefile(dbgym_cfg.dbgym_runs_path):
task_run_child_fordpath = parent_dpath_of_path(task_run_child_fordpath)
assert task_run_child_fordpath != None
assert os.path.samefile(parent_dpath_of_path(task_run_child_fordpath), dbgym_cfg.dbgym_runs_path), f"task_run_child_fordpath ({task_run_child_fordpath}) is not a direct child of dbgym_cfg.dbgym_runs_path"
assert parent_dpath_of_path(task_run_child_fordpath).samefile(dbgym_cfg.dbgym_runs_path), f"task_run_child_fordpath ({task_run_child_fordpath}) is not a direct child of dbgym_cfg.dbgym_runs_path"
task_run_child_fordpaths_to_keep.add(task_run_child_fordpath)

# If on safe mode, add symlinks inside the task_run_child_fordpath to be processed
Expand Down
Loading
Loading