diff --git a/benchmark/cli.py b/benchmark/cli.py index 45943a16..09001be6 100644 --- a/benchmark/cli.py +++ b/benchmark/cli.py @@ -1,8 +1,8 @@ import click +from gymlib.workspace import DBGymWorkspace from benchmark.job.cli import job_group from benchmark.tpch.cli import tpch_group -from util.workspace import DBGymWorkspace @click.group(name="benchmark") diff --git a/benchmark/job/cli.py b/benchmark/job/cli.py index 066fa3cc..122473bc 100644 --- a/benchmark/job/cli.py +++ b/benchmark/job/cli.py @@ -2,17 +2,15 @@ from typing import Optional import click -from gymlib.symlinks_paths import ( +from gymlib.infra_paths import ( get_tables_dirname, get_workload_dirname, get_workload_suffix, - name_to_linkname, ) +from gymlib.workspace import DBGymWorkspace, fully_resolve_path, name_to_linkname from benchmark.constants import DEFAULT_SCALE_FACTOR -from util.log import DBGYM_LOGGER_NAME from util.shell import subprocess_run -from util.workspace import DBGymWorkspace, fully_resolve_path JOB_TABLES_URL = "https://event.cwi.nl/da/job/imdb.tgz" JOB_QUERIES_URL = "https://event.cwi.nl/da/job/job.tgz" @@ -213,12 +211,10 @@ def _download_and_untar_dir( dbgym_workspace.dbgym_cur_symlinks_path / f"{untarred_dname}.link" ) if expected_symlink_path.exists(): - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Skipping download: {expected_symlink_path}" - ) + logging.info(f"Skipping download: {expected_symlink_path}") return - logging.getLogger(DBGYM_LOGGER_NAME).info(f"Downloading: {expected_symlink_path}") + logging.info(f"Downloading: {expected_symlink_path}") subprocess_run(f"curl -O {download_url}", cwd=dbgym_workspace.dbgym_this_run_path) untarred_data_path = dbgym_workspace.dbgym_this_run_path / untarred_dname @@ -243,7 +239,7 @@ def _download_and_untar_dir( ) symlink_path = dbgym_workspace.link_result(untarred_data_path) assert expected_symlink_path.samefile(symlink_path) - logging.getLogger(DBGYM_LOGGER_NAME).info(f"Downloaded: {expected_symlink_path}") + logging.info(f"Downloaded: {expected_symlink_path}") def _generate_job_workload( @@ -259,14 +255,10 @@ def _generate_job_workload( name_to_linkname(workload_name) ) if expected_workload_symlink_path.exists(): - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Skipping generation: {expected_workload_symlink_path}" - ) + logging.info(f"Skipping generation: {expected_workload_symlink_path}") return - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Generating: {expected_workload_symlink_path}" - ) + logging.info(f"Generating: {expected_workload_symlink_path}") workload_path = dbgym_workspace.dbgym_this_run_path / workload_name workload_path.mkdir(parents=False, exist_ok=False) @@ -291,6 +283,4 @@ def _generate_job_workload( workload_symlink_path = dbgym_workspace.link_result(workload_path) assert workload_symlink_path == expected_workload_symlink_path - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Generated: {expected_workload_symlink_path}" - ) + logging.info(f"Generated: {expected_workload_symlink_path}") diff --git a/benchmark/job/load_info.py b/benchmark/job/load_info.py index 8ea28f8a..d57c0f20 100644 --- a/benchmark/job/load_info.py +++ b/benchmark/job/load_info.py @@ -1,11 +1,11 @@ from pathlib import Path from typing import Optional -from gymlib.symlinks_paths import get_tables_symlink_path +from gymlib.infra_paths import get_tables_symlink_path +from gymlib.workspace import DBGymWorkspace, fully_resolve_path from benchmark.constants import DEFAULT_SCALE_FACTOR from dbms.load_info_base_class import LoadInfoBaseClass -from util.workspace import DBGymWorkspace, fully_resolve_path JOB_SCHEMA_FNAME = "job_schema.sql" diff --git a/benchmark/tests/integtest_benchmark.py b/benchmark/tests/integtest_benchmark.py index b4f4fdbc..d039e3ca 100644 --- a/benchmark/tests/integtest_benchmark.py +++ b/benchmark/tests/integtest_benchmark.py @@ -2,22 +2,22 @@ import unittest from pathlib import Path -from gymlib.symlinks_paths import ( +from gymlib.infra_paths import ( get_tables_symlink_path, get_workload_suffix, get_workload_symlink_path, ) +from gymlib.workspace import ( + DBGymWorkspace, + fully_resolve_path, + get_workspace_path_from_config, +) # It's ok to import private functions from the benchmark module because this is an integration test. from benchmark.constants import DEFAULT_SCALE_FACTOR from benchmark.job.cli import _job_tables, _job_workload from benchmark.tpch.cli import _tpch_tables, _tpch_workload from benchmark.tpch.constants import DEFAULT_TPCH_SEED -from util.workspace import ( - DBGymWorkspace, - fully_resolve_path, - get_workspace_path_from_config, -) class BenchmarkTests(unittest.TestCase): diff --git a/benchmark/tpch/cli.py b/benchmark/tpch/cli.py index c83fae75..200a1fff 100644 --- a/benchmark/tpch/cli.py +++ b/benchmark/tpch/cli.py @@ -1,21 +1,24 @@ import logging import click -from gymlib.symlinks_paths import ( +from gymlib.infra_paths import ( get_scale_factor_string, get_tables_dirname, get_tables_symlink_path, get_workload_suffix, get_workload_symlink_path, +) +from gymlib.workspace import ( + DBGymWorkspace, + fully_resolve_path, + is_fully_resolved, linkname_to_name, name_to_linkname, ) from benchmark.constants import DEFAULT_SCALE_FACTOR from benchmark.tpch.constants import DEFAULT_TPCH_SEED, NUM_TPCH_QUERIES -from util.log import DBGYM_LOGGER_NAME from util.shell import subprocess_run -from util.workspace import DBGymWorkspace, fully_resolve_path, is_fully_resolved TPCH_KIT_DIRNAME = "tpch-kit" @@ -102,12 +105,10 @@ def _clone_tpch_kit(dbgym_workspace: DBGymWorkspace) -> None: name_to_linkname(TPCH_KIT_DIRNAME) ) if expected_symlink_path.exists(): - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Skipping clone: {expected_symlink_path}" - ) + logging.info(f"Skipping clone: {expected_symlink_path}") return - logging.getLogger(DBGYM_LOGGER_NAME).info(f"Cloning: {expected_symlink_path}") + logging.info(f"Cloning: {expected_symlink_path}") subprocess_run( f"./clone_tpch_kit.sh {dbgym_workspace.dbgym_this_run_path}", cwd=dbgym_workspace.base_dbgym_repo_path / "benchmark" / "tpch", @@ -116,7 +117,7 @@ def _clone_tpch_kit(dbgym_workspace: DBGymWorkspace) -> None: dbgym_workspace.dbgym_this_run_path / TPCH_KIT_DIRNAME ) assert expected_symlink_path.samefile(symlink_path) - logging.getLogger(DBGYM_LOGGER_NAME).info(f"Cloned: {expected_symlink_path}") + logging.info(f"Cloned: {expected_symlink_path}") def _generate_tpch_queries( @@ -125,9 +126,7 @@ def _generate_tpch_queries( tpch_kit_path = dbgym_workspace.dbgym_cur_symlinks_path / ( name_to_linkname(TPCH_KIT_DIRNAME) ) - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Generating queries: [{seed_start}, {seed_end}]" - ) + logging.info(f"Generating queries: [{seed_start}, {seed_end}]") for seed in range(seed_start, seed_end + 1): expected_queries_symlink_path = dbgym_workspace.dbgym_cur_symlinks_path / ( name_to_linkname(_get_queries_dirname(seed, scale_factor)) @@ -149,9 +148,7 @@ def _generate_tpch_queries( ) queries_symlink_path = dbgym_workspace.link_result(queries_parent_path) assert queries_symlink_path.samefile(expected_queries_symlink_path) - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Generated queries: [{seed_start}, {seed_end}]" - ) + logging.info(f"Generated queries: [{seed_start}, {seed_end}]") def _generate_tpch_tables(dbgym_workspace: DBGymWorkspace, scale_factor: float) -> None: @@ -162,14 +159,10 @@ def _generate_tpch_tables(dbgym_workspace: DBGymWorkspace, scale_factor: float) dbgym_workspace.dbgym_workspace_path, "tpch", scale_factor ) if expected_tables_symlink_path.exists(): - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Skipping generation: {expected_tables_symlink_path}" - ) + logging.info(f"Skipping generation: {expected_tables_symlink_path}") return - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Generating: {expected_tables_symlink_path}" - ) + logging.info(f"Generating: {expected_tables_symlink_path}") subprocess_run(f"./dbgen -vf -s {scale_factor}", cwd=tpch_kit_path / "dbgen") tables_parent_path = dbgym_workspace.dbgym_this_run_path / get_tables_dirname( "tpch", scale_factor @@ -179,9 +172,7 @@ def _generate_tpch_tables(dbgym_workspace: DBGymWorkspace, scale_factor: float) tables_symlink_path = dbgym_workspace.link_result(tables_parent_path) assert tables_symlink_path.samefile(expected_tables_symlink_path) - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Generated: {expected_tables_symlink_path}" - ) + logging.info(f"Generated: {expected_tables_symlink_path}") def _generate_tpch_workload( @@ -200,14 +191,10 @@ def _generate_tpch_workload( ), ) if expected_workload_symlink_path.exists(): - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Skipping generation: {expected_workload_symlink_path}" - ) + logging.info(f"Skipping generation: {expected_workload_symlink_path}") return - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Generating: {expected_workload_symlink_path}" - ) + logging.info(f"Generating: {expected_workload_symlink_path}") workload_path = dbgym_workspace.dbgym_this_run_path / linkname_to_name( expected_workload_symlink_path.name ) @@ -238,6 +225,4 @@ def _generate_tpch_workload( workload_symlink_path = dbgym_workspace.link_result(workload_path) assert workload_symlink_path == expected_workload_symlink_path - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Generated: {expected_workload_symlink_path}" - ) + logging.info(f"Generated: {expected_workload_symlink_path}") diff --git a/benchmark/tpch/load_info.py b/benchmark/tpch/load_info.py index a717173c..2b1052c9 100644 --- a/benchmark/tpch/load_info.py +++ b/benchmark/tpch/load_info.py @@ -1,10 +1,10 @@ from pathlib import Path from typing import Optional -from gymlib.symlinks_paths import get_tables_symlink_path +from gymlib.infra_paths import get_tables_symlink_path +from gymlib.workspace import DBGymWorkspace, fully_resolve_path from dbms.load_info_base_class import LoadInfoBaseClass -from util.workspace import DBGymWorkspace, fully_resolve_path TPCH_SCHEMA_FNAME = "tpch_schema.sql" TPCH_CONSTRAINTS_FNAME = "tpch_constraints.sql" diff --git a/dbms/cli.py b/dbms/cli.py index 2804b79b..352b8f11 100644 --- a/dbms/cli.py +++ b/dbms/cli.py @@ -1,7 +1,7 @@ import click +from gymlib.workspace import DBGymWorkspace from dbms.postgres.cli import postgres_group -from util.workspace import DBGymWorkspace @click.group(name="dbms") diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py index 28dd1620..9b32608a 100644 --- a/dbms/postgres/cli.py +++ b/dbms/postgres/cli.py @@ -6,42 +6,39 @@ import shutil import subprocess from pathlib import Path -from typing import Optional +from typing import Any, Optional import click import sqlalchemy -from gymlib.symlinks_paths import ( +from gymlib.infra_paths import ( get_dbdata_tgz_symlink_path, get_pgbin_symlink_path, get_repo_symlink_path, - linkname_to_name, -) - -from benchmark.constants import DEFAULT_SCALE_FACTOR -from benchmark.job.load_info import JobLoadInfo -from benchmark.tpch.load_info import TpchLoadInfo -from dbms.load_info_base_class import LoadInfoBaseClass -from util.log import DBGYM_LOGGER_NAME -from util.pg import ( - DBGYM_POSTGRES_DBNAME, - DBGYM_POSTGRES_PASS, - DBGYM_POSTGRES_USER, - DEFAULT_POSTGRES_DBNAME, - DEFAULT_POSTGRES_PORT, - SHARED_PRELOAD_LIBRARIES, - create_sqlalchemy_conn, - sql_file_execute, - sqlalchemy_conn_execute, ) -from util.shell import subprocess_run -from util.workspace import ( +from gymlib.pg import create_sqlalchemy_conn, sql_file_execute +from gymlib.workspace import ( WORKSPACE_PATH_PLACEHOLDER, DBGymWorkspace, fully_resolve_path, get_tmp_path_from_workspace_path, is_fully_resolved, is_ssd, + linkname_to_name, ) +from sqlalchemy import text + +from benchmark.constants import DEFAULT_SCALE_FACTOR +from benchmark.job.load_info import JobLoadInfo +from benchmark.tpch.load_info import TpchLoadInfo +from dbms.load_info_base_class import LoadInfoBaseClass +from util.shell import subprocess_run + +DBGYM_POSTGRES_USER = "dbgym_user" +DBGYM_POSTGRES_PASS = "dbgym_pass" +DBGYM_POSTGRES_DBNAME = "dbgym" +DEFAULT_POSTGRES_DBNAME = "postgres" +DEFAULT_POSTGRES_PORT = 5432 +SHARED_PRELOAD_LIBRARIES = "boot,pg_hint_plan,pg_prewarm" @click.group(name="postgres") @@ -72,14 +69,10 @@ def _postgres_build(dbgym_workspace: DBGymWorkspace, rebuild: bool) -> None: dbgym_workspace.dbgym_workspace_path ) if not rebuild and expected_repo_symlink_path.exists(): - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Skipping _postgres_build: {expected_repo_symlink_path}" - ) + logging.info(f"Skipping _postgres_build: {expected_repo_symlink_path}") return - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Setting up repo in {expected_repo_symlink_path}" - ) + logging.info(f"Setting up repo in {expected_repo_symlink_path}") repo_real_path = dbgym_workspace.dbgym_this_run_path / "repo" repo_real_path.mkdir(parents=False, exist_ok=False) subprocess_run( @@ -90,9 +83,7 @@ def _postgres_build(dbgym_workspace: DBGymWorkspace, rebuild: bool) -> None: # only link at the end so that the link only ever points to a complete repo repo_symlink_path = dbgym_workspace.link_result(repo_real_path) assert expected_repo_symlink_path.samefile(repo_symlink_path) - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Set up repo in {expected_repo_symlink_path}" - ) + logging.info(f"Set up repo in {expected_repo_symlink_path}") @postgres_group.command( @@ -198,9 +189,7 @@ def _create_dbdata( scale_factor, ) if expected_dbdata_tgz_symlink_path.exists(): - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Skipping _create_dbdata: {expected_dbdata_tgz_symlink_path}" - ) + logging.info(f"Skipping _create_dbdata: {expected_dbdata_tgz_symlink_path}") return # It's ok for the dbdata/ directory to be temporary. It just matters that the .tgz is saved in a safe place. @@ -236,9 +225,7 @@ def _create_dbdata( # Only link at the end so that the link only ever points to a complete dbdata. dbdata_tgz_symlink_path = dbgym_workspace.link_result(dbdata_tgz_real_path) assert expected_dbdata_tgz_symlink_path.samefile(dbdata_tgz_symlink_path) - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Created dbdata in {dbdata_tgz_symlink_path}" - ) + logging.info(f"Created dbdata in {dbdata_tgz_symlink_path}") def _generic_dbdata_setup(dbgym_workspace: DBGymWorkspace) -> None: @@ -370,3 +357,9 @@ def _start_or_stop_postgres( f"./pg_ctl -D \"{dbdata_path}\" -o '-p {pgport}' stop", cwd=pgbin_path, ) + + +def sqlalchemy_conn_execute( + conn: sqlalchemy.Connection, sql: str +) -> sqlalchemy.engine.CursorResult[Any]: + return conn.execute(text(sql)) diff --git a/dbms/tests/integtest_dbms.py b/dbms/tests/integtest_dbms.py index 822e8aac..92ad4b5e 100644 --- a/dbms/tests/integtest_dbms.py +++ b/dbms/tests/integtest_dbms.py @@ -3,16 +3,16 @@ import unittest from pathlib import Path -from gymlib.symlinks_paths import get_dbdata_tgz_symlink_path, get_repo_symlink_path - -from benchmark.tpch.cli import _tpch_tables -from dbms.postgres.cli import _postgres_build, _postgres_dbdata -from util.workspace import ( +from gymlib.infra_paths import get_dbdata_tgz_symlink_path, get_repo_symlink_path +from gymlib.workspace import ( DBGymWorkspace, fully_resolve_path, get_workspace_path_from_config, ) +from benchmark.tpch.cli import _tpch_tables +from dbms.postgres.cli import _postgres_build, _postgres_dbdata + class DBMSTests(unittest.TestCase): DBGYM_CONFIG_PATH = Path("dbms/tests/dbms_integtest_dbgym_config.yaml") diff --git a/env/__init__.py b/gymlib_package/__init__.py similarity index 100% rename from env/__init__.py rename to gymlib_package/__init__.py diff --git a/gymlib_package/gymlib/__init__.py b/gymlib_package/gymlib/__init__.py index 99257dda..b55d93cf 100644 --- a/gymlib_package/gymlib/__init__.py +++ b/gymlib_package/gymlib/__init__.py @@ -1 +1 @@ -from . import magic, symlinks_paths +from . import infra_paths, workspace diff --git a/gymlib_package/gymlib/symlinks_paths.py b/gymlib_package/gymlib/infra_paths.py similarity index 85% rename from gymlib_package/gymlib/symlinks_paths.py rename to gymlib_package/gymlib/infra_paths.py index 90736adf..04f20b71 100644 --- a/gymlib_package/gymlib/symlinks_paths.py +++ b/gymlib_package/gymlib/infra_paths.py @@ -1,9 +1,12 @@ +""" +"Infra" refers to benchmark/ and dbms/. These are all the paths used to access the files created by benchmark/ and dbms/. +They're inside gymlib because agents will need to access them. +""" + from pathlib import Path from typing import Any -# TODO: move these into workspace.py and move workspace.py into gymlib. -SYMLINKS_DNAME = "symlinks" -DBGYM_APP_NAME = "dbgym" +from gymlib.workspace import DBGYM_APP_NAME, SYMLINKS_DNAME, name_to_linkname SCALE_FACTOR_PLACEHOLDER: str = "[scale_factor]" BENCHMARK_NAME_PLACEHOLDER: str = "[benchmark_name]" @@ -82,18 +85,3 @@ def get_dbdata_tgz_symlink_path( / DBGYM_APP_NAME / name_to_linkname(get_dbdata_tgz_filename(benchmark_name, scale_factor)) ) - - -def is_linkname(name: str) -> bool: - assert not name.endswith(".link.link") - return name.endswith(".link") - - -def name_to_linkname(name: str) -> str: - assert not is_linkname(name) - return f"{name}.link" - - -def linkname_to_name(linkname: str) -> str: - assert is_linkname(linkname) - return linkname[: -len(".link")] diff --git a/gymlib_package/gymlib/magic.py b/gymlib_package/gymlib/magic.py deleted file mode 100644 index 4c95af05..00000000 --- a/gymlib_package/gymlib/magic.py +++ /dev/null @@ -1,2 +0,0 @@ -def get_magic_number() -> int: - return 42 diff --git a/util/pg.py b/gymlib_package/gymlib/pg.py similarity index 98% rename from util/pg.py rename to gymlib_package/gymlib/pg.py index 23c06b60..0328fecb 100644 --- a/util/pg.py +++ b/gymlib_package/gymlib/pg.py @@ -9,10 +9,9 @@ import psutil import psycopg import sqlalchemy +from gymlib.workspace import DBGymWorkspace from sqlalchemy import create_engine, text -from util.workspace import DBGymWorkspace - DBGYM_POSTGRES_USER = "dbgym_user" DBGYM_POSTGRES_PASS = "dbgym_pass" DBGYM_POSTGRES_DBNAME = "dbgym" diff --git a/env/pg_conn.py b/gymlib_package/gymlib/pg_conn.py similarity index 92% rename from env/pg_conn.py rename to gymlib_package/gymlib/pg_conn.py index 7aa4e5d8..dea668a6 100644 --- a/env/pg_conn.py +++ b/gymlib_package/gymlib/pg_conn.py @@ -19,13 +19,11 @@ import psutil import psycopg import yaml +from gymlib.pg import DBGYM_POSTGRES_DBNAME, SHARED_PRELOAD_LIBRARIES, get_kv_connstr +from gymlib.workspace import DBGymWorkspace, parent_path_of_path from plumbum import local from psycopg.errors import ProgramLimitExceeded, QueryCanceled -from util.log import DBGYM_LOGGER_NAME -from util.pg import DBGYM_POSTGRES_DBNAME, SHARED_PRELOAD_LIBRARIES, get_kv_connstr -from util.workspace import DBGymWorkspace, parent_path_of_path - CONNECT_TIMEOUT = 300 @@ -177,14 +175,10 @@ def time_query( qid_runtime = float(c["Execution Time"]) * 1e3 explain_data = c - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"{query} evaluated in {qid_runtime/1e6}" - ) + logging.debug(f"{query} evaluated in {qid_runtime/1e6}") except QueryCanceled: - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"{query} exceeded evaluation timeout {timeout}" - ) + logging.debug(f"{query} exceeded evaluation timeout {timeout}") qid_runtime = timeout * 1e6 did_time_out = True except Exception as e: @@ -203,14 +197,12 @@ def shutdown_postgres(self) -> None: return while True: - logging.getLogger(DBGYM_LOGGER_NAME).debug("Shutting down postgres...") + logging.debug("Shutting down postgres...") _, stdout, stderr = local[f"{self.pgbin_path}/pg_ctl"][ "stop", "--wait", "-t", "180", "-D", self.dbdata_path ].run(retcode=None) time.sleep(1) - logging.getLogger(DBGYM_LOGGER_NAME).debug( - "Stop message: (%s, %s)", stdout, stderr - ) + logging.debug("Stop message: (%s, %s)", stdout, stderr) # Wait until pg_isready fails. retcode, _, _ = local[f"{self.pgbin_path}/pg_isready"][ @@ -303,12 +295,10 @@ def restart_with_changes( if retcode == 0 or pid_lock.exists(): break - logging.getLogger(DBGYM_LOGGER_NAME).warning( - "startup encountered: (%s, %s)", stdout, stderr - ) + logging.warning("startup encountered: (%s, %s)", stdout, stderr) attempts += 1 if attempts >= 5: - logging.getLogger(DBGYM_LOGGER_NAME).error( + logging.error( "Number of attempts to start postgres has exceeded limit." ) assert False, "Could not start postgres." @@ -318,9 +308,7 @@ def restart_with_changes( while True: if num_cycles >= CONNECT_TIMEOUT: # In this case, we've failed to start postgres. - logging.getLogger(DBGYM_LOGGER_NAME).error( - "Failed to start postgres before timeout..." - ) + logging.error("Failed to start postgres before timeout...") return False retcode, _, _ = local[f"{self.pgbin_path}/pg_isready"][ @@ -336,9 +324,7 @@ def restart_with_changes( time.sleep(1) num_cycles += 1 - logging.getLogger(DBGYM_LOGGER_NAME).debug( - "Waiting for postgres to bootup but it is not..." - ) + logging.debug("Waiting for postgres to bootup but it is not...") # Set up Boot if we're told to do so if self.boot_config_path is not None: @@ -384,7 +370,7 @@ def _set_up_boot( """ # If any of these commands fail, they'll throw a Python exception # Thus, if none of them throw an exception, we know they passed - logging.getLogger(DBGYM_LOGGER_NAME).debug("Setting up boot") + logging.debug("Setting up boot") self.conn().execute("DROP EXTENSION IF EXISTS boot") self.conn().execute("CREATE EXTENSION IF NOT EXISTS boot") self.conn().execute("SELECT boot_connect()") @@ -399,7 +385,7 @@ def _set_up_boot( self.conn().execute(f"SET boot.mu_hyp_opt={mu_hyp_opt}") self.conn().execute(f"SET boot.mu_hyp_time={mu_hyp_time}") self.conn().execute(f"SET boot.mu_hyp_stdev={mu_hyp_stdev}") - logging.getLogger(DBGYM_LOGGER_NAME).debug("Set up boot") + logging.debug("Set up boot") def psql(self, sql: str) -> tuple[int, Optional[str]]: """ @@ -425,7 +411,7 @@ def cancel_fn(conn_str: str) -> None: ] for row in r: - logging.getLogger(DBGYM_LOGGER_NAME).info(f"Killing process {row[0]}") + logging.info(f"Killing process {row[0]}") try: psutil.Process(row[0]).kill() except: @@ -447,17 +433,17 @@ def cancel_fn(conn_str: str) -> None: except ProgramLimitExceeded as e: timer.cancel() self.disconnect() - logging.getLogger(DBGYM_LOGGER_NAME).debug(f"Action error: {e}") + logging.debug(f"Action error: {e}") return -1, str(e) except QueryCanceled as e: timer.cancel() self.disconnect() - logging.getLogger(DBGYM_LOGGER_NAME).debug(f"Action error: {e}") + logging.debug(f"Action error: {e}") return -1, f"canceling statement: {sql}." except psycopg.OperationalError as e: timer.cancel() self.disconnect() - logging.getLogger(DBGYM_LOGGER_NAME).debug(f"Action error: {e}") + logging.debug(f"Action error: {e}") return -1, f"operational error: {sql}." except psycopg.errors.UndefinedTable: timer.cancel() diff --git a/env/tests/__init__.py b/gymlib_package/gymlib/tests/__init__.py similarity index 100% rename from env/tests/__init__.py rename to gymlib_package/gymlib/tests/__init__.py diff --git a/env/tests/_set_up_gymlib_integtest_workspace.sh b/gymlib_package/gymlib/tests/_set_up_gymlib_integtest_workspace.sh similarity index 100% rename from env/tests/_set_up_gymlib_integtest_workspace.sh rename to gymlib_package/gymlib/tests/_set_up_gymlib_integtest_workspace.sh diff --git a/util/tests/filesystem_unittest_util.py b/gymlib_package/gymlib/tests/filesystem_unittest_util.py similarity index 96% rename from util/tests/filesystem_unittest_util.py rename to gymlib_package/gymlib/tests/filesystem_unittest_util.py index 37997c71..cccb500f 100644 --- a/util/tests/filesystem_unittest_util.py +++ b/gymlib_package/gymlib/tests/filesystem_unittest_util.py @@ -3,6 +3,8 @@ from pathlib import Path from typing import Any, NewType, cast +from gymlib.workspace import RUNS_DNAME, SYMLINKS_DNAME, TMP_DNAME + FilesystemStructure = NewType("FilesystemStructure", dict[str, Any]) @@ -113,9 +115,9 @@ def make_workspace_structure( return FilesystemStructure( { "dbgym_workspace": { - "symlinks": symlinks_structure, - "task_runs": task_runs_structure, - "tmp": {}, + SYMLINKS_DNAME: symlinks_structure, + RUNS_DNAME: task_runs_structure, + TMP_DNAME: {}, } } ) diff --git a/env/tests/gymlib_integtest_dbgym_config.yaml b/gymlib_package/gymlib/tests/gymlib_integtest_dbgym_config.yaml similarity index 100% rename from env/tests/gymlib_integtest_dbgym_config.yaml rename to gymlib_package/gymlib/tests/gymlib_integtest_dbgym_config.yaml diff --git a/env/tests/gymlib_integtest_util.py b/gymlib_package/gymlib/tests/gymlib_integtest_util.py similarity index 91% rename from env/tests/gymlib_integtest_util.py rename to gymlib_package/gymlib/tests/gymlib_integtest_util.py index 44ec3c4c..e58cf39b 100644 --- a/env/tests/gymlib_integtest_util.py +++ b/gymlib_package/gymlib/tests/gymlib_integtest_util.py @@ -3,23 +3,22 @@ from pathlib import Path from typing import Optional -# TODO: remove symlinks_paths from the import -from gymlib.symlinks_paths import ( +# TODO: remove infra_paths from the import +from gymlib.infra_paths import ( get_dbdata_tgz_symlink_path, get_pgbin_symlink_path, get_workload_suffix, get_workload_symlink_path, ) - -from benchmark.tpch.constants import DEFAULT_TPCH_SEED -from env.tuning_artifacts import TuningMetadata -from util.workspace import ( - DBGymWorkspace, +from gymlib.tuning_artifacts import TuningMetadata +from gymlib.workspace import ( fully_resolve_path, get_tmp_path_from_workspace_path, get_workspace_path_from_config, ) +from benchmark.tpch.constants import DEFAULT_TPCH_SEED + class GymlibIntegtestManager: """ @@ -34,7 +33,9 @@ class GymlibIntegtestManager: # These constants are also used by _set_up_gymlib_integtest_workspace.sh. BENCHMARK = "tpch" SCALE_FACTOR = 0.01 - DBGYM_CONFIG_PATH = Path("env/tests/gymlib_integtest_dbgym_config.yaml") + DBGYM_CONFIG_PATH = Path( + "gymlib_package/gymlib/tests/gymlib_integtest_dbgym_config.yaml" + ) WORKSPACE_PATH: Optional[Path] = None @staticmethod @@ -50,7 +51,7 @@ def set_up_workspace() -> None: # This if statement prevents us from setting up the workspace twice, which saves time. if not GymlibIntegtestManager.WORKSPACE_PATH.exists(): subprocess.run( - ["./env/tests/_set_up_gymlib_integtest_workspace.sh"], + ["./gymlib_package/gymlib/tests/_set_up_gymlib_integtest_workspace.sh"], env={ "BENCHMARK": GymlibIntegtestManager.BENCHMARK, "SCALE_FACTOR": str(GymlibIntegtestManager.SCALE_FACTOR), diff --git a/env/tests/integtest_pg_conn.py b/gymlib_package/gymlib/tests/integtest_pg_conn.py similarity index 97% rename from env/tests/integtest_pg_conn.py rename to gymlib_package/gymlib/tests/integtest_pg_conn.py index da586398..d090ea62 100644 --- a/env/tests/integtest_pg_conn.py +++ b/gymlib_package/gymlib/tests/integtest_pg_conn.py @@ -2,15 +2,14 @@ import unittest import psycopg - -from env.pg_conn import PostgresConn -from env.tests.gymlib_integtest_util import GymlibIntegtestManager -from util.pg import ( +from gymlib.pg import ( DEFAULT_POSTGRES_PORT, get_is_postgres_running, get_running_postgres_ports, ) -from util.workspace import DBGymWorkspace +from gymlib.pg_conn import PostgresConn +from gymlib.tests.gymlib_integtest_util import GymlibIntegtestManager +from gymlib.workspace import DBGymWorkspace class PostgresConnTests(unittest.TestCase): diff --git a/env/tests/integtest_tuning_artifacts.py b/gymlib_package/gymlib/tests/integtest_tuning_artifacts.py similarity index 95% rename from env/tests/integtest_tuning_artifacts.py rename to gymlib_package/gymlib/tests/integtest_tuning_artifacts.py index 1baa1fdb..6188ec86 100644 --- a/env/tests/integtest_tuning_artifacts.py +++ b/gymlib_package/gymlib/tests/integtest_tuning_artifacts.py @@ -1,7 +1,7 @@ import unittest -from env.tests.gymlib_integtest_util import GymlibIntegtestManager -from env.tuning_artifacts import ( +from gymlib.tests.gymlib_integtest_util import GymlibIntegtestManager +from gymlib.tuning_artifacts import ( DBMSConfigDelta, IndexesDelta, QueryKnobsDelta, @@ -9,7 +9,7 @@ TuningArtifactsReader, TuningArtifactsWriter, ) -from util.workspace import DBGymWorkspace +from gymlib.workspace import DBGymWorkspace class PostgresConnTests(unittest.TestCase): diff --git a/env/tests/integtest_workload.py b/gymlib_package/gymlib/tests/integtest_workload.py similarity index 87% rename from env/tests/integtest_workload.py rename to gymlib_package/gymlib/tests/integtest_workload.py index 494e0365..2066c39d 100644 --- a/env/tests/integtest_workload.py +++ b/gymlib_package/gymlib/tests/integtest_workload.py @@ -1,9 +1,10 @@ import unittest +from gymlib.tests.gymlib_integtest_util import GymlibIntegtestManager +from gymlib.workload import Workload +from gymlib.workspace import DBGymWorkspace + from benchmark.tpch.constants import DEFAULT_TPCH_SEED, NUM_TPCH_QUERIES -from env.tests.gymlib_integtest_util import GymlibIntegtestManager -from env.workload import Workload -from util.workspace import DBGymWorkspace class WorkloadTests(unittest.TestCase): diff --git a/util/tests/unittest_filesystem_unittest_util.py b/gymlib_package/gymlib/tests/unittest_filesystem_unittest_util.py similarity index 98% rename from util/tests/unittest_filesystem_unittest_util.py rename to gymlib_package/gymlib/tests/unittest_filesystem_unittest_util.py index 04f3fe7d..199f5a8c 100644 --- a/util/tests/unittest_filesystem_unittest_util.py +++ b/gymlib_package/gymlib/tests/unittest_filesystem_unittest_util.py @@ -3,7 +3,7 @@ import unittest from pathlib import Path -from util.tests.filesystem_unittest_util import ( +from gymlib.tests.filesystem_unittest_util import ( FilesystemStructure, create_structure, verify_structure, diff --git a/util/tests/unittest_workspace.py b/gymlib_package/gymlib/tests/unittest_workspace.py similarity index 79% rename from util/tests/unittest_workspace.py rename to gymlib_package/gymlib/tests/unittest_workspace.py index 0575b8b3..5cb77f89 100644 --- a/util/tests/unittest_workspace.py +++ b/gymlib_package/gymlib/tests/unittest_workspace.py @@ -6,13 +6,21 @@ from pathlib import Path from typing import Optional -from util.tests.filesystem_unittest_util import ( +from gymlib.tests.filesystem_unittest_util import ( FilesystemStructure, create_structure, make_workspace_structure, verify_structure, ) -from util.workspace import DBGymWorkspace +from gymlib.workspace import ( + DBGYM_APP_NAME, + RUNS_DNAME, + SYMLINKS_DNAME, + DBGymWorkspace, + name_to_linkname, +) + +from gymlib_package.gymlib.workspace import LATEST_RUN_FNAME class WorkspaceTests(unittest.TestCase): @@ -58,14 +66,14 @@ def init_workspace_helper(self) -> None: ), ) else: - self.expected_structure["dbgym_workspace"]["task_runs"][ + self.expected_structure["dbgym_workspace"][RUNS_DNAME][ self.workspace.dbgym_this_run_path.name ] = {} - self.expected_structure["dbgym_workspace"]["task_runs"][ - "latest_run.link" + self.expected_structure["dbgym_workspace"][RUNS_DNAME][ + name_to_linkname(LATEST_RUN_FNAME) ] = ( "symlink", - f"dbgym_workspace/task_runs/{self.workspace.dbgym_this_run_path.name}", + f"dbgym_workspace/{RUNS_DNAME}/{self.workspace.dbgym_this_run_path.name}", ) self.assertTrue( @@ -123,7 +131,7 @@ def make_result_helper( def test_init_fields(self) -> None: workspace = DBGymWorkspace(self.workspace_path) - self.assertEqual(workspace.app_name, "dbgym") + self.assertEqual(workspace.app_name, DBGYM_APP_NAME) def test_init_from_nonexistent_workspace(self) -> None: self.init_workspace_helper() @@ -142,12 +150,12 @@ def test_link_result_basic_functionality(self) -> None: assert self.workspace is not None and self.expected_structure is not None result_path = self.make_result_helper() self.workspace.link_result(result_path) - self.expected_structure["dbgym_workspace"]["symlinks"]["dbgym"] = {} - self.expected_structure["dbgym_workspace"]["symlinks"]["dbgym"][ - f"{result_path.name}.link" + self.expected_structure["dbgym_workspace"][SYMLINKS_DNAME][DBGYM_APP_NAME] = {} + self.expected_structure["dbgym_workspace"][SYMLINKS_DNAME][DBGYM_APP_NAME][ + name_to_linkname(result_path.name) ] = ( "symlink", - f"dbgym_workspace/task_runs/{self.workspace.dbgym_this_run_path.name}/{result_path.name}", + f"dbgym_workspace/{RUNS_DNAME}/{self.workspace.dbgym_this_run_path.name}/{result_path.name}", ) self.assertTrue( verify_structure(self.scratchspace_path, self.expected_structure) @@ -163,12 +171,12 @@ def test_link_result_does_not_copy_directory_structure_to_symlinks_dir( assert self.workspace is not None and self.expected_structure is not None result_path = self.make_result_helper(relative_path="dir1/dir2/dir3/result.txt") self.workspace.link_result(result_path) - self.expected_structure["dbgym_workspace"]["symlinks"]["dbgym"] = {} - self.expected_structure["dbgym_workspace"]["symlinks"]["dbgym"][ - f"{result_path.name}.link" + self.expected_structure["dbgym_workspace"][SYMLINKS_DNAME][DBGYM_APP_NAME] = {} + self.expected_structure["dbgym_workspace"][SYMLINKS_DNAME][DBGYM_APP_NAME][ + name_to_linkname(result_path.name) ] = ( "symlink", - f"dbgym_workspace/task_runs/{self.workspace.dbgym_this_run_path.name}/dir1/dir2/dir3/{result_path.name}", + f"dbgym_workspace/{RUNS_DNAME}/{self.workspace.dbgym_this_run_path.name}/dir1/dir2/dir3/{result_path.name}", ) self.assertTrue( verify_structure(self.scratchspace_path, self.expected_structure) @@ -187,13 +195,15 @@ def test_link_result_valid_custom_link_name(self) -> None: self.init_workspace_helper() assert self.workspace is not None and self.expected_structure is not None result_path = self.make_result_helper() - self.workspace.link_result(result_path, custom_link_name="custom.link") - self.expected_structure["dbgym_workspace"]["symlinks"]["dbgym"] = {} - self.expected_structure["dbgym_workspace"]["symlinks"]["dbgym"][ - "custom.link" + self.workspace.link_result( + result_path, custom_link_name=name_to_linkname("custom") + ) + self.expected_structure["dbgym_workspace"][SYMLINKS_DNAME][DBGYM_APP_NAME] = {} + self.expected_structure["dbgym_workspace"][SYMLINKS_DNAME][DBGYM_APP_NAME][ + name_to_linkname("custom") ] = ( "symlink", - f"dbgym_workspace/task_runs/{self.workspace.dbgym_this_run_path.name}/{result_path.name}", + f"dbgym_workspace/{RUNS_DNAME}/{self.workspace.dbgym_this_run_path.name}/{result_path.name}", ) self.assertTrue( verify_structure(self.scratchspace_path, self.expected_structure) @@ -205,12 +215,12 @@ def test_link_same_result_twice_with_same_link_name(self) -> None: result_path = self.make_result_helper() self.workspace.link_result(result_path) self.workspace.link_result(result_path) - self.expected_structure["dbgym_workspace"]["symlinks"]["dbgym"] = {} - self.expected_structure["dbgym_workspace"]["symlinks"]["dbgym"][ - f"{result_path.name}.link" + self.expected_structure["dbgym_workspace"][SYMLINKS_DNAME][DBGYM_APP_NAME] = {} + self.expected_structure["dbgym_workspace"][SYMLINKS_DNAME][DBGYM_APP_NAME][ + name_to_linkname(result_path.name) ] = ( "symlink", - f"dbgym_workspace/task_runs/{self.workspace.dbgym_this_run_path.name}/{result_path.name}", + f"dbgym_workspace/{RUNS_DNAME}/{self.workspace.dbgym_this_run_path.name}/{result_path.name}", ) self.assertTrue( verify_structure(self.scratchspace_path, self.expected_structure) @@ -221,19 +231,21 @@ def test_link_same_result_with_different_name(self) -> None: assert self.workspace is not None and self.expected_structure is not None result_path = self.make_result_helper() self.workspace.link_result(result_path) - self.workspace.link_result(result_path, custom_link_name="custom.link") - self.expected_structure["dbgym_workspace"]["symlinks"]["dbgym"] = {} - self.expected_structure["dbgym_workspace"]["symlinks"]["dbgym"][ - f"{result_path.name}.link" + self.workspace.link_result( + result_path, custom_link_name=name_to_linkname("custom") + ) + self.expected_structure["dbgym_workspace"][SYMLINKS_DNAME][DBGYM_APP_NAME] = {} + self.expected_structure["dbgym_workspace"][SYMLINKS_DNAME][DBGYM_APP_NAME][ + name_to_linkname(result_path.name) ] = ( "symlink", - f"dbgym_workspace/task_runs/{self.workspace.dbgym_this_run_path.name}/{result_path.name}", + f"dbgym_workspace/{RUNS_DNAME}/{self.workspace.dbgym_this_run_path.name}/{result_path.name}", ) - self.expected_structure["dbgym_workspace"]["symlinks"]["dbgym"][ - f"custom.link" + self.expected_structure["dbgym_workspace"][SYMLINKS_DNAME][DBGYM_APP_NAME][ + name_to_linkname("custom") ] = ( "symlink", - f"dbgym_workspace/task_runs/{self.workspace.dbgym_this_run_path.name}/{result_path.name}", + f"dbgym_workspace/{RUNS_DNAME}/{self.workspace.dbgym_this_run_path.name}/{result_path.name}", ) self.assertTrue( verify_structure(self.scratchspace_path, self.expected_structure) @@ -265,10 +277,10 @@ def test_link_result_cannot_link_symlink(self) -> None: assert self.workspace is not None and self.expected_structure is not None result_path = self.make_result_helper() symlink_path = self.make_result_helper( - "symlink.link", + name_to_linkname("symlink"), file_obj=( "symlink", - f"dbgym_workspace/task_runs/{self.workspace.dbgym_this_run_path.name}/{result_path.name}", + f"dbgym_workspace/{RUNS_DNAME}/{self.workspace.dbgym_this_run_path.name}/{result_path.name}", ), ) with self.assertRaisesRegex( @@ -287,11 +299,11 @@ def test_save_file_dependency(self) -> None: result_path = self.make_result_helper() self.init_workspace_helper() self.workspace.save_file(result_path) - self.expected_structure["dbgym_workspace"]["task_runs"][ + self.expected_structure["dbgym_workspace"][RUNS_DNAME][ self.workspace.dbgym_this_run_path.name - ][f"{result_path.name}.link"] = ( + ][name_to_linkname(result_path.name)] = ( "symlink", - f"dbgym_workspace/task_runs/{prev_run_name}/{result_path.name}", + f"dbgym_workspace/{RUNS_DNAME}/{prev_run_name}/{result_path.name}", ) self.assertTrue( verify_structure(self.scratchspace_path, self.expected_structure) @@ -305,11 +317,11 @@ def test_save_file_same_dependency_twice(self) -> None: self.init_workspace_helper() self.workspace.save_file(result_path) self.workspace.save_file(result_path) - self.expected_structure["dbgym_workspace"]["task_runs"][ + self.expected_structure["dbgym_workspace"][RUNS_DNAME][ self.workspace.dbgym_this_run_path.name - ][f"{result_path.name}.link"] = ( + ][name_to_linkname(result_path.name)] = ( "symlink", - f"dbgym_workspace/task_runs/{prev_run_name}/{result_path.name}", + f"dbgym_workspace/{RUNS_DNAME}/{prev_run_name}/{result_path.name}", ) self.assertTrue( verify_structure(self.scratchspace_path, self.expected_structure) @@ -333,11 +345,11 @@ def test_save_file_two_different_dependencies_with_same_filename_both_directly_i self.workspace.save_file(result1_path) self.workspace.save_file(result2_path) # The second save_file() should have overwritten the first one. - self.expected_structure["dbgym_workspace"]["task_runs"][ + self.expected_structure["dbgym_workspace"][RUNS_DNAME][ self.workspace.dbgym_this_run_path.name - ][f"{filename}.link"] = ( + ][name_to_linkname(filename)] = ( "symlink", - f"dbgym_workspace/task_runs/{prev_run_names[-1]}/{filename}", + f"dbgym_workspace/{RUNS_DNAME}/{prev_run_names[-1]}/{filename}", ) self.assertTrue( verify_structure(self.scratchspace_path, self.expected_structure) @@ -358,17 +370,17 @@ def test_save_file_two_different_dependencies_with_same_filename_but_different_o self.workspace.save_file(result1_path) self.workspace.save_file(result2_path) # The second save_file() should not overwrite the first one because the outermost dirs are different. - self.expected_structure["dbgym_workspace"]["task_runs"][ + self.expected_structure["dbgym_workspace"][RUNS_DNAME][ self.workspace.dbgym_this_run_path.name - ][f"{filename}.link"] = ( + ][name_to_linkname(filename)] = ( "symlink", - f"dbgym_workspace/task_runs/{prev_run_name}/{filename}", + f"dbgym_workspace/{RUNS_DNAME}/{prev_run_name}/{filename}", ) - self.expected_structure["dbgym_workspace"]["task_runs"][ + self.expected_structure["dbgym_workspace"][RUNS_DNAME][ self.workspace.dbgym_this_run_path.name - ]["dir1.link"] = ( + ][name_to_linkname("dir1")] = ( "symlink", - f"dbgym_workspace/task_runs/{prev_run_name}/dir1", + f"dbgym_workspace/{RUNS_DNAME}/{prev_run_name}/dir1", ) self.assertTrue( verify_structure(self.scratchspace_path, self.expected_structure) @@ -384,7 +396,7 @@ def test_save_file_config(self) -> None: "external/result.txt", file_obj=("file", "contents") ) self.workspace.save_file(result_path) - self.expected_structure["dbgym_workspace"]["task_runs"][ + self.expected_structure["dbgym_workspace"][RUNS_DNAME][ self.workspace.dbgym_this_run_path.name ][f"{result_path.name}"] = ("file", "contents") self.assertTrue( @@ -399,7 +411,7 @@ def test_save_file_same_config_twice(self) -> None: ) self.workspace.save_file(result_path) self.workspace.save_file(result_path) - self.expected_structure["dbgym_workspace"]["task_runs"][ + self.expected_structure["dbgym_workspace"][RUNS_DNAME][ self.workspace.dbgym_this_run_path.name ][f"{result_path.name}"] = ("file", "contents") self.assertTrue( @@ -420,7 +432,7 @@ def test_save_file_two_different_configs_with_same_filename(self) -> None: self.workspace.save_file(result1_path) self.workspace.save_file(result2_path) - self.expected_structure["dbgym_workspace"]["task_runs"][ + self.expected_structure["dbgym_workspace"][RUNS_DNAME][ self.workspace.dbgym_this_run_path.name ][f"{filename}"] = ("file", "contents2") self.assertTrue( @@ -436,11 +448,11 @@ def test_save_file_dependency_inside_directory(self) -> None: self.make_result_helper("dir1/dir3/other2.txt") self.init_workspace_helper() self.workspace.save_file(result_path) - self.expected_structure["dbgym_workspace"]["task_runs"][ + self.expected_structure["dbgym_workspace"][RUNS_DNAME][ self.workspace.dbgym_this_run_path.name - ]["dir1.link"] = ( + ][name_to_linkname("dir1")] = ( "symlink", - f"dbgym_workspace/task_runs/{prev_run_name}/dir1", + f"dbgym_workspace/{RUNS_DNAME}/{prev_run_name}/dir1", ) self.assertTrue( verify_structure(self.scratchspace_path, self.expected_structure) diff --git a/env/tuning_artifacts.py b/gymlib_package/gymlib/tuning_artifacts.py similarity index 98% rename from env/tuning_artifacts.py rename to gymlib_package/gymlib/tuning_artifacts.py index ab26e3fd..e6b70108 100644 --- a/env/tuning_artifacts.py +++ b/gymlib_package/gymlib/tuning_artifacts.py @@ -3,7 +3,7 @@ from pathlib import Path from typing import Any, NewType -from util.workspace import DBGymWorkspace, is_fully_resolved +from gymlib.workspace import DBGymWorkspace, is_fully_resolved # PostgresConn doesn't use these types because PostgresConn is used internally by tuning agents # while these types are only used in the interface between the orchestrator and the tuning agents. diff --git a/env/workload.py b/gymlib_package/gymlib/workload.py similarity index 94% rename from env/workload.py rename to gymlib_package/gymlib/workload.py index 4b164ffc..bc45af69 100644 --- a/env/workload.py +++ b/gymlib_package/gymlib/workload.py @@ -1,6 +1,6 @@ from pathlib import Path -from util.workspace import DBGymWorkspace, is_fully_resolved +from gymlib.workspace import DBGymWorkspace, is_fully_resolved class Workload: diff --git a/util/workspace.py b/gymlib_package/gymlib/workspace.py similarity index 96% rename from util/workspace.py rename to gymlib_package/gymlib/workspace.py index 32bfd03d..69e11d45 100644 --- a/util/workspace.py +++ b/gymlib_package/gymlib/workspace.py @@ -12,28 +12,46 @@ from typing import IO, Any, Optional import yaml -from gymlib.symlinks_paths import is_linkname, name_to_linkname - -from util.log import DBGYM_LOGGER_NAME WORKSPACE_PATH_PLACEHOLDER = Path("[workspace]") +SYMLINKS_DNAME = "symlinks" +TMP_DNAME = "tmp" +RUNS_DNAME = "task_runs" +DBGYM_APP_NAME = "dbgym" +LATEST_RUN_FNAME = "latest_run" + + +def is_linkname(name: str) -> bool: + assert not name.endswith(".link.link") + return name.endswith(".link") + + +def name_to_linkname(name: str) -> str: + assert not is_linkname(name) + return f"{name}.link" + + +def linkname_to_name(linkname: str) -> str: + assert is_linkname(linkname) + return linkname[: -len(".link")] -# Helper functions that both this file and other files use def get_symlinks_path_from_workspace_path(workspace_path: Path) -> Path: - return workspace_path / "symlinks" + return workspace_path / SYMLINKS_DNAME def get_tmp_path_from_workspace_path(workspace_path: Path) -> Path: - return workspace_path / "tmp" + return workspace_path / TMP_DNAME def get_runs_path_from_workspace_path(workspace_path: Path) -> Path: - return workspace_path / "task_runs" + return workspace_path / RUNS_DNAME def get_latest_run_path_from_workspace_path(workspace_path: Path) -> Path: - return get_runs_path_from_workspace_path(workspace_path) / "latest_run.link" + return get_runs_path_from_workspace_path(workspace_path) / name_to_linkname( + LATEST_RUN_FNAME + ) # Paths of config files in the codebase. These are always relative paths. @@ -57,9 +75,7 @@ def __init__(self, dbgym_workspace_path: Path): ), f"DBGymWorkspace has been created {DBGymWorkspace._num_times_created_this_run} times. It should only be created once per run." self.base_dbgym_repo_path = get_base_dbgym_repo_path() - self.app_name = ( - "dbgym" # TODO: discover this dynamically. app means dbgym or an agent - ) + self.app_name = DBGYM_APP_NAME # TODO: discover this dynamically. app means dbgym or an agent # Set and create paths. self.dbgym_workspace_path = dbgym_workspace_path @@ -286,7 +302,6 @@ def make_standard_dbgym_workspace() -> DBGymWorkspace: default path of dbgym_config.yaml. """ dbgym_config_path = Path(os.getenv("DBGYM_CONFIG_PATH", "dbgym_config.yaml")) - assert dbgym_config_path == Path("env/tests/gymlib_integtest_dbgym_config.yaml") dbgym_workspace_path = get_workspace_path_from_config(dbgym_config_path) dbgym_workspace = DBGymWorkspace(dbgym_workspace_path) return dbgym_workspace @@ -500,5 +515,5 @@ def is_ssd(path: Path) -> bool: return is_ssd return False except Exception as e: - logging.getLogger(DBGYM_LOGGER_NAME).error(f"An error occurred: {e}") + logging.error(f"An error occurred: {e}") return False diff --git a/manage/__init__.py b/orchestrate/__init__.py similarity index 100% rename from manage/__init__.py rename to orchestrate/__init__.py diff --git a/manage/cli.py b/orchestrate/clean.py similarity index 76% rename from manage/cli.py rename to orchestrate/clean.py index 4f41cbc7..0e438d25 100644 --- a/manage/cli.py +++ b/orchestrate/clean.py @@ -4,52 +4,7 @@ from itertools import chain from pathlib import Path -import click - -from util.log import DBGYM_LOGGER_NAME, DBGYM_OUTPUT_LOGGER_NAME -from util.workspace import ( - DBGymWorkspace, - get_runs_path_from_workspace_path, - get_symlinks_path_from_workspace_path, - is_child_path, - parent_path_of_path, -) - - -# This is used in test_clean.py. It's defined here to avoid a circular import. -class MockDBGymWorkspace: - def __init__(self, scratchspace_path: Path): - self.dbgym_workspace_path = scratchspace_path - self.dbgym_symlinks_path = get_symlinks_path_from_workspace_path( - scratchspace_path - ) - self.dbgym_runs_path = get_runs_path_from_workspace_path(scratchspace_path) - - -@click.group(name="manage") -def manage_group() -> None: - pass - - -@click.command("clean") -@click.pass_obj -@click.option( - "--mode", - type=click.Choice(["safe", "aggressive"]), - default="safe", - help='The mode to clean the workspace (default="safe"). "aggressive" means "only keep run_*/ folders referenced by a file in symlinks/". "safe" means "in addition to that, recursively keep any run_*/ folders referenced by any symlinks in run_*/ folders we are keeping."', -) -def manage_clean(dbgym_workspace: DBGymWorkspace, mode: str) -> None: - clean_workspace(dbgym_workspace, mode=mode, verbose=True) - - -@click.command("count") -@click.pass_obj -def manage_count(dbgym_workspace: DBGymWorkspace) -> None: - num_files = _count_files_in_workspace(dbgym_workspace) - logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME).info( - f"The workspace ({dbgym_workspace.dbgym_workspace_path}) has {num_files} total files/dirs/symlinks." - ) +from gymlib.workspace import DBGymWorkspace, is_child_path, parent_path_of_path def add_symlinks_in_path( @@ -68,9 +23,7 @@ def add_symlinks_in_path( processed_symlinks.add(file_path) -def _count_files_in_workspace( - dbgym_workspace: DBGymWorkspace | MockDBGymWorkspace, -) -> int: +def count_files_in_workspace(dbgym_workspace: DBGymWorkspace) -> int: """ Counts the number of files (regular file or dir or symlink) in the workspace. """ @@ -90,7 +43,7 @@ def _count_files_in_workspace( def clean_workspace( - dbgym_workspace: DBGymWorkspace | MockDBGymWorkspace, + dbgym_workspace: DBGymWorkspace, mode: str = "safe", verbose: bool = False, ) -> None: @@ -175,7 +128,7 @@ def clean_workspace( # 3. Go through all children of task_runs/*, deleting any that we weren't told to keep # It's true that symlinks might link outside of task_runs/*. We'll just not care about those - starting_num_files = _count_files_in_workspace(dbgym_workspace) + starting_num_files = count_files_in_workspace(dbgym_workspace) if dbgym_workspace.dbgym_runs_path.exists(): for child_path in dbgym_workspace.dbgym_runs_path.iterdir(): if child_path not in task_run_child_paths_to_keep: @@ -183,16 +136,12 @@ def clean_workspace( shutil.rmtree(child_path) else: os.remove(child_path) - ending_num_files = _count_files_in_workspace(dbgym_workspace) + ending_num_files = count_files_in_workspace(dbgym_workspace) if verbose: - logging.getLogger(DBGYM_LOGGER_NAME).info( + logging.info( f"Removed {starting_num_files - ending_num_files} out of {starting_num_files} files" ) - logging.getLogger(DBGYM_LOGGER_NAME).info( + logging.info( f"Workspace went from {starting_num_files - ending_num_files} to {starting_num_files}" ) - - -manage_group.add_command(manage_clean) -manage_group.add_command(manage_count) diff --git a/orchestrate/cli.py b/orchestrate/cli.py new file mode 100644 index 00000000..974edd79 --- /dev/null +++ b/orchestrate/cli.py @@ -0,0 +1,34 @@ +import click +from gymlib.workspace import DBGymWorkspace + +from orchestrate.clean import clean_workspace, count_files_in_workspace + + +@click.group(name="manage") +def manage_group() -> None: + pass + + +@click.command("clean") +@click.pass_obj +@click.option( + "--mode", + type=click.Choice(["safe", "aggressive"]), + default="safe", + help='The mode to clean the workspace (default="safe"). "aggressive" means "only keep run_*/ folders referenced by a file in symlinks/". "safe" means "in addition to that, recursively keep any run_*/ folders referenced by any symlinks in run_*/ folders we are keeping."', +) +def manage_clean(dbgym_workspace: DBGymWorkspace, mode: str) -> None: + clean_workspace(dbgym_workspace, mode=mode, verbose=True) + + +@click.command("count") +@click.pass_obj +def manage_count(dbgym_workspace: DBGymWorkspace) -> None: + num_files = count_files_in_workspace(dbgym_workspace) + print( + f"The workspace ({dbgym_workspace.dbgym_workspace_path}) has {num_files} total files/dirs/symlinks." + ) + + +manage_group.add_command(manage_clean) +manage_group.add_command(manage_count) diff --git a/env/replay.py b/orchestrate/replay.py similarity index 90% rename from env/replay.py rename to orchestrate/replay.py index 55621284..f5363034 100644 --- a/env/replay.py +++ b/orchestrate/replay.py @@ -1,11 +1,11 @@ from collections import defaultdict from pathlib import Path -from env.pg_conn import PostgresConn -from env.tuning_artifacts import TuningArtifactsReader -from env.workload import Workload -from util.pg import DEFAULT_POSTGRES_PORT -from util.workspace import DBGymWorkspace +from gymlib.pg import DEFAULT_POSTGRES_PORT +from gymlib.pg_conn import PostgresConn +from gymlib.tuning_artifacts import TuningArtifactsReader +from gymlib.workload import Workload +from gymlib.workspace import DBGymWorkspace def replay( diff --git a/manage/tests/__init__.py b/orchestrate/tests/__init__.py similarity index 100% rename from manage/tests/__init__.py rename to orchestrate/tests/__init__.py diff --git a/env/tests/integtest_replay.py b/orchestrate/tests/integtest_replay.py similarity index 91% rename from env/tests/integtest_replay.py rename to orchestrate/tests/integtest_replay.py index 1752e192..a35a581a 100644 --- a/env/tests/integtest_replay.py +++ b/orchestrate/tests/integtest_replay.py @@ -1,16 +1,17 @@ import unittest -from benchmark.tpch.constants import DEFAULT_TPCH_SEED -from env.replay import replay -from env.tests.gymlib_integtest_util import GymlibIntegtestManager -from env.tuning_artifacts import ( +from gymlib.tests.gymlib_integtest_util import GymlibIntegtestManager +from gymlib.tuning_artifacts import ( DBMSConfigDelta, IndexesDelta, QueryKnobsDelta, SysKnobsDelta, TuningArtifactsWriter, ) -from util.workspace import DBGymWorkspace +from gymlib.workspace import DBGymWorkspace + +from benchmark.tpch.constants import DEFAULT_TPCH_SEED +from orchestrate.replay import replay class ReplayTests(unittest.TestCase): diff --git a/manage/tests/unittest_clean.py b/orchestrate/tests/unittest_clean.py similarity index 74% rename from manage/tests/unittest_clean.py rename to orchestrate/tests/unittest_clean.py index 2e82d4a9..be25af34 100644 --- a/manage/tests/unittest_clean.py +++ b/orchestrate/tests/unittest_clean.py @@ -3,13 +3,15 @@ import unittest from pathlib import Path -from manage.cli import MockDBGymWorkspace, clean_workspace -from util.tests.filesystem_unittest_util import ( +from gymlib.tests.filesystem_unittest_util import ( FilesystemStructure, create_structure, make_workspace_structure, verify_structure, ) +from gymlib.workspace import RUNS_DNAME, SYMLINKS_DNAME, DBGymWorkspace + +from orchestrate.clean import clean_workspace # This is here instead of on `if __name__ == "__main__"` because we often run individual tests, which # does not go through the `if __name__ == "__main__"` codepath. @@ -25,43 +27,55 @@ class CleanTests(unittest.TestCase): @classmethod def setUpClass(cls) -> None: - cls.scratchspace_path = Path.cwd() / "manage/tests/test_clean_scratchspace/" + cls.scratchspace_path = ( + Path.cwd() / "orchestrate/tests/test_clean_scratchspace/" + ) cls.workspace_path = cls.scratchspace_path / "dbgym_workspace" def setUp(self) -> None: if self.scratchspace_path.exists(): shutil.rmtree(self.scratchspace_path) + # Reset _num_times_created_this_run since previous tests may have created a workspace. + DBGymWorkspace._num_times_created_this_run = 0 + self.workspace = DBGymWorkspace(self.workspace_path) + # Since creating DBGymWorkspace creates the workspace, we want to remove it. + shutil.rmtree(self.workspace_path) + def tearDown(self) -> None: if self.scratchspace_path.exists(): shutil.rmtree(self.scratchspace_path) def test_nonexistent_workspace(self) -> None: # This just ensures that it doesn't raise an exception. - clean_workspace(MockDBGymWorkspace(self.workspace_path)) + clean_workspace(self.workspace) def test_empty_workspace(self) -> None: starting_structure = FilesystemStructure({"dbgym_workspace": {}}) ending_structure = FilesystemStructure({"dbgym_workspace": {}}) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path)) + clean_workspace(self.workspace) self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_no_symlinks_dir_and_yes_task_runs_dir(self) -> None: starting_structure = FilesystemStructure( - {"dbgym_workspace": {"task_runs": {"file1.txt": ("file",)}}} + {"dbgym_workspace": {RUNS_DNAME: {"file1.txt": ("file",)}}} ) - ending_structure = FilesystemStructure({"dbgym_workspace": {"task_runs": {}}}) + ending_structure = FilesystemStructure({"dbgym_workspace": {RUNS_DNAME: {}}}) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path)) + clean_workspace(self.workspace) self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_yes_symlinks_dir_and_no_task_runs_dir(self) -> None: # If there are no task runs there can't be any symlinks. - starting_structure = FilesystemStructure({"dbgym_workspace": {"symlinks": {}}}) - ending_structure = FilesystemStructure({"dbgym_workspace": {"symlinks": {}}}) + starting_structure = FilesystemStructure( + {"dbgym_workspace": {SYMLINKS_DNAME: {}}} + ) + ending_structure = FilesystemStructure( + {"dbgym_workspace": {SYMLINKS_DNAME: {}}} + ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path)) + clean_workspace(self.workspace) self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_no_symlinks_in_dir_and_no_task_runs_in_dir(self) -> None: @@ -77,7 +91,7 @@ def test_no_symlinks_in_dir_and_no_task_runs_in_dir(self) -> None: ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path)) + clean_workspace(self.workspace) self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_no_links_in_symlinks(self) -> None: @@ -93,12 +107,12 @@ def test_no_links_in_symlinks(self) -> None: ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path)) + clean_workspace(self.workspace) self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_link_to_file_directly_in_task_runs(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/file1.txt")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/file1.txt")} ) starting_task_runs_structure = FilesystemStructure( {"file1.txt": ("file",), "file2.txt": ("file",)} @@ -107,7 +121,7 @@ def test_link_to_file_directly_in_task_runs(self) -> None: starting_symlinks_structure, starting_task_runs_structure ) ending_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/file1.txt")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/file1.txt")} ) ending_task_runs_structure = FilesystemStructure({"file1.txt": ("file",)}) ending_structure = make_workspace_structure( @@ -115,12 +129,12 @@ def test_link_to_file_directly_in_task_runs(self) -> None: ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path)) + clean_workspace(self.workspace) self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_link_to_dir_directly_in_task_runs(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) starting_task_runs_structure = FilesystemStructure( { @@ -132,7 +146,7 @@ def test_link_to_dir_directly_in_task_runs(self) -> None: starting_symlinks_structure, starting_task_runs_structure ) ending_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) ending_task_runs_structure = FilesystemStructure( {"dir1": {"file1.txt": ("file",)}} @@ -142,12 +156,12 @@ def test_link_to_dir_directly_in_task_runs(self) -> None: ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path)) + clean_workspace(self.workspace) self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_link_to_file_in_dir_in_task_runs(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1/file1.txt")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1/file1.txt")} ) starting_task_runs_structure = FilesystemStructure( { @@ -159,7 +173,7 @@ def test_link_to_file_in_dir_in_task_runs(self) -> None: starting_symlinks_structure, starting_task_runs_structure ) ending_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1/file1.txt")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1/file1.txt")} ) ending_task_runs_structure = FilesystemStructure( {"dir1": {"file1.txt": ("file",)}} @@ -169,12 +183,12 @@ def test_link_to_file_in_dir_in_task_runs(self) -> None: ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path)) + clean_workspace(self.workspace) self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_link_to_dir_in_dir_in_task_runs(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1/dir2")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1/dir2")} ) starting_task_runs_structure = FilesystemStructure( { @@ -186,7 +200,7 @@ def test_link_to_dir_in_dir_in_task_runs(self) -> None: starting_symlinks_structure, starting_task_runs_structure ) ending_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1/dir2")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1/dir2")} ) ending_task_runs_structure = FilesystemStructure( { @@ -198,16 +212,16 @@ def test_link_to_dir_in_dir_in_task_runs(self) -> None: ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path)) + clean_workspace(self.workspace) self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_link_to_link_crashes(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/symlink2")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/symlink2")} ) starting_task_runs_structure = FilesystemStructure( { - "symlink2": ("symlink", "dbgym_workspace/task_runs/file1.txt"), + "symlink2": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/file1.txt"), "file1.txt": ("file",), } ) @@ -217,16 +231,16 @@ def test_link_to_link_crashes(self) -> None: create_structure(self.scratchspace_path, starting_structure) with self.assertRaises(AssertionError): - clean_workspace(MockDBGymWorkspace(self.workspace_path)) + clean_workspace(self.workspace) def test_safe_mode_link_to_dir_with_link(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) starting_task_runs_structure = FilesystemStructure( { "dir1": { - "symlink2": ("symlink", "dbgym_workspace/task_runs/file1.txt") + "symlink2": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/file1.txt") }, "file1.txt": ("file",), "file2.txt": ("file",), @@ -236,12 +250,12 @@ def test_safe_mode_link_to_dir_with_link(self) -> None: starting_symlinks_structure, starting_task_runs_structure ) ending_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) ending_task_runs_structure = FilesystemStructure( { "dir1": { - "symlink2": ("symlink", "dbgym_workspace/task_runs/file1.txt") + "symlink2": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/file1.txt") }, "file1.txt": ("file",), } @@ -251,18 +265,18 @@ def test_safe_mode_link_to_dir_with_link(self) -> None: ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path), mode="safe") + clean_workspace(self.workspace, mode="safe") self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_safe_mode_link_to_file_in_dir_with_link(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1/file1.txt")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1/file1.txt")} ) starting_task_runs_structure = FilesystemStructure( { "dir1": { "file1.txt": ("file",), - "symlink2": ("symlink", "dbgym_workspace/task_runs/file2.txt"), + "symlink2": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/file2.txt"), }, "file2.txt": ("file",), "file3.txt": ("file",), @@ -272,13 +286,13 @@ def test_safe_mode_link_to_file_in_dir_with_link(self) -> None: starting_symlinks_structure, starting_task_runs_structure ) ending_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1/file1.txt")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1/file1.txt")} ) ending_task_runs_structure = FilesystemStructure( { "dir1": { "file1.txt": ("file",), - "symlink2": ("symlink", "dbgym_workspace/task_runs/file2.txt"), + "symlink2": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/file2.txt"), }, "file2.txt": ("file",), } @@ -288,17 +302,20 @@ def test_safe_mode_link_to_file_in_dir_with_link(self) -> None: ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path), mode="safe") + clean_workspace(self.workspace, mode="safe") self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_safe_mode_link_to_dir_with_link_to_file_in_dir_in_task_runs(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) starting_task_runs_structure = FilesystemStructure( { "dir1": { - "symlink2": ("symlink", "dbgym_workspace/task_runs/dir2/file2.txt") + "symlink2": ( + "symlink", + f"dbgym_workspace/{RUNS_DNAME}/dir2/file2.txt", + ) }, "dir2": { "file2.txt": ("file",), @@ -310,12 +327,15 @@ def test_safe_mode_link_to_dir_with_link_to_file_in_dir_in_task_runs(self) -> No starting_symlinks_structure, starting_task_runs_structure ) ending_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) ending_task_runs_structure = FilesystemStructure( { "dir1": { - "symlink2": ("symlink", "dbgym_workspace/task_runs/dir2/file2.txt") + "symlink2": ( + "symlink", + f"dbgym_workspace/{RUNS_DNAME}/dir2/file2.txt", + ) }, "dir2": { "file2.txt": ("file",), @@ -327,17 +347,17 @@ def test_safe_mode_link_to_dir_with_link_to_file_in_dir_in_task_runs(self) -> No ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path), mode="safe") + clean_workspace(self.workspace, mode="safe") self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_aggressive_mode_link_to_dir_with_link(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) starting_task_runs_structure = FilesystemStructure( { "dir1": { - "symlink2": ("symlink", "dbgym_workspace/task_runs/file1.txt") + "symlink2": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/file1.txt") }, "file1.txt": ("file",), "file2.txt": ("file",), @@ -347,7 +367,7 @@ def test_aggressive_mode_link_to_dir_with_link(self) -> None: starting_symlinks_structure, starting_task_runs_structure ) ending_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) ending_task_runs_structure = FilesystemStructure( { @@ -359,17 +379,17 @@ def test_aggressive_mode_link_to_dir_with_link(self) -> None: ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path), mode="aggressive") + clean_workspace(self.workspace, mode="aggressive") self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_link_to_link_to_file_gives_error(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1/symlink2")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1/symlink2")} ) starting_task_runs_structure = FilesystemStructure( { "dir1": { - "symlink2": ("symlink", "dbgym_workspace/task_runs/file2.txt") + "symlink2": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/file2.txt") }, "file2.txt": ("file",), } @@ -382,15 +402,20 @@ def test_link_to_link_to_file_gives_error(self) -> None: # We disallow links to links so it's an AssertionError with self.assertRaises(AssertionError): - clean_workspace(MockDBGymWorkspace(self.workspace_path), mode="safe") + clean_workspace(self.workspace, mode="safe") def test_multi_link_loop_gives_error(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1/symlink2")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1/symlink2")} ) starting_task_runs_structure = FilesystemStructure( { - "dir1": {"symlink2": ("symlink", "dbgym_workspace/symlinks/symlink1")}, + "dir1": { + "symlink2": ( + "symlink", + f"dbgym_workspace/{SYMLINKS_DNAME}/symlink1", + ) + }, } ) starting_structure = make_workspace_structure( @@ -401,11 +426,11 @@ def test_multi_link_loop_gives_error(self) -> None: # pathlib disallows multi-link loops so it's a RuntimeError with self.assertRaises(RuntimeError): - clean_workspace(MockDBGymWorkspace(self.workspace_path), mode="safe") + clean_workspace(self.workspace, mode="safe") def test_link_self_loop_gives_error(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/symlinks/symlink1")} + {"symlink1": ("symlink", f"dbgym_workspace/{SYMLINKS_DNAME}/symlink1")} ) starting_task_runs_structure = FilesystemStructure({}) starting_structure = make_workspace_structure( @@ -416,23 +441,29 @@ def test_link_self_loop_gives_error(self) -> None: # pathlib disallows link self-loops so it's a RuntimeError with self.assertRaises(RuntimeError): - clean_workspace(MockDBGymWorkspace(self.workspace_path), mode="safe") + clean_workspace(self.workspace, mode="safe") def test_dont_loop_infinitely_if_there_are_cycles_between_different_dirs_in_runs( self, ) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) starting_task_runs_structure = FilesystemStructure( { "dir1": { "file1.txt": ("file",), - "symlink2": ("symlink", "dbgym_workspace/task_runs/dir2/file2.txt"), + "symlink2": ( + "symlink", + f"dbgym_workspace/{RUNS_DNAME}/dir2/file2.txt", + ), }, "dir2": { "file2.txt": ("file",), - "symlink2": ("symlink", "dbgym_workspace/task_runs/dir1/file1.txt"), + "symlink2": ( + "symlink", + f"dbgym_workspace/{RUNS_DNAME}/dir1/file1.txt", + ), }, } ) @@ -440,17 +471,23 @@ def test_dont_loop_infinitely_if_there_are_cycles_between_different_dirs_in_runs starting_symlinks_structure, starting_task_runs_structure ) ending_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) ending_task_runs_structure = FilesystemStructure( { "dir1": { "file1.txt": ("file",), - "symlink2": ("symlink", "dbgym_workspace/task_runs/dir2/file2.txt"), + "symlink2": ( + "symlink", + f"dbgym_workspace/{RUNS_DNAME}/dir2/file2.txt", + ), }, "dir2": { "file2.txt": ("file",), - "symlink2": ("symlink", "dbgym_workspace/task_runs/dir1/file1.txt"), + "symlink2": ( + "symlink", + f"dbgym_workspace/{RUNS_DNAME}/dir1/file1.txt", + ), }, } ) @@ -459,20 +496,23 @@ def test_dont_loop_infinitely_if_there_are_cycles_between_different_dirs_in_runs ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path), mode="safe") + clean_workspace(self.workspace, mode="safe") self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_dont_loop_infinitely_if_there_is_a_dir_in_runs_that_links_to_a_file_in_itself( self, ) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) starting_task_runs_structure = FilesystemStructure( { "dir1": { "file1.txt": ("file",), - "symlink2": ("symlink", "dbgym_workspace/task_runs/dir1/file1.txt"), + "symlink2": ( + "symlink", + f"dbgym_workspace/{RUNS_DNAME}/dir1/file1.txt", + ), }, } ) @@ -480,13 +520,16 @@ def test_dont_loop_infinitely_if_there_is_a_dir_in_runs_that_links_to_a_file_in_ starting_symlinks_structure, starting_task_runs_structure ) ending_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) ending_task_runs_structure = FilesystemStructure( { "dir1": { "file1.txt": ("file",), - "symlink2": ("symlink", "dbgym_workspace/task_runs/dir1/file1.txt"), + "symlink2": ( + "symlink", + f"dbgym_workspace/{RUNS_DNAME}/dir1/file1.txt", + ), }, } ) @@ -495,18 +538,21 @@ def test_dont_loop_infinitely_if_there_is_a_dir_in_runs_that_links_to_a_file_in_ ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path), mode="safe") + clean_workspace(self.workspace, mode="safe") self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_dont_loop_infinitely_if_there_is_loop_amongst_symlinks(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) starting_task_runs_structure = FilesystemStructure( { "dir1": { "file1.txt": ("file",), - "symlink2": ("symlink", "dbgym_workspace/task_runs/dir1/file1.txt"), + "symlink2": ( + "symlink", + f"dbgym_workspace/{RUNS_DNAME}/dir1/file1.txt", + ), }, } ) @@ -514,13 +560,16 @@ def test_dont_loop_infinitely_if_there_is_loop_amongst_symlinks(self) -> None: starting_symlinks_structure, starting_task_runs_structure ) ending_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) ending_task_runs_structure = FilesystemStructure( { "dir1": { "file1.txt": ("file",), - "symlink2": ("symlink", "dbgym_workspace/task_runs/dir1/file1.txt"), + "symlink2": ( + "symlink", + f"dbgym_workspace/{RUNS_DNAME}/dir1/file1.txt", + ), }, } ) @@ -529,12 +578,12 @@ def test_dont_loop_infinitely_if_there_is_loop_amongst_symlinks(self) -> None: ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path), mode="safe") + clean_workspace(self.workspace, mode="safe") self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_broken_symlink_has_no_effect(self) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) starting_task_runs_structure = FilesystemStructure( { @@ -542,7 +591,7 @@ def test_broken_symlink_has_no_effect(self) -> None: "file1.txt": ("file",), "symlink2": ( "symlink", - "dbgym_workspace/task_runs/dir1/non_existent_file.txt", + f"dbgym_workspace/{RUNS_DNAME}/dir1/non_existent_file.txt", ), }, "dir2": {"file2.txt": ("file",)}, @@ -552,7 +601,7 @@ def test_broken_symlink_has_no_effect(self) -> None: starting_symlinks_structure, starting_task_runs_structure ) ending_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1")} ) ending_task_runs_structure = FilesystemStructure( {"dir1": {"file1.txt": ("file",), "symlink2": ("symlink", None)}} @@ -562,7 +611,7 @@ def test_broken_symlink_has_no_effect(self) -> None: ) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path), mode="safe") + clean_workspace(self.workspace, mode="safe") self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) # The idea behind this test is that we shouldn't be following links outside of task_runs, even on safe mode @@ -570,7 +619,7 @@ def test_link_to_folder_outside_runs_that_contains_link_to_other_run_doesnt_save self, ) -> None: starting_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1/file1.txt")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1/file1.txt")} ) starting_task_runs_structure = FilesystemStructure( { @@ -588,12 +637,15 @@ def test_link_to_folder_outside_runs_that_contains_link_to_other_run_doesnt_save { "dir3": { "file3.txt": ("file",), - "symlink3": ("symlink", "dbgym_workspace/task_runs/dir2/file2.txt"), + "symlink3": ( + "symlink", + f"dbgym_workspace/{RUNS_DNAME}/dir2/file2.txt", + ), } } ) ending_symlinks_structure = FilesystemStructure( - {"symlink1": ("symlink", "dbgym_workspace/task_runs/dir1/file1.txt")} + {"symlink1": ("symlink", f"dbgym_workspace/{RUNS_DNAME}/dir1/file1.txt")} ) ending_task_runs_structure = FilesystemStructure( { @@ -611,7 +663,7 @@ def test_link_to_folder_outside_runs_that_contains_link_to_other_run_doesnt_save } create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path), mode="safe") + clean_workspace(self.workspace, mode="safe") self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) def test_outside_task_runs_doesnt_get_deleted(self) -> None: @@ -629,7 +681,7 @@ def test_outside_task_runs_doesnt_get_deleted(self) -> None: ending_structure["external"] = FilesystemStructure({"file1.txt": ("file",)}) create_structure(self.scratchspace_path, starting_structure) - clean_workspace(MockDBGymWorkspace(self.workspace_path), mode="safe") + clean_workspace(self.workspace, mode="safe") self.assertTrue(verify_structure(self.scratchspace_path, ending_structure)) diff --git a/experiments/load_per_machine_envvars.sh b/scripts/_load_per_machine_envvars.sh similarity index 100% rename from experiments/load_per_machine_envvars.sh rename to scripts/_load_per_machine_envvars.sh diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index c15a20c3..db226fb3 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -2,10 +2,10 @@ set -euxo pipefail -. ./experiments/load_per_machine_envvars.sh +. ./scripts/_load_per_machine_envvars.sh # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -# exit 0 +exit 0 # benchmark python3 task.py benchmark job data diff --git a/scripts/read_parquet.py b/scripts/read_parquet.py deleted file mode 100644 index 20fe55be..00000000 --- a/scripts/read_parquet.py +++ /dev/null @@ -1,24 +0,0 @@ -import logging -import sys -from pathlib import Path - -import pandas as pd - -from util.log import DBGYM_OUTPUT_LOGGER_NAME - - -def read_and_output_parquet(file_path: Path) -> None: - # Read the Parquet file into a DataFrame - df = pd.read_parquet(file_path) - - # Output the DataFrame - logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME).info("DataFrame:") - logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME).info(df) - - -if __name__ == "__main__": - # Specify the path to the Parquet file - parquet_file_path = Path(sys.argv[0]) - - # Call the function to read and output the Parquet file - read_and_output_parquet(parquet_file_path) diff --git a/task.py b/task.py index f5a0e278..334294a4 100644 --- a/task.py +++ b/task.py @@ -1,10 +1,9 @@ import click +from gymlib.workspace import make_standard_dbgym_workspace from benchmark.cli import benchmark_group from dbms.cli import dbms_group -from manage.cli import manage_group -from util.log import set_up_loggers, set_up_warnings -from util.workspace import make_standard_dbgym_workspace +from orchestrate.cli import manage_group # TODO(phw2): Save commit, git diff, and run command. # TODO(phw2): Remove write permissions on old run_*/ dirs to enforce that they are immutable. @@ -18,10 +17,6 @@ def task(ctx: click.Context) -> None: dbgym_workspace = make_standard_dbgym_workspace() ctx.obj = dbgym_workspace - log_path = dbgym_workspace.dbgym_this_run_path - set_up_loggers(log_path) - set_up_warnings(log_path) - if __name__ == "__main__": task.add_command(benchmark_group) diff --git a/util/log.py b/util/log.py deleted file mode 100644 index ae0ca936..00000000 --- a/util/log.py +++ /dev/null @@ -1,82 +0,0 @@ -import logging -import warnings -from logging import Logger -from pathlib import Path -from typing import Any, Optional - -DBGYM_LOGGER_NAME = "dbgym" -DBGYM_OUTPUT_LOGGER_NAME = f"{DBGYM_LOGGER_NAME}.output" - - -def set_up_loggers(log_path: Path) -> None: - """ - Set up everything related to the logging library. - - If your script needs to provide output, use the output logger (I usually use the info level). If you want to print things for - debugging purposes, use print(). If you want to log things, use the dbgym logger. - """ - - # The dbgym logger is set up globally here. Do not reconfigure the dbgym logger anywhere else. - log_format = "%(levelname)s:%(asctime)s [%(filename)s:%(lineno)s] %(message)s" - _set_up_logger( - logging.getLogger(DBGYM_LOGGER_NAME), - log_format, - log_path / f"{DBGYM_LOGGER_NAME}.log", - ) - - # The output logger is meant to output things to the console. We use it instead of using print to indicate that something is - # not a debugging print but rather is actual output of the program. - # We pass it None so that it doesn't write to its own file. However, due to the logging hierarchy, all messages logged to - # the output logger will be propagated to the dbgym logger and will thus be written to its file. - output_format = "%(message)s" - _set_up_logger( - logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME), - output_format, - None, - console_level=logging.DEBUG, - ) - - -def _set_up_logger( - logger: Logger, - format: str, - output_log_path: Optional[Path], - console_level: int = logging.ERROR, - file_level: int = logging.DEBUG, -) -> None: - # Set this so that the logger captures everything. - logger.setLevel(logging.DEBUG) - formatter = logging.Formatter(format) - - # Only make it output warnings or higher to the console. - console_handler = logging.StreamHandler() - console_handler.setLevel(console_level) - console_handler.setFormatter(formatter) - logger.addHandler(console_handler) - - # Let it output everything to the output file. - if output_log_path is not None: - file_handler = logging.FileHandler(output_log_path) - file_handler.setFormatter(formatter) - file_handler.setLevel(file_level) - logger.addHandler(file_handler) - - -def set_up_warnings(log_path: Path) -> None: - """ - Some libraries (like torch) use warnings instead of logging for warnings. I want to redirect these too to avoid cluttering the console. - """ - warnings_path = log_path / "warnings.log" - - def write_warning_to_file( - message: Any, - category: Any, - filename: Any, - lineno: Any, - file: Optional[Any] = None, - line: Optional[Any] = None, - ) -> None: - with open(warnings_path, "a") as f: - f.write(f"{filename}:{lineno}: {category.__name__}: {message}\n") - - warnings.showwarning = write_warning_to_file diff --git a/util/shell.py b/util/shell.py index 4ea7e3c2..b4d69cbd 100644 --- a/util/shell.py +++ b/util/shell.py @@ -4,8 +4,6 @@ from pathlib import Path from typing import Optional -from util.log import DBGYM_LOGGER_NAME - def subprocess_run( c: str, @@ -13,10 +11,13 @@ def subprocess_run( check_returncode: bool = True, verbose: bool = True, ) -> subprocess.Popen[str]: + """ + We use this instead of subprocess.run() because of the cwd option. + """ cwd_msg = f"(cwd: {cwd if cwd is not None else os.getcwd()})" if verbose: - logging.getLogger(DBGYM_LOGGER_NAME).info(f"Running {cwd_msg}: {c}") + logging.info(f"Running {cwd_msg}: {c}") with subprocess.Popen( c, @@ -32,7 +33,7 @@ def subprocess_run( assert proc.stdout is not None for line in proc.stdout: if verbose: - logging.getLogger(DBGYM_LOGGER_NAME).info(line) + logging.info(line) if not loop: break if check_returncode and proc.returncode != 0: diff --git a/util/tests/__init__.py b/util/tests/__init__.py deleted file mode 100644 index e69de29b..00000000