From 4bef40f886511f954b8eb3659f61eb7dd8dba9b3 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Sun, 22 Dec 2024 20:50:01 -0500 Subject: [PATCH 1/8] added ...Delta types --- env/integtest_tuning_agent.py | 14 ++++++++++++-- env/replay.py | 0 env/tuning_agent.py | 11 ++++++++--- 3 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 env/replay.py diff --git a/env/integtest_tuning_agent.py b/env/integtest_tuning_agent.py index 11c527a1..aa7faea4 100644 --- a/env/integtest_tuning_agent.py +++ b/env/integtest_tuning_agent.py @@ -2,7 +2,13 @@ from typing import Any, Optional from env.integtest_util import IntegtestWorkspace -from env.tuning_agent import DBMSConfigDelta, TuningAgent +from env.tuning_agent import ( + DBMSConfigDelta, + IndexesDelta, + QueryKnobsDelta, + SysKnobsDelta, + TuningAgent, +) class MockTuningAgent(TuningAgent): @@ -25,7 +31,11 @@ def setUpClass() -> None: @staticmethod def make_config(letter: str) -> DBMSConfigDelta: - return DBMSConfigDelta([letter], {letter: letter}, {letter: [letter]}) + return DBMSConfigDelta( + IndexesDelta([letter]), + SysKnobsDelta({letter: letter}), + QueryKnobsDelta({letter: [letter]}), + ) def test_get_step_delta(self) -> None: agent = MockTuningAgent(IntegtestWorkspace.get_dbgym_cfg()) diff --git a/env/replay.py b/env/replay.py new file mode 100644 index 00000000..e69de29b diff --git a/env/tuning_agent.py b/env/tuning_agent.py index 182242cc..6681cb6b 100644 --- a/env/tuning_agent.py +++ b/env/tuning_agent.py @@ -1,9 +1,14 @@ import json from dataclasses import asdict, dataclass from pathlib import Path +from typing import NewType from util.workspace import DBGymConfig +IndexesDelta = NewType("IndexesDelta", list[str]) +SysKnobsDelta = NewType("SysKnobsDelta", dict[str, str]) +QueryKnobsDelta = NewType("QueryKnobsDelta", dict[str, list[str]]) + @dataclass class DBMSConfigDelta: @@ -21,9 +26,9 @@ class DBMSConfigDelta: because knobs can be settings ("SET (enable_sort on)") or flags ("IndexOnlyScan(it)"). """ - indexes: list[str] - sysknobs: dict[str, str] - qknobs: dict[str, list[str]] + indexes: IndexesDelta + sysknobs: SysKnobsDelta + qknobs: QueryKnobsDelta class TuningAgent: From b9ba3f384404d075f3b35d00299d1a2fe868b381 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Sun, 22 Dec 2024 20:54:43 -0500 Subject: [PATCH 2/8] made DBMSConfigDelta a typeddict --- env/integtest_tuning_agent.py | 6 +++--- env/tuning_agent.py | 16 +++++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/env/integtest_tuning_agent.py b/env/integtest_tuning_agent.py index aa7faea4..a23fd609 100644 --- a/env/integtest_tuning_agent.py +++ b/env/integtest_tuning_agent.py @@ -32,9 +32,9 @@ def setUpClass() -> None: @staticmethod def make_config(letter: str) -> DBMSConfigDelta: return DBMSConfigDelta( - IndexesDelta([letter]), - SysKnobsDelta({letter: letter}), - QueryKnobsDelta({letter: [letter]}), + indexes=IndexesDelta([letter]), + sysknobs=SysKnobsDelta({letter: letter}), + qknobs=QueryKnobsDelta({letter: [letter]}), ) def test_get_step_delta(self) -> None: diff --git a/env/tuning_agent.py b/env/tuning_agent.py index 6681cb6b..8f84239e 100644 --- a/env/tuning_agent.py +++ b/env/tuning_agent.py @@ -1,7 +1,6 @@ import json -from dataclasses import asdict, dataclass from pathlib import Path -from typing import NewType +from typing import NewType, TypedDict from util.workspace import DBGymConfig @@ -10,8 +9,7 @@ QueryKnobsDelta = NewType("QueryKnobsDelta", dict[str, list[str]]) -@dataclass -class DBMSConfigDelta: +class DBMSConfigDelta(TypedDict): """ This class represents a DBMS config delta. A "DBMS config" is the indexes, system knobs, and query knobs set by the tuning agent. A "delta" is the change from the prior config. @@ -47,7 +45,7 @@ def step(self) -> None: self.next_step_num += 1 dbms_cfg_delta = self._step() with self.get_step_delta_fpath(curr_step_num).open("w") as f: - json.dump(asdict(dbms_cfg_delta), f) + json.dump(dbms_cfg_delta, f) def get_step_delta_fpath(self, step_num: int) -> Path: return self.dbms_cfg_deltas_dpath / f"step{step_num}_delta.json" @@ -64,8 +62,12 @@ def _step(self) -> DBMSConfigDelta: def get_step_delta(self, step_num: int) -> DBMSConfigDelta: assert step_num >= 0 and step_num < self.next_step_num with self.get_step_delta_fpath(step_num).open("r") as f: - return DBMSConfigDelta(**json.load(f)) - assert False + data = json.load(f) + return DBMSConfigDelta( + indexes=data["indexes"], + sysknobs=data["sysknobs"], + qknobs=data["qknobs"], + ) def get_all_deltas(self) -> list[DBMSConfigDelta]: return [self.get_step_delta(step_num) for step_num in range(self.next_step_num)] From ce4ed3e81a5f294ca202c13ec81f2628cd963599 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Mon, 23 Dec 2024 14:02:44 -0500 Subject: [PATCH 3/8] separate reader from tuning agent --- env/integtest_tuning_agent.py | 15 ++++++++++----- env/tuning_agent.py | 28 +++++++++++++++++++++------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/env/integtest_tuning_agent.py b/env/integtest_tuning_agent.py index a23fd609..74ef8c9c 100644 --- a/env/integtest_tuning_agent.py +++ b/env/integtest_tuning_agent.py @@ -8,6 +8,7 @@ QueryKnobsDelta, SysKnobsDelta, TuningAgent, + TuningAgentStepReader, ) @@ -47,10 +48,12 @@ def test_get_step_delta(self) -> None: agent.config_to_return = PostgresConnTests.make_config("c") agent.step() - self.assertEqual(agent.get_step_delta(1), PostgresConnTests.make_config("b")) - self.assertEqual(agent.get_step_delta(0), PostgresConnTests.make_config("a")) - self.assertEqual(agent.get_step_delta(1), PostgresConnTests.make_config("b")) - self.assertEqual(agent.get_step_delta(2), PostgresConnTests.make_config("c")) + reader = TuningAgentStepReader(agent.dbms_cfg_deltas_dpath) + + self.assertEqual(reader.get_step_delta(1), PostgresConnTests.make_config("b")) + self.assertEqual(reader.get_step_delta(0), PostgresConnTests.make_config("a")) + self.assertEqual(reader.get_step_delta(1), PostgresConnTests.make_config("b")) + self.assertEqual(reader.get_step_delta(2), PostgresConnTests.make_config("c")) def test_get_all_deltas(self) -> None: agent = MockTuningAgent(IntegtestWorkspace.get_dbgym_cfg()) @@ -62,8 +65,10 @@ def test_get_all_deltas(self) -> None: agent.config_to_return = PostgresConnTests.make_config("c") agent.step() + reader = TuningAgentStepReader(agent.dbms_cfg_deltas_dpath) + self.assertEqual( - agent.get_all_deltas(), + reader.get_all_deltas(), [ PostgresConnTests.make_config("a"), PostgresConnTests.make_config("b"), diff --git a/env/tuning_agent.py b/env/tuning_agent.py index 8f84239e..6f693cae 100644 --- a/env/tuning_agent.py +++ b/env/tuning_agent.py @@ -4,6 +4,8 @@ from util.workspace import DBGymConfig +# PostgresConn doesn't use these types because PostgresConn is used internally by tuning agents. +# These types are only given as the outputs of tuning agents. IndexesDelta = NewType("IndexesDelta", list[str]) SysKnobsDelta = NewType("SysKnobsDelta", dict[str, str]) QueryKnobsDelta = NewType("QueryKnobsDelta", dict[str, list[str]]) @@ -29,6 +31,10 @@ class DBMSConfigDelta(TypedDict): qknobs: QueryKnobsDelta +def get_step_delta_fpath(dbms_cfg_deltas_dpath: Path, step_num: int) -> Path: + return dbms_cfg_deltas_dpath / f"step{step_num}_delta.json" + + class TuningAgent: def __init__(self, dbgym_cfg: DBGymConfig) -> None: self.dbgym_cfg = dbgym_cfg @@ -44,12 +50,11 @@ def step(self) -> None: curr_step_num = self.next_step_num self.next_step_num += 1 dbms_cfg_delta = self._step() - with self.get_step_delta_fpath(curr_step_num).open("w") as f: + with get_step_delta_fpath(self.dbms_cfg_deltas_dpath, curr_step_num).open( + "w" + ) as f: json.dump(dbms_cfg_delta, f) - def get_step_delta_fpath(self, step_num: int) -> Path: - return self.dbms_cfg_deltas_dpath / f"step{step_num}_delta.json" - # Subclasses should override this function. def _step(self) -> DBMSConfigDelta: """ @@ -59,9 +64,18 @@ def _step(self) -> DBMSConfigDelta: """ raise NotImplementedError + +class TuningAgentStepReader: + def __init__(self, dbms_cfg_deltas_dpath: Path) -> None: + self.dbms_cfg_deltas_dpath = dbms_cfg_deltas_dpath + num_steps = 0 + while get_step_delta_fpath(self.dbms_cfg_deltas_dpath, num_steps).exists(): + num_steps += 1 + self.num_steps = num_steps + def get_step_delta(self, step_num: int) -> DBMSConfigDelta: - assert step_num >= 0 and step_num < self.next_step_num - with self.get_step_delta_fpath(step_num).open("r") as f: + assert step_num >= 0 and step_num < self.num_steps + with get_step_delta_fpath(self.dbms_cfg_deltas_dpath, step_num).open("r") as f: data = json.load(f) return DBMSConfigDelta( indexes=data["indexes"], @@ -70,4 +84,4 @@ def get_step_delta(self, step_num: int) -> DBMSConfigDelta: ) def get_all_deltas(self) -> list[DBMSConfigDelta]: - return [self.get_step_delta(step_num) for step_num in range(self.next_step_num)] + return [self.get_step_delta(step_num) for step_num in range(self.num_steps)] From c8b9ddc5bd24ab277cace3019eae573124a0768c Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Mon, 23 Dec 2024 14:10:43 -0500 Subject: [PATCH 4/8] hardened is_fully_resolved --- util/workspace.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/util/workspace.py b/util/workspace.py index 790fbcce..688a35c7 100644 --- a/util/workspace.py +++ b/util/workspace.py @@ -409,8 +409,31 @@ def is_base_git_dir(cwd: str) -> bool: def is_fully_resolved(path: Path) -> bool: + """ + Checks if a path is fully resolved (exists, is absolute, and contains no symlinks in its entire ancestry). + + Even if a path exists, is absolute, and is not itself a symlink, it could still contain + symlinks in its parent directories. For example: + /home/user/ # Real directory + /home/user/links/ # Symlink to /data/links + /home/user/links/file.txt # Real file + + In this case, "/home/user/links/file.txt" exists and isn't itself a symlink, + but it's not fully resolved because it contains a symlink in its ancestry. + The fully resolved path would be "/data/links/file.txt". + """ assert isinstance(path, Path) resolved_path = path.resolve() + + # Check if the path exists. + if not resolved_path.exists(): + return False + + # Check if the path contains no symlinks in its entire ancestry. + # This also checks if the path is absolute because resolved_path is absolute. + assert ( + resolved_path.is_absolute() + ), "resolved_path should be absolute (see comment above)" # Converting them to strings is the most unambiguously strict way of checking equality. # Stuff like Path.__eq__() or Path.samefile() might be more lenient. return str(resolved_path) == str(path) From ea456f6de4c810ae3dbe9ae528fe1fa52f813b14 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Mon, 23 Dec 2024 14:16:26 -0500 Subject: [PATCH 5/8] changed all absolute, not symlink, and existent assertions to is_fully_resolved assertions --- benchmark/job/cli.py | 7 +++---- benchmark/job/load_info.py | 8 +++----- benchmark/tpch/cli.py | 13 ++++--------- benchmark/tpch/load_info.py | 8 +++----- dbms/postgres/cli.py | 6 +++--- tune/protox/agent/hpo.py | 24 ++++++++++-------------- tune/protox/agent/replay.py | 4 ++-- tune/protox/agent/tune.py | 6 +++--- tune/protox/embedding/datagen.py | 14 ++++++-------- tune/protox/embedding/train.py | 12 +++++------- tune/protox/env/workload.py | 5 ++--- util/workspace.py | 25 +++++++++++-------------- 12 files changed, 55 insertions(+), 77 deletions(-) diff --git a/benchmark/job/cli.py b/benchmark/job/cli.py index a3ef2bdd..9e1e9767 100644 --- a/benchmark/job/cli.py +++ b/benchmark/job/cli.py @@ -10,6 +10,7 @@ DBGymConfig, default_tables_dname, get_workload_name, + is_fully_resolved, link_result, ) @@ -260,10 +261,8 @@ def _generate_job_workload( dbgym_cfg.cur_symlinks_data_path(mkdir=True) / (f"{JOB_QUERIES_DNAME}.link") ).resolve() / f"{qname}.sql" - assert ( - sql_fpath.exists() - and not sql_fpath.is_symlink() - and sql_fpath.is_absolute() + assert is_fully_resolved( + sql_fpath ), "We should only write existent real absolute paths to a file" f.write(f"Q{qname},{sql_fpath}\n") diff --git a/benchmark/job/load_info.py b/benchmark/job/load_info.py index bb94bd6c..eafbed5b 100644 --- a/benchmark/job/load_info.py +++ b/benchmark/job/load_info.py @@ -3,7 +3,7 @@ from benchmark.constants import DEFAULT_SCALE_FACTOR from dbms.load_info_base_class import LoadInfoBaseClass -from util.workspace import DBGymConfig, default_tables_dname +from util.workspace import DBGymConfig, default_tables_dname, is_fully_resolved JOB_SCHEMA_FNAME = "job_schema.sql" @@ -55,10 +55,8 @@ def __init__(self, dbgym_cfg: DBGymConfig): data_root_dpath / f"{default_tables_dname(DEFAULT_SCALE_FACTOR)}.link" ) tables_dpath = tables_symlink_dpath.resolve() - assert ( - tables_dpath.exists() - and tables_dpath.is_absolute() - and not tables_dpath.is_symlink() + assert is_fully_resolved( + tables_dpath ), f"tables_dpath ({tables_dpath}) should be an existent real absolute path. Make sure you have generated the TPC-H data" self._tables_and_fpaths = [] for table in JobLoadInfo.TABLES: diff --git a/benchmark/tpch/cli.py b/benchmark/tpch/cli.py index ef3aaa71..99f0fa77 100644 --- a/benchmark/tpch/cli.py +++ b/benchmark/tpch/cli.py @@ -11,6 +11,7 @@ default_tables_dname, get_scale_factor_string, get_workload_name, + is_fully_resolved, link_result, ) @@ -94,11 +95,7 @@ def _clone_tpch_kit(dbgym_cfg: DBGymConfig) -> None: def _get_tpch_kit_dpath(dbgym_cfg: DBGymConfig) -> Path: tpch_kit_dpath = (dbgym_cfg.cur_symlinks_build_path() / "tpch-kit.link").resolve() - assert ( - tpch_kit_dpath.exists() - and tpch_kit_dpath.is_absolute() - and not tpch_kit_dpath.is_symlink() - ) + assert is_fully_resolved(tpch_kit_dpath) return tpch_kit_dpath @@ -197,10 +194,8 @@ def _generate_tpch_workload( symlink_data_dpath / (_get_queries_dname(seed, scale_factor) + ".link") ).resolve() / f"{qname}.sql" - assert ( - sql_fpath.exists() - and not sql_fpath.is_symlink() - and sql_fpath.is_absolute() + assert is_fully_resolved( + sql_fpath ), "We should only write existent real absolute paths to a file" f.write(f"S{seed}-Q{qname},{sql_fpath}\n") # TODO(WAN): add option to deep-copy the workload. diff --git a/benchmark/tpch/load_info.py b/benchmark/tpch/load_info.py index fed2e329..e7c6ed31 100644 --- a/benchmark/tpch/load_info.py +++ b/benchmark/tpch/load_info.py @@ -2,7 +2,7 @@ from typing import Optional from dbms.load_info_base_class import LoadInfoBaseClass -from util.workspace import DBGymConfig, default_tables_dname +from util.workspace import DBGymConfig, default_tables_dname, is_fully_resolved TPCH_SCHEMA_FNAME = "tpch_schema.sql" TPCH_CONSTRAINTS_FNAME = "tpch_constraints.sql" @@ -48,10 +48,8 @@ def __init__(self, dbgym_cfg: DBGymConfig, scale_factor: float): data_root_dpath / f"{default_tables_dname(scale_factor)}.link" ) tables_dpath = tables_symlink_dpath.resolve() - assert ( - tables_dpath.exists() - and tables_dpath.is_absolute() - and not tables_dpath.is_symlink() + assert is_fully_resolved( + tables_dpath ), f"tables_dpath ({tables_dpath}) should be an existent real absolute path. Make sure you have generated the TPC-H data" self._tables_and_fpaths = [] for table in TpchLoadInfo.TABLES: diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py index 373ed8cf..0f96349e 100644 --- a/dbms/postgres/cli.py +++ b/dbms/postgres/cli.py @@ -35,9 +35,9 @@ from util.workspace import ( WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, - conv_inputpath_to_realabspath, default_dbdata_parent_dpath, default_pgbin_path, + fully_resolve_inputpath, get_dbdata_tgz_name, is_ssd, link_result, @@ -108,8 +108,8 @@ def postgres_dbdata( ) # Convert all input paths to absolute paths - pgbin_path = conv_inputpath_to_realabspath(dbgym_cfg, pgbin_path) - dbdata_parent_dpath = conv_inputpath_to_realabspath(dbgym_cfg, dbdata_parent_dpath) + pgbin_path = fully_resolve_inputpath(dbgym_cfg, pgbin_path) + dbdata_parent_dpath = fully_resolve_inputpath(dbgym_cfg, dbdata_parent_dpath) # Check assertions on args if intended_dbdata_hardware == "hdd": diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index 1dda312b..a30d9cb9 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -35,7 +35,6 @@ WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, TuningMode, - conv_inputpath_to_realabspath, default_benchbase_config_path, default_benchmark_config_path, default_dbdata_parent_dpath, @@ -44,6 +43,7 @@ default_pgbin_path, default_pristine_dbdata_snapshot_path, default_workload_path, + fully_resolve_inputpath, get_default_workload_name_suffix, get_workload_name, is_ssd, @@ -295,21 +295,17 @@ def hpo( seed = random.randint(0, int(1e8)) # Convert all input paths to absolute paths - embedder_path = conv_inputpath_to_realabspath(dbgym_cfg, embedder_path) - benchmark_config_path = conv_inputpath_to_realabspath( - dbgym_cfg, benchmark_config_path - ) - benchbase_config_path = conv_inputpath_to_realabspath( - dbgym_cfg, benchbase_config_path - ) - sysknobs_path = conv_inputpath_to_realabspath(dbgym_cfg, sysknobs_path) - pristine_dbdata_snapshot_path = conv_inputpath_to_realabspath( + embedder_path = fully_resolve_inputpath(dbgym_cfg, embedder_path) + benchmark_config_path = fully_resolve_inputpath(dbgym_cfg, benchmark_config_path) + benchbase_config_path = fully_resolve_inputpath(dbgym_cfg, benchbase_config_path) + sysknobs_path = fully_resolve_inputpath(dbgym_cfg, sysknobs_path) + pristine_dbdata_snapshot_path = fully_resolve_inputpath( dbgym_cfg, pristine_dbdata_snapshot_path ) - dbdata_parent_dpath = conv_inputpath_to_realabspath(dbgym_cfg, dbdata_parent_dpath) - pgbin_path = conv_inputpath_to_realabspath(dbgym_cfg, pgbin_path) - workload_path = conv_inputpath_to_realabspath(dbgym_cfg, workload_path) - boot_config_fpath_during_hpo = conv_inputpath_to_realabspath( + dbdata_parent_dpath = fully_resolve_inputpath(dbgym_cfg, dbdata_parent_dpath) + pgbin_path = fully_resolve_inputpath(dbgym_cfg, pgbin_path) + workload_path = fully_resolve_inputpath(dbgym_cfg, workload_path) + boot_config_fpath_during_hpo = fully_resolve_inputpath( dbgym_cfg, boot_config_fpath_during_hpo ) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index af403831..c5ba9511 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -31,9 +31,9 @@ from util.workspace import ( DBGymConfig, TuningMode, - conv_inputpath_to_realabspath, default_replay_data_fname, default_tuning_steps_dpath, + fully_resolve_inputpath, get_default_workload_name_suffix, get_workload_name, link_result, @@ -152,7 +152,7 @@ def replay( ) # Convert all input paths to absolute paths - tuning_steps_dpath = conv_inputpath_to_realabspath(dbgym_cfg, tuning_steps_dpath) + tuning_steps_dpath = fully_resolve_inputpath(dbgym_cfg, tuning_steps_dpath) # Group args together to reduce the # of parameters we pass into functions replay_args = ReplayArgs( diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index cd47c0ce..ad7a3859 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -17,9 +17,9 @@ WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, TuningMode, - conv_inputpath_to_realabspath, default_hpoed_agent_params_path, default_tuning_steps_dname, + fully_resolve_inputpath, get_default_workload_name_suffix, get_workload_name, link_result, @@ -87,10 +87,10 @@ def tune( ) # Convert all input paths to absolute paths - hpoed_agent_params_path = conv_inputpath_to_realabspath( + hpoed_agent_params_path = fully_resolve_inputpath( dbgym_cfg, hpoed_agent_params_path ) - boot_config_fpath_during_tune = conv_inputpath_to_realabspath( + boot_config_fpath_during_tune = fully_resolve_inputpath( dbgym_cfg, boot_config_fpath_during_tune ) diff --git a/tune/protox/embedding/datagen.py b/tune/protox/embedding/datagen.py index 50606f45..f8ea818b 100644 --- a/tune/protox/embedding/datagen.py +++ b/tune/protox/embedding/datagen.py @@ -38,13 +38,13 @@ WORKLOAD_NAME_PLACEHOLDER, WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, - conv_inputpath_to_realabspath, default_benchmark_config_path, default_dbdata_parent_dpath, default_pgbin_path, default_pristine_dbdata_snapshot_path, default_traindata_fname, default_workload_path, + fully_resolve_inputpath, get_default_workload_name_suffix, get_workload_name, is_ssd, @@ -226,15 +226,13 @@ def datagen( seed = random.randint(0, int(1e8)) # Convert all input paths to absolute paths - workload_path = conv_inputpath_to_realabspath(dbgym_cfg, workload_path) - benchmark_config_path = conv_inputpath_to_realabspath( - dbgym_cfg, benchmark_config_path - ) - pgbin_path = conv_inputpath_to_realabspath(dbgym_cfg, pgbin_path) - pristine_dbdata_snapshot_path = conv_inputpath_to_realabspath( + workload_path = fully_resolve_inputpath(dbgym_cfg, workload_path) + benchmark_config_path = fully_resolve_inputpath(dbgym_cfg, benchmark_config_path) + pgbin_path = fully_resolve_inputpath(dbgym_cfg, pgbin_path) + pristine_dbdata_snapshot_path = fully_resolve_inputpath( dbgym_cfg, pristine_dbdata_snapshot_path ) - dbdata_parent_dpath = conv_inputpath_to_realabspath(dbgym_cfg, dbdata_parent_dpath) + dbdata_parent_dpath = fully_resolve_inputpath(dbgym_cfg, dbdata_parent_dpath) # Check assertions on args if intended_dbdata_hardware == "hdd": diff --git a/tune/protox/embedding/train.py b/tune/protox/embedding/train.py index a404dc98..6ed8fddb 100644 --- a/tune/protox/embedding/train.py +++ b/tune/protox/embedding/train.py @@ -29,10 +29,10 @@ WORKLOAD_NAME_PLACEHOLDER, WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, - conv_inputpath_to_realabspath, default_benchmark_config_path, default_traindata_path, default_workload_path, + fully_resolve_inputpath, get_default_workload_name_suffix, get_workload_name, ) @@ -212,18 +212,16 @@ def train( seed = random.randint(0, int(1e8)) # Convert all input paths to absolute paths - benchmark_config_path = conv_inputpath_to_realabspath( - dbgym_cfg, benchmark_config_path - ) - traindata_path = conv_inputpath_to_realabspath(dbgym_cfg, traindata_path) - hpo_space_path = conv_inputpath_to_realabspath(dbgym_cfg, hpo_space_path) + benchmark_config_path = fully_resolve_inputpath(dbgym_cfg, benchmark_config_path) + traindata_path = fully_resolve_inputpath(dbgym_cfg, traindata_path) + hpo_space_path = fully_resolve_inputpath(dbgym_cfg, hpo_space_path) # setup random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) - workload_path = conv_inputpath_to_realabspath( + workload_path = fully_resolve_inputpath( dbgym_cfg, default_workload_path( dbgym_cfg.dbgym_workspace_path, benchmark_name, workload_name diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index 6aa10b0d..591b4cb1 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -41,7 +41,7 @@ extract_sqltypes, ) from util.log import DBGYM_LOGGER_NAME -from util.workspace import DBGymConfig, open_and_save +from util.workspace import DBGymConfig, is_fully_resolved, open_and_save class Workload(object): @@ -66,8 +66,7 @@ def _crunch( query_spec: QuerySpec, ) -> None: assert all( - sql[1].exists() and not sql[1].is_symlink() and sql[1].is_absolute() - for sql in sqls + is_fully_resolved(sql[1]) for sql in sqls ), f"sqls ({sqls}) should only contain existent real absolute paths." do_tbl_include_subsets_prune = query_spec["tbl_include_subsets_prune"] self.order = [] diff --git a/util/workspace.py b/util/workspace.py index 688a35c7..c9fd9ce3 100644 --- a/util/workspace.py +++ b/util/workspace.py @@ -362,16 +362,16 @@ def make_standard_dbgym_cfg() -> DBGymConfig: return dbgym_cfg -def conv_inputpath_to_realabspath( +def fully_resolve_inputpath( dbgym_cfg: DBGymConfig, inputpath: os.PathLike[str] ) -> Path: """ - Convert any user inputted path to a real, absolute path + Convert any user inputted path to a real, absolute path. For flexibility, we take in any os.PathLike. However, for consistency, we always output a Path object - Whenever a path is required, the user is allowed to enter relative paths, absolute paths, or paths starting with ~ - Relative paths are relative to the base dbgym repo dir - It *does not* check whether the path exists, since the user might be wanting to create a new file/dir - Raises RuntimeError for errors + Whenever a path is required, the user is allowed to enter relative paths, absolute paths, or paths starting with ~. + Relative paths are relative to the base dbgym repo dir. + It *does not* check whether the path exists, since the user might be wanting to create a new file/dir. + Raises RuntimeError for errors. """ # For simplicity, we only process Path objects. realabspath = Path(inputpath) @@ -385,12 +385,9 @@ def conv_inputpath_to_realabspath( # I believe the pathlib library (https://docs.python.org/3/library/pathlib.html#pathlib.Path.resolve) does it this # way to avoid an edge case related to symlinks and normalizing paths (footnote 1 of the linked docs) realabspath = realabspath.resolve() - assert ( - realabspath.is_absolute() - ), f"after being processed, realabspath ({realabspath}) is still not absolute" - assert ( - realabspath.exists() - ), f"after being processed, realabspath ({realabspath}) is still a non-existent path" + assert is_fully_resolved( + realabspath + ), f"realabspath ({realabspath}) is not fully resolved" return realabspath @@ -501,7 +498,7 @@ def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: Path, mode: str = "r") -> Open a file and "save" it to [workspace]/task_runs/run_*/. It takes in a str | Path to match the interface of open(). This file does not work if open_fpath is a symlink, to make its interface identical to that of open(). - Make sure to resolve all symlinks with conv_inputpath_to_realabspath(). + Make sure to resolve all symlinks with fully_resolve_inputpath(). To avoid confusion, I'm enforcing this function to only work with absolute paths. See the comment of save_file() for what "saving" means If you are generating a "result" for the run, _do not_ use this. Just use the normal open(). @@ -650,7 +647,7 @@ def link_result( assert is_fully_resolved( result_fordpath ), f"result_fordpath ({result_fordpath}) should be a fully resolved path" - result_fordpath = conv_inputpath_to_realabspath(dbgym_cfg, result_fordpath) + result_fordpath = fully_resolve_inputpath(dbgym_cfg, result_fordpath) assert is_child_path(result_fordpath, dbgym_cfg.dbgym_this_run_path) assert not os.path.islink(result_fordpath) From b86ad3112f607139d6608af235721c9dc54675a3 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Mon, 23 Dec 2024 14:41:41 -0500 Subject: [PATCH 6/8] changed all absolute and existent assertions to is_fully_resolved (removing the resolve() call as well) --- dbms/postgres/cli.py | 14 ++++++-------- tune/protox/embedding/datagen.py | 11 +++++------ 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py index 0f96349e..3e930174 100644 --- a/dbms/postgres/cli.py +++ b/dbms/postgres/cli.py @@ -39,6 +39,7 @@ default_pgbin_path, fully_resolve_inputpath, get_dbdata_tgz_name, + is_fully_resolved, is_ssd, link_result, open_and_save, @@ -316,13 +317,10 @@ def _start_or_stop_postgres( dbgym_cfg: DBGymConfig, pgbin_path: Path, dbdata_dpath: Path, is_start: bool ) -> None: # They should be absolute paths and should exist - assert pgbin_path.is_absolute() and pgbin_path.exists() - assert dbdata_dpath.is_absolute() and dbdata_dpath.exists() - # The inputs may be symlinks so we need to resolve them first - pgbin_real_dpath = pgbin_path.resolve() - dbdata_dpath = dbdata_dpath.resolve() + assert is_fully_resolved(pgbin_path) + assert is_fully_resolved(dbdata_dpath) pgport = DEFAULT_POSTGRES_PORT - save_file(dbgym_cfg, pgbin_real_dpath / "pg_ctl") + save_file(dbgym_cfg, pgbin_path / "pg_ctl") if is_start: # We use subprocess.run() because subprocess_run() never returns when running "pg_ctl start". @@ -330,12 +328,12 @@ def _start_or_stop_postgres( # On the other hand, subprocess.run() does return normally, like calling `./pg_ctl` on the command line would do. result = subprocess.run( f"./pg_ctl -D \"{dbdata_dpath}\" -o '-p {pgport}' start", - cwd=pgbin_real_dpath, + cwd=pgbin_path, shell=True, ) result.check_returncode() else: subprocess_run( f"./pg_ctl -D \"{dbdata_dpath}\" -o '-p {pgport}' stop", - cwd=pgbin_real_dpath, + cwd=pgbin_path, ) diff --git a/tune/protox/embedding/datagen.py b/tune/protox/embedding/datagen.py index f8ea818b..f2728027 100644 --- a/tune/protox/embedding/datagen.py +++ b/tune/protox/embedding/datagen.py @@ -47,6 +47,7 @@ fully_resolve_inputpath, get_default_workload_name_suffix, get_workload_name, + is_fully_resolved, is_ssd, link_result, open_and_save, @@ -306,19 +307,17 @@ def untar_snapshot( dbgym_cfg: DBGymConfig, dbdata_snapshot_fpath: Path, dbdata_parent_dpath: Path ) -> Path: # It should be an absolute path and it should exist - assert ( - dbdata_snapshot_fpath.is_absolute() and dbdata_snapshot_fpath.exists() + assert is_fully_resolved( + dbdata_snapshot_fpath ), f"untar_snapshot(): dbdata_snapshot_fpath ({dbdata_snapshot_fpath}) either doesn't exist or is not absolute" - # It may be a symlink so we need to resolve them first - dbdata_snapshot_real_fpath = dbdata_snapshot_fpath.resolve() - save_file(dbgym_cfg, dbdata_snapshot_real_fpath) + save_file(dbgym_cfg, dbdata_snapshot_fpath) dbdata_dpath = dbdata_parent_dpath / "dbdata" # Make the parent dir and the dbdata dir. Note how we require that dbdata_dpath does not exist while it's ok if the parent does. dbdata_parent_dpath.mkdir(parents=True, exist_ok=True) if dbdata_dpath.exists(): shutil.rmtree(dbdata_dpath) dbdata_dpath.mkdir(parents=False, exist_ok=False) - subprocess_run(f"tar -xzf {dbdata_snapshot_real_fpath} -C {dbdata_dpath}") + subprocess_run(f"tar -xzf {dbdata_snapshot_fpath} -C {dbdata_dpath}") return dbdata_dpath From 03ccb0da4c83c2f39cefc6b91cbf170f6fce80bc Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Mon, 23 Dec 2024 15:20:17 -0500 Subject: [PATCH 7/8] comment change --- dbms/postgres/cli.py | 2 +- tune/protox/agent/hpo.py | 2 +- tune/protox/agent/replay.py | 2 +- tune/protox/agent/tune.py | 2 +- tune/protox/embedding/datagen.py | 2 +- tune/protox/embedding/train.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py index 3e930174..9c9d1435 100644 --- a/dbms/postgres/cli.py +++ b/dbms/postgres/cli.py @@ -108,7 +108,7 @@ def postgres_dbdata( dbgym_cfg.dbgym_workspace_path ) - # Convert all input paths to absolute paths + # Fully resolve all input paths. pgbin_path = fully_resolve_inputpath(dbgym_cfg, pgbin_path) dbdata_parent_dpath = fully_resolve_inputpath(dbgym_cfg, dbdata_parent_dpath) diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index a30d9cb9..d977e258 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -294,7 +294,7 @@ def hpo( if seed is None: seed = random.randint(0, int(1e8)) - # Convert all input paths to absolute paths + # Fully resolve all input paths. embedder_path = fully_resolve_inputpath(dbgym_cfg, embedder_path) benchmark_config_path = fully_resolve_inputpath(dbgym_cfg, benchmark_config_path) benchbase_config_path = fully_resolve_inputpath(dbgym_cfg, benchbase_config_path) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index c5ba9511..fe4dbaf6 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -151,7 +151,7 @@ def replay( boot_enabled_during_tune, ) - # Convert all input paths to absolute paths + # Fully resolve all input paths. tuning_steps_dpath = fully_resolve_inputpath(dbgym_cfg, tuning_steps_dpath) # Group args together to reduce the # of parameters we pass into functions diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index ad7a3859..b8eea2f7 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -86,7 +86,7 @@ def tune( dbgym_cfg.dbgym_workspace_path, benchmark_name, workload_name ) - # Convert all input paths to absolute paths + # Fully resolve all input paths. hpoed_agent_params_path = fully_resolve_inputpath( dbgym_cfg, hpoed_agent_params_path ) diff --git a/tune/protox/embedding/datagen.py b/tune/protox/embedding/datagen.py index f2728027..d43c2e73 100644 --- a/tune/protox/embedding/datagen.py +++ b/tune/protox/embedding/datagen.py @@ -226,7 +226,7 @@ def datagen( if seed is None: seed = random.randint(0, int(1e8)) - # Convert all input paths to absolute paths + # Fully resolve all input paths. workload_path = fully_resolve_inputpath(dbgym_cfg, workload_path) benchmark_config_path = fully_resolve_inputpath(dbgym_cfg, benchmark_config_path) pgbin_path = fully_resolve_inputpath(dbgym_cfg, pgbin_path) diff --git a/tune/protox/embedding/train.py b/tune/protox/embedding/train.py index 6ed8fddb..ed0d9daf 100644 --- a/tune/protox/embedding/train.py +++ b/tune/protox/embedding/train.py @@ -211,7 +211,7 @@ def train( if seed is None: seed = random.randint(0, int(1e8)) - # Convert all input paths to absolute paths + # Fully resolve all input paths. benchmark_config_path = fully_resolve_inputpath(dbgym_cfg, benchmark_config_path) traindata_path = fully_resolve_inputpath(dbgym_cfg, traindata_path) hpo_space_path = fully_resolve_inputpath(dbgym_cfg, hpo_space_path) From a3eb878ae4850382585dc1b89062eadf0d1074a4 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Mon, 23 Dec 2024 15:22:07 -0500 Subject: [PATCH 8/8] datagen now fully resolve pgbin path --- dbms/postgres/cli.py | 6 +++--- tune/protox/agent/hpo.py | 20 ++++++++++---------- tune/protox/agent/replay.py | 4 ++-- tune/protox/agent/tune.py | 8 +++----- tune/protox/embedding/datagen.py | 16 +++++++++------- tune/protox/embedding/train.py | 10 +++++----- util/workspace.py | 17 ++++++++++------- 7 files changed, 42 insertions(+), 39 deletions(-) diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py index 9c9d1435..10816f9a 100644 --- a/dbms/postgres/cli.py +++ b/dbms/postgres/cli.py @@ -37,7 +37,7 @@ DBGymConfig, default_dbdata_parent_dpath, default_pgbin_path, - fully_resolve_inputpath, + fully_resolve_path, get_dbdata_tgz_name, is_fully_resolved, is_ssd, @@ -109,8 +109,8 @@ def postgres_dbdata( ) # Fully resolve all input paths. - pgbin_path = fully_resolve_inputpath(dbgym_cfg, pgbin_path) - dbdata_parent_dpath = fully_resolve_inputpath(dbgym_cfg, dbdata_parent_dpath) + pgbin_path = fully_resolve_path(dbgym_cfg, pgbin_path) + dbdata_parent_dpath = fully_resolve_path(dbgym_cfg, dbdata_parent_dpath) # Check assertions on args if intended_dbdata_hardware == "hdd": diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index d977e258..68fdfb4d 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -43,7 +43,7 @@ default_pgbin_path, default_pristine_dbdata_snapshot_path, default_workload_path, - fully_resolve_inputpath, + fully_resolve_path, get_default_workload_name_suffix, get_workload_name, is_ssd, @@ -295,17 +295,17 @@ def hpo( seed = random.randint(0, int(1e8)) # Fully resolve all input paths. - embedder_path = fully_resolve_inputpath(dbgym_cfg, embedder_path) - benchmark_config_path = fully_resolve_inputpath(dbgym_cfg, benchmark_config_path) - benchbase_config_path = fully_resolve_inputpath(dbgym_cfg, benchbase_config_path) - sysknobs_path = fully_resolve_inputpath(dbgym_cfg, sysknobs_path) - pristine_dbdata_snapshot_path = fully_resolve_inputpath( + embedder_path = fully_resolve_path(dbgym_cfg, embedder_path) + benchmark_config_path = fully_resolve_path(dbgym_cfg, benchmark_config_path) + benchbase_config_path = fully_resolve_path(dbgym_cfg, benchbase_config_path) + sysknobs_path = fully_resolve_path(dbgym_cfg, sysknobs_path) + pristine_dbdata_snapshot_path = fully_resolve_path( dbgym_cfg, pristine_dbdata_snapshot_path ) - dbdata_parent_dpath = fully_resolve_inputpath(dbgym_cfg, dbdata_parent_dpath) - pgbin_path = fully_resolve_inputpath(dbgym_cfg, pgbin_path) - workload_path = fully_resolve_inputpath(dbgym_cfg, workload_path) - boot_config_fpath_during_hpo = fully_resolve_inputpath( + dbdata_parent_dpath = fully_resolve_path(dbgym_cfg, dbdata_parent_dpath) + pgbin_path = fully_resolve_path(dbgym_cfg, pgbin_path) + workload_path = fully_resolve_path(dbgym_cfg, workload_path) + boot_config_fpath_during_hpo = fully_resolve_path( dbgym_cfg, boot_config_fpath_during_hpo ) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index fe4dbaf6..3eb07b06 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -33,7 +33,7 @@ TuningMode, default_replay_data_fname, default_tuning_steps_dpath, - fully_resolve_inputpath, + fully_resolve_path, get_default_workload_name_suffix, get_workload_name, link_result, @@ -152,7 +152,7 @@ def replay( ) # Fully resolve all input paths. - tuning_steps_dpath = fully_resolve_inputpath(dbgym_cfg, tuning_steps_dpath) + tuning_steps_dpath = fully_resolve_path(dbgym_cfg, tuning_steps_dpath) # Group args together to reduce the # of parameters we pass into functions replay_args = ReplayArgs( diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index b8eea2f7..ec1e5767 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -19,7 +19,7 @@ TuningMode, default_hpoed_agent_params_path, default_tuning_steps_dname, - fully_resolve_inputpath, + fully_resolve_path, get_default_workload_name_suffix, get_workload_name, link_result, @@ -87,10 +87,8 @@ def tune( ) # Fully resolve all input paths. - hpoed_agent_params_path = fully_resolve_inputpath( - dbgym_cfg, hpoed_agent_params_path - ) - boot_config_fpath_during_tune = fully_resolve_inputpath( + hpoed_agent_params_path = fully_resolve_path(dbgym_cfg, hpoed_agent_params_path) + boot_config_fpath_during_tune = fully_resolve_path( dbgym_cfg, boot_config_fpath_during_tune ) diff --git a/tune/protox/embedding/datagen.py b/tune/protox/embedding/datagen.py index d43c2e73..1cd84ff1 100644 --- a/tune/protox/embedding/datagen.py +++ b/tune/protox/embedding/datagen.py @@ -44,7 +44,7 @@ default_pristine_dbdata_snapshot_path, default_traindata_fname, default_workload_path, - fully_resolve_inputpath, + fully_resolve_path, get_default_workload_name_suffix, get_workload_name, is_fully_resolved, @@ -227,13 +227,13 @@ def datagen( seed = random.randint(0, int(1e8)) # Fully resolve all input paths. - workload_path = fully_resolve_inputpath(dbgym_cfg, workload_path) - benchmark_config_path = fully_resolve_inputpath(dbgym_cfg, benchmark_config_path) - pgbin_path = fully_resolve_inputpath(dbgym_cfg, pgbin_path) - pristine_dbdata_snapshot_path = fully_resolve_inputpath( + workload_path = fully_resolve_path(dbgym_cfg, workload_path) + benchmark_config_path = fully_resolve_path(dbgym_cfg, benchmark_config_path) + pgbin_path = fully_resolve_path(dbgym_cfg, pgbin_path) + pristine_dbdata_snapshot_path = fully_resolve_path( dbgym_cfg, pristine_dbdata_snapshot_path ) - dbdata_parent_dpath = fully_resolve_inputpath(dbgym_cfg, dbdata_parent_dpath) + dbdata_parent_dpath = fully_resolve_path(dbgym_cfg, dbdata_parent_dpath) # Check assertions on args if intended_dbdata_hardware == "hdd": @@ -293,7 +293,9 @@ def datagen( generic_args.pristine_dbdata_snapshot_path, generic_args.dbdata_parent_dpath, ) - pgbin_path = default_pgbin_path(dbgym_cfg.dbgym_workspace_path) + pgbin_path = fully_resolve_path( + dbgym_cfg, default_pgbin_path(dbgym_cfg.dbgym_workspace_path) + ) start_postgres(dbgym_cfg, pgbin_path, dbdata_dpath) _gen_traindata_dpath(dbgym_cfg, generic_args, dir_gen_args) _combine_traindata_dpath_into_parquet(dbgym_cfg, generic_args, file_gen_args) diff --git a/tune/protox/embedding/train.py b/tune/protox/embedding/train.py index ed0d9daf..6d66cb2d 100644 --- a/tune/protox/embedding/train.py +++ b/tune/protox/embedding/train.py @@ -32,7 +32,7 @@ default_benchmark_config_path, default_traindata_path, default_workload_path, - fully_resolve_inputpath, + fully_resolve_path, get_default_workload_name_suffix, get_workload_name, ) @@ -212,16 +212,16 @@ def train( seed = random.randint(0, int(1e8)) # Fully resolve all input paths. - benchmark_config_path = fully_resolve_inputpath(dbgym_cfg, benchmark_config_path) - traindata_path = fully_resolve_inputpath(dbgym_cfg, traindata_path) - hpo_space_path = fully_resolve_inputpath(dbgym_cfg, hpo_space_path) + benchmark_config_path = fully_resolve_path(dbgym_cfg, benchmark_config_path) + traindata_path = fully_resolve_path(dbgym_cfg, traindata_path) + hpo_space_path = fully_resolve_path(dbgym_cfg, hpo_space_path) # setup random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) - workload_path = fully_resolve_inputpath( + workload_path = fully_resolve_path( dbgym_cfg, default_workload_path( dbgym_cfg.dbgym_workspace_path, benchmark_name, workload_name diff --git a/util/workspace.py b/util/workspace.py index c9fd9ce3..5582f5c1 100644 --- a/util/workspace.py +++ b/util/workspace.py @@ -362,15 +362,18 @@ def make_standard_dbgym_cfg() -> DBGymConfig: return dbgym_cfg -def fully_resolve_inputpath( - dbgym_cfg: DBGymConfig, inputpath: os.PathLike[str] -) -> Path: +def fully_resolve_path(dbgym_cfg: DBGymConfig, inputpath: os.PathLike[str]) -> Path: """ - Convert any user inputted path to a real, absolute path. - For flexibility, we take in any os.PathLike. However, for consistency, we always output a Path object + Fully resolve any path to a real, absolute path. + + For flexibility, we take in any os.PathLike. However, for consistency, we always output a Path object. + Whenever a path is required, the user is allowed to enter relative paths, absolute paths, or paths starting with ~. + Relative paths are relative to the base dbgym repo dir. + It *does not* check whether the path exists, since the user might be wanting to create a new file/dir. + Raises RuntimeError for errors. """ # For simplicity, we only process Path objects. @@ -498,7 +501,7 @@ def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: Path, mode: str = "r") -> Open a file and "save" it to [workspace]/task_runs/run_*/. It takes in a str | Path to match the interface of open(). This file does not work if open_fpath is a symlink, to make its interface identical to that of open(). - Make sure to resolve all symlinks with fully_resolve_inputpath(). + Make sure to resolve all symlinks with fully_resolve_path(). To avoid confusion, I'm enforcing this function to only work with absolute paths. See the comment of save_file() for what "saving" means If you are generating a "result" for the run, _do not_ use this. Just use the normal open(). @@ -647,7 +650,7 @@ def link_result( assert is_fully_resolved( result_fordpath ), f"result_fordpath ({result_fordpath}) should be a fully resolved path" - result_fordpath = fully_resolve_inputpath(dbgym_cfg, result_fordpath) + result_fordpath = fully_resolve_path(dbgym_cfg, result_fordpath) assert is_child_path(result_fordpath, dbgym_cfg.dbgym_this_run_path) assert not os.path.islink(result_fordpath)