Updates the project name and prepares to run on all problems again #16

Draft · wants to merge 10 commits into base: master

1,546 changes: 1,546 additions & 0 deletions data/run_lengths/run_lengths_128.csv

Large diffs are not rendered by default.

1,546 changes: 1,546 additions & 0 deletions data/run_lengths/run_lengths_2.csv

Large diffs are not rendered by default.
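
These run-length CSVs look like the output of the run-length bookkeeping added further down in print_tables_for_website_and_paper.py: the filenames match the run_lengths_{n_dimensions}.csv pattern for 128 and 2 dimensions, although that script writes under data/results_cache/. Assuming that is where they come from, a minimal sketch for inspecting them; the column names are taken from the rows_for_summary dict built in select_runs and are otherwise an assumption:

    # Sketch only: inspect the run-length summaries added in this PR.
    # Assumes columns (function_name, solver_name, seed, experiment_id, max_steps),
    # mirroring the rows_for_summary dict in select_runs.
    import pandas as pd

    run_lengths = pd.read_csv("data/run_lengths/run_lengths_128.csv")

    # Runs with fewer than 20 steps are the ones the table script marks as [TBD].
    short_runs = run_lengths[run_lengths["max_steps"] < 20]
    print(short_runs.groupby(["function_name", "solver_name"]).size())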

1 change: 1 addition & 0 deletions pyproject.toml
@@ -14,6 +14,7 @@ dependencies = [
"click",
"lightning",
"fair-esm",
"jinja2",
"poli-core@git+https://github.com/MachineLearningLifeScience/poli.git",
"poli-baselines@git+https://github.com/MachineLearningLifeScience/poli-baselines.git"
]
5 changes: 3 additions & 2 deletions run.py
@@ -13,6 +13,7 @@
from poli.core.exceptions import BudgetExhaustedException
from poli.core.util.seeding import seed_python_numpy_and_torch

from hdbo_benchmark.utils.constants import WANDB_PROJECT
from hdbo_benchmark.utils.experiments.load_generative_models import (
load_generative_model_and_bounds,
)
@@ -56,7 +57,7 @@ def _main(
not force_run
and wandb_mode == "online"
and experiment_has_already_run(
experiment_name="hdbo_benchmark_results",
experiment_name=WANDB_PROJECT,
solver_name=solver_name,
function_name=function_name,
n_dimensions=n_dimensions,
@@ -78,7 +79,7 @@

# Setting the observer configuration
observer_config = ObserverConfig(
experiment_name="hdbo_benchmark_results",
experiment_name=WANDB_PROJECT,
function_name=function_name,
solver_name=solver_name,
n_dimensions=n_dimensions,
30 changes: 27 additions & 3 deletions src/hdbo_benchmark/results/download_tables.py
@@ -30,6 +30,7 @@
import wandb
from hdbo_benchmark.utils.constants import ROOT_DIR
from hdbo_benchmark.utils.results.download_from_wandb import (
get_all_runs_for_experiment,
get_all_runs_for_function_names,
)

@@ -85,10 +86,33 @@ def create_base_table_for_ehrlich(
return df


def create_base_table_for_entire_benchmark(
save_cache: bool = True,
use_cache: bool = False,
tags: list[str] | None = None,
):
CACHE_PATH = ROOT_DIR / "data" / "results_cache"
CACHE_PATH.mkdir(exist_ok=True, parents=True)
tags_str = "-".join(tags) if tags is not None else "all"
CACHE_FILE = CACHE_PATH / f"base_table_entire_benchmark-tags-{tags_str}.csv"

if use_cache and CACHE_FILE.exists():
df = pd.read_csv(CACHE_FILE)
return df

all_runs = get_all_runs_for_experiment(experiment_name="hdbo-benchmark-results-v2")
all_dfs = convert_data_to_dataframes(all_runs)

df = pd.concat(all_dfs)
if save_cache:
df.to_csv(CACHE_FILE, index=False)

return df


if __name__ == "__main__":
df = create_base_table_for_ehrlich(
create_base_table_for_entire_benchmark(
save_cache=True,
use_cache=True,
use_cache=False,
tags=None,
)
print(df)
Original file line number Diff line number Diff line change
@@ -9,7 +9,7 @@
matplotlib.rcParams.update({"text.usetex": True})

initialization = {
r"Hvarfner's \texttt{VanillaBO}": [33, 40, 59, 89, 240],
r"Hvarfner's": [33, 40, 59, 89, 240],
r"\texttt{RandomLineBO}": [],
r"\texttt{SAASBO}": [10, 20],
r"\texttt{Turbo}": [50, 100, 1000],
@@ -23,7 +23,7 @@
}

evaluation_budget = {
r"Hvarfner's \texttt{VanillaBO}": [100, 200, 500, 1000],
r"Hvarfner's": [100, 200, 500, 1000],
r"\texttt{RandomLineBO}": [1000],
r"\texttt{BAxUS}": [100, 500, 1000],
r"\texttt{SAASBO}": [50, 100, 400],
@@ -37,7 +37,7 @@
}

independent_replications = {
r"Hvarfner's \texttt{VanillaBO}": [10, 20],
r"Hvarfner's": [10, 20],
r"\texttt{RandomLineBO}": [100],
r"\texttt{BAxUS}": [20],
r"\texttt{SAASBO}": [30],
126 changes: 108 additions & 18 deletions src/hdbo_benchmark/results/print_tables_for_website_and_paper.py
@@ -3,11 +3,14 @@
"""

import json

from typing import Literal
import numpy as np
import pandas as pd

from hdbo_benchmark.results.download_tables import create_base_table_for_ehrlich
from hdbo_benchmark.results.download_tables import (
create_base_table_for_ehrlich,
create_base_table_for_entire_benchmark,
)
from hdbo_benchmark.utils.constants import ROOT_DIR

pd.set_option("display.max_colwidth", None)
@@ -54,18 +57,39 @@ def compute_pretty_names_for_functions():
}


def select_runs(df: pd.DataFrame, function_name: str, solver_name: str) -> pd.DataFrame:
def select_runs(
df: pd.DataFrame,
function_name: str,
solver_name: str,
n_dimensions: Literal[128, 2, None] = None,
) -> tuple[pd.DataFrame, pd.DataFrame]:
sliced_df = df[
(df["function_name"] == function_name) & (df["solver_name"] == solver_name)
]
if n_dimensions is not None:
sliced_df = sliced_df[sliced_df["n_dimensions"] == n_dimensions]
sliced_df = sliced_df[sliced_df["seed"].isin([1, 2, 3, 4, 5])]

max_iter = 1299 if function_name == "ehrlich_holo_large" else 309
sliced_df = sliced_df[sliced_df["_step"] <= max_iter]

# # Saving the longest runs per seed per problem
# longest_runs_per_problem_per_seed: dict[str, list[int]] = {}
# for function_name in sliced_df["function_name"].unique():
# longest_runs_per_problem_per_seed[function_name] = []
# for seed_ in range(1, 6):
# sliced_df_of_seed = sliced_df[sliced_df["seed"] == seed_]
# longest_run = (
# sliced_df_of_seed.groupby("experiment_id")["_step"].max().nlargest(1)
# )
# longest_runs_per_problem_per_seed[function_name].append(
# longest_run.index[0]
# )

# If the number of experiments is greater than 5, we need to select the "longest" 5,
# i.e. the ones in which the number of iterations is the greatest (for each seed)
if len(sliced_df["experiment_id"].unique()) > 5:
rows_for_summary = []
if len(sliced_df["experiment_id"].unique()) >= 5:
slices_per_seed = []
for seed_ in range(1, 6):
assert seed_ in sliced_df["seed"].unique()
@@ -76,6 +100,15 @@ def select_runs(df: pd.DataFrame, function_name: str, solver_name: str) -> pd.Da
print(
f"Largest experiment id for {solver_name} in {function_name} (seed {seed_}): {experiment_id_of_longest_runs.index[0]} ({experiment_id_of_longest_runs.values[0]})"
)
rows_for_summary.append(
{
"function_name": function_name,
"solver_name": solver_name,
"seed": seed_,
"experiment_id": experiment_id_of_longest_runs.index[0],
"max_steps": experiment_id_of_longest_runs.values[0],
}
)
slices_per_seed.append(
sliced_df_of_seed[
sliced_df_of_seed["experiment_id"]
@@ -89,23 +122,32 @@ def select_runs(df: pd.DataFrame, function_name: str, solver_name: str) -> pd.Da
assert len(sliced_df["seed"].unique()) == 5
for i in range(1, 6):
assert i in sliced_df["seed"].unique()
else:
print(
f"Something's fishy with {function_name} and {solver_name}. Seeds: {sliced_df['seed'].unique()}"
)

return sliced_df
return sliced_df, pd.DataFrame(rows_for_summary)


def summary_per_function(
df: pd.DataFrame,
normalized_per_row: bool = True,
use_tex: bool = True,
normalize_with_max_value: float | None = None,
) -> tuple[pd.DataFrame, pd.DataFrame]:
n_dimensions: Literal[128, 2, None] = None,
) -> tuple[pd.DataFrame, pd.DataFrame, list[dict[str, str]], pd.DataFrame]:
solver_name_but_pretty = compute_pretty_names_for_solvers(use_tex=use_tex)

rows = []
missing_experiments = []
run_lengths = []
for function_name in df["function_name"].unique():
for solver_name in df["solver_name"].unique():
slice_df = select_runs(df, function_name, solver_name)
slice_df, summary_of_run_lengths = select_runs(
df, function_name, solver_name
)
run_lengths.append(summary_of_run_lengths)
if (
len(slice_df["seed"].unique()) != 5
and solver_name in solver_name_but_pretty.keys()
@@ -155,7 +197,7 @@ def summary_per_function(
continue
summary_avg.loc[i] = (row - lowest_value) / (best_value - lowest_value)

return summary_avg, summary_std, missing_experiments
return summary_avg, summary_std, missing_experiments, pd.concat(run_lengths)


def print_table_as_tex(
@@ -164,26 +206,45 @@
transpose: bool = False,
use_tex: bool = True,
include_color: bool = True,
n_dimensions: Literal[128, 2, None] = None,
):
solver_name_but_pretty = compute_pretty_names_for_solvers(use_tex=use_tex)
problem_name_but_pretty = compute_pretty_names_for_functions()
index_name = "Solver" + r"\textbackslash " + "Oracle" if transpose else "Oracle"
summary_avg, summary_std, missing_experiments = summary_per_function(
df, normalized_per_row=normalized
summary_avg, summary_std, missing_experiments, run_lengths = summary_per_function(
df,
normalized_per_row=normalized,
n_dimensions=n_dimensions,
)
summary_avg_normalized, _, _ = summary_per_function(
df, normalized_per_row=True, normalize_with_max_value=1.0
summary_avg_normalized, _, _, _ = summary_per_function(
df,
normalized_per_row=True,
normalize_with_max_value=1.0,
n_dimensions=n_dimensions,
)

final_table_rows: list[dict[str, str]] = []
for function_name, pretty_function_name in problem_name_but_pretty.items():
for function_name in summary_avg.index:
pretty_function_name = problem_name_but_pretty.get(function_name, function_name)
row = {
index_name: pretty_function_name,
}
for solver_name, pretty_solver_name in solver_name_but_pretty.items():
run_lengths_for_solver = run_lengths[
(run_lengths["function_name"] == function_name)
& (run_lengths["solver_name"] == solver_name)
]
if solver_name not in summary_avg.columns:
row[pretty_solver_name] = r"\alert{[TBD]}"
continue
if any(run_lengths_for_solver["max_steps"] < 20):
print(
f"Some runs for {pretty_solver_name} in {pretty_function_name}"
" have less than 20 steps."
" Will consider them as TBD for now."
)
row[pretty_solver_name] = r"\alert{[TBD]}"
continue

average = summary_avg.loc[function_name, solver_name]
std = summary_std.loc[function_name, solver_name]
@@ -266,21 +327,50 @@
if transpose:
final_table = final_table.T

final_table.to_csv(ROOT_DIR / "data" / "results_cache" / "table_ehrlich.csv")
final_table.to_csv(
ROOT_DIR / "data" / "results_cache" / f"table_benchmark_{n_dimensions}.csv"
)

latex_table = final_table.to_latex(escape=False)
latex_table = r"\resizebox{\textwidth}{!}{" + latex_table + "}"

print(latex_table)

with open(ROOT_DIR / "data" / "results_cache" / "table_ehrlich.tex", "w") as f:
with open(
ROOT_DIR / "data" / "results_cache" / f"table_benchmark_{n_dimensions}.tex",
"w",
) as f:
f.write(latex_table)

with open(
ROOT_DIR / "data" / "results_cache" / "missing_experiments_ehrlich.json", "w"
ROOT_DIR
/ "data"
/ "results_cache"
/ f"missing_experiments_{n_dimensions}.json",
"w",
) as f:
json.dump(missing_experiments, f)

run_lengths.to_csv(
ROOT_DIR / "data" / "results_cache" / f"run_lengths_{n_dimensions}.csv"
)


def print_table_for_website(
use_tex: bool = True,
include_color: bool = True,
n_dimensions: Literal[128, 2] = 128,
):
df = create_base_table_for_entire_benchmark(save_cache=True, use_cache=True)

print_table_as_tex(
df,
transpose=False,
use_tex=use_tex,
include_color=include_color,
n_dimensions=n_dimensions,
)


def print_table_for_ehrlich(use_tex: bool = True, include_color: bool = True):
df = create_base_table_for_ehrlich(save_cache=True, use_cache=True)
Expand All @@ -294,5 +384,5 @@ def print_table_for_ehrlich(use_tex: bool = True, include_color: bool = True):


if __name__ == "__main__":
print_table_for_ehrlich(use_tex=True, include_color=True)
print_table_for_ehrlich(use_tex=False, include_color=False)
print_table_for_website(use_tex=True, include_color=False, n_dimensions=128)
print_table_for_website(use_tex=True, include_color=False, n_dimensions=2)
2 changes: 1 addition & 1 deletion src/hdbo_benchmark/utils/constants.py
@@ -14,7 +14,7 @@
ROOT_DIR = Path(__file__).parent.parent.parent.parent.resolve()
MODELS_DIR = ROOT_DIR / "data" / "trained_models"

WANDB_PROJECT = "hdbo-embeddings-benchmark"
WANDB_PROJECT = "hdbo-benchmark-results-v2"
WANDB_ENTITY = "hdbo-benchmark"

PENALIZE_UNFEASIBLE_WITH = -100.0
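
This rename is the heart of the PR: run.py and the observer configuration now read the experiment name from WANDB_PROJECT instead of the hard-coded "hdbo_benchmark_results" string, and download_tables.py points at "hdbo-benchmark-results-v2". As a point of reference (not code from this repository), a hedged sketch of how runs in the renamed Weights & Biases project could be listed with the public wandb API:

    # Illustrative sketch, not part of this PR: list runs in the renamed W&B project.
    # The config keys ("function_name", "solver_name") are assumptions about how the
    # observer tags each run.
    import wandb

    from hdbo_benchmark.utils.constants import WANDB_ENTITY, WANDB_PROJECT

    api = wandb.Api()
    for run in api.runs(f"{WANDB_ENTITY}/{WANDB_PROJECT}"):
        print(run.id, run.config.get("function_name"), run.config.get("solver_name"))
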
7 changes: 7 additions & 0 deletions src/hdbo_benchmark/utils/experiments/load_problems.py
@@ -115,6 +115,11 @@ def _load_ehrlich_holo(size: Literal["tiny", "small", "large"]) -> Problem:
n_motifs = 2
motif_length = 5
n_supervised_points = 10
case "medium":
sequence_length = 32
n_motifs = 3
motif_length = 8
n_supervised_points = 50
case "large":
sequence_length = 64
n_motifs = 4
@@ -159,6 +164,8 @@ def _load_problem(function_name: str) -> Problem:
return _load_ehrlich_holo(size="tiny")
case "ehrlich_holo_small":
return _load_ehrlich_holo(size="small")
case "ehrlich_holo_medium":
return _load_ehrlich_holo(size="medium")
case "ehrlich_holo_large":
return _load_ehrlich_holo(size="large")
case "pest_control_equivalent":
Empty file.
16 changes: 16 additions & 0 deletions src/hdbo_benchmark/utils/slurm/batch_template.sht
@@ -0,0 +1,16 @@
#!/bin/bash
#SBATCH --job-name={{ job_name }}
#SBATCH --output={{ output_path }}/%j.log
#SBATCH --error={{ error_path }}/%j.err
#SBATCH -p gpu {{ gpu_resources }}
#SBATCH --array=1-{{ line_count }}%{{ parallel_count }}
#SBATCH --cpus-per-task={{ cpus_per_task }}
#SBATCH --mem={{ mem }}
#SBATCH --time={{ time }}

# Get the command from the corresponding line in the file
command=$(sed -n "${SLURM_ARRAY_TASK_ID}p" {{ commands_file }})

# Execute the command
echo "Running command: $command"
eval $command
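
The placeholders in this template are jinja2 variables, which is presumably why jinja2 was added to pyproject.toml in this PR: each SLURM array task pulls one line from {{ commands_file }} and runs it with eval. A minimal sketch of how such a template could be rendered into a submittable script; the file names and resource values below are illustrative assumptions, not values taken from this PR:

    # Hypothetical rendering of batch_template.sht with jinja2 (sketch, not repo code).
    # commands.txt is assumed to hold one `python run.py ...` invocation per line.
    from pathlib import Path

    from jinja2 import Template

    template_text = Path("src/hdbo_benchmark/utils/slurm/batch_template.sht").read_text()
    commands_file = Path("commands.txt")
    n_commands = len(commands_file.read_text().splitlines())

    script = Template(template_text).render(
        job_name="hdbo-benchmark",
        output_path="logs",
        error_path="logs",
        gpu_resources="--gres=gpu:1",
        line_count=n_commands,
        parallel_count=10,
        cpus_per_task=4,
        mem="16G",
        time="24:00:00",
        commands_file=str(commands_file),
    )
    Path("submit.sh").write_text(script)  # then: sbatch submit.sh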