Skip to content

Experiment scripts #12

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions boxplotting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import os
from pathlib import Path

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

from experimental_env.analysis.analyze_summarizers.boxplot_error_summarizer import BoxplotErrorSummarizer
from experimental_env.analysis.metrics import SquaredError
from experimental_env.experiment.experiment_parser import ExperimentParser

# Draws, for every mixture, one grouped boxplot comparing the squared-error
# values of the EM and ELM second-stage results across several sample sizes.

SAMPLE_SIZES = (200, 500, 1000)
# Order in which the algorithms are prompted for and drawn (EM before ELM).
ALGORITHMS = ("EM", "ELM")
# Order in which result directories are parsed and errors computed (ELM before EM).
PROCESSING_ORDER = ("ELM", "EM")

box_plots_dir = input("Enter the directory where boxplots will be saved.")
WORKING_DIR = Path(box_plots_dir)

# One prompt per (sample size, algorithm) pair; the f-string reproduces the
# exact prompt texts and the order in which they are asked.
result_dirs = {}
for size in SAMPLE_SIZES:
    for algorithm in ALGORITHMS:
        answer = input(
            f"Enter the path to the results of the {algorithm} algorithm in the second stage with sample size {size}."
        )
        result_dirs[(algorithm, size)] = Path(answer)

# Parsed results are looked up by mixture key below (via .keys() and [mixture]).
results = {}
for size in SAMPLE_SIZES:
    for algorithm in PROCESSING_ORDER:
        results[(algorithm, size)] = ExperimentParser().parse(result_dirs[(algorithm, size)])

font_size = 16

plt.rcParams.update(
    {
        "font.size": font_size,
        "axes.titlesize": font_size + 2,
        "axes.labelsize": font_size,
        "xtick.labelsize": font_size - 2,
        "ytick.labelsize": font_size - 2,
        "legend.fontsize": font_size - 2,
        "legend.title_fontsize": font_size,
    }
)

os.makedirs(WORKING_DIR, exist_ok=True)

# The mixtures to plot are taken from the EM results at the first sample size.
mixture_keys = results[("EM", SAMPLE_SIZES[0])].keys()

for mixture in mixture_keys:
    # A mixture absent from any result set cannot be compared; report it and
    # move on instead of aborting the remaining plots with a KeyError.
    missing = [
        f"{algorithm} (size {size})"
        for (algorithm, size), parsed in results.items()
        if mixture not in parsed.keys()
    ]
    if missing:
        print(f"Skipping {mixture}: no results for {', '.join(missing)}")
        continue

    summarizer = BoxplotErrorSummarizer(SquaredError())

    # Long-format rows: one per error value, tagged with sample size and algorithm.
    rows = []
    for size in SAMPLE_SIZES:
        errors = {
            algorithm: summarizer.calculate(results[(algorithm, size)][mixture])
            for algorithm in PROCESSING_ORDER
        }
        for algorithm in ALGORITHMS:
            rows.extend({"size": size, "Algorithm": algorithm, "Error": error} for error in errors[algorithm])

    if not rows:
        # An empty frame has no "size"/"Error" columns for seaborn to plot.
        print(f"Skipping {mixture}: no error values to plot")
        continue

    combined_data = pd.DataFrame(rows)

    fig, ax = plt.subplots(figsize=(10, 6))

    # hue_order pins the EM/ELM box order (and colours) so every figure matches,
    # regardless of which algorithm happens to contribute the first row.
    sns.boxplot(
        x="size",
        y="Error",
        hue="Algorithm",
        hue_order=list(ALGORITHMS),
        data=combined_data,
        ax=ax,
    )

    ax.set_title(f"MISE on {mixture}")
    ax.set_xlabel("Sample size")
    ax.set_ylabel("Metric value")
    ax.set_yscale("log")

    # Operate on the figure object directly rather than pyplot's current figure.
    fig.tight_layout()
    fig.savefig(WORKING_DIR / f"boxplot_{mixture}.png")
    plt.close(fig)
67 changes: 67 additions & 0 deletions script_stage_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""The script implements the first step of the experiment"""

from pathlib import Path

import numpy as np

from experimental_env.preparation.dataset_generator import (
ConcreteDatasetGenerator,
RandomDatasetGenerator,
)
from mpest.models import ExponentialModel, GaussianModel, WeibullModelExp

# Destination of the generated datasets and the size passed to every generator.
WORKING_DIR = Path(input("Enter the path directory where the results of the first stage will be saved."))
SAMPLES_SIZE = 1000

np.random.seed(42)

# Part 1: datasets from RandomDatasetGenerator, one batch per model combination.
RANDOM_MIXTURES = [
    [ExponentialModel],
    [GaussianModel],
    [WeibullModelExp],
    [WeibullModelExp, GaussianModel],
    [ExponentialModel, GaussianModel],
    [WeibullModelExp, WeibullModelExp],
    [ExponentialModel, ExponentialModel],
]
random_generator = RandomDatasetGenerator()
for component_models in RANDOM_MIXTURES:
    random_generator.generate(SAMPLES_SIZE, component_models, WORKING_DIR, exp_count=100)

# Part 2: datasets from ConcreteDatasetGenerator with explicitly given components.
# Each entry is (components, count): every component is the argument triple
# handed to add_distribution(), and count is the third argument of generate().
# NOTE(review): the third add_distribution() argument looks like a mixture
# weight (the two-component entries sum to 1.0) and count looks like a number
# of experiments - confirm against ConcreteDatasetGenerator.
CONCRETE_MIXTURES = [
    ([(ExponentialModel, [1.0], 1.0)], 5),
    ([(GaussianModel, [0, 1.0], 1.0)], 5),
    ([(WeibullModelExp, [1.0, 1.0], 1.0)], 5),
    # NOTE(review): identical to the previous entry - kept as is, but confirm
    # whether a different parameter set was intended.
    ([(WeibullModelExp, [1.0, 1.0], 1.0)], 5),
    ([(WeibullModelExp, [1.0, 0.5], 1.0)], 5),
    ([(GaussianModel, [-1.0, 2.5], 0.3), (GaussianModel, [1.0, 0.5], 0.7)], 10),
    ([(GaussianModel, [0.0, 1.5], 0.6), (GaussianModel, [1.0, 1.0], 0.4)], 10),
]
for components, count in CONCRETE_MIXTURES:
    # A fresh generator per entry, so components never leak between datasets.
    concrete_generator = ConcreteDatasetGenerator()
    for model, params, weight in components:
        concrete_generator.add_distribution(model, params, weight)
    concrete_generator.generate(SAMPLES_SIZE, WORKING_DIR, count)
48 changes: 48 additions & 0 deletions script_stage_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""The script implements the second step of the experiment"""

import random
from pathlib import Path

from experimental_env.experiment.estimators import (
LikelihoodEstimator,
LMomentsEstimator,
)
from experimental_env.experiment.experiment_executors.random_executor import (
RandomExperimentExecutor,
)
from experimental_env.preparation.dataset_parser import SamplesDatasetParser
from mpest.em.breakpointers import StepCountBreakpointer
from mpest.em.distribution_checkers import (
FiniteChecker,
PriorProbabilityThresholdChecker,
)

# Input (first-stage datasets) and output (second-stage results) locations.
SOURCE_DIR = Path(input("Enter the path to the directory where the experiment results are located."))
WORKING_DIR = Path(input("Enter the directory where the results of the second stage will be saved."))

# NOTE(review): only the stdlib `random` generator is seeded; whether the
# executor draws its randomness from it is not visible in this script.
random.seed(42)

# Load the datasets found under SOURCE_DIR.
datasets = SamplesDatasetParser().parse(SOURCE_DIR)

# Run each estimator over the same datasets: L-moments first, likelihood second.
# Every run gets its own executor, breakpointer and checker instances.
for estimator_cls in (LMomentsEstimator, LikelihoodEstimator):
    executor = RandomExperimentExecutor(WORKING_DIR, 5)
    executor.execute(
        datasets,
        estimator_cls(
            StepCountBreakpointer(max_step=16),
            FiniteChecker() + PriorProbabilityThresholdChecker(),
        ),
    )
36 changes: 36 additions & 0 deletions script_stage_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""The script implements the third step of the experiment"""

from pathlib import Path

from experimental_env.analysis.analysis import Analysis
from experimental_env.analysis.analyze_strategies.density_plot import DensityPlot
from experimental_env.analysis.analyze_strategies.error_convergence import (
ErrorConvergence,
)
from experimental_env.analysis.analyze_strategies.time_plot import TimePlot
from experimental_env.analysis.analyze_summarizers.error_summarizer import (
ErrorSummarizer,
)
from experimental_env.analysis.analyze_summarizers.time_summarizer import TimeSummarizer
from experimental_env.analysis.metrics import SquaredError
from experimental_env.experiment.experiment_parser import ExperimentParser

# Output directory of the third stage.
WORKING_DIR = Path(input("Enter the directory where the results of the third stage will be saved."))

# Locations of the second-stage results; the EM path is asked for before the ELM one.
LIKELIHOOD_DIR = Path(input("Enter the path to the results of the EM algorithm in the second stage"))
LMOMENTS_DIR = Path(input("Enter the path to the results of the ELM algorithm in the second stage"))

# Parse both result sets (ELM first, then EM).
elm_results = ExperimentParser().parse(LMOMENTS_DIR)
em_results = ExperimentParser().parse(LIKELIHOOD_DIR)

# Strategies and summarizers shared by every analysis run below.
analyze_actions = [DensityPlot(), TimePlot(), ErrorConvergence(SquaredError())]
analyze_summarizers = [ErrorSummarizer(SquaredError()), TimeSummarizer()]


def _new_analysis():
    """Build a fresh Analysis over WORKING_DIR with the shared actions and summarizers."""
    return Analysis(WORKING_DIR, analyze_actions, analyze_summarizers)


# Analyze each method on its own, then compare the two against each other.
for parsed_results, label in ((elm_results, "ELM"), (em_results, "EM")):
    _new_analysis().analyze(parsed_results, label)
_new_analysis().compare(elm_results, em_results, "ELM", "EM")