"
+ ],
+ "text/plain": [
+ " replications data cumulative_mean stdev lower_ci upper_ci \\\n",
+ "0 1 9.842380 9.842380 NaN NaN NaN \n",
+ "1 2 10.060424 9.951402 NaN NaN NaN \n",
+ "2 3 9.925090 9.942631 0.110075 9.669189 10.216073 \n",
+ "3 4 9.938504 9.941599 0.089900 9.798549 10.084650 \n",
+ "4 5 10.016611 9.956602 0.084775 9.851339 10.061864 \n",
+ "5 6 9.883801 9.944468 0.081442 9.859000 10.029936 \n",
+ "6 7 10.040555 9.958195 0.082742 9.881671 10.034719 \n",
+ "7 8 10.086612 9.974247 0.089048 9.899801 10.048693 \n",
+ "8 1 0.501873 0.501873 NaN NaN NaN \n",
+ "9 2 0.516264 0.509069 NaN NaN NaN \n",
+ "10 3 0.531280 0.516472 0.014704 0.479945 0.553000 \n",
+ "11 4 0.491104 0.510130 0.017465 0.482339 0.537921 \n",
+ "12 5 0.463252 0.500755 0.025851 0.468656 0.532853 \n",
+ "13 6 0.390495 0.482378 0.050605 0.429272 0.535484 \n",
+ "14 7 0.473277 0.481078 0.046323 0.438236 0.523920 \n",
+ "15 8 0.611502 0.497381 0.062973 0.444734 0.550028 \n",
+ "16 9 0.478926 0.495330 0.059226 0.449805 0.540856 \n",
+ "17 10 0.564747 0.502272 0.059999 0.459351 0.545193 \n",
+ "18 11 0.571089 0.508528 0.060584 0.467827 0.549229 \n",
+ "19 12 0.393387 0.498933 0.066645 0.456589 0.541277 \n",
+ "20 13 0.444403 0.494738 0.065575 0.455112 0.534365 \n",
+ "21 14 0.447649 0.491375 0.064247 0.454280 0.528470 \n",
+ "22 15 0.614848 0.499606 0.069637 0.461043 0.538170 \n",
+ "23 16 0.461512 0.497225 0.067946 0.461020 0.533431 \n",
+ "24 17 0.425752 0.493021 0.068034 0.458041 0.528001 \n",
+ "25 18 0.481117 0.492360 0.066062 0.459508 0.525212 \n",
+ "26 19 0.376149 0.486243 0.069517 0.452738 0.519749 \n",
+ "27 20 0.429078 0.483385 0.068859 0.451158 0.515612 \n",
+ "28 21 0.624518 0.490106 0.073845 0.456492 0.523719 \n",
+ "29 22 0.515244 0.491248 0.072264 0.459208 0.523288 \n",
+ "30 23 0.525237 0.492726 0.070957 0.462042 0.523410 \n",
+ "31 24 0.520427 0.493880 0.069628 0.464479 0.523282 \n",
+ "32 25 0.519650 0.494911 0.068356 0.466695 0.523127 \n",
+ "33 26 0.638633 0.500439 0.072664 0.471089 0.529789 \n",
+ "34 27 0.514672 0.500966 0.071306 0.472758 0.529174 \n",
+ "35 28 0.503287 0.501049 0.069974 0.473916 0.528182 \n",
+ "36 29 0.461636 0.499690 0.069102 0.473405 0.525975 \n",
+ "37 30 0.551549 0.501419 0.068557 0.475819 0.527018 \n",
+ "38 31 0.509837 0.501690 0.067422 0.476960 0.526421 \n",
+ "39 32 0.490932 0.501354 0.066353 0.477431 0.525277 \n",
+ "40 33 0.558335 0.503081 0.066057 0.479658 0.526503 \n",
+ "41 34 0.548240 0.504409 0.065508 0.481552 0.527266 \n",
+ "42 35 0.457886 0.503080 0.065014 0.480746 0.525413 \n",
+ "43 36 0.464776 0.502016 0.064396 0.480227 0.523804 \n",
+ "44 1 0.499589 0.499589 NaN NaN NaN \n",
+ "45 2 0.501977 0.500783 NaN NaN NaN \n",
+ "46 3 0.498094 0.499887 0.001959 0.495021 0.504753 \n",
+ "47 4 0.498249 0.499477 0.001797 0.496618 0.502336 \n",
+ "48 5 0.496934 0.498969 0.001928 0.496575 0.501362 \n",
+ "49 6 0.492854 0.497949 0.003034 0.494766 0.501133 \n",
+ "50 7 0.503386 0.498726 0.003448 0.495537 0.501915 \n",
+ "51 8 0.503411 0.499312 0.003597 0.496305 0.502319 \n",
+ "\n",
+ " deviation metric \n",
+ "0 NaN mean_time_with_nurse \n",
+ "1 NaN mean_time_with_nurse \n",
+ "2 0.027502 mean_time_with_nurse \n",
+ "3 0.014389 mean_time_with_nurse \n",
+ "4 0.010572 mean_time_with_nurse \n",
+ "5 0.008595 mean_time_with_nurse \n",
+ "6 0.007685 mean_time_with_nurse \n",
+ "7 0.007464 mean_time_with_nurse \n",
+ "8 NaN mean_q_time_nurse \n",
+ "9 NaN mean_q_time_nurse \n",
+ "10 0.070725 mean_q_time_nurse \n",
+ "11 0.054479 mean_q_time_nurse \n",
+ "12 0.064100 mean_q_time_nurse \n",
+ "13 0.110092 mean_q_time_nurse \n",
+ "14 0.089054 mean_q_time_nurse \n",
+ "15 0.105848 mean_q_time_nurse \n",
+ "16 0.091909 mean_q_time_nurse \n",
+ "17 0.085453 mean_q_time_nurse \n",
+ "18 0.080036 mean_q_time_nurse \n",
+ "19 0.084869 mean_q_time_nurse \n",
+ "20 0.080096 mean_q_time_nurse \n",
+ "21 0.075493 mean_q_time_nurse \n",
+ "22 0.077188 mean_q_time_nurse \n",
+ "23 0.072816 mean_q_time_nurse \n",
+ "24 0.070950 mean_q_time_nurse \n",
+ "25 0.066724 mean_q_time_nurse \n",
+ "26 0.068908 mean_q_time_nurse \n",
+ "27 0.066670 mean_q_time_nurse \n",
+ "28 0.068584 mean_q_time_nurse \n",
+ "29 0.065222 mean_q_time_nurse \n",
+ "30 0.062274 mean_q_time_nurse \n",
+ "31 0.059531 mean_q_time_nurse \n",
+ "32 0.057012 mean_q_time_nurse \n",
+ "33 0.058648 mean_q_time_nurse \n",
+ "34 0.056307 mean_q_time_nurse \n",
+ "35 0.054153 mean_q_time_nurse \n",
+ "36 0.052603 mean_q_time_nurse \n",
+ "37 0.051055 mean_q_time_nurse \n",
+ "38 0.049294 mean_q_time_nurse \n",
+ "39 0.047716 mean_q_time_nurse \n",
+ "40 0.046559 mean_q_time_nurse \n",
+ "41 0.045314 mean_q_time_nurse \n",
+ "42 0.044393 mean_q_time_nurse \n",
+ "43 0.043402 mean_q_time_nurse \n",
+ "44 NaN mean_nurse_utilisation \n",
+ "45 NaN mean_nurse_utilisation \n",
+ "46 0.009734 mean_nurse_utilisation \n",
+ "47 0.005725 mean_nurse_utilisation \n",
+ "48 0.004797 mean_nurse_utilisation \n",
+ "49 0.006394 mean_nurse_utilisation \n",
+ "50 0.006395 mean_nurse_utilisation \n",
+ "51 0.006022 mean_nurse_utilisation "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Set up and run the algorithm\n",
+ "analyser = ReplicationsAlgorithm()\n",
+ "solutions, cumulative = analyser.select(runner=Runner(Param()),\n",
+ " metrics=METRICS)\n",
+ "\n",
+ "# View results\n",
+ "display(solutions)\n",
+ "display(cumulative)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create plots"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/svg+xml": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/svg+xml": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "for metric in METRICS:\n",
+ " p = plotly_confidence_interval_method(\n",
+ " n_reps = solutions[metric],\n",
+ " conf_ints = cumulative[cumulative['metric'] == metric],\n",
+ " metric_name=LABELS[metric])\n",
+ " p.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Explanation of the automated method\n",
+ "\n",
+ "This section walks through how the automation code is structured. The algorithm that determines the number of replications is `ReplicationsAlgorithm`. This depends on other classes including `OnlineStatistics` and `ReplicationTabulizer`.\n",
+ "\n",
+ "### OnlineStatistics\n",
+ "\n",
+ "`OnlineStatistics` is designed to:\n",
+ "\n",
+ "* Keep a **running mean and variance**.\n",
+ "* Return **other statistics** based on these (e.g. standard deviation, confidence intervals).\n",
+ "* **Call the `update()`** method of `ReplicationTabulizer` whenever a new data point is processed by `OnlineStatistics.\n",
+ "\n",
+ "#### How do the running mean and variance calculations work?\n",
+ "\n",
+ "The running mean and variance are updated iteratively with each new data point provided, **without requiring the storage of all previous data points**. This approach is called \"online\" because we only need to store a small set of values (such as the current mean and variance), rather than maintaining an entire list of past values.\n",
+ "\n",
+ "For example, focusing on the mean, normally you would need to store all the data points in a list and sum them up to compute the average - for example:\n",
+ "\n",
+ "```\n",
+ "data_points = [1, 2, 3, 4, 5]\n",
+ "mean = sum(data_points) / len(data_points)\n",
+ "```\n",
+ "\n",
+ "This works fine for small datasets, but as the data grows, maintaining the entire list becomes impractical. Instead, we can update the mean without storing the previous data points using **Welford's algorithm**. The formula for the running mean is:\n",
+ "\n",
+ "$$\n",
+ "\\mu_n = \\mu_{n-1} + \\frac{x_n - \\mu_{n-1}}{n}\n",
+ "$$\n",
+ "\n",
+ "Where:\n",
+ "\n",
+ "- $\\mu_n$ is the running mean after the $n$-th data point.\n",
+ "- $x_n$ is the new data point.\n",
+ "- $\\mu_{n-1}$ is the running mean before the new data point.\n",
+ "\n",
+ "The key thing to notice here is that, to update the mean, **all we needed to know was the current running mean, the new data point, and the number of data points**. A similar formula exists for calculating variance.\n",
+ "\n",
+ "In our code, every time we call `update()` with a new data point, the mean and variance are adjusted, with `n` keeping track of the number of data points so far.\n",
+ "\n",
+ "```\n",
+ "class OnlineStatistics:\n",
+ " def __init__(self):\n",
+ " self.mean = 0\n",
+ " self.n = 0\n",
+ "\n",
+ " def update(self, new_data_point):\n",
+ " self.n += 1\n",
+ " self.mean += (new_data_point - self.mean) / self.n\n",
+ "```\n",
+ "\n",
+ "#### What other statistics can it calculate?\n",
+ " \n",
+ "`OnlineStatistics` then has a series of methods which can return other statistics based on the current mean, variance, and count:\n",
+ "\n",
+ "* Variance\n",
+ "* Standard deviation\n",
+ "* Standard error\n",
+ "* Half width of the confidence interval\n",
+ "* Lower confidence interval bound\n",
+ "* Upper confidence interval bound\n",
+ "* Deviation of confidence interval from the mean\n",
+ "\n",
+ "These are each marked with `@property`. This is a stylist choice, that simply means, we are able to get the value by running e.g. `stats.lci`, instead of `stats.lci()`.\n",
+ "\n",
+ "### ReplicationTabulizer\n",
+ "\n",
+ "`ReplicationTabulizer` keeps track of our results. It:\n",
+ "\n",
+ "* Stores **lists with various statistics**, which are updated whenever `update()` is called.\n",
+ "* Can convert these into a **dataframe** using the `summary_frame()` method.\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### ReplicationsAlgorithm\n",
+ "\n",
+ "The diagram below is a visual representation of the logic in the **ReplicationsAlgorithm**.\n",
+ "\n",
+ "Once set up with the relevant parameters, it will first check if there are **initial_replications** to run. These might be specified if the user knows that the model will need at least X amount of replications before any metrics start to get close to the desired precision. The benefit of specifying these is that they are run using **run_reps()** and so can be run in parallel if chosen.\n",
+ "\n",
+ "Once these are run, it checks if any metrics meet precision already. Typically more replications will be required (for the length of the lookahead period) - but if there is no lookahead, they can be marked as solved.\n",
+ "\n",
+ "> **What is the lookahead period?**\n",
+ ">\n",
+ "> We want to make sure that the desired precision is stable and maintained for several replications. Here, we refer to this as the lookahead period.\n",
+ ">\n",
+ "> The user will specify **look_ahead** - as noted in [sim-tools](https://tommonks.github.io/sim-tools/04_replications/01_automated_reps.html), this is recommended to be **5** by [Hoad et al. (2010)](https://www.jstor.org/stable/40926090).\n",
+ ">\n",
+ "> The algorithm contains a method **_k_limit()** which will scale up the lookahead if more than 100 replications have been run, to ensure a sufficient period is being checked for stability, relative to the number of replications. This is simply: `look_ahead/100 * replications`. For example, if we have run 200 replications and look_ahead is 5: `5/100 * 200 = 10`.\n",
+ "\n",
+ "After any initial replications, the algorithm enters a while loop. This continues until all metrics are solved or the number of replications surpasses the user-specified **replication_budget** - whichever comes first!\n",
+ "\n",
+ "With each loop, it runs the model for another replication, then updates the results for any unsolved metrics from this replication, and checks if precision is met. The **target_met** is a record of how many times in a row precision has been met - once this passes the lookahead period, the metric is marked as solved.\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Run time"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Notebook run time: 0m 10s\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Get run time in seconds\n",
+ "notebook_end_time = time.time()\n",
+ "runtime = round(notebook_end_time - notebook_start_time)\n",
+ "\n",
+ "# Display converted to minutes and seconds\n",
+ "print(f'Notebook run time: {runtime // 60}m {runtime % 60}s')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "template-des",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/choosing_warmup.ipynb b/notebooks/choosing_warmup.ipynb
new file mode 100644
index 0000000..0f60e40
--- /dev/null
+++ b/notebooks/choosing_warmup.ipynb
@@ -0,0 +1,365 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Choosing warm-up length\n",
+ "\n",
+ "A suitable length for the warm-up period can be determined using the **time series inspection approach**. This involves looking at performance measures over time to identify when the system is exhibiting **steady state behaviour** (even though the system will never truly reach a \"steady state\").\n",
+ "\n",
+ "If we simply plot the mean result at regular intervals, this would vary too much. Therefore, we plot the **cumulative mean** of the performance measure, and look for the point at which this **smoothes out and stabilises**. This indicates the point for the warm-up period to end.\n",
+ "\n",
+ "This should be assessed when running the model using the base case parameters. If these change, you should reassess the appropriate warm-up period.\n",
+ "\n",
+ "The run time is provided at the end of the notebook.\n",
+ "\n",
+ "Credit:\n",
+ "\n",
+ "* Code for choice of warm-up period was adapted from Tom Monks (2024) [HPDM097 - Making a difference with health data](https://github.com/health-data-science-OR/stochastic_systems) (MIT Licence)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Set-up\n",
+ "\n",
+ "Load required packages."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# pylint: disable=missing-module-docstring\n",
+ "# To ensure any updates to `simulation/` are fetched without needing to restart\n",
+ "# the notebook environment, reload `simulation/` before execution of each cell\n",
+ "%load_ext autoreload\n",
+ "%autoreload 1\n",
+ "%aimport simulation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# pylint: disable=wrong-import-position\n",
+ "import os\n",
+ "import time\n",
+ "\n",
+ "import plotly.io as pio\n",
+ "import plotly.graph_objects as go\n",
+ "import plotly.subplots as sp\n",
+ "\n",
+ "from simulation.model import Param, Runner"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Display plotly express figures as non-interactive figures. This means they will be visible when browsing the notebooks on GitHub. To switch these back to interactive figures, simply remove this line."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pio.renderers.default = 'svg'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Start timer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "notebook_start_time = time.time()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Define path to outputs folder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# pylint: disable=duplicate-code\n",
+ "OUTPUT_DIR = '../outputs/'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Define labels for variables in the dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "LABELS = {\n",
+ " 'arrivals': 'Patient arrivals (n)',\n",
+ " 'mean_q_time_nurse': 'Mean wait time for nurse (minutes)',\n",
+ " 'mean_n_consult_time': 'Mean consultation time with nurse (minutes)',\n",
+ " 'mean_time_with_nurse': 'Mean consultation time with nurse (minutes)',\n",
+ " 'mean_nurse_utilisation': 'Mean nurse utilisation',\n",
+ " 'adj_mean_nurse_utilisation': 'Mean nurse utilisation (*100 - %)',\n",
+ " 'adj_mean_q_time_nurse': 'Mean wait time for nurse (*100) (minutes)',\n",
+ " 'mean_nurse_utilisation_tw': 'Time-weighted mean nurse utilisation',\n",
+ " 'mean_nurse_q_length': 'Time-weighted mean queue length for nurse (n)',\n",
+ " 'patient_inter': 'Patient inter-arrival time',\n",
+ " 'number_of_nurses': 'Number of nurses',\n",
+ " 'utilisation': 'Utilisation',\n",
+ " 'running_mean_wait_time': 'Running mean nurse wait time (minutes)'\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Determining appropriate warm-up length"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def time_series_inspection(\n",
+ " file, data_collection_period, warm_up=None, path=OUTPUT_DIR, labels=None\n",
+ "):\n",
+ " \"\"\"\n",
+ " Time series inspection method for determining length of warm-up.\n",
+ "\n",
+ " Arguments:\n",
+ " file (str):\n",
+ " Filename to save figure to.\n",
+ " data_collection_period (float):\n",
+ " Length of time to run the simulation for.\n",
+ " warm_up (float, optional):\n",
+ " Location on X axis to plot vertical red line indicating the chosen\n",
+ " warm-up period. Defaults to None, which will not plot a line.\n",
+ " path (str):\n",
+ " Path to save file to (exc. filename)\n",
+ " labels (dict):\n",
+ " Contains mappings from variable names to full labels. If none\n",
+ " provided, will default to using variable names.\n",
+ " \"\"\"\n",
+ " # Use default parameters, but with no warm-up and specified run length,\n",
+ " # and with no replications\n",
+ " param = Param(warm_up_period=0,\n",
+ " data_collection_period=data_collection_period,\n",
+ " number_of_runs=1)\n",
+ " # display(param.__dict__)\n",
+ "\n",
+ " # Run model\n",
+ " choose_warmup = Runner(param)\n",
+ " choose_warmup.run_reps()\n",
+ "\n",
+ " # Filter to nurse results\n",
+ " nurse = choose_warmup.interval_audit_df[\n",
+ " choose_warmup.interval_audit_df['resource_name'] == 'nurse']\n",
+ "\n",
+ " # Define columns to analyse\n",
+ " plot = ['utilisation', 'running_mean_wait_time']\n",
+ "\n",
+ " # Create 1x2 subplot\n",
+ " full_figure = sp.make_subplots(rows=2, cols=1, shared_xaxes=True)\n",
+ "\n",
+ " counter = 1\n",
+ " for var in plot:\n",
+ " # Reformat so index is simulation time and columns are each run\n",
+ " reformat = (\n",
+ " nurse[['simulation_time', var, 'run']]\n",
+ " .pivot(index='simulation_time',\n",
+ " columns='run',\n",
+ " values=var))\n",
+ "\n",
+ " # For utilisation, calculate cumulative mean (not necessary\n",
+ " # for wait time as that is already a running mean)\n",
+ " if var == 'utilisation':\n",
+ " cumulative = reformat.expanding().mean()\n",
+ " elif var == 'running_mean_wait_time':\n",
+ " cumulative = reformat.copy()\n",
+ " else:\n",
+ " print('Expected var to be utilisation or running_mean_wait_time.')\n",
+ " break\n",
+ "\n",
+ " # Create plot (using go.Scatter instead of px.express, for sub-plots)\n",
+ " full_figure.add_trace(\n",
+ " go.Scatter(\n",
+ " x=cumulative.index,\n",
+ " y=cumulative[0],\n",
+ " mode='lines',\n",
+ " line={'color': 'royalblue'}\n",
+ " ),\n",
+ " row=counter, col=1\n",
+ " )\n",
+ "\n",
+ " # Add y axis label\n",
+ " full_figure.update_yaxes(title_text=labels.get(var, var),\n",
+ " row=counter, col=1)\n",
+ "\n",
+ " # Add vertical line for warm-up period specified\n",
+ " if warm_up is not None:\n",
+ " full_figure.add_vline(\n",
+ " x=warm_up, line_color='red', line_dash='dash',\n",
+ " annotation_text='Suggested warm-up length',\n",
+ " annotation_position='top left',\n",
+ " annotation_font_color='red')\n",
+ " counter += 1\n",
+ "\n",
+ " # Add x axis title\n",
+ " full_figure.update_xaxes(title_text='Run time (minutes)')\n",
+ "\n",
+ " # Set figure dimensions and hide legend\n",
+ " full_figure.update_layout(\n",
+ " width=1400,\n",
+ " height=800,\n",
+ " showlegend=False,\n",
+ " template='plotly_white'\n",
+ " )\n",
+ "\n",
+ " # Show figure\n",
+ " full_figure.show()\n",
+ "\n",
+ " # Save figure\n",
+ " full_figure.write_image(os.path.join(path, file))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Having run the model for three days, it appears to reach a steady state at around 2500 minutes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "time_series_inspection(\n",
+ " file='choose_param_time_series_1.png',\n",
+ " data_collection_period=1440*3, warm_up=2520,\n",
+ " labels=LABELS)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "However, it is important to look far ahead - so we run it for more days, and find actually a later warm-up is more appropriate."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "time_series_inspection(\n",
+ " file='choose_param_time_series_2.png',\n",
+ " data_collection_period=1440*40, warm_up=1440*13,\n",
+ " labels=LABELS)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Run time"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Notebook run time: 0m 2s\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Get run time in seconds\n",
+ "notebook_end_time = time.time()\n",
+ "runtime = round(notebook_end_time - notebook_start_time)\n",
+ "\n",
+ "# Display converted to minutes and seconds\n",
+ "print(f'Notebook run time: {runtime // 60}m {runtime % 60}s')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "template-des",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/generate_exp_results.ipynb b/notebooks/generate_exp_results.ipynb
index 3e8180d..9b7063f 100644
--- a/notebooks/generate_exp_results.ipynb
+++ b/notebooks/generate_exp_results.ipynb
@@ -4,11 +4,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Generate expected results\n",
+ "## Generate expected results for back tests\n",
"\n",
- "This notebook is used to run a specific version of the model and save each results dataframe as a csv. These are used in `test_backtest.py` to verify that the model produces consistent results.\n",
+ "This notebook generates results for backtests. These include\n",
"\n",
- "The notebook is provided as it is possible that results may change due to alterations to the model structure and operations. Once it has been confirmed that changes are intentional and not any introduced errors, this script can be run to regenerate the `.csv` files used in the test.\n",
+ "* Backtests for run of simulation with default parameters (`test_backtest.py`).\n",
+ "* Backtests for runs of simulation where you are determining an appropriate number of replications (`test_backtest_replications.py`).\n",
+ "\n",
+ "They save the relevant results dataframe as `.csv`, and use these to verify that the model produces consistent results.\n",
"\n",
"The run time is provided at the end of this notebook."
]
@@ -53,7 +56,8 @@
"import time\n",
"from IPython.display import display\n",
"\n",
- "from simulation.model import Param, Runner"
+ "from simulation.model import Param, Runner\n",
+ "from simulation.replications import confidence_interval_method"
]
},
{
@@ -94,7 +98,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Run model and save results"
+ "## Run of simulation with default parameters"
]
},
{
@@ -739,16 +743,264 @@
" os.path.join(TESTS, 'overall.csv'), index=True)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Running the simulation when attempting to determine an appropriate number of parameters\n",
+ "\n",
+ "Each of these should return the same output dataframe:\n",
+ "\n",
+ "```\n",
+ "_, man_df_simple = confidence_interval_method_simple(\n",
+ " replications=20, metric='mean_time_with_nurse')\n",
+ "```\n",
+ "\n",
+ "```\n",
+ "_, man_df = confidence_interval_method(\n",
+ " replications=20, metric='mean_time_with_nurse')\n",
+ "```\n",
+ "\n",
+ "```\n",
+ "observer = ReplicationTabulizer()\n",
+ "analyser = ReplicationsAlgorithm(\n",
+ " verbose=False,\n",
+ " observer=observer,\n",
+ " initial_replications=20,\n",
+ " replication_budget=20)\n",
+ "_ = analyser.select(runner=Runner(Param()), metric='mean_time_with_nurse')\n",
+ "auto_df = observer.summary_table().head(20)\n",
+ "```\n",
+ "\n",
+ "Hence, we will just run one, but will compare all against it in backtests."
+ ]
+ },
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
replications
\n",
+ "
data
\n",
+ "
cumulative_mean
\n",
+ "
stdev
\n",
+ "
lower_ci
\n",
+ "
upper_ci
\n",
+ "
deviation
\n",
+ "
metric
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
0
\n",
+ "
1
\n",
+ "
9.842380
\n",
+ "
9.842380
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
mean_time_with_nurse
\n",
+ "
\n",
+ "
\n",
+ "
1
\n",
+ "
2
\n",
+ "
10.060424
\n",
+ "
9.951402
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
mean_time_with_nurse
\n",
+ "
\n",
+ "
\n",
+ "
2
\n",
+ "
3
\n",
+ "
9.925090
\n",
+ "
9.942631
\n",
+ "
0.110075
\n",
+ "
9.669189
\n",
+ "
10.216073
\n",
+ "
0.027502
\n",
+ "
mean_time_with_nurse
\n",
+ "
\n",
+ "
\n",
+ "
3
\n",
+ "
4
\n",
+ "
9.938504
\n",
+ "
9.941599
\n",
+ "
0.089900
\n",
+ "
9.798549
\n",
+ "
10.084650
\n",
+ "
0.014389
\n",
+ "
mean_time_with_nurse
\n",
+ "
\n",
+ "
\n",
+ "
4
\n",
+ "
5
\n",
+ "
10.016611
\n",
+ "
9.956602
\n",
+ "
0.084775
\n",
+ "
9.851339
\n",
+ "
10.061864
\n",
+ "
0.010572
\n",
+ "
mean_time_with_nurse
\n",
+ "
\n",
+ "
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
\n",
+ "
\n",
+ "
35
\n",
+ "
36
\n",
+ "
0.492981
\n",
+ "
0.497883
\n",
+ "
0.007143
\n",
+ "
0.495466
\n",
+ "
0.500300
\n",
+ "
0.004854
\n",
+ "
mean_nurse_utilisation
\n",
+ "
\n",
+ "
\n",
+ "
36
\n",
+ "
37
\n",
+ "
0.508118
\n",
+ "
0.498159
\n",
+ "
0.007242
\n",
+ "
0.495745
\n",
+ "
0.500574
\n",
+ "
0.004847
\n",
+ "
mean_nurse_utilisation
\n",
+ "
\n",
+ "
\n",
+ "
37
\n",
+ "
38
\n",
+ "
0.502153
\n",
+ "
0.498264
\n",
+ "
0.007172
\n",
+ "
0.495907
\n",
+ "
0.500622
\n",
+ "
0.004731
\n",
+ "
mean_nurse_utilisation
\n",
+ "
\n",
+ "
\n",
+ "
38
\n",
+ "
39
\n",
+ "
0.499857
\n",
+ "
0.498305
\n",
+ "
0.007082
\n",
+ "
0.496010
\n",
+ "
0.500601
\n",
+ "
0.004607
\n",
+ "
mean_nurse_utilisation
\n",
+ "
\n",
+ "
\n",
+ "
39
\n",
+ "
40
\n",
+ "
0.512021
\n",
+ "
0.498648
\n",
+ "
0.007319
\n",
+ "
0.496307
\n",
+ "
0.500989
\n",
+ "
0.004694
\n",
+ "
mean_nurse_utilisation
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
120 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " replications data cumulative_mean stdev lower_ci upper_ci \\\n",
+ "0 1 9.842380 9.842380 NaN NaN NaN \n",
+ "1 2 10.060424 9.951402 NaN NaN NaN \n",
+ "2 3 9.925090 9.942631 0.110075 9.669189 10.216073 \n",
+ "3 4 9.938504 9.941599 0.089900 9.798549 10.084650 \n",
+ "4 5 10.016611 9.956602 0.084775 9.851339 10.061864 \n",
+ ".. ... ... ... ... ... ... \n",
+ "35 36 0.492981 0.497883 0.007143 0.495466 0.500300 \n",
+ "36 37 0.508118 0.498159 0.007242 0.495745 0.500574 \n",
+ "37 38 0.502153 0.498264 0.007172 0.495907 0.500622 \n",
+ "38 39 0.499857 0.498305 0.007082 0.496010 0.500601 \n",
+ "39 40 0.512021 0.498648 0.007319 0.496307 0.500989 \n",
+ "\n",
+ " deviation metric \n",
+ "0 NaN mean_time_with_nurse \n",
+ "1 NaN mean_time_with_nurse \n",
+ "2 0.027502 mean_time_with_nurse \n",
+ "3 0.014389 mean_time_with_nurse \n",
+ "4 0.010572 mean_time_with_nurse \n",
+ ".. ... ... \n",
+ "35 0.004854 mean_nurse_utilisation \n",
+ "36 0.004847 mean_nurse_utilisation \n",
+ "37 0.004731 mean_nurse_utilisation \n",
+ "38 0.004607 mean_nurse_utilisation \n",
+ "39 0.004694 mean_nurse_utilisation \n",
+ "\n",
+ "[120 rows x 8 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "_, man_df = confidence_interval_method(\n",
+ " replications=40, metrics=['mean_time_with_nurse',\n",
+ " 'mean_q_time_nurse',\n",
+ " 'mean_nurse_utilisation'])\n",
+ "\n",
+ "display(man_df)\n",
+ "\n",
+ "man_df.to_csv(\n",
+ " os.path.join(TESTS, 'replications.csv'), index=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Run time"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Notebook run time: 0m 0s\n"
+ "Notebook run time: 0m 2s\n"
]
}
],
diff --git a/outputs/choose_param_conf_int_1.png b/outputs/choose_param_conf_int_1.png
deleted file mode 100644
index 97fd28a..0000000
Binary files a/outputs/choose_param_conf_int_1.png and /dev/null differ
diff --git a/outputs/choose_param_conf_int_2.png b/outputs/choose_param_conf_int_2.png
deleted file mode 100644
index 1664ca0..0000000
Binary files a/outputs/choose_param_conf_int_2.png and /dev/null differ
diff --git a/outputs/choose_param_conf_int_3.png b/outputs/choose_param_conf_int_3.png
deleted file mode 100644
index 0174c74..0000000
Binary files a/outputs/choose_param_conf_int_3.png and /dev/null differ
diff --git a/outputs/choose_param_cores.png b/outputs/choose_param_cores.png
index 0264681..64df120 100644
Binary files a/outputs/choose_param_cores.png and b/outputs/choose_param_cores.png differ
diff --git a/outputs/choose_reps_mean_nurse_utilisation.png b/outputs/choose_reps_mean_nurse_utilisation.png
new file mode 100644
index 0000000..9eb9357
Binary files /dev/null and b/outputs/choose_reps_mean_nurse_utilisation.png differ
diff --git a/outputs/choose_reps_mean_q_time_nurse.png b/outputs/choose_reps_mean_q_time_nurse.png
new file mode 100644
index 0000000..094fdbd
Binary files /dev/null and b/outputs/choose_reps_mean_q_time_nurse.png differ
diff --git a/outputs/choose_reps_mean_time_with_nurse.png b/outputs/choose_reps_mean_time_with_nurse.png
new file mode 100644
index 0000000..810dafb
Binary files /dev/null and b/outputs/choose_reps_mean_time_with_nurse.png differ
diff --git a/simulation/helper.py b/simulation/helper.py
index 058fce5..472019f 100644
--- a/simulation/helper.py
+++ b/simulation/helper.py
@@ -34,8 +34,8 @@ def summary_stats(data):
# If there are no observations, then set all to NaN
if count == 0:
mean, std_dev, ci_lower, ci_upper = np.nan, np.nan, np.nan, np.nan
- # If there is only one observation, can do mean but not others
- elif count == 1:
+ # If there is only one or two observations, can do mean but not others
+ elif count < 3:
mean = data.mean()
std_dev, ci_lower, ci_upper = np.nan, np.nan, np.nan
# With more than one observation, can calculate all...
diff --git a/simulation/model.py b/simulation/model.py
index fd1ca89..adaf8ed 100644
--- a/simulation/model.py
+++ b/simulation/model.py
@@ -231,7 +231,8 @@ def request(self, *args, **kwargs):
Keyword arguments to be passed to the parent class.
Returns:
- simpy.events.Event: Event representing the request.
+ simpy.events.Event:
+ Event representing the request.
"""
# Update time-weighted statistics
self.update_time_weighted_stats()
@@ -250,7 +251,8 @@ def release(self, *args, **kwargs):
Keyword arguments to be passed to the parent class.#
Returns:
- simpy.events.Event: Event representing the request.
+ simpy.events.Event:
+ Event representing the release.
"""
# Update time-weighted statistics
self.update_time_weighted_stats()
@@ -309,6 +311,9 @@ def __init__(self, mean, random_seed):
random_seed (int|None):
Random seed to reproduce samples.
"""
+ if mean <= 0:
+ raise ValueError('Exponential mean must be greater than 0.')
+
self.mean = mean
self.rand = np.random.default_rng(random_seed)
@@ -422,7 +427,8 @@ def valid_inputs(self):
# Doesn't include number_of_nurses as this is tested by simpy.Resource
validation_rules = {
'positive': ['patient_inter', 'mean_n_consult_time',
- 'number_of_runs', 'audit_interval'],
+ 'number_of_runs', 'audit_interval',
+ 'number_of_nurses'],
'non_negative': ['warm_up_period', 'data_collection_period']
}
# Iterate over the validation rules
@@ -676,37 +682,67 @@ def run_single(self, run):
model = Model(param=self.param, run_number=run)
model.run()
+ # PATIENT RESULTS
# Convert patient-level results to a dataframe and add column with run
patient_results = pd.DataFrame(model.results_list)
patient_results['run'] = run
+ # If there was at least one patient...
+ if len(patient_results) > 0:
+ # Add a column with the wait time of patients who remained unseen
+ # at the end of the simulation
+ patient_results['q_time_unseen'] = np.where(
+ patient_results['time_with_nurse'].isna(),
+ model.env.now - patient_results['arrival_time'], np.nan
+ )
+ else:
+ # Set to NaN if no patients
+ patient_results['q_time_unseen'] = np.nan
- # Add a column with the wait time of patients who remained unseen
- # at the end of the simulation
- patient_results['q_time_unseen'] = np.where(
- patient_results['time_with_nurse'].isna(),
- model.env.now - patient_results['arrival_time'], np.nan
- )
-
- # Create dictionary recording the run results
- # Currently has two alternative methods of measuring utilisation
+ # RUN RESULTS
+ # The run, scenario and arrivals are handled the same regardless of
+ # whether there were any patients
run_results = {
'run_number': run,
'scenario': self.param.scenario_name,
- 'arrivals': len(patient_results),
- 'mean_q_time_nurse': patient_results['q_time_nurse'].mean(),
- 'mean_time_with_nurse': patient_results['time_with_nurse'].mean(),
- 'mean_nurse_utilisation': (model.nurse_time_used /
- (self.param.number_of_nurses *
- self.param.data_collection_period)),
- 'mean_nurse_utilisation_tw': (sum(model.nurse.area_resource_busy) /
- (self.param.number_of_nurses *
- self.param.data_collection_period)),
- 'mean_nurse_q_length': (sum(model.nurse.area_n_in_queue) /
- self.param.data_collection_period),
- 'count_unseen': patient_results['time_with_nurse'].isna().sum(),
- 'mean_q_time_unseen': patient_results['q_time_unseen'].mean()
+ 'arrivals': len(patient_results)
}
-
+ # If there was at least one patient...
+ if len(patient_results) > 0:
+ # Create dictionary recording the run results
+ # Currently has two alternative methods of measuring utilisation
+ run_results = {
+ **run_results,
+ 'mean_q_time_nurse': patient_results['q_time_nurse'].mean(),
+ 'mean_time_with_nurse': (
+ patient_results['time_with_nurse'].mean()),
+ 'mean_nurse_utilisation': (
+ model.nurse_time_used / (
+ self.param.number_of_nurses *
+ self.param.data_collection_period)),
+ 'mean_nurse_utilisation_tw': (
+ sum(model.nurse.area_resource_busy) / (
+ self.param.number_of_nurses *
+ self.param.data_collection_period)),
+ 'mean_nurse_q_length': (sum(model.nurse.area_n_in_queue) /
+ self.param.data_collection_period),
+ 'count_unseen': (
+ patient_results['time_with_nurse'].isna().sum()),
+ 'mean_q_time_unseen': patient_results['q_time_unseen'].mean()
+ }
+ else:
+ # Set results to NaN if no patients
+ run_results = {
+ **run_results,
+ 'mean_q_time_nurse': np.nan,
+ 'mean_time_with_nurse': np.nan,
+ 'mean_nurse_utilisation': np.nan,
+ 'mean_nurse_utilisation_tw': np.nan,
+ 'mean_nurse_q_length': np.nan,
+ 'count_unseen': np.nan,
+ 'mean_q_time_unseen': np.nan
+ }
+
+ # INTERVAL AUDIT RESULTS
# Convert interval audit results to a dataframe and add run column
interval_audit_df = pd.DataFrame(model.audit_list)
interval_audit_df['run'] = run
diff --git a/simulation/replications.py b/simulation/replications.py
new file mode 100644
index 0000000..1566511
--- /dev/null
+++ b/simulation/replications.py
@@ -0,0 +1,744 @@
+"""Selecting the number of replications.
+
+Credit:
+ > These functions are adapted from Tom Monks (2021) sim-tools:
+ fundamental tools to support the simulation process in python
+ (https://github.com/TomMonks/sim-tools) (MIT Licence).
+ > In sim-tools, they cite that their implementation is of the "replications
+ algorithm" from: Hoad, Robinson, & Davies (2010). Automated selection of
+ the number of replications for a discrete-event simulation. Journal of the
+ Operational Research Society. https://www.jstor.org/stable/40926090.
+
+Licence:
+ This project is licensed under the MIT Licence. See the LICENSE file for
+ more details.
+"""
+
+import warnings
+
+import plotly.graph_objects as go
+import numpy as np
+import pandas as pd
+from scipy.stats import t
+
+from simulation.model import Param, Runner
+from simulation.helper import summary_stats
+
+
+class OnlineStatistics:
+ """
+ Computes running sample mean and variance (using Welford's algorithm),
+ which then allows computation of confidence intervals (CIs).
+
+ The statistics are referred to as "online" as they are computed via
+ incremental updates to stored values, rather than by keeping all the data
+ and recomputing the statistics each time a new value is added.
+
+ Attributes:
+ n (int):
+ Number of data points processed.
+ x_i (float):
+ Most recent data point.
+ mean (float):
+ Running mean.
+ _sq (float):
+ Sum of squared differences from the mean.
+ alpha (float):
+ Significance level for confidence interval calculations.
+ observer (object):
+ Object to notify on updates.
+ """
+ def __init__(self, data=None, alpha=0.1, observer=None):
+ """
+ Initialises the OnlineStatistics instance.
+
+ Arguments:
+ data (np.ndarray, optional):
+ Array containing an initial data sample.
+ alpha (float, optional):
+ Significance level for confidence interval calculations.
+ observer (object, optional):
+ Observer to notify on updates.
+ """
+ self.n = 0
+ self.x_i = None
+ self.mean = None
+ self._sq = None
+ self.alpha = alpha
+ self.observer = observer
+
+ # If an array of initial values is supplied, then run update()
+ if data is not None:
+ if isinstance(data, np.ndarray):
+ for x in data:
+ self.update(x)
+ # Raise an error if data is in a different format - otherwise it
+ # would invisibly proceed without processing the data
+ else:
+ raise ValueError(
+ f'data must be np.ndarray but is type {type(data)}')
+
+ def update(self, x):
+ """
+ Running update of mean and variance implemented using Welford's
+ algorithm (1962).
+
+ See Knuth. D `The Art of Computer Programming` Vol 2. 2nd ed. Page 216.
+
+ Arguments:
+ x (float):
+ A new data point.
+ """
+ self.n += 1
+ self.x_i = x
+
+ if self.n == 1:
+ self.mean = x
+ self._sq = 0
+ else:
+ updated_mean = self.mean + ((x - self.mean) / self.n)
+ self._sq += (x - self.mean) * (x - updated_mean)
+ self.mean = updated_mean
+
+ # Run the observer update() method
+ if self.observer is not None:
+ self.observer.update(self)
+
+ @property
+ def variance(self):
+ """
+ Computes and returns the variance of the data points.
+ """
+ # Sum of squares of differences from the current mean divided by n - 1
+ return self._sq / (self.n - 1)
+
+ @property
+ def std(self):
+ """
+ Computes and returns the standard deviation, or NaN if not enough data.
+ """
+ if self.n > 2:
+ return np.sqrt(self.variance)
+ return np.nan
+
+ @property
+ def std_error(self):
+ """
+ Computes and returns the standard error of the mean.
+ """
+ return self.std / np.sqrt(self.n)
+
+ @property
+ def half_width(self):
+ """
+ Computes and returns the half-width of the confidence interval.
+ """
+ dof = self.n - 1
+ t_value = t.ppf(1 - (self.alpha / 2), dof)
+ return t_value * self.std_error
+
+ @property
+ def lci(self):
+ """
+ Computes and returns the lower confidence interval bound, or NaN if
+ not enough data.
+ """
+ if self.n > 2:
+ return self.mean - self.half_width
+ return np.nan
+
+ @property
+ def uci(self):
+ """
+ Computes and returns the upper confidence interval bound, or NaN if
+ not enough data.
+ """
+ if self.n > 2:
+ return self.mean + self.half_width
+ return np.nan
+
+ @property
+ def deviation(self):
+ """
+ Computes and returns the precision of the confidence interval
+ expressed as the percentage deviation of the half width from the mean.
+ """
+ if self.n > 2:
+ return self.half_width / self.mean
+ return np.nan
+
+
+class ReplicationTabulizer:
+ """
+ Observes and records results from OnlineStatistics, updating each time new
+ data is processed.
+
+ Attributes:
+ n (int):
+ Number of data points processed.
+ x_i (list):
+ List containing each data point.
+ cumulative_mean (list):
+ List of the running mean.
+ stdev (list):
+ List of the standard deviation.
+ lower (list):
+ List of the lower confidence interval bound.
+ upper (list):
+ List of the upper confidence interval bound.
+ dev (list):
+ List of the percentage deviation of the confidence interval
+ half width from the mean.
+ """
+ def __init__(self):
+ """
+ Initialises empty lists for storing statistics, and n is set to zero.
+ """
+ self.n = 0
+ self.x_i = []
+ self.cumulative_mean = []
+ self.stdev = []
+ self.lower = []
+ self.upper = []
+ self.dev = []
+
+ def update(self, results):
+ """
+ Add new results from OnlineStatistics to the appropriate lists.
+
+ Arguments:
+ results (OnlineStatistics):
+ An instance of OnlineStatistics containing updated statistical
+ measures like the mean, standard deviation and confidence
+ intervals.
+ """
+ self.x_i.append(results.x_i)
+ self.cumulative_mean.append(results.mean)
+ self.stdev.append(results.std)
+ self.lower.append(results.lci)
+ self.upper.append(results.uci)
+ self.dev.append(results.deviation)
+ self.n += 1
+
+ def summary_table(self):
+ """
+ Create a results table from the stored lists.
+
+ Returns:
+ results (pd.DataFrame):
+ Dataframe summarising the replication statistics.
+ """
+ results = pd.DataFrame(
+ {
+ 'replications': np.arange(1, self.n + 1),
+ 'data': self.x_i,
+ 'cumulative_mean': self.cumulative_mean,
+ 'stdev': self.stdev,
+ 'lower_ci': self.lower,
+ 'upper_ci': self.upper,
+ 'deviation': self.dev
+ }
+ )
+ return results
+
+
+class ReplicationsAlgorithm:
+ """
+ Implements an adaptive replication algorithm for selecting the
+ appropriate number of simulation replications based on statistical
+ precision.
+
+ Uses the "Replications Algorithm" from Hoad, Robinson, & Davies (2010).
+ Given a model's performance measure and a user-set confidence interval
+ half width prevision, automatically select the number of replications.
+ Combines the "confidence intervals" method with a sequential look-ahead
+ procedure to determine if a desired precision in the confidence interval
+ is maintained.
+
+ Attributes:
+ alpha (float):
+ Significance level for confidence interval calculations.
+ half_width_precision (float):
+ The target half width precision for the algorithm (i.e. percentage
+ deviation of the confidence interval from the mean).
+ initial_replications (int):
+ Number of initial replications to perform. Note that the minimum
+ solution will be the value of initial_replications (i.e. if require
+ 20 initial replications but was resolved in 5, solution output
+ will still be 20). Although, if initial_replications < 3, solution
+ will still be at least 3, as that is the minimum required to
+ calculate the confidence intervals.
+ look_ahead (int):
+ Minimum additional replications to look ahead to assess stability
+ of precision. When the number of replications is <= 100, the value
+ of look_ahead is used. When they are > 100, then
+ look_ahead / 100 * max(n, 100) is used.
+ replication_budget (int):
+ Maximum allowed replications. Use for larger models where
+ replication runtime is a constraint.
+ n (int):
+ Current number of replications performed.
+ """
+ # pylint: disable=too-many-arguments,too-many-positional-arguments
+ def __init__(
+ self,
+ alpha=0.05,
+ half_width_precision=0.05,
+ initial_replications=3,
+ look_ahead=5,
+ replication_budget=1000
+ ):
+ """
+ Initialise an instance of the ReplicationsAlgorithm.
+
+ Arguments are described in the class docstring.
+ """
+ self.alpha = alpha
+ self.half_width_precision = half_width_precision
+ self.initial_replications = initial_replications
+ self.look_ahead = look_ahead
+ self.replication_budget = replication_budget
+
+ # Initially set n to number of initial replications
+ self.n = self.initial_replications
+
+ # Check validity of provided parameters
+ self.valid_inputs()
+
+ def valid_inputs(self):
+ """
+ Checks validity of provided parameters.
+ """
+ for p in [self.initial_replications, self.look_ahead]:
+ if not isinstance(p, int) or p < 0:
+ raise ValueError(f'{p} must be a non-negative integer.')
+
+ if self.half_width_precision <= 0:
+ raise ValueError('half_width_precision must be greater than 0.')
+
+ if self.replication_budget < self.initial_replications:
+ raise ValueError(
+ 'replication_budget must be at least initial_replications.')
+
+ def _klimit(self):
+ """
+ Determines the number of additional replications to check after
+ precision is reached, scaling with total replications if they are
+ greater than 100.
+
+ Returns:
+ int:
+ Number of additional replications to verify stability.
+ """
+ return int((self.look_ahead / 100) * max(self.n, 100))
+
+ # pylint: disable=too-many-branches
+ def select(self, runner, metrics):
+ """
+ Executes the replication algorithm, determining the necessary number
+ of replications to achieve and maintain the desired precision.
+
+ Arguments:
+ runner (Runner):
+ An instance of Runner which executes the model replications.
+ metrics (list):
+ List of performance measures to track (should correspond to
+ column names from the run results dataframe).
+
+ Returns:
+ tuple[dict, pd.DataFrame]:
+ - A dictionary with the minimum number of replications required
+ to meet the precision for each metric.
+ - DataFrame containing cumulative statistics for each
+ replication for each metric.
+
+ Warnings:
+ Issues a warning if the desired precision is not met for any
+ metrics before the replication limit is met.
+ """
+ # Create instances of observers for each metric
+ observers = {
+ metric: ReplicationTabulizer()
+ for metric in metrics}
+
+ # Create dictionary to store record for each metric of:
+ # - nreps (the solution - replications required for precision)
+ # - target_met (record of how many times in a row the target has
+ # been met, used to check if the lookahead period has been passed)
+ # - solved (whether it has maintained precision for lookahead)
+ solutions = {
+ metric: {'nreps': None,
+ 'target_met': 0,
+ 'solved': False} for metric in metrics}
+
+ # If there are no initial replications, create empty instances of stats
+ # for each metric...
+ if self.initial_replications == 0:
+ stats = {
+ metric: OnlineStatistics(
+ alpha=self.alpha, observer=observers[metric])
+ for metric in metrics
+ }
+ # If there are, run the replications, then create instances of stats
+ # pre-loaded with data from the initial replications...
+ # (we use run_reps() which allows for parallel processing if desired)
+ else:
+ stats = {}
+ runner.param.number_of_runs = self.initial_replications
+ runner.run_reps()
+ for metric in metrics:
+ stats[metric] = OnlineStatistics(
+ alpha=self.alpha,
+ observer=observers[metric],
+ data=np.array(runner.run_results_df[metric]))
+
+ # After completing all replications, check if any have met precision,
+ # add solution and update count
+ for metric in metrics:
+ if stats[metric].deviation <= self.half_width_precision:
+ solutions[metric]['nreps'] = self.n
+ solutions[metric]['target_met'] = 1
+ # If there is no lookahead, mark as solved
+ if self._klimit() == 0:
+ solutions[metric]['solved'] = True
+
+ # Whilst under the replication budget (plus lookahead), and not
+ # all metrics have yet been marked as solved...
+ while (
+ sum(1 for v in solutions.values()
+ if v['solved']) < len(metrics)
+ and self.n < self.replication_budget + self._klimit()
+ ):
+
+ # Run another replication
+ results = runner.run_single(self.n)['run']
+
+ # Increment counter
+ self.n += 1
+
+ # Loop through the metrics...
+ for metric in metrics:
+
+ # If it is not yet solved...
+ if not solutions[metric]['solved']:
+
+ # Update the running mean and stdev for that metric
+ stats[metric].update(results[metric])
+
+ # If precision has been achieved...
+ if stats[metric].deviation <= self.half_width_precision:
+
+ # If precision was not met on the previous replication,
+ # record this as the candidate solution.
+ if solutions[metric]['target_met'] == 0:
+ solutions[metric]['nreps'] = self.n
+
+ # Update how many times precision has been met in a row
+ solutions[metric]['target_met'] += 1
+
+ # Mark as solved if have finished lookahead period
+ if solutions[metric]['target_met'] > self._klimit():
+ solutions[metric]['solved'] = True
+
+ # If precision was not achieved, ensure nreps is None
+ # (e.g. in cases where precision is lost after a success)
+ else:
+ solutions[metric]['nreps'] = None
+
+ # Extract minimum replications for each metric
+ nreps = {metric: value['nreps'] for metric, value in solutions.items()}
+
+ # Combine observer summary frames into a single table
+ summary_frame = pd.concat(
+ [observer.summary_table().assign(metric=metric)
+ for metric, observer in observers.items()]
+ ).reset_index(drop=True)
+
+ # Extract any metrics that were not solved and return warning
+ if None in nreps.values():
+ unsolved = [k for k, v in nreps.items() if v is None]
+ warnings.warn(
+ 'WARNING: the replications did not reach the desired ' +
+ f'precision for the following metrics: {unsolved}.')
+
+ return nreps, summary_frame
+
+
+# pylint: disable=too-many-arguments,too-many-positional-arguments
+# pylint: disable=too-many-locals
+def confidence_interval_method(
+ replications,
+ metrics,
+ alpha=0.05,
+ desired_precision=0.05,
+ min_rep=5,
+ verbose=False
+):
+ """
+ The confidence interval method for selecting the number of replications.
+
+ This method will run the model for the specified number of replications.
+ It then calculates the cumulative mean and confidence intervals with
+ each of those replications. It then checks the results to find when the
+ precision is first achieved. It does not check if this precision is
+ maintained.
+
+ Arguments:
+ replications (int):
+ Number of times to run the model.
+ metrics (list):
+ List of performance metrics to assess (should correspond to
+ column names from the run results dataframe).
+ alpha (float, optional):
+ Significance level for confidence interval calculations.
+ desired_precision (float, optional):
+ The target half width precision (i.e. percentage deviation of the
+ confidence interval from the mean).
+ min_rep (int, optional):
+ Minimum number of replications before checking precision. Useful
+ when the number of replications returned does not provide a stable
+ precision below target.
+ verbose (bool, optional):
+ Whether to print progress updates.
+
+ Returns:
+ tuple[dict, pd.DataFrame]:
+ - A dictionary with the minimum number of replications required
+ to meet the precision for each metric.
+ - DataFrame containing cumulative statistics for each
+ replication for each metric.
+
+ Warnings:
+ Issues a warning if the desired precision is not met within the
+ provided replications.
+ """
+ # Run model for specified number of replications
+ param = Param(number_of_runs=replications)
+ choose_rep = Runner(param)
+ choose_rep.run_reps()
+
+ nreps_dict = {}
+ summary_table_list = []
+
+ for metric in metrics:
+ # Extract replication results for the specified metric
+ rep_res = choose_rep.run_results_df[metric]
+
+ # Set up method for calculating statistics and saving them as a table
+ observer = ReplicationTabulizer()
+ stats = OnlineStatistics(
+ alpha=alpha, data=np.array(rep_res[:2]), observer=observer)
+
+ # Calculate statistics with each replication, and get summary table
+ for i in range(2, len(rep_res)):
+ stats.update(rep_res[i])
+ results = observer.summary_table()
+
+ # Get minimum number of replications where deviation is below target
+ try:
+ nreps = (
+ results.iloc[min_rep:]
+ .loc[results['deviation'] <= desired_precision]
+ .iloc[0]
+ .name
+ )
+ if verbose:
+ print(f'{metric}: Reached desired precision in {nreps} ' +
+ 'replications.')
+ # Return warning if there are no replications with desired precision
+ except IndexError:
+ message = f'WARNING: {metric} does not reach desired precision.'
+ warnings.warn(message)
+ nreps = None
+
+ # Add solution to dictionary
+ nreps_dict[metric] = nreps
+
+ # Add metric name to table then append to list
+ results['metric'] = metric
+ summary_table_list.append(results)
+
+ # Combine into a single table
+ summary_frame = pd.concat(summary_table_list)
+
+ return nreps_dict, summary_frame
+
+
+def confidence_interval_method_simple(
+ replications, metrics, desired_precision=0.05, min_rep=5, verbose=False
+):
+ """
+ Simple implementation using the confidence interval method to select the
+ number of replications.
+
+ This will produce the same results as confidence_interval_method(),
+ but whilst that depends on ReplicationTabulizer and OnlineStatistics,
+ this method uses summary_stats(). Both are provided to illustrate
+ alternative ways of doing this.
+
+ Arguments:
+ replications (int):
+ Number of times to run the model.
+ metrics (list):
+ List of performance metrics to assess.
+ desired_precision (float, optional):
+ The target half width precision (i.e. percentage deviation of the
+ confidence interval from the mean).
+ min_rep (int, optional):
+ Minimum number of replications before checking precision. Useful
+ when the number of replications returned does not provide a stable
+ precision below target.
+ verbose (bool, optional):
+ Whether to print progress updates.
+
+ Returns:
+ tuple[dict, pd.DataFrame]:
+ - A dictionary with the minimum number of replications required
+ to meet the precision for each metric.
+ - DataFrame containing cumulative statistics for each
+ replication for each metric.
+
+ Warnings:
+ Issues a warning if the desired precision is not met within the
+ provided replications.
+ """
+ # Run model for specified number of replications
+ param = Param(number_of_runs=replications)
+ choose_rep = Runner(param)
+ choose_rep.run_reps()
+ df = choose_rep.run_results_df
+
+ nreps_dict = {}
+ summary_table_list = []
+
+ for metric in metrics:
+ # Compute cumulative statistics
+ cumulative = pd.DataFrame([
+ {
+ 'replications': i + 1, # Adjusted as counted from zero
+ 'data': df[metric][i],
+ 'cumulative_mean': stats[0],
+ 'stdev': stats[1],
+ 'lower_ci': stats[2],
+ 'upper_ci': stats[3],
+ 'deviation': (stats[3] - stats[0]) / stats[0]
+ }
+ for i, stats in enumerate(
+ (summary_stats(df[metric].iloc[:i])
+ for i in range(1, replications + 1))
+ )
+ ])
+
+ # Get minimum number of replications where deviation is below target
+ try:
+ nreps = (
+ cumulative.iloc[min_rep:]
+ .loc[cumulative['deviation'] <= desired_precision]
+ .iloc[0]
+ .name
+ ) + 1
+ if verbose:
+ print(f'{metric}: Reached desired precision in {nreps} ' +
+ 'replications.')
+ # Return warning if there are no replications with desired precision
+ except IndexError:
+ warnings.warn(f'Running {replications} replications did not ' +
+ f'reach desired precision ({desired_precision}) ' +
+ f'for metric {metric}.')
+ nreps = None
+
+ # Add solution to dictionary
+ nreps_dict[metric] = nreps
+
+ # Add metric name to table then append to list
+ cumulative['metric'] = metric
+ summary_table_list.append(cumulative)
+
+ # Combine into a single table
+ summary_frame = pd.concat(summary_table_list)
+
+ return nreps_dict, summary_frame
+
+
+def plotly_confidence_interval_method(
+ n_reps, conf_ints, metric_name, figsize=(1200, 400), file_path=None
+):
+ """
+ Generates an interactive Plotly visualisation of confidence intervals
+ with increasing simulation replications.
+
+ Arguments:
+ n_reps (int):
+ The number of replications required to meet the desired precision.
+        conf_ints (pd.DataFrame):
+            A DataFrame containing confidence interval statistics, including
+            cumulative mean, upper/lower bounds, and deviations, as returned
+            by the ReplicationTabulizer summary_table() method.
+        metric_name (str):
+            Name of metric being analysed.
+        figsize (tuple, optional):
+            Plot dimensions in pixels (width, height).
+        file_path (str, optional):
+            Path and filename to save the plot to. If None, the figure is
+            not saved.
+
+    Returns:
+        go.Figure: The generated Plotly figure.
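+
+    Example (an illustrative sketch; assumes output from
+    confidence_interval_method and a metric name from this model):
+
+        nreps, summary = confidence_interval_method(
+            replications=20, metrics=['mean_q_time_nurse'])
+        fig = plotly_confidence_interval_method(
+            n_reps=nreps['mean_q_time_nurse'],
+            conf_ints=summary[summary['metric'] == 'mean_q_time_nurse'],
+            metric_name='mean_q_time_nurse')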
+ """
+ fig = go.Figure()
+
+    # Calculate relative deviations (%)
+ deviation_pct = (
+ (conf_ints['upper_ci'] - conf_ints['cumulative_mean'])
+ / conf_ints['cumulative_mean']
+ * 100
+ ).round(2)
+
+    # Confidence interval bands with hover info (both bands share the
+    # same colour and dash style)
+    for col in ['lower_ci', 'upper_ci']:
+        fig.add_trace(
+            go.Scatter(
+                x=conf_ints['replications'],
+                y=conf_ints[col],
+                line={'color': 'lightblue', 'dash': 'dot'},
+                name=col,
+                text=[f'Deviation: {d}%' for d in deviation_pct],
+                hoverinfo='x+y+name+text',
+            )
+        )
+
+ # Cumulative mean line with enhanced hover
+ fig.add_trace(
+ go.Scatter(
+ x=conf_ints['replications'],
+ y=conf_ints['cumulative_mean'],
+ line={'color': 'blue', 'width': 2},
+ name='Cumulative Mean',
+ hoverinfo='x+y+name',
+ )
+ )
+
+ # Vertical threshold line
+ fig.add_shape(
+ type='line',
+ x0=n_reps,
+ x1=n_reps,
+ y0=0,
+ y1=1,
+ yref='paper',
+ line={'color': 'red', 'dash': 'dash'},
+ )
+
+ # Configure layout
+ fig.update_layout(
+ width=figsize[0],
+ height=figsize[1],
+        # Plotly titles use HTML, so use <br> rather than \n for a line break
+        yaxis_title=f'Cumulative Mean:<br>{metric_name}',
+ hovermode='x unified',
+ showlegend=True,
+ )
+
+ # Save figure
+ if file_path is not None:
+ fig.write_image(file_path)
+
+ return fig
diff --git a/tests/exp_results/replications.csv b/tests/exp_results/replications.csv
new file mode 100644
index 0000000..e874a8e
--- /dev/null
+++ b/tests/exp_results/replications.csv
@@ -0,0 +1,121 @@
+replications,data,cumulative_mean,stdev,lower_ci,upper_ci,deviation,metric
+1,9.842379990825465,9.842379990825465,,,,,mean_time_with_nurse
+2,10.060423560536304,9.951401775680885,,,,,mean_time_with_nurse
+3,9.925089673744292,9.942631075035354,0.11007508865543447,9.669189396185518,10.216072753885191,0.02750194357873877,mean_time_with_nurse
+4,9.938503720153147,9.941599236314802,0.08989962287415575,9.79854887498365,10.084649597645955,0.01438906939726725,mean_time_with_nurse
+5,10.016611116790997,9.956601612410042,0.08477507350159925,9.851339455698028,10.061863769122056,0.010572096866948431,mean_time_with_nurse
+6,9.883800932020227,9.944468165678407,0.08144184211630967,9.85900019398277,10.029936137374044,0.00859452413861758,mean_time_with_nurse
+7,10.040555259165185,9.958194893319375,0.08274220649011921,9.881671115440534,10.034718671198215,0.007684502934379949,mean_time_with_nurse
+8,10.086612424440187,9.974247084709477,0.08904839448371414,9.899800763883556,10.048693405535397,0.007463853681752782,mean_time_with_nurse
+9,10.202806309000646,9.999642554075162,0.1128839371604415,9.91287227877846,10.086412829371865,0.00867733769757002,mean_time_with_nurse
+10,10.093238146039786,10.009002113271624,0.11046688166466083,9.929978866589812,10.088025359953436,0.007895217304133596,mean_time_with_nurse
+11,10.082693576415949,10.015701337193835,0.1071275611735933,9.943732062327035,10.087670612060636,0.007185645063071047,mean_time_with_nurse
+12,9.895606898184578,10.005693467276398,0.10786520560721555,9.937159185316156,10.07422774923664,0.006849528439421251,mean_time_with_nurse
+13,9.808885513355504,9.990554393897868,0.11681103296143085,9.919966193143505,10.061142594652232,0.007065493862630651,mean_time_with_nurse
+14,9.932816012126937,9.986430223771373,0.11328432808933316,9.921021800077227,10.05183864746552,0.0065497302067409405,mean_time_with_nurse
+15,10.165898811805276,9.998394796306966,0.11859147043224914,9.932721028994578,10.064068563619355,0.0065684310982244845,mean_time_with_nurse
+16,9.97527206509048,9.996949625605936,0.11471598564431212,9.935821791738446,10.058077459473425,0.006114648583495752,mean_time_with_nurse
+17,9.80931192832356,9.98591211400109,0.12003468577232226,9.92419597341084,10.047628254591341,0.006180320824546403,mean_time_with_nurse
+18,9.988073964553942,9.986032216809582,0.11645186497858809,9.928122066759526,10.043942366859637,0.005799115083223496,mean_time_with_nurse
+19,9.801716381189982,9.976331383355918,0.12081251236817435,9.918101616330164,10.034561150381672,0.005836791580811218,mean_time_with_nurse
+20,9.92338975169577,9.973684301772911,0.11818464860447063,9.918372183608344,10.028996419937478,0.0055458059921482256,mean_time_with_nurse
+21,9.813121481621002,9.96603845319425,0.12040294688137244,9.911231688864293,10.020845217524206,0.005499353086721291,mean_time_with_nurse
+22,9.861201095110324,9.961273118735889,0.119608225336162,9.908241802518038,10.01430443495374,0.005323748840708513,mean_time_with_nurse
+23,9.971738394928153,9.961728130744248,0.11687861595939265,9.911186027112416,10.01227023437608,0.0050736280862602205,mean_time_with_nurse
+24,9.923495967713496,9.9601351239513,0.11457563161954815,9.911754077854523,10.008516170048077,0.0048574688490353405,mean_time_with_nurse
+25,9.963032663479305,9.96025102553242,0.11216473993070172,9.913951696450749,10.006550354614092,0.004648409860653723,mean_time_with_nurse
+26,10.236576381308916,9.970878923831517,0.12253346280517494,9.92138658712308,10.020371260539953,0.004963688465832612,mean_time_with_nurse
+27,10.040644164410518,9.97346282163074,0.12090176562576795,9.925635676040924,10.021289967220556,0.004795440304453471,mean_time_with_nurse
+28,10.052037201655754,9.976269049488776,0.11956736762166252,9.929905664420165,10.022632434557387,0.004647367150847594,mean_time_with_nurse
+29,10.043531523096945,9.978588445130438,0.1180753131127768,9.933674998037883,10.023501892222992,0.004500982011586242,mean_time_with_nurse
+30,10.035942383790465,9.980500243085771,0.11649325169210528,9.937000948013033,10.02399953815851,0.004358428336582947,mean_time_with_nurse
+31,9.921220126790663,9.9785879812698,0.11502904359180041,9.936394987440801,10.0207809750988,0.004228353140564279,mean_time_with_nurse
+32,10.10698707509288,9.982600452951772,0.11541252177493729,9.940989801578949,10.024211104324595,0.004168317821487046,mean_time_with_nurse
+33,10.091438780533322,9.985898584090606,0.11516406423682789,9.94506318253854,10.026733985642672,0.004089306656601115,mean_time_with_nurse
+34,10.060623794026464,9.988096384382837,0.11412752038985839,9.948275409989549,10.027917358776126,0.003986843224255531,mean_time_with_nurse
+35,9.908652113406877,9.98582654806924,0.11323570767376603,9.946928721532805,10.024724374605674,0.0038953036435382124,mean_time_with_nurse
+36,10.000181132785418,9.986225286533578,0.11163197149795141,9.948454461469543,10.023996111597613,0.0037822925059550296,mean_time_with_nurse
+37,10.061135603094858,9.988249889683884,0.11075740368814466,9.951321532730077,10.02517824663769,0.0036971799225756,mean_time_with_nurse
+38,9.992774949405481,9.988368970202872,0.10925289609639212,9.952458436853231,10.024279503552513,0.003595234963462883,mean_time_with_nurse
+39,10.04386112055994,9.989791845853054,0.10817136096117745,9.954726747051039,10.024856944655069,0.0035100930372809185,mean_time_with_nurse
+40,10.049263121363516,9.991278627740815,0.1071887932358456,9.956997988577033,10.025559266904597,0.0034310562682739907,mean_time_with_nurse
+1,0.501873288433248,0.501873288433248,,,,,mean_q_time_nurse
+2,0.5162638608821064,0.5090685746576772,,,,,mean_q_time_nurse
+3,0.5312797210924174,0.5164722901359239,0.014704324278930744,0.4799447236692787,0.5529998566025691,0.07072512342730322,mean_q_time_nurse
+4,0.4911035979224117,0.5101301170825459,0.017465320051155087,0.482338895448843,0.5379213387162487,0.0544786922062981,mean_q_time_nurse
+5,0.4632522371350816,0.500754541093053,0.02585121233212735,0.4686560214303292,0.5328530607557767,0.06410030669449093,mean_q_time_nurse
+6,0.3904951453981618,0.48237797514390446,0.05060451758225758,0.4292717908533404,0.5354841594344685,0.11009247317877909,mean_q_time_nurse
+7,0.4732773513358222,0.48107788602846413,0.04632327660115864,0.438236000615722,0.5239197714412063,0.08905394876165086,mean_q_time_nurse
+8,0.611501683470203,0.4973808607086815,0.06297294975070981,0.4447341572203516,0.5500275641970114,0.10584786759449791,mean_q_time_nurse
+9,0.47892602400167145,0.4953303232967915,0.05922614020316781,0.44980508188989593,0.540855564703687,0.09190885206439862,mean_q_time_nurse
+10,0.5647473631653535,0.5022720272836477,0.059998831792909366,0.4593514486093572,0.5451926059579382,0.08545285491292547,mean_q_time_nurse
+11,0.5710886426068235,0.5085280832221183,0.060583780253632605,0.4678273458415839,0.5492288206026527,0.08003636126179654,mean_q_time_nurse
+12,0.39338731144465283,0.4989330189073295,0.06664461958318071,0.45658904780066173,0.5412769900139973,0.08486904955579344,mean_q_time_nurse
+13,0.44440250011421983,0.49473836361555185,0.0655752743442491,0.45511162192358634,0.5343651053075174,0.08009635921979646,mean_q_time_nurse
+14,0.44764948026318635,0.4913748719475257,0.06424734602865995,0.45427955875510284,0.5284701851399486,0.07549289821312699,mean_q_time_nurse
+15,0.6148477335265125,0.4996063960527915,0.06963658564418193,0.4610429403054582,0.5381698518001248,0.07718767424118086,mean_q_time_nurse
+16,0.46151166088825735,0.49722547510500814,0.06794608899061828,0.46101955997961147,0.5334313902304048,0.07281588924571977,mean_q_time_nurse
+17,0.4257522707631643,0.4930211689672526,0.06803399372924115,0.45804131714446994,0.5280010207900353,0.0709499997658439,mean_q_time_nurse
+18,0.4811166622747489,0.4923598074843357,0.06606228475568435,0.4595078061569663,0.5252118088117051,0.06672356440957988,mean_q_time_nurse
+19,0.37614947541125254,0.48624347421733133,0.06951654203693261,0.452737573479909,0.5197493749547537,0.06890766151947685,mean_q_time_nurse
+20,0.42907788720455214,0.48338519486669235,0.0688592776394449,0.4511580609157638,0.5156123288176209,0.06666967522622641,mean_q_time_nurse
+21,0.6245182330573088,0.4901058157329122,0.07384457835583941,0.45649216666682224,0.5237194647990021,0.06858447295881108,mean_q_time_nurse
+22,0.5152436681690306,0.4912484453890994,0.07226394194711226,0.4592084084620812,0.5232884823161176,0.0652216556159083,mean_q_time_nurse
+23,0.5252369665044344,0.49272620717672266,0.07095729062751036,0.4620419737359734,0.523410440617472,0.06227440918267207,mean_q_time_nurse
+24,0.5204269703372153,0.4938804056417432,0.06962757329619379,0.46447925999119566,0.5232815512922907,0.05953090123578396,mean_q_time_nurse
+25,0.5196503817355377,0.494911204685495,0.06835614583221682,0.4666951744731855,0.5231272348978044,0.05701230835992116,mean_q_time_nurse
+26,0.6386328892399044,0.5004389617837415,0.07266442546446965,0.47108916516915744,0.5297887583983255,0.0586481046758849,mean_q_time_nurse
+27,0.514672047994919,0.5009661131248963,0.07130596208610374,0.4727584139861572,0.5291738122636352,0.05630660118463254,mean_q_time_nurse
+28,0.5032874412345651,0.5010490177002416,0.06997439798217155,0.4739157787310838,0.5281822566693993,0.05415286331404512,mean_q_time_nurse
+29,0.4616356744718863,0.49968993689926383,0.0691021727424766,0.473404876139057,0.5259749976594706,0.05260274185890957,mean_q_time_nurse
+30,0.5515493444111204,0.5014185838163258,0.06855726006451052,0.47581888218891655,0.527018285443735,0.0510545529297467,mean_q_time_nurse
+31,0.50983654907801,0.5016901310828317,0.06742190929332628,0.4769595750951834,0.52642068707048,0.04929448369707941,mean_q_time_nurse
+32,0.49093155636997543,0.5013539256230549,0.06635280712661132,0.47743118758308956,0.5252766636630203,0.04771626752545226,mean_q_time_nurse
+33,0.5583354096854821,0.5030806372613102,0.06605680423773559,0.4796579129577727,0.5265033615648478,0.046558588362786237,mean_q_time_nurse
+34,0.5482397065988831,0.5044088451830036,0.06550766905487178,0.48155213890315846,0.5272655514628487,0.045313849069305086,mean_q_time_nurse
+35,0.4578860911336867,0.5030796236387374,0.06501446283948678,0.4807463742614676,0.5254128730160071,0.04439307085374489,mean_q_time_nurse
+36,0.4647760722399565,0.5020156360998823,0.06439617122842345,0.4802271064732508,0.5238041657265138,0.043402093599922066,mean_q_time_nurse
+37,0.6453011612930271,0.5058882178618592,0.06772415399354129,0.4833078607047891,0.5284685750189293,0.044635072254709106,mean_q_time_nurse
+38,0.629223046319056,0.5091338712423118,0.06973451102588799,0.4862127078380333,0.5320550346465903,0.04501991460192925,mean_q_time_nurse
+39,0.5493007154256917,0.5101637903239369,0.06911077717181525,0.48776066801614365,0.5325669126317302,0.04391358762166953,mean_q_time_nurse
+40,0.6398790963369202,0.5134066729742615,0.07123539722742252,0.490624487688633,0.5361888582598899,0.04437454066120134,mean_q_time_nurse
+1,0.49958878609667684,0.49958878609667684,,,,,mean_nurse_utilisation
+2,0.5019774882864758,0.5007831371915763,,,,,mean_nurse_utilisation
+3,0.49809386117421955,0.49988671185245737,0.0019588797062941353,0.49502058490133,0.5047528388035847,0.009734459500022913,mean_nurse_utilisation
+4,0.4982487932220592,0.49947723219485785,0.0017968957016028138,0.4966179701515843,0.5023364942381313,0.0057245092648348355,mean_nurse_utilisation
+5,0.49693389991134934,0.4989685657381561,0.0019275200239417237,0.4965752335186108,0.5013618979577015,0.004796559109900422,mean_nurse_utilisation
+6,0.4928539383795759,0.4979494611783928,0.003033761918236169,0.49476572329843654,0.5011331990583491,0.006393696806944939,mean_nurse_utilisation
+7,0.5033858255951804,0.4987260846665053,0.0034484442545104858,0.4955368056594095,0.5019153636736011,0.006394851011710062,mean_nurse_utilisation
+8,0.5034113289416777,0.49931174020090185,0.0035967878792269982,0.4963047502832039,0.5023187301185997,0.006022269607536255,mean_nurse_utilisation
+9,0.5088177294470143,0.5003679612282477,0.004621709466701935,0.4968154008476054,0.50392052160889,0.007099895788535041,mean_nurse_utilisation
+10,0.49994369431171704,0.5003255345365946,0.004359454468057957,0.4972069686765276,0.5034441003966615,0.006233073558708982,mean_nurse_utilisation
+11,0.5097942141014932,0.501186323587949,0.005025424821187901,0.49781019725485304,0.5045624499210449,0.006736269874498095,mean_nurse_utilisation
+12,0.48962260282436343,0.5002226801909836,0.005839717311605493,0.49651230082862413,0.503933059553343,0.007417455284000405,mean_nurse_utilisation
+13,0.5033978686932535,0.500466925460389,0.0056600322314152305,0.4970466022143885,0.5038872487063893,0.006834264307984002,mean_nurse_utilisation
+14,0.4892236002933267,0.4996638308055988,0.006212979495790504,0.4960765632451358,0.5032510983660617,0.007179362081660522,mean_nurse_utilisation
+15,0.5131047752810153,0.5005598937706266,0.006920102913294844,0.49672766851151623,0.504392119029737,0.007655877561909585,mean_nurse_utilisation
+16,0.5009476110379539,0.5005841260998346,0.006686157192364971,0.4970213244225335,0.5041469277771358,0.007117288566578672,mean_nurse_utilisation
+17,0.48669127709236415,0.49976689968763044,0.007298236603839275,0.49601449267789494,0.503519306697366,0.007508314400335199,mean_nurse_utilisation
+18,0.4953608235769949,0.499522117681484,0.007156087901644342,0.4959634788891228,0.5030807564738452,0.007124086534703485,mean_nurse_utilisation
+19,0.4794301592441245,0.49846464618478087,0.008343338059131563,0.4944432859640797,0.502486006405482,0.008067493354805411,mean_nurse_utilisation
+20,0.4916972800926532,0.4981262778801745,0.008260593202462406,0.4942602012558477,0.5019923545045013,0.007761238055497196,mean_nurse_utilisation
+21,0.4926570184731424,0.4978658369560301,0.008139407166869865,0.49416082323390154,0.5015708506781587,0.007441791436787729,mean_nurse_utilisation
+22,0.4895340968911088,0.4974871214985337,0.008139443979842372,0.4938782942099475,0.5010959487871198,0.007254111981262222,mean_nurse_utilisation
+23,0.4893427907640909,0.4971330201622536,0.008131609812209985,0.4936166483910039,0.5006493919335033,0.007073301568465589,mean_nurse_utilisation
+24,0.5013717929835146,0.4973096356964728,0.00799979967396101,0.49393161655470574,0.5006876548382398,0.0067925873526182325,mean_nurse_utilisation
+25,0.4915750702731453,0.4970802530795397,0.007914901852179692,0.49381314216991157,0.5003473639891678,0.006572602491021388,mean_nurse_utilisation
+26,0.5060739795771849,0.4974261656371414,0.007953042651188475,0.49421386219171437,0.5006384690825685,0.006457849762109921,mean_nurse_utilisation
+27,0.5023111150028697,0.49760708968772394,0.007855059885121982,0.49449973148829673,0.5007144478871511,0.0062446019436283185,mean_nurse_utilisation
+28,0.4999763333861762,0.49769170553409725,0.0077212164434829565,0.4946977303526746,0.5006856807155199,0.006015722480666376,mean_nurse_utilisation
+29,0.49232621417270916,0.4975066885906011,0.007647267816605914,0.49459782348214554,0.5004155536990567,0.005846886434222975,mean_nurse_utilisation
+30,0.5046625973715585,0.4977452188832997,0.007626993512415558,0.4948972527007495,0.5005931850658499,0.005721734884645698,mean_nurse_utilisation
+31,0.4957265964121678,0.4976801020293922,0.0075075589939981,0.4949263071579969,0.5004338969007875,0.005533262953785293,mean_nurse_utilisation
+32,0.5035722335646021,0.49786423113986755,0.007458564167237306,0.49517513199766217,0.5005533302820729,0.0054012700130086945,mean_nurse_utilisation
+33,0.503455200045186,0.49803365444002873,0.007405334343055277,0.4954078370591477,0.5006594718209098,0.005272369361932795,mean_nurse_utilisation
+34,0.4996207391525908,0.4980803334021629,0.007297346985441026,0.4955341686694714,0.5006264981348544,0.005111955967624256,mean_nurse_utilisation
+35,0.4960667598321286,0.49802280272873334,0.007197284304297146,0.4955504490679206,0.5004951563895461,0.004964338273802657,mean_nurse_utilisation
+36,0.492980852621203,0.4978827485590797,0.007143320078197071,0.4954657967716168,0.5002997003465427,0.004854459798934277,mean_nurse_utilisation
+37,0.5081176936109076,0.4981593686956156,0.007241601781836921,0.49574489845153186,0.5005738389396994,0.0048467827683454905,mean_nurse_utilisation
+38,0.5021531975212685,0.4982644694541854,0.007172393741642748,0.49590696229548475,0.500621976612886,0.004731437425757259,mean_nurse_utilisation
+39,0.4998568861129943,0.4983053006505651,0.007081982984991065,0.49600958734090245,0.5006010139602277,0.004607041720538521,mean_nurse_utilisation
+40,0.5120208842516952,0.49864819024059337,0.0073192486945782165,0.4963073809463162,0.5009889995348705,0.004694310217285113,mean_nurse_utilisation
diff --git a/tests/test_backtest.py b/tests/test_backtest.py
index 0930cd5..0ca87b4 100644
--- a/tests/test_backtest.py
+++ b/tests/test_backtest.py
@@ -10,7 +10,6 @@
Typical usage example:
pytest
-
"""
from pathlib import Path
diff --git a/tests/test_backtest_replications.py b/tests/test_backtest_replications.py
new file mode 100644
index 0000000..20f02e7
--- /dev/null
+++ b/tests/test_backtest_replications.py
@@ -0,0 +1,74 @@
+"""Back Testing
+
+Back tests check that the model code produces results consistent with those
+generated historically/from prior code.
+
+Licence:
+ This project is licensed under the MIT Licence. See the LICENSE file for
+ more details.
+
+Typical usage example:
+
+ pytest
+"""
+
+from pathlib import Path
+
+import pandas as pd
+import pytest
+
+from simulation.model import Runner, Param
+from simulation.replications import (
+ confidence_interval_method, confidence_interval_method_simple,
+ ReplicationsAlgorithm)
+
+
+@pytest.mark.parametrize('ci_function', [
+ confidence_interval_method,
+ confidence_interval_method_simple
+])
+def test_cimethods(ci_function):
+ """
+ Check that results from the manual confidence interval methods are
+ consistent with those generated previously.
+
+ Arguments:
+ ci_function (function):
+ Function to run the manual confidence interval method.
+ """
+ # Run the confidence interval method
+ _, cumulative_df = ci_function(
+ replications=40, metrics=['mean_time_with_nurse',
+ 'mean_q_time_nurse',
+ 'mean_nurse_utilisation'])
+
+ # Import the expected results
+ exp_df = pd.read_csv(
+ Path(__file__).parent.joinpath('exp_results/replications.csv'))
+
+ # Compare them
+ pd.testing.assert_frame_equal(cumulative_df.reset_index(drop=True),
+ exp_df.reset_index(drop=True))
+
+
+def test_algorithm():
+ """
+ Check that the ReplicationsAlgorithm produces results consistent with those
+ previously generated.
+ """
+ # Import the expected results
+ exp_df = pd.read_csv(
+ Path(__file__).parent.joinpath('exp_results/replications.csv'))
+
+ # Run the algorithm, forcing only 40 replications
+ analyser = ReplicationsAlgorithm(initial_replications=40,
+ replication_budget=40,
+ look_ahead=0)
+ _, summary_table = analyser.select(
+ runner=Runner(Param()), metrics=['mean_time_with_nurse',
+ 'mean_q_time_nurse',
+ 'mean_nurse_utilisation'])
+
+ # Compare dataframes
+ pd.testing.assert_frame_equal(summary_table.reset_index(drop=True),
+ exp_df.reset_index(drop=True))
diff --git a/tests/test_functionaltest.py b/tests/test_functionaltest.py
index bf2a826..d62058f 100644
--- a/tests/test_functionaltest.py
+++ b/tests/test_functionaltest.py
@@ -248,7 +248,7 @@ def test_waiting_time_utilisation(param_name, initial_value, adjusted_value):
def helper_param(param_name, value):
"""
Helper function to set a specific parameter value, run the model,
- and return the waiting time.
+ and return the results from a single run.
Arguments:
param_name (string):
@@ -459,3 +459,110 @@ def process_task(env, resource, duration):
f'Expected queue time {expected_busy_time} but ' +
f'observed {resource.area_resource_busy}.'
)
+
+
+def test_extreme_interarrival():
+ """
+ Check that extremely high interarrival time results in no arrivals, and
+ that the model can cope with having no arrivals.
+ """
+ param = Param(patient_inter=99999999,
+ warm_up_period=0,
+ data_collection_period=10000)
+ experiment = Runner(param)
+ results = experiment.run_single(run=0)
+
+ assert results['run']['arrivals'] < 1, (
+ 'Expect no arrivals due to extreme interarrival time.'
+ )
+
+
+def test_extreme_nurses():
+ """
+ Check that extremely high number of nurses results in no wait times and all
+ patients being seen.
+ """
+ # Run model with extremely large number of nurses
+ param = Param(number_of_nurses=10_000_000)
+ experiment = Runner(param)
+ results = experiment.run_single(run=0)
+
+ # Check that no patients wait
+ assert results['run']['mean_q_time_nurse'] == 0, (
+ 'Expect no patients to wait but mean wait time is ' +
+        f"{results['run']['mean_q_time_nurse']:.2f}."
+ )
+
+ # Check that all patients are seen
+ assert results['run']['count_unseen'] == 0, (
+ 'Expect all patients to be seen, but ' +
+        f"{results['run']['count_unseen']} were not seen by a nurse."
+ )
+ assert np.isnan(results['run']['mean_q_time_unseen']), (
+ 'Expect all patients to be seen, and so have no mean wait time for ' +
+        f"unseen patients, but was {results['run']['mean_q_time_unseen']:.2f}."
+ )
+
+
+def test_no_missing_values():
+ """
+ Some columns are expected to have some NaN - but for those that don't,
+ check that no missing values exist in the final output.
+ """
+ param = Param()
+ experiment = Runner(param)
+ experiment.run_reps()
+
+ # Define required columns we expect to have no missing values
+ req_patient = ['patient_id', 'arrival_time', 'run']
+ req_run = ['run_number', 'scenario', 'arrivals', 'mean_q_time_nurse',
+ 'mean_time_with_nurse', 'mean_nurse_utilisation',
+ 'mean_nurse_utilisation_tw', 'mean_nurse_q_length',
+ 'count_unseen']
+
+ # Check for missing values
+ res_patient = experiment.patient_results_df[req_patient].isnull().any()
+    assert not res_patient.any(), (
+        'Found missing values in patient results in columns that we expect ' +
+        f'to have none - in the columns marked True: {res_patient}'
+    )
+    res_run = experiment.run_results_df[req_run].isnull().any()
+    assert not res_run.any(), (
+        'Found missing values in run results in columns that we expect ' +
+        f'to have none - in the columns marked True: {res_run}'
+    )
+    res_interval = experiment.interval_audit_df.isnull().any()
+    assert not res_interval.any(), (
+        'Found missing values in interval results - in the columns marked ' +
+        f'True: {res_interval}'
+    )
+    res_overall = experiment.overall_results_df.isnull().any()
+    assert not res_overall.any(), (
+        'Found missing values in overall results - in the columns marked ' +
+        f'True: {res_overall}'
+    )
+
+
+def test_sampled_times():
+ """
+ Ensure that the mean of inter-arrival and consultation times are close to
+ expected value.
+ """
+ param = Param(patient_inter=5, mean_n_consult_time=8)
+ experiment = Runner(param)
+ results = experiment.run_single(run=0)
+
+ # Calculate the inter-arrival times between patients (from arrival times)
+ actual_interarrival = np.diff(sorted(results['patient']['arrival_time']))
+
+ # Check that the mean inter-arrival time is close to 5
+ observed_mean_iat = np.mean(actual_interarrival)
+ assert np.isclose(observed_mean_iat, 5, atol=0.5), (
+ f'Expected mean interarrival time ≈ 5, but got {observed_mean_iat}.'
+ )
+
+ # Check that the mean nurse consultation time is close to 8
+ observed_mean_nur = results['patient']['time_with_nurse'].mean()
+ assert np.isclose(observed_mean_nur, 8, atol=0.5), (
+ f"Expected mean consultation time ≈ 8, but got {observed_mean_nur}."
+ )
diff --git a/tests/test_functionaltest_replications.py b/tests/test_functionaltest_replications.py
new file mode 100644
index 0000000..7648bd9
--- /dev/null
+++ b/tests/test_functionaltest_replications.py
@@ -0,0 +1,160 @@
+"""Functional Testing for objects in replications.py
+
+Functional tests verify that the system or components perform their intended
+functionality.
+
+Licence:
+ This project is licensed under the MIT Licence. See the LICENSE file for
+ more details.
+
+Typical usage example:
+
+ pytest
+"""
+
+import pandas as pd
+import pytest
+
+from simulation.model import Param, Runner
+from simulation.replications import (
+ confidence_interval_method, confidence_interval_method_simple,
+ ReplicationsAlgorithm)
+
+
+@pytest.mark.parametrize('ci_function', [
+ confidence_interval_method,
+ confidence_interval_method_simple
+])
+def test_ci_method_output(ci_function):
+ """
+ Check that the output from confidence_interval_method and
+ confidence_interval_method_simple meets our expectations.
+
+ Arguments:
+ ci_function (function):
+ Function to run the confidence interval method.
+ """
+    # Create empty list to store errors (rather than asserting each check
+    # immediately, which would stop the test at the first failure)
+ errors = []
+
+ # Choose a number of replications to run for
+ reps = 20
+
+ # Run the confidence interval method
+ min_reps, cumulative_df = ci_function(
+ replications=reps, metrics=['mean_time_with_nurse'])
+
+ # Check that the results dataframe contains the right number of rows
+ if not len(cumulative_df) == reps:
+ errors.append(
+ f'Ran {reps} replications but cumulative_df only has ' +
+ f'{len(cumulative_df)} entries.')
+
+ # Check that the replications are appropriately numbered
+ if not min(cumulative_df['replications']) == 1:
+ errors.append(
+ 'Minimum replication in cumulative_df should be 1 but it is ' +
+            f"{min(cumulative_df['replications'])}.")
+
+ if not max(cumulative_df['replications']) == reps:
+ errors.append(
+ f'As we ran {reps} replications, the maximum replication in ' +
+ f'cumulative_df should be {reps} but it is ' +
+            f"{max(cumulative_df['replications'])}.")
+
+ # Check that min_reps is no more than the number run
+ if not min_reps['mean_time_with_nurse'] <= reps:
+        errors.append(
+            'The minimum number of replications required as returned by ' +
+            'the confidence interval method should be no more than the ' +
+            f"number we ran ({reps}) but it was " +
+            f"{min_reps['mean_time_with_nurse']}.")
+
+ # Check if there were any errors
+ assert not errors, 'Errors occurred:\n{}'.format('\n'.join(errors))
+
+
+@pytest.mark.parametrize('ci_function', [
+ confidence_interval_method,
+ confidence_interval_method_simple
+])
+def test_consistent_outputs(ci_function):
+ """
+ Check that the output cumulative statistics from the manual confidence
+ interval methods are consistent with those from the algorithm.
+
+ Arguments:
+ ci_function (function):
+ Function to run the manual confidence interval method.
+ """
+ # Choose a number of replications to run for
+ reps = 20
+
+ # Run the manual confidence interval method
+ _, man_df = ci_function(
+ replications=reps, metrics=['mean_time_with_nurse'])
+
+ # Run the algorithm
+ analyser = ReplicationsAlgorithm(initial_replications=reps,
+ replication_budget=reps)
+ _, summary_table = analyser.select(runner=Runner(Param()),
+ metrics=['mean_time_with_nurse'])
+    # Get the first `reps` rows (the table may have more if precision was
+    # met and the algorithm continued into the look-ahead period beyond
+    # the budget), then compare dataframes
+    pd.testing.assert_frame_equal(
+        man_df, summary_table.head(reps))
+
+
+def test_algorithm_initial():
+ """
+ Check that the solution from the ReplicationsAlgorithm is as expected when
+ there is a high number of initial replications specified.
+ """
+ # Set parameters - inc. high number of initial replications & no look-ahead
+    # (as the model is currently designed, all metrics reach the desired
+    # precision well before 200 replications)
+ initial_replications = 200
+ look_ahead = 0
+ metrics = ['mean_time_with_nurse',
+ 'mean_q_time_nurse',
+ 'mean_nurse_utilisation']
+
+ # Set up algorithm class
+ analyser = ReplicationsAlgorithm(
+ initial_replications=initial_replications,
+ look_ahead=look_ahead)
+
+ # Run the algorithm
+ nreps, summary_table = analyser.select(
+ runner=Runner(Param()),
+ metrics=metrics)
+
+ # For each metric...
+ for metric in metrics:
+
+ # Check that nrow for each metric in summary table equals initial reps
+ nrows = len(summary_table[summary_table['metric'] == metric])
+ assert nrows == initial_replications
+
+ # Check that solution is equal to the initial replications
+ assert nreps[metric] == initial_replications
+
+
+def test_algorithm_nosolution():
+ """
+    Check that running fewer than 3 replications in total will result in
+    no solution (at least three replications are needed to compute a
+    confidence interval), and that a warning message is then created.
+ """
+ # Set up algorithm to run max of 2 replications
+ analyser = ReplicationsAlgorithm(
+ initial_replications=0, replication_budget=2, look_ahead=0)
+
+ # Run algorithm, checking that it produces a warning
+ with pytest.warns(UserWarning):
+ solutions, summary_table = analyser.select(
+ runner=Runner(Param()), metrics=['mean_time_with_nurse'])
+
+ # Check that there is no solution
+ assert solutions['mean_time_with_nurse'] is None
+
+    # Check that the summary table has no more than 2 rows
+ assert len(summary_table) < 3
diff --git a/tests/test_unittest.py b/tests/test_unittest.py
index 9b0080f..b717e2f 100644
--- a/tests/test_unittest.py
+++ b/tests/test_unittest.py
@@ -4,6 +4,10 @@
components (e.g. methods, classes) and tests them in isolation to ensure they
work as intended.
+SimPy itself has lots of tests of the SimPy components, which can be viewed
+at: https://gitlab.com/team-simpy/simpy/-/tree/master/tests?ref_type=heads.
+Hence, our focus here is on testing the components we have written ourselves.
+
Licence:
This project is licensed under the MIT Licence. See the LICENSE file for
more details.
@@ -43,6 +47,7 @@ def test_new_attribute():
('mean_n_consult_time', 0, 'positive'),
('number_of_runs', 0, 'positive'),
('audit_interval', 0, 'positive'),
+ ('number_of_nurses', 0, 'positive'),
('warm_up_period', -1, 'non_negative'),
('data_collection_period', -1, 'non_negative')
])
@@ -112,6 +117,19 @@ def test_exponentional():
)
+def test_invalid_exponential():
+ """
+ Ensure that Exponential distribution cannot be created with a negative
+ or zero mean.
+ """
+ # Negative mean
+ with pytest.raises(ValueError):
+ Exponential(mean=-5, random_seed=42)
+ # Zero mean
+ with pytest.raises(ValueError):
+ Exponential(mean=0, random_seed=42)
+
+
def test_log_to_console():
"""
Confirm that logger.log() prints the provided message to the console.
diff --git a/tests/test_unittest_replications.py b/tests/test_unittest_replications.py
new file mode 100644
index 0000000..3da0137
--- /dev/null
+++ b/tests/test_unittest_replications.py
@@ -0,0 +1,184 @@
+"""Unit Testing for objects in replications.py
+
+Unit tests are a type of functional testing that focuses on individual
+components (e.g. methods, classes) and tests them in isolation to ensure they
+work as intended.
+
+Licence:
+ This project is licensed under the MIT Licence. See the LICENSE file for
+ more details.
+
+Typical usage example:
+
+ pytest
+"""
+
+from unittest.mock import MagicMock
+
+import numpy as np
+import pandas as pd
+import pytest
+import scipy.stats as st
+
+from simulation.replications import (
+ ReplicationsAlgorithm, OnlineStatistics, ReplicationTabulizer)
+
+
+# pylint: disable=protected-access
+@pytest.mark.parametrize('look_ahead, n, exp', [
+ (100, 100, 100),
+ (100, 101, 101),
+ (0, 500, 0)
+])
+def test_klimit(look_ahead, n, exp):
+ """
+ Check that the _klimit() calculations are as expected.
+
+ Arguments:
+ look_ahead (int):
+ Minimum additional replications to look ahead to assess stability
+ of precision.
+ n (int):
+ Number of replications that would already be completed.
+ exp (int):
+ Expected number of replications for _klimit() to return.
+ """
+    # Calculate additional replications that would be required
+    calc = ReplicationsAlgorithm(
+        look_ahead=look_ahead, initial_replications=n)._klimit()
+    # Check that this meets our expected value
+    assert calc == exp, (
+ f'With look_ahead {look_ahead} and n={n}, the additional ' +
+ f'replications required should be {exp} but _klimit() returned {calc}.'
+ )
+
+
+@pytest.mark.parametrize('arg, value', [
+ ('initial_replications', -1),
+ ('initial_replications', 0.5),
+ ('look_ahead', -1),
+ ('look_ahead', 0.5),
+ ('half_width_precision', 0)
+])
+def test_algorithm_invalid(arg, value):
+ """
+ Ensure that ReplicationsAlgorithm responds appropriately to invalid inputs.
+ """
+ with pytest.raises(ValueError):
+ ReplicationsAlgorithm(**{arg: value})
+
+
+def test_algorithm_invalid_budget():
+ """
+ Ensure that ReplicationsAlgorithm responds appropriately when
+ replication_budget is less than initial_replications.
+ """
+ with pytest.raises(ValueError):
+ ReplicationsAlgorithm(initial_replications=10,
+ replication_budget=9)
+
+
+def test_onlinestat_data():
+ """
+ Check that OnlineStatistics will fail if an invalid data type is provided.
+ """
+ with pytest.raises(ValueError):
+ OnlineStatistics(data=pd.Series([9, 2, 3]))
+
+
+def test_onlinestat_computations():
+ """
+ Feed three values into OnlineStatistics and verify mean, standard
+ deviation, confidence intervals, and deviation calculations.
+ """
+ values = [10, 20, 30]
+
+ # Provide three values
+ stats = OnlineStatistics(data=np.array(values), alpha=0.05, observer=None)
+
+ # Expected values
+ expected_mean = np.mean(values)
+ expected_std = np.std(values, ddof=1)
+ expected_lci, expected_uci = st.t.interval(
+ confidence=0.95,
+ df=len(values)-1,
+ loc=np.mean(values),
+ scale=st.sem(values)
+ )
+ expected_dev = (expected_uci - expected_mean) / expected_mean
+
+ # Assertions
+ assert np.isclose(stats.mean, expected_mean), (
+ f'Expected mean {expected_mean}, got {stats.mean}')
+ assert np.isclose(stats.std, expected_std), (
+ f'Expected std dev {expected_std}, got {stats.std}')
+ assert np.isclose(stats.lci, expected_lci), (
+ f'Expected lower confidence interval {expected_lci}, got {stats.lci}')
+ assert np.isclose(stats.uci, expected_uci), (
+ f'Expected upper confidence interval {expected_uci}, got {stats.uci}')
+ assert np.isclose(stats.deviation, expected_dev), (
+ f'Expected deviation {expected_dev}, got {stats.deviation}')
+
+
+def test_tabulizer_initial_state():
+ """
+    Test that ReplicationTabulizer initialises with empty lists and n = 0.
+ """
+ tab = ReplicationTabulizer()
+ assert tab.n == 0
+ # Checks for empty lists (equivalent to len(tab.x_i)==0)
+ assert not tab.x_i
+ assert not tab.cumulative_mean
+ assert not tab.stdev
+ assert not tab.lower
+ assert not tab.upper
+ assert not tab.dev
+
+
+def test_tabulizer_update():
+ """
+ Test that update correctly appends new statistical data.
+ """
+ tab = ReplicationTabulizer()
+ mock_results = MagicMock()
+ mock_results.x_i = 10
+ mock_results.mean = 5.5
+ mock_results.std = 1.2
+ mock_results.lci = 4.8
+ mock_results.uci = 6.2
+ mock_results.deviation = 0.1
+
+ tab.update(mock_results)
+
+ assert tab.n == 1
+ assert tab.x_i == [10]
+ assert tab.cumulative_mean == [5.5]
+ assert tab.stdev == [1.2]
+ assert tab.lower == [4.8]
+ assert tab.upper == [6.2]
+ assert tab.dev == [0.1]
+
+
+def test_tabulizer_summary_table():
+ """
+ Test that summary_table returns a properly formatted DataFrame.
+ """
+ tab = ReplicationTabulizer()
+ tab.n = 3
+ tab.x_i = [10, 20, 30]
+ tab.cumulative_mean = [5, 10, 15]
+ tab.stdev = [1, 2, 3]
+ tab.lower = [3, 8, 13]
+ tab.upper = [7, 12, 17]
+ tab.dev = [0.1, 0.2, 0.3]
+
+ df = tab.summary_table()
+
+ assert isinstance(df, pd.DataFrame)
+ assert len(df) == 3
+ assert df['data'].tolist() == [10, 20, 30]
+ assert df['cumulative_mean'].tolist() == [5, 10, 15]
+ assert df['stdev'].tolist() == [1, 2, 3]
+ assert df['lower_ci'].tolist() == [3, 8, 13]
+ assert df['upper_ci'].tolist() == [7, 12, 17]
+ assert df['deviation'].tolist() == [0.1, 0.2, 0.3]