diff --git a/Pipfile b/Pipfile index 467d552..18a48bd 100644 --- a/Pipfile +++ b/Pipfile @@ -10,6 +10,8 @@ matplotlib = "*" pexpect = "*" seaborn = "*" tabulate = "*" +numpy = "*" +asttokens = "*" [dev-packages] black = "*" diff --git a/notebooks/assurance.ipynb b/notebooks/assurance.ipynb new file mode 100644 index 0000000..34ca9ac --- /dev/null +++ b/notebooks/assurance.ipynb @@ -0,0 +1,204 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reliable Statistics: Assurance\n", + "\n", + "Assurance provides optimal reliability and confidence levels given number of\n", + "samples and number of failures.\n", + "\n", + "### Setup\n", + "\n", + "- Local use: see instructions in `README.md` in the project's root directory.\n", + "- [Google colab](https://colab.research.google.com/github/sanjaymjoshi/relistats/blob/main/notebooks/assurance.ipynb) use: Execute the code block below for set up." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install relistats\n", + "%pip install tabulate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Assurance 75.2% at 10 samples with 1 failures\n" + ] + } + ], + "source": [ + "#@title Assurance {vertical-output: true}\n", + "from relistats.binomial import assurance\n", + "\n", + "num_samples = 10 #@param {type:\"integer\"}\n", + "num_failures = 1 #@param {type:\"integer\"}\n", + "assurance_value = assurance(num_samples, num_failures)\n", + "\n", + "print(f\"Assurance {assurance_value*100:.1f}% at {num_samples} samples with {num_failures} failures\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Assurance below desired level at 0 failures: 96.7%\n" + ] + }, + { + "ename": "AssertionError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[19], line 12\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m aa0 \u001b[38;5;241m<\u001b[39m assurance_pct \u001b[38;5;241m/\u001b[39m \u001b[38;5;241m100\u001b[39m:\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAssurance below desired level at 0 failures: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00maa0\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m100\u001b[39m\u001b[38;5;132;01m:\u001b[39;00m\u001b[38;5;124m.1f\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m---> 12\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 14\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m assurance(num_samples, num_failures) \u001b[38;5;241m>\u001b[39m assurance_pct \u001b[38;5;241m/\u001b[39m \u001b[38;5;241m100\u001b[39m:\n\u001b[0;32m 15\u001b[0m num_failures \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n", + "\u001b[1;31mAssertionError\u001b[0m: " + ] + } + ], + "source": [ + "# @title Maximum Tolerable Failures {vertical-output: true}\n", + "from relistats.binomial import assurance\n", + "\n", + "assurance_pct = 99 # @param {type:\"integer\"}\n", + "num_samples = 100 # @param {type:\"integer\"}\n", + "\n", + "# Start with 0 failures and increase until we reach the desired assurance\n", + "num_failures = 0\n", + "aa0 = assurance(num_samples, 0)\n", + "if aa0 < assurance_pct / 100:\n", + " print(f\"Assurance below desired level at 0 failures: {aa0*100:.1f}%\")\n", + " assert False\n", + "\n", + "while assurance(num_samples, num_failures) > assurance_pct / 100:\n", + " num_failures += 1\n", + "aa1 = assurance(num_samples, num_failures-1)\n", + "print(\n", + " f\"{num_failures} failures out of {num_samples} yield assurance {aa1*100:.1f}% >= {assurance_pct}%\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Minimum 152 samples needed to tolerate 10 failures for assurance 90.1% >= 90%\n" + ] + } + ], + "source": [ + "# @title Minimum Samples Needed {vertical-output: true}\n", + "from relistats.binomial import assurance\n", + "\n", + "assurance_pct = 90 # @param {type:\"integer\"}\n", + "num_failures = 10 # @param {type:\"integer\"}\n", + "\n", + "# Start with 0 failures and increase until we reach the desired assurance\n", + "num_samples = num_failures + 1\n", + "\n", + "while assurance(num_samples, num_failures) < assurance_pct / 100:\n", + " num_samples += 1\n", + "\n", + "aa1 = assurance(num_samples, num_failures)\n", + "print(\n", + " f\"Minimum {num_samples} samples needed to tolerate {num_failures} failures for assurance {aa1*100:.1f}% >= {assurance_pct}%\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Assurance levels with 2 failure(s).\n", + "Starting number of samples for each row is in first column.\n", + " n % % % % %\n", + "--- ---- ---- ---- ---- ----\n", + " 10 67.7 69.6 71.3 72.9 74.2\n", + " 15 75.4 76.4 77.4 78.3 79.1\n", + " 20 79.8 80.6 81.2 81.8 82.3\n", + " 25 82.8 83.3 83.8 84.2 84.6\n", + " 30 85.0 85.3 85.6 86.0 86.3\n", + " 35 86.6 86.9 87.2 87.4 87.6\n", + " 40 87.9 88.1 88.3 88.5 88.7\n", + " 45 88.9 89.1 89.3 89.4 89.6\n" + ] + } + ], + "source": [ + "#@title Assurance Table {vertical-output: true}\n", + "from relistats.binomial import assurance\n", + "from tabulate import tabulate\n", + "table_format = \"simple\" # for notebook\n", + "\n", + "n_start = 10 #@param {type:\"integer\"}\n", + "n_end = 50 #@param {type:\"integer\"}\n", + "n_step = 1 #@param {type:\"integer\"}\n", + "n_per_row = 5 #@param {type:\"integer\"}\n", + "\n", + "num_failures = 2 #@param {type:\"integer\"}\n", + "\n", + "all_a = []\n", + "all_n_row_starts = range(n_start, n_end, n_per_row)\n", + "for n_row_start in all_n_row_starts:\n", + " nn = range(n_row_start, n_row_start+n_step*n_per_row, n_step)\n", + " # assurance may return None, so provide default of 0\n", + " aa = [(assurance(n, num_failures) or 0)*100 for n in nn]\n", + " all_a.append(aa)\n", + "\n", + "nh = range(n_start, n_start+n_step*n_per_row, n_step)\n", + "headers = [\"n\"] + [\"%\" for _ in nh]\n", + "print(f\"Assurance levels with {num_failures} failure(s).\")\n", + "print(\"Starting number of samples for each row is in first column.\")\n", + "print(tabulate(all_a, tablefmt=table_format, headers=headers, floatfmt=\".1f\", showindex=all_n_row_starts))\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}