Merge pull request #203 from leaf-ai/prepend-past-to-presc

EKMeyerson · web-flow · commit 1a9bc7d77109 · 2021-02-11T16:34:31.000-08:00
Prepend past IPs to prescriptions
diff --git a/covid_xprize/scoring/prescriptor_scoring.py b/covid_xprize/scoring/prescriptor_scoring.py
@@ -16,7 +16,7 @@ def weight_prescriptions_by_cost(pres_df, cost_df):
     return weighted_df
 
 
-def generate_cases_and_stringency_for_prescriptions(start_date, end_date, prescription_file, costs_file):
+def generate_cases_and_stringency_for_prescriptions(start_date, end_date, prescription_file, costs_file, past_ips_file=None):
     start_time = time.time()
     # Load the prescriptions, handling Date and regions
     pres_df = XPrizePredictor.load_original_data(prescription_file)
@@ -27,6 +27,14 @@ def generate_cases_and_stringency_for_prescriptions(start_date, end_date, prescr
     for idx in pres_df['PrescriptionIndex'].unique():
         idx_df = pres_df[pres_df['PrescriptionIndex'] == idx]
         idx_df = idx_df.drop(columns='PrescriptionIndex')  # Predictor doesn't need this
+
+        # Prepend past IPs if provided. This handles the case where the
+        # prescriptions start after the predictor's dataset ends.
+        if past_ips_file:
+            past_ips_df = XPrizePredictor.load_original_data(past_ips_file)
+            past_ips_df = past_ips_df[past_ips_df['Date'] < start_date]
+            idx_df = past_ips_df.append(idx_df)
+
         # Generate the predictions
         pred_df = predictor.predict_from_df(start_date, end_date, idx_df)
         print(f"Generated predictions for PrescriptionIndex {idx}")
diff --git a/prescriptor_robojudge.ipynb b/prescriptor_robojudge.ipynb
@@ -66,7 +66,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "scrolled": false
+   },
    "outputs": [],
    "source": [
     "from covid_xprize.scoring.predictor_scoring import load_dataset\n",
@@ -81,7 +83,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "scrolled": false
+   },
    "outputs": [],
    "source": [
     "IP_FILE = \"prescriptions/robojudge_test_scenario.csv\"\n",
@@ -100,7 +104,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "scrolled": false
+   },
    "outputs": [],
    "source": [
     "# Cost weightings for each IP for each geo\n",
@@ -128,7 +134,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "scrolled": false
+   },
    "outputs": [],
    "source": [
     "# Generate blind_greedy prescriptions\n",
@@ -159,7 +167,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "scrolled": false
+   },
    "outputs": [],
    "source": [
     "# Validate the prescription files\n",
@@ -183,15 +193,15 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "scrolled": false
+    "scrolled": true
    },
    "outputs": [],
    "source": [
     "# Collect case and stringency data for all prescriptors\n",
     "dfs = []\n",
     "for prescriptor_name, prescription_file in sorted(prescription_files.items()):\n",
     "    print(\"Generating predictions for\", prescriptor_name)\n",
-    "    df, _ = generate_cases_and_stringency_for_prescriptions(START_DATE, END_DATE, prescription_file, TEST_COST)\n",
+    "    df, preds = generate_cases_and_stringency_for_prescriptions(START_DATE, END_DATE, prescription_file, TEST_COST, IP_FILE)\n",
     "    df['PrescriptorName'] = prescriptor_name\n",
     "    dfs.append(df)\n",
     "df = pd.concat(dfs)"
@@ -200,7 +210,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "scrolled": false
+   },
    "outputs": [],
    "source": [
     "df[df['CountryName'] == 'Afghanistan']"
@@ -232,7 +244,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "scrolled": false
+   },
    "outputs": [],
    "source": [
     "def plot_pareto_curve(objective1_list, objective2_list):\n",
@@ -321,7 +335,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "scrolled": false
+   },
    "outputs": [],
    "source": [
     "# Plot stringency and cases of each prescription for a particular country\n",
@@ -347,7 +363,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "scrolled": false
+   },
    "outputs": [],
    "source": []
   }