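Fix statsforecast `NIXTLA_ID_AS_COL` FutureWarning by setting the variable through `spark.executorEnv` in the SparkSession config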

khuyentran1401 · khuyentran1401 · commit 97b37889010b · 2024-06-03T19:05:13.000-05:00
diff --git a/Chapter5/time_series.ipynb b/Chapter5/time_series.ipynb
@@ -2271,20 +2271,6 @@
     "!pip install statsforecast pyspark\n"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "id": "da891a0f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os \n",
-    "\n",
-    "# this makes it so that the outputs of the predict methods have the id as a column \n",
-    "# instead of as the index\n",
-    "os.environ['NIXTLA_ID_AS_COL'] = '1'"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "3d8c3905",
@@ -2304,15 +2290,26 @@
    "source": [
     "from pyspark.sql import SparkSession\n",
     "\n",
-    "spark = SparkSession.builder.getOrCreate()\n"
+    "spark = SparkSession.builder.config(\n",
+    "    \"spark.executorEnv.NIXTLA_ID_AS_COL\", \"1\"\n",
+    ").getOrCreate()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 2,
    "id": "87bfec5a",
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:27: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
+      "  from tqdm.autonotebook import tqdm\n",
+      "                                                                                \r"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -2349,20 +2346,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 3,
    "id": "be012038",
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:485: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n",
-      "  warnings.warn(\n",
-      "/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:485: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n",
-      "  warnings.warn(\n",
-      "/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:485: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n",
-      "  warnings.warn(\n"
+      "[Stage 3:>                                                          (0 + 1) / 1]\r"
      ]
     },
     {
@@ -2386,10 +2378,9 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:485: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n",
-      "  warnings.warn(\n",
       "/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown\n",
-      "  warnings.warn('resource_tracker: There appear to be %d '\n"
+      "  warnings.warn('resource_tracker: There appear to be %d '\n",
+      "                                                                                \r"
      ]
     }
    ],
diff --git a/docs/Chapter5/time_series.html b/docs/Chapter5/time_series.html
@@ -1882,24 +1882,15 @@ <h2><span class="section-number">6.7.16. </span>Scaling Time-Series Forecasting
 </div>
 </details>
 </div>
-<div class="cell docutils container">
-<div class="cell_input docutils container">
-<div class="highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">os</span> 
-
-<span class="c1"># this makes it so that the outputs of the predict methods have the id as a column </span>
-<span class="c1"># instead of as the index</span>
-<span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;NIXTLA_ID_AS_COL&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;1&#39;</span>
-</pre></div>
-</div>
-</div>
-</div>
 <p>Traditional time series libraries are typically built to run in-memory on single machines, which poses challenges when handling extremely large datasets.</p>
 <p>StatsForecast, however, provides seamless compatibility with Spark, allowing users to perform scalable and efficient time-series forecasting on large datasets directly within Spark.</p>
 <div class="cell docutils container">
 <div class="cell_input docutils container">
 <div class="highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span>
 
-<span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span>
+<span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">config</span><span class="p">(</span>
+    <span class="s2">&quot;spark.executorEnv.NIXTLA_ID_AS_COL&quot;</span><span class="p">,</span> <span class="s2">&quot;1&quot;</span>
+<span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span>
 </pre></div>
 </div>
 </div>
@@ -1923,6 +1914,11 @@ <h2><span class="section-number">6.7.16. </span>Scaling Time-Series Forecasting
 </div>
 </div>
 <div class="cell_output docutils container">
+<div class="output stderr highlight-myst-ansi notranslate"><div class="highlight"><pre><span></span>/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:27: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)
+  from tqdm.autonotebook import tqdm
+                                                                                
+</pre></div>
+</div>
 <div class="output stream highlight-myst-ansi notranslate"><div class="highlight"><pre><span></span>+---------+-------------------+-------------------+
 |unique_id|                 ds|                  y|
 +---------+-------------------+-------------------+
@@ -1947,12 +1943,7 @@ <h2><span class="section-number">6.7.16. </span>Scaling Time-Series Forecasting
 </div>
 </div>
 <div class="cell_output docutils container">
-<div class="output stderr highlight-myst-ansi notranslate"><div class="highlight"><pre><span></span>/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:485: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.
-  warnings.warn(
-/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:485: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.
-  warnings.warn(
-/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:485: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.
-  warnings.warn(
+<div class="output stderr highlight-myst-ansi notranslate"><div class="highlight"><pre><span></span>[Stage 3:&gt;                                                          (0 + 1) / 1]
 </pre></div>
 </div>
 <div class="output stream highlight-myst-ansi notranslate"><div class="highlight"><pre><span></span>+---------+-------------------+----------+-------------+-------------+
@@ -1967,10 +1958,9 @@ <h2><span class="section-number">6.7.16. </span>Scaling Time-Series Forecasting
 only showing top 5 rows
 </pre></div>
 </div>
-<div class="output stderr highlight-myst-ansi notranslate"><div class="highlight"><pre><span></span>/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:485: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.
-  warnings.warn(
-/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
+<div class="output stderr highlight-myst-ansi notranslate"><div class="highlight"><pre><span></span>/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
   warnings.warn(&#39;resource_tracker: There appear to be %d &#39;
+                                                                                
 </pre></div>
 </div>
 </div>
diff --git a/docs/_sources/Chapter5/time_series.ipynb b/docs/_sources/Chapter5/time_series.ipynb
@@ -2271,20 +2271,6 @@
     "!pip install statsforecast pyspark\n"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "id": "da891a0f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os \n",
-    "\n",
-    "# this makes it so that the outputs of the predict methods have the id as a column \n",
-    "# instead of as the index\n",
-    "os.environ['NIXTLA_ID_AS_COL'] = '1'"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "3d8c3905",
@@ -2304,15 +2290,26 @@
    "source": [
     "from pyspark.sql import SparkSession\n",
     "\n",
-    "spark = SparkSession.builder.getOrCreate()\n"
+    "spark = SparkSession.builder.config(\n",
+    "    \"spark.executorEnv.NIXTLA_ID_AS_COL\", \"1\"\n",
+    ").getOrCreate()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 2,
    "id": "87bfec5a",
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:27: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
+      "  from tqdm.autonotebook import tqdm\n",
+      "                                                                                \r"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -2349,20 +2346,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 3,
    "id": "be012038",
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:485: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n",
-      "  warnings.warn(\n",
-      "/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:485: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n",
-      "  warnings.warn(\n",
-      "/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:485: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n",
-      "  warnings.warn(\n"
+      "[Stage 3:>                                                          (0 + 1) / 1]\r"
      ]
     },
     {
@@ -2386,10 +2378,9 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/site-packages/statsforecast/core.py:485: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n",
-      "  warnings.warn(\n",
       "/Users/khuyentran/.pyenv/versions/3.8.16/lib/python3.8/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown\n",
-      "  warnings.warn('resource_tracker: There appear to be %d '\n"
+      "  warnings.warn('resource_tracker: There appear to be %d '\n",
+      "                                                                                \r"
      ]
     }
    ],
diff --git a/docs/searchindex.js b/docs/searchindex.js