Time Series QA: Add CI configuration

amotl · amotl · commit c7c0fbe4e563 · 2024-03-19T20:53:01.000+01:00
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -97,6 +97,11 @@ updates:
     schedule:
       interval: "weekly"
 
+  - directory: "/topic/timeseries"  # Python requirements of the time series notebooks
+    package-ecosystem: "pip"
+    schedule:
+      interval: "weekly"
+
   # Testing.
 
   - directory: "/testing/testcontainers/java"
diff --git a/.github/workflows/timeseries.yml b/.github/workflows/timeseries.yml
@@ -0,0 +1,92 @@
+name: Time Series
+
+on:
+  pull_request:
+    # `~` is YAML null: no branch filter is configured, presumably so that
+    # pull requests against any base branch trigger the workflow.
+    branches: ~
+    paths:
+    - '.github/workflows/timeseries.yml'
+    - 'topic/timeseries/**'
+    - 'requirements.txt'
+  push:
+    branches: [ main ]
+    paths:
+    - '.github/workflows/timeseries.yml'
+    - 'topic/timeseries/**'
+    - 'requirements.txt'
+
+  # Allow job to be triggered manually.
+  workflow_dispatch:
+
+  # Run job each night after CrateDB nightly has been published.
+  schedule:
+    - cron: '0 3 * * *'
+
+# The job only checks out sources and runs tests, so a read-only token
+# is sufficient.
+permissions:
+  contents: read
+
+# Cancel in-progress jobs when pushing to the same branch.
+concurrency:
+  cancel-in-progress: true
+  group: ${{ github.workflow }}-${{ github.ref }}
+
+jobs:
+  test:
+    name: "
+     Python: ${{ matrix.python-version }}
+     CrateDB: ${{ matrix.cratedb-version }}
+     on ${{ matrix.os }}"
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ 'ubuntu-latest' ]
+        python-version: [ '3.11' ]
+        cratedb-version: [ 'nightly' ]
+
+    # CrateDB service container. The test suite reaches it on localhost, see
+    # `CRATEDB_CONNECTION_STRING` in topic/timeseries/pyproject.toml.
+    services:
+      cratedb:
+        image: crate/crate:${{ matrix.cratedb-version }}
+        ports:
+          # Quoted, so that no YAML parser re-types the `host:container` pairs.
+          - '4200:4200'
+          - '5432:5432'
+        env:
+          CRATE_HEAP_SIZE: 4g
+
+    env:
+      # NOTE(review): topic/timeseries/requirements.txt lists no OpenAI client,
+      # so this secret may be a leftover from another workflow. Confirm, and
+      # drop it when unused, to avoid exposing it to every step.
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+
+    steps:
+
+      - name: Acquire sources
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: x64
+          cache: 'pip'
+          cache-dependency-path: |
+            requirements.txt
+            topic/timeseries/requirements.txt
+            topic/timeseries/requirements-dev.txt
+
+      # Root-level requirements; presumably they provide the `ngr` runner
+      # invoked by the next step -- TODO confirm.
+      - name: Install utilities
+        run: |
+          pip install -r requirements.txt
+
+      - name: Validate topic/timeseries
+        run: |
+          ngr test --accept-no-venv topic/timeseries
diff --git a/topic/timeseries/README.md b/topic/timeseries/README.md
@@ -48,4 +48,28 @@ repository, e.g. about machine learning, to see predictions and AutoML in action
   To ensure the dashboard functions correctly, it's necessary to configure the data source within Grafana. This dashboard uses the `grafana-postgresql-datasource` or another configured default data source. In the data source settings, fill in the necessary parameters to connect to your CrateDB instance. This includes setting up the database name (`database=doc`), user, password, and host.
 
 
+## Software Tests
+
+To run the software tests, set up a development sandbox in this folder
+and install all dependencies into it.
+```console
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt -r requirements-dev.txt
+```
+
+Then, invoke the software tests. They roughly validate all notebooks within
+this folder by running each of them to completion.
+```console
+time pytest
+```
+
+In order to run tests for individual notebooks by name, use the
+`-k` option for selecting by name fragment.
+```console
+time pytest -k explo
+time pytest -k visu
+```
+
+
 [CrateDB]: https://github.com/crate/crate
diff --git a/topic/timeseries/conftest.py b/topic/timeseries/conftest.py
@@ -0,0 +1,35 @@
+import os
+from pathlib import Path
+
+import pytest
+import sqlalchemy as sa
+from pueblo.testing.notebook import generate_tests
+
+
+def pytest_generate_tests(metafunc):
+    """
+    Generate pytest test case per Jupyter Notebook.
+    """
+    here = Path(__file__).parent
+    generate_tests(metafunc, path=here)
+
+
+@pytest.fixture(autouse=True)
+def reset_database_tables():
+    """
+    Before running a test case, reset relevant tables in database.
+    """
+
+    connection_string = os.environ.get("CRATEDB_CONNECTION_STRING")
+
+    engine = sa.create_engine(connection_string, echo=os.environ.get("DEBUG"))
+    connection = engine.connect()
+
+    reset_tables = [
+        "cities",
+        "weather_data",
+        "weather_stations",
+    ]
+
+    for table in reset_tables:
+        connection.execute(sa.text(f"DROP TABLE IF EXISTS {table};"))
diff --git a/topic/timeseries/pyproject.toml b/topic/timeseries/pyproject.toml
@@ -0,0 +1,30 @@
+[tool.pytest.ini_options]  # pytest settings for the notebook tests in this folder
+minversion = "2.0"
+addopts = """
+  -rfEX -p pytester --strict-markers --verbosity=3 --capture=no
+  """
+env = [  # conftest.py reads CRATEDB_CONNECTION_STRING from the environment
+    "CRATEDB_CONNECTION_STRING=crate://crate@localhost/?schema=notebook",
+    "PYDEVD_DISABLE_FILE_VALIDATION=1",
+]
+
+log_level = "DEBUG"
+log_cli_level = "DEBUG"
+
+testpaths = [  # look for tests in the Python files of this folder
+    "*.py",
+]
+xfail_strict = true
+markers = [  # no custom markers registered; addopts enables --strict-markers
+]
+
+[tool.coverage.run]
+branch = false
+
+[tool.coverage.report]
+fail_under = 0  # threshold of zero: coverage never fails the run
+show_missing = true
+omit = [  # leave test infrastructure out of the coverage report
+    "conftest.py",
+    "test*.py",
+]
diff --git a/topic/timeseries/requirements-dev.txt b/topic/timeseries/requirements-dev.txt
@@ -0,0 +1,5 @@
+# Released package, currently disabled in favor of the development branch.
+# pueblo[notebook,testing]>=0.0.7
+
+# Development: installed from a Git branch, so not a reproducible pin.
+pueblo[notebook,testing] @ git+https://github.com/pyveci/pueblo.git@amo/testbook
diff --git a/topic/timeseries/requirements.txt b/topic/timeseries/requirements.txt
@@ -1,3 +1,6 @@
-pycaret>=3.0
-refinitiv-data
 crate[sqlalchemy]==0.34.0
+refinitiv-data<1.7
+pandas<2
+pycaret>=3.0,<3.4
+pydantic<2
+sqlalchemy<2
diff --git a/topic/timeseries/test.py b/topic/timeseries/test.py
@@ -0,0 +1,12 @@
+import pytest
+from testbook import testbook
+
+
+def test_notebook(notebook):
+    """
+    Execute Jupyter Notebook, one test case per .ipynb file.
+    """
+    if notebook.name == "dask-weather-data-import.ipynb":
+        raise pytest.skip("Depends on DOWNLOAD_PATH/daily_weather.parquet")
+    with testbook(notebook) as tb:
+        tb.execute()