ML: Use testbook instead of pytest-notebook

amotl · amotl · commit 8328f99d8c04 · 2024-04-19T10:53:08.000+02:00
Although its most recent release dates back to 2021, testbook provides a more convenient interface: it effectively only runs the notebooks to completion, without bothering about detailed cell-by-cell output comparison. https://pypi.org/project/testbook/ `cratedb_rag_customer_support_langchain.ipynb` needs this, because its bottom half, where it connects to the Jina API, needs to be masked.
diff --git a/topic/machine-learning/automl/pyproject.toml b/topic/machine-learning/automl/pyproject.toml
@@ -19,38 +19,6 @@ xfail_strict = true
 markers = [
 ]
 
-# pytest-notebook settings
-nb_test_files = true
-nb_coverage = false
-# Default cell timeout is 120 seconds. For heavy computing, it needs to be increased.
-nb_exec_timeout = 240
-nb_diff_replace = [
-    # Compensate output of `crash`.
-    '"/cells/*/outputs/*/text" "\(\d.\d+ sec\)" "(0.000 sec)"',
-    # Compensate other outputs.
-    '"/cells/*/outputs/*/data/text/html" "T_....." "T_na"',
-    '"/cells/*/outputs/*/data/text/plain" "IPython.core.display.HTML object" "pandas.io.formats.style.Styler"',
-    '"/cells/*/outputs/*/data/text/plain" "pandas.io.formats.style.Styler at 0x.+" "pandas.io.formats.style.Styler"',
-    '"/cells/*/outputs/*/data/application/vnd.jupyter.widget-view+json" "model_id: .+" "model_id: na"',
-    '"/cells/*/outputs/*/data/text/html" "\>\d+\.\d+\<\/td\>" "0.3333"',
-]
-# `vector_search.py` does not include any output(s).
-nb_diff_ignore = [
-    "/metadata/language_info",
-    "/metadata/widgets",
-    "/cells/*/execution_count",
-    "/cells/*/outputs/*/execution_count",
-    "/cells/*/outputs/*/metadata/nbreg",
-    # Ignore images.
-    "/cells/*/outputs/*/data/image/png",
-    # Ignore all cell output. It is too tedious to compare and maintain.
-    # The validation hereby extends exclusively to the _execution_ of notebook cells,
-    # able to catch syntax errors, module import flaws, and runtime errors.
-    # However, the validation will not catch any regressions on actual cell output,
-    # or whether any output is produced at all.
-    "/cells/*/outputs",
-]
-
 [tool.coverage.run]
 branch = false
 
diff --git a/topic/machine-learning/automl/requirements.txt b/topic/machine-learning/automl/requirements.txt
@@ -5,6 +5,7 @@ plotly<5.21
 pycaret[models,parallel,test]==3.3.1
 pydantic<2
 python-dotenv<2
+sqlalchemy==2.*
 
 # Development.
 # mlflow-cratedb @ git+https://github.com/crate-workbench/mlflow-cratedb.git@main
diff --git a/topic/machine-learning/automl/test.py b/topic/machine-learning/automl/test.py
@@ -22,8 +22,10 @@
 
 import pytest
 from cratedb_toolkit.util import DatabaseAdapter
-from pueblo.testing.folder import str_list, list_notebooks, list_python_files
-from pueblo.testing.snippet import pytest_notebook, pytest_module_function
+from pueblo.testing.folder import str_list, list_python_files
+from pueblo.testing.notebook import generate_tests
+from pueblo.testing.snippet import pytest_module_function
+from testbook import testbook
 
 HERE = Path(__file__).parent
 
@@ -57,15 +59,20 @@ def churn_dataset(cratedb):
     cratedb.run_sql("REFRESH TABLE pycaret_churn;")
 
 
-@pytest.mark.parametrize("notebook", str_list(list_notebooks(HERE)))
-def test_notebook(request, notebook: str):
+def pytest_generate_tests(metafunc):
     """
-    From individual Jupyter Notebook file, collect cells as pytest
-    test cases, and run them.
+    Generate pytest test case per Jupyter Notebook.
+    """
+    here = Path(__file__).parent
+    generate_tests(metafunc, path=here)
+
 
-    Not using `NBRegressionFixture`, because it would manually need to be configured.
+def test_notebook(notebook):
+    """
+    Execute Jupyter Notebook, one test case per .ipynb file.
     """
-    pytest_notebook(request=request, filepath=notebook)
+    with testbook(notebook) as tb:
+        tb.execute()
 
 
 @pytest.mark.parametrize("pyfile", str_list(list_python_files(HERE)))
diff --git a/topic/machine-learning/llm-langchain/conftest.py b/topic/machine-learning/llm-langchain/conftest.py
diff --git a/topic/machine-learning/llm-langchain/pyproject.toml b/topic/machine-learning/llm-langchain/pyproject.toml
@@ -19,24 +19,6 @@ xfail_strict = true
 markers = [
 ]
 
-# pytest-notebook settings
-nb_test_files = true
-nb_coverage = true
-nb_diff_replace = [
-    # Compensate output of `crash`.
-    '"/cells/*/outputs/*/text" "\(\d.\d+ sec\)" "(0.000 sec)"',
-]
-# `vector_search.py` does not include any output(s).
-nb_diff_ignore = [
-    "/metadata/language_info",
-    "/cells/*/execution_count",
-    "/cells/*/outputs/*/execution_count",
-
-    # Do not compare details of cell outputs.
-    # It is impossible to maintain efficiently.
-    "/cells/*/outputs",
-]
-
 [tool.coverage.run]
 branch = false
 
diff --git a/topic/machine-learning/llm-langchain/requirements.txt b/topic/machine-learning/llm-langchain/requirements.txt
@@ -11,6 +11,7 @@ pydantic>=1,<3
 pypdf<5
 python-dotenv<2
 requests-cache<2
+sqlalchemy==2.*
 unstructured<0.12
 google-cloud-aiplatform
 langchain-google-vertexai
diff --git a/topic/machine-learning/llm-langchain/test.py b/topic/machine-learning/llm-langchain/test.py
@@ -5,8 +5,11 @@
 import pytest
 
 from cratedb_toolkit.io.sql import DatabaseAdapter
-from pueblo.testing.folder import str_list, list_notebooks, list_python_files
-from pueblo.testing.snippet import pytest_module_function, pytest_notebook
+from nbclient.exceptions import CellExecutionError
+from pueblo.testing.folder import str_list, list_python_files
+from pueblo.testing.notebook import generate_tests
+from pueblo.testing.snippet import pytest_module_function
+from testbook import testbook
 
 HERE = Path(__file__).parent
 
@@ -26,20 +29,32 @@ def reset_database(cratedb):
     time.sleep(0.01)
 
 
-@pytest.mark.parametrize("notebook", str_list(list_notebooks(HERE)))
-def test_notebook(request, notebook: str):
+def pytest_generate_tests(metafunc):
     """
-    From individual Jupyter Notebook file, collect cells as pytest
-    test cases, and run them.
-
-    Not using `NBRegressionFixture`, because it would manually need to be configured.
+    Generate pytest test case per Jupyter Notebook.
     """
+    here = Path(__file__).parent
+    generate_tests(metafunc, path=here)
 
-    # Skip Vertex AI examples, because authenticating is more complicated.
-    if "vertexai" in str(notebook):
-        raise pytest.skip("Skipping Vertex AI due to lack of authentication")
 
-    pytest_notebook(request=request, filepath=notebook)
+def test_notebook(notebook):
+    """
+    Execute Jupyter Notebook, one test case per .ipynb file.
+    """
+    # Skip Vertex AI examples, because authenticating is more complicated.
+    if "vertexai" in notebook.name:
+        raise pytest.skip(f"Skipping Vertex AI due to lack of authentication: {notebook.name}")
+
+    with testbook(notebook) as tb:
+        try:
+            tb.execute()
+
+        # Skip notebook if `pytest.exit()` is invoked, usually by
+        # `getenvpass()`, when authentication token is not given.
+        except CellExecutionError as ex:
+            msg = str(ex)
+            if "[skip-notebook]" in msg:
+                raise pytest.skip(msg)
 
 
 @pytest.mark.parametrize("pyfile", str_list(list_python_files(HERE)))
diff --git a/topic/machine-learning/mlops-mlflow/pyproject.toml b/topic/machine-learning/mlops-mlflow/pyproject.toml
@@ -18,22 +18,6 @@ xfail_strict = true
 markers = [
 ]
 
-# pytest-notebook settings
-nb_test_files = true
-nb_coverage = true
-nb_diff_replace = [
-    # Compensate output of `crash`.
-    '"/cells/*/outputs/*/text" "\(\d.\d+ sec\)" "(0.000 sec)"',
-]
-# `vector_search.py` does not include any output(s).
-nb_diff_ignore = [
-    "/metadata/language_info",
-    "/cells/*/execution_count",
-    "/cells/*/outputs/*/execution_count",
-    # Ignore images.
-    "/cells/*/outputs/*/data/image/png",
-]
-
 [tool.coverage.run]
 branch = false
 
diff --git a/topic/machine-learning/mlops-mlflow/requirements.txt b/topic/machine-learning/mlops-mlflow/requirements.txt
@@ -4,6 +4,7 @@ distributed>=2024.4.1  # Python 3.11.9 breaks previous Dask
 mlflow-cratedb==2.11.3
 pydantic<3
 salesforce-merlion>=2,<3
+sqlalchemy==2.*
 
 # Development.
 # mlflow-cratedb @ git+https://github.com/crate-workbench/mlflow-cratedb.git@main
diff --git a/topic/machine-learning/mlops-mlflow/test.py b/topic/machine-learning/mlops-mlflow/test.py
@@ -3,8 +3,10 @@
 import pytest
 
 from cratedb_toolkit.util import DatabaseAdapter
-from pueblo.testing.folder import str_list, list_notebooks, list_python_files
-from pueblo.testing.snippet import pytest_module_function, pytest_notebook
+from pueblo.testing.folder import str_list, list_python_files
+from pueblo.testing.notebook import generate_tests
+from pueblo.testing.snippet import pytest_module_function
+from testbook import testbook
 
 HERE = Path(__file__).parent
 
@@ -22,15 +24,20 @@ def db_init(cratedb):
     cratedb.run_sql("DROP TABLE IF EXISTS machine_data;")
 
 
-@pytest.mark.parametrize("notebook", str_list(list_notebooks(HERE)))
-def test_notebook(request, notebook: str):
+def pytest_generate_tests(metafunc):
     """
-    From individual Jupyter Notebook file, collect cells as pytest
-    test cases, and run them.
+    Generate pytest test case per Jupyter Notebook.
+    """
+    here = Path(__file__).parent
+    generate_tests(metafunc, path=here)
+
 
-    Not using `NBRegressionFixture`, because it would manually need to be configured.
+def test_notebook(notebook):
+    """
+    Execute Jupyter Notebook, one test case per .ipynb file.
     """
-    pytest_notebook(request=request, filepath=notebook)
+    with testbook(notebook) as tb:
+        tb.execute()
 
 
 @pytest.mark.parametrize("pyfile", str_list(list_python_files(HERE)))