Skip to content

Commit 8328f99

Browse files
committed
ML: Use testbook instead of pytest-notebook
testbook, although its most recent release dates back to 2021, provides a more convenient interface: it effectively only runs the notebooks to completion, without bothering about detailed cell-by-cell comparison. https://pypi.org/project/testbook/ `cratedb_rag_customer_support_langchain.ipynb` needs this, because the bottom half, where it connects to the Jina API, needs to be masked.
1 parent 9e66e2c commit 8328f99

File tree

10 files changed

+60
-100
lines changed

10 files changed

+60
-100
lines changed

topic/machine-learning/automl/pyproject.toml

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -19,38 +19,6 @@ xfail_strict = true
1919
markers = [
2020
]
2121

22-
# pytest-notebook settings
23-
nb_test_files = true
24-
nb_coverage = false
25-
# Default cell timeout is 120 seconds. For heavy computing, it needs to be increased.
26-
nb_exec_timeout = 240
27-
nb_diff_replace = [
28-
# Compensate output of `crash`.
29-
'"/cells/*/outputs/*/text" "\(\d.\d+ sec\)" "(0.000 sec)"',
30-
# Compensate other outputs.
31-
'"/cells/*/outputs/*/data/text/html" "T_....." "T_na"',
32-
'"/cells/*/outputs/*/data/text/plain" "IPython.core.display.HTML object" "pandas.io.formats.style.Styler"',
33-
'"/cells/*/outputs/*/data/text/plain" "pandas.io.formats.style.Styler at 0x.+" "pandas.io.formats.style.Styler"',
34-
'"/cells/*/outputs/*/data/application/vnd.jupyter.widget-view+json" "model_id: .+" "model_id: na"',
35-
'"/cells/*/outputs/*/data/text/html" "\>\d+\.\d+\<\/td\>" "0.3333"',
36-
]
37-
# `vector_search.py` does not include any output(s).
38-
nb_diff_ignore = [
39-
"/metadata/language_info",
40-
"/metadata/widgets",
41-
"/cells/*/execution_count",
42-
"/cells/*/outputs/*/execution_count",
43-
"/cells/*/outputs/*/metadata/nbreg",
44-
# Ignore images.
45-
"/cells/*/outputs/*/data/image/png",
46-
# Ignore all cell output. It is too tedious to compare and maintain.
47-
# The validation hereby extends exclusively to the _execution_ of notebook cells,
48-
# able to catch syntax errors, module import flaws, and runtime errors.
49-
# However, the validation will not catch any regressions on actual cell output,
50-
# or whether any output is produced at all.
51-
"/cells/*/outputs",
52-
]
53-
5422
[tool.coverage.run]
5523
branch = false
5624

topic/machine-learning/automl/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ plotly<5.21
55
pycaret[models,parallel,test]==3.3.1
66
pydantic<2
77
python-dotenv<2
8+
sqlalchemy==2.*
89

910
# Development.
1011
# mlflow-cratedb @ git+https://github.com/crate-workbench/mlflow-cratedb.git@main

topic/machine-learning/automl/test.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@
2222

2323
import pytest
2424
from cratedb_toolkit.util import DatabaseAdapter
25-
from pueblo.testing.folder import str_list, list_notebooks, list_python_files
26-
from pueblo.testing.snippet import pytest_notebook, pytest_module_function
25+
from pueblo.testing.folder import str_list, list_python_files
26+
from pueblo.testing.notebook import generate_tests
27+
from pueblo.testing.snippet import pytest_module_function
28+
from testbook import testbook
2729

2830
HERE = Path(__file__).parent
2931

@@ -57,15 +59,20 @@ def churn_dataset(cratedb):
5759
cratedb.run_sql("REFRESH TABLE pycaret_churn;")
5860

5961

60-
@pytest.mark.parametrize("notebook", str_list(list_notebooks(HERE)))
61-
def test_notebook(request, notebook: str):
62+
def pytest_generate_tests(metafunc):
6263
"""
63-
From individual Jupyter Notebook file, collect cells as pytest
64-
test cases, and run them.
64+
Generate pytest test case per Jupyter Notebook.
65+
"""
66+
here = Path(__file__).parent
67+
generate_tests(metafunc, path=here)
68+
6569

66-
Not using `NBRegressionFixture`, because it would manually need to be configured.
70+
def test_notebook(notebook):
71+
"""
72+
Execute Jupyter Notebook, one test case per .ipynb file.
6773
"""
68-
pytest_notebook(request=request, filepath=notebook)
74+
with testbook(notebook) as tb:
75+
tb.execute()
6976

7077

7178
@pytest.mark.parametrize("pyfile", str_list(list_python_files(HERE)))

topic/machine-learning/llm-langchain/conftest.py

Lines changed: 0 additions & 6 deletions
This file was deleted.

topic/machine-learning/llm-langchain/pyproject.toml

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,6 @@ xfail_strict = true
1919
markers = [
2020
]
2121

22-
# pytest-notebook settings
23-
nb_test_files = true
24-
nb_coverage = true
25-
nb_diff_replace = [
26-
# Compensate output of `crash`.
27-
'"/cells/*/outputs/*/text" "\(\d.\d+ sec\)" "(0.000 sec)"',
28-
]
29-
# `vector_search.py` does not include any output(s).
30-
nb_diff_ignore = [
31-
"/metadata/language_info",
32-
"/cells/*/execution_count",
33-
"/cells/*/outputs/*/execution_count",
34-
35-
# Do not compare details of cell outputs.
36-
# It is impossible to maintain efficiently.
37-
"/cells/*/outputs",
38-
]
39-
4022
[tool.coverage.run]
4123
branch = false
4224

topic/machine-learning/llm-langchain/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ pydantic>=1,<3
1111
pypdf<5
1212
python-dotenv<2
1313
requests-cache<2
14+
sqlalchemy==2.*
1415
unstructured<0.12
1516
google-cloud-aiplatform
1617
langchain-google-vertexai

topic/machine-learning/llm-langchain/test.py

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@
55
import pytest
66

77
from cratedb_toolkit.io.sql import DatabaseAdapter
8-
from pueblo.testing.folder import str_list, list_notebooks, list_python_files
9-
from pueblo.testing.snippet import pytest_module_function, pytest_notebook
8+
from nbclient.exceptions import CellExecutionError
9+
from pueblo.testing.folder import str_list, list_python_files
10+
from pueblo.testing.notebook import generate_tests
11+
from pueblo.testing.snippet import pytest_module_function
12+
from testbook import testbook
1013

1114
HERE = Path(__file__).parent
1215

@@ -26,20 +29,32 @@ def reset_database(cratedb):
2629
time.sleep(0.01)
2730

2831

29-
@pytest.mark.parametrize("notebook", str_list(list_notebooks(HERE)))
30-
def test_notebook(request, notebook: str):
32+
def pytest_generate_tests(metafunc):
3133
"""
32-
From individual Jupyter Notebook file, collect cells as pytest
33-
test cases, and run them.
34-
35-
Not using `NBRegressionFixture`, because it would manually need to be configured.
34+
Generate pytest test case per Jupyter Notebook.
3635
"""
36+
here = Path(__file__).parent
37+
generate_tests(metafunc, path=here)
3738

38-
# Skip Vertex AI examples, because authenticating is more complicated.
39-
if "vertexai" in str(notebook):
40-
raise pytest.skip("Skipping Vertex AI due to lack of authentication")
4139

42-
pytest_notebook(request=request, filepath=notebook)
40+
def test_notebook(notebook):
41+
"""
42+
Execute Jupyter Notebook, one test case per .ipynb file.
43+
"""
44+
# Skip Vertex AI examples, because authenticating is more complicated.
45+
if "vertexai" in notebook.name:
46+
raise pytest.skip(f"Skipping Vertex AI due to lack of authentication: {notebook.name}")
47+
48+
with testbook(notebook) as tb:
49+
try:
50+
tb.execute()
51+
52+
# Skip notebook if `pytest.exit()` is invoked, usually by
53+
# `getenvpass()`, when authentication token is not given.
54+
except CellExecutionError as ex:
55+
msg = str(ex)
56+
if "[skip-notebook]" in msg:
57+
raise pytest.skip(msg)
4358

4459

4560
@pytest.mark.parametrize("pyfile", str_list(list_python_files(HERE)))

topic/machine-learning/mlops-mlflow/pyproject.toml

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,6 @@ xfail_strict = true
1818
markers = [
1919
]
2020

21-
# pytest-notebook settings
22-
nb_test_files = true
23-
nb_coverage = true
24-
nb_diff_replace = [
25-
# Compensate output of `crash`.
26-
'"/cells/*/outputs/*/text" "\(\d.\d+ sec\)" "(0.000 sec)"',
27-
]
28-
# `vector_search.py` does not include any output(s).
29-
nb_diff_ignore = [
30-
"/metadata/language_info",
31-
"/cells/*/execution_count",
32-
"/cells/*/outputs/*/execution_count",
33-
# Ignore images.
34-
"/cells/*/outputs/*/data/image/png",
35-
]
36-
3721
[tool.coverage.run]
3822
branch = false
3923

topic/machine-learning/mlops-mlflow/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ distributed>=2024.4.1 # Python 3.11.9 breaks previous Dask
44
mlflow-cratedb==2.11.3
55
pydantic<3
66
salesforce-merlion>=2,<3
7+
sqlalchemy==2.*
78

89
# Development.
910
# mlflow-cratedb @ git+https://github.com/crate-workbench/mlflow-cratedb.git@main

topic/machine-learning/mlops-mlflow/test.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
import pytest
44

55
from cratedb_toolkit.util import DatabaseAdapter
6-
from pueblo.testing.folder import str_list, list_notebooks, list_python_files
7-
from pueblo.testing.snippet import pytest_module_function, pytest_notebook
6+
from pueblo.testing.folder import str_list, list_python_files
7+
from pueblo.testing.notebook import generate_tests
8+
from pueblo.testing.snippet import pytest_module_function
9+
from testbook import testbook
810

911
HERE = Path(__file__).parent
1012

@@ -22,15 +24,20 @@ def db_init(cratedb):
2224
cratedb.run_sql("DROP TABLE IF EXISTS machine_data;")
2325

2426

25-
@pytest.mark.parametrize("notebook", str_list(list_notebooks(HERE)))
26-
def test_notebook(request, notebook: str):
27+
def pytest_generate_tests(metafunc):
2728
"""
28-
From individual Jupyter Notebook file, collect cells as pytest
29-
test cases, and run them.
29+
Generate pytest test case per Jupyter Notebook.
30+
"""
31+
here = Path(__file__).parent
32+
generate_tests(metafunc, path=here)
33+
3034

31-
Not using `NBRegressionFixture`, because it would manually need to be configured.
35+
def test_notebook(notebook):
36+
"""
37+
Execute Jupyter Notebook, one test case per .ipynb file.
3238
"""
33-
pytest_notebook(request=request, filepath=notebook)
39+
with testbook(notebook) as tb:
40+
tb.execute()
3441

3542

3643
@pytest.mark.parametrize("pyfile", str_list(list_python_files(HERE)))

0 commit comments

Comments
 (0)