diff --git a/.gitignore b/.gitignore index 73489df..d587fb0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ dist build .ipynb_checkpoints __pycache__ +output-2* +jupyter_output_monitor/_version.py +jupyter_output_monitor/__pycache__/* diff --git a/README.md b/README.md index 9dd581c..1df8de5 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ The R and G values should be kept as (143, 56), and the B color should be unique Then, to run the notebook and monitor the changes in widget output, run: - jupyter-output-monitor --notebook mynotebook.ipynb + jupyter-output-monitor monitor --notebook mynotebook.ipynb Where ``mynotebook.ipynb`` is the name of your notebook. By default, this will open a window showing you what is happening, but you can also pass ``--headless`` @@ -36,7 +36,7 @@ to run in headless mode. If you want to test this on an existing Jupyter Lab instance, including remote ones, you can use ``--url`` instead of ``--notebook``: - jupyter-output-monitor http://localhost:8987/lab/tree/notebook.ipynb?token=7bb9a... + jupyter-output-monitor monitor --url http://localhost:8987/lab/tree/notebook.ipynb?token=7bb9a... Note that the URL should include the path to the notebook, and will likely require the token too. @@ -123,3 +123,14 @@ after the previous one. This is 10s by default but can be customized with ``--wait-after-execute=20`` for example. You should set this value so that the cell that takes the longest to fully execute will be expected to take less than this time. + +## Generating a report + +You can generate a copy of the input notebook with output screenshots and profiling +results inserted by using e.g.: + + jupyter-output-monitor report --notebook mynotebook.ipynb --results-dir=output + +Where ``--results-dir`` is the output directory generated with the ``monitor`` +command. By default, this will write a ``report.ipynb`` notebook, but you can +override the filename with ``--output-report-name``. 
def iso_to_path(time):
    """Return *time* with every colon swapped for a hyphen.

    The result is used to build output directory and screenshot file names
    from ISO timestamps; colons are invalid in filenames on Windows.
    """
    # Split on the colon and stitch the pieces back together with hyphens.
    pieces = time.split(":")
    return "-".join(pieces)
def _load_event_log(results_dir):
    """Read ``event_log.csv`` from *results_dir* and return its rows as dicts.

    Each row's ISO ``time`` string is replaced in place by the number of
    seconds elapsed since the first row of the log.

    Raises
    ------
    click.ClickException
        If the log contains no events, since there is nothing to report on.
    """
    log_path = os.path.join(results_dir, "event_log.csv")

    # The csv module asks for files to be opened with newline="" so that
    # newlines embedded in quoted fields are handled correctly.
    with open(log_path, newline="") as csvfile:
        log = list(csv.DictReader(csvfile))

    if not log:
        raise click.ClickException(f"No events found in {log_path}")

    start_time = datetime.datetime.fromisoformat(log[0]["time"])
    for row in log:
        elapsed = datetime.datetime.fromisoformat(row["time"]) - start_time
        row["time"] = elapsed.total_seconds()

    return log


def _group_events_by_cell(log):
    """Group log rows by executed cell and compute per-cell summary values.

    Returns a dict keyed by the ``index`` of each ``execute-input`` row. Each
    value holds that row under ``"execute-input"``, the list of subsequent
    ``output-changed`` rows under ``"output-changed"``, plus ``"total"`` (the
    ``dt`` of the last output change) and ``"n_updates"`` (number of output
    changes); both are ``None`` when the cell produced no output change.
    """
    results = {}
    last_executed_cell = None

    for row in log:
        index = row["index"]
        event = row["event"]

        if event == "execute-input" and index not in results:
            results[index] = {"execute-input": row, "output-changed": []}
            last_executed_cell = index

        elif event == "output-changed":
            # An output change logged before any cell was executed cannot be
            # attributed to a cell, so it is left out of the report.
            if last_executed_cell is None:
                continue
            cell_result = results[last_executed_cell]
            row["output_from_cell"] = last_executed_cell
            row["dt"] = row["time"] - cell_result["execute-input"]["time"]
            cell_result["output-changed"].append(row)

    for result in results.values():
        updates = result["output-changed"]
        result["total"] = updates[-1]["dt"] if updates else None
        result["n_updates"] = len(updates) if updates else None

    return results


def _format_annotation(idx, result):
    """Return the markdown annotation for one executed cell."""
    updates = result["output-changed"]
    if not updates:
        return f"#### Profiling result for cell {idx}: \nNo output.\n"

    # Only the file name is used: the report is written into the results
    # directory, next to the screenshots.
    screenshot_path = os.path.basename(updates[-1]["screenshot"])
    return (
        f"![output screenshot]({screenshot_path})\n\n"
        f"#### Profiling result for cell {idx}: \n * {result['total']:.2f} seconds "
        f"elapsed\n * {result['n_updates']:d} output updates\n"
    )


@report_group.command()
@click.option(
    "--notebook",
    required=True,
    help="The notebook that was profiled.",
)
@click.option(
    "--results-dir",
    required=True,
    help="Output results directory from the profiling",
)
@click.option(
    "--output-report-name",
    default="report.ipynb",
    help="Write a copy of the notebook containing screenshots and profiling results to a notebook with the specified name, in the results directory",
)
def report(notebook, results_dir, output_report_name):
    """Write a copy of the notebook annotated with profiling results.

    Reads ``event_log.csv`` from ``results_dir``, builds one markdown
    annotation per executed cell (last screenshot, elapsed time and number
    of output updates) and inserts each annotation after the corresponding
    non-empty code cell of ``notebook``. The result is written to
    ``output_report_name`` inside ``results_dir``.
    """
    results = _group_events_by_cell(_load_event_log(results_dir))

    # assemble annotations in markdown format for each executed code cell:
    markdown_annotations = [
        _format_annotation(idx, result) for idx, result in results.items()
    ]

    # read in the source notebook:
    nb = nbformat.read(notebook, nbformat.NO_CONVERT)

    # Weave the annotations in after the non-empty code cells, in order.
    # Annotations are matched to cells by position; if the notebook has more
    # non-empty code cells than were logged, the extra cells are left
    # without an annotation instead of failing with an IndexError.
    remaining_annotations = iter(markdown_annotations)
    new_cells = []
    for cell in nb["cells"]:
        new_cells.append(cell)
        if cell["cell_type"] == "code" and cell["source"]:
            annotation = next(remaining_annotations, None)
            if annotation is not None:
                new_cells.append(nbformat.v4.new_markdown_cell(annotation))

    nb["cells"] = new_cells

    output_notebook = os.path.join(results_dir, output_report_name)

    print(f"Writing notebook with profiling results to: {output_notebook}")

    nbformat.write(nbformat.from_dict(nb), output_notebook)