Skip to content

Optional notebook output with interleaved runtime and screenshots #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ dist
build
.ipynb_checkpoints
__pycache__
output-2*
jupyter_output_monitor/_version.py
jupyter_output_monitor/__pycache__/*
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ The R and G values should be kept as (143, 56), and the B color should be unique

Then, to run the notebook and monitor the changes in widget output, run:

jupyter-output-monitor --notebook mynotebook.ipynb
jupyter-output-monitor monitor --notebook mynotebook.ipynb

Where ``mynotebook.ipynb`` is the name of your notebook. By default, this will
open a window showing you what is happening, but you can also pass ``--headless``
Expand All @@ -36,7 +36,7 @@ to run in headless mode.
If you want to test this on an existing Jupyter Lab instance, including
remote ones, you can use ``--url`` instead of ``--notebook``:

jupyter-output-monitor http://localhost:8987/lab/tree/notebook.ipynb?token=7bb9a...
jupyter-output-monitor monitor --url http://localhost:8987/lab/tree/notebook.ipynb?token=7bb9a...

Note that the URL should include the path to the notebook, and will likely
require the token too.
Expand Down Expand Up @@ -123,3 +123,14 @@ after the previous one. This is 10s by default but can be customized with
``--wait-after-execute=20`` for example. You should set this value so that the
cell that takes the longest to fully execute will be expected to take less than
this time.

## Generating a report

You can generate a copy of the input notebook with output screenshots and profiling
results inserted by using e.g.:

jupyter-output-monitor report --notebook mynotebook.ipynb --results-dir=output

Where ``--results-dir`` is the output directory generated with the ``monitor``
command. By default, this will write a ``report.ipynb`` notebook to the results
directory, but you can override the filename with ``--output-report-name``.
4 changes: 3 additions & 1 deletion jupyter_output_monitor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from .__main__ import main
from ._monitor import monitor
from ._report import report
from ._version import __version__

__all__ = ["monitor", "__version__"]
__all__ = ["monitor", "report", "__version__", "main"]
9 changes: 7 additions & 2 deletions jupyter_output_monitor/__main__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
from ._monitor import monitor
import click

from ._monitor import monitor_group
from ._report import report_group

main = click.CommandCollection(sources=[monitor_group, report_group])

if __name__ == "__main__":
monitor()
main()
25 changes: 15 additions & 10 deletions jupyter_output_monitor/_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,21 @@
from ._server import jupyter_server
from ._utils import clear_notebook, isotime

__all__ = ["monitor", "monitor_group"]

RG_SPECIAL = (143, 56)


@click.command()
def iso_to_path(time):
    """Return the timestamp string *time* with every ``:`` turned into ``-``.

    Colons are invalid in filenames on Windows, so timestamps are passed
    through this helper before being used in file or directory names.
    """
    pieces = time.split(":")
    return "-".join(pieces)


# Click group that the ``monitor`` subcommand is registered on (via
# ``@monitor_group.command()``). The body is intentionally empty: the group
# only acts as a container for its subcommands.
@click.group()
def monitor_group():
    pass


@monitor_group.command()
@click.option(
"--notebook",
default=None,
Expand All @@ -42,7 +53,7 @@
@click.option("--headless", is_flag=True, help="Whether to run in headless mode")
def monitor(notebook, url, output, wait_after_execute, headless):
if output is None:
output = f"output-{isotime()}"
output = f"output-{iso_to_path(isotime())}"

if os.path.exists(output):
print(f"Output directory {output} already exists")
Expand Down Expand Up @@ -124,12 +135,9 @@ def _monitor_output(url, output, wait_after_execute, headless):

timestamp = isotime()

# Colons are invalid in filenames on Windows
filename_timestamp = timestamp.replace(":", "-")

screenshot_filename = os.path.join(
output,
f"input-{input_index:03d}-{filename_timestamp}.png",
f"input-{input_index:03d}-{iso_to_path(timestamp)}.png",
)
image = Image.open(BytesIO(screenshot_bytes))
image.save(screenshot_filename)
Expand Down Expand Up @@ -192,12 +200,9 @@ def _monitor_output(url, output, wait_after_execute, headless):

timestamp = isotime()

# Colons are invalid in filenames on Windows
filename_timestamp = timestamp.replace(":", "-")

screenshot_filename = os.path.join(
output,
f"output-{output_index:03d}-{filename_timestamp}.png",
f"output-{output_index:03d}-{iso_to_path(timestamp)}.png",
)
image = Image.open(BytesIO(screenshot_bytes))
image.save(screenshot_filename)
Expand Down
116 changes: 116 additions & 0 deletions jupyter_output_monitor/_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import csv
import datetime
import os

import click
import nbformat

__all__ = ["report", "report_group"]


# Click group that the ``report`` subcommand is registered on (via
# ``@report_group.command()``). The body is intentionally empty: the group
# only acts as a container for its subcommands.
@click.group()
def report_group():
    pass


# Annotation used for non-empty code cells that have no entry in the event log.
_NO_PROFILING_DATA = (
    "#### Profiling result: \nNo profiling data was recorded for this cell.\n"
)


def _load_event_log(results_dir):
    """Read ``event_log.csv`` from *results_dir* with times as elapsed seconds.

    Returns a list of row dicts in file order. Each row's ``"time"`` value is
    replaced by the number of seconds elapsed since the first row's time.

    Raises ``click.ClickException`` if the log contains no rows.
    """
    log_path = os.path.join(results_dir, "event_log.csv")

    # newline="" is what the csv module expects so that quoted fields
    # containing line breaks are parsed correctly.
    with open(log_path, newline="") as csvfile:
        log = list(csv.DictReader(csvfile))

    if not log:
        raise click.ClickException(f"No events found in {log_path}")

    # convert ISO times to elapsed times from the first logged event:
    start_time = datetime.datetime.fromisoformat(log[0]["time"])
    for row in log:
        row["time"] = (
            datetime.datetime.fromisoformat(row["time"]) - start_time
        ).total_seconds()

    return log


def _group_events_by_cell(log):
    """Group log rows by executed cell and compute per-cell summary values.

    Returns a dict keyed by the ``"index"`` of each ``execute-input`` row, in
    order of first execution. Each value holds the ``"execute-input"`` row,
    the list of ``"output-changed"`` rows attributed to that cell, and the
    summary entries ``"total"`` (seconds from execution to the last output
    change) and ``"n_updates"`` (number of output changes); both summary
    entries are ``None`` when the cell produced no output changes.
    """
    results = {}
    last_executed_cell = None

    for row in log:
        index = row["index"]
        event = row["event"]

        if event == "execute-input" and index not in results:
            results[index] = {
                "execute-input": row,
                "output-changed": [],
            }
            last_executed_cell = index

        elif event == "output-changed":
            if last_executed_cell is None:
                # An output changed before any cell was executed, so there is
                # no cell to attribute it to; ignore it rather than crash.
                continue
            executed = results[last_executed_cell]
            row["output_from_cell"] = last_executed_cell
            row["dt"] = row["time"] - executed["execute-input"]["time"]
            executed["output-changed"].append(row)

    # compute "final" timing results per execution cell
    for result in results.values():
        updates = result["output-changed"]
        result["total"] = updates[-1]["dt"] if updates else None
        result["n_updates"] = len(updates) if updates else None

    return results


def _format_annotation(idx, result):
    """Return the markdown annotation text for one executed cell."""
    updates = result["output-changed"]
    if not updates:
        return f"#### Profiling result for cell {idx}: \nNo output.\n"

    # Only the file name is used: the report is written into the results
    # directory, next to the screenshots.
    screenshot_path = os.path.basename(updates[-1]["screenshot"])
    return (
        f"![output screenshot]({screenshot_path})\n\n"
        f"#### Profiling result for cell {idx}: \n * {result['total']:.2f} seconds "
        f"elapsed\n * {result['n_updates']:d} output updates\n"
    )


@report_group.command()
@click.option(
    "--notebook",
    required=True,
    help="The notebook that was profiled.",
)
@click.option(
    "--results-dir",
    required=True,
    help="Output results directory from the profiling",
)
@click.option(
    "--output-report-name",
    default="report.ipynb",
    help="Write a copy of the notebook containing screenshots and profiling results to a notebook with the specified name, in the results directory",
)
def report(notebook, results_dir, output_report_name):
    """Write a copy of the profiled notebook annotated with monitoring results.

    After each non-empty code cell, a markdown cell is inserted showing the
    last output screenshot, the elapsed time and the number of output updates
    recorded for that cell in the results directory's event_log.csv.

    \f
    Everything after the form feed above is hidden from the CLI help text.

    Parameters
    ----------
    notebook
        Path of the notebook that was profiled.
    results_dir
        Directory containing ``event_log.csv`` and the screenshots; the
        report is written into this directory.
    output_report_name
        File name of the report notebook inside ``results_dir``.

    Raises
    ------
    click.ClickException
        If the event log contains no events.
    """
    results = _group_events_by_cell(_load_event_log(results_dir))

    # assemble annotations in markdown format for each executed code cell:
    markdown_annotations = [
        _format_annotation(idx, result) for idx, result in results.items()
    ]

    # read in the source notebook:
    nb = nbformat.read(notebook, nbformat.NO_CONVERT)

    # create new list of cells, weaving together the existing
    # cells and the new markdown cells with profiling results
    # and screenshots. Annotations are matched to non-empty code cells
    # by order; cells beyond the logged executions get a fallback note
    # instead of raising IndexError.
    remaining_annotations = iter(markdown_annotations)
    new_cells = []
    for cell in nb["cells"]:
        new_cells.append(cell)
        if cell["cell_type"] == "code" and len(cell["source"]):
            annotation = next(remaining_annotations, _NO_PROFILING_DATA)
            new_cells.append(nbformat.v4.new_markdown_cell(annotation))

    nb["cells"] = new_cells

    output_notebook = os.path.join(results_dir, output_report_name)

    print(f"Writing notebook with profiling results to: {output_notebook}")

    new_notebook = nbformat.from_dict(nb)
    nbformat.write(new_notebook, output_notebook)
17 changes: 17 additions & 0 deletions jupyter_output_monitor/tests/test_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def test_simple(tmp_path):
sys.executable,
"-m",
"jupyter_output_monitor",
"monitor",
"--notebook",
str(DATA / "simple.ipynb"),
"--output",
Expand Down Expand Up @@ -40,3 +41,19 @@ def test_simple(tmp_path):
with open(output_path / "event_log.csv") as f:
reader = csv.reader(f, delimiter=",")
assert len(list(reader)) == 10

subprocess.run(
[
sys.executable,
"-m",
"jupyter_output_monitor",
"report",
"--notebook",
str(DATA / "simple.ipynb"),
"--results-dir",
str(output_path),
],
check=True,
)

assert (output_path / "report.ipynb").exists()
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ dependencies = [
"click",
"pillow",
"playwright",
"solara[pytest]"
"solara[pytest]",
"nbformat",
]
dynamic = ["version"]

Expand All @@ -26,7 +27,7 @@ requires = ["setuptools",
build-backend = 'setuptools.build_meta'

[project.scripts]
jupyter-output-monitor = "jupyter_output_monitor:monitor"
jupyter-output-monitor = "jupyter_output_monitor:main"

[tool.setuptools]
zip-safe = false
Expand Down