From 5903f997ab71bd3fb61c6512a8831d15a6a1cafd Mon Sep 17 00:00:00 2001 From: "Brett M. Morris" Date: Thu, 7 Nov 2024 15:15:27 -0500 Subject: [PATCH 1/4] mac OS-friendly isotime paths --- jupyter_output_monitor/_monitor.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/jupyter_output_monitor/_monitor.py b/jupyter_output_monitor/_monitor.py index 6791394..a633a46 100644 --- a/jupyter_output_monitor/_monitor.py +++ b/jupyter_output_monitor/_monitor.py @@ -18,6 +18,10 @@ RG_SPECIAL = (143, 56) +def iso_to_path(time): + return time.replace(':', '-') + + @click.command() @click.option( "--notebook", @@ -42,7 +46,7 @@ @click.option("--headless", is_flag=True, help="Whether to run in headless mode") def monitor(notebook, url, output, wait_after_execute, headless): if output is None: - output = f"output-{isotime()}" + output = f'output-{iso_to_path(isotime())}' if os.path.exists(output): print(f"Output directory {output} already exists") @@ -124,12 +128,9 @@ def _monitor_output(url, output, wait_after_execute, headless): timestamp = isotime() - # Colons are invalid in filenames on Windows - filename_timestamp = timestamp.replace(":", "-") - screenshot_filename = os.path.join( output, - f"input-{input_index:03d}-{filename_timestamp}.png", + f"input-{input_index:03d}-{iso_to_path(timestamp)}.png", ) image = Image.open(BytesIO(screenshot_bytes)) image.save(screenshot_filename) @@ -192,12 +193,9 @@ def _monitor_output(url, output, wait_after_execute, headless): timestamp = isotime() - # Colons are invalid in filenames on Windows - filename_timestamp = timestamp.replace(":", "-") - screenshot_filename = os.path.join( output, - f"output-{output_index:03d}-{filename_timestamp}.png", + f"output-{output_index:03d}-{iso_to_path(timestamp)}.png", ) image = Image.open(BytesIO(screenshot_bytes)) image.save(screenshot_filename) From 8a6ca7609e4f4cb6bd28c3333993be9757d0a8f9 Mon Sep 17 00:00:00 2001 From: "Brett M. 
Morris" Date: Thu, 7 Nov 2024 20:47:21 -0500 Subject: [PATCH 2/4] add notebook-copy option --- .gitignore | 3 + README.md | 6 ++ jupyter_output_monitor/_monitor.py | 107 ++++++++++++++++++++++++++++- pyproject.toml | 1 + 4 files changed, 115 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 73489df..d587fb0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ dist build .ipynb_checkpoints __pycache__ +output-2* +jupyter_output_monitor/_version.py +jupyter_output_monitor/__pycache__/* diff --git a/README.md b/README.md index 9dd581c..9df5d06 100644 --- a/README.md +++ b/README.md @@ -123,3 +123,9 @@ after the previous one. This is 10s by default but can be customized with ``--wait-after-execute=20`` for example. You should set this value so that the cell that takes the longest to fully execute will be expected to take less than this time. + +### Notebook copy + +To save a copy of the notebook with the profiling results and +screenshots inserted after the executed code cells, +include ``--notebook-copy /path/to/notebook.ipynb``. 
diff --git a/jupyter_output_monitor/_monitor.py b/jupyter_output_monitor/_monitor.py index a633a46..6575c2c 100644 --- a/jupyter_output_monitor/_monitor.py +++ b/jupyter_output_monitor/_monitor.py @@ -43,8 +43,14 @@ def iso_to_path(time): default=10, help="Time in s to wait after executing each cell", ) -@click.option("--headless", is_flag=True, help="Whether to run in headless mode") -def monitor(notebook, url, output, wait_after_execute, headless): +@click.option("--headless", is_flag=True, help="Whether to run in headless mode",) +@click.option( + '--write-notebook-report', + default=None, + help='Write a copy of the notebook containing screenshots and profiling results to the specified path' +) +def monitor(notebook, url, output, wait_after_execute, headless, write_notebook_report): + if output is None: output = f'output-{iso_to_path(isotime())}' @@ -54,6 +60,11 @@ def monitor(notebook, url, output, wait_after_execute, headless): os.makedirs(output) + if write_notebook_report: + if os.path.exists(write_notebook_report): + print(f"Output notebook {write_notebook_report} already exists") + sys.exit(1) + if notebook is None and url is None: print("Either --notebook or --url should be specified") sys.exit(1) @@ -213,6 +224,98 @@ def _monitor_output(url, output, wait_after_execute, headless): print("Stopping monitoring output and moving on to next input cell") + if write_notebook_report: + _write_profiled_notebook_copy(output, write_notebook_report) + + +def _write_profiled_notebook_copy(output, event_log_path, copy_notebook): + log = np.recfromcsv(event_log_path, encoding='utf-8') + columns = open(event_log_path).read().splitlines()[0].split(',') + + # convert ISO times to elapsed times from first executed cell: + datetimes = [datetime.datetime.fromisoformat(dt) for dt in log['time']] + log['time'] = [(dt - datetimes[0]).total_seconds() for dt in datetimes] + + # cast ∆t's from strings to floats: + dtype = log.dtype.descr + dtype[0] = ('time', float) + log = 
log.astype(dtype) + + def row_to_dict(row): + return {k: v for k, v in zip(columns, row)} + + results = OrderedDict() + last_executed_cell = None + + # group timing results by execution cell + for i, row in enumerate(log): + isotime, event, index, screenshot_path = row + + if index not in results and event == 'execute-input': + results[index] = { + 'execute-input': None, + 'output-changed': [], + } + + results[index][event] = row_to_dict(row) + last_executed_cell = index + + elif event == 'output-changed': + row_dict = row_to_dict(row) + row_dict['output_from_cell'] = last_executed_cell + row_dict['dt'] = row_dict['time'] - results[last_executed_cell]['execute-input']['time'] + results[last_executed_cell][event].append(row_dict) + + # compute "final" timing results per execution cell + for idx, result in results.items(): + has_outputs = len(result['output-changed']) + result['total'] = result['output-changed'][-1]['dt'] if has_outputs else None + result['n_updates'] = len(result['output-changed']) if has_outputs else None + + # assemble annotations in markdown format for each executed code cell: + markdown_annotations = [] + for idx, result in results.items(): + if len(result['output-changed']): + screenshot_path = os.path.basename( + result['output-changed'][-1]['screenshot'] + ) + markdown_annotations.append( + f"![output screenshot]({screenshot_path})\n\n" + + f"#### Profiling result for cell {idx}: \n * {result['total']:.2f} seconds " + + f"elapsed\n * {result['n_updates']:d} output updates\n" + ) + else: + markdown_annotations.append( + f"#### Profiling result for cell {idx}: \nNo output.\n" + ) + + # read in the source notebook: + nb = nbformat.read(copy_notebook, nbformat.NO_CONVERT) + + # create new list of cells, weaving together the existing + # cells and the new markdown cells with profiling results + # and screenshots: + new_cells = [] + nonempty_code_cell_idx = -1 + for i, cell in enumerate(nb['cells']): + new_cells.append(cell) + if cell['cell_type'] 
== 'code' and len(cell['source']): + nonempty_code_cell_idx += 1 + new_cells.append( + nbformat.v4.new_markdown_cell( + markdown_annotations[nonempty_code_cell_idx] + ) + ) + + nb['cells'] = new_cells + + notebook_copy_path = os.path.join( + output, + os.path.basename(copy_notebook).replace('.ipynb', '-profiling.ipynb') + ) + print(f'Writing notebook with profiling results to: {notebook_copy_path}') + new_notebook = nbformat.from_dict(nb) + nbformat.write(new_notebook, notebook_copy_path) if __name__ == "__main__": monitor() diff --git a/pyproject.toml b/pyproject.toml index ee5ff53..20b1a24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ dependencies = [ "pillow", "playwright", "solara[pytest]" + "nbformat", ] dynamic = ["version"] From 5a4db1b27b3d4cb5086f607bbbfa8e8eab8645fc Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Wed, 13 Nov 2024 14:40:56 +0000 Subject: [PATCH 3/4] Add new report command --- jupyter_output_monitor/__init__.py | 3 +- jupyter_output_monitor/_monitor.py | 111 ++--------------------------- jupyter_output_monitor/_report.py | 111 +++++++++++++++++++++++++++++ pyproject.toml | 3 +- 4 files changed, 119 insertions(+), 109 deletions(-) create mode 100644 jupyter_output_monitor/_report.py diff --git a/jupyter_output_monitor/__init__.py b/jupyter_output_monitor/__init__.py index 598ec7d..70698bc 100644 --- a/jupyter_output_monitor/__init__.py +++ b/jupyter_output_monitor/__init__.py @@ -1,4 +1,5 @@ from ._monitor import monitor +from ._report import report from ._version import __version__ -__all__ = ["monitor", "__version__"] +__all__ = ["monitor", "report", "__version__"] diff --git a/jupyter_output_monitor/_monitor.py b/jupyter_output_monitor/_monitor.py index 6575c2c..70fc048 100644 --- a/jupyter_output_monitor/_monitor.py +++ b/jupyter_output_monitor/_monitor.py @@ -19,7 +19,7 @@ def iso_to_path(time): - return time.replace(':', '-') + return time.replace(":", "-") @click.command() @@ -43,16 +43,10 @@ def 
iso_to_path(time): default=10, help="Time in s to wait after executing each cell", ) -@click.option("--headless", is_flag=True, help="Whether to run in headless mode",) -@click.option( - '--write-notebook-report', - default=None, - help='Write a copy of the notebook containing screenshots and profiling results to the specified path' -) -def monitor(notebook, url, output, wait_after_execute, headless, write_notebook_report): - +@click.option("--headless", is_flag=True, help="Whether to run in headless mode") +def monitor(notebook, url, output, wait_after_execute, headless): if output is None: - output = f'output-{iso_to_path(isotime())}' + output = f"output-{iso_to_path(isotime())}" if os.path.exists(output): print(f"Output directory {output} already exists") @@ -60,11 +54,6 @@ def monitor(notebook, url, output, wait_after_execute, headless, write_notebook_ os.makedirs(output) - if write_notebook_report: - if os.path.exists(write_notebook_report): - print(f"Output notebook {write_notebook_report} already exists") - sys.exit(1) - if notebook is None and url is None: print("Either --notebook or --url should be specified") sys.exit(1) @@ -224,98 +213,6 @@ def _monitor_output(url, output, wait_after_execute, headless): print("Stopping monitoring output and moving on to next input cell") - if write_notebook_report: - _write_profiled_notebook_copy(output, write_notebook_report) - - -def _write_profiled_notebook_copy(output, event_log_path, copy_notebook): - log = np.recfromcsv(event_log_path, encoding='utf-8') - columns = open(event_log_path).read().splitlines()[0].split(',') - - # convert ISO times to elapsed times from first executed cell: - datetimes = [datetime.datetime.fromisoformat(dt) for dt in log['time']] - log['time'] = [(dt - datetimes[0]).total_seconds() for dt in datetimes] - - # cast ∆t's from strings to floats: - dtype = log.dtype.descr - dtype[0] = ('time', float) - log = log.astype(dtype) - - def row_to_dict(row): - return {k: v for k, v in zip(columns, 
row)} - - results = OrderedDict() - last_executed_cell = None - - # group timing results by execution cell - for i, row in enumerate(log): - isotime, event, index, screenshot_path = row - - if index not in results and event == 'execute-input': - results[index] = { - 'execute-input': None, - 'output-changed': [], - } - - results[index][event] = row_to_dict(row) - last_executed_cell = index - - elif event == 'output-changed': - row_dict = row_to_dict(row) - row_dict['output_from_cell'] = last_executed_cell - row_dict['dt'] = row_dict['time'] - results[last_executed_cell]['execute-input']['time'] - results[last_executed_cell][event].append(row_dict) - - # compute "final" timing results per execution cell - for idx, result in results.items(): - has_outputs = len(result['output-changed']) - result['total'] = result['output-changed'][-1]['dt'] if has_outputs else None - result['n_updates'] = len(result['output-changed']) if has_outputs else None - - # assemble annotations in markdown format for each executed code cell: - markdown_annotations = [] - for idx, result in results.items(): - if len(result['output-changed']): - screenshot_path = os.path.basename( - result['output-changed'][-1]['screenshot'] - ) - markdown_annotations.append( - f"![output screenshot]({screenshot_path})\n\n" + - f"#### Profiling result for cell {idx}: \n * {result['total']:.2f} seconds " + - f"elapsed\n * {result['n_updates']:d} output updates\n" - ) - else: - markdown_annotations.append( - f"#### Profiling result for cell {idx}: \nNo output.\n" - ) - - # read in the source notebook: - nb = nbformat.read(copy_notebook, nbformat.NO_CONVERT) - - # create new list of cells, weaving together the existing - # cells and the new markdown cells with profiling results - # and screenshots: - new_cells = [] - nonempty_code_cell_idx = -1 - for i, cell in enumerate(nb['cells']): - new_cells.append(cell) - if cell['cell_type'] == 'code' and len(cell['source']): - nonempty_code_cell_idx += 1 - new_cells.append( 
- nbformat.v4.new_markdown_cell( - markdown_annotations[nonempty_code_cell_idx] - ) - ) - - nb['cells'] = new_cells - - notebook_copy_path = os.path.join( - output, - os.path.basename(copy_notebook).replace('.ipynb', '-profiling.ipynb') - ) - print(f'Writing notebook with profiling results to: {notebook_copy_path}') - new_notebook = nbformat.from_dict(nb) - nbformat.write(new_notebook, notebook_copy_path) if __name__ == "__main__": monitor() diff --git a/jupyter_output_monitor/_report.py b/jupyter_output_monitor/_report.py new file mode 100644 index 0000000..5e80387 --- /dev/null +++ b/jupyter_output_monitor/_report.py @@ -0,0 +1,111 @@ +import csv +import datetime +import os + +import click +import nbformat + +__all__ = ["report"] + + +@click.command() +@click.option( + "--notebook", + default=None, + help="The notebook that was profiled.", +) +@click.option( + "--results-dir", + default=None, + help="Output results directory from the profiling", +) +@click.option( + "--output-report-name", + default="report.ipynb", + help="Write a copy of the notebook containing screenshots and profiling results to a notebook with the specified name, in the results directory", +) +def report(notebook, results_dir, output_report_name): + with open(os.path.join(results_dir, "event_log.csv")) as csvfile: + reader = csv.DictReader(csvfile) + log = list(reader) + + # convert ISO times to elapsed times from first executed cell: + start_time = datetime.datetime.fromisoformat(log[0]["time"]) + for row in log: + row["time"] = ( + datetime.datetime.fromisoformat(row["time"]) - start_time + ).total_seconds() + + results = {} + last_executed_cell = None + + # group timing results by execution cell + for row in log: + index = row["index"] + event = row["event"] + + if index not in results and event == "execute-input": + results[index] = { + "execute-input": None, + "output-changed": [], + } + + results[index][event] = row + last_executed_cell = index + + elif event == "output-changed": + 
row["output_from_cell"] = last_executed_cell + row["dt"] = ( + row["time"] - results[last_executed_cell]["execute-input"]["time"] + ) + results[last_executed_cell][event].append(row) + + # compute "final" timing results per execution cell + for result in results.values(): + has_outputs = len(result["output-changed"]) + result["total"] = result["output-changed"][-1]["dt"] if has_outputs else None + result["n_updates"] = len(result["output-changed"]) if has_outputs else None + + # assemble annotations in markdown format for each executed code cell: + markdown_annotations = [] + for idx, result in results.items(): + if len(result["output-changed"]): + screenshot_path = os.path.basename( + result["output-changed"][-1]["screenshot"], + ) + markdown_annotations.append( + f"![output screenshot]({screenshot_path})\n\n" + f"#### Profiling result for cell {idx}: \n * {result['total']:.2f} seconds " + f"elapsed\n * {result['n_updates']:d} output updates\n", + ) + else: + markdown_annotations.append( + f"#### Profiling result for cell {idx}: \nNo output.\n", + ) + + # read in the source notebook: + nb = nbformat.read(notebook, nbformat.NO_CONVERT) + + # create new list of cells, weaving together the existing + # cells and the new markdown cells with profiling results + # and screenshots: + new_cells = [] + nonempty_code_cell_idx = -1 + for cell in nb["cells"]: + new_cells.append(cell) + if cell["cell_type"] == "code" and len(cell["source"]): + nonempty_code_cell_idx += 1 + new_cells.append( + nbformat.v4.new_markdown_cell( + markdown_annotations[nonempty_code_cell_idx], + ), + ) + + nb["cells"] = new_cells + + output_notebook = os.path.join(results_dir, output_report_name) + + print(f"Writing notebook with profiling results to: {output_notebook}") + + new_notebook = nbformat.from_dict(nb) + nbformat.write(new_notebook, output_notebook) diff --git a/pyproject.toml b/pyproject.toml index 20b1a24..c8fd0bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ 
dependencies = [ "click", "pillow", "playwright", - "solara[pytest]" + "solara[pytest]", "nbformat", ] dynamic = ["version"] @@ -28,6 +28,7 @@ build-backend = 'setuptools.build_meta' [project.scripts] jupyter-output-monitor = "jupyter_output_monitor:monitor" +jupyter-output-monitor-report = "jupyter_output_monitor:report" [tool.setuptools] zip-safe = false From a40f822534405623e6f9e5385d55da9b5e0b2434 Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Wed, 13 Nov 2024 15:00:08 +0000 Subject: [PATCH 4/4] Make monitor and report sub-commands --- README.md | 17 +++++++++++------ jupyter_output_monitor/__init__.py | 3 ++- jupyter_output_monitor/__main__.py | 9 +++++++-- jupyter_output_monitor/_monitor.py | 9 ++++++++- jupyter_output_monitor/_report.py | 9 +++++++-- jupyter_output_monitor/tests/test_monitor.py | 17 +++++++++++++++++ pyproject.toml | 3 +-- 7 files changed, 53 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 9df5d06..1df8de5 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ The R and G values should be kept as (143, 56), and the B color should be unique Then, to run the notebook and monitor the changes in widget output, run: - jupyter-output-monitor --notebook mynotebook.ipynb + jupyter-output-monitor monitor --notebook mynotebook.ipynb Where ``mynotebook.ipynb`` is the name of your notebook. By default, this will open a window showing you what is happening, but you can also pass ``--headless`` @@ -36,7 +36,7 @@ to run in headless mode. If you want to test this on an existing Jupyter Lab instance, including remote ones, you can use ``--url`` instead of ``--notebook``: - jupyter-output-monitor http://localhost:8987/lab/tree/notebook.ipynb?token=7bb9a... + jupyter-output-monitor monitor --url http://localhost:8987/lab/tree/notebook.ipynb?token=7bb9a... Note that the URL should include the path to the notebook, and will likely require the token too. @@ -124,8 +124,13 @@ after the previous one. 
This is 10s by default but can be customized with cell that takes the longest to fully execute will be expected to take less than this time. -### Notebook copy +## Generating a report -To save a copy of the notebook with the profiling results and -screenshots inserted after the executed code cells, -include ``--notebook-copy /path/to/notebook.ipynb``. +You can generate a copy of the input notebook with output screenshots and profiling +results inserted by using e.g.: + + jupyter-output-monitor report --notebook mynotebook.ipynb --results-dir=output + +Where ``--results-dir`` is the output directory generated with the ``monitor`` +command. By default, this will write a ``report.ipynb`` notebook, but you can +override the filename with ``--output-report-name``. diff --git a/jupyter_output_monitor/__init__.py b/jupyter_output_monitor/__init__.py index 70698bc..d0fcea4 100644 --- a/jupyter_output_monitor/__init__.py +++ b/jupyter_output_monitor/__init__.py @@ -1,5 +1,6 @@ +from .__main__ import main from ._monitor import monitor from ._report import report from ._version import __version__ -__all__ = ["monitor", "report", "__version__"] +__all__ = ["monitor", "report", "__version__", "main"] diff --git a/jupyter_output_monitor/__main__.py b/jupyter_output_monitor/__main__.py index 12c22cd..5137fcf 100644 --- a/jupyter_output_monitor/__main__.py +++ b/jupyter_output_monitor/__main__.py @@ -1,4 +1,9 @@ -from ._monitor import monitor +import click + +from ._monitor import monitor_group +from ._report import report_group + +main = click.CommandCollection(sources=[monitor_group, report_group]) if __name__ == "__main__": - monitor() + main() diff --git a/jupyter_output_monitor/_monitor.py b/jupyter_output_monitor/_monitor.py index 70fc048..c9a5aa4 100644 --- a/jupyter_output_monitor/_monitor.py +++ b/jupyter_output_monitor/_monitor.py @@ -15,6 +15,8 @@ from ._server import jupyter_server from ._utils import clear_notebook, isotime +__all__ = ["monitor", "monitor_group"] + 
RG_SPECIAL = (143, 56) @@ -22,7 +24,12 @@ def iso_to_path(time): return time.replace(":", "-") -@click.command() +@click.group() +def monitor_group(): + pass + + +@monitor_group.command() @click.option( "--notebook", default=None, diff --git a/jupyter_output_monitor/_report.py b/jupyter_output_monitor/_report.py index 5e80387..67e5fbe 100644 --- a/jupyter_output_monitor/_report.py +++ b/jupyter_output_monitor/_report.py @@ -5,10 +5,15 @@ import click import nbformat -__all__ = ["report"] +__all__ = ["report", "report_group"] -@click.command() +@click.group() +def report_group(): + pass + + +@report_group.command() @click.option( "--notebook", default=None, diff --git a/jupyter_output_monitor/tests/test_monitor.py b/jupyter_output_monitor/tests/test_monitor.py index 51fb922..70f25b0 100644 --- a/jupyter_output_monitor/tests/test_monitor.py +++ b/jupyter_output_monitor/tests/test_monitor.py @@ -13,6 +13,7 @@ def test_simple(tmp_path): sys.executable, "-m", "jupyter_output_monitor", + "monitor", "--notebook", str(DATA / "simple.ipynb"), "--output", @@ -40,3 +41,19 @@ def test_simple(tmp_path): with open(output_path / "event_log.csv") as f: reader = csv.reader(f, delimiter=",") assert len(list(reader)) == 10 + + subprocess.run( + [ + sys.executable, + "-m", + "jupyter_output_monitor", + "report", + "--notebook", + str(DATA / "simple.ipynb"), + "--results-dir", + str(output_path), + ], + check=True, + ) + + assert (output_path / "report.ipynb").exists() diff --git a/pyproject.toml b/pyproject.toml index c8fd0bf..5d91608 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,8 +27,7 @@ requires = ["setuptools", build-backend = 'setuptools.build_meta' [project.scripts] -jupyter-output-monitor = "jupyter_output_monitor:monitor" -jupyter-output-monitor-report = "jupyter_output_monitor:report" +jupyter-output-monitor = "jupyter_output_monitor:main" [tool.setuptools] zip-safe = false