Skip to content

Commit 544a060

Browse files
tomwhiteTomNicholas
authored andcommitted
Memray integration (#558)
* Use Memray to examine tasks running on `lithops` or `processes` executors * Add Memray documentation * Fix mypy * Update docs/user-guide/diagnostics.md Co-authored-by: Tom Nicholas <tom@cworthy.org> * Make memray check more defensive --------- Co-authored-by: Tom Nicholas <tom@cworthy.org>
1 parent 8ca25f1 commit 544a060

File tree

6 files changed

+85
-2
lines changed

6 files changed

+85
-2
lines changed

cubed/runtime/executors/lithops.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,17 @@
2727
)
2828
from cubed.runtime.pipeline import visit_node_generations, visit_nodes
2929
from cubed.runtime.types import Callback, DagExecutor
30-
from cubed.runtime.utils import handle_callbacks, handle_operation_start_callbacks
30+
from cubed.runtime.utils import (
31+
handle_callbacks,
32+
handle_operation_start_callbacks,
33+
profile_memray,
34+
)
3135
from cubed.spec import Spec
3236

3337
logger = logging.getLogger(__name__)
3438

3539

40+
@profile_memray
3641
def run_func(input, func=None, config=None, name=None, compute_id=None):
3742
result = func(input, config=config)
3843
return result
@@ -171,6 +176,7 @@ def execute_dag(
171176
) -> None:
172177
use_backups = kwargs.pop("use_backups", True)
173178
wait_dur_sec = kwargs.pop("wait_dur_sec", None)
179+
compute_id = kwargs.pop("compute_id")
174180
allowed_mem = spec.allowed_mem if spec is not None else None
175181
function_executor = FunctionExecutor(**kwargs)
176182
runtime_memory_mb = function_executor.config[function_executor.backend].get(
@@ -199,6 +205,7 @@ def execute_dag(
199205
func=pipeline.function,
200206
config=pipeline.config,
201207
name=name,
208+
compute_id=compute_id,
202209
):
203210
handle_callbacks(callbacks, stats)
204211
else:
@@ -224,7 +231,8 @@ def execute_dag(
224231
use_backups=use_backups,
225232
return_stats=True,
226233
wait_dur_sec=wait_dur_sec,
227-
# TODO: kwargs
234+
# TODO: other kwargs (func, config, name)
235+
compute_id=compute_id,
228236
):
229237
handle_callbacks(callbacks, stats)
230238

cubed/runtime/executors/local.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
execution_stats,
2020
handle_callbacks,
2121
handle_operation_start_callbacks,
22+
profile_memray,
2223
)
2324
from cubed.spec import Spec
2425

@@ -59,6 +60,7 @@ def execute_dag(
5960
[callback.on_task_end(event) for callback in callbacks]
6061

6162

63+
@profile_memray
6264
@execution_stats
6365
def run_func(input, func=None, config=None, name=None, compute_id=None):
6466
return func(input, config=config)

cubed/runtime/utils.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
11
import time
2+
from contextlib import nullcontext
23
from functools import partial
34
from itertools import islice
5+
from pathlib import Path
46

57
from cubed.runtime.types import OperationStartEvent, TaskEndEvent
68
from cubed.utils import peak_measured_mem
79

10+
try:
11+
import memray
12+
except ImportError:
13+
memray = None
14+
815
sym_counter = 0
916

1017

@@ -39,6 +46,32 @@ def execution_stats(func):
3946
return partial(execute_with_stats, func)
4047

4148

49+
def execute_with_memray(function, input, **kwargs):
50+
# only run memray if installed, and only for first input (for operations that run on block locations)
51+
if (
52+
memray is not None
53+
and "compute_id" in kwargs
54+
and isinstance(input, list)
55+
and all(isinstance(i, int) for i in input)
56+
and sum(input) == 0
57+
):
58+
compute_id = kwargs["compute_id"]
59+
name = kwargs["name"]
60+
memray_dir = Path(f"history/{compute_id}/memray")
61+
memray_dir.mkdir(parents=True, exist_ok=True)
62+
cm = memray.Tracker(memray_dir / f"{name}.bin")
63+
else:
64+
cm = nullcontext()
65+
with cm:
66+
result = result = function(input, **kwargs)
67+
return result
68+
69+
70+
def profile_memray(func):
71+
"""Decorator to profile a function call with memray."""
72+
return partial(execute_with_memray, func)
73+
74+
4275
def handle_operation_start_callbacks(callbacks, name):
4376
if callbacks is not None:
4477
event = OperationStartEvent(name)

docs/images/memray-add.png

75.8 KB
Loading

docs/user-guide/diagnostics.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,41 @@ The timeline callback will write a graphic `timeline.svg` to a directory with th
9292

9393
### Examples in use
9494
See the [examples](https://github.com/cubed-dev/cubed/blob/main/examples/README.md) for more information about how to use them.
95+
96+
## Memray
97+
98+
[Memray](https://github.com/bloomberg/memray), a memory profiler for Python, can be used to track and view memory allocations when running a single task in a Cubed computation.
99+
100+
This is not usually needed when using Cubed, but for developers writing new operations, improving projected memory sizes, or for debugging a memory issue, it can be very useful to understand how memory is actually allocated in Cubed.
101+
102+
To enable Memray memory profiling in Cubed, simply install memray (`pip install memray`). Then use a local executor that runs tasks in separate processes, such as `processes` (Python 3.11 or later) or `lithops`. When you run a computation, Cubed will enable Memray for the first task in each operation (so if an array has 100 chunks it will only produce one Memray trace).
103+
104+
Here is an example of a simple addition operation, with 200MB chunks. (It is adapted from [test_mem_utilization.py](https://github.com/cubed-dev/cubed/blob/main/cubed/tests/test_mem_utilization.py) in Cubed's test suite.)
105+
106+
```python
107+
import cubed.array_api as xp
108+
import cubed.random
109+
110+
a = cubed.random.random(
111+
(10000, 10000), chunks=(5000, 5000), spec=spec
112+
) # 200MB chunks
113+
b = cubed.random.random(
114+
(10000, 10000), chunks=(5000, 5000), spec=spec
115+
) # 200MB chunks
116+
c = xp.add(a, b)
117+
c.compute(optimize_graph=False)
118+
```
119+
120+
The optimizer is turned off so that generation of the random arrays is not fused with the add operation. This way we can see the memory allocations for that operation alone.
121+
122+
After the computation is complete there will be a collection of `.bin` files in the `history/compute-{id}/memray` directory - with one for each operation. To view them we convert them to HTML flame graphs as follows:
123+
124+
```shell
125+
(cd $(ls -d history/compute-* | tail -1)/memray; for f in $(ls *.bin); do echo $f; python -m memray flamegraph --temporal -f -o $f.html $f; done)
126+
```
127+
128+
Here is the flame graph for the add operation:
129+
130+
![Memray temporal view of an 'add' operation](../images/memray-add.png)
131+
132+
Annotations have been added to explain what is going on in this example. Note that reading a chunk from Zarr requires twice the chunk memory (400MB) since there is a buffer for the compressed Zarr block (200MB), as well as the resulting array (200MB). After the first chunk has been loaded the memory dips back to 200MB since the compressed buffer is no longer retained.

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ ignore_missing_imports = True
4040
ignore_missing_imports = True
4141
[mypy-IPython.*]
4242
ignore_missing_imports = True
43+
[mypy-memray.*]
44+
ignore_missing_imports = True
4345
[mypy-modal.*]
4446
ignore_missing_imports = True
4547
[mypy-matplotlib.*]

0 commit comments

Comments
 (0)