From 6cbbd8e1dfdef8ed70b676a400441f9cf34fc10a Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Mon, 7 Apr 2025 19:20:54 -0400
Subject: [PATCH 1/2] gguf-py : support lazy tensor splitting

Splitting usually involves returning tuples of tensors, which need to be handled properly to avoid early eager evaluation.
---
 gguf-py/gguf/lazy.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/gguf-py/gguf/lazy.py b/gguf-py/gguf/lazy.py
index 8d4fece2dca86..8ebbe92b54ed2 100644
--- a/gguf-py/gguf/lazy.py
+++ b/gguf-py/gguf/lazy.py
@@ -139,6 +139,15 @@ def wrapped_fn(*args, **kwargs):
 
             if isinstance(res, cls._tensor_type):
                 return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
+            elif isinstance(res, tuple) and all(isinstance(t, cls._tensor_type) for t in res):
+                # share the evaluation between lazy tuple elements
+                shared_args: list = [args, None]
+                def eager_tuple_element(a: list[Any], i: int = 0, /, **kw) -> LazyBase:
+                    assert len(a) == 2
+                    if a[1] is None:
+                        a[1] = fn(*a[0], **kw)
+                    return a[1][i]
+                return tuple(cls(meta=cls.eager_to_meta(res[i]), args=(shared_args, i), kwargs=kwargs, func=eager_tuple_element) for i in range(len(res)))
             else:
                 del res  # not needed
                 # non-tensor return likely relies on the contents of the args

From da140da72a0d8fc49372fae4fc80eed41a26b732 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Mon, 7 Apr 2025 19:38:35 -0400
Subject: [PATCH 2/2] gguf-py : fix flake8 lint

---
 gguf-py/gguf/lazy.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gguf-py/gguf/lazy.py b/gguf-py/gguf/lazy.py
index 8ebbe92b54ed2..f9bcadae0224b 100644
--- a/gguf-py/gguf/lazy.py
+++ b/gguf-py/gguf/lazy.py
@@ -142,6 +142,7 @@ def wrapped_fn(*args, **kwargs):
             elif isinstance(res, tuple) and all(isinstance(t, cls._tensor_type) for t in res):
                 # share the evaluation between lazy tuple elements
                 shared_args: list = [args, None]
+
                 def eager_tuple_element(a: list[Any], i: int = 0, /, **kw) -> LazyBase:
                     assert len(a) == 2
                     if a[1] is None: