@@ -19,7 +19,7 @@
 from itertools import accumulate, chain, pairwise
 from math import floor
 from pathlib import Path
-from typing import TYPE_CHECKING, NamedTuple
+from typing import TYPE_CHECKING, Generic, NamedTuple, TypeVar
 
 import h5py
 import numpy as np
@@ -33,6 +33,7 @@
 from ..compat import (
     CSArray,
     CSMatrix,
+    CupyArray,
     CupyCSCMatrix,
     CupyCSMatrix,
     CupyCSRMatrix,
@@ -53,11 +54,13 @@
 
 SCIPY_1_15 = Version(scipy.__version__) >= Version("1.15rc0")
 
+DenseType = TypeVar("DenseType", np.ndarray, CupyArray)
 
-class CompressedVectors(NamedTuple):
-    data: np.ndarray
-    indices: np.ndarray
-    indptr: np.ndarray
+
+class CompressedVectors(NamedTuple, Generic[DenseType]):
+    data: DenseType
+    indices: DenseType
+    indptr: DenseType
 
 
 def slice_len(s: slice, l: int) -> int:
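The hunk above makes `CompressedVectors` generic over the dense array type it holds, so one instance carries either all-NumPy or all-CuPy arrays. A minimal, self-contained sketch of that pattern follows; the class and variable names are illustrative only, and the CuPy side is stubbed out so the snippet runs without a GPU.

```python
# Sketch only (not part of the diff): a NamedTuple parametrized by a
# constrained TypeVar, mirroring the DenseType pattern above.
# Note: subclassing both NamedTuple and Generic requires Python >= 3.11.
from typing import Generic, NamedTuple, TypeVar

import numpy as np


class FakeCupyArray:  # placeholder for the real CupyArray alias from ..compat
    ...


DenseT = TypeVar("DenseT", np.ndarray, FakeCupyArray)


class Vectors(NamedTuple, Generic[DenseT]):
    data: DenseT
    indices: DenseT
    indptr: DenseT


# All three fields share one concrete array type per instance:
v: Vectors[np.ndarray] = Vectors(
    data=np.array([1.0, 2.0]),
    indices=np.array([0, 3]),
    indptr=np.array([0, 2]),
)
```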
@@ -153,7 +156,11 @@ def _get_contiguous_compressed_slice(self, s: slice) -> CompressedVectors:
         new_data: np.ndarray = self.data[start:stop]
         new_indices: np.ndarray = self.indices[start:stop]
 
-        return CompressedVectors(new_data, new_indices, new_indptr)
+        return CompressedVectors(
+            new_data,
+            new_indices,
+            new_indptr if isinstance(new_indptr, np.ndarray) else CupyArray(new_indptr),
+        )
 
     def get_compressed_vectors(self, row_idxs: Iterable[int]) -> CompressedVectors:
         indptr_slices = [slice(*(self.indptr[i : i + 2])) for i in row_idxs]
@@ -172,7 +179,11 @@ def get_compressed_vectors(self, row_idxs: Iterable[int]) -> CompressedVectors:
         indptr = np.array(
             list(accumulate(chain((0,), (s.stop - s.start for s in indptr_slices))))
         )
-        return CompressedVectors(data, indices, indptr)
+        return CompressedVectors(
+            data,
+            indices,
+            indptr if isinstance(indptr, np.ndarray) else CupyArray(indptr),
+        )
 
     def get_compressed_vectors_for_slices(
         self, slices: Iterable[slice]
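The `indptr` rebuild in `get_compressed_vectors` above relies on `accumulate`/`chain`: the new pointer array is the running sum of each selected row's length, seeded with a leading zero. A small worked example of just that idiom, with made-up slice values for illustration:

```python
from itertools import accumulate, chain

import numpy as np

# Hypothetical per-row slices into data/indices (illustration only).
indptr_slices = [slice(3, 6), slice(10, 12), slice(20, 20)]

# Running sum of row lengths, seeded with 0 -> a valid CSR-style indptr.
indptr = np.array(
    list(accumulate(chain((0,), (s.stop - s.start for s in indptr_slices))))
)
print(indptr)  # [0 3 5 5]: rows of length 3, 2 and 0
```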
@@ -194,19 +205,33 @@ def get_compressed_vectors_for_slices(
         offsets = accumulate(chain([indptr_limits[0].start], gaps))
         start_indptr = indptr_indices[0] - next(offsets)
         if len(slices) < 2:  # there is only one slice so no need to concatenate
-            return CompressedVectors(data, indices, start_indptr)
+            return CompressedVectors(
+                data,
+                indices,
+                start_indptr
+                if isinstance(start_indptr, np.ndarray)
+                else CupyArray(start_indptr),
+            )
         end_indptr = np.concatenate(
             [s[1:] - o for s, o in zip(indptr_indices[1:], offsets)]
         )
         indptr = np.concatenate([start_indptr, end_indptr])
-        return CompressedVectors(data, indices, indptr)
+        return CompressedVectors(
+            data,
+            indices,
+            indptr if isinstance(indptr, np.ndarray) else CupyArray(indptr),
+        )
 
     def get_compressed_vector(self, idx: int) -> CompressedVectors:
         s = slice(*(self.indptr[idx : idx + 2]))
         data: np.ndarray = self.data[s]
         indices: np.ndarray = self.indices[s]
-        indptr: np.ndarray = [0, len(data)]
-        return CompressedVectors(data, indices, indptr)
+        indptr: np.ndarray = np.array([0, len(data)])
+        return CompressedVectors(
+            data,
+            indices,
+            indptr if isinstance(indptr, np.ndarray) else CupyArray(indptr),
+        )
 
     def __getitem__(self, key):
         if isinstance(key, tuple):
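For context on what the `(data, indices, indptr)` triple built throughout this hunk represents: it is exactly the compressed-row constructor input of a SciPy sparse matrix. A short sketch with made-up values, not taken from the PR:

```python
import numpy as np
from scipy import sparse

data = np.array([1.0, 2.0, 3.0])
indices = np.array([0, 2, 1])
indptr = np.array([0, 2, 3])  # row 0 holds two values, row 1 holds one

mat = sparse.csr_matrix((data, indices, indptr), shape=(2, 3))
print(mat.toarray())
# [[1. 0. 2.]
#  [0. 3. 0.]]
```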
@@ -546,6 +571,8 @@ def _indptr(self) -> np.ndarray:
         It should therefore fit into memory, so we cache it for faster access.
         """
         arr = self.group["indptr"][...]
+        if isinstance(arr, CupyArray):
+            arr = arr.get()
         return arr
 
     @cached_property
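The cached `_indptr` above copies a device-resident array back to host memory via CuPy's `.get()`, so downstream code can keep treating it as a plain `numpy.ndarray`. A hedged sketch of that conversion, guarded because it requires CuPy and a CUDA-capable GPU:

```python
import numpy as np

try:
    import cupy as cp  # only usable with a CUDA-capable GPU
except ImportError:
    cp = None

if cp is not None:
    device_arr = cp.arange(5)
    host_arr = device_arr.get()  # device -> host copy, returns numpy.ndarray
    assert isinstance(host_arr, np.ndarray)
```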