Skip to content

Commit 424cfb9

Browse files
author
Xuye (Chris) Qin
authored
Fix mt.{cumsum, cumprod} when the first chunk is empty (#3134)
1 parent 221e4b3 commit 424cfb9

File tree

5 files changed: 29 additions (+29) and 3 deletions (-3)

mars/tensor/arithmetic/add.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from functools import reduce
1919

2020
from ... import opcodes as OperandDef
21+
from ...serialization.serializables import BoolField
2122
from ..array_utils import device, as_same_device
2223
from ..datasource import scalar
2324
from ..utils import infer_dtype
@@ -89,17 +90,21 @@ class TensorTreeAdd(TensorMultiOp):
8990
_op_type_ = OperandDef.TREE_ADD
9091
_func_name = "add"
9192

93+
ignore_empty_input = BoolField("ignore_empty_input", default=False)
94+
9295
@classmethod
9396
def _is_sparse(cls, *args):
9497
if args and all(hasattr(x, "issparse") and x.issparse() for x in args):
9598
return True
9699
return False
97100

98101
@classmethod
99-
def execute(cls, ctx, op):
102+
def execute(cls, ctx, op: "TensorTreeAdd"):
100103
inputs, device_id, xp = as_same_device(
101104
[ctx[c.key] for c in op.inputs], device=op.device, ret_extra=True
102105
)
106+
if op.ignore_empty_input:
107+
inputs = [inp for inp in inputs if not hasattr(inp, "size") or inp.size > 0]
103108

104109
with device(device_id):
105110
ctx[op.outputs[0].key] = reduce(xp.add, inputs)

mars/tensor/arithmetic/multiply.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from functools import reduce
1919

2020
from ... import opcodes as OperandDef
21+
from ...serialization.serializables import BoolField
2122
from ..array_utils import device, as_same_device
2223
from ..datasource import scalar
2324
from ..utils import infer_dtype
@@ -88,6 +89,8 @@ class TensorTreeMultiply(TensorMultiOp):
8889
_op_type_ = OperandDef.TREE_MULTIPLY
8990
_func_name = "multiply"
9091

92+
ignore_empty_input = BoolField("ignore_empty_input", default=False)
93+
9194
def __init__(self, sparse=False, **kw):
9295
super().__init__(sparse=sparse, **kw)
9396

@@ -106,6 +109,8 @@ def execute(cls, ctx, op):
106109
inputs, device_id, xp = as_same_device(
107110
[ctx[c.key] for c in op.inputs], device=op.device, ret_extra=True
108111
)
112+
if op.ignore_empty_input:
113+
inputs = [inp for inp in inputs if not hasattr(inp, "size") or inp.size > 0]
109114

110115
with device(device_id):
111116
ctx[op.outputs[0].key] = reduce(xp.multiply, inputs)

mars/tensor/reduction/core.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -586,7 +586,11 @@ def tile(cls, op):
586586
to_cum_chunks.append(sliced_chunk)
587587
to_cum_chunks.append(chunk)
588588

589-
bin_op = bin_op_type(args=to_cum_chunks, dtype=chunk.dtype)
589+
# GH#3132: some chunks of to_cum_chunks may be empty,
590+
# so we tell tree_add&tree_multiply to ignore them
591+
bin_op = bin_op_type(
592+
args=to_cum_chunks, dtype=chunk.dtype, ignore_empty_input=True
593+
)
590594
output_chunk = bin_op.new_chunk(
591595
to_cum_chunks,
592596
shape=chunk.shape,

mars/tensor/reduction/tests/test_reduction_execution.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -497,6 +497,16 @@ def test_cum_reduction(setup):
497497
np.cumsum(np.array(list("abcdefghi"), dtype=object)),
498498
)
499499

500+
# test empty chunks
501+
raw = np.random.rand(100)
502+
arr = tensor(raw, chunk_size=((0, 100),))
503+
res = arr.cumsum().execute().fetch()
504+
expected = raw.cumsum()
505+
np.testing.assert_allclose(res, expected)
506+
res = arr.cumprod().execute().fetch()
507+
expected = raw.cumprod()
508+
np.testing.assert_allclose(res, expected)
509+
500510

501511
def test_nan_cum_reduction(setup):
502512
raw = np.random.randint(5, size=(8, 8, 8)).astype(float)

mars/tensor/reshape/reshape.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -603,7 +603,9 @@ def reshape(a, newshape, order="C"):
603603

604604
tensor_order = get_order(order, a.order, available_options="CFA")
605605

606-
if a.shape == newshape and tensor_order == a.order:
606+
if a.shape == newshape and (
607+
a.ndim <= 1 or (a.ndim > 1 and tensor_order == a.order)
608+
):
607609
# does not need to reshape
608610
return a
609611
return _reshape(

0 commit comments

Comments
 (0)