From 94f24b8bddf07927eb03f84ba9ef50f966a26633 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Thu, 29 Aug 2024 10:32:55 -0400 Subject: [PATCH 1/2] unify reduce/reducedim empty case behaviors Previously, Julia tried to guess at initial values for empty dimensional reductions in a slightly different way than whole-array reductions. This change unifies those behaviors, such that `mapreduce_empty` is called for empty dimensional reductions, just like it is called for empty whole-array reductions. Beyond the unification (which is valuable in its own right), this change enables some downstream simplifications to dimensional reductions and is likely to be the "most breaking" public behavior in a refactoring targeting more correctness improvements. --- NEWS.md | 4 +++ base/reducedim.jl | 14 ++++----- test/reducedim.jl | 74 ++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 69 insertions(+), 23 deletions(-) diff --git a/NEWS.md b/NEWS.md index 211c561167122..8faba05eb3702 100644 --- a/NEWS.md +++ b/NEWS.md @@ -41,6 +41,10 @@ New library features Standard library changes ------------------------ +* Empty dimensional reductions (e.g., `reduce` and `mapreduce` with the `dims` keyword + selecting one or more dimensions) now behave like their whole-array (`dims=:`) counterparts, + only returning values in unambiguous cases and erroring otherwise. + #### JuliaSyntaxHighlighting #### LinearAlgebra diff --git a/base/reducedim.jl b/base/reducedim.jl index ba3434494bc0b..5717e99d193de 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -341,8 +341,10 @@ _mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) = _mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, dims) = mapreducedim!(f, op, reducedim_initarray(A, dims, nt), A) -_mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims) = +function _mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims) + isempty(A) && return fill(mapreduce_empty(f, op, eltype(A)), reduced_indices(A, dims)) mapreducedim!(f, op, reducedim_init(f, op, A, dims), A) +end """ reduce(f, A::AbstractArray; dims=:, [init]) @@ -1128,10 +1130,7 @@ findmin(f, A::AbstractArray; dims=:) = _findmin(f, A, dims) function _findmin(f, A, region) ri = reduced_indices0(A, region) if isempty(A) - if prod(map(length, reduced_indices(A, region))) != 0 - throw(ArgumentError("collection slices must be non-empty")) - end - similar(A, promote_op(f, eltype(A)), ri), zeros(eltype(keys(A)), ri) + _empty_reduce_error() else fA = f(first(A)) findminmax!(f, isgreater, fill!(similar(A, _findminmax_inittype(f, A), ri), fA), @@ -1201,10 +1200,7 @@ findmax(f, A::AbstractArray; dims=:) = _findmax(f, A, dims) function _findmax(f, A, region) ri = reduced_indices0(A, region) if isempty(A) - if prod(map(length, reduced_indices(A, region))) != 0 - throw(ArgumentError("collection slices must be non-empty")) - end - similar(A, promote_op(f, eltype(A)), ri), zeros(eltype(keys(A)), ri) + _empty_reduce_error() else fA = f(first(A)) findminmax!(f, isless, fill!(similar(A, _findminmax_inittype(f, A), ri), fA), diff --git a/test/reducedim.jl b/test/reducedim.jl index 6a6f20214058c..d68f29e8237f2 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -207,23 +207,34 @@ end @test isequal(prod(A, dims=(1, 2)), fill(1, 1, 1)) @test isequal(prod(A, dims=3), fill(1, 0, 1)) - for f in (minimum, maximum) + for f in (minimum, maximum, findmin, findmax) @test_throws "reducing over an empty collection is not allowed" f(A, dims=1) - @test isequal(f(A, dims=2), zeros(Int, 0, 1)) + @test_throws "reducing over an empty collection is not allowed" f(A, dims=2) @test_throws "reducing over an empty collection is not allowed" f(A, dims=(1, 2)) - @test isequal(f(A, dims=3), zeros(Int, 0, 1)) - end - for f in (findmin, findmax) - @test_throws ArgumentError f(A, dims=1) - @test isequal(f(A, dims=2), (zeros(Int, 0, 1), zeros(Int, 0, 1))) - @test_throws ArgumentError f(A, dims=(1, 2)) - @test isequal(f(A, dims=3), (zeros(Int, 0, 1), zeros(Int, 0, 1))) - @test_throws ArgumentError f(abs2, A, dims=1) - @test isequal(f(abs2, A, dims=2), (zeros(Int, 0, 1), zeros(Int, 0, 1))) - @test_throws ArgumentError f(abs2, A, dims=(1, 2)) - @test isequal(f(abs2, A, dims=3), (zeros(Int, 0, 1), zeros(Int, 0, 1))) + @test_throws "reducing over an empty collection is not allowed" f(A, dims=3) + if f === maximum + # maximum allows some empty reductions with abs/abs2 + z = f(abs, A) + @test isequal(f(abs, A, dims=1), fill(z, (1,1))) + @test isequal(f(abs, A, dims=2), fill(z, (0,1))) + @test isequal(f(abs, A, dims=(1,2)), fill(z, (1,1))) + @test isequal(f(abs, A, dims=3), fill(z, (0,1))) + z = f(abs2, A) + @test isequal(f(abs2, A, dims=1), fill(z, (1,1))) + @test isequal(f(abs2, A, dims=2), fill(z, (0,1))) + @test isequal(f(abs2, A, dims=(1,2)), fill(z, (1,1))) + @test isequal(f(abs2, A, dims=3), fill(z, (0,1))) + else + @test_throws "reducing over an empty collection is not allowed" f(abs, A, dims=1) + @test_throws "reducing over an empty collection is not allowed" f(abs, A, dims=2) + @test_throws "reducing over an empty collection is not allowed" f(abs, A, dims=(1, 2)) + @test_throws "reducing over an empty collection is not allowed" f(abs, A, dims=3) + @test_throws "reducing over an empty collection is not allowed" f(abs2, A, dims=1) + @test_throws "reducing over an empty collection is not allowed" f(abs2, A, dims=2) + @test_throws "reducing over an empty collection is not allowed" f(abs2, A, dims=(1, 2)) + @test_throws "reducing over an empty collection is not allowed" f(abs2, A, dims=3) + end end - end ## findmin/findmax/minimum/maximum @@ -720,3 +731,38 @@ end @test_broken @inferred(maximum(exp, A; dims = 1))[1] === missing @test_broken @inferred(extrema(exp, A; dims = 1))[1] === (missing, missing) end + +some_exception(op) = try return (Some(op()), nothing); catch ex; return (nothing, ex); end +reduced_shape(sz, dims) = ntuple(d -> d in dims ? 1 : sz[d], length(sz)) + +@testset "Ensure that calling, e.g., sum(empty; dims) has the same behavior as sum(empty)" begin + @testset "$r(Array{$T}(undef, $sz); dims=$dims)" for + r in (minimum, maximum, findmin, findmax, extrema, sum, prod, all, any, count), + T in (Int, Union{Missing, Int}, Number, Union{Missing, Number}, Bool, Union{Missing, Bool}, Any), + sz in ((0,), (0,1), (1,0), (0,0), (0,0,1), (1,0,1)), + dims in (1, 2, 3, 4, (1,2), (1,3), (2,3,4), (1,2,3)) + + A = Array{T}(undef, sz) + rsz = reduced_shape(sz, dims) + + v, ex = some_exception() do; r(A); end + if isnothing(v) + @test_throws typeof(ex) r(A; dims) + else + actual = fill(something(v), rsz) + @test isequal(r(A; dims), actual) + @test eltype(r(A; dims)) === eltype(actual) + end + + for f in (identity, abs, abs2) + v, ex = some_exception() do; r(f, A); end + if isnothing(v) + @test_throws typeof(ex) r(f, A; dims) + else + actual = fill(something(v), rsz) + @test isequal(r(f, A; dims), actual) + @test eltype(r(f, A; dims)) === eltype(actual) + end + end + end +end From 0e924cf003de1673af4ca775504049636ed7ffd1 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Fri, 18 Apr 2025 22:43:12 -0400 Subject: [PATCH 2/2] TEMPORARILY USE SPARSEARRAYS PR BRANCH --- .../md5 | 1 + .../sha512 | 1 + stdlib/SparseArrays.version | 8 ++++---- 3 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/md5 create mode 100644 deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/sha512 diff --git a/deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/md5 b/deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/md5 new file mode 100644 index 0000000000000..edb55470f2e63 --- /dev/null +++ b/deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/md5 @@ -0,0 +1 @@ +edafbcbfc29cb7a86aef4a92bf7e7267 diff --git a/deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/sha512 b/deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/sha512 new file mode 100644 index 0000000000000..f57ee756ca6fc --- /dev/null +++ b/deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/sha512 @@ -0,0 +1 @@ +230151395d43b005e1285f5938a5800650314dc562e4cc6363775b54b45b33076a551276e45a977a0ea3b07ca0088a2ea27def63196f435ff1d6153b93999485 diff --git a/stdlib/SparseArrays.version b/stdlib/SparseArrays.version index 9498fbe4943f9..7a1d9697c9769 100644 --- a/stdlib/SparseArrays.version +++ b/stdlib/SparseArrays.version @@ -1,4 +1,4 @@ -SPARSEARRAYS_BRANCH = main -SPARSEARRAYS_SHA1 = f3610c07fe0403792743d9c9802a25642a5f2d18 -SPARSEARRAYS_GIT_URL := https://github.com/JuliaSparse/SparseArrays.jl.git -SPARSEARRAYS_TAR_URL = https://api.github.com/repos/JuliaSparse/SparseArrays.jl/tarball/$1 +SPARSEARRAYS_BRANCH = mb/mapreducedim_empty +SPARSEARRAYS_SHA1 = ac0cc6e8278bb614dc2a16e9353fa092dae6606a +SPARSEARRAYS_GIT_URL := https://github.com/mbauman/SparseArrays.jl.git +SPARSEARRAYS_TAR_URL = https://api.github.com/repos/mbauman/SparseArrays.jl/tarball/$1