Skip to content

WIP: a more principled take on dimensional reduction inits #55318

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
b9bdd63
a more principled take on dimensional reduction inits
mbauman Jul 31, 2024
2b000b1
add support for broadcasteds
mbauman Jul 31, 2024
07c1da1
add tests for complex reductions
mbauman Jul 31, 2024
2179df3
add tests for bitwise operators on integers
mbauman Jul 31, 2024
344ed43
fixup! add tests for bitwise operators on integers
mbauman Jul 31, 2024
8eb7d91
restore performance for linear fast dim=1+
mbauman Jul 31, 2024
11e98ed
fixup! add support for broadcasteds
mbauman Jul 31, 2024
c32c6ed
add more tests
mcabbott Jul 31, 2024
58d2b27
perf: add loop switching
mbauman Jul 31, 2024
2d595bb
perf: slice up the slow part
mbauman Jul 31, 2024
b7adc89
use builtin incremental widening slowly when needed
mbauman Aug 1, 2024
e59a4d8
fixup! use builtin incremental widening slowly when needed
mbauman Aug 2, 2024
6af4850
add tests for 44906
mbauman Aug 2, 2024
d31cba6
also revamp findmin/findmax
mbauman Aug 4, 2024
6025380
minor cleanups, slightly better naming and hoist some merges
mbauman Aug 6, 2024
3e9e9d0
fixup findmin/max/etc and add support for custom allocators
mbauman Aug 27, 2024
6bfd6c1
more tests
mbauman Aug 29, 2024
1635026
better comment and slight cleanups
mbauman Aug 29, 2024
64a27ea
Merge branch 'master' into mapreducedim-init-adienes-testing
adienes Dec 16, 2024
ce4c0cd
Merge remote-tracking branch 'origin' into mapreducedim-init-adienes-…
adienes Dec 21, 2024
48b4ad0
couple tuneups to dimensional mapreduce refactor
adienes Dec 21, 2024
aacdcee
Merge branch 'master' into mb/mapreducedim-init
mbauman May 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions base/fastmath.jl
Original file line number Diff line number Diff line change
Expand Up @@ -403,10 +403,10 @@ minimum_fast(a; kw...) = Base.reduce(min_fast, a; kw...)
maximum_fast(f, a; kw...) = Base.mapreduce(f, max_fast, a; kw...)
minimum_fast(f, a; kw...) = Base.mapreduce(f, min_fast, a; kw...)

Base.reducedim_init(f, ::typeof(max_fast), A::AbstractArray, region) =
Base.reducedim_init(f, max, A::AbstractArray, region)
Base.reducedim_init(f, ::typeof(min_fast), A::AbstractArray, region) =
Base.reducedim_init(f, min, A::AbstractArray, region)
Base._mapreduce_similar(f, ::typeof(max_fast), A, ::Type{T}, axes) where {T} =
Base._mapreduce_similar(f, max, A, T, axes)
Base._mapreduce_similar(f, ::typeof(min_fast), A, ::Type{T}, axes) where {T} =
Base._mapreduce_similar(f, min, A, T, axes)

maximum!_fast(r::AbstractArray, A::AbstractArray; kw...) =
maximum!_fast(identity, r, A; kw...)
Expand Down
227 changes: 91 additions & 136 deletions base/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ function reduced_indices0(axs::Indices{N}, region) where N
ntuple(d -> d in region && !isempty(axs[d]) ? reduced_index(axs[d]) : axs[d], Val(N))
end

# The inverse of reduced_indices
inner_indices(a::AbstractArrayOrBroadcasted, region) = inner_indices(axes(a), region)
function inner_indices(axs::Indices{N}, region) where N
ntuple(d -> d in region ? axs[d] : reduced_index(axs[d]), Val(N))
end

# Given an outer and an inner cartesian index, merge them depending on the dims
merge_outer_inner(outer::CartesianIndex{N}, inner::CartesianIndex{N}, region) where {N} =
CartesianIndex(ntuple(d->d in region ? inner.I[d] : outer.I[d], Val(N)))


function _check_valid_region(region)
for d in region
isa(d, Integer) || throw(ArgumentError("reduced dimension(s) must be integers"))
Expand All @@ -53,10 +64,6 @@ end
reducedim_initarray(A::AbstractArrayOrBroadcasted, region, init, ::Type{R}) where {R} = fill!(similar(A,R,reduced_indices(A,region)), init)
reducedim_initarray(A::AbstractArrayOrBroadcasted, region, init::T) where {T} = reducedim_initarray(A, region, init, T)

# TODO: better way to handle reducedim initialization
#
# The current scheme is basically following Steven G. Johnson's original implementation
#
promote_union(T::Union) = promote_type(promote_union(T.a), promote_union(T.b))
promote_union(T) = T

Expand All @@ -66,134 +73,6 @@ _realtype(T::Type) = T
_realtype(::Union{typeof(abs),typeof(abs2)}, T) = _realtype(T)
_realtype(::Any, T) = T

function reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::AbstractArray, region)
_reducedim_init(f, op, zero, sum, A, region)
end
function reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::AbstractArray, region)
_reducedim_init(f, op, one, prod, A, region)
end
function _reducedim_init(f, op, fv, fop, A, region)
T = _realtype(f, promote_union(eltype(A)))
if T !== Any && applicable(zero, T)
x = f(zero(T))
z = op(fv(x), fv(x))
Tr = z isa T ? T : typeof(z)
else
z = fv(fop(f, A))
Tr = typeof(z)
end
return reducedim_initarray(A, region, z, Tr)
end

# initialization when computing minima and maxima requires a little care
for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min, :(-Inf), :typemin))
@eval function reducedim_init(f, op::typeof($f1), A::AbstractArray, region)
# First compute the reduce indices. This will throw an ArgumentError
# if any region is invalid
ri = reduced_indices(A, region)

# Next, throw if reduction is over a region with length zero
any(i -> isempty(axes(A, i)), region) && _empty_reduce_error()

# Make a view of the first slice of the region
A1 = view(A, ri...)

if isempty(A1)
# If the slice is empty just return non-view version as the initial array
return map(f, A1)
else
# otherwise use the min/max of the first slice as initial value
v0 = mapreduce(f, $f2, A1)

T = _realtype(f, promote_union(eltype(A)))
Tr = v0 isa T ? T : typeof(v0)

# but NaNs, missing and unordered values need to be avoided as initial values
if v0 isa Number && isnan(v0)
# v0 is NaN
v0 = oftype(v0, $initval)
elseif isunordered(v0)
# v0 is missing or a third-party unordered value
Tnm = nonmissingtype(Tr)
if Tnm <: Union{BitInteger, IEEEFloat, BigFloat}
v0 = $typeextreme(Tnm)
elseif !all(isunordered, A1)
v0 = mapreduce(f, $f2, Iterators.filter(!isunordered, A1))
end
end
# v0 may have changed type.
Tr = v0 isa T ? T : typeof(v0)

return reducedim_initarray(A, region, v0, Tr)
end
end
end

function reducedim_init(f::ExtremaMap, op::typeof(_extrema_rf), A::AbstractArray, region)
# First compute the reduce indices. This will throw an ArgumentError
# if any region is invalid
ri = reduced_indices(A, region)

# Next, throw if reduction is over a region with length zero
any(i -> isempty(axes(A, i)), region) && _empty_reduce_error()

# Make a view of the first slice of the region
A1 = view(A, ri...)

isempty(A1) && return map(f, A1)
# use the max/min of the first slice as initial value for non-empty cases
v0 = reverse(mapreduce(f, op, A1)) # turn minmax to maxmin

T = _realtype(f.f, promote_union(eltype(A)))
Tmin = v0[1] isa T ? T : typeof(v0[1])
Tmax = v0[2] isa T ? T : typeof(v0[2])

# but NaNs and missing need to be avoided as initial values
if v0[1] isa Number && isnan(v0[1])
# v0 is NaN
v0 = oftype(v0[1], Inf), oftype(v0[2], -Inf)
elseif isunordered(v0[1])
# v0 is missing or a third-party unordered value
Tminnm = nonmissingtype(Tmin)
Tmaxnm = nonmissingtype(Tmax)
if Tminnm <: Union{BitInteger, IEEEFloat, BigFloat} &&
Tmaxnm <: Union{BitInteger, IEEEFloat, BigFloat}
v0 = (typemax(Tminnm), typemin(Tmaxnm))
elseif !all(isunordered, A1)
v0 = reverse(mapreduce(f, op, Iterators.filter(!isunordered, A1)))
end
end
# v0 may have changed type.
Tmin = v0[1] isa T ? T : typeof(v0[1])
Tmax = v0[2] isa T ? T : typeof(v0[2])

return reducedim_initarray(A, region, v0, Tuple{Tmin,Tmax})
end

reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(max), A::AbstractArray{T}, region) where {T} =
reducedim_initarray(A, region, zero(f(zero(T))), _realtype(f, T))

reducedim_init(f, op::typeof(&), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, true)
reducedim_init(f, op::typeof(|), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, false)

# specialize to make initialization more efficient for common cases

let
BitIntFloat = Union{BitInteger, IEEEFloat}
T = Union{
Any[AbstractArray{t} for t in uniontypes(BitIntFloat)]...,
Any[AbstractArray{Complex{t}} for t in uniontypes(BitIntFloat)]...}

global function reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::T, region)
z = zero(f(zero(eltype(A))))
reducedim_initarray(A, region, op(z, z))
end
global function reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::T, region)
u = one(f(one(eltype(A))))
reducedim_initarray(A, region, op(u, u))
end
end

## generic (map)reduction

has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = IndexStyle(a) === IndexLinear()
Expand Down Expand Up @@ -273,7 +152,7 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted)
IR = Broadcast.newindex(IA, keep, Idefault)
r = R[i1,IR]
@simd for i in axes(A, 1)
r = op(r, f(A[i, IA]))
r = op(r, f(A[i, IA])) # this could also be done pairwise
end
R[i1,IR] = r
end
Expand All @@ -288,6 +167,85 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted)
return R
end

#### Reduction initialization ####
#
# Similar to the scalar mapreduce, there are two*three cases we need to consider
# when constructing the initial results array:
# * Have init: Fill results array with init, proceed (regardless of reduction size)
# * Don't worry about widening and do a straight iteration with in-place updates over all values
# * Otherwise:
# * No values: Fill with `mapreduce_empty` & return
# * One value: Fill with `mapreduce_first` & return
# * Two+ values: Fill with `op(f(a1), f(a2))`, proceed <--- this is the hard case:
# * Proceeding involves skipping the first two inner iterations
function _mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims)
rinds = reduced_indices(A, dims)
outer_inds = CartesianIndices(rinds)
n = prod(map(d->size(A, d), dims))
(n == 0 || isempty(A)) && return _mapreduce_empty_array(f, op, A, rinds)
n == 1 && return _mapreduce_one_array(f, op, A, rinds)

inner_inds = CartesianIndices(inner_indices(A, dims))
inner1, inner2 = inner_inds
# Create the result vector with the first step of the reduction
# note that this is using a (potentially) bad cache ordering, so we don't want to continue like this
i1 = first(outer_inds)
v = op(f(A[merge_outer_inner(i1, inner1, dims)]), f(A[merge_outer_inner(i1, inner2, dims)]))
R = _mapreduce_similar(f, op, A, typeof(v), rinds)
R[begin] = v
for i in Iterators.drop(outer_inds, 1)
v = op(f(A[merge_outer_inner(i, inner1, dims)]), f(A[merge_outer_inner(i, inner2, dims)]))
if !(typeof(v) <: eltype(R))
Rnew = _mapreduce_similar(f, op, A, promote_type(eltype(R), typeof(v)), rinds)
copyto!(Rnew, R) # TODO: could only copy from begin:i
R = Rnew
end
R[i] = v
end

# TODO: It's easy to add reducedim1 performance optimizations here
# TODO: Would also be good to do loop switching in the general case, too
for IR in outer_inds
for IA in Iterators.drop(inner_inds, 2)
iA = merge_outer_inner(IR, IA, dims)
R[IR] = op(R[IR], f(A[iA]))
end
end
return R
end

_mapreduce_similar(f, op, A, ::Type{T}, axes) where {T} = similar(A,T,axes)
# Extrema, max and min have special support for preserving the original matrix eltype with identity maps
function _mapreduce_similar(f::ExtremaMap{typeof(identity)}, op::typeof(_extrema_rf), A, ::Type{Tuple{T,T}}, axes) where {T}
similar(A,Tuple{promote_type(eltype(A),T),promote_type(eltype(A),T)},axes)
end
function _mapreduce_similar(f::typeof(identity), op::Union{typeof(min), typeof(max)}, A, ::Type{T}, axes) where {T}
similar(A,promote_type(eltype(A),T),axes)
end

function _mapreduce_empty_array(f, op, A, axes)
init = mapreduce_empty(f, op, eltype(A))
return fill!(_mapreduce_similar(f, op, A, typeof(init), axes), init)
end

function _mapreduce_one_array(f, op, A, axes)
idxs = CartesianIndices(axes)
i1 = first(idxs)
v = mapreduce_first(f, op, A[i1])
R = _mapreduce_similar(f, op, A, typeof(v), axes)
R[i1] = v
for i in Iterators.drop(idxs, 1)
v = mapreduce_first(f, op, A[i])
if !(typeof(v) <: eltype(R))
Rnew = _mapreduce_similar(f, op, A, promote_type(eltype(R), typeof(v)), axes)
copyto!(Rnew, R)
R = Rnew
end
R[i] = v
end
return R
end

mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted) =
(_mapreducedim!(f, op, R, A); R)

Expand Down Expand Up @@ -335,9 +293,6 @@ _mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) =
_mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, dims) =
mapreducedim!(f, op, reducedim_initarray(A, dims, nt), A)

_mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims) =
mapreducedim!(f, op, reducedim_init(f, op, A, dims), A)

"""
reduce(f, A::AbstractArray; dims=:, [init])

Expand Down
8 changes: 5 additions & 3 deletions test/reduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -666,10 +666,12 @@ x = [j+7 for j in i]
end

# make sure we specialize on mapfoldl(::Type, ...)
@test @inferred(mapfoldl(Int, +, [1, 2, 3]; init=0)) === 6
@testset "inference" begin
@test @inferred(mapfoldl(Int, +, [1, 2, 3]; init=0)) === 6

# issue #39281
@test @inferred(extrema(rand(2), dims=1)) isa Vector{Tuple{Float64,Float64}}
# issue #39281
@test @inferred(extrema(rand(2), dims=1)) isa Vector{Tuple{Float64,Float64}}
end

# issue #38627
@testset "overflow in mapreduce" begin
Expand Down
Loading
Loading