diff --git a/NEWS.md b/NEWS.md
index 879f68c47bb6a..00feb25a7689e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -58,6 +58,12 @@ New library features
 Standard library changes
 ------------------------
 
+* Empty dimensional reductions (`reduce`, `mapreduce`, `sum`, `minimum`, etc., with the `dims` keyword
+  selecting one or more dimensions) now behave like their whole-array (`dims=:`) counterparts,
+  only returning values in unambiguous cases and erroring otherwise.  For example,
+  `minimum([], dims=2)` is now an error; it would previously return an empty array.
+  This can be addressed by specifying an `init` keyword argument as the error message directs.
+
 #### JuliaSyntaxHighlighting
 
 #### LinearAlgebra
diff --git a/base/broadcast.jl b/base/broadcast.jl
index b86baf08ddfe0..74421c1ca5384 100644
--- a/base/broadcast.jl
+++ b/base/broadcast.jl
@@ -236,13 +236,11 @@ Base.similar(::Broadcasted{ArrayConflict}, ::Type{Bool}, dims) =
     similar(BitArray, dims)
 
 @inline Base.axes(bc::Broadcasted) = _axes(bc, bc.axes)
+@inline Base.axes(bc::Broadcasted, d::Integer) = get(axes(bc), d, OneTo(1))
 _axes(::Broadcasted, axes::Tuple) = axes
 @inline _axes(bc::Broadcasted, ::Nothing)  = combine_axes(bc.args...)
 _axes(bc::Broadcasted{<:AbstractArrayStyle{0}}, ::Nothing) = ()
 
-@inline Base.axes(bc::Broadcasted{<:Any, <:NTuple{N}}, d::Integer) where N =
-    d <= N ? axes(bc)[d] : OneTo(1)
-
 BroadcastStyle(::Type{<:Broadcasted{Style}}) where {Style} = Style()
 BroadcastStyle(::Type{<:Broadcasted{S}}) where {S<:Union{Nothing,Unknown}} =
     throw(ArgumentError("Broadcasted{Unknown} wrappers do not have a style assigned"))
@@ -266,6 +264,7 @@ Base.ndims(bc::Broadcasted) = ndims(typeof(bc))
 Base.ndims(::Type{<:Broadcasted{<:Any,<:NTuple{N,Any}}}) where {N} = N
 
 Base.size(bc::Broadcasted) = map(length, axes(bc))
+Base.size(bc::Broadcasted, d::Integer) = length(axes(bc, d))
 Base.length(bc::Broadcasted) = prod(size(bc))
 
 function Base.iterate(bc::Broadcasted)
diff --git a/base/cartesian.jl b/base/cartesian.jl
index ca0fc0aac0cfc..faf8b82835776 100644
--- a/base/cartesian.jl
+++ b/base/cartesian.jl
@@ -402,7 +402,7 @@ function exprresolve_conditional(ex::Expr)
                 return true, exprresolve_cond_dict[callee](ex.args[2], ex.args[3])
             end
         end
-    elseif Meta.isexpr(ex, :block, 2) && ex.args[1] isa LineNumberNode
+    elseif ex.head === :block && length(ex.args) == 2 && ex.args[1] isa LineNumberNode
         return exprresolve_conditional(ex.args[2])
     end
     false, false
diff --git a/base/deprecated.jl b/base/deprecated.jl
index e7ea1e15e7b50..11697a4ddbb5f 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -27,6 +27,7 @@ const __internal_changes_list = (
     :codeinfonargs,  # #54341
     :ocnopartial,
     :printcodeinfocalls,
+    :reducerefactor,
     # Add new change names above this line
 )
 
@@ -558,3 +559,29 @@ true
 isbindingresolved
 
 # END 1.12 deprecations
+
+# BEGIN 1.13 deprecations
+
+## These are all mapreduce internals that were not exported or public, but have deprecations to minimize disruptions
+@deprecate reducedim_init(f, op, A::AbstractArray, region) (v = mapreduce_empty(f, op, eltype(A)); fill!(mapreduce_similar(A, typeof(v), reduced_indices(A,region)), v)) false
+const _dep_message_reducedim_init = ", these internals have been removed. To customize the array returned by dimensional reductions, implement mapreduce_similar instead"
+deprecate(Base, :reducedim_init)
+deprecate(Base, Symbol("#reducedim_init"))
+
+@deprecate (reducedim_initarray(A::Union{Base.AbstractBroadcasted, AbstractArray}, region, init, ::Type{T}) where {T}) fill!(mapreduce_similar(A,T,reduced_indices(A,region)), init) false
+@deprecate reducedim_initarray(A::Union{Base.AbstractBroadcasted, AbstractArray}, region, init) fill!(mapreduce_similar(A,typeof(init),reduced_indices(A,region)), init) false
+const _dep_message_reducedim_init = ", these internals have been removed. To customize the array returned by dimensional reductions, implement mapreduce_similar instead"
+deprecate(Base, :reducedim_initarray)
+deprecate(Base, Symbol("#reducedim_initarray"))
+
+@deprecate _mapreduce_dim(f, op, nt, A::Union{Base.AbstractBroadcasted, AbstractArray}, dims) mapreducedim(f, op, A, nt, dims) false
+
+@deprecate_binding mapreducedim! Base.mapreduce! false
+@deprecate_binding _mapreducedim! Base.mapreduce! false
+@deprecate_binding reducedim! Base.reduce! false
+
+@deprecate mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, ifirst::Integer, ilast::Integer, blksize::Int=pairwise_blocksize(f, op)) Base.mapreduce_pairwise(f, op, A, _InitialValue(), ifirst:ilast) false
+@deprecate _mapreduce(f, op, A) Base.mapreduce_pairwise(f, op, A, Base._InitialValue()) false
+@deprecate _mapreduce(f, op, _, A) Base.mapreduce_pairwise(f, op, A, Base._InitialValue()) false
+
+# END 1.13 deprecations
diff --git a/base/dict.jl b/base/dict.jl
index 68aa4f1a7a543..d43da732ee856 100644
--- a/base/dict.jl
+++ b/base/dict.jl
@@ -846,7 +846,15 @@ function iterate(d::ImmutableDict{K,V}, t=d) where {K, V}
     !isdefined(t, :parent) && return nothing
     (Pair{K,V}(t.key, t.value), t.parent)
 end
-length(t::ImmutableDict) = count(Returns(true), t)
+# length is defined in terms of iteration; using a higher-order function to do the iteration
+# is likely to call `length` again, so this is a manual for loop:
+function length(t::ImmutableDict)
+    r = 0
+    for _ in t
+        r+=1
+    end
+    return r
+end
 isempty(t::ImmutableDict) = !isdefined(t, :parent)
 empty(::ImmutableDict, ::Type{K}, ::Type{V}) where {K, V} = ImmutableDict{K,V}()
 
diff --git a/base/docs/Docs.jl b/base/docs/Docs.jl
index 13a5b35a115da..887352f8bbedc 100644
--- a/base/docs/Docs.jl
+++ b/base/docs/Docs.jl
@@ -841,7 +841,8 @@ See also: [`names`](@ref), [`Docs.hasdoc`](@ref), [`Base.ispublic`](@ref).
 """
 function undocumented_names(mod::Module; private::Bool=false)
     filter!(names(mod; all=true)) do sym
-        !hasdoc(mod, sym) && !startswith(string(sym), '#') &&
+        !Base.isdeprecated(mod, sym) &&
+            !hasdoc(mod, sym) && !startswith(string(sym), '#') &&
             (private || Base.ispublic(mod, sym))
     end
 end
diff --git a/base/fastmath.jl b/base/fastmath.jl
index f2f60519b99ac..ce3c3a1c40bf6 100644
--- a/base/fastmath.jl
+++ b/base/fastmath.jl
@@ -401,19 +401,14 @@ minimum_fast(a; kw...) = Base.reduce(min_fast, a; kw...)
 maximum_fast(f, a; kw...) = Base.mapreduce(f, max_fast, a; kw...)
 minimum_fast(f, a; kw...) = Base.mapreduce(f, min_fast, a; kw...)
 
-Base.reducedim_init(f, ::typeof(max_fast), A::AbstractArray, region) =
-    Base.reducedim_init(f, max, A::AbstractArray, region)
-Base.reducedim_init(f, ::typeof(min_fast), A::AbstractArray, region) =
-    Base.reducedim_init(f, min, A::AbstractArray, region)
-
 maximum!_fast(r::AbstractArray, A::AbstractArray; kw...) =
     maximum!_fast(identity, r, A; kw...)
 minimum!_fast(r::AbstractArray, A::AbstractArray; kw...) =
     minimum!_fast(identity, r, A; kw...)
 
 maximum!_fast(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) =
-    Base.mapreducedim!(f, max_fast, Base.initarray!(r, f, max, init, A), A)
+    Base.mapreduce!(f, max_fast, r, A; update=!init)
 minimum!_fast(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) =
-    Base.mapreducedim!(f, min_fast, Base.initarray!(r, f, min, init, A), A)
+    Base.mapreduce!(f, min_fast, r, A; update=!init)
 
 end
diff --git a/base/intfuncs.jl b/base/intfuncs.jl
index 925dafdd8f378..b3f87fd43bfc7 100644
--- a/base/intfuncs.jl
+++ b/base/intfuncs.jl
@@ -838,7 +838,7 @@ function oct(x::Unsigned, pad::Int, neg::Bool)
 end
 
 # 2-digit decimal characters ("00":"99")
-const _dec_d100 = UInt16[
+const _dec_d100 = [
 # generating expression: UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
 #    0 0,    0 1,    0 2,    0 3, and so on in little-endian
   0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930,
@@ -928,8 +928,16 @@ function hex(x::Unsigned, pad::Int, neg::Bool)
     unsafe_takestring(a)
 end
 
-const base36digits = UInt8['0':'9';'a':'z']
-const base62digits = UInt8['0':'9';'A':'Z';'a':'z']
+# UInt8['0':'9';'a':'z']
+const base36digits = [0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+    0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a]
+# UInt8['0':'9';'A':'Z';'a':'z']
+const base62digits = [0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,
+    0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a]
 
 function _base(base::Integer, x::Integer, pad::Int, neg::Bool)
     (x >= 0) | (base < 0) || throw(DomainError(x, "For negative `x`, `base` must be negative."))
diff --git a/base/iterators.jl b/base/iterators.jl
index 53d7b28316ee1..085f00e324dce 100644
--- a/base/iterators.jl
+++ b/base/iterators.jl
@@ -192,6 +192,7 @@ enumerate(iter) = Enumerate(iter)
 
 length(e::Enumerate) = length(e.itr)
 size(e::Enumerate) = size(e.itr)
+axes(e::Enumerate) = axes(e.itr)
 @propagate_inbounds function iterate(e::Enumerate, state=(1,))
     i, rest = state[1], tail(state)
     n = iterate(e.itr, rest...)
diff --git a/base/mathconstants.jl b/base/mathconstants.jl
index d26f5115b5ccb..d2895199f6024 100644
--- a/base/mathconstants.jl
+++ b/base/mathconstants.jl
@@ -96,7 +96,7 @@ julia> Base.MathConstants.eulergamma
 julia> dx = 10^-6;
 
 julia> sum(-exp(-x) * log(x) for x in dx:dx:100) * dx
-0.5772078382499133
+0.5772078382090373
 ```
 """
 γ, const eulergamma = γ
@@ -129,7 +129,7 @@ julia> Base.MathConstants.catalan
 catalan = 0.9159655941772...
 
 julia> sum(log(x)/(1+x^2) for x in 1:0.01:10^6) * 0.01
-0.9159466120554123
+0.9159466120556183
 ```
 """
 catalan
diff --git a/base/missing.jl b/base/missing.jl
index 5fc0e6ec9e620..376118133d572 100644
--- a/base/missing.jl
+++ b/base/missing.jl
@@ -269,100 +269,71 @@ function show(io::IO, s::SkipMissing)
     print(io, ')')
 end
 
-# Optimized mapreduce implementation
-# The generic method is faster when !(eltype(A) >: Missing) since it does not need
-# additional loops to identify the two first non-missing values of each block
-mapreduce(f, op, itr::SkipMissing{<:AbstractArray}) =
-    _mapreduce(f, op, IndexStyle(itr.x), eltype(itr.x) >: Missing ? itr : itr.x)
-
-function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray})
-    A = itr.x
-    ai = missing
-    inds = LinearIndices(A)
-    i = first(inds)
-    ilast = last(inds)
-    for outer i in i:ilast
-        @inbounds ai = A[i]
+# Simple optimization: catch skipmissings that wrap iterators that don't include Missing
+function mapreduce(f::F, op::G, itr::SkipMissing; init=Base._InitialValue()) where {F,G}
+    (IteratorEltype(itr.x) === HasEltype() && !(eltype(itr.x) >: Missing)) && return mapreduce(f, op, itr.x; init)
+    return mapreduce_pairwise(f, op, itr, init)
+end
+
+function mapreduce_pairwise(f::F, op::G, itr::SkipMissing{<:AbstractArray}, init) where {F,G}
+    v = mapreduce_skipmissing_pairwise(f, op, itr.x, init, eachindex(itr.x))
+    return ismissing(v) ? _mapreduce_start(f, op, itr, init) : v
+end
+
+# This is based on mapreduce_pairwise, but with special sauce that allows one or both of the pairwise splits
+# to not process any elements; returning `missing` as the sentinel in such a situation
+function mapreduce_skipmissing_pairwise(f::F, op::G, A, init, inds) where {F,G}
+    if length(inds) <= max(10, pairwise_blocksize(f, op))
+        return mapreduce_skipmissing_kernel(f, op, A, init, inds)
+    else
+        p1, p2 = halves(inds)
+        v1 = mapreduce_skipmissing_pairwise(f, op, A, init, p1)
+        v2 = mapreduce_skipmissing_pairwise(f, op, A, init, p2)
+        return ismissing(v1) ? v2 : ismissing(v2) ? v1 : op(v1, v2)
+    end
+end
+
+function mapreduce_skipmissing_kernel(f, op, A, init, inds::AbstractUnitRange)
+    i1, iN = first(inds), last(inds)
+    i = i1; ai = missing
+    for outer i in i1:iN
+        ai = @inbounds A[i]
         !ismissing(ai) && break
     end
-    ismissing(ai) && return mapreduce_empty(f, op, eltype(itr))
-    a1::eltype(itr) = ai
-    i == typemax(typeof(i)) && return mapreduce_first(f, op, a1)
-    i += 1
-    ai = missing
-    for outer i in i:ilast
+    ismissing(ai) && return missing
+    v = _mapreduce_start(f, op, A, init, ai)
+    i == typemax(typeof(i)) && return v
+    for i in i+1:iN
         @inbounds ai = A[i]
-        !ismissing(ai) && break
+        if !ismissing(ai)
+            v = op(v, f(ai))
+        end
     end
-    ismissing(ai) && return mapreduce_first(f, op, a1)
-    # We know A contains at least two non-missing entries: the result cannot be nothing
-    something(mapreduce_impl(f, op, itr, first(inds), last(inds)))
+    return v
 end
 
-_mapreduce(f, op, ::IndexCartesian, itr::SkipMissing) = mapfoldl(f, op, itr)
-
-mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) =
-    mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op))
-
-# Returns nothing when the input contains only missing values, and Some(x) otherwise
-@noinline function mapreduce_impl(f, op, itr::SkipMissing{<:AbstractArray},
-                                  ifirst::Integer, ilast::Integer, blksize::Int)
-    A = itr.x
-    if ifirst > ilast
-        return nothing
-    elseif ifirst == ilast
-        @inbounds a1 = A[ifirst]
-        if ismissing(a1)
-            return nothing
-        else
-            return Some(mapreduce_first(f, op, a1))
-        end
-    elseif ilast - ifirst < blksize
-        # sequential portion
-        ai = missing
-        i = ifirst
-        for outer i in i:ilast
-            @inbounds ai = A[i]
-            !ismissing(ai) && break
-        end
-        ismissing(ai) && return nothing
-        a1 = ai::eltype(itr)
-        i == typemax(typeof(i)) && return Some(mapreduce_first(f, op, a1))
-        i += 1
-        ai = missing
-        for outer i in i:ilast
-            @inbounds ai = A[i]
-            !ismissing(ai) && break
-        end
-        ismissing(ai) && return Some(mapreduce_first(f, op, a1))
-        a2 = ai::eltype(itr)
-        i == typemax(typeof(i)) && return Some(op(f(a1), f(a2)))
-        i += 1
-        v = op(f(a1), f(a2))
-        @simd for i = i:ilast
-            @inbounds ai = A[i]
-            if !ismissing(ai)
-                v = op(v, f(ai))
-            end
-        end
-        return Some(v)
-    else
-        # pairwise portion
-        imid = ifirst + (ilast - ifirst) >> 1
-        v1 = mapreduce_impl(f, op, itr, ifirst, imid, blksize)
-        v2 = mapreduce_impl(f, op, itr, imid+1, ilast, blksize)
-        if v1 === nothing && v2 === nothing
-            return nothing
-        elseif v1 === nothing
-            return v2
-        elseif v2 === nothing
-            return v1
-        else
-            return Some(op(something(v1), something(v2)))
+function mapreduce_skipmissing_kernel(f, op, A, init, inds)
+    it = iterate(inds)
+    local s
+    ai = missing
+    while it !== nothing
+        i, s = it
+        ai = @inbounds A[i]
+        !ismissing(ai) && break
+        it = iterate(inds, s)
+    end
+    ismissing(ai) && return missing
+    v = _mapreduce_start(f, op, A, init, ai)
+    for i in Iterators.rest(inds, s)
+        @inbounds ai = A[i]
+        if !ismissing(ai)
+            v = op(v, f(ai))
         end
     end
+    return v
 end
 
+
 """
     filter(f, itr::SkipMissing{<:AbstractArray})
 
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index 83a03fc1b45bf..50e3d17a4ca68 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -1064,6 +1064,213 @@ end
     $(_generate_unsafe_setindex!_body(2))
 end
 
+# Reduction support for CartesianIndices
+halves(inds::CartesianIndices) = map(CartesianIndices∘tuple, _halves(inds.indices)...)
+function mapreduce_kernel(f, op, A, init, inds::CartesianIndices{N}) where {N}
+    N == 0 && return _mapreduce_start(f, op, A, init, A[inds[]])
+    N == 1 && return mapreduce_kernel(f, op, A, init, inds.indices[1])
+    is_commutative_op(op) && return mapreduce_kernel_commutative(f, op, A, init, inds)
+    r = inds.indices[1]
+    if length(r) == 1
+        # A one-element inner loop is less-than-helpful
+        i1, s = iterate(inds)
+        a1 = @inbounds A[i1]
+        v = _mapreduce_start(f, op, A, init, a1)
+        for i in Iterators.rest(inds, s)
+            ai = @inbounds A[i]
+            v = op(v, f(ai))
+        end
+    else
+        # Divide the loop into an outer and inner loop, and skip the first element
+        # in the first iteration of the outer loop
+        outer = CartesianIndices(tail(inds.indices))
+        o1, so = iterate(outer)
+        v = _mapreduce_start(f, op, A, init, A[r[begin], o1])
+        for i in r[begin+1:end]
+            ai = @inbounds A[i, o1]
+            v = op(v, f(ai))
+        end
+        for o in Iterators.rest(outer, so)
+            for i in r
+                ai = @inbounds A[i, o]
+                v = op(v, f(ai))
+            end
+        end
+    end
+    return v
+end
+
+function mapreduce_kernel_commutative(f, op, A, init, inds::AbstractArray)
+    if length(inds) < 16
+        i1, iN = firstindex(inds), lastindex(inds)
+        v_1 = _mapreduce_start(f, op, A, init, @inbounds A[inds[i1]])
+        for i in i1+1:iN
+            a = @inbounds A[inds[i]]
+            v_1 = op(v_1, f(a))
+        end
+        return v_1
+    end
+    return _mapreduce_kernel_commutative(f, op, A, init, inds)
+end
+
+# This special internal method must have at least 8 indices and allows passing
+# optional scalar leading and trailing dimensions
+function _mapreduce_kernel_commutative(f, op, A, init, inds, leading=(), trailing=())
+    i1, iN = firstindex(inds), lastindex(inds)
+    n = length(inds)
+    @nexprs 8 N->a_N = @inbounds A[leading..., inds[i1+(N-1)], trailing...]
+    @nexprs 8 N->v_N = _mapreduce_start(f, op, A, init, a_N)
+    for batch in 1:(n>>3)-1
+        i = i1 + batch*8
+        @nexprs 8 N->a_N = @inbounds A[leading..., inds[i+(N-1)], trailing...]
+        @nexprs 8 N->fa_N = f(a_N)
+        @nexprs 8 N->v_N = op(v_N, fa_N)
+    end
+    v = op(op(op(v_1, v_2), op(v_3, v_4)), op(op(v_5, v_6), op(v_7, v_8)))
+    i = i1 + (n>>3)*8 - 1
+    i == iN && return v
+    for i in i+1:iN
+        ai = @inbounds A[leading..., inds[i], trailing...]
+        v = op(v, f(ai))
+    end
+    return v
+end
+
+function mapreduce_kernel_commutative(f::F, op::G, A, init, inds::CartesianIndices{N}) where {N,F,G}
+    N == 0 && return _mapreduce_start(f, op, A, init, A[inds[]])
+    N == 1 && return mapreduce_kernel_commutative(f, op, A, init, inds.indices[1])
+    is = inds.indices[1]
+    js = inds.indices[2]
+    if length(is) == 1 && length(js) >= 8
+        # It's quite useful to optimize this case for dimensional reductions
+        i = only(is)
+        outer = CartesianIndices(tail(tail(inds.indices)))
+        o1, s = iterate(outer)
+        v = _mapreduce_kernel_commutative(f, op, A, init, js, (i,), o1.I)
+        for o in Iterators.rest(outer, s)
+            v = op(v, _mapreduce_kernel_commutative(f, op, A, init, js, (i,), o.I))
+        end
+        return v
+    elseif length(is) < 8 # TODO: tune this number
+        # These small cases could be further optimized
+        return mapreduce_kernel_commutative(i->f(A[i]), op, inds, init, HasShape{N}(), length(inds))[1]
+    else
+        outer = CartesianIndices(tail(inds.indices))
+        o1, s = iterate(outer)
+        v = _mapreduce_kernel_commutative(f, op, A, init, is, (), o1.I)
+        for o in Iterators.rest(outer, s)
+            v = op(v, _mapreduce_kernel_commutative(f, op, A, init, is, (), o.I))
+        end
+        return v
+    end
+end
+
+@noinline _throw_iterator_assertion_error() = throw(AssertionError("iterator promised a length longer than it iterates"))
+function mapreduce_kernel_commutative(f, op, itr, init, ::Union{HasLength, HasShape}, n, state...)
+    it = iterate(itr, state...)
+    it === nothing && _throw_iterator_assertion_error()
+    a, s = it
+    v_1 = _mapreduce_start(f, op, itr, init, a)
+    if n < 16
+        for _ in 2:n
+            it = iterate(itr, s)
+            it === nothing && _throw_iterator_assertion_error()
+            a, s = it
+            v_1 = op(v_1, f(a))
+        end
+        return v_1, s
+    end
+    @nexprs 7 n->begin
+        it = iterate(itr, s)
+        it === nothing && _throw_iterator_assertion_error()
+        a, s = it
+        v_{n+1} = _mapreduce_start(f, op, itr, init, a)
+    end
+    i = 8
+    for outer i in 16:8:n
+        @nexprs 8 n->begin
+            it = iterate(itr, s)
+            it === nothing && _throw_iterator_assertion_error()
+            a_n, s = it
+        end
+        @nexprs 8 n-> fa_n = f(a_n)
+        @nexprs 8 n-> v_n = op(v_n, fa_n)
+    end
+    v = op(op(op(v_1, v_2), op(v_3, v_4)), op(op(v_5, v_6), op(v_7, v_8)))
+    for _ in i+1:n
+        it = iterate(itr, s)
+        it === nothing && _throw_iterator_assertion_error()
+        a, s = it
+        v = op(v, f(a))
+    end
+    return v, s
+end
+function mapreduce_kernel_commutative(f, op, itr, init, ::IteratorSize, n, state...)
+    it = iterate(itr, state...)
+    it === nothing && return nothing
+    a, s = it
+    v_1 = _mapreduce_start(f, op, itr, init, a)
+    it = iterate(itr, s)
+    it === nothing && return Some(v_1)
+    a, s = it
+    v_2 = _mapreduce_start(f, op, itr, init, a)
+    it = iterate(itr, s)
+    it === nothing && return Some(op(v_1, v_2))
+    a, s = it
+    v_3 = _mapreduce_start(f, op, itr, init, a)
+    it = iterate(itr, s)
+    it === nothing && return Some(op(op(v_1, v_2), v_3))
+    a, s = it
+    v_4 = _mapreduce_start(f, op, itr, init, a)
+    it = iterate(itr, s)
+    it === nothing && return Some(op(op(v_1, v_2), op(v_3, v_4)))
+    a, s = it
+    v_5 = _mapreduce_start(f, op, itr, init, a)
+    it = iterate(itr, s)
+    it === nothing && return Some(op(op(op(v_1, v_2), op(v_3, v_4)), v_5))
+    a, s = it
+    v_6 = _mapreduce_start(f, op, itr, init, a)
+    it = iterate(itr, s)
+    it === nothing && return Some(op(op(op(v_1, v_2), op(v_3, v_4)), op(v_5, v_6)))
+    a, s = it
+    v_7 = _mapreduce_start(f, op, itr, init, a)
+    it = iterate(itr, s)
+    it === nothing && return Some(op(op(op(v_1, v_2), op(v_3, v_4)), op(op(v_5, v_6), v_7)))
+    a, s = it
+    v_8 = _mapreduce_start(f, op, itr, init, a)
+    for _ in 3:n>>3
+        @nexprs 8 N->begin
+            it = iterate(itr, s)
+            if it === nothing
+                N > 7 && (v_7 = op(v_7, f(a_7)))
+                N > 6 && (v_6 = op(v_6, f(a_6)))
+                N > 5 && (v_5 = op(v_5, f(a_5)))
+                N > 4 && (v_4 = op(v_4, f(a_4)))
+                N > 3 && (v_3 = op(v_3, f(a_3)))
+                N > 2 && (v_2 = op(v_2, f(a_2)))
+                N > 1 && (v_1 = op(v_1, f(a_1)))
+                return Some(op(op(op(v_1, v_2), op(v_3, v_4)), op(op(v_5, v_6), op(v_7, v_8))))
+            end
+            a_N, s = it
+        end
+        @nexprs 8 N->fa_N = f(a_N)
+        @nexprs 8 N->v_N = op(v_N, fa_N)
+    end
+    v = op(op(op(v_1, v_2), op(v_3, v_4)), op(op(v_5, v_6), op(v_7, v_8)))
+    i = (n>>3)*8
+    @nexprs 8 N->begin
+        it = iterate(itr, s)
+        if it === nothing
+            return Some(v)
+        elseif n < i+N
+            return (v, s)
+        end
+        a, s = it
+        v = op(v, f(a))
+    end
+    return (v, s)
+end
+
 diff(a::AbstractVector) = diff(a, dims=1)
 
 """
diff --git a/base/permuteddimsarray.jl b/base/permuteddimsarray.jl
index cf9748168aac2..542e9561a6ec0 100644
--- a/base/permuteddimsarray.jl
+++ b/base/permuteddimsarray.jl
@@ -343,23 +343,22 @@ end
     P
 end
 
-const CommutativeOps = Union{typeof(+),typeof(Base.add_sum),typeof(min),typeof(max),typeof(Base._extrema_rf),typeof(|),typeof(&)}
-
-function Base._mapreduce_dim(f, op::CommutativeOps, init::Base._InitialValue, A::PermutedDimsArray, dims::Colon)
-    Base._mapreduce_dim(f, op, init, parent(A), dims)
+using Base: CommutativeOps
+function Base.mapreducedim(f, op::CommutativeOps, A::PermutedDimsArray, init, dims::Colon)
+    Base.mapreducedim(f, op, parent(A), init, dims)
 end
-function Base._mapreduce_dim(f::typeof(identity), op::Union{typeof(Base.mul_prod),typeof(*)}, init::Base._InitialValue, A::PermutedDimsArray{<:Union{Real,Complex}}, dims::Colon)
-    Base._mapreduce_dim(f, op, init, parent(A), dims)
+function Base.mapreducedim(f::typeof(identity), op::Union{typeof(Base.mul_prod),typeof(*)}, A::PermutedDimsArray{<:Union{Real,Complex}}, init, dims::Colon)
+    Base.mapreducedim(f, op, parent(A), init, dims)
 end
 
-function Base.mapreducedim!(f, op::CommutativeOps, B::AbstractArray{T,N}, A::PermutedDimsArray{S,N,perm,iperm}) where {T,S,N,perm,iperm}
+function Base.mapreduce!(f, op::CommutativeOps, B::AbstractArray{T,N}, A::PermutedDimsArray{S,N,perm,iperm}; init=Base._InitialValue()) where {T,S,N,perm,iperm}
     C = PermutedDimsArray{T,N,iperm,perm,typeof(B)}(B) # make the inverse permutation for the output
-    Base.mapreducedim!(f, op, C, parent(A))
+    Base.mapreduce!(f, op, C, parent(A); init)
     B
 end
-function Base.mapreducedim!(f::typeof(identity), op::Union{typeof(Base.mul_prod),typeof(*)}, B::AbstractArray{T,N}, A::PermutedDimsArray{<:Union{Real,Complex},N,perm,iperm}) where {T,N,perm,iperm}
+function Base.mapreduce!(f::typeof(identity), op::Union{typeof(Base.mul_prod),typeof(*)}, B::AbstractArray{T,N}, A::PermutedDimsArray{<:Union{Real,Complex},N,perm,iperm}; init=Base._InitialValue()) where {T,N,perm,iperm}
     C = PermutedDimsArray{T,N,iperm,perm,typeof(B)}(B) # make the inverse permutation for the output
-    Base.mapreducedim!(f, op, C, parent(A))
+    Base.mapreduce!(f, op, C, parent(A); init)
     B
 end
 
diff --git a/base/reduce.jl b/base/reduce.jl
index 968ccda4db28e..d5d111f7a900f 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -7,6 +7,9 @@
 abstract type AbstractBroadcasted end
 const AbstractArrayOrBroadcasted = Union{AbstractArray, AbstractBroadcasted}
 
+has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = IndexStyle(a) === IndexLinear()
+has_fast_linear_indexing(a::AbstractVector) = true
+
 """
     Base.add_sum(x, y)
 
@@ -239,42 +242,6 @@ julia> foldr(=>, 1:4; init=0)
 """
 foldr(op, itr; kw...) = mapfoldr(identity, op, itr; kw...)
 
-## reduce & mapreduce
-
-# `mapreduce_impl()` is called by `mapreduce()` (via `_mapreduce()`, when `A`
-# supports linear indexing) and does actual calculations (for `A[ifirst:ilast]` subset).
-# For efficiency, no parameter validity checks are done, it's the caller's responsibility.
-# `ifirst:ilast` range is assumed to be a valid non-empty subset of `A` indices.
-
-# This is a generic implementation of `mapreduce_impl()`,
-# certain `op` (e.g. `min` and `max`) may have their own specialized versions.
-@noinline function mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted,
-                                  ifirst::Integer, ilast::Integer, blksize::Int)
-    if ifirst == ilast
-        @inbounds a1 = A[ifirst]
-        return mapreduce_first(f, op, a1)
-    elseif ilast - ifirst < blksize
-        # sequential portion
-        @inbounds a1 = A[ifirst]
-        @inbounds a2 = A[ifirst+1]
-        v = op(f(a1), f(a2))
-        @simd for i = ifirst + 2 : ilast
-            @inbounds ai = A[i]
-            v = op(v, f(ai))
-        end
-        return v
-    else
-        # pairwise portion
-        imid = ifirst + (ilast - ifirst) >> 1
-        v1 = mapreduce_impl(f, op, A, ifirst, imid, blksize)
-        v2 = mapreduce_impl(f, op, A, imid+1, ilast, blksize)
-        return op(v1, v2)
-    end
-end
-
-mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, ifirst::Integer, ilast::Integer) =
-    mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op))
-
 """
     mapreduce(f, op, itrs...; [init])
 
@@ -302,8 +269,40 @@ implementations may reuse the return value of `f` for elements that appear multi
 `itr`. Use [`mapfoldl`](@ref) or [`mapfoldr`](@ref) instead for
 guaranteed left or right associativity and invocation of `f` for every value.
 """
-mapreduce(f, op, itr; kw...) = mapfoldl(f, op, itr; kw...)
-mapreduce(f, op, itr, itrs...; kw...) = reduce(op, Generator(f, itr, itrs...); kw...)
+mapreduce(f::F, op::G, x; init=_InitialValue()) where {F,G} = mapreduce_pairwise(f, op, x, init)
+mapreduce(f::F, op::G, x, xs...; init=_InitialValue()) where {F,G} = mapreduce_pairwise(identity, op, Generator(f, x, xs...), init)
+
+"""
+    mapreduce(f, op, A::AbstractArray...; dims=:, [init])
+
+Evaluates to the same as `reduce(op, map(f, A...); dims=dims, init=init)`, but is generally
+faster because the intermediate array is avoided.
+
+!!! compat "Julia 1.2"
+    `mapreduce` with multiple iterators requires Julia 1.2 or later.
+
+# Examples
+```jldoctest
+julia> a = reshape(Vector(1:16), (4,4))
+4×4 Matrix{Int64}:
+ 1  5   9  13
+ 2  6  10  14
+ 3  7  11  15
+ 4  8  12  16
+
+julia> mapreduce(isodd, *, a, dims=1)
+1×4 Matrix{Bool}:
+ 0  0  0  0
+
+julia> mapreduce(isodd, |, a, dims=1)
+1×4 Matrix{Bool}:
+ 1  1  1  1
+```
+"""
+mapreduce(f::F, op::G, A::AbstractArrayOrBroadcasted; init=_InitialValue(), dims=(:)) where {F,G} = mapreducedim(f, op, A, init, dims)
+mapreduce(f::F, op::G, A::AbstractArrayOrBroadcasted, As::AbstractArrayOrBroadcasted...; init=_InitialValue(), dims=(:)) where {F,G} =
+    reduce(op, map(f, A, As...); init, dims)
+mapreducedim(f::F, op::G, A, init, ::Colon) where {F,G} = mapreduce_pairwise(f, op, A, init)
 
 # Note: sum_seq usually uses four or more accumulators after partial
 # unrolling, so each accumulator gets at most 256 numbers
@@ -422,34 +421,203 @@ The default is `reduce_first(op, f(x))`.
 """
 mapreduce_first(f, op, x) = reduce_first(op, f(x))
 
-_mapreduce(f, op, A::AbstractArrayOrBroadcasted) = _mapreduce(f, op, IndexStyle(A), A)
-
-function _mapreduce(f, op, ::IndexLinear, A::AbstractArrayOrBroadcasted)
-    inds = LinearIndices(A)
-    n = length(inds)
-    if n == 0
-        return mapreduce_empty_iter(f, op, A, IteratorEltype(A))
-    elseif n == 1
-        @inbounds a1 = A[first(inds)]
-        return mapreduce_first(f, op, a1)
-    elseif n < 16 # process short array here, avoid mapreduce_impl() compilation
-        @inbounds i = first(inds)
-        @inbounds a1 = A[i]
-        @inbounds a2 = A[i+=1]
-        s = op(f(a1), f(a2))
-        while i < last(inds)
-            @inbounds Ai = A[i+=1]
-            s = op(s, f(Ai))
-        end
-        return s
-    else
-        return mapreduce_impl(f, op, A, first(inds), last(inds))
+_empty_eltype(x) = _empty_eltype(x, IteratorEltype(x))
+_empty_eltype(x, ::HasEltype) = eltype(x)
+_empty_eltype(_, _) = _empty_reduce_error()
+"""
+    _mapreduce_start(f, op, A, init, [a1])
+Perform the first step in a mapped reduction over `A` with 0 or one or more elements.
+The one-element method may be called multiple times within a single reduction at
+the start of each new chain of `op` calls.
+"""
+@inline _mapreduce_start(f, op, A, ::_InitialValue) = mapreduce_empty(f, op, _empty_eltype(A))
+@inline _mapreduce_start(f, op, A, ::_InitialValue, a1) = mapreduce_first(f, op, a1)
+@inline _mapreduce_start(f, op, A, init) = init
+@inline _mapreduce_start(f, op, A, init, a1) = op(init, f(a1))
+
+"""
+    mapreduce_pairwise(f, op, A, init)
+    mapreduce_pairwise(f, op, A, init, indices::AbstractArray)
+    mapreduce_pairwise(f, op, itr, init, ::Union{HasLength, HasShape})
+    mapreduce_pairwise(f, op, itr, init, ::IteratorSize)
+
+Perform a pairwise reassociation of the mapped reduction with `f` and `op`
+Returns the result `v` or, for the 6+ argument iterator methods, a tuple `(v, state)` or `(v, iterate(itr, state))`
+
+These entry-points handle the empty case and dispatches to a polyalgorithm
+with three distinct mechanisms for accessing elements and structuring the subsequent
+recursion; the methods with length must process at least one element.
+
+    mapreduce_pairwise(f, op, A, init, indices::AbstractArray)
+
+    * An indexing method recursively splits `indices` until there are fewer than
+      `pairwise_blocksize` indices to process. CartesianIndices split into halves
+      across columns first, then within a single column (if necessary).
+
+    mapreduce_pairwise(f, op, itr, init, ::Union{HasLength, HasShape}, n, [state]) -> (value, state)
+
+    * An iterable method (with length) recursively takes half as many elements from
+      the iterable until `n` is less than or equal to the `pairwise_blocksize`. This
+      is slightly more complicated than the indexing case as we must additionally pass
+      (and return) the iterable's state along with the result of each part of the
+      reduction.
+
+    mapreduce_pairwise(f, op, itr, init, ::IteratorSize, n, [state]) -> (value, state) | nothing | Some(value)
+
+    * An iterable method (without length) that — unlike the above two methods — might
+      not process any values (because we don't know if there are values in the iterable).
+      This is slightly more complicated than the length case as there are three possibilities:
+        - there may not be any values left in the iterator at all (returning `nothing`)
+        - there may have been _some_ values but the iterator was exhausted (returning `Some` value)
+        - there may be more values left, returning a `(value, state)` tuple
+      Additionally this algorithm effectively works backwards from the other two;
+      instead of starting at the *final* `op` reduction of the (possibly many) branches
+      of reduction chains, this starts at the initial chain of length **up to** the
+      `pairwise_blocksize` and then iteratively widens to incorporate larger chunks
+      (that are then recursively split) as necessary.
+
+All implementations dispatch to a similarly structured `mapreduce_kernel` to handle the
+base case that's essentially a `mapfoldl` that allows for further SIMD-like optimizations
+and reassociations.
+"""
+function mapreduce_pairwise(f::F, op::G, A::AbstractArrayOrBroadcasted, init) where {F, G}
+    n = length(A)
+    n == 0 && return _mapreduce_start(f, op, A, init)
+    n <= pairwise_blocksize(f, op) && return mapreduce_kernel(f, op, A, init, eachindex(A))
+    return mapreduce_pairwise(f, op, A, init, eachindex(A))
+end
+mapreduce_pairwise(f::F, op::G, itr, init) where {F, G} = mapreduce_pairwise(f, op, itr, init, IteratorSize(itr))
+function mapreduce_pairwise(f, op, itr, init, S::Union{HasLength, HasShape})
+    n = length(itr)
+    n == 0 && return _mapreduce_start(f, op, itr, init)
+    # Note that there can be a significant advantage to the for-loop lowering of mapfoldl
+    n <= pairwise_blocksize(f, op) && return mapreduce_kernel(f, op, itr, init, S, n)[1]
+    return mapreduce_pairwise(f, op, itr, init, S, n)[1]
+end
+# There are three possible return values for partial reductions of SizeUnknown
+# No values -> return nothing
+# Some values, but ended -> return Some(value)
+# Some values, more to come -> return value, state
+function mapreduce_pairwise(f::F, op::G, itr, init, S::IteratorSize) where {F, G}
+    n = pairwise_blocksize(f, op)
+    ret = mapreduce_kernel(f, op, itr, init, S, n)
+    ret === nothing && return _mapreduce_start(f, op, itr, init)
+    ret isa Some && return something(ret)
+    v, s = ret
+    while n < prevpow(2, typemax(n))
+        n <<= 1
+        ret = mapreduce_pairwise(f, op, itr, init, S, n, s)
+        ret === nothing && return v
+        ret isa Some && return op(v, something(ret))
+        v1, s = ret
+        v = op(v, v1)
     end
+    return v
 end
 
-mapreduce(f, op, a::Number) = mapreduce_first(f, op, a)
+function mapreduce_pairwise(f::F, op::G, A, init, inds) where {F,G}
+    if length(inds) <= max(10, pairwise_blocksize(f, op))
+        return mapreduce_kernel(f, op, A, init, inds)
+    else
+        p1, p2 = halves(inds)
+        v1 = mapreduce_pairwise(f, op, A, init, p1)
+        v2 = mapreduce_pairwise(f, op, A, init, p2)
+        return op(v1, v2)
+    end
+end
+function halves(inds)
+    n = length(inds) >> 1
+    return (inds[begin:begin+n-1], inds[begin+n:end])
+end
+# Recursively find the last non-singleton range in the tuple to halve, keeping the rest whole
+_halves(tup) = length(tup[end]) > 1 ?
+                    (map(_wholes, front(tup))..., halves(tup[end])) :
+                    (_halves(front(tup))..., _wholes(tup[end]))
+_halves(::Tuple{}) = ()
+# This unconditionally re-indexes by begin:end to ensure type stability with the halving re-index
+_wholes(r) = r[begin:end], r[begin:end]
+
+function mapreduce_pairwise(f::F, op::G, itr, init, S::Union{HasLength,HasShape}, n, state...) where {F,G}
+    if n <= max(10, pairwise_blocksize(f, op))
+        return mapreduce_kernel(f, op, itr, init, S, n, state...)
+    else
+        ndiv2 = n >> 1
+        v1, s = mapreduce_pairwise(f, op, itr, init, S, ndiv2, state...)
+        v2, s = mapreduce_pairwise(f, op, itr, init, S, n-ndiv2, s)
+        return (op(v1, v2), s)
+    end
+end
+function mapreduce_pairwise(f::F, op::G, itr, init, S::IteratorSize, n, state...) where {F,G}
+    if n <= max(10, pairwise_blocksize(f, op))
+        return mapreduce_kernel(f, op, itr, init, S, n, state...)
+    else
+        ndiv2 = n >> 1
+        ret = mapreduce_pairwise(f, op, itr, init, S, ndiv2, state...)
+        ret === nothing && return nothing
+        ret isa Some && return ret
+        v1, s = ret
+        ret = mapreduce_pairwise(f, op, itr, init, S, n-ndiv2, s)
+        ret === nothing && return Some(v1)
+        ret isa Some && return Some(op(v1, something(ret)))
+        v2, s = ret
+        return (op(v1, v2), s)
+    end
+end
 
-_mapreduce(f, op, ::IndexCartesian, A::AbstractArrayOrBroadcasted) = mapfoldl(f, op, A)
+"""
+    mapreduce_kernel(f, op, A, init, inds::AbstractArray) -> v
+    mapreduce_kernel(f, op, itr, init, ::Union{HasLength, HasShape}, n, [state]) -> (v, state)
+    mapreduce_kernel(f, op, itr, init, ::IteratorSize, n, [state]) -> (v, state) | nothing | Some(v)
+
+Perform the mapped reduction with `f` and `op` akin to a `mapfoldl` but permitting SIMD-like
+optimizations and reassociations. Corresponding to `mapreduce_pairwise`, this function uses
+three (and a half) different mechanisms for accessing elements and structuring its loop(s):
+
+* Indexing with `inds` into `A`
+    * (with a dedicated optimization for CartesianIndices)
+* Iterating exactly `n` elements from `itr` with a known length and at least `n` elements remaining
+    This method additionally takes (and returns as the second element in a tuple) the current iterator state
+* Iterating *up to* `n` elements from `itr`, but possibly stopping early
+    This method additionally takes (and returns as the second element in a tuple) the *next* `(val, state)`
+    tuple from the iterator or `nothing`
+"""
+function mapreduce_kernel(f::F, op::G, A, init, inds::AbstractArray) where {F, G}
+    is_commutative_op(op) && return mapreduce_kernel_commutative(f, op, A, init, inds)
+    a1 = @inbounds A[inds[begin]]
+    v = _mapreduce_start(f, op, A, init, a1)
+    length(inds) == 1 && return v
+    for i in inds[begin+1:end]
+        a = @inbounds A[i]
+        v = op(v, f(a))
+    end
+    return v
+end
+function mapreduce_kernel(f::F, op::G, itr, init, S::Union{HasLength, HasShape}, n, state...) where {F, G}
+    is_commutative_op(op) && return mapreduce_kernel_commutative(f, op, itr, init, S, n, state...)
+    a1, s = iterate(itr, state...)
+    v = _mapreduce_start(f, op, itr, init, a1)
+    for _ in 2:n
+        it = iterate(itr, s)
+        it === nothing && return v, s # This will only happen if an iterator lied about its length
+        a, s = it
+        v = op(v, f(a))
+    end
+    return v, s
+end
+function mapreduce_kernel(f::F, op::G, itr, init, S::IteratorSize, n, state...) where {F, G}
+    is_commutative_op(op) && return mapreduce_kernel_commutative(f, op, itr, init, S, n, state...)
+    it = iterate(itr, state...)
+    it === nothing && return nothing
+    a1, s = it
+    v = _mapreduce_start(f, op, itr, init, a1)
+    for _ in 2:n
+        it = iterate(itr, s)
+        it === nothing && return Some(v)
+        a, s = it
+        v = op(v, f(a))
+    end
+    return (v, s)
+end
 
 """
     reduce(op, itr; [init])
@@ -486,9 +654,41 @@ julia> reduce(*, Int[]; init=1)
 1
 ```
 """
-reduce(op, itr; kw...) = mapreduce(identity, op, itr; kw...)
+reduce(op, itr; init=_InitialValue()) = mapreduce(identity, op, itr; init)
+
+"""
+    reduce(f, A::AbstractArray; dims=:, [init])
+
+Reduce 2-argument function `f` along dimensions of `A`. `dims` is a vector specifying the
+dimensions to reduce, and the keyword argument `init` is the initial value to use in the
+reductions. For `+`, `*`, `max` and `min` the `init` argument is optional.
 
-reduce(op, a::Number) = a  # Do we want this?
+The associativity of the reduction is implementation-dependent; if you need a particular
+associativity, e.g. left-to-right, you should write your own loop or consider using
+[`foldl`](@ref) or [`foldr`](@ref). See documentation for [`reduce`](@ref).
+
+# Examples
+```jldoctest
+julia> a = reshape(Vector(1:16), (4,4))
+4×4 Matrix{Int64}:
+ 1  5   9  13
+ 2  6  10  14
+ 3  7  11  15
+ 4  8  12  16
+
+julia> reduce(max, a, dims=2)
+4×1 Matrix{Int64}:
+ 13
+ 14
+ 15
+ 16
+
+julia> reduce(max, a, dims=1)
+1×4 Matrix{Int64}:
+ 4  8  12  16
+```
+"""
+reduce(op, A::AbstractArrayOrBroadcasted; init=_InitialValue(), dims=(:)) = mapreduce(identity, op, A; init, dims)
 
 ###### Specific reduction functions ######
 
@@ -1109,3 +1309,8 @@ for (fred, f) in ((maximum, max), (minimum, min), (sum, add_sum))
         isempty(r) ? init : op(init, $fred(r))
     end
 end
+
+# Reduction kernels that explicitly encode simplistic SIMD-ish reorderings
+const CommutativeOps = Union{typeof(+),typeof(Base.add_sum),typeof(min),typeof(max),typeof(Base._extrema_rf),typeof(|),typeof(&)}
+is_commutative_op(::CommutativeOps) = true
+is_commutative_op(::Any) = false
diff --git a/base/reducedim.jl b/base/reducedim.jl
index ba3434494bc0b..81aec43b5df0d 100644
--- a/base/reducedim.jl
+++ b/base/reducedim.jl
@@ -36,347 +36,212 @@ end
 
 ###### Generic reduction functions #####
 
-## initialization
-# initarray! is only called by sum!, prod!, etc.
-for (Op, initfun) in ((:(typeof(add_sum)), :zero), (:(typeof(mul_prod)), :one))
-    @eval initarray!(a::AbstractArray{T}, ::Any, ::$(Op), init::Bool, src::AbstractArray) where {T} = (init && fill!(a, $(initfun)(T)); a)
-end
-
-initarray!(a::AbstractArray{T}, f, ::Union{typeof(min),typeof(max),typeof(_extrema_rf)},
-    init::Bool, src::AbstractArray) where {T} = (init && mapfirst!(f, a, src); a)
-
-for (Op, initval) in ((:(typeof(and_all)), true), (:(typeof(or_any)), false))
-    @eval initarray!(a::AbstractArray, ::Any, ::$(Op), init::Bool, src::AbstractArray) = (init && fill!(a, $initval); a)
-end
-
-# reducedim_initarray is called by
-reducedim_initarray(A::AbstractArrayOrBroadcasted, region, init, ::Type{R}) where {R} = fill!(similar(A,R,reduced_indices(A,region)), init)
-reducedim_initarray(A::AbstractArrayOrBroadcasted, region, init::T) where {T} = reducedim_initarray(A, region, init, T)
-
-# TODO: better way to handle reducedim initialization
-#
-# The current scheme is basically following Steven G. Johnson's original implementation
-#
+# Given two indices or ranges, merge them by dimension akin to a broadcasted `ifelse` over the dims
+_sliceall(x) = x[begin]:x[end] # avoid instabilities with OneTos and offset axes
+_ifelseslice(b,x,y) = ifelse(b, _sliceall(x), _sliceall(y))
+mergeindices(b::NTuple{N,Bool}, x::CartesianIndices{N}, y::CartesianIndices{N}) where {N} =
+    CartesianIndices(map(_ifelseslice, b, x.indices, y.indices))
+mergeindices(b::NTuple{N,Bool}, x::CartesianIndex{N}, y::CartesianIndices{N}) where {N} =
+    CartesianIndices(map(_ifelseslice, b, x.I, y.indices))
+mergeindices(b::NTuple{N,Bool}, x::CartesianIndices{N}, y::CartesianIndex{N}) where {N} =
+    CartesianIndices(map(_ifelseslice, b, x.indices, y.I))
+mergeindices(b::NTuple{N,Bool}, x::CartesianIndex{N}, y::CartesianIndex{N}) where {N} =
+    CartesianIndex(map((b,x,y)->ifelse(b, x, y), b, x.I, y.I))
+
+keep_first_trues(::Tuple{}) = ()
+keep_first_trues(t) = t[1] ? (true, keep_first_trues(tail(t))...) : ntuple(Returns(false), length(t))
+
+# These functions aren't used in the implementation here, but are used widely in the ecosystem
 promote_union(T::Union) = promote_type(promote_union(T.a), promote_union(T.b))
 promote_union(T) = T
-
 _realtype(::Type{<:Complex}) = Real
 _realtype(::Type{Complex{T}}) where T<:Real = T
 _realtype(T::Type) = T
 _realtype(::Union{typeof(abs),typeof(abs2)}, T) = _realtype(T)
 _realtype(::Any, T) = T
 
-function reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::AbstractArray, region)
-    _reducedim_init(f, op, zero, sum, A, region)
+mapreduce_similar(A, ::Type{T}, dims) where {T} = similar(A, T, dims)
+
+# These special internal types allow exposing both in- and out-of-place array initialization
+# as a comprehension-like function call with a generator
+struct _MapReduceAllocator{T}
+    itr::T
 end
-function reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::AbstractArray, region)
-    _reducedim_init(f, op, one, prod, A, region)
+_similar_for(c::_MapReduceAllocator, ::Type{T}, itr, ::SizeUnknown, ::Nothing) where {T} =
+    mapreduce_similar(c.itr, T, (0,))
+_similar_for(c::_MapReduceAllocator, ::Type{T}, itr, ::HasLength, len::Integer) where {T} =
+    mapreduce_similar(c.itr, T, (len,))
+_similar_for(c::_MapReduceAllocator, ::Type{T}, itr, ::HasShape, axs) where {T} =
+    mapreduce_similar(c.itr, T, axs)
+(mra::_MapReduceAllocator)(itr) = collect_similar(mra, itr)
+
+struct _MapReduceInPlace{T,F}
+    A::T
+    op::F
+    update::Bool
 end
-function _reducedim_init(f, op, fv, fop, A, region)
-    T = _realtype(f, promote_union(eltype(A)))
-    if T !== Any && applicable(zero, T)
-        x = f(zero(T))
-        z = op(fv(x), fv(x))
-        Tr = z isa T ? T : typeof(z)
+function (mri::_MapReduceInPlace)(g::Generator{<:AbstractArray})
+    A = mri.A
+    if mri.update
+        for i in g.iter
+            A[i] = mri.op(A[i], g.f(i))
+        end
     else
-        z = fv(fop(f, A))
-        Tr = typeof(z)
+        for i in g.iter
+            A[i] = g.f(i)
+        end
     end
-    return reducedim_initarray(A, region, z, Tr)
+    return A
 end
-
-# initialization when computing minima and maxima requires a little care
-for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min, :(-Inf), :typemin))
-    @eval function reducedim_init(f, op::typeof($f1), A::AbstractArray, region)
-        # First compute the reduce indices. This will throw an ArgumentError
-        # if any region is invalid
-        ri = reduced_indices(A, region)
-
-        # Next, throw if reduction is over a region with length zero
-        any(i -> isempty(axes(A, i)), region) && _empty_reduce_error()
-
-        # Make a view of the first slice of the region
-        A1 = view(A, ri...)
-
-        if isempty(A1)
-            # If the slice is empty just return non-view version as the initial array
-            return map(f, A1)
-        else
-            # otherwise use the min/max of the first slice as initial value
-            v0 = mapreduce(f, $f2, A1)
-
-            T = _realtype(f, promote_union(eltype(A)))
-            Tr = v0 isa T ? T : typeof(v0)
-
-            # but NaNs, missing and unordered values need to be avoided as initial values
-            if v0 isa Number && isnan(v0)
-                # v0 is NaN
-                v0 = oftype(v0, $initval)
-            elseif isunordered(v0)
-                # v0 is missing or a third-party unordered value
-                Tnm = nonmissingtype(Tr)
-                if Tnm <: Union{BitInteger, IEEEFloat, BigFloat}
-                    v0 = $typeextreme(Tnm)
-                elseif !all(isunordered, A1)
-                    v0 = mapreduce(f, $f2, Iterators.filter(!isunordered, A1))
-                end
-            end
-            # v0 may have changed type.
-            Tr = v0 isa T ? T : typeof(v0)
-
-            return reducedim_initarray(A, region, v0, Tr)
+function (mri::_MapReduceInPlace)(g::Generator{<:Iterators.Enumerate{<:AbstractArray}})
+    A = mri.A
+    if mri.update
+        for ki in g.iter
+            A[ki[2]] = mri.op(A[ki[2]], g.f(ki))
+        end
+    else
+        for ki in g.iter
+            A[ki[2]] = g.f(ki)
         end
     end
+    return A
 end
 
-function reducedim_init(f::ExtremaMap, op::typeof(_extrema_rf), A::AbstractArray, region)
-    # First compute the reduce indices. This will throw an ArgumentError
-    # if any region is invalid
-    ri = reduced_indices(A, region)
-
-    # Next, throw if reduction is over a region with length zero
-    any(i -> isempty(axes(A, i)), region) && _empty_reduce_error()
-
-    # Make a view of the first slice of the region
-    A1 = view(A, ri...)
-
-    isempty(A1) && return map(f, A1)
-    # use the max/min of the first slice as initial value for non-empty cases
-    v0 = reverse(mapreduce(f, op, A1)) # turn minmax to maxmin
-
-    T = _realtype(f.f, promote_union(eltype(A)))
-    Tmin = v0[1] isa T ? T : typeof(v0[1])
-    Tmax = v0[2] isa T ? T : typeof(v0[2])
-
-    # but NaNs and missing need to be avoided as initial values
-    if v0[1] isa Number && isnan(v0[1])
-        # v0 is NaN
-        v0 = oftype(v0[1], Inf), oftype(v0[2], -Inf)
-    elseif isunordered(v0[1])
-        # v0 is missing or a third-party unordered value
-        Tminnm = nonmissingtype(Tmin)
-        Tmaxnm = nonmissingtype(Tmax)
-        if Tminnm <: Union{BitInteger, IEEEFloat, BigFloat} &&
-            Tmaxnm <: Union{BitInteger, IEEEFloat, BigFloat}
-            v0 = (typemax(Tminnm), typemin(Tmaxnm))
-        elseif !all(isunordered, A1)
-            v0 = reverse(mapreduce(f, op, Iterators.filter(!isunordered, A1)))
-        end
+if isdefined(Core, :Compiler)
+    _mapreduce_might_widen(_, _, _, _, ::_MapReduceInPlace) = false
+    function _mapreduce_might_widen(f::F, op::G, A::T, init, _) where {F,G,T}
+        return !isconcretetype(Core.Compiler.return_type(x->op(_mapreduce_start(f, op, first(x), init), f(first(x))), Tuple{T}))
     end
-    # v0 may have changed type.
-    Tmin = v0[1] isa T ? T : typeof(v0[1])
-    Tmax = v0[2] isa T ? T : typeof(v0[2])
-
-    return reducedim_initarray(A, region, v0, Tuple{Tmin,Tmax})
+else
+    _mapreduce_might_widen(_, _, _, _, ::_MapReduceInPlace) = false
+    _mapreduce_might_widen(_, _, _, _, _) = true
 end
 
-reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(max), A::AbstractArray{T}, region) where {T} =
-    reducedim_initarray(A, region, zero(f(zero(T))), _realtype(f, T))
-
-reducedim_init(f, op::typeof(and_all), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, true)
-reducedim_init(f, op::typeof(or_any), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, false)
-
-# These definitions are wrong in general; Cf. JuliaLang/julia#45562
-reducedim_init(f, op::typeof(&), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, true)
-reducedim_init(f, op::typeof(|), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, false)
-
-# specialize to make initialization more efficient for common cases
-
-let
-    BitIntFloat = Union{BitInteger, IEEEFloat}
-    T = Union{
-        Any[AbstractArray{t} for t in uniontypes(BitIntFloat)]...,
-        Any[AbstractArray{Complex{t}} for t in uniontypes(BitIntFloat)]...}
-
-    global function reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::T, region)
-        z = zero(f(zero(eltype(A))))
-        reducedim_initarray(A, region, op(z, z))
+# When performing dimensional reductions over arrays with singleton dimensions, we have
+# a choice as to whether that singleton dimenion should be a part of the reduction or not;
+# it does not affect the output. It's advantageous to additionally consider _leading_ singleton
+# dimensions as part of the reduction as that allows more cases to be considered contiguous;
+# but once we've broken contiguity it's more helpful to _ignore_ those cases.
+compute_inner_dims(flagged_dims, source_size) =
+    flagged_dims[1] || source_size[1] == 1 ?
+        (true, compute_inner_dims(tail(flagged_dims), tail(source_size))...) :
+        (false, map((flag,sz)->flag && sz != 1, tail(flagged_dims), tail(source_size))...)
+compute_inner_dims(::Tuple{}, ::Tuple{}) = ()
+
+function mapreducedim(f::F, op::OP, A, init, dims, alloc=_MapReduceAllocator(A)) where {F, OP}
+    if alloc isa _MapReduceInPlace
+        # We can ignore dims and just trust the output array's axes. Note that the
+        # other branch here optimizes for the case where the input array _also_ has
+        # a singleton dimension, but we cannot do that here because OffsetArrays
+        # supports reductions into differently-offset singleton dimensions. This means
+        # we cannot index directly into A with an `outer` index. The output may also have
+        # fewer dimensions than A, so we may need to add trailing dims here:
+        outer = CartesianIndices(ntuple(d->axes(alloc.A, d), ndims(A)))
+        is_inner_dim = map(==(1), size(outer))
+    else
+        is_inner_dim = compute_inner_dims(ntuple(d->d in dims, ndims(A)), size(A))
+        outer = CartesianIndices(reduced_indices(A, dims))
     end
-    global function reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::T, region)
-        u = one(f(one(eltype(A))))
-        reducedim_initarray(A, region, op(u, u))
+    inner = CartesianIndices(map((b,ax)->b ? ax : reduced_index(ax), is_inner_dim, axes(A)))
+    n = length(inner)
+    # Handle the empty and trivial 1-element cases:
+    if (n == 0 || isempty(A))
+        # This is broken out of the comprehension to ensure it's called, avoiding an empty Vector{Union{}}
+        v = _mapreduce_start(f, op, A, init)
+        return alloc(v for _ in outer)
     end
-end
-
-## generic (map)reduction
-
-has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = IndexStyle(a) === IndexLinear()
-has_fast_linear_indexing(a::AbstractVector) = true
-
-function check_reducedims(R, A)
-    # Check whether R has compatible dimensions w.r.t. A for reduction
-    #
-    # It returns an integer value (useful for choosing implementation)
-    # - If it reduces only along leading dimensions, e.g. sum(A, dims=1) or sum(A, dims=(1,2)),
-    #   it returns the length of the leading slice. For the two examples above,
-    #   it will be size(A, 1) or size(A, 1) * size(A, 2).
-    # - Otherwise, e.g. sum(A, dims=2) or sum(A, dims=(1,3)), it returns 0.
-    #
-    ndims(R) <= ndims(A) || throw(DimensionMismatch("cannot reduce $(ndims(A))-dimensional array to $(ndims(R)) dimensions"))
-    lsiz = 1
-    had_nonreduc = false
-    for i = 1:ndims(A)
-        Ri, Ai = axes(R, i), axes(A, i)
-        sRi, sAi = length(Ri), length(Ai)
-        if sRi == 1
-            if sAi > 1
-                if had_nonreduc
-                    lsiz = 0  # to reduce along i, but some previous dimensions were non-reducing
-                else
-                    lsiz *= sAi  # if lsiz was set to zero, it will stay to be zero
-                end
-            end
-        else
-            Ri == Ai || throw(DimensionMismatch("reduction on array with indices $(axes(A)) with output with indices $(axes(R))"))
-            had_nonreduc = true
-        end
+    n == 1 && return alloc(_mapreduce_start(f, op, A, init, A[i]) for i in outer)
+    # Now there are multiple loop ordering strategies depending upon the `dims`:
+    if is_inner_dim == keep_first_trues(is_inner_dim) || _mapreduce_might_widen(f, op, A, init, alloc)
+        # Column major contiguous reduction! This is the easy case
+        return mapreducedim_naive(f, op, A, init, is_inner_dim, inner, outer, alloc)
+    elseif is_inner_dim[1] # `dims` includes the first dimension
+        return mapreducedim_colmajor(f, op, A, init, is_inner_dim, inner, outer, alloc)
+    else
+        return mapreducedim_rowmajor(f, op, A, init, is_inner_dim, inner, outer, alloc)
     end
-    return lsiz
 end
 
-"""
-Extract first entry of slices of array A into existing array R.
-"""
-copyfirst!(R::AbstractArray, A::AbstractArray) = mapfirst!(identity, R, A)
+linear_size(A, is_inner_dim) = linear_size(IndexStyle(A), A, is_inner_dim)
+linear_size(::IndexLinear, A, is_inner_dim) = if is_inner_dim == keep_first_trues(is_inner_dim)
+    prod(map((b,sz)->ifelse(b, sz, 1), is_inner_dim, size(A)))
+else
+    0
+end
+linear_size(::IndexStyle, _, _) = 0
 
-function mapfirst!(f::F, R::AbstractArray, A::AbstractArray{<:Any,N}) where {N, F}
-    lsiz = check_reducedims(R, A)
-    t = _firstreducedslice(axes(R), axes(A))
-    map!(f, R, view(A, t...))
+function mapreducedim_naive(f::F, op::OP, A, init, is_inner_dim, inner, outer, alloc) where {F, OP}
+    lsiz = linear_size(A, is_inner_dim)
+    if lsiz > 0
+        i0 = first(LinearIndices(A))
+        alloc(mapreduce_pairwise(f, op, A, init, (i0:i0+lsiz-1) .+ (lsiz*(i-1))) for (i,_) in enumerate(outer))
+    else
+        alloc(mapreduce_pairwise(f, op, A, init, mergeindices(is_inner_dim, inner, i)) for i in outer)
+    end
 end
-# We know that the axes of R and A are compatible, but R might have a different number of
-# dimensions than A, which is trickier than it seems due to offset arrays and type stability
-_firstreducedslice(::Tuple{}, a::Tuple{}) = ()
-_firstreducedslice(::Tuple, ::Tuple{}) = ()
-@inline _firstreducedslice(::Tuple{}, a::Tuple) = (_firstslice(a[1]), _firstreducedslice((), tail(a))...)
-@inline _firstreducedslice(r::Tuple, a::Tuple) = (length(r[1])==1 ? _firstslice(a[1]) : r[1], _firstreducedslice(tail(r), tail(a))...)
-_firstslice(i::OneTo) = OneTo(1)
-_firstslice(i::Slice) = Slice(_firstslice(i.indices))
-_firstslice(i) = i[firstindex(i):firstindex(i)]
-
-function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted)
-    lsiz = check_reducedims(R,A)
-    isempty(A) && return R
-
-    if has_fast_linear_indexing(A) && lsiz > 16
-        # use mapreduce_impl, which is probably better tuned to achieve higher performance
-        nslices = div(length(A), lsiz)
-        ibase = first(LinearIndices(A))-1
-        for i in eachindex(R)
-            r = op(@inbounds(R[i]), mapreduce_impl(f, op, A, ibase+1, ibase+lsiz))
-            @inbounds R[i] = r
-            ibase += lsiz
-        end
-        return R
+function mapreducedim_colmajor(f::F, op::OP, A, init, is_inner_dim, inner, outer, alloc, enforce_pairwise=true) where {F, OP}
+    is_contiguous_inner = keep_first_trues(is_inner_dim)
+    contiguous_inner = mergeindices(is_contiguous_inner, inner, first(inner))
+    discontiguous_inner = mergeindices(is_contiguous_inner, first(inner), inner)
+    if enforce_pairwise && length(discontiguous_inner) > pairwise_blocksize(f, op)
+        return mapreducedim_naive(f, op, A, init, is_inner_dim, inner, outer, alloc)
     end
-    indsAt, indsRt = safe_tail(axes(A)), safe_tail(axes(R)) # handle d=1 manually
-    keep, Idefault = Broadcast.shapeindexer(indsRt)
-    if reducedim1(R, A)
-        # keep the accumulator as a local variable when reducing along the first dimension
-        i1 = first(axes1(R))
-        for IA in CartesianIndices(indsAt)
-            IR = Broadcast.newindex(IA, keep, Idefault)
-            @inbounds r = R[i1,IR]
-            @simd for i in axes(A, 1)
-                r = op(r, f(@inbounds(A[i, IA])))
-            end
-            @inbounds R[i1,IR] = r
-        end
-    else
-        for IA in CartesianIndices(indsAt)
-            IR = Broadcast.newindex(IA, keep, Idefault)
-            @simd for i in axes(A, 1)
-                v = op(@inbounds(R[i,IR]), f(@inbounds(A[i,IA])))
-                @inbounds R[i,IR] = v
-            end
+    # Initialize our reduction with the result of doing the first contiguous reduction
+    R = alloc(mapreduce_pairwise(f, op, A, init, mergeindices(is_inner_dim, contiguous_inner, o)) for o in outer)
+    for dci in Iterators.drop(discontiguous_inner, 1)
+        for o in outer
+            i = mergeindices(is_inner_dim, dci, o)
+            R[o] = op(R[o], mapreduce_pairwise(f, op, A, init, mergeindices(is_contiguous_inner, contiguous_inner, i)))
         end
     end
     return R
 end
 
-mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted) =
-    (_mapreducedim!(f, op, R, A); R)
-
-reducedim!(op, R::AbstractArray{RT}, A::AbstractArrayOrBroadcasted) where {RT} =
-    mapreducedim!(identity, op, R, A)
+function mapreducedim_rowmajor(f::F, op::OP, A, init, is_inner_dim, inner, outer, alloc, enforce_pairwise=true) where {F, OP}
+    # This will only be cache-optimal in the fully "backwards" case — that is, dims are `twoplus:ndims(A)`
+    # And even then, it's simply not possible to do this cache-optimally with a pairwise reassociation;
+    # this is effectively a foldl.
+    if enforce_pairwise && length(inner) > pairwise_blocksize(f, op)
+        return mapreducedim_naive(f, op, A, init, is_inner_dim, inner, outer, alloc)
+    end
+    # Take one element from each outer iteration to initialize R
+    R = alloc(_mapreduce_start(f, op, A, init, A[mergeindices(is_inner_dim, first(inner), o)]) for o in outer)
+    for i in Iterators.drop(inner, 1)
+        @simd for o in outer
+            # SIMD still helps here! It doesn't reassociate _within_ each reduction, but it can allow
+            # evaluations _across_ the multiple reductions to coalesce into a single instruction at times
+            iA = mergeindices(is_inner_dim, i, o)
+            v = op(@inbounds(R[o]), f(@inbounds(A[iA])))
+            @inbounds R[o] = v
+        end
+    end
+    return R
+end
 
 """
-    mapreduce(f, op, A::AbstractArray...; dims=:, [init])
-
-Evaluates to the same as `reduce(op, map(f, A...); dims=dims, init=init)`, but is generally
-faster because the intermediate array is avoided.
+    mapreduce!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted; [init])
 
-!!! compat "Julia 1.2"
-    `mapreduce` with multiple iterators requires Julia 1.2 or later.
+Compute `mapreduce(f, op, A; init, dims)` where `dims` are the singleton dimensions of `R`, storing the result into `R`.
 
-# Examples
-```jldoctest
-julia> a = reshape(Vector(1:16), (4,4))
-4×4 Matrix{Int64}:
- 1  5   9  13
- 2  6  10  14
- 3  7  11  15
- 4  8  12  16
-
-julia> mapreduce(isodd, *, a, dims=1)
-1×4 Matrix{Bool}:
- 0  0  0  0
-
-julia> mapreduce(isodd, |, a, dims=1)
-1×4 Matrix{Bool}:
- 1  1  1  1
-```
+!!! note
+    The previous values in `R` are _not_ used as initial values; they are completely ignored
 """
-mapreduce(f, op, A::AbstractArrayOrBroadcasted; dims=:, init=_InitialValue()) =
-    _mapreduce_dim(f, op, init, A, dims)
-mapreduce(f, op, A::AbstractArrayOrBroadcasted, B::AbstractArrayOrBroadcasted...; kw...) =
-    reduce(op, map(f, A, B...); kw...)
-
-_mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, ::Colon) =
-    mapfoldl_impl(f, op, nt, A)
-
-_mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) =
-    _mapreduce(f, op, IndexStyle(A), A)
-
-_mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, dims) =
-    mapreducedim!(f, op, reducedim_initarray(A, dims, nt), A)
-
-_mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims) =
-    mapreducedim!(f, op, reducedim_init(f, op, A, dims), A)
+function mapreduce!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted; init=_InitialValue(), update=false)
+    if ndims(R) > ndims(A) || !all(d->size(R, d) == 1 || axes(R, d) == axes(A, d), 1:max(ndims(R), ndims(A)))
+        throw(DimensionMismatch())
+    end
+    return mapreducedim(f, op, A, init, nothing, _MapReduceInPlace(R, op, update))
+end
 
 """
-    reduce(f, A::AbstractArray; dims=:, [init])
-
-Reduce 2-argument function `f` along dimensions of `A`. `dims` is a vector specifying the
-dimensions to reduce, and the keyword argument `init` is the initial value to use in the
-reductions. For `+`, `*`, `max` and `min` the `init` argument is optional.
+    reduce!(op, R::AbstractArray, A::AbstractArrayOrBroadcasted; [init])
 
-The associativity of the reduction is implementation-dependent; if you need a particular
-associativity, e.g. left-to-right, you should write your own loop or consider using
-[`foldl`](@ref) or [`foldr`](@ref). See documentation for [`reduce`](@ref).
+Compute `reduce(op, A; init, dims)` where `dims` are the singleton dimensions of `R`, storing the result into `R`.
 
-# Examples
-```jldoctest
-julia> a = reshape(Vector(1:16), (4,4))
-4×4 Matrix{Int64}:
- 1  5   9  13
- 2  6  10  14
- 3  7  11  15
- 4  8  12  16
-
-julia> reduce(max, a, dims=2)
-4×1 Matrix{Int64}:
- 13
- 14
- 15
- 16
-
-julia> reduce(max, a, dims=1)
-1×4 Matrix{Int64}:
- 4  8  12  16
-```
+!!! note
+    The previous values in `R` are _not_ used as initial values; they are completely ignored
 """
-reduce(op, A::AbstractArray; kw...) = mapreduce(identity, op, A; kw...)
+reduce!(op, R::AbstractArray, A::AbstractArrayOrBroadcasted; init=_InitialValue(), update=false) = mapreduce!(identity, op, R, A; init, update)
 
 ##### Specific reduction functions #####
 
@@ -445,7 +310,7 @@ julia> count!(<=(2), [1; 1], A)
 """
 count!(r::AbstractArray, A::AbstractArrayOrBroadcasted; init::Bool=true) = count!(identity, r, A; init=init)
 count!(f, r::AbstractArray, A::AbstractArrayOrBroadcasted; init::Bool=true) =
-    mapreducedim!(_bool(f), add_sum, initarray!(r, f, add_sum, init, A), A)
+    mapreduce!(_bool(f), add_sum, r, A; update=!init)
 
 """
     sum(A::AbstractArray; dims)
@@ -1005,7 +870,7 @@ for (fname, op) in [(:sum, :add_sum), (:prod, :mul_prod),
     mapf = fname === :extrema ? :(ExtremaMap(f)) : :f
     @eval begin
         $(fname!)(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) =
-            mapreducedim!($mapf, $(op), initarray!(r, $mapf, $(op), init, A), A)
+            mapreduce!($mapf, $op, r, A; update=!init)
         $(fname!)(r::AbstractArray, A::AbstractArray; init::Bool=true) = $(fname!)(identity, r, A; init=init)
 
         $(_fname)(A, dims; kw...)    = $(_fname)(identity, A, dims; kw...)
@@ -1066,6 +931,58 @@ function findminmax!(f, op, Rval, Rind, A::AbstractArray{T,N}) where {T,N}
     Rval, Rind
 end
 
+struct PairsArray{T,N,A} <: AbstractArray{T,N}
+    array::A
+end
+PairsArray(array::AbstractArray{T,N}) where {T, N} = PairsArray{Tuple{keytype(array),T}, N, typeof(array)}(array)
+const PairsVector{T,A} = PairsArray{T, 1, A}
+IndexStyle(::PairsVector) = IndexLinear()
+IndexStyle(::PairsArray) = IndexCartesian()
+size(P::PairsArray) = size(P.array)
+axes(P::PairsArray) = axes(P.array)
+@inline function getindex(P::PairsVector, i::Int)
+    @boundscheck checkbounds(P, i)
+    @inbounds (i, P.array[i])
+end
+@inline function getindex(P::PairsArray{<:Any,N}, I::CartesianIndex{N}) where {N}
+    @boundscheck checkbounds(P, I)
+    @inbounds (I, P.array[I])
+end
+@propagate_inbounds getindex(P::PairsVector, i::CartesianIndex{1}) = P[i.I[1]]
+@propagate_inbounds getindex(P::PairsArray{<:Any,N}, I::Vararg{Int, N}) where {N} = P[CartesianIndex(I)]
+mapreduce_similar(P::PairsArray, ::Type{T}, dims) where {T} = mapreduce_similar(P.array, T, dims)
+
+# Use an ad-hoc specialized StructArray to allow in-place AoS->SoA transform
+struct ZippedArray{T,N,Style,A,B} <: AbstractArray{T,N}
+    first::A
+    second::B
+end
+function ZippedArray(A::AbstractArray{T,N},B::AbstractArray{S,N}) where {T,S,N}
+    axes(A) == axes(B) || throw(DimensionMismatch("both arrays must have the same shape"))
+    # TODO: It'd be better if we could transform a Tuple{Int, Union{Int, Missing}} to Union{Tuple{Int,Int}, Tuple{Int, Missing}}
+    ZippedArray{Tuple{T,S},N,IndexStyle(A,B),typeof(A),typeof(B)}(A,B)
+end
+size(Z::ZippedArray) = size(Z.first)
+axes(Z::ZippedArray) = axes(Z.first)
+IndexStyle(::ZippedArray{<:Any,<:Any,Style}) where {Style} = Style
+@inline function getindex(Z::ZippedArray, I::Int...)
+    @boundscheck checkbounds(Z, I...)
+    @inbounds (Z.first[I...], Z.second[I...])
+end
+@propagate_inbounds setindex!(Z::ZippedArray{T}, v, I::Int...) where {T} = setindex!(Z, convert(T, v), I...)
+@inline function setindex!(Z::ZippedArray{T}, v::T, I::Int...) where {T}
+    @boundscheck checkbounds(Z, I...)
+    @inbounds Z.first[I...] = v[1]
+    @inbounds Z.second[I...] = v[2]
+    return Z
+end
+_unzip(Z::ZippedArray) = (Z.first, Z.second)
+_unzip(A::AbstractArray) = ([a[1] for a in A], [a[2] for a in A])
+
+_transform_pair(f) = x-> (x[1], f(x[2]))
+_transform_pair(::Type{F}) where {F} = x-> (x[1], F(x[2]))
+_transform_pair(f::typeof(identity)) = f
+
 """
     findmin!(rval, rind, A) -> (minval, index)
 
@@ -1075,9 +992,11 @@ dimensions of `rval` and `rind`, and store the results in `rval` and `rind`.
 
 $(_DOCS_ALIASING_WARNING)
 """
-function findmin!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
+findmin!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray; init::Bool=true) = findmin!(identity, rval, rind, A; init)
+function findmin!(f, rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
                   init::Bool=true)
-    findminmax!(identity, isgreater, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,zero(eltype(keys(A)))), A)
+    mapreduce!(_transform_pair(f), (x,y)->ifelse(isgreater(x[2], y[2]), y, x), ZippedArray(rind, rval), PairsArray(A); update=!init)
+    return (rval, rind)
 end
 
 """
@@ -1126,19 +1045,16 @@ julia> findmin(abs2, A, dims=2)
 findmin(f, A::AbstractArray; dims=:) = _findmin(f, A, dims)
 
 function _findmin(f, A, region)
-    ri = reduced_indices0(A, region)
-    if isempty(A)
-        if prod(map(length, reduced_indices(A, region))) != 0
-            throw(ArgumentError("collection slices must be non-empty"))
-        end
-        similar(A, promote_op(f, eltype(A)), ri), zeros(eltype(keys(A)), ri)
+    if f === identity
+        # Fast path with pre-allocated arrays
+        axs = reduced_indices(A, region)
+        return findmin!(identity, mapreduce_similar(A, eltype(A), axs), mapreduce_similar(A, keytype(A), axs), A)
     else
-        fA = f(first(A))
-        findminmax!(f, isgreater, fill!(similar(A, _findminmax_inittype(f, A), ri), fA),
-                    zeros(eltype(keys(A)), ri), A)
+        P = mapreduce(_transform_pair(f), (x,y)->ifelse(isgreater(x[2], y[2]), y, x), PairsArray(A); dims=region)
+        (inds, vals) = _unzip(P)
+        return (vals, inds)
     end
 end
-
 """
     findmax!(rval, rind, A) -> (maxval, index)
 
@@ -1148,9 +1064,11 @@ dimensions of `rval` and `rind`, and store the results in `rval` and `rind`.
 
 $(_DOCS_ALIASING_WARNING)
 """
-function findmax!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
+findmax!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray; init::Bool=true) = findmax!(identity, rval, rind, A; init)
+function findmax!(f, rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
                   init::Bool=true)
-    findminmax!(identity, isless, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,zero(eltype(keys(A)))), A)
+    mapreduce!(_transform_pair(f), (x,y)->ifelse(isless(x[2], y[2]), y, x), ZippedArray(rind, rval), PairsArray(A); update=!init)
+    return (rval, rind)
 end
 
 """
@@ -1197,33 +1115,17 @@ julia> findmax(abs2, A, dims=2)
 ```
 """
 findmax(f, A::AbstractArray; dims=:) = _findmax(f, A, dims)
-
 function _findmax(f, A, region)
-    ri = reduced_indices0(A, region)
-    if isempty(A)
-        if prod(map(length, reduced_indices(A, region))) != 0
-            throw(ArgumentError("collection slices must be non-empty"))
-        end
-        similar(A, promote_op(f, eltype(A)), ri), zeros(eltype(keys(A)), ri)
+    if f === identity
+        axs = reduced_indices(A, region)
+        return findmax!(identity, mapreduce_similar(A, eltype(A), axs), mapreduce_similar(A, keytype(A), axs), A)
     else
-        fA = f(first(A))
-        findminmax!(f, isless, fill!(similar(A, _findminmax_inittype(f, A), ri), fA),
-                    zeros(eltype(keys(A)), ri), A)
+        P = mapreduce(_transform_pair(f), (x,y)->ifelse(isless(x[2], y[2]), y, x), PairsArray(A); dims=region)
+        (inds, vals) = _unzip(P)
+        return (vals, inds)
     end
 end
 
-function _findminmax_inittype(f, A::AbstractArray)
-    T = _realtype(f, promote_union(eltype(A)))
-    v0 = f(first(A))
-    # First conditional: T is >: typeof(v0), so return it
-    # Second conditional: handle missing specifically, as most often, f(missing) = missing;
-    # certainly, some predicate functions return Bool, but not all.
-    # Else, return the type of the transformation.
-    Tr = v0 isa T ? T : Missing <: eltype(A) ? Union{Missing, typeof(v0)} : typeof(v0)
-end
-
-reducedim1(R, A) = length(axes1(R)) == 1
-
 """
     argmin(A; dims) -> indices
 
diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl
index e1ee16be367b2..623eb414cd6ac 100644
--- a/base/reinterpretarray.jl
+++ b/base/reinterpretarray.jl
@@ -905,43 +905,29 @@ end
 
 # Reductions with IndexSCartesian2
 
-function _mapreduce(f::F, op::OP, style::IndexSCartesian2{K}, A::AbstractArrayOrBroadcasted) where {F,OP,K}
-    inds = eachindex(style, A)
-    n = size(inds)[2]
-    if n == 0
-        return mapreduce_empty_iter(f, op, A, IteratorEltype(A))
-    else
-        return mapreduce_impl(f, op, A, first(inds), last(inds))
-    end
-end
-
-@noinline function mapreduce_impl(f::F, op::OP, A::AbstractArrayOrBroadcasted,
-                                  ifirst::SCI, ilast::SCI, blksize::Int) where {F,OP,SCI<:SCartesianIndex2{K}} where K
-    if ilast.j - ifirst.j < blksize
-        # sequential portion
-        @inbounds a1 = A[ifirst]
-        @inbounds a2 = A[SCI(2,ifirst.j)]
-        v = op(f(a1), f(a2))
-        @simd for i = ifirst.i + 2 : K
-            @inbounds ai = A[SCI(i,ifirst.j)]
+@noinline _halve_error() = throw(AssertionError("cannot split SCartesianIndices2{$K} into halves smaller than $K"))
+
+function halves(inds::SCartesianIndices2{K}) where {K}
+    inds2 = inds.indices2
+    length(inds2) == 1 && _halve_error()
+    map(SCartesianIndices2{K}, halves(inds2))
+end
+
+function mapreduce_kernel(f, op, A, init, inds::SCartesianIndices2{K}) where K
+    js = inds.indices2
+    j1 = first(js)
+    v = _mapreduce_start(f, op, A, init, @inbounds A[SCartesianIndex2{K}(1, j1)])
+    # finish the first column
+    @simd for i in 2:K
+        @inbounds ai = A[SCartesianIndex2{K}(i, j1)]
+        v = op(v, f(ai))
+    end
+    # Remaining columns
+    for j in js[begin+1:end]
+        @simd for i in 1:K
+            @inbounds ai = A[SCartesianIndex2{K}(i,j)]
             v = op(v, f(ai))
         end
-        # Remaining columns
-        for j = ifirst.j+1 : ilast.j
-            @simd for i = 1:K
-                @inbounds ai = A[SCI(i,j)]
-                v = op(v, f(ai))
-            end
-        end
-        return v
-    else
-        # pairwise portion
-        jmid = ifirst.j + (ilast.j - ifirst.j) >> 1
-        v1 = mapreduce_impl(f, op, A, ifirst, SCI(K,jmid), blksize)
-        v2 = mapreduce_impl(f, op, A, SCI(1,jmid+1), ilast, blksize)
-        return op(v1, v2)
     end
+    return v
 end
-
-mapreduce_impl(f::F, op::OP, A::AbstractArrayOrBroadcasted, ifirst::SCartesianIndex2, ilast::SCartesianIndex2) where {F,OP} =
-    mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op))
diff --git a/base/runtime_internals.jl b/base/runtime_internals.jl
index 98dd111ccbf68..d7d092f7a7c93 100644
--- a/base/runtime_internals.jl
+++ b/base/runtime_internals.jl
@@ -1833,7 +1833,13 @@ function iterate(specs::MethodSpecializations, i::Int)
     item === nothing && return nothing
     return (item, i)
 end
-length(specs::MethodSpecializations) = count(Returns(true), specs)
+function length(specs::MethodSpecializations)
+    r = 0
+    for _ in specs
+        r += 1
+    end
+    return r
+end
 
 function length(mt::Core.MethodTable)
     n = 0
diff --git a/contrib/juliac/juliac-trim-base.jl b/contrib/juliac/juliac-trim-base.jl
index 96fed77969c97..2ef2ebf083349 100644
--- a/contrib/juliac/juliac-trim-base.jl
+++ b/contrib/juliac/juliac-trim-base.jl
@@ -79,31 +79,31 @@ end
     end
     show_type_name(io::IO, tn::Core.TypeName) = print(io, tn.name)
 
-    mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted; dims=:, init=_InitialValue()) where {F, F2} =
-    _mapreduce_dim(f, op, init, A, dims)
-    mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted...; kw...) where {F, F2} =
-        reduce(op, map(f, A...); kw...)
+    # mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted; dims=:, init=_InitialValue()) where {F, F2} =
+    # _mapreduce_dim(f, op, init, A, dims)
+    # mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted...; kw...) where {F, F2} =
+    #     reduce(op, map(f, A...); kw...)
 
-    _mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} =
-        mapfoldl_impl(f, op, nt, A)
+    # _mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} =
+    #     mapfoldl_impl(f, op, nt, A)
 
-    _mapreduce_dim(f::F, op::F2, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} =
-        _mapreduce(f, op, IndexStyle(A), A)
+    # _mapreduce_dim(f::F, op::F2, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} =
+    #     _mapreduce(f, op, IndexStyle(A), A)
 
-    _mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, dims) where {F, F2} =
-        mapreducedim!(f, op, reducedim_initarray(A, dims, nt), A)
+    # _mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, dims) where {F, F2} =
+    #     mapreducedim!(f, op, reducedim_initarray(A, dims, nt), A)
 
-    _mapreduce_dim(f::F, op::F2, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims) where {F,F2} =
-        mapreducedim!(f, op, reducedim_init(f, op, A, dims), A)
+    # _mapreduce_dim(f::F, op::F2, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims) where {F,F2} =
+    #     mapreducedim!(f, op, reducedim_init(f, op, A, dims), A)
 
-    mapreduce_empty_iter(f::F, op::F2, itr, ItrEltype) where {F, F2} =
-        reduce_empty_iter(MappingRF(f, op), itr, ItrEltype)
-        mapreduce_first(f::F, op::F2, x) where {F,F2} = reduce_first(op, f(x))
+    # mapreduce_empty_iter(f::F, op::F2, itr, ItrEltype) where {F, F2} =
+    #     reduce_empty_iter(MappingRF(f, op), itr, ItrEltype)
+    #     mapreduce_first(f::F, op::F2, x) where {F,F2} = reduce_first(op, f(x))
 
-    _mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted) where {F,F2} = _mapreduce(f, op, IndexStyle(A), A)
-    mapreduce_empty(::typeof(identity), op::F, T) where {F} = reduce_empty(op, T)
-    mapreduce_empty(::typeof(abs), op::F, T) where {F}     = abs(reduce_empty(op, T))
-    mapreduce_empty(::typeof(abs2), op::F, T) where {F}    = abs2(reduce_empty(op, T))
+    # _mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted) where {F,F2} = _mapreduce(f, op, IndexStyle(A), A)
+    # mapreduce_empty(::typeof(identity), op::F, T) where {F} = reduce_empty(op, T)
+    # mapreduce_empty(::typeof(abs), op::F, T) where {F}     = abs(reduce_empty(op, T))
+    # mapreduce_empty(::typeof(abs2), op::F, T) where {F}    = abs2(reduce_empty(op, T))
 end
 @eval Base.Sys begin
     __init_build() = nothing
diff --git a/deps/checksums/LinearAlgebra-5c6d351459459d2032443973bc28c4f346cc094c.tar.gz/md5 b/deps/checksums/LinearAlgebra-5c6d351459459d2032443973bc28c4f346cc094c.tar.gz/md5
new file mode 100644
index 0000000000000..8eb75fdb8c94b
--- /dev/null
+++ b/deps/checksums/LinearAlgebra-5c6d351459459d2032443973bc28c4f346cc094c.tar.gz/md5
@@ -0,0 +1 @@
+c02013c82a3c13c8ccc603b6f4419b85
diff --git a/deps/checksums/LinearAlgebra-5c6d351459459d2032443973bc28c4f346cc094c.tar.gz/sha512 b/deps/checksums/LinearAlgebra-5c6d351459459d2032443973bc28c4f346cc094c.tar.gz/sha512
new file mode 100644
index 0000000000000..ed3452cbcb6e1
--- /dev/null
+++ b/deps/checksums/LinearAlgebra-5c6d351459459d2032443973bc28c4f346cc094c.tar.gz/sha512
@@ -0,0 +1 @@
+10d131c4f42fc69c556f54d2ef7c1d954d61a3d940141cbb48e3fbfbcde7628a3a0e0ee1bb069586048913d2b8e141dbe87347e46922b09a400d898beed4b35e
diff --git a/deps/checksums/LinearAlgebra-e70953e599be1d43ead342c0f56b44f008900be8.tar.gz/md5 b/deps/checksums/LinearAlgebra-e70953e599be1d43ead342c0f56b44f008900be8.tar.gz/md5
new file mode 100644
index 0000000000000..73207af2d33f4
--- /dev/null
+++ b/deps/checksums/LinearAlgebra-e70953e599be1d43ead342c0f56b44f008900be8.tar.gz/md5
@@ -0,0 +1 @@
+443d2498a80ad374894811c0b2d5e6bf
diff --git a/deps/checksums/LinearAlgebra-e70953e599be1d43ead342c0f56b44f008900be8.tar.gz/sha512 b/deps/checksums/LinearAlgebra-e70953e599be1d43ead342c0f56b44f008900be8.tar.gz/sha512
new file mode 100644
index 0000000000000..c253cdae5d70a
--- /dev/null
+++ b/deps/checksums/LinearAlgebra-e70953e599be1d43ead342c0f56b44f008900be8.tar.gz/sha512
@@ -0,0 +1 @@
+09077d9524aba81f3b2639843ce52153fa575faf40c2e05faca28a36350653c980617959f38f329bd4763f3def6821e619ddaf0050642be868b7c83883da53b1
diff --git a/deps/checksums/SparseArrays-1b37c433f659f6187fa6c41037e082887b6f7387.tar.gz/md5 b/deps/checksums/SparseArrays-1b37c433f659f6187fa6c41037e082887b6f7387.tar.gz/md5
new file mode 100644
index 0000000000000..b2b324144751a
--- /dev/null
+++ b/deps/checksums/SparseArrays-1b37c433f659f6187fa6c41037e082887b6f7387.tar.gz/md5
@@ -0,0 +1 @@
+04f0d741c51ca4322cba9d8acbd099e3
diff --git a/deps/checksums/SparseArrays-1b37c433f659f6187fa6c41037e082887b6f7387.tar.gz/sha512 b/deps/checksums/SparseArrays-1b37c433f659f6187fa6c41037e082887b6f7387.tar.gz/sha512
new file mode 100644
index 0000000000000..e3d5002842415
--- /dev/null
+++ b/deps/checksums/SparseArrays-1b37c433f659f6187fa6c41037e082887b6f7387.tar.gz/sha512
@@ -0,0 +1 @@
+8eadeed9dbe51a9f5cc7c81ff2bb19240118c57644a4dd5c448b35f84d214cb851162d2cb75bad3687becc8324e4fa6dbc3d0ff0ae3300df218ce11ccb4ae09d
diff --git a/deps/checksums/SparseArrays-975f04a739ddbe9e8dc04ed46f3227da0aa912e8.tar.gz/md5 b/deps/checksums/SparseArrays-975f04a739ddbe9e8dc04ed46f3227da0aa912e8.tar.gz/md5
new file mode 100644
index 0000000000000..cd5efe15436be
--- /dev/null
+++ b/deps/checksums/SparseArrays-975f04a739ddbe9e8dc04ed46f3227da0aa912e8.tar.gz/md5
@@ -0,0 +1 @@
+1fe453902519d84138fef290da911c64
diff --git a/deps/checksums/SparseArrays-975f04a739ddbe9e8dc04ed46f3227da0aa912e8.tar.gz/sha512 b/deps/checksums/SparseArrays-975f04a739ddbe9e8dc04ed46f3227da0aa912e8.tar.gz/sha512
new file mode 100644
index 0000000000000..9ac8d534f5af4
--- /dev/null
+++ b/deps/checksums/SparseArrays-975f04a739ddbe9e8dc04ed46f3227da0aa912e8.tar.gz/sha512
@@ -0,0 +1 @@
+e592a2a9e967b765b51a282d1a57b88e1891ec4c892cae8a5bdc4a576a228f56fd93c5da9aedd0ba002aa33c4772b25f6bfc786c241a5c552b7290d86e1720a5
diff --git a/deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/md5 b/deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/md5
new file mode 100644
index 0000000000000..edb55470f2e63
--- /dev/null
+++ b/deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/md5
@@ -0,0 +1 @@
+edafbcbfc29cb7a86aef4a92bf7e7267
diff --git a/deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/sha512 b/deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/sha512
new file mode 100644
index 0000000000000..f57ee756ca6fc
--- /dev/null
+++ b/deps/checksums/SparseArrays-ac0cc6e8278bb614dc2a16e9353fa092dae6606a.tar.gz/sha512
@@ -0,0 +1 @@
+230151395d43b005e1285f5938a5800650314dc562e4cc6363775b54b45b33076a551276e45a977a0ea3b07ca0088a2ea27def63196f435ff1d6153b93999485
diff --git a/deps/checksums/Statistics-c674ab823526d05e7a85b468065089bf563823aa.tar.gz/md5 b/deps/checksums/Statistics-c674ab823526d05e7a85b468065089bf563823aa.tar.gz/md5
new file mode 100644
index 0000000000000..145e567ddf5d4
--- /dev/null
+++ b/deps/checksums/Statistics-c674ab823526d05e7a85b468065089bf563823aa.tar.gz/md5
@@ -0,0 +1 @@
+d0a92cb724c9861390066b2262e771fc
diff --git a/deps/checksums/Statistics-c674ab823526d05e7a85b468065089bf563823aa.tar.gz/sha512 b/deps/checksums/Statistics-c674ab823526d05e7a85b468065089bf563823aa.tar.gz/sha512
new file mode 100644
index 0000000000000..58ac4fd864ed9
--- /dev/null
+++ b/deps/checksums/Statistics-c674ab823526d05e7a85b468065089bf563823aa.tar.gz/sha512
@@ -0,0 +1 @@
+b723403585a6a6fff94b4f1ebff0172937723507dc5a76756bd5788d6f13be465847079b28eea09a8b5896d49b0e7584e1a122049c87c4e237402a16eba5c943
diff --git a/doc/src/manual/arrays.md b/doc/src/manual/arrays.md
index 6fa27ba75c90f..834ac2031cfab 100644
--- a/doc/src/manual/arrays.md
+++ b/doc/src/manual/arrays.md
@@ -407,7 +407,7 @@ sums a series without allocating memory:
 
 ```jldoctest
 julia> sum(1/n^2 for n=1:1000)
-1.6439345666815615
+1.6439345666815603
 ```
 
 When writing a generator expression with multiple dimensions inside an argument list, parentheses
diff --git a/doc/src/manual/interfaces.md b/doc/src/manual/interfaces.md
index e752448f14a25..4e6d35c388913 100644
--- a/doc/src/manual/interfaces.md
+++ b/doc/src/manual/interfaces.md
@@ -61,10 +61,12 @@ julia> struct Squares
        end
 
 julia> Base.iterate(S::Squares, state=1) = state > S.count ? nothing : (state*state, state+1)
+
+julia> Base.length(S::Squares) = S.count
 ```
 
-With only [`iterate`](@ref) definition, the `Squares` type is already pretty powerful.
-We can iterate over all the elements:
+With just these [`iterate`](@ref) and [`length](@ref) definitions, the `Squares` type is already
+pretty powerful. We can iterate over all the elements:
 
 ```jldoctest squaretype
 julia> for item in Squares(7)
@@ -90,16 +92,13 @@ julia> sum(Squares(100))
 338350
 ```
 
-There are a few more methods we can extend to give Julia more information about this iterable
+There are other methods we can extend to give Julia more information about this iterable
 collection. We know that the elements in a `Squares` sequence will always be `Int`. By extending
 the [`eltype`](@ref) method, we can give that information to Julia and help it make more specialized
-code in the more complicated methods. We also know the number of elements in our sequence, so
-we can extend [`length`](@ref), too:
+code in the more complicated methods:
 
 ```jldoctest squaretype
 julia> Base.eltype(::Type{Squares}) = Int # Note that this is defined for the type
-
-julia> Base.length(S::Squares) = S.count
 ```
 
 Now, when we ask Julia to [`collect`](@ref) all the elements into an array it can preallocate a `Vector{Int}`
diff --git a/stdlib/LinearAlgebra.version b/stdlib/LinearAlgebra.version
index 5136a508fca27..ff2ced8da6765 100644
--- a/stdlib/LinearAlgebra.version
+++ b/stdlib/LinearAlgebra.version
@@ -1,4 +1,4 @@
-LINEARALGEBRA_BRANCH = master
-LINEARALGEBRA_SHA1 = 3e4d569804bdc8eb891ff1982eb9e90246656d0c
-LINEARALGEBRA_GIT_URL := https://github.com/JuliaLang/LinearAlgebra.jl.git
-LINEARALGEBRA_TAR_URL = https://api.github.com/repos/JuliaLang/LinearAlgebra.jl/tarball/$1
+LINEARALGEBRA_BRANCH = mb/ReduceReuse♻
+LINEARALGEBRA_SHA1 = 5c6d351459459d2032443973bc28c4f346cc094c
+LINEARALGEBRA_GIT_URL := https://github.com/mbauman/LinearAlgebra.jl.git
+LINEARALGEBRA_TAR_URL = https://api.github.com/repos/mbauman/LinearAlgebra.jl/tarball/$1
diff --git a/stdlib/SparseArrays.version b/stdlib/SparseArrays.version
index 9230b5c3e6d94..a5493b8c48b3e 100644
--- a/stdlib/SparseArrays.version
+++ b/stdlib/SparseArrays.version
@@ -1,4 +1,4 @@
-SPARSEARRAYS_BRANCH = main
-SPARSEARRAYS_SHA1 = 6d072a81fca5f4394f88a012f4ce914c70769303
-SPARSEARRAYS_GIT_URL := https://github.com/JuliaSparse/SparseArrays.jl.git
-SPARSEARRAYS_TAR_URL = https://api.github.com/repos/JuliaSparse/SparseArrays.jl/tarball/$1
+SPARSEARRAYS_BRANCH = mb/ReduceReuse♻
+SPARSEARRAYS_SHA1 = 1b37c433f659f6187fa6c41037e082887b6f7387
+SPARSEARRAYS_GIT_URL := https://github.com/mbauman/SparseArrays.jl.git
+SPARSEARRAYS_TAR_URL = https://api.github.com/repos/mbauman/SparseArrays.jl/tarball/$1
diff --git a/stdlib/Statistics.version b/stdlib/Statistics.version
index e6f0a62b3cec5..a435760ee57aa 100644
--- a/stdlib/Statistics.version
+++ b/stdlib/Statistics.version
@@ -1,4 +1,4 @@
-STATISTICS_BRANCH = master
-STATISTICS_SHA1 = 77bd5707f143eb624721a7df28ddef470e70ecef
-STATISTICS_GIT_URL := https://github.com/JuliaStats/Statistics.jl.git
-STATISTICS_TAR_URL = https://api.github.com/repos/JuliaStats/Statistics.jl/tarball/$1
+STATISTICS_BRANCH = mb/reduce-reuse-recycle
+STATISTICS_SHA1 = c674ab823526d05e7a85b468065089bf563823aa
+STATISTICS_GIT_URL := https://github.com/mbauman/Statistics.jl.git
+STATISTICS_TAR_URL = https://api.github.com/repos/mbauman/Statistics.jl/tarball/$1
diff --git a/test/broadcast.jl b/test/broadcast.jl
index a751d9d381ce6..abb1c300051db 100644
--- a/test/broadcast.jl
+++ b/test/broadcast.jl
@@ -998,14 +998,10 @@ end
     # Now let's try it with `Broadcasted`:
     bcraw = Broadcast.broadcasted(identity, xs)
     bc = Broadcast.instantiate(bcraw)
-    # If `Broadcasted` has `IndexLinear` style, it should hit the
-    # `reduce` branch:
     @test IndexStyle(bc) == IndexLinear()
     @test reduce(paren, bc) == reduce(paren, xs)
-    # If `Broadcasted` does not have `IndexLinear` style, it should
-    # hit the `foldl` branch:
     @test IndexStyle(bcraw) == IndexCartesian()
-    @test reduce(paren, bcraw) == foldl(paren, xs)
+    @test reduce(paren, bcraw) == reduce(paren, xs)
 
     # issue #41055
     bc = Broadcast.instantiate(Broadcast.broadcasted(Base.literal_pow, Ref(^), [1,2], Ref(Val(2))))
diff --git a/test/error.jl b/test/error.jl
index 0d8047aa92a44..0f7c0aaa7f927 100644
--- a/test/error.jl
+++ b/test/error.jl
@@ -110,7 +110,8 @@ end
             push!(visited, mod)
             for name in names(mod, all=true)
                 isdefinedglobal(mod, name) || continue
-                value = getglobal(mod, name)
+                Base.isdeprecated(mod, name) && continue
+                value = getfield(mod, name)
                 if value isa Module
                     value === Main && continue
                     test_exceptions(value, visited)
diff --git a/test/offsetarray.jl b/test/offsetarray.jl
index 8e2ee33c49ed6..003d51447f547 100644
--- a/test/offsetarray.jl
+++ b/test/offsetarray.jl
@@ -925,3 +925,11 @@ end
     @test parent(B) == A0
     @test axes(B) == Base.IdentityUnitRange.((-10:-9, 9:10))
 end
+
+# issue #38660
+@testset "`findmin/max` for OffsetArray" begin
+    ov = OffsetVector([-1, 1], 0:1)
+    @test @inferred(findmin(ov; dims = 1)) .|> first == (-1, 0)
+    ov = OffsetVector([-1, 1], -1:0)
+    @test @inferred(findmax(ov; dims = 1)) .|> first == (1, 0)
+end
diff --git a/test/reduce.jl b/test/reduce.jl
index a1d66a03e2c42..260fd8dc82099 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -44,6 +44,8 @@ end
 let x = rand(10)
     @test 0 == @allocated(sum(Iterators.reverse(x)))
     @test 0 == @allocated(foldr(-, x))
+    @test 0 == @allocated(sum(x))
+    @test 0 == @allocated(sum(log, x))
 end
 
 # reduce
@@ -772,6 +774,58 @@ end
     end |> only == Val{true}
 end
 
+@testset "type stability of internals for mean (etc); Statistics#160" begin
+    @test (@inferred Missing mapreduce(identity, (x,y)->Base.add_sum(x, y)/1, view([1.0,2.0,missing],1:2), dims=:)) == 3.0
+    @test (@inferred Missing mapreduce(identity, (x,y)->Base.add_sum(x, y)/1, view([1,2,missing],1:2), dims=:, init=0.0)) == 3.0
+    @test (@inferred Union{Missing,Int} mapreduce(identity, (x,y)->Base.add_sum(x, y)/1, view([1,2,missing],1:2), dims=:, init=0)) == 3.0
+    @test_broken (@inferred Union{Missing,Int} mapreduce(identity, (x,y)->Base.add_sum(x, y)/1, view([1,2,missing],1:2), dims=:)) == 3.0
+
+    @test (@inferred Missing reduce((x,y)->Base.add_sum(x, y)/1, view([1.0,2.0,missing],1:2), dims=:)) == 3.0
+    @test (@inferred Missing reduce((x,y)->Base.add_sum(x, y)/1, view([1,2,missing],1:2), dims=:, init=0.0)) == 3.0
+    @test (@inferred Union{Missing,Int} reduce((x,y)->Base.add_sum(x, y)/1, view([1,2,missing],1:2), dims=:, init=0)) == 3.0
+    @test_broken (@inferred Union{Missing,Int} reduce((x,y)->Base.add_sum(x, y)/1, view([1,2,missing],1:2), dims=:)) == 3.0
+
+    @test (@inferred Missing mapreduce(identity, (x,y)->Base.add_sum(x, y)/1, skipmissing(view([1.0,2.0,missing],1:2)))) == 3.0
+    @test (@inferred Missing mapreduce(identity, (x,y)->Base.add_sum(x, y)/1, skipmissing(view([1,2,missing],1:2)), init=0.0)) == 3.0
+    @test (@inferred Int mapreduce(identity, (x,y)->Base.add_sum(x, y)/1, skipmissing(view([1,2,missing],1:2)), init=0)) == 3.0
+    @test (@inferred Int mapreduce(identity, (x,y)->Base.add_sum(x, y)/1, skipmissing(view([1,2,missing],1:2)))) == 3.0
+
+    @test (@inferred Missing reduce((x,y)->Base.add_sum(x, y)/1, skipmissing(view([1.0,2.0,missing],1:2)))) == 3.0
+    @test (@inferred Missing reduce((x,y)->Base.add_sum(x, y)/1, skipmissing(view([1,2,missing],1:2)), init=0.0)) == 3.0
+    @test (@inferred Int reduce((x,y)->Base.add_sum(x, y)/1, skipmissing(view([1,2,missing],1:2)), init=0)) == 3.0
+    @test (@inferred Int reduce((x,y)->Base.add_sum(x, y)/1, skipmissing(view([1,2,missing],1:2)))) == 3.0
+end
+
+linear(x) = (@assert(IndexStyle(x) == IndexLinear()); x)
+cartesian(x) = (y = view(x, axes(x)...); @assert(IndexStyle(y) isa IndexCartesian); y)
+iterlen(x) = (y = Iterators.take(x, length(x)); @assert(Base.IteratorSize(y) isa Union{Base.HasLength, Base.HasShape}); y)
+iterunknown(x) = (y = skipmissing(convert(Array{Union{Missing, eltype(x)}}, x)); @assert(Base.IteratorSize(y) isa Base.SizeUnknown); y)
+# Issues #29883, #52365, #30421, #52457
+@testset "all floating point reductions are pairwise" begin
+    ulp = nextfloat(1f0)-1f0
+    sz = (100000,10)
+    len = prod(sz)
+    arr = fill(0.1f0, sz)
+    @test foldl(+, arr) === 100958.34f0 # Should not SIMD, should be exact
+    @test !(foldl(+, arr) ≈ len/10)
+    @test !isapprox(foldl(+, arr), len/10; rtol=100ulp)
+    @test sum(arr) ≈ len/10
+    @test sum(arr) ≈ len/10 rtol=100ulp
+    for f in (linear, cartesian, iterlen, iterunknown)
+        A = f(arr)
+        @test sum(A) ≈ (len/10) rtol=100ulp
+        @test sum(A; init=0.0) ≈ (len/10) rtol=100ulp
+        if A isa AbstractArray
+            @test only(unique(sum(A; dims=1))) ≈ (sz[1]/10) rtol=100ulp
+            @test only(unique(sum(permutedims(A); dims=2))) ≈ (sz[1]/10) rtol=100ulp
+            @test only(unique(sum(A; dims=1, init=0.0))) ≈ (sz[1]/10) rtol=100ulp
+            @test only(unique(sum(permutedims(A); dims=2, init=0.0))) ≈ (sz[1]/10) rtol=100ulp
+        end
+    end
+
+    @test sum((rand(Float32) for _ in 1:100000000))/100000000 ≈ 0.5
+end
+
 # `reduce(vcat, A)` should not alias the input for length-1 collections
 let A=[1;;]
     @test reduce(vcat, Any[A]) !== A
diff --git a/test/reducedim.jl b/test/reducedim.jl
index 1664b2708d7e3..8193522669442 100644
--- a/test/reducedim.jl
+++ b/test/reducedim.jl
@@ -1,6 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Random
+isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
+using .Main.OffsetArrays: OffsetVector, OffsetArray
 
 # main tests
 
@@ -173,7 +175,7 @@ end
 
     x = sum(Real[1.0], dims=1)
     @test x == [1.0]
-    @test x isa Vector{Real}
+    @test x isa Vector{<:Real}
 
     x = mapreduce(cos, +, Union{Int,Missing}[1, 2], dims=1)
     @test x == mapreduce(cos, +, [1, 2], dims=1)
@@ -207,23 +209,34 @@ end
     @test isequal(prod(A, dims=(1, 2)), fill(1, 1, 1))
     @test isequal(prod(A, dims=3), fill(1, 0, 1))
 
-    for f in (minimum, maximum)
+    for f in (minimum, maximum, findmin, findmax)
         @test_throws "reducing over an empty collection is not allowed" f(A, dims=1)
-        @test isequal(f(A, dims=2), zeros(Int, 0, 1))
+        @test_throws "reducing over an empty collection is not allowed" f(A, dims=2)
         @test_throws "reducing over an empty collection is not allowed" f(A, dims=(1, 2))
-        @test isequal(f(A, dims=3), zeros(Int, 0, 1))
-    end
-    for f in (findmin, findmax)
-        @test_throws ArgumentError f(A, dims=1)
-        @test isequal(f(A, dims=2), (zeros(Int, 0, 1), zeros(Int, 0, 1)))
-        @test_throws ArgumentError f(A, dims=(1, 2))
-        @test isequal(f(A, dims=3), (zeros(Int, 0, 1), zeros(Int, 0, 1)))
-        @test_throws ArgumentError f(abs2, A, dims=1)
-        @test isequal(f(abs2, A, dims=2), (zeros(Int, 0, 1), zeros(Int, 0, 1)))
-        @test_throws ArgumentError f(abs2, A, dims=(1, 2))
-        @test isequal(f(abs2, A, dims=3), (zeros(Int, 0, 1), zeros(Int, 0, 1)))
+        @test_throws "reducing over an empty collection is not allowed" f(A, dims=3)
+        if f === maximum
+            # maximum allows some empty reductions with abs/abs2
+            z = f(abs, A)
+            @test isequal(f(abs, A, dims=1), fill(z, (1,1)))
+            @test isequal(f(abs, A, dims=2), fill(z, (0,1)))
+            @test isequal(f(abs, A, dims=(1,2)), fill(z, (1,1)))
+            @test isequal(f(abs, A, dims=3), fill(z, (0,1)))
+            z = f(abs2, A)
+            @test isequal(f(abs2, A, dims=1), fill(z, (1,1)))
+            @test isequal(f(abs2, A, dims=2), fill(z, (0,1)))
+            @test isequal(f(abs2, A, dims=(1,2)), fill(z, (1,1)))
+            @test isequal(f(abs2, A, dims=3), fill(z, (0,1)))
+        else
+            @test_throws "reducing over an empty collection is not allowed" f(abs, A, dims=1)
+            @test_throws "reducing over an empty collection is not allowed" f(abs, A, dims=2)
+            @test_throws "reducing over an empty collection is not allowed" f(abs, A, dims=(1, 2))
+            @test_throws "reducing over an empty collection is not allowed" f(abs, A, dims=3)
+            @test_throws "reducing over an empty collection is not allowed" f(abs2, A, dims=1)
+            @test_throws "reducing over an empty collection is not allowed" f(abs2, A, dims=2)
+            @test_throws "reducing over an empty collection is not allowed" f(abs2, A, dims=(1, 2))
+            @test_throws "reducing over an empty collection is not allowed" f(abs2, A, dims=3)
+        end
     end
-
 end
 
 ## findmin/findmax/minimum/maximum
@@ -294,33 +307,48 @@ end
     end
 end
 
+struct WithIteratorsKeys{T,N} <: AbstractArray{T,N}
+    data::Array{T,N}
+end
+Base.size(a::WithIteratorsKeys) = size(a.data)
+Base.getindex(a::WithIteratorsKeys, inds...) = a.data[inds...]
+struct IteratorsKeys{T,A<:AbstractArray{T}}
+    iter::A
+    IteratorsKeys(iter) = new{eltype(iter),typeof(iter)}(iter)
+end
+Base.iterate(a::IteratorsKeys, state...) = Base.iterate(a.iter, state...)
+Base.keys(a::WithIteratorsKeys) = IteratorsKeys(keys(a.data))
+Base.eltype(::IteratorsKeys{T}) where {T} = T
+
 # findmin/findmax function arguments: output type inference
 @testset "findmin/findmax output type inference" begin
-    A = ["1" "22"; "333" "4444"]
-    for (tup, rval, rind) in [((1,), [1 2], [CartesianIndex(1, 1) CartesianIndex(1, 2)]),
-                              ((2,), reshape([1, 3], 2, 1), reshape([CartesianIndex(1, 1), CartesianIndex(2, 1)], 2, 1)),
-                              ((1,2), fill(1,1,1), fill(CartesianIndex(1,1),1,1))]
-        rval′, rind′ = findmin(length, A, dims=tup)
-        @test (rval, rind) == (rval′, rind′)
-        @test typeof(rval′) == Matrix{Int}
-    end
-    for (tup, rval, rind) in [((1,), [3 4], [CartesianIndex(2, 1) CartesianIndex(2, 2)]),
-                              ((2,), reshape([2, 4], 2, 1), reshape([CartesianIndex(1, 2), CartesianIndex(2, 2)], 2, 1)),
-                              ((1,2), fill(4,1,1), fill(CartesianIndex(2,2),1,1))]
-        rval′, rind′ = findmax(length, A, dims=tup)
-        @test (rval, rind) == (rval′, rind′)
-        @test typeof(rval) == Matrix{Int}
-    end
-    B = [1.5 1.0; 5.5 6.0]
-    for (tup, rval, rind) in [((1,), [3//2 1//1], [CartesianIndex(1, 1) CartesianIndex(1, 2)]),
-                              ((2,), reshape([1//1, 11//2], 2, 1), reshape([CartesianIndex(1, 2), CartesianIndex(2, 1)], 2, 1)),
-                              ((1,2), fill(1//1,1,1), fill(CartesianIndex(1,2),1,1))]
-        rval′, rind′ = findmin(Rational, B, dims=tup)
-        @test (rval, rind) == (rval′, rind′)
-        @test typeof(rval) == Matrix{Rational{Int}}
-        rval′, rind′ = findmin(Rational ∘ abs ∘ complex, B, dims=tup)
-        @test (rval, rind) == (rval′, rind′)
-        @test typeof(rval) == Matrix{Rational{Int}}
+    for wrapper in (identity, WithIteratorsKeys)
+        A = wrapper(["1" "22"; "333" "4444"])
+        for (tup, rval, rind) in [((1,), [1 2], [CartesianIndex(1, 1) CartesianIndex(1, 2)]),
+                                ((2,), reshape([1, 3], 2, 1), reshape([CartesianIndex(1, 1), CartesianIndex(2, 1)], 2, 1)),
+                                ((1,2), fill(1,1,1), fill(CartesianIndex(1,1),1,1))]
+            rval′, rind′ = @inferred findmin(length, A, dims=tup)
+            @test (rval, rind) == (rval′, rind′)
+            @test typeof(rval′) == Matrix{Int}
+        end
+        for (tup, rval, rind) in [((1,), [3 4], [CartesianIndex(2, 1) CartesianIndex(2, 2)]),
+                                ((2,), reshape([2, 4], 2, 1), reshape([CartesianIndex(1, 2), CartesianIndex(2, 2)], 2, 1)),
+                                ((1,2), fill(4,1,1), fill(CartesianIndex(2,2),1,1))]
+            rval′, rind′ = @inferred findmax(length, A, dims=tup)
+            @test (rval, rind) == (rval′, rind′)
+            @test typeof(rval) == Matrix{Int}
+        end
+        B = wrapper([1.5 1.0; 5.5 6.0])
+        for (tup, rval, rind) in [((1,), [3//2 1//1], [CartesianIndex(1, 1) CartesianIndex(1, 2)]),
+                                ((2,), reshape([1//1, 11//2], 2, 1), reshape([CartesianIndex(1, 2), CartesianIndex(2, 1)], 2, 1)),
+                                ((1,2), fill(1//1,1,1), fill(CartesianIndex(1,2),1,1))]
+            rval′, rind′ = @inferred findmin(Rational, B, dims=tup)
+            @test (rval, rind) == (rval′, rind′)
+            @test typeof(rval) == Matrix{Rational{Int}}
+            rval′, rind′ = @inferred findmin(Rational ∘ abs ∘ complex, B, dims=tup)
+            @test (rval, rind) == (rval′, rind′)
+            @test typeof(rval) == Matrix{Rational{Int}}
+        end
     end
 end
 
@@ -541,6 +569,14 @@ end
     end
 end
 
+f44906(x) = maximum(f44906, x); f44906(x::Number) = x
+g44906(x) = mapreduce(g44906, max, x); g44906(x::Number) = x
+@testset "inference with minimum/maximum, issue #44906" begin
+    x = [[1, 2], [3, 4]];
+    @test @inferred(f44906(x)) == 4
+    @test @inferred(g44906(x)) == 4
+end
+
 # issue #6672
 @test sum(Real[1 2 3; 4 5.3 7.1], dims=2) == reshape([6, 16.4], 2, 1)
 @test sum(Any[1 2;3 4], dims=1) == [4 6]
@@ -595,11 +631,7 @@ end
         @test_throws BoundsError mapreduce(f, +, arr; dims)
         @test_throws BoundsError mapreduce(f, +, arr; dims, init=0)
         @test_throws BoundsError mapreduce(identity, op, arr)
-        try
-            #=@test_throws BoundsError=# mapreduce(identity, op, arr; dims)
-        catch ex
-            @test_broken ex isa BoundsError
-        end
+        @test_throws BoundsError mapreduce(identity, op, arr; dims)
         @test_throws BoundsError mapreduce(identity, op, arr; dims, init=0)
 
         @test_throws BoundsError findmin(f, arr)
@@ -634,7 +666,7 @@ function unordered_test_for_extrema(a; dims_test = ((), 1, 2, (1,2), 3))
     for dims in dims_test
         vext = extrema(a; dims)
         vmin, vmax = minimum(a; dims), maximum(a; dims)
-        @test isequal(extrema!(copy(vext), a), vext)
+        @test isequal(extrema!(similar(vext, NTuple{2, eltype(a)}), a), vext)
         @test all(x -> isequal(x[1], x[2:3]), zip(vext,vmin,vmax))
     end
 end
@@ -720,3 +752,345 @@ end
     @test_broken @inferred(maximum(exp, A; dims = 1))[1] === missing
     @test_broken @inferred(extrema(exp, A; dims = 1))[1] === (missing, missing)
 end
+
+some_exception(op) = try return (Some(op()), nothing); catch ex; return (nothing, ex); end
+reduced_shape(sz, dims) = ntuple(d -> d in dims ? 1 : sz[d], length(sz))
+
+@testset "Ensure that calling, e.g., sum(empty; dims) has the same behavior as sum(empty)" begin
+    @testset "$r(Array{$T}(undef, $sz); dims=$dims)" for
+        r in (minimum, maximum, findmin, findmax, extrema, sum, prod, all, any, count),
+        T in (Int, Union{Missing, Int}, Number, Union{Missing, Number}, Bool, Union{Missing, Bool}, Any),
+        sz in ((0,), (0,1), (1,0), (0,0), (0,0,1), (1,0,1)),
+        dims in (1, 2, 3, 4, (1,2), (1,3), (2,3,4), (1,2,3))
+
+        A = Array{T}(undef, sz)
+        rsz = reduced_shape(sz, dims)
+
+        v, ex = some_exception() do; r(A); end
+        if isnothing(v)
+            @test_throws typeof(ex) r(A; dims)
+        else
+            actual = fill(something(v), rsz)
+            @test isequal(r(A; dims), actual)
+            @test eltype(r(A; dims)) === eltype(actual)
+        end
+
+        for f in (identity, abs, abs2)
+            v, ex = some_exception() do; r(f, A); end
+            if isnothing(v)
+                @test_throws typeof(ex) r(f, A; dims)
+            else
+                actual = fill(something(v), rsz)
+                @test isequal(r(f, A; dims), actual)
+                @test eltype(r(f, A; dims)) === eltype(actual)
+            end
+        end
+    end
+end
+
+some_exception(op) = try return (Some(op()), nothing); catch ex; return (nothing, ex); end
+reduced_shape(sz, dims) = ntuple(d -> d in dims ? 1 : sz[d], length(sz))
+
+@testset "Ensure that calling, e.g., sum(empty; dims) has the same behavior as sum(empty)" begin
+    @testset "$r(Array{$T}(undef, $sz); dims=$dims)" for
+        r in (minimum, maximum, findmin, findmax, extrema, sum, prod, all, any, count),
+        T in (Int, Union{Missing, Int}, Number, Union{Missing, Number}, Bool, Union{Missing, Bool}, Any),
+        sz in ((0,), (0,1), (1,0), (0,0), (0,0,1), (1,0,1)),
+        dims in (1, 2, 3, 4, (1,2), (1,3), (2,3,4), (1,2,3))
+
+        A = Array{T}(undef, sz)
+        rsz = reduced_shape(sz, dims)
+
+        v, ex = some_exception() do; r(A); end
+        if isnothing(v)
+            @test_throws typeof(ex) r(A; dims)
+        else
+            actual = fill(something(v), rsz)
+            @test isequal(r(A; dims), actual)
+            @test eltype(r(A; dims)) === eltype(actual)
+        end
+
+        for f in (identity, abs, abs2)
+            v, ex = some_exception() do; r(f, A); end
+            if isnothing(v)
+                @test_throws typeof(ex) r(f, A; dims)
+            else
+                actual = fill(something(v), rsz)
+                @test isequal(r(f, A; dims), actual)
+                @test eltype(r(f, A; dims)) === eltype(actual)
+            end
+        end
+    end
+end
+
+@testset "generic sum reductions; issue #31427" begin
+    add31427(a, b) = a+b
+    A = rand(1:10, 5, 5)
+    @test reduce(add31427, A, dims = (1, 2)) == reduce(+, A, dims = (1, 2)) == [sum(A);;]
+
+    As = [rand(5, 4) for _ in 1:2, _ in 1:3]
+    @test reduce(hcat, reduce(vcat, As, dims=1)) == [As[1,1] As[1,2] As[1,3]; As[2,1] As[2,2] As[2,3]]
+end
+
+@testset "sum with `missing`s; issue #55213 and #32366" begin
+    @test isequal(sum([0.0 1; 0.0 missing], dims=2), [1.0; missing;;])
+    @test sum([0.0 1; 0.0 missing], dims=2)[1] === 1.0
+    @test isequal(sum(Any[0.0 1; 0.0 missing], dims=2), [1.0; missing;;])
+    @test sum(Any[0.0 1; 0.0 missing], dims=2)[1] === 1.0
+
+    @test isequal(sum([1 0.0; 0.0 missing], dims=1), [1.0 missing])
+    @test sum([1 0.0; 0.0 missing], dims=1)[1] === 1.0
+    @test isequal(sum(Any[1 0.0; 0.0 missing], dims=1), [1.0 missing])
+    @test sum(Any[1 0.0; 0.0 missing], dims=1)[1] === 1.0
+
+    @test isequal(sum([true false missing], dims=2), sum([1 0 missing], dims=2))
+    @test isequal(sum([true false missing], dims=2), [sum([true false missing]);;])
+    @test isequal(sum([true false missing], dims=2), [missing;;])
+end
+
+@testset "issues #45566 and #47231; initializers" begin
+    @test reduce(gcd, [1]; dims=1) == [reduce(gcd, [1])] == [1]
+
+    x = reshape(1:6, 2, 3)
+    @test reduce(+, x, dims=2) == [9;12;;]
+    @test reduce(-, x, dims=1) == [1-2 3-4 5-6]
+    @test reduce(/, x, dims=1) == [1/2 3/4 5/6]
+    @test reduce(^, x, dims=1) == [1^2 3^4 5^6]
+    # These have arbitrary associativity, but they shouldn't error
+    @test reduce(-, x, dims=2) in ([(1-3)-5; (2-4)-6;;], [1-(3-5); 2-(4-6);;])
+    @test reduce(/, x, dims=2) in ([(1/3)/5; (2/4)/6;;], [1/(3/5); 2/(4/6);;])
+    @test reduce(^, x, dims=2) in ([(1^3)^5; (2^4)^6;;], [1^(3^5); 2^(4^6);;])
+end
+
+@testset "better initializations" begin
+    @test mapreduce(_ -> pi, +, [1,2]; dims=1) == [mapreduce(_ -> pi, +, [1,2])] == [2pi]
+    @test mapreduce(_ -> pi, +, [1 2]; dims=2) == [mapreduce(_ -> pi, +, [1 2]);;] == [2pi;;]
+    @test mapreduce(_ -> pi, +, [1 2]; dims=(1,2)) == [2pi;;]
+    @test mapreduce(_ -> pi, +, [1]; dims=1) == [mapreduce(_ -> pi, +, [1])] == [pi]
+    @test mapreduce(_ -> pi, +, [1;;]; dims=2) == [mapreduce(_ -> pi, +, [1;;]);;] == [pi;;]
+    @test_throws ArgumentError mapreduce(_ -> pi, +, []; dims=1)
+    @test_throws ArgumentError mapreduce(_ -> pi, +, [;;]; dims=2)
+    @test_throws ArgumentError mapreduce(_ -> pi, +, [;;]; dims=(1,2))
+    @test_throws ArgumentError mapreduce(_ -> pi, +, [])
+    @test_throws ArgumentError mapreduce(_ -> pi, +, [;;])
+
+    @test mapreduce(x -> log(x-1), +, [2,3,4]; dims=1) == [mapreduce(x -> log(x-1), +, [2,3,4])] == [log(1) + log(2) + log(3)]
+    @test mapreduce(x -> log(x-1), +, [2,3]; dims=1) == [mapreduce(x -> log(x-1), +, [2,3])] == [log(1) + log(2)]
+    @test mapreduce(x -> log(x-1), +, [2]; dims=1) == [mapreduce(x -> log(x-1), +, [2])] == [log(1)]
+    @test mapreduce(x -> log(x-1), +, [2 3 4]; dims=2) == [mapreduce(x -> log(x-1), +, [2,3,4]);;] == [log(1) + log(2) + log(3);;]
+    @test mapreduce(x -> log(x-1), +, [2 3]; dims=2) == [mapreduce(x -> log(x-1), +, [2,3]);;] == [log(1) + log(2);;]
+    @test mapreduce(x -> log(x-1), +, [2;;]; dims=2) == [mapreduce(x -> log(x-1), +, [2]);;] == [log(1);;]
+    @test_throws ArgumentError mapreduce(x -> log(x-1), +, []; dims=1)
+    @test_throws ArgumentError mapreduce(x -> log(x-1), +, [;;]; dims=2)
+    @test_throws ArgumentError mapreduce(x -> log(x-1), +, [;;]; dims=(1,2))
+    @test_throws ArgumentError mapreduce(x -> log(x-1), +, [])
+    @test_throws ArgumentError mapreduce(x -> log(x-1), +, [;;])
+
+    @test sum(x->sqrt(x-1), ones(5); dims=1) == [sum(x->sqrt(x-1), ones(5))] == [0.0]
+    @test sum(x->sqrt(x-1), ones(1); dims=1) == [sum(x->sqrt(x-1), ones(1))] == [0.0]
+    @test_throws ArgumentError sum(x->sqrt(x-1), ones(0); dims=1)
+    @test_throws ArgumentError sum(x->sqrt(x-1), ones(0))
+end
+
+@testset "reductions on broadcasted; issue #41054" begin
+    A = clamp.(randn(3,4), -1, 1)
+    bc = Base.broadcasted(+, A, 2)
+    @test sum(bc, dims=1) ≈ sum(A .+ 2, dims=1)
+    @test mapreduce(sqrt, +, bc, dims=1) ≈ mapreduce(sqrt, +, A .+ 2, dims=1)
+
+    @test sum(bc, dims=2) ≈ sum(A .+ 2, dims=2)
+    @test mapreduce(sqrt, +, bc, dims=2) ≈ mapreduce(sqrt, +, A .+ 2, dims=2)
+
+    @test sum(bc, dims=(1,2)) ≈ [sum(A .+ 2)]
+    @test mapreduce(sqrt, +, bc, dims=(1,2)) ≈ [mapreduce(sqrt, +, A .+ 2)]
+end
+
+@testset "reductions over complex values; issue #54920" begin
+    A = Complex{Int}.(rand(Complex{Int8}, 2, 2, 2));
+    @test maximum(abs, A; dims=(1,)) == mapreduce(abs, max, A, dims=(1,)) == [maximum(abs, A[:,1,1]) maximum(abs, A[:,2,1]);;; maximum(abs, A[:,1,2]) maximum(abs, A[:,2,2])]
+    @test maximum(abs, A; dims=(2,)) == mapreduce(abs, max, A, dims=(2,)) == [maximum(abs, A[1,:,1]); maximum(abs, A[2,:,1]);;; maximum(abs, A[1,:,2]); maximum(abs, A[2,:,2])]
+    @test maximum(abs, A; dims=(1, 2)) == mapreduce(abs, max, A, dims=(1, 2)) == [maximum(abs, A[:,:,1]);;; maximum(abs, A[:,:,2])]
+    @test maximum(abs, A; dims=(1, 2, 3)) == mapreduce(abs, max, A, dims=(1, 2, 3)) == [maximum(abs, A);;;]
+end
+
+@testset "bitwise operators on integers; part of issue #45562" begin
+    @test mapreduce(identity, &, [3,3,3]; dims=1) == [mapreduce(identity, &, [3,3,3])] == [3 & 3 & 3] == [3]
+    @test mapreduce(identity, |, [3,3,3]; dims=1) == [mapreduce(identity, |, [3,3,3])] == [3 | 3 | 3] == [3]
+    @test mapreduce(identity, xor, [3,3,3]; dims=1) == [mapreduce(identity, xor, [3,3,3])] == [xor(xor(3, 3), 3)] == [3]
+
+    @test mapreduce(identity, &, [3,7,6]; dims=1) == [mapreduce(identity, &, [3,7,6])] == [3 & 7 & 6] == [2]
+    @test mapreduce(identity, |, [3,7,6]; dims=1) == [mapreduce(identity, |, [3,7,6])] == [3 | 7 | 6] == [7]
+    @test mapreduce(identity, xor, [3,7,6]; dims=1) == [mapreduce(identity, xor, [3,7,6])] == [xor(xor(3, 7), 6)] == [2]
+end
+
+@testset "indexing" begin
+    A = [1 2; 3 4]
+    B = [5 6; 7 8]
+
+    @test mapreduce(/, +, A, B) ≈ sum(A./B)
+    @test mapreduce(i -> A[i]/B[i], +, eachindex(A,B)) ≈ sum(A./B)
+    @test mapreduce(i -> A[i]/B'[i], +, eachindex(A,B')) ≈ sum(A./B')
+
+    @test mapreduce(/, +, A, B; dims=1) ≈ sum(A./B; dims=1)
+    @test mapreduce(i -> A[i]/B[i], +, reshape(eachindex(A,B), size(A)); dims=1) ≈ sum(A./B; dims=1)  # BoundsError
+    @test mapreduce(i -> A[i]/B'[i], +, eachindex(A,B'); dims=1) ≈ sum(A./B'; dims=1)  # BoundsError
+end
+
+@testset "more related to issue #26488" begin
+    @test mapreduce(x -> log10(x-1), +, [11, 101]) ≈ 3.0
+    @test mapreduce(x -> log10(x-1), *, [11, 101]) ≈ 2.0
+    @test mapreduce(x -> log10(x-1), +, [11, 101]; dims=1) ≈ [3.0]
+    @test mapreduce(x -> log10(x-1), +, [11, 101]; init=-10) ≈ -7.0
+    @test mapreduce(x -> log10(x-1), +, [11, 101]; init=-10, dims=1) ≈ [-7.0]
+
+    @test mapreduce(_ -> pi, +, [1,2]) ≈ 2pi
+    @test mapreduce(_ -> pi, *, [1,2]) ≈ pi^2
+    @test mapreduce(_ -> pi, +, [1,2]; dims=1) ≈ [2pi]
+end
+
+_add(x,y) = x+y  # this avoids typeof(+) dispatch
+
+@testset "mapreduce fast paths, dims=:, op=$op, kw=$kw" for op in (+,_add),
+                                                            kw in ((;), (; init=0.0))
+    @test_broken 0 == @allocated mapreduce(/, op, 1:3, 4:7; kw...)
+    @test_broken 0 == @allocated mapreduce(/, op, 1:3, 4:9; kw...)  # stops early
+    @test mapreduce(/, op, 1:3, 4:7; kw...) ≈ reduce(op, map(/, 1:3, 4:7); kw...)
+    @test mapreduce(/, op, 1:3, 4:9; kw...) ≈ reduce(op, map(/, 1:3, 4:9); kw...)
+
+    A = [1 2; 3 4]
+    B = [5 6; 7 8]
+
+    @test_broken 0 == @allocated mapreduce(*, op, A, B; kw...)  # LinearIndices
+    @test_broken 0 == @allocated mapreduce(*, op, A, B'; kw...)  # CartesianIndices
+
+    @test mapreduce(*, op, A, B; kw...) == reduce(op, map(*, A, B); kw...)
+    @test mapreduce(*, op, A, B'; kw...) == reduce(op, map(*, A, B'); kw...)
+
+    @test_broken 0 == @allocated mapreduce(*, op, A, 5:7; kw...)  # stops early
+    @test_broken 0 == @allocated mapreduce(*, op, 1:3, B'; kw...)  # stops early
+    @test mapreduce(*, op, A, 5:7; kw...) == reduce(op, map(*, A, 5:7); kw...)
+    @test mapreduce(*, op, 1:3, B'; kw...) == reduce(op, map(*, 1:3, B'); kw...)
+    @test mapreduce(*, op, 1:3, B', 10:20; kw...) == reduce(op, map(*, 1:3, B', 10:20); kw...)
+
+    @test_throws DimensionMismatch map(*, A, hcat(B, 9:10))  # same ndims, does not stop early
+    @test_throws DimensionMismatch mapreduce(*, op, A, hcat(B, 9:10); kw...)
+end
+
+@testset "mapreduce fast paths, dims=$dims, op=$op, kw=$kw" for dims in (1,2,[2],(1,2),3),
+                                                                op in (+,*,_add),
+                                                                kw in ((;), (; init=0.0))
+    (kw == (;) && op == _add) && continue
+
+    @test mapreduce(/, op, 1:3, 4:7; dims, kw...) ≈ reduce(op, map(/, 1:3, 4:7); dims, kw...)
+    @test mapreduce(/, op, 1:3, 4:9; dims, kw...) ≈ reduce(op, map(/, 1:3, 4:9); dims, kw...)
+
+    A = [1 2; 3 4]
+    B = [5 6; 7 8]
+    @test mapreduce(*, op, A, B; dims, kw...) == reduce(op, map(*, A, B); dims, kw...)  # LinearIndices
+    @test mapreduce(*, op, A, B'; dims, kw...) == reduce(op, map(*, A, B'); dims, kw...)  # CartesianIndices
+
+    @test_broken @allocated(mapreduce(*, op, A, B; dims, kw...)) < @allocated(reduce(op, map(*, A, B); dims, kw...))
+    @test_broken @allocated(mapreduce(*, op, A, B'; dims, kw...)) < @allocated(reduce(op, map(*, A, B'); dims, kw...))
+
+    @test mapreduce(*, op, A, 5:7; dims, kw...) == reduce(op, map(*, A, 5:7); dims, kw...)  # stops early
+    @test mapreduce(*, op, 1:3, B'; dims, kw...) == reduce(op, map(*, 1:3, B'); dims, kw...)
+
+    @test_throws DimensionMismatch mapreduce(*, +, A, hcat(B, 9:10); dims, kw...)
+end
+
+struct Infinity21097 <: Number
+end
+Base.:+(::Infinity21097,::Infinity21097) = Infinity21097()
+@testset "don't call zero on unknown numbers, issue #21097" begin
+    @test reduce(+, [Infinity21097()], init=Infinity21097()) == Infinity21097()
+    @test reduce(+, [Infinity21097()], init=Infinity21097(), dims=1) == [Infinity21097()]
+    @test reduce(+, [Infinity21097();;], init=Infinity21097(), dims=2) == [Infinity21097();;]
+    for len in [2,15,16,17,31,32,33,63,64,65,127,128,129]
+        for init in ((), (; init=Infinity21097()))
+            @test reduce(+,fill(Infinity21097(), len); init...) == Infinity21097()
+            @test reduce(+,fill(Infinity21097(), len, 2); init...) == Infinity21097()
+            @test reduce(+,fill(Infinity21097(), len, 16); init...) == Infinity21097()
+            @test reduce(+,fill(Infinity21097(), len); dims = 1, init...) == [Infinity21097()]
+            @test reduce(+,fill(Infinity21097(), len, 2); dims = 1, init...) == fill(Infinity21097(), 1, 2)
+            @test reduce(+,fill(Infinity21097(), len, 16); dims = 1, init...) == fill(Infinity21097(), 1, 16)
+            @test reduce(+,fill(Infinity21097(), len, 2); dims = 2, init...) == fill(Infinity21097(), len, 1)
+            @test reduce(+,fill(Infinity21097(), len, 16); dims = 2, init...) == fill(Infinity21097(), len, 1)
+        end
+    end
+end
+
+@testset "don't assume zero is the neutral element; issue #54875" begin
+    A = rand(3,4)
+    @test sum(I -> A[I], CartesianIndices(A); dims=1) == sum(I -> A[I], CartesianIndices(A); dims=1, init=0.)
+end
+
+@testset "cannot construct a value of type Union{} for return result; issue #43731" begin
+    a = collect(1.0:4)
+    f(x) = x < 3 ? missing : x
+    @test ismissing(minimum(f, a))
+    @test ismissing(only(minimum(f, a; dims = 1)))
+end
+
+@testset "zero indices are not special, issue #38660" begin
+    v = OffsetVector([-1, 1], 0:1)
+    @test minimum(v) == -1
+    @test findmin(v, dims=1) == OffsetVector.(([-1], [0]), (0:0,))
+
+    A = rand(3,4,5)
+    OA = OffsetArray(A, (-1,-2,-3))
+    for dims in ((), 1, 2, 3, (1,2), (1,3), (2,3))
+        av, ai = findmin(A; dims)
+        ov, oi = findmin(OA; dims)
+        @test av == ov.parent
+        @test ai == oi.parent .+ CartesianIndex(1,2,3)
+    end
+end
+
+@testset "in-place reduction aliasing, issue #39385" begin
+    a = [1 2 3];
+    b = copy(a); @test sum!(b, b) == sum(a; dims=()) == a
+    b = copy(a); @test prod!(b, b) == prod(a; dims=()) == a
+    a = [true, false]
+    b = copy(a); @test any!(a, a) == any(a; dims=()) == a
+    b = copy(a); @test all!(a, a) == all(a; dims=()) == a
+
+    A = collect(reshape(1:105, 7, 5, 3))
+    B = copy(A); @test sum!(B,B) == A
+    B = copy(A); @test prod!(B,B) == A
+
+    B = copy(A); @test sum!(view(B, 1:1, :, :),B) == sum(A; dims=1)
+    B = copy(A); @test sum!(view(B, :, 1:1, :),B) == sum(A; dims=2)
+    B = copy(A); @test sum!(view(B, :, :, 1:1),B) == sum(A; dims=3)
+    B = copy(A); @test sum!(view(B, 1:1, 1:1, :),B) == sum(A; dims=(1,2))
+    B = copy(A); @test sum!(view(B, 1:1, :, 1:1),B) == sum(A; dims=(1,3))
+    B = copy(A); @test sum!(view(B, :, 1:1, 1:1),B) == sum(A; dims=(2,3))
+    B = copy(A); @test sum!(view(B, 1:1, 1:1, 1:1),B) == sum(A; dims=(1,2,3))
+
+    B = copy(A); @test prod!(view(B, 1:1, :, :),B) == prod(A; dims=1)
+    B = copy(A); @test prod!(view(B, :, 1:1, :),B) == prod(A; dims=2)
+    B = copy(A); @test prod!(view(B, :, :, 1:1),B) == prod(A; dims=3)
+    B = copy(A); @test prod!(view(B, 1:1, 1:1, :),B) == prod(A; dims=(1,2))
+    B = copy(A); @test prod!(view(B, 1:1, :, 1:1),B) == prod(A; dims=(1,3))
+    B = copy(A); @test prod!(view(B, :, 1:1, 1:1),B) == prod(A; dims=(2,3))
+    B = copy(A); @test prod!(view(B, 1:1, 1:1, 1:1),B) == prod(A; dims=(1,2,3))
+
+    A = rand(7, 5, 3) .> 0.9
+    B = copy(A); @test any!(B, B) == any(A, dims=())
+    B = copy(A); @test all!(!, B, B) == all(!, A, dims=())
+
+    B = copy(A); @test any!(view(B, 1:1, :, :),B) == any(A; dims=1)
+    B = copy(A); @test any!(view(B, :, 1:1, :),B) == any(A; dims=2)
+    B = copy(A); @test any!(view(B, :, :, 1:1),B) == any(A; dims=3)
+    B = copy(A); @test any!(view(B, 1:1, 1:1, :),B) == any(A; dims=(1,2))
+    B = copy(A); @test any!(view(B, 1:1, :, 1:1),B) == any(A; dims=(1,3))
+    B = copy(A); @test any!(view(B, :, 1:1, 1:1),B) == any(A; dims=(2,3))
+    B = copy(A); @test any!(view(B, 1:1, 1:1, 1:1),B) == any(A; dims=(1,2,3))
+
+    B = copy(A); @test all!(!, view(B, 1:1, :, :),B) == all(!, A; dims=1)
+    B = copy(A); @test all!(!, view(B, :, 1:1, :),B) == all(!, A; dims=2)
+    B = copy(A); @test all!(!, view(B, :, :, 1:1),B) == all(!, A; dims=3)
+    B = copy(A); @test all!(!, view(B, 1:1, 1:1, :),B) == all(!, A; dims=(1,2))
+    B = copy(A); @test all!(!, view(B, 1:1, :, 1:1),B) == all(!, A; dims=(1,3))
+    B = copy(A); @test all!(!, view(B, :, 1:1, 1:1),B) == all(!, A; dims=(2,3))
+    B = copy(A); @test all!(!, view(B, 1:1, 1:1, 1:1),B) == all(!, A; dims=(1,2,3))
+end
diff --git a/test/testhelpers/OffsetArrays.jl b/test/testhelpers/OffsetArrays.jl
index 5acaa88064245..cce3ed0170c1f 100644
--- a/test/testhelpers/OffsetArrays.jl
+++ b/test/testhelpers/OffsetArrays.jl
@@ -211,7 +211,6 @@ Base.show(io::IO, r::IdOffsetRange) = print(io, IdOffsetRange, "(values=",first(
 
 # Optimizations
 @inline Base.checkindex(::Type{Bool}, inds::IdOffsetRange, i::Real) = Base.checkindex(Bool, inds.parent, i - inds.offset)
-Base._firstslice(i::IdOffsetRange) = IdOffsetRange(Base._firstslice(i.parent), i.offset)
 
 ########################################################################################################
 # origin.jl