From 3c007afc89799e2eb9104e7b41cc798fdd989a13 Mon Sep 17 00:00:00 2001
From: felix <felixrehren@gmail.com>
Date: Sat, 15 Jul 2017 13:56:09 -0700
Subject: [PATCH 1/4] Default reduce promotion to system default

---
 base/reduce.jl | 28 +++++++++++++++++-----------
 test/reduce.jl | 18 +++++++++---------
 2 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index 87fe2e44329e2..0d8072f85f987 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -12,15 +12,18 @@ const SmallSigned = Union{Int8,Int16,Int32}
 const SmallUnsigned = Union{UInt8,UInt16,UInt32}
 end
 
-const CommonReduceResult = Union{UInt64,UInt128,Int64,Int128,Float32,Float64}
-const WidenReduceResult = Union{SmallSigned, SmallUnsigned, Float16}
+const CommonReduceResult = Union{UInt64,UInt128,Int64,Int128,Float16,Float32,Float64}
+const WidenReduceResult = Union{SmallSigned, SmallUnsigned}
 
+promote_sys_size{T}(::Type{T}) = T
+promote_sys_size{T<:SmallSigned}(::Type{T}) = Int
+promote_sys_size{T<:SmallUnsigned}(::Type{T}) = UInt
 # r_promote_type: promote T to the type of reduce(op, ::Array{T})
 # (some "extra" methods are required here to avoid ambiguity warnings)
 r_promote_type(op, ::Type{T}) where {T} = T
-r_promote_type(op, ::Type{T}) where {T<:WidenReduceResult} = widen(T)
-r_promote_type(::typeof(+), ::Type{T}) where {T<:WidenReduceResult} = widen(T)
-r_promote_type(::typeof(*), ::Type{T}) where {T<:WidenReduceResult} = widen(T)
+r_promote_type(op, ::Type{T}) where {T<:WidenReduceResult} = promote_sys_size(T)
+r_promote_type(::typeof(+), ::Type{T}) where {T<:WidenReduceResult} = promote_sys_size(T)
+r_promote_type(::typeof(*), ::Type{T}) where {T<:WidenReduceResult} = promote_sys_size(T)
 r_promote_type(::typeof(+), ::Type{T}) where {T<:Number} = typeof(zero(T)+zero(T))
 r_promote_type(::typeof(*), ::Type{T}) where {T<:Number} = typeof(one(T)*one(T))
 r_promote_type(::typeof(scalarmax), ::Type{T}) where {T<:WidenReduceResult} = T
@@ -296,21 +299,24 @@ mapreduce(f, op, a::Number) = f(a)
 """
     reduce(op, v0, itr)
 
-Reduce the given collection `ìtr` with the given binary operator `op`. `v0` must be a
+Reduce the given collection `itr` with the given binary operator `op`. `v0` must be a
 neutral element for `op` that will be returned for empty collections. It is unspecified
 whether `v0` is used for non-empty collections.
 
-Reductions for certain commonly-used operators have special implementations which should be
-used instead: `maximum(itr)`, `minimum(itr)`, `sum(itr)`, `prod(itr)`, `any(itr)`,
-`all(itr)`.
+The return type is `Int` (`UInt`) for (un)signed integers of less than system word size.
+For all other arguments, a common return type is found to which all arguments are promoted.
+
+Reductions for certain commonly-used operators may have special implementations, and
+should be used instead: `maximum(itr)`, `minimum(itr)`, `sum(itr)`, `prod(itr)`,
+ `any(itr)`, `all(itr)`.
 
 The associativity of the reduction is implementation dependent. This means that you can't
 use non-associative operations like `-` because it is undefined whether `reduce(-,[1,2,3])`
 should be evaluated as `(1-2)-3` or `1-(2-3)`. Use [`foldl`](@ref) or
 [`foldr`](@ref) instead for guaranteed left or right associativity.
 
-Some operations accumulate error, and parallelism will also be easier if the reduction can
-be executed in groups. Future versions of Julia might change the algorithm. Note that the
+Some operations accumulate error. Parallelism will be easier if the reduction can be
+executed in groups. Future versions of Julia might change the algorithm. Note that the
 elements are not reordered if you use an ordered collection.
 
 # Examples
diff --git a/test/reduce.jl b/test/reduce.jl
index bbcf4b1b9400b..b2050939150f3 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -2,7 +2,7 @@
 
 # fold(l|r) & mapfold(l|r)
 @test foldl(+, Int64[]) === Int64(0) # In reference to issues #7465/#20144 (PR #20160)
-@test foldl(+, Int16[]) === Int32(0)
+@test foldl(+, Int16[]) === Int(0)
 @test foldl(-, 1:5) == -13
 @test foldl(-, 10, 1:5) == -5
 
@@ -19,7 +19,7 @@
 @test Base.mapfoldl((x)-> x ⊻ true, |, false, [true false true false false]) == true
 
 @test foldr(+, Int64[]) === Int64(0) # In reference to issue #20144 (PR #20160)
-@test foldr(+, Int16[]) === Int32(0)
+@test foldr(+, Int16[]) === Int(0)
 @test foldr(-, 1:5) == 3
 @test foldr(-, 10, 1:5) == -7
 @test foldr(+, [1]) == 1 # Issue #21493
@@ -29,7 +29,7 @@
 
 # reduce
 @test reduce(+, Int64[]) === Int64(0) # In reference to issue #20144 (PR #20160)
-@test reduce(+, Int16[]) === Int32(0)
+@test reduce(+, Int16[]) === Int(0)
 @test reduce((x,y)->"($x+$y)", 9:11) == "((9+10)+11)"
 @test reduce(max, [8 6 7 5 3 0 9]) == 9
 @test reduce(+, 1000, 1:5) == (1000 + 1 + 2 + 3 + 4 + 5)
@@ -70,7 +70,7 @@
 
 # sum
 
-@test sum(Int8[]) === Int32(0)
+@test sum(Int8[]) === Int(0)
 @test sum(Int[]) === Int(0)
 @test sum(Float64[]) === 0.0
 
@@ -78,7 +78,7 @@
 @test sum(3) === 3
 @test sum(3.0) === 3.0
 
-@test sum([Int8(3)]) === Int32(3)
+@test sum([Int8(3)]) === Int(3)
 @test sum([3]) === 3
 @test sum([3.0]) === 3.0
 
@@ -138,11 +138,11 @@ end
 # prod
 
 @test prod(Int[]) === 1
-@test prod(Int8[]) === Int32(1)
+@test prod(Int8[]) === Int(1)
 @test prod(Float64[]) === 1.0
 
 @test prod([3]) === 3
-@test prod([Int8(3)]) === Int32(3)
+@test prod([Int8(3)]) === Int(3)
 @test prod([3.0]) === 3.0
 
 @test prod(z) === 120
@@ -157,8 +157,8 @@ end
 prod2(itr) = invoke(prod, Tuple{Any}, itr)
 @test prod(Int[]) === prod2(Int[]) === 1
 @test prod(Int[7]) === prod2(Int[7]) === 7
-@test typeof(prod(Int8[])) == typeof(prod(Int8[1])) == typeof(prod(Int8[1, 7])) == Int32
-@test typeof(prod2(Int8[])) == typeof(prod2(Int8[1])) == typeof(prod2(Int8[1 7])) == Int32
+@test typeof(prod(Int8[])) == typeof(prod(Int8[1])) == typeof(prod(Int8[1, 7])) == Int
+@test typeof(prod2(Int8[])) == typeof(prod2(Int8[1])) == typeof(prod2(Int8[1 7])) == Int
 
 # maximum & minimum & extrema
 

From 3272a404dabbb0237592f83f6cfa378329d722c2 Mon Sep 17 00:00:00 2001
From: Fengyang Wang <fengyang.wang.0@gmail.com>
Date: Sat, 15 Jul 2017 15:48:01 -0700
Subject: [PATCH 2/4] Decouple sum/prod promotion from reduce

---
 NEWS.md                     |   3 +
 base/process.jl             |   2 -
 base/reduce.jl              | 144 +++++++++++++++++++++---------------
 base/reducedim.jl           |  36 +++++----
 base/sparse/sparsematrix.jl |   2 +-
 base/tuple.jl               |   4 +
 test/reduce.jl              |  42 ++++++++---
 test/reducedim.jl           |   8 ++
 8 files changed, 155 insertions(+), 86 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 77a3150a36e39..62bfef0702f20 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -115,6 +115,9 @@ Language changes
   * The keyword `importall` is deprecated. Use `using` and/or individual `import` statements
     instead ([#22789]).
 
+  * `reduce(+, [...])` and `reduce(*, [...])` no longer widen the iterated over arguments to
+    system word size. `sum` and `prod` still preserve this behavior. ([#22825])
+
 Breaking changes
 ----------------
 
diff --git a/base/process.jl b/base/process.jl
index cc188a084e675..fe41bd90e8132 100644
--- a/base/process.jl
+++ b/base/process.jl
@@ -230,8 +230,6 @@ setenv(cmd::Cmd; dir="") = Cmd(cmd; dir=dir)
 (&)(left::AbstractCmd, right::AbstractCmd) = AndCmds(left, right)
 redir_out(src::AbstractCmd, dest::AbstractCmd) = OrCmds(src, dest)
 redir_err(src::AbstractCmd, dest::AbstractCmd) = ErrOrCmds(src, dest)
-Base.mr_empty(f, op::typeof(&), T::Type{<:Base.AbstractCmd}) =
-    throw(ArgumentError("reducing over an empty collection of type $T with operator & is not allowed"))
 
 # Stream Redirects
 redir_out(dest::Redirectable, src::AbstractCmd) = CmdRedirect(src, dest, STDIN_NO)
diff --git a/base/reduce.jl b/base/reduce.jl
index 0d8072f85f987..2795ef7ccec19 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -5,34 +5,18 @@
 ###### Generic (map)reduce functions ######
 
 if Int === Int32
-const SmallSigned = Union{Int8,Int16}
-const SmallUnsigned = Union{UInt8,UInt16}
+    const SmallSigned = Union{Int8,Int16}
+    const SmallUnsigned = Union{UInt8,UInt16}
 else
-const SmallSigned = Union{Int8,Int16,Int32}
-const SmallUnsigned = Union{UInt8,UInt16,UInt32}
+    const SmallSigned = Union{Int8,Int16,Int32}
+    const SmallUnsigned = Union{UInt8,UInt16,UInt32}
 end
 
-const CommonReduceResult = Union{UInt64,UInt128,Int64,Int128,Float16,Float32,Float64}
-const WidenReduceResult = Union{SmallSigned, SmallUnsigned}
-
-promote_sys_size{T}(::Type{T}) = T
-promote_sys_size{T<:SmallSigned}(::Type{T}) = Int
-promote_sys_size{T<:SmallUnsigned}(::Type{T}) = UInt
-# r_promote_type: promote T to the type of reduce(op, ::Array{T})
-# (some "extra" methods are required here to avoid ambiguity warnings)
-r_promote_type(op, ::Type{T}) where {T} = T
-r_promote_type(op, ::Type{T}) where {T<:WidenReduceResult} = promote_sys_size(T)
-r_promote_type(::typeof(+), ::Type{T}) where {T<:WidenReduceResult} = promote_sys_size(T)
-r_promote_type(::typeof(*), ::Type{T}) where {T<:WidenReduceResult} = promote_sys_size(T)
-r_promote_type(::typeof(+), ::Type{T}) where {T<:Number} = typeof(zero(T)+zero(T))
-r_promote_type(::typeof(*), ::Type{T}) where {T<:Number} = typeof(one(T)*one(T))
-r_promote_type(::typeof(scalarmax), ::Type{T}) where {T<:WidenReduceResult} = T
-r_promote_type(::typeof(scalarmin), ::Type{T}) where {T<:WidenReduceResult} = T
-r_promote_type(::typeof(max), ::Type{T}) where {T<:WidenReduceResult} = T
-r_promote_type(::typeof(min), ::Type{T}) where {T<:WidenReduceResult} = T
-
-# r_promote: promote x to the type of reduce(op, [x])
-r_promote(op, x::T) where {T} = convert(r_promote_type(op, T), x)
+# Certain reductions like sum and prod may wish to promote the items being
+# reduced over to an appropriate size.
+promote_sys_size(x) = x
+promote_sys_size(x::Union{Bool, SmallSigned}) = Int(x)
+promote_sys_size(x::SmallUnsigned) = UInt(x)
 
 ## foldl && mapfoldl
 
@@ -40,10 +24,10 @@ r_promote(op, x::T) where {T} = convert(r_promote_type(op, T), x)
     # Unroll the while loop once; if v0 is known, the call to op may
     # be evaluated at compile time
     if done(itr, i)
-        return r_promote(op, v0)
+        return v0
     else
         (x, i) = next(itr, i)
-        v = op(r_promote(op, v0), f(x))
+        v = op(v0, f(x))
         while !done(itr, i)
             @inbounds (x, i) = next(itr, i)
             v = op(v, f(x))
@@ -71,7 +55,7 @@ In general, this cannot be used with empty collections (see [`reduce(op, itr)`](
 function mapfoldl(f, op, itr)
     i = start(itr)
     if done(itr, i)
-        return Base.mr_empty_iter(f, op, itr, iteratoreltype(itr))
+        return Base.mapreduce_empty_iter(f, op, itr, iteratoreltype(itr))
     end
     (x, i) = next(itr, i)
     v0 = f(x)
@@ -110,10 +94,10 @@ function mapfoldr_impl(f, op, v0, itr, i::Integer)
     # Unroll the while loop once; if v0 is known, the call to op may
     # be evaluated at compile time
     if isempty(itr) || i == 0
-        return r_promote(op, v0)
+        return v0
     else
         x = itr[i]
-        v  = op(f(x), r_promote(op, v0))
+        v  = op(f(x), v0)
         while i > 1
             x = itr[i -= 1]
             v = op(f(x), v)
@@ -141,7 +125,7 @@ In general, this cannot be used with empty collections (see [`reduce(op, itr)`](
 function mapfoldr(f, op, itr)
     i = endof(itr)
     if isempty(itr)
-        return Base.mr_empty_iter(f, op, itr, iteratoreltype(itr))
+        return Base.mapreduce_empty_iter(f, op, itr, iteratoreltype(itr))
     end
     return mapfoldr_impl(f, op, f(itr[i]), itr, i-1)
 end
@@ -184,12 +168,12 @@ foldr(op, itr) = mapfoldr(identity, op, itr)
 @noinline function mapreduce_impl(f, op, A::AbstractArray, ifirst::Integer, ilast::Integer, blksize::Int)
     if ifirst == ilast
         @inbounds a1 = A[ifirst]
-        return r_promote(op, f(a1))
+        return f(a1)
     elseif ifirst + blksize > ilast
         # sequential portion
         @inbounds a1 = A[ifirst]
         @inbounds a2 = A[ifirst+1]
-        v = op(r_promote(op, f(a1)), r_promote(op, f(a2)))
+        v = op(f(a1), f(a2))
         @simd for i = ifirst + 2 : ilast
             @inbounds ai = A[i]
             v = op(v, f(ai))
@@ -248,23 +232,33 @@ pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096
 
 # handling empty arrays
 _empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed"))
-mr_empty(f, op, T) = _empty_reduce_error()
-# use zero(T)::T to improve type information when zero(T) is not defined
-mr_empty(::typeof(identity), op::typeof(+), T) = r_promote(op, zero(T)::T)
-mr_empty(::typeof(abs), op::typeof(+), T) = r_promote(op, abs(zero(T)::T))
-mr_empty(::typeof(abs2), op::typeof(+), T) = r_promote(op, abs2(zero(T)::T))
-mr_empty(::typeof(identity), op::typeof(*), T) = r_promote(op, one(T)::T)
-mr_empty(::typeof(abs), op::typeof(scalarmax), T) = abs(zero(T)::T)
-mr_empty(::typeof(abs2), op::typeof(scalarmax), T) = abs2(zero(T)::T)
-mr_empty(::typeof(abs), op::typeof(max), T) = mr_empty(abs, scalarmax, T)
-mr_empty(::typeof(abs2), op::typeof(max), T) = mr_empty(abs2, scalarmax, T)
-mr_empty(f, op::typeof(&), T) = true
-mr_empty(f, op::typeof(|), T) = false
-
-mr_empty_iter(f, op, itr, ::HasEltype) = mr_empty(f, op, eltype(itr))
-mr_empty_iter(f, op::typeof(&), itr, ::EltypeUnknown) = true
-mr_empty_iter(f, op::typeof(|), itr, ::EltypeUnknown) = false
-mr_empty_iter(f, op, itr, ::EltypeUnknown) = _empty_reduce_error()
+reduce_empty(op, T) = _empty_reduce_error()
+reduce_empty(::typeof(+), T) = zero(T)
+reduce_empty(::typeof(*), T) = one(T)
+reduce_empty(::typeof(&), ::Type{Bool}) = true
+reduce_empty(::typeof(|), ::Type{Bool}) = false
+
+mapreduce_empty(f, op, T) = _empty_reduce_error()
+mapreduce_empty(::typeof(identity), op, T) = reduce_empty(op, T)
+mapreduce_empty(::typeof(promote_sys_size), op, T) =
+    promote_sys_size(mapreduce_empty(identity, op, T))
+mapreduce_empty(::typeof(abs), ::typeof(+), T) = abs(zero(T))
+mapreduce_empty(::typeof(abs2), ::typeof(+), T) = abs2(zero(T))
+mapreduce_empty(::typeof(abs), ::Union{typeof(scalarmax), typeof(max)}, T) =
+    abs(zero(T))
+mapreduce_empty(::typeof(abs2), ::Union{typeof(scalarmax), typeof(max)}, T) =
+    abs2(zero(T))
+
+# Allow mapreduce_empty to “see through” promote_sys_size
+let ComposedFunction = typename(typeof(identity ∘ identity)).wrapper
+    global mapreduce_empty(f::ComposedFunction{typeof(promote_sys_size)}, op, T) =
+        promote_sys_size(mapreduce_empty(f.g, op, T))
+end
+
+mapreduce_empty_iter(f, op, itr, ::HasEltype) = mapreduce_empty(f, op, eltype(itr))
+mapreduce_empty_iter(f, op::typeof(&), itr, ::EltypeUnknown) = true
+mapreduce_empty_iter(f, op::typeof(|), itr, ::EltypeUnknown) = false
+mapreduce_empty_iter(f, op, itr, ::EltypeUnknown) = _empty_reduce_error()
 
 _mapreduce(f, op, A::AbstractArray) = _mapreduce(f, op, IndexStyle(A), A)
 
@@ -272,15 +266,15 @@ function _mapreduce(f, op, ::IndexLinear, A::AbstractArray{T}) where T
     inds = linearindices(A)
     n = length(inds)
     if n == 0
-        return mr_empty(f, op, T)
+        return mapreduce_empty(f, op, T)
     elseif n == 1
         @inbounds a1 = A[inds[1]]
-        return r_promote(op, f(a1))
+        return f(a1)
     elseif n < 16 # process short array here, avoid mapreduce_impl() compilation
         @inbounds i = inds[1]
         @inbounds a1 = A[i]
         @inbounds a2 = A[i+=1]
-        s = op(r_promote(op, f(a1)), r_promote(op, f(a2)))
+        s = op(f(a1), f(a2))
         while i < last(inds)
             @inbounds Ai = A[i+=1]
             s = op(s, f(Ai))
@@ -303,9 +297,6 @@ Reduce the given collection `itr` with the given binary operator `op`. `v0` must
 neutral element for `op` that will be returned for empty collections. It is unspecified
 whether `v0` is used for non-empty collections.
 
-The return type is `Int` (`UInt`) for (un)signed integers of less than system word size.
-For all other arguments, a common return type is found to which all arguments are promoted.
-
 Reductions for certain commonly-used operators may have special implementations, and
 should be used instead: `maximum(itr)`, `minimum(itr)`, `sum(itr)`, `prod(itr)`,
  `any(itr)`, `all(itr)`.
@@ -351,24 +342,47 @@ reduce(op, a::Number) = a
 
 Sum the results of calling function `f` on each element of `itr`.
 
+The return type is `Int` for signed integers of less than system word size, and
+`UInt` for unsigned integers of less than system word size.  For all other
+arguments, a common return type is found to which all arguments are promoted.
+
 ```jldoctest
 julia> sum(abs2, [2; 3; 4])
 29
 ```
+
+Note the important difference between `sum(A)` and `reduce(+, A)` for arrays
+with small integer eltype:
+
+```jldoctest
+julia> sum(Int8[100, 28])
+128
+
+julia> reduce(+, Int8[100, 28])
+-128
+```
+
+In the former case, the integers are widened to system word size and therefore
+the result is 128. In the latter case, no such widening happens and integer
+overflow results in -128.
 """
-sum(f::Callable, a) = mapreduce(f, +, a)
+sum(f::Callable, a) = mapreduce(promote_sys_size ∘ f, +, a)
 
 """
     sum(itr)
 
 Returns the sum of all elements in a collection.
 
+The return type is `Int` for signed integers of less than system word size, and
+`UInt` for unsigned integers of less than system word size.  For all other
+arguments, a common return type is found to which all arguments are promoted.
+
 ```jldoctest
 julia> sum(1:20)
 210
 ```
 """
-sum(a) = mapreduce(identity, +, a)
+sum(a) = mapreduce(promote_sys_size, +, a)
 sum(a::AbstractArray{Bool}) = count(a)
 
 
@@ -383,7 +397,7 @@ summation algorithm for additional accuracy.
 """
 function sum_kbn(A)
     T = _default_eltype(typeof(A))
-    c = r_promote(+, zero(T)::T)
+    c = promote_sys_size(zero(T)::T)
     i = start(A)
     if done(A, i)
         return c
@@ -409,24 +423,32 @@ end
 
 Returns the product of `f` applied to each element of `itr`.
 
+The return type is `Int` for signed integers of less than system word size, and
+`UInt` for unsigned integers of less than system word size.  For all other
+arguments, a common return type is found to which all arguments are promoted.
+
 ```jldoctest
 julia> prod(abs2, [2; 3; 4])
 576
 ```
 """
-prod(f::Callable, a) = mapreduce(f, *, a)
+prod(f::Callable, a) = mapreduce(promote_sys_size ∘ f, *, a)
 
 """
     prod(itr)
 
 Returns the product of all elements of a collection.
 
+The return type is `Int` for signed integers of less than system word size, and
+`UInt` for unsigned integers of less than system word size.  For all other
+arguments, a common return type is found to which all arguments are promoted.
+
 ```jldoctest
 julia> prod(1:20)
 2432902008176640000
 ```
 """
-prod(a) = mapreduce(identity, *, a)
+prod(a) = mapreduce(promote_sys_size, *, a)
 
 ## maximum & minimum
 
diff --git a/base/reducedim.jl b/base/reducedim.jl
index b2bee574032e7..54c97f4d830d9 100644
--- a/base/reducedim.jl
+++ b/base/reducedim.jl
@@ -137,19 +137,22 @@ reducedim_init(f, op::typeof(|), A::AbstractArray, region) = reducedim_initarray
 
 # specialize to make initialization more efficient for common cases
 
-for (IT, RT) in ((CommonReduceResult, :(eltype(A))), (SmallSigned, :Int), (SmallUnsigned, :UInt))
-    T = Union{[AbstractArray{t} for t in uniontypes(IT)]..., [AbstractArray{Complex{t}} for t in uniontypes(IT)]...}
-    @eval begin
-        reducedim_init(f::typeof(identity), op::typeof(+), A::$T, region) =
-            reducedim_initarray(A, region, zero($RT))
-        reducedim_init(f::typeof(identity), op::typeof(*), A::$T, region) =
-            reducedim_initarray(A, region, one($RT))
-        reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(+), A::$T, region) =
-            reducedim_initarray(A, region, real(zero($RT)))
-        reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(*), A::$T, region) =
-            reducedim_initarray(A, region, real(one($RT)))
-    end
+let
+    BitIntFloat = Union{BitInteger, Math.IEEEFloat}
+    T = Union{
+        [AbstractArray{t} for t in uniontypes(BitIntFloat)]...,
+        [AbstractArray{Complex{t}} for t in uniontypes(BitIntFloat)]...}
+
+    global reducedim_init(f::typeof(identity), op::typeof(+), A::T, region) =
+        reducedim_initarray(A, region, zero(eltype(A)))
+    global reducedim_init(f::typeof(identity), op::typeof(*), A::T, region) =
+        reducedim_initarray(A, region, one(eltype(A)))
+    global reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(+), A::T, region) =
+        reducedim_initarray(A, region, real(zero(eltype(A))))
+    global reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(*), A::T, region) =
+        reducedim_initarray(A, region, real(one(eltype(A))))
 end
+
 reducedim_init(f::Union{typeof(identity),typeof(abs),typeof(abs2)}, op::typeof(+), A::AbstractArray{Bool}, region) =
     reducedim_initarray(A, region, 0)
 
@@ -610,6 +613,13 @@ any!(r, A)
 for (fname, op) in [(:sum, :+), (:prod, :*),
                     (:maximum, :scalarmax), (:minimum, :scalarmin),
                     (:all, :&), (:any, :|)]
+    function compose_promote_sys_size(x)
+        if fname in [:sum, :prod]
+            :(promote_sys_size ∘ $x)
+        else
+            x
+        end
+    end
     fname! = Symbol(fname, '!')
     @eval begin
         $(fname!)(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) =
@@ -617,7 +627,7 @@ for (fname, op) in [(:sum, :+), (:prod, :*),
         $(fname!)(r::AbstractArray, A::AbstractArray; init::Bool=true) = $(fname!)(identity, r, A; init=init)
 
         $(fname)(f::Function, A::AbstractArray, region) =
-            mapreducedim(f, $(op), A, region)
+            mapreducedim($(compose_promote_sys_size(:f)), $(op), A, region)
         $(fname)(A::AbstractArray, region) = $(fname)(identity, A, region)
     end
 end
diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl
index 09255dacdaefa..ef1ff15459cde 100644
--- a/base/sparse/sparsematrix.jl
+++ b/base/sparse/sparsematrix.jl
@@ -1674,7 +1674,7 @@ function Base._mapreduce(f, op, ::Base.IndexCartesian, A::SparseMatrixCSC{T}) wh
     n = length(A)
     if z == 0
         if n == 0
-            Base.mr_empty(f, op, T)
+            Base.mapreduce_empty(f, op, T)
         else
             _mapreducezeros(f, op, T, n-z-1, f(zero(T)))
         end
diff --git a/base/tuple.jl b/base/tuple.jl
index 1ffc85f9c47c2..529ae05d12156 100644
--- a/base/tuple.jl
+++ b/base/tuple.jl
@@ -308,9 +308,13 @@ reverse(t::Tuple) = revargs(t...)
 
 # TODO: these definitions cannot yet be combined, since +(x...)
 # where x might be any tuple matches too many methods.
+# TODO: this is inconsistent with the regular sum in cases where the arguments
+# require size promotion to system size.
 sum(x::Tuple{Any, Vararg{Any}}) = +(x...)
 
 # NOTE: should remove, but often used on array sizes
+# TODO: this is inconsistent with the regular prod in cases where the arguments
+# require size promotion to system size.
 prod(x::Tuple{}) = 1
 prod(x::Tuple{Any, Vararg{Any}}) = *(x...)
 
diff --git a/test/reduce.jl b/test/reduce.jl
index b2050939150f3..e3c3273247dcf 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -2,7 +2,7 @@
 
 # fold(l|r) & mapfold(l|r)
 @test foldl(+, Int64[]) === Int64(0) # In reference to issues #7465/#20144 (PR #20160)
-@test foldl(+, Int16[]) === Int(0)
+@test foldl(+, Int16[]) === Int16(0) # In reference to issues #21536
 @test foldl(-, 1:5) == -13
 @test foldl(-, 10, 1:5) == -5
 
@@ -19,7 +19,7 @@
 @test Base.mapfoldl((x)-> x ⊻ true, |, false, [true false true false false]) == true
 
 @test foldr(+, Int64[]) === Int64(0) # In reference to issue #20144 (PR #20160)
-@test foldr(+, Int16[]) === Int(0)
+@test foldr(+, Int16[]) === Int16(0) # In reference to issues #21536
 @test foldr(-, 1:5) == 3
 @test foldr(-, 10, 1:5) == -7
 @test foldr(+, [1]) == 1 # Issue #21493
@@ -29,7 +29,7 @@
 
 # reduce
 @test reduce(+, Int64[]) === Int64(0) # In reference to issue #20144 (PR #20160)
-@test reduce(+, Int16[]) === Int(0)
+@test reduce(+, Int16[]) === Int16(0) # In reference to issues #21536
 @test reduce((x,y)->"($x+$y)", 9:11) == "((9+10)+11)"
 @test reduce(max, [8 6 7 5 3 0 9]) == 9
 @test reduce(+, 1000, 1:5) == (1000 + 1 + 2 + 3 + 4 + 5)
@@ -48,8 +48,8 @@
 @test mapreduce(abs2, +, Float64[]) === 0.0
 @test mapreduce(abs2, Base.scalarmax, Float64[]) === 0.0
 @test mapreduce(abs, max, Float64[]) === 0.0
-@test mapreduce(abs2, &, Float64[]) === true
-@test mapreduce(abs2, |, Float64[]) === false
+@test_throws ArgumentError mapreduce(abs2, &, Float64[])
+@test_throws ArgumentError mapreduce(abs2, |, Float64[])
 
 # mapreduce() type stability
 @test typeof(mapreduce(*, +, Int8[10])) ===
@@ -69,12 +69,24 @@
       typeof(mapreduce(abs, +, Float32[10, 11, 12, 13]))
 
 # sum
+@testset "sums promote to at least machine size" begin
+    @testset for T in [Int8, Int16, Int32]
+        @test sum(T[]) === Int(0)
+    end
+    @testset for T in [UInt8, UInt16, UInt32]
+        @test sum(T[]) === UInt(0)
+    end
+    @testset for T in [Int, Int64, Int128, UInt, UInt64, UInt128,
+                       Float16, Float32, Float64]
+        @test sum(T[]) === T(0)
+    end
+    @test sum(BigInt[]) == big(0) && sum(BigInt[]) isa BigInt
+end
 
-@test sum(Int8[]) === Int(0)
-@test sum(Int[]) === Int(0)
-@test sum(Float64[]) === 0.0
+@test sum(Bool[]) === sum(Bool[false]) === sum(Bool[false, false]) === 0
+@test sum(Bool[true, false, true]) === 2
 
-@test sum(Int8(3)) === Int8(3)
+@test sum(Int8(3)) === Int(3)
 @test sum(3) === 3
 @test sum(3.0) === 3.0
 
@@ -135,6 +147,14 @@ end
 @test sum_kbn([-0.0]) === -0.0
 @test sum_kbn([-0.0,-0.0]) === -0.0
 
+# check sum(abs, ...) for support of empty collections
+@testset "sum(abs, [])" begin
+    @test @inferred(sum(abs, Float64[])) === 0.0
+    @test @inferred(sum(abs, Int[])) === 0
+    @test @inferred(sum(abs, Set{Int}())) === 0
+    @test_throws MethodError sum(abs, Any[])
+end
+
 # prod
 
 @test prod(Int[]) === 1
@@ -380,3 +400,7 @@ test18695(r) = sum( t^2 for t in r )
 
 # issue #21107
 @test foldr(-,2:2) == 2
+
+# test neutral element not picked incorrectly for &, |
+@test @inferred(foldl(&, Int[1])) === 1
+@test_throws ArgumentError foldl(&, Int[])
diff --git a/test/reducedim.jl b/test/reducedim.jl
index 3edbdaa3f99a1..dc59889b70f50 100644
--- a/test/reducedim.jl
+++ b/test/reducedim.jl
@@ -337,3 +337,11 @@ for region in Any[-1, 0, (-1, 2), [0, 1], (1,-2,3), [0 1;
     @test_throws ArgumentError maximum(abs, Areduc, region)
     @test_throws ArgumentError minimum(abs, Areduc, region)
 end
+
+# check type of result
+under_test = [UInt8, Int8, Int32, Int64, BigInt]
+@testset "type of sum(::Array{$T}" for T in under_test
+    result = sum(T[1 2 3; 4 5 6; 7 8 9], 2)
+    @test result == hcat([6, 15, 24])
+    @test eltype(result) === typeof(Base.promote_sys_size(zero(T)))
+end

From 1f587a456777c577ee76e5b967f9bf2b32c2f9e3 Mon Sep 17 00:00:00 2001
From: Fengyang Wang <fengyang.wang.0@gmail.com>
Date: Fri, 6 Oct 2017 15:46:50 -0400
Subject: [PATCH 3/4] Modifications to work well with Bool

---
 base/reduce.jl    | 34 ++++++++++++++++++++--------------
 base/reducedim.jl |  6 ++++--
 2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index 2795ef7ccec19..a23faf9c99b2e 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -12,11 +12,17 @@ else
     const SmallUnsigned = Union{UInt8,UInt16,UInt32}
 end
 
-# Certain reductions like sum and prod may wish to promote the items being
-# reduced over to an appropriate size.
-promote_sys_size(x) = x
-promote_sys_size(x::Union{Bool, SmallSigned}) = Int(x)
-promote_sys_size(x::SmallUnsigned) = UInt(x)
+# Certain reductions like sum and prod may wish to promote the items being reduced over to
+# an appropriate size. Note we need x + zero(x) because some types like Bool have their sum
+# lie in a larger type.
+promote_sys_size(T::Type) = T
+promote_sys_size(::Type{<:SmallSigned}) = Int
+promote_sys_size(::Type{<:SmallUnsigned}) = UInt
+
+promote_sys_size_add(x) = convert(promote_sys_size(typeof(x + zero(x))), x)
+promote_sys_size_mul(x) = convert(promote_sys_size(typeof(x * one(x))), x)
+const _PromoteSysSizeFunction = Union{typeof(promote_sys_size_add),
+                                      typeof(promote_sys_size_mul)}
 
 ## foldl && mapfoldl
 
@@ -240,8 +246,8 @@ reduce_empty(::typeof(|), ::Type{Bool}) = false
 
 mapreduce_empty(f, op, T) = _empty_reduce_error()
 mapreduce_empty(::typeof(identity), op, T) = reduce_empty(op, T)
-mapreduce_empty(::typeof(promote_sys_size), op, T) =
-    promote_sys_size(mapreduce_empty(identity, op, T))
+mapreduce_empty(f::_PromoteSysSizeFunction, op, T) =
+    f(mapreduce_empty(identity, op, T))
 mapreduce_empty(::typeof(abs), ::typeof(+), T) = abs(zero(T))
 mapreduce_empty(::typeof(abs2), ::typeof(+), T) = abs2(zero(T))
 mapreduce_empty(::typeof(abs), ::Union{typeof(scalarmax), typeof(max)}, T) =
@@ -251,8 +257,8 @@ mapreduce_empty(::typeof(abs2), ::Union{typeof(scalarmax), typeof(max)}, T) =
 
 # Allow mapreduce_empty to “see through” promote_sys_size
 let ComposedFunction = typename(typeof(identity ∘ identity)).wrapper
-    global mapreduce_empty(f::ComposedFunction{typeof(promote_sys_size)}, op, T) =
-        promote_sys_size(mapreduce_empty(f.g, op, T))
+    global mapreduce_empty(f::ComposedFunction{<:_PromoteSysSizeFunction}, op, T) =
+        f.f(mapreduce_empty(f.g, op, T))
 end
 
 mapreduce_empty_iter(f, op, itr, ::HasEltype) = mapreduce_empty(f, op, eltype(itr))
@@ -366,7 +372,7 @@ In the former case, the integers are widened to system word size and therefore
 the result is 128. In the latter case, no such widening happens and integer
 overflow results in -128.
 """
-sum(f::Callable, a) = mapreduce(promote_sys_size ∘ f, +, a)
+sum(f::Callable, a) = mapreduce(promote_sys_size_add ∘ f, +, a)
 
 """
     sum(itr)
@@ -382,7 +388,7 @@ julia> sum(1:20)
 210
 ```
 """
-sum(a) = mapreduce(promote_sys_size, +, a)
+sum(a) = mapreduce(promote_sys_size_add, +, a)
 sum(a::AbstractArray{Bool}) = count(a)
 
 
@@ -397,7 +403,7 @@ summation algorithm for additional accuracy.
 """
 function sum_kbn(A)
     T = _default_eltype(typeof(A))
-    c = promote_sys_size(zero(T)::T)
+    c = promote_sys_size_add(zero(T)::T)
     i = start(A)
     if done(A, i)
         return c
@@ -432,7 +438,7 @@ julia> prod(abs2, [2; 3; 4])
 576
 ```
 """
-prod(f::Callable, a) = mapreduce(promote_sys_size ∘ f, *, a)
+prod(f::Callable, a) = mapreduce(promote_sys_size_mul ∘ f, *, a)
 
 """
     prod(itr)
@@ -448,7 +454,7 @@ julia> prod(1:20)
 2432902008176640000
 ```
 """
-prod(a) = mapreduce(promote_sys_size, *, a)
+prod(a) = mapreduce(promote_sys_size_mul, *, a)
 
 ## maximum & minimum
 
diff --git a/base/reducedim.jl b/base/reducedim.jl
index 54c97f4d830d9..955578dcf0586 100644
--- a/base/reducedim.jl
+++ b/base/reducedim.jl
@@ -614,8 +614,10 @@ for (fname, op) in [(:sum, :+), (:prod, :*),
                     (:maximum, :scalarmax), (:minimum, :scalarmin),
                     (:all, :&), (:any, :|)]
     function compose_promote_sys_size(x)
-        if fname in [:sum, :prod]
-            :(promote_sys_size ∘ $x)
+        if fname === :sum
+            :(promote_sys_size_add ∘ $x)
+        elseif fname === :prod
+            :(promote_sys_size_mul ∘ $x)
         else
             x
         end

From 6347488dca2a18a8183b7c0146c3a9ffa574b9fa Mon Sep 17 00:00:00 2001
From: Fengyang Wang <fengyang.wang.0@gmail.com>
Date: Fri, 6 Oct 2017 17:19:17 -0400
Subject: [PATCH 4/4] Fix reducedim test

---
 test/reducedim.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/reducedim.jl b/test/reducedim.jl
index dc59889b70f50..72e9b991a7c4a 100644
--- a/test/reducedim.jl
+++ b/test/reducedim.jl
@@ -343,5 +343,5 @@ under_test = [UInt8, Int8, Int32, Int64, BigInt]
 @testset "type of sum(::Array{$T}" for T in under_test
     result = sum(T[1 2 3; 4 5 6; 7 8 9], 2)
     @test result == hcat([6, 15, 24])
-    @test eltype(result) === typeof(Base.promote_sys_size(zero(T)))
+    @test eltype(result) === typeof(Base.promote_sys_size_add(zero(T)))
 end