From 9fe9e98a1bcff950f69d30394b1580cefdd66992 Mon Sep 17 00:00:00 2001 From: N5N3 <2642243996@qq.com> Date: Thu, 13 Jan 2022 23:15:33 +0800 Subject: [PATCH] Help LLVM better vectorize reduction over `skipmissing` --- base/missing.jl | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/base/missing.jl b/base/missing.jl index 3176c56772602..a0488229b1c29 100644 --- a/base/missing.jl +++ b/base/missing.jl @@ -310,6 +310,15 @@ _mapreduce(f, op, ::IndexCartesian, itr::SkipMissing) = mapfoldl(f, op, itr) mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) = mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op)) +# Helper functions that let LLVM better vectorize these reductions. +# Each returns a `noop` value that ensures `op(v, noop) === v`. +_fast_noop(::Union{typeof(add_sum),typeof(+)}, ::Type{T}, ::T) where {T<:Union{HWReal,Complex{<:HWReal}}} = zero(T) +_fast_noop(::Union{typeof(mul_prod),typeof(*)}, ::Type{T}, ::T) where {T<:HWReal} = one(T) +# TODO: min/max for IEEEFloat needs manual unrolling to vectorize. +_fast_noop(::Union{typeof(min),typeof(max)}, ::Type{T}, v::T) where {T<:Integer} = T <: HWReal ? v : nothing +# General fallback +_fast_noop(f, T, v) = nothing + # Returns nothing when the input contains only missing values, and Some(x) otherwise @noinline function mapreduce_impl(f, op, itr::SkipMissing{<:AbstractArray}, ifirst::Integer, ilast::Integer, blksize::Int) @@ -345,10 +354,19 @@ mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) = i == typemax(typeof(i)) && return Some(op(f(a1), f(a2))) i += 1 v = op(f(a1), f(a2)) - @simd for i = i:ilast - @inbounds ai = A[i] - if ai !== missing - v = op(v, f(ai)) + # We need to make sure `fskip` is type-stable. 
+ noop = _fast_noop(op, _return_type(f, Tuple{nonmissingtype(eltype(A))}), v) + if isnothing(noop) + @simd for i = i:ilast + @inbounds ai = A[i] + if !ismissing(ai) + v = op(v, f(ai)) + end + end + else + @inline fskip(x) = ismissing(x) ? noop : f(x) + @inbounds @simd for i = i:ilast + v = op(v, fskip(A[i])) end end return Some(v) @@ -463,4 +481,3 @@ macro coalesce(args...) end return esc(:(let val; $expr; end)) end -