fix #484 -- broaden pooling method signatures (#485)

manuelbb-upb · ToucheSir · web-flow · commit 305b3059fcc8 · 2023-07-15T16:53:06.000-07:00
* broaden pooling method signatures

* pooling: revert type predition for target array

* add ReverseDiff as test dependency

* Update test/pooling.jl

reformat whitespace in tests

Co-authored-by: Brian Chen &lt;ToucheSir@users.noreply.github.com&gt;

* Update test/pooling.jl

remove unused type parameters from function signatures

Co-authored-by: Brian Chen &lt;ToucheSir@users.noreply.github.com&gt;

* Update src/impl/pooling_direct.jl

remove unused type parameters from function signatures

Co-authored-by: Brian Chen &lt;ToucheSir@users.noreply.github.com&gt;

* remove unused type parameters in pooling methods

---------

Co-authored-by: Brian Chen &lt;ToucheSir@users.noreply.github.com&gt;
diff --git a/src/impl/pooling_direct.jl b/src/impl/pooling_direct.jl
@@ -2,8 +2,8 @@
 # the inner loop operation and a few initialization parameters.
 for name in (:max, :mean, :lpnorm)
     @eval function $((Symbol("$(name)pool_direct!")))(
-                    y::AbstractArray{T, 5}, x::AbstractArray{T, 5},
-                    pdims::PoolDims; alpha::T=T(1), beta::T=T(0), kwargs...) where T
+                    y::AbstractArray{<:Any, 5}, x::AbstractArray{<:Any, 5},
+                    pdims::PoolDims; alpha=1, beta=0, kwargs...) 
         $((Symbol("$(name)pool_direct!")))(
             y, x, pdims,
             Val(kernel_size(pdims)), Val(channels_out(pdims)),
@@ -13,13 +13,13 @@ for name in (:max, :mean, :lpnorm)
     end
 
     @eval function $((Symbol("$(name)pool_direct!")))(
-        y::AbstractArray{T,5}, x::AbstractArray{T,5},
+        y::AbstractArray{T,5}, x::AbstractArray{<:Any,5},
         pdims::PoolDims,
         # kernel size, channels out, padding, dilation, stride
         ::Val{K}, ::Val{C}, ::Val{P}, ::Val{D}, ::Val{S};
-        alpha::T=T(1), beta::T=T(0), kwargs...
+        alpha=1, beta=0, kwargs...
     ) where {T, K, C, P, D, S}
-        @assert beta == T(0) "beta not supported yet"
+        @assert iszero(beta) "beta not supported yet"
         check_dims(size(x), size(y), pdims)
 
         width, height, depth = input_size(pdims)
@@ -36,10 +36,21 @@ for name in (:max, :mean, :lpnorm)
         @inline project(idx, stride, pad) = (idx - 1) * stride - pad + 1
 
         # If we're doing mean pooling, we represent division by kernel size by rolling it
-        # into the `alpha` multiplier.
-        if $(name == :mean)
-            alpha = alpha / prod(K)
+        # into the `alpha` multiplier. 
+        # The type might change here, that's why we prepend the underscore 
+        # (does it make a difference, though?)
+        _alpha = if $(name == :mean)
+            T(alpha / prod(K))
+        else
+            T(alpha)
         end
+        # _beta = T(beta)
+
+        # A quick note on the array element types `T` and `R`:
+        # Ideally, `T == R`, but in some edge-cases, this might not be the case 
+        # (e.g. with `ReverseDiff.TrackedArray`, see issue #484).
+        # If the types differ, we will initialize variables (like `_alpha` above) with the 
+        # target eltype `T`.
 
         p = if $(name != :lpnorm) 0 else
             !haskey(kwargs, :p) && error("lpnormpool needs keyword argument `p`")
@@ -94,7 +105,7 @@ for name in (:max, :mean, :lpnorm)
             # for lpnormpool, y = (∑ᵢ xᵢ^p)^(1 / p)
             m = $(name == :lpnorm) ? m^(T(1) / p) : m
 
-            y[w, h, d, c, batch_idx] = alpha * m # + beta * y[w, h, d, c, batch_idx]
+            y[w, h, d, c, batch_idx] = _alpha * m # + _beta * y[w, h, d, c, batch_idx]
             end
             end
             end
@@ -148,7 +159,7 @@ for name in (:max, :mean, :lpnorm)
                     end
                 end
                 $(name == :lpnorm) && (m = m^(T(1) / p))
-                y[w, h, d, c, batch_idx] = alpha * m # + beta * y[w, h, d, c, batch_idx]
+                y[w, h, d, c, batch_idx] = _alpha * m # + _beta * y[w, h, d, c, batch_idx]
                 end
                 end
                 end
@@ -159,9 +170,9 @@ for name in (:max, :mean, :lpnorm)
     end
 
     @eval function $((Symbol("∇$(name)pool_direct!")))(
-                    dx::AbstractArray{T,5}, dy::AbstractArray{T,5},
-                    y::AbstractArray{T,5}, x::AbstractArray{T,5},
-                    pdims::PoolDims; kwargs...) where T
+                    dx::AbstractArray{<:Any,5}, dy::AbstractArray{<:Any,5},
+                    y::AbstractArray{<:Any,5}, x::AbstractArray{<:Any,5},
+                    pdims::PoolDims; kwargs...)
         $((Symbol("∇$(name)pool_direct!")))(
             dx, dy, y, x, pdims, Val(kernel_size(pdims)); kwargs...)
         return dx
@@ -170,10 +181,10 @@ for name in (:max, :mean, :lpnorm)
     # Same story for gradients, and although this is very similar to the forward pass,
     # it's unfortunately different enough that I think we need a separate function.  :(
     @eval function $((Symbol("∇$(name)pool_direct!")))(
-                    dx::AbstractArray{T,5}, dy::AbstractArray{T,5},
-                    y::AbstractArray{T,5}, x::AbstractArray{T,5},
+                    dx::AbstractArray{T,5}, dy::AbstractArray{<:Any,5},
+                    y::AbstractArray{<:Any,5}, x::AbstractArray{<:Any,5},
                     pdims::PoolDims, ::Val{K}; # == kernel_size(pdims)
-                    alpha::T=T(1), beta::T=T(0), kwargs...) where {T, K}
+                    alpha=1, beta=0, kwargs...) where {T, K}
         check_dims(size(x), size(dy), pdims)
 
         width, height, depth = input_size(pdims)
@@ -183,6 +194,10 @@ for name in (:max, :mean, :lpnorm)
         dil_w, dil_h, dil_d = dilation(pdims)
         stride_w, stride_h, stride_d = stride(pdims)
 
+        # Concerning array eltypes `DX, DY, X, Y`, we want handle them like above, i.e.,
+        # initialize everything with the left-hand-side type (target type).
+        # Of course, ideally the types are all the same anyways.
+
         # We use calc_padding_regions to split outselves up into separate regions that
         # may or may not need to worry about padding:
         padded_regions, central_region = calc_padding_regions(pdims)
@@ -191,9 +206,11 @@ for name in (:max, :mean, :lpnorm)
         @inline project(idx, stride, pad) = (idx - 1) * stride - pad + 1
 
         # If we're doing mean pooling, we represent division by kernel size by rolling
-        # it into the `alpha` multiplier.
-        if $(name == :mean)
-            alpha = alpha / prod(K)
+        # it into the `_alpha` multiplier.
+        _alpha = if $(name == :mean)
+            T(alpha / prod(K))
+        else
+            T(alpha)
         end
 
         p = if $(name != :lpnorm) 0 else
@@ -236,15 +253,15 @@ for name in (:max, :mean, :lpnorm)
                     # Uncomment line below if using with non-precise output (e.g. by NNPACK)
                     # if abs(y_idx - x[x_idxs...]) < 1e-5 && !maxpool_already_chose
                     if y_idx ≈ x[input_kw, input_kh, input_kd, c, batch_idx]
-                        dx[input_kw, input_kh, input_kd, c, batch_idx] += dy_idx * alpha #+ beta * dx[x_idxs...]
+                        dx[input_kw, input_kh, input_kd, c, batch_idx] += dy_idx * _alpha #+ _beta * dx[x_idxs...]
                         maxpool_already_chose = true
                     # Maxpooling does not support `beta` right now.  :(
                     # else
                     #    dx[x_idxs...] = T(0) + beta*dx[x_idxs...]
                     end
                 elseif $(name == :mean)
                     # Either does meanpool :(
-                    dx[input_kw, input_kh, input_kd, c, batch_idx] += dy_idx * alpha
+                    dx[input_kw, input_kh, input_kd, c, batch_idx] += dy_idx * _alpha
                 elseif $(name == :lpnorm)
                     # y = (∑ᵢ xᵢ^p)^(1 / p), ∂y/∂xᵢ = xᵢ^(p-1) × y^(1-p)
                     grad = x[input_kw, input_kh, input_kd, c, batch_idx]^(p-1) * y_idx^(1-p)
@@ -302,13 +319,13 @@ for name in (:max, :mean, :lpnorm)
                                 # Uncomment line below if using with non-precise output
                                 # if abs(y_idx - x[x_idxs...]) < 1e-5 && !maxpool_already_chose
                                 if y_idx ≈ x[input_kw, input_kh, input_kd, c, batch_idx]
-                                    dx[input_kw, input_kh, input_kd, c, batch_idx] += dy_idx * alpha #+ beta * dx[x_idxs...]
+                                    dx[input_kw, input_kh, input_kd, c, batch_idx] += dy_idx * _alpha #+ _beta * dx[x_idxs...]
                                     maxpool_already_chose = true
                                 # else
                                 #    dx[x_idxs...] = T(0) + beta*dx[x_idxs...]
                                 end
                             elseif $(name == :mean)
-                                dx[input_kw, input_kh, input_kd, c, batch_idx] += dy_idx * alpha #+ beta * dx[x_idxs...]
+                                dx[input_kw, input_kh, input_kd, c, batch_idx] += dy_idx * _alpha #+ _beta * dx[x_idxs...]
                             elseif $(name == :lpnorm)
                                 grad = x[input_kw, input_kh, input_kd, c, batch_idx]^(p-1) * y_idx^(1-p)
                                 dx[input_kw, input_kh, input_kd, c, batch_idx] += dy_idx * grad
diff --git a/src/pooling.jl b/src/pooling.jl
@@ -36,8 +36,8 @@ for (front_name, backend) in (
     # We only define 3d pooling primitives, we reshape lower down to get 1d and 2d pooling
     @eval begin
         function $(Symbol("$(front_name)!"))(
-                y::AbstractArray{T,5}, x::AbstractArray{T,5},
-                pdims::PoolDims; kwargs...) where {T}
+                y::AbstractArray{<:Any,5}, x::AbstractArray{<:Any,5},
+                pdims::PoolDims; kwargs...)
             $(Symbol("$(front_name)_$(backend)!"))(y, x, pdims; kwargs...)
         end
     end
@@ -51,9 +51,9 @@ for (front_name, backend) in (
     )
     @eval begin
         function $(Symbol("$(front_name)!"))(
-                        dx::AbstractArray{T,5}, dy::AbstractArray{T,5},
-                        y::AbstractArray{T,5}, x::AbstractArray{T,5},
-                        pdims::PoolDims; kwargs...) where {T}
+                        dx::AbstractArray{<:Any,5}, dy::AbstractArray{<:Any,5},
+                        y::AbstractArray{<:Any,5}, x::AbstractArray{<:Any,5},
+                        pdims::PoolDims; kwargs...)
             $(Symbol("$(front_name)_$(backend)!"))(dx, dy, y, x, pdims; kwargs...)
         end
     end
@@ -68,8 +68,8 @@ for front_name in (:maxpool, :meanpool, :lpnormpool)
         for N in (3, 4)
             @eval begin
                 function $(Symbol("$(front_name)$(backend)!"))(
-                                y::AbstractArray{T,$N}, x::AbstractArray{T,$N},
-                                pdims::PoolDims; kwargs...) where {T}
+                                y::AbstractArray{<:Any,$N}, x::AbstractArray{<:Any,$N},
+                                pdims::PoolDims; kwargs...)
                     $(Symbol("$(front_name)$(backend)!"))(
                         insert_singleton_spatial_dimension(y, $(5 - N)),
                         insert_singleton_spatial_dimension(x, $(5 - N)),
@@ -84,9 +84,9 @@ for front_name in (:maxpool, :meanpool, :lpnormpool)
 
                 # backprops too
                 function $(Symbol("∇$(front_name)$(backend)!"))(
-                                dx::AbstractArray{T,$N}, dy::AbstractArray{T,$N},
-                                y::AbstractArray{T,$N}, x::AbstractArray{T,$N},
-                                pdims::PoolDims; kwargs...) where {T}
+                                dx::AbstractArray{<:Any,$N}, dy::AbstractArray{<:Any,$N},
+                                y::AbstractArray{<:Any,$N}, x::AbstractArray{<:Any,$N},
+                                pdims::PoolDims; kwargs...)
                     $(Symbol("∇$(front_name)$(backend)!"))(
                         insert_singleton_spatial_dimension(dx, $(5 - N)),
                         insert_singleton_spatial_dimension(dy, $(5 - N)),
@@ -112,20 +112,20 @@ for backend in (Symbol(), :_direct, :_nnpack)
     for name in (:maxpool, :meanpool, :lpnormpool)
         @eval begin
             function $(Symbol("$(name)$(backend)"))(
-                            x::AbstractArray{xT,N},
-                            pdims::PoolDims; kwargs...) where {xT, N}
+                            x::AbstractArray{<:Any,N},
+                            pdims::PoolDims; kwargs...) where {N}
                 y = similar(x, output_size(pdims)..., channels_out(pdims), size(x, N))
-                fill!(y, xT(0))
+                fill!(y, 0)
                 return $(Symbol("$(name)$(backend)!"))(y, x, pdims; kwargs...)
             end
 
             # Backprops too
             function $(Symbol("∇$(name)$(backend)"))(
-                            dy::AbstractArray{T,N}, y::AbstractArray{T,N},
-                            x::AbstractArray{T,N}, pdims::PoolDims;
-                            kwargs...) where {T, N}
+                            dy::AbstractArray{<:Any,N}, y::AbstractArray{<:Any,N},
+                            x::AbstractArray{<:Any,N}, pdims::PoolDims;
+                            kwargs...) where {N}
                 dx = similar(x, input_size(pdims)..., channels_in(pdims), size(dy, N))
-                fill!(dx, T(0))
+                fill!(dx, 0)
                 return $(Symbol("∇$(name)$(backend)!"))(dx, dy, y, x, pdims; kwargs...)
             end
         end
diff --git a/test/Project.toml b/test/Project.toml
@@ -11,9 +11,10 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
-cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
+cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
diff --git a/test/pooling.jl b/test/pooling.jl
@@ -905,6 +905,42 @@ maxpool_answer_nature = Dict(
     -0.25,  -0.5,  -0.25], (3, 3, 1, 1))
     @test all(pool .== valid)
 
+    # issue #484
+    # Description: some in-place pooling functions only accepted arrays with the same eltype.
+    # The strict method signatures were based on assumption on the return type of `similar`.
+    # For ReverseDiff, this caused problems, e.g. with taking derivatives of pooling 
+    # operations.
+    # Now, if explicitly calling an in-place pooling functions, a different `yT` is allowed.
+    for xT in (Int32, Int64, Float16, Float32, Float64, BigFloat)
+        for (xsz, psz) in (     # test a few different data and kernel sizes
+            ((1,1), (1,1)),
+            ((1,2), (1,1)), ((1,2), (1,2)),
+            ((2,1), (1,1)), ((2,1), (2,1)),
+            ((2,2), (1,1)), ((2,2), (1,2)), ((2,2), (2,1)),
+        )
+            x = ones(xT, xsz..., 1, 1)
+            pdims = PoolDims(x, psz)
+            for yT in (Float16, Float32, Float64, BigFloat) 
+                # `yT` is the target eltype and we do not test integer types here
+                # because those cannot always store the pooling results.
+                y = similar(x, yT, NNlib.output_size(pdims)..., NNlib.channels_out(pdims), size(x, 4))
+                @test maxpool!(y, x, pdims) isa Array{yT}
+                @test meanpool!(y, x, pdims) isa Array{yT}
+                @test lpnormpool!(y, x, pdims; p=2) isa Array{yT}
+                @test lpnormpool!(y, x, pdims; p=1.0) isa Array{yT}
+            end
+        end
+    end
+    
+    # This is how to test #484 with ReverseDiff:
+    x = reshape(Float32[ 1 2; 3 4 ], (2,2,1,1))
+    @test only(maxpool(x, (2,2))) == 4
+    # define typemin, because of https://github.com/JuliaDiff/ReverseDiff.jl/issues/225
+    Base.typemin(tr::Type{<:T}) where{V, T<:RD.TrackedReal{V, <:Any, <:Any}} = T(typemin(V))
+    @test RD.gradient(_x -> only(maxpool(_x,(2,2))), x)[:,:,1,1] == [0 0; 0 1]
+    @test only(meanpool(x, (2,2))) == 2.5
+    @test all(==(0.25), RD.gradient(_x -> only(meanpool(_x,(2,2))), x))
+
     # if NNlib.is_nnpack_available()
     #     if NNlib.nnpack_supported_operation(pdims1)
     #         @test NNlib.maxpool_nnpack(x, pdims1) isa Array{Float32, 4}
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -9,6 +9,8 @@ using StableRNGs
 using Documenter
 using Adapt
 using KernelAbstractions
+import ReverseDiff as RD        # used in `pooling.jl`
+
 DocMeta.setdocmeta!(NNlib, :DocTestSetup, :(using NNlib, UnicodePlots); recursive=true)
 
 ENV["NNLIB_TEST_CUDA"] = true # uncomment to run CUDA tests