Commit 8d7b27f (parent 7997174)

_match_eltype

9 files changed: +136, -13 lines changed
src/layers/basic.jl
Lines changed: 2 additions & 1 deletion

@@ -169,7 +169,8 @@ end
 
 function (a::Dense)(x::AbstractVecOrMat)
   σ = NNlib.fast_act(a.σ, x) # replaces tanh => tanh_fast, etc
-  return σ.(a.weight * x .+ a.bias)
+  xT = _match_eltype(a, x)  # fixes Float64 input, etc.
+  return σ.(a.weight * xT .+ a.bias)
 end
 
 (a::Dense)(x::AbstractArray) =
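With this change a `Dense` layer whose weights are Float32 converts stray Float64 input before the matrix multiply, so the forward pass stays in Float32. A minimal illustrative sketch of the intended behaviour (not part of the diff):

    using Flux

    d = Dense(2 => 3)            # weights are Float32 by default
    x64 = randn(Float64, 2, 4)   # accidental Float64 input

    y = d(x64)     # warns once via _match_eltype, then multiplies in Float32
    eltype(y)      # Float32; before this commit the whole call ran in Float64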

src/layers/conv.jl
Lines changed: 6 additions & 3 deletions

@@ -197,7 +197,8 @@ ChainRulesCore.@non_differentiable conv_dims(::Any, ::Any)
 function (c::Conv)(x::AbstractArray)
   σ = NNlib.fast_act(c.σ, x)
   cdims = conv_dims(c, x)
-  σ.(conv(x, c.weight, cdims) .+ conv_reshape_bias(c))
+  xT = _match_eltype(c, x)
+  σ.(conv(xT, c.weight, cdims) .+ conv_reshape_bias(c))
 end
 
 _channels_in(l::Conv) = size(l.weight, ndims(l.weight)-1) * l.groups

@@ -330,7 +331,8 @@ ChainRulesCore.@non_differentiable conv_transpose_dims(::Any, ::Any)
 function (c::ConvTranspose)(x::AbstractArray)
   σ = NNlib.fast_act(c.σ, x)
   cdims = conv_transpose_dims(c, x)
-  σ.(∇conv_data(x, c.weight, cdims) .+ conv_reshape_bias(c))
+  xT = _match_eltype(c, x)
+  σ.(∇conv_data(xT, c.weight, cdims) .+ conv_reshape_bias(c))
 end
 
 function Base.show(io::IO, l::ConvTranspose)

@@ -468,7 +470,8 @@ ChainRulesCore.@non_differentiable crosscor_dims(::Any, ::Any)
 function (c::CrossCor)(x::AbstractArray)
   σ = NNlib.fast_act(c.σ, x)
   cdims = crosscor_dims(c, x)
-  σ.(crosscor(x, c.weight, cdims) .+ conv_reshape_bias(c))
+  xT = _match_eltype(c, x)
+  σ.(crosscor(xT, c.weight, cdims) .+ conv_reshape_bias(c))
 end
 
 function Base.show(io::IO, l::CrossCor)
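The same guard is added to `Conv`, `ConvTranspose` and `CrossCor`, so Float64 or integer-valued arrays no longer force a wide-precision convolution. An illustrative sketch, assuming the default Float32 initialisation:

    using Flux

    c  = Conv((3,), 2 => 4, relu)      # Float32 weights
    x  = rand(Float64, 10, 2, 5)       # (width, channels, batch), Float64
    xi = rand(-3:3, 10, 2, 5)          # integer input

    eltype(c(x))    # Float32: converted before conv, with a one-time warning
    eltype(c(xi))   # Float32: integers are converted silently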

src/layers/recurrent.jl
Lines changed: 12 additions & 8 deletions

@@ -200,10 +200,11 @@ end
 RNNCell((in, out)::Pair, σ=tanh; init=Flux.glorot_uniform, initb=zeros32, init_state=zeros32) =
   RNNCell(σ, init(out, in), init(out, out), initb(out), init_state(out,1))
 
-function (m::RNNCell{F,I,H,V,<:AbstractMatrix{T}})(h, x::Union{AbstractVecOrMat{T},OneHotArray}) where {F,I,H,V,T}
+function (m::RNNCell{F,I,H,V,<:AbstractMatrix{T}})(h, x::Union{AbstractVecOrMat{<:AbstractFloat},OneHotArray}) where {F,I,H,V,T}
   Wi, Wh, b = m.Wi, m.Wh, m.b
   σ = NNlib.fast_act(m.σ, x)
-  h = σ.(Wi*x .+ Wh*h .+ b)
+  xT = _match_eltype(m, T, x)
+  h = σ.(Wi*xT .+ Wh*h .+ b)
   return h, reshape_cell_output(h, x)
 end
 
@@ -305,9 +306,10 @@ function LSTMCell((in, out)::Pair;
   return cell
 end
 
-function (m::LSTMCell{I,H,V,<:NTuple{2,AbstractMatrix{T}}})((h, c), x::Union{AbstractVecOrMat{T},OneHotArray}) where {I,H,V,T}
+function (m::LSTMCell{I,H,V,<:NTuple{2,AbstractMatrix{T}}})((h, c), x::Union{AbstractVecOrMat{<:AbstractFloat},OneHotArray}) where {I,H,V,T}
   b, o = m.b, size(h, 1)
-  g = muladd(m.Wi, x, muladd(m.Wh, h, b))
+  xT = _match_eltype(m, T, x)
+  g = muladd(m.Wi, xT, muladd(m.Wh, h, b))
   input, forget, cell, output = multigate(g, o, Val(4))
   c′ = @. sigmoid_fast(forget) * c + sigmoid_fast(input) * tanh_fast(cell)
   h′ = @. sigmoid_fast(output) * tanh_fast(c′)

@@ -376,9 +378,10 @@ end
 GRUCell((in, out)::Pair; init = glorot_uniform, initb = zeros32, init_state = zeros32) =
   GRUCell(init(out * 3, in), init(out * 3, out), initb(out * 3), init_state(out,1))
 
-function (m::GRUCell{I,H,V,<:AbstractMatrix{T}})(h, x::Union{AbstractVecOrMat{T},OneHotArray}) where {I,H,V,T}
+function (m::GRUCell{I,H,V,<:AbstractMatrix{T}})(h, x::Union{AbstractVecOrMat{<:AbstractFloat},OneHotArray}) where {I,H,V,T}
   Wi, Wh, b, o = m.Wi, m.Wh, m.b, size(h, 1)
-  gxs, ghs, bs = multigate(Wi*x, o, Val(3)), multigate(Wh*h, o, Val(3)), multigate(b, o, Val(3))
+  xT = _match_eltype(m, T, x)
+  gxs, ghs, bs = multigate(Wi*xT, o, Val(3)), multigate(Wh*h, o, Val(3)), multigate(b, o, Val(3))
   r, z = _gru_output(gxs, ghs, bs)
   h̃ = @. tanh_fast(gxs[3] + r * ghs[3] + bs[3])
   h′ = @. (1 - z) * h̃ + z * h

@@ -444,9 +447,10 @@ GRUv3Cell((in, out)::Pair; init = glorot_uniform, initb = zeros32, init_state =
   GRUv3Cell(init(out * 3, in), init(out * 2, out), initb(out * 3),
             init(out, out), init_state(out,1))
 
-function (m::GRUv3Cell{I,H,V,HH,<:AbstractMatrix{T}})(h, x::Union{AbstractVecOrMat{T},OneHotArray}) where {I,H,V,HH,T}
+function (m::GRUv3Cell{I,H,V,HH,<:AbstractMatrix{T}})(h, x::Union{AbstractVecOrMat{<:AbstractFloat},OneHotArray}) where {I,H,V,HH,T}
   Wi, Wh, b, Wh_h̃, o = m.Wi, m.Wh, m.b, m.Wh_h̃, size(h, 1)
-  gxs, ghs, bs = multigate(Wi*x, o, Val(3)), multigate(Wh*h, o, Val(2)), multigate(b, o, Val(3))
+  xT = _match_eltype(m, T, x)
+  gxs, ghs, bs = multigate(Wi*xT, o, Val(3)), multigate(Wh*h, o, Val(2)), multigate(b, o, Val(3))
   r, z = _gru_output(gxs, ghs, bs)
   h̃ = tanh_fast.(gxs[3] .+ (Wh_h̃ * (r .* h)) .+ bs[3])
   h′ = @. (1 - z) * h̃ + z * h
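Besides the conversion, relaxing the signatures to `AbstractVecOrMat{<:AbstractFloat}` means a Float64 sequence now reaches these fast cell methods instead of a fallback. A small sketch of the new behaviour (mirrors the added tests):

    using Flux

    m = RNN(2 => 3)               # Float32 parameters and hidden state
    x64 = rand(Float64, 2, 4)

    m(x64) isa Matrix{Float32}    # true: the cell matches x64 to eltype(m.cell.Wi)
    m.state isa Matrix{Float32}   # true: the state is not promoted to Float64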

src/layers/stateless.jl
Lines changed: 44 additions & 0 deletions

@@ -57,3 +57,47 @@ true
   σ = std(x, dims=dims, mean=μ, corrected=false)
   return @. (x - μ) / (σ + ϵ)
 end
+
+"""
+    _match_eltype(layer, ::Type{T}, x)
+    _match_eltype(layer, x)
+
+This internal function corrects most layer input to match the type of the weights.
+The second method uses `T = eltype(layer.weight)`.
+
+It solves a common performance bug: Before, accidentally supplying `Float64` input,
+or an activation function which produces `Float64`, would silently run the
+entire forward pass in this precision.
+"""
+_match_eltype(layer, ::Type{T}, x::AbstractArray{T}) where {T} = x
+
+# A common mistake, print a friendly warning, and fix it:
+function _match_eltype(layer, ::Type{Float32}, x::AbstractArray{Float64})
+  # This warning is the only reason this needs to take the layer.
+  @warn "Layer with Float32 parameters got Float64 input.
+  The input will be converted, but any earlier layers may be very slow." layer summary(x) maxlog=1
+  convert(AbstractArray{Float32}, x)
+end
+
+# Allow OneHot to reach specialisation of * etc:
+_match_eltype(layer, ::Type, x::OneHotLike) = x
+
+# Other floats, and integers, silently fix.
+function _match_eltype(layer, ::Type{T}, x::AbstractArray{<:Union{AbstractFloat, Integer}}) where {T}
+  convert(AbstractArray{T}, x)
+end
+
+# Weird types like Nil, Dual, etc, we allow through:
+_match_eltype(layer, ::Type, x::AbstractArray) = x
+
+# 2-arg method, for common layers with layer.weight
+_match_eltype(layer, x) = _match_eltype(layer, eltype(layer.weight), x)
+
+# Trivial rule:
+function ChainRulesCore.rrule(::typeof(_match_eltype), layer, ::Type{T}, x::AbstractArray) where {T}
+  _match_eltype(layer, T, x), dx -> (NoTangent(), ZeroTangent(), NoTangent(), dx) # does not un-thunk dx
+end
+function ChainRulesCore.rrule(::typeof(_match_eltype), layer, x::AbstractArray)
+  _match_eltype(layer, x), dx -> (ZeroTangent(), NoTangent(), dx) # does not un-thunk dx
+end
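Read as a dispatch table: matching element types pass through, Float64 into a Float32 layer warns and converts, other floats and integers convert silently, and one-hot or exotic element types are left alone. A quick illustrative check of those branches, calling the internal function directly:

    using Flux

    d = Dense(2 => 3)   # d.weight is a Float32 matrix

    eltype(Flux._match_eltype(d, rand(Float32, 2, 4)))  # Float32, returned as-is
    eltype(Flux._match_eltype(d, rand(Float64, 2, 4)))  # Float32, one-time warning
    eltype(Flux._match_eltype(d, rand(-5:5, 2, 4)))     # Float32, silent conversion
    Flux._match_eltype(d, Flux.onehotbatch(rand(Bool, 5), (true, false)))  # one-hot passes through unchanged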

src/outputsize.jl
Lines changed: 10 additions & 1 deletion

@@ -173,6 +173,16 @@ for (fn, Dims) in ((:conv, DenseConvDims),)
   end
 end
 
+# Recurrent layers: just convert to the type they like & convert back.
+
+for Cell in [:RNNCell, :LSTMCell, :GRUCell, :GRUv3Cell]
+  @eval function (m::Recur{<:$Cell})(x::AbstractArray{Nil})
+    xT = fill!(similar(m.cell.Wi, size(x)), 0)
+    _, y = m.cell(m.state, xT) # discard the new state
+    return similar(x, size(y))
+  end
+end
+
 """
     @autosize (size...,) Chain(Layer(_ => 2), Layer(_), ...)

@@ -229,7 +239,6 @@ Limitations:
 * While `@autosize (5, 32) Flux.Bilinear(_ => 7)` is OK, something like `Bilinear((_, _) => 7)` will fail.
 * While `Scale(_)` and `LayerNorm(_)` are fine (and use the first dimension), `Scale(_,_)` and `LayerNorm(_,_)`
   will fail if `size(x,1) != size(x,2)`.
-* RNNs won't work: `@autosize (7, 11) LSTM(_ => 5)` fails, because `outputsize(RNN(3=>7), (3,))` also fails, a known issue.
 """
 macro autosize(size, model)
   Meta.isexpr(size, :tuple) || error("@autosize's first argument must be a tuple, the size of the input")
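With the `Nil` path above, `outputsize` runs each recurrent cell on a zero array of the weights' element type and keeps only the result's size, which is why the RNN limitation is dropped from the `@autosize` docstring. A hedged sketch of what should now work:

    using Flux

    Flux.outputsize(LSTM(2 => 5), (2, 16))   # (5, 16), computed without real data
    @autosize (7, 11) LSTM(_ => 5)           # previously listed as failing; builds LSTM(7 => 5)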

test/layers/basic.jl
Lines changed: 17 additions & 0 deletions

@@ -89,6 +89,23 @@ import Flux: activations
     @test Dense(10, 2, identity, init = ones)([ones(10,1) 2*ones(10,1)]) == [10 20; 10 20]
     @test Dense(10, 2, identity, init = ones, bias = false)([ones(10,1) 2*ones(10,1)]) == [10 20; 10 20]
   end
+  @testset "type matching" begin
+    d1 = Dense(2 => 3)
+    d2 = Dense(d1.weight, false)
+    x1 = randn(Float32, 2, 4)
+    @test d1(x1) ≈ d2(x1) ≈ d1.weight * x1
+    x2 = Float64.(x1)
+    @test d1(x2) ≈ d2(x2) ≈ d1.weight * x2
+    @test d1(x2) isa Array{Float32}  # tests _match_eltype, will print a warning
+    @test d2(x2) isa Array{Float32}
+
+    x3 = rand(-5:5, 2, 4)
+    @test d1(x3) ≈ d2(x3) ≈ d1.weight * x3
+    x4 = rand(Bool, 2, 4)
+    @test d1(x4) ≈ d2(x4) ≈ d1.weight * x4
+    x5 = Flux.onehotbatch(rand(Bool, 5), (true, false))
+    @test d1(x5) ≈ d2(x5) ≈ d1.weight * x5
+  end
 end
 
 @testset "Scale" begin

test/layers/conv.jl
Lines changed: 14 additions & 0 deletions

@@ -286,3 +286,17 @@ end
   end
   @test_throws DimensionMismatch fun(rand(2,3,4), rand(6))
 end
+
+@testset "type matching" begin
+  x = rand(Float64, 10,2,5)
+  xi = rand(-3:3, 10,2,5)
+  c1 = Conv((3,), 2=>4, relu)
+  @test @inferred(c1(x)) isa Array{Float32, 3}
+  @test c1(xi) isa Array{Float32, 3}
+
+  c2 = CrossCor((3,), 2=>1, relu)
+  @test @inferred(c2(x)) isa Array{Float32, 3}
+
+  c3 = ConvTranspose((3,), 2=>4, relu)
+  @test @inferred(c3(x)) isa Array{Float32, 3}
+end

test/layers/recurrent.jl
Lines changed: 24 additions & 0 deletions

@@ -169,3 +169,27 @@ end
     @test size(m(x3)) == (5, 1, 2)
   end
 end
+
+@testset "type matching" begin
+  x = rand(Float64, 2, 4)
+  m1 = RNN(2=>3)
+  @test m1(x) isa Matrix{Float32}  # uses _match_eltype, may print a warning
+  @test m1.state isa Matrix{Float32}
+  @test (@inferred m1(x); true)
+  @test Flux.outputsize(m1, size(x)) == size(m1(x))
+
+  m2 = LSTM(2=>3)
+  @test m2(x) isa Matrix{Float32}
+  @test (@inferred m2(x); true)
+  @test Flux.outputsize(m2, size(x)) == size(m2(x))
+
+  m3 = GRU(2=>3)
+  @test m3(x) isa Matrix{Float32}
+  @test (@inferred m3(x); true)
+  @test Flux.outputsize(m3, size(x)) == size(m3(x))
+
+  m4 = GRUv3(2=>3)
+  @test m4(x) isa Matrix{Float32}
+  @test (@inferred m4(x); true)
+  @test Flux.outputsize(m4, size(x)) == size(m4(x))
+end

test/outputsize.jl
Lines changed: 7 additions & 0 deletions

@@ -257,3 +257,10 @@ end
   # Can't let |> gpu act before the arrays are materialized... so it's an error:
   @test_throws ErrorException @eval @autosize (1,2,3) Dense(_=>2) |> f64
 end
+
+@testset "type matching" begin
+  # Check that _match_eltype doesn't replace this with an array of Float32:
+  @test Flux._match_eltype(Dense(2=>3), fill(Flux.Nil(),2,4)) isa Matrix{Flux.Nil}
+  # For RNN etc there's a special path:
+  @test RNN(2=>3)(fill(Flux.Nil(),2,4)) isa Matrix{Flux.Nil}
+end
