Commit 017acdf

extend to generic arrays; add cuda tests

1 parent 75e90c0 commit 017acdf

3 files changed: +34 -11 lines changed

src/layers/basic.jl

Lines changed: 8 additions & 10 deletions

@@ -120,14 +120,13 @@ end
 
 @functor Dense
 
-function (a::Dense)(x::AbstractArray)
+function (a::Dense)(x::Union{AbstractVector, AbstractMatrix})
   W, b, σ = a.W, a.b, a.σ
-  sz = size(x)
-  x = reshape(x, sz[1], :) # reshape to handle dims > 1 as batch dimensions
-  x = σ.(W*x .+ b)
-  return reshape(x, :, sz[2:end]...)
+  return σ.(W*x .+ b)
 end
 
+(a::Dense)(x::AbstractArray) = reshape(a(mat(x)), :, size(x)[2:end]...)
+
 function Base.show(io::IO, l::Dense)
   print(io, "Dense(", size(l.W, 2), ", ", size(l.W, 1))
   l.σ == identity || print(io, ", ", l.σ)
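With this change, the vector/matrix method of `Dense` is the primitive, and any higher-rank input is first collapsed with the new `mat` helper (which keeps `size(x, 1)` as the row dimension), then has its trailing dimensions restored afterwards, so every dimension after the first acts as a batch dimension. A minimal sketch of the resulting behaviour (layer and input sizes chosen arbitrarily):

```julia
using Flux

d = Dense(10, 5)             # weight is 5×10
x = rand(Float32, 10, 7, 3)  # feature dimension first, two batch-like dims

y = d(x)                     # dispatches to reshape(d(mat(x)), :, size(x)[2:end]...)
size(y)                      # (5, 7, 3): trailing dimensions are preserved
```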
@@ -326,7 +325,7 @@ function Base.show(io::IO, l::Bilinear)
 end
 
 """
-    Parallel(connection, layers...)
+    Parallel(connection, layers...)
 
 Create a 'Parallel' layer that passes an input array to each path in
 `layers`, reducing the output with `connection`.
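As the docstring describes, `Parallel` applies each of `layers` to the same input and reduces the outputs with `connection`. A brief usage sketch of that documented behaviour (sizes chosen arbitrarily):

```julia
using Flux

m = Parallel(vcat, Dense(10, 4), Dense(10, 2))
x = rand(Float32, 10)

size(m(x))  # (6,): vcat of a length-4 output and a length-2 output
```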
@@ -416,10 +415,9 @@ function Embedding(in::Integer, out::Integer;
   return Embedding(init(out, in))
 end
 
-(m::Embedding)(x::OneHotMatrix) = m.weight * x # equivalent to m.weight[:, onecold(x)]
-(m::Embedding)(x::OneHotVector) = m.weight * x
-(m::Embedding)(x::AbstractVector) = m.weight[:, x]
-(m::Embedding)(x::Int) = m.weight[:, x]
+(m::Embedding)(x::Union{OneHotVector, OneHotMatrix}) = m.weight * x # equivalent to m.weight[:, onecold(x)]
+(m::Embedding)(x::Union{Int, AbstractVector}) = m.weight[:, x]
+(m::Embedding)(x::AbstractArray) = reshape(m(mat(x)), :, size(x)[2:end]...)
 
 function Base.show(io::IO, m::Embedding)
   print(io, "Embedding($(size(m.weight, 2)), $(size(m.weight, 1)))")
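The four `Embedding` methods collapse to three: one-hot vectors and matrices share the matrix-multiply path (equivalent to indexing with `onecold`), integers and integer vectors share the column-indexing path, and other arrays are flattened with `mat` and reshaped back. A sketch of the two indexing paths (vocabulary and sizes chosen arbitrarily; `Embedding(in, out)` follows the constructor shown above):

```julia
using Flux
using Flux: onehotbatch

m = Embedding(10, 4)     # 10-word vocabulary, length-4 embeddings

m(3)                     # column 3 of m.weight: a length-4 vector
m([3, 1, 7])             # 4×3 matrix, one embedding per index

oh = onehotbatch([3, 1, 7], 1:10)  # 10×3 one-hot matrix
m(oh) ≈ m([3, 1, 7])               # both select the same columns of m.weight
```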

src/layers/stateless.jl

Lines changed: 13 additions & 1 deletion

@@ -4,7 +4,7 @@
 Reshape arbitrarily-shaped input into a matrix-shaped output,
 preserving the size of the last dimension.
 
-See also [`unsqueeze`](@ref).
+See also [`unsqueeze`](@ref) and [`mat`](@ref).
 
 # Examples
 ```jldoctest

@@ -26,6 +26,18 @@ function flatten(x::AbstractArray)
   return reshape(x, :, size(x)[end])
 end
 
+"""
+    mat(x::AbstractArray)
+
+Reshape arbitrarily-shaped input into a matrix-shaped output,
+preserving the size of the first dimension.
+
+See also [`flatten`](@ref) and [`unsqueeze`](@ref).
+"""
+function mat(x::AbstractArray)
+  return reshape(x, size(x, 1), :)
+end
+
 """
     normalise(x; dims=ndims(x), ϵ=1e-5)
 
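`mat` is the first-dimension counterpart of `flatten`: `flatten` keeps the last dimension and collapses everything before it, while `mat` keeps the first dimension and collapses everything after it. A quick comparison (shape chosen arbitrarily; written as `Flux.mat` since the diff doesn't show whether `mat` is exported):

```julia
using Flux

x = rand(Float32, 4, 3, 2)

size(Flux.flatten(x))  # (12, 2): last dimension kept, leading dims collapsed
size(Flux.mat(x))      # (4, 6):  first dimension kept, trailing dims collapsed
```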

test/cuda/layers.jl

Lines changed: 13 additions & 0 deletions

@@ -218,4 +218,17 @@ end
     @test gs_cpu[pcpu] ≈ gs_gpu[pgpu]
   end
 end
+
+@testset "Embedding" begin
+  vocab_size, embed_size = 10, 4
+  m = Embedding(vocab_size, embed_size)
+  x = rand(1:vocab_size, 3)
+  y = m(x)
+  m_g = m |> gpu
+  x_g = x |> gpu
+  y_g = m_g(x_g)
+  @test collect(y_g) == y
+  gs = gradient(() -> sum(tanh.(m(x))), params(m))
+  gs_g = gradient(() -> sum(tanh.(m_g(x_g))), params(m_g))
+  @test collect(gs_g[m_g.weight]) ≈ gs[m.weight]
 end
