Skip to content

Commit 9ce7347

Browse files
extend to generic arrays; add cuda tests
1 parent dcadaf4 commit 9ce7347

File tree

3 files changed

+38
-5
lines changed

3 files changed

+38
-5
lines changed

src/layers/basic.jl

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,13 +143,22 @@ end
143143

144144
@functor Dense
145145

146+
<<<<<<< HEAD
146147
function (a::Dense)(x::AbstractVecOrMat)
147148
W, b, σ = a.weight, a.bias, a.σ
148149
return σ.(W*x .+ b)
149150
end
150151

151152
(a::Dense)(x::AbstractArray) =
152153
reshape(a(reshape(x, size(x,1), :)), :, size(x)[2:end]...)
154+
=======
155+
function (a::Dense)(x::Union{AbstractVector, AbstractMatrix})
156+
W, b, σ = a.W, a.b, a.σ
157+
return σ.(W*x .+ b)
158+
end
159+
160+
(a::Dense)(x::AbstractArray) = reshape(a(mat(x)), :, size(x)[2:end]...)
161+
>>>>>>> 017acdf9 (extend to generic arrays; add cuda tests)
153162

154163
function Base.show(io::IO, l::Dense)
155164
print(io, "Dense(", size(l.weight, 2), ", ", size(l.weight, 1))
@@ -467,10 +476,9 @@ function Embedding(in::Integer, out::Integer;
467476
return Embedding(init(out, in))
468477
end
469478

470-
(m::Embedding)(x::OneHotMatrix) = m.weight * x # equivalent to m.weight[:, onecold(x)]
471-
(m::Embedding)(x::OneHotVector) = m.weight * x
472-
(m::Embedding)(x::AbstractVector) = m.weight[:, x]
473-
(m::Embedding)(x::Int) = m.weight[:, x]
479+
(m::Embedding)(x::Union{OneHotVector, OneHotMatrix}) = m.weight * x # equivalent to m.weight[:,onecold(x)]
480+
(m::Embedding)(x::Union{Int,AbstractVector}) = m.weight[:, x]
481+
(m::Embedding)(x::AbstractArray) = reshape(m(mat(x)), :, size(x)[2:end]...)
474482

475483
function Base.show(io::IO, m::Embedding)
476484
print(io, "Embedding($(size(m.weight, 2)), $(size(m.weight, 1)))")

src/layers/stateless.jl

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
Reshape arbitrarily-shaped input into a matrix-shaped output,
55
preserving the size of the last dimension.
66
7-
See also [`unsqueeze`](@ref).
7+
See also [`unsqueeze`](@ref) and [`mat`](@ref).
88
99
# Examples
1010
```jldoctest
@@ -26,6 +26,18 @@ function flatten(x::AbstractArray)
2626
return reshape(x, :, size(x)[end])
2727
end
2828

29+
"""
30+
mat(x::AbstractArray)
31+
32+
Reshape arbitrarily-shaped input into a matrix-shaped output,
33+
preserving the size of the first dimension.
34+
35+
See also [`flatten`](@ref) and [`unsqueeze`](@ref).
36+
"""
37+
function mat(x::AbstractArray)
38+
return reshape(x, size(x,1), :)
39+
end
40+
2941
"""
3042
normalise(x; dims=ndims(x), ϵ=1e-5)
3143

test/cuda/layers.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,4 +258,17 @@ end
258258
@test gs_cpu[pcpu] ≈ gs_gpu[pgpu]
259259
end
260260
end
261+
262+
@testset "Embedding" begin
263+
vocab_size, embed_size = 10, 4
264+
m = Embedding(vocab_size, embed_size)
265+
x = rand(1:vocab_size, 3)
266+
y = m(x)
267+
m_g = m |> gpu
268+
x_g = x |> gpu
269+
y_g = m_g(x_g)
270+
@test collect(y_g) == y
271+
gs = gradient(() -> sum(tanh.(m(x))), params(m))
272+
gs_g = gradient(() -> sum(tanh.(m_g(x_g))), params(m_g))
273+
@test collect(gs_g[m_g.weight]) ≈ gs[m.weight]
261274
end

0 commit comments

Comments
 (0)