Relax type constraints to allow complex jvp (#43)

simeonschaub · wesselb · commit 87f44c8a8117 · 2019-09-02T20:44:22.000+01:00
* relax type constraints to allow complex jvp

* fix docstrings, allow to_vec(::Vector{<:Number})

* another round of fixing docstrings

* add tests

* tests for to_vec(::Complex), correct to_vec for Adjoint

* delete stray comment

* use `@testset "bla" for ...`

* remove accidental indents
diff --git a/src/grad.jl b/src/grad.jl
@@ -1,11 +1,11 @@
 export grad, jacobian, jvp, j′vp, to_vec
 
 """
-    grad(fdm, f, x::AbstractVector)
+    grad(fdm, f, x::Vector{<:Number})
 
 Approximate the gradient of `f` at `x` using `fdm`. Assumes that `f(x)` is scalar.
 """
-function grad(fdm, f, x::Vector{T}) where T<:Real
+function grad(fdm, f, x::Vector{T}) where T<:Number
     v, dx, tmp = fill(zero(T), size(x)), similar(x), similar(x)
     for n in eachindex(x)
         v[n] = one(T)
@@ -21,34 +21,34 @@ function grad(fdm, f, x::Vector{T}) where T<:Real
 end
 
 """
-    jacobian(fdm, f, x::AbstractVector{<:Real}, D::Int)
-    jacobian(fdm, f, x::AbstractVector{<:Real})
+    jacobian(fdm, f, x::Vector{<:Number}, D::Int)
+    jacobian(fdm, f, x::Vector{<:Number})
 
 Approximate the Jacobian of `f` at `x` using `fdm`. `f(x)` must be a length `D` vector. If
 `D` is not provided, then `f(x)` is computed once to determine the output size.
 """
-function jacobian(fdm, f, x::Vector{T}, D::Int) where {T<:Real}
+function jacobian(fdm, f, x::Vector{T}, D::Int) where {T<:Number}
     J = Matrix{T}(undef, D, length(x))
     for d in 1:D
         J[d, :] = grad(fdm, x->f(x)[d], x)
     end
     return J
 end
-jacobian(fdm, f, x::Vector{<:Real}) = jacobian(fdm, f, x, length(f(x)))
+jacobian(fdm, f, x::Vector{<:Number}) = jacobian(fdm, f, x, length(f(x)))
 
 """
-    _jvp(fdm, f, x::Vector{<:Real}, ẋ::AbstractVector{<:Real})
+    _jvp(fdm, f, x::Vector{<:Number}, ẋ::AbstractVector{<:Number})
 
 Convenience function to compute `jacobian(f, x) * ẋ`.
 """
-_jvp(fdm, f, x::Vector{<:Real}, ẋ::AV{<:Real}) = fdm(ε -> f(x .+ ε .* ẋ), zero(eltype(x)))
+_jvp(fdm, f, x::Vector{<:Number}, ẋ::AV{<:Number}) = fdm(ε -> f(x .+ ε .* ẋ), zero(eltype(x)))
 
 """
-    _j′vp(fdm, f, ȳ::AbstractVector{<:Real}, x::Vector{<:Real})
+    _j′vp(fdm, f, ȳ::AbstractVector{<:Number}, x::Vector{<:Number})
 
-Convenience function to compute `jacobian(f, x)' * ȳ`.
+Convenience function to compute `transpose(jacobian(f, x)) * ȳ`.
 """
-_j′vp(fdm, f, ȳ::AV{<:Real}, x::Vector{<:Real}) = jacobian(fdm, f, x, length(ȳ))' * ȳ
+_j′vp(fdm, f, ȳ::AV{<:Number}, x::Vector{<:Number}) = transpose(jacobian(fdm, f, x, length(ȳ))) * ȳ
 
 """
     jvp(fdm, f, x, ẋ)
@@ -83,10 +83,10 @@ j′vp(fdm, f, ȳ, xs...) = j′vp(fdm, xs->f(xs...), ȳ, xs)
 
 Transform `x` into a `Vector`, and return a closure which inverts the transformation.
 """
-to_vec(x::Real) = ([x], first)
+to_vec(x::Number) = ([x], first)
 
 # Vectors
-to_vec(x::Vector{<:Real}) = (x, identity)
+to_vec(x::Vector{<:Number}) = (x, identity)
 function to_vec(x::Vector)
     x_vecs_and_backs = map(to_vec, x)
     x_vecs, backs = first.(x_vecs_and_backs), last.(x_vecs_and_backs)
@@ -97,7 +97,7 @@ function to_vec(x::Vector)
 end
 
 # Arrays
-to_vec(x::Array{<:Real}) = vec(x), x_vec->reshape(x_vec, size(x))
+to_vec(x::Array{<:Number}) = vec(x), x_vec->reshape(x_vec, size(x))
 function to_vec(x::Array)
     x_vec, back = to_vec(reshape(x, :))
     return x_vec, x_vec->reshape(back(x_vec), size(x))
@@ -111,9 +111,11 @@ end
 to_vec(x::Symmetric) = vec(Matrix(x)), x_vec->Symmetric(reshape(x_vec, size(x)))
 to_vec(X::Diagonal) = vec(Matrix(X)), x_vec->Diagonal(reshape(x_vec, size(X)...))
 
-function to_vec(X::T) where T<:Union{Adjoint,Transpose}
-    U = T.name.wrapper
-    return vec(Matrix(X)), x_vec->U(permutedims(reshape(x_vec, size(X))))
+function to_vec(X::Transpose)
+    return vec(Matrix(X)), x_vec->Transpose(permutedims(reshape(x_vec, size(X))))
+end
+function to_vec(X::Adjoint)
+    return vec(Matrix(X)), x_vec->Adjoint(conj!(permutedims(reshape(x_vec, size(X)))))
 end
 
 # Non-array data structures
diff --git a/test/grad.jl b/test/grad.jl
@@ -16,9 +16,9 @@ Base.length(x::DummyType) = size(x.X, 1)
 
 @testset "grad" begin
 
-    @testset "grad" begin
+    @testset "grad(::$T)" for T in (Float64, ComplexF64)
         rng, fdm = MersenneTwister(123456), central_fdm(5, 1)
-        x = randn(rng, 2)
+        x = randn(rng, T, 2)
         xc = copy(x)
         @test grad(fdm, x->sin(x[1]) + cos(x[2]), x) ≈ [cos(x[1]), -sin(x[2])]
         @test xc == x
@@ -29,13 +29,13 @@ Base.length(x::DummyType) = size(x.X, 1)
         @test jacobian(fdm, f, x, length(ȳ)) ≈ J_exact
         @test jacobian(fdm, f, x) == jacobian(fdm, f, x, length(ȳ))
         @test _jvp(fdm, f, x, ẋ) ≈ J_exact * ẋ
-        @test _j′vp(fdm, f, ȳ, x) ≈ J_exact' * ȳ
+        @test _j′vp(fdm, f, ȳ, x) ≈ transpose(J_exact) * ȳ
         @test xc == x
     end
 
-    @testset "jacobian / _jvp / _j′vp" begin
+    @testset "jacobian / _jvp / _j′vp (::$T)" for T in (Float64, ComplexF64)
         rng, P, Q, fdm = MersenneTwister(123456), 3, 2, central_fdm(5, 1)
-        ȳ, A, x, ẋ = randn(rng, P), randn(rng, P, Q), randn(rng, Q), randn(rng, Q)
+        ȳ, A, x, ẋ = randn(rng, T, P), randn(rng, T, P, Q), randn(rng, T, Q), randn(rng, T, Q)
         Ac = copy(A)
 
         check_jac_and_jvp_and_j′vp(fdm, x->A * x, ȳ, x, ẋ, A)
@@ -51,45 +51,54 @@ Base.length(x::DummyType) = size(x.X, 1)
         return nothing
     end
 
-    @testset "to_vec" begin
-        test_to_vec(1.0)
-        test_to_vec(1)
-        test_to_vec(randn(3))
-        test_to_vec(randn(5, 11))
-        test_to_vec(randn(13, 17, 19))
-        test_to_vec(randn(13, 0, 19))
-        test_to_vec([1.0, randn(2), randn(1), 2.0])
-        test_to_vec([randn(5, 4, 3), (5, 4, 3), 2.0])
-        test_to_vec(reshape([1.0, randn(5, 4, 3), randn(4, 3), 2.0], 2, 2))
-        test_to_vec(UpperTriangular(randn(13, 13)))
-        test_to_vec(Symmetric(randn(11, 11)))
-        test_to_vec(Diagonal(randn(7)))
-        test_to_vec(DummyType(randn(2, 9)))
-
-        @testset "$T" for T in (Adjoint, Transpose)
-            test_to_vec(T(randn(4, 4)))
-            test_to_vec(T(randn(6)))
-            test_to_vec(T(randn(2, 5)))
+    @testset "to_vec(::$T)" for T in (Float64, ComplexF64)
+        if T == Float64
+            test_to_vec(1.0)
+            test_to_vec(1)
+        else
+            test_to_vec(.7 + .8im)
+            test_to_vec(1 + 2im)
         end
-
+        test_to_vec(randn(T, 3))
+        test_to_vec(randn(T, 5, 11))
+        test_to_vec(randn(T, 13, 17, 19))
+        test_to_vec(randn(T, 13, 0, 19))
+        test_to_vec([1.0, randn(T, 2), randn(T, 1), 2.0])
+        test_to_vec([randn(T, 5, 4, 3), (5, 4, 3), 2.0])
+        test_to_vec(reshape([1.0, randn(T, 5, 4, 3), randn(T, 4, 3), 2.0], 2, 2))
+        test_to_vec(UpperTriangular(randn(T, 13, 13)))
+        test_to_vec(Symmetric(randn(T, 11, 11)))
+        test_to_vec(Diagonal(randn(T, 7)))
+        test_to_vec(DummyType(randn(T, 2, 9)))
+    
+        @testset "$Op" for Op in (Adjoint, Transpose)
+            test_to_vec(Op(randn(T, 4, 4)))
+            test_to_vec(Op(randn(T, 6)))
+            test_to_vec(Op(randn(T, 2, 5)))
+        end
+    
         @testset "Tuples" begin
             test_to_vec((5, 4))
-            test_to_vec((5, randn(5)))
-            test_to_vec((randn(4), randn(4, 3, 2), 1))
-            test_to_vec((5, randn(4, 3, 2), UpperTriangular(randn(4, 4)), 2.5))
-            test_to_vec(((6, 5), 3, randn(3, 2, 0, 1)))
-            test_to_vec((DummyType(randn(2, 7)), DummyType(randn(3, 9))))
-            test_to_vec((DummyType(randn(3, 2)), randn(11, 8)))
+            test_to_vec((5, randn(T, 5)))
+            test_to_vec((randn(T, 4), randn(T, 4, 3, 2), 1))
+            test_to_vec((5, randn(T, 4, 3, 2), UpperTriangular(randn(T, 4, 4)), 2.5))
+            test_to_vec(((6, 5), 3, randn(T, 3, 2, 0, 1)))
+            test_to_vec((DummyType(randn(T, 2, 7)), DummyType(randn(T, 3, 9))))
+            test_to_vec((DummyType(randn(T, 3, 2)), randn(T, 11, 8)))
         end
         @testset "Dictionary" begin
-            test_to_vec(Dict(:a=>5, :b=>randn(10, 11), :c=>(5, 4, 3)))
+            if T == Float64
+                test_to_vec(Dict(:a=>5, :b=>randn(10, 11), :c=>(5, 4, 3)))
+            else
+                test_to_vec(Dict(:a=>3 + 2im, :b=>randn(T, 10, 11), :c=>(5+im, 2-im, 1+im)))
+            end
         end
     end
 
-    @testset "jvp" begin
+    @testset "jvp(::$T)" for T in (Float64, ComplexF64)
         rng, N, M, fdm = MersenneTwister(123456), 2, 3, central_fdm(5, 1)
-        x, y = randn(rng, N), randn(rng, M)
-        ẋ, ẏ = randn(rng, N), randn(rng, M)
+        x, y = randn(rng, T, N), randn(rng, T, M)
+        ẋ, ẏ = randn(rng, T, N), randn(rng, T, M)
         xy, ẋẏ = vcat(x, y), vcat(ẋ, ẏ)
         ż_manual = _jvp(fdm, (xy)->sum(sin, xy), xy, ẋẏ)[1]
         ż_auto = jvp(fdm, x->sum(sin, x[1]) + sum(sin, x[2]), ((x, y), (ẋ, ẏ)))
@@ -98,10 +107,10 @@ Base.length(x::DummyType) = size(x.X, 1)
         @test ż_manual ≈ ż_multi
     end
 
-    @testset "j′vp" begin
+    @testset "j′vp(::$T)" for T in (Float64, ComplexF64)
         rng, N, M, fdm = MersenneTwister(123456), 2, 3, central_fdm(5, 1)
-        x, y = randn(rng, N), randn(rng, M)
-        z̄ = randn(rng, N + M)
+        x, y = randn(rng, T, N), randn(rng, T, M)
+        z̄ = randn(rng, T, N + M)
         xy = vcat(x, y)
         x̄ȳ_manual = j′vp(fdm, xy->sin.(xy), z̄, xy)
         x̄ȳ_auto = j′vp(fdm, x->sin.(vcat(x[1], x[2])), z̄, (x, y))