
Commit 26666c9

Merge pull request #31 from gdalle/dev
Enable higher-order derivatives
2 parents 1581d2e + 005170d · commit 26666c9

4 files changed: +57 −36 lines

src/implicit_function.jl

Lines changed: 2 additions & 4 deletions
@@ -57,11 +57,10 @@ Keyword arguments are given to both `implicit.forward` and `implicit.conditions`
 function ChainRulesCore.frule(
     rc::RuleConfig, (_, dx), implicit::ImplicitFunction, x::AbstractArray{R}; kwargs...
 ) where {R<:Real}
-    forward = implicit.forward
     conditions = implicit.conditions
     linear_solver = implicit.linear_solver

-    y = forward(x; kwargs...)
+    y = implicit(x; kwargs...)

     conditions_x(x̃; kwargs...) = conditions(x̃, y; kwargs...)
     conditions_y(ỹ; kwargs...) = -conditions(x, ỹ; kwargs...)

@@ -98,11 +97,10 @@ Keyword arguments are given to both `implicit.forward` and `implicit.conditions`
 function ChainRulesCore.rrule(
     rc::RuleConfig, implicit::ImplicitFunction, x::AbstractArray{R}; kwargs...
 ) where {R<:Real}
-    forward = implicit.forward
     conditions = implicit.conditions
     linear_solver = implicit.linear_solver

-    y = forward(x; kwargs...)
+    y = implicit(x; kwargs...)

     conditions_x(x̃; kwargs...) = conditions(x̃, y; kwargs...)
     conditions_y(ỹ; kwargs...) = -conditions(x, ỹ; kwargs...)
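For context: routing the primal computation through `implicit(x; kwargs...)` instead of the raw `implicit.forward(x; kwargs...)` means that when a rule is differentiated again (e.g. by nested ForwardDiff.jl calls), the inner evaluation can dispatch back onto the wrapper's custom rule rather than onto a solver that may reject dual numbers. Below is a minimal sketch of that nesting pattern; it is illustrative only, and the names `forward_fun`, `conditions_fun`, `direct_solver`, `imp`, `deriv1`, `deriv2` are hypothetical, mirroring the recipe in `test/1_unconstrained_optimization.jl` further down.

```julia
# Illustrative sketch (hypothetical identifiers), following the same recipe as the test file below.
using ImplicitDifferentiation
using ForwardDiff
using ForwardDiffChainRules

forward_fun(x) = copy(x)                    # toy solver: returns y = x
conditions_fun(x, y) = y .- x               # optimality conditions, zero at y = x
direct_solver(A, b) = (Matrix(A) \ b, (solved=true,))  # dense solve, accepts dual numbers

imp = ImplicitFunction(forward_fun, conditions_fun, direct_solver)
@ForwardDiff_frule (f::typeof(imp))(x::AbstractArray{<:ForwardDiff.Dual}; kwargs...)

x, h = rand(3), rand(3)
deriv1(x, h) = ForwardDiff.derivative(t -> imp(x .+ t .* h), 0)        # first-order directional derivative
deriv2(x, h) = ForwardDiff.derivative(t -> deriv1(x .+ t .* h, h), 0)  # second order via nested ForwardDiff calls
deriv2(x, h)  # ≈ zero(x) because the map x ↦ x is linear
```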

test/1_unconstrained_optimization.jl

Lines changed: 53 additions & 28 deletions
@@ -9,9 +9,20 @@ The optimality conditions are given by gradient stationarity:
 ```math
 F(x, \hat{y}(x)) = 0 \quad \text{with} \quad F(x,y) = \nabla_2 f(x, y) = 0
 ```
-
 =#

+using ChainRulesTestUtils #src
+using ForwardDiff
+using ForwardDiffChainRules
+using ImplicitDifferentiation
+using LinearAlgebra #src
+using Optim
+using Random
+using Test #src
+using Zygote
+
+Random.seed!(63);
+
 # ## Implicit function wrapper

 #=

@@ -22,8 +33,6 @@ f(x, y) = \lVert y - x \rVert^2
 In this case, the optimization algorithm is very simple (the identity function does the job), but still we implement it using a black box solver from [Optim.jl](https://github.com/JuliaNLSolvers/Optim.jl) to show that it doesn't change the result.
 =#

-using Optim
-
 function dumb_identity(x::AbstractArray{Float64})
     f(y) = sum(abs2, y - x)
     y0 = zero(x)

@@ -40,72 +49,88 @@ zero_gradient(x, y) = 2(y - x);

 # We now have all the ingredients to construct our implicit function.

-using ImplicitDifferentiation
-
 implicit = ImplicitFunction(dumb_identity, zero_gradient);

 # Time to test!

-using Random
-Random.seed!(63)
-
 x = rand(3, 2)

 # Let's start by taking a look at the forward pass, which should be the identity function.

 implicit(x)

-# ## Autodiff with Zygote.jl
+# ## Why bother?

-using Zygote
+# It is important to understand why implicit differentiation is necessary here. Indeed, our optimization solver alone doesn't support autodiff with ForwardDiff.jl (due to type constraints)
+
+try
+    ForwardDiff.jacobian(dumb_identity, x)
+catch e
+    e
+end

-# If we use an autodiff package compatible with [ChainRules.jl](https://github.com/JuliaDiff/ChainRules.jl), such as [Zygote.jl](https://github.com/FluxML/Zygote.jl), differentiation works out of the box.
+# ... nor is it compatible with Zygote.jl (due to unsupported `try/catch` statements).
+
+try
+    Zygote.jacobian(dumb_identity, x)[1]
+catch e
+    e
+end
+
+# ## Autodiff with Zygote.jl
+
+# If we use an autodiff package compatible with [ChainRules.jl](https://github.com/JuliaDiff/ChainRules.jl), such as [Zygote.jl](https://github.com/FluxML/Zygote.jl), implicit differentiation works out of the box.

 Zygote.jacobian(implicit, x)[1]

 # As expected, we recover the identity matrix as Jacobian. Strictly speaking, the Jacobian should be a 4D tensor, but it is flattened into a 2D matrix.

 # ## Autodiff with ForwardDiff.jl

-using ForwardDiff
-
 # If we want to use [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl) instead, we run into a problem: custom chain rules are not directly translated into dual number dispatch. Luckily, [ForwardDiffChainRules.jl](https://github.com/ThummeTo/ForwardDiffChainRules.jl) provides us with a workaround. All we need to do is to apply the following macro:

-using ForwardDiffChainRules
-
 @ForwardDiff_frule (f::typeof(implicit))(x::AbstractArray{<:ForwardDiff.Dual}; kwargs...)

 # And then things work like a charm!

 ForwardDiff.jacobian(implicit, x)

-# ## Why did we bother?
+# ## Higher order differentiation

-# It is important to understand that implicit differentiation was necessary here. Indeed our solver alone doesn't support autodiff with ForwardDiff.jl (due to type constraints)
+h = rand(size(x));

-try
-    ForwardDiff.jacobian(dumb_identity, x)
-catch e
-    e
-end
+# Assuming we need second-order derivatives, nesting calls to Zygote.jl is generally a bad idea. We can, however, nest calls to ForwardDiff.jl.

-# ... nor was it compatible with Zygote.jl (due to unsupported `try/catch` statements).
+D(x, h) = ForwardDiff.derivative(t -> implicit(x .+ t .* h), 0)
+DD(x, h1, h2) = ForwardDiff.derivative(t -> D(x .+ t .* h2, h1), 0);
+
+#-

 try
-    Zygote.jacobian(dumb_identity, x)[1]
+    DD(x, h, h) # fails
 catch e
     e
 end

-# The following tests are not included in the docs. #src
+# The only requirement is to switch to a linear solver that is compatible with dual numbers (which the default `gmres` from [Krylov.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl) is not).

-using ChainRulesTestUtils #src
-using LinearAlgebra #src
-using Test #src
+linear_solver2(A, b) = (Matrix(A) \ b, (solved=true,))
+implicit2 = ImplicitFunction(dumb_identity, zero_gradient, linear_solver2);
+@ForwardDiff_frule (f::typeof(implicit2))(x::AbstractArray{<:ForwardDiff.Dual}; kwargs...)
+
+D2(x, h) = ForwardDiff.derivative(t -> implicit2(x .+ t .* h), 0)
+DD2(x, h1, h2) = ForwardDiff.derivative(t -> D2(x .+ t .* h2, h1), 0);
+
+#-
+
+DD2(x, h, h)
+
+# The following tests are not included in the docs. #src

 @testset verbose = true "ForwardDiff.jl" begin #src
     @test_throws MethodError ForwardDiff.jacobian(dumb_identity, x) #src
     @test ForwardDiff.jacobian(implicit, x) == I #src
+    @test all(DD2(x, h, h) .≈ 0) #src
 end #src

 @testset verbose = true "Zygote.jl" begin #src
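A note on the solver swap above: judging by the `linear_solver2` definition, the linear solver is called as `linear_solver(A, b)` and must return a `(solution, stats)` pair, like the default `gmres` from Krylov.jl. The sketch below is a hypothetical variant (not part of this commit; `hybrid_solver` and the element-type check are assumptions) that keeps the Krylov path for plain floating-point problems and only densifies when dual numbers are involved.

```julia
using ForwardDiff: Dual
using Krylov: gmres

# Hypothetical solver obeying the same (A, b) -> (solution, stats) convention as linear_solver2.
function hybrid_solver(A, b)
    if eltype(b) <: Dual
        return Matrix(A) \ b, (solved=true,)  # dense solve is generic over dual numbers
    else
        return gmres(A, b)                    # Krylov.jl's gmres returns a (solution, stats) tuple
    end
end

# implicit3 = ImplicitFunction(dumb_identity, zero_gradient, hybrid_solver)
```

For the small test problem above, the plain dense solve used by `linear_solver2` is the simpler choice.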

test/2_sparse_linear_regression.jl

Lines changed: 1 addition & 2 deletions
@@ -20,6 +20,7 @@ using MathOptInterface
 using MathOptSetDistances
 using Random
 using SCS
+using Test #src
 using Zygote

 Random.seed!(63);

@@ -111,8 +112,6 @@ sum(abs, J - J_ref) / prod(size(J))

 # The following tests are not included in the docs. #src

-using Test #src
-
 @testset verbose = true "FiniteDifferences.jl" begin #src
     @test sum(abs, J - J_ref) / prod(size(J)) <= 1e-2 #src
 end #src

test/3_optimal_transport.jl

Lines changed: 1 addition & 2 deletions
@@ -9,6 +9,7 @@ using FiniteDifferences
 using ImplicitDifferentiation
 using LinearAlgebra
 using Random
+using Test #src
 using Zygote

 Random.seed!(63);

@@ -160,8 +161,6 @@ sum(abs, J2 - J_ref) / prod(size(J_ref))

 # The following tests are not included in the docs. #src

-using Test #src
-
 @testset verbose = true "FiniteDifferences.jl" begin #src
     @test u1 == u2 #src
     @test all(iszero, sinkhorn_fixed_point(C, u1; a=a, b=b, ε=ε, T=T)) #src
