
Commit 081654c

kellertuer and TomRiem authored
Introduce Quasi Newton Methods (#48)
* Introduces a large variety of quasi Newton methods
* Introduces two new line search methods for Wolfe conditions

Co-authored-by: Tom-Christian Riemer <riemertom720@googlemail.com>
1 parent 871e25b commit 081654c

24 files changed: +1789 −58 lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        julia-version: [1.3, 1.5]
+        julia-version: [1.4, 1.5]
         os: [ubuntu-latest, macOS-latest, windows-latest]
     steps:
       - uses: actions/checkout@v2

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -13,3 +13,4 @@ examples/**/*.mp4
 examples/**/*.jld2
 docs/src/tutorials/*.md
 .vscode
+Manifest.toml

Project.toml

Lines changed: 5 additions & 3 deletions
@@ -1,12 +1,13 @@
 name = "Manopt"
 uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5"
 authors = ["Ronny Bergmann <manopt@ronnybergmann.net>"]
-version = "0.2.14"
+version = "0.2.15"

 [deps]
 ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
 ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
 Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
+DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e"
@@ -22,10 +23,11 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 ColorSchemes = "3.5.0"
 ColorTypes = "0.9.1, 0.10"
 Colors = "0.11.2, 0.12"
-Manifolds = "0.4.11"
+DataStructures = "0.17, 0.18"
+Manifolds = "0.4.14"
 ManifoldsBase = "0.10.0"
 StaticArrays = "0.12, 1.0"
-julia = "1.3"
+julia = "1.4"

 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

docs/make.jl

Lines changed: 2 additions & 1 deletion
@@ -44,11 +44,12 @@ makedocs(;
         "Gradient Descent" => "solvers/gradient_descent.md",
         "Nelder–Mead" => "solvers/NelderMead.md",
         "Particle Swarm Optimization" => "solvers/particle_swarm.md",
+        "Quasi-Newton" => "solvers/quasi_Newton.md",
         "Stochastic Gradient Descent" => "solvers/stochastic_gradient_descent.md",
         "Subgradient method" => "solvers/subgradient.md",
         "Steihaug-Toint TCG Method" =>
             "solvers/truncated_conjugate_gradient_descent.md",
-        "Riemannian Trust-Regions Solver" => "solvers/trust_regions.md",
+        "Trust-Regions Solver" => "solvers/trust_regions.md",
     ],
     "Functions" => [
         "Introduction" => "functions/index.md",

docs/src/solvers/ChambollePock.md

Lines changed: 1 addition & 0 deletions
@@ -56,6 +56,7 @@ CurrentModule = Manopt

 ```@docs
 ChambollePock
+ChambollePock!
 ```

 ## Problem & Options

docs/src/solvers/gradient_descent.md

Lines changed: 2 additions & 2 deletions
@@ -18,11 +18,11 @@ GradientDescentOptions

 ## Direction Update Rules

-A field of the options is the `direction`, a [`DirectionUpdateRule`](@ref), which by default [`Gradient`](@ref) just evaluates the gradient but can be enhanced for example to
+A field of the options is the `direction`, a [`DirectionUpdateRule`](@ref), which by default [`IdentityUpdateRule`](@ref) just evaluates the gradient but can be enhanced for example to

 ```@docs
 DirectionUpdateRule
-Gradient
+IdentityUpdateRule
 MomentumGradient
 AverageGradient
 Nesterov

docs/src/solvers/quasi_Newton.md

Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
# [Riemannian quasi-Newton methods](@id quasiNewton)

```@meta
CurrentModule = Manopt
```

```@docs
quasi_Newton
quasi_Newton!
```
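
A minimal usage sketch, mirroring the Rayleigh quotient benchmark added in this commit: it minimizes ``x^{\mathrm{T}} A x`` on the unit sphere. The test matrix `A` and all parameter choices are illustrative.

```julia
using Manopt, Manifolds, LinearAlgebra, Random

Random.seed!(42)
n = 100
A = randn(n, n)
A = (A + A') / 2                      # a symmetric test matrix
M = Sphere(n - 1)
F(X) = X' * A * X                     # Rayleigh quotient
∇F(X) = 2 * (A * X - X * X' * A * X)  # its Riemannian gradient on the sphere
x = random_point(M)
x_opt = quasi_Newton(
    M, F, ∇F, x;
    stopping_criterion=StopWhenGradientNormLess(10^(-6)),
)
```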

## Background

The aim is to minimize a real-valued function on a Riemannian manifold, i.e.

```math
\min f(x), \quad x \in \mathcal{M}.
```

Riemannian quasi-Newton methods are, as generalizations of their Euclidean counterparts, Riemannian line search methods: they determine a search direction ``η_k ∈ T_{x_k} \mathcal{M}`` at the current iterate ``x_k`` and a suitable stepsize ``α_k`` along ``\gamma(α) = R_{x_k}(α η_k)``, where ``R \colon T \mathcal{M} \to \mathcal{M}`` is a retraction. The next iterate is obtained by

```math
x_{k+1} = R_{x_k}(α_k η_k).
```

In quasi-Newton methods, the search direction is given by

```math
η_k = -{\mathcal{H}_k}^{-1}[∇ f (x_k)] = -\mathcal{B}_k [∇f (x_k)],
```

where ``\mathcal{H}_k \colon T_{x_k} \mathcal{M} \to T_{x_k} \mathcal{M}`` is a positive definite self-adjoint operator, which approximates the action of the Hessian ``\operatorname{Hess} f (x_k)[\cdot]``, and ``\mathcal{B}_k = {\mathcal{H}_k}^{-1}``. The idea of quasi-Newton methods is that, instead of creating a completely new approximation of the Hessian operator ``\operatorname{Hess} f(x_{k+1})`` or its inverse at every iteration, the previous operator ``\mathcal{H}_k`` or ``\mathcal{B}_k`` is updated by a convenient formula using the information about the curvature of the objective function obtained during the iteration. The resulting operator ``\mathcal{H}_{k+1}`` or ``\mathcal{B}_{k+1}`` acts on the tangent space ``T_{x_{k+1}} \mathcal{M}`` of the freshly computed iterate ``x_{k+1}``.
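
In pseudocode, the resulting scheme looks as follows. This is a minimal sketch using `retract` from Manifolds.jl; `direction_update` and `linesearch` are illustrative stand-ins for applying ``-\mathcal{B}_k`` and for a (Wolfe) line search, not Manopt.jl internals.

```julia
using Manifolds

# Skeleton of the generic quasi-Newton iteration described above.
function quasi_newton_sketch(M, ∇F, x, direction_update, linesearch; iterations=100)
    for _ in 1:iterations
        g = ∇F(x)                      # Riemannian gradient ∇f(x_k)
        η = direction_update(M, x, g)  # η_k = -B_k[∇f(x_k)]
        α = linesearch(M, x, η)        # stepsize along γ(α) = R_{x_k}(α η_k)
        x = retract(M, x, α * η)       # x_{k+1} = R_{x_k}(α_k η_k)
    end
    return x
end
```
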
In order to obtain a well-defined method, the following requirements are placed on the new operator ``\mathcal{H}_{k+1}`` or ``\mathcal{B}_{k+1}`` that is created by an update. Since the Hessian ``\operatorname{Hess} f(x_{k+1})`` is a self-adjoint operator on the tangent space ``T_{x_{k+1}} \mathcal{M}``, and ``\mathcal{H}_{k+1}`` approximates it, we require that ``\mathcal{H}_{k+1}`` or ``\mathcal{B}_{k+1}`` is also self-adjoint on ``T_{x_{k+1}} \mathcal{M}``. In order to achieve a steady descent, we want ``η_k`` to be a descent direction in each iteration. Therefore we require that ``\mathcal{H}_{k+1}`` or ``\mathcal{B}_{k+1}`` is a positive definite operator on ``T_{x_{k+1}} \mathcal{M}``. In order to get information about the curvature of the objective function into the new operator ``\mathcal{H}_{k+1}`` or ``\mathcal{B}_{k+1}``, we require that it satisfies a form of a Riemannian quasi-Newton equation:

```math
\mathcal{H}_{k+1} [T_{x_k \rightarrow x_{k+1}}({R_{x_k}}^{-1}(x_{k+1}))] = ∇f(x_{k+1}) - T_{x_k \rightarrow x_{k+1}}(∇f(x_k))
```

or

```math
\mathcal{B}_{k+1} [∇f(x_{k+1}) - T_{x_k \rightarrow x_{k+1}}(∇f(x_k))] = T_{x_k \rightarrow x_{k+1}}({R_{x_k}}^{-1}(x_{k+1})),
```

where ``T_{x_k \rightarrow x_{k+1}} \colon T_{x_k} \mathcal{M} \to T_{x_{k+1}} \mathcal{M}`` is a vector transport and the chosen retraction ``R`` is its associated retraction. We note that, of course, not all updates in all situations will meet these conditions in every iteration.
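
For intuition, in the Euclidean case ``\mathcal{M} = \mathbb{R}^n`` with ``R_x(η) = x + η`` and the identity as vector transport, the first equation reduces to the classical secant equation

```math
\mathcal{H}_{k+1} [x_{k+1} - x_k] = ∇f(x_{k+1}) - ∇f(x_k).
```
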
For specific quasi-Newton updates, the fulfillment of the Riemannian curvature condition, which requires that

```math
g_{x_{k+1}}(s_k, y_k) > 0
```

holds with ``s_k = T_{x_k \rightarrow x_{k+1}}(α_k η_k)`` and ``y_k = ∇f(x_{k+1}) - T_{x_k \rightarrow x_{k+1}}(∇f(x_k))``, is a requirement for the inheritance of the self-adjointness and positive definiteness of ``\mathcal{H}_k`` or ``\mathcal{B}_k`` by the operator ``\mathcal{H}_{k+1}`` or ``\mathcal{B}_{k+1}``. Unfortunately, the fulfillment of the Riemannian curvature condition is not guaranteed by a step size ``α_k > 0`` that satisfies the generalized Wolfe conditions. However, in order to create a positive definite operator ``\mathcal{H}_{k+1}`` or ``\mathcal{B}_{k+1}`` in each iteration, the so-called locking condition was introduced in [^HuangGallivanAbsil2015], which requires that the isometric vector transport ``T^S``, which is used in the update formula, and its associated retraction ``R`` fulfill

```math
T^{S}_{x, \xi_x}(\xi_x) = \beta T^{R}_{x, \xi_x}(\xi_x), \quad \beta = \frac{\lVert \xi_x \rVert_x}{\lVert T^{R}_{x, \xi_x}(\xi_x) \rVert_{R_{x}(\xi_x)}},
```

where ``T^R`` is the vector transport by differentiated retraction. With the requirement that the isometric vector transport ``T^S`` and its associated retraction ``R`` satisfy the locking condition, and using the tangent vector

```math
y_k = {\beta_k}^{-1} ∇f(x_{k+1}) - T^{S}_{x_k, α_k η_k}(∇f(x_k)),
```

where

```math
\beta_k = \frac{\lVert α_k η_k \rVert_{x_k}}{\lVert T^{R}_{x_k, α_k η_k}(α_k η_k) \rVert_{x_{k+1}}},
```

in the update, it can be shown that choosing a stepsize ``α_k > 0`` that satisfies the Riemannian Wolfe conditions leads to the fulfillment of the Riemannian curvature condition, which in turn implies that the operator generated by the updates is positive definite.
In the following we denote the specific operators in matrix notation and hence use ``H_k`` and ``B_k``, respectively.

## Direction Updates

There are different ways to compute a fixed [`AbstractQuasiNewtonUpdateRule`](@ref).
In general these are represented by

```@docs
AbstractQuasiNewtonDirectionUpdate
QuasiNewtonMatrixDirectionUpdate
QuasiNewtonLimitedMemoryDirectionUpdate
QuasiNewtonCautiousDirectionUpdate
```
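
To illustrate what a limited-memory update computes, the following is a minimal sketch of the classical two-loop recursion that [`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref) builds on, assuming the memory pairs ``(s_i, y_i)`` have already been transported into the current tangent space and tangent vectors are represented as arrays; the function name is illustrative, not a Manopt.jl internal.

```julia
using Manifolds

# Computes η_k = -B_k[grad] from the m stored pairs in S and Y
# (vectors of tangent vectors in T_x M, oldest first).
function two_loop_recursion(M, x, grad, S, Y)
    m = length(S)
    q = copy(grad)
    ρ = [1 / inner(M, x, Y[i], S[i]) for i in 1:m]
    ξ = zeros(m)
    for i in m:-1:1                     # backward pass
        ξ[i] = ρ[i] * inner(M, x, S[i], q)
        q .-= ξ[i] .* Y[i]
    end
    if m > 0                            # scaled initial operator B_0 = γ_k id
        q .*= inner(M, x, S[m], Y[m]) / inner(M, x, Y[m], Y[m])
    end
    for i in 1:m                        # forward pass
        β = ρ[i] * inner(M, x, Y[i], q)
        q .+= (ξ[i] - β) .* S[i]
    end
    return -q
end
```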

## Hessian Update Rules

Using

```@docs
update_hessian!
```

the following update formulae for either ``H_{k+1}`` or ``B_{k+1}`` are available.

```@docs
AbstractQuasiNewtonUpdateRule
BFGS
DFP
Broyden
SR1
InverseBFGS
InverseDFP
InverseBroyden
InverseSR1
```

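For orientation, [`BFGS`](@ref) generalizes the classical BFGS update, which in the matrix notation introduced above reads

```math
H_{k+1} = \widetilde{H}_k + \frac{y_k y^{\mathrm{T}}_k}{s^{\mathrm{T}}_k y_k} - \frac{\widetilde{H}_k s_k s^{\mathrm{T}}_k \widetilde{H}_k}{s^{\mathrm{T}}_k \widetilde{H}_k s_k},
```

where ``\widetilde{H}_k`` denotes the previous operator transported into ``T_{x_{k+1}} \mathcal{M}``; the other rules are analogous rank-one or rank-two corrections.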

## Options

The quasi-Newton algorithm is based on a [`GradientProblem`](@ref).

```@docs
QuasiNewtonOptions
```

## Literature

[^HuangGallivanAbsil2015]:
    > Huang, W., Gallivan, K. A., Absil, P.-A.:
    > _A Broyden class of quasi-Newton methods for Riemannian optimization_,
    > SIAM Journal on Optimization, 25(3), pp. 1660–1685, 2015.

Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
#
# The Brockett cost function on the Stiefel manifold St(n,k) (benchmark)
#
using Manifolds, Manopt, Random, LinearAlgebra, BenchmarkTools, Profile
import Manifolds: vector_transport_to!
vector_transport_to!(M::Stiefel, Y, p, X, q, ::ProjectionTransport) = project!(M, Y, q, X)

struct GradF
    A::Matrix{Float64}
    N::Diagonal{Float64,Vector{Float64}}
end
function (∇F::GradF)(X::Array{Float64,2})
    AX = ∇F.A * X
    XpAX = X' * AX
    return 2 .* AX * ∇F.N .- X * XpAX * ∇F.N .- X * ∇F.N * XpAX
end

function run_brocket_experiment(n::Int, k::Int, m::Int; seed=42)
    Random.seed!(seed)
    M = Stiefel(n, k)
    A = randn(n, n)
    A = (A + A') / 2
    F(X::Array{Float64,2}) = tr((X' * A * X) * Diagonal(k:-1:1))
    ∇F = GradF(A, Diagonal(Float64.(collect(k:-1:1))))
    x = random_point(M)
    return quasi_Newton(
        M,
        F,
        ∇F,
        x;
        memory_size=m,
        vector_transport_method=ProjectionTransport(),
        retraction_method=QRRetraction(),
        stopping_criterion=StopWhenGradientNormLess(norm(M, x, ∇F(x)) * 10^(-6)),
        cautious_update=true,
        # debug = [:Iteration," ", :Cost, " ", DebugGradientNorm(), "\n", 10],
    )
end

io = IOBuffer()

for e in [
    (32, 32, 1),
    (32, 32, 2),
    (32, 32, 4),
    (32, 32, 8),
    (32, 32, 16),
    (32, 32, 32),
    (1000, 2, 4),
    (1000, 3, 4),
    (1000, 4, 4),
    (1000, 5, 4),
]
    println("Benchmarking $(e):")
    b = @benchmark run_brocket_experiment($(e[1]), $(e[2]), $(e[3])) samples = 50
    #run_brocket_experiment(e[1], e[2], e[3])
    show(io, "text/plain", b)
    s = String(take!(io))
    println(s, "\n\n")
end
Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
#
# The joint diagonalization problem on the Stiefel manifold St(n,k)
#
using Manopt, Manifolds, ManifoldsBase, LinearAlgebra, Random
import Manifolds: vector_transport_to!
struct IdentityTransport <: AbstractVectorTransportMethod end
vector_transport_to!(M::Stiefel, Y, p, X, q, ::IdentityTransport) = (Y .= project(M, q, X))
Random.seed!(42)
n = 12
k = 8
m = 512
A = randn(n, n, m)

for i in 1:m
    A[:, :, i] = diagm(n:-1:1) + 0.1 * (transpose(A[:, :, i]) + A[:, :, i])
end

M = Stiefel(n, k)
F(X::Array{Float64,2}) = -sum([norm(diag(X' * A[:, :, i] * X))^2 for i in 1:m])
function ∇F(X::Array{Float64,2})
    return project(
        M, X, -4 * sum([A[:, :, i] * X * norm(diag(X' * A[:, :, i] * X)) for i in 1:m])
    )
end
x = random_point(M)
@time quasi_Newton(
    M,
    F,
    ∇F,
    x;
    memory_size=32,
    cautious_update=true,
    vector_transport_method=IdentityTransport(),
    stopping_criterion=StopWhenGradientNormLess(norm(M, x, ∇F(x)) * 10^(-6)),
    debug=[:Iteration, " ", :Cost, "\n", 1, :Stop],
)
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
#
# Positive Definite Karcher Mean (Matlab Manopt)
#
using Manopt, Manifolds, ManifoldsBase, LinearAlgebra, Random
Random.seed!(42)
n = 5
m = 5
M = SymmetricPositiveDefinite(n)
x = random_point(M)
A = [random_point(M) for _ in 1:m]
A = [Symmetric(a) for a in A]
F(X::Array{Float64,2}) = sum([distance(M, X, B)^2 for B in A]) / (2 * m)
∇F(X::Array{Float64,2}) = -sum([log(M, X, B) for B in A]) / m

@time quasi_Newton(
    M,
    F,
    ∇F,
    x;
    memory_size=100,
    stopping_criterion=StopWhenGradientNormLess(norm(M, x, ∇F(x)) * 10^(-6)),
    debug=[:Iteration, " ", :Cost, "\n", 1, :Stop],
)

# B1 = quasi_Newton(M,F,∇F,x; memory_size = 100, debug = [:Iteration, " ", :Cost, "\n", 1, :Stop])
# B2 = mean(M,A)
Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
#
# Rayleigh quotient minimization on the sphere (benchmark)
#
using Manopt, Manifolds, ManifoldsBase, Random, LinearAlgebra, BenchmarkTools
Random.seed!(42)

function run_rayleigh_experiment(n::Int)
    A = randn(n, n)
    A = (A + A') / 2
    F(X::Array{Float64,1}) = X' * A * X
    ∇F(X::Array{Float64,1}) = 2 * (A * X - X * X' * A * X)
    M = Sphere(n - 1)
    x = random_point(M)
    return quasi_Newton(
        M,
        F,
        ∇F,
        x;
        #memory_size=-1,
        stopping_criterion=StopWhenAny(
            StopAfterIteration(1000), StopWhenGradientNormLess(10^(-6))
        ),
        debug=[:Iteration, " ", :Cost, "\n", 1, :Stop],
    )
end
io = IOBuffer()

for n in [100]
    b = @benchmark run_rayleigh_experiment($n) samples = 30
    show(io, "text/plain", b)
    s = String(take!(io))
    println("Benchmarking $(n):\n", s, "\n\n")
end
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
#
# Thomson Problem on Oblique(n,m)
#
using Manopt, Manifolds, ManifoldsBase, LinearAlgebra, Random
Random.seed!(42)
n = 50
m = 20
M = Oblique(n, m)

function F(X::Array{Float64,2})
    f = 0
    for i in 1:m
        for j in 1:m
            if i != j
                f = f + 1 / (norm(X[:, i] - X[:, j])^2)
            end
        end
    end
    return f
end

function ∇F(X::Array{Float64,2})
    g = zeros(n, m)
    Id = Matrix(I, n, n)
    for i in 1:m
        f = zeros(n, 1)
        for j in 1:m
            if i != j
                f = f + 1 / (1.0 - X[:, i]' * X[:, j]) * X[:, j]
            end
        end
        g[:, i] = (Id - X[:, i] * X[:, i]') * f
    end
    return g
end

x = random_point(M)

@time quasi_Newton(
    M,
    F,
    ∇F,
    x;
    memory_size=100,
    vector_transport_method=PowerVectorTransport(ParallelTransport()),
    stopping_criterion=StopWhenGradientNormLess(norm(M, x, ∇F(x)) * 10^(-6)),
    debug=[:Iteration, " ", :Cost, "\n", 1, :Stop],
)
