[CUSPARSE] Interface gtsv2 (#1795)

amontoison · web-flow · commit 5b204112d795 · 2023-03-15T12:42:35.000+01:00
diff --git a/lib/cusparse/preconditioners.jl b/lib/cusparse/preconditioners.jl
@@ -1,14 +1,34 @@
 # routines that implement different preconditioners
 
-export ic02!, ic02, ilu02!, ilu02
+export ic02!, ic02, ilu02!, ilu02, gtsv2!, gtsv2
 
 """
-    ic02!(A::CuSparseMatrix, index::SparseChar)
+    ic02!(A::CuSparseMatrix, index::SparseChar='O')
 
 Incomplete Cholesky factorization with no pivoting.
 Preserves the sparse layout of matrix `A`.
 """
-ic02!(A::CuSparseMatrix, index::SparseChar)
+function ic02! end
+
+"""
+    ilu02!(A::CuSparseMatrix, index::SparseChar='O')
+
+Incomplete LU factorization with no pivoting.
+Preserves the sparse layout of matrix `A`.
+"""
+function ilu02! end
+
+"""
+    gtsv2!(dl::CuVector, d::CuVector, du::CuVector, B::CuVecOrMat, index::SparseChar='O'; pivoting::Bool=true)
+
+Solve the linear system `A * X = B` in place, where `A` is a tridiagonal matrix defined by its
+lower (`dl`), main (`d`) and upper (`du`) diagonals; the solution `X` overwrites `B`, which is returned.
+The three vectors must have the same length `m ≥ 3` with `m == size(B, 1)`, otherwise a `DimensionMismatch` is thrown.
+With `pivoting=true` (the default), the solution is more accurate but also more expensive.
+"""
+function gtsv2! end
+
+# csric02
 for (bname,aname,sname,elty) in ((:cusparseScsric02_bufferSize, :cusparseScsric02_analysis, :cusparseScsric02, :Float32),
                                  (:cusparseDcsric02_bufferSize, :cusparseDcsric02_analysis, :cusparseDcsric02, :Float64),
                                  (:cusparseCcsric02_bufferSize, :cusparseCcsric02_analysis, :cusparseCcsric02, :ComplexF32),
@@ -88,13 +108,7 @@ for (bname,aname,sname,elty) in ((:cusparseScsric02_bufferSize, :cusparseScsric0
     end
 end
 
-"""
-    ilu02!(A::CuSparseMatrix, index::SparseChar)
-
-Incomplete LU factorization with no pivoting.
-Preserves the sparse layout of matrix `A`.
-"""
-ilu02!(A::CuSparseMatrix, index::SparseChar)
+# csrilu02
 for (bname,aname,sname,elty) in ((:cusparseScsrilu02_bufferSize, :cusparseScsrilu02_analysis, :cusparseScsrilu02, :Float32),
                                  (:cusparseDcsrilu02_bufferSize, :cusparseDcsrilu02_analysis, :cusparseDcsrilu02, :Float64),
                                  (:cusparseCcsrilu02_bufferSize, :cusparseCcsrilu02_analysis, :cusparseCcsrilu02, :ComplexF32),
@@ -280,3 +294,49 @@ for elty in (:Float32, :Float64, :ComplexF32, :ComplexF64)
         end
     end
 end
+
+# gtsv2
+for (query_piv, solve_piv, query_nopiv, solve_nopiv, elty) in (
+        (:cusparseSgtsv2_bufferSizeExt, :cusparseSgtsv2, :cusparseSgtsv2_nopivot_bufferSizeExt, :cusparseSgtsv2_nopivot, :Float32),
+        (:cusparseDgtsv2_bufferSizeExt, :cusparseDgtsv2, :cusparseDgtsv2_nopivot_bufferSizeExt, :cusparseDgtsv2_nopivot, :Float64),
+        (:cusparseCgtsv2_bufferSizeExt, :cusparseCgtsv2, :cusparseCgtsv2_nopivot_bufferSizeExt, :cusparseCgtsv2_nopivot, :ComplexF32),
+        (:cusparseZgtsv2_bufferSizeExt, :cusparseZgtsv2, :cusparseZgtsv2_nopivot_bufferSizeExt, :cusparseZgtsv2_nopivot, :ComplexF64),
+    )
+    # Generate one `gtsv2!` method per element type, bound to the matching wrappers.
+    @eval begin
+        function gtsv2!(dl::CuVector{$elty}, d::CuVector{$elty}, du::CuVector{$elty}, B::CuVecOrMat{$elty}, index::SparseChar='O'; pivoting::Bool=true)
+            # NOTE: `index` is accepted but not referenced anywhere in this method body.
+            ml, m, mu, mB = length(dl), length(d), length(du), size(B,1)
+            # Checks run in this order: system size, diagonal lengths, right-hand side rows.
+            (m > 2) || throw(DimensionMismatch("The size of the linear system must be at least 3."))
+            (ml == m == mu) || throw(DimensionMismatch("(dl, d, du) must have the same length, the size of the vectors is ($ml,$m,$mu)!"))
+            (m == mB) || throw(DimensionMismatch("The tridiagonal matrix and the right-hand side B have inconsistent dimensions ($m != $mB)!"))
+            nrhs = size(B,2)          # number of columns of the right-hand side
+            ldb = max(1,stride(B,2))  # stride between columns of B, clamped to at least 1
+
+            # `pivoting` selects which pair of wrappers (size query + solver) is called.
+            query = pivoting ? $query_piv : $query_nopiv
+            solve = pivoting ? $solve_piv : $solve_nopiv
+
+            # Workspace size: the query wrapper receives a Ref whose content is returned.
+            function bufferSize()
+                sz = Ref{Csize_t}(1)
+                query(handle(), m, nrhs, dl, d, du, B, ldb, sz)
+                return sz[]
+            end
+            with_workspace(bufferSize) do buffer
+                solve(handle(), m, nrhs, dl, d, du, B, ldb, buffer)
+            end
+            # The solver was handed `B` itself, so the caller gets back the same array.
+            return B
+        end
+    end
+end
+
+for T in (:Float32, :Float64, :ComplexF32, :ComplexF64)
+    # Out-of-place variant: solves on a copy of `B`, so the caller's `B` is not overwritten.
+    @eval function gtsv2(dl::CuVector{$T}, d::CuVector{$T}, du::CuVector{$T}, B::CuVecOrMat{$T}, index::SparseChar='O'; pivoting::Bool=true)
+        X = copy(B)
+        return gtsv2!(dl, d, du, X, index; pivoting=pivoting)
+    end
+end
diff --git a/test/cusparse.jl b/test/cusparse.jl
@@ -972,3 +972,57 @@ for SparseMatrixType in [CuSparseMatrixCSC, CuSparseMatrixCSR]
         end
     end
 end
+
+@testset "gtsv2" begin
+    # Host-side reference problems. Each case holds the three diagonals of the
+    # tridiagonal matrix (`dl`, `d`, `du`), a right-hand side `B` and the
+    # expected solution `X` of `A * X = B`.
+    cases = (
+        (   # example 1: 3 equations, matrix right-hand side
+            dl = [0; 1; 3],
+            d = [1; 1; 4],
+            du = [1; 2; 0],
+            B = [1 0 0; 0 1 0; 0 0 1],
+            X = [1/3 2/3 -1/3; 2/3 -2/3 1/3; -1/2 1/2 0],
+        ),
+
+        (   # example 2: 7 equations, vector right-hand side
+            dl = [0; 1; 1; 1; 1; 1; 0],
+            d = [6; 4; 4; 4; 4; 4; 6],
+            du = [0; 1; 1; 1; 1; 1; 0],
+            B = [0; 1; 2; -6; 2; 1; 0],
+            X = [0; 0; 1; -2; 1; 0; 0],
+        ),
+
+        (   # example 3: 10 equations, vector right-hand side
+            dl = [0; 1; 1; 7; 6; 3; 8; 6; 5; 4],
+            d = [2; 3; 3; 2; 2; 4; 1; 2; 4; 5],
+            du = [1; 2; 1; 6; 1; 3; 5; 7; 3; 0],
+            B = [1; 2; 6; 34; 10; 1; 4; 22; 25; 3],
+            X = [1; -1; 2; 1; 3; -2; 0; 4; 2; -1],
+        ),
+    )
+    eltypes = [Float32,Float64,ComplexF32,ComplexF64]
+
+    # Every case is solved for each element type, with and without pivoting.
+    for pivoting ∈ (false, true)
+        @testset "gtsv2 with pivoting=$pivoting -- $elty" for elty in eltypes
+            @testset "example $k" for (k, case) in enumerate(cases)
+                # Upload the host data as device arrays of the element type under test.
+                dl_d = CuVector{elty}(case.dl)
+                d_d = CuVector{elty}(case.d)
+                du_d = CuVector{elty}(case.du)
+                B_d = CuArray{elty}(case.B)
+
+                # Out-of-place solve: the solution comes back as a new array.
+                X_d = gtsv2(dl_d, d_d, du_d, B_d; pivoting)
+                @test collect(X_d) ≈ case.X
+
+                # In-place solve: `B_d` still holds the right-hand side here and
+                # is overwritten with the solution.
+                gtsv2!(dl_d, d_d, du_d, B_d; pivoting)
+                @test collect(B_d) ≈ case.X
+            end
+        end
+    end
+end