From b5e53e605541b98f303096e49571b29e6088ef10 Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Wed, 1 Feb 2023 18:31:38 -0500 Subject: [PATCH 01/18] Adding qr tests for view support --- test/cusolver/dense.jl | 162 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/test/cusolver/dense.jl b/test/cusolver/dense.jl index d9d22152e2..1c5de40fd0 100644 --- a/test/cusolver/dense.jl +++ b/test/cusolver/dense.jl @@ -8,6 +8,23 @@ n = 10 l = 13 k = 1 +m_sub_length = 7 +n_sub_length = 3 +l_sub_length= 11 +m_sub_start = 4 +n_sub_start = 2 +l_sub_start = 1 +m_subrange = (1:m_sub_length) .+ (m_sub_start-1) +n_subrange = (1:n_sub_length) .+ (n_sub_start -1) +l_subrange = (1:l_sub_length) .+ (l_sub_start -1) + +m_large=50 +n_large=30 +l_large=20 +m_range = (1:m) .+ (m_sub_start-1) +n_range = (1:n) .+ (n_sub_start -1) +l_range = (1:l) .+ (l_sub_start -1) + @testset "elty = $elty" for elty in [Float32, Float64, ComplexF32, ComplexF64] @testset "Cholesky (po)" begin A = rand(elty,n,n) @@ -377,6 +394,47 @@ k = 1 end end + A_view = view(A, m_sub_range, n_sub_range) + F = qr(A_view) + + d_A_view = view(d_A, m_sub_range, n_sub_range) + d_F = qr(d_A_view) + + d_RR = d_F.Q'*d_A_view + @test collect(d_RR[1:n,:]) ≈ collect(d_F.R) atol=tol*norm(A_view) + @test norm(d_RR[n+1:end,:]) < tol*norm(A_view) + + d_RRt = d_A_view'*d_F.Q + @test collect(d_RRt[:,1:n]) ≈ collect(d_F.R') atol=tol*norm(A_view) + @test norm(d_RRt[:,n+1:end]) < tol*norm(A_view) + + @test size(d_F) == size(A_view) + @test size(d_F.Q) == (m,m) + @test size(d_F.R) == (n,n) + @test size(d_RR) == size(d_A_view) + @test size(d_RRt) == size(d_A_view') + + d_I = CuMatrix{elty}(I, size(d_F.Q)) + @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A_view) + @test collect(d_F.Q * d_I) ≈ collect(d_F.Q) + @test collect(d_I * d_F.Q) ≈ collect(d_F.Q) + + d_I = CuMatrix{elty}(I, size(d_F.R)) + @test collect(d_F.R * d_I) ≈ collect(d_F.R) + @test collect(d_I * d_F.R) ≈ collect(d_F.R) + + CUDA.@allowscalar begin + qval = d_F.Q[1, 1] + @test qval ≈ F.Q[1, 1] + qrstr = sprint(show, MIME"text/plain"(), d_F) + if VERSION >= v"1.8-" + @test qrstr == "$(typeof(d_F))\nQ factor:\n$(sprint(show, MIME"text/plain"(), d_F.Q))\nR factor:\n$(sprint(show, MIME"text/plain"(), d_F.R))" + else + @test qrstr == "$(typeof(d_F)) with factors Q and R:\n$(sprint(show, d_F.Q))\n$(sprint(show, d_F.R))" + end + end + + Q, R = F dQ, dR = d_F @test collect(dQ*dR) ≈ A @@ -385,6 +443,11 @@ k = 1 d_A = CuArray(A) d_F = qr(d_A) @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A) + A_view = view(A, m_sub_range, n_sub_range) + d_A_view = view(d_A, m_sub_range, n_sub_range) + d_F = qr(d_A_view) + @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A_view) + A = rand(elty, m, n) d_A = CuArray(A) d_q, d_r = qr(d_A) @@ -393,12 +456,28 @@ k = 1 @test collect(CuArray(d_q)) ≈ Array(q) @test Array(d_r) ≈ Array(r) @test CuArray(d_q) ≈ convert(typeof(d_A), d_q) + A_view = view(A, m_sub_range, n_sub_range) + d_A_view = view(d_A, m_sub_range, n_sub_range) + d_q, d_r = qr(d_A_view) + q, r = qr(A_view) + @test Array(d_q) ≈ Array(q) + @test collect(CuArray(d_q)) ≈ Array(q) + @test Array(d_r) ≈ Array(r) + @test CuArray(d_q) ≈ convert(typeof(d_A), d_q) + A = rand(elty, n, m) d_A = CuArray(A) d_q, d_r = qr(d_A) q, r = qr(A) @test Array(d_q) ≈ Array(q) @test Array(d_r) ≈ Array(r) + A_view = view(A, m_sub_range, n_sub_range) + d_A_view = view(d_A, m_sub_range, n_sub_range) + d_q, d_r = qr(d_A_view) + q, r = qr(A_view) + @test Array(d_q) ≈ Array(q) + @test Array(d_r) ≈ Array(r) + A = rand(elty, n) # A and B are vectors d_A = CuArray(A) M = qr(A) @@ -406,6 +485,19 @@ k = 1 B = rand(elty, n) d_B = CuArray(B) @test collect(d_M \ d_B) ≈ M \ B + A_view = view(A, n_sub_range) + d_A_view = view(d_A, n_sub_range) + M_view = qr(A_view) + d_M_view = qr(d_A_view) + B_view = view(B, n_subrange) + d_B_view = view(d_B, n_subrange) + @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view + B_large = rand(elty, n_large) + B = view(B_large, n_range) + d_B_large = CuArray(B_large) + d_B = view(d_B_large, n_range) + @test collect(d_M \ d_B) ≈ M \ B + A = rand(elty, m, n) # A is a matrix and B,C is a vector d_A = CuArray(A) M = qr(A) @@ -423,6 +515,41 @@ k = 1 @test collect(d_M.R' * d_C) ≈ (M.R' * C) @test collect(d_C' * d_M.R) ≈ (C' * M.R) @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A_view = view(A, m_sub_range, n_sub_range) + d_A_view = view(d_A, m_sub_range, n_sub_range) + M_view = qr(A_view) + d_M_view = qr(d_A_view) + B_view = view(B, m_subrange) + d_B_view = view(d_B, m_subrange) + C_view = view(C, n_subrange) + d_C_view = view(d_C, n_subrange) + @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view + @test collect(d_M_view.Q * d_B_view) ≈ (M_view.Q * B_view) + @test collect(d_M_view.Q' * d_B_view) ≈ (M_view.Q' * B_view) + @test collect(d_B_view' * d_M_view.Q) ≈ (B_view' * M_view.Q) + @test collect(d_B_view' * d_M_view.Q') ≈ (B_view' * M_view.Q') + @test collect(d_M_view.R * d_C_view) ≈ (M_view.R * C_view) + @test collect(d_M_view.R' * d_C_view) ≈ (M_view.R' * C_view) + @test collect(d_C_view' * d_M_view.R) ≈ (C_view' * M_view.R) + @test collect(d_C_view' * d_M_view.R') ≈ (C_view' * M_view.R') + B_large = rand(elty, m_large) + B = view(B_large, m_range) + d_B_large = CuArray(B_large) + d_B = view(d_B_large, m_range) + C_large = rand(elty, n_large) + C = view(C_large, n_range) + d_C_large = CuArray(C_large) + d_C = view(d_C_large, n_range) + @test collect(d_M \ d_B) ≈ M \ B + @test collect(d_M.Q * d_B) ≈ (M.Q * B) + @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_M.R * d_C) ≈ (M.R * C) + @test collect(d_M.R' * d_C) ≈ (M.R' * C) + @test collect(d_C' * d_M.R) ≈ (C' * M.R) + @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A = rand(elty, m, n) # A and B,C are matrices d_A = CuArray(A) M = qr(A) @@ -440,6 +567,41 @@ k = 1 @test collect(d_M.R' * d_C) ≈ (M.R' * C) @test collect(d_C' * d_M.R) ≈ (C' * M.R) @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A_view = view(A, m_sub_range, n_sub_range) + d_A_view = view(d_A, m_sub_range, n_sub_range) + M_view = qr(A_view) + d_M_view = qr(d_A_view) + B_view = view(B, m_subrange, l_subrange) + d_B_view = view(d_B, m_subrange, l_subrange) + C_view = view(C, n_subrange, l_subrange) + d_C_view = view(d_C, n_subrange, l_subrange) + @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view + @test collect(d_M_view.Q * d_B_view) ≈ (M_view.Q * B_view) + @test collect(d_M_view.Q' * d_B_view) ≈ (M_view.Q' * B_view) + @test collect(d_B_view' * d_M_view.Q) ≈ (B_view' * M_view.Q) + @test collect(d_B_view' * d_M_view.Q') ≈ (B_view' * M_view.Q') + @test collect(d_M_view.R * d_C_view) ≈ (M_view.R * C_view) + @test collect(d_M_view.R' * d_C_view) ≈ (M_view.R' * C_view) + @test collect(d_C_view' * d_M_view.R) ≈ (C_view' * M_view.R) + @test collect(d_C_view' * d_M_view.R') ≈ (C_view' * M_view.R') + B_large = rand(elty, m_large, l_large) + B = view(B_large, m_range, l_range) + d_B_large = CuArray(B_large) + d_B = view(d_B_large, m_range, l_range) + C_large = rand(elty, n_large, l_large) + C = view(C_large, n_range, l_range) + d_C_large = CuArray(C_large) + d_C = view(d_C_large, n_range, l_range) + @test collect(d_M \ d_B) ≈ M \ B + @test collect(d_M.Q * d_B) ≈ (M.Q * B) + @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_M.R * d_C) ≈ (M.R * C) + @test collect(d_M.R' * d_C) ≈ (M.R' * C) + @test collect(d_C' * d_M.R) ≈ (C' * M.R) + @test collect(d_C' * d_M.R') ≈ (C' * M.R') + end @testset "potrsBatched!" begin From 1673bd9be99bbc0586d208471762db1e6e544f7f Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Thu, 9 Feb 2023 15:54:55 -0500 Subject: [PATCH 02/18] Adding QR for views on CUDA --- lib/cusolver/linalg.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/cusolver/linalg.jl b/lib/cusolver/linalg.jl index b95a45594d..89949b408a 100644 --- a/lib/cusolver/linalg.jl +++ b/lib/cusolver/linalg.jl @@ -102,6 +102,7 @@ using LinearAlgebra: Factorization, AbstractQ, QRCompactWY, QRCompactWYQ, QRPack if VERSION >= v"1.8-" LinearAlgebra.qr!(A::CuMatrix{T}) where T = QR(geqrf!(A::CuMatrix{T})...) +LinearAlgebra.qr!(A::StridedCuMatrix{T}) where T = QR(geqrf!(A::StridedCuMatrix{T})...) # conversions CuMatrix(F::Union{QR,QRCompactWY}) = CuArray(AbstractArray(F)) From f37ed222dfb0aef709c432eafcd900b0ef86366e Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Thu, 9 Feb 2023 15:55:57 -0500 Subject: [PATCH 03/18] Resolving typos in tests, commenting a TO DO --- test/cusolver/dense.jl | 55 ++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/test/cusolver/dense.jl b/test/cusolver/dense.jl index 1c5de40fd0..9029b34fd7 100644 --- a/test/cusolver/dense.jl +++ b/test/cusolver/dense.jl @@ -394,23 +394,23 @@ l_range = (1:l) .+ (l_sub_start -1) end end - A_view = view(A, m_sub_range, n_sub_range) + A_view = view(A, m_subrange, n_subrange) F = qr(A_view) - d_A_view = view(d_A, m_sub_range, n_sub_range) + d_A_view = view(d_A, m_subrange, n_subrange) d_F = qr(d_A_view) d_RR = d_F.Q'*d_A_view - @test collect(d_RR[1:n,:]) ≈ collect(d_F.R) atol=tol*norm(A_view) - @test norm(d_RR[n+1:end,:]) < tol*norm(A_view) + @test collect(d_RR[1:n_sub_length,:]) ≈ collect(d_F.R) atol=tol*norm(A_view) + @test norm(d_RR[n_sub_length+1:end,:]) < tol*norm(A_view) d_RRt = d_A_view'*d_F.Q - @test collect(d_RRt[:,1:n]) ≈ collect(d_F.R') atol=tol*norm(A_view) - @test norm(d_RRt[:,n+1:end]) < tol*norm(A_view) + @test collect(d_RRt[:,1:n_sub_length]) ≈ collect(d_F.R') atol=tol*norm(A_view) + @test norm(d_RRt[:,n_sub_length+1:end]) < tol*norm(A_view) @test size(d_F) == size(A_view) - @test size(d_F.Q) == (m,m) - @test size(d_F.R) == (n,n) + @test size(d_F.Q) == (m_sub_length,m_sub_length) + @test size(d_F.R) == (n_sub_length,n_sub_length) @test size(d_RR) == size(d_A_view) @test size(d_RRt) == size(d_A_view') @@ -443,8 +443,8 @@ l_range = (1:l) .+ (l_sub_start -1) d_A = CuArray(A) d_F = qr(d_A) @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A) - A_view = view(A, m_sub_range, n_sub_range) - d_A_view = view(d_A, m_sub_range, n_sub_range) + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) d_F = qr(d_A_view) @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A_view) @@ -456,8 +456,8 @@ l_range = (1:l) .+ (l_sub_start -1) @test collect(CuArray(d_q)) ≈ Array(q) @test Array(d_r) ≈ Array(r) @test CuArray(d_q) ≈ convert(typeof(d_A), d_q) - A_view = view(A, m_sub_range, n_sub_range) - d_A_view = view(d_A, m_sub_range, n_sub_range) + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) d_q, d_r = qr(d_A_view) q, r = qr(A_view) @test Array(d_q) ≈ Array(q) @@ -471,8 +471,8 @@ l_range = (1:l) .+ (l_sub_start -1) q, r = qr(A) @test Array(d_q) ≈ Array(q) @test Array(d_r) ≈ Array(r) - A_view = view(A, m_sub_range, n_sub_range) - d_A_view = view(d_A, m_sub_range, n_sub_range) + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) d_q, d_r = qr(d_A_view) q, r = qr(A_view) @test Array(d_q) ≈ Array(q) @@ -485,8 +485,8 @@ l_range = (1:l) .+ (l_sub_start -1) B = rand(elty, n) d_B = CuArray(B) @test collect(d_M \ d_B) ≈ M \ B - A_view = view(A, n_sub_range) - d_A_view = view(d_A, n_sub_range) + A_view = view(A, n_subrange) + d_A_view = view(d_A, n_subrange) M_view = qr(A_view) d_M_view = qr(d_A_view) B_view = view(B, n_subrange) @@ -509,14 +509,15 @@ l_range = (1:l) .+ (l_sub_start -1) @test collect(d_M \ d_B) ≈ M \ B @test collect(d_M.Q * d_B) ≈ (M.Q * B) @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) - @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) - @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + #TO DO: this needs to be fixed in GPUArrays + #@test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + #@test collect(d_B' * d_M.Q') ≈ (B' * M.Q') @test collect(d_M.R * d_C) ≈ (M.R * C) @test collect(d_M.R' * d_C) ≈ (M.R' * C) @test collect(d_C' * d_M.R) ≈ (C' * M.R) @test collect(d_C' * d_M.R') ≈ (C' * M.R') - A_view = view(A, m_sub_range, n_sub_range) - d_A_view = view(d_A, m_sub_range, n_sub_range) + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) M_view = qr(A_view) d_M_view = qr(d_A_view) B_view = view(B, m_subrange) @@ -526,8 +527,9 @@ l_range = (1:l) .+ (l_sub_start -1) @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view @test collect(d_M_view.Q * d_B_view) ≈ (M_view.Q * B_view) @test collect(d_M_view.Q' * d_B_view) ≈ (M_view.Q' * B_view) - @test collect(d_B_view' * d_M_view.Q) ≈ (B_view' * M_view.Q) - @test collect(d_B_view' * d_M_view.Q') ≈ (B_view' * M_view.Q') + #TO DO: this needs to be fixed in GPUArrays + #@test collect(d_B_view' * d_M_view.Q) ≈ (B_view' * M_view.Q) + #@test collect(d_B_view' * d_M_view.Q') ≈ (B_view' * M_view.Q') @test collect(d_M_view.R * d_C_view) ≈ (M_view.R * C_view) @test collect(d_M_view.R' * d_C_view) ≈ (M_view.R' * C_view) @test collect(d_C_view' * d_M_view.R) ≈ (C_view' * M_view.R) @@ -543,8 +545,9 @@ l_range = (1:l) .+ (l_sub_start -1) @test collect(d_M \ d_B) ≈ M \ B @test collect(d_M.Q * d_B) ≈ (M.Q * B) @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) - @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) - @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + #TO DO: this needs to be fixed in GPUArrays + #@test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + #@test collect(d_B' * d_M.Q') ≈ (B' * M.Q') @test collect(d_M.R * d_C) ≈ (M.R * C) @test collect(d_M.R' * d_C) ≈ (M.R' * C) @test collect(d_C' * d_M.R) ≈ (C' * M.R) @@ -567,8 +570,8 @@ l_range = (1:l) .+ (l_sub_start -1) @test collect(d_M.R' * d_C) ≈ (M.R' * C) @test collect(d_C' * d_M.R) ≈ (C' * M.R) @test collect(d_C' * d_M.R') ≈ (C' * M.R') - A_view = view(A, m_sub_range, n_sub_range) - d_A_view = view(d_A, m_sub_range, n_sub_range) + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) M_view = qr(A_view) d_M_view = qr(d_A_view) B_view = view(B, m_subrange, l_subrange) From 952e1ed83fcf018c55c46c854eb15dedfbf52bf4 Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Thu, 9 Feb 2023 16:59:42 -0500 Subject: [PATCH 04/18] Resolving TODOs in test --- test/cusolver/dense.jl | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/test/cusolver/dense.jl b/test/cusolver/dense.jl index 9029b34fd7..90c04b0971 100644 --- a/test/cusolver/dense.jl +++ b/test/cusolver/dense.jl @@ -509,9 +509,8 @@ l_range = (1:l) .+ (l_sub_start -1) @test collect(d_M \ d_B) ≈ M \ B @test collect(d_M.Q * d_B) ≈ (M.Q * B) @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) - #TO DO: this needs to be fixed in GPUArrays - #@test collect(d_B' * d_M.Q) ≈ (B' * M.Q) - #@test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') @test collect(d_M.R * d_C) ≈ (M.R * C) @test collect(d_M.R' * d_C) ≈ (M.R' * C) @test collect(d_C' * d_M.R) ≈ (C' * M.R) @@ -527,9 +526,8 @@ l_range = (1:l) .+ (l_sub_start -1) @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view @test collect(d_M_view.Q * d_B_view) ≈ (M_view.Q * B_view) @test collect(d_M_view.Q' * d_B_view) ≈ (M_view.Q' * B_view) - #TO DO: this needs to be fixed in GPUArrays - #@test collect(d_B_view' * d_M_view.Q) ≈ (B_view' * M_view.Q) - #@test collect(d_B_view' * d_M_view.Q') ≈ (B_view' * M_view.Q') + @test collect(d_B_view' * d_M_view.Q) ≈ (B_view' * M_view.Q) + @test collect(d_B_view' * d_M_view.Q') ≈ (B_view' * M_view.Q') @test collect(d_M_view.R * d_C_view) ≈ (M_view.R * C_view) @test collect(d_M_view.R' * d_C_view) ≈ (M_view.R' * C_view) @test collect(d_C_view' * d_M_view.R) ≈ (C_view' * M_view.R) @@ -545,9 +543,8 @@ l_range = (1:l) .+ (l_sub_start -1) @test collect(d_M \ d_B) ≈ M \ B @test collect(d_M.Q * d_B) ≈ (M.Q * B) @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) - #TO DO: this needs to be fixed in GPUArrays - #@test collect(d_B' * d_M.Q) ≈ (B' * M.Q) - #@test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') @test collect(d_M.R * d_C) ≈ (M.R * C) @test collect(d_M.R' * d_C) ≈ (M.R' * C) @test collect(d_C' * d_M.R) ≈ (C' * M.R) From 3c0642c040e98edebffc440f0ca32d9cec53523c Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Thu, 9 Feb 2023 17:06:17 -0500 Subject: [PATCH 05/18] Changes to manifest --- Manifest.toml | 54 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index d727d962dd..5cc3bdee68 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -8,12 +8,13 @@ version = "1.2.1" [[Adapt]] deps = ["LinearAlgebra"] -git-tree-sha1 = "0310e08cb19f5da31d08341c6120c047598f5b9c" +git-tree-sha1 = "195c5505521008abea5aee4f96930717958eac6f" uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "3.5.0" +version = "3.4.0" [[ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" [[Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" @@ -58,9 +59,9 @@ version = "1.15.7" [[ChangesOfVariables]] deps = ["ChainRulesCore", "LinearAlgebra", "Test"] -git-tree-sha1 = "844b061c104c408b24537482469400af6075aae4" +git-tree-sha1 = "38f7a08f19d8810338d4f5085211c7dfa5d5bdd8" uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" -version = "0.1.5" +version = "0.1.4" [[Compat]] deps = ["Dates", "LinearAlgebra", "UUIDs"] @@ -71,6 +72,7 @@ version = "4.5.0" [[CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "0.5.2+0" [[Dates]] deps = ["Printf"] @@ -83,23 +85,27 @@ uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" version = "0.9.3" [[Downloads]] -deps = ["ArgTools", "LibCURL", "NetworkOptions"] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" [[ExprTools]] git-tree-sha1 = "56559bbef6ca5ea0c0818fa5c90320398a6fbf8d" uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" version = "0.1.8" +[[FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + [[GPUArrays]] deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] -git-tree-sha1 = "494f1e456000c00c93dde79b38094e023f639dac" +path = "C:\\Users\\evely\\.julia\\dev\\GPUArrays" uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" -version = "8.6.1" +version = "8.6.2" [[GPUArraysCore]] deps = ["Adapt"] -git-tree-sha1 = "57f7cde02d7a53c9d1d28443b9f11ac5fbe7ebc9" +path = "../GPUArrays/lib/GPUArraysCore" uuid = "46192b85-c4d5-4398-a991-12ede77f4527" version = "0.1.3" @@ -149,10 +155,12 @@ uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" [[LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.3" [[LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "7.84.0+0" [[LibGit2]] deps = ["Base64", "NetworkOptions", "Printf", "SHA"] @@ -161,19 +169,20 @@ uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.10.2+0" [[Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" [[LinearAlgebra]] -deps = ["Libdl"] +deps = ["Libdl", "libblastrampoline_jll"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[LogExpFunctions]] deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"] -git-tree-sha1 = "45b288af6956e67e621c5cbb2d75a261ab58300b" +git-tree-sha1 = "946607f84feb96220f480e0422d3484c49c00239" uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" -version = "0.3.20" +version = "0.3.19" [[Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" @@ -185,16 +194,25 @@ uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.0+0" [[MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2022.2.1" [[NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.20+0" [[OpenLibm_jll]] deps = ["Artifacts", "Libdl"] uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +version = "0.8.1+0" [[OpenSpecFun_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] @@ -205,6 +223,7 @@ version = "0.5.5+0" [[Pkg]] deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.8.0" [[Preferences]] deps = ["TOML"] @@ -221,7 +240,7 @@ deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [[Random]] -deps = ["Serialization"] +deps = ["SHA", "Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[Random123]] @@ -249,6 +268,7 @@ version = "1.3.0" [[SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" [[Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" @@ -273,10 +293,12 @@ uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [[TOML]] deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.0" [[Tar]] deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" [[Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] @@ -298,11 +320,19 @@ uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [[Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.12+3" + +[[libblastrampoline_jll]] +deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.1.1+0" [[nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.48.0+0" [[p7zip_jll]] deps = ["Artifacts", "Libdl"] uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+0" From 81d489b4823580b422d29842a80124462855959f Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 9 Feb 2023 17:43:24 -0500 Subject: [PATCH 06/18] use Manifest.toml from master --- Manifest.toml | 54 ++++++++++++--------------------------------------- 1 file changed, 12 insertions(+), 42 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 5cc3bdee68..d727d962dd 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -8,13 +8,12 @@ version = "1.2.1" [[Adapt]] deps = ["LinearAlgebra"] -git-tree-sha1 = "195c5505521008abea5aee4f96930717958eac6f" +git-tree-sha1 = "0310e08cb19f5da31d08341c6120c047598f5b9c" uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "3.4.0" +version = "3.5.0" [[ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" -version = "1.1.1" [[Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" @@ -59,9 +58,9 @@ version = "1.15.7" [[ChangesOfVariables]] deps = ["ChainRulesCore", "LinearAlgebra", "Test"] -git-tree-sha1 = "38f7a08f19d8810338d4f5085211c7dfa5d5bdd8" +git-tree-sha1 = "844b061c104c408b24537482469400af6075aae4" uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" -version = "0.1.4" +version = "0.1.5" [[Compat]] deps = ["Dates", "LinearAlgebra", "UUIDs"] @@ -72,7 +71,6 @@ version = "4.5.0" [[CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "0.5.2+0" [[Dates]] deps = ["Printf"] @@ -85,27 +83,23 @@ uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" version = "0.9.3" [[Downloads]] -deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +deps = ["ArgTools", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" -version = "1.6.0" [[ExprTools]] git-tree-sha1 = "56559bbef6ca5ea0c0818fa5c90320398a6fbf8d" uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" version = "0.1.8" -[[FileWatching]] -uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" - [[GPUArrays]] deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] -path = "C:\\Users\\evely\\.julia\\dev\\GPUArrays" +git-tree-sha1 = "494f1e456000c00c93dde79b38094e023f639dac" uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" -version = "8.6.2" +version = "8.6.1" [[GPUArraysCore]] deps = ["Adapt"] -path = "../GPUArrays/lib/GPUArraysCore" +git-tree-sha1 = "57f7cde02d7a53c9d1d28443b9f11ac5fbe7ebc9" uuid = "46192b85-c4d5-4398-a991-12ede77f4527" version = "0.1.3" @@ -155,12 +149,10 @@ uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" [[LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" -version = "0.6.3" [[LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "7.84.0+0" [[LibGit2]] deps = ["Base64", "NetworkOptions", "Printf", "SHA"] @@ -169,20 +161,19 @@ uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.10.2+0" [[Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" [[LinearAlgebra]] -deps = ["Libdl", "libblastrampoline_jll"] +deps = ["Libdl"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[LogExpFunctions]] deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"] -git-tree-sha1 = "946607f84feb96220f480e0422d3484c49c00239" +git-tree-sha1 = "45b288af6956e67e621c5cbb2d75a261ab58300b" uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" -version = "0.3.19" +version = "0.3.20" [[Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" @@ -194,25 +185,16 @@ uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.28.0+0" [[MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2022.2.1" [[NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" -version = "1.2.0" - -[[OpenBLAS_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] -uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.20+0" [[OpenLibm_jll]] deps = ["Artifacts", "Libdl"] uuid = "05823500-19ac-5b8b-9628-191a04bc5112" -version = "0.8.1+0" [[OpenSpecFun_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] @@ -223,7 +205,6 @@ version = "0.5.5+0" [[Pkg]] deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -version = "1.8.0" [[Preferences]] deps = ["TOML"] @@ -240,7 +221,7 @@ deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [[Random]] -deps = ["SHA", "Serialization"] +deps = ["Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[Random123]] @@ -268,7 +249,6 @@ version = "1.3.0" [[SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" -version = "0.7.0" [[Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" @@ -293,12 +273,10 @@ uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [[TOML]] deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" -version = "1.0.0" [[Tar]] deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" -version = "1.10.0" [[Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] @@ -320,19 +298,11 @@ uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [[Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.12+3" - -[[libblastrampoline_jll]] -deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] -uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" -version = "5.1.1+0" [[nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.48.0+0" [[p7zip_jll]] deps = ["Artifacts", "Libdl"] uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" -version = "17.4.0+0" From 875dddbe2d773df539d04a5d14cf7e06901cee78 Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Thu, 9 Feb 2023 17:46:53 -0500 Subject: [PATCH 07/18] Update lib/cusolver/linalg.jl Co-authored-by: Valentin Churavy --- lib/cusolver/linalg.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/cusolver/linalg.jl b/lib/cusolver/linalg.jl index 89949b408a..c252d10202 100644 --- a/lib/cusolver/linalg.jl +++ b/lib/cusolver/linalg.jl @@ -101,7 +101,6 @@ using LinearAlgebra: Factorization, AbstractQ, QRCompactWY, QRCompactWYQ, QRPack if VERSION >= v"1.8-" -LinearAlgebra.qr!(A::CuMatrix{T}) where T = QR(geqrf!(A::CuMatrix{T})...) LinearAlgebra.qr!(A::StridedCuMatrix{T}) where T = QR(geqrf!(A::StridedCuMatrix{T})...) # conversions From aa88c71d82816680a1dbf6f07da2af67944d1180 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 15 Feb 2023 14:54:22 -0500 Subject: [PATCH 08/18] Update Manifest and Project --- Manifest.toml | 24 ++++++++++++------------ Project.toml | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index d727d962dd..7c514e9a95 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -64,9 +64,9 @@ version = "0.1.5" [[Compat]] deps = ["Dates", "LinearAlgebra", "UUIDs"] -git-tree-sha1 = "00a2cccc7f098ff3b66806862d275ca3db9e6e5a" +git-tree-sha1 = "61fdd77467a5c3ad071ef8277ac6bd6af7dd4c04" uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "4.5.0" +version = "4.6.0" [[CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] @@ -93,21 +93,21 @@ version = "0.1.8" [[GPUArrays]] deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] -git-tree-sha1 = "494f1e456000c00c93dde79b38094e023f639dac" +git-tree-sha1 = "a28f752ffab0ccd6660fc7af5ad1c9ad176f45f7" uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" -version = "8.6.1" +version = "8.6.3" [[GPUArraysCore]] deps = ["Adapt"] -git-tree-sha1 = "57f7cde02d7a53c9d1d28443b9f11ac5fbe7ebc9" +git-tree-sha1 = "1cd7f0af1aa58abc02ea1d872953a97359cb87fa" uuid = "46192b85-c4d5-4398-a991-12ede77f4527" -version = "0.1.3" +version = "0.1.4" [[GPUCompiler]] deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"] -git-tree-sha1 = "48832a7cacbe56e591a7bef690c78b9d00bcc692" +git-tree-sha1 = "95185985a5d2388c6d0fedb06181ad4ddd40e0cb" uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" -version = "0.17.1" +version = "0.17.2" [[InteractiveUtils]] deps = ["Markdown"] @@ -132,9 +132,9 @@ version = "1.4.1" [[LLVM]] deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] -git-tree-sha1 = "b8ae281340f0d3e973aae7b96fb7502b0119b376" +git-tree-sha1 = "df115c31f5c163697eede495918d8e85045c8f04" uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "4.15.0" +version = "4.16.0" [[LLVMExtra_jll]] deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg", "TOML"] @@ -171,9 +171,9 @@ uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[LogExpFunctions]] deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"] -git-tree-sha1 = "45b288af6956e67e621c5cbb2d75a261ab58300b" +git-tree-sha1 = "680e733c3a0a9cea9e935c8c2184aea6a63fa0b5" uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" -version = "0.3.20" +version = "0.3.21" [[Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" diff --git a/Project.toml b/Project.toml index 378f23c04f..39eb586e99 100644 --- a/Project.toml +++ b/Project.toml @@ -38,7 +38,7 @@ CUDA_Driver_jll = "0.2" CUDA_Runtime_Discovery = "0.1" CUDA_Runtime_jll = "0.2.3" ExprTools = "0.1" -GPUArrays = "8.6" +GPUArrays = "8.6.3" GPUCompiler = "0.17" LLVM = "4.15" Preferences = "1" From 928cbf804b0a117380486cfc9306e0dc9f0cf036 Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Thu, 16 Feb 2023 11:14:35 -0500 Subject: [PATCH 09/18] Repairs to tests - dimensions of test for views --- test/cusolver/dense.jl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/cusolver/dense.jl b/test/cusolver/dense.jl index 90c04b0971..b29fea8640 100644 --- a/test/cusolver/dense.jl +++ b/test/cusolver/dense.jl @@ -434,18 +434,23 @@ l_range = (1:l) .+ (l_sub_start -1) end end - - Q, R = F - dQ, dR = d_F - @test collect(dQ*dR) ≈ A - @test collect(dR * dQ') ≈ (R * Q') A = rand(elty, n, m) + F = qr(A) d_A = CuArray(A) d_F = qr(d_A) + Q, R = F + dQ, dR = d_F + @test collect(dQ*dR) ≈ A + @test collect(dR * dQ') ≈ (R * Q') @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A) A_view = view(A, m_subrange, n_subrange) + F = qr(A_view) d_A_view = view(d_A, m_subrange, n_subrange) d_F = qr(d_A_view) + Q, R = F + dQ, dR = d_F + @test collect(dQ*dR) ≈ A_view + @test collect(dR * dQ') ≈ (R * Q') @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A_view) A = rand(elty, m, n) From 34b71d505bcbff5f75dffacb4d5dbeb621f86b85 Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Wed, 22 Feb 2023 01:32:46 -0500 Subject: [PATCH 10/18] Resolving dimension mismatches in tests --- test/cusolver/dense.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cusolver/dense.jl b/test/cusolver/dense.jl index b29fea8640..feed661c9a 100644 --- a/test/cusolver/dense.jl +++ b/test/cusolver/dense.jl @@ -441,7 +441,7 @@ l_range = (1:l) .+ (l_sub_start -1) Q, R = F dQ, dR = d_F @test collect(dQ*dR) ≈ A - @test collect(dR * dQ') ≈ (R * Q') + @test collect(dR' * dQ') ≈ A' @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A) A_view = view(A, m_subrange, n_subrange) F = qr(A_view) @@ -450,7 +450,7 @@ l_range = (1:l) .+ (l_sub_start -1) Q, R = F dQ, dR = d_F @test collect(dQ*dR) ≈ A_view - @test collect(dR * dQ') ≈ (R * Q') + @test collect(dR' * dQ') ≈ A_view' @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A_view) A = rand(elty, m, n) From 040cf8aeae532b1958f1ebd12e4d08d84177d58a Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Wed, 22 Feb 2023 18:22:30 -0500 Subject: [PATCH 11/18] Commenting out one buggy test - TO DO --- test/cusolver/dense.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cusolver/dense.jl b/test/cusolver/dense.jl index feed661c9a..efb6ef6f88 100644 --- a/test/cusolver/dense.jl +++ b/test/cusolver/dense.jl @@ -450,7 +450,7 @@ l_range = (1:l) .+ (l_sub_start -1) Q, R = F dQ, dR = d_F @test collect(dQ*dR) ≈ A_view - @test collect(dR' * dQ') ≈ A_view' + #@test collect(dR' * dQ') ≈ A_view' #TO DO: resolve this bug @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A_view) A = rand(elty, m, n) From 7ebff725996b03b02381d73440d75136eae3dafe Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Fri, 24 Feb 2023 10:10:48 -0500 Subject: [PATCH 12/18] Resolving todo by adapting dimension issues in test --- test/cusolver/dense.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/cusolver/dense.jl b/test/cusolver/dense.jl index efb6ef6f88..ef55d98dc1 100644 --- a/test/cusolver/dense.jl +++ b/test/cusolver/dense.jl @@ -443,14 +443,14 @@ l_range = (1:l) .+ (l_sub_start -1) @test collect(dQ*dR) ≈ A @test collect(dR' * dQ') ≈ A' @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A) - A_view = view(A, m_subrange, n_subrange) + A_view = view(A, n_subrange, m_subrange) F = qr(A_view) - d_A_view = view(d_A, m_subrange, n_subrange) + d_A_view = view(d_A, n_subrange, m_subrange) d_F = qr(d_A_view) Q, R = F dQ, dR = d_F @test collect(dQ*dR) ≈ A_view - #@test collect(dR' * dQ') ≈ A_view' #TO DO: resolve this bug + @test collect(dR' * dQ') ≈ A_view' @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A_view) A = rand(elty, m, n) From 0b16b3a114dd7462d5d7883a71ce0941f1541989 Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Thu, 2 Mar 2023 15:56:07 -0500 Subject: [PATCH 13/18] Adding support for in-place qr of views --- lib/cusolver/linalg.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/cusolver/linalg.jl b/lib/cusolver/linalg.jl index c252d10202..95a71d2681 100644 --- a/lib/cusolver/linalg.jl +++ b/lib/cusolver/linalg.jl @@ -101,7 +101,8 @@ using LinearAlgebra: Factorization, AbstractQ, QRCompactWY, QRCompactWYQ, QRPack if VERSION >= v"1.8-" -LinearAlgebra.qr!(A::StridedCuMatrix{T}) where T = QR(geqrf!(A::StridedCuMatrix{T})...) +LinearAlgebra.qr!(A::CuMatrix{T}) where T = CuQR(geqrf!(A::CuMatrix{T})...) +LinearAlgebra.qr!(A::StridedCuMatrix{T}) where T = CuQR(geqrf!(A::StridedCuMatrix{T})...) # conversions CuMatrix(F::Union{QR,QRCompactWY}) = CuArray(AbstractArray(F)) @@ -234,7 +235,7 @@ end Base.similar(Q::CuQRPackedQ, ::Type{T}, dims::Dims{N}) where {T,N} = CuArray{T,N}(undef, dims) -LinearAlgebra.qr!(A::CuMatrix{T}) where T = CuQR(geqrf!(A::CuMatrix{T})...) + Base.size(A::CuQR) = size(A.factors) Base.size(A::CuQRPackedQ, dim::Integer) = 0 < dim ? (dim <= 2 ? size(A.factors, 1) : 1) : throw(BoundsError()) CUDA.CuMatrix(A::CuQRPackedQ) = orgqr!(copy(A.factors), A.τ) From 8941d0a9b2acffe51cd4aed0d6bc76ed41e1cbe2 Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Thu, 2 Mar 2023 17:36:53 -0500 Subject: [PATCH 14/18] Restoring order of function definitions --- lib/cusolver/linalg.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/cusolver/linalg.jl b/lib/cusolver/linalg.jl index 95a71d2681..dfc6bd80b9 100644 --- a/lib/cusolver/linalg.jl +++ b/lib/cusolver/linalg.jl @@ -101,8 +101,7 @@ using LinearAlgebra: Factorization, AbstractQ, QRCompactWY, QRCompactWYQ, QRPack if VERSION >= v"1.8-" -LinearAlgebra.qr!(A::CuMatrix{T}) where T = CuQR(geqrf!(A::CuMatrix{T})...) -LinearAlgebra.qr!(A::StridedCuMatrix{T}) where T = CuQR(geqrf!(A::StridedCuMatrix{T})...) +LinearAlgebra.qr!(A::StridedCuMatrix{T}) where T = QR(geqrf!(A::StridedCuMatrix{T})...) # conversions CuMatrix(F::Union{QR,QRCompactWY}) = CuArray(AbstractArray(F)) @@ -234,7 +233,8 @@ end # avoid the generic similar fallback that returns a CPU array Base.similar(Q::CuQRPackedQ, ::Type{T}, dims::Dims{N}) where {T,N} = CuArray{T,N}(undef, dims) - + +LinearAlgebra.qr!(A::StridedCuMatrix{T}) where T = CuQR(geqrf!(A::StridedCuMatrix{T})...) Base.size(A::CuQR) = size(A.factors) Base.size(A::CuQRPackedQ, dim::Integer) = 0 < dim ? (dim <= 2 ? size(A.factors, 1) : 1) : throw(BoundsError()) From 762f3e098f5a785c7dd5b301fdbb946c1af74918 Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Fri, 3 Mar 2023 14:28:45 -0500 Subject: [PATCH 15/18] Adding tests for inplace qr of views --- test/cusolver/dense.jl | 138 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) diff --git a/test/cusolver/dense.jl b/test/cusolver/dense.jl index ef55d98dc1..cfa20d80a7 100644 --- a/test/cusolver/dense.jl +++ b/test/cusolver/dense.jl @@ -483,6 +483,20 @@ l_range = (1:l) .+ (l_sub_start -1) @test Array(d_q) ≈ Array(q) @test Array(d_r) ≈ Array(r) + + A = rand(elty, n, m) + d_A = CuArray(A) + d_q, d_r = qr!(d_A) + q, r = qr!(A) + @test collect(d_q) ≈ Array(q) + @test collect(d_r) ≈ Array(r) + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) + d_q, d_r = qr!(d_A_view) + q, r = qr!(A_view) + @test collect(d_q) ≈ Array(q) + @test collect(d_r) ≈ Array(r) + A = rand(elty, n) # A and B are vectors d_A = CuArray(A) M = qr(A) @@ -503,6 +517,26 @@ l_range = (1:l) .+ (l_sub_start -1) d_B = view(d_B_large, n_range) @test collect(d_M \ d_B) ≈ M \ B + A = rand(elty, n) # A and B are vectors + d_A = CuArray(A) + M = qr!(A) + d_M = qr!(d_A) + B = rand(elty, n) + d_B = CuArray(B) + @test collect(d_M \ d_B) ≈ M \ B + A_view = view(A, n_subrange) + d_A_view = view(d_A, n_subrange) + M_view = qr!(A_view) + d_M_view = qr!(d_A_view) + B_view = view(B, n_subrange) + d_B_view = view(d_B, n_subrange) + @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view + B_large = rand(elty, n_large) + B = view(B_large, n_range) + d_B_large = CuArray(B_large) + d_B = view(d_B_large, n_range) + @test collect(d_M \ d_B) ≈ M \ B + A = rand(elty, m, n) # A is a matrix and B,C is a vector d_A = CuArray(A) M = qr(A) @@ -555,6 +589,58 @@ l_range = (1:l) .+ (l_sub_start -1) @test collect(d_C' * d_M.R) ≈ (C' * M.R) @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A = rand(elty, m, n) # A is a matrix and B,C is a vector + d_A = CuArray(A) + M = qr!(A) + d_M = qr!(d_A) + B = rand(elty, m) + d_B = CuArray(B) + C = rand(elty, n) + d_C = CuArray(C) + @test collect(d_M \ d_B) ≈ M \ B + @test collect(d_M.Q * d_B) ≈ (M.Q * B) + @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_M.R * d_C) ≈ (M.R * C) + @test collect(d_M.R' * d_C) ≈ (M.R' * C) + @test collect(d_C' * d_M.R) ≈ (C' * M.R) + @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) + M_view = qr!(A_view) + d_M_view = qr!(d_A_view) + B_view = view(B, m_subrange) + d_B_view = view(d_B, m_subrange) + C_view = view(C, n_subrange) + d_C_view = view(d_C, n_subrange) + @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view + @test collect(d_M_view.Q * d_B_view) ≈ (M_view.Q * B_view) + @test collect(d_M_view.Q' * d_B_view) ≈ (M_view.Q' * B_view) + @test collect(d_B_view' * d_M_view.Q) ≈ (B_view' * M_view.Q) + @test collect(d_B_view' * d_M_view.Q') ≈ (B_view' * M_view.Q') + @test collect(d_M_view.R * d_C_view) ≈ (M_view.R * C_view) + @test collect(d_M_view.R' * d_C_view) ≈ (M_view.R' * C_view) + @test collect(d_C_view' * d_M_view.R) ≈ (C_view' * M_view.R) + @test collect(d_C_view' * d_M_view.R') ≈ (C_view' * M_view.R') + B_large = rand(elty, m_large) + B = view(B_large, m_range) + d_B_large = CuArray(B_large) + d_B = view(d_B_large, m_range) + C_large = rand(elty, n_large) + C = view(C_large, n_range) + d_C_large = CuArray(C_large) + d_C = view(d_C_large, n_range) + @test collect(d_M \ d_B) ≈ M \ B + @test collect(d_M.Q * d_B) ≈ (M.Q * B) + @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_M.R * d_C) ≈ (M.R * C) + @test collect(d_M.R' * d_C) ≈ (M.R' * C) + @test collect(d_C' * d_M.R) ≈ (C' * M.R) + @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A = rand(elty, m, n) # A and B,C are matrices d_A = CuArray(A) M = qr(A) @@ -607,6 +693,58 @@ l_range = (1:l) .+ (l_sub_start -1) @test collect(d_C' * d_M.R) ≈ (C' * M.R) @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A = rand(elty, m, n) # A and B,C are matrices + d_A = CuArray(A) + M = qr!(A) + d_M = qr!(d_A) + B = rand(elty, m, l) #different second dimension to verify whether dimensions agree + d_B = CuArray(B) + C = rand(elty, n, l) #different second dimension to verify whether dimensions agree + d_C = CuArray(C) + @test collect(d_M \ d_B) ≈ (M \ B) + @test collect(d_M.Q * d_B) ≈ (M.Q * B) + @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_M.R * d_C) ≈ (M.R * C) + @test collect(d_M.R' * d_C) ≈ (M.R' * C) + @test collect(d_C' * d_M.R) ≈ (C' * M.R) + @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) + M_view = qr!(A_view) + d_M_view = qr!(d_A_view) + B_view = view(B, m_subrange, l_subrange) + d_B_view = view(d_B, m_subrange, l_subrange) + C_view = view(C, n_subrange, l_subrange) + d_C_view = view(d_C, n_subrange, l_subrange) + @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view + @test collect(d_M_view.Q * d_B_view) ≈ (M_view.Q * B_view) + @test collect(d_M_view.Q' * d_B_view) ≈ (M_view.Q' * B_view) + @test collect(d_B_view' * d_M_view.Q) ≈ (B_view' * M_view.Q) + @test collect(d_B_view' * d_M_view.Q') ≈ (B_view' * M_view.Q') + @test collect(d_M_view.R * d_C_view) ≈ (M_view.R * C_view) + @test collect(d_M_view.R' * d_C_view) ≈ (M_view.R' * C_view) + @test collect(d_C_view' * d_M_view.R) ≈ (C_view' * M_view.R) + @test collect(d_C_view' * d_M_view.R') ≈ (C_view' * M_view.R') + B_large = rand(elty, m_large, l_large) + B = view(B_large, m_range, l_range) + d_B_large = CuArray(B_large) + d_B = view(d_B_large, m_range, l_range) + C_large = rand(elty, n_large, l_large) + C = view(C_large, n_range, l_range) + d_C_large = CuArray(C_large) + d_C = view(d_C_large, n_range, l_range) + @test collect(d_M \ d_B) ≈ M \ B + @test collect(d_M.Q * d_B) ≈ (M.Q * B) + @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_M.R * d_C) ≈ (M.R * C) + @test collect(d_M.R' * d_C) ≈ (M.R' * C) + @test collect(d_C' * d_M.R) ≈ (C' * M.R) + @test collect(d_C' * d_M.R') ≈ (C' * M.R') + end @testset "potrsBatched!" begin From 31fb9084ce268b4e5a81fa95143f0875142d7fe6 Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Thu, 9 Mar 2023 19:09:36 -0500 Subject: [PATCH 16/18] Updating dependency on QR_views branch of GPUArrays --- Manifest.toml | 10 +++++++--- Project.toml | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 7c514e9a95..8885c645e3 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -83,7 +83,7 @@ uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" version = "0.9.3" [[Downloads]] -deps = ["ArgTools", "LibCURL", "NetworkOptions"] +deps = ["ArgTools","LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" [[ExprTools]] @@ -93,13 +93,17 @@ version = "0.1.8" [[GPUArrays]] deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] -git-tree-sha1 = "a28f752ffab0ccd6660fc7af5ad1c9ad176f45f7" +git-tree-sha1 = "01f58909a0fc17890e586d1873426d5b9f211c8c" +repo-rev = "QR_views" +repo-url = "https://github.com/evelyne-ringoot/GPUArrays.jl" uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" version = "8.6.3" [[GPUArraysCore]] deps = ["Adapt"] -git-tree-sha1 = "1cd7f0af1aa58abc02ea1d872953a97359cb87fa" +git-tree-sha1 = "8b526908028c5d7a0e2ac340b99f6cbd170ae370" +repo-rev = "QR_views:lib/GPUArraysCore" +repo-url = "https://github.com/evelyne-ringoot/GPUArrays.jl" uuid = "46192b85-c4d5-4398-a991-12ede77f4527" version = "0.1.4" diff --git a/Project.toml b/Project.toml index 876af18df8..fb0b9bdc05 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2" CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae" ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04" GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" +GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" From 455c49b7495299b4260eaeb52f3423cb03f2dbc3 Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Thu, 9 Mar 2023 19:11:41 -0500 Subject: [PATCH 17/18] Adding support for in place QR of views --- lib/cusolver/dense.jl | 6 +++--- test/cusolver/dense.jl | 30 ++++++++---------------------- 2 files changed, 11 insertions(+), 25 deletions(-) diff --git a/lib/cusolver/dense.jl b/lib/cusolver/dense.jl index e06073dfb3..b23e5403ff 100644 --- a/lib/cusolver/dense.jl +++ b/lib/cusolver/dense.jl @@ -245,9 +245,9 @@ for (bname, fname, elty) in ((:cusolverDnSormqr_bufferSize, :cusolverDnSormqr, : @eval begin function ormqr!(side::Char, trans::Char, - A::CuMatrix{$elty}, - tau::CuVector{$elty}, - C::CuVecOrMat{$elty}) + A::StridedCuMatrix{$elty}, + tau::StridedCuVector{$elty}, + C::StridedCuVecOrMat{$elty}) m,n = ndims(C) == 2 ? size(C) : (size(C, 1), 1) mA = size(A, 1) k = length(tau) diff --git a/test/cusolver/dense.jl b/test/cusolver/dense.jl index cfa20d80a7..42c00f7a99 100644 --- a/test/cusolver/dense.jl +++ b/test/cusolver/dense.jl @@ -490,12 +490,14 @@ l_range = (1:l) .+ (l_sub_start -1) q, r = qr!(A) @test collect(d_q) ≈ Array(q) @test collect(d_r) ≈ Array(r) + A = rand(elty, n, m) + d_A = CuArray(A) A_view = view(A, m_subrange, n_subrange) d_A_view = view(d_A, m_subrange, n_subrange) d_q, d_r = qr!(d_A_view) q, r = qr!(A_view) - @test collect(d_q) ≈ Array(q) - @test collect(d_r) ≈ Array(r) + @test collect(d_q) ≈ collect(q) + @test collect(d_r) ≈ collect(r) A = rand(elty, n) # A and B are vectors d_A = CuArray(A) @@ -517,26 +519,6 @@ l_range = (1:l) .+ (l_sub_start -1) d_B = view(d_B_large, n_range) @test collect(d_M \ d_B) ≈ M \ B - A = rand(elty, n) # A and B are vectors - d_A = CuArray(A) - M = qr!(A) - d_M = qr!(d_A) - B = rand(elty, n) - d_B = CuArray(B) - @test collect(d_M \ d_B) ≈ M \ B - A_view = view(A, n_subrange) - d_A_view = view(d_A, n_subrange) - M_view = qr!(A_view) - d_M_view = qr!(d_A_view) - B_view = view(B, n_subrange) - d_B_view = view(d_B, n_subrange) - @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view - B_large = rand(elty, n_large) - B = view(B_large, n_range) - d_B_large = CuArray(B_large) - d_B = view(d_B_large, n_range) - @test collect(d_M \ d_B) ≈ M \ B - A = rand(elty, m, n) # A is a matrix and B,C is a vector d_A = CuArray(A) M = qr(A) @@ -606,6 +588,8 @@ l_range = (1:l) .+ (l_sub_start -1) @test collect(d_M.R' * d_C) ≈ (M.R' * C) @test collect(d_C' * d_M.R) ≈ (C' * M.R) @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A = rand(elty, m, n) + d_A = CuArray(A) A_view = view(A, m_subrange, n_subrange) d_A_view = view(d_A, m_subrange, n_subrange) M_view = qr!(A_view) @@ -710,6 +694,8 @@ l_range = (1:l) .+ (l_sub_start -1) @test collect(d_M.R' * d_C) ≈ (M.R' * C) @test collect(d_C' * d_M.R) ≈ (C' * M.R) @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A = rand(elty, m, n) + d_A = CuArray(A) A_view = view(A, m_subrange, n_subrange) d_A_view = view(d_A, m_subrange, n_subrange) M_view = qr!(A_view) From d9ca07a8fc63e56702d292f4f408a1efa51c4eeb Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Thu, 9 Mar 2023 19:12:15 -0500 Subject: [PATCH 18/18] Adding support for in place QR of views in LinearAlgebra --- lib/cusolver/linalg.jl | 93 +++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 42 deletions(-) diff --git a/lib/cusolver/linalg.jl b/lib/cusolver/linalg.jl index dfc6bd80b9..7f702a68e8 100644 --- a/lib/cusolver/linalg.jl +++ b/lib/cusolver/linalg.jl @@ -20,12 +20,15 @@ _copywitheltype(::Type{T}, As...) where {T} = map(A -> copyto!(similar(A, T), A) # matrix division -const CuMatOrAdj{T} = Union{CuMatrix, - LinearAlgebra.Adjoint{T, <:CuMatrix{T}}, - LinearAlgebra.Transpose{T, <:CuMatrix{T}}} -const CuOrAdj{T} = Union{CuVecOrMat, - LinearAlgebra.Adjoint{T, <:CuVecOrMat{T}}, - LinearAlgebra.Transpose{T, <:CuVecOrMat{T}}} +const CuMatOrAdj{T} = Union{StridedCuMatrix, + LinearAlgebra.Adjoint{T, <:StridedCuMatrix{T}}, + LinearAlgebra.Transpose{T, <:StridedCuMatrix{T}}} +const CuOrAdj{T} = Union{StridedCuVector, + LinearAlgebra.Adjoint{T, <:StridedCuVector{T}}, + LinearAlgebra.Transpose{T, <:StridedCuVector{T}}, + StridedCuMatrix, + LinearAlgebra.Adjoint{T, <:StridedCuMatrix{T}}, + LinearAlgebra.Transpose{T, <:StridedCuMatrix{T}}} function Base.:\(_A::CuMatOrAdj, _B::CuOrAdj) A, B = copy_cublasfloat(_A, _B) @@ -101,15 +104,18 @@ using LinearAlgebra: Factorization, AbstractQ, QRCompactWY, QRCompactWYQ, QRPack if VERSION >= v"1.8-" + + LinearAlgebra.qr!(A::StridedCuMatrix{T}) where T = QR(geqrf!(A::StridedCuMatrix{T})...) + # conversions CuMatrix(F::Union{QR,QRCompactWY}) = CuArray(AbstractArray(F)) CuArray(F::Union{QR,QRCompactWY}) = CuMatrix(F) CuMatrix(F::QRPivoted) = CuArray(AbstractArray(F)) CuArray(F::QRPivoted) = CuMatrix(F) -function LinearAlgebra.ldiv!(_qr::QR, b::CuVector) +function LinearAlgebra.ldiv!(_qr::QR, b::StridedCuVector) m,n = size(_qr) _x = UpperTriangular(_qr.R[1:min(m,n), 1:n]) \ ((_qr.Q' * b)[1:n]) b[1:n] .= _x @@ -117,7 +123,7 @@ function LinearAlgebra.ldiv!(_qr::QR, b::CuVector) return b[1:n] end -function LinearAlgebra.ldiv!(_qr::QR, B::CuMatrix) +function LinearAlgebra.ldiv!(_qr::QR, B::StridedCuMatrix) m,n = size(_qr) _x = UpperTriangular(_qr.R[1:min(m,n), 1:n]) \ ((_qr.Q' * B)[1:n, 1:size(B, 2)]) B[1:n, 1:size(B, 2)] .= _x @@ -125,7 +131,7 @@ function LinearAlgebra.ldiv!(_qr::QR, B::CuMatrix) return B[1:n, 1:size(B, 2)] end -function LinearAlgebra.ldiv!(x::CuArray, _qr::QR, b::CuArray) +function LinearAlgebra.ldiv!(x::StridedCuArray, _qr::QR, b::StridedCuArray) _x = ldiv!(_qr, b) x .= vec(_x) unsafe_free!(_x) @@ -146,9 +152,11 @@ CuMatrix{T}(Q::QRCompactWYQ) where {T} = error("QRCompactWY format is not suppor Matrix{T}(Q::QRPackedQ{S,<:CuArray,<:CuArray}) where {T,S} = Array(CuMatrix{T}(Q)) Matrix{T}(Q::QRCompactWYQ{S,<:CuArray,<:CuArray}) where {T,S} = Array(CuMatrix{T}(Q)) + + # extracting the full matrix can be done with `collect` (which defaults to `Array`) -function Base.collect(src::Union{QRPackedQ{<:Any,<:CuArray,<:CuArray}, - QRCompactWYQ{<:Any,<:CuArray,<:CuArray}}) +function Base.collect(src::Union{QRPackedQ{<:Any,<:StridedCuArray,<:StridedCuArray}, + QRCompactWYQ{<:Any,<:StridedCuArray,<:StridedCuArray}}) dest = similar(src) copyto!(dest, I) lmul!(src, dest) @@ -156,61 +164,62 @@ function Base.collect(src::Union{QRPackedQ{<:Any,<:CuArray,<:CuArray}, end # avoid the generic similar fallback that returns a CPU array -Base.similar(Q::Union{QRPackedQ{<:Any,<:CuArray,<:CuArray}, - QRCompactWYQ{<:Any,<:CuArray,<:CuArray}}, +Base.similar(Q::Union{QRPackedQ{<:Any,<:StridedCuArray,<:StridedCuArray}, + QRCompactWYQ{<:Any,<:StridedCuArray,<:StridedCuArray}}, ::Type{T}, dims::Dims{N}) where {T,N} = CuArray{T,N}(undef, dims) -function Base.getindex(Q::QRPackedQ{<:Any, <:CuArray}, ::Colon, j::Int) +function Base.getindex(Q::QRPackedQ{<:Any, <:StridedCuArray}, ::Colon, j::Int) y = CUDA.zeros(eltype(Q), size(Q, 2)) y[j] = 1 lmul!(Q, y) end + # multiplication by Q -LinearAlgebra.lmul!(A::QRPackedQ{T,<:CuArray,<:CuArray}, +LinearAlgebra.lmul!(A::QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}, B::CuVecOrMat{T}) where {T<:BlasFloat} = ormqr!('L', 'N', A.factors, A.τ, B) -LinearAlgebra.lmul!(adjA::Adjoint{T,<:QRPackedQ{T,<:CuArray,<:CuArray}}, +LinearAlgebra.lmul!(adjA::Adjoint{T,<:QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}}, B::CuVecOrMat{T}) where {T<:BlasReal} = ormqr!('L', 'T', parent(adjA).factors, parent(adjA).τ, B) -LinearAlgebra.lmul!(adjA::Adjoint{T,<:QRPackedQ{T,<:CuArray,<:CuArray}}, +LinearAlgebra.lmul!(adjA::Adjoint{T,<:QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}}, B::CuVecOrMat{T}) where {T<:BlasComplex} = ormqr!('L', 'C', parent(adjA).factors, parent(adjA).τ, B) -LinearAlgebra.lmul!(trA::Transpose{T,<:QRPackedQ{T,<:CuArray,<:CuArray}}, +LinearAlgebra.lmul!(trA::Transpose{T,<:QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}}, B::CuVecOrMat{T}) where {T<:BlasFloat} = ormqr!('L', 'T', parent(trA).factors, parent(trA).τ, B) LinearAlgebra.rmul!(A::CuVecOrMat{T}, - B::QRPackedQ{T,<:CuArray,<:CuArray}) where {T<:BlasFloat} = + B::QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}) where {T<:BlasFloat} = ormqr!('R', 'N', B.factors, B.τ, A) LinearAlgebra.rmul!(A::CuVecOrMat{T}, - adjB::Adjoint{<:Any,<:QRPackedQ{T,<:CuArray,<:CuArray}}) where {T<:BlasReal} = + adjB::Adjoint{<:Any,<:QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}}) where {T<:BlasReal} = ormqr!('R', 'T', parent(adjB).factors, parent(adjB).τ, A) LinearAlgebra.rmul!(A::CuVecOrMat{T}, - adjB::Adjoint{<:Any,<:QRPackedQ{T,<:CuArray,<:CuArray}}) where {T<:BlasComplex} = + adjB::Adjoint{<:Any,<:QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}}) where {T<:BlasComplex} = ormqr!('R', 'C', parent(adjB).factors, parent(adjB).τ, A) LinearAlgebra.rmul!(A::CuVecOrMat{T}, - trA::Transpose{<:Any,<:QRPackedQ{T,<:CuArray,<:CuArray}}) where {T<:BlasFloat} = + trA::Transpose{<:Any,<:QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}}) where {T<:BlasFloat} = ormqr!('R', 'T', parent(trA).factors, parent(adjB).τ, A) else struct CuQR{T} <: Factorization{T} - factors::CuMatrix - τ::CuVector{T} - CuQR{T}(factors::CuMatrix{T}, τ::CuVector{T}) where {T} = new(factors, τ) + factors::StridedCuMatrix + τ::StridedCuVector{T} + CuQR{T}(factors::StridedCuMatrix{T}, τ::StridedCuVector{T}) where {T} = new(factors, τ) end struct CuQRPackedQ{T} <: AbstractQ{T} - factors::CuMatrix{T} - τ::CuVector{T} - CuQRPackedQ{T}(factors::CuMatrix{T}, τ::CuVector{T}) where {T} = new(factors, τ) + factors::StridedCuMatrix{T} + τ::StridedCuVector{T} + CuQRPackedQ{T}(factors::StridedCuMatrix{T}, τ::StridedCuVector{T}) where {T} = new(factors, τ) end -CuQR(factors::CuMatrix{T}, τ::CuVector{T}) where {T} = +CuQR(factors::StridedCuMatrix{T}, τ::StridedCuVector{T}) where {T} = CuQR{T}(factors, τ) -CuQRPackedQ(factors::CuMatrix{T}, τ::CuVector{T}) where {T} = +CuQRPackedQ(factors::StridedCuMatrix{T}, τ::StridedCuVector{T}) where {T} = CuQRPackedQ{T}(factors, τ) # AbstractQ's `size` is the size of the full matrix, @@ -245,7 +254,7 @@ Base.Matrix(A::CuQRPackedQ) = Matrix(CuMatrix(A)) function Base.getproperty(A::CuQR, d::Symbol) m, n = size(getfield(A, :factors)) if d == :R - return triu!(A.factors[1:min(m, n), 1:n]) + return triu!(view(A.factors,1:min(m, n), 1:n)) elseif d == :Q return CuQRPackedQ(A.factors, A.τ) else @@ -259,25 +268,25 @@ Base.iterate(S::CuQR, ::Val{:R}) = (S.R, Val(:done)) Base.iterate(S::CuQR, ::Val{:done}) = nothing # Apply changes Q from the left -LinearAlgebra.lmul!(A::CuQRPackedQ{T}, B::CuVecOrMat{T}) where {T<:BlasFloat} = +LinearAlgebra.lmul!(A::CuQRPackedQ{T}, B::StridedCuVecOrMat{T}) where {T<:BlasFloat} = ormqr!('L', 'N', A.factors, A.τ, B) -LinearAlgebra.lmul!(adjA::Adjoint{T,<:CuQRPackedQ{T}}, B::CuVecOrMat{T}) where {T<:BlasReal} = +LinearAlgebra.lmul!(adjA::Adjoint{T,<:CuQRPackedQ{T}}, B::StridedCuVecOrMat{T}) where {T<:BlasReal} = ormqr!('L', 'T', parent(adjA).factors, parent(adjA).τ, B) -LinearAlgebra.lmul!(adjA::Adjoint{T,<:CuQRPackedQ{T}}, B::CuVecOrMat{T}) where {T<:BlasComplex} = +LinearAlgebra.lmul!(adjA::Adjoint{T,<:CuQRPackedQ{T}}, B::StridedCuVecOrMat{T}) where {T<:BlasComplex} = ormqr!('L', 'C', parent(adjA).factors, parent(adjA).τ, B) -LinearAlgebra.lmul!(trA::Transpose{T,<:CuQRPackedQ{T}}, B::CuVecOrMat{T}) where {T<:BlasFloat} = +LinearAlgebra.lmul!(trA::Transpose{T,<:CuQRPackedQ{T}}, B::StridedCuVecOrMat{T}) where {T<:BlasFloat} = ormqr!('L', 'T', parent(trA).factors, parent(trA).τ, B) # Apply changes Q from the right -LinearAlgebra.rmul!(A::CuVecOrMat{T}, B::CuQRPackedQ{T}) where {T<:BlasFloat} = +LinearAlgebra.rmul!(A::StridedCuVecOrMat{T}, B::CuQRPackedQ{T}) where {T<:BlasFloat} = ormqr!('R', 'N', B.factors, B.τ, A) -LinearAlgebra.rmul!(A::CuVecOrMat{T}, +LinearAlgebra.rmul!(A::StridedCuVecOrMat{T}, adjB::Adjoint{<:Any,<:CuQRPackedQ{T}}) where {T<:BlasReal} = ormqr!('R', 'T', parent(adjB).factors, parent(adjB).τ, A) -LinearAlgebra.rmul!(A::CuVecOrMat{T}, +LinearAlgebra.rmul!(A::StridedCuVecOrMat{T}, adjB::Adjoint{<:Any,<:CuQRPackedQ{T}}) where {T<:BlasComplex} = ormqr!('R', 'C', parent(adjB).factors, parent(adjB).τ, A) -LinearAlgebra.rmul!(A::CuVecOrMat{T}, +LinearAlgebra.rmul!(A::StridedCuVecOrMat{T}, trA::Transpose{<:Any,<:CuQRPackedQ{T}}) where {T<:BlasFloat} = ormqr!('R', 'T', parent(trA).factors, parent(adjB).τ, A) @@ -300,7 +309,7 @@ end LinearAlgebra.det(Q::CuQRPackedQ{<:Real}) = isodd(count(!iszero, Q.τ)) ? -1 : 1 LinearAlgebra.det(Q::CuQRPackedQ) = prod(τ -> iszero(τ) ? one(τ) : -sign(τ)^2, Q.τ) -function LinearAlgebra.ldiv!(_qr::CuQR, b::CuVector) +function LinearAlgebra.ldiv!(_qr::CuQR, b::StridedCuVector) m,n = size(_qr) _x = UpperTriangular(_qr.R[1:min(m,n), 1:n]) \ ((_qr.Q' * b)[1:n]) b[1:n] .= _x @@ -308,7 +317,7 @@ function LinearAlgebra.ldiv!(_qr::CuQR, b::CuVector) return b[1:n] end -function LinearAlgebra.ldiv!(_qr::CuQR, B::CuMatrix) +function LinearAlgebra.ldiv!(_qr::CuQR, B::StridedCuMatrix) m,n = size(_qr) _x = UpperTriangular(_qr.R[1:min(m,n), 1:n]) \ ((_qr.Q' * B)[1:n, 1:size(B, 2)]) B[1:n, 1:size(B, 2)] .= _x @@ -316,7 +325,7 @@ function LinearAlgebra.ldiv!(_qr::CuQR, B::CuMatrix) return B[1:n, 1:size(B, 2)] end -function LinearAlgebra.ldiv!(x::CuArray,_qr::CuQR, b::CuArray) +function LinearAlgebra.ldiv!(x::StridedCuArray,_qr::CuQR, b::StridedCuArray) _x = ldiv!(_qr, b) x .= vec(_x) unsafe_free!(_x)