diff --git a/lib/cusolver/dense.jl b/lib/cusolver/dense.jl index 27712a0fd5..0604623a56 100644 --- a/lib/cusolver/dense.jl +++ b/lib/cusolver/dense.jl @@ -248,9 +248,9 @@ for (bname, fname, elty) in ((:cusolverDnSormqr_bufferSize, :cusolverDnSormqr, : @eval begin function ormqr!(side::Char, trans::Char, - A::CuMatrix{$elty}, - tau::CuVector{$elty}, - C::CuVecOrMat{$elty}) + A::StridedCuMatrix{$elty}, + tau::StridedCuVector{$elty}, + C::StridedCuVecOrMat{$elty}) # Support transa = 'C' for real matrices trans = $elty <: Real && trans == 'C' ? 'T' : trans diff --git a/lib/cusolver/linalg.jl b/lib/cusolver/linalg.jl index c380e5f37b..a20ddb52b3 100644 --- a/lib/cusolver/linalg.jl +++ b/lib/cusolver/linalg.jl @@ -20,12 +20,15 @@ _copywitheltype(::Type{T}, As...) where {T} = map(A -> copyto!(similar(A, T), A) # matrix division -const CuMatOrAdj{T} = Union{CuMatrix{T}, - LinearAlgebra.Adjoint{T, <:CuMatrix{T}}, - LinearAlgebra.Transpose{T, <:CuMatrix{T}}} -const CuOrAdj{T} = Union{CuVecOrMat{T}, - LinearAlgebra.Adjoint{T, <:CuVecOrMat{T}}, - LinearAlgebra.Transpose{T, <:CuVecOrMat{T}}} +const CuMatOrAdj{T} = Union{StridedCuMatrix, + LinearAlgebra.Adjoint{T, <:StridedCuMatrix{T}}, + LinearAlgebra.Transpose{T, <:StridedCuMatrix{T}}} +const CuOrAdj{T} = Union{StridedCuVector, + LinearAlgebra.Adjoint{T, <:StridedCuVector{T}}, + LinearAlgebra.Transpose{T, <:StridedCuVector{T}}, + StridedCuMatrix, + LinearAlgebra.Adjoint{T, <:StridedCuMatrix{T}}, + LinearAlgebra.Transpose{T, <:StridedCuMatrix{T}}} function Base.:\(_A::CuMatOrAdj, _B::CuOrAdj) A, B = copy_cublasfloat(_A, _B) @@ -129,7 +132,10 @@ using LinearAlgebra: Factorization, AbstractQ, QRCompactWY, QRCompactWYQ, QRPack ## QR -LinearAlgebra.qr!(A::CuMatrix{T}) where T = QR(geqrf!(A::CuMatrix{T})...) + + +LinearAlgebra.qr!(A::StridedCuMatrix{T}) where T = QR(geqrf!(A::StridedCuMatrix{T})...) + # conversions CuMatrix(F::Union{QR,QRCompactWY}) = CuArray(AbstractArray(F)) @@ -137,7 +143,7 @@ CuArray(F::Union{QR,QRCompactWY}) = CuMatrix(F) CuMatrix(F::QRPivoted) = CuArray(AbstractArray(F)) CuArray(F::QRPivoted) = CuMatrix(F) -function LinearAlgebra.ldiv!(_qr::QR, b::CuVector) +function LinearAlgebra.ldiv!(_qr::QR, b::StridedCuVector) m,n = size(_qr) _x = UpperTriangular(_qr.R[1:min(m,n), 1:n]) \ ((_qr.Q' * b)[1:n]) b[1:n] .= _x @@ -145,7 +151,7 @@ function LinearAlgebra.ldiv!(_qr::QR, b::CuVector) return b[1:n] end -function LinearAlgebra.ldiv!(_qr::QR, B::CuMatrix) +function LinearAlgebra.ldiv!(_qr::QR, B::StridedCuMatrix) m,n = size(_qr) _x = UpperTriangular(_qr.R[1:min(m,n), 1:n]) \ ((_qr.Q' * B)[1:n, 1:size(B, 2)]) B[1:n, 1:size(B, 2)] .= _x @@ -153,7 +159,7 @@ function LinearAlgebra.ldiv!(_qr::QR, B::CuMatrix) return B[1:n, 1:size(B, 2)] end -function LinearAlgebra.ldiv!(x::CuArray, _qr::QR, b::CuArray) +function LinearAlgebra.ldiv!(x::StridedCuArray, _qr::QR, b::StridedCuArray) _x = ldiv!(_qr, b) x .= vec(_x) unsafe_free!(_x) @@ -174,10 +180,12 @@ CuMatrix{T}(Q::QRCompactWYQ) where {T} = error("QRCompactWY format is not suppor Matrix{T}(Q::QRPackedQ{S,<:CuArray,<:CuArray}) where {T,S} = Array(CuMatrix{T}(Q)) Matrix{T}(Q::QRCompactWYQ{S,<:CuArray,<:CuArray}) where {T,S} = Array(CuMatrix{T}(Q)) + + if VERSION < v"1.10-" # extracting the full matrix can be done with `collect` (which defaults to `Array`) -function Base.collect(src::Union{QRPackedQ{<:Any,<:CuArray,<:CuArray}, - QRCompactWYQ{<:Any,<:CuArray,<:CuArray}}) +function Base.collect(src::Union{QRPackedQ{<:Any,<:StridedCuArray,<:StridedCuArray}, + QRCompactWYQ{<:Any,<:StridedCuArray,<:StridedCuArray}}) dest = similar(src) copyto!(dest, I) lmul!(src, dest) @@ -185,46 +193,175 @@ function Base.collect(src::Union{QRPackedQ{<:Any,<:CuArray,<:CuArray}, end # avoid the generic similar fallback that returns a CPU array -Base.similar(Q::Union{QRPackedQ{<:Any,<:CuArray,<:CuArray}, - QRCompactWYQ{<:Any,<:CuArray,<:CuArray}}, +Base.similar(Q::Union{QRPackedQ{<:Any,<:StridedCuArray,<:StridedCuArray}, + QRCompactWYQ{<:Any,<:StridedCuArray,<:StridedCuArray}}, ::Type{T}, dims::Dims{N}) where {T,N} = CuArray{T,N}(undef, dims) -end - -function Base.getindex(Q::QRPackedQ{<:Any, <:CuArray}, ::Colon, j::Int) +function Base.getindex(Q::QRPackedQ{<:Any, <:StridedCuArray}, ::Colon, j::Int) y = CUDA.zeros(eltype(Q), size(Q, 2)) y[j] = 1 lmul!(Q, y) end + # multiplication by Q -LinearAlgebra.lmul!(A::QRPackedQ{T,<:CuArray,<:CuArray}, +LinearAlgebra.lmul!(A::QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}, B::CuVecOrMat{T}) where {T<:BlasFloat} = ormqr!('L', 'N', A.factors, A.τ, B) -LinearAlgebra.lmul!(adjA::Adjoint{T,<:QRPackedQ{T,<:CuArray,<:CuArray}}, +LinearAlgebra.lmul!(adjA::Adjoint{T,<:QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}}, B::CuVecOrMat{T}) where {T<:BlasReal} = ormqr!('L', 'T', parent(adjA).factors, parent(adjA).τ, B) -LinearAlgebra.lmul!(adjA::Adjoint{T,<:QRPackedQ{T,<:CuArray,<:CuArray}}, +LinearAlgebra.lmul!(adjA::Adjoint{T,<:QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}}, B::CuVecOrMat{T}) where {T<:BlasComplex} = ormqr!('L', 'C', parent(adjA).factors, parent(adjA).τ, B) -LinearAlgebra.lmul!(trA::Transpose{T,<:QRPackedQ{T,<:CuArray,<:CuArray}}, +LinearAlgebra.lmul!(trA::Transpose{T,<:QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}}, B::CuVecOrMat{T}) where {T<:BlasFloat} = ormqr!('L', 'T', parent(trA).factors, parent(trA).τ, B) LinearAlgebra.rmul!(A::CuVecOrMat{T}, - B::QRPackedQ{T,<:CuArray,<:CuArray}) where {T<:BlasFloat} = + B::QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}) where {T<:BlasFloat} = ormqr!('R', 'N', B.factors, B.τ, A) LinearAlgebra.rmul!(A::CuVecOrMat{T}, - adjB::Adjoint{<:Any,<:QRPackedQ{T,<:CuArray,<:CuArray}}) where {T<:BlasReal} = + adjB::Adjoint{<:Any,<:QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}}) where {T<:BlasReal} = ormqr!('R', 'T', parent(adjB).factors, parent(adjB).τ, A) LinearAlgebra.rmul!(A::CuVecOrMat{T}, - adjB::Adjoint{<:Any,<:QRPackedQ{T,<:CuArray,<:CuArray}}) where {T<:BlasComplex} = + adjB::Adjoint{<:Any,<:QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}}) where {T<:BlasComplex} = ormqr!('R', 'C', parent(adjB).factors, parent(adjB).τ, A) LinearAlgebra.rmul!(A::CuVecOrMat{T}, - trA::Transpose{<:Any,<:QRPackedQ{T,<:CuArray,<:CuArray}}) where {T<:BlasFloat} = + trA::Transpose{<:Any,<:QRPackedQ{T,<:StridedCuArray,<:StridedCuArray}}) where {T<:BlasFloat} = + ormqr!('R', 'T', parent(trA).factors, parent(adjB).τ, A) + +else + +struct CuQR{T} <: Factorization{T} + factors::StridedCuMatrix + τ::StridedCuVector{T} + CuQR{T}(factors::StridedCuMatrix{T}, τ::StridedCuVector{T}) where {T} = new(factors, τ) +end + +struct CuQRPackedQ{T} <: AbstractQ{T} + factors::StridedCuMatrix{T} + τ::StridedCuVector{T} + CuQRPackedQ{T}(factors::StridedCuMatrix{T}, τ::StridedCuVector{T}) where {T} = new(factors, τ) +end + +CuQR(factors::StridedCuMatrix{T}, τ::StridedCuVector{T}) where {T} = + CuQR{T}(factors, τ) +CuQRPackedQ(factors::StridedCuMatrix{T}, τ::StridedCuVector{T}) where {T} = + CuQRPackedQ{T}(factors, τ) + +# AbstractQ's `size` is the size of the full matrix, +# while `Matrix(Q)` only gives the compact Q. +# See JuliaLang/julia#26591 and JuliaGPU/CUDA.jl#969. +CuMatrix{T}(Q::AbstractQ{S}) where {T,S} = convert(CuArray{T}, Matrix(Q)) +CuMatrix{T, B}(Q::AbstractQ{S}) where {T, B, S} = CuMatrix{T}(Q) +CuMatrix(Q::AbstractQ{T}) where {T} = CuMatrix{T}(Q) +CuArray{T}(Q::AbstractQ) where {T} = CuMatrix{T}(Q) +CuArray(Q::AbstractQ) = CuMatrix(Q) + +# extracting the full matrix can be done with `collect` (which defaults to `Array`) +function Base.collect(src::CuQRPackedQ) + dest = similar(src) + copyto!(dest, I) + lmul!(src, dest) + collect(dest) +end + +# avoid the generic similar fallback that returns a CPU array +Base.similar(Q::CuQRPackedQ, ::Type{T}, dims::Dims{N}) where {T,N} = + CuArray{T,N}(undef, dims) + +LinearAlgebra.qr!(A::StridedCuMatrix{T}) where T = CuQR(geqrf!(A::StridedCuMatrix{T})...) + +Base.size(A::CuQR) = size(A.factors) +Base.size(A::CuQRPackedQ, dim::Integer) = 0 < dim ? (dim <= 2 ? size(A.factors, 1) : 1) : throw(BoundsError()) +CUDA.CuMatrix(A::CuQRPackedQ) = orgqr!(copy(A.factors), A.τ) +CUDA.CuArray(A::CuQRPackedQ) = CuMatrix(A) +Base.Matrix(A::CuQRPackedQ) = Matrix(CuMatrix(A)) + +function Base.getproperty(A::CuQR, d::Symbol) + m, n = size(getfield(A, :factors)) + if d == :R + return triu!(view(A.factors,1:min(m, n), 1:n)) + elseif d == :Q + return CuQRPackedQ(A.factors, A.τ) + else + getfield(A, d) + end +end + +# iteration for destructuring into components +Base.iterate(S::CuQR) = (S.Q, Val(:R)) +Base.iterate(S::CuQR, ::Val{:R}) = (S.R, Val(:done)) +Base.iterate(S::CuQR, ::Val{:done}) = nothing + +# Apply changes Q from the left +LinearAlgebra.lmul!(A::CuQRPackedQ{T}, B::StridedCuVecOrMat{T}) where {T<:BlasFloat} = + ormqr!('L', 'N', A.factors, A.τ, B) +LinearAlgebra.lmul!(adjA::Adjoint{T,<:CuQRPackedQ{T}}, B::StridedCuVecOrMat{T}) where {T<:BlasReal} = + ormqr!('L', 'T', parent(adjA).factors, parent(adjA).τ, B) +LinearAlgebra.lmul!(adjA::Adjoint{T,<:CuQRPackedQ{T}}, B::StridedCuVecOrMat{T}) where {T<:BlasComplex} = + ormqr!('L', 'C', parent(adjA).factors, parent(adjA).τ, B) +LinearAlgebra.lmul!(trA::Transpose{T,<:CuQRPackedQ{T}}, B::StridedCuVecOrMat{T}) where {T<:BlasFloat} = + ormqr!('L', 'T', parent(trA).factors, parent(trA).τ, B) + +# Apply changes Q from the right +LinearAlgebra.rmul!(A::StridedCuVecOrMat{T}, B::CuQRPackedQ{T}) where {T<:BlasFloat} = + ormqr!('R', 'N', B.factors, B.τ, A) +LinearAlgebra.rmul!(A::StridedCuVecOrMat{T}, + adjB::Adjoint{<:Any,<:CuQRPackedQ{T}}) where {T<:BlasReal} = + ormqr!('R', 'T', parent(adjB).factors, parent(adjB).τ, A) +LinearAlgebra.rmul!(A::StridedCuVecOrMat{T}, + adjB::Adjoint{<:Any,<:CuQRPackedQ{T}}) where {T<:BlasComplex} = + ormqr!('R', 'C', parent(adjB).factors, parent(adjB).τ, A) +LinearAlgebra.rmul!(A::StridedCuVecOrMat{T}, + trA::Transpose{<:Any,<:CuQRPackedQ{T}}) where {T<:BlasFloat} = ormqr!('R', 'T', parent(trA).factors, parent(adjB).τ, A) +function Base.getindex(A::CuQRPackedQ{T}, i::Int, j::Int) where {T} + assertscalar("CuQRPackedQ getindex") + x = CUDA.zeros(T, size(A, 2)) + x[j] = 1 + lmul!(A, x) + return x[i] +end + +function Base.show(io::IO, F::CuQR) + println(io, "$(typeof(F)) with factors Q and R:") + show(io, F.Q) + println(io) + show(io, F.R) +end + +# https://github.com/JuliaLang/julia/pull/32887 +LinearAlgebra.det(Q::CuQRPackedQ{<:Real}) = isodd(count(!iszero, Q.τ)) ? -1 : 1 +LinearAlgebra.det(Q::CuQRPackedQ) = prod(τ -> iszero(τ) ? one(τ) : -sign(τ)^2, Q.τ) + +function LinearAlgebra.ldiv!(_qr::CuQR, b::StridedCuVector) + m,n = size(_qr) + _x = UpperTriangular(_qr.R[1:min(m,n), 1:n]) \ ((_qr.Q' * b)[1:n]) + b[1:n] .= _x + unsafe_free!(_x) + return b[1:n] +end + +function LinearAlgebra.ldiv!(_qr::CuQR, B::StridedCuMatrix) + m,n = size(_qr) + _x = UpperTriangular(_qr.R[1:min(m,n), 1:n]) \ ((_qr.Q' * B)[1:n, 1:size(B, 2)]) + B[1:n, 1:size(B, 2)] .= _x + unsafe_free!(_x) + return B[1:n, 1:size(B, 2)] +end + +function LinearAlgebra.ldiv!(x::StridedCuArray,_qr::CuQR, b::StridedCuArray) + _x = ldiv!(_qr, b) + x .= vec(_x) + unsafe_free!(_x) + return x +end + +end ## SVD diff --git a/test/libraries/cusolver/dense.jl b/test/libraries/cusolver/dense.jl index 060118d268..335adea7d9 100644 --- a/test/libraries/cusolver/dense.jl +++ b/test/libraries/cusolver/dense.jl @@ -8,6 +8,23 @@ n = 10 l = 13 k = 1 +m_sub_length = 7 +n_sub_length = 3 +l_sub_length= 11 +m_sub_start = 4 +n_sub_start = 2 +l_sub_start = 1 +m_subrange = (1:m_sub_length) .+ (m_sub_start-1) +n_subrange = (1:n_sub_length) .+ (n_sub_start -1) +l_subrange = (1:l_sub_length) .+ (l_sub_start -1) + +m_large=50 +n_large=30 +l_large=20 +m_range = (1:m) .+ (m_sub_start-1) +n_range = (1:n) .+ (n_sub_start -1) +l_range = (1:l) .+ (l_sub_start -1) + @testset "elty = $elty" for elty in [Float32, Float64, ComplexF32, ComplexF64] @testset "Cholesky (po)" begin A = rand(elty,n,n) @@ -409,14 +426,65 @@ k = 1 end end - Q, R = F - dQ, dR = d_F - @test collect(dQ*dR) ≈ A - @test collect(dR * dQ') ≈ (R * Q') + A_view = view(A, m_subrange, n_subrange) + F = qr(A_view) + + d_A_view = view(d_A, m_subrange, n_subrange) + d_F = qr(d_A_view) + + d_RR = d_F.Q'*d_A_view + @test collect(d_RR[1:n_sub_length,:]) ≈ collect(d_F.R) atol=tol*norm(A_view) + @test norm(d_RR[n_sub_length+1:end,:]) < tol*norm(A_view) + + d_RRt = d_A_view'*d_F.Q + @test collect(d_RRt[:,1:n_sub_length]) ≈ collect(d_F.R') atol=tol*norm(A_view) + @test norm(d_RRt[:,n_sub_length+1:end]) < tol*norm(A_view) + + @test size(d_F) == size(A_view) + @test size(d_F.Q) == (m_sub_length,m_sub_length) + @test size(d_F.R) == (n_sub_length,n_sub_length) + @test size(d_RR) == size(d_A_view) + @test size(d_RRt) == size(d_A_view') + + d_I = CuMatrix{elty}(I, size(d_F.Q)) + @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A_view) + @test collect(d_F.Q * d_I) ≈ collect(d_F.Q) + @test collect(d_I * d_F.Q) ≈ collect(d_F.Q) + + d_I = CuMatrix{elty}(I, size(d_F.R)) + @test collect(d_F.R * d_I) ≈ collect(d_F.R) + @test collect(d_I * d_F.R) ≈ collect(d_F.R) + + CUDA.@allowscalar begin + qval = d_F.Q[1, 1] + @test qval ≈ F.Q[1, 1] + qrstr = sprint(show, MIME"text/plain"(), d_F) + if VERSION >= v"1.8-" + @test qrstr == "$(typeof(d_F))\nQ factor:\n$(sprint(show, MIME"text/plain"(), d_F.Q))\nR factor:\n$(sprint(show, MIME"text/plain"(), d_F.R))" + else + @test qrstr == "$(typeof(d_F)) with factors Q and R:\n$(sprint(show, d_F.Q))\n$(sprint(show, d_F.R))" + end + end + A = rand(elty, n, m) + F = qr(A) d_A = CuArray(A) d_F = qr(d_A) + Q, R = F + dQ, dR = d_F + @test collect(dQ*dR) ≈ A + @test collect(dR' * dQ') ≈ A' @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A) + A_view = view(A, n_subrange, m_subrange) + F = qr(A_view) + d_A_view = view(d_A, n_subrange, m_subrange) + d_F = qr(d_A_view) + Q, R = F + dQ, dR = d_F + @test collect(dQ*dR) ≈ A_view + @test collect(dR' * dQ') ≈ A_view' + @test det(d_F.Q) ≈ det(collect(d_F.Q * CuMatrix{elty}(I, size(d_F.Q)))) atol=tol*norm(A_view) + A = rand(elty, m, n) d_A = CuArray(A) d_q, d_r = qr(d_A) @@ -425,12 +493,44 @@ k = 1 @test collect(CuArray(d_q)) ≈ Array(q) @test Array(d_r) ≈ Array(r) @test CuArray(d_q) ≈ convert(typeof(d_A), d_q) + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) + d_q, d_r = qr(d_A_view) + q, r = qr(A_view) + @test Array(d_q) ≈ Array(q) + @test collect(CuArray(d_q)) ≈ Array(q) + @test Array(d_r) ≈ Array(r) + @test CuArray(d_q) ≈ convert(typeof(d_A), d_q) + A = rand(elty, n, m) d_A = CuArray(A) d_q, d_r = qr(d_A) q, r = qr(A) @test Array(d_q) ≈ Array(q) @test Array(d_r) ≈ Array(r) + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) + d_q, d_r = qr(d_A_view) + q, r = qr(A_view) + @test Array(d_q) ≈ Array(q) + @test Array(d_r) ≈ Array(r) + + + A = rand(elty, n, m) + d_A = CuArray(A) + d_q, d_r = qr!(d_A) + q, r = qr!(A) + @test collect(d_q) ≈ Array(q) + @test collect(d_r) ≈ Array(r) + A = rand(elty, n, m) + d_A = CuArray(A) + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) + d_q, d_r = qr!(d_A_view) + q, r = qr!(A_view) + @test collect(d_q) ≈ collect(q) + @test collect(d_r) ≈ collect(r) + A = rand(elty, n) # A and B are vectors d_A = CuArray(A) M = qr(A) @@ -438,6 +538,19 @@ k = 1 B = rand(elty, n) d_B = CuArray(B) @test collect(d_M \ d_B) ≈ M \ B + A_view = view(A, n_subrange) + d_A_view = view(d_A, n_subrange) + M_view = qr(A_view) + d_M_view = qr(d_A_view) + B_view = view(B, n_subrange) + d_B_view = view(d_B, n_subrange) + @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view + B_large = rand(elty, n_large) + B = view(B_large, n_range) + d_B_large = CuArray(B_large) + d_B = view(d_B_large, n_range) + @test collect(d_M \ d_B) ≈ M \ B + A = rand(elty, m, n) # A is a matrix and B,C is a vector d_A = CuArray(A) M = qr(A) @@ -455,6 +568,95 @@ k = 1 @test collect(d_M.R' * d_C) ≈ (M.R' * C) @test collect(d_C' * d_M.R) ≈ (C' * M.R) @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) + M_view = qr(A_view) + d_M_view = qr(d_A_view) + B_view = view(B, m_subrange) + d_B_view = view(d_B, m_subrange) + C_view = view(C, n_subrange) + d_C_view = view(d_C, n_subrange) + @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view + @test collect(d_M_view.Q * d_B_view) ≈ (M_view.Q * B_view) + @test collect(d_M_view.Q' * d_B_view) ≈ (M_view.Q' * B_view) + @test collect(d_B_view' * d_M_view.Q) ≈ (B_view' * M_view.Q) + @test collect(d_B_view' * d_M_view.Q') ≈ (B_view' * M_view.Q') + @test collect(d_M_view.R * d_C_view) ≈ (M_view.R * C_view) + @test collect(d_M_view.R' * d_C_view) ≈ (M_view.R' * C_view) + @test collect(d_C_view' * d_M_view.R) ≈ (C_view' * M_view.R) + @test collect(d_C_view' * d_M_view.R') ≈ (C_view' * M_view.R') + B_large = rand(elty, m_large) + B = view(B_large, m_range) + d_B_large = CuArray(B_large) + d_B = view(d_B_large, m_range) + C_large = rand(elty, n_large) + C = view(C_large, n_range) + d_C_large = CuArray(C_large) + d_C = view(d_C_large, n_range) + @test collect(d_M \ d_B) ≈ M \ B + @test collect(d_M.Q * d_B) ≈ (M.Q * B) + @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_M.R * d_C) ≈ (M.R * C) + @test collect(d_M.R' * d_C) ≈ (M.R' * C) + @test collect(d_C' * d_M.R) ≈ (C' * M.R) + @test collect(d_C' * d_M.R') ≈ (C' * M.R') + + A = rand(elty, m, n) # A is a matrix and B,C is a vector + d_A = CuArray(A) + M = qr!(A) + d_M = qr!(d_A) + B = rand(elty, m) + d_B = CuArray(B) + C = rand(elty, n) + d_C = CuArray(C) + @test collect(d_M \ d_B) ≈ M \ B + @test collect(d_M.Q * d_B) ≈ (M.Q * B) + @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_M.R * d_C) ≈ (M.R * C) + @test collect(d_M.R' * d_C) ≈ (M.R' * C) + @test collect(d_C' * d_M.R) ≈ (C' * M.R) + @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A = rand(elty, m, n) + d_A = CuArray(A) + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) + M_view = qr!(A_view) + d_M_view = qr!(d_A_view) + B_view = view(B, m_subrange) + d_B_view = view(d_B, m_subrange) + C_view = view(C, n_subrange) + d_C_view = view(d_C, n_subrange) + @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view + @test collect(d_M_view.Q * d_B_view) ≈ (M_view.Q * B_view) + @test collect(d_M_view.Q' * d_B_view) ≈ (M_view.Q' * B_view) + @test collect(d_B_view' * d_M_view.Q) ≈ (B_view' * M_view.Q) + @test collect(d_B_view' * d_M_view.Q') ≈ (B_view' * M_view.Q') + @test collect(d_M_view.R * d_C_view) ≈ (M_view.R * C_view) + @test collect(d_M_view.R' * d_C_view) ≈ (M_view.R' * C_view) + @test collect(d_C_view' * d_M_view.R) ≈ (C_view' * M_view.R) + @test collect(d_C_view' * d_M_view.R') ≈ (C_view' * M_view.R') + B_large = rand(elty, m_large) + B = view(B_large, m_range) + d_B_large = CuArray(B_large) + d_B = view(d_B_large, m_range) + C_large = rand(elty, n_large) + C = view(C_large, n_range) + d_C_large = CuArray(C_large) + d_C = view(d_C_large, n_range) + @test collect(d_M \ d_B) ≈ M \ B + @test collect(d_M.Q * d_B) ≈ (M.Q * B) + @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_M.R * d_C) ≈ (M.R * C) + @test collect(d_M.R' * d_C) ≈ (M.R' * C) + @test collect(d_C' * d_M.R) ≈ (C' * M.R) + @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A = rand(elty, m, n) # A and B,C are matrices d_A = CuArray(A) M = qr(A) @@ -472,6 +674,95 @@ k = 1 @test collect(d_M.R' * d_C) ≈ (M.R' * C) @test collect(d_C' * d_M.R) ≈ (C' * M.R) @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) + M_view = qr(A_view) + d_M_view = qr(d_A_view) + B_view = view(B, m_subrange, l_subrange) + d_B_view = view(d_B, m_subrange, l_subrange) + C_view = view(C, n_subrange, l_subrange) + d_C_view = view(d_C, n_subrange, l_subrange) + @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view + @test collect(d_M_view.Q * d_B_view) ≈ (M_view.Q * B_view) + @test collect(d_M_view.Q' * d_B_view) ≈ (M_view.Q' * B_view) + @test collect(d_B_view' * d_M_view.Q) ≈ (B_view' * M_view.Q) + @test collect(d_B_view' * d_M_view.Q') ≈ (B_view' * M_view.Q') + @test collect(d_M_view.R * d_C_view) ≈ (M_view.R * C_view) + @test collect(d_M_view.R' * d_C_view) ≈ (M_view.R' * C_view) + @test collect(d_C_view' * d_M_view.R) ≈ (C_view' * M_view.R) + @test collect(d_C_view' * d_M_view.R') ≈ (C_view' * M_view.R') + B_large = rand(elty, m_large, l_large) + B = view(B_large, m_range, l_range) + d_B_large = CuArray(B_large) + d_B = view(d_B_large, m_range, l_range) + C_large = rand(elty, n_large, l_large) + C = view(C_large, n_range, l_range) + d_C_large = CuArray(C_large) + d_C = view(d_C_large, n_range, l_range) + @test collect(d_M \ d_B) ≈ M \ B + @test collect(d_M.Q * d_B) ≈ (M.Q * B) + @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_M.R * d_C) ≈ (M.R * C) + @test collect(d_M.R' * d_C) ≈ (M.R' * C) + @test collect(d_C' * d_M.R) ≈ (C' * M.R) + @test collect(d_C' * d_M.R') ≈ (C' * M.R') + + A = rand(elty, m, n) # A and B,C are matrices + d_A = CuArray(A) + M = qr!(A) + d_M = qr!(d_A) + B = rand(elty, m, l) #different second dimension to verify whether dimensions agree + d_B = CuArray(B) + C = rand(elty, n, l) #different second dimension to verify whether dimensions agree + d_C = CuArray(C) + @test collect(d_M \ d_B) ≈ (M \ B) + @test collect(d_M.Q * d_B) ≈ (M.Q * B) + @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_M.R * d_C) ≈ (M.R * C) + @test collect(d_M.R' * d_C) ≈ (M.R' * C) + @test collect(d_C' * d_M.R) ≈ (C' * M.R) + @test collect(d_C' * d_M.R') ≈ (C' * M.R') + A = rand(elty, m, n) + d_A = CuArray(A) + A_view = view(A, m_subrange, n_subrange) + d_A_view = view(d_A, m_subrange, n_subrange) + M_view = qr!(A_view) + d_M_view = qr!(d_A_view) + B_view = view(B, m_subrange, l_subrange) + d_B_view = view(d_B, m_subrange, l_subrange) + C_view = view(C, n_subrange, l_subrange) + d_C_view = view(d_C, n_subrange, l_subrange) + @test collect(d_M_view \ d_B_view) ≈ M_view \ B_view + @test collect(d_M_view.Q * d_B_view) ≈ (M_view.Q * B_view) + @test collect(d_M_view.Q' * d_B_view) ≈ (M_view.Q' * B_view) + @test collect(d_B_view' * d_M_view.Q) ≈ (B_view' * M_view.Q) + @test collect(d_B_view' * d_M_view.Q') ≈ (B_view' * M_view.Q') + @test collect(d_M_view.R * d_C_view) ≈ (M_view.R * C_view) + @test collect(d_M_view.R' * d_C_view) ≈ (M_view.R' * C_view) + @test collect(d_C_view' * d_M_view.R) ≈ (C_view' * M_view.R) + @test collect(d_C_view' * d_M_view.R') ≈ (C_view' * M_view.R') + B_large = rand(elty, m_large, l_large) + B = view(B_large, m_range, l_range) + d_B_large = CuArray(B_large) + d_B = view(d_B_large, m_range, l_range) + C_large = rand(elty, n_large, l_large) + C = view(C_large, n_range, l_range) + d_C_large = CuArray(C_large) + d_C = view(d_C_large, n_range, l_range) + @test collect(d_M \ d_B) ≈ M \ B + @test collect(d_M.Q * d_B) ≈ (M.Q * B) + @test collect(d_M.Q' * d_B) ≈ (M.Q' * B) + @test collect(d_B' * d_M.Q) ≈ (B' * M.Q) + @test collect(d_B' * d_M.Q') ≈ (B' * M.Q') + @test collect(d_M.R * d_C) ≈ (M.R * C) + @test collect(d_M.R' * d_C) ≈ (M.R' * C) + @test collect(d_C' * d_M.R) ≈ (C' * M.R) + @test collect(d_C' * d_M.R') ≈ (C' * M.R') + end @testset "potrsBatched!" begin