Commit fb0a550

Merge pull request #1414 from JuliaGPU/tb/version

Drop support for CUDA 10.1 and below

2 parents: cea77a5 + f58408c

6 files changed: +56 / -232 lines

.buildkite/pipeline.yml

Lines changed: 0 additions & 21 deletions
@@ -260,27 +260,6 @@ steps:
     if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft
     timeout_in_minutes: 120

-  - label: "CUDA 10.1"
-    plugins:
-      - JuliaCI/julia#v1:
-          version: 1.6
-      - JuliaCI/julia-test#v1: ~
-      - JuliaCI/julia-coverage#v1:
-          codecov: true
-          dirs:
-            - src
-            - lib
-            - examples
-    agents:
-      queue: "juliagpu"
-      cuda: "10.1"
-      cap: "sm_75"
-    env:
-      JULIA_CUDA_VERSION: '10.1'
-      JULIA_CUDA_USE_BINARYBUILDER: 'true'
-    if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft
-    timeout_in_minutes: 120
-

   # special tests

Artifacts.toml

Lines changed: 0 additions & 124 deletions
@@ -1,129 +1,5 @@
 # CUDA

-[[CUDA]]
-arch = "x86_64"
-cuda = "9.0"
-git-tree-sha1 = "dece02c5c692d30e57bbbf08c32fb796bb723a53"
-lazy = true
-os = "macos"
-
-    [[CUDA.download]]
-    sha256 = "df896b2254231c3600460e9b0f928b66d3afe117b4bed29656b43a0415a32a37"
-    url = "https://github.com/JuliaBinaryWrappers/CUDA_loader_jll.jl/releases/download/CUDA_loader-v0.2.1+4/CUDA_loader.v0.2.1.x86_64-apple-darwin-cuda+9.0.tar.gz"
-[[CUDA]]
-arch = "x86_64"
-cuda = "9.0"
-git-tree-sha1 = "0267859e1e69605ad53fc44027db413478bbef47"
-lazy = true
-libc = "glibc"
-os = "linux"
-
-    [[CUDA.download]]
-    sha256 = "46ef49b23bb1070270c595e2e48219788d080e3fcec4e7f59301921785dcc10f"
-    url = "https://github.com/JuliaBinaryWrappers/CUDA_loader_jll.jl/releases/download/CUDA_loader-v0.2.1+4/CUDA_loader.v0.2.1.x86_64-linux-gnu-cuda+9.0.tar.gz"
-[[CUDA]]
-arch = "x86_64"
-cuda = "9.0"
-git-tree-sha1 = "120edb04e8793822232aec254fd853c3fd73e0f6"
-lazy = true
-os = "windows"
-
-    [[CUDA.download]]
-    sha256 = "154fb938f748055baf2bcc0176bb14348b04488531f7658af869135c7ab9c8f8"
-    url = "https://github.com/JuliaBinaryWrappers/CUDA_loader_jll.jl/releases/download/CUDA_loader-v0.2.1+4/CUDA_loader.v0.2.1.x86_64-w64-mingw32-cuda+9.0.tar.gz"
-[[CUDA]]
-arch = "x86_64"
-cuda = "9.2"
-git-tree-sha1 = "ba2ce8e3de1877e78178e64d2fa42e19d09f3a4b"
-lazy = true
-os = "macos"
-
-    [[CUDA.download]]
-    sha256 = "82b01b75581dbbd0ddcc32ad88f2790fdc2d2ae8ad368bed12e065f39a1310d9"
-    url = "https://github.com/JuliaBinaryWrappers/CUDA_loader_jll.jl/releases/download/CUDA_loader-v0.2.1+4/CUDA_loader.v0.2.1.x86_64-apple-darwin-cuda+9.2.tar.gz"
-[[CUDA]]
-arch = "x86_64"
-cuda = "9.2"
-git-tree-sha1 = "db04ad24a9d0e49d9b9a55fb30e3428b0c1588ca"
-lazy = true
-libc = "glibc"
-os = "linux"
-
-    [[CUDA.download]]
-    sha256 = "2c32943953f148ac15b3854f821b71a7bb5fca53b722c07758e1b974f67066c2"
-    url = "https://github.com/JuliaBinaryWrappers/CUDA_loader_jll.jl/releases/download/CUDA_loader-v0.2.1+4/CUDA_loader.v0.2.1.x86_64-linux-gnu-cuda+9.2.tar.gz"
-[[CUDA]]
-arch = "x86_64"
-cuda = "9.2"
-git-tree-sha1 = "62fd7cb750233da012252650b69d79afe383ff49"
-lazy = true
-os = "windows"
-
-    [[CUDA.download]]
-    sha256 = "1d8b6ae5f31a1790a812a614d28300407f3f435346e8047a06105ad15bb4bb3e"
-    url = "https://github.com/JuliaBinaryWrappers/CUDA_loader_jll.jl/releases/download/CUDA_loader-v0.2.1+4/CUDA_loader.v0.2.1.x86_64-w64-mingw32-cuda+9.2.tar.gz"
-[[CUDA]]
-arch = "x86_64"
-cuda = "10.0"
-git-tree-sha1 = "52b7a2a1a93b057637c056797523d86dbe5e02be"
-lazy = true
-os = "macos"
-
-    [[CUDA.download]]
-    sha256 = "bffb7a21701daa9b75ef888a7cfebf045ceec363b63b7fc840a1be41dd97eb94"
-    url = "https://github.com/JuliaBinaryWrappers/CUDA_loader_jll.jl/releases/download/CUDA_loader-v0.2.1+4/CUDA_loader.v0.2.1.x86_64-apple-darwin-cuda+10.0.tar.gz"
-[[CUDA]]
-arch = "x86_64"
-cuda = "10.0"
-git-tree-sha1 = "70690cde550c5bac83be1738bc612adb4768def8"
-lazy = true
-libc = "glibc"
-os = "linux"
-
-    [[CUDA.download]]
-    sha256 = "9320d913fa9f29151b1bb9dd78202004c9dc61203816f960369fbe4c81bdd9e3"
-    url = "https://github.com/JuliaBinaryWrappers/CUDA_loader_jll.jl/releases/download/CUDA_loader-v0.2.1+4/CUDA_loader.v0.2.1.x86_64-linux-gnu-cuda+10.0.tar.gz"
-[[CUDA]]
-arch = "x86_64"
-cuda = "10.0"
-git-tree-sha1 = "5dc75d2507d2886ee5c309d7a9e4c2ecd23d0675"
-lazy = true
-os = "windows"
-
-    [[CUDA.download]]
-    sha256 = "e97021f72258fea105c8e51c4b565dc8439e1be6489c3fa52851dadd73fd87f9"
-    url = "https://github.com/JuliaBinaryWrappers/CUDA_loader_jll.jl/releases/download/CUDA_loader-v0.2.1+4/CUDA_loader.v0.2.1.x86_64-w64-mingw32-cuda+10.0.tar.gz"
-[[CUDA]]
-arch = "x86_64"
-cuda = "10.1"
-git-tree-sha1 = "ebf136c22650f0b8d32a6fda896026cc53a06098"
-lazy = true
-os = "macos"
-
-    [[CUDA.download]]
-    sha256 = "87b1a50dbb2db4ac2611e1884445c6dd4051aff8c8cdb59dbfc8dde17fd36c2a"
-    url = "https://github.com/JuliaBinaryWrappers/CUDA_loader_jll.jl/releases/download/CUDA_loader-v0.2.1+4/CUDA_loader.v0.2.1.x86_64-apple-darwin-cuda+10.1.tar.gz"
-[[CUDA]]
-arch = "x86_64"
-cuda = "10.1"
-git-tree-sha1 = "0549466c4aab1487f889291765a95d728870df83"
-lazy = true
-libc = "glibc"
-os = "linux"
-
-    [[CUDA.download]]
-    sha256 = "9865dad0638b992461cd42ff264b137aeaacf8527ae17c73a8e0cb11023de285"
-    url = "https://github.com/JuliaBinaryWrappers/CUDA_loader_jll.jl/releases/download/CUDA_loader-v0.2.1+4/CUDA_loader.v0.2.1.x86_64-linux-gnu-cuda+10.1.tar.gz"
-[[CUDA]]
-arch = "x86_64"
-cuda = "10.1"
-git-tree-sha1 = "a3cdc71ed971c74d70629e78ac6eae95f0187d4a"
-lazy = true
-os = "windows"
-
-    [[CUDA.download]]
-    sha256 = "f58b3ace896dd58a9b19f15dd71d54ff21dea30b8b3110092388ebe6cb923852"
-    url = "https://github.com/JuliaBinaryWrappers/CUDA_loader_jll.jl/releases/download/CUDA_loader-v0.2.1+4/CUDA_loader.v0.2.1.x86_64-w64-mingw32-cuda+10.1.tar.gz"
 [[CUDA]]
 arch = "aarch64"
 cuda = "10.2"
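The remaining stanzas are matched against the host machine by comparing platform tags (arch, os, libc) together with the custom cuda tag. A minimal sketch of that matching using only Base.BinaryPlatforms; the host tags here are illustrative, and this is not CUDA.jl's actual artifact-selection code:

using Base.BinaryPlatforms

# Platform described by the surviving aarch64 / CUDA 10.2 stanza above:
entry = Platform("aarch64", "linux"; cuda = "10.2")

# Hypothetical host, with the detected CUDA version added as a tag:
host = Platform("aarch64", "linux"; libc = "glibc", cuda = "10.2")

platforms_match(host, entry)  # true: every tag present in both agrees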

deps/compatibility.jl

Lines changed: 11 additions & 0 deletions
@@ -50,6 +50,7 @@ const cuda_cap_db = Dict(
     v"7.5" => v"10.0":highest,
     v"8.0" => v"11.0":highest,
     v"8.6" => v"11.1":highest,
+    v"8.7" => v"11.4":highest,
 )

 function cuda_cap_support(ver::VersionNumber)
@@ -95,6 +96,10 @@ const cuda_ptx_db = Dict(
     v"7.0" => v"11.0":highest,
     v"7.1" => v"11.1":highest,
     v"7.2" => v"11.2":highest,
+    v"7.3" => v"11.3":highest,
+    v"7.4" => v"11.4":highest,
+    v"7.5" => v"11.5":highest,
+    v"7.6" => v"11.6":highest,
 )

 function cuda_ptx_support(ver::VersionNumber)
@@ -128,6 +133,7 @@ const llvm_cap_db = Dict(
     v"7.2" => v"7.0":highest,
     v"7.5" => v"8.0":highest,
     v"8.0" => v"11.0":highest,
+    v"8.6" => v"13.0":highest,
 )

 function llvm_cap_support(ver::VersionNumber)
@@ -159,6 +165,11 @@ const llvm_ptx_db = Dict(
     v"6.4" => v"9.0":highest,
     v"6.5" => v"11.0":highest,
     v"7.0" => v"11.0":highest,
+    v"7.1" => v"13.0":highest,
+    v"7.2" => v"13.0":highest,
+    v"7.3" => v"14.0":highest,
+    v"7.4" => v"14.0":highest,
+    v"7.5" => v"14.0":highest,
 )

 function llvm_ptx_support(ver::VersionNumber)
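These tables map a device compute capability or PTX ISA version to the toolkit/LLVM versions that can target it. A self-contained sketch of the lookup pattern; cap_introduced and supported_caps are illustrative stand-ins, not the definitions used by cuda_cap_support above:

# Map each compute capability to the first CUDA toolkit that supports it.
cap_introduced = Dict(
    v"8.0" => v"11.0",
    v"8.6" => v"11.1",
    v"8.7" => v"11.4",  # the capability added by this commit
)

# Capabilities usable with a given toolkit version.
supported_caps(toolkit::VersionNumber) =
    sort([cap for (cap, min_toolkit) in cap_introduced if toolkit >= min_toolkit])

supported_caps(v"11.4")  # includes v"8.7"; querying v"11.3" would not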

lib/cublas/wrappers.jl

Lines changed: 12 additions & 39 deletions
@@ -103,17 +103,9 @@ for (fname, elty) in ((:cublasDscal_v2,:Float64),
 end
 end
 function scal!(n::Integer, alpha::Number, x::StridedCuArray{Float16})
-    if version() > v"10.1"
-        α = convert(Float32, alpha)
-        cublasScalEx(handle(), n, Ref{Float32}(α), Float32, x, Float16, stride(x, 1), Float32)
-        return x
-    else
-        wide_x = widen.(x)
-        scal!(n, alpha, wide_x)
-        thin_x = convert(typeof(x), wide_x)
-        copyto!(x, thin_x)
-        return x
-    end
+    α = convert(Float32, alpha)
+    cublasScalEx(handle(), n, Ref{Float32}(α), Float32, x, Float16, stride(x, 1), Float32)
+    return x
 end
 # specific variants in case x is complex and alpha is real
 for (fname, elty, celty) in ((:cublasCsscal_v2, :Float32, :ComplexF32),
@@ -153,13 +145,9 @@ for (jname, fname, elty) in ((:dot,:cublasDdot_v2,:Float64),
 end
 end
 function dot(n::Integer, x::StridedCuArray{Float16}, y::StridedCuArray{Float16})
-    if version() > v"10.1"
-        result = Ref{Float16}()
-        cublasDotEx(handle(), n, x, Float16, stride(x, 1), y, Float16, stride(y, 1), result, Float16, Float32)
-        return result[]
-    else
-        return convert(Float16, dot(n, convert(CuArray{Float32}, x), convert(CuArray{Float32}, y)))
-    end
+    result = Ref{Float16}()
+    cublasDotEx(handle(), n, x, Float16, stride(x, 1), y, Float16, stride(y, 1), result, Float16, Float32)
+    return result[]
 end
 function dotc(n::Integer, x::StridedCuArray{ComplexF16}, y::StridedCuArray{ComplexF16})
     return convert(ComplexF16, dotc(n, convert(CuArray{ComplexF32}, x), convert(CuArray{ComplexF32}, y)))
@@ -185,15 +173,9 @@ end
 nrm2(x::StridedCuArray) = nrm2(length(x), x)

 function nrm2(n::Integer, x::StridedCuArray{Float16})
-    if version() > v"10.1"
-        result = Ref{Float16}()
-        cublasNrm2Ex(handle(), n, x, Float16, stride(x, 1), result, Float16, Float32)
-        return result[]
-    else
-        wide_x = widen.(x)
-        nrm = nrm2(n, wide_x)
-        return convert(Float16, nrm)
-    end
+    result = Ref{Float16}()
+    cublasNrm2Ex(handle(), n, x, Float16, stride(x, 1), result, Float16, Float32)
+    return result[]
 end
 function nrm2(n::Integer, x::StridedCuArray{ComplexF16})
     wide_x = widen.(x)
@@ -233,18 +215,9 @@ for (fname, elty) in ((:cublasDaxpy_v2,:Float64),
 end

 function axpy!(n::Integer, alpha::Number, dx::StridedCuArray{Float16}, dy::StridedCuArray{Float16})
-    if version() >= v"10.1"
-        α = convert(Float32, alpha)
-        cublasAxpyEx(handle(), n, Ref{Float32}(α), Float32, dx, Float16, stride(dx, 1), dy, Float16, stride(dy, 1), Float32)
-        return dy
-    else
-        wide_x = widen.(dx)
-        wide_y = widen.(dy)
-        axpy!(n, alpha, wide_x, wide_y)
-        thin_y = convert(typeof(dy), wide_y)
-        copyto!(dy, thin_y)
-        return dy
-    end
+    α = convert(Float32, alpha)
+    cublasAxpyEx(handle(), n, Ref{Float32}(α), Float32, dx, Float16, stride(dx, 1), dy, Float16, stride(dy, 1), Float32)
+    return dy
 end
 function axpy!(n::Integer, alpha::Number, dx::StridedCuArray{ComplexF16}, dy::StridedCuArray{ComplexF16})
     wide_x = widen.(dx)
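With the pre-10.2 fallback branches gone, the Float16 level-1 wrappers above always call the cublas*Ex entry points with a Float32 compute type. A usage sketch, assuming a functional GPU and this era of CUDA.jl:

using CUDA
using CUDA.CUBLAS: scal!, dot, nrm2, axpy!

x = CUDA.rand(Float16, 1024)
y = CUDA.rand(Float16, 1024)

scal!(length(x), 2.0, x)     # cublasScalEx; alpha is converted to Float32
d = dot(length(x), x, y)     # cublasDotEx; Float16 result, Float32 accumulation
r = nrm2(length(x), x)       # cublasNrm2Ex
axpy!(length(y), 1.0, x, y)  # cublasAxpyEx: y = 1.0 * x + y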

lib/cusparse/conversions.jl

Lines changed: 31 additions & 46 deletions
@@ -121,36 +121,27 @@ function CuSparseMatrixCSR{T}(S::Adjoint{T, <:CuSparseMatrixCSC{T}}) where {T <:
 end

 # by flipping rows and columns, we can use that to get CSC to CSR too
-for (fname,elty) in ((:cusparseScsr2csc, :Float32),
-                     (:cusparseDcsr2csc, :Float64),
-                     (:cusparseCcsr2csc, :ComplexF32),
-                     (:cusparseZcsr2csc, :ComplexF64))
+for elty in (Float32, Float64, ComplexF32, ComplexF64)
     @eval begin
         function CuSparseMatrixCSC{$elty}(csr::CuSparseMatrixCSR{$elty}; inda::SparseChar='O')
             m,n = size(csr)
             colPtr = CUDA.zeros(Cint, n+1)
             rowVal = CUDA.zeros(Cint, nnz(csr))
             nzVal = CUDA.zeros($elty, nnz(csr))
-            if version() >= v"10.2"
-                # TODO: algorithm configuratibility?
-                function bufferSize()
-                    out = Ref{Csize_t}(1)
-                    cusparseCsr2cscEx2_bufferSize(handle(), m, n, nnz(csr), nonzeros(csr),
-                        csr.rowPtr, csr.colVal, nzVal, colPtr, rowVal,
-                        $elty, CUSPARSE_ACTION_NUMERIC, inda,
-                        CUSPARSE_CSR2CSC_ALG1, out)
-                    return out[]
-                end
-                with_workspace(bufferSize) do buffer
-                    cusparseCsr2cscEx2(handle(), m, n, nnz(csr), nonzeros(csr),
-                        csr.rowPtr, csr.colVal, nzVal, colPtr, rowVal,
-                        $elty, CUSPARSE_ACTION_NUMERIC, inda,
-                        CUSPARSE_CSR2CSC_ALG1, buffer)
-                end
-            else
-                $fname(handle(), m, n, nnz(csr), nonzeros(csr),
-                    csr.rowPtr, csr.colVal, nzVal, rowVal,
-                    colPtr, CUSPARSE_ACTION_NUMERIC, inda)
+            # TODO: algorithm configuratibility?
+            function bufferSize()
+                out = Ref{Csize_t}(1)
+                cusparseCsr2cscEx2_bufferSize(handle(), m, n, nnz(csr), nonzeros(csr),
+                    csr.rowPtr, csr.colVal, nzVal, colPtr, rowVal,
+                    $elty, CUSPARSE_ACTION_NUMERIC, inda,
+                    CUSPARSE_CSR2CSC_ALG1, out)
+                return out[]
+            end
+            with_workspace(bufferSize) do buffer
+                cusparseCsr2cscEx2(handle(), m, n, nnz(csr), nonzeros(csr),
+                    csr.rowPtr, csr.colVal, nzVal, colPtr, rowVal,
+                    $elty, CUSPARSE_ACTION_NUMERIC, inda,
+                    CUSPARSE_CSR2CSC_ALG1, buffer)
             end
             CuSparseMatrixCSC(colPtr,rowVal,nzVal,size(csr))
         end
@@ -160,26 +151,20 @@ for (fname,elty) in ((:cusparseScsr2csc, :Float32),
             rowPtr = CUDA.zeros(Cint,m+1)
             colVal = CUDA.zeros(Cint,nnz(csc))
             nzVal = CUDA.zeros($elty,nnz(csc))
-            if version() >= v"10.2"
-                # TODO: algorithm configuratibility?
-                function bufferSize()
-                    out = Ref{Csize_t}(1)
-                    cusparseCsr2cscEx2_bufferSize(handle(), n, m, nnz(csc), nonzeros(csc),
-                        csc.colPtr, rowvals(csc), nzVal, rowPtr, colVal,
-                        $elty, CUSPARSE_ACTION_NUMERIC, inda,
-                        CUSPARSE_CSR2CSC_ALG1, out)
-                    return out[]
-                end
-                with_workspace(bufferSize) do buffer
-                    cusparseCsr2cscEx2(handle(), n, m, nnz(csc), nonzeros(csc),
-                        csc.colPtr, rowvals(csc), nzVal, rowPtr, colVal,
-                        $elty, CUSPARSE_ACTION_NUMERIC, inda,
-                        CUSPARSE_CSR2CSC_ALG1, buffer)
-                end
-            else
-                $fname(handle(), n, m, nnz(csc), nonzeros(csc),
-                    csc.colPtr, rowvals(csc), nzVal, colVal,
-                    rowPtr, CUSPARSE_ACTION_NUMERIC, inda)
+            # TODO: algorithm configuratibility?
+            function bufferSize()
+                out = Ref{Csize_t}(1)
+                cusparseCsr2cscEx2_bufferSize(handle(), n, m, nnz(csc), nonzeros(csc),
+                    csc.colPtr, rowvals(csc), nzVal, rowPtr, colVal,
+                    $elty, CUSPARSE_ACTION_NUMERIC, inda,
+                    CUSPARSE_CSR2CSC_ALG1, out)
+                return out[]
+            end
+            with_workspace(bufferSize) do buffer
+                cusparseCsr2cscEx2(handle(), n, m, nnz(csc), nonzeros(csc),
+                    csc.colPtr, rowvals(csc), nzVal, rowPtr, colVal,
+                    $elty, CUSPARSE_ACTION_NUMERIC, inda,
+                    CUSPARSE_CSR2CSC_ALG1, buffer)
             end
             CuSparseMatrixCSR(rowPtr,colVal,nzVal,size(csc))
         end
@@ -197,7 +182,7 @@ for (elty, welty) in ((:Float16, :Float32),
             rowVal = CUDA.zeros(Cint, nnz(csr))
             nzVal = CUDA.zeros($elty, nnz(csr))
             # TODO: algorithm configuratibility?
-            if version() >= v"10.2" && $elty == Float16 #broken for ComplexF16?
+            if $elty == Float16 #broken for ComplexF16?
                 function bufferSize()
                     out = Ref{Csize_t}(1)
                     cusparseCsr2cscEx2_bufferSize(handle(), m, n, nnz(csr), nonzeros(csr),
@@ -225,7 +210,7 @@ for (elty, welty) in ((:Float16, :Float32),
             rowPtr = CUDA.zeros(Cint,m+1)
             colVal = CUDA.zeros(Cint,nnz(csc))
             nzVal = CUDA.zeros($elty,nnz(csc))
-            if version() >= v"10.2" && $elty == Float16 #broken for ComplexF16?
+            if $elty == Float16 #broken for ComplexF16?
                 # TODO: algorithm configuratibility?
                 function bufferSize()
                     out = Ref{Csize_t}(1)
src/initialization.jl

Lines changed: 2 additions & 2 deletions
@@ -62,8 +62,8 @@ end
         return
     end

-    if version() < v"10.1"
-        @warn "This version of CUDA.jl only supports NVIDIA drivers for CUDA 10.1 or higher (yours is for CUDA $(version()))"
+    if version() < v"10.2"
+        @warn "This version of CUDA.jl only supports NVIDIA drivers for CUDA 10.2 or higher (yours is for CUDA $(version()))"
     end

     if version() < v"11.2"