Skip to content

Commit a011e73

Browse files
authored
Diagnose kernel limits on launch failure. (#2329)
1 parent 545ffa2 commit a011e73

File tree

2 files changed

+19 -3 lines changed

2 files changed

+19 -3 lines changed

lib/cudadrv/execution.jl

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -74,8 +74,9 @@ function launch(f::CuFunction, args::Vararg{Any,N}; blocks::CuDim=1, threads::Cu
7474
end
7575
end
7676

77-
@noinline function diagnose_launch_failure(f, err; blockdim, threaddim, shmem)
78-
if !isa(err, CuError) || err.code != ERROR_INVALID_VALUE
77+
@noinline function diagnose_launch_failure(f::CuFunction, err; blockdim, threaddim, shmem)
78+
if !isa(err, CuError) || !in(err.code, [ERROR_INVALID_VALUE,
79+
ERROR_LAUNCH_OUT_OF_RESOURCES])
7980
rethrow()
8081
end
8182

@@ -111,6 +112,19 @@ end
111112
error("Amount of dynamic shared memory exceeds device limit ($(Base.format_bytes(shmem)) > $(Base.format_bytes(shmem_lim))).")
112113
end
113114

115+
# check kernel limits
116+
fattr = attributes(f)
117+
## thread limit
118+
threadlim = fattr[FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK]
119+
if threaddim.x * threaddim.y * threaddim.z > threadlim
120+
error("Number of threads per block exceeds kernel limit ($(threaddim.x * threaddim.y * threaddim.z) > $threadlim).")
121+
end
122+
## shared memory limit
123+
shmem_lim = fattr[FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES]
124+
if shmem > shmem_lim
125+
error("Amount of dynamic shared memory exceeds kernel limit ($(Base.format_bytes(shmem)) > $(Base.format_bytes(shmem_lim))).")
126+
end
127+
114128
rethrow()
115129
end
116130

test/core/execution.jl

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,9 @@ end
4040
@testset "compilation params" begin
4141
@cuda dummy()
4242

43-
@test_throws CuError @cuda threads=2 maxthreads=1 dummy()
43+
@test_throws "Number of threads per block exceeds kernel limit" begin
44+
@cuda threads=2 maxthreads=1 dummy()
45+
end
4446
@cuda threads=2 dummy()
4547

4648
# sm_10 isn't supported by LLVM

0 commit comments

Comments
 (0)