Skip to content

Commit f7fd063

Browse files
committed
Add dynamic shared memory bounds checking.
1 parent efcfe39 commit f7fd063

File tree

3 files changed

+23
-16
lines changed

3 files changed

+23
-16
lines changed

src/device/intrinsics/memory_shared.jl

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,10 @@ shared memory; in the case of a homogeneous multi-part buffer it is preferred to
4040
"""
4141
@inline function CuDynamicSharedArray(::Type{T}, dims, offset=0) where {T}
    len = prod(dims)
    # `offset` is added to the raw pointer below, so it is a byte count, and the
    # `%dynamic_smem_size` register is a byte count as well. `len` is a number of
    # elements, so it has to be scaled by the element size before comparing; otherwise
    # any `T` wider than one byte could pass the check while running past the end of
    # the dynamic shared memory allocation.
    @boundscheck if offset + len * sizeof(T) > dynamic_smem_size()
        throw(BoundsError())
    end
    # zero-length (dynamic) shared memory pointer, advanced by the requested offset
    ptr = emit_shmem(T) + offset
    CuDeviceArray(dims, ptr)
end
4749

@@ -52,6 +54,8 @@ macro cuDynamicSharedMem(T, dims, offset=0)
5254
end
5355
end
5456

57+
# Read the PTX special register `%dynamic_smem_size` into a 32-bit register and return
# its value as a `UInt32`.
function dynamic_smem_size()
    return @asmcall("mov.u32 \$0, %dynamic_smem_size;", "=r", true, UInt32, Tuple{})
end
58+
5559
# get a pointer to shared memory, with known (static) or zero length (dynamic shared memory)
5660
@generated function emit_shmem(::Type{T}, ::Val{len}=Val(0)) where {T,len}
5761
Context() do ctx

test/device/intrinsics/memory.jl

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -19,20 +19,20 @@ n = 256
1919
@on_device CuStaticSharedArray(Tuple{RGB{Float32}, UInt32}, (1,2))
2020

2121
# dynamic
# `shmem` is the number of bytes of dynamic shared memory requested for the launch, so
# each kernel asks for room for every element of its array: prod(dims) * sizeof(T).
@on_device shmem=sizeof(Float32) CuDynamicSharedArray(Float32, 1)
@on_device shmem=2*sizeof(Float32) CuDynamicSharedArray(Float32, (1, 2))
@on_device shmem=sizeof(Tuple{Float32, Float32}) CuDynamicSharedArray(Tuple{Float32, Float32}, 1)
@on_device shmem=2*sizeof(Tuple{Float32, Float32}) CuDynamicSharedArray(Tuple{Float32, Float32}, (1,2))
@on_device shmem=sizeof(Tuple{RGB{Float32}, UInt32}) CuDynamicSharedArray(Tuple{RGB{Float32}, UInt32}, 1)
@on_device shmem=2*sizeof(Tuple{RGB{Float32}, UInt32}) CuDynamicSharedArray(Tuple{RGB{Float32}, UInt32}, (1,2))
2828

2929
# dynamic with offset
# The array starts 8 bytes into the dynamic shared memory, so the launch has to provide
# those 8 bytes plus room for every element: prod(dims) * sizeof(T) + 8.
@on_device shmem=sizeof(Float32)+8 CuDynamicSharedArray(Float32, 1, 8)
@on_device shmem=2*sizeof(Float32)+8 CuDynamicSharedArray(Float32, (1,2), 8)
@on_device shmem=sizeof(Tuple{Float32, Float32})+8 CuDynamicSharedArray(Tuple{Float32, Float32}, 1, 8)
@on_device shmem=2*sizeof(Tuple{Float32, Float32})+8 CuDynamicSharedArray(Tuple{Float32, Float32}, (1,2), 8)
@on_device shmem=sizeof(Tuple{RGB{Float32}, UInt32})+8 CuDynamicSharedArray(Tuple{RGB{Float32}, UInt32}, 1, 8)
@on_device shmem=2*sizeof(Tuple{RGB{Float32}, UInt32})+8 CuDynamicSharedArray(Tuple{RGB{Float32}, UInt32}, (1,2), 8)
3636
end
3737

3838

test/setup.jl

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -196,16 +196,19 @@ macro test_throws_macro(ty, ex)
196196
end
197197

198198
# Run some code on-device
199-
macro on_device(ex...)
    # The last argument is the code to run inside the kernel; everything in front of it
    # is forwarded unchanged to `@cuda` (for example `shmem=...`).
    body = ex[end]
    launch_args = ex[1:(end - 1)]

    # Fresh name so that repeated uses do not clash on the kernel function.
    kernel = gensym("kernel")

    expansion = quote
        let
            function $kernel()
                $body
                return
            end

            CUDA.@sync @cuda $(launch_args...) $kernel()
        end
    end
    return esc(expansion)
end

0 commit comments

Comments
 (0)