Skip to content

Commit 2b2d030

Browse files
committed
Remove the hacky uniquing of shared-memory global variables (shmem GVs).
Now that llvmcall is emitted in its own module, we can just rely on the linker to handle this.
1 parent b04eb2f commit 2b2d030

File tree

1 file changed

+4
-15
lines changed

1 file changed

+4
-15
lines changed

src/device/intrinsics/memory_shared.jl

Lines changed: 4 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,6 @@
22

33
export @cuStaticSharedMem, @cuDynamicSharedMem
44

5-
shmem_id = 0
6-
75
"""
86
@cuStaticSharedMem(T::Type, dims) -> CuDeviceArray{T,AS.Shared}
97
@@ -13,15 +11,9 @@ inferable and the dimensions should be constant, or an error will be thrown and
1311
generator function will be called dynamically.
1412
"""
1513
macro cuStaticSharedMem(T, dims)
16-
# FIXME: generating a unique id in the macro is incorrect, as multiple parametrically typed
17-
# functions will alias the id (and the size might be a parameter). but incrementing in
18-
# the @generated function doesn't work, as it is supposed to be pure and identical
19-
# invocations will erroneously share (and even cause multiple shmem globals).
20-
id = gensym("static_shmem")
21-
2214
quote
2315
len = prod($(esc(dims)))
24-
ptr = emit_shmem(Val($(QuoteNode(id))), $(esc(T)), Val(len))
16+
ptr = emit_shmem($(esc(T)), Val(len))
2517
CuDeviceArray($(esc(dims)), ptr)
2618
end
2719
end
@@ -40,19 +32,16 @@ pointer can be specified. This is useful when dealing with a heterogeneous buffe
4032
shared memory; in the case of a homogeneous multi-part buffer it is preferred to use `view`.
4133
"""
4234
macro cuDynamicSharedMem(T, dims, offset=0)
43-
id = gensym("dynamic_shmem")
44-
4535
# TODO: boundscheck against %dynamic_smem_size (currently unsupported by LLVM)
46-
4736
quote
4837
len = prod($(esc(dims)))
49-
ptr = emit_shmem(Val($(QuoteNode(id))), $(esc(T))) + $(esc(offset))
38+
ptr = emit_shmem($(esc(T))) + $(esc(offset))
5039
CuDeviceArray($(esc(dims)), ptr)
5140
end
5241
end
5342

5443
# get a pointer to shared memory, with known (static) or zero length (dynamic shared memory)
55-
@generated function emit_shmem(::Val{id}, ::Type{T}, ::Val{len}=Val(0)) where {id,T,len}
44+
@generated function emit_shmem(::Type{T}, ::Val{len}=Val(0)) where {T,len}
5645
Context() do ctx
5746
eltyp = convert(LLVMType, T; ctx)
5847
T_ptr = convert(LLVMType, LLVMPtr{T,AS.Shared}; ctx)
@@ -63,7 +52,7 @@ end
6352
# create the global variable
6453
mod = LLVM.parent(llvm_f)
6554
gv_typ = LLVM.ArrayType(eltyp, len)
66-
gv = GlobalVariable(mod, gv_typ, GPUCompiler.safe_name(string(id)), AS.Shared)
55+
gv = GlobalVariable(mod, gv_typ, "shmem", AS.Shared)
6756
if len > 0
6857
# static shared memory should be demoted to local variables, whenever possible.
6958
# this is done by the NVPTX ASM printer:

0 commit comments

Comments
 (0)