Skip to content

Commit 7ad882a

Browse files
Remove GCN alloca hacks when using Julia 1.9 (#342)
Co-authored-by: Julian P Samaroo <jpsamaroo@jpsamaroo.me>
1 parent 99b7937 commit 7ad882a

File tree

3 files changed: 49 additions and 31 deletions

src/gcn.jl

Lines changed: 19 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -63,12 +63,14 @@ function finish_module!(@nospecialize(job::CompilerJob{GCNCompilerTarget}),
6363
end
6464

6565
function optimize!(job::CompilerJob{GCNCompilerTarget}, mod::LLVM.Module)
66-
# we have to fake our target early in the pipeline because Julia's
67-
# optimization passes weren't designed for a non-0 stack addrspace, and the
68-
# AMDGPU target is very strict about which addrspaces are permitted for
69-
# various code patterns
70-
triple!(mod, llvm_triple(NativeCompilerTarget()))
71-
datalayout!(mod, julia_datalayout(NativeCompilerTarget()))
66+
@static if VERSION < v"1.9.0-DEV.1018"
67+
# we have to fake our target early in the pipeline because Julia's
68+
# optimization passes weren't designed for a non-0 stack addrspace, and the
69+
# AMDGPU target is very strict about which addrspaces are permitted for
70+
# various code patterns
71+
triple!(mod, llvm_triple(NativeCompilerTarget()))
72+
datalayout!(mod, julia_datalayout(NativeCompilerTarget()))
73+
end
7274

7375
invoke(optimize!, Tuple{CompilerJob, LLVM.Module}, job, mod)
7476
end
@@ -80,18 +82,20 @@ end
8082
# 2. We don't want any chance of messing with Julia's optimizations, since they
8183
# eliminate target-unsafe IR patterns
8284
function optimize_module!(job::CompilerJob{GCNCompilerTarget}, mod::LLVM.Module)
83-
# revert back to the AMDGPU target
84-
triple!(mod, llvm_triple(job.target))
85-
datalayout!(mod, julia_datalayout(job.target))
85+
@static if VERSION < v"1.9.0-DEV.1018"
86+
# revert back to the AMDGPU target
87+
triple!(mod, llvm_triple(job.target))
88+
datalayout!(mod, julia_datalayout(job.target))
8689

87-
tm = llvm_machine(job.target)
88-
@dispose pm=ModulePassManager() begin
89-
add_library_info!(pm, triple(mod))
90-
add_transform_info!(pm, tm)
90+
tm = llvm_machine(job.target)
91+
@dispose pm=ModulePassManager() begin
92+
add_library_info!(pm, triple(mod))
93+
add_transform_info!(pm, tm)
9194

92-
add!(pm, FunctionPass("FixAllocaAddrspace", fix_alloca_addrspace!))
95+
add!(pm, FunctionPass("FixAllocaAddrspace", fix_alloca_addrspace!))
9396

94-
run!(pm, mod)
97+
run!(pm, mod)
98+
end
9599
end
96100
end
97101

test/gcn.jl

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,9 @@
1+
if VERSION >= v"1.9.0-DEV.1018"
2+
@inline sink_gcn(i) = sink(i, Val(5))
3+
else
4+
@inline sink_gcn(i) = sink(i, Val(0))
5+
end
6+
17
@testset "GCN" begin
28

39
@test GCNCompilerTarget(dev_isa="gfx900") == GCNCompilerTarget("gfx900")
@@ -19,16 +25,18 @@ include("definitions/gcn.jl")
1925
@test occursin("amdgpu_kernel", ir)
2026
end
2127

28+
if VERSION < v"1.9.0-DEV.1018"
2229
@testset "alloca addrspace" begin
2330
function kernel(i)
24-
sink(i) # sink provides an alloca in addrspace 0
31+
sink(i, Val(0)) # sink provides an alloca in addrspace 0
2532
return
2633
end
2734

2835
ir = sprint(io->gcn_code_llvm(io, kernel, Tuple{Int64}; dump_module=true))
2936
@test occursin(r"alloca i64, (align 8, )?addrspace\(5\)$"m, ir)
3037
@test !occursin(r"alloca i64(, align \d)?$"m, ir)
3138
end
39+
end
3240

3341
end
3442

@@ -57,7 +65,7 @@ end
5765
@testset "child functions" begin
5866
# we often test using @noinline child functions, so test whether these survive
5967
# (despite not having side-effects)
60-
@noinline child(i) = sink(i)
68+
@noinline child(i) = sink_gcn(i)
6169
function parent(i)
6270
child(i)
6371
return
@@ -69,7 +77,7 @@ end
6977
end
7078

7179
@testset "kernel functions" begin
72-
@noinline nonentry(i) = sink(i)
80+
@noinline nonentry(i) = sink_gcn(i)
7381
function entry(i)
7482
nonentry(i)
7583
return
@@ -85,7 +93,7 @@ end
8593
# bug: depending on a child function from multiple parents resulted in
8694
# the child only being present once
8795

88-
@noinline child(i) = sink(i)
96+
@noinline child(i) = sink_gcn(i)
8997
function parent1(i)
9098
child(i)
9199
return
@@ -106,8 +114,8 @@ end
106114
@testset "child function reuse bis" begin
107115
# bug: similar, but slightly different issue as above
108116
# in the case of two child functions
109-
@noinline child1(i) = sink(i)
110-
@noinline child2(i) = sink(i+1)
117+
@noinline child1(i) = sink_gcn(i)
118+
@noinline child2(i) = sink_gcn(i+1)
111119
function parent1(i)
112120
child1(i) + child2(i)
113121
return

test/util.jl

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -23,16 +23,22 @@ macro test_throws_message(f, typ, ex...)
2323
end
2424

2525
# helper function for sinking a value to prevent the callee from getting optimized away
26-
@inline sink(i::T) where T <: Union{Int32,UInt32} =
27-
Base.llvmcall("""%slot = alloca i32
28-
store volatile i32 %0, i32* %slot
29-
%value = load volatile i32, i32* %slot
30-
ret i32 %value""", T, Tuple{T}, i)
31-
@inline sink(i::T) where T <: Union{Int64,UInt64} =
32-
Base.llvmcall("""%slot = alloca i64
33-
store volatile i64 %0, i64* %slot
34-
%value = load volatile i64, i64* %slot
35-
ret i64 %value""", T, Tuple{T}, i)
26+
@inline @generated function sink(i::T, ::Val{addrspace}=Val(0)) where {T <: Union{Int32,UInt32}, addrspace}
27+
as_str = addrspace > 0 ? " addrspace($addrspace)" : ""
28+
llvmcall_str = """%slot = alloca i32$(addrspace > 0 ? ", addrspace($addrspace)" : "")
29+
store volatile i32 %0, i32$(as_str)* %slot
30+
%value = load volatile i32, i32$(as_str)* %slot
31+
ret i32 %value"""
32+
return :(Base.llvmcall($llvmcall_str, T, Tuple{T}, i))
33+
end
34+
@inline @generated function sink(i::T, ::Val{addrspace}=Val(0)) where {T <: Union{Int64,UInt64}, addrspace}
35+
as_str = addrspace > 0 ? " addrspace($addrspace)" : ""
36+
llvmcall_str = """%slot = alloca i64$(addrspace > 0 ? ", addrspace($addrspace)" : "")
37+
store volatile i64 %0, i64$(as_str)* %slot
38+
%value = load volatile i64, i64$(as_str)* %slot
39+
ret i64 %value"""
40+
return :(Base.llvmcall($llvmcall_str, T, Tuple{T}, i))
41+
end
3642

3743

3844
# the GPU runtime library

0 commit comments

Comments
 (0)