1
1
"""
    HIPCompilerParams(wavefrontsize64::Bool, unsafe_fp_atomics::Bool)

HIP-specific parameters that are stored in the compiler configuration.

# Fields
- `wavefrontsize64::Bool`: whether to compile the kernel for a wavefront of size 64.
- `unsafe_fp_atomics::Bool`: whether to request 'unsafe' floating-point atomics.
"""
struct HIPCompilerParams <: AbstractCompilerParams
    # Whether to compile kernel for the wavefront of size 64.
    wavefrontsize64::Bool
    # AMD GPU devices support fast atomic read-modify-write (RMW)
    # operations on floating-point values.
    # On single- or double-precision floating-point values this may generate
    # a hardware RMW instruction that is faster than emulating
    # the atomic operation using an atomic compare-and-swap (CAS) loop.
    unsafe_fp_atomics::Bool
end
5
11
6
12
# Alias for the compiler configuration type parameterized by the GCN target
# and the HIP-specific compiler parameters.
const HIPCompilerConfig = CompilerConfig{GCNCompilerTarget, HIPCompilerParams}
@@ -65,10 +71,18 @@ function GPUCompiler.finish_module!(
65
71
target_fns = (
66
72
" signal_exception" , " report_exception" , " malloc" , " __throw_" )
67
73
inline_attr = EnumAttribute (" alwaysinline" )
74
+ atomic_attr = StringAttribute (" amdgpu-unsafe-fp-atomics" , " true" )
75
+
68
76
for fn in LLVM. functions (mod)
69
- any (occursin .(target_fns, LLVM. name (fn))) || continue
70
- attrs = LLVM. function_attributes (fn)
71
- inline_attr ∈ collect (attrs) || push! (attrs, inline_attr)
77
+ do_inline = any (occursin .(target_fns, LLVM. name (fn)))
78
+ if job. config. params. unsafe_fp_atomics || do_inline
79
+ attrs = LLVM. function_attributes (fn)
80
+
81
+ do_inline && inline_attr ∉ collect (attrs) &&
82
+ push! (attrs, inline_attr)
83
+ job. config. params. unsafe_fp_atomics &&
84
+ push! (attrs, atomic_attr)
85
+ end
72
86
end
73
87
74
88
return entry
@@ -85,18 +99,36 @@ function parse_llvm_features(arch::String)
85
99
end
86
100
87
101
88
"""
    compiler_config(dev::HIP.HIPDevice; name, kernel, unsafe_fp_atomics)

Build the `CompilerConfig` used to compile code for the device `dev`.

The GCN target (ISA and feature string) is obtained by passing
`HIP.gcn_arch(dev)` to `parse_llvm_features`. The HIP parameters record
whether the device wavefront size equals 64 and the `unsafe_fp_atomics` flag.

# Keywords
- `name::Union{String, Nothing} = nothing`: forwarded to `CompilerConfig`.
- `kernel::Bool = true`: forwarded to `CompilerConfig`.
- `unsafe_fp_atomics::Bool = true`: stored in `HIPCompilerParams`.
"""
function compiler_config(
    dev::HIP.HIPDevice;
    name::Union{String, Nothing} = nothing,
    kernel::Bool = true,
    unsafe_fp_atomics::Bool = true,
)
    dev_isa, features = parse_llvm_features(HIP.gcn_arch(dev))
    target = GCNCompilerTarget(; dev_isa, features)
    params = HIPCompilerParams(HIP.wavefrontsize(dev) == 64, unsafe_fp_atomics)
    # `always_inline` is not configurable by callers; it is always enabled.
    return CompilerConfig(target, params; kernel, name, always_inline=true)
end
97
111
98
112
# Reentrant lock that `hipfunction` acquires (via `Base.@lock`) around its body.
const hipfunction_lock = ReentrantLock ()
99
113
114
+ """
115
+ hipfunction(f::F, tt::TT = Tuple{}; kwargs...)
116
+
117
+ Compile Julia function `f` to a HIP kernel given a tuple of
118
+ argument types `tt` that it accepts.
119
+
120
+ The following kwargs are supported:
121
+
122
+ - `name::Union{String, Nothing} = nothing`:
123
+ A unique name to give a compiled kernel.
124
+ - `unsafe_fp_atomics::Bool = true`:
125
+ Whether to use 'unsafe' floating-point atomics.
126
+ AMD GPU devices support fast atomic read-modify-write (RMW)
127
+ operations on floating-point values.
128
+ On single- or double-precision floating-point values this may generate
129
+ a hardware RMW instruction that is faster than emulating
130
+ the atomic operation using an atomic compare-and-swap (CAS) loop.
131
+ """
100
132
function hipfunction (f:: F , tt:: TT = Tuple{}; kwargs... ) where {F <: Core.Function , TT}
101
133
Base. @lock hipfunction_lock begin
102
134
dev = AMDGPU. device ()
0 commit comments