# Synchronization Functions

-export barrier
+## SPIR-V wrappers

-const cl_mem_fence_flags = UInt32
-const CLK_LOCAL_MEM_FENCE = cl_mem_fence_flags(1)
-const CLK_GLOBAL_MEM_FENCE = cl_mem_fence_flags(2)
-
-#barrier(flags=0) = @builtin_ccall("barrier", Cvoid, (UInt32,), flags)
-@device_function barrier(flags=0) = Base.llvmcall(("""
-        declare void @_Z7barrierj(i32) #0
-        define void @entry(i32 %0) #1 {
-            call void @_Z7barrierj(i32 %0)
+module Scope
+    const CrossDevice = 0
+    const Device = 1
+    const Workgroup = 2
+    const Subgroup = 3
+    const Invocation = 4
+    const QueueFamily = 5
+    const ShaderCall = 6
+end
+
+module MemorySemantics
+    const None = 0x0000
+    const Relaxed = 0x0000
+    const Acquire = 0x0002
+    const Release = 0x0004
+    const AcquireRelease = 0x0008
+    const SequentiallyConsistent = 0x0010
+    const UniformMemory = 0x0040
+    const SubgroupMemory = 0x0080
+    const WorkgroupMemory = 0x0100
+    const CrossWorkgroupMemory = 0x0200
+    const AtomicCounterMemory = 0x0400
+    const ImageMemory = 0x0800
+    const OutputMemory = 0x1000
+    const MakeAvailable = 0x2000
+    const MakeVisible = 0x4000
+    const Volatile = 0x8000
+end
+
+# `@builtin_ccall` does not support additional function attributes like `convergent`,
+# and using the `@builtin_ccall` version causes validation issues.
+# XXX: is the `convergent` attribute even needed? Doesn't LLVM reconstruct it?
+
+#@device_function @inline memory_barrier(scope, semantics) =
+#    @builtin_ccall("__spirv_MemoryBarrier", Cvoid, (UInt32, UInt32), scope, semantics)
+@device_function memory_barrier(scope, semantics) =
+    Base.llvmcall(("""
+        declare void @_Z21__spirv_MemoryBarrierjj(i32, i32) #0
+        define void @entry(i32 %scope, i32 %semantics) #1 {
+            call void @_Z21__spirv_MemoryBarrierjj(i32 %scope, i32 %semantics)
+            ret void
+        }
+        attributes #0 = { convergent }
+        attributes #1 = { alwaysinline }
+        """, "entry"),
+        Cvoid, Tuple{UInt32, UInt32}, convert(UInt32, scope), convert(UInt32, semantics))
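+
+# Illustrative use of the wrapper above (a sketch, not part of the exported API):
+# an acquire-release fence over workgroup-local memory combines a scope with
+# memory-class and ordering bits:
+#   memory_barrier(Scope.Workgroup,
+#                  MemorySemantics.AcquireRelease | MemorySemantics.WorkgroupMemory)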
+
+#@device_function @inline control_barrier(execution_scope, memory_scope, memory_semantics) =
+#    @builtin_ccall("__spirv_ControlBarrier", Cvoid, (UInt32, UInt32, UInt32),
+#                   execution_scope, memory_scope, memory_semantics)
+@device_function @inline control_barrier(execution_scope, memory_scope, memory_semantics) =
+    Base.llvmcall(("""
+        declare void @_Z22__spirv_ControlBarrierjjj(i32, i32, i32) #0
+        define void @entry(i32 %execution, i32 %memory, i32 %semantics) #1 {
+            call void @_Z22__spirv_ControlBarrierjjj(i32 %execution, i32 %memory, i32 %semantics)
            ret void
        }
        attributes #0 = { convergent }
        attributes #1 = { alwaysinline }
        """, "entry"),
-        Cvoid, Tuple{Int32}, convert(Int32, flags))
-push!(opencl_builtins, "_Z7barrierj")
-# TODO: add support for attributes to @builting_ccall/LLVM.@typed_ccall
+        Cvoid,
+        Tuple{UInt32, UInt32, UInt32},
+        convert(UInt32, execution_scope),
+        convert(UInt32, memory_scope),
+        convert(UInt32, memory_semantics))
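+
+# Illustrative use (a sketch): a full work-group rendezvous that also makes
+# local-memory writes visible, which is what work_group_barrier below emits:
+#   control_barrier(Scope.Workgroup, Scope.Workgroup,
+#                   MemorySemantics.SequentiallyConsistent | MemorySemantics.WorkgroupMemory)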
+
+## OpenCL types
+
+const cl_mem_fence_flags = UInt32
+const LOCAL_MEM_FENCE = cl_mem_fence_flags(1)
+const GLOBAL_MEM_FENCE = cl_mem_fence_flags(2)
+const IMAGE_MEM_FENCE = cl_mem_fence_flags(4)
+
+@inline function mem_fence_flags_to_semantics(flags)
+    semantics = MemorySemantics.None
+    if (flags & LOCAL_MEM_FENCE) == LOCAL_MEM_FENCE
+        semantics |= MemorySemantics.WorkgroupMemory
+    end
+    if (flags & GLOBAL_MEM_FENCE) == GLOBAL_MEM_FENCE
+        semantics |= MemorySemantics.CrossWorkgroupMemory
+    end
+    return semantics
+end
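+
+# For example, passing both fence flags sets both memory-class bits:
+#   mem_fence_flags_to_semantics(LOCAL_MEM_FENCE | GLOBAL_MEM_FENCE) ==
+#       MemorySemantics.WorkgroupMemory | MemorySemantics.CrossWorkgroupMemory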
+
+@enum memory_scope begin
+    memory_scope_work_item
+    memory_scope_sub_group
+    memory_scope_work_group
+    memory_scope_device
+    memory_scope_all_svm_devices
+    memory_scope_all_devices
+end
+
+@inline function cl_scope_to_spirv(scope)
+    if scope == memory_scope_work_item
+        Scope.Invocation
+    elseif scope == memory_scope_sub_group
+        Scope.Subgroup
+    elseif scope == memory_scope_work_group
+        Scope.Workgroup
+    elseif scope == memory_scope_device
+        Scope.Device
+    elseif scope == memory_scope_all_svm_devices || scope == memory_scope_all_devices
+        Scope.CrossDevice
+    else
+        error("Invalid memory scope: $scope")
+    end
+end
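+
+# Note that both all-device scopes collapse onto the same SPIR-V scope:
+#   cl_scope_to_spirv(memory_scope_all_svm_devices) == Scope.CrossDevice
+#   cl_scope_to_spirv(memory_scope_all_devices)     == Scope.CrossDevice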
+
+@enum memory_order begin
+    memory_order_relaxed
+    memory_order_acquire
+    memory_order_release
+    memory_order_acq_rel
+    memory_order_seq_cst
+end
+
+
+## OpenCL memory barriers
+
+export atomic_work_item_fence, mem_fence, read_mem_fence, write_mem_fence
+
+@inline function atomic_work_item_fence(flags, order, scope)
+    semantics = mem_fence_flags_to_semantics(flags)
+    if order == memory_order_relaxed
+        semantics |= MemorySemantics.Relaxed
+    elseif order == memory_order_acquire
+        semantics |= MemorySemantics.Acquire
+    elseif order == memory_order_release
+        semantics |= MemorySemantics.Release
+    elseif order == memory_order_acq_rel
+        semantics |= MemorySemantics.AcquireRelease
+    elseif order == memory_order_seq_cst
+        semantics |= MemorySemantics.SequentiallyConsistent
+    else
+        error("Invalid memory order: $order")
+    end
+    memory_barrier(cl_scope_to_spirv(scope), semantics)
+end
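+
+# Usage sketch, mirroring OpenCL C's atomic_work_item_fence: release local-memory
+# writes to the rest of the work-group:
+#   atomic_work_item_fence(LOCAL_MEM_FENCE, memory_order_release, memory_scope_work_group)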
+
+# legacy fence functions
+mem_fence(flags) = atomic_work_item_fence(flags, memory_order_acq_rel, memory_scope_work_group)
+read_mem_fence(flags) = atomic_work_item_fence(flags, memory_order_acquire, memory_scope_work_group)
+write_mem_fence(flags) = atomic_work_item_fence(flags, memory_order_release, memory_scope_work_group)
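+
+# For example, mem_fence(GLOBAL_MEM_FENCE) boils down to
+#   memory_barrier(Scope.Workgroup,
+#                  MemorySemantics.AcquireRelease | MemorySemantics.CrossWorkgroupMemory)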
+
+
+## OpenCL execution barriers
+
+export barrier, work_group_barrier
+
+@inline work_group_barrier(flags, scope = memory_scope_work_group) =
+    control_barrier(Scope.Workgroup, cl_scope_to_spirv(scope),
+                    MemorySemantics.SequentiallyConsistent | mem_fence_flags_to_semantics(flags))
+
+barrier(flags) = work_group_barrier(flags)
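+
+# Usage sketch for a kernel that stages data through local memory; `barrier`
+# behaves like OpenCL C's barrier():
+#   barrier(LOCAL_MEM_FENCE)  # all work-items wait; local writes become visible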