Skip to content

Commit bb5da8a

Browse files
AlexeySotkin authored and vmaksimo committed
Add 'convergent' attribute for group and subgroup instructions.
Signed-off-by: Alexey Sotkin <alexey.sotkin@intel.com>
1 parent 2d8889f commit bb5da8a

File tree

2 files changed

+189
-0
lines changed

2 files changed

+189
-0
lines changed

llvm-spirv/lib/SPIRV/SPIRVReader.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3262,6 +3262,9 @@ Instruction *SPIRVToLLVM::transBuiltinFromInst(const std::string &FuncName,
32623262
Func->setCallingConv(CallingConv::SPIR_FUNC);
32633263
if (isFuncNoUnwind())
32643264
Func->addFnAttr(Attribute::NoUnwind);
3265+
auto OC = BI->getOpCode();
3266+
if (isGroupOpCode(OC) || isIntelSubgroupOpCode(OC))
3267+
Func->addFnAttr(Attribute::Convergent);
32653268
}
32663269
auto Call =
32673270
CallInst::Create(Func, transValue(Ops, BB->getParent(), BB), "", BB);
llvm-spirv/test/GroupAndSubgroupInstructions.spvasm

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
; Generated with:
2+
; source.cl:
3+
; void foo(int x, int2 coord, uint c, short s, float f, size_t n,
4+
; __global uint* p, read_write image2d_t image) {
5+
; work_group_all(x);
6+
; work_group_any(x);
7+
; work_group_broadcast(x, n);
8+
; sub_group_barrier(CLK_LOCAL_MEM_FENCE, memory_scope_sub_group);
9+
; sub_group_all(x);
10+
; sub_group_any(x);
11+
; sub_group_broadcast(x, c);
12+
; sub_group_reduce_add(x);
13+
; sub_group_reduce_add(f);
14+
; sub_group_reduce_min(x);
15+
; sub_group_reduce_min(c);
16+
; sub_group_reduce_min(f);
17+
; sub_group_reduce_max(x);
18+
; sub_group_reduce_max(c);
19+
; sub_group_reduce_max(f);
20+
; intel_sub_group_shuffle(x, c);
21+
; intel_sub_group_shuffle_down(x, x, c);
22+
; intel_sub_group_shuffle_up(x, x, c);
23+
; intel_sub_group_shuffle_xor(x, c);
24+
; intel_sub_group_block_read(p);
25+
; intel_sub_group_block_write(p, c);
26+
; intel_sub_group_block_read(image, coord);
27+
; intel_sub_group_block_write(image, coord, c);
28+
; }
29+
; clang -cc1 -O2 -triple spir -cl-std=cl2.0 -finclude-default-header -cl-ext=+all source.cl -emit-llvm-bc -o tmp.bc
30+
; llvm-spirv tmp.bc --spirv-ext=+all -o tmp.spv
31+
; spirv-dis tmp.spv -o llvm-spirv/test/GroupAndSubgroupInstructions.spvasm
32+
33+
; REQUIRES: spirv-as
34+
; RUN: spirv-as < %s --target-env spv1.0 -o %t.spv
35+
; RUN: spirv-val %t.spv
36+
; RUN: llvm-spirv -r %t.spv --spirv-target-env=CL1.2 -o %t.bc
37+
; RUN: llvm-dis %t.bc -o %t.ll
38+
; RUN: FileCheck < %t.ll %s --check-prefixes=CHECK-COMMON,CHECK-CL,CHECK-CL12
39+
40+
; RUN: llvm-spirv -r %t.spv --spirv-target-env=CL2.0 -o %t.bc
41+
; RUN: llvm-dis %t.bc -o %t.ll
42+
; RUN: FileCheck < %t.ll %s --check-prefixes=CHECK-COMMON,CHECK-CL,CHECK-CL20
43+
44+
; RUN: llvm-spirv -r %t.spv --spirv-target-env=SPV-IR -o %t.bc
45+
; RUN: llvm-dis %t.bc -o %t.ll
46+
; RUN: FileCheck < %t.ll %s --check-prefixes=CHECK-COMMON,CHECK-SPV-IR
47+
48+
49+
; CHECK-CL-DAG: declare spir_func i32 @_Z14work_group_alli(i32) #[[#Attrs:]]
50+
; CHECK-CL-DAG: declare spir_func i32 @_Z14work_group_anyi(i32) #[[#Attrs]]
51+
; CHECK-CL-DAG: declare spir_func i32 @_Z20work_group_broadcastjj(i32, i32) #[[#Attrs]]
52+
; CHECK-CL12-DAG: declare spir_func void @_Z7barrierj(i32) #[[#Attrs]]
53+
; CHECK-CL20-DAG: declare spir_func void @_Z17sub_group_barrierj12memory_scope(i32, i32) #[[#Attrs]]
54+
; CHECK-CL-DAG: declare spir_func i32 @_Z13sub_group_alli(i32) #[[#Attrs]]
55+
; CHECK-CL-DAG: declare spir_func i32 @_Z13sub_group_anyi(i32) #[[#Attrs]]
56+
; CHECK-CL-DAG: declare spir_func i32 @_Z19sub_group_broadcastjj(i32, i32) #[[#Attrs]]
57+
; CHECK-CL-DAG: declare spir_func i32 @_Z20sub_group_reduce_addi(i32) #[[#Attrs]]
58+
; CHECK-CL-DAG: declare spir_func float @_Z20sub_group_reduce_addf(float) #[[#Attrs]]
59+
; CHECK-CL-DAG: declare spir_func i32 @_Z20sub_group_reduce_mini(i32) #[[#Attrs]]
60+
; CHECK-CL-DAG: declare spir_func i32 @_Z20sub_group_reduce_minj(i32) #[[#Attrs]]
61+
; CHECK-CL-DAG: declare spir_func float @_Z20sub_group_reduce_minf(float) #[[#Attrs]]
62+
; CHECK-CL-DAG: declare spir_func i32 @_Z20sub_group_reduce_maxi(i32) #[[#Attrs]]
63+
; CHECK-CL-DAG: declare spir_func i32 @_Z20sub_group_reduce_maxj(i32) #[[#Attrs]]
64+
; CHECK-CL-DAG: declare spir_func float @_Z20sub_group_reduce_maxf(float) #[[#Attrs]]
65+
; CHECK-CL-DAG: declare spir_func i32 @_Z23intel_sub_group_shuffleij(i32, i32) #[[#Attrs]]
66+
; CHECK-CL-DAG: declare spir_func i32 @_Z28intel_sub_group_shuffle_downiij(i32, i32, i32) #[[#Attrs]]
67+
; CHECK-CL-DAG: declare spir_func i32 @_Z26intel_sub_group_shuffle_upiij(i32, i32, i32) #[[#Attrs]]
68+
; CHECK-CL-DAG: declare spir_func i32 @_Z27intel_sub_group_shuffle_xorij(i32, i32) #[[#Attrs]]
69+
; CHECK-CL-DAG: declare spir_func i32 @_Z26intel_sub_group_block_readPU3AS1Kj(i32 addrspace(1)*) #[[#Attrs]]
70+
; CHECK-CL-DAG: declare spir_func void @_Z27intel_sub_group_block_writePU3AS1jj(i32 addrspace(1)*, i32) #[[#Attrs]]
71+
; CHECK-CL-DAG: declare spir_func i32 @_Z26intel_sub_group_block_read14ocl_image2d_rwDv2_i(%opencl.image2d_rw_t addrspace(1)*, <2 x i32>) #[[#Attrs]]
72+
; CHECK-CL-DAG: declare spir_func void @_Z27intel_sub_group_block_write14ocl_image2d_rwDv2_ij(%opencl.image2d_rw_t addrspace(1)*, <2 x i32>, i32) #[[#Attrs]]
73+
74+
75+
; CHECK-SPV-IR: declare spir_func i1 @_Z16__spirv_GroupAllib(i32, i1) #[[#Attrs:]]
76+
; CHECK-SPV-IR: declare spir_func i1 @_Z16__spirv_GroupAnyib(i32, i1) #[[#Attrs]]
77+
; CHECK-SPV-IR: declare spir_func i32 @_Z22__spirv_GroupBroadcastiii(i32, i32, i32) #[[#Attrs]]
78+
; CHECK-SPV-IR: declare spir_func void @_Z22__spirv_ControlBarrieriii(i32, i32, i32) #0
79+
; CHECK-SPV-IR: declare spir_func i32 @_Z17__spirv_GroupIAddiii(i32, i32, i32) #[[#Attrs]]
80+
; CHECK-SPV-IR: declare spir_func float @_Z17__spirv_GroupFAddiif(i32, i32, float) #[[#Attrs]]
81+
; CHECK-SPV-IR: declare spir_func i32 @_Z17__spirv_GroupSMiniii(i32, i32, i32) #[[#Attrs]]
82+
; CHECK-SPV-IR: declare spir_func i32 @_Z17__spirv_GroupUMiniii(i32, i32, i32) #[[#Attrs]]
83+
; CHECK-SPV-IR: declare spir_func float @_Z17__spirv_GroupFMiniif(i32, i32, float) #[[#Attrs]]
84+
; CHECK-SPV-IR: declare spir_func i32 @_Z17__spirv_GroupSMaxiii(i32, i32, i32) #[[#Attrs]]
85+
; CHECK-SPV-IR: declare spir_func i32 @_Z17__spirv_GroupUMaxiii(i32, i32, i32) #[[#Attrs]]
86+
; CHECK-SPV-IR: declare spir_func float @_Z17__spirv_GroupFMaxiif(i32, i32, float) #[[#Attrs]]
87+
; CHECK-SPV-IR: declare spir_func i32 @_Z23intel_sub_group_shuffleij(i32, i32) #[[#Attrs]]
88+
; CHECK-SPV-IR: declare spir_func i32 @_Z28intel_sub_group_shuffle_downiij(i32, i32, i32) #[[#Attrs]]
89+
; CHECK-SPV-IR: declare spir_func i32 @_Z26intel_sub_group_shuffle_upiij(i32, i32, i32) #[[#Attrs]]
90+
; CHECK-SPV-IR: declare spir_func i32 @_Z27intel_sub_group_shuffle_xorij(i32, i32) #[[#Attrs]]
91+
; CHECK-SPV-IR: declare spir_func i32 @_Z26intel_sub_group_block_readPU3AS1Kj(i32 addrspace(1)*) #[[#Attrs]]
92+
; CHECK-SPV-IR: declare spir_func void @_Z27intel_sub_group_block_writePU3AS1jj(i32 addrspace(1)*, i32) #[[#Attrs]]
93+
; CHECK-SPV-IR: declare spir_func i32 @_Z26intel_sub_group_block_read14ocl_image2d_rwDv2_i(%opencl.image2d_rw_t addrspace(1)*, <2 x i32>) #[[#Attrs]]
94+
; CHECK-SPV-IR: declare spir_func void @_Z27intel_sub_group_block_write14ocl_image2d_rwDv2_ij(%opencl.image2d_rw_t addrspace(1)*, <2 x i32>, i32) #[[#Attrs]]
95+
96+
; CHECK-COMMON: attributes #[[#Attrs]] =
97+
; CHECK-COMMON-SAME: convergent
98+
99+
; SPIR-V
100+
; Version: 1.0
101+
; Generator: Khronos LLVM/SPIR-V Translator; 14
102+
; Bound: 60
103+
; Schema: 0
104+
OpCapability Addresses
105+
OpCapability Linkage
106+
OpCapability Kernel
107+
OpCapability ImageBasic
108+
OpCapability ImageReadWrite
109+
OpCapability Groups
110+
OpCapability Int16
111+
OpCapability GenericPointer
112+
OpCapability SubgroupShuffleINTEL
113+
OpCapability SubgroupBufferBlockIOINTEL
114+
OpCapability SubgroupImageBlockIOINTEL
115+
OpExtension "SPV_INTEL_subgroups"
116+
%1 = OpExtInstImport "OpenCL.std"
117+
OpMemoryModel Physical32 OpenCL
118+
OpSource OpenCL_C 200000
119+
OpDecorate %s FuncParamAttr Sext
120+
OpDecorate %dst FuncParamAttr NoCapture
121+
OpDecorate %src FuncParamAttr NoCapture
122+
OpDecorate %e FuncParamAttr NoCapture
123+
OpDecorate %foo LinkageAttributes "foo" Export
124+
%uint = OpTypeInt 32 0
125+
%ushort = OpTypeInt 16 0
126+
%uint_0 = OpConstant %uint 0
127+
%uint_2 = OpConstant %uint 2
128+
%uint_1 = OpConstant %uint 1
129+
%uint_3 = OpConstant %uint 3
130+
%uint_272 = OpConstant %uint 272
131+
%void = OpTypeVoid
132+
%v2uint = OpTypeVector %uint 2
133+
%float = OpTypeFloat 32
134+
%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
135+
%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
136+
%Event = OpTypeEvent
137+
%_ptr_Generic_Event = OpTypePointer Generic %Event
138+
%11 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadWrite
139+
%12 = OpTypeFunction %void %uint %v2uint %uint %ushort %float %uint %_ptr_CrossWorkgroup_uint %_ptr_Workgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_Generic_Event %11
140+
%bool = OpTypeBool
141+
%foo = OpFunction %void None %12
142+
%x = OpFunctionParameter %uint
143+
%coord = OpFunctionParameter %v2uint
144+
%c = OpFunctionParameter %uint
145+
%s = OpFunctionParameter %ushort
146+
%f = OpFunctionParameter %float
147+
%n = OpFunctionParameter %uint
148+
%p = OpFunctionParameter %_ptr_CrossWorkgroup_uint
149+
%dst = OpFunctionParameter %_ptr_Workgroup_uint
150+
%src = OpFunctionParameter %_ptr_CrossWorkgroup_uint
151+
%e = OpFunctionParameter %_ptr_Generic_Event
152+
%image = OpFunctionParameter %11
153+
%entry = OpLabel
154+
%28 = OpINotEqual %bool %x %uint_0
155+
%call20 = OpGroupAll %bool %uint_2 %28
156+
%call = OpSelect %uint %call20 %uint_1 %uint_0
157+
%33 = OpINotEqual %bool %x %uint_0
158+
%call121 = OpGroupAny %bool %uint_2 %33
159+
%call1 = OpSelect %uint %call121 %uint_1 %uint_0
160+
%call2 = OpGroupBroadcast %uint %uint_2 %x %n
161+
OpControlBarrier %uint_3 %uint_3 %uint_272
162+
%39 = OpINotEqual %bool %x %uint_0
163+
%call322 = OpGroupAll %bool %uint_3 %39
164+
%call3 = OpSelect %uint %call322 %uint_1 %uint_0
165+
%42 = OpINotEqual %bool %x %uint_0
166+
%call423 = OpGroupAny %bool %uint_3 %42
167+
%call4 = OpSelect %uint %call423 %uint_1 %uint_0
168+
%call5 = OpGroupBroadcast %uint %uint_3 %x %c
169+
%call6 = OpGroupIAdd %uint %uint_3 Reduce %x
170+
%call7 = OpGroupFAdd %float %uint_3 Reduce %f
171+
%call8 = OpGroupSMin %uint %uint_3 Reduce %x
172+
%call9 = OpGroupUMin %uint %uint_3 Reduce %c
173+
%call10 = OpGroupFMin %float %uint_3 Reduce %f
174+
%call11 = OpGroupSMax %uint %uint_3 Reduce %x
175+
%call12 = OpGroupUMax %uint %uint_3 Reduce %c
176+
%call13 = OpGroupFMax %float %uint_3 Reduce %f
177+
%call14 = OpSubgroupShuffleINTEL %uint %x %c
178+
%call15 = OpSubgroupShuffleDownINTEL %uint %x %x %c
179+
%call16 = OpSubgroupShuffleUpINTEL %uint %x %x %c
180+
%call17 = OpSubgroupShuffleXorINTEL %uint %x %c
181+
%call18 = OpSubgroupBlockReadINTEL %uint %p
182+
OpSubgroupBlockWriteINTEL %p %c
183+
%call19 = OpSubgroupImageBlockReadINTEL %uint %image %coord
184+
OpSubgroupImageBlockWriteINTEL %image %coord %c
185+
OpReturn
186+
OpFunctionEnd

0 commit comments

Comments
 (0)