Skip to content

Commit ba7e37b

Browse files
MrSidimsjsji
authored and committed
Allow short16 for ushort16 intel_sub_group_block_read/write (#2718)
Per cl_intel_subgroups_short V 1.1.0 short16 is allowed for these builtins. Signed-off-by: Sidorov, Dmitry <dmitry.sidorov@intel.com> Original commit: KhronosGroup/SPIRV-LLVM-Translator@6895a2eb5d053d8
1 parent 78314ef commit ba7e37b

File tree

2 files changed

+74
-2
lines changed

2 files changed

+74
-2
lines changed

llvm-spirv/lib/SPIRV/OCLUtil.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1471,8 +1471,8 @@ std::string getIntelSubgroupBlockDataPostfix(unsigned ElementBitSize,
14711471
OSS << VectorNumElements;
14721472
break;
14731473
case 16:
1474-
assert(ElementBitSize == 8 &&
1475-
"16 elements vector allowed only for char builtins");
1474+
assert((ElementBitSize == 8 || ElementBitSize == 16) &&
1475+
"16 elements vector allowed only for char and short builtins");
14761476
OSS << VectorNumElements;
14771477
break;
14781478
default:

llvm-spirv/test/extensions/INTEL/SPV_INTEL_subgroups/cl_intel_sub_groups.ll

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,16 @@
3131
; intel_sub_group_block_write_ul2(image_out, coord, ul2);
3232
; ul2 = intel_sub_group_block_read_ul2(lp);
3333
; intel_sub_group_block_write_ul2(lp, ul2);
34+
;
35+
; uchar16 uc16 = intel_sub_group_block_read_uc16(image_in, coord);
36+
; intel_sub_group_block_write_uc16(image_out, coord, uc16);
37+
; uc16 = intel_sub_group_block_read_uc16(cp);
38+
; intel_sub_group_block_write_uc16(cp, uc16);
39+
;
40+
; ushort16 us16 = intel_sub_group_block_read_us16(image_in, coord);
41+
; intel_sub_group_block_write_us16(image_out, coord, us16);
42+
; us16 = intel_sub_group_block_read_us16(sp);
43+
; intel_sub_group_block_write_us16(sp, us16);
3444
;}
3545

3646
; RUN: llvm-as %s -o %t.bc
@@ -75,6 +85,16 @@
7585
; CHECK-SPIRV: SubgroupBlockReadINTEL
7686
; CHECK-SPIRV: SubgroupBlockWriteINTEL
7787

88+
; CHECK-SPIRV: SubgroupImageBlockReadINTEL
89+
; CHECK-SPIRV: SubgroupImageBlockWriteINTEL
90+
; CHECK-SPIRV: SubgroupBlockReadINTEL
91+
; CHECK-SPIRV: SubgroupBlockWriteINTEL
92+
93+
; CHECK-SPIRV: SubgroupImageBlockReadINTEL
94+
; CHECK-SPIRV: SubgroupImageBlockWriteINTEL
95+
; CHECK-SPIRV: SubgroupBlockReadINTEL
96+
; CHECK-SPIRV: SubgroupBlockWriteINTEL
97+
7898
; CHECK-SPIRV-LABEL: Return
7999

80100
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
@@ -108,6 +128,14 @@ define spir_kernel void @test(<2 x float> %x, i32 %c, ptr addrspace(1) %image_in
108128
; CHECK-LLVM-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ul214ocl_image2d_woDv2_iDv2_m(ptr addrspace(1) [[IMAGE_OUT]], <2 x i32> [[COORD]], <2 x i64> [[CALL10]])
109129
; CHECK-LLVM-NEXT: [[CALL11:%.*]] = call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(ptr addrspace(1) [[LP:%.*]])
110130
; CHECK-LLVM-NEXT: call spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS1mDv2_m(ptr addrspace(1) [[LP]], <2 x i64> [[CALL11]])
131+
; CHECK-LLVM-NEXT: [[CALL12:%.*]] = call spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc1614ocl_image2d_roDv2_i(ptr addrspace(1) [[IMAGE_IN]], <2 x i32> [[COORD]])
132+
; CHECK-LLVM-NEXT: call spir_func void @_Z32intel_sub_group_block_write_uc1614ocl_image2d_woDv2_iDv16_h(ptr addrspace(1) [[IMAGE_OUT]], <2 x i32> [[COORD]], <16 x i8> [[CALL12]])
133+
; CHECK-LLVM-NEXT: [[CALL13:%.*]] = call spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc16PU3AS1Kh(ptr addrspace(1) [[CP]])
134+
; CHECK-LLVM-NEXT: call spir_func void @_Z32intel_sub_group_block_write_uc16PU3AS1hDv16_h(ptr addrspace(1) [[CP]], <16 x i8> [[CALL13]])
135+
; CHECK-LLVM-NEXT: [[CALL14:%.*]] = call spir_func <16 x i16> @_Z31intel_sub_group_block_read_us1614ocl_image2d_roDv2_i(ptr addrspace(1) [[IMAGE_IN]], <2 x i32> [[COORD]])
136+
; CHECK-LLVM-NEXT: call spir_func void @_Z32intel_sub_group_block_write_us1614ocl_image2d_woDv2_iDv16_t(ptr addrspace(1) [[IMAGE_OUT]], <2 x i32> [[COORD]], <16 x i16> [[CALL14]])
137+
; CHECK-LLVM-NEXT: [[CALL15:%.*]] = call spir_func <16 x i16> @_Z31intel_sub_group_block_read_us16PU3AS1Kt(ptr addrspace(1) [[SP]])
138+
; CHECK-LLVM-NEXT: call spir_func void @_Z32intel_sub_group_block_write_us16PU3AS1tDv16_t(ptr addrspace(1) [[SP]], <16 x i16> [[CALL15]])
111139
; CHECK-LLVM-NEXT: ret void
112140

113141
; CHECK-LLVM-SPIRV: call spir_func <2 x float> @_Z28__spirv_SubgroupShuffleINTELDv2_fj(
@@ -131,6 +159,16 @@ define spir_kernel void @test(<2 x float> %x, i32 %c, ptr addrspace(1) %image_in
131159
; CHECK-LLVM-SPIRV: call spir_func <2 x i64> @_Z37__spirv_SubgroupBlockReadINTEL_Rlong2PU3AS1Km(
132160
; CHECK-LLVM-SPIRV: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1mDv2_m(
133161

162+
; CHECK-LLVM-SPIRV: call spir_func <16 x i8> @_Z43__spirv_SubgroupImageBlockReadINTEL_Rchar16PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)
163+
; CHECK-LLVM-SPIRV: call spir_func void @_Z36__spirv_SubgroupImageBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iDv16_h(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)
164+
; CHECK-LLVM-SPIRV: call spir_func <16 x i8> @_Z38__spirv_SubgroupBlockReadINTEL_Rchar16PU3AS1Kh(
165+
; CHECK-LLVM-SPIRV: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1hDv16_h(
166+
; CHECK-LLVM-SPIRV: call spir_func <16 x i16> @_Z44__spirv_SubgroupImageBlockReadINTEL_Rshort16PU3AS133__spirv_Image__void_1_0_0_0_0_0_0Dv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)
167+
; CHECK-LLVM-SPIRV: call spir_func void @_Z36__spirv_SubgroupImageBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_1Dv2_iDv16_t(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)
168+
; CHECK-LLVM-SPIRV: call spir_func <16 x i16> @_Z39__spirv_SubgroupBlockReadINTEL_Rshort16PU3AS1Kt(
169+
; CHECK-LLVM-SPIRV: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1tDv16_t(
170+
171+
134172
entry:
135173
%call = tail call spir_func <2 x float> @_Z23intel_sub_group_shuffleDv2_fj(<2 x float> %x, i32 %c) #2
136174
%call1 = tail call spir_func <2 x float> @_Z28intel_sub_group_shuffle_downDv2_fS_j(<2 x float> %x, <2 x float> %x, i32 %c) #2
@@ -156,6 +194,16 @@ entry:
156194
tail call spir_func void @_Z31intel_sub_group_block_write_ul214ocl_image2d_woDv2_iDv2_m(ptr addrspace(1) %image_out, <2 x i32> %coord, <2 x i64> %call10) #2
157195
%call11 = tail call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(ptr addrspace(1) %lp) #2
158196
tail call spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS1mDv2_m(ptr addrspace(1) %lp, <2 x i64> %call11) #2
197+
198+
%call12 = tail call spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc1614ocl_image2d_roDv2_i(ptr addrspace(1) %image_in, <2 x i32> %coord) #2
199+
tail call spir_func void @_Z32intel_sub_group_block_write_uc1614ocl_image2d_woDv2_iDv16_h(ptr addrspace(1) %image_out, <2 x i32> %coord, <16 x i8> %call12) #2
200+
%call13 = tail call spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc16PU3AS1Kh(ptr addrspace(1) %cp) #2
201+
tail call spir_func void @_Z32intel_sub_group_block_write_uc16PU3AS1hDv16_h(ptr addrspace(1) %cp, <16 x i8> %call13) #2
202+
203+
%call14 = tail call spir_func <16 x i16> @_Z31intel_sub_group_block_read_us1614ocl_image2d_roDv2_i(ptr addrspace(1) %image_in, <2 x i32> %coord) #2
204+
tail call spir_func void @_Z32intel_sub_group_block_write_us1614ocl_image2d_woDv2_iDv16_t(ptr addrspace(1) %image_out, <2 x i32> %coord, <16 x i16> %call14) #2
205+
%call15 = tail call spir_func <16 x i16> @_Z31intel_sub_group_block_read_us16PU3AS1Kt(ptr addrspace(1) %sp) #2
206+
tail call spir_func void @_Z32intel_sub_group_block_write_us16PU3AS1tDv16_t(ptr addrspace(1) %sp, <16 x i16> %call15) #2
159207

160208
ret void
161209
}
@@ -220,6 +268,30 @@ declare spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(ptr addr
220268
; Function Attrs: convergent
221269
declare spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS1mDv2_m(ptr addrspace(1), <2 x i64>) local_unnamed_addr #1
222270

271+
; Function Attrs: convergent
272+
declare spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc1614ocl_image2d_roDv2_i(ptr addrspace(1), <2 x i32>) #1
273+
274+
; Function Attrs: convergent
275+
declare spir_func void @_Z32intel_sub_group_block_write_uc1614ocl_image2d_woDv2_iDv16_h(ptr addrspace(1), <2 x i32>, <16 x i8>) #1
276+
277+
; Function Attrs: convergent
278+
declare spir_func <16 x i8> @_Z31intel_sub_group_block_read_uc16PU3AS1Kh(ptr addrspace(1)) #1
279+
280+
; Function Attrs: convergent
281+
declare spir_func void @_Z32intel_sub_group_block_write_uc16PU3AS1hDv16_h(ptr addrspace(1), <16 x i8>) #1
282+
283+
; Function Attrs: convergent
284+
declare spir_func <16 x i16> @_Z31intel_sub_group_block_read_us1614ocl_image2d_roDv2_i(ptr addrspace(1), <2 x i32>) local_unnamed_addr #1
285+
286+
; Function Attrs: convergent
287+
declare spir_func void @_Z32intel_sub_group_block_write_us1614ocl_image2d_woDv2_iDv16_t(ptr addrspace(1), <2 x i32>, <16 x i16>) local_unnamed_addr #1
288+
289+
; Function Attrs: convergent
290+
declare spir_func <16 x i16> @_Z31intel_sub_group_block_read_us16PU3AS1Kt(ptr addrspace(1)) local_unnamed_addr #1
291+
292+
; Function Attrs: convergent
293+
declare spir_func void @_Z32intel_sub_group_block_write_us16PU3AS1tDv16_t(ptr addrspace(1), <16 x i16>) local_unnamed_addr #1
294+
223295
attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
224296
attributes #1 = { convergent "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
225297
attributes #2 = { convergent nounwind }

0 commit comments

Comments
 (0)