You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[ESIMD] Refactor esimd intrinsic mapping to BE intrinsics. (#4720)
* [ESIMD] Refactor esimd intrinsic mapping to BE intrinsics.
This patch
- makes names and parameter lists of __esimd* intrinsics match their
@llvm.genx counterparts. The benefits are:
* this removes the extra logical translation layer between __esimd* and
@llvm.genx thus simplifying overall user-level esimd intrinsic translation
* allows reusing lots of functionality between SLM and surface memory
accesses
- moves some of the translations and argument setting (like accessor field to
surface index, setting scale) from LowerESIMD.cpp to the ESIMD headers, which
simplifies code base.
- for all memory intrinsics moves host and device implementations to the same
intrinsic function prototype separating them via __SYCL_DEVICE_ONLY__ macro
thus avoiding duplication of the prototypes
- removes certain redundant __esimd* intrinsics, such as SLM memory accesses
(which are normal surface accesses with special surface index 254), and
__esimd_reduced_fmax,... which have the same functionality as usual fmax,...
This is also a preparatory step for fixing SLM memory accesses (revising vector
lengths, element type restrictions)
Signed-off-by: Konstantin S Bobrovsky <konstantin.s.bobrovsky@intel.com>
%ret_val = call spir_func <32 x i32> @_Z20__esimd_flat_atomic0ILN2cm3gen14CmAtomicOpTypeE2EjLi32ELNS1_9CacheHintE0ELS3_0EENS1_13__vector_typeIT0_XT1_EE4typeENS4_IyXT1_EE4typeENS4_ItXT1_EE4typeE(<32 x i64> %1, <32 x i16> %2)
25
-
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.inc.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
26
-
ret <32 x i32> %ret_val
27
-
}
28
-
29
-
; FUNC_2: passes a <32 x i64>, a <32 x i32> and a <32 x i16> operand to the
; mangled __esimd_flat_atomic1 wrapper. The FileCheck line after the call
; expects @llvm.genx.svm.atomic.add with a <32 x i1> first operand and a
; trailing undef <32 x i32> operand.
define dso_local spir_func <32 x i32> @FUNC_2() {
  ; Each operand is loaded from an alloca that is never stored to.
  ; NOTE(review): presumably only the operand types matter to this test - confirm.
  %a_1 = alloca <32 x i64>
  %1 = load <32 x i64>, <32 x i64>* %a_1
  %a_2 = alloca <32 x i32>
  %2 = load <32 x i32>, <32 x i32>* %a_2
  %a_3 = alloca <32 x i16>
  %3 = load <32 x i16>, <32 x i16>* %a_3
  %ret_val = call spir_func <32 x i32> @_Z20__esimd_flat_atomic1ILN2cm3gen14CmAtomicOpTypeE0EjLi32ELNS1_9CacheHintE0ELS3_0EENS1_13__vector_typeIT0_XT1_EE4typeENS4_IyXT1_EE4typeES7_NS4_ItXT1_EE4typeE(<32 x i64> %1, <32 x i32> %2, <32 x i16> %3)
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.add.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
  ret <32 x i32> %ret_val
}
40
-
41
-
; FUNC_3: passes a <32 x i64>, two <32 x i32> and a <32 x i16> operand to the
; mangled __esimd_flat_atomic2 wrapper. The FileCheck line after the call
; expects @llvm.genx.svm.atomic.cmpxchg with a <32 x i1> first operand and a
; trailing undef <32 x i32> operand.
define dso_local spir_func <32 x i32> @FUNC_3() {
  ; Each operand is loaded from an alloca that is never stored to.
  ; NOTE(review): presumably only the operand types matter to this test - confirm.
  %a_1 = alloca <32 x i64>
  %1 = load <32 x i64>, <32 x i64>* %a_1
  %a_2 = alloca <32 x i32>
  %2 = load <32 x i32>, <32 x i32>* %a_2
  %a_3 = alloca <32 x i32>
  %3 = load <32 x i32>, <32 x i32>* %a_3
  %a_4 = alloca <32 x i16>
  %4 = load <32 x i16>, <32 x i16>* %a_4
  %ret_val = call spir_func <32 x i32> @_Z20__esimd_flat_atomic2ILN2cm3gen14CmAtomicOpTypeE7EjLi32ELNS1_9CacheHintE0ELS3_0EENS1_13__vector_typeIT0_XT1_EE4typeENS4_IyXT1_EE4typeES7_S7_NS4_ItXT1_EE4typeE(<32 x i64> %1, <32 x i32> %2, <32 x i32> %3, <32 x i16> %4)
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.cmpxchg.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
  ret <32 x i32> %ret_val
}
54
-
55
-
; FUNC_4: calls the mangled __esimd_flat_block_read_unaligned wrapper with the
; constant i64 address 0. The FileCheck line after the call expects
; @llvm.genx.svm.block.ld.unaligned.v32i32.i64 with the same i64 0 operand.
define dso_local spir_func <32 x i32> @FUNC_4() {
  %ret_val = call spir_func <32 x i32> @_Z33__esimd_flat_block_read_unalignedIjLi32ELN2cm3gen9CacheHintE0ELS2_0EENS1_13__vector_typeIT_XT0_EE4typeEy(i64 0)
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.block.ld.unaligned.v32i32.i64(i64 0)
  ret <32 x i32> %ret_val
}
60
-
61
-
; FUNC_5: calls the mangled __esimd_flat_block_write wrapper with the constant
; i64 address 0 and a <32 x i32> value. The FileCheck line after the call
; expects @llvm.genx.svm.block.st.i64.v32i32 with an i64 0 first operand.
define dso_local spir_func void @FUNC_5() {
  ; The stored value is loaded from an alloca that is never stored to.
  ; NOTE(review): presumably only the operand type matters to this test - confirm.
  %a_1 = alloca <32 x i32>
  %1 = load <32 x i32>, <32 x i32>* %a_1
  call spir_func void @_Z24__esimd_flat_block_writeIjLi32ELN2cm3gen9CacheHintE0ELS2_0EEvyNS1_13__vector_typeIT_XT0_EE4typeE(i64 0, <32 x i32> %1)
; CHECK: call void @llvm.genx.svm.block.st.i64.v32i32(i64 0, <32 x i32> %{{[0-9a-zA-Z_.]+}})
  ret void
}
68
-
69
-
; FUNC_6: passes a <32 x i64> operand, the constant i32 0 and a <32 x i16>
; operand to the mangled __esimd_flat_read wrapper. The FileCheck line after
; the call expects @llvm.genx.svm.gather with a <32 x i1> first operand, an
; i32 0 second operand and a trailing undef <32 x i32> operand.
define dso_local spir_func <32 x i32> @FUNC_6() {
  ; Each vector operand is loaded from an alloca that is never stored to.
  ; NOTE(review): presumably only the operand types matter to this test - confirm.
  %a_1 = alloca <32 x i64>
  %1 = load <32 x i64>, <32 x i64>* %a_1
  %a_2 = alloca <32 x i16>
  %2 = load <32 x i16>, <32 x i16>* %a_2
  %ret_val = call spir_func <32 x i32> @_Z17__esimd_flat_readIjLi32ELi0ELN2cm3gen9CacheHintE0ELS2_0EENS1_13__vector_typeIT_XmlT0_clL_ZNS1_20ElemsPerAddrDecodingEjET1_EEE4typeENS3_IyXT0_EE4typeEiNS3_ItXT0_EE4typeE(<32 x i64> %1, i32 0, <32 x i16> %2)
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.gather.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
  ret <32 x i32> %ret_val
}
78
-
79
-
define dso_local spir_func void@FUNC_7() {
80
-
%a_1 = alloca <32 x i64>
81
-
%1 = load <32 x i64>, <32 x i64>* %a_1
82
-
%a_2 = alloca <32 x i32>
83
-
%2 = load <32 x i32>, <32 x i32>* %a_2
84
-
%a_3 = alloca <32 x i16>
85
-
%3 = load <32 x i16>, <32 x i16>* %a_3
86
-
call spir_func void@_Z18__esimd_flat_writeIjLi32ELi0ELN2cm3gen9CacheHintE0ELS2_0EEvNS1_13__vector_typeIyXT0_EE4typeENS3_IT_XmlT0_clL_ZNS1_20ElemsPerAddrDecodingEjET1_EEE4typeEiNS3_ItXT0_EE4typeE(<32 x i64> %1, <32 x i32> %2, i320, <32 x i16> %3)
87
-
; CHECK: call void @llvm.genx.svm.scatter.v32i1.v32i64.v32i32(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}})
; Declarations of the mangled __esimd_flat_* functions: the atomic0/1/2,
; block_read_unaligned, block_write, read and write variants that the
; FUNC_* test functions in this file call.
declare dso_local spir_func <32 x i32> @_Z20__esimd_flat_atomic0ILN2cm3gen14CmAtomicOpTypeE2EjLi32ELNS1_9CacheHintE0ELS3_0EENS1_13__vector_typeIT0_XT1_EE4typeENS4_IyXT1_EE4typeENS4_ItXT1_EE4typeE(<32 x i64> %0, <32 x i16> %1)
declare dso_local spir_func <32 x i32> @_Z20__esimd_flat_atomic1ILN2cm3gen14CmAtomicOpTypeE0EjLi32ELNS1_9CacheHintE0ELS3_0EENS1_13__vector_typeIT0_XT1_EE4typeENS4_IyXT1_EE4typeES7_NS4_ItXT1_EE4typeE(<32 x i64> %0, <32 x i32> %1, <32 x i16> %2)
declare dso_local spir_func <32 x i32> @_Z20__esimd_flat_atomic2ILN2cm3gen14CmAtomicOpTypeE7EjLi32ELNS1_9CacheHintE0ELS3_0EENS1_13__vector_typeIT0_XT1_EE4typeENS4_IyXT1_EE4typeES7_S7_NS4_ItXT1_EE4typeE(<32 x i64> %0, <32 x i32> %1, <32 x i32> %2, <32 x i16> %3)
declare dso_local spir_func <32 x i32> @_Z33__esimd_flat_block_read_unalignedIjLi32ELN2cm3gen9CacheHintE0ELS2_0EENS1_13__vector_typeIT_XT0_EE4typeEy(i64 %0)
declare dso_local spir_func void @_Z24__esimd_flat_block_writeIjLi32ELN2cm3gen9CacheHintE0ELS2_0EEvyNS1_13__vector_typeIT_XT0_EE4typeE(i64 %0, <32 x i32> %1)
declare dso_local spir_func <32 x i32> @_Z17__esimd_flat_readIjLi32ELi0ELN2cm3gen9CacheHintE0ELS2_0EENS1_13__vector_typeIT_XmlT0_clL_ZNS1_20ElemsPerAddrDecodingEjET1_EEE4typeENS3_IyXT0_EE4typeEiNS3_ItXT0_EE4typeE(<32 x i64> %0, i32 %1, <32 x i16> %2)
declare dso_local spir_func void @_Z18__esimd_flat_writeIjLi32ELi0ELN2cm3gen9CacheHintE0ELS2_0EEvNS1_13__vector_typeIyXT0_EE4typeENS3_IT_XmlT0_clL_ZNS1_20ElemsPerAddrDecodingEjET1_EEE4typeEiNS3_ItXT0_EE4typeE(<32 x i64> %0, <32 x i32> %1, i32 %2, <32 x i16> %3)
389
287
; Declarations of the mangled __esimd_smin, __esimd_div_ieee, __esimd_rdregion,
; __esimd_wrregion and __esimd_vload functions. The i16 parameters of
; rdregion and wrregion carry the zeroext attribute.
declare dso_local spir_func <16 x i16> @_Z12__esimd_sminIsLi16EEN2cm3gen13__vector_typeIT_XT0_EE4typeES5_S5_(<16 x i16> %0, <16 x i16> %1)
declare dso_local spir_func <1 x float> @_Z16__esimd_div_ieeeILi1EEN2cm3gen13__vector_typeIfXT_EE4typeES4_S4_(<1 x float> %0, <1 x float> %1)
declare dso_local spir_func <8 x float> @_Z16__esimd_rdregionIfLi16ELi8ELi0ELi8ELi1ELi0EEN2cm3gen13__vector_typeIT_XT1_EE4typeENS2_IS3_XT0_EE4typeEt(<16 x float> %0, i16 zeroext %1)
declare dso_local spir_func <16 x float> @_Z16__esimd_wrregionIfLi16ELi8ELi0ELi8ELi1ELi0EEN2cm3gen13__vector_typeIT_XT0_EE4typeES5_NS2_IS3_XT1_EE4typeEtNS2_ItXT1_EE4typeE(<16 x float> %0, <8 x float> %1, i16 zeroext %2, <8 x i16> %3)
declare dso_local spir_func <16 x i32> @_Z13__esimd_vloadIiLi16EEN2cm3gen13__vector_typeIT_XT0_EE4typeEPKS5_(<16 x i32> addrspace(4)* %0)
; Declarations of the mangled __esimd_usdp4a_sat, __esimd_sudp4a_sat,
; __esimd_ssdp4a_sat, __esimd_slm_block_read, __esimd_rdindirect and
; __esimd_wrindirect functions.
declare dso_local spir_func <16 x i32> @_Z18__esimd_usdp4a_satIjiiiLi16EEN2cl4sycl3ext5intel3gpu11vector_typeIT_XT3_EE4typeENS4_IT0_XT3_EE4typeENS4_IT1_XT3_EE4typeENS4_IT2_XT3_EE4typeE(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
declare dso_local spir_func <16 x i32> @_Z18__esimd_sudp4a_satIijjjLi16EEN2cl4sycl3ext5intel3gpu11vector_typeIT_XT3_EE4typeENS4_IT0_XT3_EE4typeENS4_IT1_XT3_EE4typeENS4_IT2_XT3_EE4typeE(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
declare dso_local spir_func <16 x i32> @_Z18__esimd_ssdp4a_satIiiiiLi16EEN2cl4sycl3ext5intel3gpu11vector_typeIT_XT3_EE4typeENS4_IT0_XT3_EE4typeENS4_IT1_XT3_EE4typeENS4_IT2_XT3_EE4typeE(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
declare dso_local spir_func <8 x i32> @_Z22__esimd_slm_block_readIiLi8EEN2cl4sycl3ext5intel3gpu11vector_typeIT_XT0_EE4typeEj(i32 %0)
declare dso_local spir_func <8 x i32> @_Z18__esimd_rdindirectIiLi16ELi8ELi0EEN2cl4sycl3ext5intel3gpu11vector_typeIT_XT1_EE4typeENS4_IS5_XT0_EE4typeENS4_ItXT1_EE4typeE(<16 x i32>, <8 x i16>)
declare dso_local spir_func <16 x i32> @_Z18__esimd_wrindirectIiLi16ELi8ELi0EEN2cl4sycl3ext5intel3gpu11vector_typeIT_XT0_EE4typeES7_NS4_IS5_XT1_EE4typeENS4_ItXT1_EE4typeESB_(<16 x i32>, <8 x i32>, <8 x i16>, <8 x i16>)
0 commit comments