4
4
// and the generic `convert-to-llvm` pass.
5
5
// RUN: mlir-opt --convert-to-llvm --split-input-file %s | FileCheck %s
6
6
7
- // CHECK: llvm.func spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt(
7
+ // CHECK-LABEL: llvm.func spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt(
8
8
// CHECK-SAME: !llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>,
9
9
// CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) attributes {no_unwind, will_return}
10
10
// CHECK: llvm.func @blockload2d(%[[ARG0:.*]]: !llvm.ptr<1>,
@@ -18,11 +18,11 @@ llvm.func @blockload2d(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32
18
18
// CHECK: %[[VAR5:.*]] = llvm.mlir.constant(8 : i32) : i32
19
19
// CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i16 : (i32) -> !llvm.ptr
20
20
// CHECK: llvm.call spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt(
21
- // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]])
21
+ // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]])
22
22
// CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, i32, i32, i32, vector<2xi32>, ptr)>,
23
23
// CHECK-SAME: linkage = #llvm.linkage<external>, no_unwind, sym_name =
24
24
// CHECK-SAME: "_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt", visibility_ = 0 : i64,
25
- // CHECK-SAME: will_return}
25
+ // CHECK-SAME: will_return} :
26
26
// CHECK-SAME: (!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>,
27
27
// CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) -> ()
28
28
// CHECK: %[[VAR7:.*]] = llvm.load %[[VAR6]] : !llvm.ptr -> vector<8xi16>
@@ -33,7 +33,94 @@ llvm.func @blockload2d(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32
33
33
}
34
34
35
35
// -----
36
- // CHECK: llvm.func spir_funccc @_Z42intel_sub_group_2d_block_write_32b_8r16x1cPU3AS1viiiDv2_iPj(
36
+ // CHECK-LABEL: llvm.func spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x2cPU3AS1viiiDv2_iPt(
37
+ // CHECK-SAME: !llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>,
38
+ // CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) attributes {no_unwind, will_return}
39
+ // CHECK: llvm.func @blockload2d(%[[ARG0:.*]]: !llvm.ptr<1>,
40
+ // CHECK-SAME: %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32, %[[ARG3:.*]]: i32, %[[ARG4:.*]]: i32, %[[ARG5:.*]]: i32)
41
+ llvm.func @blockload2d (%a: !llvm.ptr <1 >, %base_width_a: i32 , %base_height_a: i32 , %base_pitch_a: i32 , %x: i32 , %y: i32 ) -> vector <16 xi16 > {
42
+ // CHECK: %[[VAR0:.*]] = llvm.mlir.undef : vector<2xi32>
43
+ // CHECK: %[[VAR1:.*]] = llvm.mlir.constant(0 : i32) : i32
44
+ // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
45
+ // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
46
+ // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
47
+ // CHECK: %[[VAR5:.*]] = llvm.mlir.constant(16 : i32) : i32
48
+ // CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i16 : (i32) -> !llvm.ptr
49
+ // CHECK: llvm.call spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x2cPU3AS1viiiDv2_iPt(
50
+ // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]])
51
+ // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, i32, i32, i32, vector<2xi32>, ptr)>,
52
+ // CHECK-SAME: linkage = #llvm.linkage<external>, no_unwind, sym_name =
53
+ // CHECK-SAME: "_Z41intel_sub_group_2d_block_read_16b_8r16x2cPU3AS1viiiDv2_iPt", visibility_ = 0 : i64,
54
+ // CHECK-SAME: will_return} :
55
+ // CHECK-SAME: (!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>,
56
+ // CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) -> ()
57
+ // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR6]] : !llvm.ptr -> vector<16xi16>
// Case under test: elem_size_in_bits = 16, tile_width = 16, tile_height = 8, v_blocks = 2,
// transpose = false, pack_register = false. The directives above expect a call to the
// "..._2d_block_read_16b_8r16x2c..." builtin that writes into a 16 x i16 alloca, followed by
// a load of that buffer as vector<16xi16>.
58
+ %loaded_a = xevm.blockload2d %a , %base_width_a , %base_height_a , %base_pitch_a , %x , %y
59
+ <{elem_size_in_bits =16 : i32 , tile_width =16 : i32 , tile_height =8 : i32 , v_blocks =2 : i32 , transpose =false ,
60
+ pack_register =false }> : (!llvm.ptr <1 >, i32 , i32 , i32 , i32 , i32 ) -> vector <16 xi16 >
61
+ llvm.return %loaded_a : vector <16 xi16 >
62
+ }
63
+
64
+ // -----
65
+ // CHECK-LABEL: llvm.func spir_funccc @_Z52intel_sub_group_2d_block_read_transform_16b_16r16x1cPU3AS1viiiDv2_iPj(
66
+ // CHECK-SAME: !llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>,
67
+ // CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) attributes {no_unwind, will_return}
68
+ // CHECK: llvm.func @blockload2d(%[[ARG0:.*]]: !llvm.ptr<1>,
69
+ // CHECK-SAME: %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32, %[[ARG3:.*]]: i32, %[[ARG4:.*]]: i32, %[[ARG5:.*]]: i32)
70
+ llvm.func @blockload2d (%a: !llvm.ptr <1 >, %base_width_a: i32 , %base_height_a: i32 , %base_pitch_a: i32 , %x: i32 , %y: i32 ) -> vector <8 xi32 > {
71
+ // CHECK: %[[VAR0:.*]] = llvm.mlir.undef : vector<2xi32>
72
+ // CHECK: %[[VAR1:.*]] = llvm.mlir.constant(0 : i32) : i32
73
+ // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
74
+ // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
75
+ // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
76
+ // CHECK: %[[VAR5:.*]] = llvm.mlir.constant(8 : i32) : i32
77
+ // CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i32 : (i32) -> !llvm.ptr
78
+ // CHECK: llvm.call spir_funccc @_Z52intel_sub_group_2d_block_read_transform_16b_16r16x1cPU3AS1viiiDv2_iPj(
79
+ // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]])
80
+ // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, i32, i32, i32, vector<2xi32>, ptr)>,
81
+ // CHECK-SAME: linkage = #llvm.linkage<external>, no_unwind, sym_name =
82
+ // CHECK-SAME: "_Z52intel_sub_group_2d_block_read_transform_16b_16r16x1cPU3AS1viiiDv2_iPj", visibility_ = 0 : i64,
83
+ // CHECK-SAME: will_return} :
84
+ // CHECK-SAME: (!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>,
85
+ // CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) -> ()
86
+ // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR6]] : !llvm.ptr -> vector<8xi32>
// Case under test: elem_size_in_bits = 16, tile_width = 16, tile_height = 16, v_blocks = 1,
// transpose = false, pack_register = true. The directives above expect a call to the
// "..._2d_block_read_transform_16b_16r16x1c..." builtin that writes into an 8 x i32 alloca,
// followed by a load of that buffer as vector<8xi32>.
87
+ %loaded_a = xevm.blockload2d %a , %base_width_a , %base_height_a , %base_pitch_a , %x , %y
88
+ <{elem_size_in_bits =16 : i32 , tile_width =16 : i32 , tile_height =16 : i32 , v_blocks =1 : i32 , transpose =false ,
89
+ pack_register =true }> : (!llvm.ptr <1 >, i32 , i32 , i32 , i32 , i32 ) -> vector <8 xi32 >
90
+ llvm.return %loaded_a : vector <8 xi32 >
91
+ }
92
+
93
+ // -----
94
+ // CHECK-LABEL: llvm.func spir_funccc @_Z51intel_sub_group_2d_block_read_transpose_32b_16r8x1cPU3AS1viiiDv2_iPj(
95
+ // CHECK-SAME: !llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>,
96
+ // CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) attributes {no_unwind, will_return}
97
+ // CHECK: llvm.func @blockload2d(%[[ARG0:.*]]: !llvm.ptr<1>,
98
+ // CHECK-SAME: %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32, %[[ARG3:.*]]: i32, %[[ARG4:.*]]: i32, %[[ARG5:.*]]: i32)
99
+ llvm.func @blockload2d (%a: !llvm.ptr <1 >, %base_width_a: i32 , %base_height_a: i32 , %base_pitch_a: i32 , %x: i32 , %y: i32 ) -> vector <8 xi32 > {
100
+ // CHECK: %[[VAR0:.*]] = llvm.mlir.undef : vector<2xi32>
101
+ // CHECK: %[[VAR1:.*]] = llvm.mlir.constant(0 : i32) : i32
102
+ // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
103
+ // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
104
+ // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
105
+ // CHECK: %[[VAR5:.*]] = llvm.mlir.constant(8 : i32) : i32
106
+ // CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i32 : (i32) -> !llvm.ptr
107
+ // CHECK: llvm.call spir_funccc @_Z51intel_sub_group_2d_block_read_transpose_32b_16r8x1cPU3AS1viiiDv2_iPj(
108
+ // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]])
109
+ // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, i32, i32, i32, vector<2xi32>, ptr)>,
110
+ // CHECK-SAME: linkage = #llvm.linkage<external>, no_unwind, sym_name =
111
+ // CHECK-SAME: "_Z51intel_sub_group_2d_block_read_transpose_32b_16r8x1cPU3AS1viiiDv2_iPj", visibility_ = 0 : i64,
112
+ // CHECK-SAME: will_return} :
113
+ // CHECK-SAME: (!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>,
114
+ // CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) -> ()
115
+ // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR6]] : !llvm.ptr -> vector<8xi32>
// Case under test: elem_size_in_bits = 32, tile_width = 8, tile_height = 16, v_blocks = 1,
// transpose = true, pack_register = false. The directives above expect a call to the
// "..._2d_block_read_transpose_32b_16r8x1c..." builtin that writes into an 8 x i32 alloca,
// followed by a load of that buffer as vector<8xi32>.
116
+ %loaded_a = xevm.blockload2d %a , %base_width_a , %base_height_a , %base_pitch_a , %x , %y
117
+ <{elem_size_in_bits =32 : i32 , tile_width =8 : i32 , tile_height =16 : i32 , v_blocks =1 : i32 , transpose =true ,
118
+ pack_register =false }> : (!llvm.ptr <1 >, i32 , i32 , i32 , i32 , i32 ) -> vector <8 xi32 >
119
+ llvm.return %loaded_a : vector <8 xi32 >
120
+ }
121
+
122
+ // -----
123
+ // CHECK-LABEL: llvm.func spir_funccc @_Z42intel_sub_group_2d_block_write_32b_8r16x1cPU3AS1viiiDv2_iPj(
37
124
// CHECK-SAME: !llvm.ptr<1> {llvm.nonnull, llvm.writeonly}, i32, i32, i32, vector<2xi32>,
38
125
// CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.readonly}) attributes {no_unwind, will_return}
39
126
// CHECK: llvm.func @blockstore2d(%[[ARG0:.*]]: !llvm.ptr<1>,
@@ -62,7 +149,7 @@ llvm.func @blockstore2d(%c: !llvm.ptr<1>, %base_width_c: i32, %base_height_c: i3
62
149
}
63
150
64
151
// -----
65
- // CHECK: llvm.func spir_funccc @_Z44intel_sub_group_2d_block_prefetch_8b_8r32x1cPU3AS1viiiDv2_i(
152
+ // CHECK-LABEL: llvm.func spir_funccc @_Z44intel_sub_group_2d_block_prefetch_8b_8r32x1cPU3AS1viiiDv2_i(
66
153
// CHECK-SAME: !llvm.ptr<1> {llvm.nonnull}, i32, i32, i32, vector<2xi32>) attributes
67
154
// CHECK-SAME: {memory_effects = #llvm.memory_effects<other = none, argMem = read, inaccessibleMem = none>, no_unwind}
68
155
// CHECK: llvm.func @blockprefetch2d(%[[ARG0:.*]]: !llvm.ptr<1>,
@@ -86,7 +173,7 @@ llvm.func @blockprefetch2d(%ptr: !llvm.ptr<1>, %base_width: i32, %base_height: i
86
173
}
87
174
88
175
// -----
89
- // CHECK: llvm.func spir_funccc @_Z38intel_sub_group_f16_f16_matrix_mad_k16Dv8_sDv8_iDv8_f(
176
+ // CHECK-LABEL: llvm.func spir_funccc @_Z38intel_sub_group_f16_f16_matrix_mad_k16Dv8_sDv8_iDv8_f(
90
177
// CHECK-SAME: vector<8xi16>, vector<8xi32>, vector<8xf32>) -> vector<8xf32> attributes
91
178
// CHECK-SAME: {convergent, memory_effects = #llvm.memory_effects<other = none, argMem = none,
92
179
// CHECK-SAME: inaccessibleMem = none>, no_unwind, will_return}
@@ -105,7 +192,7 @@ llvm.func @mma(%loaded_c_casted: vector<8xf32>, %loaded_a: vector<8xi16>, %loade
105
192
}
106
193
107
194
// -----
108
- // CHECK: llvm.func spir_funccc @_Z22atomic_work_item_fenceiii(i32, i32, i32) attributes {no_unwind}
195
+ // CHECK-LABEL: llvm.func spir_funccc @_Z22atomic_work_item_fenceiii(i32, i32, i32) attributes {no_unwind}
109
196
llvm.func @memfence () {
110
197
// CHECK: %[[VAR0:.*]] = llvm.mlir.constant(4 : i32) : i32
111
198
// CHECK: %[[VAR1:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -118,7 +205,7 @@ llvm.func @memfence() {
118
205
}
119
206
120
207
// -----
121
- // CHECK: llvm.func spir_funccc @_Z8prefetchPU3AS1Kcm(!llvm.ptr<1>, i64) attributes
208
+ // CHECK-LABEL: llvm.func spir_funccc @_Z8prefetchPU3AS1Kcm(!llvm.ptr<1>, i64) attributes
122
209
// CHECK-SAME: {memory_effects = #llvm.memory_effects<other = none, argMem = read, inaccessibleMem = none>, no_unwind}
123
210
// CHECK: llvm.func @prefetch(%[[ARG0:.*]]: !llvm.ptr<1>) {
124
211
llvm.func @prefetch (%ptr: !llvm.ptr <1 >) {
0 commit comments