
Commit e0209aa

chencha3 authored and GeorgeARM committed
[MLIR][XeGPU] refine verifier for TensorDescType (llvm#137226)
This PR updates the verifier of TensorDescType after the extension of LayoutAttr in llvm#132425.
1 parent 5403c31 commit e0209aa
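
In short, the refined verifier accepts a tensor shape only if it tiles evenly through every level of the layout attribute (subgroup, instruction, lane). Using shapes from the updated tests below: a 128x128 tensor with sg_layout = [4, 2] and sg_data = [32, 64] splits into 128/4 = 32 by 128/2 = 64 per subgroup, which matches sg_data; inst_data = [8, 16] then divides [32, 64] evenly, and lane_layout = [1, 16] with lane_data = [1, 1] divides [8, 16] down to an 8x1 slice per lane, so the descriptor verifies. With sg_data = [24, 48] the same shape is rejected, since [32, 64] is not an even multiple of [24, 48] (nor the reverse), and the diagnostic now reports the full shape and layout, e.g. "cannot distribute [128, 128] using #xegpu.layout<sg_layout = [4, 2], sg_data = [24, 48]>".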

File tree

File tree: 6 files changed, +164 -61 lines changed


mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h

Lines changed: 3 additions & 1 deletion
@@ -25,12 +25,14 @@ class TensorDescType;
 } // namespace xegpu
 } // namespace mlir
 
-#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.h.inc>
 #include <mlir/Dialect/XeGPU/IR/XeGPUEnums.h.inc>
 #define GET_ATTRDEF_CLASSES
 #include <mlir/Dialect/XeGPU/IR/XeGPUAttrs.h.inc>
 #define GET_TYPEDEF_CLASSES
 #include <mlir/Dialect/XeGPU/IR/XeGPUTypes.h.inc>
+
+#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.h.inc>
+
 #define GET_OP_CLASSES
 #include <mlir/Dialect/XeGPU/IR/XeGPU.h.inc>

mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td

Lines changed: 6 additions & 0 deletions
@@ -36,6 +36,12 @@ def XeGPU_Dialect : Dialect {
 
   let useDefaultTypePrinterParser = true;
   let useDefaultAttributePrinterParser = true;
+
+  let extraClassDeclaration = [{
+    /// Checks if the given shape can be evenly distributed based on the layout
+    /// and data factors provided by the LayoutAttr.
+    static bool isEvenlyDistributable(llvm::ArrayRef<int64_t> shape, xegpu::LayoutAttr attr);
+  }];
 }
 
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD

mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp

Lines changed: 82 additions & 17 deletions
@@ -6,12 +6,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/Dialect/XeGPU/IR/XeGPU.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/DialectImplementation.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include <numeric>
 
+using std::optional;
+
 namespace mlir {
 namespace xegpu {
 
@@ -30,6 +33,71 @@ void XeGPUDialect::initialize() {
       >();
 }
 
+// Checks if the given shape can be evenly distributed based on the layout
+// and data factors provided by the LayoutAttr.
+bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
+                                         xegpu::LayoutAttr attr) {
+  assert(attr && "Layout attribute is missing.");
+
+  // Checks whether the given shape can be evenly distributed using the
+  // specified layout and data attributes. If successful, it returns the work
+  // size for each compute unit; otherwise, it returns `std::nullopt`. The work
+  // size per compute unit is calculated as follows:
+  //   - If `data` is null: newShape[i] = shape[i] / layout[i]
+  //   - If `data` is not null: newShape[i] = data[i]
+  // When round-robin distribution (`rr`) is enabled, `shape[i]` can be
+  // smaller than `layout[i] * data[i]`, allowing multiple compute units to
+  // share the data.
+  auto tryDistribute = [&](llvm::ArrayRef<int64_t> shape,
+                           DenseI32ArrayAttr layout, DenseI32ArrayAttr data,
+                           bool rr = true) -> optional<SmallVector<int64_t>> {
+    llvm::SmallVector<int64_t> newShape(shape);
+    if (layout) {
+      auto vec = llvm::to_vector_of<int64_t>(layout.asArrayRef());
+      if (vec.size() != shape.size())
+        return std::nullopt;
+      auto ratio = computeShapeRatio(shape, vec);
+      if (!ratio.has_value())
+        return std::nullopt;
+      newShape = ratio.value();
+    }
+
+    if (data) {
+      auto vec = llvm::to_vector_of<int64_t>(data.asArrayRef());
+      if (vec.size() != shape.size())
+        return std::nullopt;
+      auto ratio = computeShapeRatio(newShape, vec);
+      if (!ratio.has_value() && rr)
+        ratio = computeShapeRatio(vec, newShape);
+      if (!ratio.has_value())
+        return std::nullopt;
+
+      // If data is not null, we always return it for the next phase.
+      newShape = vec;
+    }
+    return newShape;
+  };
+
+  // Check the sgLayout and sgData.
+  auto maybeSgShape =
+      tryDistribute(shape, attr.getSgLayout(), attr.getSgData());
+  if (!maybeSgShape)
+    return false;
+  auto sgShape = maybeSgShape.value();
+
+  // Check InstData; it has no layout and does not need round-robin.
+  auto maybeInstShape =
+      tryDistribute(sgShape, nullptr, attr.getInstData(), false);
+  if (!maybeInstShape)
+    return false;
+  auto instShape = maybeInstShape.value();
+
+  // Check LaneLayout and LaneData.
+  auto maybeLaneShape =
+      tryDistribute(instShape, attr.getLaneLayout(), attr.getLaneData(), false);
+  return maybeLaneShape.has_value();
+}
+
 //===----------------------------------------------------------------------===//
 // XeGPU_BlockTensorDescAttr
 //===----------------------------------------------------------------------===//
@@ -241,7 +309,7 @@ LogicalResult TensorDescType::verify(
     llvm::ArrayRef<int64_t> shape, mlir::Type elementType,
     mlir::Attribute encoding, mlir::Attribute layout) {
   size_t rank = shape.size();
-  // Low-pressure types are packed in 32-bit units.
+  // Low-precision types are packed in 32-bit units.
   int32_t packingFactor = 32 / elementType.getIntOrFloatBitWidth();
   if (rank != 1 && rank != 2)
     return emitError() << "expected 1D or 2D tensor";
@@ -268,23 +336,21 @@ LogicalResult TensorDescType::verify(
     }
   }
 
-  if (auto blockAttr =
-          mlir::dyn_cast_if_present<BlockTensorDescAttr>(encoding)) {
+  auto blockAttr = mlir::dyn_cast_if_present<BlockTensorDescAttr>(encoding);
+  if (blockAttr) {
     MemorySpaceAttr memorySpaceAttr = blockAttr.getMemorySpace();
     if (rank == 2 && memorySpaceAttr &&
         memorySpaceAttr.getValue() == MemorySpace::SLM)
       return emitError() << "SLM is not supported for 2D block tensor";
   }
 
-  if (auto layoutAttr = llvm::dyn_cast_if_present<LayoutAttr>(layout)) {
-
+  auto layoutAttr = llvm::dyn_cast_if_present<LayoutAttr>(layout);
+  if (layoutAttr) {
     if (rank != (size_t)layoutAttr.getRank())
       return emitError() << "expected layout rank to match tensor rank";
 
-    ArrayRef<int32_t> laneLayout = layoutAttr.getLaneLayout().asArrayRef();
-    ArrayRef<int32_t> laneData = layoutAttr.getLaneData().asArrayRef();
-
-    if (scatterAttr) {
+    auto laneData = layoutAttr.getLaneData();
+    if (scatterAttr && laneData) {
       // Validate subgroup mapping rules for scattered tensors.
       // A work-item's slice of the tensor with shape [sg_size] or
       // [sg_size, chunk_size] will be [1] or [1, 32/element_ty_bit_width]
@@ -294,20 +360,19 @@ LogicalResult TensorDescType::verify(
       if (rank > 1 && laneData[0] != 1)
         return emitError()
               << "cannot map over non-contiguous scattered row elements";
-      if (laneData.back() != packingFactor)
+      if (laneData[rank - 1] != packingFactor)
        return emitError() << "work item data mapping must match the number of "
                              "contiguous elements";
     }
 
-    for (size_t i = 0; i < shape.size(); ++i) {
-      uint32_t numElemPerWi = laneLayout[i] * laneData[i];
-      if (shape[i] < numElemPerWi || shape[i] % numElemPerWi != 0)
-        return emitError() << "cannot distribute " << shape[i] << " over "
-                           << laneLayout[i] << " work items with "
-                           << laneData[i] << " elements each";
+    if (!XeGPUDialect::isEvenlyDistributable(shape, layoutAttr)) {
+      std::string shapeStr;
+      llvm::raw_string_ostream stream(shapeStr);
+      llvm::interleaveComma(shape, stream);
+      return emitError() << "cannot distribute [" << shapeStr << "] using "
+                         << layoutAttr;
     }
   }
-
  return success();
 }
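
As a side note for readers of this commit, here is a minimal standalone sketch of the divisibility rule implemented above, using plain std::vector instead of the MLIR attribute types. The helper names (shapeRatio, tryDistribute) and the main driver are illustrative only, not part of the XeGPU API; shapeRatio assumes the same semantics as computeShapeRatio, i.e. elementwise division that fails unless every dimension divides evenly.

```cpp
// Standalone sketch of the "evenly distributable" rule: mirrors the
// three-phase check (subgroup -> instruction -> lane) with plain vectors.
// Names here (shapeRatio, tryDistribute) are illustrative, not the XeGPU API.
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

using Shape = std::vector<int64_t>;

// Elementwise shape / divisor; fails unless every dimension divides evenly.
std::optional<Shape> shapeRatio(const Shape &shape, const Shape &divisor) {
  if (shape.size() != divisor.size())
    return std::nullopt;
  Shape ratio(shape.size());
  for (size_t i = 0; i < shape.size(); ++i) {
    if (divisor[i] == 0 || shape[i] % divisor[i] != 0)
      return std::nullopt;
    ratio[i] = shape[i] / divisor[i];
  }
  return ratio;
}

// One phase: divide by the layout factor (if present), then check the data
// factor (if present). With round-robin (rr) the data tile may be larger than
// the remaining shape, as long as one divides the other evenly.
std::optional<Shape> tryDistribute(Shape shape, const Shape &layout,
                                   const Shape &data, bool rr = true) {
  if (!layout.empty()) {
    auto ratio = shapeRatio(shape, layout);
    if (!ratio)
      return std::nullopt;
    shape = *ratio;
  }
  if (!data.empty()) {
    auto ratio = shapeRatio(shape, data);
    if (!ratio && rr)
      ratio = shapeRatio(data, shape);
    if (!ratio)
      return std::nullopt;
    shape = data; // the data tile feeds the next phase
  }
  return shape;
}

int main() {
  Shape shape{128, 128};

  // Valid layout from ops.mlir: sg_layout=[4,2], sg_data=[32,64],
  // inst_data=[8,16], lane_layout=[1,16], lane_data=[1,1].
  std::optional<Shape> r = tryDistribute(shape, {4, 2}, {32, 64}); // [32, 64]
  if (r)
    r = tryDistribute(*r, {}, {8, 16}, /*rr=*/false);              // [8, 16]
  if (r)
    r = tryDistribute(*r, {1, 16}, {1, 1}, /*rr=*/false);          // succeeds
  std::cout << "valid layout:   " << (r ? "distributable" : "rejected") << "\n";

  // Invalid layout from invalid.mlir: sg_data=[24,48] does not divide [32,64].
  std::optional<Shape> bad = tryDistribute(shape, {4, 2}, {24, 48});
  std::cout << "invalid layout: " << (bad ? "distributable" : "rejected") << "\n";
}
```

The first case mirrors the descriptor accepted by test_create_nd_tdesc_subgroup_3 in ops.mlir below; the second mirrors the new expected-error in invalid.mlir.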

mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp

Lines changed: 2 additions & 30 deletions
@@ -73,34 +73,6 @@ static bool isWriteHintOrNone(const CachePolicyAttr &attr) {
          kind == CachePolicy::WRITE_BACK || kind == CachePolicy::WRITE_THROUGH;
 }
 
-// Checks if the given shape is evenly distributed based on the layout
-// and data factors provided by the LayoutAttr. The function ensures that
-// each dimension of the shape can be evenly divided by the corresponding
-// data factor, and the resulting quotient can be evenly divided by the
-// layout factor. Returns `true` if the shape is evenly distributed,
-// otherwise `false`.
-static bool isEvenDistributed(llvm::ArrayRef<int64_t> shape,
-                              xegpu::LayoutAttr attr) {
-  assert(attr && "Layout attribute is missing.");
-  llvm::SmallVector<int32_t> defaults(shape.size(), 1);
-  llvm::ArrayRef<int32_t> layout, data;
-  if (auto sg_layout = attr.getSgLayout()) {
-    layout = sg_layout.asArrayRef();
-    auto sg_data = attr.getSgData();
-    data = sg_data ? sg_data.asArrayRef() : defaults;
-  } else {
-    layout = attr.getLaneLayout().asArrayRef();
-    auto lane_data = attr.getLaneData();
-    data = lane_data ? lane_data.asArrayRef() : defaults;
-  }
-  for (auto [dimSize, dataFactor, layoutFactor] :
-       llvm::zip_equal(shape, data, layout)) {
-    if (dimSize % dataFactor != 0 || (dimSize / dataFactor) % layoutFactor != 0)
-      return false;
-  }
-  return true;
-}
-
 static LogicalResult
 isValidGatherScatterParams(Type maskTy, VectorType valueTy,
                            TensorDescType tdescTy, UnitAttr transposeAttr,
@@ -685,10 +657,10 @@ LogicalResult ConvertLayoutOp::verify() {
       "expected srcMap and resMap be WgLayout or SgLayout at the same time.");
 
   auto shape = getSource().getType().getShape();
-  if (!isEvenDistributed(shape, srcMap))
+  if (!XeGPUDialect::isEvenlyDistributable(shape, srcMap))
     return emitOpError("invalid srcMap, data cannot be evenly distributed.");
 
-  if (!isEvenDistributed(shape, resMap))
+  if (!XeGPUDialect::isEvenlyDistributable(shape, resMap))
     return emitOpError("invalid resMap, data cannot be evenly distributed.");
 
   return mlir::success();

mlir/test/Dialect/XeGPU/invalid.mlir

Lines changed: 48 additions & 11 deletions
@@ -29,6 +29,27 @@ func.func @test_create_nd_tdesc_vc_4(%src: memref<2x24x32xf32, 3>) {
   return
 }
 
+// -----
+func.func @test_create_nd_tdesc_subgroup_1(%src: memref<128x128xf32>) {
+  // expected-error@+1 {{cannot distribute [128, 128] using #xegpu.layout<sg_layout = [4, 2], sg_data = [24, 48]>}}
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<128x128xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [4, 2], sg_data = [24, 48]>>
+  return
+}
+
+// -----
+func.func @test_create_nd_tdesc_subgroup_1(%src: memref<128x128xf32>) {
+  // expected-error@+1 {{cannot distribute [128, 128] using #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64], inst_data = [24, 48]>}}
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<128x128xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64], inst_data = [24, 48]>>
+  return
+}
+
+// -----
+func.func @test_create_nd_tdesc_subgroup_1(%src: memref<128x128xf32>) {
+  // expected-error@+1 {{cannot distribute [128, 128] using #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64], inst_data = [64, 32]>}}
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<128x128xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64], inst_data = [64, 32]>>
+  return
+}
+
 // -----
 func.func @test_prefetch_nd_vc_1(%src: memref<24x32xf16>) {
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -77,6 +98,17 @@ func.func @test_load_nd_vc_3(%src: memref<8x16xf16>) {
   return
 }
 
+// -----
+func.func @test_load_nd_vc_4(%src: memref<24x32xf32>) {
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
+    !xegpu.tensor_desc<8x16xf32>
+  // expected-error@+1 {{Result shape [8, 1] is not consistent with tensor descriptor}}
+  %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint<cached>,
+      l2_hint = #xegpu.cache_hint<uncached>}>
+    : !xegpu.tensor_desc<8x16xf32> -> vector<8x1xf32>
+  return
+}
+
 // -----
 func.func @test_load_nd_layout(%src: memref<24x32xf32>) {
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<16xf32>
@@ -87,13 +119,10 @@ func.func @test_load_nd_layout(%src: memref<24x32xf32>) {
 }
 
 // -----
-func.func @test_load_nd_vc_6(%src: memref<24x32xf32>) {
-  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
-    !xegpu.tensor_desc<8x16xf32>
-  // expected-error@+1 {{Result shape [8, 1] is not consistent with tensor descriptor}}
-  %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint<cached>,
-      l2_hint = #xegpu.cache_hint<uncached>}>
-    : !xegpu.tensor_desc<8x16xf32> -> vector<8x1xf32>
+func.func @test_load_nd_simt(%src: memref<24x32xf32>) {
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+  // expected-error@+1 {{TensorDesc doesn't need LayoutAttr for SIMT code}}
+  %2 = xegpu.load_nd %1 : !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8xf32>
   return
 }
 
@@ -135,6 +164,14 @@ func.func @test_store_nd_simt(%dst: memref<24x32xf32>, %data: vector<3xf32>) {
   return
 }
 
+// -----
+func.func @test_store_nd_simt(%src: memref<24x32xf32>, %data: vector<8xf32>) {
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+  // expected-error@+1 {{TensorDesc doesn't need LayoutAttr for SIMT code}}
+  xegpu.store_nd %data, %1 : vector<8xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+  return
+}
+
 // -----
 func.func @test_store_nd_vc_5(%dst: memref<24x32xf32>, %data: vector<8x1xf32>) {
   %1 = xegpu.create_nd_tdesc %dst[0, 0] : memref<24x32xf32> ->
@@ -404,31 +441,31 @@ func.func @tensor_desc_1D_invalid_map_data(%src: memref<24x32xf32>) {
 // -----
 func.func @tensor_desc_invalid_map_layout(%src: memref<24x32xf32>) {
   %0 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
-      // expected-error@+1 {{cannot distribute 8 over 16 work items with 1 elements each}}
+      // expected-error@+1 {{cannot distribute [4, 8] using #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}}
       !xegpu.tensor_desc<4x8xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
   return
 }
 
 // -----
 func.func @tensor_desc_invalid_map_layout_1(%src: memref<24x32xf32>) {
   %0 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
-      // expected-error@+1 {{cannot distribute 4 over 8 work items with 1 elements each}}
+      // expected-error@+1 {{cannot distribute [4, 8] using #xegpu.layout<lane_layout = [8, 2], lane_data = [1, 1]>}}
       !xegpu.tensor_desc<4x8xf32, #xegpu.layout<lane_layout = [8, 2], lane_data = [1, 1]>>
   return
 }
 
 // -----
 func.func @tensor_desc_invalid_map_data(%src: memref<24x32xf32>) {
   %0 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
-      // expected-error@+1 {{cannot distribute 4 over 2 work items with 4 elements each}}
+      // expected-error@+1 {{cannot distribute [4, 8] using #xegpu.layout<lane_layout = [2, 8], lane_data = [4, 1]>}}
       !xegpu.tensor_desc<4x8xf32, #xegpu.layout<lane_layout = [2, 8], lane_data = [4, 1]>>
   return
 }
 
 // -----
 func.func @tensor_desc_invalid_map_data_1(%src: memref<24x32xf32>) {
   %0 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
-      // expected-error@+1 {{cannot distribute 4 over 8 work items with 1 elements each}}
+      // expected-error@+1 {{cannot distribute [4, 8] using #xegpu.layout<lane_layout = [8, 2], lane_data = [1, 2]>}}
       !xegpu.tensor_desc<4x8xf32, #xegpu.layout<lane_layout = [8, 2], lane_data = [1, 2]>>
   return
 }

mlir/test/Dialect/XeGPU/ops.mlir

Lines changed: 23 additions & 2 deletions
@@ -95,6 +95,27 @@ gpu.func @test_create_nd_tdesc_simt_6(%src: memref<24x32xf32>) {
   gpu.return
 }
 
+// CHECK: gpu.func @test_create_nd_tdesc_subgroup_1(%[[arg0:.*]]: memref<128x128xf32>) {
+gpu.func @test_create_nd_tdesc_subgroup_1(%src: memref<128x128xf32>) {
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<128x128xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64]>>
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<128x128xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64]>>
+  gpu.return
+}
+
+// CHECK: gpu.func @test_create_nd_tdesc_subgroup_2(%[[arg0:.*]]: memref<128x128xf32>) {
+gpu.func @test_create_nd_tdesc_subgroup_2(%src: memref<128x128xf32>) {
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<128x128xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64], inst_data = [8, 16]>>
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<128x128xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64], inst_data = [8, 16]>>
+  gpu.return
+}
+
+// CHECK: gpu.func @test_create_nd_tdesc_subgroup_3(%[[arg0:.*]]: memref<128x128xf32>) {
+gpu.func @test_create_nd_tdesc_subgroup_3(%src: memref<128x128xf32>) {
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<128x128xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64], inst_data = [8, 16], lane_layout = [1, 16], lane_data = [1, 1]>>
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<128x128xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64], inst_data = [8, 16], lane_layout = [1, 16], lane_data = [1, 1]>>
+  gpu.return
+}
+
 // CHECK: gpu.func @test_prefetch_nd_vc(%[[arg0:.*]]: memref<24x32xf16>) {
 gpu.func @test_prefetch_nd_vc(%src: memref<24x32xf16>) {
   // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -127,8 +148,8 @@ gpu.func @test_load_nd_vc(%src: memref<8x16xf16>) {
 gpu.func @test_load_nd_simt(%src: memref<8x16xf16>) {
   // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
-  // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, packed}> : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
-  %2 = xegpu.load_nd %1 <{packed, l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
+  // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
+  %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
     : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
   gpu.return
 }
