Modify the uArch definition.

mshahneo · mshahneo · commit 97898353bb65 · 2025-07-02T00:56:36.000Z
This version makes the utility classes the central piece of the design.
It also stores the uArch information directly in the C++ files, as part of
the getter functions. The YAML files are no longer used.

Adds support for DPAS instruction.
diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/IntelGpuPVC.h b/mlir/include/mlir/Dialect/XeGPU/Utils/IntelGpuPVC.h
@@ -37,17 +37,25 @@ struct Xe2Plus : public uArch {
 };
 
 // struct to represent DPAS instruction
-struct DPASInstruction : public Instruction {
-  Range systolic_depth;
-  Range repreat_count;
-  Range execution_size;
-  std::map<std::string, uint> ops_per_channel;
-  std::vector<std::vector<std::string>> supported_types;
-  std::map<std::string, std::map<std::string, std::vector<std::string>>>
-      matrix_size;
+struct DPASInstruction : public Instruction, public MatrixOpInterface {
+  // The former data members (systolic depth, repeat count, execution size,
+  // ops per channel, supported types, matrix sizes) and
+  // checkSupportedDPASTypes() are gone: this struct no longer stores
+  // per-instruction tables and instead answers queries through the
+  // MatrixOpInterface overrides declared below.
+  // `override` already implies `virtual`, so the keyword is not repeated
+  // on the declarations.
 
-  bool checkSupportedDPASTypes(mlir::Type dstType, mlir::Type src0Type,
-                               mlir::Type src1Type, mlir::Type src2Type);
+  // Checks whether the A/B/C/D element types form a supported combination.
+  bool checkSupportedMMATypes(mlir::Type AType, mlir::Type BType,
+                              mlir::Type CType, mlir::Type DType) override;
+  // Supported extents of the M, K and N dimensions for the given type.
+  std::vector<uint> getSupportedM(mlir::Type type) override;
+  std::vector<uint> getSupportedK(mlir::Type type) override;
+  std::vector<uint> getSupportedN(mlir::Type type) override;
+  // Supported shape pairs: A -> (M, K), B -> (K, N), C and D -> (M, N).
+  std::vector<std::pair<unsigned, unsigned>>
+  getSupportedMatrix(mlir::Type type, MatrixType matrixType) override;
 };
 
 struct LoadStore2DTileInfo : public RangeTile {
diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/uArch.h b/mlir/include/mlir/Dialect/XeGPU/Utils/uArch.h
@@ -277,6 +277,48 @@ struct SharedMemory {
 //     uint num_matrix_units;
 //     SharedMemory shared_memory;
 // };
+
+// Interface for tile-like ops (load/store/prefetch).
+struct TileOpInterface {
+  // Virtual so implementations can be destroyed through the interface.
+  virtual ~TileOpInterface() = default;
+
+  // Get the supported tiles for the specific data type.
+  // Can provide load/store/prefetch ops supported tile sizes for a specific
+  // uarch
+  virtual DiscreteTile getSupportedTiles(mlir::Type type) = 0;
+
+  // Validate the tile ops restrictions
+  // @param tile, tile to load/store/prefetch
+  // @param surface, surface to load/store/prefetch data from
+  // @param dataType, data type of the data
+  // @param surface_pitch, surface pitch
+  // @param array_len, array length
+  virtual bool validate(Tile tile, Tile surface, mlir::Type dataType,
+                        uint surface_pitch, uint array_len = 1) = 0;
+};
+
+// Selects one of the four matrix operands (A, B, C, D) of a matrix op.
+enum class MatrixType { MatrixA, MatrixB, MatrixC, MatrixD };
+
+// Interface for matrix ops that can report their supported types and shapes.
+struct MatrixOpInterface {
+  // Virtual so implementations can be destroyed through the interface.
+  virtual ~MatrixOpInterface() = default;
+
+  // Returns true if the A/B/C/D element-type combination is supported.
+  virtual bool checkSupportedMMATypes(mlir::Type AType, mlir::Type BType,
+                                      mlir::Type CType, mlir::Type DType) = 0;
+  // Supported extents of the M, K and N dimensions for the given type.
+  virtual std::vector<uint> getSupportedM(mlir::Type type) = 0;
+  virtual std::vector<uint> getSupportedK(mlir::Type type) = 0;
+  virtual std::vector<uint> getSupportedN(mlir::Type type) = 0;
+  // Supported shapes, as pairs of extents, of the operand selected by
+  // matrixType for the given type.
+  virtual std::vector<std::pair<unsigned, unsigned>>
+  getSupportedMatrix(mlir::Type type, MatrixType matrixType) = 0;
+};
+
 } // namespace uArch
 } // namespace xegpu
 } // namespace mlir
diff --git a/mlir/lib/Dialect/XeGPU/Utils/IntelGpuPVC.cpp b/mlir/lib/Dialect/XeGPU/Utils/IntelGpuPVC.cpp
@@ -7,26 +7,160 @@
 using namespace mlir::xegpu::uArch;
 using namespace mlir::xegpu::uArch::PVCuArch;
 
-namespace llvm {
-namespace yaml {
-template <>
-struct MappingTraits<XeCoreInfo> {
-  static void mapping(IO &io, XeCoreInfo &xe_core) {
-    io.mapRequired("num_threads", xe_core.num_threads);
-    io.mapRequired("shared_memory", xe_core.shared_memory);
-    io.mapRequired("num_vector_units", xe_core.num_vector_units);
-    io.mapRequired("num_matrix_units", xe_core.num_matrix_units);
+namespace mlir {
+namespace xegpu {
+namespace uArch {
+namespace PVCuArch {
+// Checks whether the element types of the A and B operands, the accumulator
+// (CType, may be null when there is no accumulator) and the destination
+// (DType) form a combination the DPAS instruction accepts.
+// Returns true if supported; otherwise prints the supported-type table plus
+// the offending types to llvm::errs() and returns false.
+// NOTE(review): the integer path only validates A and B; the ud/d restriction
+// on Dst/Acc listed in its table is not enforced here - confirm intent.
+bool DPASInstruction::checkSupportedMMATypes(mlir::Type AType, mlir::Type BType,
+                                             mlir::Type CType,
+                                             mlir::Type DType) {
+  // Shared rejection path: report the table for the matched type family.
+  auto reject = [&](const char *supportedTable) {
+    llvm::errs()
+        << "Unsupported dpas combinations of Dst, Acc, A and B matrices, "
+        << "Supported types are:\n"
+        << supportedTable << "AType: " << AType << " BType: " << BType
+        << " CType: " << CType << " DType: " << DType << "\n";
+    return false;
+  };
+  // The integer path accepts 2-, 4- and 8-bit integers for A and B.
+  auto isNarrowInt = [](mlir::Type type) {
+    return type.isInteger(2) || type.isInteger(4) || type.isInteger(8);
+  };
+
+  // Only a failed check returns false; supported combinations fall through.
+  if (AType.isF16() || BType.isF16()) {
+    // A and B must both be f16; Acc (if any) and Dst must be f32 or f16.
+    if (AType != BType || (CType && !CType.isF32() && !CType.isF16()) ||
+        (!DType.isF32() && !DType.isF16()))
+      return reject("  Dst    |   Acc   |   A   |  B  \n"
+                    " f, hf   |  f, hf  |   hf  |  hf \n");
+  } else if (AType.isBF16() || BType.isBF16()) {
+    // A and B must both be bf16; Acc (if any) and Dst must be f32 or bf16.
+    if (AType != BType || (CType && !CType.isF32() && !CType.isBF16()) ||
+        (!DType.isF32() && !DType.isBF16()))
+      return reject("  Dst    |   Acc   |   A   |  B  \n"
+                    " f, bf   |  f, bf  |   bf  |  bf \n");
+  } else if (AType.isTF32() || BType.isTF32()) {
+    // A and B must both be tf32; Acc (if any) and Dst must be f32.
+    if (AType != BType || (CType && !CType.isF32()) || !DType.isF32())
+      return reject("  Dst    |   Acc   |   A    |   B  \n"
+                    "   f     |    f    |  tf32  |  tf32 \n");
+  } else if (!isNarrowInt(AType) || !isNarrowInt(BType)) {
+    // Not a float family above, so both A and B must be narrow integers.
+    return reject(
+        "  Dst     |   Acc    |         A           |         B          "
+        " \n"
+        " ud, d    |  ud,d    |  ub,b,u4,s4,u2,s2   |  ub,b,u4,s4,u2,s2  "
+        "\n");
   }
-};
 
-template <>
-struct MappingTraits<Xe2Plus> {
-  static void mapping(IO &io, Xe2Plus &xe2plus) {
-    io.mapRequired("xe_core", xe2plus.xe_core);
+  return true;
+}
+
+std::vector<uint> DPASInstruction::getSupportedM(mlir::Type /*type*/) {
+  return std::vector<uint>{1, 2, 3, 4, 5, 6, 7, 8}; // same for every type
+}
+
+// Returns the K extent(s) DPAS supports for `type`, selected purely by the
+// type's bit width. A width outside {2, 4, 8, 16, 32} hits llvm_unreachable.
+std::vector<uint> DPASInstruction::getSupportedK(mlir::Type type) {
+  // Only int and float element types are valid here.
+  assert(type.isIntOrFloat() && "Matrix type must be int or float");
+  uint kSize = 0;
+  switch (type.getIntOrFloatBitWidth()) {
+  case 2:
+  case 4:
+    kSize = 64;
+    break;
+  case 8:
+    kSize = 32;
+    break;
+  case 16:
+    kSize = 16;
+    break;
+  case 32:
+    kSize = 8;
+    break;
+  default:
+    llvm_unreachable("Invalid int or float");
+  }
+  return {kSize}; // exactly one supported K per bit width
+}
+
+std::vector<uint> DPASInstruction::getSupportedN(mlir::Type /*type*/) {
+  return std::vector<uint>{16}; // same for every type
+}
+
+// Returns every supported (rows, cols) shape of the requested operand for
+// element type `type`, built as the cross product of the per-dimension
+// extents: A -> M x K, B -> K x N, C and D -> M x N.
+std::vector<std::pair<unsigned, unsigned>>
+DPASInstruction::getSupportedMatrix(mlir::Type type, MatrixType matrixType) {
+  // Cartesian product of two extent lists, row extents varying slowest.
+  auto combineVectors = [](const std::vector<unsigned> &rows,
+                           const std::vector<unsigned> &cols) {
+    std::vector<std::pair<unsigned, unsigned>> shapes;
+    shapes.reserve(rows.size() * cols.size());
+    for (unsigned row : rows)
+      for (unsigned col : cols)
+        shapes.emplace_back(row, col);
+    return shapes;
+  };
+
+  auto M = getSupportedM(type);
+  auto K = getSupportedK(type);
+  auto N = getSupportedN(type);
+  std::vector<std::pair<unsigned, unsigned>> resultMatrix;
+
+  switch (matrixType) {
+  case MatrixType::MatrixA:
+    resultMatrix = combineVectors(M, K);
+    break;
+  case MatrixType::MatrixB:
+    resultMatrix = combineVectors(K, N);
+    break;
+  case MatrixType::MatrixC:
+  case MatrixType::MatrixD:
+    // C and D use the same M x N shape list.
+    resultMatrix = combineVectors(M, N);
+    break;
   }
-};
-} // namespace yaml
-} // namespace llvm
+  return resultMatrix;
+}
+
+} // namespace PVCuArch
+} // namespace uArch
+} // namespace xegpu
+} // namespace mlir
+
+// namespace llvm {
+// namespace yaml {
+// template <>
+// struct MappingTraits<XeCoreInfo> {
+//   static void mapping(IO &io, XeCoreInfo &xe_core) {
+//     io.mapRequired("num_threads", xe_core.num_threads);
+//     io.mapRequired("shared_memory", xe_core.shared_memory);
+//     io.mapRequired("num_vector_units", xe_core.num_vector_units);
+//     io.mapRequired("num_matrix_units", xe_core.num_matrix_units);
+//   }
+// };
+
+// template <>
+// struct MappingTraits<Xe2Plus> {
+//   static void mapping(IO &io, Xe2Plus &xe2plus) {
+//     io.mapRequired("xe_core", xe2plus.xe_core);
+//   }
+// };
+// } // namespace yaml
+// } // namespace llvm
 
 // namespace mlir {
 // namespace xe_gpu {