|
| 1 | +//===--- uArch.h ---------------------------------------*- C++ -*-===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// |
| 9 | +/// \file |
| 10 | +/// Base uArch definition for different architectures. |
| 11 | +/// |
| 12 | +// |
| 13 | +//===----------------------------------------------------------------------===// |
| 14 | +#ifndef MLIR_DIALECT_XEGPU_UTILS_UARCH_H |
| 15 | +#define MLIR_DIALECT_XEGPU_UTILS_UARCH_H |
| 16 | + |
#include <any>
#include <functional>
#include <iostream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
| 20 | +namespace mlir { |
| 21 | +namespace xegpu { |
| 22 | +namespace uArch { |
| 23 | + |
| 24 | +// Data types we need for YAML to uArch translation |
// A range of integer values described by its two end points.
// The examples in this file write a range as [1, 32] and read it as "values
// from 1 to 32", so both end points are presumably inclusive - TODO confirm.
// Both members default to 0 so that a default-constructed Range never holds
// indeterminate values.
struct Range {
  int start = 0; // first value of the range
  int end = 0;   // last value of the range
};
| 29 | + |
// Tile can be multi-dimensional.
// For example, a 2D tile can be represented as:
//   Tile:
//     no_of_dims: 2
//     dim: [2, 2]
// This represents a 2x2 tile.
//
// The members use the standard `unsigned` type rather than `uint`, which is a
// platform-specific typedef that is not available with every toolchain.
struct Tile {
  // Number of dimensions of the tile; defaults to 0 so that a
  // default-constructed Tile is well defined.
  unsigned no_of_dims = 0;
  // Extent of each dimension; presumably holds no_of_dims entries - nothing in
  // this struct enforces it.
  std::vector<unsigned> dims;
};
| 40 | + |
| 41 | +// RangeTile represents a range of tiles instead of a single tile |
| 42 | +// RangeTile essentially provides a way represent the supported range of values |
| 43 | +// in each dimension For each dimension, the range of values is represented as a |
| 44 | +// Range For example, a 2D RangeTile can be represented as: RangeTile: |
| 45 | +// no_of_dims: 2 |
| 46 | +// dims: |
| 47 | +// - [1, 32] |
| 48 | +// - [2, 16] |
| 49 | +// This represents a 2x2 RangeTile where the first dimension can have values |
| 50 | +// from 1 to 32 and the second dimension can have values from 2 to 16 |
| 51 | +struct RangeTile { |
| 52 | + uint no_of_dims; |
| 53 | + std::vector<Range> dims; |
| 54 | +}; |
| 55 | + |
// DiscreteTile represents a set of tiles instead of a single tile.
// It provides a way to represent the supported set of values in each
// dimension: every dimension is described by a vector of integers.
// For example, a 2D DiscreteTile can be represented as:
//   DiscreteTile:
//     no_of_dims: 2
//     dims:
//       - [1, 2, 4, 8, 16, 32]
//       - [2, 4, 8, 16]
// This represents a 2-D DiscreteTile where the first dimension can have the
// values 1, 2, 4, 8, 16, 32 and the second dimension can have the values
// 2, 4, 8, 16.
//
// The members use the standard `unsigned` type rather than `uint`, which is a
// platform-specific typedef that is not available with every toolchain.
struct DiscreteTile {
  // Number of dimensions; defaults to 0 so that a default-constructed
  // DiscreteTile is well defined.
  unsigned no_of_dims = 0;
  // Supported values of each dimension; presumably holds no_of_dims inner
  // vectors - nothing in this struct enforces it.
  std::vector<std::vector<unsigned>> dims;
};
| 71 | + |
// Restriction struct
// This struct is used to represent a restriction on the uArch.
// A restriction bundles the necessary parameters (one per template argument)
// with a predicate that takes the same parameters. The predicate returns true
// if the arguments satisfy the restriction and false if they do not.
//
// For example, a restriction that checks if the number of dimensions in a
// RangeTile is 2 can be represented as:
//   RangeTile rt = {2, {{1, 32}, {2, 16}}};
//   Restriction<RangeTile> r1(rt,
//                             [](RangeTile t) { return t.no_of_dims == 2; });
// r1.validate() returns true if the number of dimensions in the RangeTile is 2
// and false otherwise.
//
// The primary purpose of Restriction struct is to provide a generic way to
// represent restrictions on the uArch and to validate if the uArch satisfies
// the restrictions.
//
// The predicate is stored as std::function<bool(Args...)>. Storing it as
// std::function<void(Args...)> would discard the predicate's result, and
// validate()/apply() could then not return a value at all.
template <typename... Args>
struct Restriction {
  // Parameters handed to the predicate, in template-argument order.
  std::tuple<Args...> data;
  // Predicate deciding whether `data` satisfies the restriction.
  std::function<bool(Args...)> func;

  // Stores the parameters `args` and the predicate `f`.
  Restriction(Args... args, std::function<bool(Args...)> f)
      : data(std::move(args)...), func(std::move(f)) {}

  // Invokes the predicate on the stored parameters and returns its result.
  // Throws std::bad_function_call if no predicate is stored.
  bool validate() const { return std::apply(func, data); }

  // Same invocation as validate(), with the boolean result wrapped in a
  // std::any.
  std::any apply() const { return std::apply(func, data); }
};
| 101 | + |
// Functional unit of an instruction.
// Enumerators keep their declaration order, so their underlying values run
// from 0 (ALU) to 10 (Other).
enum class FunctionalUnit {
  ALU, Tensor, Matrix,
  Load, Store,
  Branch, Barrier,
  Memory, Atomic, Interconnect,
  Other
};
| 116 | + |
// Type of memory.
enum class MemoryType {
  Shared,
  Local,
  Global,
  Constant,
  Texture,
  Other
};
| 119 | + |
// Memory access type.
enum class MemoryAccessType {
  Read,
  Write,
  ReadWrite,
  Other
};
| 122 | + |
// Type of an instruction.
enum class InstructionType {
  SIMT,
  SIMD,
  SPMD,
  MIMD,
  Other
};
| 125 | + |
// Scope of an instruction.
// Thread and Core are the enumerators intended for CPU targets.
enum class InstructionScope {
  WorkItem, Subgroup, Workgroup, Cluster,
  Thread, Core, // For CPU
  Other
};
| 136 | + |
// Unit of computation of an instruction.
// Vector denotes a 1-D vector.
enum class UnitOfComputation { Scalar, Vector, Matrix, Tile, Other };
| 145 | + |
| 146 | +// A struct to represent basic information about an instruction |
| 147 | +// This struct is used to represent the information about an instruction in the |
| 148 | +// uArch The information includes: |
| 149 | +// - the name of the instruction, |
| 150 | +// - the opcode, |
| 151 | +// - the functional unit, |
| 152 | +// - the type of the instruction, |
| 153 | +// - the scope of the instruction, |
| 154 | +// - the unit of computation, |
| 155 | +// - the description of the instruction |
| 156 | +// The information is represented as strings |
| 157 | +// For example, the information about an instruction can be represented as: |
| 158 | +// Instruction info = {"dpas", "0x83", "matrix", "simd", "subgroup", "tile", |
| 159 | +// "Dot Product Accumulate Systolic (DPAS) is a matrix multiply-add |
| 160 | +// operation"}; |
| 161 | + |
| 162 | +// The primary purpose of Instruction struct is to provide a generic way to |
| 163 | +// represent information about an instruction and to use this information to |
| 164 | +// generate the uArch. Specifc instruction in a uArch can inherit from this |
| 165 | +// struct and add more fields as needed |
| 166 | + |
| 167 | +struct Instruction { |
| 168 | + std::string name; |
| 169 | + std::string description; |
| 170 | + std::string opcode; |
| 171 | + FunctionalUnit functional_unit; |
| 172 | + InstructionType type; |
| 173 | + InstructionScope scope; |
| 174 | + UnitOfComputation unit_of_computation; |
| 175 | + |
| 176 | + // @TODO: Add more fields as needed |
| 177 | + // std::string latency; |
| 178 | + // std::string throughput; |
| 179 | + // std::string pipeline; |
| 180 | + // std::string resource; |
| 181 | + // std::string comment; |
| 182 | +}; |
| 183 | + |
// A struct to represent register file information.
// The scalar members use the standard `unsigned` type rather than `uint`,
// which is a platform-specific typedef that is not available with every
// toolchain, and default to 0 so that a default-constructed RegisterFileInfo
// is well defined.
struct RegisterFileInfo {
  unsigned size = 0;             // size per register in bits
  std::vector<std::string> mode; // e.g., "small", "large" GRF modes
  // Number of registers per thread per mode; presumably entry i belongs to
  // mode[i] - TODO confirm.
  std::vector<unsigned> num_regs_per_thread_per_mode;
  unsigned num_banks = 0;
  unsigned bank_size = 0;
};
| 193 | + |
// A struct to represent cache information.
// The units of the individual fields are not specified in this file.
// All members use the standard `unsigned` type rather than `uint`, which is a
// platform-specific typedef that is not available with every toolchain, and
// default to 0 so that a default-constructed CacheInfo is well defined.
struct CacheInfo {
  unsigned size = 0;
  unsigned associativity = 0;
  unsigned line_size = 0;
  unsigned num_banks = 0;
  unsigned bank_size = 0;
  unsigned num_ports = 0;
  unsigned port_width = 0;
  unsigned bank_conflicts = 0;
};
| 205 | + |
// A struct to represent the uArch
// This struct is used to represent the microarchitecture of a target device
// The uArch includes:
//   - the name of the uArch,
//   - the description of the uArch,
//   - the register file information, one entry per kind of register file,
//   - the cache information, one entry per cache level,
//   - the set of instructions supported by the uArch,
//   - the set of restrictions on the uArch
// The information is represented as strings and as vectors of
// RegisterFileInfo, CacheInfo, Instruction pointers and Restriction pointers.
// NOTE(review): instructions and restrictions are held as raw pointers and
// nothing in this struct releases them; ownership presumably stays with the
// code that populates the uArch - confirm.
// NOTE(review): the element type Restriction<> is the specialization without
// parameters; a Restriction with parameters (e.g. Restriction<RangeTile>) is
// a different, unrelated type and cannot be stored here without a cast.
struct uArch {
  std::string name; // similar to target triple
  std::string description;
  // Different kind of register file information (e.g., GRF, ARF, etc.)
  std::vector<RegisterFileInfo> register_file_info;
  // Each level of cache is indexed lower to higher in the vector
  // (e.g., L1 indexed at 0, L2 at 1 and so on) L1, L2, L3, etc.
  std::vector<CacheInfo> cache_info;
  std::vector<Instruction *> instructions;
  std::vector<Restriction<> *> restrictions;
};
| 236 | + |
// A struct to represent shared memory information.
// The units of `size` and `alignment` are not specified in this file.
// Both members use the standard `unsigned` type rather than `uint`, which is a
// platform-specific typedef that is not available with every toolchain, and
// default to 0 so that a default-constructed SharedMemory is well defined.
struct SharedMemory {
  unsigned size = 0;
  unsigned alignment = 0;
  // @TODO: Add more fields as needed
  // unsigned latency;
  // unsigned throughput;
  // unsigned bandwidth;
  // unsigned num_ports;
  // unsigned port_width;
  // unsigned bank_size;
  // unsigned bank_conflicts;
  // unsigned num_banks;
};
| 251 | + |
| 252 | +// For future use case in Xe4+ |
| 253 | + |
| 254 | +// struct EUInfo { |
| 255 | +// uint num_eu_threads; |
| 256 | +// SharedMemory shared_memory; |
| 257 | +// }; |
| 258 | + |
| 259 | +// uint num_simd_units; |
| 260 | +// uint num_spus; |
| 261 | +// uint num_smt; |
| 262 | +// uint num_hardware_threads; |
| 263 | +// uint num_threads_per_spu; |
| 264 | +// uint num_threads_per_simd_unit; |
| 265 | +// uint num_threads_per_hardware_thread; |
| 266 | +// uint num_threads_per_smt; |
| 267 | +// SharedMemory shared_memory; |
| 268 | +// }; |
| 269 | + |
| 270 | +// A struct to represent a GPU uArch |
| 271 | +// This struct is used to represent the GPU microarchitecture of a target device |
| 272 | +// struct GPUuArch : public uArch { |
| 273 | +// uint num_compute_units; |
| 274 | +// uint num_vector_units; |
| 275 | +// uint num_scalar_units; |
| 276 | +// uint num_tensor_units; |
| 277 | +// uint num_matrix_units; |
| 278 | +// SharedMemory shared_memory; |
| 279 | +// }; |
| 280 | +} // namespace uArch |
| 281 | +} // namespace xegpu |
| 282 | +} // namespace mlir |
| 283 | + |
| 284 | +#endif // MLIR_DIALECT_XEGPU_UTILS_UARCH_H |
| 285 | +//===--- uArch.h ---------------------------------------*- C++ -*-===// |
0 commit comments