Skip to content

Commit 5dd0ebe

Browse files
committed
[uArch][XeGPU] Add uArch definition.
1 parent 0ef39a8 commit 5dd0ebe

File tree

5 files changed

+649
-0
lines changed

5 files changed

+649
-0
lines changed
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
//===--- IntelGpuPVC.h ------------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// PVC uArch definition.
11+
///
12+
//
13+
//===----------------------------------------------------------------------===//
14+
#ifndef MLIR_DIALECT_XEGPU_UTILS_INTEL_GPU_PVC_H
15+
#define MLIR_DIALECT_XEGPU_UTILS_INTEL_GPU_PVC_H
16+
17+
#include "mlir/Dialect/XeGPU/Utils/uArch.h"
18+
#include "mlir/IR/BuiltinTypes.h"
19+
#include "mlir/IR/TypeUtilities.h"
20+
#include <map>
21+
#include <string>
22+
#include <vector>
23+
24+
namespace mlir {
25+
namespace xegpu {
26+
namespace uArch {
27+
namespace PVCuArch {
28+
struct XeCoreInfo {
29+
uint num_threads;
30+
SharedMemory shared_memory;
31+
uint num_vector_units;
32+
uint num_matrix_units;
33+
};
34+
35+
struct Xe2Plus : public uArch {
36+
XeCoreInfo xe_core;
37+
};
38+
39+
// struct to represent DPAS instruction
40+
struct DPASInstruction : public Instruction {
41+
Range systolic_depth;
42+
Range repreat_count;
43+
Range execution_size;
44+
std::map<std::string, uint> ops_per_channel;
45+
std::vector<std::vector<std::string>> supported_types;
46+
std::map<std::string, std::map<std::string, std::vector<std::string>>>
47+
matrix_size;
48+
49+
bool checkSupportedDPASTypes(mlir::Type dstType, mlir::Type src0Type,
50+
mlir::Type src1Type, mlir::Type src2Type);
51+
};
52+
53+
// Tile-size description for 2D load/store: a RangeTile plus a list of array
// lengths.
struct LoadStore2DTileInfo : public RangeTile {
  // `unsigned` instead of the non-standard `uint` typedef for portability.
  std::vector<unsigned> array_len;
};
56+
57+
// struct to represent Load2D/Store2D/Prefetch instruction
58+
struct LoadStorePrefetch2DInstruction : public Instruction {
59+
MemoryType memory_type;
60+
MemoryAccessType memory_access_type;
61+
// std::vector<std::string> supported_types;
62+
std::vector<uint> supported_types_bitwidth;
63+
std::map<std::string, uint> alignment;
64+
LoadStore2DTileInfo supported_tile_sizes;
65+
uint min_surface_pitch;
66+
67+
// Validate Array length restriction on a given tile
68+
bool validateArrayLenRestriction(Tile tile, uint array_len,
69+
mlir::Type dataType) {
70+
71+
Restriction<Tile, uint, mlir::Type> width_array_len_restriction(
72+
tile, array_len, dataType,
73+
[](Tile tile, uint array_len, mlir::Type dataType) {
74+
assert(tile.no_of_dims == 2);
75+
return tile.dims[1] * array_len *
76+
(dataType.getIntOrFloatBitWidth() / 8) <=
77+
64;
78+
});
79+
return width_array_len_restriction.validate();
80+
}
81+
82+
// Validate Surface Pitch restriction on a given tile
83+
bool validateSurfacePitchRestriction(Tile tile,
84+
uint surfacePitch /*in bytes*/) {
85+
Restriction<Tile, uint> surface_pitch_restriction(
86+
tile, surfacePitch, [](Tile tile, uint surfacePitch) {
87+
assert(tile.no_of_dims == 2);
88+
return surfacePitch >= 64;
89+
});
90+
return surface_pitch_restriction.validate();
91+
}
92+
};
93+
94+
} // namespace PVCuArch
95+
} // namespace uArch
96+
} // namespace xegpu
97+
} // namespace mlir
98+
99+
#endif // MLIR_DIALECT_XEGPU_UTILS_INTEL_GPU_PVC_H
100+
//===--- IntelGpuPVC.h ---------------------------------------*- C++ -*-===//
Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
//===--- uArch.h ---------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// Base uArch definition for different architectures.
11+
///
12+
//
13+
//===----------------------------------------------------------------------===//
14+
#ifndef MLIR_DIALECT_XEGPU_UTILS_UARCH_H
15+
#define MLIR_DIALECT_XEGPU_UTILS_UARCH_H
16+
17+
#include <any>
#include <functional>
#include <iostream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
20+
namespace mlir {
21+
namespace xegpu {
22+
namespace uArch {
23+
24+
// Data types we need for YAML to uArch translation
25+
// A pair of integer bounds, `start` and `end`.
// Both bounds default to 0 so a default-constructed Range never holds
// indeterminate values; aggregate initialization (`Range{1, 32}`) still works.
// NOTE(review): whether `end` is inclusive is not established in this header.
struct Range {
  int start = 0;
  int end = 0;
};
29+
30+
// Tile can be multi-dimensional.
// For example, a 2D tile can be represented as:
//   Tile:
//     no_of_dims: 2
//     dims: [2, 2]
// This represents a 2x2 tile.
//
// `unsigned` is used instead of the non-standard `uint` typedef, and
// `no_of_dims` defaults to 0 so a default-constructed Tile is well defined.
// Nothing in this struct keeps `no_of_dims` and `dims.size()` in sync; that is
// the responsibility of whoever fills it in.
struct Tile {
  unsigned no_of_dims = 0;
  std::vector<unsigned> dims;
};
40+
41+
// RangeTile represents a range of tiles instead of a single tile
42+
// RangeTile essentially provides a way represent the supported range of values
43+
// in each dimension For each dimension, the range of values is represented as a
44+
// Range For example, a 2D RangeTile can be represented as: RangeTile:
45+
// no_of_dims: 2
46+
// dims:
47+
// - [1, 32]
48+
// - [2, 16]
49+
// This represents a 2x2 RangeTile where the first dimension can have values
50+
// from 1 to 32 and the second dimension can have values from 2 to 16
51+
struct RangeTile {
52+
uint no_of_dims;
53+
std::vector<Range> dims;
54+
};
55+
56+
// DiscreteTile represents a set of tiles instead of a single tile.
// It provides a way to represent the supported set of values in each
// dimension: every dimension is described by a vector of integers.
// For example, a 2D DiscreteTile can be represented as:
//   DiscreteTile:
//     no_of_dims: 2
//     dims:
//       - [1, 2, 4, 8, 16, 32]
//       - [2, 4, 8, 16]
// This represents a 2-dimensional DiscreteTile where the first dimension can
// have values 1, 2, 4, 8, 16, 32 and the second dimension can have values
// 2, 4, 8, 16.
//
// `unsigned` is used instead of the non-standard `uint` typedef, and
// `no_of_dims` defaults to 0 so a default-constructed DiscreteTile is well
// defined.
struct DiscreteTile {
  unsigned no_of_dims = 0;
  std::vector<std::vector<unsigned>> dims;
};
71+
72+
// Restriction struct
// This struct is used to represent a restriction on the uArch.
// A restriction bundles the parameters it needs (one per template argument)
// with a predicate taking those same parameters. The predicate returns true
// if the arguments satisfy the restriction and false otherwise.
//
// For example, a restriction that checks if the number of dimensions in a
// RangeTile is 2 can be represented as:
//   RangeTile rt = {2, {{1, 32}, {2, 16}}};
//   Restriction<RangeTile> r1(rt, [](RangeTile t) { return t.no_of_dims == 2; });
// r1.validate() returns true if the number of dimensions in the RangeTile is 2
// and false otherwise.
//
// The primary purpose of the Restriction struct is to provide a generic way to
// represent restrictions on the uArch and to validate if the uArch satisfies
// the restrictions.
//
// The predicate is stored as std::function<bool(Args...)>. Storing it with a
// `void` return type would discard the result, and validate()/apply() could
// not compile once instantiated.
template <typename... Args>
struct Restriction {
  // Captured arguments, passed to `func` on every evaluation.
  std::tuple<Args...> data;
  // Predicate deciding whether `data` satisfies the restriction.
  std::function<bool(Args...)> func;

  // Stores the arguments followed by the predicate. Arguments are forwarded
  // into the tuple to avoid a second copy of by-value parameters.
  Restriction(Args... args, std::function<bool(Args...)> f)
      : data(std::forward<Args>(args)...), func(std::move(f)) {}

  // Evaluates the predicate on the stored arguments.
  bool validate() const { return std::apply(func, data); }

  // Same evaluation as validate(), with the bool result wrapped in std::any.
  std::any apply() const { return std::any(validate()); }
};
101+
102+
// Functional unit an instruction is associated with.
// Enumerator order (and therefore the underlying values) is significant.
enum class FunctionalUnit {
  ALU, Tensor, Matrix,
  Load, Store,
  Branch, Barrier,
  Memory, Atomic,
  Interconnect,
  Other
};
116+
117+
// Kind of memory an instruction operates on.
enum class MemoryType {
  Shared,
  Local,
  Global,
  Constant,
  Texture,
  Other
};
119+
120+
// Direction of a memory access.
enum class MemoryAccessType {
  Read,
  Write,
  ReadWrite,
  Other
};
122+
123+
// Execution model of an instruction.
enum class InstructionType {
  SIMT,
  SIMD,
  SPMD,
  MIMD,
  Other
};
125+
126+
// Scope at which an instruction is issued.
enum class InstructionScope {
  // GPU-style scopes.
  WorkItem, Subgroup, Workgroup, Cluster,
  // CPU-style scopes.
  Thread, Core,
  Other
};
136+
137+
// Granularity of the data an instruction computes on.
// `Vector` denotes a 1-D vector.
enum class UnitOfComputation { Scalar, Vector, Matrix, Tile, Other };
145+
146+
// A struct to represent basic information about an instruction
147+
// This struct is used to represent the information about an instruction in the
148+
// uArch The information includes:
149+
// - the name of the instruction,
150+
// - the opcode,
151+
// - the functional unit,
152+
// - the type of the instruction,
153+
// - the scope of the instruction,
154+
// - the unit of computation,
155+
// - the description of the instruction
156+
// The information is represented as strings
157+
// For example, the information about an instruction can be represented as:
158+
// Instruction info = {"dpas", "0x83", "matrix", "simd", "subgroup", "tile",
159+
// "Dot Product Accumulate Systolic (DPAS) is a matrix multiply-add
160+
// operation"};
161+
162+
// The primary purpose of Instruction struct is to provide a generic way to
163+
// represent information about an instruction and to use this information to
164+
// generate the uArch. Specifc instruction in a uArch can inherit from this
165+
// struct and add more fields as needed
166+
167+
struct Instruction {
168+
std::string name;
169+
std::string description;
170+
std::string opcode;
171+
FunctionalUnit functional_unit;
172+
InstructionType type;
173+
InstructionScope scope;
174+
UnitOfComputation unit_of_computation;
175+
176+
// @TODO: Add more fields as needed
177+
// std::string latency;
178+
// std::string throughput;
179+
// std::string pipeline;
180+
// std::string resource;
181+
// std::string comment;
182+
};
183+
184+
// A struct to represent register file information.
//
// `unsigned` is used instead of the non-standard `uint` typedef, and scalar
// members default to 0 so a default-constructed RegisterFileInfo never holds
// indeterminate values. The struct remains an aggregate.
struct RegisterFileInfo {
  unsigned size = 0;             // size per register in bits
  std::vector<std::string> mode; // e.g., "small", "large" GRF modes
  std::vector<unsigned>
      num_regs_per_thread_per_mode; // number of registers per thread per mode
  unsigned num_banks = 0;
  unsigned bank_size = 0;
};
193+
194+
// A struct to represent cache information.
//
// `unsigned` is used instead of the non-standard `uint` typedef, and every
// member defaults to 0 so a default-constructed CacheInfo never holds
// indeterminate values. The struct remains an aggregate. Units of the
// individual fields are not specified in this header.
struct CacheInfo {
  unsigned size = 0;
  unsigned associativity = 0;
  unsigned line_size = 0;
  unsigned num_banks = 0;
  unsigned bank_size = 0;
  unsigned num_ports = 0;
  unsigned port_width = 0;
  unsigned bank_conflicts = 0;
};
205+
206+
// A struct to represent the uArch
207+
// This struct is used to represent the microarchitecture of a target device
208+
// The uArch includes:
209+
// - the name of the uArch,
210+
// - the description of the uArch,
211+
// - the range of tiles supported by the uArch,
212+
// - the set of tiles supported by the uArch,
213+
// - the set of instructions supported by the uArch,
214+
// - the set of restrictions on the uArch
215+
// The information is represented as strings, RangeTile, DiscreteTile,
216+
// Instruction and Restriction structs For example, the information about a
217+
// uArch can be represented as: uArch uarch = {"XeHPG", "Intel Xe HPG
218+
// microarchitecture", {2, {{1, 32}, {1, 32}}}, {2, {{1, 2, 4, 8, 16, 32}, {1,
219+
// 2, 4, 8, 16, 32}}}, {{"dpas", "0x83", "matrix", "simd", "subgroup", "tile",
220+
// "Dot Product Accumulate Systolic (DPAS) is a matrix multiply-add
221+
// operation"}}, {r1, r2, r3}}; This represents a uArch named "XeHPG" with
222+
// description "Intel Xe HPG microarchitecture" that supports 2x2 tiles with
223+
// dimensions ranging from 1 to 32, 1 to 32, supports a DPAS instruction and has
224+
// 3 restrictions r1, r2, r3 on the uArch
225+
struct uArch {
226+
std::string name; // similar to target triple
227+
std::string description;
228+
// Different kind of regiger file information (e.g., GRF, ARF, etc.)
229+
std::vector<RegisterFileInfo> register_file_info;
230+
// Each level of cache is indexed lower to higher in the vector
231+
// (e.g., L1 indexed at 0, L2 at 1 and so on) L1, L2, L3, etc.
232+
std::vector<CacheInfo> cache_info;
233+
std::vector<Instruction *> instructions;
234+
std::vector<Restriction<> *> restrictions;
235+
};
236+
237+
// A struct to represent shared memory information.
//
// `unsigned` is used instead of the non-standard `uint` typedef, and both
// members default to 0 so a default-constructed SharedMemory never holds
// indeterminate values. The struct remains an aggregate. Units are not
// specified in this header.
struct SharedMemory {
  unsigned size = 0;
  unsigned alignment = 0;
  // @TODO: Add more fields as needed
  // unsigned latency;
  // unsigned throughput;
  // unsigned bandwidth;
  // unsigned num_ports;
  // unsigned port_width;
  // unsigned bank_size;
  // unsigned bank_conflicts;
  // unsigned num_banks;
};
251+
252+
// For future use case in Xe4+
253+
254+
// struct EUInfo {
255+
// uint num_eu_threads;
256+
// SharedMemory shared_memory;
257+
// };
258+
259+
// uint num_simd_units;
260+
// uint num_spus;
261+
// uint num_smt;
262+
// uint num_hardware_threads;
263+
// uint num_threads_per_spu;
264+
// uint num_threads_per_simd_unit;
265+
// uint num_threads_per_hardware_thread;
266+
// uint num_threads_per_smt;
267+
// SharedMemory shared_memory;
268+
// };
269+
270+
// A struct to represent a GPU uArch
271+
// This struct is used to represent the GPU microarchitecture of a target device
272+
// struct GPUuArch : public uArch {
273+
// uint num_compute_units;
274+
// uint num_vector_units;
275+
// uint num_scalar_units;
276+
// uint num_tensor_units;
277+
// uint num_matrix_units;
278+
// SharedMemory shared_memory;
279+
// };
280+
} // namespace uArch
281+
} // namespace xegpu
282+
} // namespace mlir
283+
284+
#endif // MLIR_DIALECT_XEGPU_UTILS_UARCH_H
285+
//===--- uArch.h ---------------------------------------*- C++ -*-===//
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Library target built from IntelGpuPVC.cpp.
add_mlir_dialect_library(MLIRIntelGpuPVC
  IntelGpuPVC.cpp

  # Public headers for this library live under the XeGPU Utils directory.
  ADDITIONAL_HEADER_DIRS
  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/XeGPU/Utils

  # Libraries linked publicly.
  LINK_LIBS PUBLIC
  MLIRDialectUtils
  MLIRIR
  MLIRXeGPUDialect
  )

0 commit comments

Comments
 (0)