Skip to content

Commit 94ae51e

Browse files
committed
Move uArch to a new folder.
Remove compiler warnings and errors.
1 parent 03a635f commit 94ae51e

File tree

7 files changed

+117
-97
lines changed

7 files changed

+117
-97
lines changed

mlir/include/mlir/Dialect/XeGPU/Utils/IntelGpuXe2.h renamed to mlir/include/mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111
///
1212
//
1313
//===----------------------------------------------------------------------===//
14-
#ifndef MLIR_DIALECT_XEGPU_UTILS_INTEL_GPU_PVC_H
15-
#define MLIR_DIALECT_XEGPU_UTILS_INTEL_GPU_PVC_H
14+
#ifndef MLIR_DIALECT_XEGPU_UTILS_INTEL_GPU_XE2_H
15+
#define MLIR_DIALECT_XEGPU_UTILS_INTEL_GPU_XE2_H
1616

17-
#include "mlir/Dialect/XeGPU/Utils/uArch.h"
17+
#include "mlir/Dialect/XeGPU/uArch/uArch.h"
1818
#include "mlir/IR/BuiltinTypes.h"
1919
#include "mlir/IR/TypeUtilities.h"
2020
#include <map>
@@ -26,14 +26,14 @@ namespace xegpu {
2626
namespace uArch {
2727
namespace Xe2Plus {
2828
struct XeCoreInfo {
29-
uint num_threads;
29+
uint32_t num_threads;
3030
SharedMemory shared_memory;
31-
uint num_vector_units;
32-
uint num_matrix_units;
31+
uint32_t num_vector_units;
32+
uint32_t num_matrix_units;
3333

3434
// Constructor
35-
XeCoreInfo(uint num_threads, const SharedMemory &shared_memory,
36-
uint num_vector_units, uint num_matrix_units)
35+
XeCoreInfo(uint32_t num_threads, const SharedMemory &shared_memory,
36+
uint32_t num_vector_units, uint32_t num_matrix_units)
3737
: num_threads(num_threads), shared_memory(shared_memory),
3838
num_vector_units(num_vector_units), num_matrix_units(num_matrix_units) {
3939
}
@@ -58,7 +58,7 @@ struct DPASInstruction : public Instruction, public MatrixOpInterface {
5858
// Range systolic_depth;
5959
// Range repeat_count;
6060
// Range execution_size;
61-
// std::map<std::string, uint> ops_per_channel;
61+
// std::map<std::string, uint32_t> ops_per_channel;
6262
// std::vector<std::vector<std::string>> supported_types;
6363
// std::map<std::string, std::map<std::string, std::vector<std::string>>>
6464
// matrix_size;
@@ -80,34 +80,34 @@ struct DPASInstruction : public Instruction, public MatrixOpInterface {
8080
virtual bool checkSupportedMMATypes(mlir::Type AType, mlir::Type BType,
8181
mlir::Type CType,
8282
mlir::Type DType) override;
83-
virtual std::vector<uint> getSupportedM(mlir::Type type) override;
84-
virtual std::vector<uint> getSupportedK(mlir::Type type) override;
85-
virtual std::vector<uint> getSupportedN(mlir::Type type) override;
83+
virtual std::vector<uint32_t> getSupportedM(mlir::Type type) override;
84+
virtual std::vector<uint32_t> getSupportedK(mlir::Type type) override;
85+
virtual std::vector<uint32_t> getSupportedN(mlir::Type type) override;
8686
virtual std::vector<std::pair<unsigned, unsigned>>
8787
getSupportedMatrix(mlir::Type type, MatrixType matrixType) override;
8888
};
8989

9090
struct LoadStore2DTileInfo : public RangeTile {
91-
std::vector<uint> array_len;
91+
std::vector<uint32_t> array_len;
9292
};
9393

9494
// struct to represent Load2D/Store2D/Prefetch instruction
9595
struct LoadStorePrefetch2DInstruction : public Instruction {
9696
MemoryType memory_type;
9797
MemoryAccessType memory_access_type;
9898
// std::vector<std::string> supported_types;
99-
std::vector<uint> supported_types_bitwidth;
100-
std::map<std::string, uint> alignment;
99+
std::vector<uint32_t> supported_types_bitwidth;
100+
std::map<std::string, uint32_t> alignment;
101101
LoadStore2DTileInfo supported_tile_sizes;
102-
uint min_surface_pitch;
102+
uint32_t min_surface_pitch;
103103

104104
// Validate Array length restriction on a given tile
105-
bool validateArrayLenRestriction(Tile tile, uint array_len,
105+
bool validateArrayLenRestriction(Tile tile, uint32_t array_len,
106106
mlir::Type dataType) {
107107

108-
Restriction<Tile, uint, mlir::Type> width_array_len_restriction(
108+
Restriction<Tile, uint32_t, mlir::Type> width_array_len_restriction(
109109
tile, array_len, dataType,
110-
[](Tile tile, uint array_len, mlir::Type dataType) {
110+
[](Tile tile, uint32_t array_len, mlir::Type dataType) {
111111
assert(tile.no_of_dims == 2);
112112
return tile.dims[1] * array_len *
113113
(dataType.getIntOrFloatBitWidth() / 8) <=
@@ -118,9 +118,9 @@ struct LoadStorePrefetch2DInstruction : public Instruction {
118118

119119
// Validate Surface Pitch restriction on a given tile
120120
bool validateSurfacePitchRestriction(Tile tile,
121-
uint surfacePitch /*in bytes*/) {
122-
Restriction<Tile, uint> surface_pitch_restriction(
123-
tile, surfacePitch, [](Tile tile, uint surfacePitch) {
121+
uint32_t surfacePitch /*in bytes*/) {
122+
Restriction<Tile, uint32_t> surface_pitch_restriction(
123+
tile, surfacePitch, [](Tile tile, uint32_t surfacePitch) {
124124
assert(tile.no_of_dims == 2);
125125
return surfacePitch >= 64;
126126
});
@@ -149,13 +149,14 @@ struct PVCuArch : public Xe2Plus {
149149
this->uArch_hierarchy.push_back(uArchHierarchyComponent("gpu", 2));
150150
// Initialize register file info
151151
// GRF
152-
this->register_file_info["GRF"] =
152+
this->register_file_info.emplace(
153+
"GRF",
153154
RegisterFileInfo(64 * 1024, // size in bits
154155
{"small", "large"}, // GRF modes
155156
{128, 256}, // registers per thread per mode
156157
0, // number of banks
157158
0 // bank size
158-
);
159+
));
159160
// Initialize cache info
160161
// L1 cache, XeCore level
161162
this->cache_info.push_back(
@@ -221,4 +222,4 @@ struct BMGuArch : public Xe2Plus {
221222
} // namespace xegpu
222223
} // namespace mlir
223224

224-
#endif // MLIR_DIALECT_XEGPU_UTILS_INTEL_GPU_PVC_H
225+
#endif // MLIR_DIALECT_XEGPU_UTILS_INTEL_GPU_XE2_H

mlir/include/mlir/Dialect/XeGPU/Utils/uArch.h renamed to mlir/include/mlir/Dialect/XeGPU/uArch/uArch.h

Lines changed: 68 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,16 @@
1414
#ifndef MLIR_DIALECT_XEGPU_UTILS_UARCH_H
1515
#define MLIR_DIALECT_XEGPU_UTILS_UARCH_H
1616

17+
#include <any>
1718
#include <functional>
1819
#include <iostream>
20+
#include <map>
1921
#include <mutex>
2022
#include <shared_mutex>
2123
#include <tuple>
2224

25+
#include "mlir/IR/Types.h"
26+
2327
namespace mlir {
2428
namespace xegpu {
2529
namespace uArch {
@@ -37,8 +41,8 @@ struct Range {
3741
// dim: [2, 2]
3842
// This represents a 2x2 tile
3943
struct Tile {
40-
uint no_of_dims;
41-
std::vector<uint> dims;
44+
uint32_t no_of_dims;
45+
std::vector<uint32_t> dims;
4246
};
4347

4448
// RangeTile represents a range of tiles instead of a single tile
@@ -52,7 +56,7 @@ struct Tile {
5256
// This represents a 2x2 RangeTile where the first dimension can have values
5357
// from 1 to 32 and the second dimension can have values from 2 to 16
5458
struct RangeTile {
55-
uint no_of_dims;
59+
uint32_t no_of_dims;
5660
std::vector<Range> dims;
5761
};
5862

@@ -68,8 +72,8 @@ struct RangeTile {
6872
// This represents a 2x2 DiscreteTile where the first dimension can have values
6973
// 1, 2, 4, 8, 16, 32 and the second dimension can have values 2, 4, 8, 16
7074
struct DiscreteTile {
71-
uint no_of_dims;
72-
std::vector<std::vector<uint>> dims;
75+
uint32_t no_of_dims;
76+
std::vector<std::vector<uint32_t>> dims;
7377
};
7478

7579
// Restriction struct
@@ -93,9 +97,9 @@ struct DiscreteTile {
9397
template <typename... Args>
9498
struct Restriction {
9599
std::tuple<Args...> data;
96-
std::function<void(Args...)> func;
100+
std::function<bool(Args...)> func;
97101

98-
Restriction(Args... args, std::function<void(Args...)> f)
102+
Restriction(Args... args, std::function<bool(Args...)> f)
99103
: data(args...), func(f) {}
100104

101105
bool validate() { return std::apply(func, data); }
@@ -107,9 +111,9 @@ struct uArchHierarchyComponent {
107111
std::string name = ""; // optional name of the hierarchy component
108112
// no. of lower hierarchy component it contains, e.g., for PVC XeCore it
109113
// contains 8 threads, so no_of_component=8
110-
uint no_of_component;
114+
uint32_t no_of_component;
111115
// Constructor
112-
uArchHierarchyComponent(const std::string &name, uint no_of_component)
116+
uArchHierarchyComponent(const std::string &name, uint32_t no_of_component)
113117
: name(name), no_of_component(no_of_component) {}
114118
};
115119

@@ -203,35 +207,37 @@ struct Instruction {
203207

204208
// A struct to represent register file information
205209
struct RegisterFileInfo {
206-
uint size; // size per register in bits
210+
uint32_t size; // size per register in bits
207211
std::vector<std::string> mode; // e.g., "small", "large" GRF modes
208-
std::vector<uint>
212+
std::vector<uint32_t>
209213
num_regs_per_thread_per_mode; // number of registers per thread per mode
210-
uint num_banks;
211-
uint bank_size;
214+
uint32_t num_banks;
215+
uint32_t bank_size;
212216

213217
// Constructor
214-
RegisterFileInfo(uint size, const std::vector<std::string> &mode,
215-
const std::vector<uint> &numRegs, uint num_banks,
216-
uint bank_size)
218+
RegisterFileInfo() = default;
219+
RegisterFileInfo(uint32_t size, const std::vector<std::string> &mode,
220+
const std::vector<uint32_t> &numRegs, uint32_t num_banks,
221+
uint32_t bank_size)
217222
: size(size), mode(mode), num_regs_per_thread_per_mode(numRegs),
218223
num_banks(num_banks), bank_size(bank_size) {}
219224
};
220225

221226
// A struct to represent cache information
222227
struct CacheInfo {
223-
uint size;
224-
uint line_size;
228+
uint32_t size;
229+
uint32_t line_size;
225230
// At which component level the cache is shared
226231
uArchHierarchyComponent component;
227-
// uint associativity;
228-
// uint num_banks;
229-
// uint bank_size;
230-
// uint num_ports;
231-
// uint port_width;
232-
// uint bank_conflicts;
232+
// uint32_t associativity;
233+
// uint32_t num_banks;
234+
// uint32_t bank_size;
235+
// uint32_t num_ports;
236+
// uint32_t port_width;
237+
// uint32_t bank_conflicts;
233238
// Constructor
234-
CacheInfo(uint size, uint line_size, const uArchHierarchyComponent &component)
239+
CacheInfo(uint32_t size, uint32_t line_size,
240+
const uArchHierarchyComponent &component)
235241
: size(size), line_size(line_size), component(component) {}
236242
};
237243

@@ -274,6 +280,7 @@ struct uArch {
274280
std::vector<Restriction<> *> restrictions;
275281

276282
// Constructor
283+
uArch() = default;
277284
uArch(const std::string &name, const std::string &description,
278285
const std::vector<uArchHierarchyComponent> &uArch_hierarchy = {},
279286
const std::map<std::string, RegisterFileInfo> &register_file_info = {},
@@ -287,48 +294,49 @@ struct uArch {
287294

288295
// A struct to represent shared memory information
289296
struct SharedMemory {
290-
uint size; // in bytes
291-
uint alignment; // in bytes
297+
uint32_t size; // in bytes
298+
uint32_t alignment; // in bytes
292299
// @TODO: Add more fields as needed
293-
// uint latency;
294-
// uint throughput;
295-
// uint bandwidth;
296-
// uint num_ports;
297-
// uint port_width;
298-
// uint bank_size;
299-
// uint bank_conflicts;
300-
// uint num_banks;
300+
// uint32_t latency;
301+
// uint32_t throughput;
302+
// uint32_t bandwidth;
303+
// uint32_t num_ports;
304+
// uint32_t port_width;
305+
// uint32_t bank_size;
306+
// uint32_t bank_conflicts;
307+
// uint32_t num_banks;
301308

302309
// Constructor
303-
SharedMemory(uint size, uint alignment) : size(size), alignment(alignment) {}
310+
SharedMemory(uint32_t size, uint32_t alignment)
311+
: size(size), alignment(alignment) {}
304312
};
305313

306314
// For future use case in Xe4+
307315

308316
// struct EUInfo {
309-
// uint num_eu_threads;
317+
// uint32_t num_eu_threads;
310318
// SharedMemory shared_memory;
311319
// };
312320

313-
// uint num_simd_units;
314-
// uint num_spus;
315-
// uint num_smt;
316-
// uint num_hardware_threads;
317-
// uint num_threads_per_spu;
318-
// uint num_threads_per_simd_unit;
319-
// uint num_threads_per_hardware_thread;
320-
// uint num_threads_per_smt;
321+
// uint32_t num_simd_units;
322+
// uint32_t num_spus;
323+
// uint32_t num_smt;
324+
// uint32_t num_hardware_threads;
325+
// uint32_t num_threads_per_spu;
326+
// uint32_t num_threads_per_simd_unit;
327+
// uint32_t num_threads_per_hardware_thread;
328+
// uint32_t num_threads_per_smt;
321329
// SharedMemory shared_memory;
322330
// };
323331

324332
// A struct to represent a GPU uArch
325333
// This struct is used to represent the GPU microarchitecture of a target device
326334
// struct GPUuArch : public uArch {
327-
// uint num_compute_units;
328-
// uint num_vector_units;
329-
// uint num_scalar_units;
330-
// uint num_tensor_units;
331-
// uint num_matrix_units;
335+
// uint32_t num_compute_units;
336+
// uint32_t num_vector_units;
337+
// uint32_t num_scalar_units;
338+
// uint32_t num_tensor_units;
339+
// uint32_t num_matrix_units;
332340
// SharedMemory shared_memory;
333341
// };
334342

@@ -346,17 +354,17 @@ struct TileOpInterface {
346354
// @param surface_pitch, surface pitch
347355
// @param array_len, array length
348356
virtual bool validate(Tile tile, Tile surface, mlir::Type dataType,
349-
uint surface_pitch, uint array_len = 1) = 0;
357+
uint32_t surface_pitch, uint32_t array_len = 1) = 0;
350358
virtual ~TileOpInterface() = default;
351359
};
352360

353361
enum class MatrixType { MatrixA, MatrixB, MatrixC, MatrixD };
354362
struct MatrixOpInterface {
355363
virtual bool checkSupportedMMATypes(mlir::Type AType, mlir::Type BType,
356364
mlir::Type CType, mlir::Type DType) = 0;
357-
virtual std::vector<uint> getSupportedM(mlir::Type type) = 0;
358-
virtual std::vector<uint> getSupportedK(mlir::Type type) = 0;
359-
virtual std::vector<uint> getSupportedN(mlir::Type type) = 0;
365+
virtual std::vector<uint32_t> getSupportedM(mlir::Type type) = 0;
366+
virtual std::vector<uint32_t> getSupportedK(mlir::Type type) = 0;
367+
virtual std::vector<uint32_t> getSupportedN(mlir::Type type) = 0;
360368
virtual std::vector<std::pair<unsigned, unsigned>>
361369
getSupportedMatrix(mlir::Type type, MatrixType matrixType) = 0;
362370

@@ -373,13 +381,14 @@ struct uArchMap {
373381

374382
// Insert a key-value pair (emplace keeps the existing value if the key is already present)
375383
void insert(const std::string &key, uArch value) {
376-
std::unique_lock lock(mutex_);
377-
map_[key] = value;
384+
std::unique_lock<std::shared_mutex> lock(mutex_);
385+
// map_[key] = value;
386+
map_.emplace(key, value);
378387
}
379388

380389
// Get a value by key (concurrent safe read)
381390
std::optional<uArch> get(const std::string &key) const {
382-
std::shared_lock lock(mutex_);
391+
std::shared_lock<std::shared_mutex> lock(mutex_);
383392
auto it = map_.find(key);
384393
if (it != map_.end())
385394
return it->second;
@@ -388,13 +397,13 @@ struct uArchMap {
388397

389398
// Check if a key exists
390399
bool contains(const std::string &key) const {
391-
std::shared_lock lock(mutex_);
400+
std::shared_lock<std::shared_mutex> lock(mutex_);
392401
return map_.find(key) != map_.end();
393402
}
394403

395404
// Remove a key
396405
bool erase(const std::string &key) {
397-
std::unique_lock lock(mutex_);
406+
std::unique_lock<std::shared_mutex> lock(mutex_);
398407
return map_.erase(key) > 0;
399408
}
400409

mlir/lib/Dialect/XeGPU/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
add_subdirectory(IR)
22
add_subdirectory(Transforms)
3+
add_subdirectory(uArch)
34
add_subdirectory(Utils)

0 commit comments

Comments
 (0)