Skip to content

Commit 2107a33

Browse files
committed
Addressed comments
1 parent aa94dfc commit 2107a33

File tree

3 files changed

+30
-31
lines changed

3 files changed

+30
-31
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -884,11 +884,6 @@ struct AMDGPUKernelTy : public GenericKernelTy {
884884
/// Indicates whether or not we need to set up our own private segment size.
885885
bool usesDynamicStack() const { return DynamicStack; }
886886

887-
/// Get the execution mode of this kernel.
888-
OMPTgtExecModeFlags getExecutionMode() const {
889-
return getExecutionModeFlags();
890-
}
891-
892887
/// Envar to disable host-exec thread creation.
893888
BoolEnvar OMPX_DisableHostExec;
894889

@@ -1717,7 +1712,7 @@ struct AMDGPUStreamTy {
17171712
std::string KernelName;
17181713
uint32_t NumTeams;
17191714
uint32_t NumThreads;
1720-
KernelRunRecord *KernelRunRecords;
1715+
KernelRunRecordTy *KernelRunRecords;
17211716
};
17221717

17231718
using AMDGPUStreamCallbackTy = Error(void *Data);
@@ -2096,7 +2091,7 @@ struct AMDGPUStreamTy {
20962091
PostKernelRunProcessingArgsTy *Args =
20972092
reinterpret_cast<PostKernelRunProcessingArgsTy *>(Data);
20982093

2099-
KernelRunRecord *KernelRecord = Args->KernelRunRecords;
2094+
KernelRunRecordTy *KernelRecord = Args->KernelRunRecords;
21002095
assert(KernelRecord && "KernelRunRecord is null!");
21012096

21022097
uint64_t KernelDuration = getKernelDuration(Args);
@@ -2191,10 +2186,10 @@ struct AMDGPUStreamTy {
21912186

21922187
// If runtime autotuning is enabled, set up the callback functions to process
21932188
// the data after the kernel has completed.
2194-
if (Device.enableRuntimeAutotuning() &&
2195-
Kernel.getExecutionMode() == OMP_TGT_EXEC_MODE_SPMD) {
2189+
if (Device.enableRuntimeAutotuning() && Kernel.isSPMDMode()) {
21962190
std::string KernelName(Kernel.getName());
2197-
KernelRunRecord *KernelRecords = Device.getKernelRunRecords();
2191+
KernelRunRecordTy *KernelRecords = Device.getKernelRunRecords();
2192+
assert(KernelRecords && "No KernelRecords!");
21982193

21992194
// If this kernel has reached the run limit,
22002195
// skip registering the callback function.

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ struct GenericPluginTy;
6060
struct GenericKernelTy;
6161
struct GenericDeviceTy;
6262
struct RecordReplayTy;
63-
struct KernelRunRecord;
63+
struct KernelRunRecordTy;
6464

6565
/// Class that wraps the __tgt_async_info to simplify its usage. In case the
6666
/// object is constructed without a valid __tgt_async_info, the object will use
@@ -1108,7 +1108,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
11081108

11091109
bool getMultiDeviceKernelValue(void *EntryPtr);
11101110

1111-
KernelRunRecord *getKernelRunRecords() const { return KernelRunRecords; }
1111+
KernelRunRecordTy *getKernelRunRecords() const { return KernelRunRecords; }
11121112

11131113
/// Return true if a descriptor of size 'Size' should be allocated using
11141114
/// shared memory. Default implementation returns 'false',
@@ -1262,7 +1262,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
12621262
RPCServerTy *RPCServer;
12631263

12641264
/// Structs for functions and data used in runtime autotuning.
1265-
KernelRunRecord *KernelRunRecords;
1265+
KernelRunRecordTy *KernelRunRecords;
12661266

12671267
private:
12681268
#ifdef OMPT_SUPPORT
@@ -1291,35 +1291,39 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
12911291
};
12921292

12931293
/// Struct representing the metadata for each kernel run on the device.
1294-
struct KernelRunRecord {
1294+
struct KernelRunRecordTy {
12951295

1296-
struct KernelRunEntry {
1296+
struct KernelRunEntryTy {
12971297
std::string KernelName;
1298-
uint32_t NumTeams;
1299-
uint32_t NumThreads;
1300-
uint64_t RunDuration;
1298+
uint32_t NumTeams = 0;
1299+
uint32_t NumThreads = 0;
1300+
uint64_t RunDuration = 0;
13011301
};
13021302

13031303
// Metadata used in tuning process.
1304-
struct TuningMetadata {
1304+
struct TuningMetadataTy {
13051305
uint32_t IdxThread = 0;
13061306
uint32_t IdxCUMultiplier = 0;
13071307
// Run counters.
13081308
uint32_t RunCounters = 0;
13091309
// Entry with minimum running time.
1310-
KernelRunEntry MinEntries;
1310+
KernelRunEntryTy MinEntry;
13111311
};
13121312

13131313
// Add a new entry
13141314
void addEntry(std::string KernelName, uint32_t NumTeams, uint32_t NumThreads,
13151315
uint64_t RunDuration) {
1316-
KernelRunEntry NewRunEnry = {KernelName, NumTeams, NumThreads, RunDuration};
13171316
TuningData[KernelName].RunCounters++;
13181317

13191318
// Update min entries.
1320-
auto MinDuration = TuningData[KernelName].MinEntries.RunDuration;
1319+
uint64_t MinDuration = 0;
1320+
auto It = TuningData.find(KernelName);
1321+
if (It != TuningData.end()) {
1322+
MinDuration = It->second.MinEntry.RunDuration;
1323+
}
13211324
if (MinDuration > RunDuration || MinDuration == 0) {
1322-
TuningData[KernelName].MinEntries = NewRunEnry;
1325+
TuningData[KernelName].MinEntry = {KernelName, NumTeams, NumThreads,
1326+
RunDuration};
13231327
}
13241328
}
13251329

@@ -1330,7 +1334,7 @@ struct KernelRunRecord {
13301334
// If the kernel reaches the run limit,
13311335
// return the current optimal launch parameters.
13321336
if (reachedRunLimitForKernel(KernelName)) {
1333-
auto MinEntry = TuningData[KernelName].MinEntries;
1337+
auto MinEntry = TuningData[KernelName].MinEntry;
13341338
return {MinEntry.NumTeams, MinEntry.NumThreads};
13351339
}
13361340

@@ -1341,8 +1345,8 @@ struct KernelRunRecord {
13411345
if (IdxCUMulti >= CUMultiplierCandidate.size()) {
13421346
// No more elements to search.
13431347
// Return current optimal launch parameters.
1344-
return {TuningData[KernelName].MinEntries.NumTeams,
1345-
TuningData[KernelName].MinEntries.NumThreads};
1348+
return {TuningData[KernelName].MinEntry.NumTeams,
1349+
TuningData[KernelName].MinEntry.NumThreads};
13461350
}
13471351

13481352
// New team/thread pair for launch parameters.
@@ -1363,7 +1367,7 @@ struct KernelRunRecord {
13631367
}
13641368

13651369
bool reachedRunLimitForKernel(std::string KernelName) {
1366-
if (TuningData.count(KernelName) == 0) {
1370+
if (TuningData.find(KernelName) == TuningData.end()) {
13671371
// If no record for this kernel.
13681372
return false;
13691373
}
@@ -1372,7 +1376,7 @@ struct KernelRunRecord {
13721376
}
13731377

13741378
uint32_t getRunCounterForKernel(std::string KernelName) {
1375-
if (TuningData.count(KernelName) == 0) {
1379+
if (TuningData.find(KernelName) == TuningData.end()) {
13761380
return 0;
13771381
}
13781382

@@ -1386,7 +1390,7 @@ struct KernelRunRecord {
13861390
// The max number of tuning runs for each kernel.
13871391
uint32_t RunLimiter = ThreadCandidate.size() * CUMultiplierCandidate.size();
13881392
// Used for keeping track of the metadata used in tuning for each kernel.
1389-
std::unordered_map<std::string, TuningMetadata> TuningData;
1393+
std::unordered_map<std::string, TuningMetadataTy> TuningData;
13901394
};
13911395

13921396
/// Class implementing common functionalities of offload plugins. Each plugin

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -727,7 +727,7 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
727727
KernelArgs.NumTeams[2]};
728728

729729
std::string KernelName = getName();
730-
KernelRunRecord *KernelRecord = GenericDevice.getKernelRunRecords();
730+
KernelRunRecordTy *KernelRecord = GenericDevice.getKernelRunRecords();
731731
uint32_t KernelRunCounter = 0;
732732

733733
if (KernelRecord) {
@@ -1033,7 +1033,7 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
10331033

10341034
// Allocate resources for autotuning if enabled.
10351035
if (OMPX_EnableRuntimeAutotuning) {
1036-
KernelRunRecords = new KernelRunRecord();
1036+
KernelRunRecords = new KernelRunRecordTy();
10371037
}
10381038

10391039
return Plugin::success();

0 commit comments

Comments
 (0)