Skip to content

Commit f6a1b02

Browse files
committed
[Offload] Allow "tagging" device info entries with offload keys
When generating the device info tree, child nodes can now be tagged with an offload Device Info key. A node can then look up its tagged children by that key.
1 parent 8c06d0e commit f6a1b02

File tree

3 files changed

+36
-10
lines changed

3 files changed

+36
-10
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2562,7 +2562,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
25622562
Status2 = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &Minor);
25632563
if (Status == HSA_STATUS_SUCCESS && Status2 == HSA_STATUS_SUCCESS)
25642564
Info.add("HSA Runtime Version",
2565-
std::to_string(Major) + "." + std::to_string(Minor));
2565+
std::to_string(Major) + "." + std::to_string(Minor), "",
2566+
DeviceInfo::DRIVER_VERSION);
25662567

25672568
Info.add("HSA OpenMP Device Number", DeviceId);
25682569

@@ -2572,11 +2573,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
25722573

25732574
Status = getDeviceAttrRaw(HSA_AGENT_INFO_NAME, TmpChar);
25742575
if (Status == HSA_STATUS_SUCCESS)
2575-
Info.add("Device Name", TmpChar);
2576+
Info.add("Device Name", TmpChar, "", DeviceInfo::NAME);
25762577

25772578
Status = getDeviceAttrRaw(HSA_AGENT_INFO_VENDOR_NAME, TmpChar);
25782579
if (Status == HSA_STATUS_SUCCESS)
2579-
Info.add("Vendor Name", TmpChar);
2580+
Info.add("Vendor Name", TmpChar, "", DeviceInfo::VENDOR);
25802581

25812582
hsa_device_type_t DevType;
25822583
Status = getDeviceAttrRaw(HSA_AGENT_INFO_DEVICE, DevType);
@@ -2652,7 +2653,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
26522653

26532654
Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim);
26542655
if (Status == HSA_STATUS_SUCCESS) {
2655-
auto &MaxSize = *Info.add("Workgroup Max Size per Dimension");
2656+
auto &MaxSize =
2657+
*Info.add("Workgroup Max Size per Dimension", std::monostate{}, "",
2658+
DeviceInfo::MAX_WORK_GROUP_SIZE);
26562659
MaxSize.add("x", WorkgrpMaxDim[0]);
26572660
MaxSize.add("y", WorkgrpMaxDim[1]);
26582661
MaxSize.add("z", WorkgrpMaxDim[2]);

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,12 @@ struct AsyncInfoWrapperTy {
113113
__tgt_async_info *AsyncInfoPtr;
114114
};
115115

116+
/// Keys used to tag device info tree entries so that a child can be looked up
/// by key rather than by its display string (see InfoTreeNode::add and
/// InfoTreeNode::get).
///
/// NOTE(review): the enumerator values presumably mirror liboffload's device
/// info values; confirm against OffloadInfo.inc.
enum class DeviceInfo {
117+
// X-macro expansion: every OFFLOAD_DEVINFO(Name, <unused>, Value) entry in
// OffloadInfo.inc becomes an enumerator `Name = Value`.
#define OFFLOAD_DEVINFO(Name, _, Value) Name = Value,
118+
#include "OffloadInfo.inc"
119+
// Undefine the helper so the macro does not leak past this enum.
#undef OFFLOAD_DEVINFO
120+
};
121+
116122
/// Tree node for device information
117123
///
118124
/// This information is either printed or used by liboffload to extract certain
@@ -133,17 +139,21 @@ struct InfoTreeNode {
133139
// * The same key can appear multiple times
134140
std::unique_ptr<llvm::SmallVector<InfoTreeNode, 8>> Children;
135141

142+
std::map<DeviceInfo, size_t> DeviceInfoMap;
143+
136144
InfoTreeNode() : InfoTreeNode("", std::monostate{}, "") {}
137145
InfoTreeNode(std::string Key, VariantType Value, std::string Units)
138146
: Key(Key), Value(Value), Units(Units) {}
139147

140148
/// Add a new info entry as a child of this node. The entry requires at least
141149
/// a key string in \p Key. The value in \p Value is optional and can be any
142150
/// type that is representable as a string. The units in \p Units is optional
143-
/// and must be a string.
151+
/// and must be a string. Providing a device info key allows liboffload to
152+
/// use that value for an appropriate olGetDeviceInfo query.
144153
template <typename T = std::monostate>
145154
InfoTreeNode *add(std::string Key, T Value = T(),
146-
const std::string &Units = std::string()) {
155+
const std::string &Units = std::string(),
156+
std::optional<DeviceInfo> DeviceInfoKey = std::nullopt) {
147157
assert(!Key.empty() && "Invalid info key");
148158

149159
if (!Children)
@@ -157,7 +167,12 @@ struct InfoTreeNode {
157167
else
158168
ValueVariant = std::string{Value};
159169

160-
return &Children->emplace_back(Key, ValueVariant, Units);
170+
auto Ptr = &Children->emplace_back(Key, ValueVariant, Units);
171+
172+
if (DeviceInfoKey)
173+
DeviceInfoMap[*DeviceInfoKey] = Children->size() - 1;
174+
175+
return Ptr;
161176
}
162177

163178
std::optional<InfoTreeNode *> get(StringRef Key) {
@@ -171,6 +186,12 @@ struct InfoTreeNode {
171186
return It;
172187
}
173188

189+
/// Look up the child entry that was tagged with the device info key \p Info.
///
/// \returns a pointer to the child that add() most recently registered under
/// \p Info, or std::nullopt if no child of this node carries that key.
std::optional<InfoTreeNode *> get(DeviceInfo Info) {
  // A single find() replaces the count() + operator[] pair: one lookup
  // instead of two, and no use of a mutating accessor on a read-only path.
  auto It = DeviceInfoMap.find(Info);
  if (It == DeviceInfoMap.end())
    return std::nullopt;

  // The map stores the child's index into Children, not a pointer.
  return &(*Children)[It->second];
}
194+
174195
/// Print all info entries in the tree
175196
void print() const {
176197
// Fake an additional indent so that values are offset from the keys

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -935,13 +935,14 @@ struct CUDADeviceTy : public GenericDeviceTy {
935935
if (Res == CUDA_SUCCESS)
936936
// For consistency with other drivers, store the version as a string
937937
// rather than an integer
938-
Info.add("CUDA Driver Version", std::to_string(TmpInt));
938+
Info.add("CUDA Driver Version", std::to_string(TmpInt), "",
939+
DeviceInfo::DRIVER_VERSION);
939940

940941
Info.add("CUDA OpenMP Device Number", DeviceId);
941942

942943
Res = cuDeviceGetName(TmpChar, 1000, Device);
943944
if (Res == CUDA_SUCCESS)
944-
Info.add("Device Name", TmpChar);
945+
Info.add("Device Name", TmpChar, "", DeviceInfo::NAME);
945946

946947
Info.add("Vendor Name", "NVIDIA");
947948

@@ -978,7 +979,8 @@ struct CUDADeviceTy : public GenericDeviceTy {
978979
if (Res == CUDA_SUCCESS)
979980
Info.add("Maximum Threads per Block", TmpInt);
980981

981-
auto &MaxBlock = *Info.add("Maximum Block Dimensions", "");
982+
auto &MaxBlock = *Info.add("Maximum Block Dimensions", std::monostate{}, "",
983+
DeviceInfo::MAX_WORK_GROUP_SIZE);
982984
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, TmpInt);
983985
if (Res == CUDA_SUCCESS)
984986
MaxBlock.add("x", TmpInt);

0 commit comments

Comments
 (0)