Skip to content

Commit e86117e

Browse files
authored
Error out of counter splitting if group max is zero (#70)
* Error out of counter splitting if group max is zero A counter group having a counter max of zero is invalid and will ultimately result in a hang when we try to split counters into multiple passes. This is one of various scenarios that result in a hang during counter splitting; see #69 This fixes only that specific scenario. We now check that the group max isn't zero, and if it is, we give up trying to split a public counter's HW counters into multiple passes. We log an error, too. Again, this isn't a comprehensive fix for issue 69. There could be other cases of bad data that result in a hang. Issue 69 should be fixed with a pass cap limit to cover all cases. But this commit still adds value in that it flags the specific invalid GPU counter metadata in addition to avoiding the hang. Change-Id: I56d7d2043ba92c1b6088f0fdd68f5ec844e7b823
1 parent 25eb330 commit e86117e

File tree

4 files changed

+24
-5
lines changed

4 files changed

+24
-5
lines changed

include/gpu_performance_api/gpu_perf_api_types.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,8 @@ typedef enum
170170
kGpaStatusErrorLibAlreadyLoaded = -41,
171171
kGpaStatusErrorOtherSessionActive = -42,
172172
kGpaStatusErrorException = -43,
173-
kGpaStatusMin = kGpaStatusErrorException,
173+
kGpaStatusErrorInvalidCounterGroupData = -44,
174+
kGpaStatusMin = kGpaStatusErrorInvalidCounterGroupData,
174175
kGpaStatusInternal = 256, ///< Status codes used internally within GPUPerfAPI.
175176
} GpaStatus;
176177

source/gpu_perf_api_common/gpu_perf_api.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1659,7 +1659,8 @@ static const char* kErrorString[] = {
16591659
GPA_ENUM_STRING_VAL(kGpaStatusErrorTimeout, "GPA Error: Attempt to Retrieve Data Failed due to Timeout."),
16601660
GPA_ENUM_STRING_VAL(kGpaStatusErrorLibAlreadyLoaded, "GPA Error: Library Is Already Loaded."),
16611661
GPA_ENUM_STRING_VAL(kGpaStatusErrorOtherSessionActive, "GPA Error: Other Session Is Active."),
1662-
GPA_ENUM_STRING_VAL(kGpaStatusErrorException, "GPA Error: Exception Occurred.")};
1662+
GPA_ENUM_STRING_VAL(kGpaStatusErrorException, "GPA Error: Exception Occurred."),
1663+
GPA_ENUM_STRING_VAL(kGpaStatusErrorInvalidCounterGroupData, "GPA Error: Counter group data is invalid.")};
16631664

16641665
/// Size of kErrorString array.
16651666
static size_t kErrorStringSize = sizeof(kErrorString) / sizeof(const char*);

source/gpu_perf_api_counter_generator/gpa_counter_scheduler_base.cc

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,13 +273,25 @@ GpaStatus GpaCounterSchedulerBase::GetNumRequiredPasses(GpaUInt32* num_required_
273273
// Add the HW groups max's.
274274
for (unsigned int i = 0; i < hw_counters->internal_counter_groups_.size(); ++i)
275275
{
276-
max_counters_per_group.push_back(hw_counters->internal_counter_groups_[i].max_active_discrete_counters);
276+
auto count = hw_counters->internal_counter_groups_[i].max_active_discrete_counters;
277+
if (count == 0)
278+
{
279+
GPA_LOG_DEBUG_ERROR("Hardware counter group '%s' has zero for max-counters-per-group.", hw_counters->internal_counter_groups_[i].name);
280+
return kGpaStatusErrorInvalidCounterGroupData;
281+
}
282+
max_counters_per_group.push_back(count);
277283
}
278284

279285
// Add the Additional groups max's.
280286
for (unsigned int i = 0; i < hw_counters->additional_group_count_; ++i)
281287
{
282-
max_counters_per_group.push_back(hw_counters->additional_groups_[i].max_active_discrete_counters);
288+
auto count = hw_counters->additional_groups_[i].max_active_discrete_counters;
289+
if (count == 0)
290+
{
291+
GPA_LOG_DEBUG_ERROR("Hardware counter additional group '%s' has zero for max-counters-per-group.", hw_counters->additional_groups_[i].name);
292+
return kGpaStatusErrorInvalidCounterGroupData;
293+
}
294+
max_counters_per_group.push_back(count);
283295
}
284296

285297
GpaCounterGroupAccessor accessor(hw_counters->internal_counter_groups_,

source/gpu_perf_api_counter_generator/gpa_split_counters_interfaces.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@
1616

1717
#ifdef DEBUG_PUBLIC_COUNTER_SPLITTER
1818
#include <sstream>
19-
#include "gpu_perf_api_common/logging.h"
2019
#endif
2120

2221
#include "gpu_perf_api_counter_generator/gpa_derived_counter.h"
22+
#include "gpu_perf_api_common/logging.h"
2323

2424
/// @brief Enum to represent the different SQ shader stages.
2525
enum GpaSqShaderStage
@@ -378,6 +378,11 @@ class IGpaSplitCounters
378378
}
379379

380380
unsigned int group_limit = max_counters_per_group[group_index];
381+
if (group_limit == 0)
382+
{
383+
GPA_LOG_DEBUG_ERROR("Group(%d) counter limit is zero.", group_index);
384+
return false;
385+
}
381386

382387
// This should never occur. It indicates the counter relies on a block without any collectible events.
383388
assert(group_limit > 0);

0 commit comments

Comments
 (0)