Skip to content

Commit cf9cb02

Browse files
committed
drm/i915: Use internal class when counting engine resets
Commit 5035794 ("drm/i915/gsc: Mark internal GSC engine with reserved uabi class") made the GSC0 engine not have a valid uabi class and so broke the engine reset counting, which in turn was made class-based in cb823ed ("drm/i915/gt: Use intel_gt as the primary object for handling resets"). Although the title and commit text of the latter do not mention it (and it left the storage array incorrectly sized), tracking by class, despite the aliasing it adds in hypothetical multi-tile systems, is handy for virtual engines, which for instance do not have a valid engine->id. Therefore we keep that but just change it to use the internal class, which is always valid. We also add a helper to increment the count, which aligns with the existing getter. What was broken without this fix were out-of-bounds reads every time a reset would happen on the GSC0 engine, or during selftests when storing and cross-checking the counts in igt_live_test_begin and igt_live_test_end. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Fixes: dfed6b5 ("drm/i915/gsc: Mark internal GSC engine with reserved uabi class") [tursulin: fixed Fixes tag] Reported-by: Alan Previn Teres Alexis <alan.previn.teres.alexis@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20231201122109.729006-2-tvrtko.ursulin@linux.intel.com
1 parent 0647ece commit cf9cb02

File tree

3 files changed

+14
-5
lines changed

3 files changed

+14
-5
lines changed

drivers/gpu/drm/i915/gt/intel_reset.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1293,7 +1293,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
12931293
if (msg)
12941294
drm_notice(&engine->i915->drm,
12951295
"Resetting %s for %s\n", engine->name, msg);
1296-
atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
1296+
i915_increase_reset_engine_count(&engine->i915->gpu_error, engine);
12971297

12981298
ret = intel_gt_reset_engine(engine);
12991299
if (ret) {

drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5003,7 +5003,8 @@ static void capture_error_state(struct intel_guc *guc,
50035003
if (match) {
50045004
intel_engine_set_hung_context(e, ce);
50055005
engine_mask |= e->mask;
5006-
atomic_inc(&i915->gpu_error.reset_engine_count[e->uabi_class]);
5006+
i915_increase_reset_engine_count(&i915->gpu_error,
5007+
e);
50075008
}
50085009
}
50095010

@@ -5015,7 +5016,7 @@ static void capture_error_state(struct intel_guc *guc,
50155016
} else {
50165017
intel_engine_set_hung_context(ce->engine, ce);
50175018
engine_mask = ce->engine->mask;
5018-
atomic_inc(&i915->gpu_error.reset_engine_count[ce->engine->uabi_class]);
5019+
i915_increase_reset_engine_count(&i915->gpu_error, ce->engine);
50195020
}
50205021

50215022
with_intel_runtime_pm(&i915->runtime_pm, wakeref)

drivers/gpu/drm/i915/i915_gpu_error.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "display/intel_display_device.h"
1818
#include "gt/intel_engine.h"
19+
#include "gt/intel_engine_types.h"
1920
#include "gt/intel_gt_types.h"
2021
#include "gt/uc/intel_uc_fw.h"
2122

@@ -232,7 +233,7 @@ struct i915_gpu_error {
232233
atomic_t reset_count;
233234

234235
/** Number of times an engine has been reset */
235-
atomic_t reset_engine_count[I915_NUM_ENGINES];
236+
atomic_t reset_engine_count[MAX_ENGINE_CLASS];
236237
};
237238

238239
struct drm_i915_error_state_buf {
@@ -255,7 +256,14 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
255256
static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
256257
const struct intel_engine_cs *engine)
257258
{
258-
return atomic_read(&error->reset_engine_count[engine->uabi_class]);
259+
return atomic_read(&error->reset_engine_count[engine->class]);
260+
}
261+
262+
static inline void
263+
i915_increase_reset_engine_count(struct i915_gpu_error *error,
264+
const struct intel_engine_cs *engine)
265+
{
266+
atomic_inc(&error->reset_engine_count[engine->class]);
259267
}
260268

261269
#define CORE_DUMP_FLAG_NONE 0x0

0 commit comments

Comments
 (0)