Skip to content

Commit f99c7cc

Browse files
committed
Merge tag 'drm-xe-fixes-2024-10-31' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
Driver Changes:
- Fix missing HPD interrupt enabling, bringing one PM refactor with it (Imre / Maarten)
- Workaround LNL GGTT invalidation not being visible to GuC (Matthew Brost)
- Avoid getting jobs stuck without a protecting timeout (Matthew Brost)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/tsbftadm7owyizzdaqnqu7u4tqggxgeqeztlfvmj5fryxlfomi@5m5bfv2zvzmw
2 parents 4273607 + fe05cee commit f99c7cc

File tree

5 files changed

+72
-35
lines changed

5 files changed

+72
-35
lines changed

drivers/gpu/drm/xe/display/xe_display.c

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -309,18 +309,7 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe)
309309
}
310310

311311
/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */
312-
void xe_display_pm_runtime_suspend(struct xe_device *xe)
313-
{
314-
if (!xe->info.probe_display)
315-
return;
316-
317-
if (xe->d3cold.allowed)
318-
xe_display_pm_suspend(xe, true);
319-
320-
intel_hpd_poll_enable(xe);
321-
}
322-
323-
void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
312+
static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
324313
{
325314
struct intel_display *display = &xe->display;
326315
bool s2idle = suspend_to_idle();
@@ -353,28 +342,38 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
353342
intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold);
354343

355344
intel_dmc_suspend(xe);
345+
346+
if (runtime && has_display(xe))
347+
intel_hpd_poll_enable(xe);
356348
}
357349

358-
void xe_display_pm_suspend_late(struct xe_device *xe)
350+
void xe_display_pm_suspend(struct xe_device *xe)
351+
{
352+
__xe_display_pm_suspend(xe, false);
353+
}
354+
355+
void xe_display_pm_runtime_suspend(struct xe_device *xe)
359356
{
360-
bool s2idle = suspend_to_idle();
361357
if (!xe->info.probe_display)
362358
return;
363359

364-
intel_power_domains_suspend(xe, s2idle);
360+
if (xe->d3cold.allowed) {
361+
__xe_display_pm_suspend(xe, true);
362+
return;
363+
}
365364

366-
intel_display_power_suspend_late(xe);
365+
intel_hpd_poll_enable(xe);
367366
}
368367

369-
void xe_display_pm_runtime_resume(struct xe_device *xe)
368+
void xe_display_pm_suspend_late(struct xe_device *xe)
370369
{
370+
bool s2idle = suspend_to_idle();
371371
if (!xe->info.probe_display)
372372
return;
373373

374-
intel_hpd_poll_disable(xe);
374+
intel_power_domains_suspend(xe, s2idle);
375375

376-
if (xe->d3cold.allowed)
377-
xe_display_pm_resume(xe, true);
376+
intel_display_power_suspend_late(xe);
378377
}
379378

380379
void xe_display_pm_resume_early(struct xe_device *xe)
@@ -387,7 +386,7 @@ void xe_display_pm_resume_early(struct xe_device *xe)
387386
intel_power_domains_resume(xe);
388387
}
389388

390-
void xe_display_pm_resume(struct xe_device *xe, bool runtime)
389+
static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
391390
{
392391
struct intel_display *display = &xe->display;
393392

@@ -411,16 +410,38 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
411410
intel_display_driver_resume(xe);
412411
drm_kms_helper_poll_enable(&xe->drm);
413412
intel_display_driver_enable_user_access(xe);
414-
intel_hpd_poll_disable(xe);
415413
}
416414

415+
if (has_display(xe))
416+
intel_hpd_poll_disable(xe);
417+
417418
intel_opregion_resume(display);
418419

419420
intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
420421

421422
intel_power_domains_enable(xe);
422423
}
423424

425+
void xe_display_pm_resume(struct xe_device *xe)
426+
{
427+
__xe_display_pm_resume(xe, false);
428+
}
429+
430+
void xe_display_pm_runtime_resume(struct xe_device *xe)
431+
{
432+
if (!xe->info.probe_display)
433+
return;
434+
435+
if (xe->d3cold.allowed) {
436+
__xe_display_pm_resume(xe, true);
437+
return;
438+
}
439+
440+
intel_hpd_init(xe);
441+
intel_hpd_poll_disable(xe);
442+
}
443+
444+
424445
static void display_device_remove(struct drm_device *dev, void *arg)
425446
{
426447
struct xe_device *xe = arg;

drivers/gpu/drm/xe/display/xe_display.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir);
3434
void xe_display_irq_reset(struct xe_device *xe);
3535
void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);
3636

37-
void xe_display_pm_suspend(struct xe_device *xe, bool runtime);
37+
void xe_display_pm_suspend(struct xe_device *xe);
3838
void xe_display_pm_suspend_late(struct xe_device *xe);
3939
void xe_display_pm_resume_early(struct xe_device *xe);
40-
void xe_display_pm_resume(struct xe_device *xe, bool runtime);
40+
void xe_display_pm_resume(struct xe_device *xe);
4141
void xe_display_pm_runtime_suspend(struct xe_device *xe);
4242
void xe_display_pm_runtime_resume(struct xe_device *xe);
4343

@@ -65,10 +65,10 @@ static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
6565
static inline void xe_display_irq_reset(struct xe_device *xe) {}
6666
static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}
6767

68-
static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {}
68+
static inline void xe_display_pm_suspend(struct xe_device *xe) {}
6969
static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
7070
static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
71-
static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {}
71+
static inline void xe_display_pm_resume(struct xe_device *xe) {}
7272
static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {}
7373
static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {}
7474

drivers/gpu/drm/xe/xe_ggtt.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,16 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
397397

398398
static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
399399
{
400+
struct xe_device *xe = tile_to_xe(ggtt->tile);
401+
402+
/*
403+
* XXX: Barrier for GGTT pages. Unsure exactly why this required but
404+
* without this LNL is having issues with the GuC reading scratch page
405+
* vs. correct GGTT page. Not particularly a hot code path so blindly
406+
* do a mmio read here which results in GuC reading correct GGTT page.
407+
*/
408+
xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);
409+
400410
/* Each GT in a tile has its own TLB to cache GGTT lookups */
401411
ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
402412
ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);

drivers/gpu/drm/xe/xe_guc_submit.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -916,12 +916,22 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
916916
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
917917
{
918918
struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
919-
u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
920-
u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
919+
u32 ctx_timestamp, ctx_job_timestamp;
921920
u32 timeout_ms = q->sched_props.job_timeout_ms;
922921
u32 diff;
923922
u64 running_time_ms;
924923

924+
if (!xe_sched_job_started(job)) {
925+
xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
926+
xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
927+
q->guc->id);
928+
929+
return xe_sched_invalidate_job(job, 2);
930+
}
931+
932+
ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
933+
ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
934+
925935
/*
926936
* Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch
927937
* possible overflows with a high timeout.
@@ -1049,10 +1059,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
10491059
exec_queue_killed_or_banned_or_wedged(q) ||
10501060
exec_queue_destroyed(q);
10511061

1052-
/* Job hasn't started, can't be timed out */
1053-
if (!skip_timeout_check && !xe_sched_job_started(job))
1054-
goto rearm;
1055-
10561062
/*
10571063
* XXX: Sampling timeout doesn't work in wedged mode as we have to
10581064
* modify scheduling state to read timestamp. We could read the

drivers/gpu/drm/xe/xe_pm.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ int xe_pm_suspend(struct xe_device *xe)
123123
for_each_gt(gt, xe, id)
124124
xe_gt_suspend_prepare(gt);
125125

126-
xe_display_pm_suspend(xe, false);
126+
xe_display_pm_suspend(xe);
127127

128128
/* FIXME: Super racey... */
129129
err = xe_bo_evict_all(xe);
@@ -133,7 +133,7 @@ int xe_pm_suspend(struct xe_device *xe)
133133
for_each_gt(gt, xe, id) {
134134
err = xe_gt_suspend(gt);
135135
if (err) {
136-
xe_display_pm_resume(xe, false);
136+
xe_display_pm_resume(xe);
137137
goto err;
138138
}
139139
}
@@ -187,7 +187,7 @@ int xe_pm_resume(struct xe_device *xe)
187187
for_each_gt(gt, xe, id)
188188
xe_gt_resume(gt);
189189

190-
xe_display_pm_resume(xe, false);
190+
xe_display_pm_resume(xe);
191191

192192
err = xe_bo_restore_user(xe);
193193
if (err)

0 commit comments

Comments
 (0)