Skip to content

Commit c605acb

Browse files
committed
drm/xe/guc_pc: Retry and wait longer for GuC PC start
In the rare situation of hitting a thermal limit during resume, GuC can be slow and run into delays like this: xe 0000:00:02.0: [drm] GT1: excessive init time: 667ms! \ [status = 0x8002F034, timeouts = 0] xe 0000:00:02.0: [drm] GT1: excessive init time: \ [freq = 100MHz (req = 800MHz), before = 100MHz, \ perf_limit_reasons = 0x1C001000] xe 0000:00:02.0: [drm] *ERROR* GT1: GuC PC Start failed ------------[ cut here ]------------ xe 0000:00:02.0: [drm] GT1: Failed to start GuC PC: -EIO When this happens, it entirely blocks the GPU from being used. So, let's retry with a much longer timeout in the hope that it comes back. Also, let's collect some information on how long it usually takes in situations like this, so that perhaps the timeout can be tuned later. Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> Cc: Jonathan Cavitt <jonathan.cavitt@intel.com> Cc: John Harrison <John.C.Harrison@Intel.com> Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20250307160307.1093391-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> (cherry picked from commit b4b05e5) Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
1 parent 3e331a6 commit c605acb

File tree

1 file changed

+40
-13
lines changed

1 file changed

+40
-13
lines changed

drivers/gpu/drm/xe/xe_guc_pc.c

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "xe_guc_pc.h"
77

88
#include <linux/delay.h>
9+
#include <linux/ktime.h>
910

1011
#include <drm/drm_managed.h>
1112
#include <generated/xe_wa_oob.h>
@@ -19,6 +20,7 @@
1920
#include "xe_gt.h"
2021
#include "xe_gt_idle.h"
2122
#include "xe_gt_printk.h"
23+
#include "xe_gt_throttle.h"
2224
#include "xe_gt_types.h"
2325
#include "xe_guc.h"
2426
#include "xe_guc_ct.h"
@@ -49,6 +51,9 @@
4951
#define LNL_MERT_FREQ_CAP 800
5052
#define BMG_MERT_FREQ_CAP 2133
5153

54+
#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
55+
#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
56+
5257
/**
5358
* DOC: GuC Power Conservation (PC)
5459
*
@@ -113,9 +118,10 @@ static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc)
113118
FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count))
114119

115120
static int wait_for_pc_state(struct xe_guc_pc *pc,
116-
enum slpc_global_state state)
121+
enum slpc_global_state state,
122+
int timeout_ms)
117123
{
118-
int timeout_us = 5000; /* rought 5ms, but no need for precision */
124+
int timeout_us = 1000 * timeout_ms;
119125
int slept, wait = 10;
120126

121127
xe_device_assert_mem_access(pc_to_xe(pc));
@@ -164,7 +170,8 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc)
164170
};
165171
int ret;
166172

167-
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
173+
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
174+
SLPC_RESET_TIMEOUT_MS))
168175
return -EAGAIN;
169176

170177
/* Blocking here to ensure the results are ready before reading them */
@@ -187,7 +194,8 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value)
187194
};
188195
int ret;
189196

190-
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
197+
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
198+
SLPC_RESET_TIMEOUT_MS))
191199
return -EAGAIN;
192200

193201
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
@@ -208,7 +216,8 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id)
208216
struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
209217
int ret;
210218

211-
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
219+
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
220+
SLPC_RESET_TIMEOUT_MS))
212221
return -EAGAIN;
213222

214223
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
@@ -440,6 +449,15 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
440449
return freq;
441450
}
442451

452+
/*
 * get_cur_freq - read and decode the currently requested GT frequency
 * @gt: GT whose MMIO space is read
 *
 * Reads RPNSWREQ, extracts the REQ_RATIO_MASK field and returns that
 * value converted by decode_freq().
 *
 * NOTE(review): this helper does not take forcewake itself; the
 * xe_guc_pc_get_cur_freq() caller holds a forcewake reference around
 * the call - presumably every other caller must do the same; confirm.
 */
static u32 get_cur_freq(struct xe_gt *gt)
453+
{
454+
u32 freq;
455+
456+
freq = xe_mmio_read32(&gt->mmio, RPNSWREQ);
457+
freq = REG_FIELD_GET(REQ_RATIO_MASK, freq);
458+
return decode_freq(freq);
459+
}
460+
443461
/**
444462
* xe_guc_pc_get_cur_freq - Get Current requested frequency
445463
* @pc: The GuC PC
@@ -463,10 +481,7 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
463481
return -ETIMEDOUT;
464482
}
465483

466-
*freq = xe_mmio_read32(&gt->mmio, RPNSWREQ);
467-
468-
*freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq);
469-
*freq = decode_freq(*freq);
484+
*freq = get_cur_freq(gt);
470485

471486
xe_force_wake_put(gt_to_fw(gt), fw_ref);
472487
return 0;
@@ -1002,6 +1017,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
10021017
struct xe_gt *gt = pc_to_gt(pc);
10031018
u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
10041019
unsigned int fw_ref;
1020+
ktime_t earlier;
10051021
int ret;
10061022

10071023
xe_gt_assert(gt, xe_device_uc_enabled(xe));
@@ -1026,14 +1042,25 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
10261042
memset(pc->bo->vmap.vaddr, 0, size);
10271043
slpc_shared_data_write(pc, header.size, size);
10281044

1045+
earlier = ktime_get();
10291046
ret = pc_action_reset(pc);
10301047
if (ret)
10311048
goto out;
10321049

1033-
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) {
1034-
xe_gt_err(gt, "GuC PC Start failed\n");
1035-
ret = -EIO;
1036-
goto out;
1050+
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
1051+
SLPC_RESET_TIMEOUT_MS)) {
1052+
xe_gt_warn(gt, "GuC PC start taking longer than normal [freq = %dMHz (req = %dMHz), perf_limit_reasons = 0x%08X]\n",
1053+
xe_guc_pc_get_act_freq(pc), get_cur_freq(gt),
1054+
xe_gt_throttle_get_limit_reasons(gt));
1055+
1056+
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
1057+
SLPC_RESET_EXTENDED_TIMEOUT_MS)) {
1058+
xe_gt_err(gt, "GuC PC Start failed: Dynamic GT frequency control and GT sleep states are now disabled.\n");
1059+
goto out;
1060+
}
1061+
1062+
xe_gt_warn(gt, "GuC PC excessive start time: %lldms",
1063+
ktime_ms_delta(ktime_get(), earlier));
10371064
}
10381065

10391066
ret = pc_init_freqs(pc);

0 commit comments

Comments
 (0)