Skip to content

Commit 5dce85f

Browse files
johnharr-intelThomas Hellström
authored andcommitted
drm/xe: Move the coredump registration to the worker thread
Adding lockdep checking to the coredump code showed that there was an existing violation. The dev_coredumpm_timeout() call is used to register the dump with the base coredump subsystem. However, that makes multiple memory allocations, only some of which use the GFP_ flags passed in. So that also needs to be deferred to the worker function where it is safe to allocate with arbitrary flags. In order to not add protoypes for the callback functions, moving the _timeout call also means moving the worker thread function to later in the file. v2: Rebased after other changes to the worker function. Fixes: e799485 ("drm/xe: Introduce the dev_coredump infrastructure.") Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Matthew Brost <matthew.brost@intel.com> Cc: Jani Nikula <jani.nikula@linux.intel.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Francois Dugast <francois.dugast@intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Lucas De Marchi <lucas.demarchi@intel.com> Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com> Cc: Sumit Semwal <sumit.semwal@linaro.org> Cc: "Christian König" <christian.koenig@amd.com> Cc: intel-xe@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Cc: <stable@vger.kernel.org> # v6.8+ Signed-off-by: John Harrison <John.C.Harrison@Intel.com> Reviewed-by: Matthew Brost <matthew.brost@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20241128210824.3302147-3-John.C.Harrison@Intel.com (cherry picked from commit 90f51a7) Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
1 parent 4495816 commit 5dce85f

File tree

1 file changed

+39
-34
lines changed

1 file changed

+39
-34
lines changed

drivers/gpu/drm/xe/xe_devcoredump.c

Lines changed: 39 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -155,36 +155,6 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
155155
ss->vm = NULL;
156156
}
157157

158-
static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
159-
{
160-
struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
161-
struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
162-
struct xe_device *xe = coredump_to_xe(coredump);
163-
unsigned int fw_ref;
164-
165-
xe_pm_runtime_get(xe);
166-
167-
/* keep going if fw fails as we still want to save the memory and SW data */
168-
fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
169-
if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
170-
xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
171-
xe_vm_snapshot_capture_delayed(ss->vm);
172-
xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
173-
xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
174-
175-
xe_pm_runtime_put(xe);
176-
177-
/* Calculate devcoredump size */
178-
ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
179-
180-
ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
181-
if (!ss->read.buffer)
182-
return;
183-
184-
__xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
185-
xe_devcoredump_snapshot_free(ss);
186-
}
187-
188158
static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
189159
size_t count, void *data, size_t datalen)
190160
{
@@ -234,6 +204,45 @@ static void xe_devcoredump_free(void *data)
234204
"Xe device coredump has been deleted.\n");
235205
}
236206

207+
static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
208+
{
209+
struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
210+
struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
211+
struct xe_device *xe = coredump_to_xe(coredump);
212+
unsigned int fw_ref;
213+
214+
/*
215+
* NB: Despite passing a GFP_ flags parameter here, more allocations are done
216+
* internally using GFP_KERNEL expliictly. Hence this call must be in the worker
217+
* thread and not in the initial capture call.
218+
*/
219+
dev_coredumpm_timeout(gt_to_xe(ss->gt)->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
220+
xe_devcoredump_read, xe_devcoredump_free,
221+
XE_COREDUMP_TIMEOUT_JIFFIES);
222+
223+
xe_pm_runtime_get(xe);
224+
225+
/* keep going if fw fails as we still want to save the memory and SW data */
226+
fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
227+
if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
228+
xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
229+
xe_vm_snapshot_capture_delayed(ss->vm);
230+
xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
231+
xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
232+
233+
xe_pm_runtime_put(xe);
234+
235+
/* Calculate devcoredump size */
236+
ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
237+
238+
ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
239+
if (!ss->read.buffer)
240+
return;
241+
242+
__xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
243+
xe_devcoredump_snapshot_free(ss);
244+
}
245+
237246
static void devcoredump_snapshot(struct xe_devcoredump *coredump,
238247
struct xe_sched_job *job)
239248
{
@@ -310,10 +319,6 @@ void xe_devcoredump(struct xe_sched_job *job)
310319
drm_info(&xe->drm, "Xe device coredump has been created\n");
311320
drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
312321
xe->drm.primary->index);
313-
314-
dev_coredumpm_timeout(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
315-
xe_devcoredump_read, xe_devcoredump_free,
316-
XE_COREDUMP_TIMEOUT_JIFFIES);
317322
}
318323

319324
static void xe_driver_devcoredump_fini(void *arg)

0 commit comments

Comments
 (0)