Skip to content

Commit 915bac6

Browse files
committed
Merge tag 'drm-xe-fixes-2024-12-04' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
Driver Changes: - Missing init value and 64-bit write-order check (Zhanjung) - Fix a memory allocation issue causing lockdep violation (John) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/Z1BidZBFQOLjz__J@fedora
2 parents defc06f + 5dce85f commit 915bac6

File tree

2 files changed

+101
-49
lines changed

2 files changed

+101
-49
lines changed

drivers/gpu/drm/xe/xe_devcoredump.c

Lines changed: 39 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -155,36 +155,6 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
155155
ss->vm = NULL;
156156
}
157157

158-
static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
159-
{
160-
struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
161-
struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
162-
struct xe_device *xe = coredump_to_xe(coredump);
163-
unsigned int fw_ref;
164-
165-
xe_pm_runtime_get(xe);
166-
167-
/* keep going if fw fails as we still want to save the memory and SW data */
168-
fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
169-
if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
170-
xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
171-
xe_vm_snapshot_capture_delayed(ss->vm);
172-
xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
173-
xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
174-
175-
xe_pm_runtime_put(xe);
176-
177-
/* Calculate devcoredump size */
178-
ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
179-
180-
ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
181-
if (!ss->read.buffer)
182-
return;
183-
184-
__xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
185-
xe_devcoredump_snapshot_free(ss);
186-
}
187-
188158
static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
189159
size_t count, void *data, size_t datalen)
190160
{
@@ -234,6 +204,45 @@ static void xe_devcoredump_free(void *data)
234204
"Xe device coredump has been deleted.\n");
235205
}
236206

207+
static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
208+
{
209+
struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
210+
struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
211+
struct xe_device *xe = coredump_to_xe(coredump);
212+
unsigned int fw_ref;
213+
214+
/*
215+
* NB: Despite passing a GFP_ flags parameter here, more allocations are done
216+
* internally using GFP_KERNEL expliictly. Hence this call must be in the worker
217+
* thread and not in the initial capture call.
218+
*/
219+
dev_coredumpm_timeout(gt_to_xe(ss->gt)->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
220+
xe_devcoredump_read, xe_devcoredump_free,
221+
XE_COREDUMP_TIMEOUT_JIFFIES);
222+
223+
xe_pm_runtime_get(xe);
224+
225+
/* keep going if fw fails as we still want to save the memory and SW data */
226+
fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
227+
if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
228+
xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
229+
xe_vm_snapshot_capture_delayed(ss->vm);
230+
xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
231+
xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
232+
233+
xe_pm_runtime_put(xe);
234+
235+
/* Calculate devcoredump size */
236+
ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
237+
238+
ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
239+
if (!ss->read.buffer)
240+
return;
241+
242+
__xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
243+
xe_devcoredump_snapshot_free(ss);
244+
}
245+
237246
static void devcoredump_snapshot(struct xe_devcoredump *coredump,
238247
struct xe_sched_job *job)
239248
{
@@ -310,10 +319,6 @@ void xe_devcoredump(struct xe_sched_job *job)
310319
drm_info(&xe->drm, "Xe device coredump has been created\n");
311320
drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
312321
xe->drm.primary->index);
313-
314-
dev_coredumpm_timeout(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
315-
xe_devcoredump_read, xe_devcoredump_free,
316-
XE_COREDUMP_TIMEOUT_JIFFIES);
317322
}
318323

319324
static void xe_driver_devcoredump_fini(void *arg)

drivers/gpu/drm/xe/xe_guc_capture.c

Lines changed: 62 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ struct __guc_capture_parsed_output {
102102
* A 64 bit register define requires 2 consecutive entries,
103103
* with low dword first and hi dword the second.
104104
* 2. Register name: null for incompleted define
105+
* 3. Incorrect order will trigger XE_WARN.
105106
*/
106107
#define COMMON_XELP_BASE_GLOBAL \
107108
{ FORCEWAKE_GT, REG_32BIT, 0, 0, "FORCEWAKE_GT"}
@@ -1675,10 +1676,10 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_
16751676
struct xe_devcoredump *devcoredump = &xe->devcoredump;
16761677
struct xe_devcoredump_snapshot *devcore_snapshot = &devcoredump->snapshot;
16771678
struct gcap_reg_list_info *reginfo = NULL;
1678-
u32 last_value, i;
1679-
bool is_ext;
1679+
u32 i, last_value = 0;
1680+
bool is_ext, low32_ready = false;
16801681

1681-
if (!list || list->num_regs == 0)
1682+
if (!list || !list->list || list->num_regs == 0)
16821683
return;
16831684
XE_WARN_ON(!devcore_snapshot->matched_node);
16841685

@@ -1701,29 +1702,75 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_
17011702
continue;
17021703

17031704
value = reg->value;
1704-
if (reg_desc->data_type == REG_64BIT_LOW_DW) {
1705+
switch (reg_desc->data_type) {
1706+
case REG_64BIT_LOW_DW:
17051707
last_value = value;
1708+
1709+
/*
1710+
* A 64 bit register define requires 2 consecutive
1711+
* entries in register list, with low dword first
1712+
* and hi dword the second, like:
1713+
* { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
1714+
* { XXX_REG_HI(0), REG_64BIT_HI_DW, 0, 0, "XXX_REG"},
1715+
*
1716+
* Incorrect order will trigger XE_WARN.
1717+
*
1718+
* Possible double low here, for example:
1719+
* { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
1720+
* { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
1721+
*/
1722+
XE_WARN_ON(low32_ready);
1723+
low32_ready = true;
17061724
/* Low 32 bit dword saved, continue for high 32 bit */
1707-
continue;
1708-
} else if (reg_desc->data_type == REG_64BIT_HI_DW) {
1725+
break;
1726+
1727+
case REG_64BIT_HI_DW: {
17091728
u64 value_qw = ((u64)value << 32) | last_value;
17101729

1730+
/*
1731+
* Incorrect 64bit register order. Possible missing low.
1732+
* for example:
1733+
* { XXX_REG(0), REG_32BIT, 0, 0, NULL},
1734+
* { XXX_REG_HI(0), REG_64BIT_HI_DW, 0, 0, NULL},
1735+
*/
1736+
XE_WARN_ON(!low32_ready);
1737+
low32_ready = false;
1738+
17111739
drm_printf(p, "\t%s: 0x%016llx\n", reg_desc->regname, value_qw);
1712-
continue;
1740+
break;
17131741
}
17141742

1715-
if (is_ext) {
1716-
int dss, group, instance;
1743+
case REG_32BIT:
1744+
/*
1745+
* Incorrect 64bit register order. Possible missing high.
1746+
* for example:
1747+
* { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
1748+
* { XXX_REG(0), REG_32BIT, 0, 0, "XXX_REG"},
1749+
*/
1750+
XE_WARN_ON(low32_ready);
1751+
1752+
if (is_ext) {
1753+
int dss, group, instance;
17171754

1718-
group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags);
1719-
instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags);
1720-
dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance);
1755+
group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags);
1756+
instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags);
1757+
dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance);
17211758

1722-
drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value);
1723-
} else {
1724-
drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value);
1759+
drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value);
1760+
} else {
1761+
drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value);
1762+
}
1763+
break;
17251764
}
17261765
}
1766+
1767+
/*
1768+
* Incorrect 64bit register order. Possible missing high.
1769+
* for example:
1770+
* { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL},
1771+
* } // <- Register list end
1772+
*/
1773+
XE_WARN_ON(low32_ready);
17271774
}
17281775

17291776
/**

0 commit comments

Comments
 (0)