Skip to content
This repository was archived by the owner on Nov 8, 2023. It is now read-only.

Commit b212b79

Browse files
jkrzyszt-intelrodrigovivi
authored andcommitted
drm/i915/hwmon: Fix locking inversion in sysfs getter
In i915 hwmon sysfs getter path we now take a hwmon_lock, then acquire an rpm wakeref. That results in lock inversion: <4> [197.079335] ====================================================== <4> [197.085473] WARNING: possible circular locking dependency detected <4> [197.091611] 6.8.0-rc7-Patchwork_129026v7-gc4dc92fb1152+ #1 Not tainted <4> [197.098096] ------------------------------------------------------ <4> [197.104231] prometheus-node/839 is trying to acquire lock: <4> [197.109680] ffffffff82764d80 (fs_reclaim){+.+.}-{0:0}, at: __kmalloc+0x9a/0x350 <4> [197.116939] but task is already holding lock: <4> [197.122730] ffff88811b772a40 (&hwmon->hwmon_lock){+.+.}-{3:3}, at: hwm_energy+0x4b/0x100 [i915] <4> [197.131543] which lock already depends on the new lock. ... <4> [197.507922] Chain exists of: fs_reclaim --> &gt->reset.mutex --> &hwmon->hwmon_lock <4> [197.518528] Possible unsafe locking scenario: <4> [197.524411] CPU0 CPU1 <4> [197.528916] ---- ---- <4> [197.533418] lock(&hwmon->hwmon_lock); <4> [197.537237] lock(&gt->reset.mutex); <4> [197.543376] lock(&hwmon->hwmon_lock); <4> [197.549682] lock(fs_reclaim); ... <4> [197.632548] Call Trace: <4> [197.634990] <TASK> <4> [197.637088] dump_stack_lvl+0x64/0xb0 <4> [197.640738] check_noncircular+0x15e/0x180 <4> [197.652968] check_prev_add+0xe9/0xce0 <4> [197.656705] __lock_acquire+0x179f/0x2300 <4> [197.660694] lock_acquire+0xd8/0x2d0 <4> [197.673009] fs_reclaim_acquire+0xa1/0xd0 <4> [197.680478] __kmalloc+0x9a/0x350 <4> [197.689063] acpi_ns_internalize_name.part.0+0x4a/0xb0 <4> [197.694170] acpi_ns_get_node_unlocked+0x60/0xf0 <4> [197.720608] acpi_ns_get_node+0x3b/0x60 <4> [197.724428] acpi_get_handle+0x57/0xb0 <4> [197.728164] acpi_has_method+0x20/0x50 <4> [197.731896] acpi_pci_set_power_state+0x43/0x120 <4> [197.736485] pci_power_up+0x24/0x1c0 <4> [197.740047] pci_pm_default_resume_early+0x9/0x30 <4> [197.744725] pci_pm_runtime_resume+0x2d/0x90 <4> [197.753911] __rpm_callback+0x3c/0x110 <4> [197.762586] rpm_callback+0x58/0x70 <4> [197.766064] rpm_resume+0x51e/0x730 <4> [197.769542] rpm_resume+0x267/0x730 <4> [197.773020] rpm_resume+0x267/0x730 <4> [197.776498] rpm_resume+0x267/0x730 <4> [197.779974] __pm_runtime_resume+0x49/0x90 <4> [197.784055] __intel_runtime_pm_get+0x19/0xa0 [i915] <4> [197.789070] hwm_energy+0x55/0x100 [i915] <4> [197.793183] hwm_read+0x9a/0x310 [i915] <4> [197.797124] hwmon_attr_show+0x36/0x120 <4> [197.800946] dev_attr_show+0x15/0x60 <4> [197.804509] sysfs_kf_seq_show+0xb5/0x100 Acquire the wakeref before the lock and hold it as long as the lock is also held. Follow that pattern across the whole source file where similar lock inversion can happen. v2: Keep hardware read under the lock so the whole operation of updating energy from hardware is still atomic (Guenter), - instead, acquire the rpm wakeref before the lock and hold it as long as the lock is held, - use the same aproach for other similar places across the i915_hwmon.c source file (Rodrigo). Fixes: 1b44019 ("drm/i915/guc: Disable PL1 power limit when loading GuC firmware") Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Guenter Roeck <linux@roeck-us.net> Cc: <stable@vger.kernel.org> # v6.5+ Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com> Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240311203500.518675-2-janusz.krzysztofik@linux.intel.com (cherry picked from commit 71b2187) Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
1 parent f127511 commit b212b79

File tree

1 file changed

+19
-18
lines changed

1 file changed

+19
-18
lines changed

drivers/gpu/drm/i915/i915_hwmon.c

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,13 @@ hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat,
7272
struct intel_uncore *uncore = ddat->uncore;
7373
intel_wakeref_t wakeref;
7474

75-
mutex_lock(&hwmon->hwmon_lock);
75+
with_intel_runtime_pm(uncore->rpm, wakeref) {
76+
mutex_lock(&hwmon->hwmon_lock);
7677

77-
with_intel_runtime_pm(uncore->rpm, wakeref)
7878
intel_uncore_rmw(uncore, reg, clear, set);
7979

80-
mutex_unlock(&hwmon->hwmon_lock);
80+
mutex_unlock(&hwmon->hwmon_lock);
81+
}
8182
}
8283

8384
/*
@@ -136,20 +137,21 @@ hwm_energy(struct hwm_drvdata *ddat, long *energy)
136137
else
137138
rgaddr = hwmon->rg.energy_status_all;
138139

139-
mutex_lock(&hwmon->hwmon_lock);
140+
with_intel_runtime_pm(uncore->rpm, wakeref) {
141+
mutex_lock(&hwmon->hwmon_lock);
140142

141-
with_intel_runtime_pm(uncore->rpm, wakeref)
142143
reg_val = intel_uncore_read(uncore, rgaddr);
143144

144-
if (reg_val >= ei->reg_val_prev)
145-
ei->accum_energy += reg_val - ei->reg_val_prev;
146-
else
147-
ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
148-
ei->reg_val_prev = reg_val;
145+
if (reg_val >= ei->reg_val_prev)
146+
ei->accum_energy += reg_val - ei->reg_val_prev;
147+
else
148+
ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
149+
ei->reg_val_prev = reg_val;
149150

150-
*energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
151-
hwmon->scl_shift_energy);
152-
mutex_unlock(&hwmon->hwmon_lock);
151+
*energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
152+
hwmon->scl_shift_energy);
153+
mutex_unlock(&hwmon->hwmon_lock);
154+
}
153155
}
154156

155157
static ssize_t
@@ -404,6 +406,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
404406

405407
/* Block waiting for GuC reset to complete when needed */
406408
for (;;) {
409+
wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
407410
mutex_lock(&hwmon->hwmon_lock);
408411

409412
prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
@@ -417,14 +420,13 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
417420
}
418421

419422
mutex_unlock(&hwmon->hwmon_lock);
423+
intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
420424

421425
schedule();
422426
}
423427
finish_wait(&ddat->waitq, &wait);
424428
if (ret)
425-
goto unlock;
426-
427-
wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
429+
goto exit;
428430

429431
/* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */
430432
if (val == PL1_DISABLE) {
@@ -444,9 +446,8 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
444446
intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
445447
PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
446448
exit:
447-
intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
448-
unlock:
449449
mutex_unlock(&hwmon->hwmon_lock);
450+
intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
450451
return ret;
451452
}
452453

0 commit comments

Comments
 (0)