Skip to content

Commit 41a2d82

Browse files
committed
accel/ivpu: Fix error handling in recovery/reset
Disable runtime PM for the duration of reset/recovery so that the correct runtime PM state can be set depending on the outcome of `ivpu_resume()`. Don't suspend or reset the HW if the NPU is suspended when the reset/recovery is requested. Also, move the common reset/recovery code into separate functions for better code readability.

Fixes: 27d1926 ("accel/ivpu: Improve recovery and reset support")
Cc: stable@vger.kernel.org # v6.8+
Reviewed-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250129124009.1039982-4-jacek.lawrynowicz@linux.intel.com
1 parent f2bc2af commit 41a2d82

File tree

1 file changed

+43
-36
lines changed

1 file changed

+43
-36
lines changed

drivers/accel/ivpu/ivpu_pm.c

Lines changed: 43 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -115,41 +115,57 @@ static int ivpu_resume(struct ivpu_device *vdev)
115115
return ret;
116116
}
117117

118-
static void ivpu_pm_recovery_work(struct work_struct *work)
118+
static void ivpu_pm_reset_begin(struct ivpu_device *vdev)
119119
{
120-
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
121-
struct ivpu_device *vdev = pm->vdev;
122-
char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
123-
int ret;
124-
125-
ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
126-
127-
ret = pm_runtime_resume_and_get(vdev->drm.dev);
128-
if (ret)
129-
ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
130-
131-
ivpu_jsm_state_dump(vdev);
132-
ivpu_dev_coredump(vdev);
120+
pm_runtime_disable(vdev->drm.dev);
133121

134122
atomic_inc(&vdev->pm->reset_counter);
135123
atomic_set(&vdev->pm->reset_pending, 1);
136124
down_write(&vdev->pm->reset_lock);
125+
}
126+
127+
static void ivpu_pm_reset_complete(struct ivpu_device *vdev)
128+
{
129+
int ret;
137130

138-
ivpu_suspend(vdev);
139131
ivpu_pm_prepare_cold_boot(vdev);
140132
ivpu_jobs_abort_all(vdev);
141133
ivpu_ms_cleanup_all(vdev);
142134

143135
ret = ivpu_resume(vdev);
144-
if (ret)
136+
if (ret) {
145137
ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
138+
pm_runtime_set_suspended(vdev->drm.dev);
139+
} else {
140+
pm_runtime_set_active(vdev->drm.dev);
141+
}
146142

147143
up_write(&vdev->pm->reset_lock);
148144
atomic_set(&vdev->pm->reset_pending, 0);
149145

150-
kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
151146
pm_runtime_mark_last_busy(vdev->drm.dev);
152-
pm_runtime_put_autosuspend(vdev->drm.dev);
147+
pm_runtime_enable(vdev->drm.dev);
148+
}
149+
150+
static void ivpu_pm_recovery_work(struct work_struct *work)
151+
{
152+
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
153+
struct ivpu_device *vdev = pm->vdev;
154+
char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
155+
156+
ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
157+
158+
ivpu_pm_reset_begin(vdev);
159+
160+
if (!pm_runtime_status_suspended(vdev->drm.dev)) {
161+
ivpu_jsm_state_dump(vdev);
162+
ivpu_dev_coredump(vdev);
163+
ivpu_suspend(vdev);
164+
}
165+
166+
ivpu_pm_reset_complete(vdev);
167+
168+
kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
153169
}
154170

155171
void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
@@ -328,35 +344,26 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
328344
struct ivpu_device *vdev = pci_get_drvdata(pdev);
329345

330346
ivpu_dbg(vdev, PM, "Pre-reset..\n");
331-
atomic_inc(&vdev->pm->reset_counter);
332-
atomic_set(&vdev->pm->reset_pending, 1);
333347

334-
pm_runtime_get_sync(vdev->drm.dev);
335-
down_write(&vdev->pm->reset_lock);
336-
ivpu_prepare_for_reset(vdev);
337-
ivpu_hw_reset(vdev);
338-
ivpu_pm_prepare_cold_boot(vdev);
339-
ivpu_jobs_abort_all(vdev);
340-
ivpu_ms_cleanup_all(vdev);
348+
ivpu_pm_reset_begin(vdev);
349+
350+
if (!pm_runtime_status_suspended(vdev->drm.dev)) {
351+
ivpu_prepare_for_reset(vdev);
352+
ivpu_hw_reset(vdev);
353+
}
341354

342355
ivpu_dbg(vdev, PM, "Pre-reset done.\n");
343356
}
344357

345358
void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
346359
{
347360
struct ivpu_device *vdev = pci_get_drvdata(pdev);
348-
int ret;
349361

350362
ivpu_dbg(vdev, PM, "Post-reset..\n");
351-
ret = ivpu_resume(vdev);
352-
if (ret)
353-
ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
354-
up_write(&vdev->pm->reset_lock);
355-
atomic_set(&vdev->pm->reset_pending, 0);
356-
ivpu_dbg(vdev, PM, "Post-reset done.\n");
357363

358-
pm_runtime_mark_last_busy(vdev->drm.dev);
359-
pm_runtime_put_autosuspend(vdev->drm.dev);
364+
ivpu_pm_reset_complete(vdev);
365+
366+
ivpu_dbg(vdev, PM, "Post-reset done.\n");
360367
}
361368

362369
void ivpu_pm_init(struct ivpu_device *vdev)

0 commit comments

Comments
 (0)