Skip to content

Commit c3e4a25

Browse files
committed
drm/v3d: Set job pointer to NULL when the job's fence has an error
Similar to commit e4b5ccd ("drm/v3d: Ensure job pointer is set to NULL after job completion"), ensure the job pointer is set to `NULL` when a job's fence has an error. Failing to do so can trigger kernel warnings in specific scenarios, such as:

1. v3d_csd_job_run() assigns `v3d->csd_job = job`
2. CSD job exceeds hang limit, causing a timeout → v3d_gpu_reset_for_timeout()
3. GPU reset
4. drm_sched_resubmit_jobs() sets the job's fence to `-ECANCELED`.
5. v3d_csd_job_run() detects the fence error and returns NULL, not submitting the job to the GPU
6. User-space runs `modprobe -r v3d`
7. v3d_gem_destroy()

v3d_gem_destroy() triggers a warning indicating that the CSD job never ended, as we didn't set `v3d->csd_job` to NULL after the timeout. The same can also happen to BIN, RENDER, and TFU jobs.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250313-v3d-gpu-reset-fixes-v4-2-c1e780d8e096@igalia.com
1 parent 80cbee8 commit c3e4a25

File tree

1 file changed

+14
-4
lines changed

1 file changed

+14
-4
lines changed

drivers/gpu/drm/v3d/v3d_sched.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,12 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
226226
struct dma_fence *fence;
227227
unsigned long irqflags;
228228

229-
if (unlikely(job->base.base.s_fence->finished.error))
229+
if (unlikely(job->base.base.s_fence->finished.error)) {
230+
spin_lock_irqsave(&v3d->job_lock, irqflags);
231+
v3d->bin_job = NULL;
232+
spin_unlock_irqrestore(&v3d->job_lock, irqflags);
230233
return NULL;
234+
}
231235

232236
/* Lock required around bin_job update vs
233237
* v3d_overflow_mem_work().
@@ -281,8 +285,10 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
281285
struct drm_device *dev = &v3d->drm;
282286
struct dma_fence *fence;
283287

284-
if (unlikely(job->base.base.s_fence->finished.error))
288+
if (unlikely(job->base.base.s_fence->finished.error)) {
289+
v3d->render_job = NULL;
285290
return NULL;
291+
}
286292

287293
v3d->render_job = job;
288294

@@ -327,8 +333,10 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
327333
struct drm_device *dev = &v3d->drm;
328334
struct dma_fence *fence;
329335

330-
if (unlikely(job->base.base.s_fence->finished.error))
336+
if (unlikely(job->base.base.s_fence->finished.error)) {
337+
v3d->tfu_job = NULL;
331338
return NULL;
339+
}
332340

333341
v3d->tfu_job = job;
334342

@@ -373,8 +381,10 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
373381
struct dma_fence *fence;
374382
int i, csd_cfg0_reg;
375383

376-
if (unlikely(job->base.base.s_fence->finished.error))
384+
if (unlikely(job->base.base.s_fence->finished.error)) {
385+
v3d->csd_job = NULL;
377386
return NULL;
387+
}
378388

379389
v3d->csd_job = job;
380390

0 commit comments

Comments
 (0)