Skip to content

Commit 264b271

Browse files
committed
accel/ivpu: Improve stability of ivpu_submit_ioctl()
- Wake up the device as late as possible - Remove job reference counting in order to simplify the code - Don't put jobs that are not fully submitted on submitted_jobs_xa in order to avoid potential races with reset/recovery Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> Reviewed-by: Wachowski, Karol <karol.wachowski@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240122120945.1150728-3-jacek.lawrynowicz@linux.intel.com
1 parent f1cc6ac commit 264b271

File tree

2 files changed

+62
-78
lines changed

2 files changed

+62
-78
lines changed

drivers/accel/ivpu/ivpu_job.c

Lines changed: 62 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
125125
/*
126126
* Mark the doorbell as unregistered and reset job queue pointers.
127127
* This function needs to be called when the VPU hardware is restarted
128-
* and FW looses job queue state. The next time job queue is used it
128+
* and FW loses job queue state. The next time job queue is used it
129129
* will be registered again.
130130
*/
131131
static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine)
@@ -239,60 +239,32 @@ static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev)
239239
return &fence->base;
240240
}
241241

242-
static void job_get(struct ivpu_job *job, struct ivpu_job **link)
242+
static void ivpu_job_destroy(struct ivpu_job *job)
243243
{
244244
struct ivpu_device *vdev = job->vdev;
245-
246-
kref_get(&job->ref);
247-
*link = job;
248-
249-
ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref));
250-
}
251-
252-
static void job_release(struct kref *ref)
253-
{
254-
struct ivpu_job *job = container_of(ref, struct ivpu_job, ref);
255-
struct ivpu_device *vdev = job->vdev;
256245
u32 i;
257246

247+
ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d",
248+
job->job_id, job->file_priv->ctx.id, job->engine_idx);
249+
258250
for (i = 0; i < job->bo_count; i++)
259251
if (job->bos[i])
260252
drm_gem_object_put(&job->bos[i]->base.base);
261253

262254
dma_fence_put(job->done_fence);
263255
ivpu_file_priv_put(&job->file_priv);
264-
265-
ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id);
266256
kfree(job);
267-
268-
/* Allow the VPU to get suspended, must be called after ivpu_file_priv_put() */
269-
ivpu_rpm_put(vdev);
270-
}
271-
272-
static void job_put(struct ivpu_job *job)
273-
{
274-
struct ivpu_device *vdev = job->vdev;
275-
276-
ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref));
277-
kref_put(&job->ref, job_release);
278257
}
279258

280259
static struct ivpu_job *
281-
ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
260+
ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
282261
{
283262
struct ivpu_device *vdev = file_priv->vdev;
284263
struct ivpu_job *job;
285-
int ret;
286-
287-
ret = ivpu_rpm_get(vdev);
288-
if (ret < 0)
289-
return NULL;
290264

291265
job = kzalloc(struct_size(job, bos, bo_count), GFP_KERNEL);
292266
if (!job)
293-
goto err_rpm_put;
294-
295-
kref_init(&job->ref);
267+
return NULL;
296268

297269
job->vdev = vdev;
298270
job->engine_idx = engine_idx;
@@ -306,17 +278,14 @@ ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
306278
job->file_priv = ivpu_file_priv_get(file_priv);
307279

308280
ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx);
309-
310281
return job;
311282

312283
err_free_job:
313284
kfree(job);
314-
err_rpm_put:
315-
ivpu_rpm_put(vdev);
316285
return NULL;
317286
}
318287

319-
static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
288+
static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
320289
{
321290
struct ivpu_job *job;
322291

@@ -333,9 +302,10 @@ static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
333302
ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n",
334303
job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
335304

305+
ivpu_job_destroy(job);
336306
ivpu_stop_job_timeout_detection(vdev);
337307

338-
job_put(job);
308+
ivpu_rpm_put(vdev);
339309
return 0;
340310
}
341311

@@ -345,64 +315,76 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev)
345315
unsigned long id;
346316

347317
xa_for_each(&vdev->submitted_jobs_xa, id, job)
348-
ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED);
318+
ivpu_job_signal_and_destroy(vdev, id, VPU_JSM_STATUS_ABORTED);
349319
}
350320

351-
static int ivpu_direct_job_submission(struct ivpu_job *job)
321+
static int ivpu_job_submit(struct ivpu_job *job)
352322
{
353323
struct ivpu_file_priv *file_priv = job->file_priv;
354324
struct ivpu_device *vdev = job->vdev;
355325
struct xa_limit job_id_range;
356326
struct ivpu_cmdq *cmdq;
357327
int ret;
358328

329+
ret = ivpu_rpm_get(vdev);
330+
if (ret < 0)
331+
return ret;
332+
359333
mutex_lock(&file_priv->lock);
360334

361335
cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx);
362336
if (!cmdq) {
363-
ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n",
364-
file_priv->ctx.id, job->engine_idx);
337+
ivpu_warn_ratelimited(vdev, "Failed get job queue, ctx %d engine %d\n",
338+
file_priv->ctx.id, job->engine_idx);
365339
ret = -EINVAL;
366-
goto err_unlock;
340+
goto err_unlock_file_priv;
367341
}
368342

369343
job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1));
370344
job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK;
371345

372-
job_get(job, &job);
373-
ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL);
346+
xa_lock(&vdev->submitted_jobs_xa);
347+
ret = __xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL);
374348
if (ret) {
375-
ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret);
376-
goto err_job_put;
349+
ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
350+
file_priv->ctx.id);
351+
ret = -EBUSY;
352+
goto err_unlock_submitted_jobs_xa;
377353
}
378354

379355
ret = ivpu_cmdq_push_job(cmdq, job);
380356
if (ret)
381-
goto err_xa_erase;
357+
goto err_erase_xa;
382358

383359
ivpu_start_job_timeout_detection(vdev);
384360

385-
ivpu_dbg(vdev, JOB, "Job submitted: id %3u addr 0x%llx ctx %2d engine %d next %d\n",
386-
job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id,
387-
job->engine_idx, cmdq->jobq->header.tail);
388-
389-
if (ivpu_test_mode & IVPU_TEST_MODE_NULL_HW) {
390-
ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
361+
if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
391362
cmdq->jobq->header.head = cmdq->jobq->header.tail;
392363
wmb(); /* Flush WC buffer for jobq header */
393364
} else {
394365
ivpu_cmdq_ring_db(vdev, cmdq);
395366
}
396367

368+
ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d addr 0x%llx next %d\n",
369+
job->job_id, file_priv->ctx.id, job->engine_idx,
370+
job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
371+
372+
xa_unlock(&vdev->submitted_jobs_xa);
373+
397374
mutex_unlock(&file_priv->lock);
375+
376+
if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW))
377+
ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
378+
398379
return 0;
399380

400-
err_xa_erase:
401-
xa_erase(&vdev->submitted_jobs_xa, job->job_id);
402-
err_job_put:
403-
job_put(job);
404-
err_unlock:
381+
err_erase_xa:
382+
__xa_erase(&vdev->submitted_jobs_xa, job->job_id);
383+
err_unlock_submitted_jobs_xa:
384+
xa_unlock(&vdev->submitted_jobs_xa);
385+
err_unlock_file_priv:
405386
mutex_unlock(&file_priv->lock);
387+
ivpu_rpm_put(vdev);
406388
return ret;
407389
}
408390

@@ -508,44 +490,47 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
508490
params->buffer_count * sizeof(u32));
509491
if (ret) {
510492
ret = -EFAULT;
511-
goto free_handles;
493+
goto err_free_handles;
512494
}
513495

514496
if (!drm_dev_enter(&vdev->drm, &idx)) {
515497
ret = -ENODEV;
516-
goto free_handles;
498+
goto err_free_handles;
517499
}
518500

519501
ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n",
520502
file_priv->ctx.id, params->buffer_count);
521503

522-
job = ivpu_create_job(file_priv, params->engine, params->buffer_count);
504+
job = ivpu_job_create(file_priv, params->engine, params->buffer_count);
523505
if (!job) {
524506
ivpu_err(vdev, "Failed to create job\n");
525507
ret = -ENOMEM;
526-
goto dev_exit;
508+
goto err_exit_dev;
527509
}
528510

529511
ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count,
530512
params->commands_offset);
531513
if (ret) {
532-
ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret);
533-
goto job_put;
514+
ivpu_err(vdev, "Failed to prepare job: %d\n", ret);
515+
goto err_destroy_job;
534516
}
535517

536-
ret = ivpu_direct_job_submission(job);
537-
if (ret) {
538-
dma_fence_signal(job->done_fence);
539-
ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret);
540-
}
518+
ret = ivpu_job_submit(job);
519+
if (ret)
520+
goto err_signal_fence;
541521

542-
job_put:
543-
job_put(job);
544-
dev_exit:
545522
drm_dev_exit(idx);
546-
free_handles:
547523
kfree(buf_handles);
524+
return ret;
548525

526+
err_signal_fence:
527+
dma_fence_signal(job->done_fence);
528+
err_destroy_job:
529+
ivpu_job_destroy(job);
530+
err_exit_dev:
531+
drm_dev_exit(idx);
532+
err_free_handles:
533+
kfree(buf_handles);
549534
return ret;
550535
}
551536

@@ -567,7 +552,7 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
567552
}
568553

569554
payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
570-
ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
555+
ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
571556
if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
572557
ivpu_start_job_timeout_detection(vdev);
573558
}

drivers/accel/ivpu/ivpu_job.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ struct ivpu_cmdq {
4343
will update the job status
4444
*/
4545
struct ivpu_job {
46-
struct kref ref;
4746
struct ivpu_device *vdev;
4847
struct ivpu_file_priv *file_priv;
4948
struct dma_fence *done_fence;

0 commit comments

Comments
 (0)