Skip to content
This repository was archived by the owner on Nov 8, 2023. It is now read-only.

Commit 3e3eb55

Browse files
committed
Merge tag 'drm-misc-next-fixes-2024-05-16' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-next
drm-misc-next-fixes for v6.10-rc1:
- VM_BIND fix for nouveau.
- Lots of panthor fixes:
  * Fixes for panthor's heap logical block.
  * Reset on unrecoverable fault.
  * Fix VM references.
  * Reset fix.
- xlnx compile and doc fixes.

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/54d2c8b9-8b04-45fc-b483-200ffac9d344@linux.intel.com
2 parents 5a5a10d + 959314c commit 3e3eb55

File tree

13 files changed

+123
-73
lines changed

13 files changed

+123
-73
lines changed

drivers/gpu/drm/nouveau/nouveau_abi16.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,9 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
272272
getparam->value = (u64)ttm_resource_manager_usage(vram_mgr);
273273
break;
274274
}
275+
case NOUVEAU_GETPARAM_HAS_VMA_TILEMODE:
276+
getparam->value = 1;
277+
break;
275278
default:
276279
NV_PRINTK(dbg, cli, "unknown parameter %lld\n", getparam->param);
277280
return -EINVAL;

drivers/gpu/drm/nouveau/nouveau_bo.c

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -241,28 +241,28 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
241241
}
242242

243243
nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG);
244-
if (!nouveau_cli_uvmm(cli) || internal) {
245-
/* for BO noVM allocs, don't assign kinds */
246-
if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) {
247-
nvbo->kind = (tile_flags & 0x0000ff00) >> 8;
248-
if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
249-
kfree(nvbo);
250-
return ERR_PTR(-EINVAL);
251-
}
252244

253-
nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind;
254-
} else if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
255-
nvbo->kind = (tile_flags & 0x00007f00) >> 8;
256-
nvbo->comp = (tile_flags & 0x00030000) >> 16;
257-
if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
258-
kfree(nvbo);
259-
return ERR_PTR(-EINVAL);
260-
}
261-
} else {
262-
nvbo->zeta = (tile_flags & 0x00000007);
245+
if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) {
246+
nvbo->kind = (tile_flags & 0x0000ff00) >> 8;
247+
if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
248+
kfree(nvbo);
249+
return ERR_PTR(-EINVAL);
250+
}
251+
252+
nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind;
253+
} else if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
254+
nvbo->kind = (tile_flags & 0x00007f00) >> 8;
255+
nvbo->comp = (tile_flags & 0x00030000) >> 16;
256+
if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
257+
kfree(nvbo);
258+
return ERR_PTR(-EINVAL);
263259
}
264-
nvbo->mode = tile_mode;
260+
} else {
261+
nvbo->zeta = (tile_flags & 0x00000007);
262+
}
263+
nvbo->mode = tile_mode;
265264

265+
if (!nouveau_cli_uvmm(cli) || internal) {
266266
/* Determine the desirable target GPU page size for the buffer. */
267267
for (i = 0; i < vmm->page_nr; i++) {
268268
/* Because we cannot currently allow VMM maps to fail
@@ -304,12 +304,6 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
304304
}
305305
nvbo->page = vmm->page[pi].shift;
306306
} else {
307-
/* reject other tile flags when in VM mode. */
308-
if (tile_mode)
309-
return ERR_PTR(-EINVAL);
310-
if (tile_flags & ~NOUVEAU_GEM_TILE_NONCONTIG)
311-
return ERR_PTR(-EINVAL);
312-
313307
/* Determine the desirable target GPU page size for the buffer. */
314308
for (i = 0; i < vmm->page_nr; i++) {
315309
/* Because we cannot currently allow VMM maps to fail

drivers/gpu/drm/panthor/panthor_device.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -129,13 +129,8 @@ static void panthor_device_reset_work(struct work_struct *work)
129129
panthor_gpu_l2_power_on(ptdev);
130130
panthor_mmu_post_reset(ptdev);
131131
ret = panthor_fw_post_reset(ptdev);
132-
if (ret)
133-
goto out_dev_exit;
134-
135132
atomic_set(&ptdev->reset.pending, 0);
136-
panthor_sched_post_reset(ptdev);
137-
138-
out_dev_exit:
133+
panthor_sched_post_reset(ptdev, ret != 0);
139134
drm_dev_exit(cookie);
140135

141136
if (ret) {
@@ -293,6 +288,7 @@ static const struct panthor_exception_info panthor_exception_infos[] = {
293288
PANTHOR_EXCEPTION(ACTIVE),
294289
PANTHOR_EXCEPTION(CS_RES_TERM),
295290
PANTHOR_EXCEPTION(CS_CONFIG_FAULT),
291+
PANTHOR_EXCEPTION(CS_UNRECOVERABLE),
296292
PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT),
297293
PANTHOR_EXCEPTION(CS_BUS_FAULT),
298294
PANTHOR_EXCEPTION(CS_INSTR_INVALID),

drivers/gpu/drm/panthor/panthor_device.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ enum drm_panthor_exception_type {
216216
DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f,
217217
DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f,
218218
DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40,
219+
DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE = 0x41,
219220
DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44,
220221
DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48,
221222
DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49,

drivers/gpu/drm/panthor/panthor_fw.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
453453

454454
ret = panthor_kernel_bo_vmap(mem);
455455
if (ret) {
456-
panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), mem);
456+
panthor_kernel_bo_destroy(mem);
457457
return ERR_PTR(ret);
458458
}
459459

@@ -1134,14 +1134,15 @@ void panthor_fw_unplug(struct panthor_device *ptdev)
11341134
panthor_fw_stop(ptdev);
11351135

11361136
list_for_each_entry(section, &ptdev->fw->sections, node)
1137-
panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem);
1137+
panthor_kernel_bo_destroy(section->mem);
11381138

11391139
/* We intentionally don't call panthor_vm_idle() and let
11401140
* panthor_mmu_unplug() release the AS we acquired with
11411141
* panthor_vm_active() so we don't have to track the VM active/idle
11421142
* state to keep the active_refcnt balanced.
11431143
*/
11441144
panthor_vm_put(ptdev->fw->vm);
1145+
ptdev->fw->vm = NULL;
11451146

11461147
panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
11471148
}

drivers/gpu/drm/panthor/panthor_gem.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,18 @@ static void panthor_gem_free_object(struct drm_gem_object *obj)
2626

2727
/**
2828
* panthor_kernel_bo_destroy() - Destroy a kernel buffer object
29-
* @vm: The VM this BO was mapped to.
3029
* @bo: Kernel buffer object to destroy. If NULL or an ERR_PTR(), the destruction
3130
* is skipped.
3231
*/
33-
void panthor_kernel_bo_destroy(struct panthor_vm *vm,
34-
struct panthor_kernel_bo *bo)
32+
void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo)
3533
{
34+
struct panthor_vm *vm;
3635
int ret;
3736

3837
if (IS_ERR_OR_NULL(bo))
3938
return;
4039

40+
vm = bo->vm;
4141
panthor_kernel_bo_vunmap(bo);
4242

4343
if (drm_WARN_ON(bo->obj->dev,
@@ -53,6 +53,7 @@ void panthor_kernel_bo_destroy(struct panthor_vm *vm,
5353
drm_gem_object_put(bo->obj);
5454

5555
out_free_bo:
56+
panthor_vm_put(vm);
5657
kfree(bo);
5758
}
5859

@@ -106,6 +107,7 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
106107
if (ret)
107108
goto err_free_va;
108109

110+
kbo->vm = panthor_vm_get(vm);
109111
bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm);
110112
drm_gem_object_get(bo->exclusive_vm_root_gem);
111113
bo->base.base.resv = bo->exclusive_vm_root_gem->resv;

drivers/gpu/drm/panthor/panthor_gem.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ struct panthor_kernel_bo {
6161
*/
6262
struct drm_gem_object *obj;
6363

64+
/**
65+
* @vm: VM this private buffer is attached to.
66+
*/
67+
struct panthor_vm *vm;
68+
6469
/**
6570
* @va_node: VA space allocated to this GEM.
6671
*/
@@ -136,7 +141,6 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
136141
size_t size, u32 bo_flags, u32 vm_map_flags,
137142
u64 gpu_va);
138143

139-
void panthor_kernel_bo_destroy(struct panthor_vm *vm,
140-
struct panthor_kernel_bo *bo);
144+
void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo);
141145

142146
#endif /* __PANTHOR_GEM_H__ */

drivers/gpu/drm/panthor/panthor_heap.c

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ static void panthor_free_heap_chunk(struct panthor_vm *vm,
127127
heap->chunk_count--;
128128
mutex_unlock(&heap->lock);
129129

130-
panthor_kernel_bo_destroy(vm, chunk->bo);
130+
panthor_kernel_bo_destroy(chunk->bo);
131131
kfree(chunk);
132132
}
133133

@@ -183,7 +183,7 @@ static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
183183
return 0;
184184

185185
err_destroy_bo:
186-
panthor_kernel_bo_destroy(vm, chunk->bo);
186+
panthor_kernel_bo_destroy(chunk->bo);
187187

188188
err_free_chunk:
189189
kfree(chunk);
@@ -253,8 +253,8 @@ int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
253253
* @pool: Pool to instantiate the heap context from.
254254
* @initial_chunk_count: Number of chunk allocated at initialization time.
255255
* Must be at least 1.
256-
* @chunk_size: The size of each chunk. Must be a power of two between 256k
257-
* and 2M.
256+
* @chunk_size: The size of each chunk. Must be page-aligned and lie in the
257+
* [128k:8M] range.
258258
* @max_chunks: Maximum number of chunks that can be allocated.
259259
* @target_in_flight: Maximum number of in-flight render passes.
260260
* @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
@@ -281,8 +281,11 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
281281
if (initial_chunk_count == 0)
282282
return -EINVAL;
283283

284-
if (hweight32(chunk_size) != 1 ||
285-
chunk_size < SZ_256K || chunk_size > SZ_2M)
284+
if (initial_chunk_count > max_chunks)
285+
return -EINVAL;
286+
287+
if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
288+
chunk_size < SZ_128K || chunk_size > SZ_8M)
286289
return -EINVAL;
287290

288291
down_read(&pool->lock);
@@ -320,7 +323,8 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
320323
if (!pool->vm) {
321324
ret = -EINVAL;
322325
} else {
323-
ret = xa_alloc(&pool->xa, &id, heap, XA_LIMIT(1, MAX_HEAPS_PER_POOL), GFP_KERNEL);
326+
ret = xa_alloc(&pool->xa, &id, heap,
327+
XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
324328
if (!ret) {
325329
void *gpu_ctx = panthor_get_heap_ctx(pool, id);
326330

@@ -391,7 +395,7 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
391395
mutex_unlock(&heap->lock);
392396

393397
if (removed) {
394-
panthor_kernel_bo_destroy(pool->vm, chunk->bo);
398+
panthor_kernel_bo_destroy(chunk->bo);
395399
kfree(chunk);
396400
ret = 0;
397401
} else {
@@ -410,6 +414,13 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
410414
* @renderpasses_in_flight: Number of render passes currently in-flight.
411415
* @pending_frag_count: Number of fragment jobs waiting for execution/completion.
412416
* @new_chunk_gpu_va: Pointer used to return the chunk VA.
417+
*
418+
* Return:
419+
* - 0 if a new heap was allocated
420+
* - -ENOMEM if the tiler context reached the maximum number of chunks
421+
* or if too many render passes are in-flight
422+
* or if the allocation failed
423+
* - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
413424
*/
414425
int panthor_heap_grow(struct panthor_heap_pool *pool,
415426
u64 heap_gpu_va,
@@ -439,10 +450,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool,
439450
* handler provided by the userspace driver, if any).
440451
*/
441452
if (renderpasses_in_flight > heap->target_in_flight ||
442-
(pending_frag_count > 0 && heap->chunk_count >= heap->max_chunks)) {
443-
ret = -EBUSY;
444-
goto out_unlock;
445-
} else if (heap->chunk_count >= heap->max_chunks) {
453+
heap->chunk_count >= heap->max_chunks) {
446454
ret = -ENOMEM;
447455
goto out_unlock;
448456
}
@@ -536,7 +544,7 @@ panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
536544
pool->vm = vm;
537545
pool->ptdev = ptdev;
538546
init_rwsem(&pool->lock);
539-
xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1);
547+
xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
540548
kref_init(&pool->refcount);
541549

542550
pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
@@ -587,7 +595,7 @@ void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
587595
drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));
588596

589597
if (!IS_ERR_OR_NULL(pool->gpu_contexts))
590-
panthor_kernel_bo_destroy(pool->vm, pool->gpu_contexts);
598+
panthor_kernel_bo_destroy(pool->gpu_contexts);
591599

592600
/* Reflects the fact the pool has been destroyed. */
593601
pool->vm = NULL;

drivers/gpu/drm/panthor/panthor_sched.c

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -826,8 +826,8 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
826826

827827
panthor_queue_put_syncwait_obj(queue);
828828

829-
panthor_kernel_bo_destroy(group->vm, queue->ringbuf);
830-
panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem);
829+
panthor_kernel_bo_destroy(queue->ringbuf);
830+
panthor_kernel_bo_destroy(queue->iface.mem);
831831

832832
kfree(queue);
833833
}
@@ -837,15 +837,14 @@ static void group_release_work(struct work_struct *work)
837837
struct panthor_group *group = container_of(work,
838838
struct panthor_group,
839839
release_work);
840-
struct panthor_device *ptdev = group->ptdev;
841840
u32 i;
842841

843842
for (i = 0; i < group->queue_count; i++)
844843
group_free_queue(group, group->queues[i]);
845844

846-
panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf);
847-
panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf);
848-
panthor_kernel_bo_destroy(group->vm, group->syncobjs);
845+
panthor_kernel_bo_destroy(group->suspend_buf);
846+
panthor_kernel_bo_destroy(group->protm_suspend_buf);
847+
panthor_kernel_bo_destroy(group->syncobjs);
849848

850849
panthor_vm_put(group->vm);
851850
kfree(group);
@@ -1281,7 +1280,16 @@ cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
12811280
if (group)
12821281
group->fatal_queues |= BIT(cs_id);
12831282

1284-
sched_queue_delayed_work(sched, tick, 0);
1283+
if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) {
1284+
/* If this exception is unrecoverable, queue a reset, and make
1285+
* sure we stop scheduling groups until the reset has happened.
1286+
*/
1287+
panthor_device_schedule_reset(ptdev);
1288+
cancel_delayed_work(&sched->tick_work);
1289+
} else {
1290+
sched_queue_delayed_work(sched, tick, 0);
1291+
}
1292+
12851293
drm_warn(&ptdev->base,
12861294
"CSG slot %d CS slot: %d\n"
12871295
"CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
@@ -1385,7 +1393,12 @@ static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id)
13851393
pending_frag_count, &new_chunk_va);
13861394
}
13871395

1388-
if (ret && ret != -EBUSY) {
1396+
/* If the heap context doesn't have memory for us, we want to let the
1397+
* FW try to reclaim memory by waiting for fragment jobs to land or by
1398+
* executing the tiler OOM exception handler, which is supposed to
1399+
* implement incremental rendering.
1400+
*/
1401+
if (ret && ret != -ENOMEM) {
13891402
drm_warn(&ptdev->base, "Failed to extend the tiler heap\n");
13901403
group->fatal_queues |= BIT(cs_id);
13911404
sched_queue_delayed_work(sched, tick, 0);
@@ -2720,25 +2733,34 @@ void panthor_sched_pre_reset(struct panthor_device *ptdev)
27202733
mutex_unlock(&sched->reset.lock);
27212734
}
27222735

2723-
void panthor_sched_post_reset(struct panthor_device *ptdev)
2736+
void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed)
27242737
{
27252738
struct panthor_scheduler *sched = ptdev->scheduler;
27262739
struct panthor_group *group, *group_tmp;
27272740

27282741
mutex_lock(&sched->reset.lock);
27292742

2730-
list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node)
2743+
list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) {
2744+
/* Consider all previously running group as terminated if the
2745+
* reset failed.
2746+
*/
2747+
if (reset_failed)
2748+
group->state = PANTHOR_CS_GROUP_TERMINATED;
2749+
27312750
panthor_group_start(group);
2751+
}
27322752

27332753
/* We're done resetting the GPU, clear the reset.in_progress bit so we can
27342754
* kick the scheduler.
27352755
*/
27362756
atomic_set(&sched->reset.in_progress, false);
27372757
mutex_unlock(&sched->reset.lock);
27382758

2739-
sched_queue_delayed_work(sched, tick, 0);
2740-
2741-
sched_queue_work(sched, sync_upd);
2759+
/* No need to queue a tick and update syncs if the reset failed. */
2760+
if (!reset_failed) {
2761+
sched_queue_delayed_work(sched, tick, 0);
2762+
sched_queue_work(sched, sync_upd);
2763+
}
27422764
}
27432765

27442766
static void group_sync_upd_work(struct work_struct *work)

0 commit comments

Comments (0)