
Commit 84211b1

Thomas Hellström authored and Rodrigo Vivi committed
drm/xe: Fix fault mode invalidation with unbind
Fix fault mode invalidation racing with unbind leading to the PTE
zapping potentially traversing an invalid page-table tree. Do this
by holding the notifier lock across PTE zapping. This might
transfer any contention waiting on the notifier seqlock read side
to the notifier lock read side, but that shouldn't be a major
problem.

At the same time get rid of the open-coded invalidation in the bind
code by relying on the notifier even when the vma bind is not yet
committed.

Finally let userptr invalidation call a dedicated xe_vm function
performing a full invalidation.

Fixes: e8babb2 ("drm/xe: Convert multiple bind ops into single job")
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: <stable@vger.kernel.org> # v6.12+
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-4-thomas.hellstrom@linux.intel.com
(cherry picked from commit 100a5b8)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
1 parent 1414d95 commit 84211b1
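The heart of the fix, visible in the xe_vm.c diff below, is where the invalidation work runs relative to the notifier lock: it now runs entirely with the write lock held, so a concurrent unbind cannot free page-table nodes while the zap traverses them. A minimal userspace analogue of that pattern — a pthread rwlock standing in for vm->userptr.notifier_lock; zap_ptes() and notifier_invalidate() are illustrative names, not driver code:

	#include <pthread.h>
	#include <stdio.h>

	/* Stand-in for vm->userptr.notifier_lock. */
	static pthread_rwlock_t notifier_lock = PTHREAD_RWLOCK_INITIALIZER;

	static void zap_ptes(void)
	{
		/* Walks the page-table tree; must not race with unbind freeing it. */
		printf("zapping PTEs under the notifier lock\n");
	}

	static void notifier_invalidate(void)
	{
		pthread_rwlock_wrlock(&notifier_lock);
		/* seq bump happens here (mmu_interval_set_seq() in the driver) */
		zap_ptes();	/* the race: this used to be reachable after unlock */
		pthread_rwlock_unlock(&notifier_lock);
	}

	int main(void)
	{
		notifier_invalidate();
		return 0;
	}

Build with gcc -pthread; the point is only the lock scope, not the driver's actual data structures.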

File tree

4 files changed: +75 −60 lines

drivers/gpu/drm/xe/xe_pt.c (9 additions, 29 deletions)

@@ -1213,42 +1213,22 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
 		return 0;
 
 	uvma = to_userptr_vma(vma);
-	notifier_seq = uvma->userptr.notifier_seq;
+	if (xe_pt_userptr_inject_eagain(uvma))
+		xe_vma_userptr_force_invalidate(uvma);
 
-	if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm))
-		return 0;
+	notifier_seq = uvma->userptr.notifier_seq;
 
 	if (!mmu_interval_read_retry(&uvma->userptr.notifier,
-				     notifier_seq) &&
-	    !xe_pt_userptr_inject_eagain(uvma))
+				     notifier_seq))
 		return 0;
 
-	if (xe_vm_in_fault_mode(vm)) {
+	if (xe_vm_in_fault_mode(vm))
 		return -EAGAIN;
-	} else {
-		spin_lock(&vm->userptr.invalidated_lock);
-		list_move_tail(&uvma->userptr.invalidate_link,
-			       &vm->userptr.invalidated);
-		spin_unlock(&vm->userptr.invalidated_lock);
-
-		if (xe_vm_in_preempt_fence_mode(vm)) {
-			struct dma_resv_iter cursor;
-			struct dma_fence *fence;
-			long err;
-
-			dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
-					    DMA_RESV_USAGE_BOOKKEEP);
-			dma_resv_for_each_fence_unlocked(&cursor, fence)
-				dma_fence_enable_sw_signaling(fence);
-			dma_resv_iter_end(&cursor);
-
-			err = dma_resv_wait_timeout(xe_vm_resv(vm),
-						    DMA_RESV_USAGE_BOOKKEEP,
-						    false, MAX_SCHEDULE_TIMEOUT);
-			XE_WARN_ON(err <= 0);
-		}
-	}
 
+	/*
+	 * Just continue the operation since exec or rebind worker
+	 * will take care of rebinding.
+	 */
 	return 0;
 }
 
drivers/gpu/drm/xe/xe_vm.c (56 additions, 29 deletions)

@@ -579,51 +579,26 @@ static void preempt_rebind_work_func(struct work_struct *w)
 	trace_xe_vm_rebind_worker_exit(vm);
 }
 
-static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
-				   const struct mmu_notifier_range *range,
-				   unsigned long cur_seq)
+static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
 {
-	struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
-	struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
+	struct xe_userptr *userptr = &uvma->userptr;
 	struct xe_vma *vma = &uvma->vma;
-	struct xe_vm *vm = xe_vma_vm(vma);
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 	long err;
 
-	xe_assert(vm->xe, xe_vma_is_userptr(vma));
-	trace_xe_vma_userptr_invalidate(vma);
-
-	if (!mmu_notifier_range_blockable(range))
-		return false;
-
-	vm_dbg(&xe_vma_vm(vma)->xe->drm,
-	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
-		xe_vma_start(vma), xe_vma_size(vma));
-
-	down_write(&vm->userptr.notifier_lock);
-	mmu_interval_set_seq(mni, cur_seq);
-
-	/* No need to stop gpu access if the userptr is not yet bound. */
-	if (!userptr->initial_bind) {
-		up_write(&vm->userptr.notifier_lock);
-		return true;
-	}
-
 	/*
 	 * Tell exec and rebind worker they need to repin and rebind this
 	 * userptr.
 	 */
 	if (!xe_vm_in_fault_mode(vm) &&
-	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
+	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
 		spin_lock(&vm->userptr.invalidated_lock);
 		list_move_tail(&userptr->invalidate_link,
 			       &vm->userptr.invalidated);
 		spin_unlock(&vm->userptr.invalidated_lock);
 	}
 
-	up_write(&vm->userptr.notifier_lock);
-
 	/*
 	 * Preempt fences turn into schedule disables, pipeline these.
 	 * Note that even in fault mode, we need to wait for binds and
@@ -641,11 +616,35 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
 				    false, MAX_SCHEDULE_TIMEOUT);
 	XE_WARN_ON(err <= 0);
 
-	if (xe_vm_in_fault_mode(vm)) {
+	if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
 		err = xe_vm_invalidate_vma(vma);
 		XE_WARN_ON(err);
 	}
+}
 
+static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
+				   const struct mmu_notifier_range *range,
+				   unsigned long cur_seq)
+{
+	struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
+	struct xe_vma *vma = &uvma->vma;
+	struct xe_vm *vm = xe_vma_vm(vma);
+
+	xe_assert(vm->xe, xe_vma_is_userptr(vma));
+	trace_xe_vma_userptr_invalidate(vma);
+
+	if (!mmu_notifier_range_blockable(range))
+		return false;
+
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
+		xe_vma_start(vma), xe_vma_size(vma));
+
+	down_write(&vm->userptr.notifier_lock);
+	mmu_interval_set_seq(mni, cur_seq);
+
+	__vma_userptr_invalidate(vm, uvma);
+	up_write(&vm->userptr.notifier_lock);
 	trace_xe_vma_userptr_invalidate_complete(vma);
 
 	return true;
@@ -655,6 +654,34 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
 	.invalidate = vma_userptr_invalidate,
 };
 
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+/**
+ * xe_vma_userptr_force_invalidate() - force invalidate a userptr
+ * @uvma: The userptr vma to invalidate
+ *
+ * Perform a forced userptr invalidation for testing purposes.
+ */
+void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
+{
+	struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+
+	/* Protect against concurrent userptr pinning */
+	lockdep_assert_held(&vm->lock);
+	/* Protect against concurrent notifiers */
+	lockdep_assert_held(&vm->userptr.notifier_lock);
+	/*
+	 * Protect against concurrent instances of this function and
+	 * the critical exec sections
+	 */
+	xe_vm_assert_held(vm);
+
+	if (!mmu_interval_read_retry(&uvma->userptr.notifier,
+				     uvma->userptr.notifier_seq))
+		uvma->userptr.notifier_seq -= 2;
+	__vma_userptr_invalidate(vm, uvma);
+}
+#endif
+
 int xe_vm_userptr_pin(struct xe_vm *vm)
 {
 	struct xe_userptr_vma *uvma, *next;
drivers/gpu/drm/xe/xe_vm.h (8 additions, 0 deletions)

@@ -279,4 +279,12 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm);
 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap);
 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p);
 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap);
+
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma);
+#else
+static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
+{
+}
+#endif
 #endif

drivers/gpu/drm/xe/xe_vm_types.h (2 additions, 2 deletions)

@@ -227,8 +227,8 @@ struct xe_vm {
		 * up for revalidation. Protected from access with the
		 * @invalidated_lock. Removing items from the list
		 * additionally requires @lock in write mode, and adding
-		 * items to the list requires the @userptr.notifer_lock in
-		 * write mode.
+		 * items to the list requires either the @userptr.notifer_lock in
+		 * write mode, OR @lock in write mode.
		 */
		struct list_head invalidated;
	} userptr;
