Commit eea6520

Merge tag 'drm-xe-fixes-2025-03-06' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
- Remove double page flip on initial plane (Maarten)
- Properly setup userptr pfn_flags_mask (Auld)
- Fix GT "for each engine" workarounds (Tvrtko)
- Fix userptr races and missed validations (Thomas, Brost)
- Userptr invalid page access fixes (Thomas)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Z8ni6w3tskCFL11O@intel.com
Parents: 019899b + 333b890

10 files changed, 289 insertions(+), 141 deletions(-)
drivers/gpu/drm/xe/display/xe_plane_initial.c
Lines changed: 0 additions & 10 deletions

@@ -194,8 +194,6 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc,
                 to_intel_plane(crtc->base.primary);
         struct intel_plane_state *plane_state =
                 to_intel_plane_state(plane->base.state);
-        struct intel_crtc_state *crtc_state =
-                to_intel_crtc_state(crtc->base.state);
         struct drm_framebuffer *fb;
         struct i915_vma *vma;
 
@@ -241,14 +239,6 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc,
         atomic_or(plane->frontbuffer_bit, &to_intel_frontbuffer(fb)->bits);
 
         plane_config->vma = vma;
-
-        /*
-         * Flip to the newly created mapping ASAP, so we can re-use the
-         * first part of GGTT for WOPCM, prevent flickering, and prevent
-         * the lookup of sysmem scratch pages.
-         */
-        plane->check_plane(crtc_state, plane_state);
-        plane->async_flip(NULL, plane, crtc_state, plane_state, true);
         return;
 
 nofb:

drivers/gpu/drm/xe/xe_gt.c
Lines changed: 2 additions & 2 deletions

@@ -380,9 +380,7 @@ int xe_gt_init_early(struct xe_gt *gt)
         if (err)
                 return err;
 
-        xe_wa_process_gt(gt);
         xe_wa_process_oob(gt);
-        xe_tuning_process_gt(gt);
 
         xe_force_wake_init_gt(gt, gt_to_fw(gt));
         spin_lock_init(&gt->global_invl_lock);
@@ -474,6 +472,8 @@ static int all_fw_domain_init(struct xe_gt *gt)
         }
 
         xe_gt_mcr_set_implicit_defaults(gt);
+        xe_wa_process_gt(gt);
+        xe_tuning_process_gt(gt);
         xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
 
         err = xe_gt_clock_init(gt);
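
Read together, the two hunks above move the GT workaround and tuning processing out of early GT init and into all_fw_domain_init(). The condensed ordering below is assembled from the hunks only (everything else elided as /* ... */ placeholders); the pull summary's 'Fix GT "for each engine" workarounds (Tvrtko)' suggests the engine walk in xe_wa_process_gt() needs state that is only ready by this later point, though that setup is not visible in this diff.

int xe_gt_init_early(struct xe_gt *gt)
{
        /* ... */
        xe_wa_process_oob(gt);          /* out-of-band workarounds stay early */
        /* xe_wa_process_gt(gt) and xe_tuning_process_gt(gt) no longer run here */
        /* ... */
}

static int all_fw_domain_init(struct xe_gt *gt)
{
        /* ... */
        xe_gt_mcr_set_implicit_defaults(gt);
        xe_wa_process_gt(gt);           /* moved: GT workarounds ... */
        xe_tuning_process_gt(gt);       /* ... and tunings now processed here */
        xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
        /* ... */
}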

drivers/gpu/drm/xe/xe_hmm.c
Lines changed: 140 additions & 48 deletions

@@ -19,11 +19,10 @@ static u64 xe_npages_in_range(unsigned long start, unsigned long end)
         return (end - start) >> PAGE_SHIFT;
 }
 
-/*
+/**
  * xe_mark_range_accessed() - mark a range is accessed, so core mm
  * have such information for memory eviction or write back to
  * hard disk
- *
  * @range: the range to mark
  * @write: if write to this range, we mark pages in this range
  * as dirty
@@ -43,15 +42,51 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write)
         }
 }
 
-/*
+static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st,
+                       struct hmm_range *range, struct rw_semaphore *notifier_sem)
+{
+        unsigned long i, npages, hmm_pfn;
+        unsigned long num_chunks = 0;
+        int ret;
+
+        /* HMM docs says this is needed. */
+        ret = down_read_interruptible(notifier_sem);
+        if (ret)
+                return ret;
+
+        if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
+                up_read(notifier_sem);
+                return -EAGAIN;
+        }
+
+        npages = xe_npages_in_range(range->start, range->end);
+        for (i = 0; i < npages;) {
+                unsigned long len;
+
+                hmm_pfn = range->hmm_pfns[i];
+                xe_assert(xe, hmm_pfn & HMM_PFN_VALID);
+
+                len = 1UL << hmm_pfn_to_map_order(hmm_pfn);
+
+                /* If order > 0 the page may extend beyond range->start */
+                len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1);
+                i += len;
+                num_chunks++;
+        }
+        up_read(notifier_sem);
+
+        return sg_alloc_table(st, num_chunks, GFP_KERNEL);
+}
+
+/**
  * xe_build_sg() - build a scatter gather table for all the physical pages/pfn
  * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table
  * and will be used to program GPU page table later.
- *
  * @xe: the xe device who will access the dma-address in sg table
  * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
  * has the pfn numbers of pages that back up this hmm address range.
  * @st: pointer to the sg table.
+ * @notifier_sem: The xe notifier lock.
  * @write: whether we write to this range. This decides dma map direction
  * for system pages. If write we map it bi-diretional; otherwise
  * DMA_TO_DEVICE
@@ -78,43 +113,84 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write)
  * Returns 0 if successful; -ENOMEM if fails to allocate memory
  */
 static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
-                       struct sg_table *st, bool write)
+                       struct sg_table *st,
+                       struct rw_semaphore *notifier_sem,
+                       bool write)
 {
+        unsigned long npages = xe_npages_in_range(range->start, range->end);
         struct device *dev = xe->drm.dev;
-        struct page **pages;
-        u64 i, npages;
-        int ret;
+        struct scatterlist *sgl;
+        struct page *page;
+        unsigned long i, j;
 
-        npages = xe_npages_in_range(range->start, range->end);
-        pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
-        if (!pages)
-                return -ENOMEM;
+        lockdep_assert_held(notifier_sem);
 
-        for (i = 0; i < npages; i++) {
-                pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
-                xe_assert(xe, !is_device_private_page(pages[i]));
+        i = 0;
+        for_each_sg(st->sgl, sgl, st->nents, j) {
+                unsigned long hmm_pfn, size;
+
+                hmm_pfn = range->hmm_pfns[i];
+                page = hmm_pfn_to_page(hmm_pfn);
+                xe_assert(xe, !is_device_private_page(page));
+
+                size = 1UL << hmm_pfn_to_map_order(hmm_pfn);
+                size -= page_to_pfn(page) & (size - 1);
+                i += size;
+
+                if (unlikely(j == st->nents - 1)) {
+                        if (i > npages)
+                                size -= (i - npages);
+                        sg_mark_end(sgl);
+                }
+                sg_set_page(sgl, page, size << PAGE_SHIFT, 0);
         }
+        xe_assert(xe, i == npages);
 
-        ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0, npages << PAGE_SHIFT,
-                                                xe_sg_segment_size(dev), GFP_KERNEL);
-        if (ret)
-                goto free_pages;
+        return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
+                               DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
+}
+
+static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma)
+{
+        struct xe_userptr *userptr = &uvma->userptr;
+        struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+
+        lockdep_assert_held_write(&vm->lock);
+        lockdep_assert_held(&vm->userptr.notifier_lock);
+
+        mutex_lock(&userptr->unmap_mutex);
+        xe_assert(vm->xe, !userptr->mapped);
+        userptr->mapped = true;
+        mutex_unlock(&userptr->unmap_mutex);
+}
+
+void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma)
+{
+        struct xe_userptr *userptr = &uvma->userptr;
+        struct xe_vma *vma = &uvma->vma;
+        bool write = !xe_vma_read_only(vma);
+        struct xe_vm *vm = xe_vma_vm(vma);
+        struct xe_device *xe = vm->xe;
 
-        ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
-                              DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
-        if (ret) {
-                sg_free_table(st);
-                st = NULL;
+        if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) &&
+            !lockdep_is_held_type(&vm->lock, 0) &&
+            !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
+                /* Don't unmap in exec critical section. */
+                xe_vm_assert_held(vm);
+                /* Don't unmap while mapping the sg. */
+                lockdep_assert_held(&vm->lock);
         }
 
-free_pages:
-        kvfree(pages);
-        return ret;
+        mutex_lock(&userptr->unmap_mutex);
+        if (userptr->sg && userptr->mapped)
+                dma_unmap_sgtable(xe->drm.dev, userptr->sg,
+                                  write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0);
+        userptr->mapped = false;
+        mutex_unlock(&userptr->unmap_mutex);
 }
 
-/*
+/**
  * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr
- *
  * @uvma: the userptr vma which hold the scatter gather table
  *
  * With function xe_userptr_populate_range, we allocate storage of
@@ -124,16 +200,9 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
 void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma)
 {
         struct xe_userptr *userptr = &uvma->userptr;
-        struct xe_vma *vma = &uvma->vma;
-        bool write = !xe_vma_read_only(vma);
-        struct xe_vm *vm = xe_vma_vm(vma);
-        struct xe_device *xe = vm->xe;
-        struct device *dev = xe->drm.dev;
-
-        xe_assert(xe, userptr->sg);
-        dma_unmap_sgtable(dev, userptr->sg,
-                          write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0);
 
+        xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg);
+        xe_hmm_userptr_unmap(uvma);
         sg_free_table(userptr->sg);
         userptr->sg = NULL;
 }
@@ -166,13 +235,20 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
 {
         unsigned long timeout =
                 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
-        unsigned long *pfns, flags = HMM_PFN_REQ_FAULT;
+        unsigned long *pfns;
         struct xe_userptr *userptr;
         struct xe_vma *vma = &uvma->vma;
         u64 userptr_start = xe_vma_userptr(vma);
         u64 userptr_end = userptr_start + xe_vma_size(vma);
         struct xe_vm *vm = xe_vma_vm(vma);
-        struct hmm_range hmm_range;
+        struct hmm_range hmm_range = {
+                .pfn_flags_mask = 0, /* ignore pfns */
+                .default_flags = HMM_PFN_REQ_FAULT,
+                .start = userptr_start,
+                .end = userptr_end,
+                .notifier = &uvma->userptr.notifier,
+                .dev_private_owner = vm->xe,
+        };
         bool write = !xe_vma_read_only(vma);
         unsigned long notifier_seq;
         u64 npages;
@@ -199,19 +275,14 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
                 return -ENOMEM;
 
         if (write)
-                flags |= HMM_PFN_REQ_WRITE;
+                hmm_range.default_flags |= HMM_PFN_REQ_WRITE;
 
         if (!mmget_not_zero(userptr->notifier.mm)) {
                 ret = -EFAULT;
                 goto free_pfns;
         }
 
-        hmm_range.default_flags = flags;
         hmm_range.hmm_pfns = pfns;
-        hmm_range.notifier = &userptr->notifier;
-        hmm_range.start = userptr_start;
-        hmm_range.end = userptr_end;
-        hmm_range.dev_private_owner = vm->xe;
 
         while (true) {
                 hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier);
@@ -238,16 +309,37 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
         if (ret)
                 goto free_pfns;
 
-        ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, write);
+        ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock);
         if (ret)
                 goto free_pfns;
 
+        ret = down_read_interruptible(&vm->userptr.notifier_lock);
+        if (ret)
+                goto free_st;
+
+        if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) {
+                ret = -EAGAIN;
+                goto out_unlock;
+        }
+
+        ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt,
+                          &vm->userptr.notifier_lock, write);
+        if (ret)
+                goto out_unlock;
+
         xe_mark_range_accessed(&hmm_range, write);
         userptr->sg = &userptr->sgt;
+        xe_hmm_userptr_set_mapped(uvma);
         userptr->notifier_seq = hmm_range.notifier_seq;
+        up_read(&vm->userptr.notifier_lock);
+        kvfree(pfns);
+        return 0;
 
+out_unlock:
+        up_read(&vm->userptr.notifier_lock);
+free_st:
+        sg_free_table(&userptr->sgt);
 free_pfns:
         kvfree(pfns);
         return ret;
 }
-

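The reworked xe_hmm_userptr_populate_range() above follows the standard mmu_interval notifier pattern: fault the pages, then take the driver's notifier lock, re-check the notifier sequence, and only then build, dma-map, and publish the sg table. Below is a minimal, self-contained sketch of that pattern, not the xe code itself; the function name populate_sketch and the bare notifier_sem parameter are illustrative, the hmm_range is assumed to be fully initialised by the caller (start, end, notifier, default_flags, hmm_pfns), and the real driver additionally bounds the retries with a timeout and frees the sg table on failure, as the hunks show.

#include <linux/hmm.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/rwsem.h>

/* Illustrative sketch of the fault / re-check / publish loop. */
static int populate_sketch(struct hmm_range *range, struct rw_semaphore *notifier_sem)
{
        struct mmu_interval_notifier *notifier = range->notifier;
        int ret;

        /* Caller is assumed to hold a reference on notifier->mm (mmget). */
        while (true) {
                /* Snapshot the invalidation sequence before faulting. */
                range->notifier_seq = mmu_interval_read_begin(notifier);

                mmap_read_lock(notifier->mm);
                ret = hmm_range_fault(range);
                mmap_read_unlock(notifier->mm);
                if (ret == -EBUSY)
                        continue;       /* range changed mid-fault; fault again */
                if (ret)
                        return ret;

                /*
                 * Publish only while holding the notifier lock, and only if
                 * no invalidation ran since mmu_interval_read_begin().
                 */
                ret = down_read_interruptible(notifier_sem);
                if (ret)
                        return ret;
                if (!mmu_interval_read_retry(notifier, range->notifier_seq))
                        break;          /* still valid; safe to use the pfns */
                up_read(notifier_sem);  /* invalidated; drop the lock and retry */
        }

        /* ... build and dma-map the sg table, mark the userptr mapped ... */
        up_read(notifier_sem);
        return 0;
}

Holding vm->userptr.notifier_lock across the sequence re-check, xe_build_sg() and xe_hmm_userptr_set_mapped() appears to be what closes the races named in the pull summary: an invalidation arriving after hmm_range_fault() either bumps the sequence (forcing the -EAGAIN path) or serialises against the read-held lock, while the new unmap_mutex and mapped flag keep dma_unmap_sgtable() from racing with the mapping path.
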
drivers/gpu/drm/xe/xe_hmm.h
Lines changed: 7 additions & 0 deletions

@@ -3,9 +3,16 @@
  * Copyright © 2024 Intel Corporation
  */
 
+#ifndef _XE_HMM_H_
+#define _XE_HMM_H_
+
 #include <linux/types.h>
 
 struct xe_userptr_vma;
 
 int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked);
+
 void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma);
+
+void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma);
+#endif
