Skip to content

Commit f45f8f0

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fix from Paolo Bonzini:

 "A lone fix for a s390 regression. An earlier 6.14 commit stopped
  taking the pte lock for pages that are being converted to secure, but
  it was needed to avoid races. The patch was in development for a while
  and is finally ready, but I wish it was split into 3-4 commits at
  least"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: s390: pv: fix race when making a page secure
2 parents 5fc3193 + abab683 commit f45f8f0

File tree

6 files changed

+151
-144
lines changed

6 files changed

+151
-144
lines changed

arch/s390/include/asm/gmap.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,6 @@ int s390_replace_asce(struct gmap *gmap);
139139
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
140140
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
141141
unsigned long end, bool interruptible);
142-
int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split);
143142
unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level);
144143

145144
/**

arch/s390/include/asm/uv.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -631,7 +631,7 @@ int uv_pin_shared(unsigned long paddr);
631631
int uv_destroy_folio(struct folio *folio);
632632
int uv_destroy_pte(pte_t pte);
633633
int uv_convert_from_secure_pte(pte_t pte);
634-
int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb);
634+
int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb);
635635
int uv_convert_from_secure(unsigned long paddr);
636636
int uv_convert_from_secure_folio(struct folio *folio);
637637

arch/s390/kernel/uv.c

Lines changed: 129 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,39 @@ int uv_convert_from_secure_pte(pte_t pte)
206206
return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte)));
207207
}
208208

209+
/**
210+
* should_export_before_import - Determine whether an export is needed
211+
* before an import-like operation
212+
* @uvcb: the Ultravisor control block of the UVC to be performed
213+
* @mm: the mm of the process
214+
*
215+
* Returns whether an export is needed before every import-like operation.
216+
* This is needed for shared pages, which don't trigger a secure storage
217+
* exception when accessed from a different guest.
218+
*
219+
* Although considered as one, the Unpin Page UVC is not an actual import,
220+
* so it is not affected.
221+
*
222+
* No export is needed also when there is only one protected VM, because the
223+
* page cannot belong to the wrong VM in that case (there is no "other VM"
224+
* it can belong to).
225+
*
226+
* Return: true if an export is needed before every import, otherwise false.
227+
*/
228+
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
229+
{
230+
/*
231+
* The misc feature indicates, among other things, that importing a
232+
* shared page from a different protected VM will automatically also
233+
* transfer its ownership.
234+
*/
235+
if (uv_has_feature(BIT_UV_FEAT_MISC))
236+
return false;
237+
if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
238+
return false;
239+
return atomic_read(&mm->context.protected_count) > 1;
240+
}
241+
209242
/*
210243
* Calculate the expected ref_count for a folio that would otherwise have no
211244
* further pins. This was cribbed from similar functions in other places in
@@ -228,7 +261,7 @@ static int expected_folio_refs(struct folio *folio)
228261
}
229262

230263
/**
231-
* make_folio_secure() - make a folio secure
264+
* __make_folio_secure() - make a folio secure
232265
* @folio: the folio to make secure
233266
* @uvcb: the uvcb that describes the UVC to be used
234267
*
@@ -237,20 +270,18 @@ static int expected_folio_refs(struct folio *folio)
237270
*
238271
* Return: 0 on success;
239272
* -EBUSY if the folio is in writeback or has too many references;
240-
* -E2BIG if the folio is large;
241273
* -EAGAIN if the UVC needs to be attempted again;
242274
* -ENXIO if the address is not mapped;
243275
* -EINVAL if the UVC failed for other reasons.
244276
*
245277
* Context: The caller must hold exactly one extra reference on the folio
246-
* (it's the same logic as split_folio())
278+
* (it's the same logic as split_folio()), and the folio must be
279+
* locked.
247280
*/
248-
int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
281+
static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
249282
{
250283
int expected, cc = 0;
251284

252-
if (folio_test_large(folio))
253-
return -E2BIG;
254285
if (folio_test_writeback(folio))
255286
return -EBUSY;
256287
expected = expected_folio_refs(folio) + 1;
@@ -277,7 +308,98 @@ int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
277308
return -EAGAIN;
278309
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
279310
}
280-
EXPORT_SYMBOL_GPL(make_folio_secure);
311+
312+
static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb)
313+
{
314+
int rc;
315+
316+
if (!folio_trylock(folio))
317+
return -EAGAIN;
318+
if (should_export_before_import(uvcb, mm))
319+
uv_convert_from_secure(folio_to_phys(folio));
320+
rc = __make_folio_secure(folio, uvcb);
321+
folio_unlock(folio);
322+
323+
return rc;
324+
}
325+
326+
/**
327+
* s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split.
328+
* @mm: the mm containing the folio to work on
329+
* @folio: the folio
330+
* @split: whether to split a large folio
331+
*
332+
* Context: Must be called while holding an extra reference to the folio;
333+
* the mm lock should not be held.
334+
* Return: 0 if the folio was split successfully;
335+
* -EAGAIN if the folio was not split successfully but another attempt
336+
* can be made, or if @split was set to false;
337+
* -EINVAL in case of other errors. See split_folio().
338+
*/
339+
static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split)
340+
{
341+
int rc;
342+
343+
lockdep_assert_not_held(&mm->mmap_lock);
344+
folio_wait_writeback(folio);
345+
lru_add_drain_all();
346+
if (split) {
347+
folio_lock(folio);
348+
rc = split_folio(folio);
349+
folio_unlock(folio);
350+
351+
if (rc != -EBUSY)
352+
return rc;
353+
}
354+
return -EAGAIN;
355+
}
356+
357+
int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb)
358+
{
359+
struct vm_area_struct *vma;
360+
struct folio_walk fw;
361+
struct folio *folio;
362+
int rc;
363+
364+
mmap_read_lock(mm);
365+
vma = vma_lookup(mm, hva);
366+
if (!vma) {
367+
mmap_read_unlock(mm);
368+
return -EFAULT;
369+
}
370+
folio = folio_walk_start(&fw, vma, hva, 0);
371+
if (!folio) {
372+
mmap_read_unlock(mm);
373+
return -ENXIO;
374+
}
375+
376+
folio_get(folio);
377+
/*
378+
* Secure pages cannot be huge and userspace should not combine both.
379+
* In case userspace does it anyway this will result in an -EFAULT for
380+
* the unpack. The guest is thus never reaching secure mode.
381+
* If userspace plays dirty tricks and decides to map huge pages at a
382+
* later point in time, it will receive a segmentation fault or
383+
* KVM_RUN will return -EFAULT.
384+
*/
385+
if (folio_test_hugetlb(folio))
386+
rc = -EFAULT;
387+
else if (folio_test_large(folio))
388+
rc = -E2BIG;
389+
else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID))
390+
rc = -ENXIO;
391+
else
392+
rc = make_folio_secure(mm, folio, uvcb);
393+
folio_walk_end(&fw, vma);
394+
mmap_read_unlock(mm);
395+
396+
if (rc == -E2BIG || rc == -EBUSY)
397+
rc = s390_wiggle_split_folio(mm, folio, rc == -E2BIG);
398+
folio_put(folio);
399+
400+
return rc;
401+
}
402+
EXPORT_SYMBOL_GPL(make_hva_secure);
281403

282404
/*
283405
* To be called with the folio locked or with an extra reference! This will

arch/s390/kvm/gmap.c

Lines changed: 6 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -22,117 +22,26 @@
2222

2323
#include "gmap.h"
2424

25-
/**
26-
* should_export_before_import - Determine whether an export is needed
27-
* before an import-like operation
28-
* @uvcb: the Ultravisor control block of the UVC to be performed
29-
* @mm: the mm of the process
30-
*
31-
* Returns whether an export is needed before every import-like operation.
32-
* This is needed for shared pages, which don't trigger a secure storage
33-
* exception when accessed from a different guest.
34-
*
35-
* Although considered as one, the Unpin Page UVC is not an actual import,
36-
* so it is not affected.
37-
*
38-
* No export is needed also when there is only one protected VM, because the
39-
* page cannot belong to the wrong VM in that case (there is no "other VM"
40-
* it can belong to).
41-
*
42-
* Return: true if an export is needed before every import, otherwise false.
43-
*/
44-
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
45-
{
46-
/*
47-
* The misc feature indicates, among other things, that importing a
48-
* shared page from a different protected VM will automatically also
49-
* transfer its ownership.
50-
*/
51-
if (uv_has_feature(BIT_UV_FEAT_MISC))
52-
return false;
53-
if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
54-
return false;
55-
return atomic_read(&mm->context.protected_count) > 1;
56-
}
57-
58-
static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb)
59-
{
60-
struct folio *folio = page_folio(page);
61-
int rc;
62-
63-
/*
64-
* Secure pages cannot be huge and userspace should not combine both.
65-
* In case userspace does it anyway this will result in an -EFAULT for
66-
* the unpack. The guest is thus never reaching secure mode.
67-
* If userspace plays dirty tricks and decides to map huge pages at a
68-
* later point in time, it will receive a segmentation fault or
69-
* KVM_RUN will return -EFAULT.
70-
*/
71-
if (folio_test_hugetlb(folio))
72-
return -EFAULT;
73-
if (folio_test_large(folio)) {
74-
mmap_read_unlock(gmap->mm);
75-
rc = kvm_s390_wiggle_split_folio(gmap->mm, folio, true);
76-
mmap_read_lock(gmap->mm);
77-
if (rc)
78-
return rc;
79-
folio = page_folio(page);
80-
}
81-
82-
if (!folio_trylock(folio))
83-
return -EAGAIN;
84-
if (should_export_before_import(uvcb, gmap->mm))
85-
uv_convert_from_secure(folio_to_phys(folio));
86-
rc = make_folio_secure(folio, uvcb);
87-
folio_unlock(folio);
88-
89-
/*
90-
* In theory a race is possible and the folio might have become
91-
* large again before the folio_trylock() above. In that case, no
92-
* action is performed and -EAGAIN is returned; the callers will
93-
* have to try again later.
94-
* In most cases this implies running the VM again, getting the same
95-
* exception again, and make another attempt in this function.
96-
* This is expected to happen extremely rarely.
97-
*/
98-
if (rc == -E2BIG)
99-
return -EAGAIN;
100-
/* The folio has too many references, try to shake some off */
101-
if (rc == -EBUSY) {
102-
mmap_read_unlock(gmap->mm);
103-
kvm_s390_wiggle_split_folio(gmap->mm, folio, false);
104-
mmap_read_lock(gmap->mm);
105-
return -EAGAIN;
106-
}
107-
108-
return rc;
109-
}
110-
11125
/**
11226
* gmap_make_secure() - make one guest page secure
11327
* @gmap: the guest gmap
11428
* @gaddr: the guest address that needs to be made secure
11529
* @uvcb: the UVCB specifying which operation needs to be performed
11630
*
11731
* Context: needs to be called with kvm->srcu held.
118-
* Return: 0 on success, < 0 in case of error (see __gmap_make_secure()).
32+
* Return: 0 on success, < 0 in case of error.
11933
*/
12034
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
12135
{
12236
struct kvm *kvm = gmap->private;
123-
struct page *page;
124-
int rc = 0;
37+
unsigned long vmaddr;
12538

12639
lockdep_assert_held(&kvm->srcu);
12740

128-
page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
129-
mmap_read_lock(gmap->mm);
130-
if (page)
131-
rc = __gmap_make_secure(gmap, page, uvcb);
132-
kvm_release_page_clean(page);
133-
mmap_read_unlock(gmap->mm);
134-
135-
return rc;
41+
vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
42+
if (kvm_is_error_hva(vmaddr))
43+
return -EFAULT;
44+
return make_hva_secure(gmap->mm, vmaddr, uvcb);
13645
}
13746

13847
int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)

arch/s390/kvm/kvm-s390.c

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4952,6 +4952,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
49524952
{
49534953
unsigned int flags = 0;
49544954
unsigned long gaddr;
4955+
int rc;
49554956

49564957
gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
49574958
if (kvm_s390_cur_gmap_fault_is_write())
@@ -4961,16 +4962,6 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
49614962
case 0:
49624963
vcpu->stat.exit_null++;
49634964
break;
4964-
case PGM_NON_SECURE_STORAGE_ACCESS:
4965-
kvm_s390_assert_primary_as(vcpu);
4966-
/*
4967-
* This is normal operation; a page belonging to a protected
4968-
* guest has not been imported yet. Try to import the page into
4969-
* the protected guest.
4970-
*/
4971-
if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL)
4972-
send_sig(SIGSEGV, current, 0);
4973-
break;
49744965
case PGM_SECURE_STORAGE_ACCESS:
49754966
case PGM_SECURE_STORAGE_VIOLATION:
49764967
kvm_s390_assert_primary_as(vcpu);
@@ -4995,6 +4986,20 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
49954986
send_sig(SIGSEGV, current, 0);
49964987
}
49974988
break;
4989+
case PGM_NON_SECURE_STORAGE_ACCESS:
4990+
kvm_s390_assert_primary_as(vcpu);
4991+
/*
4992+
* This is normal operation; a page belonging to a protected
4993+
* guest has not been imported yet. Try to import the page into
4994+
* the protected guest.
4995+
*/
4996+
rc = gmap_convert_to_secure(vcpu->arch.gmap, gaddr);
4997+
if (rc == -EINVAL)
4998+
send_sig(SIGSEGV, current, 0);
4999+
if (rc != -ENXIO)
5000+
break;
5001+
flags = FAULT_FLAG_WRITE;
5002+
fallthrough;
49985003
case PGM_PROTECTION:
49995004
case PGM_SEGMENT_TRANSLATION:
50005005
case PGM_PAGE_TRANSLATION:

arch/s390/mm/gmap.c

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2626,31 +2626,3 @@ int s390_replace_asce(struct gmap *gmap)
26262626
return 0;
26272627
}
26282628
EXPORT_SYMBOL_GPL(s390_replace_asce);
2629-
2630-
/**
2631-
* kvm_s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split
2632-
* @mm: the mm containing the folio to work on
2633-
* @folio: the folio
2634-
* @split: whether to split a large folio
2635-
*
2636-
* Context: Must be called while holding an extra reference to the folio;
2637-
* the mm lock should not be held.
2638-
*/
2639-
int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split)
2640-
{
2641-
int rc;
2642-
2643-
lockdep_assert_not_held(&mm->mmap_lock);
2644-
folio_wait_writeback(folio);
2645-
lru_add_drain_all();
2646-
if (split) {
2647-
folio_lock(folio);
2648-
rc = split_folio(folio);
2649-
folio_unlock(folio);
2650-
2651-
if (rc != -EBUSY)
2652-
return rc;
2653-
}
2654-
return -EAGAIN;
2655-
}
2656-
EXPORT_SYMBOL_GPL(kvm_s390_wiggle_split_folio);

0 commit comments

Comments
 (0)