Skip to content

Commit 2a8859f

Browse files
committed
KVM: x86/mmu: do compare-and-exchange of gPTE via the user address
FNAME(cmpxchg_gpte) is an inefficient mess. It is at least decent if it can go through get_user_pages_fast(), but if it cannot then it tries to use memremap(); that is not just terribly slow, it is also wrong because it assumes that the VM_PFNMAP VMA is contiguous. The right way to do it would be to do the same thing as hva_to_pfn_remapped() does since commit add6a0c ("KVM: MMU: try to fix up page faults before giving up", 2016-07-05), using follow_pte() and fixup_user_fault() to determine the correct address to use for memremap(). To do this, one could for example extract hva_to_pfn() for use outside virt/kvm/kvm_main.c. But really there is no reason to do that either, because there is already a perfectly valid address to do the cmpxchg() on, only it is a userspace address. That means doing user_access_begin()/user_access_end() and writing the code in assembly to handle exceptions correctly. Worse, the guest PTE can be 8-byte even on i686 so there is the extra complication of using cmpxchg8b to account for. But at least it is an efficient mess. (Thanks to Linus for suggesting improvement on the inline assembly). Reported-by: Qiuhao Li <qiuhao@sysec.org> Reported-by: Gaoning Pan <pgn@zju.edu.cn> Reported-by: Yongkang Jia <kangel@zju.edu.cn> Reported-by: syzbot+6cde2282daa792c49ab8@syzkaller.appspotmail.com Debugged-by: Tadeusz Struk <tadeusz.struk@linaro.org> Tested-by: Maxim Levitsky <mlevitsk@redhat.com> Cc: stable@vger.kernel.org Fixes: bd53cb3 ("X86/KVM: Handle PFNs outside of kernel reach when touching GPTEs") Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent 4335edb commit 2a8859f

File tree

1 file changed

+34
-40
lines changed

1 file changed

+34
-40
lines changed

arch/x86/kvm/mmu/paging_tmpl.h

Lines changed: 34 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,8 @@
3434
#define PT_HAVE_ACCESSED_DIRTY(mmu) true
3535
#ifdef CONFIG_X86_64
3636
#define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
37-
#define CMPXCHG cmpxchg
37+
#define CMPXCHG "cmpxchgq"
3838
#else
39-
#define CMPXCHG cmpxchg64
4039
#define PT_MAX_FULL_LEVELS 2
4140
#endif
4241
#elif PTTYPE == 32
@@ -52,7 +51,7 @@
5251
#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
5352
#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
5453
#define PT_HAVE_ACCESSED_DIRTY(mmu) true
55-
#define CMPXCHG cmpxchg
54+
#define CMPXCHG "cmpxchgl"
5655
#elif PTTYPE == PTTYPE_EPT
5756
#define pt_element_t u64
5857
#define guest_walker guest_walkerEPT
@@ -65,7 +64,9 @@
6564
#define PT_GUEST_DIRTY_SHIFT 9
6665
#define PT_GUEST_ACCESSED_SHIFT 8
6766
#define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
68-
#define CMPXCHG cmpxchg64
67+
#ifdef CONFIG_X86_64
68+
#define CMPXCHG "cmpxchgq"
69+
#endif
6970
#define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
7071
#else
7172
#error Invalid PTTYPE value
@@ -147,43 +148,36 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
147148
pt_element_t __user *ptep_user, unsigned index,
148149
pt_element_t orig_pte, pt_element_t new_pte)
149150
{
150-
int npages;
151-
pt_element_t ret;
152-
pt_element_t *table;
153-
struct page *page;
154-
155-
npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page);
156-
if (likely(npages == 1)) {
157-
table = kmap_atomic(page);
158-
ret = CMPXCHG(&table[index], orig_pte, new_pte);
159-
kunmap_atomic(table);
160-
161-
kvm_release_page_dirty(page);
162-
} else {
163-
struct vm_area_struct *vma;
164-
unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK;
165-
unsigned long pfn;
166-
unsigned long paddr;
167-
168-
mmap_read_lock(current->mm);
169-
vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE);
170-
if (!vma || !(vma->vm_flags & VM_PFNMAP)) {
171-
mmap_read_unlock(current->mm);
172-
return -EFAULT;
173-
}
174-
pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
175-
paddr = pfn << PAGE_SHIFT;
176-
table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB);
177-
if (!table) {
178-
mmap_read_unlock(current->mm);
179-
return -EFAULT;
180-
}
181-
ret = CMPXCHG(&table[index], orig_pte, new_pte);
182-
memunmap(table);
183-
mmap_read_unlock(current->mm);
184-
}
151+
signed char r;
185152

186-
return (ret != orig_pte);
153+
if (!user_access_begin(ptep_user, sizeof(pt_element_t)))
154+
return -EFAULT;
155+
156+
#ifdef CMPXCHG
157+
asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n"
158+
"setnz %b[r]\n"
159+
"2:"
160+
_ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r])
161+
: [ptr] "+m" (*ptep_user),
162+
[old] "+a" (orig_pte),
163+
[r] "=q" (r)
164+
: [new] "r" (new_pte)
165+
: "memory");
166+
#else
167+
asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n"
168+
"setnz %b[r]\n"
169+
"2:"
170+
_ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r])
171+
: [ptr] "+m" (*ptep_user),
172+
[old] "+A" (orig_pte),
173+
[r] "=q" (r)
174+
: [new_lo] "b" ((u32)new_pte),
175+
[new_hi] "c" ((u32)(new_pte >> 32))
176+
: "memory");
177+
#endif
178+
179+
user_access_end();
180+
return r;
187181
}
188182

189183
static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,

0 commit comments

Comments
 (0)