Skip to content

Commit f18b4ae

Browse files
committed
kvm: selftests: do not use bitfields larger than 32-bits for PTEs
Red Hat's QE team reported a test failure in access_tracking_perf_test:

  Testing guest mode: PA-bits:ANY, VA-bits:48, 4K pages
  guest physical test memory offset: 0x3fffbffff000

  Populating memory             : 0.684014577s
  Writing to populated memory   : 0.006230175s
  Reading from populated memory : 0.004557805s
  ==== Test Assertion Failure ====
    lib/kvm_util.c:1411: false
    pid=125806 tid=125809 errno=4 - Interrupted system call
       1  0x0000000000402f7c: addr_gpa2hva at kvm_util.c:1411
       2   (inlined by) addr_gpa2hva at kvm_util.c:1405
       3  0x0000000000401f52: lookup_pfn at access_tracking_perf_test.c:98
       4   (inlined by) mark_vcpu_memory_idle at access_tracking_perf_test.c:152
       5   (inlined by) vcpu_thread_main at access_tracking_perf_test.c:232
       6  0x00007fefe9ff81ce: ?? ??:0
       7  0x00007fefe9c64d82: ?? ??:0
    No vm physical memory at 0xffbffff000

I can easily reproduce it with an Intel(R) Xeon(R) CPU E5-2630 with 46 bits PA.

It turns out that the address translation for clearing idle page tracking returned a wrong result: addr_gva2gpa()'s last step, which is based on "pte[index[0]].pfn", did the calculation with 40 bits length, so the high 12 bits were truncated. In the case above, the GPA to be returned for GVA 0xc0000000 should have been 0x3fffbffff000, but it was truncated to 0xffbffff000 and the subsequent gpa2hva lookup failed.

The width of operations on bit fields greater than 32 bits is implementation defined, and differs between GCC (which uses the bitfield precision) and clang (which uses 64-bit arithmetic), so this is a potential minefield. Remove the bit fields and use manual masking instead.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2075036
Reported-by: Nana Liu <nanliu@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Tested-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent 683412c commit f18b4ae

File tree

2 files changed

+92
-115
lines changed

2 files changed

+92
-115
lines changed

tools/testing/selftests/kvm/include/x86_64/processor.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,21 @@
6060
/* CPUID.0x8000_0001.EDX */
6161
#define CPUID_GBPAGES (1ul << 26)
6262

63+
/* Page table bitfield declarations */
64+
#define PTE_PRESENT_MASK BIT_ULL(0)
65+
#define PTE_WRITABLE_MASK BIT_ULL(1)
66+
#define PTE_USER_MASK BIT_ULL(2)
67+
#define PTE_ACCESSED_MASK BIT_ULL(5)
68+
#define PTE_DIRTY_MASK BIT_ULL(6)
69+
#define PTE_LARGE_MASK BIT_ULL(7)
70+
#define PTE_GLOBAL_MASK BIT_ULL(8)
71+
#define PTE_NX_MASK BIT_ULL(63)
72+
73+
#define PAGE_SHIFT 12
74+
75+
#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)
76+
#define PTE_GET_PFN(pte) (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
77+
6378
/* General Registers in 64-Bit Mode */
6479
struct gpr64_regs {
6580
u64 rax;

tools/testing/selftests/kvm/lib/x86_64/processor.c

Lines changed: 77 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -19,38 +19,6 @@
1919

2020
vm_vaddr_t exception_handlers;
2121

22-
/* Virtual translation table structure declarations */
23-
struct pageUpperEntry {
24-
uint64_t present:1;
25-
uint64_t writable:1;
26-
uint64_t user:1;
27-
uint64_t write_through:1;
28-
uint64_t cache_disable:1;
29-
uint64_t accessed:1;
30-
uint64_t ignored_06:1;
31-
uint64_t page_size:1;
32-
uint64_t ignored_11_08:4;
33-
uint64_t pfn:40;
34-
uint64_t ignored_62_52:11;
35-
uint64_t execute_disable:1;
36-
};
37-
38-
struct pageTableEntry {
39-
uint64_t present:1;
40-
uint64_t writable:1;
41-
uint64_t user:1;
42-
uint64_t write_through:1;
43-
uint64_t cache_disable:1;
44-
uint64_t accessed:1;
45-
uint64_t dirty:1;
46-
uint64_t reserved_07:1;
47-
uint64_t global:1;
48-
uint64_t ignored_11_09:3;
49-
uint64_t pfn:40;
50-
uint64_t ignored_62_52:11;
51-
uint64_t execute_disable:1;
52-
};
53-
5422
void regs_dump(FILE *stream, struct kvm_regs *regs,
5523
uint8_t indent)
5624
{
@@ -195,23 +163,21 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
195163
return &page_table[index];
196164
}
197165

198-
static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
199-
uint64_t pt_pfn,
200-
uint64_t vaddr,
201-
uint64_t paddr,
202-
int level,
203-
enum x86_page_size page_size)
166+
static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
167+
uint64_t pt_pfn,
168+
uint64_t vaddr,
169+
uint64_t paddr,
170+
int level,
171+
enum x86_page_size page_size)
204172
{
205-
struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
206-
207-
if (!pte->present) {
208-
pte->writable = true;
209-
pte->present = true;
210-
pte->page_size = (level == page_size);
211-
if (pte->page_size)
212-
pte->pfn = paddr >> vm->page_shift;
173+
uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
174+
175+
if (!(*pte & PTE_PRESENT_MASK)) {
176+
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
177+
if (level == page_size)
178+
*pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
213179
else
214-
pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
180+
*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
215181
} else {
216182
/*
217183
* Entry already present. Assert that the caller doesn't want
@@ -221,7 +187,7 @@ static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
221187
TEST_ASSERT(level != page_size,
222188
"Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
223189
page_size, vaddr);
224-
TEST_ASSERT(!pte->page_size,
190+
TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
225191
"Cannot create page table at level: %u, vaddr: 0x%lx\n",
226192
level, vaddr);
227193
}
@@ -232,8 +198,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
232198
enum x86_page_size page_size)
233199
{
234200
const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
235-
struct pageUpperEntry *pml4e, *pdpe, *pde;
236-
struct pageTableEntry *pte;
201+
uint64_t *pml4e, *pdpe, *pde;
202+
uint64_t *pte;
237203

238204
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
239205
"Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -257,37 +223,35 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
257223
*/
258224
pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
259225
vaddr, paddr, 3, page_size);
260-
if (pml4e->page_size)
226+
if (*pml4e & PTE_LARGE_MASK)
261227
return;
262228

263-
pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
264-
if (pdpe->page_size)
229+
pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
230+
if (*pdpe & PTE_LARGE_MASK)
265231
return;
266232

267-
pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
268-
if (pde->page_size)
233+
pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
234+
if (*pde & PTE_LARGE_MASK)
269235
return;
270236

271237
/* Fill in page table entry. */
272-
pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
273-
TEST_ASSERT(!pte->present,
238+
pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
239+
TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
274240
"PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
275-
pte->pfn = paddr >> vm->page_shift;
276-
pte->writable = true;
277-
pte->present = 1;
241+
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
278242
}
279243

280244
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
281245
{
282246
__virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
283247
}
284248

285-
static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
249+
static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
286250
uint64_t vaddr)
287251
{
288252
uint16_t index[4];
289-
struct pageUpperEntry *pml4e, *pdpe, *pde;
290-
struct pageTableEntry *pte;
253+
uint64_t *pml4e, *pdpe, *pde;
254+
uint64_t *pte;
291255
struct kvm_cpuid_entry2 *entry;
292256
struct kvm_sregs sregs;
293257
int max_phy_addr;
@@ -329,57 +293,55 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
329293
index[3] = (vaddr >> 39) & 0x1ffu;
330294

331295
pml4e = addr_gpa2hva(vm, vm->pgd);
332-
TEST_ASSERT(pml4e[index[3]].present,
296+
TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
333297
"Expected pml4e to be present for gva: 0x%08lx", vaddr);
334-
TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
335-
(rsvd_mask | (1ull << 7))) == 0,
298+
TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0,
336299
"Unexpected reserved bits set.");
337300

338-
pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
339-
TEST_ASSERT(pdpe[index[2]].present,
301+
pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
302+
TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
340303
"Expected pdpe to be present for gva: 0x%08lx", vaddr);
341-
TEST_ASSERT(pdpe[index[2]].page_size == 0,
304+
TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
342305
"Expected pdpe to map a pde not a 1-GByte page.");
343-
TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
306+
TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0,
344307
"Unexpected reserved bits set.");
345308

346-
pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
347-
TEST_ASSERT(pde[index[1]].present,
309+
pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
310+
TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
348311
"Expected pde to be present for gva: 0x%08lx", vaddr);
349-
TEST_ASSERT(pde[index[1]].page_size == 0,
312+
TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
350313
"Expected pde to map a pte not a 2-MByte page.");
351-
TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
314+
TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0,
352315
"Unexpected reserved bits set.");
353316

354-
pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
355-
TEST_ASSERT(pte[index[0]].present,
317+
pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
318+
TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
356319
"Expected pte to be present for gva: 0x%08lx", vaddr);
357320

358321
return &pte[index[0]];
359322
}
360323

361324
uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
362325
{
363-
struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
326+
uint64_t *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
364327

365328
return *(uint64_t *)pte;
366329
}
367330

368331
void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
369332
uint64_t pte)
370333
{
371-
struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
372-
vaddr);
334+
uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
373335

374336
*(uint64_t *)new_pte = pte;
375337
}
376338

377339
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
378340
{
379-
struct pageUpperEntry *pml4e, *pml4e_start;
380-
struct pageUpperEntry *pdpe, *pdpe_start;
381-
struct pageUpperEntry *pde, *pde_start;
382-
struct pageTableEntry *pte, *pte_start;
341+
uint64_t *pml4e, *pml4e_start;
342+
uint64_t *pdpe, *pdpe_start;
343+
uint64_t *pde, *pde_start;
344+
uint64_t *pte, *pte_start;
383345

384346
if (!vm->pgd_created)
385347
return;
@@ -389,58 +351,58 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
389351
fprintf(stream, "%*s index hvaddr gpaddr "
390352
"addr w exec dirty\n",
391353
indent, "");
392-
pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
354+
pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
393355
for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
394356
pml4e = &pml4e_start[n1];
395-
if (!pml4e->present)
357+
if (!(*pml4e & PTE_PRESENT_MASK))
396358
continue;
397-
fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
359+
fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
398360
" %u\n",
399361
indent, "",
400362
pml4e - pml4e_start, pml4e,
401-
addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
402-
pml4e->writable, pml4e->execute_disable);
363+
addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
364+
!!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
403365

404-
pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
366+
pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
405367
for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
406368
pdpe = &pdpe_start[n2];
407-
if (!pdpe->present)
369+
if (!(*pdpe & PTE_PRESENT_MASK))
408370
continue;
409-
fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
371+
fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
410372
"%u %u\n",
411373
indent, "",
412374
pdpe - pdpe_start, pdpe,
413375
addr_hva2gpa(vm, pdpe),
414-
(uint64_t) pdpe->pfn, pdpe->writable,
415-
pdpe->execute_disable);
376+
PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
377+
!!(*pdpe & PTE_NX_MASK));
416378

417-
pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
379+
pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
418380
for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
419381
pde = &pde_start[n3];
420-
if (!pde->present)
382+
if (!(*pde & PTE_PRESENT_MASK))
421383
continue;
422384
fprintf(stream, "%*spde 0x%-3zx %p "
423-
"0x%-12lx 0x%-10lx %u %u\n",
385+
"0x%-12lx 0x%-10llx %u %u\n",
424386
indent, "", pde - pde_start, pde,
425387
addr_hva2gpa(vm, pde),
426-
(uint64_t) pde->pfn, pde->writable,
427-
pde->execute_disable);
388+
PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
389+
!!(*pde & PTE_NX_MASK));
428390

429-
pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
391+
pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
430392
for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
431393
pte = &pte_start[n4];
432-
if (!pte->present)
394+
if (!(*pte & PTE_PRESENT_MASK))
433395
continue;
434396
fprintf(stream, "%*spte 0x%-3zx %p "
435-
"0x%-12lx 0x%-10lx %u %u "
397+
"0x%-12lx 0x%-10llx %u %u "
436398
" %u 0x%-10lx\n",
437399
indent, "",
438400
pte - pte_start, pte,
439401
addr_hva2gpa(vm, pte),
440-
(uint64_t) pte->pfn,
441-
pte->writable,
442-
pte->execute_disable,
443-
pte->dirty,
402+
PTE_GET_PFN(*pte),
403+
!!(*pte & PTE_WRITABLE_MASK),
404+
!!(*pte & PTE_NX_MASK),
405+
!!(*pte & PTE_DIRTY_MASK),
444406
((uint64_t) n1 << 27)
445407
| ((uint64_t) n2 << 18)
446408
| ((uint64_t) n3 << 9)
@@ -558,8 +520,8 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
558520
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
559521
{
560522
uint16_t index[4];
561-
struct pageUpperEntry *pml4e, *pdpe, *pde;
562-
struct pageTableEntry *pte;
523+
uint64_t *pml4e, *pdpe, *pde;
524+
uint64_t *pte;
563525

564526
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
565527
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -572,22 +534,22 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
572534
if (!vm->pgd_created)
573535
goto unmapped_gva;
574536
pml4e = addr_gpa2hva(vm, vm->pgd);
575-
if (!pml4e[index[3]].present)
537+
if (!(pml4e[index[3]] & PTE_PRESENT_MASK))
576538
goto unmapped_gva;
577539

578-
pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
579-
if (!pdpe[index[2]].present)
540+
pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
541+
if (!(pdpe[index[2]] & PTE_PRESENT_MASK))
580542
goto unmapped_gva;
581543

582-
pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
583-
if (!pde[index[1]].present)
544+
pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
545+
if (!(pde[index[1]] & PTE_PRESENT_MASK))
584546
goto unmapped_gva;
585547

586-
pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
587-
if (!pte[index[0]].present)
548+
pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
549+
if (!(pte[index[0]] & PTE_PRESENT_MASK))
588550
goto unmapped_gva;
589551

590-
return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
552+
return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & 0xfffu);
591553

592554
unmapped_gva:
593555
TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);

0 commit comments

Comments
 (0)