
Commit b6c304a

KVM: selftests: Verify KVM correctly handles mprotect(PROT_READ)
Add two phases to mmu_stress_test to verify that KVM correctly handles
guest memory that was writable, then made read-only in the primary MMU,
and then made writable again.

Add bonus coverage for x86 and arm64 to verify that all of guest memory
was marked read-only. Making forward progress (without making memory
writable) requires arch-specific code to skip over the faulting
instruction, but the test can at least verify each vCPU's starting page
was made read-only for other architectures.

Link: https://lore.kernel.org/r/20241128005547.4077116-14-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent 80b7859 commit b6c304a
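Annotation: for orientation, a hedged sketch of the sync points the new code uses, as inferred from the GUEST_SYNC()/run_vcpu() calls in the diff below. The enum and its names are invented for illustration; the selftest itself passes bare integers.

    /* Hypothetical labels for the GUEST_SYNC() stages visible in this diff. */
    enum mprotect_stage {
            STAGE_READ_RO  = 2,     /* read all of guest memory, now read-only */
            STAGE_WRITE_RO = 3,     /* writes must fail with -EFAULT from KVM_RUN */
            STAGE_WRITE_RW = 4,     /* writes succeed once PROT_WRITE is restored */
    };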

File tree

1 file changed: +101 -3 lines changed


tools/testing/selftests/kvm/mmu_stress_test.c

Lines changed: 101 additions & 3 deletions
@@ -17,6 +17,8 @@
 #include "processor.h"
 #include "ucall_common.h"
 
+static bool mprotect_ro_done;
+
 static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
 {
         uint64_t gpa;
@@ -32,6 +34,42 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
                 *((volatile uint64_t *)gpa);
         GUEST_SYNC(2);
 
+        /*
+         * Write to the region while mprotect(PROT_READ) is underway.  Keep
+         * looping until the memory is guaranteed to be read-only, otherwise
+         * vCPUs may complete their writes and advance to the next stage
+         * prematurely.
+         *
+         * For architectures that support skipping the faulting instruction,
+         * generate the store via inline assembly to ensure the exact length
+         * of the instruction is known and stable (vcpu_arch_put_guest() on
+         * fixed-length architectures should work, but the cost of paranoia
+         * is low in this case).  For x86, hand-code the exact opcode so that
+         * there is no room for variability in the generated instruction.
+         */
+        do {
+                for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+#ifdef __x86_64__
+                        asm volatile(".byte 0x48,0x89,0x00" :: "a"(gpa) : "memory"); /* mov %rax, (%rax) */
+#elif defined(__aarch64__)
+                        asm volatile("str %0, [%0]" :: "r" (gpa) : "memory");
+#else
+                        vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+#endif
+        } while (!READ_ONCE(mprotect_ro_done));
+
+        /*
+         * Only architectures that write the entire range can explicitly sync,
+         * as other architectures will be stuck on the write fault.
+         */
+#if defined(__x86_64__) || defined(__aarch64__)
+        GUEST_SYNC(3);
+#endif
+
+        for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+                vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+        GUEST_SYNC(4);
+
         GUEST_ASSERT(0);
 }

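Annotation: the hand-coded bytes 0x48,0x89,0x00 are REX.W + opcode 0x89 with ModRM 0x00, i.e. a 3-byte mov %rax, (%rax); the host side below relies on that fixed length when it advances RIP by exactly 3 to skip the faulting store. A standalone userspace sketch (not part of the selftest) of the same skip-by-known-length trick, using a SIGSEGV handler in place of a KVM exit:

    #define _GNU_SOURCE
    #include <signal.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <ucontext.h>

    static void segv_handler(int sig, siginfo_t *si, void *ucontext)
    {
            ucontext_t *uc = ucontext;

            /* Skip the faulting store: 0x48,0x89,0x00 is exactly 3 bytes. */
            uc->uc_mcontext.gregs[REG_RIP] += 3;
    }

    int main(void)
    {
            struct sigaction sa = {
                    .sa_sigaction = segv_handler,
                    .sa_flags = SA_SIGINFO,
            };
            /* A read-only page, standing in for guest memory after mprotect(). */
            void *page = mmap(NULL, 4096, PROT_READ,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            sigaction(SIGSEGV, &sa, NULL);
            asm volatile(".byte 0x48,0x89,0x00" :: "a"(page) : "memory");
            printf("skipped the faulting mov %%rax, (%%rax)\n");
            return 0;
    }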
@@ -79,6 +117,7 @@ static void *vcpu_worker(void *data)
         struct vcpu_info *info = data;
         struct kvm_vcpu *vcpu = info->vcpu;
         struct kvm_vm *vm = vcpu->vm;
+        int r;
 
         vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size);
 
@@ -101,6 +140,57 @@ static void *vcpu_worker(void *data)
 
         /* Stage 2, read all of guest memory, which is now read-only. */
         run_vcpu(vcpu, 2);
+
+        /*
+         * Stage 3, write guest memory and verify KVM returns -EFAULT once
+         * the mprotect(PROT_READ) lands.  Only architectures that support
+         * validating *all* of guest memory sync for this stage, as vCPUs will
+         * be stuck on the faulting instruction for other architectures.  Go to
+         * stage 3 without a rendezvous.
+         */
+        do {
+                r = _vcpu_run(vcpu);
+        } while (!r);
+        TEST_ASSERT(r == -1 && errno == EFAULT,
+                    "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno);
+
+#if defined(__x86_64__) || defined(__aarch64__)
+        /*
+         * Verify *all* writes from the guest hit EFAULT due to the VMA now
+         * being read-only.  x86 and arm64 only at this time as skipping the
+         * instruction that hits the EFAULT requires advancing the program
+         * counter, which is arch-specific and relies on inline assembly.
+         */
+#ifdef __x86_64__
+        vcpu->run->kvm_valid_regs = KVM_SYNC_X86_REGS;
+#endif
+        for (;;) {
+                r = _vcpu_run(vcpu);
+                if (!r)
+                        break;
+                TEST_ASSERT_EQ(errno, EFAULT);
+#if defined(__x86_64__)
+                WRITE_ONCE(vcpu->run->kvm_dirty_regs, KVM_SYNC_X86_REGS);
+                vcpu->run->s.regs.regs.rip += 3;
+#elif defined(__aarch64__)
+                vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc),
+                             vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)) + 4);
+#endif
+
+        }
+        assert_sync_stage(vcpu, 3);
+#endif /* __x86_64__ || __aarch64__ */
+        rendezvous_with_boss();
+
+        /*
+         * Stage 4.  Run to completion, waiting for mprotect(PROT_WRITE) to
+         * make the memory writable again.
+         */
+        do {
+                r = _vcpu_run(vcpu);
+        } while (r && errno == EFAULT);
+        TEST_ASSERT_EQ(r, 0);
+        assert_sync_stage(vcpu, 4);
         rendezvous_with_boss();
 
         return NULL;
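Annotation: the loops above key off -1/errno because _vcpu_run() surfaces the raw result of the KVM_RUN ioctl. A minimal sketch of the same retry pattern against a bare vCPU fd (hypothetical helper name, not selftest code):

    #include <errno.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hypothetical helper: re-enter the guest for as long as KVM_RUN fails
     * with EFAULT, i.e. until the boss thread restores PROT_WRITE. */
    static int run_until_writable(int vcpu_fd)
    {
            int r;

            do {
                    r = ioctl(vcpu_fd, KVM_RUN, NULL);
            } while (r == -1 && errno == EFAULT);
            return r;
    }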
@@ -183,7 +273,7 @@ int main(int argc, char *argv[])
         const uint64_t start_gpa = SZ_4G;
         const int first_slot = 1;
 
-        struct timespec time_start, time_run1, time_reset, time_run2, time_ro;
+        struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw;
         uint64_t max_gpa, gpa, slot_size, max_mem, i;
         int max_slots, slot, opt, fd;
         bool hugepages = false;
@@ -288,19 +378,27 @@
         rendezvous_with_vcpus(&time_run2, "run 2");
 
         mprotect(mem, slot_size, PROT_READ);
+        usleep(10);
+        mprotect_ro_done = true;
+        sync_global_to_guest(vm, mprotect_ro_done);
+
         rendezvous_with_vcpus(&time_ro, "mprotect RO");
+        mprotect(mem, slot_size, PROT_READ | PROT_WRITE);
+        rendezvous_with_vcpus(&time_rw, "mprotect RW");
 
+        time_rw = timespec_sub(time_rw, time_ro);
         time_ro = timespec_sub(time_ro, time_run2);
         time_run2 = timespec_sub(time_run2, time_reset);
         time_reset = timespec_sub(time_reset, time_run1);
         time_run1 = timespec_sub(time_run1, time_start);
 
         pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds, "
-                "ro = %ld.%.9lds\n",
+                "ro = %ld.%.9lds, rw = %ld.%.9lds\n",
                 time_run1.tv_sec, time_run1.tv_nsec,
                 time_reset.tv_sec, time_reset.tv_nsec,
                 time_run2.tv_sec, time_run2.tv_nsec,
-                time_ro.tv_sec, time_ro.tv_nsec);
+                time_ro.tv_sec, time_ro.tv_nsec,
+                time_rw.tv_sec, time_rw.tv_nsec);
 
         /*
          * Delete even numbered slots (arbitrary) and unmap the first half of
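Annotation: on the boss side, mprotect_ro_done is published only after mprotect(PROT_READ) has returned, so the guests keep hammering stores until the region is guaranteed read-only, and any store issued after the protection change surfaces to the host as EFAULT. A self-contained illustration (independent of KVM) that a read-only mapping yields that same errno when the kernel tries to write through it:

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            char *mem = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            int fd = open("/dev/zero", O_RDONLY);
            ssize_t r;

            /* Writable buffer: the kernel copies into it just fine. */
            r = read(fd, mem, 8);
            printf("rw: read() = %zd\n", r);

            mprotect(mem, 4096, PROT_READ);

            /* Read-only buffer: the same copy now fails with EFAULT. */
            r = read(fd, mem, 8);
            printf("ro: read() = %zd, errno == EFAULT: %d\n", r, errno == EFAULT);
            return 0;
    }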
