@@ -17,6 +17,8 @@
 #include "processor.h"
 #include "ucall_common.h"
 
+static bool mprotect_ro_done;
+
 static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
 {
 	uint64_t gpa;
@@ -32,6 +34,42 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
 		*((volatile uint64_t *)gpa);
 	GUEST_SYNC(2);
 
+	/*
+	 * Write to the region while mprotect(PROT_READ) is underway.  Keep
+	 * looping until the memory is guaranteed to be read-only, otherwise
+	 * vCPUs may complete their writes and advance to the next stage
+	 * prematurely.
+	 *
+	 * For architectures that support skipping the faulting instruction,
+	 * generate the store via inline assembly to ensure the exact length
+	 * of the instruction is known and stable (vcpu_arch_put_guest() on
+	 * fixed-length architectures should work, but the cost of paranoia
+	 * is low in this case).  For x86, hand-code the exact opcode so that
+	 * there is no room for variability in the generated instruction.
+	 */
+	do {
+		for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+#ifdef __x86_64__
+			asm volatile(".byte 0x48,0x89,0x00" :: "a"(gpa) : "memory"); /* mov %rax, (%rax) */
+#elif defined(__aarch64__)
+			asm volatile("str %0, [%0]" :: "r"(gpa) : "memory");
+#else
+			vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+#endif
+	} while (!READ_ONCE(mprotect_ro_done));
+
+	/*
+	 * Only architectures that write the entire range can explicitly sync,
+	 * as other architectures will be stuck on the write fault.
+	 */
+#if defined(__x86_64__) || defined(__aarch64__)
+	GUEST_SYNC(3);
+#endif
+
+	for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+		vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+	GUEST_SYNC(4);
+
 	GUEST_ASSERT(0);
 }
 
@@ -79,6 +117,7 @@ static void *vcpu_worker(void *data)
 	struct vcpu_info *info = data;
 	struct kvm_vcpu *vcpu = info->vcpu;
 	struct kvm_vm *vm = vcpu->vm;
+	int r;
 
 	vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size);
 
@@ -101,6 +140,57 @@ static void *vcpu_worker(void *data)
 
 	/* Stage 2, read all of guest memory, which is now read-only. */
 	run_vcpu(vcpu, 2);
+
+	/*
+	 * Stage 3, write guest memory and verify KVM returns -EFAULT once the
+	 * mprotect(PROT_READ) lands.  Only architectures that support
+	 * validating *all* of guest memory sync for this stage, as vCPUs will
+	 * be stuck on the faulting instruction for other architectures.  Go to
+	 * stage 3 without a rendezvous.
+	 */
+	do {
+		r = _vcpu_run(vcpu);
+	} while (!r);
+	TEST_ASSERT(r == -1 && errno == EFAULT,
+		    "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno);
+
+#if defined(__x86_64__) || defined(__aarch64__)
+	/*
+	 * Verify *all* writes from the guest hit EFAULT due to the VMA now
+	 * being read-only.  x86 and arm64 only at this time as skipping the
+	 * instruction that hits the EFAULT requires advancing the program
+	 * counter, which is arch specific and relies on inline assembly.
+	 */
+#ifdef __x86_64__
+	vcpu->run->kvm_valid_regs = KVM_SYNC_X86_REGS;
+#endif
+	for (;;) {
+		r = _vcpu_run(vcpu);
+		if (!r)
+			break;
+		TEST_ASSERT_EQ(errno, EFAULT);
+#if defined(__x86_64__)
+		WRITE_ONCE(vcpu->run->kvm_dirty_regs, KVM_SYNC_X86_REGS);
+		vcpu->run->s.regs.regs.rip += 3;
+#elif defined(__aarch64__)
+		vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc),
+			     vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)) + 4);
+#endif
+
+	}
+	assert_sync_stage(vcpu, 3);
+#endif /* __x86_64__ || __aarch64__ */
+	rendezvous_with_boss();
+
+	/*
+	 * Stage 4.  Run to completion, waiting for mprotect(PROT_WRITE) to
+	 * make the memory writable again.
+	 */
+	do {
+		r = _vcpu_run(vcpu);
+	} while (r && errno == EFAULT);
+	TEST_ASSERT_EQ(r, 0);
+	assert_sync_stage(vcpu, 4);
 	rendezvous_with_boss();
 
 	return NULL;
@@ -183,7 +273,7 @@ int main(int argc, char *argv[])
 	const uint64_t start_gpa = SZ_4G;
 	const int first_slot = 1;
 
-	struct timespec time_start, time_run1, time_reset, time_run2, time_ro;
+	struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw;
 	uint64_t max_gpa, gpa, slot_size, max_mem, i;
 	int max_slots, slot, opt, fd;
 	bool hugepages = false;
@@ -288,19 +378,27 @@ int main(int argc, char *argv[])
 	rendezvous_with_vcpus(&time_run2, "run 2");
 
 	mprotect(mem, slot_size, PROT_READ);
+	usleep(10);
+	mprotect_ro_done = true;
+	sync_global_to_guest(vm, mprotect_ro_done);
+
 	rendezvous_with_vcpus(&time_ro, "mprotect RO");
+	mprotect(mem, slot_size, PROT_READ | PROT_WRITE);
+	rendezvous_with_vcpus(&time_rw, "mprotect RW");
 
+	time_rw = timespec_sub(time_rw, time_ro);
 	time_ro = timespec_sub(time_ro, time_run2);
 	time_run2 = timespec_sub(time_run2, time_reset);
 	time_reset = timespec_sub(time_reset, time_run1);
 	time_run1 = timespec_sub(time_run1, time_start);
 
 	pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds, "
-		"ro = %ld.%.9lds\n",
+		"ro = %ld.%.9lds, rw = %ld.%.9lds\n",
 		time_run1.tv_sec, time_run1.tv_nsec,
 		time_reset.tv_sec, time_reset.tv_nsec,
 		time_run2.tv_sec, time_run2.tv_nsec,
-		time_ro.tv_sec, time_ro.tv_nsec);
+		time_ro.tv_sec, time_ro.tv_nsec,
+		time_rw.tv_sec, time_rw.tv_nsec);
 
 	/*
 	 * Delete even numbered slots (arbitrary) and unmap the first half of