Skip to content

Commit 7056c4e

Browse files
committed
Merge tag 'kvm-x86-generic-6.12' of https://github.com/kvm-x86/linux into HEAD
KVM generic changes for 6.12: - Fix a bug that results in KVM prematurely exiting to userspace for coalesced MMIO/PIO in many cases, clean up the related code, and add a testcase. - Fix a bug in kvm_clear_guest() where it would trigger a buffer overflow _if_ the gpa+len crosses a page boundary, which thankfully is guaranteed to not happen in the current code base. Add WARNs in more helpers that read/write guest memory to detect similar bugs.
2 parents c09dd2b + 025dde5 commit 7056c4e

File tree

5 files changed

+283
-24
lines changed

5 files changed

+283
-24
lines changed

tools/testing/selftests/kvm/Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
130130
TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
131131
TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test
132132
TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
133+
TEST_GEN_PROGS_x86_64 += coalesced_io_test
133134
TEST_GEN_PROGS_x86_64 += demand_paging_test
134135
TEST_GEN_PROGS_x86_64 += dirty_log_test
135136
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
@@ -167,6 +168,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
167168
TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3
168169
TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
169170
TEST_GEN_PROGS_aarch64 += arch_timer
171+
TEST_GEN_PROGS_aarch64 += coalesced_io_test
170172
TEST_GEN_PROGS_aarch64 += demand_paging_test
171173
TEST_GEN_PROGS_aarch64 += dirty_log_test
172174
TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
@@ -201,6 +203,7 @@ TEST_GEN_PROGS_s390x += kvm_binary_stats_test
201203
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
202204
TEST_GEN_PROGS_riscv += riscv/ebreak_test
203205
TEST_GEN_PROGS_riscv += arch_timer
206+
TEST_GEN_PROGS_riscv += coalesced_io_test
204207
TEST_GEN_PROGS_riscv += demand_paging_test
205208
TEST_GEN_PROGS_riscv += dirty_log_test
206209
TEST_GEN_PROGS_riscv += get-reg-list
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
#include <signal.h>
3+
#include <stdio.h>
4+
#include <stdlib.h>
5+
#include <string.h>
6+
#include <sys/ioctl.h>
7+
8+
#include <linux/sizes.h>
9+
10+
#include <kvm_util.h>
11+
#include <processor.h>
12+
13+
#include "ucall_common.h"
14+
15+
struct kvm_coalesced_io {
16+
struct kvm_coalesced_mmio_ring *ring;
17+
uint32_t ring_size;
18+
uint64_t mmio_gpa;
19+
uint64_t *mmio;
20+
21+
/*
22+
* x86-only, but define pio_port for all architectures to minimize the
23+
* amount of #ifdeffery and complexity, without having to sacrifice
24+
* verbose error messages.
25+
*/
26+
uint8_t pio_port;
27+
};
28+
29+
static struct kvm_coalesced_io kvm_builtin_io_ring;
30+
31+
#ifdef __x86_64__
32+
static const int has_pio = 1;
33+
#else
34+
static const int has_pio = 0;
35+
#endif
36+
37+
static void guest_code(struct kvm_coalesced_io *io)
38+
{
39+
int i, j;
40+
41+
for (;;) {
42+
for (j = 0; j < 1 + has_pio; j++) {
43+
/*
44+
* KVM always leaves one free entry, i.e. exits to
45+
* userspace before the last entry is filled.
46+
*/
47+
for (i = 0; i < io->ring_size - 1; i++) {
48+
#ifdef __x86_64__
49+
if (i & 1)
50+
outl(io->pio_port, io->pio_port + i);
51+
else
52+
#endif
53+
WRITE_ONCE(*io->mmio, io->mmio_gpa + i);
54+
}
55+
#ifdef __x86_64__
56+
if (j & 1)
57+
outl(io->pio_port, io->pio_port + i);
58+
else
59+
#endif
60+
WRITE_ONCE(*io->mmio, io->mmio_gpa + i);
61+
}
62+
GUEST_SYNC(0);
63+
64+
WRITE_ONCE(*io->mmio, io->mmio_gpa + i);
65+
#ifdef __x86_64__
66+
outl(io->pio_port, io->pio_port + i);
67+
#endif
68+
}
69+
}
70+
71+
static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu,
72+
struct kvm_coalesced_io *io,
73+
uint32_t ring_start,
74+
uint32_t expected_exit)
75+
{
76+
const bool want_pio = expected_exit == KVM_EXIT_IO;
77+
struct kvm_coalesced_mmio_ring *ring = io->ring;
78+
struct kvm_run *run = vcpu->run;
79+
uint32_t pio_value;
80+
81+
WRITE_ONCE(ring->first, ring_start);
82+
WRITE_ONCE(ring->last, ring_start);
83+
84+
vcpu_run(vcpu);
85+
86+
/*
87+
* Annoyingly, reading PIO data is safe only for PIO exits, otherwise
88+
* data_offset is garbage, e.g. an MMIO gpa.
89+
*/
90+
if (run->exit_reason == KVM_EXIT_IO)
91+
pio_value = *(uint32_t *)((void *)run + run->io.data_offset);
92+
else
93+
pio_value = 0;
94+
95+
TEST_ASSERT((!want_pio && (run->exit_reason == KVM_EXIT_MMIO && run->mmio.is_write &&
96+
run->mmio.phys_addr == io->mmio_gpa && run->mmio.len == 8 &&
97+
*(uint64_t *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) ||
98+
(want_pio && (run->exit_reason == KVM_EXIT_IO && run->io.port == io->pio_port &&
99+
run->io.direction == KVM_EXIT_IO_OUT && run->io.count == 1 &&
100+
pio_value == io->pio_port + io->ring_size - 1)),
101+
"For start = %u, expected exit on %u-byte %s write 0x%llx = %lx, got exit_reason = %u (%s)\n "
102+
"(MMIO addr = 0x%llx, write = %u, len = %u, data = %lx)\n "
103+
"(PIO port = 0x%x, write = %u, len = %u, count = %u, data = %x",
104+
ring_start, want_pio ? 4 : 8, want_pio ? "PIO" : "MMIO",
105+
want_pio ? (unsigned long long)io->pio_port : io->mmio_gpa,
106+
(want_pio ? io->pio_port : io->mmio_gpa) + io->ring_size - 1, run->exit_reason,
107+
run->exit_reason == KVM_EXIT_MMIO ? "MMIO" : run->exit_reason == KVM_EXIT_IO ? "PIO" : "other",
108+
run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(uint64_t *)run->mmio.data,
109+
run->io.port, run->io.direction, run->io.size, run->io.count, pio_value);
110+
}
111+
112+
static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu,
113+
struct kvm_coalesced_io *io,
114+
uint32_t ring_start,
115+
uint32_t expected_exit)
116+
{
117+
struct kvm_coalesced_mmio_ring *ring = io->ring;
118+
int i;
119+
120+
vcpu_run_and_verify_io_exit(vcpu, io, ring_start, expected_exit);
121+
122+
TEST_ASSERT((ring->last + 1) % io->ring_size == ring->first,
123+
"Expected ring to be full (minus 1), first = %u, last = %u, max = %u, start = %u",
124+
ring->first, ring->last, io->ring_size, ring_start);
125+
126+
for (i = 0; i < io->ring_size - 1; i++) {
127+
uint32_t idx = (ring->first + i) % io->ring_size;
128+
struct kvm_coalesced_mmio *entry = &ring->coalesced_mmio[idx];
129+
130+
#ifdef __x86_64__
131+
if (i & 1)
132+
TEST_ASSERT(entry->phys_addr == io->pio_port &&
133+
entry->len == 4 && entry->pio &&
134+
*(uint32_t *)entry->data == io->pio_port + i,
135+
"Wanted 4-byte port I/O 0x%x = 0x%x in entry %u, got %u-byte %s 0x%llx = 0x%x",
136+
io->pio_port, io->pio_port + i, i,
137+
entry->len, entry->pio ? "PIO" : "MMIO",
138+
entry->phys_addr, *(uint32_t *)entry->data);
139+
else
140+
#endif
141+
TEST_ASSERT(entry->phys_addr == io->mmio_gpa &&
142+
entry->len == 8 && !entry->pio,
143+
"Wanted 8-byte MMIO to 0x%lx = %lx in entry %u, got %u-byte %s 0x%llx = 0x%lx",
144+
io->mmio_gpa, io->mmio_gpa + i, i,
145+
entry->len, entry->pio ? "PIO" : "MMIO",
146+
entry->phys_addr, *(uint64_t *)entry->data);
147+
}
148+
}
149+
150+
static void test_coalesced_io(struct kvm_vcpu *vcpu,
151+
struct kvm_coalesced_io *io, uint32_t ring_start)
152+
{
153+
struct kvm_coalesced_mmio_ring *ring = io->ring;
154+
155+
kvm_vm_register_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */);
156+
#ifdef __x86_64__
157+
kvm_vm_register_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */);
158+
#endif
159+
160+
vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_MMIO);
161+
#ifdef __x86_64__
162+
vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_IO);
163+
#endif
164+
165+
/*
166+
* Verify ucall, which may use non-coalesced MMIO or PIO, generates an
167+
* immediate exit.
168+
*/
169+
WRITE_ONCE(ring->first, ring_start);
170+
WRITE_ONCE(ring->last, ring_start);
171+
vcpu_run(vcpu);
172+
TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
173+
TEST_ASSERT_EQ(ring->first, ring_start);
174+
TEST_ASSERT_EQ(ring->last, ring_start);
175+
176+
/* Verify that non-coalesced MMIO/PIO generates an exit to userspace. */
177+
kvm_vm_unregister_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */);
178+
vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_MMIO);
179+
180+
#ifdef __x86_64__
181+
kvm_vm_unregister_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */);
182+
vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_IO);
183+
#endif
184+
}
185+
186+
int main(int argc, char *argv[])
187+
{
188+
struct kvm_vcpu *vcpu;
189+
struct kvm_vm *vm;
190+
int i;
191+
192+
TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_MMIO));
193+
194+
#ifdef __x86_64__
195+
TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_PIO));
196+
#endif
197+
198+
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
199+
200+
kvm_builtin_io_ring = (struct kvm_coalesced_io) {
201+
/*
202+
* The I/O ring is a kernel-allocated page whose address is
203+
* relative to each vCPU's run page, with the page offset
204+
* provided by KVM in the return of KVM_CAP_COALESCED_MMIO.
205+
*/
206+
.ring = (void *)vcpu->run +
207+
(kvm_check_cap(KVM_CAP_COALESCED_MMIO) * getpagesize()),
208+
209+
/*
210+
* The size of the I/O ring is fixed, but KVM defines the size
211+
* based on the kernel's PAGE_SIZE. Thus, userspace must query
212+
* the host's page size at runtime to compute the ring size.
213+
*/
214+
.ring_size = (getpagesize() - sizeof(struct kvm_coalesced_mmio_ring)) /
215+
sizeof(struct kvm_coalesced_mmio),
216+
217+
/*
218+
* Arbitrary address+port (MMIO mustn't overlap memslots), with
219+
* the MMIO GPA identity mapped in the guest.
220+
*/
221+
.mmio_gpa = 4ull * SZ_1G,
222+
.mmio = (uint64_t *)(4ull * SZ_1G),
223+
.pio_port = 0x80,
224+
};
225+
226+
virt_map(vm, (uint64_t)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1);
227+
228+
sync_global_to_guest(vm, kvm_builtin_io_ring);
229+
vcpu_args_set(vcpu, 1, &kvm_builtin_io_ring);
230+
231+
for (i = 0; i < kvm_builtin_io_ring.ring_size; i++)
232+
test_coalesced_io(vcpu, &kvm_builtin_io_ring, i);
233+
234+
kvm_vm_free(vm);
235+
return 0;
236+
}

tools/testing/selftests/kvm/include/kvm_util.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,32 @@ static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
460460
return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL);
461461
}
462462

463+
static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm,
464+
uint64_t address,
465+
uint64_t size, bool pio)
466+
{
467+
struct kvm_coalesced_mmio_zone zone = {
468+
.addr = address,
469+
.size = size,
470+
.pio = pio,
471+
};
472+
473+
vm_ioctl(vm, KVM_REGISTER_COALESCED_MMIO, &zone);
474+
}
475+
476+
static inline void kvm_vm_unregister_coalesced_io(struct kvm_vm *vm,
477+
uint64_t address,
478+
uint64_t size, bool pio)
479+
{
480+
struct kvm_coalesced_mmio_zone zone = {
481+
.addr = address,
482+
.size = size,
483+
.pio = pio,
484+
};
485+
486+
vm_ioctl(vm, KVM_UNREGISTER_COALESCED_MMIO, &zone);
487+
}
488+
463489
static inline int vm_get_stats_fd(struct kvm_vm *vm)
464490
{
465491
int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL);

virt/kvm/coalesced_mmio.c

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -40,27 +40,6 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
4040
return 1;
4141
}
4242

43-
static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev, u32 last)
44-
{
45-
struct kvm_coalesced_mmio_ring *ring;
46-
unsigned avail;
47-
48-
/* Are we able to batch it ? */
49-
50-
/* last is the first free entry
51-
* check if we don't meet the first used entry
52-
* there is always one unused entry in the buffer
53-
*/
54-
ring = dev->kvm->coalesced_mmio_ring;
55-
avail = (ring->first - last - 1) % KVM_COALESCED_MMIO_MAX;
56-
if (avail == 0) {
57-
/* full */
58-
return 0;
59-
}
60-
61-
return 1;
62-
}
63-
6443
static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
6544
struct kvm_io_device *this, gpa_t addr,
6645
int len, const void *val)
@@ -74,9 +53,15 @@ static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
7453

7554
spin_lock(&dev->kvm->ring_lock);
7655

56+
/*
57+
* last is the index of the entry to fill. Verify userspace hasn't
58+
* set last to be out of range, and that there is room in the ring.
59+
* Leave one entry free in the ring so that userspace can differentiate
60+
* between an empty ring and a full ring.
61+
*/
7762
insert = READ_ONCE(ring->last);
78-
if (!coalesced_mmio_has_room(dev, insert) ||
79-
insert >= KVM_COALESCED_MMIO_MAX) {
63+
if (insert >= KVM_COALESCED_MMIO_MAX ||
64+
(insert + 1) % KVM_COALESCED_MMIO_MAX == READ_ONCE(ring->first)) {
8065
spin_unlock(&dev->kvm->ring_lock);
8166
return -EOPNOTSUPP;
8267
}

virt/kvm/kvm_main.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3275,6 +3275,9 @@ static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
32753275
int r;
32763276
unsigned long addr;
32773277

3278+
if (WARN_ON_ONCE(offset + len > PAGE_SIZE))
3279+
return -EFAULT;
3280+
32783281
addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
32793282
if (kvm_is_error_hva(addr))
32803283
return -EFAULT;
@@ -3348,6 +3351,9 @@ static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
33483351
int r;
33493352
unsigned long addr;
33503353

3354+
if (WARN_ON_ONCE(offset + len > PAGE_SIZE))
3355+
return -EFAULT;
3356+
33513357
addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
33523358
if (kvm_is_error_hva(addr))
33533359
return -EFAULT;
@@ -3378,6 +3384,9 @@ static int __kvm_write_guest_page(struct kvm *kvm,
33783384
int r;
33793385
unsigned long addr;
33803386

3387+
if (WARN_ON_ONCE(offset + len > PAGE_SIZE))
3388+
return -EFAULT;
3389+
33813390
addr = gfn_to_hva_memslot(memslot, gfn);
33823391
if (kvm_is_error_hva(addr))
33833392
return -EFAULT;
@@ -3581,7 +3590,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
35813590
int ret;
35823591

35833592
while ((seg = next_segment(len, offset)) != 0) {
3584-
ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
3593+
ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, seg);
35853594
if (ret < 0)
35863595
return ret;
35873596
offset = 0;

0 commit comments

Comments
 (0)