Skip to content

Commit b6b2648

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini: "ARM: - Take care of faults occurring between the PARange and IPA range by injecting an exception - Fix S2 faults taken from a host EL0 in protected mode - Work around Oops caused by a PMU access from a 32bit guest when no PMU has been created. This is a temporary bodge until we fix it for good. x86: - Fix potential races when walking host page table - Fix shadow page table leak when KVM runs nested - Work around bug in userspace when KVM synthesizes leaf 0x80000021 on older (pre-EPYC) or Intel processors Generic (but affects only RISC-V): - Fix bad user ABI for KVM_EXIT_SYSTEM_EVENT" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: x86: work around QEMU issue with synthetic CPUID leaves Revert "x86/mm: Introduce lookup_address_in_mm()" KVM: x86/mmu: fix potential races when walking host page table KVM: fix bad user ABI for KVM_EXIT_SYSTEM_EVENT KVM: x86/mmu: Do not create SPTEs for GFNs that exceed host.MAXPHYADDR KVM: arm64: Inject exception on out-of-IPA-range translation fault KVM/arm64: Don't emulate a PMU for 32-bit guests if feature not set KVM: arm64: Handle host stage-2 faults from 32-bit EL0
2 parents b2da7df + f751d8e commit b6b2648

File tree

18 files changed

+214
-62
lines changed

18 files changed

+214
-62
lines changed

Documentation/virt/kvm/api.rst

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5986,16 +5986,16 @@ should put the acknowledged interrupt vector into the 'epr' field.
59865986
#define KVM_SYSTEM_EVENT_RESET 2
59875987
#define KVM_SYSTEM_EVENT_CRASH 3
59885988
__u32 type;
5989-
__u64 flags;
5989+
__u32 ndata;
5990+
__u64 data[16];
59905991
} system_event;
59915992

59925993
If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
59935994
a system-level event using some architecture specific mechanism (hypercall
59945995
or some special instruction). In case of ARM64, this is triggered using
5995-
HVC instruction based PSCI call from the vcpu. The 'type' field describes
5996-
the system-level event type. The 'flags' field describes architecture
5997-
specific flags for the system-level event.
5996+
HVC instruction based PSCI call from the vcpu.
59985997

5998+
The 'type' field describes the system-level event type.
59995999
Valid values for 'type' are:
60006000

60016001
- KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
@@ -6010,10 +6010,20 @@ Valid values for 'type' are:
60106010
to ignore the request, or to gather VM memory core dump and/or
60116011
reset/shutdown of the VM.
60126012

6013-
Valid flags are:
6013+
If KVM_CAP_SYSTEM_EVENT_DATA is present, the 'data' field can contain
6014+
architecture specific information for the system-level event. Only
6015+
the first `ndata` items (possibly zero) of the data array are valid.
60146016

6015-
- KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (arm64 only) -- the guest issued
6016-
a SYSTEM_RESET2 call according to v1.1 of the PSCI specification.
6017+
- for arm64, data[0] is set to KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 if
6018+
the guest issued a SYSTEM_RESET2 call according to v1.1 of the PSCI
6019+
specification.
6020+
6021+
- for RISC-V, data[0] is set to the value of the second argument of the
6022+
``sbi_system_reset`` call.
6023+
6024+
Previous versions of Linux defined a `flags` member in this struct. The
6025+
field is now aliased to `data[0]`. Userspace can assume that it is only
6026+
written if ndata is greater than 0.
60176027

60186028
::
60196029

arch/arm64/include/asm/kvm_emulate.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
4040
void kvm_inject_vabt(struct kvm_vcpu *vcpu);
4141
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
4242
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
43+
void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
4344

4445
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
4546

arch/arm64/kvm/hyp/nvhe/host.S

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -198,15 +198,15 @@ SYM_CODE_START(__kvm_hyp_host_vector)
198198
invalid_host_el2_vect // FIQ EL2h
199199
invalid_host_el2_vect // Error EL2h
200200

201-
host_el1_sync_vect // Synchronous 64-bit EL1
202-
invalid_host_el1_vect // IRQ 64-bit EL1
203-
invalid_host_el1_vect // FIQ 64-bit EL1
204-
invalid_host_el1_vect // Error 64-bit EL1
205-
206-
invalid_host_el1_vect // Synchronous 32-bit EL1
207-
invalid_host_el1_vect // IRQ 32-bit EL1
208-
invalid_host_el1_vect // FIQ 32-bit EL1
209-
invalid_host_el1_vect // Error 32-bit EL1
201+
host_el1_sync_vect // Synchronous 64-bit EL1/EL0
202+
invalid_host_el1_vect // IRQ 64-bit EL1/EL0
203+
invalid_host_el1_vect // FIQ 64-bit EL1/EL0
204+
invalid_host_el1_vect // Error 64-bit EL1/EL0
205+
206+
host_el1_sync_vect // Synchronous 32-bit EL1/EL0
207+
invalid_host_el1_vect // IRQ 32-bit EL1/EL0
208+
invalid_host_el1_vect // FIQ 32-bit EL1/EL0
209+
invalid_host_el1_vect // Error 32-bit EL1/EL0
210210
SYM_CODE_END(__kvm_hyp_host_vector)
211211

212212
/*

arch/arm64/kvm/inject_fault.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,34 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
145145
inject_abt64(vcpu, true, addr);
146146
}
147147

148+
void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
149+
{
150+
unsigned long addr, esr;
151+
152+
addr = kvm_vcpu_get_fault_ipa(vcpu);
153+
addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
154+
155+
if (kvm_vcpu_trap_is_iabt(vcpu))
156+
kvm_inject_pabt(vcpu, addr);
157+
else
158+
kvm_inject_dabt(vcpu, addr);
159+
160+
/*
161+
* If AArch64 or LPAE, set FSC to 0 to indicate an Address
162+
* Size Fault at level 0, as if exceeding PARange.
163+
*
164+
* Non-LPAE guests will only get the external abort, as there
165+
* is no way to describe the ASF.
166+
*/
167+
if (vcpu_el1_is_32bit(vcpu) &&
168+
!(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE))
169+
return;
170+
171+
esr = vcpu_read_sys_reg(vcpu, ESR_EL1);
172+
esr &= ~GENMASK_ULL(5, 0);
173+
vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
174+
}
175+
148176
/**
149177
* kvm_inject_undefined - inject an undefined instruction into the guest
150178
* @vcpu: The vCPU in which to inject the exception

arch/arm64/kvm/mmu.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1337,6 +1337,25 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
13371337
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
13381338
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
13391339

1340+
if (fault_status == FSC_FAULT) {
1341+
/* Beyond sanitised PARange (which is the IPA limit) */
1342+
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
1343+
kvm_inject_size_fault(vcpu);
1344+
return 1;
1345+
}
1346+
1347+
/* Falls between the IPA range and the PARange? */
1348+
if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) {
1349+
fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
1350+
1351+
if (is_iabt)
1352+
kvm_inject_pabt(vcpu, fault_ipa);
1353+
else
1354+
kvm_inject_dabt(vcpu, fault_ipa);
1355+
return 1;
1356+
}
1357+
}
1358+
13401359
/* Synchronous External Abort? */
13411360
if (kvm_vcpu_abt_issea(vcpu)) {
13421361
/*

arch/arm64/kvm/pmu-emul.c

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,9 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
177177
struct kvm_pmu *pmu = &vcpu->arch.pmu;
178178
struct kvm_pmc *pmc = &pmu->pmc[select_idx];
179179

180+
if (!kvm_vcpu_has_pmu(vcpu))
181+
return 0;
182+
180183
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
181184

182185
if (kvm_pmu_pmc_is_chained(pmc) &&
@@ -198,6 +201,9 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
198201
{
199202
u64 reg;
200203

204+
if (!kvm_vcpu_has_pmu(vcpu))
205+
return;
206+
201207
reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
202208
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
203209
__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
@@ -322,6 +328,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
322328
struct kvm_pmu *pmu = &vcpu->arch.pmu;
323329
struct kvm_pmc *pmc;
324330

331+
if (!kvm_vcpu_has_pmu(vcpu))
332+
return;
333+
325334
if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
326335
return;
327336

@@ -357,7 +366,7 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
357366
struct kvm_pmu *pmu = &vcpu->arch.pmu;
358367
struct kvm_pmc *pmc;
359368

360-
if (!val)
369+
if (!kvm_vcpu_has_pmu(vcpu) || !val)
361370
return;
362371

363372
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
@@ -527,6 +536,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
527536
struct kvm_pmu *pmu = &vcpu->arch.pmu;
528537
int i;
529538

539+
if (!kvm_vcpu_has_pmu(vcpu))
540+
return;
541+
530542
if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
531543
return;
532544

@@ -576,6 +588,9 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
576588
{
577589
int i;
578590

591+
if (!kvm_vcpu_has_pmu(vcpu))
592+
return;
593+
579594
if (val & ARMV8_PMU_PMCR_E) {
580595
kvm_pmu_enable_counter_mask(vcpu,
581596
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
@@ -739,6 +754,9 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
739754
{
740755
u64 reg, mask;
741756

757+
if (!kvm_vcpu_has_pmu(vcpu))
758+
return;
759+
742760
mask = ARMV8_PMU_EVTYPE_MASK;
743761
mask &= ~ARMV8_PMU_EVTYPE_EVENT;
744762
mask |= kvm_pmu_event_mask(vcpu->kvm);
@@ -827,6 +845,9 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
827845
u64 val, mask = 0;
828846
int base, i, nr_events;
829847

848+
if (!kvm_vcpu_has_pmu(vcpu))
849+
return 0;
850+
830851
if (!pmceid1) {
831852
val = read_sysreg(pmceid0_el0);
832853
base = 0;

arch/arm64/kvm/psci.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,8 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags)
181181

182182
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
183183
vcpu->run->system_event.type = type;
184-
vcpu->run->system_event.flags = flags;
184+
vcpu->run->system_event.ndata = 1;
185+
vcpu->run->system_event.data[0] = flags;
185186
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
186187
}
187188

arch/riscv/kvm/vcpu_sbi.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
8383

8484
void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
8585
struct kvm_run *run,
86-
u32 type, u64 flags)
86+
u32 type, u64 reason)
8787
{
8888
unsigned long i;
8989
struct kvm_vcpu *tmp;
@@ -94,7 +94,8 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
9494

9595
memset(&run->system_event, 0, sizeof(run->system_event));
9696
run->system_event.type = type;
97-
run->system_event.flags = flags;
97+
run->system_event.ndata = 1;
98+
run->system_event.data[0] = reason;
9899
run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
99100
}
100101

arch/x86/include/asm/pgtable_types.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -559,10 +559,6 @@ static inline void update_page_count(int level, unsigned long pages) { }
559559
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
560560
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
561561
unsigned int *level);
562-
563-
struct mm_struct;
564-
extern pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
565-
unsigned int *level);
566562
extern pmd_t *lookup_pmd_address(unsigned long address);
567563
extern phys_addr_t slow_virt_to_phys(void *__address);
568564
extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,

arch/x86/kvm/cpuid.c

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1085,12 +1085,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
10851085
case 0x80000000:
10861086
entry->eax = min(entry->eax, 0x80000021);
10871087
/*
1088-
* Serializing LFENCE is reported in a multitude of ways,
1089-
* and NullSegClearsBase is not reported in CPUID on Zen2;
1090-
* help userspace by providing the CPUID leaf ourselves.
1088+
* Serializing LFENCE is reported in a multitude of ways, and
1089+
* NullSegClearsBase is not reported in CPUID on Zen2; help
1090+
* userspace by providing the CPUID leaf ourselves.
1091+
*
1092+
* However, only do it if the host has CPUID leaf 0x8000001d.
1093+
* QEMU thinks that it can query the host blindly for that
1094+
* CPUID leaf if KVM reports that it supports 0x8000001d or
1095+
* above. The processor merrily returns values from the
1096+
* highest Intel leaf which QEMU tries to use as the guest's
1097+
* 0x8000001d. Even worse, this can result in an infinite
1098+
* loop if said highest leaf has no subleaves indexed by ECX.
10911099
*/
1092-
if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC)
1093-
|| !static_cpu_has_bug(X86_BUG_NULL_SEG))
1100+
if (entry->eax >= 0x8000001d &&
1101+
(static_cpu_has(X86_FEATURE_LFENCE_RDTSC)
1102+
|| !static_cpu_has_bug(X86_BUG_NULL_SEG)))
10941103
entry->eax = max(entry->eax, 0x80000021);
10951104
break;
10961105
case 0x80000001:

0 commit comments

Comments
 (0)