
Commit 5fdb262

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:

 "ARM:
   - A couple of fixes when handling an exception while an SError has
     been delivered
   - Workaround for Cortex-A510's single-step erratum

  RISC-V:
   - Make CY, TM, and IR counters accessible in VU mode
   - Fix SBI implementation version

  x86:
   - Report deprecation of x87 features in supported CPUID
   - Preparation for fixing an interrupt delivery race on AMD hardware
   - Sparse fix

  All except POWER and s390:
   - Rework guest entry code to correctly mark noinstr areas and fix
     vtime accounting (for x86, this was already mostly correct but not
     entirely; for ARM, MIPS and RISC-V it wasn't)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: Use ERR_PTR_USR() to return -EFAULT as a __user pointer
  KVM: x86: Report deprecated x87 features in supported CPUID
  KVM: arm64: Workaround Cortex-A510's single-step and PAC trap errata
  KVM: arm64: Stop handle_exit() from handling HVC twice when an SError occurs
  KVM: arm64: Avoid consuming a stale esr value when SError occur
  RISC-V: KVM: Fix SBI implementation version
  RISC-V: KVM: make CY, TM, and IR counters accessible in VU mode
  kvm/riscv: rework guest entry logic
  kvm/arm64: rework guest entry logic
  kvm/x86: rework guest entry logic
  kvm/mips: rework guest entry logic
  kvm: add guest_state_{enter,exit}_irqoff()
  KVM: x86: Move delivery of non-APICv interrupt into vendor code
  kvm: Move KVM_GET_XSAVE2 IOCTL definition at the end of kvm.h
2 parents fbc04bf + 7e6a6b4

File tree

20 files changed: +336, -121 lines


Documentation/arm64/silicon-errata.rst

Lines changed: 2 additions & 0 deletions

@@ -100,6 +100,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A510     | #2051678        | ARM64_ERRATUM_2051678       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2077057        | ARM64_ERRATUM_2077057       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2119858        | ARM64_ERRATUM_2119858       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2054223        | ARM64_ERRATUM_2054223       |

arch/arm64/Kconfig

Lines changed: 16 additions & 0 deletions

@@ -680,6 +680,22 @@ config ARM64_ERRATUM_2051678
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_2077057
+	bool "Cortex-A510: 2077057: workaround software-step corrupting SPSR_EL2"
+	help
+	  This option adds the workaround for ARM Cortex-A510 erratum 2077057.
+	  Affected Cortex-A510 may corrupt SPSR_EL2 when a step exception is
+	  expected, but a Pointer Authentication trap is taken instead. The
+	  erratum causes SPSR_EL1 to be copied to SPSR_EL2, which could allow
+	  EL1 to cause a return to EL2 with a guest-controlled ELR_EL2.
+
+	  This can only happen when EL2 is stepping EL1.
+
+	  When these conditions occur, the SPSR_EL2 value is unchanged from the
+	  previous guest entry, and can be restored from the in-memory copy.
+
+	  If unsure, say Y.
+
 config ARM64_ERRATUM_2119858
 	bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode"
 	default y

arch/arm64/kernel/cpu_errata.c

Lines changed: 8 additions & 0 deletions

@@ -600,6 +600,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
 	},
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_2077057
+	{
+		.desc = "ARM erratum 2077057",
+		.capability = ARM64_WORKAROUND_2077057,
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
+	},
+#endif
 #ifdef CONFIG_ARM64_ERRATUM_2064142
 	{
 		.desc = "ARM erratum 2064142",

arch/arm64/kvm/arm.c

Lines changed: 33 additions & 18 deletions

@@ -797,6 +797,24 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
 		xfer_to_guest_mode_work_pending();
 }
 
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+	int ret;
+
+	guest_state_enter_irqoff();
+	ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
+	guest_state_exit_irqoff();
+
+	return ret;
+}
+
 /**
  * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
  * @vcpu:	The VCPU pointer

@@ -881,9 +899,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 * Enter the guest
 		 */
 		trace_kvm_entry(*vcpu_pc(vcpu));
-		guest_enter_irqoff();
+		guest_timing_enter_irqoff();
 
-		ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
+		ret = kvm_arm_vcpu_enter_exit(vcpu);
 
 		vcpu->mode = OUTSIDE_GUEST_MODE;
 		vcpu->stat.exits++;

@@ -918,26 +936,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		kvm_arch_vcpu_ctxsync_fp(vcpu);
 
 		/*
-		 * We may have taken a host interrupt in HYP mode (ie
-		 * while executing the guest). This interrupt is still
-		 * pending, as we haven't serviced it yet!
+		 * We must ensure that any pending interrupts are taken before
+		 * we exit guest timing so that timer ticks are accounted as
+		 * guest time. Transiently unmask interrupts so that any
+		 * pending interrupts are taken.
 		 *
-		 * We're now back in SVC mode, with interrupts
-		 * disabled. Enabling the interrupts now will have
-		 * the effect of taking the interrupt again, in SVC
-		 * mode this time.
+		 * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other
+		 * context synchronization event) is necessary to ensure that
+		 * pending interrupts are taken.
 		 */
 		local_irq_enable();
+		isb();
+		local_irq_disable();
+
+		guest_timing_exit_irqoff();
+
+		local_irq_enable();
 
-		/*
-		 * We do local_irq_enable() before calling guest_exit() so
-		 * that if a timer interrupt hits while running the guest we
-		 * account that tick as being spent in the guest. We enable
-		 * preemption after calling guest_exit() so that if we get
-		 * preempted we make sure ticks after that is not counted as
-		 * guest time.
-		 */
-		guest_exit();
 		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
 
 		/* Exit types that need handling before we can be preempted */
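The MIPS and RISC-V patches below repeat this same shape. A condensed sketch of the reworked ordering, using only the guest_{timing,state}_{enter,exit}_irqoff() helpers this series introduces (the arch_* names and run_guest_hw() are illustrative placeholders, not real kernel symbols):

	/* Run the guest inside the RCU extended quiescent state (EQS). */
	static int noinstr arch_vcpu_enter_exit(struct kvm_vcpu *vcpu)
	{
		int ret;

		guest_state_enter_irqoff();	/* enter EQS, vtime -> guest */
		ret = run_guest_hw(vcpu);	/* no instrumentation in here */
		guest_state_exit_irqoff();	/* leave EQS before tracing */

		return ret;
	}

	static int arch_run_loop(struct kvm_vcpu *vcpu)
	{
		int ret;

		local_irq_disable();
		guest_timing_enter_irqoff();	/* start charging guest time */

		ret = arch_vcpu_enter_exit(vcpu);

		/*
		 * Transiently unmask IRQs so a pending timer tick is taken
		 * while still inside guest timing, and charged to the guest.
		 */
		local_irq_enable();
		local_irq_disable();

		guest_timing_exit_irqoff();	/* stop charging guest time */
		local_irq_enable();

		return ret;
	}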

arch/arm64/kvm/handle_exit.c

Lines changed: 8 additions & 0 deletions

@@ -228,6 +228,14 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
 {
 	struct kvm_run *run = vcpu->run;
 
+	if (ARM_SERROR_PENDING(exception_index)) {
+		/*
+		 * The SError is handled by handle_exit_early(). If the guest
+		 * survives it will re-execute the original instruction.
+		 */
+		return 1;
+	}
+
 	exception_index = ARM_EXCEPTION_CODE(exception_index);
 
 	switch (exception_index) {
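For reference, the exit code tested above packs an SError-pending flag next to the exception class; the helpers are defined along these lines in arch/arm64/include/asm/kvm_asm.h (paraphrased for context, not part of this diff):

	#define ARM_EXIT_WITH_SERROR_BIT	31
	#define ARM_EXCEPTION_CODE(x)		((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
	#define ARM_SERROR_PENDING(x)		!!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT))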

arch/arm64/kvm/hyp/include/hyp/switch.h

Lines changed: 21 additions & 2 deletions

@@ -402,6 +402,24 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 	return false;
 }
 
+static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+	/*
+	 * Check for the conditions of Cortex-A510's #2077057. When these occur
+	 * SPSR_EL2 can't be trusted, but isn't needed either as it is
+	 * unchanged from the value in vcpu_gp_regs(vcpu)->pstate.
+	 * Are we single-stepping the guest, and took a PAC exception from the
+	 * active-not-pending state?
+	 */
+	if (cpus_have_final_cap(ARM64_WORKAROUND_2077057) &&
+	    vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
+	    *vcpu_cpsr(vcpu) & DBG_SPSR_SS &&
+	    ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC)
+		write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+
+	vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+}
+
 /*
  * Return true when we were able to fixup the guest exit and should return to
  * the guest, false when we should restore the host state and return to the

@@ -413,7 +431,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 	 * Save PSTATE early so that we can evaluate the vcpu mode
 	 * early on.
 	 */
-	vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+	synchronize_vcpu_pstate(vcpu, exit_code);
 
 	/*
 	 * Check whether we want to repaint the state one way or

@@ -424,7 +442,8 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
 		vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
 
-	if (ARM_SERROR_PENDING(*exit_code)) {
+	if (ARM_SERROR_PENDING(*exit_code) &&
+	    ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) {
 		u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);
 
 		/*

arch/arm64/tools/cpucaps

Lines changed: 3 additions & 2 deletions

@@ -55,9 +55,10 @@ WORKAROUND_1418040
 WORKAROUND_1463225
 WORKAROUND_1508412
 WORKAROUND_1542419
-WORKAROUND_2064142
-WORKAROUND_2038923
 WORKAROUND_1902691
+WORKAROUND_2038923
+WORKAROUND_2064142
+WORKAROUND_2077057
 WORKAROUND_TRBE_OVERWRITE_FILL_MODE
 WORKAROUND_TSB_FLUSH_FAILURE
 WORKAROUND_TRBE_WRITE_OUT_OF_RANGE

arch/mips/kvm/mips.c

Lines changed: 46 additions & 4 deletions

@@ -414,6 +414,24 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 	return -ENOIOCTLCMD;
 }
 
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static int noinstr kvm_mips_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+	int ret;
+
+	guest_state_enter_irqoff();
+	ret = kvm_mips_callbacks->vcpu_run(vcpu);
+	guest_state_exit_irqoff();
+
+	return ret;
+}
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
 	int r = -EINTR;

@@ -434,7 +452,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	lose_fpu(1);
 
 	local_irq_disable();
-	guest_enter_irqoff();
+	guest_timing_enter_irqoff();
 	trace_kvm_enter(vcpu);
 
 	/*

@@ -445,10 +463,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	 */
 	smp_store_mb(vcpu->mode, IN_GUEST_MODE);
 
-	r = kvm_mips_callbacks->vcpu_run(vcpu);
+	r = kvm_mips_vcpu_enter_exit(vcpu);
+
+	/*
+	 * We must ensure that any pending interrupts are taken before
+	 * we exit guest timing so that timer ticks are accounted as
+	 * guest time. Transiently unmask interrupts so that any
+	 * pending interrupts are taken.
+	 *
+	 * TODO: is there a barrier which ensures that pending interrupts are
+	 * recognised? Currently this just hopes that the CPU takes any pending
+	 * interrupts between the enable and disable.
+	 */
+	local_irq_enable();
+	local_irq_disable();
 
 	trace_kvm_out(vcpu);
-	guest_exit_irqoff();
+	guest_timing_exit_irqoff();
 	local_irq_enable();
 
 out:

@@ -1168,7 +1199,7 @@ static void kvm_mips_set_c0_status(void)
 /*
  * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
  */
-int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
+static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	u32 cause = vcpu->arch.host_cp0_cause;

@@ -1357,6 +1388,17 @@ int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
+int noinstr kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
+{
+	int ret;
+
+	guest_state_exit_irqoff();
+	ret = __kvm_mips_handle_exit(vcpu);
+	guest_state_enter_irqoff();
+
+	return ret;
+}
+
 /* Enable FPU for guest and restore context */
 void kvm_own_fpu(struct kvm_vcpu *vcpu)
 {
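Note the inverted bracketing in kvm_mips_handle_exit(): it is called from the low-level exit path while the vCPU is still logically inside the EQS, so it must leave the EQS before running instrumentable C code and re-enter it before the guest resumes. The resulting nesting, sketched as a comment (illustrative):

	/*
	 * kvm_mips_vcpu_enter_exit()
	 *   guest_state_enter_irqoff()      <- enter EQS
	 *     <guest runs, then exits>
	 *     kvm_mips_handle_exit()        <- from low-level exit code
	 *       guest_state_exit_irqoff()   <- leave EQS; RCU usable again
	 *       __kvm_mips_handle_exit()    <- instrumentable exit handling
	 *       guest_state_enter_irqoff()  <- re-enter EQS before resuming
	 *   guest_state_exit_irqoff()       <- final exit, back in vcpu_run
	 */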

arch/riscv/kvm/vcpu.c

Lines changed: 31 additions & 17 deletions

@@ -90,6 +90,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpu_context *cntx;
+	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
 
 	/* Mark this VCPU never ran */
 	vcpu->arch.ran_atleast_once = false;

@@ -106,6 +107,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	cntx->hstatus |= HSTATUS_SPVP;
 	cntx->hstatus |= HSTATUS_SPV;
 
+	/* By default, make CY, TM, and IR counters accessible in VU mode */
+	reset_csr->scounteren = 0x7;
+
 	/* Setup VCPU timer */
 	kvm_riscv_vcpu_timer_init(vcpu);

@@ -699,6 +703,20 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
 	csr_write(CSR_HVIP, csr->hvip);
 }
 
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+	guest_state_enter_irqoff();
+	__kvm_riscv_switch_to(&vcpu->arch);
+	guest_state_exit_irqoff();
+}
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
 	int ret;

@@ -790,9 +808,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 			continue;
 		}
 
-		guest_enter_irqoff();
+		guest_timing_enter_irqoff();
 
-		__kvm_riscv_switch_to(&vcpu->arch);
+		kvm_riscv_vcpu_enter_exit(vcpu);
 
 		vcpu->mode = OUTSIDE_GUEST_MODE;
 		vcpu->stat.exits++;

@@ -812,25 +830,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		kvm_riscv_vcpu_sync_interrupts(vcpu);
 
 		/*
-		 * We may have taken a host interrupt in VS/VU-mode (i.e.
-		 * while executing the guest). This interrupt is still
-		 * pending, as we haven't serviced it yet!
+		 * We must ensure that any pending interrupts are taken before
+		 * we exit guest timing so that timer ticks are accounted as
+		 * guest time. Transiently unmask interrupts so that any
+		 * pending interrupts are taken.
 		 *
-		 * We're now back in HS-mode with interrupts disabled
-		 * so enabling the interrupts now will have the effect
-		 * of taking the interrupt again, in HS-mode this time.
+		 * There's no barrier which ensures that pending interrupts are
+		 * recognised, so we just hope that the CPU takes any pending
+		 * interrupts between the enable and disable.
 		 */
 		local_irq_enable();
+		local_irq_disable();
 
-		/*
-		 * We do local_irq_enable() before calling guest_exit() so
-		 * that if a timer interrupt hits while running the guest
-		 * we account that tick as being spent in the guest. We
-		 * enable preemption after calling guest_exit() so that if
-		 * we get preempted we make sure ticks after that is not
-		 * counted as guest time.
-		 */
-		guest_exit();
+		guest_timing_exit_irqoff();
+
+		local_irq_enable();
 
 		preempt_enable();
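The literal 0x7 sets the low three enable bits of scounteren as defined by the RISC-V privileged specification; spelled out (the macro names are illustrative, not taken from the patch):

	/* scounteren low bits per the RISC-V privileged spec. */
	#define SCOUNTEREN_CY	(1UL << 0)	/* cycle counter */
	#define SCOUNTEREN_TM	(1UL << 1)	/* time (wall-clock) */
	#define SCOUNTEREN_IR	(1UL << 2)	/* instructions retired */

	reset_csr->scounteren = SCOUNTEREN_CY | SCOUNTEREN_TM | SCOUNTEREN_IR; /* == 0x7 */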

arch/riscv/kvm/vcpu_sbi_base.c

Lines changed: 2 additions & 1 deletion

@@ -9,6 +9,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
+#include <linux/version.h>
 #include <asm/csr.h>
 #include <asm/sbi.h>
 #include <asm/kvm_vcpu_timer.h>

@@ -32,7 +33,7 @@ static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		*out_val = KVM_SBI_IMPID;
 		break;
 	case SBI_EXT_BASE_GET_IMP_VERSION:
-		*out_val = 0;
+		*out_val = LINUX_VERSION_CODE;
 		break;
 	case SBI_EXT_BASE_PROBE_EXT:
 		if ((cp->a0 >= SBI_EXT_EXPERIMENTAL_START &&
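LINUX_VERSION_CODE comes from <linux/version.h> and packs the kernel version as KERNEL_VERSION(a, b, c) == (a << 16) + (b << 8) + c, so a guest can now recover the host kernel version from the SBI implementation version. A sketch of the guest-side decode (the helper is hypothetical; ver is the value returned for SBI_EXT_BASE_GET_IMP_VERSION):

	#include <stdio.h>

	/* Unpack a KVM/Linux SBI implementation version (illustrative helper). */
	static void print_kvm_impl_version(unsigned long ver)
	{
		unsigned int major = (ver >> 16) & 0xffff;
		unsigned int minor = (ver >>  8) & 0xff;
		unsigned int patch =  ver        & 0xff;

		/* A v5.17.0 host would report KERNEL_VERSION(5, 17, 0) == 0x051100. */
		printf("host kernel %u.%u.%u\n", major, minor, patch);
	}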
