
Commit a24dbf9

Merge tag 'kvm-x86-vmx-6.15' of https://github.com/kvm-x86/linux into HEAD
KVM VMX changes for 6.15

 - Fix a bug where KVM unnecessarily reads XFD_ERR from hardware and thus
   modifies the vCPU's XFD_ERR on a #NM due to CR0.TS=1.

 - Pass XFD_ERR as the pseudo-payload when injecting #NM, as a preparatory
   step for upcoming FRED virtualization support.

 - Decouple the EPT entry RWX protection bit macros from the EPT Violation
   bits, as a general cleanup and in anticipation of adding support for
   emulating Mode-Based Execution (MBEC).

 - Reject KVM_RUN if userspace manages to gain control and stuff invalid
   guest state while KVM is in the middle of emulating nested VM-Enter.

 - Add a macro to handle KVM's sanity checks on entry/exit VMCS control
   pairs, in anticipation of adding sanity checks for secondary exit
   controls (the primary field is out of bits).
2 parents: 783e9cd + 0c3566b

File tree: 3 files changed, +92 −45 lines changed


arch/x86/include/asm/vmx.h

Lines changed: 16 additions & 12 deletions

@@ -580,18 +580,22 @@ enum vm_entry_failure_code {
 /*
  * Exit Qualifications for EPT Violations
  */
-#define EPT_VIOLATION_ACC_READ_BIT		0
-#define EPT_VIOLATION_ACC_WRITE_BIT		1
-#define EPT_VIOLATION_ACC_INSTR_BIT		2
-#define EPT_VIOLATION_RWX_SHIFT			3
-#define EPT_VIOLATION_GVA_IS_VALID_BIT		7
-#define EPT_VIOLATION_GVA_TRANSLATED_BIT	8
-#define EPT_VIOLATION_ACC_READ		(1 << EPT_VIOLATION_ACC_READ_BIT)
-#define EPT_VIOLATION_ACC_WRITE		(1 << EPT_VIOLATION_ACC_WRITE_BIT)
-#define EPT_VIOLATION_ACC_INSTR		(1 << EPT_VIOLATION_ACC_INSTR_BIT)
-#define EPT_VIOLATION_RWX_MASK		(VMX_EPT_RWX_MASK << EPT_VIOLATION_RWX_SHIFT)
-#define EPT_VIOLATION_GVA_IS_VALID	(1 << EPT_VIOLATION_GVA_IS_VALID_BIT)
-#define EPT_VIOLATION_GVA_TRANSLATED	(1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
+#define EPT_VIOLATION_ACC_READ		BIT(0)
+#define EPT_VIOLATION_ACC_WRITE		BIT(1)
+#define EPT_VIOLATION_ACC_INSTR		BIT(2)
+#define EPT_VIOLATION_PROT_READ		BIT(3)
+#define EPT_VIOLATION_PROT_WRITE	BIT(4)
+#define EPT_VIOLATION_PROT_EXEC		BIT(5)
+#define EPT_VIOLATION_PROT_MASK		(EPT_VIOLATION_PROT_READ  | \
+					 EPT_VIOLATION_PROT_WRITE | \
+					 EPT_VIOLATION_PROT_EXEC)
+#define EPT_VIOLATION_GVA_IS_VALID	BIT(7)
+#define EPT_VIOLATION_GVA_TRANSLATED	BIT(8)
+
+#define EPT_VIOLATION_RWX_TO_PROT(__epte) (((__epte) & VMX_EPT_RWX_MASK) << 3)
+
+static_assert(EPT_VIOLATION_RWX_TO_PROT(VMX_EPT_RWX_MASK) ==
+	      (EPT_VIOLATION_PROT_READ | EPT_VIOLATION_PROT_WRITE | EPT_VIOLATION_PROT_EXEC));
 
 /*
  * Exit Qualifications for NOTIFY VM EXIT
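The net effect of this hunk is that the exit-qualification protection bits get their own named definitions (bits 3-5) instead of being derived from the EPT RWX mask via a shift. Below is a minimal userspace sketch, not kernel code, of the mapping EPT_VIOLATION_RWX_TO_PROT() performs; it assumes VMX_EPT_RWX_MASK is 0x7 (the R/W/X permission bits), matching its definition elsewhere in vmx.h:

    #include <assert.h>

    #define BIT(n)				(1UL << (n))
    #define VMX_EPT_RWX_MASK		0x7UL	/* readable | writable | executable, bits 0-2 */

    #define EPT_VIOLATION_PROT_READ		BIT(3)
    #define EPT_VIOLATION_PROT_WRITE	BIT(4)
    #define EPT_VIOLATION_PROT_EXEC		BIT(5)

    /* Shift the EPTE's RWX bits (0-2) into the exit qualification's
     * protection bits (3-5). */
    #define EPT_VIOLATION_RWX_TO_PROT(__epte) (((__epte) & VMX_EPT_RWX_MASK) << 3)

    int main(void)
    {
    	/* A read+execute EPTE (0b101) maps to PROT_READ | PROT_EXEC. */
    	assert(EPT_VIOLATION_RWX_TO_PROT(0x5UL) ==
    	       (EPT_VIOLATION_PROT_READ | EPT_VIOLATION_PROT_EXEC));
    	return 0;
    }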

arch/x86/kvm/mmu/paging_tmpl.h

Lines changed: 1 addition & 2 deletions

@@ -510,8 +510,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 		 * Note, pte_access holds the raw RWX bits from the EPTE, not
 		 * ACC_*_MASK flags!
 		 */
-		walker->fault.exit_qualification |= (pte_access & VMX_EPT_RWX_MASK) <<
-						    EPT_VIOLATION_RWX_SHIFT;
+		walker->fault.exit_qualification |= EPT_VIOLATION_RWX_TO_PROT(pte_access);
 	}
 #endif
 	walker->fault.address = addr;
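This change is purely mechanical: for every possible pte_access value, the removed open-coded shift and the new macro yield identical exit-qualification bits. A quick standalone check, again assuming the 0x7 mask and shift of 3 from vmx.h:

    #include <assert.h>

    #define VMX_EPT_RWX_MASK	0x7UL
    #define EPT_VIOLATION_RWX_SHIFT	3	/* the shift this commit removes */
    #define EPT_VIOLATION_RWX_TO_PROT(__epte) (((__epte) & VMX_EPT_RWX_MASK) << 3)

    int main(void)
    {
    	unsigned long pte_access;

    	/* Old expression and new macro agree for all 3-bit RWX values. */
    	for (pte_access = 0; pte_access <= VMX_EPT_RWX_MASK; pte_access++)
    		assert(((pte_access & VMX_EPT_RWX_MASK) << EPT_VIOLATION_RWX_SHIFT) ==
    		       EPT_VIOLATION_RWX_TO_PROT(pte_access));
    	return 0;
    }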

arch/x86/kvm/vmx/vmx.c

Lines changed: 75 additions & 31 deletions

@@ -2578,6 +2578,34 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
 	return ctl_opt & allowed;
 }
 
+#define vmx_check_entry_exit_pairs(pairs, entry_controls, exit_controls)	\
+({										\
+	int i, r = 0;								\
+										\
+	BUILD_BUG_ON(sizeof(pairs[0].entry_control) != sizeof(entry_controls));\
+	BUILD_BUG_ON(sizeof(pairs[0].exit_control) != sizeof(exit_controls));	\
+										\
+	for (i = 0; i < ARRAY_SIZE(pairs); i++) {				\
+		typeof(entry_controls) n_ctrl = pairs[i].entry_control;		\
+		typeof(exit_controls) x_ctrl = pairs[i].exit_control;		\
+										\
+		if (!(entry_controls & n_ctrl) == !(exit_controls & x_ctrl))	\
+			continue;						\
+										\
+		pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, "		\
+			     "entry = %llx (%llx), exit = %llx (%llx)\n",	\
+			     (u64)(entry_controls & n_ctrl), (u64)n_ctrl,	\
+			     (u64)(exit_controls & x_ctrl), (u64)x_ctrl);	\
+										\
+		if (error_on_inconsistent_vmcs_config)				\
+			r = -EIO;						\
+										\
+		entry_controls &= ~n_ctrl;					\
+		exit_controls &= ~x_ctrl;					\
+	}									\
+	r;									\
+})
+
 static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 			     struct vmx_capability *vmx_cap)
 {
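The vmcs_entry_exit_pairs table the macro consumes is not part of this diff; the sketch below models its assumed shape (an array of entry_control/exit_control pairs) and replays the macro's consistency check in plain C. Control names and bit positions here are illustrative, not the architectural VMCS controls:

    #include <stdio.h>

    #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

    /* Illustrative control bits only; not the real VMCS encodings. */
    #define ENTRY_LOAD_FOO	(1U << 2)
    #define EXIT_LOAD_FOO	(1U << 5)

    static const struct {
    	unsigned int entry_control;
    	unsigned int exit_control;
    } pairs[] = {
    	{ ENTRY_LOAD_FOO, EXIT_LOAD_FOO },
    };

    int main(void)
    {
    	unsigned int entry = ENTRY_LOAD_FOO;	/* set on entry... */
    	unsigned int exit = 0;			/* ...but not on exit */
    	unsigned int i;

    	for (i = 0; i < ARRAY_SIZE(pairs); i++) {
    		/* Same both-or-neither check the kernel macro performs. */
    		if (!(entry & pairs[i].entry_control) ==
    		    !(exit & pairs[i].exit_control))
    			continue;

    		printf("inconsistent pair: entry=%x exit=%x\n",
    		       entry & pairs[i].entry_control,
    		       exit & pairs[i].exit_control);

    		/* The macro strips the inconsistent pair from both sets. */
    		entry &= ~pairs[i].entry_control;
    		exit &= ~pairs[i].exit_control;
    	}
    	return 0;
    }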
@@ -2589,7 +2617,6 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 	u32 _vmentry_control = 0;
 	u64 basic_msr;
 	u64 misc_msr;
-	int i;
 
 	/*
 	 * LOAD/SAVE_DEBUG_CONTROLS are absent because both are mandatory.
@@ -2693,22 +2720,9 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 				&_vmentry_control))
 		return -EIO;
 
-	for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
-		u32 n_ctrl = vmcs_entry_exit_pairs[i].entry_control;
-		u32 x_ctrl = vmcs_entry_exit_pairs[i].exit_control;
-
-		if (!(_vmentry_control & n_ctrl) == !(_vmexit_control & x_ctrl))
-			continue;
-
-		pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n",
-			     _vmentry_control & n_ctrl, _vmexit_control & x_ctrl);
-
-		if (error_on_inconsistent_vmcs_config)
-			return -EIO;
-
-		_vmentry_control &= ~n_ctrl;
-		_vmexit_control &= ~x_ctrl;
-	}
+	if (vmx_check_entry_exit_pairs(vmcs_entry_exit_pairs,
+				       _vmentry_control, _vmexit_control))
+		return -EIO;
 
 	/*
 	 * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
@@ -5211,6 +5225,12 @@ bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
 	       (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
 }
 
+static bool is_xfd_nm_fault(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.guest_fpu.fpstate->xfd &&
+	       !kvm_is_cr0_bit_set(vcpu, X86_CR0_TS);
+}
+
 static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
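A standalone model of the new helper's behavior, with field names mirroring the diff but none of the KVM plumbing: a #NM qualifies as an XFD fault only when XFD is armed and CR0.TS is clear, since per the SDM a TS-induced #NM leaves XFD_ERR untouched:

    #include <stdbool.h>
    #include <stdio.h>

    /* Minimal model of the vCPU state the helper consults. */
    struct vcpu_model {
    	unsigned long long xfd;	/* guest_fpu.fpstate->xfd */
    	bool cr0_ts;		/* CR0.TS */
    };

    static bool is_xfd_nm_fault(const struct vcpu_model *v)
    {
    	/* XFD #NM requires XFD armed AND CR0.TS clear; a TS-induced
    	 * #NM does not modify XFD_ERR. */
    	return v->xfd && !v->cr0_ts;
    }

    int main(void)
    {
    	struct vcpu_model ts_fault  = { .xfd = 1, .cr0_ts = true };
    	struct vcpu_model xfd_fault = { .xfd = 1, .cr0_ts = false };

    	printf("TS #NM -> %d, XFD #NM -> %d\n",
    	       is_xfd_nm_fault(&ts_fault), is_xfd_nm_fault(&xfd_fault));
    	return 0;
    }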
@@ -5237,7 +5257,8 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 	 * point.
 	 */
 	if (is_nm_fault(intr_info)) {
-		kvm_queue_exception(vcpu, NM_VECTOR);
+		kvm_queue_exception_p(vcpu, NM_VECTOR,
+				      is_xfd_nm_fault(vcpu) ? vcpu->arch.guest_fpu.xfd_err : 0);
 		return 1;
 	}
 
@@ -5817,7 +5838,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
 		      ? PFERR_FETCH_MASK : 0;
 	/* ept page table entry is present? */
-	error_code |= (exit_qualification & EPT_VIOLATION_RWX_MASK)
+	error_code |= (exit_qualification & EPT_VIOLATION_PROT_MASK)
 		      ? PFERR_PRESENT_MASK : 0;
 
 	if (error_code & EPT_VIOLATION_GVA_IS_VALID)
@@ -5871,11 +5892,35 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
+/*
+ * Returns true if emulation is required (due to the vCPU having invalid state
+ * with unrestricted guest mode disabled) and KVM can't faithfully emulate the
+ * current vCPU state.
+ */
+static bool vmx_unhandleable_emulation_required(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	return vmx->emulation_required && !vmx->rmode.vm86_active &&
+	if (!vmx->emulation_required)
+		return false;
+
+	/*
+	 * It is architecturally impossible for emulation to be required when a
+	 * nested VM-Enter is pending completion, as VM-Enter will VM-Fail if
+	 * guest state is invalid and unrestricted guest is disabled, i.e. KVM
+	 * should synthesize VM-Fail instead of emulating L2 code.  This path
+	 * is only reachable if userspace modifies L2 guest state after KVM
+	 * has performed the nested VM-Enter consistency checks.
+	 */
+	if (vmx->nested.nested_run_pending)
+		return true;
+
+	/*
+	 * KVM only supports emulating exceptions if the vCPU is in Real Mode.
+	 * If emulation is required, KVM can't perform a successful VM-Enter to
+	 * inject the exception.
+	 */
+	return !vmx->rmode.vm86_active &&
 	       (kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected);
 }
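Stripped of the KVM types, the rewritten predicate is a three-step decision. A compact model follows (a hypothetical standalone function taking the flags as parameters, not the kernel signature):

    #include <stdbool.h>
    #include <stdio.h>

    static bool unhandleable_emulation_required(bool emulation_required,
    					    bool nested_run_pending,
    					    bool vm86_active,
    					    bool exception_pending_or_injected)
    {
    	if (!emulation_required)
    		return false;

    	/* Only reachable when userspace stuffed invalid L2 state mid
    	 * VM-Enter; KVM_RUN must be rejected. */
    	if (nested_run_pending)
    		return true;

    	/* Exceptions can only be emulated in Real Mode (vm86). */
    	return !vm86_active && exception_pending_or_injected;
    }

    int main(void)
    {
    	/* Userspace corrupted L2 state during nested VM-Enter: true. */
    	printf("%d\n", unhandleable_emulation_required(true, true, false, false));
    	return 0;
    }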

@@ -5898,7 +5943,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 		if (!kvm_emulate_instruction(vcpu, 0))
 			return 0;
 
-		if (vmx_emulation_required_with_pending_exception(vcpu)) {
+		if (vmx_unhandleable_emulation_required(vcpu)) {
 			kvm_prepare_emulation_failure_exit(vcpu);
 			return 0;
 		}
@@ -5922,7 +5967,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 
 int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
 {
-	if (vmx_emulation_required_with_pending_exception(vcpu)) {
+	if (vmx_unhandleable_emulation_required(vcpu)) {
 		kvm_prepare_emulation_failure_exit(vcpu);
 		return 0;
 	}
@@ -6997,16 +7042,15 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
 	 * MSR value is not clobbered by the host activity before the guest
 	 * has chance to consume it.
 	 *
-	 * Do not blindly read xfd_err here, since this exception might
-	 * be caused by L1 interception on a platform which doesn't
-	 * support xfd at all.
-	 *
-	 * Do it conditionally upon guest_fpu::xfd. xfd_err matters
-	 * only when xfd contains a non-zero value.
+	 * Update the guest's XFD_ERR if and only if XFD is enabled, as the #NM
+	 * interception may have been caused by L1 interception.  Per the SDM,
+	 * XFD_ERR is not modified for non-XFD #NM, i.e. if CR0.TS=1.
 	 *
-	 * Queuing exception is done in vmx_handle_exit. See comment there.
+	 * Note, XFD_ERR is updated _before_ the #NM interception check, i.e.
+	 * unlike CR2 and DR6, the value is not a payload that is attached to
+	 * the #NM exception.
 	 */
-	if (vcpu->arch.guest_fpu.fpstate->xfd)
+	if (is_xfd_nm_fault(vcpu))
 		rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
 }
