Skip to content

Commit 4d9a677

Browse files
committed
Merge tag 'kvm-x86-misc-6.15' of https://github.com/kvm-x86/linux into HEAD
KVM x86 misc changes for 6.15: - Fix a bug in PIC emulation that caused KVM to emit a spurious KVM_REQ_EVENT. - Add a helper to consolidate handling of mp_state transitions, and use it to clear pv_unhalted whenever a vCPU is made RUNNABLE. - Defer runtime CPUID updates until KVM emulates a CPUID instruction, to coalesce updates when multiple pieces of vCPU state are changing, e.g. as part of a nested transition. - Fix a variety of nested emulation bugs, and add VMX support for synthesizing nested VM-Exit on interception (instead of injecting #UD into L2). - Drop "support" for PV Async #PF with protected guests without SEND_ALWAYS, as KVM can't get the current CPL. - Misc cleanups
2 parents 4286a3e + e6c8728 commit 4d9a677

File tree

20 files changed

+416
-181
lines changed

20 files changed

+416
-181
lines changed

arch/x86/include/asm/kvm_host.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -881,6 +881,7 @@ struct kvm_vcpu_arch {
881881

882882
int cpuid_nent;
883883
struct kvm_cpuid_entry2 *cpuid_entries;
884+
bool cpuid_dynamic_bits_dirty;
884885
bool is_amd_compatible;
885886

886887
/*
@@ -998,8 +999,8 @@ struct kvm_vcpu_arch {
998999
u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */
9991000
u16 vec;
10001001
u32 id;
1001-
bool send_user_only;
10021002
u32 host_apf_flags;
1003+
bool send_always;
10031004
bool delivery_as_pf_vmexit;
10041005
bool pageready_pending;
10051006
} apf;
@@ -1355,8 +1356,6 @@ struct kvm_arch {
13551356

13561357
u64 shadow_mmio_value;
13571358

1358-
struct iommu_domain *iommu_domain;
1359-
bool iommu_noncoherent;
13601359
#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
13611360
atomic_t noncoherent_dma_count;
13621361
#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
@@ -2166,8 +2165,8 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
21662165
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
21672166
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
21682167
void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
2169-
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
2170-
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
2168+
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
2169+
bool has_error_code, u32 error_code);
21712170
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
21722171
void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
21732172
struct x86_exception *fault);

arch/x86/kvm/cpuid.c

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -58,25 +58,24 @@ void __init kvm_init_xstate_sizes(void)
5858

5959
u32 xstate_required_size(u64 xstate_bv, bool compacted)
6060
{
61-
int feature_bit = 0;
6261
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
62+
int i;
6363

6464
xstate_bv &= XFEATURE_MASK_EXTEND;
65-
while (xstate_bv) {
66-
if (xstate_bv & 0x1) {
67-
struct cpuid_xstate_sizes *xs = &xstate_sizes[feature_bit];
68-
u32 offset;
69-
70-
/* ECX[1]: 64B alignment in compacted form */
71-
if (compacted)
72-
offset = (xs->ecx & 0x2) ? ALIGN(ret, 64) : ret;
73-
else
74-
offset = xs->ebx;
75-
ret = max(ret, offset + xs->eax);
76-
}
65+
for (i = XFEATURE_YMM; i < ARRAY_SIZE(xstate_sizes) && xstate_bv; i++) {
66+
struct cpuid_xstate_sizes *xs = &xstate_sizes[i];
67+
u32 offset;
68+
69+
if (!(xstate_bv & BIT_ULL(i)))
70+
continue;
7771

78-
xstate_bv >>= 1;
79-
feature_bit++;
72+
/* ECX[1]: 64B alignment in compacted form */
73+
if (compacted)
74+
offset = (xs->ecx & 0x2) ? ALIGN(ret, 64) : ret;
75+
else
76+
offset = xs->ebx;
77+
ret = max(ret, offset + xs->eax);
78+
xstate_bv &= ~BIT_ULL(i);
8079
}
8180

8281
return ret;
@@ -196,6 +195,7 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
196195
}
197196

198197
static u32 kvm_apply_cpuid_pv_features_quirk(struct kvm_vcpu *vcpu);
198+
static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
199199

200200
/* Check whether the supplied CPUID data is equal to what is already set for the vCPU. */
201201
static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
@@ -300,10 +300,12 @@ static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
300300
guest_cpu_cap_change(vcpu, x86_feature, has_feature);
301301
}
302302

303-
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
303+
static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
304304
{
305305
struct kvm_cpuid_entry2 *best;
306306

307+
vcpu->arch.cpuid_dynamic_bits_dirty = false;
308+
307309
best = kvm_find_cpuid_entry(vcpu, 1);
308310
if (best) {
309311
kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSXSAVE,
@@ -333,7 +335,6 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
333335
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
334336
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
335337
}
336-
EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
337338

338339
static bool kvm_cpuid_has_hyperv(struct kvm_vcpu *vcpu)
339340
{
@@ -646,6 +647,9 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
646647
if (cpuid->nent < vcpu->arch.cpuid_nent)
647648
return -E2BIG;
648649

650+
if (vcpu->arch.cpuid_dynamic_bits_dirty)
651+
kvm_update_cpuid_runtime(vcpu);
652+
649653
if (copy_to_user(entries, vcpu->arch.cpuid_entries,
650654
vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
651655
return -EFAULT;
@@ -1704,7 +1708,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
17041708
phys_as = entry->eax & 0xff;
17051709
g_phys_as = phys_as;
17061710
if (kvm_mmu_get_max_tdp_level() < 5)
1707-
g_phys_as = min(g_phys_as, 48);
1711+
g_phys_as = min(g_phys_as, 48U);
17081712
}
17091713

17101714
entry->eax = phys_as | (virt_as << 8) | (g_phys_as << 16);
@@ -1769,13 +1773,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
17691773

17701774
cpuid_entry_override(entry, CPUID_8000_0022_EAX);
17711775

1772-
if (kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2))
1773-
ebx.split.num_core_pmc = kvm_pmu_cap.num_counters_gp;
1774-
else if (kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE))
1775-
ebx.split.num_core_pmc = AMD64_NUM_COUNTERS_CORE;
1776-
else
1777-
ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
1778-
1776+
ebx.split.num_core_pmc = kvm_pmu_cap.num_counters_gp;
17791777
entry->ebx = ebx.full;
17801778
break;
17811779
}
@@ -1985,6 +1983,9 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
19851983
struct kvm_cpuid_entry2 *entry;
19861984
bool exact, used_max_basic = false;
19871985

1986+
if (vcpu->arch.cpuid_dynamic_bits_dirty)
1987+
kvm_update_cpuid_runtime(vcpu);
1988+
19881989
entry = kvm_find_cpuid_entry_index(vcpu, function, index);
19891990
exact = !!entry;
19901991

@@ -2000,7 +2001,8 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
20002001
*edx = entry->edx;
20012002
if (function == 7 && index == 0) {
20022003
u64 data;
2003-
if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
2004+
if ((*ebx & (feature_bit(RTM) | feature_bit(HLE))) &&
2005+
!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
20042006
(data & TSX_CTRL_CPUID_CLEAR))
20052007
*ebx &= ~(feature_bit(RTM) | feature_bit(HLE));
20062008
} else if (function == 0x80000007) {

arch/x86/kvm/cpuid.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
1111
void kvm_set_cpu_caps(void);
1212

1313
void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
14-
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
1514
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
1615
u32 function, u32 index);
1716
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
@@ -232,6 +231,14 @@ static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
232231
{
233232
unsigned int x86_leaf = __feature_leaf(x86_feature);
234233

234+
/*
235+
* Except for MWAIT, querying dynamic feature bits is disallowed, so
236+
* that KVM can defer runtime updates until the next CPUID emulation.
237+
*/
238+
BUILD_BUG_ON(x86_feature == X86_FEATURE_APIC ||
239+
x86_feature == X86_FEATURE_OSXSAVE ||
240+
x86_feature == X86_FEATURE_OSPKE);
241+
235242
return vcpu->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature);
236243
}
237244

arch/x86/kvm/emulate.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -477,8 +477,11 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
477477
.dst_val = ctxt->dst.val64,
478478
.src_bytes = ctxt->src.bytes,
479479
.dst_bytes = ctxt->dst.bytes,
480+
.src_type = ctxt->src.type,
481+
.dst_type = ctxt->dst.type,
480482
.ad_bytes = ctxt->ad_bytes,
481-
.next_rip = ctxt->eip,
483+
.rip = ctxt->eip,
484+
.next_rip = ctxt->_eip,
482485
};
483486

484487
return ctxt->ops->intercept(ctxt, &info, stage);

arch/x86/kvm/i8259.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ static void pic_irq_request(struct kvm *kvm, int level)
567567
{
568568
struct kvm_pic *s = kvm->arch.vpic;
569569

570-
if (!s->output)
570+
if (!s->output && level)
571571
s->wakeup_needed = true;
572572
s->output = level;
573573
}

arch/x86/kvm/kvm_emulate.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,10 @@ struct x86_instruction_info {
4444
u64 dst_val; /* value of destination operand */
4545
u8 src_bytes; /* size of source operand */
4646
u8 dst_bytes; /* size of destination operand */
47+
u8 src_type; /* type of source operand */
48+
u8 dst_type; /* type of destination operand */
4749
u8 ad_bytes; /* size of src/dst address */
50+
u64 rip; /* rip of the instruction */
4851
u64 next_rip; /* rip following the instruction */
4952
};
5053

@@ -272,8 +275,10 @@ struct operand {
272275
};
273276
};
274277

278+
#define X86_MAX_INSTRUCTION_LENGTH 15
279+
275280
struct fetch_cache {
276-
u8 data[15];
281+
u8 data[X86_MAX_INSTRUCTION_LENGTH];
277282
u8 *ptr;
278283
u8 *end;
279284
};

arch/x86/kvm/lapic.c

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -221,13 +221,6 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
221221
}
222222
}
223223

224-
static void kvm_apic_map_free(struct rcu_head *rcu)
225-
{
226-
struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
227-
228-
kvfree(map);
229-
}
230-
231224
static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
232225
struct kvm_vcpu *vcpu,
233226
bool *xapic_id_mismatch)
@@ -489,7 +482,7 @@ static void kvm_recalculate_apic_map(struct kvm *kvm)
489482
mutex_unlock(&kvm->arch.apic_map_lock);
490483

491484
if (old)
492-
call_rcu(&old->rcu, kvm_apic_map_free);
485+
kvfree_rcu(old, rcu);
493486

494487
kvm_make_scan_ioapic_request(kvm);
495488
}
@@ -2593,7 +2586,7 @@ static void __kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value)
25932586
vcpu->arch.apic_base = value;
25942587

25952588
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2596-
kvm_update_cpuid_runtime(vcpu);
2589+
vcpu->arch.cpuid_dynamic_bits_dirty = true;
25972590

25982591
if (!apic)
25992592
return;
@@ -3397,9 +3390,9 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
33973390
if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
33983391
kvm_vcpu_reset(vcpu, true);
33993392
if (kvm_vcpu_is_bsp(apic->vcpu))
3400-
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3393+
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
34013394
else
3402-
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
3395+
kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED);
34033396
}
34043397
if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) {
34053398
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
@@ -3408,7 +3401,7 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
34083401
sipi_vector = apic->sipi_vector;
34093402
kvm_x86_call(vcpu_deliver_sipi_vector)(vcpu,
34103403
sipi_vector);
3411-
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3404+
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
34123405
}
34133406
}
34143407
return 0;

arch/x86/kvm/smm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ void enter_smm(struct kvm_vcpu *vcpu)
358358
goto error;
359359
#endif
360360

361-
kvm_update_cpuid_runtime(vcpu);
361+
vcpu->arch.cpuid_dynamic_bits_dirty = true;
362362
kvm_mmu_reset_context(vcpu);
363363
return;
364364
error:

arch/x86/kvm/svm/nested.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -994,7 +994,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
994994
kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
995995

996996
/* in case we halted in L2 */
997-
svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
997+
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
998998

999999
/* Give the current vmcb to the guest */
10001000

arch/x86/kvm/svm/sev.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3271,7 +3271,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
32713271

32723272
if (kvm_ghcb_xcr0_is_valid(svm)) {
32733273
vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
3274-
kvm_update_cpuid_runtime(vcpu);
3274+
vcpu->arch.cpuid_dynamic_bits_dirty = true;
32753275
}
32763276

32773277
/* Copy the GHCB exit information into the VMCB fields */
@@ -3855,7 +3855,7 @@ static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)
38553855

38563856
/* Mark the vCPU as offline and not runnable */
38573857
vcpu->arch.pv.pv_unhalted = false;
3858-
vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
3858+
kvm_set_mp_state(vcpu, KVM_MP_STATE_HALTED);
38593859

38603860
/* Clear use of the VMSA */
38613861
svm->vmcb->control.vmsa_pa = INVALID_PAGE;
@@ -3893,8 +3893,7 @@ static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)
38933893
svm->vmcb->control.vmsa_pa = pfn_to_hpa(pfn);
38943894

38953895
/* Mark the vCPU as runnable */
3896-
vcpu->arch.pv.pv_unhalted = false;
3897-
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3896+
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
38983897

38993898
svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;
39003899

0 commit comments

Comments
 (0)