
Commit 84b1349

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "A bit on the bigger side, mostly due to me being on vacation, then
  busy, then on parental leave, but there's nothing worrisome.

  ARM:
   - Multiple stolen time fixes, with a new capability to match x86
   - Fix for hugetlbfs mappings when PUD and PMD are the same level
   - Fix for hugetlbfs mappings when PTE mappings are enforced (dirty
     logging, for example)
   - Fix tracing output of 64bit values

  x86:
   - nSVM state restore fixes
   - Async page fault fixes
   - Lots of small fixes everywhere"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (25 commits)
  KVM: emulator: more strict rsm checks.
  KVM: nSVM: more strict SMM checks when returning to nested guest
  SVM: nSVM: setup nested msr permission bitmap on nested state load
  SVM: nSVM: correctly restore GIF on vmexit from nesting after migration
  x86/kvm: don't forget to ACK async PF IRQ
  x86/kvm: properly use DEFINE_IDTENTRY_SYSVEC() macro
  KVM: VMX: Don't freeze guest when event delivery causes an APIC-access exit
  KVM: SVM: avoid emulation with stale next_rip
  KVM: x86: always allow writing '0' to MSR_KVM_ASYNC_PF_EN
  KVM: SVM: Periodically schedule when unregistering regions on destroy
  KVM: MIPS: Change the definition of kvm type
  kvm x86/mmu: use KVM_REQ_MMU_SYNC to sync when needed
  KVM: nVMX: Fix the update value of nested load IA32_PERF_GLOBAL_CTRL control
  KVM: fix memory leak in kvm_io_bus_unregister_dev()
  KVM: Check the allocation of pv cpu mask
  KVM: nVMX: Update VMCS02 when L2 PAE PDPTE updates detected
  KVM: arm64: Update page shift if stage 2 block mapping not supported
  KVM: arm64: Fix address truncation in traces
  KVM: arm64: Do not try to map PUDs when they are folded into PMD
  arm64/x86: KVM: Introduce steal-time cap
  ...
2 parents: b952e97 + 37f66bb

21 files changed: +180 −81 lines

Documentation/virt/kvm/api.rst

Lines changed: 18 additions & 4 deletions
@@ -6130,7 +6130,7 @@ HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
 8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
 -----------------------------------
 
-:Architecture: x86
+:Architectures: x86
 
 This capability indicates that KVM running on top of Hyper-V hypervisor
 enables Direct TLB flush for its guests meaning that TLB flush
@@ -6143,19 +6143,33 @@ in CPUID and only exposes Hyper-V identification. In this case, guest
 thinks it's running on Hyper-V and only use Hyper-V hypercalls.
 
 8.22 KVM_CAP_S390_VCPU_RESETS
+-----------------------------
 
-Architectures: s390
+:Architectures: s390
 
 This capability indicates that the KVM_S390_NORMAL_RESET and
 KVM_S390_CLEAR_RESET ioctls are available.
 
 8.23 KVM_CAP_S390_PROTECTED
+---------------------------
 
-Architecture: s390
-
+:Architectures: s390
 
 This capability indicates that the Ultravisor has been initialized and
 KVM can therefore start protected VMs.
 This capability governs the KVM_S390_PV_COMMAND ioctl and the
 KVM_MP_STATE_LOAD MP_STATE. KVM_SET_MP_STATE can fail for protected
 guests when the state change is invalid.
+
+8.24 KVM_CAP_STEAL_TIME
+-----------------------
+
+:Architectures: arm64, x86
+
+This capability indicates that KVM supports steal time accounting.
+When steal time accounting is supported it may be enabled with
+architecture-specific interfaces. This capability and the architecture-
+specific interfaces must be consistent, i.e. if one says the feature
+is supported, than the other should as well and vice versa. For arm64
+see Documentation/virt/kvm/devices/vcpu.rst "KVM_ARM_VCPU_PVTIME_CTRL".
+For x86 see Documentation/virt/kvm/msr.rst "MSR_KVM_STEAL_TIME".
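
For context (not part of this diff), the new capability is probed like any other KVM capability: userspace issues KVM_CHECK_EXTENSION and treats a positive return value as "supported". A minimal sketch, assuming kernel headers new enough to define KVM_CAP_STEAL_TIME, with error handling omitted:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);
	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);	/* default machine type */

	/* KVM_CHECK_EXTENSION returns 0 if the capability is absent,
	 * a positive value if it is present. */
	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_STEAL_TIME) > 0)
		printf("KVM_CAP_STEAL_TIME: supported\n");
	else
		printf("KVM_CAP_STEAL_TIME: not supported\n");

	return 0;
}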

arch/arm64/include/asm/kvm_host.h

Lines changed: 1 addition & 1 deletion
@@ -368,7 +368,6 @@ struct kvm_vcpu_arch {
 
 	/* Guest PV state */
 	struct {
-		u64 steal;
 		u64 last_steal;
 		gpa_t base;
 	} steal;
@@ -544,6 +543,7 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
 gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
 void kvm_update_stolen_time(struct kvm_vcpu *vcpu);
 
+bool kvm_arm_pvtime_supported(void);
 int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
 			    struct kvm_device_attr *attr);
 int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,

arch/arm64/kvm/arm.c

Lines changed: 3 additions & 0 deletions
@@ -206,6 +206,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		 */
 		r = 1;
 		break;
+	case KVM_CAP_STEAL_TIME:
+		r = kvm_arm_pvtime_supported();
+		break;
 	default:
 		r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
 		break;

arch/arm64/kvm/mmu.c

Lines changed: 7 additions & 1 deletion
@@ -1877,6 +1877,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	    !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
 		force_pte = true;
 		vma_pagesize = PAGE_SIZE;
+		vma_shift = PAGE_SHIFT;
 	}
 
 	/*
@@ -1970,7 +1971,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		   (fault_status == FSC_PERM &&
 		    stage2_is_exec(mmu, fault_ipa, vma_pagesize));
 
-	if (vma_pagesize == PUD_SIZE) {
+	/*
+	 * If PUD_SIZE == PMD_SIZE, there is no real PUD level, and
+	 * all we have is a 2-level page table. Trying to map a PUD in
+	 * this case would be fatally wrong.
+	 */
+	if (PUD_SIZE != PMD_SIZE && vma_pagesize == PUD_SIZE) {
 		pud_t new_pud = kvm_pfn_pud(pfn, mem_type);
 
 		new_pud = kvm_pud_mkhuge(new_pud);
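
To see why the extra PUD_SIZE != PMD_SIZE guard is needed: on configurations with fewer translation levels, the PUD folds into the level below, so PUD_SIZE and PMD_SIZE collapse to the same value and a size comparison alone can no longer identify a genuine PUD mapping. A stand-alone sketch with placeholder constants (the real values come from the arm64 pgtable headers, not this snippet):

#include <stdio.h>

/* Placeholder values for a 2-level configuration (e.g. 64KiB pages):
 * with no real PUD level, PUD_SIZE folds down to PMD_SIZE. */
#define PMD_SIZE	(1UL << 29)
#define PUD_SIZE	PMD_SIZE

int main(void)
{
	unsigned long vma_pagesize = PUD_SIZE;

	/* Mirrors the fixed check: only install a PUD block mapping when a
	 * distinct PUD level actually exists. */
	if (PUD_SIZE != PMD_SIZE && vma_pagesize == PUD_SIZE)
		printf("map at PUD level\n");
	else
		printf("fall back to PMD or PTE mapping\n");

	return 0;
}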

arch/arm64/kvm/pvtime.c

Lines changed: 13 additions & 16 deletions
@@ -13,25 +13,22 @@
 void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
 {
 	struct kvm *kvm = vcpu->kvm;
-	u64 steal;
-	__le64 steal_le;
-	u64 offset;
-	int idx;
 	u64 base = vcpu->arch.steal.base;
+	u64 last_steal = vcpu->arch.steal.last_steal;
+	u64 offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
+	u64 steal = 0;
+	int idx;
 
 	if (base == GPA_INVALID)
 		return;
 
-	/* Let's do the local bookkeeping */
-	steal = vcpu->arch.steal.steal;
-	steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
-	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
-	vcpu->arch.steal.steal = steal;
-
-	steal_le = cpu_to_le64(steal);
 	idx = srcu_read_lock(&kvm->srcu);
-	offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
-	kvm_put_guest(kvm, base + offset, steal_le, u64);
+	if (!kvm_get_guest(kvm, base + offset, steal)) {
+		steal = le64_to_cpu(steal);
+		vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay);
+		steal += vcpu->arch.steal.last_steal - last_steal;
+		kvm_put_guest(kvm, base + offset, cpu_to_le64(steal));
+	}
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
@@ -43,7 +40,8 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
 	switch (feature) {
 	case ARM_SMCCC_HV_PV_TIME_FEATURES:
 	case ARM_SMCCC_HV_PV_TIME_ST:
-		val = SMCCC_RET_SUCCESS;
+		if (vcpu->arch.steal.base != GPA_INVALID)
+			val = SMCCC_RET_SUCCESS;
 		break;
 	}
 
@@ -64,7 +62,6 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
 	 * Start counting stolen time from the time the guest requests
 	 * the feature enabled.
 	 */
-	vcpu->arch.steal.steal = 0;
 	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
 
 	idx = srcu_read_lock(&kvm->srcu);
@@ -74,7 +71,7 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
 	return base;
 }
 
-static bool kvm_arm_pvtime_supported(void)
+bool kvm_arm_pvtime_supported(void)
 {
 	return !!sched_info_on();
 }
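
Stripped of the kernel plumbing, the rewritten kvm_update_stolen_time() is a read-modify-write of the guest-visible record: read the current stolen value back from guest memory, add the run delay accumulated since the last update, and write the sum back. A self-contained sketch of that arithmetic (the record struct and helper below are stand-ins, not the kernel's kvm_get_guest()/kvm_put_guest() accessors):

#include <stdint.h>
#include <stdio.h>

struct stolen_time_record {
	uint64_t stolen_time;	/* little-endian in the real shared page */
};

/* Accumulate newly observed run delay into the guest-visible record.
 * Returns the updated last_steal value the caller should remember. */
static uint64_t update_stolen_time(struct stolen_time_record *rec,
				   uint64_t run_delay_now,
				   uint64_t last_steal)
{
	uint64_t steal = rec->stolen_time;	/* read back the guest copy */

	steal += run_delay_now - last_steal;	/* add the delta since last update */
	rec->stolen_time = steal;		/* publish to the guest */

	return run_delay_now;
}

int main(void)
{
	struct stolen_time_record rec = { .stolen_time = 100 };
	uint64_t last = 50;

	/* Adds 80 - 50 = 30 to the stolen total. */
	last = update_stolen_time(&rec, 80, last);
	printf("stolen_time = %llu, last_steal = %llu\n",
	       (unsigned long long)rec.stolen_time,
	       (unsigned long long)last);	/* prints 130 and 80 */
	return 0;
}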

arch/arm64/kvm/trace_arm.h

Lines changed: 8 additions & 8 deletions
@@ -23,7 +23,7 @@ TRACE_EVENT(kvm_entry,
 		__entry->vcpu_pc = vcpu_pc;
 	),
 
-	TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
+	TP_printk("PC: 0x%016lx", __entry->vcpu_pc)
 );
 
 TRACE_EVENT(kvm_exit,
@@ -42,7 +42,7 @@ TRACE_EVENT(kvm_exit,
 		__entry->vcpu_pc = vcpu_pc;
 	),
 
-	TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
+	TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%016lx",
 		  __print_symbolic(__entry->ret, kvm_arm_exception_type),
 		  __entry->esr_ec,
 		  __print_symbolic(__entry->esr_ec, kvm_arm_exception_class),
@@ -69,7 +69,7 @@ TRACE_EVENT(kvm_guest_fault,
 		__entry->ipa = ipa;
 	),
 
-	TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
+	TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#016lx",
 		  __entry->ipa, __entry->hsr,
 		  __entry->hxfar, __entry->vcpu_pc)
 );
@@ -131,7 +131,7 @@ TRACE_EVENT(kvm_mmio_emulate,
 		__entry->cpsr = cpsr;
 	),
 
-	TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)",
+	TP_printk("Emulate MMIO at: 0x%016lx (instr: %08lx, cpsr: %08lx)",
 		  __entry->vcpu_pc, __entry->instr, __entry->cpsr)
 );
 
@@ -149,7 +149,7 @@ TRACE_EVENT(kvm_unmap_hva_range,
 		__entry->end = end;
 	),
 
-	TP_printk("mmu notifier unmap range: %#08lx -- %#08lx",
+	TP_printk("mmu notifier unmap range: %#016lx -- %#016lx",
 		  __entry->start, __entry->end)
 );
 
@@ -165,7 +165,7 @@ TRACE_EVENT(kvm_set_spte_hva,
 		__entry->hva = hva;
 	),
 
-	TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
+	TP_printk("mmu notifier set pte hva: %#016lx", __entry->hva)
 );
 
 TRACE_EVENT(kvm_age_hva,
@@ -182,7 +182,7 @@ TRACE_EVENT(kvm_age_hva,
 		__entry->end = end;
 	),
 
-	TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
+	TP_printk("mmu notifier age hva: %#016lx -- %#016lx",
 		  __entry->start, __entry->end)
 );
 
@@ -198,7 +198,7 @@ TRACE_EVENT(kvm_test_age_hva,
 		__entry->hva = hva;
 	),
 
-	TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
+	TP_printk("mmu notifier test age hva: %#016lx", __entry->hva)
 );
 
 TRACE_EVENT(kvm_set_way_flush,
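
These trace changes (and the matching ones in trace_handle_exit.h below) only widen the zero-padding: with a 64-bit value, %08lx still prints all significant digits but pads to just eight, so addresses of different magnitudes no longer line up in the trace, while %016lx gives a fixed 16-digit field. A tiny illustration, assuming an LP64 target where unsigned long is 64 bits (as on arm64):

#include <stdio.h>

int main(void)
{
	unsigned long low_pc  = 0x80001234UL;		/* example small address */
	unsigned long high_pc = 0xffff800010001234UL;	/* example kernel-range address */

	/* Old format: pads to 8 digits, so mixed-magnitude addresses
	 * render with different widths. */
	printf("PC: 0x%08lx\nPC: 0x%08lx\n", low_pc, high_pc);

	/* New format: always 16 digits, full 64-bit width. */
	printf("PC: 0x%016lx\nPC: 0x%016lx\n", low_pc, high_pc);

	return 0;
}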

arch/arm64/kvm/trace_handle_exit.h

Lines changed: 3 additions & 3 deletions
@@ -22,7 +22,7 @@ TRACE_EVENT(kvm_wfx_arm64,
 		__entry->is_wfe = is_wfe;
 	),
 
-	TP_printk("guest executed wf%c at: 0x%08lx",
+	TP_printk("guest executed wf%c at: 0x%016lx",
 		  __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
 );
 
@@ -42,7 +42,7 @@ TRACE_EVENT(kvm_hvc_arm64,
 		__entry->imm = imm;
 	),
 
-	TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
+	TP_printk("HVC at 0x%016lx (r0: 0x%016lx, imm: 0x%lx)",
 		  __entry->vcpu_pc, __entry->r0, __entry->imm)
 );
 
@@ -135,7 +135,7 @@ TRACE_EVENT(trap_reg,
 		__entry->write_value = write_value;
 	),
 
-	TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
+	TP_printk("%s %s reg %d (0x%016llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
 );
 
 TRACE_EVENT(kvm_handle_sys_reg,

arch/mips/kvm/mips.c

Lines changed: 2 additions & 0 deletions
@@ -137,6 +137,8 @@ extern void kvm_init_loongson_ipi(struct kvm *kvm);
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
 	switch (type) {
+	case KVM_VM_MIPS_AUTO:
+		break;
 #ifdef CONFIG_KVM_MIPS_VZ
 	case KVM_VM_MIPS_VZ:
 #else
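
The MIPS change teaches kvm_arch_init_vm() to accept an additional VM type; the type value is whatever userspace passes as the argument to KVM_CREATE_VM. A rough userspace sketch (error handling trimmed; the helper name and the use of type 0 for the default machine are illustrative):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

/* Create a VM with an explicit machine type; returns the VM fd or -1. */
static int create_vm(unsigned long type)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);
	int vm_fd;

	if (kvm_fd < 0)
		return -1;

	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, type);	/* type selects the machine model */
	close(kvm_fd);
	return vm_fd;
}

int main(void)
{
	int vm_fd = create_vm(0);	/* 0 = default machine type */

	return vm_fd < 0 ? 1 : 0;
}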

arch/x86/kernel/kvm.c

Lines changed: 20 additions & 6 deletions
@@ -270,9 +270,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	u32 token;
-	irqentry_state_t state;
 
-	state = irqentry_enter(regs);
+	ack_APIC_irq();
 
 	inc_irq_stat(irq_hv_callback_count);
 
@@ -283,7 +282,6 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
 		wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
 	}
 
-	irqentry_exit(regs, state);
 	set_irq_regs(old_regs);
 }
 
@@ -654,7 +652,6 @@ static void __init kvm_guest_init(void)
 	}
 
 	if (pv_tlb_flush_supported()) {
-		pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
 		pv_ops.mmu.tlb_remove_table = tlb_remove_table;
 		pr_info("KVM setup pv remote TLB flush\n");
 	}
@@ -767,6 +764,14 @@ static __init int activate_jump_labels(void)
 }
 arch_initcall(activate_jump_labels);
 
+static void kvm_free_pv_cpu_mask(void)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu)
+		free_cpumask_var(per_cpu(__pv_cpu_mask, cpu));
+}
+
 static __init int kvm_alloc_cpumask(void)
 {
 	int cpu;
@@ -785,11 +790,20 @@ static __init int kvm_alloc_cpumask(void)
 
 	if (alloc)
 		for_each_possible_cpu(cpu) {
-			zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
-				GFP_KERNEL, cpu_to_node(cpu));
+			if (!zalloc_cpumask_var_node(
+				per_cpu_ptr(&__pv_cpu_mask, cpu),
+				GFP_KERNEL, cpu_to_node(cpu))) {
+				goto zalloc_cpumask_fail;
+			}
 		}
 
+	apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
+	pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
 	return 0;
+
+zalloc_cpumask_fail:
+	kvm_free_pv_cpu_mask();
+	return -ENOMEM;
 }
 arch_initcall(kvm_alloc_cpumask);
 

arch/x86/kvm/emulate.c

Lines changed: 17 additions & 5 deletions
@@ -2505,9 +2505,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
 		*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
 
 	val = GET_SMSTATE(u32, smstate, 0x7fcc);
-	ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
+
+	if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1))
+		return X86EMUL_UNHANDLEABLE;
+
 	val = GET_SMSTATE(u32, smstate, 0x7fc8);
-	ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+
+	if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1))
+		return X86EMUL_UNHANDLEABLE;
 
 	selector = GET_SMSTATE(u32, smstate, 0x7fc4);
 	set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
@@ -2560,16 +2565,23 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
 	ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
 
 	val = GET_SMSTATE(u32, smstate, 0x7f68);
-	ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
+
+	if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1))
+		return X86EMUL_UNHANDLEABLE;
+
 	val = GET_SMSTATE(u32, smstate, 0x7f60);
-	ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+
+	if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1))
+		return X86EMUL_UNHANDLEABLE;
 
 	cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
 	cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
 	cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
 	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
 	val = GET_SMSTATE(u64, smstate, 0x7ed0);
-	ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
+
+	if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
+		return X86EMUL_UNHANDLEABLE;
 
 	selector = GET_SMSTATE(u32, smstate, 0x7e90);
 	rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
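
The emulator change is the usual check-every-step pattern: each state-restore call can now fail, and a failure aborts the whole RSM instead of silently continuing with partially restored state. A generic sketch of the idea (the function names, return codes, and register values below are placeholders, not the kernel's emulator API):

#include <stdio.h>

#define EMUL_CONTINUE      0
#define EMUL_UNHANDLEABLE  1

/* Placeholder for a state-restore step that may be rejected,
 * e.g. because the saved value fails validation. */
static int restore_register(const char *name, unsigned long val)
{
	printf("restoring %s = %#lx\n", name, val);
	return EMUL_CONTINUE;
}

/* Abort the whole restore as soon as any single step fails, rather than
 * ignoring the return value and carrying on. */
static int restore_state(void)
{
	if (restore_register("dr6", 0xffff0ff0))
		return EMUL_UNHANDLEABLE;
	if (restore_register("dr7", 0x400))
		return EMUL_UNHANDLEABLE;
	if (restore_register("efer", 0xd01))
		return EMUL_UNHANDLEABLE;
	return EMUL_CONTINUE;
}

int main(void)
{
	return restore_state();
}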
