Skip to content

Commit 683412c

Browse files
mzhang3579 and bonzini
authored and committed
KVM: SEV: add cache flush to solve SEV cache incoherency issues
Flush the CPU caches when memory is reclaimed from an SEV guest (where reclaim also includes it being unmapped from KVM's memslots). Due to lack of coherency for SEV encrypted memory, failure to flush results in silent data corruption if userspace is malicious/broken and doesn't ensure SEV guest memory is properly pinned and unpinned. Cache coherency is not enforced across the VM boundary in SEV (AMD APM vol.2 Section 15.34.7). Confidential cachelines generated by confidential VM guests have to be explicitly flushed on the host side. If a memory page containing dirty confidential cachelines was released by a VM and reallocated to another user, the cachelines may corrupt the new user at a later time. KVM takes a shortcut by assuming all confidential memory remains pinned until the end of the VM lifetime. Therefore, KVM does not flush the cache at mmu_notifier invalidation events. Because of this incorrect assumption and the lack of cache flushing, malicious userspace can crash the host kernel by creating a malicious VM and continuously allocating/releasing unpinned confidential memory pages while the VM is running. Add cache flush operations to mmu_notifier operations to ensure that any physical memory leaving the guest VM gets flushed. In particular, hook the mmu_notifier_invalidate_range_start and mmu_notifier_release events and flush the cache accordingly. The hooks are invoked after releasing the mmu lock to avoid contention with other vCPUs. Cc: stable@vger.kernel.org Suggested-by: Sean Christopherson <seanjc@google.com> Reported-by: Mingwei Zhang <mizhang@google.com> Signed-off-by: Mingwei Zhang <mizhang@google.com> Message-Id: <20220421031407.2516575-4-mizhang@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent d45829b commit 683412c

File tree

8 files changed

+44
-3
lines changed

8 files changed

+44
-3
lines changed

arch/x86/include/asm/kvm-x86-ops.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ KVM_X86_OP_OPTIONAL(mem_enc_register_region)
118118
KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
119119
KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
120120
KVM_X86_OP_OPTIONAL(vm_move_enc_context_from)
121+
KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
121122
KVM_X86_OP(get_msr_feature)
122123
KVM_X86_OP(can_emulate_instruction)
123124
KVM_X86_OP(apic_init_signal_blocked)

arch/x86/include/asm/kvm_host.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1484,6 +1484,7 @@ struct kvm_x86_ops {
14841484
int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
14851485
int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
14861486
int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
1487+
void (*guest_memory_reclaimed)(struct kvm *kvm);
14871488

14881489
int (*get_msr_feature)(struct kvm_msr_entry *entry);
14891490

arch/x86/kvm/svm/sev.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2262,6 +2262,14 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
22622262
wbinvd_on_all_cpus();
22632263
}
22642264

2265+
void sev_guest_memory_reclaimed(struct kvm *kvm)
2266+
{
2267+
if (!sev_guest(kvm))
2268+
return;
2269+
2270+
wbinvd_on_all_cpus();
2271+
}
2272+
22652273
void sev_free_vcpu(struct kvm_vcpu *vcpu)
22662274
{
22672275
struct vcpu_svm *svm;

arch/x86/kvm/svm/svm.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4620,6 +4620,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
46204620
.mem_enc_ioctl = sev_mem_enc_ioctl,
46214621
.mem_enc_register_region = sev_mem_enc_register_region,
46224622
.mem_enc_unregister_region = sev_mem_enc_unregister_region,
4623+
.guest_memory_reclaimed = sev_guest_memory_reclaimed,
46234624

46244625
.vm_copy_enc_context_from = sev_vm_copy_enc_context_from,
46254626
.vm_move_enc_context_from = sev_vm_move_enc_context_from,

arch/x86/kvm/svm/svm.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,8 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
609609
struct kvm_enc_region *range);
610610
int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd);
611611
int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd);
612+
void sev_guest_memory_reclaimed(struct kvm *kvm);
613+
612614
void pre_sev_run(struct vcpu_svm *svm, int cpu);
613615
void __init sev_set_cpu_caps(void);
614616
void __init sev_hardware_setup(void);

arch/x86/kvm/x86.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9889,6 +9889,11 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
98899889
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
98909890
}
98919891

9892+
void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
9893+
{
9894+
static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
9895+
}
9896+
98929897
static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
98939898
{
98949899
if (!lapic_in_kernel(vcpu))

include/linux/kvm_host.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2219,6 +2219,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
22192219
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
22202220
unsigned long start, unsigned long end);
22212221

2222+
void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);
2223+
22222224
#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
22232225
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
22242226
#else

virt/kvm/kvm_main.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,10 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
164164
{
165165
}
166166

167+
__weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
168+
{
169+
}
170+
167171
bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
168172
{
169173
/*
@@ -357,6 +361,12 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
357361
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
358362
#endif
359363

364+
static void kvm_flush_shadow_all(struct kvm *kvm)
365+
{
366+
kvm_arch_flush_shadow_all(kvm);
367+
kvm_arch_guest_memory_reclaimed(kvm);
368+
}
369+
360370
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
361371
static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
362372
gfp_t gfp_flags)
@@ -485,12 +495,15 @@ typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
485495
typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
486496
unsigned long end);
487497

498+
typedef void (*on_unlock_fn_t)(struct kvm *kvm);
499+
488500
struct kvm_hva_range {
489501
unsigned long start;
490502
unsigned long end;
491503
pte_t pte;
492504
hva_handler_t handler;
493505
on_lock_fn_t on_lock;
506+
on_unlock_fn_t on_unlock;
494507
bool flush_on_ret;
495508
bool may_block;
496509
};
@@ -578,8 +591,11 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
578591
if (range->flush_on_ret && ret)
579592
kvm_flush_remote_tlbs(kvm);
580593

581-
if (locked)
594+
if (locked) {
582595
KVM_MMU_UNLOCK(kvm);
596+
if (!IS_KVM_NULL_FN(range->on_unlock))
597+
range->on_unlock(kvm);
598+
}
583599

584600
srcu_read_unlock(&kvm->srcu, idx);
585601

@@ -600,6 +616,7 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
600616
.pte = pte,
601617
.handler = handler,
602618
.on_lock = (void *)kvm_null_fn,
619+
.on_unlock = (void *)kvm_null_fn,
603620
.flush_on_ret = true,
604621
.may_block = false,
605622
};
@@ -619,6 +636,7 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
619636
.pte = __pte(0),
620637
.handler = handler,
621638
.on_lock = (void *)kvm_null_fn,
639+
.on_unlock = (void *)kvm_null_fn,
622640
.flush_on_ret = false,
623641
.may_block = false,
624642
};
@@ -687,6 +705,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
687705
.pte = __pte(0),
688706
.handler = kvm_unmap_gfn_range,
689707
.on_lock = kvm_inc_notifier_count,
708+
.on_unlock = kvm_arch_guest_memory_reclaimed,
690709
.flush_on_ret = true,
691710
.may_block = mmu_notifier_range_blockable(range),
692711
};
@@ -741,6 +760,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
741760
.pte = __pte(0),
742761
.handler = (void *)kvm_null_fn,
743762
.on_lock = kvm_dec_notifier_count,
763+
.on_unlock = (void *)kvm_null_fn,
744764
.flush_on_ret = false,
745765
.may_block = mmu_notifier_range_blockable(range),
746766
};
@@ -813,7 +833,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
813833
int idx;
814834

815835
idx = srcu_read_lock(&kvm->srcu);
816-
kvm_arch_flush_shadow_all(kvm);
836+
kvm_flush_shadow_all(kvm);
817837
srcu_read_unlock(&kvm->srcu, idx);
818838
}
819839

@@ -1225,7 +1245,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
12251245
WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait));
12261246
kvm->mn_active_invalidate_count = 0;
12271247
#else
1228-
kvm_arch_flush_shadow_all(kvm);
1248+
kvm_flush_shadow_all(kvm);
12291249
#endif
12301250
kvm_arch_destroy_vm(kvm);
12311251
kvm_destroy_devices(kvm);
@@ -1652,6 +1672,7 @@ static void kvm_invalidate_memslot(struct kvm *kvm,
16521672
* - kvm_is_visible_gfn (mmu_check_root)
16531673
*/
16541674
kvm_arch_flush_shadow_memslot(kvm, old);
1675+
kvm_arch_guest_memory_reclaimed(kvm);
16551676

16561677
/* Was released by kvm_swap_active_memslots, reacquire. */
16571678
mutex_lock(&kvm->slots_arch_lock);

0 commit comments

Comments
 (0)