Skip to content

Commit 3fc3f71

Browse files
yamahata authored and
bonzini committed
KVM: x86/mmu: Support GFN direct bits
Teach the MMU to map guest GFNs at a massaged position on the TDP, to aid in implementing TDX shared memory. Like other CoCo technologies, TDX has the concept of private and shared memory. For TDX the private and shared mappings are managed on separate EPT roots. The private half is managed indirectly through calls into a protected runtime environment called the TDX module, where the shared half is managed within KVM in normal page tables. For TDX, the shared half will be mapped in the higher alias, with a "shared bit" set in the GPA. However, KVM will still manage it with the same memslots as the private half. This means memslot lookups and zapping operations will be provided with a GFN without the shared bit set. So KVM will either need to apply or strip the shared bit before mapping or zapping the shared EPT. Having GFNs sometimes have the shared bit and sometimes not would make the code confusing. So instead arrange the code such that GFNs never have the shared bit set. Create a concept of "direct bits", that is stripped from the fault address when setting fault->gfn, and applied within the TDP MMU iterator. Calling code will behave as if it is operating on the PTE mapping the GFN (without shared bits) but within the iterator, the actual mappings will be shifted using bits specific for the root. SPs will have the GFN set without the shared bit. In the end the TDP MMU will behave like it is mapping things at the GFN without the shared bit but with a strange page table format where everything is offset by the shared bit. Since TDX only needs to shift the mapping like this for the shared bit, which is mapped as the normal TDP root, add a "gfn_direct_bits" field to the kvm_arch structure for each VM with a default value of 0. It will have the bit set at the position of the GPA shared bit in GFN through TD-specific initialization code. Keep TDX-specific concepts out of the MMU code by not naming it "shared". Ranged TLB flushes (i.e. 
flush_remote_tlbs_range()) target specific GFN ranges. In the convention established above, these would need to target the shifted GFN range. It won't matter functionally, since the actual implementation will always result in a full flush for the only planned user (TDX). For correctness reasons, future changes can provide a TDX x86_ops.flush_remote_tlbs_range implementation to return -EOPNOTSUPP and force the full flush for TDs. This leaves one problem. Some operations use a concept of max GFN (i.e. kvm_mmu_max_gfn()), to iterate over the whole TDP range. When applying the direct mask to the start of the range, the iterator would end up skipping iterating over the range not covered by the direct mask bit. For safety, make sure the __tdp_mmu_zap_root() operation iterates over the full GFN range supported by the underlying TDP format. Add a new iterator helper, for_each_tdp_pte_min_level_all(), that iterates the entire TDP GFN range, regardless of root. Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> Co-developed-by: Yan Zhao <yan.y.zhao@intel.com> Signed-off-by: Yan Zhao <yan.y.zhao@intel.com> Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com> Message-ID: <20240718211230.1492011-9-rick.p.edgecombe@intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent e23186d commit 3fc3f71

File tree

6 files changed

+51
-14
lines changed

6 files changed

+51
-14
lines changed

arch/x86/include/asm/kvm_host.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1543,6 +1543,8 @@ struct kvm_arch {
15431543
*/
15441544
#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
15451545
struct kvm_mmu_memory_cache split_desc_cache;
1546+
1547+
gfn_t gfn_direct_bits;
15461548
};
15471549

15481550
struct kvm_vm_stat {

arch/x86/kvm/mmu.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,4 +292,9 @@ static inline bool kvm_has_mirrored_tdp(const struct kvm *kvm)
292292
{
293293
return kvm->arch.vm_type == KVM_X86_TDX_VM;
294294
}
295+
296+
static inline gfn_t kvm_gfn_direct_bits(const struct kvm *kvm)
297+
{
298+
return kvm->arch.gfn_direct_bits;
299+
}
295300
#endif

arch/x86/kvm/mmu/mmu_internal.h

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
#include <linux/kvm_host.h>
77
#include <asm/kvm_host.h>
88

9+
#include "mmu.h"
10+
911
#ifdef CONFIG_KVM_PROVE_MMU
1012
#define KVM_MMU_WARN_ON(x) WARN_ON_ONCE(x)
1113
#else
@@ -173,6 +175,18 @@ static inline void kvm_mmu_alloc_external_spt(struct kvm_vcpu *vcpu, struct kvm_
173175
sp->external_spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_external_spt_cache);
174176
}
175177

178+
static inline gfn_t kvm_gfn_root_bits(const struct kvm *kvm, const struct kvm_mmu_page *root)
179+
{
180+
/*
181+
* Since mirror SPs are used only for TDX, which maps private memory
182+
* at its "natural" GFN, no mask needs to be applied to them - and, dually,
183+
* we expect that the bits is only used for the shared PT.
184+
*/
185+
if (is_mirror_sp(root))
186+
return 0;
187+
return kvm_gfn_direct_bits(kvm);
188+
}
189+
176190
static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp)
177191
{
178192
/*
@@ -257,7 +271,12 @@ struct kvm_page_fault {
257271
*/
258272
u8 goal_level;
259273

260-
/* Shifted addr, or result of guest page table walk if addr is a gva. */
274+
/*
275+
* Shifted addr, or result of guest page table walk if addr is a gva. In
276+
* the case of VM where memslot's can be mapped at multiple GPA aliases
277+
* (i.e. TDX), the gfn field does not contain the bit that selects between
278+
* the aliases (i.e. the shared bit for TDX).
279+
*/
261280
gfn_t gfn;
262281

263282
/* The memslot containing gfn. May be NULL. */
@@ -345,7 +364,12 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
345364
int r;
346365

347366
if (vcpu->arch.mmu->root_role.direct) {
348-
fault.gfn = fault.addr >> PAGE_SHIFT;
367+
/*
368+
* Things like memslots don't understand the concept of a shared
369+
* bit. Strip it so that the GFN can be used like normal, and the
370+
* fault.addr can be used when the shared bit is needed.
371+
*/
372+
fault.gfn = gpa_to_gfn(fault.addr) & ~kvm_gfn_direct_bits(vcpu->kvm);
349373
fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn);
350374
}
351375

arch/x86/kvm/mmu/tdp_iter.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
static void tdp_iter_refresh_sptep(struct tdp_iter *iter)
1313
{
1414
iter->sptep = iter->pt_path[iter->level - 1] +
15-
SPTE_INDEX(iter->gfn << PAGE_SHIFT, iter->level);
15+
SPTE_INDEX((iter->gfn | iter->gfn_bits) << PAGE_SHIFT, iter->level);
1616
iter->old_spte = kvm_tdp_mmu_read_spte(iter->sptep);
1717
}
1818

@@ -37,15 +37,17 @@ void tdp_iter_restart(struct tdp_iter *iter)
3737
* rooted at root_pt, starting with the walk to translate next_last_level_gfn.
3838
*/
3939
void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
40-
int min_level, gfn_t next_last_level_gfn)
40+
int min_level, gfn_t next_last_level_gfn, gfn_t gfn_bits)
4141
{
4242
if (WARN_ON_ONCE(!root || (root->role.level < 1) ||
43-
(root->role.level > PT64_ROOT_MAX_LEVEL))) {
43+
(root->role.level > PT64_ROOT_MAX_LEVEL) ||
44+
(gfn_bits && next_last_level_gfn >= gfn_bits))) {
4445
iter->valid = false;
4546
return;
4647
}
4748

4849
iter->next_last_level_gfn = next_last_level_gfn;
50+
iter->gfn_bits = gfn_bits;
4951
iter->root_level = root->role.level;
5052
iter->min_level = min_level;
5153
iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root->spt;
@@ -113,7 +115,7 @@ static bool try_step_side(struct tdp_iter *iter)
113115
* Check if the iterator is already at the end of the current page
114116
* table.
115117
*/
116-
if (SPTE_INDEX(iter->gfn << PAGE_SHIFT, iter->level) ==
118+
if (SPTE_INDEX((iter->gfn | iter->gfn_bits) << PAGE_SHIFT, iter->level) ==
117119
(SPTE_ENT_PER_PAGE - 1))
118120
return false;
119121

arch/x86/kvm/mmu/tdp_iter.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,10 @@ struct tdp_iter {
9393
tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL];
9494
/* A pointer to the current SPTE */
9595
tdp_ptep_t sptep;
96-
/* The lowest GFN mapped by the current SPTE */
96+
/* The lowest GFN (mask bits excluded) mapped by the current SPTE */
9797
gfn_t gfn;
98+
/* Mask applied to convert the GFN to the mapping GPA */
99+
gfn_t gfn_bits;
98100
/* The level of the root page given to the iterator */
99101
int root_level;
100102
/* The lowest level the iterator should traverse to */
@@ -123,17 +125,22 @@ struct tdp_iter {
123125
* preorder traversal.
124126
*/
125127
#define for_each_tdp_pte_min_level(iter, kvm, root, min_level, start, end) \
126-
for (tdp_iter_start(&iter, root, min_level, start); \
127-
iter.valid && iter.gfn < end; \
128+
for (tdp_iter_start(&iter, root, min_level, start, kvm_gfn_root_bits(kvm, root)); \
129+
iter.valid && iter.gfn < end; \
128130
tdp_iter_next(&iter))
129131

132+
#define for_each_tdp_pte_min_level_all(iter, root, min_level) \
133+
for (tdp_iter_start(&iter, root, min_level, 0, 0); \
134+
iter.valid && iter.gfn < tdp_mmu_max_gfn_exclusive(); \
135+
tdp_iter_next(&iter))
136+
130137
#define for_each_tdp_pte(iter, kvm, root, start, end) \
131138
for_each_tdp_pte_min_level(iter, kvm, root, PG_LEVEL_4K, start, end)
132139

133140
tdp_ptep_t spte_to_child_pt(u64 pte, int level);
134141

135142
void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
136-
int min_level, gfn_t next_last_level_gfn);
143+
int min_level, gfn_t next_last_level_gfn, gfn_t gfn_bits);
137144
void tdp_iter_next(struct tdp_iter *iter);
138145
void tdp_iter_restart(struct tdp_iter *iter);
139146

arch/x86/kvm/mmu/tdp_mmu.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -705,10 +705,7 @@ static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
705705
{
706706
struct tdp_iter iter;
707707

708-
gfn_t end = tdp_mmu_max_gfn_exclusive();
709-
gfn_t start = 0;
710-
711-
for_each_tdp_pte_min_level(iter, kvm, root, zap_level, start, end) {
708+
for_each_tdp_pte_min_level_all(iter, root, zap_level) {
712709
retry:
713710
if (tdp_mmu_iter_cond_resched(kvm, &iter, false, shared))
714711
continue;

0 commit comments

Comments
 (0)