
Commit 7f26fea

Merge tag 'kvm-x86-mmu-6.8' of https://github.com/kvm-x86/linux into HEAD
KVM x86 MMU changes for 6.8:

- Fix a relatively benign off-by-one error when splitting huge pages during CLEAR_DIRTY_LOG.

- Fix a bug where KVM could incorrectly test-and-clear dirty bits in non-leaf TDP MMU SPTEs if a racing thread replaces a huge SPTE with a non-huge SPTE.

- Relax the TDP MMU's lockdep assertions related to holding mmu_lock for read versus write so that KVM doesn't pass "bool shared" all over the place just to have precise assertions in paths that don't actually care about whether the caller is a reader or a writer.
2 parents 3115d2d + e59f75d commit 7f26fea

5 files changed (+57 additions, -67 deletions)

Documentation/virt/kvm/locking.rst

Lines changed: 3 additions & 4 deletions
@@ -43,10 +43,9 @@ On x86:
 
 - vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock and kvm->arch.xen.xen_lock
 
-- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and
-  kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and
-  cannot be taken without already holding kvm->arch.mmu_lock (typically with
-  ``read_lock`` for the TDP MMU, thus the need for additional spinlocks).
+- kvm->arch.mmu_lock is an rwlock; critical sections for
+  kvm->arch.tdp_mmu_pages_lock and kvm->arch.mmu_unsync_pages_lock must
+  also take kvm->arch.mmu_lock
 
 Everything else is a leaf: no other lock is taken inside the critical
 sections.
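
To make the rewritten rule concrete, here is a minimal sketch of the nesting it describes (illustrative only, not code from this commit; example_nx_untrack() is a made-up helper, while the locks and untrack_possible_nx_huge_page() are the ones named elsewhere in this series):

    /* Sketch: the spinlock is only ever taken inside mmu_lock. */
    static void example_nx_untrack(struct kvm *kvm, struct kvm_mmu_page *sp)
    {
        read_lock(&kvm->mmu_lock);                  /* mmu_lock is an rwlock */
        spin_lock(&kvm->arch.tdp_mmu_pages_lock);   /* nested inside mmu_lock */
        untrack_possible_nx_huge_page(kvm, sp);     /* data the spinlock protects */
        spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
        read_unlock(&kvm->mmu_lock);
    }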

arch/x86/include/asm/kvm_host.h

Lines changed: 6 additions & 5 deletions
@@ -1433,9 +1433,8 @@ struct kvm_arch {
	 * the MMU lock in read mode + RCU or
	 * the MMU lock in write mode
	 *
-	 * For writes, this list is protected by:
-	 * the MMU lock in read mode + the tdp_mmu_pages_lock or
-	 * the MMU lock in write mode
+	 * For writes, this list is protected by tdp_mmu_pages_lock; see
+	 * below for the details.
	 *
	 * Roots will remain in the list until their tdp_mmu_root_count
	 * drops to zero, at which point the thread that decremented the
@@ -1452,8 +1451,10 @@ struct kvm_arch {
	 * - possible_nx_huge_pages;
	 * - the possible_nx_huge_page_link field of kvm_mmu_page structs used
	 *   by the TDP MMU
-	 * It is acceptable, but not necessary, to acquire this lock when
-	 * the thread holds the MMU lock in write mode.
+	 * Because the lock is only taken within the MMU lock, strictly
+	 * speaking it is redundant to acquire this lock when the thread
+	 * holds the MMU lock in write mode.  However it often simplifies
+	 * the code to do so.
	 */
	spinlock_t tdp_mmu_pages_lock;
 #endif /* CONFIG_X86_64 */
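
For the read side of the comment above, a hedged sketch of walking the roots list (the helper is hypothetical, and the list member name "link" is assumed from kvm_mmu_page rather than shown in this diff):

    /* Sketch: reads of kvm->arch.tdp_mmu_roots need mmu_lock (read) plus RCU. */
    static void example_walk_tdp_mmu_roots(struct kvm *kvm)
    {
        struct kvm_mmu_page *root;

        read_lock(&kvm->mmu_lock);
        rcu_read_lock();
        list_for_each_entry_rcu(root, &kvm->arch.tdp_mmu_roots, link)
            ;   /* inspect, but do not modify, each root */
        rcu_read_unlock();
        read_unlock(&kvm->mmu_lock);
    }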

arch/x86/kvm/mmu/mmu.c

Lines changed: 4 additions & 4 deletions
@@ -1388,7 +1388,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		gfn_t end = slot->base_gfn + gfn_offset + __fls(mask);
 
		if (READ_ONCE(eager_page_split))
-			kvm_mmu_try_split_huge_pages(kvm, slot, start, end, PG_LEVEL_4K);
+			kvm_mmu_try_split_huge_pages(kvm, slot, start, end + 1, PG_LEVEL_4K);
 
		kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M);
 
@@ -2846,9 +2846,9 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
			/*
			 * Recheck after taking the spinlock, a different vCPU
			 * may have since marked the page unsync.  A false
-			 * positive on the unprotected check above is not
+			 * negative on the unprotected check above is not
			 * possible as clearing sp->unsync _must_ hold mmu_lock
-			 * for write, i.e. unsync cannot transition from 0->1
+			 * for write, i.e. unsync cannot transition from 1->0
			 * while this CPU holds mmu_lock for read (or write).
			 */
			if (READ_ONCE(sp->unsync))
@@ -3576,7 +3576,7 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
		return;
 
	if (is_tdp_mmu_page(sp))
-		kvm_tdp_mmu_put_root(kvm, sp, false);
+		kvm_tdp_mmu_put_root(kvm, sp);
	else if (!--sp->root_count && sp->role.invalid)
		kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
 
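
The first hunk above is the CLEAR_DIRTY_LOG off-by-one from the merge description: __fls(mask) is the index of the highest set bit, so end is the last gfn covered by the mask (inclusive), while kvm_mmu_try_split_huge_pages() treats its end as exclusive. A standalone sketch of the arithmetic, with illustrative values rather than kernel code:

    #include <stdio.h>

    /* Toy model: for mask = 0xff, __fls(mask) == 7, so "end" is inclusive. */
    int main(void)
    {
        unsigned long base_gfn = 0x1000, gfn_offset = 0;
        unsigned long start = base_gfn + gfn_offset;     /* first gfn in the mask */
        unsigned long end = base_gfn + gfn_offset + 7;   /* last gfn, inclusive */

        /* An exclusive-end range [start, end) misses the final gfn; passing
         * end + 1 covers all eight gfns, which is what the fix does.
         */
        printf("old: [%#lx, %#lx)  new: [%#lx, %#lx)\n", start, end, start, end + 1);
        return 0;
    }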

arch/x86/kvm/mmu/tdp_mmu.c

Lines changed: 43 additions & 52 deletions
@@ -73,11 +73,8 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
	tdp_mmu_free_sp(sp);
 }
 
-void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
-			  bool shared)
+void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
 {
-	kvm_lockdep_assert_mmu_lock_held(kvm, shared);
-
	if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
		return;
 
@@ -106,10 +103,16 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
  */
 static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
					      struct kvm_mmu_page *prev_root,
-					      bool shared, bool only_valid)
+					      bool only_valid)
 {
	struct kvm_mmu_page *next_root;
 
+	/*
+	 * While the roots themselves are RCU-protected, fields such as
+	 * role.invalid are protected by mmu_lock.
+	 */
+	lockdep_assert_held(&kvm->mmu_lock);
+
	rcu_read_lock();
 
	if (prev_root)
@@ -132,7 +135,7 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
	rcu_read_unlock();
 
	if (prev_root)
-		kvm_tdp_mmu_put_root(kvm, prev_root, shared);
+		kvm_tdp_mmu_put_root(kvm, prev_root);
 
	return next_root;
 }
@@ -144,26 +147,22 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
  * recent root. (Unless keeping a live reference is desirable.)
  *
  * If shared is set, this function is operating under the MMU lock in read
- * mode. In the unlikely event that this thread must free a root, the lock
- * will be temporarily dropped and reacquired in write mode.
+ * mode.
  */
-#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, _only_valid)\
-	for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, _only_valid);	\
-	     _root;								\
-	     _root = tdp_mmu_next_root(_kvm, _root, _shared, _only_valid))	\
-		if (kvm_lockdep_assert_mmu_lock_held(_kvm, _shared) &&		\
-		    kvm_mmu_page_as_id(_root) != _as_id) {			\
+#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _only_valid)\
+	for (_root = tdp_mmu_next_root(_kvm, NULL, _only_valid);		\
+	     ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root;		\
+	     _root = tdp_mmu_next_root(_kvm, _root, _only_valid))		\
+		if (kvm_mmu_page_as_id(_root) != _as_id) {			\
		} else
 
-#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared)	\
-	__for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true)
+#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id)		\
+	__for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, true)
 
-#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _shared)			\
-	for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, false);		\
-	     _root;								\
-	     _root = tdp_mmu_next_root(_kvm, _root, _shared, false))		\
-		if (!kvm_lockdep_assert_mmu_lock_held(_kvm, _shared)) {	\
-		} else
+#define for_each_tdp_mmu_root_yield_safe(_kvm, _root)			\
+	for (_root = tdp_mmu_next_root(_kvm, NULL, false);		\
+	     ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root;	\
+	     _root = tdp_mmu_next_root(_kvm, _root, false))
 
 /*
  * Iterate over all TDP MMU roots. Requires that mmu_lock be held for write,
@@ -276,28 +275,18 @@ static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
  *
  * @kvm: kvm instance
  * @sp: the page to be removed
- * @shared: This operation may not be running under the exclusive use of
- *	    the MMU lock and the operation must synchronize with other
- *	    threads that might be adding or removing pages.
  */
-static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
-			      bool shared)
+static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
	tdp_unaccount_mmu_page(kvm, sp);
 
	if (!sp->nx_huge_page_disallowed)
		return;
 
-	if (shared)
-		spin_lock(&kvm->arch.tdp_mmu_pages_lock);
-	else
-		lockdep_assert_held_write(&kvm->mmu_lock);
-
+	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
	sp->nx_huge_page_disallowed = false;
	untrack_possible_nx_huge_page(kvm, sp);
-
-	if (shared)
-		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 }
 
 /**
@@ -326,7 +315,7 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
 
	trace_kvm_mmu_prepare_zap_page(sp);
 
-	tdp_mmu_unlink_sp(kvm, sp, shared);
+	tdp_mmu_unlink_sp(kvm, sp);
 
	for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
		tdp_ptep_t sptep = pt + i;
@@ -832,7 +821,8 @@ bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
 {
	struct kvm_mmu_page *root;
 
-	for_each_tdp_mmu_root_yield_safe(kvm, root, false)
+	lockdep_assert_held_write(&kvm->mmu_lock);
+	for_each_tdp_mmu_root_yield_safe(kvm, root)
		flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
 
	return flush;
@@ -854,7 +844,8 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
	 * is being destroyed or the userspace VMM has exited. In both cases,
	 * KVM_RUN is unreachable, i.e. no vCPUs will ever service the request.
	 */
-	for_each_tdp_mmu_root_yield_safe(kvm, root, false)
+	lockdep_assert_held_write(&kvm->mmu_lock);
+	for_each_tdp_mmu_root_yield_safe(kvm, root)
		tdp_mmu_zap_root(kvm, root, false);
 }
 
@@ -868,7 +859,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
 
	read_lock(&kvm->mmu_lock);
 
-	for_each_tdp_mmu_root_yield_safe(kvm, root, true) {
+	for_each_tdp_mmu_root_yield_safe(kvm, root) {
		if (!root->tdp_mmu_scheduled_root_to_zap)
			continue;
 
@@ -891,7 +882,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
		 * the root must be reachable by mmu_notifiers while it's being
		 * zapped
		 */
-		kvm_tdp_mmu_put_root(kvm, root, true);
+		kvm_tdp_mmu_put_root(kvm, root);
	}
 
	read_unlock(&kvm->mmu_lock);
@@ -1125,7 +1116,7 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
 {
	struct kvm_mmu_page *root;
 
-	__for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false, false)
+	__for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false)
		flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end,
					  range->may_block, flush);
 
@@ -1314,7 +1305,7 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
 
	lockdep_assert_held_read(&kvm->mmu_lock);
 
-	for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+	for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id)
		spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
			     slot->base_gfn + slot->npages, min_level);
 
@@ -1346,6 +1337,8 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
 {
	struct kvm_mmu_page *sp;
 
+	kvm_lockdep_assert_mmu_lock_held(kvm, shared);
+
	/*
	 * Since we are allocating while under the MMU lock we have to be
	 * careful about GFP flags. Use GFP_NOWAIT to avoid blocking on direct
@@ -1496,11 +1489,10 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
	int r = 0;
 
	kvm_lockdep_assert_mmu_lock_held(kvm, shared);
-
-	for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, shared) {
+	for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) {
		r = tdp_mmu_split_huge_pages_root(kvm, root, start, end, target_level, shared);
		if (r) {
-			kvm_tdp_mmu_put_root(kvm, root, shared);
+			kvm_tdp_mmu_put_root(kvm, root);
			break;
		}
	}
@@ -1522,12 +1514,13 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
	rcu_read_lock();
 
-	tdp_root_for_each_leaf_pte(iter, root, start, end) {
+	tdp_root_for_each_pte(iter, root, start, end) {
 retry:
-		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
+		if (!is_shadow_present_pte(iter.old_spte) ||
+		    !is_last_spte(iter.old_spte, iter.level))
			continue;
 
-		if (!is_shadow_present_pte(iter.old_spte))
+		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
			continue;
 
		KVM_MMU_WARN_ON(kvm_ad_enabled() &&
@@ -1560,8 +1553,7 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
	bool spte_set = false;
 
	lockdep_assert_held_read(&kvm->mmu_lock);
-
-	for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+	for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id)
		spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
				slot->base_gfn + slot->npages);
 
@@ -1695,8 +1687,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
	struct kvm_mmu_page *root;
 
	lockdep_assert_held_read(&kvm->mmu_lock);
-
-	for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+	for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id)
		zap_collapsible_spte_range(kvm, root, slot);
 }
 
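
Most of this file implements the third item in the merge description: the root iterators now assert only that mmu_lock is held, in either mode, and callers that genuinely need a particular mode assert it themselves rather than threading a "bool shared" argument through every helper. A hedged sketch of the resulting calling pattern (the wrapper below is hypothetical; it mirrors kvm_tdp_mmu_zap_all but takes the lock itself so the example is self-contained):

    /* Hypothetical wrapper; the real kvm_tdp_mmu_zap_all() expects its caller
     * to already hold mmu_lock for write.
     */
    static void example_zap_all_tdp_roots(struct kvm *kvm)
    {
        struct kvm_mmu_page *root;

        write_lock(&kvm->mmu_lock);

        /* Precise assertion only where the mode actually matters ... */
        lockdep_assert_held_write(&kvm->mmu_lock);

        /* ... while the iterator merely checks that mmu_lock is held. */
        for_each_tdp_mmu_root_yield_safe(kvm, root)
            tdp_mmu_zap_root(kvm, root, false);

        write_unlock(&kvm->mmu_lock);
    }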

arch/x86/kvm/mmu/tdp_mmu.h

Lines changed: 1 addition & 2 deletions
@@ -17,8 +17,7 @@ __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root)
	return refcount_inc_not_zero(&root->tdp_mmu_root_count);
 }
 
-void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
-			  bool shared);
+void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root);
 
 bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush);
 bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
