Skip to content

Commit 1f0f4a2

Browse files
ouptonMarc Zyngier
authored andcommitted
KVM: arm64: Infer the PA offset from IPA in stage-2 map walker
Until now, the page table walker counted increments to the PA and IPA of a walk in two separate places. While the PA is incremented as soon as a leaf PTE is installed in stage2_map_walker_try_leaf(), the IPA is actually bumped in the generic table walker context. Critically, __kvm_pgtable_visit() rereads the PTE after the LEAF callback returns to work out if a table or leaf was installed, and only bumps the IPA for a leaf PTE. This arrangement worked fine when we handled faults behind the write lock, as the walker had exclusive access to the stage-2 page tables. However, commit 1577cb5 ("KVM: arm64: Handle stage-2 faults in parallel") started handling all stage-2 faults behind the read lock, opening up a race where a walker could increment the PA but not the IPA of a walk. Nothing good ensues, as the walker starts mapping with the incorrect IPA -> PA relationship. For example, assume that two vCPUs took a data abort on the same IPA. One observes that dirty logging is disabled, and the other observed that it is enabled: vCPU attempting PMD mapping vCPU attempting PTE mapping ====================================== ===================================== /* install PMD */ stage2_make_pte(ctx, leaf); data->phys += granule; /* replace PMD with a table */ stage2_try_break_pte(ctx, data->mmu); stage2_make_pte(ctx, table); /* table is observed */ ctx.old = READ_ONCE(*ptep); table = kvm_pte_table(ctx.old, level); /* * map walk continues w/o incrementing * IPA. */ __kvm_pgtable_walk(..., level + 1); Bring an end to the whole mess by using the IPA as the single source of truth for how far along a walk has gotten. Work out the correct PA to map by calculating the IPA offset from the beginning of the walk and add that to the starting physical address. Cc: stable@vger.kernel.org Fixes: 1577cb5 ("KVM: arm64: Handle stage-2 faults in parallel") Signed-off-by: Oliver Upton <oliver.upton@linux.dev> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20230421071606.1603916-2-oliver.upton@linux.dev
1 parent 197b6b6 commit 1f0f4a2

File tree

2 files changed

+29
-4
lines changed

2 files changed

+29
-4
lines changed

arch/arm64/include/asm/kvm_pgtable.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ struct kvm_pgtable_visit_ctx {
209209
kvm_pte_t old;
210210
void *arg;
211211
struct kvm_pgtable_mm_ops *mm_ops;
212+
u64 start;
212213
u64 addr;
213214
u64 end;
214215
u32 level;

arch/arm64/kvm/hyp/pgtable.c

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
struct kvm_pgtable_walk_data {
5959
struct kvm_pgtable_walker *walker;
6060

61+
u64 start;
6162
u64 addr;
6263
u64 end;
6364
};
@@ -201,6 +202,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
201202
.old = READ_ONCE(*ptep),
202203
.arg = data->walker->arg,
203204
.mm_ops = mm_ops,
205+
.start = data->start,
204206
.addr = data->addr,
205207
.end = data->end,
206208
.level = level,
@@ -293,6 +295,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
293295
struct kvm_pgtable_walker *walker)
294296
{
295297
struct kvm_pgtable_walk_data walk_data = {
298+
.start = ALIGN_DOWN(addr, PAGE_SIZE),
296299
.addr = ALIGN_DOWN(addr, PAGE_SIZE),
297300
.end = PAGE_ALIGN(walk_data.addr + size),
298301
.walker = walker,
@@ -794,20 +797,43 @@ static bool stage2_pte_executable(kvm_pte_t pte)
794797
return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
795798
}
796799

800+
static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
801+
const struct stage2_map_data *data)
802+
{
803+
u64 phys = data->phys;
804+
805+
/*
806+
* Stage-2 walks to update ownership data are communicated to the map
807+
* walker using an invalid PA. Avoid offsetting an already invalid PA,
808+
* which could overflow and make the address valid again.
809+
*/
810+
if (!kvm_phys_is_valid(phys))
811+
return phys;
812+
813+
/*
814+
* Otherwise, work out the correct PA based on how far the walk has
815+
* gotten.
816+
*/
817+
return phys + (ctx->addr - ctx->start);
818+
}
819+
797820
static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
798821
struct stage2_map_data *data)
799822
{
823+
u64 phys = stage2_map_walker_phys_addr(ctx, data);
824+
800825
if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
801826
return false;
802827

803-
return kvm_block_mapping_supported(ctx, data->phys);
828+
return kvm_block_mapping_supported(ctx, phys);
804829
}
805830

806831
static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
807832
struct stage2_map_data *data)
808833
{
809834
kvm_pte_t new;
810-
u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
835+
u64 phys = stage2_map_walker_phys_addr(ctx, data);
836+
u64 granule = kvm_granule_size(ctx->level);
811837
struct kvm_pgtable *pgt = data->mmu->pgt;
812838
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
813839

@@ -841,8 +867,6 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
841867

842868
stage2_make_pte(ctx, new);
843869

844-
if (kvm_phys_is_valid(phys))
845-
data->phys += granule;
846870
return 0;
847871
}
848872

0 commit comments

Comments
 (0)