
Commit d58335d

Authored and committed by Marc Zyngier

Merge branch kvm-arm64/tlbi-range into kvmarm-master/next
* kvm-arm64/tlbi-range:
  : .
  : FEAT_TLBIRANGE support, courtesy of Raghavendra Rao Ananta.
  :
  : From the cover letter:
  :
  : "In certain code paths, KVM/ARM currently invalidates the entire VM's
  : page-tables instead of just invalidating a necessary range. For example,
  : when collapsing a table PTE to a block PTE, instead of iterating over
  : each PTE and flushing them, KVM uses the 'vmalls12e1is' TLBI operation to
  : flush all the entries. This is inefficient, since the guest would have
  : to refill the TLBs again, even for the addresses that aren't covered
  : by the table entry. The performance impact would scale poorly if many
  : addresses in the VM are going through this remapping.
  :
  : For architectures that implement FEAT_TLBIRANGE, KVM can replace such
  : inefficient paths by performing the invalidations only on the range of
  : addresses that are in scope. This series tries to achieve the same in
  : the areas of stage-2 map, unmap and write-protecting the pages."
  : .
  KVM: arm64: Use TLBI range-based instructions for unmap
  KVM: arm64: Invalidate the table entries upon a range
  KVM: arm64: Flush only the memslot after write-protect
  KVM: arm64: Implement kvm_arch_flush_remote_tlbs_range()
  KVM: arm64: Define kvm_tlb_flush_vmid_range()
  KVM: arm64: Implement __kvm_tlb_flush_vmid_range()
  arm64: tlb: Implement __flush_s2_tlb_range_op()
  arm64: tlb: Refactor the core flush algorithm of __flush_tlb_range
  KVM: Move kvm_arch_flush_remote_tlbs_memslot() to common code
  KVM: Allow range-based TLB invalidation from common code
  KVM: Remove CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
  KVM: arm64: Use kvm_arch_flush_remote_tlbs()
  KVM: Declare kvm_arch_flush_remote_tlbs() globally
  KVM: Rename kvm_arch_flush_remote_tlb() to kvm_arch_flush_remote_tlbs()

Signed-off-by: Marc Zyngier <maz@kernel.org>
2 parents c190762 + 7657ea9 commit d58335d

File tree: 21 files changed, +286 −132 lines

arch/arm64/include/asm/kvm_asm.h

Lines changed: 3 additions & 0 deletions
@@ -70,6 +70,7 @@ enum __kvm_host_smccc_func {
     __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
     __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
     __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
+    __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
     __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
     __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
     __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
@@ -229,6 +230,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
 extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
                                          phys_addr_t ipa,
                                          int level);
+extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                                       phys_addr_t start, unsigned long pages);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);

arch/arm64/include/asm/kvm_host.h

Lines changed: 4 additions & 0 deletions
@@ -1120,6 +1120,10 @@ int __init kvm_set_ipa_limit(void);
 #define __KVM_HAVE_ARCH_VM_ALLOC
 struct kvm *kvm_arch_alloc_vm(void);
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
 static inline bool kvm_vm_is_protected(struct kvm *kvm)
 {
     return false;

arch/arm64/include/asm/kvm_pgtable.h

Lines changed: 10 additions & 0 deletions
@@ -746,4 +746,14 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
  * kvm_pgtable_prot format.
  */
 enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
+ *
+ * @mmu:   Stage-2 KVM MMU struct
+ * @addr:  The base Intermediate physical address from which to invalidate
+ * @size:  Size of the range from the base to invalidate
+ */
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                              phys_addr_t addr, size_t size);
 #endif /* __ARM64_KVM_PGTABLE_H__ */

arch/arm64/include/asm/tlbflush.h

Lines changed: 71 additions & 53 deletions
@@ -278,14 +278,77 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
  */
 #define MAX_TLBI_OPS    PTRS_PER_PTE
 
+/*
+ * __flush_tlb_range_op - Perform TLBI operation upon a range
+ *
+ * @op:         TLBI instruction that operates on a range (has 'r' prefix)
+ * @start:      The start address of the range
+ * @pages:      Range as the number of pages from 'start'
+ * @stride:     Flush granularity
+ * @asid:       The ASID of the task (0 for IPA instructions)
+ * @tlb_level:  Translation Table level hint, if known
+ * @tlbi_user:  If 'true', call an additional __tlbi_user()
+ *              (typically for user ASIDs). 'false' for IPA instructions
+ *
+ * When the CPU does not support TLB range operations, flush the TLB
+ * entries one by one at the granularity of 'stride'. If the TLB
+ * range ops are supported, then:
+ *
+ * 1. If 'pages' is odd, flush the first page through non-range
+ *    operations;
+ *
+ * 2. For remaining pages: the minimum range granularity is decided
+ *    by 'scale', so multiple range TLBI operations may be required.
+ *    Start from scale = 0, flush the corresponding number of pages
+ *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
+ *    until no pages left.
+ *
+ * Note that certain ranges can be represented by either num = 31 and
+ * scale or num = 0 and scale + 1. The loop below favours the latter
+ * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+ */
+#define __flush_tlb_range_op(op, start, pages, stride,                  \
+                             asid, tlb_level, tlbi_user)                \
+do {                                                                    \
+    int num = 0;                                                        \
+    int scale = 0;                                                      \
+    unsigned long addr;                                                 \
+                                                                        \
+    while (pages > 0) {                                                 \
+        if (!system_supports_tlb_range() ||                             \
+            pages % 2 == 1) {                                           \
+            addr = __TLBI_VADDR(start, asid);                           \
+            __tlbi_level(op, addr, tlb_level);                          \
+            if (tlbi_user)                                              \
+                __tlbi_user_level(op, addr, tlb_level);                 \
+            start += stride;                                            \
+            pages -= stride >> PAGE_SHIFT;                              \
+            continue;                                                   \
+        }                                                               \
+                                                                        \
+        num = __TLBI_RANGE_NUM(pages, scale);                           \
+        if (num >= 0) {                                                 \
+            addr = __TLBI_VADDR_RANGE(start, asid, scale,               \
+                                      num, tlb_level);                  \
+            __tlbi(r##op, addr);                                        \
+            if (tlbi_user)                                              \
+                __tlbi_user(r##op, addr);                               \
+            start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;      \
+            pages -= __TLBI_RANGE_PAGES(num, scale);                    \
+        }                                                               \
+        scale++;                                                        \
+    }                                                                   \
+} while (0)
+
+#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level)    \
+    __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
                                      unsigned long start, unsigned long end,
                                      unsigned long stride, bool last_level,
                                      int tlb_level)
 {
-    int num = 0;
-    int scale = 0;
-    unsigned long asid, addr, pages;
+    unsigned long asid, pages;
 
     start = round_down(start, stride);
     end = round_up(end, stride);
@@ -307,56 +370,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
     dsb(ishst);
     asid = ASID(vma->vm_mm);
 
-    /*
-     * When the CPU does not support TLB range operations, flush the TLB
-     * entries one by one at the granularity of 'stride'. If the TLB
-     * range ops are supported, then:
-     *
-     * 1. If 'pages' is odd, flush the first page through non-range
-     *    operations;
-     *
-     * 2. For remaining pages: the minimum range granularity is decided
-     *    by 'scale', so multiple range TLBI operations may be required.
-     *    Start from scale = 0, flush the corresponding number of pages
-     *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
-     *    until no pages left.
-     *
-     * Note that certain ranges can be represented by either num = 31 and
-     * scale or num = 0 and scale + 1. The loop below favours the latter
-     * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
-     */
-    while (pages > 0) {
-        if (!system_supports_tlb_range() ||
-            pages % 2 == 1) {
-            addr = __TLBI_VADDR(start, asid);
-            if (last_level) {
-                __tlbi_level(vale1is, addr, tlb_level);
-                __tlbi_user_level(vale1is, addr, tlb_level);
-            } else {
-                __tlbi_level(vae1is, addr, tlb_level);
-                __tlbi_user_level(vae1is, addr, tlb_level);
-            }
-            start += stride;
-            pages -= stride >> PAGE_SHIFT;
-            continue;
-        }
-
-        num = __TLBI_RANGE_NUM(pages, scale);
-        if (num >= 0) {
-            addr = __TLBI_VADDR_RANGE(start, asid, scale,
-                                      num, tlb_level);
-            if (last_level) {
-                __tlbi(rvale1is, addr);
-                __tlbi_user(rvale1is, addr);
-            } else {
-                __tlbi(rvae1is, addr);
-                __tlbi_user(rvae1is, addr);
-            }
-            start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
-            pages -= __TLBI_RANGE_PAGES(num, scale);
-        }
-        scale++;
-    }
+    if (last_level)
+        __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
+    else
+        __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
+
     dsb(ish);
 }
 
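A quick way to read the refactored macro above is to model its scale/num decomposition in user space. The sketch below is an illustration only, not kernel code: it covers just the range-capable case with a one-page stride, reproduces the arithmetic of the kernel's __TLBI_RANGE_NUM()/__TLBI_RANGE_PAGES() macros, and the helper names and printf output are invented for the example.

/*
 * Hedged sketch (not kernel code): user-space model of the scale/num
 * decomposition done by __flush_tlb_range_op() when FEAT_TLBIRANGE is
 * available and the stride is a single page.
 */
#include <stdio.h>

#define TLBI_RANGE_MASK                 0x1fUL
#define __TLBI_RANGE_NUM(pages, scale) \
    ((int)(((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
#define __TLBI_RANGE_PAGES(num, scale) \
    ((unsigned long)((num) + 1) << (5 * (scale) + 1))

static void decompose(unsigned long pages)
{
    int scale = 0;

    printf("%lu pages:\n", pages);
    while (pages > 0) {
        if (pages % 2 == 1) {
            /* Odd leftover: one classic single-page TLBI */
            printf("  single-page TLBI\n");
            pages -= 1;
            continue;
        }

        int num = __TLBI_RANGE_NUM(pages, scale);
        if (num >= 0) {
            unsigned long chunk = __TLBI_RANGE_PAGES(num, scale);

            /* One range TLBI (the r-prefixed op) covers 'chunk' pages */
            printf("  range TLBI: scale=%d num=%d -> %lu pages\n",
                   scale, num, chunk);
            pages -= chunk;
        }
        scale++;
    }
}

int main(void)
{
    decompose(7);   /* one single-page op, then one 6-page range op */
    decompose(512); /* a single range op at scale=1, num=7 */
    return 0;
}

Running it shows, for instance, that 7 pages become one single-page operation plus one 6-page range operation, which is why the refactor keeps the per-page and range paths together in a single loop that both __flush_tlb_range() and the new __flush_s2_tlb_range_op() wrapper can share.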

arch/arm64/kvm/Kconfig

Lines changed: 0 additions & 1 deletion
@@ -25,7 +25,6 @@ menuconfig KVM
     select MMU_NOTIFIER
     select PREEMPT_NOTIFIERS
     select HAVE_KVM_CPU_RELAX_INTERCEPT
-    select HAVE_KVM_ARCH_TLB_FLUSH_ALL
     select KVM_MMIO
     select KVM_GENERIC_DIRTYLOG_READ_PROTECT
     select KVM_XFER_TO_GUEST_WORK

arch/arm64/kvm/arm.c

Lines changed: 0 additions & 6 deletions
@@ -1534,12 +1534,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                        const struct kvm_memory_slot *memslot)
-{
-    kvm_flush_remote_tlbs(kvm);
-}
-
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
                                         struct kvm_arm_device_addr *dev_addr)
 {

arch/arm64/kvm/hyp/nvhe/hyp-main.c

Lines changed: 11 additions & 0 deletions
@@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx
     __kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
 }
 
+static void
+handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
+{
+    DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+    DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
+    DECLARE_REG(unsigned long, pages, host_ctxt, 3);
+
+    __kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
+}
+
 static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
 {
     DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = {
     HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
     HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
     HANDLE_FUNC(__kvm_tlb_flush_vmid),
+    HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
     HANDLE_FUNC(__kvm_flush_cpu_context),
     HANDLE_FUNC(__kvm_timer_set_cntvoff),
     HANDLE_FUNC(__vgic_v3_read_vmcr),

arch/arm64/kvm/hyp/nvhe/tlb.c

Lines changed: 30 additions & 0 deletions
@@ -182,6 +182,36 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
     __tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                                phys_addr_t start, unsigned long pages)
+{
+    struct tlb_inv_context cxt;
+    unsigned long stride;
+
+    /*
+     * Since the range of addresses may not be mapped at
+     * the same level, assume the worst case as PAGE_SIZE
+     */
+    stride = PAGE_SIZE;
+    start = round_down(start, stride);
+
+    /* Switch to requested VMID */
+    __tlb_switch_to_guest(mmu, &cxt, false);
+
+    __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+    dsb(ish);
+    __tlbi(vmalle1is);
+    dsb(ish);
+    isb();
+
+    /* See the comment in __kvm_tlb_flush_vmid_ipa() */
+    if (icache_is_vpipt())
+        icache_inval_all_pou();
+
+    __tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
     struct tlb_inv_context cxt;

arch/arm64/kvm/hyp/pgtable.c

Lines changed: 55 additions & 8 deletions
@@ -670,6 +670,26 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt)
     return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
 }
 
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                              phys_addr_t addr, size_t size)
+{
+    unsigned long pages, inval_pages;
+
+    if (!system_supports_tlb_range()) {
+        kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+        return;
+    }
+
+    pages = size >> PAGE_SHIFT;
+    while (pages > 0) {
+        inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
+        kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages);
+
+        addr += inval_pages << PAGE_SHIFT;
+        pages -= inval_pages;
+    }
+}
+
 #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
 
 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
@@ -786,7 +806,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
      * evicted pte value (if any).
      */
     if (kvm_pte_table(ctx->old, ctx->level))
-        kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+        kvm_tlb_flush_vmid_range(mmu, ctx->addr,
+                                 kvm_granule_size(ctx->level));
     else if (kvm_pte_valid(ctx->old))
         kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
                      ctx->addr, ctx->level);
@@ -810,16 +831,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n
     smp_store_release(ctx->ptep, new);
 }
 
-static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
-                           struct kvm_pgtable_mm_ops *mm_ops)
+static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
+{
+    /*
+     * If FEAT_TLBIRANGE is implemented, defer the individual
+     * TLB invalidations until the entire walk is finished, and
+     * then use the range-based TLBI instructions to do the
+     * invalidations. Condition deferred TLB invalidation on the
+     * system supporting FWB as the optimization is entirely
+     * pointless when the unmap walker needs to perform CMOs.
+     */
+    return system_supports_tlb_range() && stage2_has_fwb(pgt);
+}
+
+static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
+                                 struct kvm_s2_mmu *mmu,
+                                 struct kvm_pgtable_mm_ops *mm_ops)
 {
+    struct kvm_pgtable *pgt = ctx->arg;
+
     /*
-     * Clear the existing PTE, and perform break-before-make with
-     * TLB maintenance if it was valid.
+     * Clear the existing PTE, and perform break-before-make if it was
+     * valid. Depending on the system support, defer the TLB maintenance
+     * for the same until the entire unmap walk is completed.
      */
     if (kvm_pte_valid(ctx->old)) {
         kvm_clear_pte(ctx->ptep);
-        kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
+
+        if (!stage2_unmap_defer_tlb_flush(pgt))
+            kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+                         ctx->addr, ctx->level);
     }
 
     mm_ops->put_page(ctx->ptep);
@@ -1077,7 +1118,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
      * block entry and rely on the remaining portions being faulted
      * back lazily.
      */
-    stage2_put_pte(ctx, mmu, mm_ops);
+    stage2_unmap_put_pte(ctx, mmu, mm_ops);
 
     if (need_flush && mm_ops->dcache_clean_inval_poc)
         mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
@@ -1091,13 +1132,19 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 
 int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 {
+    int ret;
     struct kvm_pgtable_walker walker = {
         .cb    = stage2_unmap_walker,
        .arg    = pgt,
        .flags  = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
     };
 
-    return kvm_pgtable_walk(pgt, addr, size, &walker);
+    ret = kvm_pgtable_walk(pgt, addr, size, &walker);
+    if (stage2_unmap_defer_tlb_flush(pgt))
+        /* Perform the deferred TLB invalidations */
+        kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
+
+    return ret;
 }
 
 struct stage2_attr_data {
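For a sense of scale, the host-side kvm_tlb_flush_vmid_range() above caps each hypercall at MAX_TLBI_RANGE_PAGES pages and falls back to a full VMID flush when FEAT_TLBIRANGE is absent. The stand-alone sketch below is an illustration only, not kernel code: it assumes 4KiB pages (PAGE_SHIFT 12), reproduces the kernel's MAX_TLBI_RANGE_PAGES value (__TLBI_RANGE_PAGES(31, 3), i.e. 32 << 16 pages, 8GiB at that page size), and the split_range() helper and printf output are invented for the example.

/*
 * Hedged sketch (not kernel code): models the chunking loop in
 * kvm_tlb_flush_vmid_range() to show how many __kvm_tlb_flush_vmid_range
 * hypercalls a large stage-2 range generates.
 */
#include <stdio.h>

#define PAGE_SHIFT              12UL
#define MAX_TLBI_RANGE_PAGES    ((unsigned long)(31 + 1) << (5 * 3 + 1))

static void split_range(unsigned long addr, unsigned long size)
{
    unsigned long pages = size >> PAGE_SHIFT;

    while (pages > 0) {
        unsigned long inval_pages = pages < MAX_TLBI_RANGE_PAGES ?
                                    pages : MAX_TLBI_RANGE_PAGES;

        /* One __kvm_tlb_flush_vmid_range hypercall per chunk */
        printf("hypercall: ipa=0x%lx, pages=%lu\n", addr, inval_pages);
        addr += inval_pages << PAGE_SHIFT;
        pages -= inval_pages;
    }
}

int main(void)
{
    /* A 20GiB stage-2 range -> three hypercalls (8GiB + 8GiB + 4GiB) */
    split_range(0x80000000UL, 20UL << 30);
    return 0;
}

The deferred-flush path in kvm_pgtable_stage2_unmap() feeds exactly this helper once per unmap walk, which is where the series trades many per-PTE IPA invalidations for a handful of range operations.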
