Skip to content
This repository was archived by the owner on Nov 8, 2023. It is now read-only.

Commit 4f16345

Browse files
Merge patch series "riscv: ASID-related and UP-related TLB flush enhancements"
Samuel Holland <samuel.holland@sifive.com> says:

This series converts uniprocessor kernel builds to use the same TLB flushing code as SMP builds, to take advantage of batching and existing range- and ASID-based TLB flush optimizations. It optimizes out IPIs and SBI calls based on the online CPU count, which also covers the scenario where SMP was enabled at build time but only one CPU is present/online. A final optimization is to use single-ASID flushes wherever possible, to avoid unnecessary TLB misses for kernel mappings.

This series has a semantic conflict with the AIA patches that are in linux-next due to the removal of the third parameter of riscv_ipi_set_virq_range(), which is called from imsic_ipi_domain_init() in drivers/irqchip/irq-riscv-imsic-early.c. The resolution is to remove the extra argument from the call site.

Here are some numbers from D1 which show the performance impact:

v6.9-rc1:

System Benchmarks Partial Index               BASELINE      RESULT    INDEX
Execl Throughput                                  43.0       198.5     46.2
File Copy 1024 bufsize 2000 maxblocks           3960.0     73934.4    186.7
File Copy 256 bufsize 500 maxblocks             1655.0     20242.6    122.3
File Copy 4096 bufsize 8000 maxblocks           5800.0    197706.4    340.9
Pipe Throughput                                12440.0    176974.2    142.3
Pipe-based Context Switching                    4000.0     23626.8     59.1
Process Creation                                 126.0       449.9     35.7
Shell Scripts (1 concurrent)                      42.4       544.4    128.4
Shell Scripts (16 concurrent)                      ---        35.3      ---
Shell Scripts (8 concurrent)                       6.0        71.6    119.3
System Call Overhead                           15000.0    248072.6    165.4
                                                                   ========
System Benchmarks Index Score (Partial Only)                          110.6

v6.9-rc1 + this patch series:

System Benchmarks Partial Index               BASELINE      RESULT    INDEX
Execl Throughput                                  43.0       196.8     45.8
File Copy 1024 bufsize 2000 maxblocks           3960.0     71782.2    181.3
File Copy 256 bufsize 500 maxblocks             1655.0     21269.4    128.5
File Copy 4096 bufsize 8000 maxblocks           5800.0    199424.0    343.8
Pipe Throughput                                12440.0    196468.6    157.9
Pipe-based Context Switching                    4000.0     24261.8     60.7
Process Creation                                 126.0       459.0     36.4
Shell Scripts (1 concurrent)                      42.4       543.8    128.2
Shell Scripts (16 concurrent)                      ---        35.5      ---
Shell Scripts (8 concurrent)                       6.0        71.7    119.6
System Call Overhead                           15000.0    259415.2    172.9
                                                                   ========
System Benchmarks Index Score (Partial Only)                          113.0

* b4-shazam-lts:
  riscv: mm: Always use an ASID to flush mm contexts
  riscv: mm: Preserve global TLB entries when switching contexts
  riscv: mm: Make asid_bits a local variable
  riscv: mm: Use a fixed layout for the MM context ID
  riscv: mm: Introduce cntx2asid/cntx2version helper macros
  riscv: Avoid TLB flush loops when affected by SiFive CIP-1200
  riscv: Apply SiFive CIP-1200 workaround to single-ASID sfence.vma
  riscv: mm: Combine the SMP and UP TLB flush code
  riscv: Only send remote fences when some other CPU is online
  riscv: mm: Broadcast kernel TLB flushes only when needed
  riscv: Use IPIs for remote cache/TLB flushes by default
  riscv: Factor out page table TLB synchronization
  riscv: Flush the instruction cache during SMP bringup

Link: https://lore.kernel.org/r/20240327045035.368512-1-samuel.holland@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2 parents 48b4fc6 + daef192 commit 4f16345

File tree

16 files changed

+114
-152
lines changed

16 files changed

+114
-152
lines changed

arch/riscv/Kconfig

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ config RISCV
6262
select ARCH_USE_MEMTEST
6363
select ARCH_USE_QUEUED_RWLOCKS
6464
select ARCH_USES_CFI_TRAPS if CFI_CLANG
65-
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
65+
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if MMU
6666
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
6767
select ARCH_WANT_FRAME_POINTERS
6868
select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT

arch/riscv/errata/sifive/errata.c

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,11 @@ static bool errata_cip_1200_check_func(unsigned long arch_id, unsigned long imp
4242
return false;
4343
if ((impid & 0xffffff) > 0x200630 || impid == 0x1200626)
4444
return false;
45+
46+
#ifdef CONFIG_MMU
47+
tlb_flush_all_threshold = 0;
48+
#endif
49+
4550
return true;
4651
}
4752

arch/riscv/include/asm/errata_list.h

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,11 +43,21 @@ ALTERNATIVE(__stringify(RISCV_PTR do_page_fault), \
4343
CONFIG_ERRATA_SIFIVE_CIP_453)
4444
#else /* !__ASSEMBLY__ */
4545

46-
#define ALT_FLUSH_TLB_PAGE(x) \
46+
#define ALT_SFENCE_VMA_ASID(asid) \
47+
asm(ALTERNATIVE("sfence.vma x0, %0", "sfence.vma", SIFIVE_VENDOR_ID, \
48+
ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \
49+
: : "r" (asid) : "memory")
50+
51+
#define ALT_SFENCE_VMA_ADDR(addr) \
4752
asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \
4853
ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \
4954
: : "r" (addr) : "memory")
5055

56+
#define ALT_SFENCE_VMA_ADDR_ASID(addr, asid) \
57+
asm(ALTERNATIVE("sfence.vma %0, %1", "sfence.vma", SIFIVE_VENDOR_ID, \
58+
ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \
59+
: : "r" (addr), "r" (asid) : "memory")
60+
5161
/*
5262
* _val is marked as "will be overwritten", so need to set it to 0
5363
* in the default case.

arch/riscv/include/asm/mmu.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,9 @@ typedef struct {
2828
#endif
2929
} mm_context_t;
3030

31+
#define cntx2asid(cntx) ((cntx) & SATP_ASID_MASK)
32+
#define cntx2version(cntx) ((cntx) & ~SATP_ASID_MASK)
33+
3134
void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa,
3235
phys_addr_t sz, pgprot_t prot);
3336
#endif /* __ASSEMBLY__ */

arch/riscv/include/asm/pgalloc.h

Lines changed: 14 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,22 @@
88
#define _ASM_RISCV_PGALLOC_H
99

1010
#include <linux/mm.h>
11+
#include <asm/sbi.h>
1112
#include <asm/tlb.h>
1213

1314
#ifdef CONFIG_MMU
1415
#define __HAVE_ARCH_PUD_ALLOC_ONE
1516
#define __HAVE_ARCH_PUD_FREE
1617
#include <asm-generic/pgalloc.h>
1718

19+
static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt)
20+
{
21+
if (riscv_use_sbi_for_rfence())
22+
tlb_remove_ptdesc(tlb, pt);
23+
else
24+
tlb_remove_page_ptdesc(tlb, pt);
25+
}
26+
1827
static inline void pmd_populate_kernel(struct mm_struct *mm,
1928
pmd_t *pmd, pte_t *pte)
2029
{
@@ -102,10 +111,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
102111
struct ptdesc *ptdesc = virt_to_ptdesc(pud);
103112

104113
pagetable_pud_dtor(ptdesc);
105-
if (riscv_use_ipi_for_rfence())
106-
tlb_remove_page_ptdesc(tlb, ptdesc);
107-
else
108-
tlb_remove_ptdesc(tlb, ptdesc);
114+
riscv_tlb_remove_ptdesc(tlb, ptdesc);
109115
}
110116
}
111117

@@ -139,12 +145,8 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
139145
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
140146
unsigned long addr)
141147
{
142-
if (pgtable_l5_enabled) {
143-
if (riscv_use_ipi_for_rfence())
144-
tlb_remove_page_ptdesc(tlb, virt_to_ptdesc(p4d));
145-
else
146-
tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
147-
}
148+
if (pgtable_l5_enabled)
149+
riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
148150
}
149151
#endif /* __PAGETABLE_PMD_FOLDED */
150152

@@ -176,10 +178,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
176178
struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
177179

178180
pagetable_pmd_dtor(ptdesc);
179-
if (riscv_use_ipi_for_rfence())
180-
tlb_remove_page_ptdesc(tlb, ptdesc);
181-
else
182-
tlb_remove_ptdesc(tlb, ptdesc);
181+
riscv_tlb_remove_ptdesc(tlb, ptdesc);
183182
}
184183

185184
#endif /* __PAGETABLE_PMD_FOLDED */
@@ -190,10 +189,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
190189
struct ptdesc *ptdesc = page_ptdesc(pte);
191190

192191
pagetable_pte_dtor(ptdesc);
193-
if (riscv_use_ipi_for_rfence())
194-
tlb_remove_page_ptdesc(tlb, ptdesc);
195-
else
196-
tlb_remove_ptdesc(tlb, ptdesc);
192+
riscv_tlb_remove_ptdesc(tlb, ptdesc);
197193
}
198194
#endif /* CONFIG_MMU */
199195

arch/riscv/include/asm/sbi.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -375,8 +375,12 @@ unsigned long riscv_cached_marchid(unsigned int cpu_id);
375375
unsigned long riscv_cached_mimpid(unsigned int cpu_id);
376376

377377
#if IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_RISCV_SBI)
378+
DECLARE_STATIC_KEY_FALSE(riscv_sbi_for_rfence);
379+
#define riscv_use_sbi_for_rfence() \
380+
static_branch_unlikely(&riscv_sbi_for_rfence)
378381
void sbi_ipi_init(void);
379382
#else
383+
static inline bool riscv_use_sbi_for_rfence(void) { return false; }
380384
static inline void sbi_ipi_init(void) { }
381385
#endif
382386

arch/riscv/include/asm/smp.h

Lines changed: 2 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -49,12 +49,7 @@ void riscv_ipi_disable(void);
4949
bool riscv_ipi_have_virq_range(void);
5050

5151
/* Set the IPI interrupt numbers for arch (called by irqchip drivers) */
52-
void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence);
53-
54-
/* Check if we can use IPIs for remote FENCEs */
55-
DECLARE_STATIC_KEY_FALSE(riscv_ipi_for_rfence);
56-
#define riscv_use_ipi_for_rfence() \
57-
static_branch_unlikely(&riscv_ipi_for_rfence)
52+
void riscv_ipi_set_virq_range(int virq, int nr);
5853

5954
/* Check other CPUs stop or not */
6055
bool smp_crash_stop_failed(void);
@@ -104,16 +99,10 @@ static inline bool riscv_ipi_have_virq_range(void)
10499
return false;
105100
}
106101

107-
static inline void riscv_ipi_set_virq_range(int virq, int nr,
108-
bool use_for_rfence)
102+
static inline void riscv_ipi_set_virq_range(int virq, int nr)
109103
{
110104
}
111105

112-
static inline bool riscv_use_ipi_for_rfence(void)
113-
{
114-
return false;
115-
}
116-
117106
#endif /* CONFIG_SMP */
118107

119108
#if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP)

arch/riscv/include/asm/tlbflush.h

Lines changed: 22 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -15,24 +15,34 @@
1515
#define FLUSH_TLB_NO_ASID ((unsigned long)-1)
1616

1717
#ifdef CONFIG_MMU
18-
extern unsigned long asid_mask;
19-
2018
static inline void local_flush_tlb_all(void)
2119
{
2220
__asm__ __volatile__ ("sfence.vma" : : : "memory");
2321
}
2422

23+
static inline void local_flush_tlb_all_asid(unsigned long asid)
24+
{
25+
if (asid != FLUSH_TLB_NO_ASID)
26+
ALT_SFENCE_VMA_ASID(asid);
27+
else
28+
local_flush_tlb_all();
29+
}
30+
2531
/* Flush one page from local TLB */
2632
static inline void local_flush_tlb_page(unsigned long addr)
2733
{
28-
ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"));
34+
ALT_SFENCE_VMA_ADDR(addr);
35+
}
36+
37+
static inline void local_flush_tlb_page_asid(unsigned long addr,
38+
unsigned long asid)
39+
{
40+
if (asid != FLUSH_TLB_NO_ASID)
41+
ALT_SFENCE_VMA_ADDR_ASID(addr, asid);
42+
else
43+
local_flush_tlb_page(addr);
2944
}
30-
#else /* CONFIG_MMU */
31-
#define local_flush_tlb_all() do { } while (0)
32-
#define local_flush_tlb_page(addr) do { } while (0)
33-
#endif /* CONFIG_MMU */
3445

35-
#if defined(CONFIG_SMP) && defined(CONFIG_MMU)
3646
void flush_tlb_all(void);
3747
void flush_tlb_mm(struct mm_struct *mm);
3848
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
@@ -55,27 +65,9 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
5565
void arch_flush_tlb_batched_pending(struct mm_struct *mm);
5666
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
5767

58-
#else /* CONFIG_SMP && CONFIG_MMU */
59-
60-
#define flush_tlb_all() local_flush_tlb_all()
61-
#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
62-
63-
static inline void flush_tlb_range(struct vm_area_struct *vma,
64-
unsigned long start, unsigned long end)
65-
{
66-
local_flush_tlb_all();
67-
}
68-
69-
/* Flush a range of kernel pages */
70-
static inline void flush_tlb_kernel_range(unsigned long start,
71-
unsigned long end)
72-
{
73-
local_flush_tlb_all();
74-
}
75-
76-
#define flush_tlb_mm(mm) flush_tlb_all()
77-
#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
78-
#define local_flush_tlb_kernel_range(start, end) flush_tlb_all()
79-
#endif /* !CONFIG_SMP || !CONFIG_MMU */
68+
extern unsigned long tlb_flush_all_threshold;
69+
#else /* CONFIG_MMU */
70+
#define local_flush_tlb_all() do { } while (0)
71+
#endif /* CONFIG_MMU */
8072

8173
#endif /* _ASM_RISCV_TLBFLUSH_H */

arch/riscv/kernel/sbi-ipi.c

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,9 @@
1313
#include <linux/irqdomain.h>
1414
#include <asm/sbi.h>
1515

16+
DEFINE_STATIC_KEY_FALSE(riscv_sbi_for_rfence);
17+
EXPORT_SYMBOL_GPL(riscv_sbi_for_rfence);
18+
1619
static int sbi_ipi_virq;
1720

1821
static void sbi_ipi_handle(struct irq_desc *desc)
@@ -72,6 +75,12 @@ void __init sbi_ipi_init(void)
7275
"irqchip/sbi-ipi:starting",
7376
sbi_ipi_starting_cpu, NULL);
7477

75-
riscv_ipi_set_virq_range(virq, BITS_PER_BYTE, false);
78+
riscv_ipi_set_virq_range(virq, BITS_PER_BYTE);
7679
pr_info("providing IPIs using SBI IPI extension\n");
80+
81+
/*
82+
* Use the SBI remote fence extension to avoid
83+
* the extra context switch needed to handle IPIs.
84+
*/
85+
static_branch_enable(&riscv_sbi_for_rfence);
7786
}

arch/riscv/kernel/smp.c

Lines changed: 1 addition & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -171,10 +171,7 @@ bool riscv_ipi_have_virq_range(void)
171171
return (ipi_virq_base) ? true : false;
172172
}
173173

174-
DEFINE_STATIC_KEY_FALSE(riscv_ipi_for_rfence);
175-
EXPORT_SYMBOL_GPL(riscv_ipi_for_rfence);
176-
177-
void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence)
174+
void riscv_ipi_set_virq_range(int virq, int nr)
178175
{
179176
int i, err;
180177

@@ -197,12 +194,6 @@ void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence)
197194

198195
/* Enabled IPIs for boot CPU immediately */
199196
riscv_ipi_enable();
200-
201-
/* Update RFENCE static key */
202-
if (use_for_rfence)
203-
static_branch_enable(&riscv_ipi_for_rfence);
204-
else
205-
static_branch_disable(&riscv_ipi_for_rfence);
206197
}
207198

208199
static const char * const ipi_names[] = {

0 commit comments

Comments
 (0)