Commit 74f4bf9

Author: Alexandre Ghiti (committed)
Merge patch series "riscv: Add runtime constant support"
Charlie Jenkins <charlie@rivosinc.com> says:

Ard brought this to my attention in this patch [1].

I benchmarked this patch on the Nezha D1 (which does not contain Zba or Zbkb so it uses the default algorithm) by navigating through a large directory structure. I created a 1000-deep directory structure and then cd and ls through it. With this patch there was a 0.57% performance improvement.

[1] https://lore.kernel.org/lkml/CAMj1kXE4DJnwFejNWQu784GvyJO=aGNrzuLjSxiowX_e7nW8QA@mail.gmail.com/

* patches from https://lore.kernel.org/r/20250319-runtime_const_riscv-v10-0-745b31a11d65@rivosinc.com:
  riscv: Add runtime constant support
  riscv: Move nop definition to insn-def.h

Link: https://lore.kernel.org/linux-riscv/20250319-runtime_const_riscv-v10-0-745b31a11d65@rivosinc.com/
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
2 parents d9b6582 + a44fb57 commit 74f4bf9
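For context, runtime constants let generic code read a value through an immediate-load instruction sequence that is patched once at boot, instead of loading it from memory on every call. A minimal sketch of the consumer side follows; it is modeled from memory on the existing fs/dcache.c usage (dentry_hashtable / d_hash_shift) rather than copied from this series, so treat the exact names and call sites as illustrative only:

/*
 * Illustrative consumer (assumed names, modeled on fs/dcache.c):
 * the "constant" is read via runtime_const_ptr()/runtime_const_shift_right_32(),
 * and every use site is patched once the real value is known.
 */
#include <asm/runtime-const.h>

static struct hlist_bl_head *dentry_hashtable __ro_after_init;
static unsigned int d_hash_shift __ro_after_init;

static inline struct hlist_bl_head *d_hash(unsigned long hashval)
{
	return runtime_const_ptr(dentry_hashtable) +
		runtime_const_shift_right_32(hashval, d_hash_shift);
}

static void __init dcache_init(void)
{
	/* ... allocate the hash table and compute d_hash_shift ... */
	runtime_const_init(ptr, dentry_hashtable);	/* patch all ptr users */
	runtime_const_init(shift, d_hash_shift);	/* patch all shift users */
}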

8 files changed: +299, -6 lines changed

arch/riscv/Kconfig

Lines changed: 22 additions & 0 deletions
@@ -785,6 +785,28 @@ config RISCV_ISA_ZBC
 
 	  If you don't know what to do here, say Y.
 
+config TOOLCHAIN_HAS_ZBKB
+	bool
+	default y
+	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbkb)
+	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbkb)
+	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
+	depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_ZBKB
+	bool "Zbkb extension support for bit manipulation instructions"
+	depends on TOOLCHAIN_HAS_ZBKB
+	depends on RISCV_ALTERNATIVE
+	default y
+	help
+	  Adds support to dynamically detect the presence of the ZBKB
+	  extension (bit manipulation for cryptography) and enable its usage.
+
+	  The Zbkb extension provides instructions to accelerate a number
+	  of common cryptography operations (pack, zip, etc).
+
+	  If you don't know what to do here, say Y.
+
 config RISCV_ISA_ZICBOM
 	bool "Zicbom extension support for non-coherent DMA operation"
 	depends on MMU
arch/riscv/include/asm/asm.h

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@
 #define REG_ASM		__REG_SEL(.dword, .word)
 #define SZREG		__REG_SEL(8, 4)
 #define LGREG		__REG_SEL(3, 2)
+#define SRLI		__REG_SEL(srliw, srli)
 
 #if __SIZEOF_POINTER__ == 8
 #ifdef __ASSEMBLY__

arch/riscv/include/asm/ftrace.h

Lines changed: 0 additions & 1 deletion
@@ -79,7 +79,6 @@ struct dyn_arch_ftrace {
 #define AUIPC_RA	(0x00000097)
 #define JALR_T0		(0x000282e7)
 #define AUIPC_T0	(0x00000297)
-#define NOP4		(0x00000013)
 
 #define to_jalr_t0(offset) \
 	(((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0)

arch/riscv/include/asm/insn-def.h

Lines changed: 3 additions & 0 deletions
@@ -199,5 +199,8 @@
 #define RISCV_PAUSE		".4byte 0x100000f"
 #define ZAWRS_WRS_NTO		".4byte 0x00d00073"
 #define ZAWRS_WRS_STO		".4byte 0x01d00073"
+#define RISCV_NOP4		".4byte 0x00000013"
+
+#define RISCV_INSN_NOP4		_AC(0x00000013, U)
 
 #endif /* __ASM_INSN_DEF_H */
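The two new definitions above are the same encoding in two forms: RISCV_NOP4 as an assembler string for inline asm, RISCV_INSN_NOP4 as a C integer constant for code patching. 0x00000013 is the canonical 4-byte nop, i.e. addi x0, x0, 0. As a standalone illustration (user-space C, not part of the patch), the I-type fields decode like this:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t insn = 0x00000013;	/* addi x0, x0, 0 */

	printf("opcode=0x%02x rd=x%u funct3=%u rs1=x%u imm=%d\n",
	       insn & 0x7f,		/* 0x13 = OP-IMM */
	       (insn >> 7) & 0x1f,	/* rd  = x0 */
	       (insn >> 12) & 0x7,	/* funct3 = 0 (ADDI) */
	       (insn >> 15) & 0x1f,	/* rs1 = x0 */
	       (int32_t)insn >> 20);	/* imm = 0 */
	return 0;
}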
arch/riscv/include/asm/runtime-const.h

Lines changed: 265 additions & 0 deletions
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_RUNTIME_CONST_H
+#define _ASM_RISCV_RUNTIME_CONST_H
+
+#include <asm/asm.h>
+#include <asm/alternative.h>
+#include <asm/cacheflush.h>
+#include <asm/insn-def.h>
+#include <linux/memory.h>
+#include <asm/text-patching.h>
+
+#include <linux/uaccess.h>
+
+#ifdef CONFIG_32BIT
+#define runtime_const_ptr(sym) \
+({ \
+	typeof(sym) __ret; \
+	asm_inline(".option push\n\t" \
+		".option norvc\n\t" \
+		"1:\t" \
+		"lui %[__ret],0x89abd\n\t" \
+		"addi %[__ret],%[__ret],-0x211\n\t" \
+		".option pop\n\t" \
+		".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+		".long 1b - .\n\t" \
+		".popsection" \
+		: [__ret] "=r" (__ret)); \
+	__ret; \
+})
+#else
+/*
+ * Loading 64-bit constants into a register from immediates is a non-trivial
+ * task on riscv64. To get it somewhat performant, load 32 bits into two
+ * different registers and then combine the results.
+ *
+ * If the processor supports the Zbkb extension, we can combine the final
+ * "slli,slli,srli,add" into the single "pack" instruction. If the processor
+ * doesn't support Zbkb but does support the Zbb extension, we can
+ * combine the final "slli,srli,add" into one instruction "add.uw".
+ */
+#define RISCV_RUNTIME_CONST_64_PREAMBLE \
+	".option push\n\t" \
+	".option norvc\n\t" \
+	"1:\t" \
+	"lui %[__ret],0x89abd\n\t" \
+	"lui %[__tmp],0x1234\n\t" \
+	"addiw %[__ret],%[__ret],-0x211\n\t" \
+	"addiw %[__tmp],%[__tmp],0x567\n\t" \
+
+#define RISCV_RUNTIME_CONST_64_BASE \
+	"slli %[__tmp],%[__tmp],32\n\t" \
+	"slli %[__ret],%[__ret],32\n\t" \
+	"srli %[__ret],%[__ret],32\n\t" \
+	"add %[__ret],%[__ret],%[__tmp]\n\t" \
+
+#define RISCV_RUNTIME_CONST_64_ZBA \
+	".option push\n\t" \
+	".option arch,+zba\n\t" \
+	"slli %[__tmp],%[__tmp],32\n\t" \
+	"add.uw %[__ret],%[__ret],%[__tmp]\n\t" \
+	"nop\n\t" \
+	"nop\n\t" \
+	".option pop\n\t" \
+
+#define RISCV_RUNTIME_CONST_64_ZBKB \
+	".option push\n\t" \
+	".option arch,+zbkb\n\t" \
+	"pack %[__ret],%[__ret],%[__tmp]\n\t" \
+	"nop\n\t" \
+	"nop\n\t" \
+	"nop\n\t" \
+	".option pop\n\t" \
+
+#define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \
+	".option pop\n\t" \
+	".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+	".long 1b - .\n\t" \
+	".popsection" \
+
+#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_RISCV_ISA_ZBKB)
+#define runtime_const_ptr(sym) \
+({ \
+	typeof(sym) __ret, __tmp; \
+	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \
+	ALTERNATIVE_2( \
+		RISCV_RUNTIME_CONST_64_BASE, \
+		RISCV_RUNTIME_CONST_64_ZBA, \
+		0, RISCV_ISA_EXT_ZBA, 1, \
+		RISCV_RUNTIME_CONST_64_ZBKB, \
+		0, RISCV_ISA_EXT_ZBKB, 1 \
+	) \
+	RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \
+	: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \
+	__ret; \
+})
+#elif defined(CONFIG_RISCV_ISA_ZBA)
+#define runtime_const_ptr(sym) \
+({ \
+	typeof(sym) __ret, __tmp; \
+	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \
+	ALTERNATIVE( \
+		RISCV_RUNTIME_CONST_64_BASE, \
+		RISCV_RUNTIME_CONST_64_ZBA, \
+		0, RISCV_ISA_EXT_ZBA, 1 \
+	) \
+	RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \
+	: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \
+	__ret; \
+})
+#elif defined(CONFIG_RISCV_ISA_ZBKB)
+#define runtime_const_ptr(sym) \
+({ \
+	typeof(sym) __ret, __tmp; \
+	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \
+	ALTERNATIVE( \
+		RISCV_RUNTIME_CONST_64_BASE, \
+		RISCV_RUNTIME_CONST_64_ZBKB, \
+		0, RISCV_ISA_EXT_ZBKB, 1 \
+	) \
+	RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \
+	: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \
+	__ret; \
+})
+#else
+#define runtime_const_ptr(sym) \
+({ \
+	typeof(sym) __ret, __tmp; \
+	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \
+	RISCV_RUNTIME_CONST_64_BASE \
+	RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \
+	: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \
+	__ret; \
+})
+#endif
+#endif
+
+#define runtime_const_shift_right_32(val, sym) \
+({ \
+	u32 __ret; \
+	asm_inline(".option push\n\t" \
+		".option norvc\n\t" \
+		"1:\t" \
+		SRLI " %[__ret],%[__val],12\n\t" \
+		".option pop\n\t" \
+		".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+		".long 1b - .\n\t" \
+		".popsection" \
+		: [__ret] "=r" (__ret) \
+		: [__val] "r" (val)); \
+	__ret; \
+})
+
+#define runtime_const_init(type, sym) do { \
+	extern s32 __start_runtime_##type##_##sym[]; \
+	extern s32 __stop_runtime_##type##_##sym[]; \
+	\
+	runtime_const_fixup(__runtime_fixup_##type, \
+			    (unsigned long)(sym), \
+			    __start_runtime_##type##_##sym, \
+			    __stop_runtime_##type##_##sym); \
+} while (0)
+
+static inline void __runtime_fixup_caches(void *where, unsigned int insns)
+{
+	/* On riscv there are currently only cache-wide flushes so va is ignored. */
+	__always_unused uintptr_t va = (uintptr_t)where;
+
+	flush_icache_range(va, va + 4 * insns);
+}
+
+/*
+ * The 32-bit immediate is stored in a lui+addi pairing.
+ * lui holds the upper 20 bits of the immediate in the first 20 bits of the instruction.
+ * addi holds the lower 12 bits of the immediate in the first 12 bits of the instruction.
+ */
+static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val)
+{
+	unsigned int lower_immediate, upper_immediate;
+	u32 lui_insn, addi_insn, addi_insn_mask;
+	__le32 lui_res, addi_res;
+
+	/* Mask out upper 12 bit of addi */
+	addi_insn_mask = 0x000fffff;
+
+	lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16;
+	addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16;
+
+	lower_immediate = sign_extend32(val, 11);
+	upper_immediate = (val - lower_immediate);
+
+	if (upper_immediate & 0xfffff000) {
+		/* replace upper 20 bits of lui with upper immediate */
+		lui_insn &= 0x00000fff;
+		lui_insn |= upper_immediate & 0xfffff000;
+	} else {
+		/* replace lui with nop if immediate is small enough to fit in addi */
+		lui_insn = RISCV_INSN_NOP4;
+		/*
+		 * lui is being skipped, so do a load instead of an add. A load
+		 * is performed by adding with the x0 register. Setting rs to
+		 * zero with the following mask will accomplish this goal.
+		 */
+		addi_insn_mask &= 0x07fff;
+	}
+
+	if (lower_immediate & 0x00000fff) {
+		/* replace upper 12 bits of addi with lower 12 bits of val */
+		addi_insn &= addi_insn_mask;
+		addi_insn |= (lower_immediate & 0x00000fff) << 20;
+	} else {
+		/* replace addi with nop if lower_immediate is empty */
+		addi_insn = RISCV_INSN_NOP4;
+	}
+
+	addi_res = cpu_to_le32(addi_insn);
+	lui_res = cpu_to_le32(lui_insn);
+	mutex_lock(&text_mutex);
+	patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res));
+	patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res));
+	mutex_unlock(&text_mutex);
+}
+
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+#ifdef CONFIG_32BIT
+	__runtime_fixup_32(where, where + 4, val);
+	__runtime_fixup_caches(where, 2);
+#else
+	__runtime_fixup_32(where, where + 8, val);
+	__runtime_fixup_32(where + 4, where + 12, val >> 32);
+	__runtime_fixup_caches(where, 4);
+#endif
+}
+
+/*
+ * Replace the least significant 5 bits of the srli/srliw immediate that is
+ * located at bits 20-24
+ */
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+	__le16 *parcel = where;
+	__le32 res;
+	u32 insn;
+
+	insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;
+
+	insn &= 0xfe0fffff;
+	insn |= (val & 0b11111) << 20;
+
+	res = cpu_to_le32(insn);
+	mutex_lock(&text_mutex);
+	patch_text_nosync(where, &res, sizeof(insn));
+	mutex_unlock(&text_mutex);
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+				       unsigned long val, s32 *start, s32 *end)
+{
+	while (start < end) {
+		fn(*start + (void *)start, val);
+		start++;
+	}
+}
+
+#endif /* _ASM_RISCV_RUNTIME_CONST_H */
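A quick way to convince yourself of the lui/addi split performed by __runtime_fixup_32() above: the addi immediate is sign-extended, so the low 12 bits are taken as a signed value, and lui supplies whatever remains (upper = val - lower). A standalone user-space sketch of just that arithmetic (sign_extend32() re-implemented locally; this is an illustration, not kernel code):

#include <stdint.h>
#include <stdio.h>

/* local stand-in for the kernel's sign_extend32(value, sign-bit index) */
static int32_t sign_extend32(uint32_t value, int index)
{
	int shift = 31 - index;
	return (int32_t)(value << shift) >> shift;
}

int main(void)
{
	/* values a patched lui/addi pair must materialize */
	uint32_t vals[] = { 0x12345678, 0x12345fff, 0x00000042 };

	for (unsigned int i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
		uint32_t val = vals[i];
		/* addi's 12-bit immediate is signed */
		int32_t lower = sign_extend32(val, 11);
		/* lui supplies the rest; for 0x12345fff this becomes 0x12346000 */
		uint32_t upper = val - (uint32_t)lower;

		/*
		 * In the kernel code, upper == 0 turns the lui into a nop (and
		 * addi into an add from x0), and lower == 0 turns addi into a nop.
		 */
		printf("val=0x%08x -> lui 0x%05x, addi %d (check: 0x%08x)\n",
		       val, upper >> 12, lower, upper + (uint32_t)lower);
	}
	return 0;
}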

arch/riscv/kernel/ftrace.c

Lines changed: 3 additions & 3 deletions
@@ -36,7 +36,7 @@ static int ftrace_check_current_call(unsigned long hook_pos,
 				     unsigned int *expected)
 {
 	unsigned int replaced[2];
-	unsigned int nops[2] = {NOP4, NOP4};
+	unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4};
 
 	/* we expect nops at the hook position */
 	if (!expected)
@@ -68,7 +68,7 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
 				bool enable, bool ra)
 {
 	unsigned int call[2];
-	unsigned int nops[2] = {NOP4, NOP4};
+	unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4};
 
 	if (ra)
 		make_call_ra(hook_pos, target, call);
@@ -97,7 +97,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 		    unsigned long addr)
 {
-	unsigned int nops[2] = {NOP4, NOP4};
+	unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4};
 
 	if (patch_insn_write((void *)rec->ip, nops, MCOUNT_INSN_SIZE))
 		return -EPERM;

arch/riscv/kernel/jump_label.c

Lines changed: 2 additions & 2 deletions
@@ -11,8 +11,8 @@
 #include <asm/bug.h>
 #include <asm/cacheflush.h>
 #include <asm/text-patching.h>
+#include <asm/insn-def.h>
 
-#define RISCV_INSN_NOP	0x00000013U
 #define RISCV_INSN_JAL	0x0000006fU
 
 bool arch_jump_label_transform_queue(struct jump_entry *entry,
@@ -33,7 +33,7 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
 			(((u32)offset & GENMASK(10, 1)) << (21 - 1)) |
 			(((u32)offset & GENMASK(20, 20)) << (31 - 20));
 	} else {
-		insn = RISCV_INSN_NOP;
+		insn = RISCV_INSN_NOP4;
 	}
 
 	if (early_boot_irqs_disabled) {

arch/riscv/kernel/vmlinux.lds.S

Lines changed: 3 additions & 0 deletions
@@ -97,6 +97,9 @@ SECTIONS
 	{
 		EXIT_DATA
 	}
+
+	RUNTIME_CONST_VARIABLES
+
 	PERCPU_SECTION(L1_CACHE_BYTES)
 
 	.rel.dyn : {
