Skip to content

Commit 836ed3c

Browse files
kristina-martsenko authored and ctmarinas committed
arm64: lib: Use MOPS for memcpy() routines
Make memcpy(), memmove() and memset() use the Armv8.8 FEAT_MOPS instructions when implemented on the CPU.

The CPY*/SET* instructions copy or set a block of memory of arbitrary size and alignment. They can be interrupted by the CPU and the copying resumed later. Their performance is expected to be close to the best generic copy/set sequence of loads/stores for a given CPU. Using them in the kernel's copy/set routines therefore avoids the need to periodically rewrite the routines to optimize for new microarchitectures. It could also lead to a performance improvement for some CPUs and systems.

With this change the kernel will always use the instructions if they are implemented on the CPU (and have not been disabled by the arm64.nomops command line parameter). When not implemented the usual routines will be used (patched via alternatives). Note, we need to patch B/NOP instead of the whole sequence to avoid executing a partially patched sequence in case the compiler generates a mem*() call inside the alternatives patching code.

Note that MOPS instructions have relaxed behavior on Device memory, but it is expected that these routines are not generally used on MMIO.

Note: For memcpy(), this uses the CPY* instructions instead of CPYF*, as CPY* allows overlaps between the source and destination buffers, and despite contradicting the C standard, compilers require that memcpy() work on exactly overlapping source and destination:
https://gcc.gnu.org/onlinedocs/gcc/Standards.html#C-Language
https://reviews.llvm.org/D86993

Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Link: https://lore.kernel.org/r/20240930161051.3777828-5-kristina.martsenko@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
1 parent b616058 commit 836ed3c

File tree

3 files changed

+40
-2
lines changed

3 files changed

+40
-2
lines changed

arch/arm64/Kconfig

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2155,6 +2155,9 @@ config ARM64_EPAN
21552155
if the cpu does not implement the feature.
21562156
endmenu # "ARMv8.7 architectural features"
21572157

2158+
config AS_HAS_MOPS
2159+
def_bool $(as-instr,.arch_extension mops)
2160+
21582161
menu "ARMv8.9 architectural features"
21592162

21602163
config ARM64_POE

arch/arm64/lib/memcpy.S

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
The loop tail is handled by always copying 64 bytes from the end.
5858
*/
5959

60-
SYM_FUNC_START(__pi_memcpy)
60+
SYM_FUNC_START_LOCAL(__pi_memcpy_generic)
6161
add srcend, src, count
6262
add dstend, dstin, count
6363
cmp count, 128
@@ -238,7 +238,24 @@ L(copy64_from_start):
238238
stp B_l, B_h, [dstin, 16]
239239
stp C_l, C_h, [dstin]
240240
ret
241+
SYM_FUNC_END(__pi_memcpy_generic)
242+
243+
#ifdef CONFIG_AS_HAS_MOPS
244+
.arch_extension mops
245+
SYM_FUNC_START(__pi_memcpy)
246+
alternative_if_not ARM64_HAS_MOPS
247+
b __pi_memcpy_generic
248+
alternative_else_nop_endif
249+
250+
mov dst, dstin
251+
cpyp [dst]!, [src]!, count!
252+
cpym [dst]!, [src]!, count!
253+
cpye [dst]!, [src]!, count!
254+
ret
241255
SYM_FUNC_END(__pi_memcpy)
256+
#else
257+
SYM_FUNC_ALIAS(__pi_memcpy, __pi_memcpy_generic)
258+
#endif
242259

243260
SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)
244261
EXPORT_SYMBOL(__memcpy)

arch/arm64/lib/memset.S

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
*/
2727

2828
dstin .req x0
29+
val_x .req x1
2930
val .req w1
3031
count .req x2
3132
tmp1 .req x3
@@ -42,7 +43,7 @@ dst .req x8
4243
tmp3w .req w9
4344
tmp3 .req x9
4445

45-
SYM_FUNC_START(__pi_memset)
46+
SYM_FUNC_START_LOCAL(__pi_memset_generic)
4647
mov dst, dstin /* Preserve return value. */
4748
and A_lw, val, #255
4849
orr A_lw, A_lw, A_lw, lsl #8
@@ -201,7 +202,24 @@ SYM_FUNC_START(__pi_memset)
201202
ands count, count, zva_bits_x
202203
b.ne .Ltail_maybe_long
203204
ret
205+
SYM_FUNC_END(__pi_memset_generic)
206+
207+
#ifdef CONFIG_AS_HAS_MOPS
208+
.arch_extension mops
209+
SYM_FUNC_START(__pi_memset)
210+
alternative_if_not ARM64_HAS_MOPS
211+
b __pi_memset_generic
212+
alternative_else_nop_endif
213+
214+
mov dst, dstin
215+
setp [dst]!, count!, val_x
216+
setm [dst]!, count!, val_x
217+
sete [dst]!, count!, val_x
218+
ret
204219
SYM_FUNC_END(__pi_memset)
220+
#else
221+
SYM_FUNC_ALIAS(__pi_memset, __pi_memset_generic)
222+
#endif
205223

206224
SYM_FUNC_ALIAS(__memset, __pi_memset)
207225
EXPORT_SYMBOL(__memset)

0 commit comments

Comments
 (0)