Skip to content

Commit ce6b5ff

Browse files
kristina-martsenko authored and ctmarinas committed
arm64: lib: Use MOPS for copy_page() and clear_page()
Similarly to what was done to the memcpy() routines, make copy_page() and clear_page() also use the Armv8.8 FEAT_MOPS instructions.

Note: For copy_page() this uses the CPY* instructions instead of CPYF* as CPYF* doesn't allow src and dst to be equal. It's not clear if copy_page() needs to allow equal src and dst but it has worked so far with the current implementation and there is no documentation forbidding it.

Note, the unoptimized version of copy_page() in assembler.h is left as it is.

Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Link: https://lore.kernel.org/r/20240930161051.3777828-6-kristina.martsenko@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
1 parent 836ed3c commit ce6b5ff

File tree

2 files changed

+26
-0
lines changed

2 files changed

+26
-0
lines changed

arch/arm64/lib/clear_page.S

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,19 @@
1515
* x0 - dest
1616
*/
1717
SYM_FUNC_START(__pi_clear_page)
18+
#ifdef CONFIG_AS_HAS_MOPS
19+
.arch_extension mops
20+
alternative_if_not ARM64_HAS_MOPS
21+
b .Lno_mops
22+
alternative_else_nop_endif
23+
24+
mov x1, #PAGE_SIZE
25+
setpn [x0]!, x1!, xzr
26+
setmn [x0]!, x1!, xzr
27+
seten [x0]!, x1!, xzr
28+
ret
29+
.Lno_mops:
30+
#endif
1831
mrs x1, dczid_el0
1932
tbnz x1, #4, 2f /* Branch if DC ZVA is prohibited */
2033
and w1, w1, #0xf

arch/arm64/lib/copy_page.S

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,19 @@
1818
* x1 - src
1919
*/
2020
SYM_FUNC_START(__pi_copy_page)
21+
#ifdef CONFIG_AS_HAS_MOPS
22+
.arch_extension mops
23+
alternative_if_not ARM64_HAS_MOPS
24+
b .Lno_mops
25+
alternative_else_nop_endif
26+
27+
mov x2, #PAGE_SIZE
28+
cpypwn [x0]!, [x1]!, x2!
29+
cpymwn [x0]!, [x1]!, x2!
30+
cpyewn [x0]!, [x1]!, x2!
31+
ret
32+
.Lno_mops:
33+
#endif
2134
ldp x2, x3, [x1]
2235
ldp x4, x5, [x1, #16]
2336
ldp x6, x7, [x1, #32]

0 commit comments

Comments
 (0)