
Commit 97ddab7

Alexandre Ghiti authored and palmer-dabbelt committed
riscv: Implement xchg8/16() using Zabha
This adds runtime support for Zabha in xchg8/16() operations.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20241103145153.105097-9-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
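At the call site nothing changes: any existing xchg() on a u8 or u16 picks up the new fast path automatically when the kernel is built with CONFIG_RISCV_ISA_ZABHA and the CPU advertises Zabha at runtime. A minimal sketch of such a caller follows; the struct and function names are hypothetical and not part of this patch.

#include <linux/atomic.h>
#include <linux/types.h>

/* Hypothetical caller, only to illustrate the effect of the change below. */
struct foo_flag {
	u8 state;
};

static inline u8 foo_set_state(struct foo_flag *f, u8 newval)
{
	/*
	 * With CONFIG_RISCV_ISA_ZABHA enabled and the Zabha extension
	 * present at runtime, this byte-wide xchg() is expected to emit a
	 * single "amoswap.b"; otherwise it falls back to the masked
	 * lr.w/sc.w loop shown in the diff below.
	 */
	return xchg(&f->state, newval);
}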
1 parent f7bd2be commit 97ddab7

1 file changed: 41 additions (+), 24 deletions (-)

arch/riscv/include/asm/cmpxchg.h

Lines changed: 41 additions & 24 deletions
@@ -14,29 +14,41 @@
 #include <asm/insn-def.h>
 #include <asm/cpufeature-macros.h>
 
-#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
-({ \
-	u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
-	ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
-	ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
-			<< __s; \
-	ulong __newx = (ulong)(n) << __s; \
-	ulong __retx; \
-	ulong __rc; \
- \
-	__asm__ __volatile__ ( \
-	       prepend \
-	       "0:	lr.w %0, %2\n" \
-	       "	and  %1, %0, %z4\n" \
-	       "	or   %1, %1, %z3\n" \
-	       "	sc.w" sc_sfx " %1, %1, %2\n" \
-	       "	bnez %1, 0b\n" \
-	       append \
-	       : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
-	       : "rJ" (__newx), "rJ" (~__mask) \
-	       : "memory"); \
- \
-	r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
+			   swap_append, r, p, n) \
+({ \
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
+	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
+		__asm__ __volatile__ ( \
+			prepend \
+			"	amoswap" swap_sfx " %0, %z2, %1\n" \
+			swap_append \
+			: "=&r" (r), "+A" (*(p)) \
+			: "rJ" (n) \
+			: "memory"); \
+	} else { \
+		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
+		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
+		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
+			       << __s; \
+		ulong __newx = (ulong)(n) << __s; \
+		ulong __retx; \
+		ulong __rc; \
+ \
+		__asm__ __volatile__ ( \
+			prepend \
+			"0:	lr.w %0, %2\n" \
+			"	and  %1, %0, %z4\n" \
+			"	or   %1, %1, %z3\n" \
+			"	sc.w" sc_sfx " %1, %1, %2\n" \
+			"	bnez %1, 0b\n" \
+			sc_append \
+			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
+			: "rJ" (__newx), "rJ" (~__mask) \
+			: "memory"); \
+ \
+		r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+	} \
 })
 
 #define __arch_xchg(sfx, prepend, append, r, p, n) \
@@ -59,8 +71,13 @@
 	\
 	switch (sizeof(*__ptr)) { \
 	case 1: \
+		__arch_xchg_masked(sc_sfx, ".b" swap_sfx, \
+				   prepend, sc_append, swap_append, \
+				   __ret, __ptr, __new); \
+		break; \
 	case 2: \
-		__arch_xchg_masked(sc_sfx, prepend, sc_append, \
+		__arch_xchg_masked(sc_sfx, ".h" swap_sfx, \
+				   prepend, sc_append, swap_append, \
 				   __ret, __ptr, __new); \
 		break; \
 	case 4: \
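For reference, the else branch keeps the pre-existing fallback: the byte or halfword pointer is aligned down to its containing 32-bit word, and a shift/mask pair isolates the sub-word lane for the lr.w/sc.w loop. The following user-space sketch reproduces just that address arithmetic with the same expressions as the macro; it is illustrative only, not kernel code.

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_BYTE 8

int main(void)
{
	uint8_t buf[4] = { 0 };
	uint8_t *p = &buf[3];	/* byte at offset 3 within its 32-bit word */

	unsigned long addr = (unsigned long)p;
	/* Round the pointer down to the containing aligned 32-bit word. */
	uint32_t *ptr32b = (uint32_t *)(addr & ~0x3UL);
	/* Bit offset of the byte lane inside that word. */
	unsigned long s = (addr & (0x4 - sizeof(*p))) * BITS_PER_BYTE;
	/* Mask covering the byte lane, equivalent to GENMASK(7, 0) << s. */
	unsigned long mask = ((1UL << (sizeof(*p) * BITS_PER_BYTE)) - 1) << s;

	printf("word %p, shift %lu, mask 0x%lx\n", (void *)ptr32b, s, mask);
	return 0;
}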
