Skip to content

Commit f7bd2be

Browse files
Alexandre Ghiti authored and palmer-dabbelt committed
riscv: Implement arch_cmpxchg128() using Zacas
Now that Zacas is supported in the kernel, let's use the double word atomic version of amocas to improve the SLUB allocator.

Note that we have to select fixed registers, otherwise gcc fails to pick even registers and then produces a reserved encoding which fails to assemble.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20241103145153.105097-8-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
1 parent 6116e22 commit f7bd2be

File tree

2 files changed

+39
-0
lines changed

2 files changed

+39
-0
lines changed

arch/riscv/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ config RISCV
115115
select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
116116
select HARDIRQS_SW_RESEND
117117
select HAS_IOPORT if MMU
118+
select HAVE_ALIGNED_STRUCT_PAGE
118119
select HAVE_ARCH_AUDITSYSCALL
119120
select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
120121
select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT

arch/riscv/include/asm/cmpxchg.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,44 @@
296296
arch_cmpxchg_release((ptr), (o), (n)); \
297297
})
298298

299+
#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)
300+
301+
/*
 * Runtime check used to decide whether the 128-bit cmpxchg below may be
 * used: expands to a test for the Zacas ISA extension. Only defined when
 * both CONFIG_64BIT and CONFIG_RISCV_ISA_ZACAS are set.
 */
#define system_has_cmpxchg128() riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)
302+
303+
/*
 * Overlay that exposes a u128 either as a whole (full) or as two 64-bit
 * halves (low, high). __arch_cmpxchg128() uses it to split its 128-bit
 * operands into register-sized pieces and treats "low" as the least
 * significant half when it recombines the result.
 * NOTE(review): "low" aliasing the least significant 64 bits of "full"
 * depends on the target byte order - presumably little-endian; confirm.
 */
union __u128_halves {
304+
u128 full;
305+
struct {
306+
u64 low, high;
307+
};
308+
};
309+
310+
/*
 * __arch_cmpxchg128() - 128-bit compare-and-swap built on amocas.q.
 * @p:       pointer to the 128-bit memory operand (given to the asm as "+A")
 * @o:       expected value; its low half is loaded into t3, its high half
 *           into t4
 * @n:       new value; its low half is loaded into t1, its high half into t2
 * @cas_sfx: string literal appended to the "amocas.q" mnemonic
 *
 * The four halves are pinned to the fixed registers t1..t4 on purpose:
 * per the commit that introduced this macro, gcc otherwise fails to pick
 * even registers and produces a reserved encoding that does not assemble.
 * Only t3 (%0) and t1 (%z3) appear in the instruction text; t4 and t2 are
 * still listed as operands so the compiler sets them up and, for t4, reads
 * it back afterwards.
 * NOTE(review): presumably amocas.q implicitly uses the register following
 * each named one as the high half - confirm against the Zacas spec.
 *
 * t3/t4 are read-write early-clobber operands ("+&r") and the asm carries a
 * "memory" clobber. The macro evaluates to the u128 rebuilt from t4 (high)
 * and t3 (low) after the instruction has executed.
 * NOTE(review): presumably that is the value observed at *p, so callers can
 * compare it with @o to detect success - confirm.
 */
#define __arch_cmpxchg128(p, o, n, cas_sfx) \
311+
({ \
312+
__typeof__(*(p)) __o = (o); \
313+
union __u128_halves __hn = { .full = (n) }; \
314+
union __u128_halves __ho = { .full = (__o) }; \
315+
register unsigned long t1 asm ("t1") = __hn.low; \
316+
register unsigned long t2 asm ("t2") = __hn.high; \
317+
register unsigned long t3 asm ("t3") = __ho.low; \
318+
register unsigned long t4 asm ("t4") = __ho.high; \
319+
\
320+
__asm__ __volatile__ ( \
321+
" amocas.q" cas_sfx " %0, %z3, %2" \
322+
: "+&r" (t3), "+&r" (t4), "+A" (*(p)) \
323+
: "rJ" (t1), "rJ" (t2) \
324+
: "memory"); \
325+
\
326+
((u128)t4 << 64) | t3; \
327+
})
328+
329+
/*
 * arch_cmpxchg128() - 128-bit cmpxchg on *ptr, expected value o, new value n.
 * Expands to __arch_cmpxchg128() with the ".aqrl" suffix, i.e. the emitted
 * instruction is "amocas.q.aqrl".
 * NOTE(review): ".aqrl" is presumably the acquire+release ordering
 * annotation, making this the fully ordered variant - confirm.
 */
#define arch_cmpxchg128(ptr, o, n) \
330+
__arch_cmpxchg128((ptr), (o), (n), ".aqrl")
331+
332+
/*
 * arch_cmpxchg128_local() - 128-bit cmpxchg on *ptr, expected value o, new
 * value n. Expands to __arch_cmpxchg128() with an empty suffix, i.e. the
 * emitted instruction is a plain "amocas.q" without ordering annotation.
 */
#define arch_cmpxchg128_local(ptr, o, n) \
333+
__arch_cmpxchg128((ptr), (o), (n), "")
334+
335+
#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */
336+
299337
#ifdef CONFIG_RISCV_ISA_ZAWRS
300338
/*
301339
* Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to

0 commit comments

Comments
 (0)