Skip to content

Commit 0420af5

Browse files
Merge patch series "membarrier: riscv: Core serializing command"
RISC-V was lacking a membarrier implementation for the store/fetch ordering, which is a bit tricky because of the deferred icache flushing we use in RISC-V.

* b4-shazam-merge:
  membarrier: riscv: Provide core serializing command
  locking: Introduce prepare_sync_core_cmd()
  membarrier: Create Documentation/scheduler/membarrier.rst
  membarrier: riscv: Add full memory barrier in switch_mm()

Link: https://lore.kernel.org/r/20240131144936.29190-1-parri.andrea@gmail.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2 parents cb4ede9 + cd9b290 commit 0420af5

File tree

12 files changed

+185
-10
lines changed

12 files changed

+185
-10
lines changed

Documentation/features/sched/membarrier-sync-core/arch-support.txt

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,22 @@
1010
# Rely on implicit context synchronization as a result of exception return
1111
# when returning from IPI handler, and when returning to user-space.
1212
#
13+
# * riscv
14+
#
15+
# riscv uses xRET as return from interrupt and to return to user-space.
16+
#
17+
# Given that xRET is not core serializing, we rely on FENCE.I for providing
18+
# core serialization:
19+
#
20+
# - by calling sync_core_before_usermode() on return from interrupt (cf.
21+
# ipi_sync_core()),
22+
#
23+
# - via switch_mm() and sync_core_before_usermode() (respectively, for
24+
# uthread->uthread and kthread->uthread transitions) before returning
25+
# to user-space.
26+
#
27+
# The serialization in switch_mm() is activated by prepare_sync_core_cmd().
28+
#
1329
# * x86
1430
#
1531
# x86-32 uses IRET as return from interrupt, which takes care of the IPI.
@@ -43,7 +59,7 @@
4359
| openrisc: | TODO |
4460
| parisc: | TODO |
4561
| powerpc: | ok |
46-
| riscv: | TODO |
62+
| riscv: | ok |
4763
| s390: | ok |
4864
| sh: | TODO |
4965
| sparc: | TODO |

Documentation/scheduler/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Scheduler
77

88

99
completion
10+
membarrier
1011
sched-arch
1112
sched-bwc
1213
sched-deadline
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
.. SPDX-License-Identifier: GPL-2.0
2+
3+
========================
4+
membarrier() System Call
5+
========================
6+
7+
MEMBARRIER_CMD_{PRIVATE,GLOBAL}_EXPEDITED - Architecture requirements
8+
=====================================================================
9+
10+
Memory barriers before updating rq->curr
11+
----------------------------------------
12+
13+
The commands MEMBARRIER_CMD_PRIVATE_EXPEDITED and MEMBARRIER_CMD_GLOBAL_EXPEDITED
14+
require each architecture to have a full memory barrier after coming from
15+
user-space, before updating rq->curr. This barrier is implied by the sequence
16+
rq_lock(); smp_mb__after_spinlock() in __schedule(). The barrier matches a full
17+
barrier in the proximity of the membarrier system call exit, cf.
18+
membarrier_{private,global}_expedited().
19+
20+
Memory barriers after updating rq->curr
21+
---------------------------------------
22+
23+
The commands MEMBARRIER_CMD_PRIVATE_EXPEDITED and MEMBARRIER_CMD_GLOBAL_EXPEDITED
24+
require each architecture to have a full memory barrier after updating rq->curr,
25+
before returning to user-space. The schemes providing this barrier on the various
26+
architectures are as follows.
27+
28+
- alpha, arc, arm, hexagon, mips rely on the full barrier implied by
29+
spin_unlock() in finish_lock_switch().
30+
31+
- arm64 relies on the full barrier implied by switch_to().
32+
33+
- powerpc, riscv, s390, sparc, x86 rely on the full barrier implied by
34+
switch_mm(), if mm is not NULL; they rely on the full barrier implied
35+
by mmdrop(), otherwise. On powerpc and riscv, switch_mm() relies on
36+
membarrier_arch_switch_mm().
37+
38+
The barrier matches a full barrier in the proximity of the membarrier system call
39+
entry, cf. membarrier_{private,global}_expedited().

MAINTAINERS

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14039,7 +14039,9 @@ M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
1403914039
M: "Paul E. McKenney" <paulmck@kernel.org>
1404014040
L: linux-kernel@vger.kernel.org
1404114041
S: Supported
14042-
F: arch/powerpc/include/asm/membarrier.h
14042+
F: Documentation/scheduler/membarrier.rst
14043+
F: arch/*/include/asm/membarrier.h
14044+
F: arch/*/include/asm/sync_core.h
1404314045
F: include/uapi/linux/membarrier.h
1404414046
F: kernel/sched/membarrier.c
1404514047

arch/riscv/Kconfig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,18 @@ config RISCV
2727
select ARCH_HAS_GCOV_PROFILE_ALL
2828
select ARCH_HAS_GIGANTIC_PAGE
2929
select ARCH_HAS_KCOV
30+
select ARCH_HAS_MEMBARRIER_CALLBACKS
31+
select ARCH_HAS_MEMBARRIER_SYNC_CORE
3032
select ARCH_HAS_MMIOWB
3133
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
3234
select ARCH_HAS_PMEM_API
35+
select ARCH_HAS_PREPARE_SYNC_CORE_CMD
3336
select ARCH_HAS_PTE_SPECIAL
3437
select ARCH_HAS_SET_DIRECT_MAP if MMU
3538
select ARCH_HAS_SET_MEMORY if MMU
3639
select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
3740
select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
41+
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
3842
select ARCH_HAS_SYSCALL_WRAPPER
3943
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
4044
select ARCH_HAS_UBSAN_SANITIZE_ALL

arch/riscv/include/asm/membarrier.h

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
#ifndef _ASM_RISCV_MEMBARRIER_H
3+
#define _ASM_RISCV_MEMBARRIER_H
4+
5+
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
6+
struct mm_struct *next,
7+
struct task_struct *tsk)
8+
{
9+
/*
10+
* Only need the full barrier when switching between processes.
11+
* Barrier when switching from kernel to userspace is not
12+
* required here, given that it is implied by mmdrop(). Barrier
13+
* when switching from userspace to kernel is not needed after
14+
* store to rq->curr.
15+
*/
16+
if (IS_ENABLED(CONFIG_SMP) &&
17+
likely(!(atomic_read(&next->membarrier_state) &
18+
(MEMBARRIER_STATE_PRIVATE_EXPEDITED |
19+
MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
20+
return;
21+
22+
/*
23+
* The membarrier system call requires a full memory barrier
24+
* after storing to rq->curr, before going back to user-space.
25+
*
26+
* This barrier is also needed for the SYNC_CORE command when
27+
* switching between processes; in particular, on a transition
28+
* from a thread belonging to another mm to a thread belonging
29+
* to the mm for which a membarrier SYNC_CORE is done on CPU0:
30+
*
31+
* - [CPU0] sets all bits in the mm icache_stale_mask (in
32+
* prepare_sync_core_cmd());
33+
*
34+
* - [CPU1] stores to rq->curr (by the scheduler);
35+
*
36+
* - [CPU0] loads rq->curr within membarrier and observes
37+
* cpu_rq(1)->curr->mm != mm, so the IPI is skipped on
38+
* CPU1; this means membarrier relies on switch_mm() to
39+
* issue the sync-core;
40+
*
41+
* - [CPU1] switch_mm() loads icache_stale_mask; if the bit
42+
* is zero, switch_mm() may incorrectly skip the sync-core.
43+
*
44+
* Matches a full barrier in the proximity of the membarrier
45+
* system call entry.
46+
*/
47+
smp_mb();
48+
}
49+
50+
#endif /* _ASM_RISCV_MEMBARRIER_H */

arch/riscv/include/asm/sync_core.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#ifndef _ASM_RISCV_SYNC_CORE_H
3+
#define _ASM_RISCV_SYNC_CORE_H
4+
5+
/*
6+
* RISC-V implements return to user-space through an xRET instruction,
7+
* which is not core serializing.
8+
*/
9+
static inline void sync_core_before_usermode(void)
10+
{
11+
asm volatile ("fence.i" ::: "memory");
12+
}
13+
14+
#ifdef CONFIG_SMP
15+
/*
16+
* Ensure the next switch_mm() on every CPU issues a core serializing
17+
* instruction for the given @mm.
18+
*/
19+
static inline void prepare_sync_core_cmd(struct mm_struct *mm)
20+
{
21+
cpumask_setall(&mm->context.icache_stale_mask);
22+
}
23+
#else
24+
static inline void prepare_sync_core_cmd(struct mm_struct *mm)
25+
{
26+
}
27+
#endif /* CONFIG_SMP */
28+
29+
#endif /* _ASM_RISCV_SYNC_CORE_H */

arch/riscv/mm/context.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,8 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
323323
if (unlikely(prev == next))
324324
return;
325325

326+
membarrier_arch_switch_mm(prev, next, task);
327+
326328
/*
327329
* Mark the current MM context as inactive, and the next as
328330
* active. This is at least used by the icache flushing

include/linux/sync_core.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,19 @@ static inline void sync_core_before_usermode(void)
1717
}
1818
#endif
1919

20-
#endif /* _LINUX_SYNC_CORE_H */
20+
#ifdef CONFIG_ARCH_HAS_PREPARE_SYNC_CORE_CMD
#include <asm/sync_core.h>
#else
/*
 * This is a dummy prepare_sync_core_cmd() implementation that can be used on
 * all architectures which provide unconditional core serializing instructions
 * in switch_mm().
 * If your architecture doesn't provide such core serializing instructions in
 * switch_mm(), you may need to write your own functions.
 */
static inline void prepare_sync_core_cmd(struct mm_struct *mm)
{
}
#endif
2134

35+
#endif /* _LINUX_SYNC_CORE_H */

init/Kconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1970,6 +1970,9 @@ source "kernel/Kconfig.locks"
19701970
config ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
19711971
bool
19721972

1973+
config ARCH_HAS_PREPARE_SYNC_CORE_CMD
1974+
bool
1975+
19731976
config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
19741977
bool
19751978

0 commit comments

Comments
 (0)