Skip to content

Commit 3f02039

Browse files
committed
Merge tag 'sched-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: "Core facilities: - Add the "Lazy preemption" model (CONFIG_PREEMPT_LAZY=y), which optimizes fair-class preemption by delaying preemption requests to the tick boundary, while working as full preemption for RR/FIFO/DEADLINE classes. (Peter Zijlstra) - x86: Enable Lazy preemption (Peter Zijlstra) - riscv: Enable Lazy preemption (Jisheng Zhang) - Initialize idle tasks only once (Thomas Gleixner) - sched/ext: Remove sched_fork() hack (Thomas Gleixner) Fair scheduler: - Optimize the PLACE_LAG when se->vlag is zero (Huang Shijie) Idle loop: - Optimize the generic idle loop by removing unnecessary memory barrier (Zhongqiu Han) RSEQ: - Improve cache locality of RSEQ concurrency IDs for intermittent workloads (Mathieu Desnoyers) Waitqueues: - Make wake_up_{bit,var} less fragile (Neil Brown) PSI: - Pass enqueue/dequeue flags to psi callbacks directly (Johannes Weiner) Preparatory patches for proxy execution: - Add move_queued_task_locked helper (Connor O'Brien) - Consolidate pick_*_task to task_is_pushable helper (Connor O'Brien) - Split out __schedule() deactivate task logic into a helper (John Stultz) - Split scheduler and execution contexts (Peter Zijlstra) - Make mutex::wait_lock irq safe (Juri Lelli) - Expose __mutex_owner() (Juri Lelli) - Remove wakeups from under mutex::wait_lock (Peter Zijlstra) Misc fixes and cleanups: - Remove unused __HAVE_THREAD_FUNCTIONS hook support (David Disseldorp) - Update the comment for TIF_NEED_RESCHED_LAZY (Sebastian Andrzej Siewior) - Remove unused bit_wait_io_timeout (Dr. David Alan Gilbert) - remove the DOUBLE_TICK feature (Huang Shijie) - fix the comment for PREEMPT_SHORT (Huang Shijie) - Fix unnused variable warning (Christian Loehle) - No PREEMPT_RT=y for all{yes,mod}config" * tag 'sched-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits) sched, x86: Update the comment for TIF_NEED_RESCHED_LAZY. 
sched: No PREEMPT_RT=y for all{yes,mod}config riscv: add PREEMPT_LAZY support sched, x86: Enable Lazy preemption sched: Enable PREEMPT_DYNAMIC for PREEMPT_RT sched: Add Lazy preemption model sched: Add TIF_NEED_RESCHED_LAZY infrastructure sched/ext: Remove sched_fork() hack sched: Initialize idle tasks only once sched: psi: pass enqueue/dequeue flags to psi callbacks directly sched/uclamp: Fix unnused variable warning sched: Split scheduler and execution contexts sched: Split out __schedule() deactivate task logic into a helper sched: Consolidate pick_*_task to task_is_pushable helper sched: Add move_queued_task_locked helper locking/mutex: Expose __mutex_owner() locking/mutex: Make mutex::wait_lock irq safe locking/mutex: Remove wakeups from under mutex::wait_lock sched: Improve cache locality of RSEQ concurrency IDs for intermittent workloads sched: idle: Optimize the generic idle loop by removing needless memory barrier ...
2 parents f41dac3 + 771d271 commit 3f02039

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

42 files changed

+1105
-504
lines changed

arch/riscv/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ config RISCV
3939
select ARCH_HAS_MMIOWB
4040
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
4141
select ARCH_HAS_PMEM_API
42+
select ARCH_HAS_PREEMPT_LAZY
4243
select ARCH_HAS_PREPARE_SYNC_CORE_CMD
4344
select ARCH_HAS_PTE_DEVMAP if 64BIT && MMU
4445
select ARCH_HAS_PTE_SPECIAL

arch/riscv/include/asm/thread_info.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,19 +107,21 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
107107
* - pending work-to-be-done flags are in lowest half-word
108108
* - other flags in upper half-word(s)
109109
*/
110-
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
111-
#define TIF_SIGPENDING 2 /* signal pending */
112-
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
110+
#define TIF_NEED_RESCHED 0 /* rescheduling necessary */
111+
#define TIF_NEED_RESCHED_LAZY 1 /* Lazy rescheduling needed */
112+
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
113+
#define TIF_SIGPENDING 3 /* signal pending */
113114
#define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */
114115
#define TIF_MEMDIE 5 /* is terminating due to OOM killer */
115116
#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */
116117
#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
117118
#define TIF_32BIT 11 /* compat-mode 32bit process */
118119
#define TIF_RISCV_V_DEFER_RESTORE 12 /* restore Vector before returing to user */
119120

121+
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
122+
#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
120123
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
121124
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
122-
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
123125
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
124126
#define _TIF_UPROBE (1 << TIF_UPROBE)
125127
#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE)

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ config X86
9393
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
9494
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
9595
select ARCH_HAS_PMEM_API if X86_64
96+
select ARCH_HAS_PREEMPT_LAZY
9697
select ARCH_HAS_PTE_DEVMAP if X86_64
9798
select ARCH_HAS_PTE_SPECIAL
9899
select ARCH_HAS_HW_PTE_YOUNG

arch/x86/include/asm/thread_info.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,9 @@ struct thread_info {
8787
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
8888
#define TIF_SIGPENDING 2 /* signal pending */
8989
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
90-
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
91-
#define TIF_SSBD 5 /* Speculative store bypass disable */
90+
#define TIF_NEED_RESCHED_LAZY 4 /* Lazy rescheduling needed */
91+
#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/
92+
#define TIF_SSBD 6 /* Speculative store bypass disable */
9293
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
9394
#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
9495
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
@@ -110,6 +111,7 @@ struct thread_info {
110111
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
111112
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
112113
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
114+
#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
113115
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
114116
#define _TIF_SSBD (1 << TIF_SSBD)
115117
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)

fs/exec.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -990,7 +990,7 @@ static int exec_mmap(struct mm_struct *mm)
990990
active_mm = tsk->active_mm;
991991
tsk->active_mm = mm;
992992
tsk->mm = mm;
993-
mm_init_cid(mm);
993+
mm_init_cid(mm, tsk);
994994
/*
995995
* This prevents preemption while active_mm is being loaded and
996996
* it and mm are being updated, which could cause problems for

include/linux/entry-common.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@
6464

6565
#define EXIT_TO_USER_MODE_WORK \
6666
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
67-
_TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
67+
_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
68+
_TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
6869
ARCH_EXIT_TO_USER_MODE_WORK)
6970

7071
/**

include/linux/entry-kvm.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@
1717
#endif
1818

1919
#define XFER_TO_GUEST_MODE_WORK \
20-
(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \
21-
_TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK)
20+
(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \
21+
_TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \
22+
ARCH_XFER_TO_GUEST_MODE_WORK)
2223

2324
struct kvm_vcpu;
2425

include/linux/mm_types.h

Lines changed: 63 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,7 @@ struct vm_area_struct {
782782
struct mm_cid {
783783
u64 time;
784784
int cid;
785+
int recent_cid;
785786
};
786787
#endif
787788

@@ -852,6 +853,27 @@ struct mm_struct {
852853
* When the next mm_cid scan is due (in jiffies).
853854
*/
854855
unsigned long mm_cid_next_scan;
856+
/**
857+
* @nr_cpus_allowed: Number of CPUs allowed for mm.
858+
*
859+
* Number of CPUs allowed in the union of all mm's
860+
* threads allowed CPUs.
861+
*/
862+
unsigned int nr_cpus_allowed;
863+
/**
864+
* @max_nr_cid: Maximum number of concurrency IDs allocated.
865+
*
866+
* Track the highest number of concurrency IDs allocated for the
867+
* mm.
868+
*/
869+
atomic_t max_nr_cid;
870+
/**
871+
* @cpus_allowed_lock: Lock protecting mm cpus_allowed.
872+
*
873+
* Provide mutual exclusion for mm cpus_allowed and
874+
* mm nr_cpus_allowed updates.
875+
*/
876+
raw_spinlock_t cpus_allowed_lock;
855877
#endif
856878
#ifdef CONFIG_MMU
857879
atomic_long_t pgtables_bytes; /* size of all page tables */
@@ -1170,36 +1192,53 @@ static inline int mm_cid_clear_lazy_put(int cid)
11701192
return cid & ~MM_CID_LAZY_PUT;
11711193
}
11721194

1195+
/*
1196+
* mm_cpus_allowed: Union of all mm's threads allowed CPUs.
1197+
*/
1198+
static inline cpumask_t *mm_cpus_allowed(struct mm_struct *mm)
1199+
{
1200+
unsigned long bitmap = (unsigned long)mm;
1201+
1202+
bitmap += offsetof(struct mm_struct, cpu_bitmap);
1203+
/* Skip cpu_bitmap */
1204+
bitmap += cpumask_size();
1205+
return (struct cpumask *)bitmap;
1206+
}
1207+
11731208
/* Accessor for struct mm_struct's cidmask. */
11741209
static inline cpumask_t *mm_cidmask(struct mm_struct *mm)
11751210
{
1176-
unsigned long cid_bitmap = (unsigned long)mm;
1211+
unsigned long cid_bitmap = (unsigned long)mm_cpus_allowed(mm);
11771212

1178-
cid_bitmap += offsetof(struct mm_struct, cpu_bitmap);
1179-
/* Skip cpu_bitmap */
1213+
/* Skip mm_cpus_allowed */
11801214
cid_bitmap += cpumask_size();
11811215
return (struct cpumask *)cid_bitmap;
11821216
}
11831217

1184-
static inline void mm_init_cid(struct mm_struct *mm)
1218+
static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
11851219
{
11861220
int i;
11871221

11881222
for_each_possible_cpu(i) {
11891223
struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, i);
11901224

11911225
pcpu_cid->cid = MM_CID_UNSET;
1226+
pcpu_cid->recent_cid = MM_CID_UNSET;
11921227
pcpu_cid->time = 0;
11931228
}
1229+
mm->nr_cpus_allowed = p->nr_cpus_allowed;
1230+
atomic_set(&mm->max_nr_cid, 0);
1231+
raw_spin_lock_init(&mm->cpus_allowed_lock);
1232+
cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask);
11941233
cpumask_clear(mm_cidmask(mm));
11951234
}
11961235

1197-
static inline int mm_alloc_cid_noprof(struct mm_struct *mm)
1236+
static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *p)
11981237
{
11991238
mm->pcpu_cid = alloc_percpu_noprof(struct mm_cid);
12001239
if (!mm->pcpu_cid)
12011240
return -ENOMEM;
1202-
mm_init_cid(mm);
1241+
mm_init_cid(mm, p);
12031242
return 0;
12041243
}
12051244
#define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__))
@@ -1212,16 +1251,31 @@ static inline void mm_destroy_cid(struct mm_struct *mm)
12121251

12131252
static inline unsigned int mm_cid_size(void)
12141253
{
1215-
return cpumask_size();
1254+
return 2 * cpumask_size(); /* mm_cpus_allowed(), mm_cidmask(). */
1255+
}
1256+
1257+
static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask)
1258+
{
1259+
struct cpumask *mm_allowed = mm_cpus_allowed(mm);
1260+
1261+
if (!mm)
1262+
return;
1263+
/* The mm_cpus_allowed is the union of each thread allowed CPUs masks. */
1264+
raw_spin_lock(&mm->cpus_allowed_lock);
1265+
cpumask_or(mm_allowed, mm_allowed, cpumask);
1266+
WRITE_ONCE(mm->nr_cpus_allowed, cpumask_weight(mm_allowed));
1267+
raw_spin_unlock(&mm->cpus_allowed_lock);
12161268
}
12171269
#else /* CONFIG_SCHED_MM_CID */
1218-
static inline void mm_init_cid(struct mm_struct *mm) { }
1219-
static inline int mm_alloc_cid(struct mm_struct *mm) { return 0; }
1270+
static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { }
1271+
static inline int mm_alloc_cid(struct mm_struct *mm, struct task_struct *p) { return 0; }
12201272
static inline void mm_destroy_cid(struct mm_struct *mm) { }
1273+
12211274
static inline unsigned int mm_cid_size(void)
12221275
{
12231276
return 0;
12241277
}
1278+
static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) { }
12251279
#endif /* CONFIG_SCHED_MM_CID */
12261280

12271281
struct mmu_gather;

include/linux/preempt.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,7 @@ DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
486486
extern bool preempt_model_none(void);
487487
extern bool preempt_model_voluntary(void);
488488
extern bool preempt_model_full(void);
489+
extern bool preempt_model_lazy(void);
489490

490491
#else
491492

@@ -502,6 +503,11 @@ static inline bool preempt_model_full(void)
502503
return IS_ENABLED(CONFIG_PREEMPT);
503504
}
504505

506+
static inline bool preempt_model_lazy(void)
507+
{
508+
return IS_ENABLED(CONFIG_PREEMPT_LAZY);
509+
}
510+
505511
#endif
506512

507513
static inline bool preempt_model_rt(void)
@@ -519,7 +525,7 @@ static inline bool preempt_model_rt(void)
519525
*/
520526
static inline bool preempt_model_preemptible(void)
521527
{
522-
return preempt_model_full() || preempt_model_rt();
528+
return preempt_model_full() || preempt_model_lazy() || preempt_model_rt();
523529
}
524530

525531
#endif /* __LINUX_PREEMPT_H */

include/linux/sched.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1898,7 +1898,7 @@ extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];
18981898

18991899
#ifdef CONFIG_THREAD_INFO_IN_TASK
19001900
# define task_thread_info(task) (&(task)->thread_info)
1901-
#elif !defined(__HAVE_THREAD_FUNCTIONS)
1901+
#else
19021902
# define task_thread_info(task) ((struct thread_info *)(task)->stack)
19031903
#endif
19041904

@@ -2002,7 +2002,8 @@ static inline void set_tsk_need_resched(struct task_struct *tsk)
20022002

20032003
static inline void clear_tsk_need_resched(struct task_struct *tsk)
20042004
{
2005-
clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2005+
atomic_long_andnot(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY,
2006+
(atomic_long_t *)&task_thread_info(tsk)->flags);
20062007
}
20072008

20082009
static inline int test_tsk_need_resched(struct task_struct *tsk)

0 commit comments

Comments
 (0)