Skip to content

Commit e7db276

Browse files
Yang Shigregkh
authored andcommitted
mm: page_ref: remove folio_try_get_rcu()
commit fa2690a upstream. The below bug was reported on a non-SMP kernel: [ 275.267158][ T4335] ------------[ cut here ]------------ [ 275.267949][ T4335] kernel BUG at include/linux/page_ref.h:275! [ 275.268526][ T4335] invalid opcode: 0000 [raspberrypi#1] KASAN PTI [ 275.269001][ T4335] CPU: 0 PID: 4335 Comm: trinity-c3 Not tainted 6.7.0-rc4-00061-gefa7df3e3bb5 raspberrypi#1 [ 275.269787][ T4335] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 [ 275.270679][ T4335] RIP: 0010:try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.272813][ T4335] RSP: 0018:ffffc90005dcf650 EFLAGS: 00010202 [ 275.273346][ T4335] RAX: 0000000000000246 RBX: ffffea00066e0000 RCX: 0000000000000000 [ 275.274032][ T4335] RDX: fffff94000cdc007 RSI: 0000000000000004 RDI: ffffea00066e0034 [ 275.274719][ T4335] RBP: ffffea00066e0000 R08: 0000000000000000 R09: fffff94000cdc006 [ 275.275404][ T4335] R10: ffffea00066e0037 R11: 0000000000000000 R12: 0000000000000136 [ 275.276106][ T4335] R13: ffffea00066e0034 R14: dffffc0000000000 R15: ffffea00066e0008 [ 275.276790][ T4335] FS: 00007fa2f9b61740(0000) GS:ffffffff89d0d000(0000) knlGS:0000000000000000 [ 275.277570][ T4335] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 275.278143][ T4335] CR2: 00007fa2f6c00000 CR3: 0000000134b04000 CR4: 00000000000406f0 [ 275.278833][ T4335] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 275.279521][ T4335] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 275.280201][ T4335] Call Trace: [ 275.280499][ T4335] <TASK> [ 275.280751][ T4335] ? die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434 arch/x86/kernel/dumpstack.c:447) [ 275.281087][ T4335] ? do_trap (arch/x86/kernel/traps.c:112 arch/x86/kernel/traps.c:153) [ 275.281463][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.281884][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.282300][ T4335] ? do_error_trap (arch/x86/kernel/traps.c:174) [ 275.282711][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.283129][ T4335] ? handle_invalid_op (arch/x86/kernel/traps.c:212) [ 275.283561][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.283990][ T4335] ? exc_invalid_op (arch/x86/kernel/traps.c:264) [ 275.284415][ T4335] ? asm_exc_invalid_op (arch/x86/include/asm/idtentry.h:568) [ 275.284859][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.285278][ T4335] try_grab_folio (mm/gup.c:148) [ 275.285684][ T4335] __get_user_pages (mm/gup.c:1297 (discriminator 1)) [ 275.286111][ T4335] ? __pfx___get_user_pages (mm/gup.c:1188) [ 275.286579][ T4335] ? __pfx_validate_chain (kernel/locking/lockdep.c:3825) [ 275.287034][ T4335] ? mark_lock (kernel/locking/lockdep.c:4656 (discriminator 1)) [ 275.287416][ T4335] __gup_longterm_locked (mm/gup.c:1509 mm/gup.c:2209) [ 275.288192][ T4335] ? __pfx___gup_longterm_locked (mm/gup.c:2204) [ 275.288697][ T4335] ? __pfx_lock_acquire (kernel/locking/lockdep.c:5722) [ 275.289135][ T4335] ? __pfx___might_resched (kernel/sched/core.c:10106) [ 275.289595][ T4335] pin_user_pages_remote (mm/gup.c:3350) [ 275.290041][ T4335] ? __pfx_pin_user_pages_remote (mm/gup.c:3350) [ 275.290545][ T4335] ? find_held_lock (kernel/locking/lockdep.c:5244 (discriminator 1)) [ 275.290961][ T4335] ? mm_access (kernel/fork.c:1573) [ 275.291353][ T4335] process_vm_rw_single_vec+0x142/0x360 [ 275.291900][ T4335] ? __pfx_process_vm_rw_single_vec+0x10/0x10 [ 275.292471][ T4335] ? mm_access (kernel/fork.c:1573) [ 275.292859][ T4335] process_vm_rw_core+0x272/0x4e0 [ 275.293384][ T4335] ? hlock_class (arch/x86/include/asm/bitops.h:227 arch/x86/include/asm/bitops.h:239 include/asm-generic/bitops/instrumented-non-atomic.h:142 kernel/locking/lockdep.c:228) [ 275.293780][ T4335] ? __pfx_process_vm_rw_core+0x10/0x10 [ 275.294350][ T4335] process_vm_rw (mm/process_vm_access.c:284) [ 275.294748][ T4335] ? __pfx_process_vm_rw (mm/process_vm_access.c:259) [ 275.295197][ T4335] ? __task_pid_nr_ns (include/linux/rcupdate.h:306 (discriminator 1) include/linux/rcupdate.h:780 (discriminator 1) kernel/pid.c:504 (discriminator 1)) [ 275.295634][ T4335] __x64_sys_process_vm_readv (mm/process_vm_access.c:291) [ 275.296139][ T4335] ? syscall_enter_from_user_mode (kernel/entry/common.c:94 kernel/entry/common.c:112) [ 275.296642][ T4335] do_syscall_64 (arch/x86/entry/common.c:51 (discriminator 1) arch/x86/entry/common.c:82 (discriminator 1)) [ 275.297032][ T4335] ? __task_pid_nr_ns (include/linux/rcupdate.h:306 (discriminator 1) include/linux/rcupdate.h:780 (discriminator 1) kernel/pid.c:504 (discriminator 1)) [ 275.297470][ T4335] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4300 kernel/locking/lockdep.c:4359) [ 275.297988][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97) [ 275.298389][ T4335] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4300 kernel/locking/lockdep.c:4359) [ 275.298906][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97) [ 275.299304][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97) [ 275.299703][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97) [ 275.300115][ T4335] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) This BUG is the VM_BUG_ON(!in_atomic() && !irqs_disabled()) assertion in folio_ref_try_add_rcu() for non-SMP kernel. The process_vm_readv() calls GUP to pin the THP. An optimization for pinning THP instroduced by commit 57edfcf ("mm/gup: accelerate thp gup even for "pages != NULL"") calls try_grab_folio() to pin the THP, but try_grab_folio() is supposed to be called in atomic context for non-SMP kernel, for example, irq disabled or preemption disabled, due to the optimization introduced by commit e286781 ("mm: speculative page references"). The commit efa7df3 ("mm: align larger anonymous mappings on THP boundaries") is not actually the root cause although it was bisected to. It just makes the problem exposed more likely. The follow up discussion suggested the optimization for non-SMP kernel may be out-dated and not worth it anymore [1]. So removing the optimization to silence the BUG. However calling try_grab_folio() in GUP slow path actually is unnecessary, so the following patch will clean this up. [1] https://lore.kernel.org/linux-mm/821cf1d6-92b9-4ac4-bacc-d8f2364ac14f@paulmck-laptop/ Link: https://lkml.kernel.org/r/20240625205350.1777481-1-yang@os.amperecomputing.com Fixes: 57edfcf ("mm/gup: accelerate thp gup even for "pages != NULL"") Signed-off-by: Yang Shi <yang@os.amperecomputing.com> Reported-by: kernel test robot <oliver.sang@intel.com> Tested-by: Oliver Sang <oliver.sang@intel.com> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: David Hildenbrand <david@redhat.com> Cc: Christoph Lameter <cl@linux.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Rik van Riel <riel@surriel.com> Cc: Vivek Kasireddy <vivek.kasireddy@intel.com> Cc: <stable@vger.kernel.org> [6.6+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 78959eb commit e7db276

File tree

5 files changed

+12
-57
lines changed

5 files changed

+12
-57
lines changed

fs/netfs/buffered_write.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -811,7 +811,7 @@ static void netfs_extend_writeback(struct address_space *mapping,
811811
break;
812812
}
813813

814-
if (!folio_try_get_rcu(folio)) {
814+
if (!folio_try_get(folio)) {
815815
xas_reset(xas);
816816
continue;
817817
}
@@ -1028,7 +1028,7 @@ static ssize_t netfs_writepages_begin(struct address_space *mapping,
10281028
if (!folio)
10291029
break;
10301030

1031-
if (!folio_try_get_rcu(folio)) {
1031+
if (!folio_try_get(folio)) {
10321032
xas_reset(xas);
10331033
continue;
10341034
}

fs/smb/client/file.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2753,7 +2753,7 @@ static void cifs_extend_writeback(struct address_space *mapping,
27532753
break;
27542754
}
27552755

2756-
if (!folio_try_get_rcu(folio)) {
2756+
if (!folio_try_get(folio)) {
27572757
xas_reset(xas);
27582758
continue;
27592759
}
@@ -2989,7 +2989,7 @@ static ssize_t cifs_writepages_begin(struct address_space *mapping,
29892989
if (!folio)
29902990
break;
29912991

2992-
if (!folio_try_get_rcu(folio)) {
2992+
if (!folio_try_get(folio)) {
29932993
xas_reset(xas);
29942994
continue;
29952995
}

include/linux/page_ref.h

Lines changed: 2 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -263,54 +263,9 @@ static inline bool folio_try_get(struct folio *folio)
263263
return folio_ref_add_unless(folio, 1, 0);
264264
}
265265

266-
static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
267-
{
268-
#ifdef CONFIG_TINY_RCU
269-
/*
270-
* The caller guarantees the folio will not be freed from interrupt
271-
* context, so (on !SMP) we only need preemption to be disabled
272-
* and TINY_RCU does that for us.
273-
*/
274-
# ifdef CONFIG_PREEMPT_COUNT
275-
VM_BUG_ON(!in_atomic() && !irqs_disabled());
276-
# endif
277-
VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio);
278-
folio_ref_add(folio, count);
279-
#else
280-
if (unlikely(!folio_ref_add_unless(folio, count, 0))) {
281-
/* Either the folio has been freed, or will be freed. */
282-
return false;
283-
}
284-
#endif
285-
return true;
286-
}
287-
288-
/**
289-
* folio_try_get_rcu - Attempt to increase the refcount on a folio.
290-
* @folio: The folio.
291-
*
292-
* This is a version of folio_try_get() optimised for non-SMP kernels.
293-
* If you are still holding the rcu_read_lock() after looking up the
294-
* page and know that the page cannot have its refcount decreased to
295-
* zero in interrupt context, you can use this instead of folio_try_get().
296-
*
297-
* Example users include get_user_pages_fast() (as pages are not unmapped
298-
* from interrupt context) and the page cache lookups (as pages are not
299-
* truncated from interrupt context). We also know that pages are not
300-
* frozen in interrupt context for the purposes of splitting or migration.
301-
*
302-
* You can also use this function if you're holding a lock that prevents
303-
* pages being frozen & removed; eg the i_pages lock for the page cache
304-
* or the mmap_lock or page table lock for page tables. In this case,
305-
* it will always succeed, and you could have used a plain folio_get(),
306-
* but it's sometimes more convenient to have a common function called
307-
* from both locked and RCU-protected contexts.
308-
*
309-
* Return: True if the reference count was successfully incremented.
310-
*/
311-
static inline bool folio_try_get_rcu(struct folio *folio)
266+
static inline bool folio_ref_try_add(struct folio *folio, int count)
312267
{
313-
return folio_ref_try_add_rcu(folio, 1);
268+
return folio_ref_add_unless(folio, count, 0);
314269
}
315270

316271
static inline int page_ref_freeze(struct page *page, int count)

mm/filemap.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1823,7 +1823,7 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
18231823
if (!folio || xa_is_value(folio))
18241824
goto out;
18251825

1826-
if (!folio_try_get_rcu(folio))
1826+
if (!folio_try_get(folio))
18271827
goto repeat;
18281828

18291829
if (unlikely(folio != xas_reload(&xas))) {
@@ -1977,7 +1977,7 @@ static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
19771977
if (!folio || xa_is_value(folio))
19781978
return folio;
19791979

1980-
if (!folio_try_get_rcu(folio))
1980+
if (!folio_try_get(folio))
19811981
goto reset;
19821982

19831983
if (unlikely(folio != xas_reload(xas))) {
@@ -2157,7 +2157,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
21572157
if (xa_is_value(folio))
21582158
goto update_start;
21592159

2160-
if (!folio_try_get_rcu(folio))
2160+
if (!folio_try_get(folio))
21612161
goto retry;
21622162

21632163
if (unlikely(folio != xas_reload(&xas)))
@@ -2289,7 +2289,7 @@ static void filemap_get_read_batch(struct address_space *mapping,
22892289
break;
22902290
if (xa_is_sibling(folio))
22912291
break;
2292-
if (!folio_try_get_rcu(folio))
2292+
if (!folio_try_get(folio))
22932293
goto retry;
22942294

22952295
if (unlikely(folio != xas_reload(&xas)))
@@ -3449,7 +3449,7 @@ static struct folio *next_uptodate_folio(struct xa_state *xas,
34493449
continue;
34503450
if (folio_test_locked(folio))
34513451
continue;
3452-
if (!folio_try_get_rcu(folio))
3452+
if (!folio_try_get(folio))
34533453
continue;
34543454
/* Has the page moved or been split? */
34553455
if (unlikely(folio != xas_reload(xas)))

mm/gup.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ static inline struct folio *try_get_folio(struct page *page, int refs)
7676
folio = page_folio(page);
7777
if (WARN_ON_ONCE(folio_ref_count(folio) < 0))
7878
return NULL;
79-
if (unlikely(!folio_ref_try_add_rcu(folio, refs)))
79+
if (unlikely(!folio_ref_try_add(folio, refs)))
8080
return NULL;
8181

8282
/*

0 commit comments

Comments
 (0)