
Commit c3f2d78

Merge tag 'mm-hotfixes-stable-2024-08-17-19-34' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc fixes from Andrew Morton:
 "16 hotfixes. All except one are for MM. 10 of these are cc:stable and
  the others pertain to post-6.10 issues. As usual with these merges,
  singletons and doubletons all over the place, no identifiable-by-me
  theme. Please see the lovingly curated changelogs to get the skinny"

* tag 'mm-hotfixes-stable-2024-08-17-19-34' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm/migrate: fix deadlock in migrate_pages_batch() on large folios
  alloc_tag: mark pages reserved during CMA activation as not tagged
  alloc_tag: introduce clear_page_tag_ref() helper function
  crash: fix riscv64 crash memory reserve dead loop
  selftests: memfd_secret: don't build memfd_secret test on unsupported arches
  mm: fix endless reclaim on machines with unaccepted memory
  selftests/mm: compaction_test: fix off by one in check_compaction()
  mm/numa: no task_numa_fault() call if PMD is changed
  mm/numa: no task_numa_fault() call if PTE is changed
  mm/vmalloc: fix page mapping if vm_area_alloc_pages() with high order fallback to order 0
  mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu
  mm: don't account memmap per-node
  mm: add system wide stats items category
  mm: don't account memmap on failure
  mm/hugetlb: fix hugetlb vs. core-mm PT locking
  mseal: fix is_madv_discard()
2 parents: 810996a + 2e6506e

22 files changed: +201 -182 lines

include/linux/hugetlb.h

Lines changed: 30 additions & 3 deletions
@@ -944,10 +944,37 @@ static inline bool htlb_allow_alloc_fallback(int reason)
 static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
 					struct mm_struct *mm, pte_t *pte)
 {
-	if (huge_page_size(h) == PMD_SIZE)
+	const unsigned long size = huge_page_size(h);
+
+	VM_WARN_ON(size == PAGE_SIZE);
+
+	/*
+	 * hugetlb must use the exact same PT locks as core-mm page table
+	 * walkers would. When modifying a PTE table, hugetlb must take the
+	 * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD
+	 * PT lock etc.
+	 *
+	 * The expectation is that any hugetlb folio smaller than a PMD is
+	 * always mapped into a single PTE table and that any hugetlb folio
+	 * smaller than a PUD (but at least as big as a PMD) is always mapped
+	 * into a single PMD table.
+	 *
+	 * If that does not hold for an architecture, then that architecture
+	 * must disable split PT locks such that all *_lockptr() functions
+	 * will give us the same result: the per-MM PT lock.
+	 *
+	 * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where
+	 * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr()
+	 * and core-mm would use pmd_lockptr(). However, in such configurations
+	 * split PMD locks are disabled -- they don't make sense on a single
+	 * PGDIR page table -- and the end result is the same.
+	 */
+	if (size >= PUD_SIZE)
+		return pud_lockptr(mm, (pud_t *) pte);
+	else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE))
 		return pmd_lockptr(mm, (pmd_t *) pte);
-	VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
-	return &mm->page_table_lock;
+	/* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */
+	return ptep_lockptr(mm, pte);
 }

 #ifndef hugepages_supported

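The new logic dispatches purely on the hugetlb folio size: PUD-sized or larger folios take the PUD PT lock, PMD-sized folios (or any size under CONFIG_HIGHPTE) take the PMD PT lock, and anything smaller shares the PTE PT lock with core-mm walkers. A minimal caller sketch, assuming an existing hugetlb page-table walk supplies h, mm, and ptep (not patch code):

	/* Sketch only: lock a hugetlb PTE at the granularity that
	 * core-mm page table walkers would use for the same entry. */
	spinlock_t *ptl = huge_pte_lockptr(h, mm, ptep);

	spin_lock(ptl);
	/* ... modify the hugetlb PTE under the matching PT lock ... */
	spin_unlock(ptl);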
include/linux/mm.h

Lines changed: 11 additions & 0 deletions
@@ -2920,6 +2920,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 	return ptlock_ptr(page_ptdesc(pmd_page(*pmd)));
 }

+static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
+{
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE));
+	BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE);
+	return ptlock_ptr(virt_to_ptdesc(pte));
+}
+
 static inline bool ptlock_init(struct ptdesc *ptdesc)
 {
 	/*
@@ -2944,6 +2951,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
 	return &mm->page_table_lock;
 }
+static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
+{
+	return &mm->page_table_lock;
+}
 static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
 static inline void ptlock_free(struct ptdesc *ptdesc) {}

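The two BUILD_BUG_ONs pin down when virt_to_ptdesc() is legal here: with CONFIG_HIGHPTE the PTE page may not be in the direct map, and the lookup also assumes a PTE table never spans more than one page. Given those constraints, the new helper must agree with pte_lockptr() whenever pte lives inside the table that pmd maps; a hypothetical consistency check (illustration only, not part of the patch) makes the invariant concrete:

	/* Hypothetical invariant, illustration only: both helpers must
	 * name the same split PT lock for a PTE inside pmd's table. */
	static void assert_ptl_consistent(struct mm_struct *mm, pmd_t *pmd,
					  pte_t *pte)
	{
		VM_WARN_ON(pte_lockptr(mm, pmd) != ptep_lockptr(mm, pte));
	}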
include/linux/mmzone.h

Lines changed: 0 additions & 2 deletions
@@ -220,8 +220,6 @@ enum node_stat_item {
 	PGDEMOTE_KSWAPD,
 	PGDEMOTE_DIRECT,
 	PGDEMOTE_KHUGEPAGED,
-	NR_MEMMAP, /* page metadata allocated through buddy allocator */
-	NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */
 	NR_VM_NODE_STAT_ITEMS
 };

include/linux/pgalloc_tag.h

Lines changed: 13 additions & 0 deletions
@@ -43,6 +43,18 @@ static inline void put_page_tag_ref(union codetag_ref *ref)
 	page_ext_put(page_ext_from_codetag_ref(ref));
 }

+static inline void clear_page_tag_ref(struct page *page)
+{
+	if (mem_alloc_profiling_enabled()) {
+		union codetag_ref *ref = get_page_tag_ref(page);
+
+		if (ref) {
+			set_codetag_empty(ref);
+			put_page_tag_ref(ref);
+		}
+	}
+}
+
 static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
 				   unsigned int nr)
 {
@@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)

 static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; }
 static inline void put_page_tag_ref(union codetag_ref *ref) {}
+static inline void clear_page_tag_ref(struct page *page) {}
 static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
 				   unsigned int nr) {}
 static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}

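clear_page_tag_ref() wraps the get/set-empty/put sequence that callers previously open-coded, and compiles away when allocation profiling is off. Per the commit subject it is used when CMA activation hands reserved pages back to the allocator; a hedged sketch of that call pattern (the helper name cma_clear_page_tags below is illustrative, not from the patch):

	/* Illustration: mark a run of reserved pages as not tagged so
	 * allocation profiling does not warn when they are later freed. */
	static void cma_clear_page_tags(struct page *page, unsigned long nr)
	{
		unsigned long i;

		for (i = 0; i < nr; i++)
			clear_page_tag_ref(page + i);
	}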
include/linux/vmstat.h

Lines changed: 8 additions & 14 deletions
@@ -34,10 +34,13 @@ struct reclaim_stat {
 	unsigned nr_lazyfree_fail;
 };

-enum writeback_stat_item {
+/* Stat data for system wide items */
+enum vm_stat_item {
 	NR_DIRTY_THRESHOLD,
 	NR_DIRTY_BG_THRESHOLD,
-	NR_VM_WRITEBACK_STAT_ITEMS,
+	NR_MEMMAP_PAGES,	/* page metadata allocated through buddy allocator */
+	NR_MEMMAP_BOOT_PAGES,	/* page metadata allocated through boot allocator */
+	NR_VM_STAT_ITEMS,
 };

 #ifdef CONFIG_VM_EVENT_COUNTERS
@@ -514,21 +517,13 @@ static inline const char *lru_list_name(enum lru_list lru)
 	return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
 }

-static inline const char *writeback_stat_name(enum writeback_stat_item item)
-{
-	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
-			   NR_VM_NUMA_EVENT_ITEMS +
-			   NR_VM_NODE_STAT_ITEMS +
-			   item];
-}
-
 #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
 static inline const char *vm_event_name(enum vm_event_item item)
 {
 	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
 			   NR_VM_NUMA_EVENT_ITEMS +
 			   NR_VM_NODE_STAT_ITEMS +
-			   NR_VM_WRITEBACK_STAT_ITEMS +
+			   NR_VM_STAT_ITEMS +
 			   item];
 }
 #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
@@ -625,7 +620,6 @@ static inline void lruvec_stat_sub_folio(struct folio *folio,
 	lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
 }

-void __meminit mod_node_early_perpage_metadata(int nid, long delta);
-void __meminit store_early_perpage_metadata(void);
-
+void memmap_boot_pages_add(long delta);
+void memmap_pages_add(long delta);
 #endif /* _LINUX_VMSTAT_H */

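Renaming writeback_stat_item to vm_stat_item and folding in the two memmap counters shifts the base that vm_event_name() must skip, which is why NR_VM_WRITEBACK_STAT_ITEMS becomes NR_VM_STAT_ITEMS there. If a name lookup for the new category were needed, it would mirror the deleted writeback_stat_name(); a sketch under that assumption:

	/* Hypothetical counterpart of the removed writeback_stat_name():
	 * system-wide item names follow zone, NUMA-event and node items. */
	static inline const char *vm_stat_name(enum vm_stat_item item)
	{
		return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
				   NR_VM_NUMA_EVENT_ITEMS +
				   NR_VM_NODE_STAT_ITEMS +
				   item];
	}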
kernel/crash_reserve.c

Lines changed: 2 additions & 1 deletion
@@ -423,7 +423,8 @@ void __init reserve_crashkernel_generic(char *cmdline,
 	if (high && search_end == CRASH_ADDR_HIGH_MAX) {
 		search_end = CRASH_ADDR_LOW_MAX;
 		search_base = 0;
-		goto retry;
+		if (search_end != CRASH_ADDR_HIGH_MAX)
+			goto retry;
 	}
 	pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
 		crash_size);

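The new guard looks tautological until you notice that search_end was just reassigned: after search_end = CRASH_ADDR_LOW_MAX, the test search_end != CRASH_ADDR_HIGH_MAX asks whether a distinct low range exists at all. Per the commit subject, on riscv64 the two limits can be equal, so the old unconditional goto re-ran the identical failing search forever. A standalone toy model of the control flow (illustrative constants only, plain C):

	/* Toy model of the dead-loop fix; compiles as ordinary C. */
	#include <stdio.h>

	#define CRASH_ADDR_LOW_MAX  0x80000000UL
	#define CRASH_ADDR_HIGH_MAX 0x80000000UL	/* equal: the riscv64-like case */

	int main(void)
	{
		unsigned long search_end = CRASH_ADDR_HIGH_MAX;

	retry:
		/* pretend the allocation in [search_base, search_end) just failed */
		if (search_end == CRASH_ADDR_HIGH_MAX) {
			search_end = CRASH_ADDR_LOW_MAX;
			if (search_end != CRASH_ADDR_HIGH_MAX)
				goto retry;	/* never taken when the limits are equal */
		}
		printf("fell through to the failure path, no dead loop\n");
		return 0;
	}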
mm/huge_memory.c

Lines changed: 13 additions & 16 deletions
@@ -1685,7 +1685,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
 	if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
 		spin_unlock(vmf->ptl);
-		goto out;
+		return 0;
 	}

 	pmd = pmd_modify(oldpmd, vma->vm_page_prot);
@@ -1728,22 +1728,16 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	if (!migrate_misplaced_folio(folio, vma, target_nid)) {
 		flags |= TNF_MIGRATED;
 		nid = target_nid;
-	} else {
-		flags |= TNF_MIGRATE_FAIL;
-		vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
-		if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
-			spin_unlock(vmf->ptl);
-			goto out;
-		}
-		goto out_map;
-	}
-
-out:
-	if (nid != NUMA_NO_NODE)
 		task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
+		return 0;
+	}

-	return 0;
-
+	flags |= TNF_MIGRATE_FAIL;
+	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+	if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
+		spin_unlock(vmf->ptl);
+		return 0;
+	}
 out_map:
 	/* Restore the PMD */
 	pmd = pmd_modify(oldpmd, vma->vm_page_prot);
@@ -1753,7 +1747,10 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
 	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 	spin_unlock(vmf->ptl);
-	goto out;
+
+	if (nid != NUMA_NO_NODE)
+		task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
+	return 0;
 }

 /*

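The rewrite removes the shared out: label so that task_numa_fault() can only run on paths that either migrated the folio or re-validated the PMD under the lock; when pmd_same() fails, the fault was handled concurrently and no NUMA-fault statistics may be recorded. The rule, distilled (not patch code):

	/* Distilled: account the NUMA fault only after revalidation. */
	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
	if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
		spin_unlock(vmf->ptl);
		return 0;	/* PMD changed under us: no task_numa_fault() */
	}
	/* ... restore the PMD, unlock ... */
	if (nid != NUMA_NO_NODE)
		task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
	return 0;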
mm/hugetlb_vmemmap.c

Lines changed: 5 additions & 8 deletions
@@ -185,11 +185,11 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
 static inline void free_vmemmap_page(struct page *page)
 {
 	if (PageReserved(page)) {
+		memmap_boot_pages_add(-1);
 		free_bootmem_page(page);
-		mod_node_page_state(page_pgdat(page), NR_MEMMAP_BOOT, -1);
 	} else {
+		memmap_pages_add(-1);
 		__free_page(page);
-		mod_node_page_state(page_pgdat(page), NR_MEMMAP, -1);
 	}
 }

@@ -341,7 +341,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
 		copy_page(page_to_virt(walk.reuse_page),
 			  (void *)walk.reuse_addr);
 		list_add(&walk.reuse_page->lru, vmemmap_pages);
-		mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, 1);
+		memmap_pages_add(1);
 	}

 	/*
@@ -392,14 +392,11 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,

 	for (i = 0; i < nr_pages; i++) {
 		page = alloc_pages_node(nid, gfp_mask, 0);
-		if (!page) {
-			mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, i);
+		if (!page)
 			goto out;
-		}
 		list_add(&page->lru, list);
 	}
-
-	mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, nr_pages);
+	memmap_pages_add(nr_pages);

 	return 0;
 out:

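Callers now bump a system-wide counter instead of per-node NR_MEMMAP/NR_MEMMAP_BOOT state, which also lets alloc_vmemmap_page_list() drop the partial-count bookkeeping on its failure path. The helpers themselves are declared in vmstat.h above and implemented outside this diff, presumably in mm/vmstat.c; assuming simple global counters, they plausibly look like:

	/* Sketch of the counterpart (assumed, not shown in this diff):
	 * system-wide counters replacing the per-node stats. */
	static atomic_long_t nr_memmap_pages;
	static atomic_long_t nr_memmap_boot_pages;

	void memmap_pages_add(long delta)
	{
		atomic_long_add(delta, &nr_memmap_pages);
	}

	void memmap_boot_pages_add(long delta)
	{
		atomic_long_add(delta, &nr_memmap_boot_pages);
	}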
mm/memory-failure.c

Lines changed: 11 additions & 9 deletions
@@ -2417,7 +2417,7 @@ struct memory_failure_entry {
 struct memory_failure_cpu {
 	DECLARE_KFIFO(fifo, struct memory_failure_entry,
 		      MEMORY_FAILURE_FIFO_SIZE);
-	spinlock_t lock;
+	raw_spinlock_t lock;
 	struct work_struct work;
 };

@@ -2443,20 +2443,22 @@ void memory_failure_queue(unsigned long pfn, int flags)
 {
 	struct memory_failure_cpu *mf_cpu;
 	unsigned long proc_flags;
+	bool buffer_overflow;
 	struct memory_failure_entry entry = {
 		.pfn = pfn,
 		.flags = flags,
 	};

 	mf_cpu = &get_cpu_var(memory_failure_cpu);
-	spin_lock_irqsave(&mf_cpu->lock, proc_flags);
-	if (kfifo_put(&mf_cpu->fifo, entry))
+	raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+	buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry);
+	if (!buffer_overflow)
 		schedule_work_on(smp_processor_id(), &mf_cpu->work);
-	else
+	raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+	put_cpu_var(memory_failure_cpu);
+	if (buffer_overflow)
 		pr_err("buffer overflow when queuing memory failure at %#lx\n",
 		       pfn);
-	spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
-	put_cpu_var(memory_failure_cpu);
 }
 EXPORT_SYMBOL_GPL(memory_failure_queue);

@@ -2469,9 +2471,9 @@ static void memory_failure_work_func(struct work_struct *work)

 	mf_cpu = container_of(work, struct memory_failure_cpu, work);
 	for (;;) {
-		spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+		raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
 		gotten = kfifo_get(&mf_cpu->fifo, &entry);
-		spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+		raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
 		if (!gotten)
 			break;
 		if (entry.flags & MF_SOFT_OFFLINE)
@@ -2501,7 +2503,7 @@ static int __init memory_failure_init(void)

 	for_each_possible_cpu(cpu) {
 		mf_cpu = &per_cpu(memory_failure_cpu, cpu);
-		spin_lock_init(&mf_cpu->lock);
+		raw_spin_lock_init(&mf_cpu->lock);
 		INIT_KFIFO(mf_cpu->fifo);
 		INIT_WORK(&mf_cpu->work, memory_failure_work_func);
 	}

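Two coordinated changes here: the per-CPU lock becomes a raw_spinlock_t, which remains a true spinning lock on PREEMPT_RT and so keeps memory_failure_queue() safe to call from non-preemptible contexts, and the pr_err() is deferred until after both the lock and the per-CPU reference are dropped. The resulting shape, distilled from the hunk above:

	/* Distilled: do only the kfifo/work bookkeeping under the raw
	 * lock; report an overflow after everything is released. */
	raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
	buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry);
	if (!buffer_overflow)
		schedule_work_on(smp_processor_id(), &mf_cpu->work);
	raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
	put_cpu_var(memory_failure_cpu);
	if (buffer_overflow)
		pr_err("buffer overflow when queuing memory failure at %#lx\n",
		       pfn);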
mm/memory.c

Lines changed: 16 additions & 17 deletions
@@ -5295,7 +5295,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)

 	if (unlikely(!pte_same(old_pte, vmf->orig_pte))) {
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
-		goto out;
+		return 0;
 	}

 	pte = pte_modify(old_pte, vma->vm_page_prot);
@@ -5358,23 +5358,19 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	if (!migrate_misplaced_folio(folio, vma, target_nid)) {
 		nid = target_nid;
 		flags |= TNF_MIGRATED;
-	} else {
-		flags |= TNF_MIGRATE_FAIL;
-		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-					       vmf->address, &vmf->ptl);
-		if (unlikely(!vmf->pte))
-			goto out;
-		if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
-			pte_unmap_unlock(vmf->pte, vmf->ptl);
-			goto out;
-		}
-		goto out_map;
+		task_numa_fault(last_cpupid, nid, nr_pages, flags);
+		return 0;
 	}

-out:
-	if (nid != NUMA_NO_NODE)
-		task_numa_fault(last_cpupid, nid, nr_pages, flags);
-	return 0;
+	flags |= TNF_MIGRATE_FAIL;
+	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+				       vmf->address, &vmf->ptl);
+	if (unlikely(!vmf->pte))
+		return 0;
+	if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
+		return 0;
+	}
 out_map:
 	/*
 	 * Make it present again, depending on how arch implements
@@ -5387,7 +5383,10 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
 				    writable);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
-	goto out;
+
+	if (nid != NUMA_NO_NODE)
+		task_numa_fault(last_cpupid, nid, nr_pages, flags);
+	return 0;
 }

 static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)

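Same rule as the PMD variant above, with one extra failure mode: after a failed migration the PTE table itself may already be gone, so pte_offset_map_lock() can return NULL and the fault must bail out without touching NUMA statistics. Distilled from the hunk (not patch code):

	/* Distilled: the PTE path re-validates both the table and the
	 * entry before falling through to out_map and accounting. */
	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
				       vmf->address, &vmf->ptl);
	if (unlikely(!vmf->pte))
		return 0;	/* PTE table vanished: no task_numa_fault() */
	if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
		pte_unmap_unlock(vmf->pte, vmf->ptl);
		return 0;	/* PTE changed: no task_numa_fault() */
	}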