
Commit e72e784

Merge tag 'mm-hotfixes-stable-2025-05-17-09-41' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull hotfixes from Andrew Morton:
 "Nine singleton hotfixes, all MM. Four are cc:stable"

* tag 'mm-hotfixes-stable-2025-05-17-09-41' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm: userfaultfd: correct dirty flags set for both present and swap pte
  zsmalloc: don't underflow size calculation in zs_obj_write()
  mm/page_alloc: fix race condition in unaccepted memory handling
  mm/page_alloc: ensure try_alloc_pages() plays well with unaccepted memory
  MAINTAINERS: add mm GUP section
  mm/codetag: move tag retrieval back upfront in __free_pages()
  mm/memory: fix mapcount / refcount sanity check for mTHP reuse
  kernel/fork: only call untrack_pfn_clear() on VMAs duplicated for fork()
  mm: hugetlb: fix incorrect fallback for subpool
2 parents 205b2bd + 75cb1cc commit e72e784

File tree

10 files changed, +82 -87 lines changed


MAINTAINERS

Lines changed: 12 additions & 0 deletions
@@ -15549,6 +15549,18 @@ S:	Maintained
 F:	include/linux/execmem.h
 F:	mm/execmem.c
 
+MEMORY MANAGEMENT - GUP (GET USER PAGES)
+M:	Andrew Morton <akpm@linux-foundation.org>
+M:	David Hildenbrand <david@redhat.com>
+R:	Jason Gunthorpe <jgg@nvidia.com>
+R:	John Hubbard <jhubbard@nvidia.com>
+R:	Peter Xu <peterx@redhat.com>
+L:	linux-mm@kvack.org
+S:	Maintained
+W:	http://www.linux-mm.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+F:	mm/gup.c
+
 MEMORY MANAGEMENT - NUMA MEMBLOCKS AND NUMA EMULATION
 M:	Andrew Morton <akpm@linux-foundation.org>
 M:	Mike Rapoport <rppt@kernel.org>

include/linux/pgalloc_tag.h

Lines changed: 8 additions & 0 deletions
@@ -188,6 +188,13 @@ static inline struct alloc_tag *__pgalloc_tag_get(struct page *page)
 	return tag;
 }
 
+static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
+{
+	if (mem_alloc_profiling_enabled())
+		return __pgalloc_tag_get(page);
+	return NULL;
+}
+
 void pgalloc_tag_split(struct folio *folio, int old_order, int new_order);
 void pgalloc_tag_swap(struct folio *new, struct folio *old);
 
@@ -199,6 +206,7 @@ static inline void clear_page_tag_ref(struct page *page) {}
 static inline void alloc_tag_sec_init(void) {}
 static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {}
 static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {}
+static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; }
 
 #endif /* CONFIG_MEM_ALLOC_PROFILING */
 
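The wrapper added here lets the free path look up the allocation tag only when profiling is enabled, and do it while the caller still holds its page reference (see the ___free_pages() hunk in mm/page_alloc.c below, whose comment notes the page may be "released by others"). A minimal userspace-style sketch of that ordering, using hypothetical stand-in types rather than the real kernel structures:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins, for illustration only. */
struct alloc_tag { long bytes; };
struct page { struct alloc_tag *tag; int refcount; };

static bool profiling_enabled = true;	/* plays the role of mem_alloc_profiling_enabled() */

static struct alloc_tag *pgalloc_tag_get(struct page *page)
{
	/* Only touch page metadata when profiling is actually on. */
	return profiling_enabled ? page->tag : NULL;
}

static void free_pages_sketch(struct page *page, unsigned int order)
{
	/*
	 * Snapshot the tag before dropping the reference: once the count
	 * reaches zero another CPU may free and reuse the page, so reading
	 * the tag afterwards would be unsafe.
	 */
	struct alloc_tag *tag = pgalloc_tag_get(page);

	if (--page->refcount == 0)
		return;		/* whole block goes back to the allocator */
	if (tag)		/* still referenced: only uncharge the tail pages */
		tag->bytes -= 4096L * ((1 << order) - 1);
}

int main(void)
{
	struct alloc_tag tag = { .bytes = 4096L * 4 };
	struct page page = { .tag = &tag, .refcount = 2 };

	free_pages_sketch(&page, 2);	/* order-2 block, another reference still held */
	printf("bytes still charged: %ld\n", tag.bytes);
	return 0;
}

The !CONFIG_MEM_ALLOC_PROFILING stub simply returns NULL, so callers need no ifdefs around the lookup.
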
kernel/fork.c

Lines changed: 5 additions & 4 deletions
@@ -498,10 +498,6 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
 	vma_numab_state_init(new);
 	dup_anon_vma_name(orig, new);
 
-	/* track_pfn_copy() will later take care of copying internal state. */
-	if (unlikely(new->vm_flags & VM_PFNMAP))
-		untrack_pfn_clear(new);
-
 	return new;
 }
 
@@ -672,6 +668,11 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		tmp = vm_area_dup(mpnt);
 		if (!tmp)
 			goto fail_nomem;
+
+		/* track_pfn_copy() will later take care of copying internal state. */
+		if (unlikely(tmp->vm_flags & VM_PFNMAP))
+			untrack_pfn_clear(tmp);
+
 		retval = vma_dup_policy(mpnt, tmp);
 		if (retval)
 			goto fail_nomem_policy;
mm/hugetlb.c

Lines changed: 22 additions & 6 deletions
@@ -3010,7 +3010,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 	struct hugepage_subpool *spool = subpool_vma(vma);
 	struct hstate *h = hstate_vma(vma);
 	struct folio *folio;
-	long retval, gbl_chg;
+	long retval, gbl_chg, gbl_reserve;
 	map_chg_state map_chg;
 	int ret, idx;
 	struct hugetlb_cgroup *h_cg = NULL;
@@ -3163,8 +3163,16 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 	hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h),
 					    h_cg);
 out_subpool_put:
-	if (map_chg)
-		hugepage_subpool_put_pages(spool, 1);
+	/*
+	 * put page to subpool iff the quota of subpool's rsv_hpages is used
+	 * during hugepage_subpool_get_pages.
+	 */
+	if (map_chg && !gbl_chg) {
+		gbl_reserve = hugepage_subpool_put_pages(spool, 1);
+		hugetlb_acct_memory(h, -gbl_reserve);
+	}
+
+
 out_end_reservation:
 	if (map_chg != MAP_CHG_ENFORCED)
 		vma_end_reservation(h, vma, addr);
@@ -7239,7 +7247,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
 			   struct vm_area_struct *vma,
 			   vm_flags_t vm_flags)
 {
-	long chg = -1, add = -1;
+	long chg = -1, add = -1, spool_resv, gbl_resv;
 	struct hstate *h = hstate_inode(inode);
 	struct hugepage_subpool *spool = subpool_inode(inode);
 	struct resv_map *resv_map;
@@ -7374,8 +7382,16 @@ bool hugetlb_reserve_pages(struct inode *inode,
 	return true;
 
 out_put_pages:
-	/* put back original number of pages, chg */
-	(void)hugepage_subpool_put_pages(spool, chg);
+	spool_resv = chg - gbl_reserve;
+	if (spool_resv) {
+		/* put sub pool's reservation back, chg - gbl_reserve */
+		gbl_resv = hugepage_subpool_put_pages(spool, spool_resv);
+		/*
+		 * subpool's reserved pages can not be put back due to race,
+		 * return to hstate.
+		 */
+		hugetlb_acct_memory(h, -gbl_resv);
+	}
 out_uncharge_cgroup:
 	hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
 					    chg * pages_per_huge_page(h), h_cg);

mm/internal.h

Lines changed: 0 additions & 1 deletion
@@ -1590,7 +1590,6 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc);
 
 #ifdef CONFIG_UNACCEPTED_MEMORY
 void accept_page(struct page *page);
-void unaccepted_cleanup_work(struct work_struct *work);
 #else /* CONFIG_UNACCEPTED_MEMORY */
 static inline void accept_page(struct page *page)
 {
mm/memory.c

Lines changed: 1 addition & 1 deletion
@@ -3751,7 +3751,7 @@ static bool __wp_can_reuse_large_anon_folio(struct folio *folio,
 
 	/* Stabilize the mapcount vs. refcount and recheck. */
 	folio_lock_large_mapcount(folio);
-	VM_WARN_ON_ONCE(folio_large_mapcount(folio) < folio_ref_count(folio));
+	VM_WARN_ON_ONCE_FOLIO(folio_large_mapcount(folio) > folio_ref_count(folio), folio);
 
 	if (folio_test_large_maybe_mapped_shared(folio))
 		goto unlock;

mm/mm_init.c

Lines changed: 0 additions & 1 deletion
@@ -1441,7 +1441,6 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 
 #ifdef CONFIG_UNACCEPTED_MEMORY
 	INIT_LIST_HEAD(&zone->unaccepted_pages);
-	INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work);
 #endif
 }
 
mm/page_alloc.c

Lines changed: 20 additions & 68 deletions
@@ -290,7 +290,8 @@ EXPORT_SYMBOL(nr_online_nodes);
 #endif
 
 static bool page_contains_unaccepted(struct page *page, unsigned int order);
-static bool cond_accept_memory(struct zone *zone, unsigned int order);
+static bool cond_accept_memory(struct zone *zone, unsigned int order,
+			       int alloc_flags);
 static bool __free_unaccepted(struct page *page);
 
 int page_group_by_mobility_disabled __read_mostly;
@@ -1151,14 +1152,9 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
 	__pgalloc_tag_sub(page, nr);
 }
 
-static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr)
+/* When tag is not NULL, assuming mem_alloc_profiling_enabled */
+static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
 {
-	struct alloc_tag *tag;
-
-	if (!mem_alloc_profiling_enabled())
-		return;
-
-	tag = __pgalloc_tag_get(page);
 	if (tag)
 		this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
 }
@@ -1168,7 +1164,7 @@ static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr)
 static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
 				   unsigned int nr) {}
 static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
-static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) {}
+static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
 
 #endif /* CONFIG_MEM_ALLOC_PROFILING */
 
@@ -3616,7 +3612,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 			}
 		}
 
-		cond_accept_memory(zone, order);
+		cond_accept_memory(zone, order, alloc_flags);
 
 		/*
 		 * Detect whether the number of free pages is below high
@@ -3643,7 +3639,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 				       gfp_mask)) {
 			int ret;
 
-			if (cond_accept_memory(zone, order))
+			if (cond_accept_memory(zone, order, alloc_flags))
 				goto try_this_zone;
 
 			/*
@@ -3696,7 +3692,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 
 			return page;
 		} else {
-			if (cond_accept_memory(zone, order))
+			if (cond_accept_memory(zone, order, alloc_flags))
 				goto try_this_zone;
 
 			/* Try again if zone has deferred pages */
@@ -4849,7 +4845,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
 			goto failed;
 	}
 
-	cond_accept_memory(zone, 0);
+	cond_accept_memory(zone, 0, alloc_flags);
retry_this_zone:
 	mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK) + nr_pages;
 	if (zone_watermark_fast(zone, 0, mark,
@@ -4858,7 +4854,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
 		break;
 	}
 
-	if (cond_accept_memory(zone, 0))
+	if (cond_accept_memory(zone, 0, alloc_flags))
 		goto retry_this_zone;
 
 	/* Try again if zone has deferred pages */
@@ -5065,11 +5061,13 @@ static void ___free_pages(struct page *page, unsigned int order,
 {
 	/* get PageHead before we drop reference */
 	int head = PageHead(page);
+	/* get alloc tag in case the page is released by others */
+	struct alloc_tag *tag = pgalloc_tag_get(page);
 
 	if (put_page_testzero(page))
 		__free_frozen_pages(page, order, fpi_flags);
 	else if (!head) {
-		pgalloc_tag_sub_pages(page, (1 << order) - 1);
+		pgalloc_tag_sub_pages(tag, (1 << order) - 1);
 		while (order-- > 0)
 			__free_frozen_pages(page + (1 << order), order,
 					    fpi_flags);
@@ -7174,16 +7172,8 @@ bool has_managed_dma(void)
 
 #ifdef CONFIG_UNACCEPTED_MEMORY
 
-/* Counts number of zones with unaccepted pages. */
-static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages);
-
 static bool lazy_accept = true;
 
-void unaccepted_cleanup_work(struct work_struct *work)
-{
-	static_branch_dec(&zones_with_unaccepted_pages);
-}
-
 static int __init accept_memory_parse(char *p)
 {
 	if (!strcmp(p, "lazy")) {
@@ -7208,11 +7198,7 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order)
 static void __accept_page(struct zone *zone, unsigned long *flags,
 			  struct page *page)
 {
-	bool last;
-
 	list_del(&page->lru);
-	last = list_empty(&zone->unaccepted_pages);
-
 	account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
 	__mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
 	__ClearPageUnaccepted(page);
@@ -7221,28 +7207,6 @@ static void __accept_page(struct zone *zone, unsigned long *flags,
 	accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER);
 
 	__free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL);
-
-	if (last) {
-		/*
-		 * There are two corner cases:
-		 *
-		 * - If allocation occurs during the CPU bring up,
-		 *   static_branch_dec() cannot be used directly as
-		 *   it causes a deadlock on cpu_hotplug_lock.
-		 *
-		 *   Instead, use schedule_work() to prevent deadlock.
-		 *
-		 * - If allocation occurs before workqueues are initialized,
-		 *   static_branch_dec() should be called directly.
-		 *
-		 *   Workqueues are initialized before CPU bring up, so this
-		 *   will not conflict with the first scenario.
-		 */
-		if (system_wq)
-			schedule_work(&zone->unaccepted_cleanup);
-		else
-			unaccepted_cleanup_work(&zone->unaccepted_cleanup);
-	}
 }
 
 void accept_page(struct page *page)
@@ -7279,20 +7243,17 @@ static bool try_to_accept_memory_one(struct zone *zone)
 	return true;
 }
 
-static inline bool has_unaccepted_memory(void)
-{
-	return static_branch_unlikely(&zones_with_unaccepted_pages);
-}
-
-static bool cond_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order,
+			       int alloc_flags)
 {
 	long to_accept, wmark;
 	bool ret = false;
 
-	if (!has_unaccepted_memory())
+	if (list_empty(&zone->unaccepted_pages))
 		return false;
 
-	if (list_empty(&zone->unaccepted_pages))
+	/* Bailout, since try_to_accept_memory_one() needs to take a lock */
+	if (alloc_flags & ALLOC_TRYLOCK)
 		return false;
 
 	wmark = promo_wmark_pages(zone);
@@ -7325,22 +7286,17 @@ static bool __free_unaccepted(struct page *page)
 {
 	struct zone *zone = page_zone(page);
 	unsigned long flags;
-	bool first = false;
 
 	if (!lazy_accept)
 		return false;
 
 	spin_lock_irqsave(&zone->lock, flags);
-	first = list_empty(&zone->unaccepted_pages);
 	list_add_tail(&page->lru, &zone->unaccepted_pages);
 	account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
 	__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
 	__SetPageUnaccepted(page);
 	spin_unlock_irqrestore(&zone->lock, flags);
 
-	if (first)
-		static_branch_inc(&zones_with_unaccepted_pages);
-
 	return true;
 }
 
@@ -7351,7 +7307,8 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order)
 	return false;
 }
 
-static bool cond_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order,
+			       int alloc_flags)
 {
 	return false;
 }
@@ -7422,11 +7379,6 @@ struct page *try_alloc_pages_noprof(int nid, unsigned int order)
 	if (!pcp_allowed_order(order))
 		return NULL;
 
-#ifdef CONFIG_UNACCEPTED_MEMORY
-	/* Bailout, since try_to_accept_memory_one() needs to take a lock */
-	if (has_unaccepted_memory())
-		return NULL;
-#endif
 	/* Bailout, since _deferred_grow_zone() needs to take a lock */
 	if (deferred_pages_enabled())
 		return NULL;
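Taken together, the two unaccepted-memory fixes in this file drop the global zones_with_unaccepted_pages static key (and its deferred cleanup work) in favour of checking each zone's unaccepted list directly, and make cond_accept_memory() bail out for ALLOC_TRYLOCK callers such as try_alloc_pages(), since accepting memory needs zone->lock. A condensed, compilable sketch of the resulting control flow, with simplified stand-ins for the zone and flag handling:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins, for illustration only. */
#define ALLOC_TRYLOCK 0x1

struct zone_sketch {
	int nr_unaccepted;	/* stands in for list_empty(&zone->unaccepted_pages) */
	long free_pages;
	long watermark;
};

static bool accept_one(struct zone_sketch *zone)
{
	/* In the kernel this path takes zone->lock and accepts one MAX_ORDER block. */
	if (zone->nr_unaccepted == 0)
		return false;
	zone->nr_unaccepted--;
	zone->free_pages += 512;
	return true;
}

static bool cond_accept_memory_sketch(struct zone_sketch *zone, int alloc_flags)
{
	bool ret = false;

	/* Per-zone check instead of a globally maintained static key. */
	if (zone->nr_unaccepted == 0)
		return false;

	/* Lock-averse callers (try_alloc_pages()) must not reach the locked path. */
	if (alloc_flags & ALLOC_TRYLOCK)
		return false;

	while (zone->free_pages < zone->watermark) {
		if (!accept_one(zone))
			break;
		ret = true;
	}
	return ret;
}

int main(void)
{
	struct zone_sketch zone = { .nr_unaccepted = 3, .free_pages = 100, .watermark = 600 };

	printf("trylock caller accepted: %d\n", cond_accept_memory_sketch(&zone, ALLOC_TRYLOCK));
	printf("normal caller accepted:  %d\n", cond_accept_memory_sketch(&zone, 0));
	printf("unaccepted left: %d, free: %ld\n", zone.nr_unaccepted, zone.free_pages);
	return 0;
}

The numbers are arbitrary; the point is only that the list-emptiness check and the trylock bailout both happen before any work that would require the zone lock.
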

mm/userfaultfd.c

Lines changed: 10 additions & 2 deletions
@@ -1064,8 +1064,13 @@ static int move_present_pte(struct mm_struct *mm,
 	src_folio->index = linear_page_index(dst_vma, dst_addr);
 
 	orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
-	/* Follow mremap() behavior and treat the entry dirty after the move */
-	orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte), dst_vma);
+	/* Set soft dirty bit so userspace can notice the pte was moved */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	orig_dst_pte = pte_mksoft_dirty(orig_dst_pte);
+#endif
+	if (pte_dirty(orig_src_pte))
+		orig_dst_pte = pte_mkdirty(orig_dst_pte);
+	orig_dst_pte = pte_mkwrite(orig_dst_pte, dst_vma);
 
 	set_pte_at(mm, dst_addr, dst_pte, orig_dst_pte);
 out:
@@ -1100,6 +1105,9 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
 	}
 
 	orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte);
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte);
+#endif
 	set_pte_at(mm, dst_addr, dst_pte, orig_src_pte);
 	double_pt_unlock(dst_ptl, src_ptl);
 
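The present-pte hunk stops treating every moved entry as dirty (the old mremap()-style behaviour) and instead carries the hardware dirty bit over only when the source pte had it, while always setting the soft-dirty bit so write-tracking userspace (CRIU-style snapshotters, for instance) can notice the move; the swap-pte hunk applies the soft-dirty part to swap entries. A toy bit-flag model of the present-pte decision, not the real pte encoding:

#include <stdio.h>

/* Toy pte flag bits, for illustration only. */
#define PTE_DIRTY	0x1u	/* hardware dirty */
#define PTE_SOFT_DIRTY	0x2u	/* software write-tracking bit */
#define PTE_WRITE	0x4u

static unsigned int move_pte_flags(unsigned int src_flags)
{
	unsigned int dst_flags = 0;

	/* Always flag the move for soft-dirty trackers... */
	dst_flags |= PTE_SOFT_DIRTY;

	/* ...but only keep the hardware dirty bit if the source was dirty,
	 * rather than marking every moved entry dirty. */
	if (src_flags & PTE_DIRTY)
		dst_flags |= PTE_DIRTY;

	dst_flags |= PTE_WRITE;
	return dst_flags;
}

int main(void)
{
	printf("clean source -> 0x%x\n", move_pte_flags(0));
	printf("dirty source -> 0x%x\n", move_pte_flags(PTE_DIRTY));
	return 0;
}
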