
Commit 204e9a1

Merge tag 'mm-hotfixes-stable-2025-04-02-21-57' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM hotfixes from Andrew Morton:
"Five hotfixes. Three are cc:stable and the remainder address post-6.14 issues or aren't considered necessary for -stable kernels. All patches are for MM"

* tag 'mm-hotfixes-stable-2025-04-02-21-57' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm: zswap: fix crypto_free_acomp() deadlock in zswap_cpu_comp_dead()
  mm/hugetlb: move hugetlb_sysctl_init() to the __init section
  mm: page_isolation: avoid calling folio_hstate() without hugetlb_lock
  mm/hugetlb_vmemmap: fix memory loads ordering
  mm/userfaultfd: fix release hang over concurrent GUP
2 parents: ea59cb7 + c11bcbc

6 files changed, +94 -37 lines changed


fs/userfaultfd.c

Lines changed: 25 additions & 26 deletions
@@ -395,32 +395,6 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	if (!(vmf->flags & FAULT_FLAG_USER) && (ctx->flags & UFFD_USER_MODE_ONLY))
 		goto out;
 
-	/*
-	 * If it's already released don't get it. This avoids to loop
-	 * in __get_user_pages if userfaultfd_release waits on the
-	 * caller of handle_userfault to release the mmap_lock.
-	 */
-	if (unlikely(READ_ONCE(ctx->released))) {
-		/*
-		 * Don't return VM_FAULT_SIGBUS in this case, so a non
-		 * cooperative manager can close the uffd after the
-		 * last UFFDIO_COPY, without risking to trigger an
-		 * involuntary SIGBUS if the process was starting the
-		 * userfaultfd while the userfaultfd was still armed
-		 * (but after the last UFFDIO_COPY). If the uffd
-		 * wasn't already closed when the userfault reached
-		 * this point, that would normally be solved by
-		 * userfaultfd_must_wait returning 'false'.
-		 *
-		 * If we were to return VM_FAULT_SIGBUS here, the non
-		 * cooperative manager would be instead forced to
-		 * always call UFFDIO_UNREGISTER before it can safely
-		 * close the uffd.
-		 */
-		ret = VM_FAULT_NOPAGE;
-		goto out;
-	}
-
 	/*
 	 * Check that we can return VM_FAULT_RETRY.
 	 *
@@ -457,6 +431,31 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
 		goto out;
 
+	if (unlikely(READ_ONCE(ctx->released))) {
+		/*
+		 * If a concurrent release is detected, do not return
+		 * VM_FAULT_SIGBUS or VM_FAULT_NOPAGE, but instead always
+		 * return VM_FAULT_RETRY with lock released proactively.
+		 *
+		 * If we were to return VM_FAULT_SIGBUS here, the non
+		 * cooperative manager would be instead forced to
+		 * always call UFFDIO_UNREGISTER before it can safely
+		 * close the uffd, to avoid involuntary SIGBUS triggered.
+		 *
+		 * If we were to return VM_FAULT_NOPAGE, it would work for
+		 * the fault path, in which the lock will be released
+		 * later. However for GUP, faultin_page() does nothing
+		 * special on NOPAGE, so GUP would spin retrying without
+		 * releasing the mmap read lock, causing possible livelock.
+		 *
+		 * Here only VM_FAULT_RETRY would make sure the mmap lock
+		 * be released immediately, so that the thread concurrently
+		 * releasing the userfault would always make progress.
+		 */
+		release_fault_lock(vmf);
+		goto out;
+	}
+
 	/* take the reference before dropping the mmap_lock */
 	userfaultfd_ctx_get(ctx);
 
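The comment above captures the whole bug: on NOPAGE, GUP's faultin_page() retries without dropping the mmap read lock, so a releaser waiting for that lock can never run. Below is a minimal userspace model of the fixed contract, not kernel code: a pthread rwlock stands in for mmap_lock, and every name (fault_once, gup_like, the released flag) is invented for illustration. Flipping released back to 0 exists only so the demo terminates; in the kernel, post-release faults are resolved differently.

/* Build: cc -pthread -o sketch sketch.c */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

enum { FAULT_DONE, FAULT_RETRY };

static pthread_rwlock_t mmap_lock = PTHREAD_RWLOCK_INITIALIZER;
static atomic_int released = 1;        /* models READ_ONCE(ctx->released) */

/* Called with the read lock held; on FAULT_RETRY it has dropped it. */
static int fault_once(void)
{
        if (atomic_load(&released)) {
                pthread_rwlock_unlock(&mmap_lock); /* release_fault_lock() */
                return FAULT_RETRY;   /* never NOPAGE: caller must relock */
        }
        return FAULT_DONE;
}

static void *gup_like(void *arg)
{
        (void)arg;
        pthread_rwlock_rdlock(&mmap_lock);
        while (fault_once() == FAULT_RETRY) {
                sched_yield();        /* lock is free: releaser can run */
                pthread_rwlock_rdlock(&mmap_lock);
        }
        pthread_rwlock_unlock(&mmap_lock);
        puts("gup: fault serviced");
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, gup_like, NULL);

        /* The releaser needs the same lock the GUP loop keeps taking. */
        pthread_rwlock_wrlock(&mmap_lock);
        atomic_store(&released, 0);   /* demo-only: lets gup_like finish */
        pthread_rwlock_unlock(&mmap_lock);
        puts("releaser: made progress");

        pthread_join(t, NULL);
        return 0;
}

With the old NOPAGE behavior, gup_like would loop without ever unlocking, and the wrlock in main would block forever: exactly the release hang the patch title describes.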

include/linux/page-flags.h

Lines changed: 37 additions & 0 deletions
@@ -226,11 +226,48 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page
 	}
 	return page;
 }
+
+static __always_inline bool page_count_writable(const struct page *page, int u)
+{
+	if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
+		return true;
+
+	/*
+	 * The refcount check is ordered before the fake-head check to prevent
+	 * the following race:
+	 *   CPU 1 (HVO)                     CPU 2 (speculative PFN walker)
+	 *
+	 *   page_ref_freeze()
+	 *   synchronize_rcu()
+	 *                                   rcu_read_lock()
+	 *                                   page_is_fake_head() is false
+	 *   vmemmap_remap_pte()
+	 *   XXX: struct page[] becomes r/o
+	 *
+	 *   page_ref_unfreeze()
+	 *                                   page_ref_count() is not zero
+	 *
+	 *                                   atomic_add_unless(&page->_refcount)
+	 *                                   XXX: try to modify r/o struct page[]
+	 *
+	 * The refcount check also prevents modification attempts to other (r/o)
+	 * tail pages that are not fake heads.
+	 */
+	if (atomic_read_acquire(&page->_refcount) == u)
+		return false;
+
+	return page_fixed_fake_head(page) == page;
+}
 #else
 static inline const struct page *page_fixed_fake_head(const struct page *page)
 {
 	return page;
 }
+
+static inline bool page_count_writable(const struct page *page, int u)
+{
+	return true;
+}
 #endif
 
 static __always_inline int page_is_fake_head(const struct page *page)
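The acquire on _refcount is what orders the two loads: the walker must observe the frozen refcount before it inspects metadata that the remapper may have turned read-only, and page_ref_add_unless() below relies on this helper for exactly that. Here is a compilable C11-atomics sketch of the same pairing, with invented names (obj, payload, MAGIC_HEAD) and plain acquire/release in place of the kernel's primitives and RCU:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define MAGIC_HEAD 0x4ead

struct obj {
        atomic_int refcnt;     /* stands in for page->_refcount */
        int payload;           /* stands in for the fake-head metadata */
};

/* Remapper: freeze the count, rewrite the metadata, then unfreeze. */
static void freeze_remap_unfreeze(struct obj *o)
{
        int one = 1;

        /* like page_ref_freeze(): 1 -> 0 */
        atomic_compare_exchange_strong(&o->refcnt, &one, 0);
        o->payload = MAGIC_HEAD;            /* vmemmap_remap_pte() step */
        /* release: the payload write is visible before the unfreeze */
        atomic_store_explicit(&o->refcnt, 1, memory_order_release);
}

/* Reader: the acquire load is ordered before the payload load below. */
static bool count_writable(struct obj *o, int u)
{
        if (atomic_load_explicit(&o->refcnt, memory_order_acquire) == u)
                return false;  /* frozen: a remap may be in progress */
        return o->payload == MAGIC_HEAD;    /* safe to inspect only now */
}

int main(void)
{
        struct obj o = { .refcnt = 1, .payload = 0 };

        freeze_remap_unfreeze(&o);
        printf("writable: %d\n", count_writable(&o, 0));
        return 0;
}

Without the acquire (a relaxed load), the compiler or CPU could hoist the payload read above the refcount check, recreating the race from the comment: a stale metadata read paired with a post-unfreeze refcount.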

include/linux/page_ref.h

Lines changed: 1 addition & 1 deletion
@@ -234,7 +234,7 @@ static inline bool page_ref_add_unless(struct page *page, int nr, int u)
 
 	rcu_read_lock();
 	/* avoid writing to the vmemmap area being remapped */
-	if (!page_is_fake_head(page) && page_ref_count(page) != u)
+	if (page_count_writable(page, u))
 		ret = atomic_add_unless(&page->_refcount, nr, u);
 	rcu_read_unlock();
 

mm/hugetlb.c

Lines changed: 1 addition & 1 deletion
@@ -5179,7 +5179,7 @@ static const struct ctl_table hugetlb_table[] = {
 	},
 };
 
-static void hugetlb_sysctl_init(void)
+static void __init hugetlb_sysctl_init(void)
 {
 	register_sysctl_init("vm", hugetlb_table);
 }

mm/page_isolation.c

Lines changed: 8 additions & 1 deletion
@@ -83,7 +83,14 @@ static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long e
 		unsigned int skip_pages;
 
 		if (PageHuge(page)) {
-			if (!hugepage_migration_supported(folio_hstate(folio)))
+			struct hstate *h;
+
+			/*
+			 * The huge page may be freed so can not
+			 * use folio_hstate() directly.
+			 */
+			h = size_to_hstate(folio_size(folio));
+			if (h && !hugepage_migration_supported(h))
 				return page;
 		} else if (!folio_test_lru(folio) && !__folio_test_movable(folio)) {
 			return page;
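The pattern here is re-deriving state from a value that stays safe to read (the folio size) instead of chasing a pointer into an hstate that may already be stale, then tolerating a lookup miss. A minimal sketch of that shape, with invented names (hstate_like, size_to_state) and example sizes, assuming a fixed table like the kernel's hstates[]:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct hstate_like {
        size_t size;
        bool migratable;
};

static struct hstate_like states[] = {
        { 2UL << 20, true  },  /* 2 MiB */
        { 1UL << 30, false },  /* 1 GiB */
};

/* Re-derive state from the size; a miss means "freed meanwhile". */
static struct hstate_like *size_to_state(size_t size)
{
        for (size_t i = 0; i < sizeof(states) / sizeof(states[0]); i++)
                if (states[i].size == size)
                        return &states[i];
        return NULL;
}

/* Caller mirrors the patched check: tolerate NULL before testing. */
static bool is_unmovable(size_t folio_size)
{
        struct hstate_like *h = size_to_state(folio_size);

        return h && !h->migratable;
}

int main(void)
{
        printf("2M unmovable: %d\n", (int)is_unmovable(2UL << 20));
        printf("1G unmovable: %d\n", (int)is_unmovable(1UL << 30));
        return 0;
}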

mm/zswap.c

Lines changed: 22 additions & 8 deletions
@@ -883,18 +883,32 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
 {
 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
 	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+	struct acomp_req *req;
+	struct crypto_acomp *acomp;
+	u8 *buffer;
+
+	if (IS_ERR_OR_NULL(acomp_ctx))
+		return 0;
 
 	mutex_lock(&acomp_ctx->mutex);
-	if (!IS_ERR_OR_NULL(acomp_ctx)) {
-		if (!IS_ERR_OR_NULL(acomp_ctx->req))
-			acomp_request_free(acomp_ctx->req);
-		acomp_ctx->req = NULL;
-		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
-			crypto_free_acomp(acomp_ctx->acomp);
-		kfree(acomp_ctx->buffer);
-	}
+	req = acomp_ctx->req;
+	acomp = acomp_ctx->acomp;
+	buffer = acomp_ctx->buffer;
+	acomp_ctx->req = NULL;
+	acomp_ctx->acomp = NULL;
+	acomp_ctx->buffer = NULL;
 	mutex_unlock(&acomp_ctx->mutex);
 
+	/*
+	 * Do the actual freeing after releasing the mutex to avoid subtle
+	 * locking dependencies causing deadlocks.
+	 */
+	if (!IS_ERR_OR_NULL(req))
+		acomp_request_free(req);
+	if (!IS_ERR_OR_NULL(acomp))
+		crypto_free_acomp(acomp);
+	kfree(buffer);
+
	return 0;
 }
 
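The fix is the classic detach-then-free shape: snapshot and NULL the pointers while holding the mutex, and only call the freeing functions (which may sleep or take other locks, as crypto_free_acomp() can here) after unlocking. A userspace sketch of the same shape with pthreads and invented names (struct ctx, ctx_teardown):

#include <pthread.h>
#include <stdlib.h>

struct ctx {
        pthread_mutex_t lock;
        void *req;
        void *buffer;
};

static void ctx_teardown(struct ctx *c)
{
        void *req, *buffer;

        pthread_mutex_lock(&c->lock);
        req = c->req;                  /* detach under the lock... */
        buffer = c->buffer;
        c->req = NULL;
        c->buffer = NULL;
        pthread_mutex_unlock(&c->lock);

        free(req);                     /* ...free after dropping it */
        free(buffer);
}

int main(void)
{
        struct ctx c = { PTHREAD_MUTEX_INITIALIZER, malloc(16), malloc(64) };

        ctx_teardown(&c);
        return 0;
}

Concurrent users still see consistent state (NULL pointers under the lock), while nothing that can block or recurse into other locks ever runs inside the critical section.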
