Skip to content

Commit 41e6ddc

Browse files
lorenzo-stoakesakpm00
authored andcommitted
mm/vma: add give_up_on_oom option on modify/merge, use in uffd release
Currently, if a VMA merge fails due to an OOM condition arising on commit merge or a failure to duplicate anon_vma's, we report this so the caller can handle it. However there are cases where the caller is only ostensibly trying a merge, and doesn't mind if it fails due to this condition. Since we do not want to introduce an implicit assumption that we only actually modify VMAs after OOM conditions might arise, add a 'give up on oom' option and make an explicit contract that, should this flag be set, we absolutely will not modify any VMAs should OOM arise and just bail out. Since it'd be very unusual for a user to try to vma_modify() with this flag set but be specifying a range within a VMA which ends up being split (which can fail due to rlimit issues, not only OOM), we add a debug warning for this condition. The motivating reason for this is uffd release - syzkaller (and Pedro Falcato's VERY astute analysis) found a way in which an injected fault on allocation, triggering an OOM condition on commit merge, would result in uffd code becoming confused and treating an error value as if it were a VMA pointer. To avoid this, we make use of this new VMG flag to ensure that this never occurs, utilising the fact that, should we be clearing entire VMAs, we do not wish an OOM event to be reported to us. Many thanks to Pedro Falcato for his excellent analysis and Jann Horn for his insightful and intelligent analysis of the situation, both of whom were instrumental in this fix. Link: https://lkml.kernel.org/r/20250321100937.46634-1-lorenzo.stoakes@oracle.com Reported-by: syzbot+20ed41006cf9d842c2b5@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67dc67f0.050a0220.25ae54.001e.GAE@google.com/ Fixes: 47b16d0 ("mm: abort vma_modify() on merge out of memory failure") Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Suggested-by: Pedro Falcato <pfalcato@suse.de> Suggested-by: Jann Horn <jannh@google.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 9c02223 commit 41e6ddc

File tree

3 files changed

+66
-7
lines changed

3 files changed

+66
-7
lines changed

mm/userfaultfd.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1902,14 +1902,22 @@ struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi,
19021902
unsigned long end)
19031903
{
19041904
struct vm_area_struct *ret;
1905+
bool give_up_on_oom = false;
1906+
1907+
/*
1908+
* If we are modifying only and not splitting, just give up on the merge
1909+
* if OOM prevents us from merging successfully.
1910+
*/
1911+
if (start == vma->vm_start && end == vma->vm_end)
1912+
give_up_on_oom = true;
19051913

19061914
/* Reset ptes for the whole vma range if wr-protected */
19071915
if (userfaultfd_wp(vma))
19081916
uffd_wp_range(vma, start, end - start, false);
19091917

19101918
ret = vma_modify_flags_uffd(vmi, prev, vma, start, end,
19111919
vma->vm_flags & ~__VM_UFFD_FLAGS,
1912-
NULL_VM_UFFD_CTX);
1920+
NULL_VM_UFFD_CTX, give_up_on_oom);
19131921

19141922
/*
19151923
* In the vma_merge() successful mprotect-like case 8:
@@ -1960,7 +1968,8 @@ int userfaultfd_register_range(struct userfaultfd_ctx *ctx,
19601968
new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags;
19611969
vma = vma_modify_flags_uffd(&vmi, prev, vma, start, vma_end,
19621970
new_flags,
1963-
(struct vm_userfaultfd_ctx){ctx});
1971+
(struct vm_userfaultfd_ctx){ctx},
1972+
/* give_up_on_oom = */false);
19641973
if (IS_ERR(vma))
19651974
return PTR_ERR(vma);
19661975

mm/vma.c

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,9 @@ static void vmg_adjust_set_range(struct vma_merge_struct *vmg)
666666
/*
667667
* Actually perform the VMA merge operation.
668668
*
669+
* IMPORTANT: We guarantee that, should vmg->give_up_on_oom is set, to not
670+
* modify any VMAs or cause inconsistent state should an OOM condition arise.
671+
*
669672
* Returns 0 on success, or an error value on failure.
670673
*/
671674
static int commit_merge(struct vma_merge_struct *vmg)
@@ -685,6 +688,12 @@ static int commit_merge(struct vma_merge_struct *vmg)
685688

686689
init_multi_vma_prep(&vp, vma, vmg);
687690

691+
/*
692+
* If vmg->give_up_on_oom is set, we're safe, because we don't actually
693+
* manipulate any VMAs until we succeed at preallocation.
694+
*
695+
* Past this point, we will not return an error.
696+
*/
688697
if (vma_iter_prealloc(vmg->vmi, vma))
689698
return -ENOMEM;
690699

@@ -915,7 +924,13 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
915924
if (anon_dup)
916925
unlink_anon_vmas(anon_dup);
917926

918-
vmg->state = VMA_MERGE_ERROR_NOMEM;
927+
/*
928+
* We've cleaned up any cloned anon_vma's, no VMAs have been
929+
* modified, no harm no foul if the user requests that we not
930+
* report this and just give up, leaving the VMAs unmerged.
931+
*/
932+
if (!vmg->give_up_on_oom)
933+
vmg->state = VMA_MERGE_ERROR_NOMEM;
919934
return NULL;
920935
}
921936

@@ -926,7 +941,15 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
926941
abort:
927942
vma_iter_set(vmg->vmi, start);
928943
vma_iter_load(vmg->vmi);
929-
vmg->state = VMA_MERGE_ERROR_NOMEM;
944+
945+
/*
946+
* This means we have failed to clone anon_vma's correctly, but no
947+
* actual changes to VMAs have occurred, so no harm no foul - if the
948+
* user doesn't want this reported and instead just wants to give up on
949+
* the merge, allow it.
950+
*/
951+
if (!vmg->give_up_on_oom)
952+
vmg->state = VMA_MERGE_ERROR_NOMEM;
930953
return NULL;
931954
}
932955

@@ -1068,6 +1091,10 @@ int vma_expand(struct vma_merge_struct *vmg)
10681091
/* This should already have been checked by this point. */
10691092
VM_WARN_ON_VMG(!can_merge_remove_vma(next), vmg);
10701093
vma_start_write(next);
1094+
/*
1095+
* In this case we don't report OOM, so vmg->give_up_on_mm is
1096+
* safe.
1097+
*/
10711098
ret = dup_anon_vma(middle, next, &anon_dup);
10721099
if (ret)
10731100
return ret;
@@ -1090,9 +1117,15 @@ int vma_expand(struct vma_merge_struct *vmg)
10901117
return 0;
10911118

10921119
nomem:
1093-
vmg->state = VMA_MERGE_ERROR_NOMEM;
10941120
if (anon_dup)
10951121
unlink_anon_vmas(anon_dup);
1122+
/*
1123+
* If the user requests that we just give upon OOM, we are safe to do so
1124+
* here, as commit merge provides this contract to us. Nothing has been
1125+
* changed - no harm no foul, just don't report it.
1126+
*/
1127+
if (!vmg->give_up_on_oom)
1128+
vmg->state = VMA_MERGE_ERROR_NOMEM;
10961129
return -ENOMEM;
10971130
}
10981131

@@ -1534,6 +1567,13 @@ static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg)
15341567
if (vmg_nomem(vmg))
15351568
return ERR_PTR(-ENOMEM);
15361569

1570+
/*
1571+
* Split can fail for reasons other than OOM, so if the user requests
1572+
* this it's probably a mistake.
1573+
*/
1574+
VM_WARN_ON(vmg->give_up_on_oom &&
1575+
(vma->vm_start != start || vma->vm_end != end));
1576+
15371577
/* Split any preceding portion of the VMA. */
15381578
if (vma->vm_start < start) {
15391579
int err = split_vma(vmg->vmi, vma, start, 1);
@@ -1602,12 +1642,15 @@ struct vm_area_struct
16021642
struct vm_area_struct *vma,
16031643
unsigned long start, unsigned long end,
16041644
unsigned long new_flags,
1605-
struct vm_userfaultfd_ctx new_ctx)
1645+
struct vm_userfaultfd_ctx new_ctx,
1646+
bool give_up_on_oom)
16061647
{
16071648
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
16081649

16091650
vmg.flags = new_flags;
16101651
vmg.uffd_ctx = new_ctx;
1652+
if (give_up_on_oom)
1653+
vmg.give_up_on_oom = true;
16111654

16121655
return vma_modify(&vmg);
16131656
}

mm/vma.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,12 @@ struct vma_merge_struct {
114114
*/
115115
bool just_expand :1;
116116

117+
/*
118+
* If a merge is possible, but an OOM error occurs, give up and don't
119+
* execute the merge, returning NULL.
120+
*/
121+
bool give_up_on_oom :1;
122+
117123
/* Internal flags set during merge process: */
118124

119125
/*
@@ -255,7 +261,8 @@ __must_check struct vm_area_struct
255261
struct vm_area_struct *vma,
256262
unsigned long start, unsigned long end,
257263
unsigned long new_flags,
258-
struct vm_userfaultfd_ctx new_ctx);
264+
struct vm_userfaultfd_ctx new_ctx,
265+
bool give_up_on_oom);
259266

260267
__must_check struct vm_area_struct
261268
*vma_merge_new_range(struct vma_merge_struct *vmg);

0 commit comments

Comments
 (0)