Commit eb88e6b

Merge tag 'fsnotify_for_v6.14-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull fsnotify reverts from Jan Kara:
 "Syzbot has found out that fsnotify HSM events generated on page fault
  can be generated while we already hold freeze protection for the
  filesystem (when you do buffered write from a buffer which is mmapped
  file on the same filesystem) which violates expectations for HSM
  events and could lead to deadlocks of HSM clients with filesystem
  freezing.

  Since it's quite late in the cycle we've decided to revert changes
  implementing HSM events on page fault for now and instead just
  generate one event for the whole range on mmap(2) so that HSM client
  can fetch the data at that moment"

* tag 'fsnotify_for_v6.14-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  Revert "fanotify: disable readahead if we have pre-content watches"
  Revert "mm: don't allow huge faults for files with pre content watches"
  Revert "fsnotify: generate pre-content permission event on page fault"
  Revert "xfs: add pre-content fsnotify hook for DAX faults"
  Revert "ext4: add pre-content fsnotify hook for DAX faults"
  fsnotify: add pre-content hooks on mmap()
2 parents 3571e8b + 252256e commit eb88e6b
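
The failing pattern the pull message describes can be reproduced from ordinary userspace code: a buffered write() whose source buffer is an mmap() of a file on the same filesystem. The write path already holds freeze protection for that filesystem while it copies from the user buffer, so the page fault on the mapped source page (and, before these reverts, the pre-content HSM event generated from that fault) happened under freeze protection. A minimal sketch of the pattern, with hypothetical paths, purely as illustration and not a standalone deadlock reproducer:

/*
 * Buffered write() whose source buffer is an mmap() of a file on the
 * same filesystem.  The copy inside write() faults on "buf" while the
 * kernel already holds freeze protection for /mnt/fs.
 */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        int src = open("/mnt/fs/source", O_RDONLY);               /* same filesystem... */
        int dst = open("/mnt/fs/dest", O_WRONLY | O_CREAT, 0644); /* ...as this one */
        if (src < 0 || dst < 0)
                return 1;

        void *buf = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, src, 0);
        if (buf == MAP_FAILED)
                return 1;

        /*
         * write() takes freeze protection before copying from buf; touching
         * buf faults the source page in at that point.  With the reverted
         * patches that fault could emit a pre-content event to an HSM client.
         */
        if (write(dst, buf, 4096) < 0)
                return 1;

        munmap(buf, 4096);
        close(src);
        close(dst);
        return 0;
}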

File tree

9 files changed (+24, -143 lines)

fs/ext4/file.c

Lines changed: 0 additions & 3 deletions
@@ -756,9 +756,6 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
                         return VM_FAULT_SIGBUS;
                 }
         } else {
-                result = filemap_fsnotify_fault(vmf);
-                if (unlikely(result))
-                        return result;
                 filemap_invalidate_lock_shared(mapping);
         }
         result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops);

fs/xfs/xfs_file.c

Lines changed: 0 additions & 13 deletions
@@ -1451,9 +1451,6 @@ xfs_dax_read_fault(
 
         trace_xfs_read_fault(ip, order);
 
-        ret = filemap_fsnotify_fault(vmf);
-        if (unlikely(ret))
-                return ret;
         xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
         ret = xfs_dax_fault_locked(vmf, order, false);
         xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
@@ -1482,16 +1479,6 @@ xfs_write_fault(
         vm_fault_t              ret;
 
         trace_xfs_write_fault(ip, order);
-        /*
-         * Usually we get here from ->page_mkwrite callback but in case of DAX
-         * we will get here also for ordinary write fault. Handle HSM
-         * notifications for that case.
-         */
-        if (IS_DAX(inode)) {
-                ret = filemap_fsnotify_fault(vmf);
-                if (unlikely(ret))
-                        return ret;
-        }
 
         sb_start_pagefault(inode->i_sb);
         file_update_time(vmf->vma->vm_file);

include/linux/fsnotify.h

Lines changed: 21 additions & 0 deletions
@@ -170,6 +170,21 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
         return fsnotify_path(&file->f_path, FS_ACCESS_PERM);
 }
 
+/*
+ * fsnotify_mmap_perm - permission hook before mmap of file range
+ */
+static inline int fsnotify_mmap_perm(struct file *file, int prot,
+                                     const loff_t off, size_t len)
+{
+        /*
+         * mmap() generates only pre-content events.
+         */
+        if (!file || likely(!FMODE_FSNOTIFY_HSM(file->f_mode)))
+                return 0;
+
+        return fsnotify_pre_content(&file->f_path, &off, len);
+}
+
 /*
  * fsnotify_truncate_perm - permission hook before file truncate
  */
@@ -223,6 +238,12 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
         return 0;
 }
 
+static inline int fsnotify_mmap_perm(struct file *file, int prot,
+                                     const loff_t off, size_t len)
+{
+        return 0;
+}
+
 static inline int fsnotify_truncate_perm(const struct path *path, loff_t length)
 {
         return 0;
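
The hook above only does real work for files carrying an HSM (pre-content) mark, i.e. files watched by a fanotify group created with FAN_CLASS_PRE_CONTENT. For context, a rough sketch of the consumer side follows; the FAN_PRE_ACCESS event name is an assumption taken from the v6.14 pre-content fanotify series, and the exact flags, mark restrictions and event payload may differ, so treat this as an illustration rather than an API reference:

/*
 * Minimal HSM-style watcher sketch: a FAN_CLASS_PRE_CONTENT group that
 * allows pre-content events on one file.  FAN_PRE_ACCESS is assumed from
 * the v6.14 pre-content series; error handling is intentionally minimal.
 */
#include <fcntl.h>
#include <sys/fanotify.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        char buf[4096] __attribute__((aligned(__alignof__(struct fanotify_event_metadata))));
        int fd;

        if (argc < 2)
                return 1;

        fd = fanotify_init(FAN_CLASS_PRE_CONTENT, O_RDONLY);
        if (fd < 0)
                return 1;

        /* Watch the file named on the command line. */
        if (fanotify_mark(fd, FAN_MARK_ADD, FAN_PRE_ACCESS, AT_FDCWD, argv[1]))
                return 1;

        for (;;) {
                ssize_t len = read(fd, buf, sizeof(buf));
                struct fanotify_event_metadata *ev = (void *)buf;

                while (len > 0 && FAN_EVENT_OK(ev, len)) {
                        /*
                         * A real HSM client would fetch the content for the
                         * reported range here before letting the access proceed.
                         */
                        struct fanotify_response resp = {
                                .fd = ev->fd,
                                .response = FAN_ALLOW,
                        };
                        write(fd, &resp, sizeof(resp));
                        close(ev->fd);
                        ev = FAN_EVENT_NEXT(ev, len);
                }
        }
}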

include/linux/mm.h

Lines changed: 0 additions & 1 deletion
@@ -3420,7 +3420,6 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf);
 extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
                 pgoff_t start_pgoff, pgoff_t end_pgoff);
 extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
-extern vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf);
 
 extern unsigned long stack_guard_gap;
 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */

mm/filemap.c

Lines changed: 0 additions & 86 deletions
@@ -47,7 +47,6 @@
 #include <linux/splice.h>
 #include <linux/rcupdate_wait.h>
 #include <linux/sched/mm.h>
-#include <linux/fsnotify.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -3198,14 +3197,6 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
         unsigned long vm_flags = vmf->vma->vm_flags;
         unsigned int mmap_miss;
 
-        /*
-         * If we have pre-content watches we need to disable readahead to make
-         * sure that we don't populate our mapping with 0 filled pages that we
-         * never emitted an event for.
-         */
-        if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
-                return fpin;
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
         /* Use the readahead code, even if readahead is disabled */
         if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {
@@ -3274,10 +3265,6 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
         struct file *fpin = NULL;
         unsigned int mmap_miss;
 
-        /* See comment in do_sync_mmap_readahead. */
-        if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
-                return fpin;
-
         /* If we don't want any read-ahead, don't bother */
         if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
                 return fpin;
@@ -3336,48 +3323,6 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
         return ret;
 }
 
-/**
- * filemap_fsnotify_fault - maybe emit a pre-content event.
- * @vmf:        struct vm_fault containing details of the fault.
- *
- * If we have a pre-content watch on this file we will emit an event for this
- * range. If we return anything the fault caller should return immediately, we
- * will return VM_FAULT_RETRY if we had to emit an event, which will trigger the
- * fault again and then the fault handler will run the second time through.
- *
- * Return: a bitwise-OR of %VM_FAULT_ codes, 0 if nothing happened.
- */
-vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf)
-{
-        struct file *fpin = NULL;
-        int mask = (vmf->flags & FAULT_FLAG_WRITE) ? MAY_WRITE : MAY_ACCESS;
-        loff_t pos = vmf->pgoff >> PAGE_SHIFT;
-        size_t count = PAGE_SIZE;
-        int err;
-
-        /*
-         * We already did this and now we're retrying with everything locked,
-         * don't emit the event and continue.
-         */
-        if (vmf->flags & FAULT_FLAG_TRIED)
-                return 0;
-
-        /* No watches, we're done. */
-        if (likely(!FMODE_FSNOTIFY_HSM(vmf->vma->vm_file->f_mode)))
-                return 0;
-
-        fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-        if (!fpin)
-                return VM_FAULT_SIGBUS;
-
-        err = fsnotify_file_area_perm(fpin, mask, &pos, count);
-        fput(fpin);
-        if (err)
-                return VM_FAULT_SIGBUS;
-        return VM_FAULT_RETRY;
-}
-EXPORT_SYMBOL_GPL(filemap_fsnotify_fault);
-
 /**
  * filemap_fault - read in file data for page fault handling
  * @vmf:        struct vm_fault containing details of the fault
@@ -3481,37 +3426,6 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
          * or because readahead was otherwise unable to retrieve it.
          */
         if (unlikely(!folio_test_uptodate(folio))) {
-                /*
-                 * If this is a precontent file we have can now emit an event to
-                 * try and populate the folio.
-                 */
-                if (!(vmf->flags & FAULT_FLAG_TRIED) &&
-                    unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) {
-                        loff_t pos = folio_pos(folio);
-                        size_t count = folio_size(folio);
-
-                        /* We're NOWAIT, we have to retry. */
-                        if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) {
-                                folio_unlock(folio);
-                                goto out_retry;
-                        }
-
-                        if (mapping_locked)
-                                filemap_invalidate_unlock_shared(mapping);
-                        mapping_locked = false;
-
-                        folio_unlock(folio);
-                        fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-                        if (!fpin)
-                                goto out_retry;
-
-                        error = fsnotify_file_area_perm(fpin, MAY_ACCESS, &pos,
-                                                        count);
-                        if (error)
-                                ret = VM_FAULT_SIGBUS;
-                        goto out_retry;
-                }
-
                 /*
                  * If the invalidate lock is not held, the folio was in cache
                  * and uptodate and now it is not. Strange but possible since we

mm/memory.c

Lines changed: 0 additions & 19 deletions
@@ -76,7 +76,6 @@
 #include <linux/ptrace.h>
 #include <linux/vmalloc.h>
 #include <linux/sched/sysctl.h>
-#include <linux/fsnotify.h>
 
 #include <trace/events/kmem.h>
 
@@ -5750,17 +5749,8 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
 {
         struct vm_area_struct *vma = vmf->vma;
-
         if (vma_is_anonymous(vma))
                 return do_huge_pmd_anonymous_page(vmf);
-        /*
-         * Currently we just emit PAGE_SIZE for our fault events, so don't allow
-         * a huge fault if we have a pre content watch on this file. This would
-         * be trivial to support, but there would need to be tests to ensure
-         * this works properly and those don't exist currently.
-         */
-        if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
-                return VM_FAULT_FALLBACK;
         if (vma->vm_ops->huge_fault)
                 return vma->vm_ops->huge_fault(vmf, PMD_ORDER);
         return VM_FAULT_FALLBACK;
@@ -5784,9 +5774,6 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
         }
 
         if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
-                /* See comment in create_huge_pmd. */
-                if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
-                        goto split;
                 if (vma->vm_ops->huge_fault) {
                         ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER);
                         if (!(ret & VM_FAULT_FALLBACK))
@@ -5809,9 +5796,6 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
         /* No support for anonymous transparent PUD pages yet */
         if (vma_is_anonymous(vma))
                 return VM_FAULT_FALLBACK;
-        /* See comment in create_huge_pmd. */
-        if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
-                return VM_FAULT_FALLBACK;
         if (vma->vm_ops->huge_fault)
                 return vma->vm_ops->huge_fault(vmf, PUD_ORDER);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -5829,9 +5813,6 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
         if (vma_is_anonymous(vma))
                 goto split;
         if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
-                /* See comment in create_huge_pmd. */
-                if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
-                        goto split;
                 if (vma->vm_ops->huge_fault) {
                         ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER);
                         if (!(ret & VM_FAULT_FALLBACK))

mm/nommu.c

Lines changed: 0 additions & 7 deletions
@@ -1613,13 +1613,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
 
-vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf)
-{
-        BUG();
-        return 0;
-}
-EXPORT_SYMBOL_GPL(filemap_fsnotify_fault);
-
 vm_fault_t filemap_fault(struct vm_fault *vmf)
 {
         BUG();

mm/readahead.c

Lines changed: 0 additions & 14 deletions
@@ -128,7 +128,6 @@
 #include <linux/blk-cgroup.h>
 #include <linux/fadvise.h>
 #include <linux/sched/mm.h>
-#include <linux/fsnotify.h>
 
 #include "internal.h"
 
@@ -558,15 +557,6 @@ void page_cache_sync_ra(struct readahead_control *ractl,
         unsigned long max_pages, contig_count;
         pgoff_t prev_index, miss;
 
-        /*
-         * If we have pre-content watches we need to disable readahead to make
-         * sure that we don't find 0 filled pages in cache that we never emitted
-         * events for. Filesystems supporting HSM must make sure to not call
-         * this function with ractl->file unset for files handled by HSM.
-         */
-        if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode)))
-                return;
-
         /*
          * Even if readahead is disabled, issue this request as readahead
          * as we'll need it to satisfy the requested range. The forced
@@ -645,10 +635,6 @@ void page_cache_async_ra(struct readahead_control *ractl,
         if (!ra->ra_pages)
                 return;
 
-        /* See the comment in page_cache_sync_ra. */
-        if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode)))
-                return;
-
         /*
          * Same bit is used for PG_readahead and PG_reclaim.
          */

mm/util.c

Lines changed: 3 additions & 0 deletions
@@ -23,6 +23,7 @@
 #include <linux/processor.h>
 #include <linux/sizes.h>
 #include <linux/compat.h>
+#include <linux/fsnotify.h>
 
 #include <linux/uaccess.h>
 
@@ -569,6 +570,8 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
         LIST_HEAD(uf);
 
         ret = security_mmap_file(file, prot, flag);
+        if (!ret)
+                ret = fsnotify_mmap_perm(file, prot, pgoff >> PAGE_SHIFT, len);
         if (!ret) {
                 if (mmap_write_lock_killable(mm))
                         return -EINTR;
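
Taken together with the reverts above, this hook in vm_mmap_pgoff() is now the only place pre-content events are generated for mappings: per the pull message, one event for the whole range at mmap(2) time, with later faults on the mapping handled as ordinary faults. On the mapping side nothing changes; a plain mmap() of a watched file, shown below with a hypothetical path as a minimal sketch, is what triggers the event:

/*
 * mmap() of a file with a pre-content (HSM) watch: the event covering the
 * mapped range is generated before the mapping is set up, so the later
 * read through the mapping is an ordinary page fault.
 */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/mnt/hsm/file", O_RDONLY);       /* hypothetical watched file */
        if (fd < 0)
                return 1;

        void *p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                return 1;

        volatile char c = *(char *)p;   /* ordinary fault, no HSM event here */
        (void)c;

        munmap(p, 4096);
        close(fd);
        return 0;
}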
