Skip to content

Commit 40d49a3

Browse files
Matthew Wilcox (Oracle) authored and akpm00 committed
mm: allow ->huge_fault() to be called without the mmap_lock held
Remove the checks for the VMA lock being held, allowing the page fault path to call into the filesystem instead of retrying with the mmap_lock held. This will improve scalability for DAX page faults. Also update the documentation to match (and to reflect some other recent changes).

Link: https://lkml.kernel.org/r/20230818202335.2739663-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 051ddcf commit 40d49a3

File tree

3 files changed

+36
-33
lines changed

3 files changed

+36
-33
lines changed

Documentation/filesystems/locking.rst

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -628,26 +628,29 @@ vm_operations_struct
628628

629629
prototypes::
630630

631-
void (*open)(struct vm_area_struct*);
632-
void (*close)(struct vm_area_struct*);
633-
vm_fault_t (*fault)(struct vm_area_struct*, struct vm_fault *);
631+
void (*open)(struct vm_area_struct *);
632+
void (*close)(struct vm_area_struct *);
633+
vm_fault_t (*fault)(struct vm_fault *);
634+
vm_fault_t (*huge_fault)(struct vm_fault *, unsigned int order);
635+
vm_fault_t (*map_pages)(struct vm_fault *, pgoff_t start, pgoff_t end);
634636
vm_fault_t (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
635637
vm_fault_t (*pfn_mkwrite)(struct vm_area_struct *, struct vm_fault *);
636638
int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
637639

638640
locking rules:
639641

640-
============= ========= ===========================
642+
============= ========== ===========================
641643
ops mmap_lock PageLocked(page)
642-
============= ========= ===========================
643-
open: yes
644-
close: yes
645-
fault: yes can return with page locked
646-
map_pages: read
647-
page_mkwrite: yes can return with page locked
648-
pfn_mkwrite: yes
649-
access: yes
650-
============= ========= ===========================
644+
============= ========== ===========================
645+
open: write
646+
close: read/write
647+
fault: read can return with page locked
648+
huge_fault: maybe-read
649+
map_pages: maybe-read
650+
page_mkwrite: read can return with page locked
651+
pfn_mkwrite: read
652+
access: read
653+
============= ========== ===========================
651654

652655
->fault() is called when a previously not present pte is about to be faulted
653656
in. The filesystem must find and return the page associated with the passed in
@@ -657,6 +660,13 @@ then ensure the page is not already truncated (invalidate_lock will block
657660
subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
658661
locked. The VM will unlock the page.
659662

663+
->huge_fault() is called when there is no PUD or PMD entry present. This
664+
gives the filesystem the opportunity to install a PUD or PMD sized page.
665+
Filesystems can also use the ->fault method to return a PMD sized page,
666+
so implementing this function may not be necessary. In particular,
667+
filesystems should not call filemap_fault() from ->huge_fault().
668+
The mmap_lock might not be held when this method is called.
669+
660670
->map_pages() is called when the VM asks to map easily accessible pages.
661671
Filesystem should find and map pages associated with offsets from "start_pgoff"
662672
till "end_pgoff". ->map_pages() is called with the RCU lock held and must

Documentation/filesystems/porting.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -943,3 +943,14 @@ file pointer instead of struct dentry pointer. d_tmpfile() is similarly
943943
changed to simplify callers. The passed file is in a non-open state and on
944944
success must be opened before returning (e.g. by calling
945945
finish_open_simple()).
946+
947+
---
948+
949+
**mandatory**
950+
951+
Calling convention for ->huge_fault has changed. It now takes a page
952+
order instead of an enum page_entry_size, and it may be called without the
953+
mmap_lock held. All in-tree users have been audited and do not seem to
954+
depend on the mmap_lock being held, but out of tree users should verify
955+
for themselves. If they do need it, they can return VM_FAULT_RETRY to
956+
be called with the mmap_lock held.

mm/memory.c

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4854,13 +4854,8 @@ static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
48544854
struct vm_area_struct *vma = vmf->vma;
48554855
if (vma_is_anonymous(vma))
48564856
return do_huge_pmd_anonymous_page(vmf);
4857-
if (vma->vm_ops->huge_fault) {
4858-
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
4859-
vma_end_read(vma);
4860-
return VM_FAULT_RETRY;
4861-
}
4857+
if (vma->vm_ops->huge_fault)
48624858
return vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
4863-
}
48644859
return VM_FAULT_FALLBACK;
48654860
}
48664861

@@ -4880,10 +4875,6 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
48804875

48814876
if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
48824877
if (vma->vm_ops->huge_fault) {
4883-
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
4884-
vma_end_read(vma);
4885-
return VM_FAULT_RETRY;
4886-
}
48874878
ret = vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
48884879
if (!(ret & VM_FAULT_FALLBACK))
48894880
return ret;
@@ -4904,13 +4895,8 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
49044895
/* No support for anonymous transparent PUD pages yet */
49054896
if (vma_is_anonymous(vma))
49064897
return VM_FAULT_FALLBACK;
4907-
if (vma->vm_ops->huge_fault) {
4908-
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
4909-
vma_end_read(vma);
4910-
return VM_FAULT_RETRY;
4911-
}
4898+
if (vma->vm_ops->huge_fault)
49124899
return vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
4913-
}
49144900
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
49154901
return VM_FAULT_FALLBACK;
49164902
}
@@ -4927,10 +4913,6 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
49274913
goto split;
49284914
if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
49294915
if (vma->vm_ops->huge_fault) {
4930-
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
4931-
vma_end_read(vma);
4932-
return VM_FAULT_RETRY;
4933-
}
49344916
ret = vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
49354917
if (!(ret & VM_FAULT_FALLBACK))
49364918
return ret;

0 commit comments

Comments
 (0)