Skip to content

Commit 68d3252

Browse files
mjkravetz authored and akpm00 committed
hugetlbfs: zero partial pages during fallocate hole punch
hugetlbfs fallocate support was originally added with commit 70c3547 ("hugetlbfs: add hugetlbfs_fallocate()"). Initial support only operated on whole hugetlb pages. This makes sense for populating files as other interfaces such as mmap and truncate require hugetlb page size alignment. Only operating on whole hugetlb pages for the hole punch case was a simplification and there was no compelling use case to zero partial pages.

In a recent discussion[1] it was assumed that hugetlbfs hole punch would zero partial hugetlb pages as that is in line with the man page description saying 'partial filesystem blocks are zeroed'. However, the hugetlbfs hole punch code actually does this:

        hole_start = round_up(offset, hpage_size);
        hole_end = round_down(offset + len, hpage_size);

Modify code to zero partial hugetlb pages in hole punch range. It is possible that application code could note a change in behavior. However, that would imply the code is passing in an unaligned range and expecting only whole pages be removed. This is unlikely as the fallocate documentation states the opposite.

The current hugetlbfs fallocate hole punch behavior is tested with the libhugetlbfs test fallocate_align[2]. This test will be updated to validate partial page zeroing.

[1] https://lore.kernel.org/linux-mm/20571829-9d3d-0b48-817c-b6b15565f651@redhat.com/
[2] https://github.com/libhugetlbfs/libhugetlbfs/blob/master/tests/fallocate_align.c

Link: https://lkml.kernel.org/r/YqeiMlZDKI1Kabfe@monkey
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent df4ae28 commit 68d3252

File tree

1 file changed

+53
-15
lines changed

1 file changed

+53
-15
lines changed

fs/hugetlbfs/inode.c

Lines changed: 53 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -600,41 +600,79 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset)
600600
remove_inode_hugepages(inode, offset, LLONG_MAX);
601601
}
602602

603+
static void hugetlbfs_zero_partial_page(struct hstate *h,
604+
struct address_space *mapping,
605+
loff_t start,
606+
loff_t end)
607+
{
608+
pgoff_t idx = start >> huge_page_shift(h);
609+
struct folio *folio;
610+
611+
folio = filemap_lock_folio(mapping, idx);
612+
if (!folio)
613+
return;
614+
615+
start = start & ~huge_page_mask(h);
616+
end = end & ~huge_page_mask(h);
617+
if (!end)
618+
end = huge_page_size(h);
619+
620+
folio_zero_segment(folio, (size_t)start, (size_t)end);
621+
622+
folio_unlock(folio);
623+
folio_put(folio);
624+
}
625+
603626
/*
 * Punch a hole over the byte range [offset, offset + len) of a hugetlbfs
 * file.
 *
 * Huge pages that lie entirely inside the range are unmapped from their
 * users and removed from the file.  A huge page that is only partly covered
 * at either edge of the range is kept; the covered part is zeroed instead.
 *
 * Returns 0 on success, or -EPERM when the inode has F_SEAL_WRITE or
 * F_SEAL_FUTURE_WRITE set.
 */
static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	struct hstate *h = hstate_inode(inode);
	struct address_space *mapping = inode->i_mapping;
	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
	loff_t hpage_size = huge_page_size(h);
	loff_t range_end = offset + len;
	/* [hole_start, hole_end) covers only the full pages within the hole. */
	loff_t hole_start = round_up(offset, hpage_size);
	loff_t hole_end = round_down(range_end, hpage_size);
	long ret = 0;

	inode_lock(inode);

	/* protected by i_rwsem */
	if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
		ret = -EPERM;
		goto out_unlock;
	}

	i_mmap_lock_write(mapping);

	/*
	 * Leading edge: the range begins before the first full page, so zero
	 * from offset up to that page boundary (or up to the end of the range
	 * when the whole range sits inside one huge page).
	 */
	if (offset < hole_start)
		hugetlbfs_zero_partial_page(h, mapping, offset,
					    min(range_end, hole_start));

	/* Unmap users of the full pages in the hole, if there are any. */
	if (hole_end > hole_start &&
	    !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
		hugetlb_vmdelete_list(&mapping->i_mmap,
				      hole_start >> PAGE_SHIFT,
				      hole_end >> PAGE_SHIFT, 0);

	/*
	 * Trailing edge: the range runs past the last full page.  The second
	 * comparison skips the case where the whole range ends before
	 * hole_start, which the leading-edge call above already covered.
	 */
	if (range_end > hole_end && range_end > hole_start)
		hugetlbfs_zero_partial_page(h, mapping, hole_end, range_end);

	i_mmap_unlock_write(mapping);

	/* Remove full pages from the file. */
	if (hole_end > hole_start)
		remove_inode_hugepages(inode, hole_start, hole_end);

out_unlock:
	inode_unlock(inode);
	return ret;
}
640678

0 commit comments

Comments
 (0)