Commit 0797833
Merge tag 'for-6.10-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:

 - fix handling of folio private changes. The private value holds
   pointer to our extent buffer structure representing a metadata range.
   Release and create of the range was not properly synchronized when
   updating the private bit which ended up in double folio_put, leading
   to all sorts of breakage

 - fix a crash, reported as duplicate key in metadata, but caused by a
   race of fsync and size extending write. Requires prealloc target
   range + fsync and other conditions (log tree state, timing)

 - fix leak of qgroup extent records after transaction abort

* tag 'for-6.10-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: protect folio::private when attaching extent buffer folios
  btrfs: fix leak of qgroup extent records after transaction abort
  btrfs: fix crash on racing fsync and size-extending write into prealloc
2 parents eecba7c + f3a5367 commit 0797833
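
The first of these fixes hinges on the lifetime rule for folio::private:
folio_attach_private() takes a folio reference and folio_detach_private()
drops it, so attach and detach must be serialized or two racing detachers
can each drop that single reference once, i.e. the double folio_put above.
A minimal sketch of the rule using the generic pagemap helpers (the
attach_meta()/detach_meta() names are hypothetical, not the btrfs code):

	static void attach_meta(struct address_space *mapping,
				struct folio *folio, void *meta)
	{
		spin_lock(&mapping->i_private_lock);
		if (!folio_test_private(folio))
			folio_attach_private(folio, meta); /* takes a folio ref */
		spin_unlock(&mapping->i_private_lock);
	}

	static void detach_meta(struct address_space *mapping, struct folio *folio)
	{
		spin_lock(&mapping->i_private_lock);
		/*
		 * Without the lock, two racing callers could both see the
		 * private bit set and both reach folio_detach_private(),
		 * dropping the single attach-time reference twice.
		 */
		if (folio_test_private(folio))
			folio_detach_private(folio); /* drops the attach-time ref */
		spin_unlock(&mapping->i_private_lock);
	}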

File tree: 3 files changed, +43 -44 lines

fs/btrfs/disk-io.c

Lines changed: 1 addition & 9 deletions

@@ -4538,18 +4538,10 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 					  struct btrfs_fs_info *fs_info)
 {
 	struct rb_node *node;
-	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 
-	delayed_refs = &trans->delayed_refs;
-
 	spin_lock(&delayed_refs->lock);
-	if (atomic_read(&delayed_refs->num_entries) == 0) {
-		spin_unlock(&delayed_refs->lock);
-		btrfs_debug(fs_info, "delayed_refs has NO entry");
-		return;
-	}
-
 	while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
 		struct btrfs_delayed_ref_head *head;
 		struct rb_node *n;

fs/btrfs/extent_io.c

Lines changed: 31 additions & 29 deletions

@@ -3689,6 +3689,8 @@ static struct extent_buffer *grab_extent_buffer(
 	struct folio *folio = page_folio(page);
 	struct extent_buffer *exists;
 
+	lockdep_assert_held(&page->mapping->i_private_lock);
+
 	/*
 	 * For subpage case, we completely rely on radix tree to ensure we
 	 * don't try to insert two ebs for the same bytenr. So here we always
@@ -3756,13 +3758,14 @@ static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
  * The caller needs to free the existing folios and retry using the same order.
  */
 static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
+				      struct btrfs_subpage *prealloc,
 				      struct extent_buffer **found_eb_ret)
 {
 
 	struct btrfs_fs_info *fs_info = eb->fs_info;
 	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	const unsigned long index = eb->start >> PAGE_SHIFT;
-	struct folio *existing_folio;
+	struct folio *existing_folio = NULL;
 	int ret;
 
 	ASSERT(found_eb_ret);
@@ -3774,12 +3777,14 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
 	ret = filemap_add_folio(mapping, eb->folios[i], index + i,
 				GFP_NOFS | __GFP_NOFAIL);
 	if (!ret)
-		return 0;
+		goto finish;
 
 	existing_folio = filemap_lock_folio(mapping, index + i);
 	/* The page cache only exists for a very short time, just retry. */
-	if (IS_ERR(existing_folio))
+	if (IS_ERR(existing_folio)) {
+		existing_folio = NULL;
 		goto retry;
+	}
 
 	/* For now, we should only have single-page folios for btree inode. */
 	ASSERT(folio_nr_pages(existing_folio) == 1);
@@ -3790,21 +3795,21 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
 		return -EAGAIN;
 	}
 
-	if (fs_info->nodesize < PAGE_SIZE) {
-		/*
-		 * We're going to reuse the existing page, can drop our page
-		 * and subpage structure now.
-		 */
+finish:
+	spin_lock(&mapping->i_private_lock);
+	if (existing_folio && fs_info->nodesize < PAGE_SIZE) {
+		/* We're going to reuse the existing page, can drop our folio now. */
 		__free_page(folio_page(eb->folios[i], 0));
 		eb->folios[i] = existing_folio;
-	} else {
+	} else if (existing_folio) {
 		struct extent_buffer *existing_eb;
 
 		existing_eb = grab_extent_buffer(fs_info,
 						 folio_page(existing_folio, 0));
 		if (existing_eb) {
 			/* The extent buffer still exists, we can use it directly. */
 			*found_eb_ret = existing_eb;
+			spin_unlock(&mapping->i_private_lock);
 			folio_unlock(existing_folio);
 			folio_put(existing_folio);
 			return 1;
@@ -3813,6 +3818,22 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
 		__free_page(folio_page(eb->folios[i], 0));
 		eb->folios[i] = existing_folio;
 	}
+	eb->folio_size = folio_size(eb->folios[i]);
+	eb->folio_shift = folio_shift(eb->folios[i]);
+	/* Should not fail, as we have preallocated the memory. */
+	ret = attach_extent_buffer_folio(eb, eb->folios[i], prealloc);
+	ASSERT(!ret);
+	/*
+	 * To inform we have an extra eb under allocation, so that
+	 * detach_extent_buffer_page() won't release the folio private when the
+	 * eb hasn't been inserted into radix tree yet.
+	 *
+	 * The ref will be decreased when the eb releases the page, in
+	 * detach_extent_buffer_page(). Thus needs no special handling in the
+	 * error path.
+	 */
+	btrfs_folio_inc_eb_refs(fs_info, eb->folios[i]);
+	spin_unlock(&mapping->i_private_lock);
 	return 0;
 }
 
@@ -3824,7 +3845,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	int attached = 0;
 	struct extent_buffer *eb;
 	struct extent_buffer *existing_eb = NULL;
-	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	struct btrfs_subpage *prealloc = NULL;
 	u64 lockdep_owner = owner_root;
 	bool page_contig = true;
@@ -3890,7 +3910,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	for (int i = 0; i < num_folios; i++) {
 		struct folio *folio;
 
-		ret = attach_eb_folio_to_filemap(eb, i, &existing_eb);
+		ret = attach_eb_folio_to_filemap(eb, i, prealloc, &existing_eb);
 		if (ret > 0) {
 			ASSERT(existing_eb);
 			goto out;
@@ -3927,24 +3947,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		 * and free the allocated page.
 		 */
 		folio = eb->folios[i];
-		eb->folio_size = folio_size(folio);
-		eb->folio_shift = folio_shift(folio);
-		spin_lock(&mapping->i_private_lock);
-		/* Should not fail, as we have preallocated the memory */
-		ret = attach_extent_buffer_folio(eb, folio, prealloc);
-		ASSERT(!ret);
-		/*
-		 * To inform we have extra eb under allocation, so that
-		 * detach_extent_buffer_page() won't release the folio private
-		 * when the eb hasn't yet been inserted into radix tree.
-		 *
-		 * The ref will be decreased when the eb released the page, in
-		 * detach_extent_buffer_page().
-		 * Thus needs no special handling in error path.
-		 */
-		btrfs_folio_inc_eb_refs(fs_info, folio);
-		spin_unlock(&mapping->i_private_lock);
-
 		WARN_ON(btrfs_folio_test_dirty(fs_info, folio, eb->start, eb->len));
 
 		/*
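
The lockdep_assert_held() added to grab_extent_buffer() costs nothing on
non-lockdep builds but documents and enforces the new contract: anything
touching folio private for the btree inode must hold i_private_lock. A
hedged one-function sketch of that convention (peek_private() is a
hypothetical helper, not a btrfs function):

	static void *peek_private(struct folio *folio)
	{
		/* Splat under CONFIG_LOCKDEP if the caller forgot the lock. */
		lockdep_assert_held(&folio->mapping->i_private_lock);
		return folio_get_private(folio);
	}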

fs/btrfs/tree-log.c

Lines changed: 11 additions & 6 deletions

@@ -4860,18 +4860,23 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
 			path->slots[0]++;
 			continue;
 		}
-		if (!dropped_extents) {
-			/*
-			 * Avoid logging extent items logged in past fsync calls
-			 * and leading to duplicate keys in the log tree.
-			 */
+		/*
+		 * Avoid overlapping items in the log tree. The first time we
+		 * get here, get rid of everything from a past fsync. After
+		 * that, if the current extent starts before the end of the last
+		 * extent we copied, truncate the last one. This can happen if
+		 * an ordered extent completion modifies the subvolume tree
+		 * while btrfs_next_leaf() has the tree unlocked.
+		 */
+		if (!dropped_extents || key.offset < truncate_offset) {
 			ret = truncate_inode_items(trans, root->log_root, inode,
-						   truncate_offset,
+						   min(key.offset, truncate_offset),
 						   BTRFS_EXTENT_DATA_KEY);
 			if (ret)
 				goto out;
 			dropped_extents = true;
 		}
+		truncate_offset = btrfs_file_extent_end(path);
 		if (ins_nr == 0)
 			start_slot = slot;
 		ins_nr++;
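
A worked example of the new overlap rule, with hypothetical offsets: if the
previous iteration copied a prealloc extent ending at 1 MiB (so
truncate_offset is 1 MiB) and, after btrfs_next_leaf() relocked the tree, the
next key starts at 512 KiB, then key.offset < truncate_offset and the log is
truncated from min(512 KiB, 1 MiB) = 512 KiB before copying, so no duplicate
key can be inserted. As a standalone illustration (not kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned long long truncate_offset = 1 << 20; /* last copied extent ends at 1 MiB */
		unsigned long long key_offset = 512 << 10;    /* revisited extent starts at 512 KiB */
		bool dropped_extents = true;                  /* past-fsync items already dropped */

		if (!dropped_extents || key_offset < truncate_offset)
			printf("truncate log from %llu\n",    /* prints 524288 */
			       key_offset < truncate_offset ? key_offset : truncate_offset);
		return 0;
	}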
