Skip to content

Commit c059361

Browse files
committed
Merge tag 'for-6.13-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba: "A few more fixes that accumulated over the last two weeks, fixing some user-reported problems: - swapfile fixes: - conditional reschedule in the activation loop - fix race with memory mapped file when activating - make activation loop interruptible - rework and fix extent sharing checks - folio fixes: - in send, recheck folio mapping after unlock - in relocation, recheck folio mapping after unlock - fix waiting for encoded read io_uring requests - fix transaction atomicity when enabling simple quotas - move COW block trace point before the block gets freed - print various sizes in sysfs with correct endianness" * tag 'for-6.13-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: sysfs: fix direct super block member reads btrfs: fix transaction atomicity bug when enabling simple quotas btrfs: avoid monopolizing a core when activating a swap file btrfs: allow swap activation to be interruptible btrfs: fix swap file activation failure due to extents that used to be shared btrfs: fix race with memory mapped writes when activating swap file btrfs: check folio mapping after unlock in put_file_data() btrfs: check folio mapping after unlock in relocate_one_folio() btrfs: fix use-after-free when COWing tree block and tracing is enabled btrfs: fix use-after-free waiting for encoded read endios
2 parents e1d9326 + fca432e commit c059361

File tree

6 files changed

+130
-56
lines changed

6 files changed

+130
-56
lines changed

fs/btrfs/ctree.c

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,8 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
654654
goto error_unlock_cow;
655655
}
656656
}
657+
658+
trace_btrfs_cow_block(root, buf, cow);
657659
if (unlock_orig)
658660
btrfs_tree_unlock(buf);
659661
free_extent_buffer_stale(buf);
@@ -710,7 +712,6 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
710712
{
711713
struct btrfs_fs_info *fs_info = root->fs_info;
712714
u64 search_start;
713-
int ret;
714715

715716
if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) {
716717
btrfs_abort_transaction(trans, -EUCLEAN);
@@ -751,12 +752,8 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
751752
* Also We don't care about the error, as it's handled internally.
752753
*/
753754
btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
754-
ret = btrfs_force_cow_block(trans, root, buf, parent, parent_slot,
755-
cow_ret, search_start, 0, nest);
756-
757-
trace_btrfs_cow_block(root, buf, *cow_ret);
758-
759-
return ret;
755+
return btrfs_force_cow_block(trans, root, buf, parent, parent_slot,
756+
cow_ret, search_start, 0, nest);
760757
}
761758
ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO);
762759

fs/btrfs/inode.c

Lines changed: 110 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -9078,9 +9078,9 @@ static ssize_t btrfs_encoded_read_inline(
90789078
}
90799079

90809080
struct btrfs_encoded_read_private {
9081-
wait_queue_head_t wait;
9081+
struct completion done;
90829082
void *uring_ctx;
9083-
atomic_t pending;
9083+
refcount_t pending_refs;
90849084
blk_status_t status;
90859085
};
90869086

@@ -9099,14 +9099,14 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
90999099
*/
91009100
WRITE_ONCE(priv->status, bbio->bio.bi_status);
91019101
}
9102-
if (atomic_dec_and_test(&priv->pending)) {
9102+
if (refcount_dec_and_test(&priv->pending_refs)) {
91039103
int err = blk_status_to_errno(READ_ONCE(priv->status));
91049104

91059105
if (priv->uring_ctx) {
91069106
btrfs_uring_read_extent_endio(priv->uring_ctx, err);
91079107
kfree(priv);
91089108
} else {
9109-
wake_up(&priv->wait);
9109+
complete(&priv->done);
91109110
}
91119111
}
91129112
bio_put(&bbio->bio);
@@ -9126,8 +9126,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
91269126
if (!priv)
91279127
return -ENOMEM;
91289128

9129-
init_waitqueue_head(&priv->wait);
9130-
atomic_set(&priv->pending, 1);
9129+
init_completion(&priv->done);
9130+
refcount_set(&priv->pending_refs, 1);
91319131
priv->status = 0;
91329132
priv->uring_ctx = uring_ctx;
91339133

@@ -9140,7 +9140,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
91409140
size_t bytes = min_t(u64, disk_io_size, PAGE_SIZE);
91419141

91429142
if (bio_add_page(&bbio->bio, pages[i], bytes, 0) < bytes) {
9143-
atomic_inc(&priv->pending);
9143+
refcount_inc(&priv->pending_refs);
91449144
btrfs_submit_bbio(bbio, 0);
91459145

91469146
bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
@@ -9155,11 +9155,11 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
91559155
disk_io_size -= bytes;
91569156
} while (disk_io_size);
91579157

9158-
atomic_inc(&priv->pending);
9158+
refcount_inc(&priv->pending_refs);
91599159
btrfs_submit_bbio(bbio, 0);
91609160

91619161
if (uring_ctx) {
9162-
if (atomic_dec_return(&priv->pending) == 0) {
9162+
if (refcount_dec_and_test(&priv->pending_refs)) {
91639163
ret = blk_status_to_errno(READ_ONCE(priv->status));
91649164
btrfs_uring_read_extent_endio(uring_ctx, ret);
91659165
kfree(priv);
@@ -9168,8 +9168,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
91689168

91699169
return -EIOCBQUEUED;
91709170
} else {
9171-
if (atomic_dec_return(&priv->pending) != 0)
9172-
io_wait_event(priv->wait, !atomic_read(&priv->pending));
9171+
if (!refcount_dec_and_test(&priv->pending_refs))
9172+
wait_for_completion_io(&priv->done);
91739173
/* See btrfs_encoded_read_endio() for ordering. */
91749174
ret = blk_status_to_errno(READ_ONCE(priv->status));
91759175
kfree(priv);
@@ -9799,15 +9799,25 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
97999799
struct btrfs_fs_info *fs_info = root->fs_info;
98009800
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
98019801
struct extent_state *cached_state = NULL;
9802-
struct extent_map *em = NULL;
98039802
struct btrfs_chunk_map *map = NULL;
98049803
struct btrfs_device *device = NULL;
98059804
struct btrfs_swap_info bsi = {
98069805
.lowest_ppage = (sector_t)-1ULL,
98079806
};
9807+
struct btrfs_backref_share_check_ctx *backref_ctx = NULL;
9808+
struct btrfs_path *path = NULL;
98089809
int ret = 0;
98099810
u64 isize;
9810-
u64 start;
9811+
u64 prev_extent_end = 0;
9812+
9813+
/*
9814+
* Acquire the inode's mmap lock to prevent races with memory mapped
9815+
* writes, as they could happen after we flush delalloc below and before
9816+
* we lock the extent range further below. The inode was already locked
9817+
* up in the call chain.
9818+
*/
9819+
btrfs_assert_inode_locked(BTRFS_I(inode));
9820+
down_write(&BTRFS_I(inode)->i_mmap_lock);
98119821

98129822
/*
98139823
* If the swap file was just created, make sure delalloc is done. If the
@@ -9816,22 +9826,32 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
98169826
*/
98179827
ret = btrfs_wait_ordered_range(BTRFS_I(inode), 0, (u64)-1);
98189828
if (ret)
9819-
return ret;
9829+
goto out_unlock_mmap;
98209830

98219831
/*
98229832
* The inode is locked, so these flags won't change after we check them.
98239833
*/
98249834
if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
98259835
btrfs_warn(fs_info, "swapfile must not be compressed");
9826-
return -EINVAL;
9836+
ret = -EINVAL;
9837+
goto out_unlock_mmap;
98279838
}
98289839
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
98299840
btrfs_warn(fs_info, "swapfile must not be copy-on-write");
9830-
return -EINVAL;
9841+
ret = -EINVAL;
9842+
goto out_unlock_mmap;
98319843
}
98329844
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
98339845
btrfs_warn(fs_info, "swapfile must not be checksummed");
9834-
return -EINVAL;
9846+
ret = -EINVAL;
9847+
goto out_unlock_mmap;
9848+
}
9849+
9850+
path = btrfs_alloc_path();
9851+
backref_ctx = btrfs_alloc_backref_share_check_ctx();
9852+
if (!path || !backref_ctx) {
9853+
ret = -ENOMEM;
9854+
goto out_unlock_mmap;
98359855
}
98369856

98379857
/*
@@ -9846,7 +9866,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
98469866
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_SWAP_ACTIVATE)) {
98479867
btrfs_warn(fs_info,
98489868
"cannot activate swapfile while exclusive operation is running");
9849-
return -EBUSY;
9869+
ret = -EBUSY;
9870+
goto out_unlock_mmap;
98509871
}
98519872

98529873
/*
@@ -9860,7 +9881,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
98609881
btrfs_exclop_finish(fs_info);
98619882
btrfs_warn(fs_info,
98629883
"cannot activate swapfile because snapshot creation is in progress");
9863-
return -EINVAL;
9884+
ret = -EINVAL;
9885+
goto out_unlock_mmap;
98649886
}
98659887
/*
98669888
* Snapshots can create extents which require COW even if NODATACOW is
@@ -9881,32 +9903,48 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
98819903
btrfs_warn(fs_info,
98829904
"cannot activate swapfile because subvolume %llu is being deleted",
98839905
btrfs_root_id(root));
9884-
return -EPERM;
9906+
ret = -EPERM;
9907+
goto out_unlock_mmap;
98859908
}
98869909
atomic_inc(&root->nr_swapfiles);
98879910
spin_unlock(&root->root_item_lock);
98889911

98899912
isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
98909913

98919914
lock_extent(io_tree, 0, isize - 1, &cached_state);
9892-
start = 0;
9893-
while (start < isize) {
9894-
u64 logical_block_start, physical_block_start;
9915+
while (prev_extent_end < isize) {
9916+
struct btrfs_key key;
9917+
struct extent_buffer *leaf;
9918+
struct btrfs_file_extent_item *ei;
98959919
struct btrfs_block_group *bg;
9896-
u64 len = isize - start;
9920+
u64 logical_block_start;
9921+
u64 physical_block_start;
9922+
u64 extent_gen;
9923+
u64 disk_bytenr;
9924+
u64 len;
98979925

9898-
em = btrfs_get_extent(BTRFS_I(inode), NULL, start, len);
9899-
if (IS_ERR(em)) {
9900-
ret = PTR_ERR(em);
9926+
key.objectid = btrfs_ino(BTRFS_I(inode));
9927+
key.type = BTRFS_EXTENT_DATA_KEY;
9928+
key.offset = prev_extent_end;
9929+
9930+
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9931+
if (ret < 0)
99019932
goto out;
9902-
}
99039933

9904-
if (em->disk_bytenr == EXTENT_MAP_HOLE) {
9934+
/*
9935+
* If key not found it means we have an implicit hole (NO_HOLES
9936+
* is enabled).
9937+
*/
9938+
if (ret > 0) {
99059939
btrfs_warn(fs_info, "swapfile must not have holes");
99069940
ret = -EINVAL;
99079941
goto out;
99089942
}
9909-
if (em->disk_bytenr == EXTENT_MAP_INLINE) {
9943+
9944+
leaf = path->nodes[0];
9945+
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
9946+
9947+
if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_INLINE) {
99109948
/*
99119949
* It's unlikely we'll ever actually find ourselves
99129950
* here, as a file small enough to fit inline won't be
@@ -9918,23 +9956,45 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
99189956
ret = -EINVAL;
99199957
goto out;
99209958
}
9921-
if (extent_map_is_compressed(em)) {
9959+
9960+
if (btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
99229961
btrfs_warn(fs_info, "swapfile must not be compressed");
99239962
ret = -EINVAL;
99249963
goto out;
99259964
}
99269965

9927-
logical_block_start = extent_map_block_start(em) + (start - em->start);
9928-
len = min(len, em->len - (start - em->start));
9929-
free_extent_map(em);
9930-
em = NULL;
9966+
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
9967+
if (disk_bytenr == 0) {
9968+
btrfs_warn(fs_info, "swapfile must not have holes");
9969+
ret = -EINVAL;
9970+
goto out;
9971+
}
9972+
9973+
logical_block_start = disk_bytenr + btrfs_file_extent_offset(leaf, ei);
9974+
extent_gen = btrfs_file_extent_generation(leaf, ei);
9975+
prev_extent_end = btrfs_file_extent_end(path);
99319976

9932-
ret = can_nocow_extent(inode, start, &len, NULL, false, true);
9977+
if (prev_extent_end > isize)
9978+
len = isize - key.offset;
9979+
else
9980+
len = btrfs_file_extent_num_bytes(leaf, ei);
9981+
9982+
backref_ctx->curr_leaf_bytenr = leaf->start;
9983+
9984+
/*
9985+
* Don't need the path anymore, release to avoid deadlocks when
9986+
* calling btrfs_is_data_extent_shared() because when joining a
9987+
* transaction it can block waiting for the current one's commit
9988+
* which in turn may be trying to lock the same leaf to flush
9989+
* delayed items for example.
9990+
*/
9991+
btrfs_release_path(path);
9992+
9993+
ret = btrfs_is_data_extent_shared(BTRFS_I(inode), disk_bytenr,
9994+
extent_gen, backref_ctx);
99339995
if (ret < 0) {
99349996
goto out;
9935-
} else if (ret) {
9936-
ret = 0;
9937-
} else {
9997+
} else if (ret > 0) {
99389998
btrfs_warn(fs_info,
99399999
"swapfile must not be copy-on-write");
994010000
ret = -EINVAL;
@@ -9969,7 +10029,6 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
996910029

997010030
physical_block_start = (map->stripes[0].physical +
997110031
(logical_block_start - map->start));
9972-
len = min(len, map->chunk_len - (logical_block_start - map->start));
997310032
btrfs_free_chunk_map(map);
997410033
map = NULL;
997510034

@@ -10010,20 +10069,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
1001010069
if (ret)
1001110070
goto out;
1001210071
}
10013-
bsi.start = start;
10072+
bsi.start = key.offset;
1001410073
bsi.block_start = physical_block_start;
1001510074
bsi.block_len = len;
1001610075
}
1001710076

10018-
start += len;
10077+
if (fatal_signal_pending(current)) {
10078+
ret = -EINTR;
10079+
goto out;
10080+
}
10081+
10082+
cond_resched();
1001910083
}
1002010084

1002110085
if (bsi.block_len)
1002210086
ret = btrfs_add_swap_extent(sis, &bsi);
1002310087

1002410088
out:
10025-
if (!IS_ERR_OR_NULL(em))
10026-
free_extent_map(em);
1002710089
if (!IS_ERR_OR_NULL(map))
1002810090
btrfs_free_chunk_map(map);
1002910091

@@ -10036,6 +10098,10 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
1003610098

1003710099
btrfs_exclop_finish(fs_info);
1003810100

10101+
out_unlock_mmap:
10102+
up_write(&BTRFS_I(inode)->i_mmap_lock);
10103+
btrfs_free_backref_share_ctx(backref_ctx);
10104+
btrfs_free_path(path);
1003910105
if (ret)
1004010106
return ret;
1004110107

fs/btrfs/qgroup.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1121,6 +1121,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
11211121
fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON;
11221122
if (simple) {
11231123
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE;
1124+
btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA);
11241125
btrfs_set_qgroup_status_enable_gen(leaf, ptr, trans->transid);
11251126
} else {
11261127
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
@@ -1254,8 +1255,6 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
12541255
spin_lock(&fs_info->qgroup_lock);
12551256
fs_info->quota_root = quota_root;
12561257
set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
1257-
if (simple)
1258-
btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA);
12591258
spin_unlock(&fs_info->qgroup_lock);
12601259

12611260
/* Skip rescan for simple qgroups. */

fs/btrfs/relocation.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2902,6 +2902,7 @@ static int relocate_one_folio(struct reloc_control *rc,
29022902
const bool use_rst = btrfs_need_stripe_tree_update(fs_info, rc->block_group->flags);
29032903

29042904
ASSERT(index <= last_index);
2905+
again:
29052906
folio = filemap_lock_folio(inode->i_mapping, index);
29062907
if (IS_ERR(folio)) {
29072908

@@ -2937,6 +2938,11 @@ static int relocate_one_folio(struct reloc_control *rc,
29372938
ret = -EIO;
29382939
goto release_folio;
29392940
}
2941+
if (folio->mapping != inode->i_mapping) {
2942+
folio_unlock(folio);
2943+
folio_put(folio);
2944+
goto again;
2945+
}
29402946
}
29412947

29422948
/*

0 commit comments

Comments
 (0)