Skip to content

Commit 86286e4

Browse files
committed
Merge tag 'for-5.17-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba: "A few fixes and error handling improvements: - fix deadlock between quota disable and qgroup rescan worker - fix use-after-free after failure to create a snapshot - skip warning on unmount after log cleanup failure - don't start transaction for scrub if the fs is mounted read-only - tree checker verifies item sizes" * tag 'for-5.17-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: skip reserved bytes warning on unmount after log cleanup failure btrfs: fix use of uninitialized variable at rm device ioctl btrfs: fix use-after-free after failure to create a snapshot btrfs: tree-checker: check item_size for dev_item btrfs: tree-checker: check item_size for inode_item btrfs: fix deadlock between quota disable and qgroup rescan worker btrfs: don't start transaction for scrub if the fs is mounted read-only
2 parents b0bc0cb + 40cdc50 commit 86286e4

File tree

8 files changed

+128
-9
lines changed

8 files changed

+128
-9
lines changed

fs/btrfs/block-group.c

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,16 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
124124
{
125125
if (refcount_dec_and_test(&cache->refs)) {
126126
WARN_ON(cache->pinned > 0);
127-
WARN_ON(cache->reserved > 0);
127+
/*
128+
* If there was a failure to cleanup a log tree, very likely due
129+
* to an IO failure on a writeback attempt of one or more of its
130+
* extent buffers, we could not do proper (and cheap) unaccounting
131+
* of their reserved space, so don't warn on reserved > 0 in that
132+
* case.
133+
*/
134+
if (!(cache->flags & BTRFS_BLOCK_GROUP_METADATA) ||
135+
!BTRFS_FS_LOG_CLEANUP_ERROR(cache->fs_info))
136+
WARN_ON(cache->reserved > 0);
128137

129138
/*
130139
* A block_group shouldn't be on the discard_list anymore.
@@ -2544,6 +2553,19 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
25442553
int ret;
25452554
bool dirty_bg_running;
25462555

2556+
/*
2557+
* This can only happen when we are doing read-only scrub on read-only
2558+
* mount.
2559+
* In that case we should not start a new transaction on read-only fs.
2560+
* Thus here we skip all chunk allocations.
2561+
*/
2562+
if (sb_rdonly(fs_info->sb)) {
2563+
mutex_lock(&fs_info->ro_block_group_mutex);
2564+
ret = inc_block_group_ro(cache, 0);
2565+
mutex_unlock(&fs_info->ro_block_group_mutex);
2566+
return ret;
2567+
}
2568+
25472569
do {
25482570
trans = btrfs_join_transaction(root);
25492571
if (IS_ERR(trans))
@@ -3974,9 +3996,22 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
39743996
* important and indicates a real bug if this happens.
39753997
*/
39763998
if (WARN_ON(space_info->bytes_pinned > 0 ||
3977-
space_info->bytes_reserved > 0 ||
39783999
space_info->bytes_may_use > 0))
39794000
btrfs_dump_space_info(info, space_info, 0, 0);
4001+
4002+
/*
4003+
* If there was a failure to cleanup a log tree, very likely due
4004+
* to an IO failure on a writeback attempt of one or more of its
4005+
* extent buffers, we could not do proper (and cheap) unaccounting
4006+
* of their reserved space, so don't warn on bytes_reserved > 0 in
4007+
* that case.
4008+
*/
4009+
if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
4010+
!BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
4011+
if (WARN_ON(space_info->bytes_reserved > 0))
4012+
btrfs_dump_space_info(info, space_info, 0, 0);
4013+
}
4014+
39804015
WARN_ON(space_info->reclaim_size > 0);
39814016
list_del(&space_info->list);
39824017
btrfs_sysfs_remove_space_info(space_info);

fs/btrfs/ctree.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,9 @@ enum {
145145
BTRFS_FS_STATE_DUMMY_FS_INFO,
146146

147147
BTRFS_FS_STATE_NO_CSUMS,
148+
149+
/* Indicates there was an error cleaning up a log tree. */
150+
BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
148151
};
149152

150153
#define BTRFS_BACKREF_REV_MAX 256
@@ -3593,6 +3596,9 @@ do { \
35933596

35943597
#define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
35953598
&(fs_info)->fs_state)))
3599+
#define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \
3600+
(unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \
3601+
&(fs_info)->fs_state)))
35963602

35973603
__printf(5, 6)
35983604
__cold

fs/btrfs/ioctl.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -805,10 +805,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
805805
goto fail;
806806
}
807807

808-
spin_lock(&fs_info->trans_lock);
809-
list_add(&pending_snapshot->list,
810-
&trans->transaction->pending_snapshots);
811-
spin_unlock(&fs_info->trans_lock);
808+
trans->pending_snapshot = pending_snapshot;
812809

813810
ret = btrfs_commit_transaction(trans);
814811
if (ret)
@@ -3354,7 +3351,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
33543351
struct block_device *bdev = NULL;
33553352
fmode_t mode;
33563353
int ret;
3357-
bool cancel;
3354+
bool cancel = false;
33583355

33593356
if (!capable(CAP_SYS_ADMIN))
33603357
return -EPERM;

fs/btrfs/qgroup.c

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1185,9 +1185,24 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
11851185
struct btrfs_trans_handle *trans = NULL;
11861186
int ret = 0;
11871187

1188+
/*
1189+
* We need to have subvol_sem write locked, to prevent races between
1190+
* concurrent tasks trying to disable quotas, because we will unlock
1191+
* and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes.
1192+
*/
1193+
lockdep_assert_held_write(&fs_info->subvol_sem);
1194+
11881195
mutex_lock(&fs_info->qgroup_ioctl_lock);
11891196
if (!fs_info->quota_root)
11901197
goto out;
1198+
1199+
/*
1200+
* Request qgroup rescan worker to complete and wait for it. This wait
1201+
* must be done before transaction start for quota disable since it may
1202+
* deadlock with transaction by the qgroup rescan worker.
1203+
*/
1204+
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
1205+
btrfs_qgroup_wait_for_completion(fs_info, false);
11911206
mutex_unlock(&fs_info->qgroup_ioctl_lock);
11921207

11931208
/*
@@ -1205,14 +1220,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
12051220
if (IS_ERR(trans)) {
12061221
ret = PTR_ERR(trans);
12071222
trans = NULL;
1223+
set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
12081224
goto out;
12091225
}
12101226

12111227
if (!fs_info->quota_root)
12121228
goto out;
12131229

1214-
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
1215-
btrfs_qgroup_wait_for_completion(fs_info, false);
12161230
spin_lock(&fs_info->qgroup_lock);
12171231
quota_root = fs_info->quota_root;
12181232
fs_info->quota_root = NULL;
@@ -3383,6 +3397,9 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
33833397
btrfs_warn(fs_info,
33843398
"qgroup rescan init failed, qgroup is not enabled");
33853399
ret = -EINVAL;
3400+
} else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
3401+
/* Quota disable is in progress */
3402+
ret = -EBUSY;
33863403
}
33873404

33883405
if (ret) {

fs/btrfs/transaction.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2000,6 +2000,27 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
20002000
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
20012001
}
20022002

2003+
/*
2004+
* Add a pending snapshot associated with the given transaction handle to the
2005+
* respective handle. This must be called after the transaction commit started
2006+
* and while holding fs_info->trans_lock.
2007+
* This serves to guarantee a caller of btrfs_commit_transaction() that it can
2008+
* safely free the pending snapshot pointer in case btrfs_commit_transaction()
2009+
* returns an error.
2010+
*/
2011+
static void add_pending_snapshot(struct btrfs_trans_handle *trans)
2012+
{
2013+
struct btrfs_transaction *cur_trans = trans->transaction;
2014+
2015+
if (!trans->pending_snapshot)
2016+
return;
2017+
2018+
lockdep_assert_held(&trans->fs_info->trans_lock);
2019+
ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START);
2020+
2021+
list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
2022+
}
2023+
20032024
int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
20042025
{
20052026
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -2073,6 +2094,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
20732094
if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
20742095
enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
20752096

2097+
add_pending_snapshot(trans);
2098+
20762099
spin_unlock(&fs_info->trans_lock);
20772100
refcount_inc(&cur_trans->use_count);
20782101

@@ -2163,6 +2186,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
21632186
* COMMIT_DOING so make sure to wait for num_writers to == 1 again.
21642187
*/
21652188
spin_lock(&fs_info->trans_lock);
2189+
add_pending_snapshot(trans);
21662190
cur_trans->state = TRANS_STATE_COMMIT_DOING;
21672191
spin_unlock(&fs_info->trans_lock);
21682192
wait_event(cur_trans->writer_wait,

fs/btrfs/transaction.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ struct btrfs_trans_handle {
123123
struct btrfs_transaction *transaction;
124124
struct btrfs_block_rsv *block_rsv;
125125
struct btrfs_block_rsv *orig_rsv;
126+
/* Set by a task that wants to create a snapshot. */
127+
struct btrfs_pending_snapshot *pending_snapshot;
126128
refcount_t use_count;
127129
unsigned int type;
128130
/*

fs/btrfs/tree-checker.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -965,13 +965,21 @@ static int check_dev_item(struct extent_buffer *leaf,
965965
struct btrfs_key *key, int slot)
966966
{
967967
struct btrfs_dev_item *ditem;
968+
const u32 item_size = btrfs_item_size(leaf, slot);
968969

969970
if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) {
970971
dev_item_err(leaf, slot,
971972
"invalid objectid: has=%llu expect=%llu",
972973
key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
973974
return -EUCLEAN;
974975
}
976+
977+
if (unlikely(item_size != sizeof(*ditem))) {
978+
dev_item_err(leaf, slot, "invalid item size: has %u expect %zu",
979+
item_size, sizeof(*ditem));
980+
return -EUCLEAN;
981+
}
982+
975983
ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
976984
if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) {
977985
dev_item_err(leaf, slot,
@@ -1007,6 +1015,7 @@ static int check_inode_item(struct extent_buffer *leaf,
10071015
struct btrfs_inode_item *iitem;
10081016
u64 super_gen = btrfs_super_generation(fs_info->super_copy);
10091017
u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
1018+
const u32 item_size = btrfs_item_size(leaf, slot);
10101019
u32 mode;
10111020
int ret;
10121021
u32 flags;
@@ -1016,6 +1025,12 @@ static int check_inode_item(struct extent_buffer *leaf,
10161025
if (unlikely(ret < 0))
10171026
return ret;
10181027

1028+
if (unlikely(item_size != sizeof(*iitem))) {
1029+
generic_err(leaf, slot, "invalid item size: has %u expect %zu",
1030+
item_size, sizeof(*iitem));
1031+
return -EUCLEAN;
1032+
}
1033+
10191034
iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
10201035

10211036
/* Here we use super block generation + 1 to handle log tree */

fs/btrfs/tree-log.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3414,6 +3414,29 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
34143414
if (log->node) {
34153415
ret = walk_log_tree(trans, log, &wc);
34163416
if (ret) {
3417+
/*
3418+
* We weren't able to traverse the entire log tree, the
3419+
* typical scenario is getting an -EIO when reading an
3420+
* extent buffer of the tree, due to a previous writeback
3421+
* failure of it.
3422+
*/
3423+
set_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
3424+
&log->fs_info->fs_state);
3425+
3426+
/*
3427+
* Some extent buffers of the log tree may still be dirty
3428+
* and not yet written back to storage, because we may
3429+
* have updates to a log tree without syncing a log tree,
3430+
* such as during rename and link operations. So flush
3431+
* them out and wait for their writeback to complete, so
3432+
* that we properly cleanup their state and pages.
3433+
*/
3434+
btrfs_write_marked_extents(log->fs_info,
3435+
&log->dirty_log_pages,
3436+
EXTENT_DIRTY | EXTENT_NEW);
3437+
btrfs_wait_tree_log_extents(log,
3438+
EXTENT_DIRTY | EXTENT_NEW);
3439+
34173440
if (trans)
34183441
btrfs_abort_transaction(trans, ret);
34193442
else

0 commit comments

Comments
 (0)