Skip to content

Commit 4667025

Browse files
committed
Merge tag 'for-6.5-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba: "Stable fixes: - fix race between balance and cancel/pause - various iput() fixes - fix use-after-free of new block group that became unused - fix warning when putting transaction with qgroups enabled after abort - fix crash in subpage mode when page could be released between map and map read - when scrubbing raid56 verify the P/Q stripes unconditionally - fix minor memory leak in zoned mode when a block group with an unexpected superblock is found Regression fixes: - fix ordered extent split error handling when submitting direct IO - use irq-safe locking when adding delayed iputs" * tag 'for-6.5-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: fix warning when putting transaction with qgroups enabled after abort btrfs: fix ordered extent split error handling in btrfs_dio_submit_io btrfs: set_page_extent_mapped after read_folio in btrfs_cont_expand btrfs: raid56: always verify the P/Q contents for scrub btrfs: use irq safe locking when running and adding delayed iputs btrfs: fix iput() on error pointer after error during orphan cleanup btrfs: fix double iput() on inode after an error during orphan cleanup btrfs: zoned: fix memory leak after finding block group with super blocks btrfs: fix use-after-free of new block group that became unused btrfs: be a bit more careful when setting mirror_num_ret in btrfs_map_block btrfs: fix race between balance and cancel/pause
2 parents 2922800 + aa84ce8 commit 4667025

File tree

6 files changed

+79
-46
lines changed

6 files changed

+79
-46
lines changed

fs/btrfs/block-group.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1640,13 +1640,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
16401640
{
16411641
struct btrfs_fs_info *fs_info = bg->fs_info;
16421642

1643-
trace_btrfs_add_unused_block_group(bg);
16441643
spin_lock(&fs_info->unused_bgs_lock);
16451644
if (list_empty(&bg->bg_list)) {
16461645
btrfs_get_block_group(bg);
1646+
trace_btrfs_add_unused_block_group(bg);
16471647
list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
1648-
} else {
1648+
} else if (!test_bit(BLOCK_GROUP_FLAG_NEW, &bg->runtime_flags)) {
16491649
/* Pull out the block group from the reclaim_bgs list. */
1650+
trace_btrfs_add_unused_block_group(bg);
16501651
list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
16511652
}
16521653
spin_unlock(&fs_info->unused_bgs_lock);
@@ -2087,6 +2088,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
20872088

20882089
/* Shouldn't have super stripes in sequential zones */
20892090
if (zoned && nr) {
2091+
kfree(logical);
20902092
btrfs_err(fs_info,
20912093
"zoned: block group %llu must not contain super block",
20922094
cache->start);
@@ -2668,6 +2670,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
26682670
next:
26692671
btrfs_delayed_refs_rsv_release(fs_info, 1);
26702672
list_del_init(&block_group->bg_list);
2673+
clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);
26712674
}
26722675
btrfs_trans_release_chunk_metadata(trans);
26732676
}
@@ -2707,6 +2710,13 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
27072710
if (!cache)
27082711
return ERR_PTR(-ENOMEM);
27092712

2713+
/*
2714+
* Mark it as new before adding it to the rbtree of block groups or any
2715+
* list, so that no other task finds it and calls btrfs_mark_bg_unused()
2716+
* before the new flag is set.
2717+
*/
2718+
set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags);
2719+
27102720
cache->length = size;
27112721
set_free_space_tree_thresholds(cache);
27122722
cache->flags = type;

fs/btrfs/block-group.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,11 @@ enum btrfs_block_group_flags {
7070
BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
7171
/* Indicate that the block group is placed on a sequential zone */
7272
BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
73+
/*
74+
* Indicate that block group is in the list of new block groups of a
75+
* transaction.
76+
*/
77+
BLOCK_GROUP_FLAG_NEW,
7378
};
7479

7580
enum btrfs_caching_type {

fs/btrfs/inode.c

Lines changed: 52 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3482,15 +3482,21 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
34823482
void btrfs_add_delayed_iput(struct btrfs_inode *inode)
34833483
{
34843484
struct btrfs_fs_info *fs_info = inode->root->fs_info;
3485+
unsigned long flags;
34853486

34863487
if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1))
34873488
return;
34883489

34893490
atomic_inc(&fs_info->nr_delayed_iputs);
3490-
spin_lock(&fs_info->delayed_iput_lock);
3491+
/*
3492+
* Need to be irq safe here because we can be called from either an irq
3493+
* context (see bio.c and btrfs_put_ordered_extent()) or a non-irq
3494+
* context.
3495+
*/
3496+
spin_lock_irqsave(&fs_info->delayed_iput_lock, flags);
34913497
ASSERT(list_empty(&inode->delayed_iput));
34923498
list_add_tail(&inode->delayed_iput, &fs_info->delayed_iputs);
3493-
spin_unlock(&fs_info->delayed_iput_lock);
3499+
spin_unlock_irqrestore(&fs_info->delayed_iput_lock, flags);
34943500
if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
34953501
wake_up_process(fs_info->cleaner_kthread);
34963502
}
@@ -3499,37 +3505,46 @@ static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
34993505
struct btrfs_inode *inode)
35003506
{
35013507
list_del_init(&inode->delayed_iput);
3502-
spin_unlock(&fs_info->delayed_iput_lock);
3508+
spin_unlock_irq(&fs_info->delayed_iput_lock);
35033509
iput(&inode->vfs_inode);
35043510
if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
35053511
wake_up(&fs_info->delayed_iputs_wait);
3506-
spin_lock(&fs_info->delayed_iput_lock);
3512+
spin_lock_irq(&fs_info->delayed_iput_lock);
35073513
}
35083514

35093515
static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
35103516
struct btrfs_inode *inode)
35113517
{
35123518
if (!list_empty(&inode->delayed_iput)) {
3513-
spin_lock(&fs_info->delayed_iput_lock);
3519+
spin_lock_irq(&fs_info->delayed_iput_lock);
35143520
if (!list_empty(&inode->delayed_iput))
35153521
run_delayed_iput_locked(fs_info, inode);
3516-
spin_unlock(&fs_info->delayed_iput_lock);
3522+
spin_unlock_irq(&fs_info->delayed_iput_lock);
35173523
}
35183524
}
35193525

35203526
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
35213527
{
3522-
3523-
spin_lock(&fs_info->delayed_iput_lock);
3528+
/*
3529+
* btrfs_put_ordered_extent() can run in irq context (see bio.c), which
3530+
* calls btrfs_add_delayed_iput() and that needs to lock
3531+
* fs_info->delayed_iput_lock. So we need to disable irqs here to
3532+
* prevent a deadlock.
3533+
*/
3534+
spin_lock_irq(&fs_info->delayed_iput_lock);
35243535
while (!list_empty(&fs_info->delayed_iputs)) {
35253536
struct btrfs_inode *inode;
35263537

35273538
inode = list_first_entry(&fs_info->delayed_iputs,
35283539
struct btrfs_inode, delayed_iput);
35293540
run_delayed_iput_locked(fs_info, inode);
3530-
cond_resched_lock(&fs_info->delayed_iput_lock);
3541+
if (need_resched()) {
3542+
spin_unlock_irq(&fs_info->delayed_iput_lock);
3543+
cond_resched();
3544+
spin_lock_irq(&fs_info->delayed_iput_lock);
3545+
}
35313546
}
3532-
spin_unlock(&fs_info->delayed_iput_lock);
3547+
spin_unlock_irq(&fs_info->delayed_iput_lock);
35333548
}
35343549

35353550
/*
@@ -3659,11 +3674,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
36593674
found_key.type = BTRFS_INODE_ITEM_KEY;
36603675
found_key.offset = 0;
36613676
inode = btrfs_iget(fs_info->sb, last_objectid, root);
3662-
ret = PTR_ERR_OR_ZERO(inode);
3663-
if (ret && ret != -ENOENT)
3664-
goto out;
3677+
if (IS_ERR(inode)) {
3678+
ret = PTR_ERR(inode);
3679+
inode = NULL;
3680+
if (ret != -ENOENT)
3681+
goto out;
3682+
}
36653683

3666-
if (ret == -ENOENT && root == fs_info->tree_root) {
3684+
if (!inode && root == fs_info->tree_root) {
36673685
struct btrfs_root *dead_root;
36683686
int is_dead_root = 0;
36693687

@@ -3724,28 +3742,26 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
37243742
* deleted but wasn't. The inode number may have been reused,
37253743
* but either way, we can delete the orphan item.
37263744
*/
3727-
if (ret == -ENOENT || inode->i_nlink) {
3728-
if (!ret) {
3745+
if (!inode || inode->i_nlink) {
3746+
if (inode) {
37293747
ret = btrfs_drop_verity_items(BTRFS_I(inode));
37303748
iput(inode);
3749+
inode = NULL;
37313750
if (ret)
37323751
goto out;
37333752
}
37343753
trans = btrfs_start_transaction(root, 1);
37353754
if (IS_ERR(trans)) {
37363755
ret = PTR_ERR(trans);
3737-
iput(inode);
37383756
goto out;
37393757
}
37403758
btrfs_debug(fs_info, "auto deleting %Lu",
37413759
found_key.objectid);
37423760
ret = btrfs_del_orphan_item(trans, root,
37433761
found_key.objectid);
37443762
btrfs_end_transaction(trans);
3745-
if (ret) {
3746-
iput(inode);
3763+
if (ret)
37473764
goto out;
3748-
}
37493765
continue;
37503766
}
37513767

@@ -4847,9 +4863,6 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
48474863
ret = -ENOMEM;
48484864
goto out;
48494865
}
4850-
ret = set_page_extent_mapped(page);
4851-
if (ret < 0)
4852-
goto out_unlock;
48534866

48544867
if (!PageUptodate(page)) {
48554868
ret = btrfs_read_folio(NULL, page_folio(page));
@@ -4864,6 +4877,17 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
48644877
goto out_unlock;
48654878
}
48664879
}
4880+
4881+
/*
4882+
* We unlock the page after the io is completed and then re-lock it
4883+
* above. release_folio() could have come in between that and cleared
4884+
* PagePrivate(), but left the page in the mapping. Set the page mapped
4885+
* here to make sure it's properly set for the subpage stuff.
4886+
*/
4887+
ret = set_page_extent_mapped(page);
4888+
if (ret < 0)
4889+
goto out_unlock;
4890+
48674891
wait_on_page_writeback(page);
48684892

48694893
lock_extent(io_tree, block_start, block_end, &cached_state);
@@ -7849,8 +7873,11 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
78497873

78507874
ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered);
78517875
if (ret) {
7852-
bbio->bio.bi_status = errno_to_blk_status(ret);
7853-
btrfs_dio_end_io(bbio);
7876+
btrfs_finish_ordered_extent(dio_data->ordered, NULL,
7877+
file_offset, dip->bytes,
7878+
!ret);
7879+
bio->bi_status = errno_to_blk_status(ret);
7880+
iomap_dio_bio_end_io(bio);
78547881
return;
78557882
}
78567883
}

fs/btrfs/qgroup.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4445,4 +4445,5 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
44454445
ulist_free(entry->old_roots);
44464446
kfree(entry);
44474447
}
4448+
*root = RB_ROOT;
44484449
}

fs/btrfs/raid56.c

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ static void rmw_rbio_work_locked(struct work_struct *work);
7171
static void index_rbio_pages(struct btrfs_raid_bio *rbio);
7272
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
7373

74-
static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check);
74+
static int finish_parity_scrub(struct btrfs_raid_bio *rbio);
7575
static void scrub_rbio_work_locked(struct work_struct *work);
7676

7777
static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
@@ -2404,7 +2404,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
24042404
return 0;
24052405
}
24062406

2407-
static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
2407+
static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
24082408
{
24092409
struct btrfs_io_context *bioc = rbio->bioc;
24102410
const u32 sectorsize = bioc->fs_info->sectorsize;
@@ -2445,9 +2445,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
24452445
*/
24462446
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
24472447

2448-
if (!need_check)
2449-
goto writeback;
2450-
24512448
p_sector.page = alloc_page(GFP_NOFS);
24522449
if (!p_sector.page)
24532450
return -ENOMEM;
@@ -2516,7 +2513,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
25162513
q_sector.page = NULL;
25172514
}
25182515

2519-
writeback:
25202516
/*
25212517
* time to start writing. Make bios for everything from the
25222518
* higher layers (the bio_list in our rbio) and our p/q. Ignore
@@ -2699,7 +2695,6 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio)
26992695

27002696
static void scrub_rbio(struct btrfs_raid_bio *rbio)
27012697
{
2702-
bool need_check = false;
27032698
int sector_nr;
27042699
int ret;
27052700

@@ -2722,7 +2717,7 @@ static void scrub_rbio(struct btrfs_raid_bio *rbio)
27222717
* We have every sector properly prepared. Can finish the scrub
27232718
* and writeback the good content.
27242719
*/
2725-
ret = finish_parity_scrub(rbio, need_check);
2720+
ret = finish_parity_scrub(rbio);
27262721
wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
27272722
for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) {
27282723
int found_errors;

fs/btrfs/volumes.c

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4078,14 +4078,6 @@ static int alloc_profile_is_valid(u64 flags, int extended)
40784078
return has_single_bit_set(flags);
40794079
}
40804080

4081-
static inline int balance_need_close(struct btrfs_fs_info *fs_info)
4082-
{
4083-
/* cancel requested || normal exit path */
4084-
return atomic_read(&fs_info->balance_cancel_req) ||
4085-
(atomic_read(&fs_info->balance_pause_req) == 0 &&
4086-
atomic_read(&fs_info->balance_cancel_req) == 0);
4087-
}
4088-
40894081
/*
40904082
* Validate target profile against allowed profiles and return true if it's OK.
40914083
* Otherwise print the error message and return false.
@@ -4275,6 +4267,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
42754267
u64 num_devices;
42764268
unsigned seq;
42774269
bool reducing_redundancy;
4270+
bool paused = false;
42784271
int i;
42794272

42804273
if (btrfs_fs_closing(fs_info) ||
@@ -4405,6 +4398,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
44054398
if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) {
44064399
btrfs_info(fs_info, "balance: paused");
44074400
btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED);
4401+
paused = true;
44084402
}
44094403
/*
44104404
* Balance can be canceled by:
@@ -4433,8 +4427,8 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
44334427
btrfs_update_ioctl_balance_args(fs_info, bargs);
44344428
}
44354429

4436-
if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
4437-
balance_need_close(fs_info)) {
4430+
/* We didn't pause, we can clean everything up. */
4431+
if (!paused) {
44384432
reset_balance_state(fs_info);
44394433
btrfs_exclop_finish(fs_info);
44404434
}
@@ -6404,7 +6398,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
64046398
(op == BTRFS_MAP_READ || !dev_replace_is_ongoing ||
64056399
!dev_replace->tgtdev)) {
64066400
set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
6407-
*mirror_num_ret = mirror_num;
6401+
if (mirror_num_ret)
6402+
*mirror_num_ret = mirror_num;
64086403
*bioc_ret = NULL;
64096404
ret = 0;
64106405
goto out;

0 commit comments

Comments
 (0)