Skip to content

Commit 866e98a

Browse files
fdmanana authored and kdave committed
btrfs: use irq safe locking when running and adding delayed iputs
Running delayed iputs, which never happens in an irq context, needs to
lock the spinlock fs_info->delayed_iput_lock. When finishing bios for data
writes (irq context, bio.c) we call btrfs_put_ordered_extent() which needs
to add a delayed iput and for that it needs to acquire the spinlock
fs_info->delayed_iput_lock. Without disabling irqs when running delayed
iputs we can therefore deadlock on that spinlock. The same deadlock can
also happen when adding an inode to the delayed iputs list, since this can
be done outside an irq context as well.

Syzbot recently reported this, which results in the following trace:

  ================================
  WARNING: inconsistent lock state
  6.4.0-syzkaller-09904-ga507db1d8fdc #0 Not tainted
  --------------------------------
  inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage.
  btrfs-cleaner/16079 [HC0[0]:SC0[0]:HE1:SE1] takes:
  ffff888107804d20 (&fs_info->delayed_iput_lock){+.?.}-{2:2}, at: spin_lock include/linux/spinlock.h:350 [inline]
  ffff888107804d20 (&fs_info->delayed_iput_lock){+.?.}-{2:2}, at: btrfs_run_delayed_iputs+0x28/0xe0 fs/btrfs/inode.c:3523
  {IN-SOFTIRQ-W} state was registered at:
    lock_acquire kernel/locking/lockdep.c:5761 [inline]
    lock_acquire+0x1b1/0x520 kernel/locking/lockdep.c:5726
    __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline]
    _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154
    spin_lock include/linux/spinlock.h:350 [inline]
    btrfs_add_delayed_iput+0x128/0x390 fs/btrfs/inode.c:3490
    btrfs_put_ordered_extent fs/btrfs/ordered-data.c:559 [inline]
    btrfs_put_ordered_extent+0x2f6/0x610 fs/btrfs/ordered-data.c:547
    __btrfs_bio_end_io fs/btrfs/bio.c:118 [inline]
    __btrfs_bio_end_io+0x136/0x180 fs/btrfs/bio.c:112
    btrfs_orig_bbio_end_io+0x86/0x2b0 fs/btrfs/bio.c:163
    btrfs_simple_end_io+0x105/0x380 fs/btrfs/bio.c:378
    bio_endio+0x589/0x690 block/bio.c:1617
    req_bio_endio block/blk-mq.c:766 [inline]
    blk_update_request+0x5c5/0x1620 block/blk-mq.c:911
    blk_mq_end_request+0x59/0x680 block/blk-mq.c:1032
    lo_complete_rq+0x1c6/0x280 drivers/block/loop.c:370
    blk_complete_reqs+0xb3/0xf0 block/blk-mq.c:1110
    __do_softirq+0x1d4/0x905 kernel/softirq.c:553
    run_ksoftirqd kernel/softirq.c:921 [inline]
    run_ksoftirqd+0x31/0x60 kernel/softirq.c:913
    smpboot_thread_fn+0x659/0x9e0 kernel/smpboot.c:164
    kthread+0x344/0x440 kernel/kthread.c:389
    ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308
  irq event stamp: 39
  hardirqs last enabled at (39): [<ffffffff81d5ebc4>] __do_kmem_cache_free mm/slab.c:3558 [inline]
  hardirqs last enabled at (39): [<ffffffff81d5ebc4>] kmem_cache_free mm/slab.c:3582 [inline]
  hardirqs last enabled at (39): [<ffffffff81d5ebc4>] kmem_cache_free+0x244/0x370 mm/slab.c:3575
  hardirqs last disabled at (38): [<ffffffff81d5eb5e>] __do_kmem_cache_free mm/slab.c:3553 [inline]
  hardirqs last disabled at (38): [<ffffffff81d5eb5e>] kmem_cache_free mm/slab.c:3582 [inline]
  hardirqs last disabled at (38): [<ffffffff81d5eb5e>] kmem_cache_free+0x1de/0x370 mm/slab.c:3575
  softirqs last enabled at (0): [<ffffffff814ac99f>] copy_process+0x227f/0x75c0 kernel/fork.c:2448
  softirqs last disabled at (0): [<0000000000000000>] 0x0

  other info that might help us debug this:
   Possible unsafe locking scenario:

         CPU0
         ----
    lock(&fs_info->delayed_iput_lock);
    <Interrupt>
      lock(&fs_info->delayed_iput_lock);

   *** DEADLOCK ***

  1 lock held by btrfs-cleaner/16079:
   #0: ffff888107804860 (&fs_info->cleaner_mutex){+.+.}-{3:3}, at: cleaner_kthread+0x103/0x4b0 fs/btrfs/disk-io.c:1463

  stack backtrace:
  CPU: 3 PID: 16079 Comm: btrfs-cleaner Not tainted 6.4.0-syzkaller-09904-ga507db1d8fdc #0
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014
  Call Trace:
   <TASK>
   __dump_stack lib/dump_stack.c:88 [inline]
   dump_stack_lvl+0xd9/0x150 lib/dump_stack.c:106
   print_usage_bug kernel/locking/lockdep.c:3978 [inline]
   valid_state kernel/locking/lockdep.c:4020 [inline]
   mark_lock_irq kernel/locking/lockdep.c:4223 [inline]
   mark_lock.part.0+0x1102/0x1960 kernel/locking/lockdep.c:4685
   mark_lock kernel/locking/lockdep.c:4649 [inline]
   mark_usage kernel/locking/lockdep.c:4598 [inline]
   __lock_acquire+0x8e4/0x5e20 kernel/locking/lockdep.c:5098
   lock_acquire kernel/locking/lockdep.c:5761 [inline]
   lock_acquire+0x1b1/0x520 kernel/locking/lockdep.c:5726
   __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline]
   _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154
   spin_lock include/linux/spinlock.h:350 [inline]
   btrfs_run_delayed_iputs+0x28/0xe0 fs/btrfs/inode.c:3523
   cleaner_kthread+0x2e5/0x4b0 fs/btrfs/disk-io.c:1478
   kthread+0x344/0x440 kernel/kthread.c:389
   ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308
   </TASK>

So fix this by using spin_lock_irq() and spin_unlock_irq() when running
delayed iputs, and using spin_lock_irqsave() and spin_unlock_irqrestore()
when adding a delayed iput().

Reported-by: syzbot+da501a04be5ff533b102@syzkaller.appspotmail.com
Fixes: ec63b84 ("btrfs: add an ordered_extent pointer to struct btrfs_bio")
Link: https://lore.kernel.org/linux-btrfs/000000000000d5c89a05ffbd39dd@google.com/
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent cbaee87 commit 866e98a

File tree

1 file changed

+25
-10
lines changed

1 file changed

+25
-10
lines changed

fs/btrfs/inode.c

Lines changed: 25 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -3482,15 +3482,21 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
34823482
void btrfs_add_delayed_iput(struct btrfs_inode *inode)
34833483
{
34843484
struct btrfs_fs_info *fs_info = inode->root->fs_info;
3485+
unsigned long flags;
34853486

34863487
if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1))
34873488
return;
34883489

34893490
atomic_inc(&fs_info->nr_delayed_iputs);
3490-
spin_lock(&fs_info->delayed_iput_lock);
3491+
/*
3492+
* Need to be irq safe here because we can be called from either an irq
3493+
* context (see bio.c and btrfs_put_ordered_extent()) or a non-irq
3494+
* context.
3495+
*/
3496+
spin_lock_irqsave(&fs_info->delayed_iput_lock, flags);
34913497
ASSERT(list_empty(&inode->delayed_iput));
34923498
list_add_tail(&inode->delayed_iput, &fs_info->delayed_iputs);
3493-
spin_unlock(&fs_info->delayed_iput_lock);
3499+
spin_unlock_irqrestore(&fs_info->delayed_iput_lock, flags);
34943500
if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
34953501
wake_up_process(fs_info->cleaner_kthread);
34963502
}
@@ -3499,37 +3505,46 @@ static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
34993505
struct btrfs_inode *inode)
35003506
{
35013507
list_del_init(&inode->delayed_iput);
3502-
spin_unlock(&fs_info->delayed_iput_lock);
3508+
spin_unlock_irq(&fs_info->delayed_iput_lock);
35033509
iput(&inode->vfs_inode);
35043510
if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
35053511
wake_up(&fs_info->delayed_iputs_wait);
3506-
spin_lock(&fs_info->delayed_iput_lock);
3512+
spin_lock_irq(&fs_info->delayed_iput_lock);
35073513
}
35083514

35093515
static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
35103516
struct btrfs_inode *inode)
35113517
{
35123518
if (!list_empty(&inode->delayed_iput)) {
3513-
spin_lock(&fs_info->delayed_iput_lock);
3519+
spin_lock_irq(&fs_info->delayed_iput_lock);
35143520
if (!list_empty(&inode->delayed_iput))
35153521
run_delayed_iput_locked(fs_info, inode);
3516-
spin_unlock(&fs_info->delayed_iput_lock);
3522+
spin_unlock_irq(&fs_info->delayed_iput_lock);
35173523
}
35183524
}
35193525

35203526
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
35213527
{
3522-
3523-
spin_lock(&fs_info->delayed_iput_lock);
3528+
/*
3529+
* btrfs_put_ordered_extent() can run in irq context (see bio.c), which
3530+
* calls btrfs_add_delayed_iput() and that needs to lock
3531+
* fs_info->delayed_iput_lock. So we need to disable irqs here to
3532+
* prevent a deadlock.
3533+
*/
3534+
spin_lock_irq(&fs_info->delayed_iput_lock);
35243535
while (!list_empty(&fs_info->delayed_iputs)) {
35253536
struct btrfs_inode *inode;
35263537

35273538
inode = list_first_entry(&fs_info->delayed_iputs,
35283539
struct btrfs_inode, delayed_iput);
35293540
run_delayed_iput_locked(fs_info, inode);
3530-
cond_resched_lock(&fs_info->delayed_iput_lock);
3541+
if (need_resched()) {
3542+
spin_unlock_irq(&fs_info->delayed_iput_lock);
3543+
cond_resched();
3544+
spin_lock_irq(&fs_info->delayed_iput_lock);
3545+
}
35313546
}
3532-
spin_unlock(&fs_info->delayed_iput_lock);
3547+
spin_unlock_irq(&fs_info->delayed_iput_lock);
35333548
}
35343549

35353550
/*

0 commit comments

Comments
 (0)