
Commit e06cc89

fdmanana authored and kdave committed
btrfs: fix data races when accessing the reserved amount of block reserves
At space_info.c we have several places where we access the ->reserved field of a block reserve without taking the block reserve's spinlock first, which makes KCSAN warn about a data race since that field is always updated while holding the spinlock.

The reports from KCSAN are like the following:

[117.193526] BUG: KCSAN: data-race in btrfs_block_rsv_release [btrfs] / need_preemptive_reclaim [btrfs]

[117.195148] read to 0x000000017f587190 of 8 bytes by task 6303 on cpu 3:
[117.195172]  need_preemptive_reclaim+0x222/0x2f0 [btrfs]
[117.195992]  __reserve_bytes+0xbb0/0xdc8 [btrfs]
[117.196807]  btrfs_reserve_metadata_bytes+0x4c/0x120 [btrfs]
[117.197620]  btrfs_block_rsv_add+0x78/0xa8 [btrfs]
[117.198434]  btrfs_delayed_update_inode+0x154/0x368 [btrfs]
[117.199300]  btrfs_update_inode+0x108/0x1c8 [btrfs]
[117.200122]  btrfs_dirty_inode+0xb4/0x140 [btrfs]
[117.200937]  btrfs_update_time+0x8c/0xb0 [btrfs]
[117.201754]  touch_atime+0x16c/0x1e0
[117.201789]  filemap_read+0x674/0x728
[117.201823]  btrfs_file_read_iter+0xf8/0x410 [btrfs]
[117.202653]  vfs_read+0x2b6/0x498
[117.203454]  ksys_read+0xa2/0x150
[117.203473]  __s390x_sys_read+0x68/0x88
[117.203495]  do_syscall+0x1c6/0x210
[117.203517]  __do_syscall+0xc8/0xf0
[117.203539]  system_call+0x70/0x98

[117.203579] write to 0x000000017f587190 of 8 bytes by task 11 on cpu 0:
[117.203604]  btrfs_block_rsv_release+0x2e8/0x578 [btrfs]
[117.204432]  btrfs_delayed_inode_release_metadata+0x7c/0x1d0 [btrfs]
[117.205259]  __btrfs_update_delayed_inode+0x37c/0x5e0 [btrfs]
[117.206093]  btrfs_async_run_delayed_root+0x356/0x498 [btrfs]
[117.206917]  btrfs_work_helper+0x160/0x7a0 [btrfs]
[117.207738]  process_one_work+0x3b6/0x838
[117.207768]  worker_thread+0x75e/0xb10
[117.207797]  kthread+0x21a/0x230
[117.207830]  __ret_from_fork+0x6c/0xb8
[117.207861]  ret_from_fork+0xa/0x30

So add a helper to get the reserved amount of a block reserve while holding the lock. The value may not be up to date anymore by the time it is used in need_preemptive_reclaim() and btrfs_preempt_reclaim_metadata_space(), but that's ok, since the worst it can do is cause more reclaim work to be done sooner rather than later. Reading the field while holding the lock, instead of using the data_race() annotation, is done in order to prevent load tearing.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 5897710 commit e06cc89
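
For context, the fix boils down to a single concurrency pattern: take the reserve's spinlock around one read so the 64-bit value cannot be torn, and accept that the snapshot may already be stale the moment the lock is dropped. Below is a minimal userspace analogy of that pattern, not the kernel code itself; the names (struct rsv, rsv_reserved()) are invented for illustration and a pthread mutex stands in for the kernel spinlock.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

/* Invented stand-in for a block reserve: a 64-bit counter guarded by a lock. */
struct rsv {
	pthread_mutex_t lock;
	uint64_t reserved;
};

/*
 * Read the counter while holding the lock. The value may be stale as soon as
 * the lock is dropped, but it can never be a torn (half-updated) load, which
 * is the property the new btrfs helper relies on.
 */
static uint64_t rsv_reserved(struct rsv *r)
{
	uint64_t ret;

	pthread_mutex_lock(&r->lock);
	ret = r->reserved;
	pthread_mutex_unlock(&r->lock);

	return ret;
}

int main(void)
{
	struct rsv r = { .lock = PTHREAD_MUTEX_INITIALIZER, .reserved = 0 };

	/* Writers always update the field under the same lock. */
	pthread_mutex_lock(&r.lock);
	r.reserved += 4096;
	pthread_mutex_unlock(&r.lock);

	printf("reserved: %llu\n", (unsigned long long)rsv_reserved(&r));
	return 0;
}

Build with "cc -pthread example.c" to try the sketch; readers that skip the lock (or merely annotate the racy access) trade that tearing guarantee away.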

File tree: 2 files changed, +29, -13 lines

fs/btrfs/block-rsv.h

Lines changed: 16 additions & 0 deletions

@@ -101,4 +101,20 @@ static inline bool btrfs_block_rsv_full(const struct btrfs_block_rsv *rsv)
 	return data_race(rsv->full);
 }
 
+/*
+ * Get the reserved amount of a block reserve in a context where getting a
+ * stale value is acceptable, instead of accessing it directly and triggering
+ * a data race warning from KCSAN.
+ */
+static inline u64 btrfs_block_rsv_reserved(struct btrfs_block_rsv *rsv)
+{
+	u64 ret;
+
+	spin_lock(&rsv->lock);
+	ret = rsv->reserved;
+	spin_unlock(&rsv->lock);
+
+	return ret;
+}
+
 #endif /* BTRFS_BLOCK_RSV_H */
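
For comparison, the existing btrfs_block_rsv_full() helper just above reads its field through the kernel's data_race() annotation, which only tells KCSAN that the race is intentional. The commit message explains that the lock is taken here instead to rule out load tearing of the 64-bit reserved value. A data_race()-based variant, sketched below purely to illustrate the alternative that was not chosen (the name btrfs_block_rsv_reserved_unlocked() is hypothetical and not part of this commit), would have looked roughly like this:

/* Hypothetical sketch of the rejected alternative: annotate instead of lock. */
static inline u64 btrfs_block_rsv_reserved_unlocked(struct btrfs_block_rsv *rsv)
{
	/*
	 * data_race() silences the KCSAN report, but the load is still a
	 * plain access and is not guaranteed to be free of tearing.
	 */
	return data_race(rsv->reserved);
}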

fs/btrfs/space-info.c

Lines changed: 13 additions & 13 deletions

@@ -856,7 +856,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
 static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
 				    struct btrfs_space_info *space_info)
 {
-	u64 global_rsv_size = fs_info->global_block_rsv.reserved;
+	const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv);
 	u64 ordered, delalloc;
 	u64 thresh;
 	u64 used;
@@ -956,8 +956,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
 	ordered = percpu_counter_read_positive(&fs_info->ordered_bytes) >> 1;
 	delalloc = percpu_counter_read_positive(&fs_info->delalloc_bytes);
 	if (ordered >= delalloc)
-		used += fs_info->delayed_refs_rsv.reserved +
-			fs_info->delayed_block_rsv.reserved;
+		used += btrfs_block_rsv_reserved(&fs_info->delayed_refs_rsv) +
+			btrfs_block_rsv_reserved(&fs_info->delayed_block_rsv);
 	else
 		used += space_info->bytes_may_use - global_rsv_size;
 
@@ -1173,7 +1173,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
 	enum btrfs_flush_state flush;
 	u64 delalloc_size = 0;
 	u64 to_reclaim, block_rsv_size;
-	u64 global_rsv_size = global_rsv->reserved;
+	const u64 global_rsv_size = btrfs_block_rsv_reserved(global_rsv);
 
 	loops++;
 
@@ -1185,9 +1185,9 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
 	 * assume it's tied up in delalloc reservations.
 	 */
 	block_rsv_size = global_rsv_size +
-			 delayed_block_rsv->reserved +
-			 delayed_refs_rsv->reserved +
-			 trans_rsv->reserved;
+			 btrfs_block_rsv_reserved(delayed_block_rsv) +
+			 btrfs_block_rsv_reserved(delayed_refs_rsv) +
+			 btrfs_block_rsv_reserved(trans_rsv);
 	if (block_rsv_size < space_info->bytes_may_use)
 		delalloc_size = space_info->bytes_may_use - block_rsv_size;
 
@@ -1207,16 +1207,16 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
 		to_reclaim = delalloc_size;
 		flush = FLUSH_DELALLOC;
 	} else if (space_info->bytes_pinned >
-		   (delayed_block_rsv->reserved +
-		    delayed_refs_rsv->reserved)) {
+		   (btrfs_block_rsv_reserved(delayed_block_rsv) +
+		    btrfs_block_rsv_reserved(delayed_refs_rsv))) {
 		to_reclaim = space_info->bytes_pinned;
 		flush = COMMIT_TRANS;
-	} else if (delayed_block_rsv->reserved >
-		   delayed_refs_rsv->reserved) {
-		to_reclaim = delayed_block_rsv->reserved;
+	} else if (btrfs_block_rsv_reserved(delayed_block_rsv) >
+		   btrfs_block_rsv_reserved(delayed_refs_rsv)) {
+		to_reclaim = btrfs_block_rsv_reserved(delayed_block_rsv);
 		flush = FLUSH_DELAYED_ITEMS_NR;
 	} else {
-		to_reclaim = delayed_refs_rsv->reserved;
+		to_reclaim = btrfs_block_rsv_reserved(delayed_refs_rsv);
 		flush = FLUSH_DELAYED_REFS_NR;
 	}
 