Commit 0eb4aaa

Merge tag 'for-6.14-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
 "User visible changes, features:

   - rebuilding of the free space tree at mount time is done in more
     transactions, fix potential hangs when the transaction thread is
     blocked due to large amount of block groups

   - more read IO balancing strategies (experimental config), add two
     new ways how to select a device for read if the profiles allow
     that (all RAID1*), the current default selects the device by pid
     which is good on average but less performant for single reader
     workloads

       - select preferred device for all reads (namely for testing)

       - round-robin, balance reads across devices relevant for the
         requested IO range

   - add encoded write ioctl support to io_uring (read was added in
     6.12), basis for writing send stream using that instead of
     syscalls, non-blocking mode is not yet implemented

   - support FS_IOC_READ_VERITY_METADATA, applications can use the
     metadata to do their own verification

   - pass inode's i_write_hint to bios, for parity with other
     filesystems, ioctls F_GET_RW_HINT/F_SET_RW_HINT

  Core:

   - in zoned mode: allow to directly reclaim a block group by simply
     resetting it, then it can be reused and another block group does
     not need to be allocated

   - super block validation now also does more comprehensive sys array
     validation, adding it to the points where superblock is validated
     (post-read, pre-write)

   - subpage mode fixes:
       - fix double accounting of blocks due to some races
       - improved or fixed error handling in a few cases (compression,
         delalloc)

   - raid stripe tree:
       - fix various cases with extent range splitting or deleting
       - implement hole punching to extent range
       - reduce number of stripe tree lookups during bio submission
       - more self-tests

   - updated self-tests (delayed refs)

   - error handling improvements

   - cleanups, refactoring:
       - remove rest of backref caching infrastructure from relocation,
         not needed anymore
       - error message updates
       - remove unnecessary calls when extent buffer was marked dirty
       - unused parameter removal
       - code moved to new files

  Other code changes:

   - add rb_find_add_cached() to the rb-tree API"

* tag 'for-6.14-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (127 commits)
  btrfs: selftests: add a selftest for deleting two out of three extents
  btrfs: selftests: add test for punching a hole into 3 RAID stripe-extents
  btrfs: selftests: add selftest for punching holes into the RAID stripe extents
  btrfs: selftests: test RAID stripe-tree deletion spanning two items
  btrfs: selftests: don't split RAID extents in half
  btrfs: selftests: check for correct return value of failed lookup
  btrfs: don't use btrfs_set_item_key_safe on RAID stripe-extents
  btrfs: implement hole punching for RAID stripe extents
  btrfs: fix deletion of a range spanning parts two RAID stripe extents
  btrfs: fix tail delete of RAID stripe-extents
  btrfs: fix front delete range calculation for RAID stripe extents
  btrfs: assert RAID stripe-extent length is always greater than 0
  btrfs: don't try to delete RAID stripe-extents if we don't need to
  btrfs: selftests: correct RAID stripe-tree feature flag setting
  btrfs: add io_uring interface for encoded writes
  btrfs: remove the unused locked_folio parameter from btrfs_cleanup_ordered_extents()
  btrfs: add extra error messages for delalloc range related errors
  btrfs: subpage: dump the involved bitmap when ASSERT() failed
  btrfs: subpage: fix the bitmap dump of the locked flags
  btrfs: do proper folio cleanup when run_delalloc_nocow() failed
  ...
2 parents 1851bcc + 9d0c23d commit 0eb4aaa
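
The rb-tree API addition named in the log above, rb_find_add_cached(), replaces open-coded search-and-insert walks; the fs/btrfs/backref.c diff below is one of its users. A minimal sketch of the pattern, using a hypothetical struct item rather than the btrfs types:

#include <linux/rbtree.h>
#include <linux/types.h>

struct item {
    struct rb_node rbnode;
    u64 key;
};

static int item_cmp(const struct rb_node *new, const struct rb_node *exist)
{
    const struct item *a = rb_entry(new, struct item, rbnode);
    const struct item *b = rb_entry(exist, struct item, rbnode);

    if (a->key < b->key)
        return -1;
    if (a->key > b->key)
        return 1;
    return 0;
}

/* Returns the already-present item on a key collision, or NULL after inserting. */
static struct item *item_insert(struct rb_root_cached *root, struct item *new)
{
    struct rb_node *exist = rb_find_add_cached(&new->rbnode, root, item_cmp);

    return exist ? rb_entry(exist, struct item, rbnode) : NULL;
}

The helper keeps the leftmost-node cache current and hands back the colliding node, which is how prelim_ref_insert() below can merge duplicate refs in a single pass.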

63 files changed (+3748, -1340 lines)

fs/btrfs/Makefile

Lines changed: 1 addition & 1 deletion
@@ -44,4 +44,4 @@ btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
     tests/extent-buffer-tests.o tests/btrfs-tests.o \
     tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \
     tests/free-space-tree-tests.o tests/extent-map-tests.o \
-    tests/raid-stripe-tree-tests.o
+    tests/raid-stripe-tree-tests.o tests/delayed-refs-tests.o

fs/btrfs/async-thread.c

Lines changed: 3 additions & 3 deletions
@@ -18,7 +18,7 @@ enum {
 };

 #define NO_THRESHOLD (-1)
-#define DFT_THRESHOLD (32)
+#define DEFAULT_THRESHOLD (32)

 struct btrfs_workqueue {
     struct workqueue_struct *normal_wq;
@@ -94,9 +94,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,

     ret->limit_active = limit_active;
     if (thresh == 0)
-        thresh = DFT_THRESHOLD;
+        thresh = DEFAULT_THRESHOLD;
     /* For low threshold, disabling threshold is a better choice */
-    if (thresh < DFT_THRESHOLD) {
+    if (thresh < DEFAULT_THRESHOLD) {
         ret->current_active = limit_active;
         ret->thresh = NO_THRESHOLD;
     } else {

fs/btrfs/backref.c

Lines changed: 63 additions & 109 deletions
@@ -250,6 +250,21 @@ static int prelim_ref_compare(const struct prelim_ref *ref1,
     return 0;
 }

+static int prelim_ref_rb_add_cmp(const struct rb_node *new,
+                                 const struct rb_node *exist)
+{
+    const struct prelim_ref *ref_new =
+        rb_entry(new, struct prelim_ref, rbnode);
+    const struct prelim_ref *ref_exist =
+        rb_entry(exist, struct prelim_ref, rbnode);
+
+    /*
+     * prelim_ref_compare() expects the first parameter as the existing one,
+     * different from the rb_find_add_cached() order.
+     */
+    return prelim_ref_compare(ref_exist, ref_new);
+}
+
 static void update_share_count(struct share_check *sc, int oldcount,
                                int newcount, const struct prelim_ref *newref)
 {
@@ -278,55 +293,39 @@ static void prelim_ref_insert(const struct btrfs_fs_info *fs_info,
                               struct share_check *sc)
 {
     struct rb_root_cached *root;
-    struct rb_node **p;
-    struct rb_node *parent = NULL;
-    struct prelim_ref *ref;
-    int result;
-    bool leftmost = true;
+    struct rb_node *exist;

     root = &preftree->root;
-    p = &root->rb_root.rb_node;
+    exist = rb_find_add_cached(&newref->rbnode, root, prelim_ref_rb_add_cmp);
+    if (exist) {
+        struct prelim_ref *ref = rb_entry(exist, struct prelim_ref, rbnode);
+        /* Identical refs, merge them and free @newref */
+        struct extent_inode_elem *eie = ref->inode_list;

-    while (*p) {
-        parent = *p;
-        ref = rb_entry(parent, struct prelim_ref, rbnode);
-        result = prelim_ref_compare(ref, newref);
-        if (result < 0) {
-            p = &(*p)->rb_left;
-        } else if (result > 0) {
-            p = &(*p)->rb_right;
-            leftmost = false;
-        } else {
-            /* Identical refs, merge them and free @newref */
-            struct extent_inode_elem *eie = ref->inode_list;
-
-            while (eie && eie->next)
-                eie = eie->next;
+        while (eie && eie->next)
+            eie = eie->next;

-            if (!eie)
-                ref->inode_list = newref->inode_list;
-            else
-                eie->next = newref->inode_list;
-            trace_btrfs_prelim_ref_merge(fs_info, ref, newref,
-                                         preftree->count);
-            /*
-             * A delayed ref can have newref->count < 0.
-             * The ref->count is updated to follow any
-             * BTRFS_[ADD|DROP]_DELAYED_REF actions.
-             */
-            update_share_count(sc, ref->count,
-                               ref->count + newref->count, newref);
-            ref->count += newref->count;
-            free_pref(newref);
-            return;
-        }
+        if (!eie)
+            ref->inode_list = newref->inode_list;
+        else
+            eie->next = newref->inode_list;
+        trace_btrfs_prelim_ref_merge(fs_info, ref, newref,
+                                     preftree->count);
+        /*
+         * A delayed ref can have newref->count < 0.
+         * The ref->count is updated to follow any
+         * BTRFS_[ADD|DROP]_DELAYED_REF actions.
+         */
+        update_share_count(sc, ref->count,
+                           ref->count + newref->count, newref);
+        ref->count += newref->count;
+        free_pref(newref);
+        return;
     }

     update_share_count(sc, 0, newref->count, newref);
     preftree->count++;
     trace_btrfs_prelim_ref_insert(fs_info, newref, NULL, preftree->count);
-    rb_link_node(&newref->rbnode, parent, p);
-    rb_insert_color_cached(&newref->rbnode, root, leftmost);
 }

 /*
30223021
cache->rb_root = RB_ROOT;
30233022
for (i = 0; i < BTRFS_MAX_LEVEL; i++)
30243023
INIT_LIST_HEAD(&cache->pending[i]);
3025-
INIT_LIST_HEAD(&cache->changed);
3026-
INIT_LIST_HEAD(&cache->detached);
3027-
INIT_LIST_HEAD(&cache->leaves);
30283024
INIT_LIST_HEAD(&cache->pending_edge);
30293025
INIT_LIST_HEAD(&cache->useless_node);
30303026
cache->fs_info = fs_info;
@@ -3132,29 +3128,17 @@ void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
 void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
                                 struct btrfs_backref_node *node)
 {
-    struct btrfs_backref_node *upper;
     struct btrfs_backref_edge *edge;

     if (!node)
         return;

-    BUG_ON(!node->lowest && !node->detached);
     while (!list_empty(&node->upper)) {
         edge = list_entry(node->upper.next, struct btrfs_backref_edge,
                           list[LOWER]);
-        upper = edge->node[UPPER];
         list_del(&edge->list[LOWER]);
         list_del(&edge->list[UPPER]);
         btrfs_backref_free_edge(cache, edge);
-
-        /*
-         * Add the node to leaf node list if no other child block
-         * cached.
-         */
-        if (list_empty(&upper->lower)) {
-            list_add_tail(&upper->lower, &cache->leaves);
-            upper->lowest = 1;
-        }
     }

     btrfs_backref_drop_node(cache, node);
@@ -3166,33 +3150,13 @@ void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
 void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
 {
     struct btrfs_backref_node *node;
-    int i;
-
-    while (!list_empty(&cache->detached)) {
-        node = list_entry(cache->detached.next,
-                          struct btrfs_backref_node, list);
-        btrfs_backref_cleanup_node(cache, node);
-    }

-    while (!list_empty(&cache->leaves)) {
-        node = list_entry(cache->leaves.next,
-                          struct btrfs_backref_node, lower);
+    while ((node = rb_entry_safe(rb_first(&cache->rb_root),
+                                 struct btrfs_backref_node, rb_node)))
         btrfs_backref_cleanup_node(cache, node);
-    }

-    for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
-        while (!list_empty(&cache->pending[i])) {
-            node = list_first_entry(&cache->pending[i],
-                                    struct btrfs_backref_node,
-                                    list);
-            btrfs_backref_cleanup_node(cache, node);
-        }
-    }
     ASSERT(list_empty(&cache->pending_edge));
     ASSERT(list_empty(&cache->useless_node));
-    ASSERT(list_empty(&cache->changed));
-    ASSERT(list_empty(&cache->detached));
-    ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
     ASSERT(!cache->nr_nodes);
     ASSERT(!cache->nr_edges);
 }
@@ -3316,8 +3280,12 @@ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans,
     root = btrfs_get_fs_root(fs_info, ref_key->offset, false);
     if (IS_ERR(root))
         return PTR_ERR(root);
-    if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
-        cur->cowonly = 1;
+
+    /* We shouldn't be using backref cache for non-shareable roots. */
+    if (unlikely(!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))) {
+        btrfs_put_root(root);
+        return -EUCLEAN;
+    }

     if (btrfs_root_level(&root->root_item) == cur->level) {
         /* Tree root */
@@ -3403,8 +3371,15 @@ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans,
             goto out;
         }
         upper->owner = btrfs_header_owner(eb);
-        if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
-            upper->cowonly = 1;
+
+        /* We shouldn't be using backref cache for non shareable roots. */
+        if (unlikely(!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))) {
+            btrfs_put_root(root);
+            btrfs_backref_free_edge(cache, edge);
+            btrfs_backref_free_node(cache, upper);
+            ret = -EUCLEAN;
+            goto out;
+        }

         /*
          * If we know the block isn't shared we can avoid
@@ -3595,15 +3570,9 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,

     ASSERT(start->checked);

-    /* Insert this node to cache if it's not COW-only */
-    if (!start->cowonly) {
-        rb_node = rb_simple_insert(&cache->rb_root, start->bytenr,
-                                   &start->rb_node);
-        if (rb_node)
-            btrfs_backref_panic(cache->fs_info, start->bytenr,
-                                -EEXIST);
-        list_add_tail(&start->lower, &cache->leaves);
-    }
+    rb_node = rb_simple_insert(&cache->rb_root, start->bytenr, &start->rb_node);
+    if (rb_node)
+        btrfs_backref_panic(cache->fs_info, start->bytenr, -EEXIST);

     /*
      * Use breadth first search to iterate all related edges.
@@ -3642,11 +3611,6 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
          * parents have already been linked.
          */
         if (!RB_EMPTY_NODE(&upper->rb_node)) {
-            if (upper->lowest) {
-                list_del_init(&upper->lower);
-                upper->lowest = 0;
-            }
-
             list_add_tail(&edge->list[UPPER], &upper->lower);
             continue;
         }
@@ -3657,23 +3621,13 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
             return -EUCLEAN;
         }

-        /* Sanity check, COW-only node has non-COW-only parent */
-        if (start->cowonly != upper->cowonly) {
-            ASSERT(0);
+        rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
+                                   &upper->rb_node);
+        if (unlikely(rb_node)) {
+            btrfs_backref_panic(cache->fs_info, upper->bytenr, -EEXIST);
             return -EUCLEAN;
         }

-        /* Only cache non-COW-only (subvolume trees) tree blocks */
-        if (!upper->cowonly) {
-            rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
-                                       &upper->rb_node);
-            if (rb_node) {
-                btrfs_backref_panic(cache->fs_info,
-                                    upper->bytenr, -EEXIST);
-                return -EUCLEAN;
-            }
-        }
-
         list_add_tail(&edge->list[UPPER], &upper->lower);

         /*
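
With the leaves/changed/detached lists gone, btrfs_backref_release_cache() above drains every node straight from the rb-tree. A minimal sketch of that drain idiom, assuming a hypothetical node_item type (the btrfs version calls btrfs_backref_cleanup_node() instead of rb_erase()/kfree()):

#include <linux/rbtree.h>
#include <linux/slab.h>

struct node_item {
    struct rb_node rb_node;
    /* payload ... */
};

static void release_all(struct rb_root *root)
{
    struct node_item *item;

    /* rb_entry_safe() maps the NULL from an empty tree to a NULL item. */
    while ((item = rb_entry_safe(rb_first(root), struct node_item, rb_node))) {
        rb_erase(&item->rb_node, root);
        kfree(item);
    }
}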

fs/btrfs/backref.h

Lines changed: 6 additions & 10 deletions
@@ -318,6 +318,12 @@ struct btrfs_backref_node {
         u64 bytenr;
     }; /* Use rb_simple_node for search/insert */

+    /*
+     * This is a sanity check, whenever we COW a block we will update
+     * new_bytenr with it's current location, and we will check this in
+     * various places to validate that the cache makes sense, it shouldn't
+     * be used for anything else.
+     */
     u64 new_bytenr;
     /* Objectid of tree block owner, can be not uptodate */
     u64 owner;
@@ -335,10 +341,6 @@ struct btrfs_backref_node {
     struct extent_buffer *eb;
     /* Level of the tree block */
     unsigned int level:8;
-    /* Is the block in a non-shareable tree */
-    unsigned int cowonly:1;
-    /* 1 if no child node is in the cache */
-    unsigned int lowest:1;
     /* Is the extent buffer locked */
     unsigned int locked:1;
     /* Has the block been processed */
@@ -391,12 +393,6 @@ struct btrfs_backref_cache {
      * level blocks may not reflect the new location
      */
     struct list_head pending[BTRFS_MAX_LEVEL];
-    /* List of backref nodes with no child node */
-    struct list_head leaves;
-    /* List of blocks that have been COWed in current transaction */
-    struct list_head changed;
-    /* List of detached backref node. */
-    struct list_head detached;

     u64 last_trans;

fs/btrfs/bio.c

Lines changed: 9 additions & 2 deletions
@@ -453,6 +453,14 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
         (unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev),
         dev->devid, bio->bi_iter.bi_size);

+    /*
+     * Track reads if tracking is enabled; ignore I/O operations before the
+     * filesystem is fully initialized.
+     */
+    if (dev->fs_devices->collect_fs_stats && bio_op(bio) == REQ_OP_READ && dev->fs_info)
+        percpu_counter_add(&dev->fs_info->stats_read_blocks,
+                           bio->bi_iter.bi_size >> dev->fs_info->sectorsize_bits);
+
     if (bio->bi_opf & REQ_BTRFS_CGROUP_PUNT)
         blkcg_punt_bio_submit(bio);
     else
@@ -725,8 +733,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
         bio->bi_opf |= REQ_OP_ZONE_APPEND;
     }

-    if (is_data_bbio(bbio) && bioc &&
-        btrfs_need_stripe_tree_update(bioc->fs_info, bioc->map_type)) {
+    if (is_data_bbio(bbio) && bioc && bioc->use_rst) {
         /*
          * No locking for the list update, as we only add to
          * the list in the I/O submission path, and list
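
The new read accounting in btrfs_submit_dev_bio() above feeds a per-CPU counter (fs_info->stats_read_blocks) so the I/O hot path never contends on a shared cacheline. A rough sketch of the underlying percpu_counter pattern, with a hypothetical read_stats wrapper:

#include <linux/gfp.h>
#include <linux/percpu_counter.h>
#include <linux/types.h>

struct read_stats {
    struct percpu_counter blocks;   /* btrfs keeps this in btrfs_fs_info */
};

static int read_stats_init(struct read_stats *s)
{
    return percpu_counter_init(&s->blocks, 0, GFP_KERNEL);
}

/* Hot path: a per-CPU add, no global lock or cacheline bounce per bio. */
static void read_stats_account(struct read_stats *s, u32 bytes, u32 sectorsize_bits)
{
    percpu_counter_add(&s->blocks, bytes >> sectorsize_bits);
}

/* Slow path: fold the per-CPU deltas into one total, e.g. for reporting. */
static s64 read_stats_total(struct read_stats *s)
{
    return percpu_counter_sum(&s->blocks);
}

static void read_stats_destroy(struct read_stats *s)
{
    percpu_counter_destroy(&s->blocks);
}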
