Skip to content

Commit f9299f8

Browse files
Daniel Vacek authored and kdave committed
btrfs: index buffer_tree using node size
So far we've been deriving the buffer tree index using the sector size. But each extent buffer covers multiple sectors. This makes the buffer tree rather sparse.

For example, the typical and quite common configuration uses a sector size of 4KiB and a node size of 16KiB. In this case the buffer tree can use at most 25% of its slots. In other words, at least 75% of the tree slots are wasted because they are never used.

We can achieve significant memory savings on the required tree nodes by indexing the tree using the node size instead. As a result far fewer slots are wasted and the tree can now use up to 100% of its slots.

Note: This works even with unaligned tree blocks, as we still get a unique index by doing eb->start >> nodesize_bits.

Getting some stats from running the fio write test, there is a bit of variance. The values presented in the table below are medians from 5 test runs. The numbers are:

  - # of allocated ebs in the tree
  - # of leaf tree nodes
  - highest index in the tree (radix tree width):

ebs / leaves / Index |    bare for-next   |      with fix
---------------------+--------------------+-------------------
post mount           |   16 /  11 / 10e5c |   16 /  10 / 4240
post test            | 5810 / 891 / 11cfc | 4420 / 252 / 473a
post rm              |  574 / 300 / 10ef0 |  540 / 163 / 46e9

In this case (10GiB filesystem) the height of the tree is still 3 levels, but the 4x width reduction is clearly visible, as expected. And since the tree is denser, we can see a 46-72% reduction of leaf nodes after the test (300 -> 163 and 891 -> 252). That's very close to ideal with this test. It means the tree is getting really dense with this kind of workload.

Also, the fio results show no performance change.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Daniel Vacek <neelx@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 74d91ab commit f9299f8

File tree

3 files changed

+18
-16
lines changed

3 files changed

+18
-16
lines changed

fs/btrfs/disk-io.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3397,6 +3397,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
33973397
fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
33983398

33993399
fs_info->nodesize = nodesize;
3400+
fs_info->nodesize_bits = ilog2(nodesize);
34003401
fs_info->sectorsize = sectorsize;
34013402
fs_info->sectorsize_bits = ilog2(sectorsize);
34023403
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;

fs/btrfs/extent_io.c

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1803,7 +1803,7 @@ static noinline_for_stack bool lock_extent_buffer_for_io(struct extent_buffer *e
18031803
*/
18041804
spin_lock(&eb->refs_lock);
18051805
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
1806-
XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->sectorsize_bits);
1806+
XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->nodesize_bits);
18071807
unsigned long flags;
18081808

18091809
set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
@@ -1903,7 +1903,7 @@ static void set_btree_ioerr(struct extent_buffer *eb)
19031903
static void buffer_tree_set_mark(const struct extent_buffer *eb, xa_mark_t mark)
19041904
{
19051905
struct btrfs_fs_info *fs_info = eb->fs_info;
1906-
XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->sectorsize_bits);
1906+
XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->nodesize_bits);
19071907
unsigned long flags;
19081908

19091909
xas_lock_irqsave(&xas, flags);
@@ -1915,7 +1915,7 @@ static void buffer_tree_set_mark(const struct extent_buffer *eb, xa_mark_t mark)
19151915
static void buffer_tree_clear_mark(const struct extent_buffer *eb, xa_mark_t mark)
19161916
{
19171917
struct btrfs_fs_info *fs_info = eb->fs_info;
1918-
XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->sectorsize_bits);
1918+
XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->nodesize_bits);
19191919
unsigned long flags;
19201920

19211921
xas_lock_irqsave(&xas, flags);
@@ -2015,7 +2015,7 @@ static unsigned int buffer_tree_get_ebs_tag(struct btrfs_fs_info *fs_info,
20152015
rcu_read_lock();
20162016
while ((eb = find_get_eb(&xas, end, tag)) != NULL) {
20172017
if (!eb_batch_add(batch, eb)) {
2018-
*start = ((eb->start + eb->len) >> fs_info->sectorsize_bits);
2018+
*start = ((eb->start + eb->len) >> fs_info->nodesize_bits);
20192019
goto out;
20202020
}
20212021
}
@@ -2037,7 +2037,7 @@ static struct extent_buffer *find_extent_buffer_nolock(
20372037
struct btrfs_fs_info *fs_info, u64 start)
20382038
{
20392039
struct extent_buffer *eb;
2040-
unsigned long index = (start >> fs_info->sectorsize_bits);
2040+
unsigned long index = (start >> fs_info->nodesize_bits);
20412041

20422042
rcu_read_lock();
20432043
eb = xa_load(&fs_info->buffer_tree, index);
@@ -2143,8 +2143,8 @@ void btrfs_btree_wait_writeback_range(struct btrfs_fs_info *fs_info, u64 start,
21432143
u64 end)
21442144
{
21452145
struct eb_batch batch;
2146-
unsigned long start_index = (start >> fs_info->sectorsize_bits);
2147-
unsigned long end_index = (end >> fs_info->sectorsize_bits);
2146+
unsigned long start_index = (start >> fs_info->nodesize_bits);
2147+
unsigned long end_index = (end >> fs_info->nodesize_bits);
21482148

21492149
eb_batch_init(&batch);
21502150
while (start_index <= end_index) {
@@ -2180,7 +2180,7 @@ int btree_write_cache_pages(struct address_space *mapping,
21802180

21812181
eb_batch_init(&batch);
21822182
if (wbc->range_cyclic) {
2183-
index = ((mapping->writeback_index << PAGE_SHIFT) >> fs_info->sectorsize_bits);
2183+
index = ((mapping->writeback_index << PAGE_SHIFT) >> fs_info->nodesize_bits);
21842184
end = -1;
21852185

21862186
/*
@@ -2189,8 +2189,8 @@ int btree_write_cache_pages(struct address_space *mapping,
21892189
*/
21902190
scanned = (index == 0);
21912191
} else {
2192-
index = (wbc->range_start >> fs_info->sectorsize_bits);
2193-
end = (wbc->range_end >> fs_info->sectorsize_bits);
2192+
index = (wbc->range_start >> fs_info->nodesize_bits);
2193+
end = (wbc->range_end >> fs_info->nodesize_bits);
21942194

21952195
scanned = 1;
21962196
}
@@ -3070,7 +3070,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
30703070
eb->fs_info = fs_info;
30713071
again:
30723072
xa_lock_irq(&fs_info->buffer_tree);
3073-
exists = __xa_cmpxchg(&fs_info->buffer_tree, start >> fs_info->sectorsize_bits,
3073+
exists = __xa_cmpxchg(&fs_info->buffer_tree, start >> fs_info->nodesize_bits,
30743074
NULL, eb, GFP_NOFS);
30753075
if (xa_is_err(exists)) {
30763076
ret = xa_err(exists);
@@ -3387,7 +3387,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
33873387
again:
33883388
xa_lock_irq(&fs_info->buffer_tree);
33893389
existing_eb = __xa_cmpxchg(&fs_info->buffer_tree,
3390-
start >> fs_info->sectorsize_bits, NULL, eb,
3390+
start >> fs_info->nodesize_bits, NULL, eb,
33913391
GFP_NOFS);
33923392
if (xa_is_err(existing_eb)) {
33933393
ret = xa_err(existing_eb);
@@ -3490,7 +3490,7 @@ static int release_extent_buffer(struct extent_buffer *eb)
34903490
* in this case.
34913491
*/
34923492
xa_cmpxchg_irq(&fs_info->buffer_tree,
3493-
eb->start >> fs_info->sectorsize_bits, eb, NULL,
3493+
eb->start >> fs_info->nodesize_bits, eb, NULL,
34943494
GFP_ATOMIC);
34953495

34963496
btrfs_leak_debug_del_eb(eb);
@@ -4332,9 +4332,9 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
43324332
{
43334333
struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
43344334
struct extent_buffer *eb;
4335-
unsigned long start = (folio_pos(folio) >> fs_info->sectorsize_bits);
4335+
unsigned long start = (folio_pos(folio) >> fs_info->nodesize_bits);
43364336
unsigned long index = start;
4337-
unsigned long end = index + (PAGE_SIZE >> fs_info->sectorsize_bits) - 1;
4337+
unsigned long end = index + (PAGE_SIZE >> fs_info->nodesize_bits) - 1;
43384338
int ret;
43394339

43404340
xa_lock_irq(&fs_info->buffer_tree);

fs/btrfs/fs.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -773,7 +773,7 @@ struct btrfs_fs_info {
773773

774774
struct btrfs_delayed_root *delayed_root;
775775

776-
/* Entries are eb->start / sectorsize */
776+
/* Entries are eb->start >> nodesize_bits */
777777
struct xarray buffer_tree;
778778

779779
/* Next backup root to be overwritten */
@@ -805,6 +805,7 @@ struct btrfs_fs_info {
805805

806806
/* Cached block sizes */
807807
u32 nodesize;
808+
u32 nodesize_bits;
808809
u32 sectorsize;
809810
/* ilog2 of sectorsize, use to avoid 64bit division */
810811
u32 sectorsize_bits;

0 commit comments

Comments
 (0)