Skip to content

Commit ec4edd7

Browse files
author
Kent Overstreet
committed
bcachefs: Prep work for variable size btree node buffers
bcachefs btree nodes are big - typically 256k - and btree roots are pinned in memory. As we're now up to 18 btrees, we now have significant memory overhead in mostly empty btree roots. And in the future we're going to start enforcing that certain btree node boundaries exist, to solve lock contention issues - analogous to XFS's AGIs. Thus, we need to start allocating smaller btree node buffers when we can. This patch changes code that refers to the filesystem constant c->opts.btree_node_size to refer to the btree node buffer size - btree_buf_bytes() - where appropriate. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
1 parent 2acc59d commit ec4edd7

18 files changed

+87
-97
lines changed

fs/bcachefs/backpointers.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
560560

561561
si_meminfo(&i);
562562
mem_bytes = i.totalram * i.mem_unit;
563-
return div_u64(mem_bytes >> 1, btree_bytes(c));
563+
return div_u64(mem_bytes >> 1, c->opts.btree_node_size);
564564
}
565565

566566
static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,

fs/bcachefs/backpointers.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
33
#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
44

5+
#include "btree_cache.h"
56
#include "btree_iter.h"
67
#include "btree_update.h"
78
#include "buckets.h"

fs/bcachefs/bcachefs.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,11 +1204,6 @@ static inline unsigned block_sectors(const struct bch_fs *c)
12041204
return c->opts.block_size >> 9;
12051205
}
12061206

1207-
static inline size_t btree_sectors(const struct bch_fs *c)
1208-
{
1209-
return c->opts.btree_node_size >> 9;
1210-
}
1211-
12121207
static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree)
12131208
{
12141209
return c->btree_key_cache_btrees & (1U << btree);

fs/bcachefs/bset.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -823,13 +823,12 @@ void bch2_bset_init_first(struct btree *b, struct bset *i)
823823
set_btree_bset(b, t, i);
824824
}
825825

826-
void bch2_bset_init_next(struct bch_fs *c, struct btree *b,
827-
struct btree_node_entry *bne)
826+
void bch2_bset_init_next(struct btree *b, struct btree_node_entry *bne)
828827
{
829828
struct bset *i = &bne->keys;
830829
struct bset_tree *t;
831830

832-
BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c));
831+
BUG_ON(bset_byte_offset(b, bne) >= btree_buf_bytes(b));
833832
BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b)));
834833
BUG_ON(b->nsets >= MAX_BSETS);
835834

fs/bcachefs/bset.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,8 +264,7 @@ static inline struct bset *bset_next_set(struct btree *b,
264264
void bch2_btree_keys_init(struct btree *);
265265

266266
void bch2_bset_init_first(struct btree *, struct bset *);
267-
void bch2_bset_init_next(struct bch_fs *, struct btree *,
268-
struct btree_node_entry *);
267+
void bch2_bset_init_next(struct btree *, struct btree_node_entry *);
269268
void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool);
270269

271270
void bch2_bset_insert(struct btree *, struct btree_node_iter *,

fs/bcachefs/btree_cache.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
6060

6161
clear_btree_node_just_written(b);
6262

63-
kvpfree(b->data, btree_bytes(c));
63+
kvpfree(b->data, btree_buf_bytes(b));
6464
b->data = NULL;
6565
#ifdef __KERNEL__
6666
kvfree(b->aux_data);
@@ -94,7 +94,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
9494
{
9595
BUG_ON(b->data || b->aux_data);
9696

97-
b->data = kvpmalloc(btree_bytes(c), gfp);
97+
b->data = kvpmalloc(btree_buf_bytes(b), gfp);
9898
if (!b->data)
9999
return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
100100
#ifdef __KERNEL__
@@ -107,7 +107,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
107107
b->aux_data = NULL;
108108
#endif
109109
if (!b->aux_data) {
110-
kvpfree(b->data, btree_bytes(c));
110+
kvpfree(b->data, btree_buf_bytes(b));
111111
b->data = NULL;
112112
return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
113113
}
@@ -126,7 +126,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
126126
bkey_btree_ptr_init(&b->key);
127127
INIT_LIST_HEAD(&b->list);
128128
INIT_LIST_HEAD(&b->write_blocked);
129-
b->byte_order = ilog2(btree_bytes(c));
129+
b->byte_order = ilog2(c->opts.btree_node_size);
130130
return b;
131131
}
132132

@@ -408,7 +408,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
408408
if (c->verify_data)
409409
list_move(&c->verify_data->list, &bc->live);
410410

411-
kvpfree(c->verify_ondisk, btree_bytes(c));
411+
kvpfree(c->verify_ondisk, c->opts.btree_node_size);
412412

413413
for (i = 0; i < btree_id_nr_alive(c); i++) {
414414
struct btree_root *r = bch2_btree_id_root(c, i);
@@ -1192,7 +1192,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struc
11921192
" failed unpacked %zu\n",
11931193
b->unpack_fn_len,
11941194
b->nr.live_u64s * sizeof(u64),
1195-
btree_bytes(c) - sizeof(struct btree_node),
1195+
btree_buf_bytes(b) - sizeof(struct btree_node),
11961196
b->nr.live_u64s * 100 / btree_max_u64s(c),
11971197
b->sib_u64s[0],
11981198
b->sib_u64s[1],

fs/bcachefs/btree_cache.h

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,22 +74,27 @@ static inline bool btree_node_hashed(struct btree *b)
7474
_iter = 0; _iter < (_tbl)->size; _iter++) \
7575
rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash)
7676

77-
static inline size_t btree_bytes(struct bch_fs *c)
77+
static inline size_t btree_buf_bytes(const struct btree *b)
7878
{
79-
return c->opts.btree_node_size;
79+
return 1UL << b->byte_order;
8080
}
8181

82-
static inline size_t btree_max_u64s(struct bch_fs *c)
82+
static inline size_t btree_buf_max_u64s(const struct btree *b)
8383
{
84-
return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64);
84+
return (btree_buf_bytes(b) - sizeof(struct btree_node)) / sizeof(u64);
8585
}
8686

87-
static inline size_t btree_pages(struct bch_fs *c)
87+
static inline size_t btree_max_u64s(const struct bch_fs *c)
8888
{
89-
return btree_bytes(c) / PAGE_SIZE;
89+
return (c->opts.btree_node_size - sizeof(struct btree_node)) / sizeof(u64);
9090
}
9191

92-
static inline unsigned btree_blocks(struct bch_fs *c)
92+
static inline size_t btree_sectors(const struct bch_fs *c)
93+
{
94+
return c->opts.btree_node_size >> SECTOR_SHIFT;
95+
}
96+
97+
static inline unsigned btree_blocks(const struct bch_fs *c)
9398
{
9499
return btree_sectors(c) >> c->block_bits;
95100
}

fs/bcachefs/btree_io.c

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
112112
unsigned flags = memalloc_nofs_save();
113113
void *p;
114114

115-
BUG_ON(size > btree_bytes(c));
115+
BUG_ON(size > c->opts.btree_node_size);
116116

117117
*used_mempool = false;
118118
p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
@@ -174,8 +174,8 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
174174

175175
ptrs = ptrs_end = ((void *) new_whiteouts + bytes);
176176

177-
for (k = unwritten_whiteouts_start(c, b);
178-
k != unwritten_whiteouts_end(c, b);
177+
for (k = unwritten_whiteouts_start(b);
178+
k != unwritten_whiteouts_end(b);
179179
k = bkey_p_next(k))
180180
*--ptrs = k;
181181

@@ -192,7 +192,7 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
192192
verify_no_dups(b, new_whiteouts,
193193
(void *) ((u64 *) new_whiteouts + b->whiteout_u64s));
194194

195-
memcpy_u64s(unwritten_whiteouts_start(c, b),
195+
memcpy_u64s(unwritten_whiteouts_start(b),
196196
new_whiteouts, b->whiteout_u64s);
197197

198198
btree_bounce_free(c, bytes, used_mempool, new_whiteouts);
@@ -313,7 +313,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
313313
}
314314

315315
bytes = sorting_entire_node
316-
? btree_bytes(c)
316+
? btree_buf_bytes(b)
317317
: __vstruct_bytes(struct btree_node, u64s);
318318

319319
out = btree_bounce_alloc(c, bytes, &used_mempool);
@@ -338,7 +338,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
338338
if (sorting_entire_node) {
339339
u64s = le16_to_cpu(out->keys.u64s);
340340

341-
BUG_ON(bytes != btree_bytes(c));
341+
BUG_ON(bytes != btree_buf_bytes(b));
342342

343343
/*
344344
* Our temporary buffer is the same size as the btree node's
@@ -502,7 +502,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
502502

503503
bne = want_new_bset(c, b);
504504
if (bne)
505-
bch2_bset_init_next(c, b, bne);
505+
bch2_bset_init_next(b, bne);
506506

507507
bch2_btree_build_aux_trees(b);
508508

@@ -1160,7 +1160,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
11601160
ptr_written, b->written);
11611161
} else {
11621162
for (bne = write_block(b);
1163-
bset_byte_offset(b, bne) < btree_bytes(c);
1163+
bset_byte_offset(b, bne) < btree_buf_bytes(b);
11641164
bne = (void *) bne + block_bytes(c))
11651165
btree_err_on(bne->keys.seq == b->data->keys.seq &&
11661166
!bch2_journal_seq_is_blacklisted(c,
@@ -1172,7 +1172,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
11721172
"found bset signature after last bset");
11731173
}
11741174

1175-
sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
1175+
sorted = btree_bounce_alloc(c, btree_buf_bytes(b), &used_mempool);
11761176
sorted->keys.u64s = 0;
11771177

11781178
set_btree_bset(b, b->set, &b->data->keys);
@@ -1188,7 +1188,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
11881188

11891189
BUG_ON(b->nr.live_u64s != u64s);
11901190

1191-
btree_bounce_free(c, btree_bytes(c), used_mempool, sorted);
1191+
btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted);
11921192

11931193
if (updated_range)
11941194
bch2_btree_node_drop_keys_outside_node(b);
@@ -1284,7 +1284,7 @@ static void btree_node_read_work(struct work_struct *work)
12841284
rb->have_ioref = bch2_dev_get_ioref(ca, READ);
12851285
bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
12861286
bio->bi_iter.bi_sector = rb->pick.ptr.offset;
1287-
bio->bi_iter.bi_size = btree_bytes(c);
1287+
bio->bi_iter.bi_size = btree_buf_bytes(b);
12881288

12891289
if (rb->have_ioref) {
12901290
bio_set_dev(bio, ca->disk_sb.bdev);
@@ -1512,7 +1512,7 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
15121512
}
15131513

15141514
if (best >= 0) {
1515-
memcpy(b->data, ra->buf[best], btree_bytes(c));
1515+
memcpy(b->data, ra->buf[best], btree_buf_bytes(b));
15161516
ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error);
15171517
} else {
15181518
ret = -1;
@@ -1578,7 +1578,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
15781578
for (i = 0; i < ra->nr; i++) {
15791579
ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
15801580
ra->bio[i] = bio_alloc_bioset(NULL,
1581-
buf_pages(ra->buf[i], btree_bytes(c)),
1581+
buf_pages(ra->buf[i], btree_buf_bytes(b)),
15821582
REQ_OP_READ|REQ_SYNC|REQ_META,
15831583
GFP_NOFS,
15841584
&c->btree_bio);
@@ -1598,7 +1598,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
15981598
rb->pick = pick;
15991599
rb->bio.bi_iter.bi_sector = pick.ptr.offset;
16001600
rb->bio.bi_end_io = btree_node_read_all_replicas_endio;
1601-
bch2_bio_map(&rb->bio, ra->buf[i], btree_bytes(c));
1601+
bch2_bio_map(&rb->bio, ra->buf[i], btree_buf_bytes(b));
16021602

16031603
if (rb->have_ioref) {
16041604
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
@@ -1665,7 +1665,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
16651665
ca = bch_dev_bkey_exists(c, pick.ptr.dev);
16661666

16671667
bio = bio_alloc_bioset(NULL,
1668-
buf_pages(b->data, btree_bytes(c)),
1668+
buf_pages(b->data, btree_buf_bytes(b)),
16691669
REQ_OP_READ|REQ_SYNC|REQ_META,
16701670
GFP_NOFS,
16711671
&c->btree_bio);
@@ -1679,7 +1679,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
16791679
INIT_WORK(&rb->work, btree_node_read_work);
16801680
bio->bi_iter.bi_sector = pick.ptr.offset;
16811681
bio->bi_end_io = btree_node_read_endio;
1682-
bch2_bio_map(bio, b->data, btree_bytes(c));
1682+
bch2_bio_map(bio, b->data, btree_buf_bytes(b));
16831683

16841684
if (rb->have_ioref) {
16851685
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
@@ -2074,8 +2074,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
20742074
i->u64s = 0;
20752075

20762076
sort_iter_add(&sort_iter.iter,
2077-
unwritten_whiteouts_start(c, b),
2078-
unwritten_whiteouts_end(c, b));
2077+
unwritten_whiteouts_start(b),
2078+
unwritten_whiteouts_end(b));
20792079
SET_BSET_SEPARATE_WHITEOUTS(i, false);
20802080

20812081
b->whiteout_u64s = 0;
@@ -2251,7 +2251,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
22512251

22522252
bne = want_new_bset(c, b);
22532253
if (bne)
2254-
bch2_bset_init_next(c, b, bne);
2254+
bch2_bset_init_next(b, bne);
22552255

22562256
bch2_btree_build_aux_trees(b);
22572257

fs/bcachefs/btree_trans_commit.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
139139
EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
140140
EBUG_ON(bpos_lt(insert->k.p, b->data->min_key));
141141
EBUG_ON(bpos_gt(insert->k.p, b->data->max_key));
142-
EBUG_ON(insert->k.u64s >
143-
bch_btree_keys_u64s_remaining(trans->c, b));
142+
EBUG_ON(insert->k.u64s > bch2_btree_keys_u64s_remaining(b));
144143
EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos));
145144

146145
k = bch2_btree_node_iter_peek_all(node_iter, b);
@@ -160,7 +159,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
160159
k->type = KEY_TYPE_deleted;
161160

162161
if (k->needs_whiteout)
163-
push_whiteout(trans->c, b, insert->k.p);
162+
push_whiteout(b, insert->k.p);
164163
k->needs_whiteout = false;
165164

166165
if (k >= btree_bset_last(b)->start) {
@@ -348,9 +347,7 @@ static noinline void journal_transaction_name(struct btree_trans *trans)
348347
static inline int btree_key_can_insert(struct btree_trans *trans,
349348
struct btree *b, unsigned u64s)
350349
{
351-
struct bch_fs *c = trans->c;
352-
353-
if (!bch2_btree_node_insert_fits(c, b, u64s))
350+
if (!bch2_btree_node_insert_fits(b, u64s))
354351
return -BCH_ERR_btree_insert_btree_node_full;
355352

356353
return 0;

fs/bcachefs/btree_update_interior.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ static bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
159159
{
160160
size_t u64s = btree_node_u64s_with_format(nr, &b->format, new_f);
161161

162-
return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c);
162+
return __vstruct_bytes(struct btree_node, u64s) < btree_buf_bytes(b);
163163
}
164164

165165
/* Btree node freeing/allocation: */
@@ -1097,7 +1097,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
10971097
* Always check for space for two keys, even if we won't have to
10981098
* split at prior level - it might have been a merge instead:
10991099
*/
1100-
if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
1100+
if (bch2_btree_node_insert_fits(path->l[update_level].b,
11011101
BKEY_BTREE_PTR_U64s_MAX * 2))
11021102
break;
11031103

@@ -1401,7 +1401,7 @@ static void __btree_split_node(struct btree_update *as,
14011401

14021402
unsigned u64s = nr_keys[i].nr_keys * n[i]->data->format.key_u64s +
14031403
nr_keys[i].val_u64s;
1404-
if (__vstruct_bytes(struct btree_node, u64s) > btree_bytes(as->c))
1404+
if (__vstruct_bytes(struct btree_node, u64s) > btree_buf_bytes(b))
14051405
n[i]->data->format = b->format;
14061406

14071407
btree_node_set_format(n[i], n[i]->data->format);
@@ -1703,7 +1703,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
17031703

17041704
bch2_btree_node_prep_for_write(trans, path, b);
17051705

1706-
if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
1706+
if (!bch2_btree_node_insert_fits(b, bch2_keylist_u64s(keys))) {
17071707
bch2_btree_node_unlock_write(trans, path, b);
17081708
goto split;
17091709
}

0 commit comments

Comments
 (0)