Skip to content

Commit b5f1b48

Browse files
committed
Merge tag 'bcachefs-2024-11-07' of git://evilpiepirate.org/bcachefs
Pull bcachefs fixes from Kent Overstreet: "Some trivial syzbot fixes, two more serious btree fixes found by looping single_devices.ktest small_nodes: - Topology error on split after merge, where we accidentaly picked the node being deleted for the pivot, resulting in an assertion pop - New nodes being preallocated were left on the freedlist, unlocked, resulting in them sometimes being accidentally freed: this dated from pre-cycle detector, when we could leave them locked. This should have resulted in more explosions and fireworks, but turned out to be surprisingly hard to hit because the preallocated nodes were being used right away. The fix for this is bigger than we'd like - reworking btree list handling was a bit invasive - but we've now got more assertions and it's well tested. - Also another mishandled transaction restart fix (in btree_node_prefetch) - we're almost done with those" * tag 'bcachefs-2024-11-07' of git://evilpiepirate.org/bcachefs: bcachefs: Fix UAF in __promote_alloc() error path bcachefs: Change OPT_STR max to be 1 less than the size of choices array bcachefs: btree_cache.freeable list fixes bcachefs: check the invalid parameter for perf test bcachefs: add check NULL return of bio_kmalloc in journal_read_bucket bcachefs: Ensure BCH_FS_may_go_rw is set before exiting recovery bcachefs: Fix topology errors on split after merge bcachefs: Ancient versions with bad bkey_formats are no longer supported bcachefs: Fix error handling in bch2_btree_node_prefetch() bcachefs: Fix null ptr deref in bucket_gen_get()
2 parents 9ea7eda + 8440da9 commit b5f1b48

14 files changed

+131
-78
lines changed

fs/bcachefs/bkey.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -643,7 +643,7 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
643643
enum bch_validate_flags flags,
644644
struct printbuf *err)
645645
{
646-
unsigned i, bits = KEY_PACKED_BITS_START;
646+
unsigned bits = KEY_PACKED_BITS_START;
647647

648648
if (f->nr_fields != BKEY_NR_FIELDS) {
649649
prt_printf(err, "incorrect number of fields: got %u, should be %u",
@@ -655,9 +655,8 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
655655
* Verify that the packed format can't represent fields larger than the
656656
* unpacked format:
657657
*/
658-
for (i = 0; i < f->nr_fields; i++) {
659-
if ((!c || c->sb.version_min >= bcachefs_metadata_version_snapshot) &&
660-
bch2_bkey_format_field_overflows(f, i)) {
658+
for (unsigned i = 0; i < f->nr_fields; i++) {
659+
if (bch2_bkey_format_field_overflows(f, i)) {
661660
unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
662661
u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
663662
unsigned packed_bits = min(64, f->bits_per_field[i]);

fs/bcachefs/btree_cache.c

Lines changed: 66 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,38 @@ static inline size_t btree_cache_can_free(struct btree_cache_list *list)
5959

6060
static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
6161
{
62+
BUG_ON(!list_empty(&b->list));
63+
6264
if (b->c.lock.readers)
63-
list_move(&b->list, &bc->freed_pcpu);
65+
list_add(&b->list, &bc->freed_pcpu);
6466
else
65-
list_move(&b->list, &bc->freed_nonpcpu);
67+
list_add(&b->list, &bc->freed_nonpcpu);
68+
}
69+
70+
static void __bch2_btree_node_to_freelist(struct btree_cache *bc, struct btree *b)
71+
{
72+
BUG_ON(!list_empty(&b->list));
73+
BUG_ON(!b->data);
74+
75+
bc->nr_freeable++;
76+
list_add(&b->list, &bc->freeable);
6677
}
6778

68-
static void btree_node_data_free(struct bch_fs *c, struct btree *b)
79+
void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
6980
{
7081
struct btree_cache *bc = &c->btree_cache;
7182

83+
mutex_lock(&bc->lock);
84+
__bch2_btree_node_to_freelist(bc, b);
85+
mutex_unlock(&bc->lock);
86+
87+
six_unlock_write(&b->c.lock);
88+
six_unlock_intent(&b->c.lock);
89+
}
90+
91+
static void __btree_node_data_free(struct btree_cache *bc, struct btree *b)
92+
{
93+
BUG_ON(!list_empty(&b->list));
7294
BUG_ON(btree_node_hashed(b));
7395

7496
/*
@@ -94,11 +116,17 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
94116
#endif
95117
b->aux_data = NULL;
96118

97-
bc->nr_freeable--;
98-
99119
btree_node_to_freedlist(bc, b);
100120
}
101121

122+
static void btree_node_data_free(struct btree_cache *bc, struct btree *b)
123+
{
124+
BUG_ON(list_empty(&b->list));
125+
list_del_init(&b->list);
126+
--bc->nr_freeable;
127+
__btree_node_data_free(bc, b);
128+
}
129+
102130
static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
103131
const void *obj)
104132
{
@@ -174,21 +202,10 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
174202

175203
bch2_btree_lock_init(&b->c, 0);
176204

177-
bc->nr_freeable++;
178-
list_add(&b->list, &bc->freeable);
205+
__bch2_btree_node_to_freelist(bc, b);
179206
return b;
180207
}
181208

182-
void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
183-
{
184-
mutex_lock(&c->btree_cache.lock);
185-
list_move(&b->list, &c->btree_cache.freeable);
186-
mutex_unlock(&c->btree_cache.lock);
187-
188-
six_unlock_write(&b->c.lock);
189-
six_unlock_intent(&b->c.lock);
190-
}
191-
192209
static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b)
193210
{
194211
struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
@@ -236,29 +253,34 @@ void bch2_btree_cache_unpin(struct bch_fs *c)
236253

237254
/* Btree in memory cache - hash table */
238255

239-
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
256+
void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
240257
{
241258
lockdep_assert_held(&bc->lock);
242-
int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
243259

260+
int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
244261
BUG_ON(ret);
245262

246263
/* Cause future lookups for this node to fail: */
247264
b->hash_val = 0;
248265

249266
if (b->c.btree_id < BTREE_ID_NR)
250267
--bc->nr_by_btree[b->c.btree_id];
268+
--bc->live[btree_node_pinned(b)].nr;
269+
list_del_init(&b->list);
270+
}
251271

252-
bc->live[btree_node_pinned(b)].nr--;
253-
bc->nr_freeable++;
254-
list_move(&b->list, &bc->freeable);
272+
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
273+
{
274+
__bch2_btree_node_hash_remove(bc, b);
275+
__bch2_btree_node_to_freelist(bc, b);
255276
}
256277

257278
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
258279
{
280+
BUG_ON(!list_empty(&b->list));
259281
BUG_ON(b->hash_val);
260-
b->hash_val = btree_ptr_hash_val(&b->key);
261282

283+
b->hash_val = btree_ptr_hash_val(&b->key);
262284
int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash,
263285
bch_btree_cache_params);
264286
if (ret)
@@ -270,10 +292,8 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
270292
bool p = __btree_node_pinned(bc, b);
271293
mod_bit(BTREE_NODE_pinned, &b->flags, p);
272294

273-
list_move_tail(&b->list, &bc->live[p].list);
295+
list_add_tail(&b->list, &bc->live[p].list);
274296
bc->live[p].nr++;
275-
276-
bc->nr_freeable--;
277297
return 0;
278298
}
279299

@@ -485,7 +505,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
485505
goto out;
486506

487507
if (!btree_node_reclaim(c, b, true)) {
488-
btree_node_data_free(c, b);
508+
btree_node_data_free(bc, b);
489509
six_unlock_write(&b->c.lock);
490510
six_unlock_intent(&b->c.lock);
491511
freed++;
@@ -501,10 +521,10 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
501521
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++;
502522
--touched;;
503523
} else if (!btree_node_reclaim(c, b, true)) {
504-
bch2_btree_node_hash_remove(bc, b);
524+
__bch2_btree_node_hash_remove(bc, b);
525+
__btree_node_data_free(bc, b);
505526

506527
freed++;
507-
btree_node_data_free(c, b);
508528
bc->nr_freed++;
509529

510530
six_unlock_write(&b->c.lock);
@@ -587,7 +607,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
587607
BUG_ON(btree_node_read_in_flight(b) ||
588608
btree_node_write_in_flight(b));
589609

590-
btree_node_data_free(c, b);
610+
btree_node_data_free(bc, b);
591611
}
592612

593613
BUG_ON(!bch2_journal_error(&c->journal) &&
@@ -786,8 +806,8 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
786806

787807
BUG_ON(!six_trylock_intent(&b->c.lock));
788808
BUG_ON(!six_trylock_write(&b->c.lock));
789-
got_node:
790809

810+
got_node:
791811
/*
792812
* btree_free() doesn't free memory; it sticks the node on the end of
793813
* the list. Check if there's any freed nodes there:
@@ -796,7 +816,12 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
796816
if (!btree_node_reclaim(c, b2, false)) {
797817
swap(b->data, b2->data);
798818
swap(b->aux_data, b2->aux_data);
819+
820+
list_del_init(&b2->list);
821+
--bc->nr_freeable;
799822
btree_node_to_freedlist(bc, b2);
823+
mutex_unlock(&bc->lock);
824+
800825
six_unlock_write(&b2->c.lock);
801826
six_unlock_intent(&b2->c.lock);
802827
goto got_mem;
@@ -810,11 +835,8 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
810835
goto err;
811836
}
812837

813-
mutex_lock(&bc->lock);
814-
bc->nr_freeable++;
815838
got_mem:
816-
mutex_unlock(&bc->lock);
817-
839+
BUG_ON(!list_empty(&b->list));
818840
BUG_ON(btree_node_hashed(b));
819841
BUG_ON(btree_node_dirty(b));
820842
BUG_ON(btree_node_write_in_flight(b));
@@ -845,7 +867,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
845867
if (bc->alloc_lock == current) {
846868
b2 = btree_node_cannibalize(c);
847869
clear_btree_node_just_written(b2);
848-
bch2_btree_node_hash_remove(bc, b2);
870+
__bch2_btree_node_hash_remove(bc, b2);
849871

850872
if (b) {
851873
swap(b->data, b2->data);
@@ -855,9 +877,9 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
855877
six_unlock_intent(&b2->c.lock);
856878
} else {
857879
b = b2;
858-
list_del_init(&b->list);
859880
}
860881

882+
BUG_ON(!list_empty(&b->list));
861883
mutex_unlock(&bc->lock);
862884

863885
trace_and_count(c, btree_cache_cannibalize, trans);
@@ -936,7 +958,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
936958
b->hash_val = 0;
937959

938960
mutex_lock(&bc->lock);
939-
list_add(&b->list, &bc->freeable);
961+
__bch2_btree_node_to_freelist(bc, b);
940962
mutex_unlock(&bc->lock);
941963

942964
six_unlock_write(&b->c.lock);
@@ -1312,9 +1334,12 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
13121334

13131335
b = bch2_btree_node_fill(trans, path, k, btree_id,
13141336
level, SIX_LOCK_read, false);
1315-
if (!IS_ERR_OR_NULL(b))
1337+
int ret = PTR_ERR_OR_ZERO(b);
1338+
if (ret)
1339+
return ret;
1340+
if (b)
13161341
six_unlock_read(&b->c.lock);
1317-
return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
1342+
return 0;
13181343
}
13191344

13201345
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
@@ -1353,7 +1378,7 @@ void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
13531378

13541379
mutex_lock(&bc->lock);
13551380
bch2_btree_node_hash_remove(bc, b);
1356-
btree_node_data_free(c, b);
1381+
btree_node_data_free(bc, b);
13571382
mutex_unlock(&bc->lock);
13581383
out:
13591384
six_unlock_write(&b->c.lock);

fs/bcachefs/btree_cache.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@ void bch2_recalc_btree_reserve(struct bch_fs *);
1414

1515
void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *);
1616

17+
void __bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
1718
void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
19+
1820
int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
1921
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
2022
unsigned, enum btree_id);

fs/bcachefs/btree_node_scan.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
186186
.ptrs[0].type = 1 << BCH_EXTENT_ENTRY_ptr,
187187
.ptrs[0].offset = offset,
188188
.ptrs[0].dev = ca->dev_idx,
189-
.ptrs[0].gen = *bucket_gen(ca, sector_to_bucket(ca, offset)),
189+
.ptrs[0].gen = bucket_gen_get(ca, sector_to_bucket(ca, offset)),
190190
};
191191
rcu_read_unlock();
192192

fs/bcachefs/btree_update_interior.c

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -237,10 +237,6 @@ static void __btree_node_free(struct btree_trans *trans, struct btree *b)
237237
BUG_ON(b->will_make_reachable);
238238

239239
clear_btree_node_noevict(b);
240-
241-
mutex_lock(&c->btree_cache.lock);
242-
list_move(&b->list, &c->btree_cache.freeable);
243-
mutex_unlock(&c->btree_cache.lock);
244240
}
245241

246242
static void bch2_btree_node_free_inmem(struct btree_trans *trans,
@@ -252,12 +248,12 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
252248

253249
bch2_btree_node_lock_write_nofail(trans, path, &b->c);
254250

251+
__btree_node_free(trans, b);
252+
255253
mutex_lock(&c->btree_cache.lock);
256254
bch2_btree_node_hash_remove(&c->btree_cache, b);
257255
mutex_unlock(&c->btree_cache.lock);
258256

259-
__btree_node_free(trans, b);
260-
261257
six_unlock_write(&b->c.lock);
262258
mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
263259

@@ -289,7 +285,7 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
289285
clear_btree_node_need_write(b);
290286

291287
mutex_lock(&c->btree_cache.lock);
292-
bch2_btree_node_hash_remove(&c->btree_cache, b);
288+
__bch2_btree_node_hash_remove(&c->btree_cache, b);
293289
mutex_unlock(&c->btree_cache.lock);
294290

295291
BUG_ON(p->nr >= ARRAY_SIZE(p->b));
@@ -521,8 +517,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *
521517
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
522518
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
523519
__btree_node_free(trans, b);
524-
six_unlock_write(&b->c.lock);
525-
six_unlock_intent(&b->c.lock);
520+
bch2_btree_node_to_freelist(c, b);
526521
}
527522
}
528523
}
@@ -1434,14 +1429,24 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
14341429
}
14351430
}
14361431

1432+
static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos)
1433+
{
1434+
if (insert_keys)
1435+
for_each_keylist_key(insert_keys, k)
1436+
if (bkey_deleted(&k->k) && bpos_eq(k->k.p, pos))
1437+
return true;
1438+
return false;
1439+
}
1440+
14371441
/*
14381442
* Move keys from n1 (original replacement node, now lower node) to n2 (higher
14391443
* node)
14401444
*/
14411445
static void __btree_split_node(struct btree_update *as,
14421446
struct btree_trans *trans,
14431447
struct btree *b,
1444-
struct btree *n[2])
1448+
struct btree *n[2],
1449+
struct keylist *insert_keys)
14451450
{
14461451
struct bkey_packed *k;
14471452
struct bpos n1_pos = POS_MIN;
@@ -1476,7 +1481,8 @@ static void __btree_split_node(struct btree_update *as,
14761481
if (b->c.level &&
14771482
u64s < n1_u64s &&
14781483
u64s + k->u64s >= n1_u64s &&
1479-
bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p))
1484+
(bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p) ||
1485+
key_deleted_in_insert(insert_keys, uk.p)))
14801486
n1_u64s += k->u64s;
14811487

14821488
i = u64s >= n1_u64s;
@@ -1603,7 +1609,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
16031609
n[0] = n1 = bch2_btree_node_alloc(as, trans, b->c.level);
16041610
n[1] = n2 = bch2_btree_node_alloc(as, trans, b->c.level);
16051611

1606-
__btree_split_node(as, trans, b, n);
1612+
__btree_split_node(as, trans, b, n, keys);
16071613

16081614
if (keys) {
16091615
btree_split_insert_keys(as, trans, path, n1, keys);

0 commit comments

Comments
 (0)