Skip to content

Commit fee3e84

Browse files
committed
Merge tag 'bcachefs-2025-05-15' of git://evilpiepirate.org/bcachefs
Pull bcachefs fixes from Kent Overstreet: "The main user reported ones are: - Fix a btree iterator locking inconsistency that's been causing us to go emergency read-only in evacuate: "Fix broken btree_path lock invariants in next_node()" - Minor btree node cache reclaim tweak that should help with OOMs: don't set btree nodes as accessed on fill - Fix a bch2_bkey_clear_rebalance() issue that was causing rebalance to do needless work" * tag 'bcachefs-2025-05-15' of git://evilpiepirate.org/bcachefs: bcachefs: fix wrong arg to fsck_err() bcachefs: Fix missing commit in backpointer to missing target bcachefs: Fix accidental O(n^2) in fiemap bcachefs: Fix set_should_be_locked() call in peek_slot() bcachefs: Fix self deadlock bcachefs: Don't set btree nodes as accessed on fill bcachefs: Fix livelock in journal_entry_open() bcachefs: Fix broken btree_path lock invariants in next_node() bcachefs: Don't strip rebalance_opts from indirect extents
2 parents 4d0be1a + 9c09e59 commit fee3e84

File tree

9 files changed

+140
-66
lines changed

9 files changed

+140
-66
lines changed

fs/bcachefs/backpointers.c

Lines changed: 79 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,8 @@ static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans,
192192
static int backpointer_target_not_found(struct btree_trans *trans,
193193
struct bkey_s_c_backpointer bp,
194194
struct bkey_s_c target_k,
195-
struct bkey_buf *last_flushed)
195+
struct bkey_buf *last_flushed,
196+
bool commit)
196197
{
197198
struct bch_fs *c = trans->c;
198199
struct printbuf buf = PRINTBUF;
@@ -228,18 +229,77 @@ static int backpointer_target_not_found(struct btree_trans *trans,
228229
}
229230

230231
if (fsck_err(trans, backpointer_to_missing_ptr,
231-
"%s", buf.buf))
232+
"%s", buf.buf)) {
232233
ret = bch2_backpointer_del(trans, bp.k->p);
234+
if (ret || !commit)
235+
goto out;
236+
237+
/*
238+
* Normally, on transaction commit from inside a transaction,
239+
* we'll return -BCH_ERR_transaction_restart_nested, since a
240+
* transaction commit invalidates pointers given out by peek().
241+
*
242+
* However, since we're updating a write buffer btree, if we
243+
* return a transaction restart and loop we won't see that the
244+
* backpointer has been deleted without an additional write
245+
* buffer flush - and those are expensive.
246+
*
247+
* So we're relying on the caller immediately advancing to the
248+
* next backpointer and starting a new transaction immediately
249+
* after backpointer_get_key() returns NULL:
250+
*/
251+
ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
252+
}
253+
out:
233254
fsck_err:
234255
printbuf_exit(&buf);
235256
return ret;
236257
}
237258

238-
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
239-
struct bkey_s_c_backpointer bp,
240-
struct btree_iter *iter,
241-
unsigned iter_flags,
242-
struct bkey_buf *last_flushed)
259+
static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans,
260+
struct bkey_s_c_backpointer bp,
261+
struct btree_iter *iter,
262+
struct bkey_buf *last_flushed,
263+
bool commit)
264+
{
265+
struct bch_fs *c = trans->c;
266+
267+
BUG_ON(!bp.v->level);
268+
269+
bch2_trans_node_iter_init(trans, iter,
270+
bp.v->btree_id,
271+
bp.v->pos,
272+
0,
273+
bp.v->level - 1,
274+
0);
275+
struct btree *b = bch2_btree_iter_peek_node(trans, iter);
276+
if (IS_ERR_OR_NULL(b))
277+
goto err;
278+
279+
BUG_ON(b->c.level != bp.v->level - 1);
280+
281+
if (extent_matches_bp(c, bp.v->btree_id, bp.v->level,
282+
bkey_i_to_s_c(&b->key), bp))
283+
return b;
284+
285+
if (btree_node_will_make_reachable(b)) {
286+
b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
287+
} else {
288+
int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key),
289+
last_flushed, commit);
290+
b = ret ? ERR_PTR(ret) : NULL;
291+
}
292+
err:
293+
bch2_trans_iter_exit(trans, iter);
294+
return b;
295+
}
296+
297+
static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans,
298+
struct bkey_s_c_backpointer bp,
299+
struct btree_iter *iter,
300+
unsigned iter_flags,
301+
struct bkey_buf *last_flushed,
302+
bool commit)
243303
{
244304
struct bch_fs *c = trans->c;
245305

@@ -277,10 +337,10 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
277337
bch2_trans_iter_exit(trans, iter);
278338

279339
if (!bp.v->level) {
280-
int ret = backpointer_target_not_found(trans, bp, k, last_flushed);
340+
int ret = backpointer_target_not_found(trans, bp, k, last_flushed, commit);
281341
return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
282342
} else {
283-
struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed);
343+
struct btree *b = __bch2_backpointer_get_node(trans, bp, iter, last_flushed, commit);
284344
if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node))
285345
return bkey_s_c_null;
286346
if (IS_ERR_OR_NULL(b))
@@ -295,35 +355,16 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
295355
struct btree_iter *iter,
296356
struct bkey_buf *last_flushed)
297357
{
298-
struct bch_fs *c = trans->c;
299-
300-
BUG_ON(!bp.v->level);
301-
302-
bch2_trans_node_iter_init(trans, iter,
303-
bp.v->btree_id,
304-
bp.v->pos,
305-
0,
306-
bp.v->level - 1,
307-
0);
308-
struct btree *b = bch2_btree_iter_peek_node(trans, iter);
309-
if (IS_ERR_OR_NULL(b))
310-
goto err;
311-
312-
BUG_ON(b->c.level != bp.v->level - 1);
313-
314-
if (extent_matches_bp(c, bp.v->btree_id, bp.v->level,
315-
bkey_i_to_s_c(&b->key), bp))
316-
return b;
358+
return __bch2_backpointer_get_node(trans, bp, iter, last_flushed, true);
359+
}
317360

318-
if (btree_node_will_make_reachable(b)) {
319-
b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
320-
} else {
321-
int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), last_flushed);
322-
b = ret ? ERR_PTR(ret) : NULL;
323-
}
324-
err:
325-
bch2_trans_iter_exit(trans, iter);
326-
return b;
361+
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
362+
struct bkey_s_c_backpointer bp,
363+
struct btree_iter *iter,
364+
unsigned iter_flags,
365+
struct bkey_buf *last_flushed)
366+
{
367+
return __bch2_backpointer_get_key(trans, bp, iter, iter_flags, last_flushed, true);
327368
}
328369

329370
static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k,
@@ -521,7 +562,7 @@ static int check_bp_exists(struct btree_trans *trans,
521562
struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k);
522563

523564
struct bkey_s_c other_extent =
524-
bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL);
565+
__bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL, false);
525566
ret = bkey_err(other_extent);
526567
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
527568
ret = 0;

fs/bcachefs/btree_cache.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -852,7 +852,6 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
852852
b->sib_u64s[1] = 0;
853853
b->whiteout_u64s = 0;
854854
bch2_btree_keys_init(b);
855-
set_btree_node_accessed(b);
856855

857856
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
858857
start_time);
@@ -1286,6 +1285,10 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
12861285
six_unlock_read(&b->c.lock);
12871286
goto retry;
12881287
}
1288+
1289+
/* avoid atomic set bit if it's not needed: */
1290+
if (!btree_node_accessed(b))
1291+
set_btree_node_accessed(b);
12891292
}
12901293

12911294
/* XXX: waiting on IO with btree locks held: */
@@ -1301,10 +1304,6 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
13011304
prefetch(p + L1_CACHE_BYTES * 2);
13021305
}
13031306

1304-
/* avoid atomic set bit if it's not needed: */
1305-
if (!btree_node_accessed(b))
1306-
set_btree_node_accessed(b);
1307-
13081307
if (unlikely(btree_node_read_error(b))) {
13091308
six_unlock_read(&b->c.lock);
13101309
b = ERR_PTR(-BCH_ERR_btree_node_read_err_cached);

fs/bcachefs/btree_iter.c

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1971,6 +1971,12 @@ struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_
19711971
return NULL;
19721972
}
19731973

1974+
/*
1975+
* We don't correctly handle nodes with extra intent locks here:
1976+
* downgrade so we don't violate locking invariants
1977+
*/
1978+
bch2_btree_path_downgrade(trans, path);
1979+
19741980
if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
19751981
__bch2_btree_path_unlock(trans, path);
19761982
path->l[path->level].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
@@ -2743,7 +2749,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
27432749
ret = trans_maybe_inject_restart(trans, _RET_IP_);
27442750
if (unlikely(ret)) {
27452751
k = bkey_s_c_err(ret);
2746-
goto out_no_locked;
2752+
goto out;
27472753
}
27482754

27492755
/* extents can't span inode numbers: */
@@ -2763,13 +2769,15 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
27632769
ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
27642770
if (unlikely(ret)) {
27652771
k = bkey_s_c_err(ret);
2766-
goto out_no_locked;
2772+
goto out;
27672773
}
27682774

27692775
struct btree_path *path = btree_iter_path(trans, iter);
27702776
if (unlikely(!btree_path_node(path, path->level)))
27712777
return bkey_s_c_null;
27722778

2779+
btree_path_set_should_be_locked(trans, path);
2780+
27732781
if ((iter->flags & BTREE_ITER_cached) ||
27742782
!(iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots))) {
27752783
k = bkey_s_c_null;
@@ -2790,12 +2798,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
27902798
if (!bkey_err(k))
27912799
iter->k = *k.k;
27922800
/* We're not returning a key from iter->path: */
2793-
goto out_no_locked;
2801+
goto out;
27942802
}
27952803

2796-
k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k);
2804+
k = bch2_btree_path_peek_slot(btree_iter_path(trans, iter), &iter->k);
27972805
if (unlikely(!k.k))
2798-
goto out_no_locked;
2806+
goto out;
27992807

28002808
if (unlikely(k.k->type == KEY_TYPE_whiteout &&
28012809
(iter->flags & BTREE_ITER_filter_snapshots) &&
@@ -2833,7 +2841,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
28332841
}
28342842

28352843
if (unlikely(bkey_err(k)))
2836-
goto out_no_locked;
2844+
goto out;
28372845

28382846
next = k.k ? bkey_start_pos(k.k) : POS_MAX;
28392847

@@ -2855,8 +2863,6 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
28552863
}
28562864
}
28572865
out:
2858-
btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter));
2859-
out_no_locked:
28602866
bch2_btree_iter_verify_entry_exit(iter);
28612867
bch2_btree_iter_verify(trans, iter);
28622868
ret = bch2_btree_iter_verify_ret(trans, iter, k);

fs/bcachefs/disk_accounting.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,19 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a,
376376
return ret;
377377
}
378378

379+
int bch2_accounting_mem_insert_locked(struct bch_fs *c, struct bkey_s_c_accounting a,
380+
enum bch_accounting_mode mode)
381+
{
382+
struct bch_replicas_padded r;
383+
384+
if (mode != BCH_ACCOUNTING_read &&
385+
accounting_to_replicas(&r.e, a.k->p) &&
386+
!bch2_replicas_marked_locked(c, &r.e))
387+
return -BCH_ERR_btree_insert_need_mark_replicas;
388+
389+
return __bch2_accounting_mem_insert(c, a);
390+
}
391+
379392
static bool accounting_mem_entry_is_zero(struct accounting_mem_entry *e)
380393
{
381394
for (unsigned i = 0; i < e->nr_counters; i++)
@@ -583,7 +596,7 @@ int bch2_gc_accounting_done(struct bch_fs *c)
583596
accounting_key_init(&k_i.k, &acc_k, src_v, nr);
584597
bch2_accounting_mem_mod_locked(trans,
585598
bkey_i_to_s_c_accounting(&k_i.k),
586-
BCH_ACCOUNTING_normal);
599+
BCH_ACCOUNTING_normal, true);
587600

588601
preempt_disable();
589602
struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
@@ -612,7 +625,7 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
612625

613626
percpu_down_read(&c->mark_lock);
614627
int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k),
615-
BCH_ACCOUNTING_read);
628+
BCH_ACCOUNTING_read, false);
616629
percpu_up_read(&c->mark_lock);
617630
return ret;
618631
}

fs/bcachefs/disk_accounting.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ enum bch_accounting_mode {
136136
};
137137

138138
int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode);
139+
int bch2_accounting_mem_insert_locked(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode);
139140
void bch2_accounting_mem_gc(struct bch_fs *);
140141

141142
static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc)
@@ -150,7 +151,8 @@ static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc)
150151
*/
151152
static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
152153
struct bkey_s_c_accounting a,
153-
enum bch_accounting_mode mode)
154+
enum bch_accounting_mode mode,
155+
bool write_locked)
154156
{
155157
struct bch_fs *c = trans->c;
156158
struct bch_accounting_mem *acc = &c->accounting;
@@ -189,7 +191,11 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
189191

190192
while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
191193
accounting_pos_cmp, &a.k->p)) >= acc->k.nr) {
192-
int ret = bch2_accounting_mem_insert(c, a, mode);
194+
int ret = 0;
195+
if (unlikely(write_locked))
196+
ret = bch2_accounting_mem_insert_locked(c, a, mode);
197+
else
198+
ret = bch2_accounting_mem_insert(c, a, mode);
193199
if (ret)
194200
return ret;
195201
}
@@ -206,7 +212,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
206212
static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc)
207213
{
208214
percpu_down_read(&trans->c->mark_lock);
209-
int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal);
215+
int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false);
210216
percpu_up_read(&trans->c->mark_lock);
211217
return ret;
212218
}
@@ -259,7 +265,7 @@ static inline int bch2_accounting_trans_commit_hook(struct btree_trans *trans,
259265
EBUG_ON(bversion_zero(a->k.bversion));
260266

261267
return likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply))
262-
? bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal)
268+
? bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal, false)
263269
: 0;
264270
}
265271

@@ -271,7 +277,7 @@ static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans
271277
struct bkey_s_accounting a = accounting_i_to_s(a_i);
272278

273279
bch2_accounting_neg(a);
274-
bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal);
280+
bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal, false);
275281
bch2_accounting_neg(a);
276282
}
277283
}

fs/bcachefs/fs.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1429,7 +1429,9 @@ static int bch2_next_fiemap_extent(struct btree_trans *trans,
14291429
if (ret)
14301430
goto err;
14311431

1432-
ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, end, cur);
1432+
u64 pagecache_end = k.k ? max(start, bkey_start_offset(k.k)) : end;
1433+
1434+
ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, pagecache_end, cur);
14331435
if (ret)
14341436
goto err;
14351437

fs/bcachefs/fsck.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2446,7 +2446,7 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter,
24462446
u32 parent = le32_to_cpu(s.v->fs_path_parent);
24472447

24482448
if (darray_u32_has(&subvol_path, parent)) {
2449-
if (fsck_err(c, subvol_loop, "subvolume loop"))
2449+
if (fsck_err(trans, subvol_loop, "subvolume loop"))
24502450
ret = reattach_subvol(trans, s);
24512451
break;
24522452
}

0 commit comments

Comments
 (0)