Skip to content

Commit a4145ce

Browse files
committed
Merge tag 'bcachefs-2024-03-19' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs fixes from Kent Overstreet:

 "Assorted bugfixes.

  Most are fixes for simple assertion pops; the most significant fix is for a deadlock in recovery when we have to rewrite large numbers of btree nodes to fix errors. This was incorrectly running out of the same workqueue as the core interior btree update path - we now give it its own single threaded workqueue.

  This was visible to users as "bch2_btree_update_start(): error: BCH_ERR_journal_reclaim_would_deadlock" - and then recovery hanging"

* tag 'bcachefs-2024-03-19' of https://evilpiepirate.org/git/bcachefs:
  - bcachefs: Fix lost wakeup on journal shutdown
  - bcachefs: Fix deadlock in bch2_btree_update_start()
  - bcachefs: ratelimit errors from async_btree_node_rewrite
  - bcachefs: Run check_topology() first
  - bcachefs: Improve bch2_fatal_error()
  - bcachefs: Fix lost transaction restart error
  - bcachefs: Don't corrupt journal keys gap buffer when dropping alloc info
  - bcachefs: fix for building in userspace
  - bcachefs: bch2_snapshot_is_ancestor() now safe to call in early recovery
  - bcachefs: Fix nested transaction restart handling in bch2_bucket_gens_init()
  - bcachefs: Improve sysfs internal/btree_updates
  - bcachefs: Split out btree_node_rewrite_worker
  - bcachefs: Fix locking in bch2_alloc_write_key()
  - bcachefs: Avoid extent entry type assertions in .invalid()
  - bcachefs: Fix spurious -BCH_ERR_transaction_restart_nested
  - bcachefs: Fix check_key_has_snapshot() call
  - bcachefs: Change "accounting overran journal reservation" to a warning
2 parents 78c3925 + 2e92d26 commit a4145ce

26 files changed

+157
-111
lines changed

fs/bcachefs/alloc_background.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -532,13 +532,13 @@ int bch2_bucket_gens_init(struct bch_fs *c)
532532
u8 gen = bch2_alloc_to_v4(k, &a)->gen;
533533
unsigned offset;
534534
struct bpos pos = alloc_gens_pos(iter.pos, &offset);
535+
int ret2 = 0;
535536

536537
if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
537-
ret = commit_do(trans, NULL, NULL,
538-
BCH_TRANS_COMMIT_no_enospc,
539-
bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
540-
if (ret)
541-
break;
538+
ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?:
539+
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
540+
if (ret2)
541+
goto iter_err;
542542
have_bucket_gens_key = false;
543543
}
544544

@@ -549,7 +549,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
549549
}
550550

551551
g.v.gens[offset] = gen;
552-
0;
552+
iter_err:
553+
ret2;
553554
}));
554555

555556
if (have_bucket_gens_key && !ret)
@@ -852,7 +853,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
852853
bucket_journal_seq);
853854
if (ret) {
854855
bch2_fs_fatal_error(c,
855-
"error setting bucket_needs_journal_commit: %i", ret);
856+
"setting bucket_needs_journal_commit: %s", bch2_err_str(ret));
856857
return ret;
857858
}
858859
}

fs/bcachefs/alloc_foreground.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,15 +1356,17 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
13561356

13571357
/* Don't retry from all devices if we're out of open buckets: */
13581358
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) {
1359-
int ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
1359+
int ret2 = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
13601360
target, erasure_code,
13611361
nr_replicas, &nr_effective,
13621362
&have_cache, watermark,
13631363
flags, cl);
1364-
if (!ret ||
1365-
bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
1366-
bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
1364+
if (!ret2 ||
1365+
bch2_err_matches(ret2, BCH_ERR_transaction_restart) ||
1366+
bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) {
1367+
ret = ret2;
13671368
goto alloc_done;
1369+
}
13681370
}
13691371

13701372
/*

fs/bcachefs/bcachefs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -849,6 +849,8 @@ struct bch_fs {
849849
struct workqueue_struct *btree_interior_update_worker;
850850
struct work_struct btree_interior_update_work;
851851

852+
struct workqueue_struct *btree_node_rewrite_worker;
853+
852854
struct list_head pending_node_rewrites;
853855
struct mutex pending_node_rewrites_lock;
854856

fs/bcachefs/btree_gc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1392,11 +1392,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
13921392
*old,
13931393
b->data_type);
13941394
gc = *b;
1395-
percpu_up_read(&c->mark_lock);
13961395

13971396
if (gc.data_type != old_gc.data_type ||
13981397
gc.dirty_sectors != old_gc.dirty_sectors)
13991398
bch2_dev_usage_update_m(c, ca, &old_gc, &gc);
1399+
percpu_up_read(&c->mark_lock);
14001400

14011401
if (metadata_only &&
14021402
gc.data_type != BCH_DATA_sb &&

fs/bcachefs/btree_io.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,7 +1066,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
10661066

10671067
ret = bset_encrypt(c, i, b->written << 9);
10681068
if (bch2_fs_fatal_err_on(ret, c,
1069-
"error decrypting btree node: %i", ret))
1069+
"decrypting btree node: %s", bch2_err_str(ret)))
10701070
goto fsck_err;
10711071

10721072
btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
@@ -1107,7 +1107,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
11071107

11081108
ret = bset_encrypt(c, i, b->written << 9);
11091109
if (bch2_fs_fatal_err_on(ret, c,
1110-
"error decrypting btree node: %i\n", ret))
1110+
"decrypting btree node: %s", bch2_err_str(ret)))
11111111
goto fsck_err;
11121112

11131113
sectors = vstruct_sectors(bne, c->block_bits);
@@ -1338,7 +1338,7 @@ static void btree_node_read_work(struct work_struct *work)
13381338
if (saw_error && !btree_node_read_error(b)) {
13391339
printbuf_reset(&buf);
13401340
bch2_bpos_to_text(&buf, b->key.k.p);
1341-
bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
1341+
bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
13421342
__func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf);
13431343

13441344
bch2_btree_node_rewrite_async(c, b);
@@ -1874,8 +1874,8 @@ static void btree_node_write_work(struct work_struct *work)
18741874
return;
18751875
err:
18761876
set_btree_node_noevict(b);
1877-
if (!bch2_err_matches(ret, EROFS))
1878-
bch2_fs_fatal_error(c, "fatal error writing btree node: %s", bch2_err_str(ret));
1877+
bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c,
1878+
"writing btree node: %s", bch2_err_str(ret));
18791879
goto out;
18801880
}
18811881

@@ -2131,7 +2131,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
21312131

21322132
ret = bset_encrypt(c, i, b->written << 9);
21332133
if (bch2_fs_fatal_err_on(ret, c,
2134-
"error encrypting btree node: %i\n", ret))
2134+
"encrypting btree node: %s", bch2_err_str(ret)))
21352135
goto err;
21362136

21372137
nonce = btree_nonce(i, b->written << 9);

fs/bcachefs/btree_key_cache.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
676676
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
677677
!bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
678678
!bch2_journal_error(j), c,
679-
"error flushing key cache: %s", bch2_err_str(ret));
679+
"flushing key cache: %s", bch2_err_str(ret));
680680
if (ret)
681681
goto out;
682682

fs/bcachefs/btree_update_interior.c

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,7 @@ static void btree_update_nodes_written(struct btree_update *as)
646646
bch2_trans_unlock(trans);
647647

648648
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
649-
"%s(): error %s", __func__, bch2_err_str(ret));
649+
"%s", bch2_err_str(ret));
650650
err:
651651
if (as->b) {
652652

@@ -1067,13 +1067,18 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
10671067
flags &= ~BCH_WATERMARK_MASK;
10681068
flags |= watermark;
10691069

1070-
if (!(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
1071-
watermark < c->journal.watermark) {
1070+
if (watermark < c->journal.watermark) {
10721071
struct journal_res res = { 0 };
1072+
unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
1073+
1074+
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
1075+
watermark != BCH_WATERMARK_reclaim)
1076+
journal_flags |= JOURNAL_RES_GET_NONBLOCK;
10731077

10741078
ret = drop_locks_do(trans,
1075-
bch2_journal_res_get(&c->journal, &res, 1,
1076-
watermark|JOURNAL_RES_GET_CHECK));
1079+
bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
1080+
if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
1081+
ret = -BCH_ERR_journal_reclaim_would_deadlock;
10771082
if (ret)
10781083
return ERR_PTR(ret);
10791084
}
@@ -1117,6 +1122,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
11171122
closure_init(&as->cl, NULL);
11181123
as->c = c;
11191124
as->start_time = start_time;
1125+
as->ip_started = _RET_IP_;
11201126
as->mode = BTREE_INTERIOR_NO_UPDATE;
11211127
as->took_gc_lock = true;
11221128
as->btree_id = path->btree_id;
@@ -1192,7 +1198,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
11921198
err:
11931199
bch2_btree_update_free(as, trans);
11941200
if (!bch2_err_matches(ret, ENOSPC) &&
1195-
!bch2_err_matches(ret, EROFS))
1201+
!bch2_err_matches(ret, EROFS) &&
1202+
ret != -BCH_ERR_journal_reclaim_would_deadlock)
11961203
bch_err_fn_ratelimited(c, ret);
11971204
return ERR_PTR(ret);
11981205
}
@@ -2114,7 +2121,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
21142121

21152122
ret = bch2_trans_do(c, NULL, NULL, 0,
21162123
async_btree_node_rewrite_trans(trans, a));
2117-
bch_err_fn(c, ret);
2124+
bch_err_fn_ratelimited(c, ret);
21182125
bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
21192126
kfree(a);
21202127
}
@@ -2161,7 +2168,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
21612168
bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
21622169
}
21632170

2164-
queue_work(c->btree_interior_update_worker, &a->work);
2171+
queue_work(c->btree_node_rewrite_worker, &a->work);
21652172
}
21662173

21672174
void bch2_do_pending_node_rewrites(struct bch_fs *c)
@@ -2173,7 +2180,7 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c)
21732180
list_del(&a->list);
21742181

21752182
bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
2176-
queue_work(c->btree_interior_update_worker, &a->work);
2183+
queue_work(c->btree_node_rewrite_worker, &a->work);
21772184
}
21782185
mutex_unlock(&c->pending_node_rewrites_lock);
21792186
}
@@ -2441,12 +2448,12 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
24412448

24422449
mutex_lock(&c->btree_interior_update_lock);
24432450
list_for_each_entry(as, &c->btree_interior_update_list, list)
2444-
prt_printf(out, "%p m %u w %u r %u j %llu\n",
2445-
as,
2446-
as->mode,
2447-
as->nodes_written,
2448-
closure_nr_remaining(&as->cl),
2449-
as->journal.seq);
2451+
prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
2452+
(void *) as->ip_started,
2453+
as->mode,
2454+
as->nodes_written,
2455+
closure_nr_remaining(&as->cl),
2456+
as->journal.seq);
24502457
mutex_unlock(&c->btree_interior_update_lock);
24512458
}
24522459

@@ -2510,6 +2517,8 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c,
25102517

25112518
void bch2_fs_btree_interior_update_exit(struct bch_fs *c)
25122519
{
2520+
if (c->btree_node_rewrite_worker)
2521+
destroy_workqueue(c->btree_node_rewrite_worker);
25132522
if (c->btree_interior_update_worker)
25142523
destroy_workqueue(c->btree_interior_update_worker);
25152524
mempool_exit(&c->btree_interior_update_pool);
@@ -2534,6 +2543,11 @@ int bch2_fs_btree_interior_update_init(struct bch_fs *c)
25342543
if (!c->btree_interior_update_worker)
25352544
return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
25362545

2546+
c->btree_node_rewrite_worker =
2547+
alloc_ordered_workqueue("btree_node_rewrite", WQ_UNBOUND);
2548+
if (!c->btree_node_rewrite_worker)
2549+
return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
2550+
25372551
if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
25382552
sizeof(struct btree_update)))
25392553
return -BCH_ERR_ENOMEM_btree_interior_update_pool_init;

fs/bcachefs/btree_update_interior.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ struct btree_update {
3232
struct closure cl;
3333
struct bch_fs *c;
3434
u64 start_time;
35+
unsigned long ip_started;
3536

3637
struct list_head list;
3738
struct list_head unwritten_list;

fs/bcachefs/btree_write_buffer.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
378378
}
379379
}
380380
err:
381-
bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
381+
bch2_fs_fatal_err_on(ret, c, "%s", bch2_err_str(ret));
382382
trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
383383
bch2_journal_pin_drop(j, &wb->flushing.pin);
384384
wb->flushing.keys.nr = 0;

fs/bcachefs/buckets.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -990,8 +990,8 @@ static int __trigger_extent(struct btree_trans *trans,
990990
ret = !gc
991991
? bch2_update_cached_sectors_list(trans, p.ptr.dev, disk_sectors)
992992
: update_cached_sectors(c, k, p.ptr.dev, disk_sectors, 0, true);
993-
bch2_fs_fatal_err_on(ret && gc, c, "%s(): no replicas entry while updating cached sectors",
994-
__func__);
993+
bch2_fs_fatal_err_on(ret && gc, c, "%s: no replicas entry while updating cached sectors",
994+
bch2_err_str(ret));
995995
if (ret)
996996
return ret;
997997
}
@@ -1020,7 +1020,7 @@ static int __trigger_extent(struct btree_trans *trans,
10201020
struct printbuf buf = PRINTBUF;
10211021

10221022
bch2_bkey_val_to_text(&buf, c, k);
1023-
bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
1023+
bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf);
10241024
printbuf_exit(&buf);
10251025
}
10261026
if (ret)

0 commit comments

Comments
 (0)