Commit 72bea05

Merge tag 'bcachefs-2024-08-24' of git://evilpiepirate.org/bcachefs
Pull bcachefs fixes from Kent Overstreet:

 - assorted syzbot fixes
 - some upgrade fixes for old (pre 1.0) filesystems
 - fix for moving data off a device that was switched to durability=0
   after data had been written to it
 - nocow deadlock fix
 - fix for new rebalance_work accounting

* tag 'bcachefs-2024-08-24' of git://evilpiepirate.org/bcachefs: (28 commits)
  bcachefs: Fix rebalance_work accounting
  bcachefs: Fix failure to flush moves before sleeping in copygc
  bcachefs: don't use rht_bucket() in btree_key_cache_scan()
  bcachefs: add missing inode_walker_exit()
  bcachefs: clear path->should_be_locked in bch2_btree_key_cache_drop()
  bcachefs: Fix double assignment in check_dirent_to_subvol()
  bcachefs: Fix refcounting in discard path
  bcachefs: Fix compat issue with old alloc_v4 keys
  bcachefs: Fix warning in bch2_fs_journal_stop()
  fs/super.c: improve get_tree() error message
  bcachefs: Fix missing validation in bch2_sb_journal_v2_validate()
  bcachefs: Fix replay_now_at() assert
  bcachefs: Fix locking in bch2_ioc_setlabel()
  bcachefs: fix failure to relock in btree_node_fill()
  bcachefs: fix failure to relock in bch2_btree_node_mem_alloc()
  bcachefs: unlock_long() before resort in journal replay
  bcachefs: fix missing bch2_err_str()
  bcachefs: fix time_stats_to_text()
  bcachefs: Fix bch2_bucket_gens_init()
  bcachefs: Fix bch2_trigger_alloc assert
  ...
2 parents 780bdc1 + 49aa783 commit 72bea05

25 files changed: +387 -192 lines

fs/bcachefs/alloc_background.c

Lines changed: 34 additions & 32 deletions
@@ -240,71 +240,73 @@ int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k,
 int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k,
 			   enum bch_validate_flags flags)
 {
-	struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
+	struct bch_alloc_v4 a;
 	int ret = 0;
 
-	bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k),
+	bkey_val_copy(&a, bkey_s_c_to_alloc_v4(k));
+
+	bkey_fsck_err_on(alloc_v4_u64s_noerror(&a) > bkey_val_u64s(k.k),
 			 c, alloc_v4_val_size_bad,
 			 "bad val size (%u > %zu)",
-			 alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k));
+			 alloc_v4_u64s_noerror(&a), bkey_val_u64s(k.k));
 
-	bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
-			 BCH_ALLOC_V4_NR_BACKPOINTERS(a.v),
+	bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(&a) &&
+			 BCH_ALLOC_V4_NR_BACKPOINTERS(&a),
 			 c, alloc_v4_backpointers_start_bad,
 			 "invalid backpointers_start");
 
-	bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type,
+	bkey_fsck_err_on(alloc_data_type(a, a.data_type) != a.data_type,
 			 c, alloc_key_data_type_bad,
 			 "invalid data type (got %u should be %u)",
-			 a.v->data_type, alloc_data_type(*a.v, a.v->data_type));
+			 a.data_type, alloc_data_type(a, a.data_type));
 
 	for (unsigned i = 0; i < 2; i++)
-		bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX,
+		bkey_fsck_err_on(a.io_time[i] > LRU_TIME_MAX,
				 c, alloc_key_io_time_bad,
				 "invalid io_time[%s]: %llu, max %llu",
				 i == READ ? "read" : "write",
-				 a.v->io_time[i], LRU_TIME_MAX);
+				 a.io_time[i], LRU_TIME_MAX);
 
-	unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(a.v) * sizeof(u64) >
+	unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(&a) * sizeof(u64) >
		offsetof(struct bch_alloc_v4, stripe_sectors)
-		? a.v->stripe_sectors
+		? a.stripe_sectors
		: 0;
 
-	switch (a.v->data_type) {
+	switch (a.data_type) {
	case BCH_DATA_free:
	case BCH_DATA_need_gc_gens:
	case BCH_DATA_need_discard:
		bkey_fsck_err_on(stripe_sectors ||
-				 a.v->dirty_sectors ||
-				 a.v->cached_sectors ||
-				 a.v->stripe,
+				 a.dirty_sectors ||
+				 a.cached_sectors ||
+				 a.stripe,
				 c, alloc_key_empty_but_have_data,
				 "empty data type free but have data %u.%u.%u %u",
				 stripe_sectors,
-				 a.v->dirty_sectors,
-				 a.v->cached_sectors,
-				 a.v->stripe);
+				 a.dirty_sectors,
+				 a.cached_sectors,
+				 a.stripe);
		break;
	case BCH_DATA_sb:
	case BCH_DATA_journal:
	case BCH_DATA_btree:
	case BCH_DATA_user:
	case BCH_DATA_parity:
-		bkey_fsck_err_on(!a.v->dirty_sectors &&
+		bkey_fsck_err_on(!a.dirty_sectors &&
				 !stripe_sectors,
				 c, alloc_key_dirty_sectors_0,
				 "data_type %s but dirty_sectors==0",
-				 bch2_data_type_str(a.v->data_type));
+				 bch2_data_type_str(a.data_type));
		break;
	case BCH_DATA_cached:
-		bkey_fsck_err_on(!a.v->cached_sectors ||
-				 a.v->dirty_sectors ||
+		bkey_fsck_err_on(!a.cached_sectors ||
+				 a.dirty_sectors ||
				 stripe_sectors ||
-				 a.v->stripe,
+				 a.stripe,
				 c, alloc_key_cached_inconsistency,
				 "data type inconsistency");
 
-		bkey_fsck_err_on(!a.v->io_time[READ] &&
+		bkey_fsck_err_on(!a.io_time[READ] &&
				 c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
				 c, alloc_key_cached_but_read_time_zero,
				 "cached bucket with read_time == 0");
@@ -556,7 +558,7 @@ int bch2_bucket_gens_init(struct bch_fs *c)
 		struct bpos pos = alloc_gens_pos(iter.pos, &offset);
 		int ret2 = 0;
 
-		if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
+		if (have_bucket_gens_key && !bkey_eq(g.k.p, pos)) {
 			ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?:
 				bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
 			if (ret2)
@@ -829,7 +831,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 	if (likely(new.k->type == KEY_TYPE_alloc_v4)) {
 		new_a = bkey_s_to_alloc_v4(new).v;
 	} else {
-		BUG_ON(!(flags & BTREE_TRIGGER_gc));
+		BUG_ON(!(flags & (BTREE_TRIGGER_gc|BTREE_TRIGGER_check_repair)));
 
 		struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c);
 		ret = PTR_ERR_OR_ZERO(new_ka);
@@ -1872,26 +1874,26 @@ static void bch2_do_discards_work(struct work_struct *work)
 	trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
 			      bch2_err_str(ret));
 
-	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
 	percpu_ref_put(&ca->io_ref);
+	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
 }
 
 void bch2_dev_do_discards(struct bch_dev *ca)
 {
 	struct bch_fs *c = ca->fs;
 
-	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
 		return;
 
-	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
-		goto put_ioref;
+	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+		goto put_write_ref;
 
 	if (queue_work(c->write_ref_wq, &ca->discard_work))
 		return;
 
-	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
-put_ioref:
 	percpu_ref_put(&ca->io_ref);
+put_write_ref:
+	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
 }
 
 void bch2_do_discards(struct bch_fs *c)
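
A note on the discard-path hunks above: the acquire order is flipped so the filesystem-level write ref (BCH_WRITE_REF_discard) is taken before the per-device io ref, and both the error path and the work function now release them in reverse order. Below is a minimal, self-contained sketch of that acquire/release ordering; the functions are hypothetical stand-ins, not the real bcachefs helpers.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the write ref and the per-device io ref. */
static bool fs_write_ref_tryget(void)	{ puts("get fs write ref");  return true; }
static void fs_write_ref_put(void)	{ puts("put fs write ref"); }
static bool dev_io_ref_tryget(void)	{ puts("get dev io ref");    return true; }
static void dev_io_ref_put(void)	{ puts("put dev io ref"); }
static bool queue_discard_work(void)	{ return false; }	/* pretend queueing failed */

static void dev_do_discards(void)
{
	if (!fs_write_ref_tryget())	/* outer ref first */
		return;

	if (!dev_io_ref_tryget())
		goto put_write_ref;

	if (queue_discard_work())
		return;			/* refs handed off to the worker, which drops io ref, then write ref */

	dev_io_ref_put();		/* unwind: inner ref first... */
put_write_ref:
	fs_write_ref_put();		/* ...outer ref last */
}

int main(void)
{
	dev_do_discards();
	return 0;
}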

fs/bcachefs/alloc_background_format.h

Lines changed: 1 addition & 0 deletions
@@ -69,6 +69,7 @@ struct bch_alloc_v4 {
 	__u64			io_time[2];
 	__u32			stripe;
 	__u32			nr_external_backpointers;
+	/* end of fields in original version of alloc_v4 */
 	__u64			fragmentation_lru;
 	__u32			stripe_sectors;
 	__u32			pad;

fs/bcachefs/bcachefs_format.h

Lines changed: 2 additions & 1 deletion
@@ -677,7 +677,8 @@ struct bch_sb_field_ext {
 	x(bucket_stripe_sectors,	BCH_VERSION(1, 8))	\
 	x(disk_accounting_v2,		BCH_VERSION(1, 9))	\
 	x(disk_accounting_v3,		BCH_VERSION(1, 10))	\
-	x(disk_accounting_inum,		BCH_VERSION(1, 11))
+	x(disk_accounting_inum,		BCH_VERSION(1, 11))	\
+	x(rebalance_work_acct_fix,	BCH_VERSION(1, 12))
 
 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,
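
The hunk above extends an x-macro list: each x(name, version) entry is expanded (via a locally defined x()) into an enumerator of enum bcachefs_metadata_version, which is why adding the single rebalance_work_acct_fix line is enough to introduce metadata version 1.12. A small self-contained illustration of the pattern, using hypothetical macro and enum names rather than the real bcachefs ones:

#include <stdio.h>

/* Hypothetical version packing; defined here only for the example. */
#define MY_VERSION(major, minor)	(((major) << 10) | (minor))

#define MY_METADATA_VERSIONS()					\
	x(disk_accounting_inum,		MY_VERSION(1, 11))	\
	x(rebalance_work_acct_fix,	MY_VERSION(1, 12))

enum my_metadata_version {
#define x(t, n)	my_metadata_version_##t = n,
	MY_METADATA_VERSIONS()
#undef x
};

int main(void)
{
	/* Prints 1036, i.e. (1 << 10) | 12 under the hypothetical MY_VERSION() packing. */
	printf("%d\n", my_metadata_version_rebalance_work_acct_fix);
	return 0;
}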

fs/bcachefs/btree_cache.c

Lines changed: 25 additions & 0 deletions
@@ -159,6 +159,16 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
 	return b;
 }
 
+void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
+{
+	mutex_lock(&c->btree_cache.lock);
+	list_move(&b->list, &c->btree_cache.freeable);
+	mutex_unlock(&c->btree_cache.lock);
+
+	six_unlock_write(&b->c.lock);
+	six_unlock_intent(&b->c.lock);
+}
+
 /* Btree in memory cache - hash table */
 
 void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
@@ -736,6 +746,13 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
 			       start_time);
 
 	memalloc_nofs_restore(flags);
+
+	int ret = bch2_trans_relock(trans);
+	if (unlikely(ret)) {
+		bch2_btree_node_to_freelist(c, b);
+		return ERR_PTR(ret);
+	}
+
 	return b;
 err:
 	mutex_lock(&bc->lock);
@@ -856,6 +873,10 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
 
 	bch2_btree_node_read(trans, b, sync);
 
+	int ret = bch2_trans_relock(trans);
+	if (ret)
+		return ERR_PTR(ret);
+
 	if (!sync)
 		return NULL;
 
@@ -974,6 +995,10 @@ static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btr
 
 		bch2_btree_node_wait_on_read(b);
 
+		ret = bch2_trans_relock(trans);
+		if (ret)
+			return ERR_PTR(ret);
+
 		/*
 		 * should_be_locked is not set on this path yet, so we need to
 		 * relock it specifically:
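
The btree_cache.c hunks above share one shape: after a step during which the transaction's btree locks may have been dropped (node allocation, or waiting on a node read), call bch2_trans_relock() and return the error rather than handing back a node while the transaction is unlocked; in bch2_btree_node_mem_alloc() the just-allocated node is first given back via the new bch2_btree_node_to_freelist(). A self-contained sketch of that shape, with hypothetical helpers standing in for the real API:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct node { int id; };

/* Hypothetical stand-ins for the real helpers. */
static struct node *node_alloc(void)		{ return malloc(sizeof(struct node)); }
static void node_to_freelist(struct node *n)	{ free(n); }	/* plays the role of handing the node back */
static int trans_relock(int still_lockable)	{ return still_lockable ? 0 : -EINTR; }

/* Returns a node only if the transaction could be relocked afterwards;
 * otherwise gives the node back and reports the error. */
static struct node *node_mem_alloc(int still_lockable, int *err)
{
	struct node *n = node_alloc();
	if (!n) {
		*err = -ENOMEM;
		return NULL;
	}

	*err = trans_relock(still_lockable);
	if (*err) {
		node_to_freelist(n);	/* don't leak the node we just got */
		return NULL;
	}
	return n;
}

int main(void)
{
	int err = 0;
	struct node *n = node_mem_alloc(0, &err);
	printf("node=%p err=%d\n", (void *) n, err);
	return 0;
}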

fs/bcachefs/btree_cache.h

Lines changed: 2 additions & 0 deletions
@@ -12,6 +12,8 @@ struct btree_iter;
 
 void bch2_recalc_btree_reserve(struct bch_fs *);
 
+void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *);
+
 void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
 int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
 int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,

fs/bcachefs/btree_iter.h

Lines changed: 9 additions & 0 deletions
@@ -569,6 +569,15 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans,
 	bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter,	\
 			    _btree_id, _pos, _flags, KEY_TYPE_##_type))
 
+#define bkey_val_copy(_dst_v, _src_k)					\
+do {									\
+	unsigned b = min_t(unsigned, sizeof(*_dst_v),			\
+			   bkey_val_bytes(_src_k.k));			\
+	memcpy(_dst_v, _src_k.v, b);					\
+	if (b < sizeof(*_dst_v))					\
+		memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b);	\
+} while (0)
+
 static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
 				unsigned btree_id, struct bpos pos,
 				unsigned flags, unsigned type,
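
The new bkey_val_copy() macro copies at most the bytes actually present in the source key's value and zero-fills the rest of the destination, which is how bch2_alloc_v4_validate() above can read fields that old, shorter alloc_v4 keys never stored. A standalone illustration of the same copy-then-zero-pad behaviour, with hypothetical struct names instead of the real bcachefs types:

#include <stdio.h>
#include <string.h>

/* Hypothetical layouts: an "old" on-disk value that is shorter than the
 * current in-memory struct, which grew extra fields over time. */
struct old_alloc { unsigned long long io_time[2]; };
struct cur_alloc { unsigned long long io_time[2]; unsigned stripe_sectors; unsigned pad; };

/* Same idea as bkey_val_copy(): copy only the bytes that exist, zero the rest. */
static void val_copy(struct cur_alloc *dst, const void *src, size_t src_bytes)
{
	size_t b = src_bytes < sizeof(*dst) ? src_bytes : sizeof(*dst);

	memcpy(dst, src, b);
	memset((char *) dst + b, 0, sizeof(*dst) - b);
}

int main(void)
{
	struct old_alloc on_disk = { .io_time = { 1, 2 } };
	struct cur_alloc a;

	val_copy(&a, &on_disk, sizeof(on_disk));
	/* stripe_sectors reads back as 0 instead of whatever lies past the end of the old value */
	printf("io_time[0]=%llu stripe_sectors=%u\n", a.io_time[0], a.stripe_sectors);
	return 0;
}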

fs/bcachefs/btree_key_cache.c

Lines changed: 28 additions & 3 deletions
@@ -726,6 +726,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
 
 	mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
 	btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+	path->should_be_locked = false;
 }
 
 static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
@@ -777,14 +778,28 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 
 	rcu_read_lock();
 	tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
+
+	/*
+	 * Scanning is expensive while a rehash is in progress - most elements
+	 * will be on the new hashtable, if it's in progress
+	 *
+	 * A rehash could still start while we're scanning - that's ok, we'll
+	 * still see most elements.
+	 */
+	if (unlikely(tbl->nest)) {
+		rcu_read_unlock();
+		srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
+		return SHRINK_STOP;
+	}
+
 	if (bc->shrink_iter >= tbl->size)
 		bc->shrink_iter = 0;
 	start = bc->shrink_iter;
 
 	do {
 		struct rhash_head *pos, *next;
 
-		pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter));
+		pos = rht_ptr_rcu(&tbl->buckets[bc->shrink_iter]);
 
 		while (!rht_is_a_nulls(pos)) {
 			next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
@@ -865,12 +880,22 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 	while (atomic_long_read(&bc->nr_keys)) {
 		rcu_read_lock();
 		tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
-		if (tbl)
+		if (tbl) {
+			if (tbl->nest) {
+				/* wait for in progress rehash */
+				rcu_read_unlock();
+				mutex_lock(&bc->table.mutex);
+				mutex_unlock(&bc->table.mutex);
+				rcu_read_lock();
+				continue;
+			}
 			for (i = 0; i < tbl->size; i++)
-				rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
+				while (pos = rht_ptr_rcu(&tbl->buckets[i]), !rht_is_a_nulls(pos)) {
+					ck = container_of(pos, struct bkey_cached, hash);
 					bkey_cached_evict(bc, ck);
 					list_add(&ck->list, &items);
 				}
+		}
 		rcu_read_unlock();
 	}
