Skip to content

Commit c3de9b5

Browse files
committed
Merge tag 'bcachefs-2024-06-22' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs fixes from Kent Overstreet: "Lots of (mostly boring) fixes for syzbot bugs and rare(r) CI bugs. The LRU_TIME_BITS fix was slightly more involved; we only have 48 bits for the LRU position (we would prefer 64), so wraparound is possible for the cached data LRUs on a filesystem that has done sufficient (petabytes) reads; this is now handled. One notable user-reported bugfix, where we were forgetting to correctly set the bucket data type, which should have been BCH_DATA_need_gc_gens instead of BCH_DATA_free; this was causing us to go emergency read-only on a filesystem that had seen heavy enough use to see bucket gen wraparound. We're now starting to fix simple (safe) errors without requiring user intervention - i.e. a small incremental step towards full self-healing. This is currently limited to just certain allocation information counters, and the error is still logged in the superblock; see that patch for more information. ("bcachefs: Fix safe errors by default")" * tag 'bcachefs-2024-06-22' of https://evilpiepirate.org/git/bcachefs: (22 commits) bcachefs: Move the ei_flags setting to after initialization bcachefs: Fix a UAF after write_super() bcachefs: Use bch2_print_string_as_lines for long err bcachefs: Fix I_NEW warning in race path in bch2_inode_insert() bcachefs: Replace bare EEXIST with private error codes bcachefs: Fix missing alloc_data_type_set() closures: Change BUG_ON() to WARN_ON() bcachefs: fix alignment of VMA for memory mapped files on THP bcachefs: Fix safe errors by default bcachefs: Fix bch2_trans_put() bcachefs: set_worker_desc() for delete_dead_snapshots bcachefs: Fix bch2_sb_downgrade_update() bcachefs: Handle cached data LRU wraparound bcachefs: Guard against overflowing LRU_TIME_BITS bcachefs: delete_dead_snapshots() doesn't need to go RW bcachefs: Fix early init error path in journal code bcachefs: Check for invalid btree IDs bcachefs: Fix btree ID bitmasks bcachefs: Fix shift overflow in read_one_super() bcachefs: Fix a 
locking bug in the do_discard_fast() path ...
2 parents da3b6ef + bd4da04 commit c3de9b5

26 files changed

+472
-355
lines changed

fs/bcachefs/alloc_background.c

Lines changed: 61 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,14 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
259259
"invalid data type (got %u should be %u)",
260260
a.v->data_type, alloc_data_type(*a.v, a.v->data_type));
261261

262+
for (unsigned i = 0; i < 2; i++)
263+
bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX,
264+
c, err,
265+
alloc_key_io_time_bad,
266+
"invalid io_time[%s]: %llu, max %llu",
267+
i == READ ? "read" : "write",
268+
a.v->io_time[i], LRU_TIME_MAX);
269+
262270
switch (a.v->data_type) {
263271
case BCH_DATA_free:
264272
case BCH_DATA_need_gc_gens:
@@ -757,8 +765,8 @@ int bch2_trigger_alloc(struct btree_trans *trans,
757765
alloc_data_type_set(new_a, new_a->data_type);
758766

759767
if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) {
760-
new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
761-
new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
768+
new_a->io_time[READ] = bch2_current_io_time(c, READ);
769+
new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE);
762770
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
763771
SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
764772
}
@@ -768,6 +776,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
768776
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
769777
new_a->gen++;
770778
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
779+
alloc_data_type_set(new_a, new_a->data_type);
771780
}
772781

773782
if (old_a->data_type != new_a->data_type ||
@@ -781,7 +790,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
781790

782791
if (new_a->data_type == BCH_DATA_cached &&
783792
!new_a->io_time[READ])
784-
new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
793+
new_a->io_time[READ] = bch2_current_io_time(c, READ);
785794

786795
u64 old_lru = alloc_lru_idx_read(*old_a);
787796
u64 new_lru = alloc_lru_idx_read(*new_a);
@@ -882,7 +891,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
882891
closure_wake_up(&c->freelist_wait);
883892

884893
if (statechange(a->data_type == BCH_DATA_need_discard) &&
885-
!bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
894+
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
886895
bucket_flushed(new_a))
887896
bch2_discard_one_bucket_fast(c, new.k->p);
888897

@@ -1579,7 +1588,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
15791588
if (ret)
15801589
goto err;
15811590

1582-
a_mut->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
1591+
a_mut->v.io_time[READ] = bch2_current_io_time(c, READ);
15831592
ret = bch2_trans_update(trans, alloc_iter,
15841593
&a_mut->k_i, BTREE_TRIGGER_norun);
15851594
if (ret)
@@ -1634,7 +1643,7 @@ static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket)
16341643
mutex_lock(&c->discard_buckets_in_flight_lock);
16351644
darray_for_each(c->discard_buckets_in_flight, i)
16361645
if (bkey_eq(*i, bucket)) {
1637-
ret = -EEXIST;
1646+
ret = -BCH_ERR_EEXIST_discard_in_flight_add;
16381647
goto out;
16391648
}
16401649

@@ -1788,8 +1797,9 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
17881797
}
17891798

17901799
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
1791-
alloc_data_type_set(&a->v, a->v.data_type);
17921800
write:
1801+
alloc_data_type_set(&a->v, a->v.data_type);
1802+
17931803
ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
17941804
bch2_trans_commit(trans, NULL, NULL,
17951805
BCH_WATERMARK_btree|
@@ -1975,8 +1985,8 @@ static int invalidate_one_bucket(struct btree_trans *trans,
19751985
a->v.data_type = 0;
19761986
a->v.dirty_sectors = 0;
19771987
a->v.cached_sectors = 0;
1978-
a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
1979-
a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now);
1988+
a->v.io_time[READ] = bch2_current_io_time(c, READ);
1989+
a->v.io_time[WRITE] = bch2_current_io_time(c, WRITE);
19801990

19811991
ret = bch2_trans_commit(trans, NULL, NULL,
19821992
BCH_WATERMARK_btree|
@@ -2011,6 +2021,21 @@ static int invalidate_one_bucket(struct btree_trans *trans,
20112021
goto out;
20122022
}
20132023

2024+
static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter,
2025+
struct bch_dev *ca, bool *wrapped)
2026+
{
2027+
struct bkey_s_c k;
2028+
again:
2029+
k = bch2_btree_iter_peek_upto(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));
2030+
if (!k.k && !*wrapped) {
2031+
bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0));
2032+
*wrapped = true;
2033+
goto again;
2034+
}
2035+
2036+
return k;
2037+
}
2038+
20142039
static void bch2_do_invalidates_work(struct work_struct *work)
20152040
{
20162041
struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
@@ -2024,12 +2049,33 @@ static void bch2_do_invalidates_work(struct work_struct *work)
20242049
for_each_member_device(c, ca) {
20252050
s64 nr_to_invalidate =
20262051
should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
2052+
struct btree_iter iter;
2053+
bool wrapped = false;
2054+
2055+
bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
2056+
lru_pos(ca->dev_idx, 0,
2057+
((bch2_current_io_time(c, READ) + U32_MAX) &
2058+
LRU_TIME_MAX)), 0);
20272059

2028-
ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru,
2029-
lru_pos(ca->dev_idx, 0, 0),
2030-
lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
2031-
BTREE_ITER_intent, k,
2032-
invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate));
2060+
while (true) {
2061+
bch2_trans_begin(trans);
2062+
2063+
struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
2064+
ret = bkey_err(k);
2065+
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
2066+
continue;
2067+
if (ret)
2068+
break;
2069+
if (!k.k)
2070+
break;
2071+
2072+
ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
2073+
if (ret)
2074+
break;
2075+
2076+
bch2_btree_iter_advance(&iter);
2077+
}
2078+
bch2_trans_iter_exit(trans, &iter);
20332079

20342080
if (ret < 0) {
20352081
bch2_dev_put(ca);
@@ -2204,7 +2250,7 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
22042250
if (ret)
22052251
return ret;
22062252

2207-
now = atomic64_read(&c->io_clock[rw].now);
2253+
now = bch2_current_io_time(c, rw);
22082254
if (a->v.io_time[rw] == now)
22092255
goto out;
22102256

fs/bcachefs/alloc_background.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,13 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
141141
!bch2_bucket_sectors_fragmented(ca, a))
142142
return 0;
143143

144-
u64 d = bch2_bucket_sectors_dirty(a);
144+
/*
145+
* avoid overflowing LRU_TIME_BITS on a corrupted fs, when
146+
* bucket_sectors_dirty is (much) bigger than bucket_size
147+
*/
148+
u64 d = min(bch2_bucket_sectors_dirty(a),
149+
ca->mi.bucket_size);
150+
145151
return div_u64(d * (1ULL << 31), ca->mi.bucket_size);
146152
}
147153

fs/bcachefs/bcachefs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,6 +1214,11 @@ static inline s64 bch2_current_time(const struct bch_fs *c)
12141214
return timespec_to_bch2_time(c, now);
12151215
}
12161216

1217+
static inline u64 bch2_current_io_time(const struct bch_fs *c, int rw)
1218+
{
1219+
return max(1ULL, (u64) atomic64_read(&c->io_clock[rw].now) & LRU_TIME_MAX);
1220+
}
1221+
12171222
static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c)
12181223
{
12191224
struct stdio_redirect *stdio = c->stdio;

fs/bcachefs/bcachefs_format.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,9 @@ struct bch_lru {
476476

477477
#define LRU_ID_STRIPES (1U << 16)
478478

479+
#define LRU_TIME_BITS 48
480+
#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
481+
479482
/* Optional/variable size superblock sections: */
480483

481484
struct bch_sb_field {
@@ -987,8 +990,9 @@ enum bch_version_upgrade_opts {
987990

988991
#define BCH_ERROR_ACTIONS() \
989992
x(continue, 0) \
990-
x(ro, 1) \
991-
x(panic, 2)
993+
x(fix_safe, 1) \
994+
x(panic, 2) \
995+
x(ro, 3)
992996

993997
enum bch_error_actions {
994998
#define x(t, n) BCH_ON_ERROR_##t = n,
@@ -1382,9 +1386,10 @@ enum btree_id {
13821386

13831387
/*
13841388
* Maximum number of btrees that we will _ever_ have under the current scheme,
1385-
* where we refer to them with bitfields
1389+
* where we refer to them with 64 bit bitfields - and we also need a bit for
1390+
* the interior btree node type:
13861391
*/
1387-
#define BTREE_ID_NR_MAX 64
1392+
#define BTREE_ID_NR_MAX 63
13881393

13891394
static inline bool btree_id_is_alloc(enum btree_id id)
13901395
{

fs/bcachefs/bkey.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1064,7 +1064,7 @@ void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k)
10641064
{
10651065
const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current;
10661066
u8 *l = k->key_start;
1067-
u8 *h = (u8 *) (k->_data + f->key_u64s) - 1;
1067+
u8 *h = (u8 *) ((u64 *) k->_data + f->key_u64s) - 1;
10681068

10691069
while (l < h) {
10701070
swap(*l, *h);

fs/bcachefs/bkey_methods.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,8 +398,12 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
398398
for (i = 0; i < nr_compat; i++)
399399
switch (!write ? i : nr_compat - 1 - i) {
400400
case 0:
401-
if (big_endian != CPU_BIG_ENDIAN)
401+
if (big_endian != CPU_BIG_ENDIAN) {
402+
bch2_bkey_swab_key(f, k);
403+
} else if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
402404
bch2_bkey_swab_key(f, k);
405+
bch2_bkey_swab_key(f, k);
406+
}
403407
break;
404408
case 1:
405409
if (version < bcachefs_metadata_version_bkey_renumber)

fs/bcachefs/bkey_methods.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id,
129129
struct bkey_packed *k)
130130
{
131131
if (version < bcachefs_metadata_version_current ||
132-
big_endian != CPU_BIG_ENDIAN)
132+
big_endian != CPU_BIG_ENDIAN ||
133+
IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
133134
__bch2_bkey_compat(level, btree_id, version,
134135
big_endian, write, f, k);
135136

fs/bcachefs/btree_iter.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3161,6 +3161,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
31613161
list_add_done:
31623162
seqmutex_unlock(&c->btree_trans_lock);
31633163
got_trans:
3164+
trans->ref.closure_get_happened = false;
31643165
trans->c = c;
31653166
trans->last_begin_time = local_clock();
31663167
trans->fn_idx = fn_idx;
@@ -3235,7 +3236,6 @@ void bch2_trans_put(struct btree_trans *trans)
32353236
trans_for_each_update(trans, i)
32363237
__btree_path_put(trans->paths + i->path, true);
32373238
trans->nr_updates = 0;
3238-
trans->locking_wait.task = NULL;
32393239

32403240
check_btree_paths_leaked(trans);
32413241

@@ -3256,6 +3256,13 @@ void bch2_trans_put(struct btree_trans *trans)
32563256
if (unlikely(trans->journal_replay_not_finished))
32573257
bch2_journal_keys_put(c);
32583258

3259+
/*
3260+
* trans->ref protects trans->locking_wait.task, btree_paths array; used
3261+
* by cycle detector
3262+
*/
3263+
closure_sync(&trans->ref);
3264+
trans->locking_wait.task = NULL;
3265+
32593266
unsigned long *paths_allocated = trans->paths_allocated;
32603267
trans->paths_allocated = NULL;
32613268
trans->paths = NULL;
@@ -3273,8 +3280,6 @@ void bch2_trans_put(struct btree_trans *trans)
32733280
trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);
32743281

32753282
if (trans) {
3276-
closure_sync(&trans->ref);
3277-
32783283
seqmutex_lock(&c->btree_trans_lock);
32793284
list_del(&trans->list);
32803285
seqmutex_unlock(&c->btree_trans_lock);

fs/bcachefs/btree_types.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -761,13 +761,13 @@ static inline bool btree_node_type_needs_gc(enum btree_node_type type)
761761

762762
static inline bool btree_node_type_is_extents(enum btree_node_type type)
763763
{
764-
const unsigned mask = 0
764+
const u64 mask = 0
765765
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
766766
BCH_BTREE_IDS()
767767
#undef x
768768
;
769769

770-
return (1U << type) & mask;
770+
return BIT_ULL(type) & mask;
771771
}
772772

773773
static inline bool btree_id_is_extents(enum btree_id btree)
@@ -777,35 +777,35 @@ static inline bool btree_id_is_extents(enum btree_id btree)
777777

778778
static inline bool btree_type_has_snapshots(enum btree_id id)
779779
{
780-
const unsigned mask = 0
780+
const u64 mask = 0
781781
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
782782
BCH_BTREE_IDS()
783783
#undef x
784784
;
785785

786-
return (1U << id) & mask;
786+
return BIT_ULL(id) & mask;
787787
}
788788

789789
static inline bool btree_type_has_snapshot_field(enum btree_id id)
790790
{
791-
const unsigned mask = 0
791+
const u64 mask = 0
792792
#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
793793
BCH_BTREE_IDS()
794794
#undef x
795795
;
796796

797-
return (1U << id) & mask;
797+
return BIT_ULL(id) & mask;
798798
}
799799

800800
static inline bool btree_type_has_ptrs(enum btree_id id)
801801
{
802-
const unsigned mask = 0
802+
const u64 mask = 0
803803
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr)
804804
BCH_BTREE_IDS()
805805
#undef x
806806
;
807807

808-
return (1U << id) & mask;
808+
return BIT_ULL(id) & mask;
809809
}
810810

811811
struct btree_root {

fs/bcachefs/errcode.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@
116116
x(ENOENT, ENOENT_dev_idx_not_found) \
117117
x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \
118118
x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \
119+
x(EEXIST, EEXIST_str_hash_set) \
120+
x(EEXIST, EEXIST_discard_in_flight_add) \
121+
x(EEXIST, EEXIST_subvolume_create) \
119122
x(0, open_buckets_empty) \
120123
x(0, freelist_empty) \
121124
x(BCH_ERR_freelist_empty, no_buckets_found) \

0 commit comments

Comments
 (0)