Skip to content

Commit 981d041

Browse files
committed
Merge tag 'bcachefs-2024-01-01' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs from Kent Overstreet: "More bcachefs bugfixes for 6.7, and forwards compatibility work: - fix for a nasty extents + snapshot interaction, reported when reflink of a snapshotted file wouldn't complete but turned out to be a more general bug - fix for an invalid free in dio write path when iov vector was longer than our inline vector - fix for a buffer overflow in the nocow write path - BCH_REPLICAS_MAX doesn't actually limit the number of pointers in an extent when cached pointers are included - RO snapshots are actually RO now - And, a new superblock section to avoid future breakage when the disk space accounting rewrite rolls out: the new superblock section describes versions that need work to downgrade, where the work required is a list of recovery passes and errors to silently fix" * tag 'bcachefs-2024-01-01' of https://evilpiepirate.org/git/bcachefs: bcachefs: make RO snapshots actually RO bcachefs: bch_sb_field_downgrade bcachefs: bch_sb.recovery_passes_required bcachefs: Add persistent identifiers for recovery passes bcachefs: prt_bitflags_vector() bcachefs: move BCH_SB_ERRS() to sb-errors_types.h bcachefs: fix buffer overflow in nocow write path bcachefs: DARRAY_PREALLOCATED() bcachefs: Switch darray to kvmalloc() bcachefs: Factor out darray resize slowpath bcachefs: fix setting version_upgrade_complete bcachefs: fix invalid free in dio write path bcachefs: Fix extents iteration + snapshots interaction
2 parents 610a9b8 + 0d72ab3 commit 981d041

30 files changed

+977
-423
lines changed

fs/bcachefs/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ bcachefs-y := \
2828
clock.o \
2929
compress.o \
3030
counters.o \
31+
darray.o \
3132
debug.o \
3233
dirent.o \
3334
disk_groups.o \
@@ -70,6 +71,7 @@ bcachefs-y := \
7071
reflink.o \
7172
replicas.o \
7273
sb-clean.o \
74+
sb-downgrade.o \
7375
sb-errors.o \
7476
sb-members.o \
7577
siphash.o \

fs/bcachefs/acl.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@ int bch2_set_acl(struct mnt_idmap *idmap,
366366
bch2_trans_begin(trans);
367367
acl = _acl;
368368

369-
ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
369+
ret = bch2_subvol_is_ro_trans(trans, inode->ei_subvol) ?:
370+
bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
370371
BTREE_ITER_INTENT);
371372
if (ret)
372373
goto btree_err;

fs/bcachefs/bcachefs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,7 @@ struct bch_fs {
737737
unsigned nsec_per_time_unit;
738738
u64 features;
739739
u64 compat;
740+
unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
740741
} sb;
741742

742743

fs/bcachefs/bcachefs_format.h

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,19 +1207,21 @@ struct bch_sb_field {
12071207
};
12081208

12091209
#define BCH_SB_FIELDS() \
1210-
x(journal, 0) \
1211-
x(members_v1, 1) \
1212-
x(crypt, 2) \
1213-
x(replicas_v0, 3) \
1214-
x(quota, 4) \
1215-
x(disk_groups, 5) \
1216-
x(clean, 6) \
1217-
x(replicas, 7) \
1218-
x(journal_seq_blacklist, 8) \
1219-
x(journal_v2, 9) \
1220-
x(counters, 10) \
1221-
x(members_v2, 11) \
1222-
x(errors, 12)
1210+
x(journal, 0) \
1211+
x(members_v1, 1) \
1212+
x(crypt, 2) \
1213+
x(replicas_v0, 3) \
1214+
x(quota, 4) \
1215+
x(disk_groups, 5) \
1216+
x(clean, 6) \
1217+
x(replicas, 7) \
1218+
x(journal_seq_blacklist, 8) \
1219+
x(journal_v2, 9) \
1220+
x(counters, 10) \
1221+
x(members_v2, 11) \
1222+
x(errors, 12) \
1223+
x(ext, 13) \
1224+
x(downgrade, 14)
12231225

12241226
enum bch_sb_field_type {
12251227
#define x(f, nr) BCH_SB_FIELD_##f = nr,
@@ -1631,6 +1633,24 @@ struct bch_sb_field_errors {
16311633
LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16);
16321634
LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
16331635

1636+
struct bch_sb_field_ext {
1637+
struct bch_sb_field field;
1638+
__le64 recovery_passes_required[2];
1639+
__le64 errors_silent[8];
1640+
};
1641+
1642+
struct bch_sb_field_downgrade_entry {
1643+
__le16 version;
1644+
__le64 recovery_passes[2];
1645+
__le16 nr_errors;
1646+
__le16 errors[] __counted_by(nr_errors);
1647+
} __packed __aligned(2);
1648+
1649+
struct bch_sb_field_downgrade {
1650+
struct bch_sb_field field;
1651+
struct bch_sb_field_downgrade_entry entries[];
1652+
};
1653+
16341654
/* Superblock: */
16351655

16361656
/*
@@ -1644,6 +1664,11 @@ LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
16441664

16451665
#define RECOVERY_PASS_ALL_FSCK (1ULL << 63)
16461666

1667+
/*
1668+
* field 1: version name
1669+
* field 2: BCH_VERSION(major, minor)
1670+
* field 3: recovery passes required on upgrade
1671+
*/
16471672
#define BCH_METADATA_VERSIONS() \
16481673
x(bkey_renumber, BCH_VERSION(0, 10), \
16491674
RECOVERY_PASS_ALL_FSCK) \

fs/bcachefs/btree_iter.c

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2085,18 +2085,16 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
20852085
goto out_no_locked;
20862086

20872087
/*
2088-
* iter->pos should be monotonically increasing, and always be
2089-
* equal to the key we just returned - except extents can
2090-
* straddle iter->pos:
2088+
* We need to check against @end before FILTER_SNAPSHOTS because
2089+
* if we get to a different inode than requested we might be
2090+
* seeing keys for a different snapshot tree that will all be
2091+
* filtered out.
2092+
*
2093+
* But we can't do the full check here, because bkey_start_pos()
2094+
* isn't monotonically increasing before FILTER_SNAPSHOTS, and
2095+
* that's what we check against in extents mode:
20912096
*/
2092-
if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
2093-
iter_pos = k.k->p;
2094-
else
2095-
iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
2096-
2097-
if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
2098-
? bkey_gt(iter_pos, end)
2099-
: bkey_ge(iter_pos, end)))
2097+
if (k.k->p.inode > end.inode)
21002098
goto end;
21012099

21022100
if (iter->update_path &&
@@ -2155,6 +2153,21 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
21552153
continue;
21562154
}
21572155

2156+
/*
2157+
* iter->pos should be monotonically increasing, and always be
2158+
* equal to the key we just returned - except extents can
2159+
* straddle iter->pos:
2160+
*/
2161+
if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
2162+
iter_pos = k.k->p;
2163+
else
2164+
iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
2165+
2166+
if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
2167+
? bkey_gt(iter_pos, end)
2168+
: bkey_ge(iter_pos, end)))
2169+
goto end;
2170+
21582171
break;
21592172
}
21602173

fs/bcachefs/darray.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include <linux/log2.h>
4+
#include <linux/slab.h>
5+
#include "darray.h"
6+
7+
int __bch2_darray_resize(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp)
8+
{
9+
if (new_size > d->size) {
10+
new_size = roundup_pow_of_two(new_size);
11+
12+
void *data = kvmalloc_array(new_size, element_size, gfp);
13+
if (!data)
14+
return -ENOMEM;
15+
16+
memcpy(data, d->data, d->size * element_size);
17+
if (d->data != d->preallocated)
18+
kvfree(d->data);
19+
d->data = data;
20+
d->size = new_size;
21+
}
22+
23+
return 0;
24+
}

fs/bcachefs/darray.h

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,39 +8,48 @@
88
* Inspired by CCAN's darray
99
*/
1010

11-
#include "util.h"
1211
#include <linux/slab.h>
1312

14-
#define DARRAY(type) \
13+
#define DARRAY_PREALLOCATED(_type, _nr) \
1514
struct { \
1615
size_t nr, size; \
17-
type *data; \
16+
_type *data; \
17+
_type preallocated[_nr]; \
1818
}
1919

20-
typedef DARRAY(void) darray_void;
20+
#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0)
2121

22-
static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp)
22+
typedef DARRAY(char) darray_char;
23+
24+
int __bch2_darray_resize(darray_char *, size_t, size_t, gfp_t);
25+
26+
static inline int __darray_resize(darray_char *d, size_t element_size,
27+
size_t new_size, gfp_t gfp)
2328
{
24-
if (d->nr + more > d->size) {
25-
size_t new_size = roundup_pow_of_two(d->nr + more);
26-
void *data = krealloc_array(d->data, new_size, t_size, gfp);
29+
return unlikely(new_size > d->size)
30+
? __bch2_darray_resize(d, element_size, new_size, gfp)
31+
: 0;
32+
}
2733

28-
if (!data)
29-
return -ENOMEM;
34+
#define darray_resize_gfp(_d, _new_size, _gfp) \
35+
unlikely(__darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp))
3036

31-
d->data = data;
32-
d->size = new_size;
33-
}
37+
#define darray_resize(_d, _new_size) \
38+
darray_resize_gfp(_d, _new_size, GFP_KERNEL)
3439

35-
return 0;
40+
static inline int __darray_make_room(darray_char *d, size_t t_size, size_t more, gfp_t gfp)
41+
{
42+
return __darray_resize(d, t_size, d->nr + more, gfp);
3643
}
3744

3845
#define darray_make_room_gfp(_d, _more, _gfp) \
39-
__darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more), _gfp)
46+
__darray_make_room((darray_char *) (_d), sizeof((_d)->data[0]), (_more), _gfp)
4047

4148
#define darray_make_room(_d, _more) \
4249
darray_make_room_gfp(_d, _more, GFP_KERNEL)
4350

51+
#define darray_room(_d) ((_d).size - (_d).nr)
52+
4453
#define darray_top(_d) ((_d).data[(_d).nr])
4554

4655
#define darray_push_gfp(_d, _item, _gfp) \
@@ -80,13 +89,16 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more,
8089

8190
#define darray_init(_d) \
8291
do { \
83-
(_d)->data = NULL; \
84-
(_d)->nr = (_d)->size = 0; \
92+
(_d)->nr = 0; \
93+
(_d)->size = ARRAY_SIZE((_d)->preallocated); \
94+
(_d)->data = (_d)->size ? (_d)->preallocated : NULL; \
8595
} while (0)
8696

8797
#define darray_exit(_d) \
8898
do { \
89-
kfree((_d)->data); \
99+
if (!ARRAY_SIZE((_d)->preallocated) || \
100+
(_d)->data != (_d)->preallocated) \
101+
kvfree((_d)->data); \
90102
darray_init(_d); \
91103
} while (0)
92104

fs/bcachefs/errcode.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
x(ENOSPC, ENOSPC_sb_members) \
9696
x(ENOSPC, ENOSPC_sb_members_v2) \
9797
x(ENOSPC, ENOSPC_sb_crypt) \
98+
x(ENOSPC, ENOSPC_sb_downgrade) \
9899
x(ENOSPC, ENOSPC_btree_slot) \
99100
x(ENOSPC, ENOSPC_snapshot_tree) \
100101
x(ENOENT, ENOENT_bkey_type_mismatch) \
@@ -218,6 +219,8 @@
218219
x(BCH_ERR_invalid_sb, invalid_sb_quota) \
219220
x(BCH_ERR_invalid_sb, invalid_sb_errors) \
220221
x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \
222+
x(BCH_ERR_invalid_sb, invalid_sb_ext) \
223+
x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \
221224
x(BCH_ERR_invalid, invalid_bkey) \
222225
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
223226
x(EIO, btree_node_read_err) \

fs/bcachefs/error.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ int bch2_fsck_err(struct bch_fs *c,
152152
struct printbuf buf = PRINTBUF, *out = &buf;
153153
int ret = -BCH_ERR_fsck_ignore;
154154

155+
if (test_bit(err, c->sb.errors_silent))
156+
return -BCH_ERR_fsck_fix;
157+
155158
bch2_sb_error_count(c, err);
156159

157160
va_start(args, fmt);

fs/bcachefs/fs-io-direct.c

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -216,11 +216,11 @@ struct dio_write {
216216
struct address_space *mapping;
217217
struct bch_inode_info *inode;
218218
struct mm_struct *mm;
219+
const struct iovec *iov;
219220
unsigned loop:1,
220221
extending:1,
221222
sync:1,
222-
flush:1,
223-
free_iov:1;
223+
flush:1;
224224
struct quota_res quota_res;
225225
u64 written;
226226

@@ -312,12 +312,10 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
312312
return -1;
313313

314314
if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
315-
iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
315+
dio->iov = iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
316316
GFP_KERNEL);
317317
if (unlikely(!iov))
318318
return -ENOMEM;
319-
320-
dio->free_iov = true;
321319
}
322320

323321
memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov));
@@ -381,8 +379,7 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
381379

382380
bch2_pagecache_block_put(inode);
383381

384-
if (dio->free_iov)
385-
kfree(dio->iter.__iov);
382+
kfree(dio->iov);
386383

387384
ret = dio->op.error ?: ((long) dio->written << 9);
388385
bio_put(&dio->op.wbio.bio);
@@ -626,11 +623,11 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
626623
dio->mapping = mapping;
627624
dio->inode = inode;
628625
dio->mm = current->mm;
626+
dio->iov = NULL;
629627
dio->loop = false;
630628
dio->extending = extending;
631629
dio->sync = is_sync_kiocb(req) || extending;
632630
dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled;
633-
dio->free_iov = false;
634631
dio->quota_res.sectors = 0;
635632
dio->written = 0;
636633
dio->iter = *iter;

0 commit comments

Comments
 (0)