Skip to content

Commit 32a5054

Browse files
committed
Merge tag 'bcachefs-2024-03-13' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs updates from Kent Overstreet: - Subvolume children btree; this is needed for providing a userspace interface for walking subvolumes, which will come later - Lots of improvements to directory structure checking - Improved journal pipelining, significantly improving performance on high iodepth write workloads - Discard path improvements: the discard path is more efficient, and no longer flushes the journal unnecessarily - Buffered write path can now avoid taking the inode lock - new mm helper: memalloc_flags_{save|restore} - mempool now does kvmalloc mempools * tag 'bcachefs-2024-03-13' of https://evilpiepirate.org/git/bcachefs: (128 commits) bcachefs: time_stats: shrink time_stat_buffer for better alignment bcachefs: time_stats: split stats-with-quantiles into a separate structure bcachefs: mean_and_variance: put struct mean_and_variance_weighted on a diet bcachefs: time_stats: add larger units bcachefs: pull out time_stats.[ch] bcachefs: reconstruct_alloc cleanup bcachefs: fix bch_folio_sector padding bcachefs: Fix btree key cache coherency during replay bcachefs: Always flush write buffer in delete_dead_inodes() bcachefs: Fix order of gc_done passes bcachefs: fix deletion of indirect extents in btree_gc bcachefs: Prefer struct_size over open coded arithmetic bcachefs: Kill unused flags argument to btree_split() bcachefs: Check for writing superblocks with nonsense member seq fields bcachefs: fix bch2_journal_buf_to_text() lib/generic-radix-tree.c: Make nodes more reasonably sized bcachefs: copy_(to|from)_user_errcode() bcachefs: Split out bkey_types.h bcachefs: fix lost journal buf wakeup due to improved pipelining bcachefs: intercept mountoption value for bool type ...
2 parents e5eb28f + be28368 commit 32a5054

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+3770
-2253
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
.. SPDX-License-Identifier: GPL-2.0
2+
3+
bcachefs private error codes
4+
----------------------------
5+
6+
In bcachefs, as a hard rule we do not throw or directly use standard error
7+
codes (-EINVAL, -EBUSY, etc.). Instead, we define private error codes as needed
8+
in fs/bcachefs/errcode.h.
9+
10+
This gives us much better error messages and makes debugging much easier. Any
11+
direct uses of standard error codes you see in the source code are simply old
12+
code that has yet to be converted - feel free to clean it up!
13+
14+
Private error codes may subtype another error code; this allows for grouping of
15+
related errors that should be handled similarly (e.g. transaction restart
16+
errors), as well as specifying which standard error code should be returned at
17+
the bcachefs module boundary.
18+
19+
At the module boundary, we use bch2_err_class() to convert to a standard error
20+
code; this also emits a trace event so that the original error code can be
21+
recovered even if it wasn't logged.
22+
23+
Do not reuse error codes! Generally speaking, a private error code should only
24+
be thrown in one place. That means that when we see it in a log message we can
25+
see, unambiguously, exactly which file and line number it was returned from.
26+
27+
Try to give error codes names that are as descriptive of the error
28+
as possible. Frequently, the error will be logged at a place far removed from
29+
where the error was generated; good names for error codes mean much more
30+
descriptive and useful error messages.

MAINTAINERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3555,6 +3555,7 @@ R: Brian Foster <bfoster@redhat.com>
35553555
L: linux-bcachefs@vger.kernel.org
35563556
S: Supported
35573557
C: irc://irc.oftc.net/bcache
3558+
T: git https://evilpiepirate.org/git/bcachefs.git
35583559
F: fs/bcachefs/
35593560

35603561
BDISP ST MEDIA DRIVER

fs/bcachefs/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ bcachefs-y := \
8282
super-io.o \
8383
sysfs.o \
8484
tests.o \
85+
time_stats.o \
8586
thread_with_file.o \
8687
trace.o \
8788
two_state_shared_lock.o \
@@ -90,3 +91,6 @@ bcachefs-y := \
9091
xattr.o
9192

9293
obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST) += mean_and_variance_test.o
94+
95+
# Silence "note: xyz changed in GCC X.X" messages
96+
subdir-ccflags-y += $(call cc-disable-warning, psabi)

fs/bcachefs/alloc_background.c

Lines changed: 177 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
#include <linux/sched/task.h>
3030
#include <linux/sort.h>
3131

32+
static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket);
33+
3234
/* Persistent alloc info: */
3335

3436
static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
@@ -860,23 +862,28 @@ int bch2_trigger_alloc(struct btree_trans *trans,
860862
*bucket_gen(ca, new.k->p.offset) = new_a->gen;
861863

862864
bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false);
865+
percpu_up_read(&c->mark_lock);
866+
867+
#define eval_state(_a, expr) ({ const struct bch_alloc_v4 *a = _a; expr; })
868+
#define statechange(expr) !eval_state(old_a, expr) && eval_state(new_a, expr)
869+
#define bucket_flushed(a) (!a->journal_seq || a->journal_seq <= c->journal.flushed_seq_ondisk)
863870

864-
if (new_a->data_type == BCH_DATA_free &&
865-
(!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk))
871+
if (statechange(a->data_type == BCH_DATA_free) &&
872+
bucket_flushed(new_a))
866873
closure_wake_up(&c->freelist_wait);
867874

868-
if (new_a->data_type == BCH_DATA_need_discard &&
869-
(!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk))
870-
bch2_do_discards(c);
875+
if (statechange(a->data_type == BCH_DATA_need_discard) &&
876+
!bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
877+
bucket_flushed(new_a))
878+
bch2_discard_one_bucket_fast(c, new.k->p);
871879

872-
if (old_a->data_type != BCH_DATA_cached &&
873-
new_a->data_type == BCH_DATA_cached &&
880+
if (statechange(a->data_type == BCH_DATA_cached) &&
881+
!bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
874882
should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
875883
bch2_do_invalidates(c);
876884

877-
if (new_a->data_type == BCH_DATA_need_gc_gens)
885+
if (statechange(a->data_type == BCH_DATA_need_gc_gens))
878886
bch2_do_gc_gens(c);
879-
percpu_up_read(&c->mark_lock);
880887
}
881888

882889
if ((flags & BTREE_TRIGGER_GC) &&
@@ -1045,14 +1052,13 @@ int bch2_check_alloc_key(struct btree_trans *trans,
10451052
if (ret)
10461053
goto err;
10471054

1048-
if (k.k->type != discard_key_type &&
1049-
(c->opts.reconstruct_alloc ||
1050-
fsck_err(c, need_discard_key_wrong,
1051-
"incorrect key in need_discard btree (got %s should be %s)\n"
1052-
" %s",
1053-
bch2_bkey_types[k.k->type],
1054-
bch2_bkey_types[discard_key_type],
1055-
(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) {
1055+
if (fsck_err_on(k.k->type != discard_key_type,
1056+
c, need_discard_key_wrong,
1057+
"incorrect key in need_discard btree (got %s should be %s)\n"
1058+
" %s",
1059+
bch2_bkey_types[k.k->type],
1060+
bch2_bkey_types[discard_key_type],
1061+
(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
10561062
struct bkey_i *update =
10571063
bch2_trans_kmalloc(trans, sizeof(*update));
10581064

@@ -1076,15 +1082,14 @@ int bch2_check_alloc_key(struct btree_trans *trans,
10761082
if (ret)
10771083
goto err;
10781084

1079-
if (k.k->type != freespace_key_type &&
1080-
(c->opts.reconstruct_alloc ||
1081-
fsck_err(c, freespace_key_wrong,
1082-
"incorrect key in freespace btree (got %s should be %s)\n"
1083-
" %s",
1084-
bch2_bkey_types[k.k->type],
1085-
bch2_bkey_types[freespace_key_type],
1086-
(printbuf_reset(&buf),
1087-
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) {
1085+
if (fsck_err_on(k.k->type != freespace_key_type,
1086+
c, freespace_key_wrong,
1087+
"incorrect key in freespace btree (got %s should be %s)\n"
1088+
" %s",
1089+
bch2_bkey_types[k.k->type],
1090+
bch2_bkey_types[freespace_key_type],
1091+
(printbuf_reset(&buf),
1092+
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
10881093
struct bkey_i *update =
10891094
bch2_trans_kmalloc(trans, sizeof(*update));
10901095

@@ -1108,14 +1113,13 @@ int bch2_check_alloc_key(struct btree_trans *trans,
11081113
if (ret)
11091114
goto err;
11101115

1111-
if (a->gen != alloc_gen(k, gens_offset) &&
1112-
(c->opts.reconstruct_alloc ||
1113-
fsck_err(c, bucket_gens_key_wrong,
1114-
"incorrect gen in bucket_gens btree (got %u should be %u)\n"
1115-
" %s",
1116-
alloc_gen(k, gens_offset), a->gen,
1117-
(printbuf_reset(&buf),
1118-
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) {
1116+
if (fsck_err_on(a->gen != alloc_gen(k, gens_offset),
1117+
c, bucket_gens_key_wrong,
1118+
"incorrect gen in bucket_gens btree (got %u should be %u)\n"
1119+
" %s",
1120+
alloc_gen(k, gens_offset), a->gen,
1121+
(printbuf_reset(&buf),
1122+
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
11191123
struct bkey_i_bucket_gens *g =
11201124
bch2_trans_kmalloc(trans, sizeof(*g));
11211125

@@ -1167,14 +1171,13 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
11671171

11681172
*end = bkey_min(k.k->p, *end);
11691173

1170-
if (k.k->type != KEY_TYPE_set &&
1171-
(c->opts.reconstruct_alloc ||
1172-
fsck_err(c, freespace_hole_missing,
1173-
"hole in alloc btree missing in freespace btree\n"
1174-
" device %llu buckets %llu-%llu",
1175-
freespace_iter->pos.inode,
1176-
freespace_iter->pos.offset,
1177-
end->offset))) {
1174+
if (fsck_err_on(k.k->type != KEY_TYPE_set,
1175+
c, freespace_hole_missing,
1176+
"hole in alloc btree missing in freespace btree\n"
1177+
" device %llu buckets %llu-%llu",
1178+
freespace_iter->pos.inode,
1179+
freespace_iter->pos.offset,
1180+
end->offset)) {
11781181
struct bkey_i *update =
11791182
bch2_trans_kmalloc(trans, sizeof(*update));
11801183

@@ -1604,6 +1607,36 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
16041607
return ret;
16051608
}
16061609

1610+
/*
 * Record @bucket in c->discard_buckets_in_flight.
 *
 * The list is scanned and appended to while holding
 * c->discard_buckets_in_flight_lock.
 *
 * Returns -EEXIST if an entry that compares equal to @bucket under bkey_eq()
 * is already present; otherwise returns whatever darray_push() returns.
 */
static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket)
1611+
{
1612+
int ret;
1613+
1614+
mutex_lock(&c->discard_buckets_in_flight_lock);
1615+
/* Refuse duplicates: a bucket may only be listed once at a time. */
darray_for_each(c->discard_buckets_in_flight, i)
1616+
if (bkey_eq(*i, bucket)) {
1617+
ret = -EEXIST;
1618+
goto out;
1619+
}
1620+
1621+
ret = darray_push(&c->discard_buckets_in_flight, bucket);
1622+
out:
1623+
mutex_unlock(&c->discard_buckets_in_flight_lock);
1624+
return ret;
1625+
}
1626+
1627+
/*
 * Remove the first entry matching @bucket (per bkey_eq()) from
 * c->discard_buckets_in_flight, under c->discard_buckets_in_flight_lock.
 *
 * Hits BUG() if no matching entry exists, so this must only be called for a
 * bucket that is currently on the list.
 */
static void discard_in_flight_remove(struct bch_fs *c, struct bpos bucket)
1628+
{
1629+
mutex_lock(&c->discard_buckets_in_flight_lock);
1630+
darray_for_each(c->discard_buckets_in_flight, i)
1631+
if (bkey_eq(*i, bucket)) {
1632+
darray_remove_item(&c->discard_buckets_in_flight, i);
1633+
goto found;
1634+
}
1635+
/* Reached only when the scan found no matching entry. */
BUG();
1636+
found:
1637+
mutex_unlock(&c->discard_buckets_in_flight_lock);
1638+
}
1639+
16071640
struct discard_buckets_state {
16081641
u64 seen;
16091642
u64 open;
@@ -1642,6 +1675,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
16421675
struct bch_dev *ca;
16431676
struct bkey_i_alloc_v4 *a;
16441677
struct printbuf buf = PRINTBUF;
1678+
bool discard_locked = false;
16451679
int ret = 0;
16461680

16471681
ca = bch_dev_bkey_exists(c, pos.inode);
@@ -1709,6 +1743,11 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
17091743
goto out;
17101744
}
17111745

1746+
if (discard_in_flight_add(c, SPOS(iter.pos.inode, iter.pos.offset, true)))
1747+
goto out;
1748+
1749+
discard_locked = true;
1750+
17121751
if (!bkey_eq(*discard_pos_done, iter.pos) &&
17131752
ca->mi.discard && !c->opts.nochanges) {
17141753
/*
@@ -1740,6 +1779,8 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
17401779
count_event(c, bucket_discard);
17411780
s->discarded++;
17421781
out:
1782+
if (discard_locked)
1783+
discard_in_flight_remove(c, iter.pos);
17431784
s->seen++;
17441785
bch2_trans_iter_exit(trans, &iter);
17451786
percpu_ref_put(&ca->io_ref);
@@ -1779,6 +1820,93 @@ void bch2_do_discards(struct bch_fs *c)
17791820
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
17801821
}
17811822

1823+
/*
 * Within transaction @trans, clear the NEED_DISCARD flag on the alloc key for
 * @bucket and recompute its data_type.
 *
 * Looks up the slot at @bucket in BTREE_ID_alloc with an intent iterator,
 * obtains a mutable v4 copy of the key, updates it and queues the update via
 * bch2_trans_update().
 *
 * Returns 0 on success or the error from the lookup, the v4 conversion or the
 * update. The iterator is released on every path.
 */
static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket)
1824+
{
1825+
struct btree_iter iter;
1826+
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_INTENT);
1827+
struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
1828+
int ret = bkey_err(k);
1829+
if (ret)
1830+
goto err;
1831+
1832+
struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, k);
1833+
ret = PTR_ERR_OR_ZERO(a);
1834+
if (ret)
1835+
goto err;
1836+
1837+
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
1838+
/* data_type is derived from the key contents, so recompute it after clearing the flag. */
a->v.data_type = alloc_data_type(a->v, a->v.data_type);
1839+
1840+
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
1841+
err:
1842+
bch2_trans_iter_exit(trans, &iter);
1843+
return ret;
1844+
}
1845+
1846+
/*
 * Work function for c->discard_fast_work.
 *
 * Repeatedly claims one unclaimed entry from c->discard_buckets_in_flight,
 * optionally issues a block-layer discard for it, clears the bucket's
 * NEED_DISCARD flag in a transaction, and removes the entry from the list.
 * The loop ends when no claimable entry remains or a transaction fails.
 *
 * The snapshot field of each list entry is used as an "already claimed" marker:
 * entries with it set are skipped, and the claimed entry has it set to true.
 *
 * Drops the BCH_WRITE_REF_discard_fast write ref before returning.
 */
static void bch2_do_discards_fast_work(struct work_struct *work)
1847+
{
1848+
struct bch_fs *c = container_of(work, struct bch_fs, discard_fast_work);
1849+
1850+
while (1) {
1851+
bool got_bucket = false;
1852+
struct bpos bucket;
1853+
struct bch_dev *ca;
1854+
1855+
mutex_lock(&c->discard_buckets_in_flight_lock);
1856+
darray_for_each(c->discard_buckets_in_flight, i) {
1857+
/* Nonzero snapshot marks an entry that has already been claimed. */
if (i->snapshot)
1858+
continue;
1859+
1860+
ca = bch_dev_bkey_exists(c, i->inode);
1861+
1862+
/*
 * Could not take an io_ref on the device: drop the entry.
 * NOTE(review): if darray_remove_item() shifts later entries down,
 * the entry that moves into this slot is skipped for the rest of
 * this scan - confirm against the darray implementation.
 */
if (!percpu_ref_tryget(&ca->io_ref)) {
1863+
darray_remove_item(&c->discard_buckets_in_flight, i);
1864+
continue;
1865+
}
1866+
1867+
got_bucket = true;
1868+
/* Copy taken before the entry is marked, so bucket.snapshot keeps the unmarked value. */
bucket = *i;
1869+
i->snapshot = true;
1870+
break;
1871+
}
1872+
mutex_unlock(&c->discard_buckets_in_flight_lock);
1873+
1874+
if (!got_bucket)
1875+
break;
1876+
1877+
/* Discard only if enabled for the device and changes are allowed; the return value is not checked. */
if (ca->mi.discard && !c->opts.nochanges)
1878+
blkdev_issue_discard(ca->disk_sb.bdev,
1879+
bucket.offset * ca->mi.bucket_size,
1880+
ca->mi.bucket_size,
1881+
GFP_KERNEL);
1882+
1883+
int ret = bch2_trans_do(c, NULL, NULL,
1884+
BCH_WATERMARK_btree|
1885+
BCH_TRANS_COMMIT_no_enospc,
1886+
bch2_clear_bucket_needs_discard(trans, bucket));
1887+
bch_err_fn(c, ret);
1888+
1889+
/* Release the device ref and the list entry whether or not the transaction succeeded. */
percpu_ref_put(&ca->io_ref);
1890+
discard_in_flight_remove(c, bucket);
1891+
1892+
if (ret)
1893+
break;
1894+
}
1895+
1896+
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
1897+
}
1898+
1899+
/*
 * Queue @bucket for the fast discard worker.
 *
 * The condition is a short-circuit chain, evaluated in order:
 *  1. the device's io_ref must not be dying;
 *  2. @bucket must be newly added to the in-flight list (add returns 0);
 *  3. a BCH_WRITE_REF_discard_fast write ref must be obtained;
 *  4. queue_work() is called on c->write_ref_wq.
 *
 * The write ref is dropped here only when queue_work() returns false;
 * otherwise bch2_do_discards_fast_work() drops it when it finishes.
 * (Presumably a false return means the work item was already pending -
 * confirm against the workqueue API.)
 *
 * If step 3 fails, the bucket stays on the in-flight list without the work
 * being queued by this call.
 */
static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket)
1900+
{
1901+
struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
1902+
1903+
if (!percpu_ref_is_dying(&ca->io_ref) &&
1904+
!discard_in_flight_add(c, bucket) &&
1905+
bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast) &&
1906+
!queue_work(c->write_ref_wq, &c->discard_fast_work))
1907+
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
1908+
}
1909+
17821910
static int invalidate_one_bucket(struct btree_trans *trans,
17831911
struct btree_iter *lru_iter,
17841912
struct bkey_s_c lru_k,
@@ -2210,9 +2338,16 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
22102338
set_bit(ca->dev_idx, c->rw_devs[i].d);
22112339
}
22122340

2341+
/*
 * Teardown counterpart of bch2_fs_allocator_background_init(): releases the
 * c->discard_buckets_in_flight darray via darray_exit().
 */
void bch2_fs_allocator_background_exit(struct bch_fs *c)
2342+
{
2343+
darray_exit(&c->discard_buckets_in_flight);
2344+
}
2345+
22132346
/*
 * Initialise the background allocator state in @c: the freelist spinlock, the
 * mutex guarding the in-flight discard list, and the work items for the
 * discard, fast-discard and invalidate workers.
 */
void bch2_fs_allocator_background_init(struct bch_fs *c)
22142347
{
22152348
spin_lock_init(&c->freelist_lock);
2349+
mutex_init(&c->discard_buckets_in_flight_lock);
22162350
INIT_WORK(&c->discard_work, bch2_do_discards_work);
2351+
INIT_WORK(&c->discard_fast_work, bch2_do_discards_fast_work);
22172352
INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work);
22182353
}

fs/bcachefs/alloc_background.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *);
269269
void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
270270
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
271271

272+
void bch2_fs_allocator_background_exit(struct bch_fs *);
272273
void bch2_fs_allocator_background_init(struct bch_fs *);
273274

274275
#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */

0 commit comments

Comments
 (0)