Skip to content

Commit 1e690ef

Browse files
author
Kent Overstreet
committed
bcachefs: Split out journal pins by btree level
This lets us flush the journal to go read-only more effectively.

Flushing the journal and going read-only requires halting mutually recursive processes, which strictly speaking are not guaranteed to terminate.

Flushing btree node journal pins will kick off a btree node write, and btree node writes on completion must do another btree update to the parent node to update the 'sectors_written' field for that node's key.

If the parent node is full and requires a split or compaction, that's going to generate a whole bunch of additional btree updates - alloc info, LRU btree, and more - which then have to be flushed, and the cycle repeats.

This process will terminate much more effectively if we tweak journal reclaim to flush btree updates leaf to root: i.e., don't flush updates for a given btree node (kicking off a write, and consuming space within that node up to the next block boundary) if there might still be unflushed updates in child nodes.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
1 parent 1c316eb commit 1e690ef

File tree

2 files changed

+22
-20
lines changed

2 files changed

+22
-20
lines changed

fs/bcachefs/journal_reclaim.c

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -384,12 +384,16 @@ void bch2_journal_pin_drop(struct journal *j,
384384
spin_unlock(&j->lock);
385385
}
386386

387-
static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
387+
static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin,
388+
journal_pin_flush_fn fn)
388389
{
389390
if (fn == bch2_btree_node_flush0 ||
390-
fn == bch2_btree_node_flush1)
391-
return JOURNAL_PIN_TYPE_btree;
392-
else if (fn == bch2_btree_key_cache_journal_flush)
391+
fn == bch2_btree_node_flush1) {
392+
unsigned idx = fn == bch2_btree_node_flush1;
393+
struct btree *b = container_of(pin, struct btree, writes[idx].journal);
394+
395+
return JOURNAL_PIN_TYPE_btree0 - b->c.level;
396+
} else if (fn == bch2_btree_key_cache_journal_flush)
393397
return JOURNAL_PIN_TYPE_key_cache;
394398
else
395399
return JOURNAL_PIN_TYPE_other;
@@ -441,7 +445,7 @@ void bch2_journal_pin_copy(struct journal *j,
441445

442446
bool reclaim = __journal_pin_drop(j, dst);
443447

444-
bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn));
448+
bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(dst, flush_fn));
445449

446450
if (reclaim)
447451
bch2_journal_reclaim_fast(j);
@@ -465,7 +469,7 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
465469

466470
bool reclaim = __journal_pin_drop(j, pin);
467471

468-
bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn));
472+
bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn));
469473

470474
if (reclaim)
471475
bch2_journal_reclaim_fast(j);
@@ -587,7 +591,7 @@ static size_t journal_flush_pins(struct journal *j,
587591
spin_lock(&j->lock);
588592
/* Pin might have been dropped or rearmed: */
589593
if (likely(!err && !j->flush_in_progress_dropped))
590-
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(flush_fn)]);
594+
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]);
591595
j->flush_in_progress = NULL;
592596
j->flush_in_progress_dropped = false;
593597
spin_unlock(&j->lock);
@@ -869,18 +873,13 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
869873

870874
mutex_lock(&j->reclaim_lock);
871875

872-
if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
873-
BIT(JOURNAL_PIN_TYPE_key_cache)|
874-
BIT(JOURNAL_PIN_TYPE_other))) {
875-
*did_work = true;
876-
goto unlock;
877-
}
878-
879-
if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
880-
BIT(JOURNAL_PIN_TYPE_btree))) {
881-
*did_work = true;
882-
goto unlock;
883-
}
876+
for (int type = JOURNAL_PIN_TYPE_NR - 1;
877+
type >= 0;
878+
--type)
879+
if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) {
880+
*did_work = true;
881+
goto unlock;
882+
}
884883

885884
if (seq_to_flush > journal_cur_seq(j))
886885
bch2_journal_entry_close(j);

fs/bcachefs/journal_types.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,10 @@ struct journal_buf {
5353
*/
5454

5555
enum journal_pin_type {
56-
JOURNAL_PIN_TYPE_btree,
56+
JOURNAL_PIN_TYPE_btree3,
57+
JOURNAL_PIN_TYPE_btree2,
58+
JOURNAL_PIN_TYPE_btree1,
59+
JOURNAL_PIN_TYPE_btree0,
5760
JOURNAL_PIN_TYPE_key_cache,
5861
JOURNAL_PIN_TYPE_other,
5962
JOURNAL_PIN_TYPE_NR,

0 commit comments

Comments
 (0)