Skip to content

Commit cd4284c

Browse files
committed
Merge tag 'vfs-6.6-merge-3' of ssh://gitolite.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs online fsck update from Darrick Wong: New code for 6.6: * Allow the kernel to initiate a freeze of a filesystem. The kernel and userspace can both hold a freeze on a filesystem at the same time; the freeze is not lifted until /both/ holders lift it. This will enable us to fix a longstanding bug in XFS online fsck. * Use kernel-initated fsfreeze to fix some longstanding false negatives in online fsck of the free space and inode counters. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Message-Id: <20230822182604.GB11286@frogsfrogsfrogs> Signed-off-by: Christian Brauner <brauner@kernel.org>
2 parents 3fb5a65 + ce85a1e commit cd4284c

File tree

4 files changed

+183
-38
lines changed

4 files changed

+183
-38
lines changed

fs/xfs/scrub/fscounters.c

Lines changed: 151 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-License-Identifier: GPL-2.0+
1+
// SPDX-License-Identifier: GPL-2.0-or-later
22
/*
33
* Copyright (C) 2019-2023 Oracle. All Rights Reserved.
44
* Author: Darrick J. Wong <djwong@kernel.org>
@@ -8,6 +8,8 @@
88
#include "xfs_shared.h"
99
#include "xfs_format.h"
1010
#include "xfs_trans_resv.h"
11+
#include "xfs_log_format.h"
12+
#include "xfs_trans.h"
1113
#include "xfs_mount.h"
1214
#include "xfs_alloc.h"
1315
#include "xfs_ialloc.h"
@@ -16,6 +18,7 @@
1618
#include "xfs_ag.h"
1719
#include "xfs_rtalloc.h"
1820
#include "xfs_inode.h"
21+
#include "xfs_icache.h"
1922
#include "scrub/scrub.h"
2023
#include "scrub/common.h"
2124
#include "scrub/trace.h"
@@ -53,6 +56,7 @@ struct xchk_fscounters {
5356
uint64_t frextents;
5457
unsigned long long icount_min;
5558
unsigned long long icount_max;
59+
bool frozen;
5660
};
5761

5862
/*
@@ -123,6 +127,82 @@ xchk_fscount_warmup(
123127
return error;
124128
}
125129

130+
static inline int
131+
xchk_fsfreeze(
132+
struct xfs_scrub *sc)
133+
{
134+
int error;
135+
136+
error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
137+
trace_xchk_fsfreeze(sc, error);
138+
return error;
139+
}
140+
141+
static inline int
142+
xchk_fsthaw(
143+
struct xfs_scrub *sc)
144+
{
145+
int error;
146+
147+
/* This should always succeed, we have a kernel freeze */
148+
error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
149+
trace_xchk_fsthaw(sc, error);
150+
return error;
151+
}
152+
153+
/*
154+
* We couldn't stabilize the filesystem long enough to sample all the variables
155+
* that comprise the summary counters and compare them to the percpu counters.
156+
* We need to disable all writer threads, which means taking the first two
157+
* freeze levels to put userspace to sleep, and the third freeze level to
158+
* prevent background threads from starting new transactions. Take one level
159+
* more to prevent other callers from unfreezing the filesystem while we run.
160+
*/
161+
STATIC int
162+
xchk_fscounters_freeze(
163+
struct xfs_scrub *sc)
164+
{
165+
struct xchk_fscounters *fsc = sc->buf;
166+
int error = 0;
167+
168+
if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
169+
sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
170+
mnt_drop_write_file(sc->file);
171+
}
172+
173+
/* Try to grab a kernel freeze. */
174+
while ((error = xchk_fsfreeze(sc)) == -EBUSY) {
175+
if (xchk_should_terminate(sc, &error))
176+
return error;
177+
178+
delay(HZ / 10);
179+
}
180+
if (error)
181+
return error;
182+
183+
fsc->frozen = true;
184+
return 0;
185+
}
186+
187+
/* Thaw the filesystem after checking or repairing fscounters. */
188+
STATIC void
189+
xchk_fscounters_cleanup(
190+
void *buf)
191+
{
192+
struct xchk_fscounters *fsc = buf;
193+
struct xfs_scrub *sc = fsc->sc;
194+
int error;
195+
196+
if (!fsc->frozen)
197+
return;
198+
199+
error = xchk_fsthaw(sc);
200+
if (error)
201+
xfs_emerg(sc->mp, "still frozen after scrub, err=%d", error);
202+
else
203+
fsc->frozen = false;
204+
}
205+
126206
int
127207
xchk_setup_fscounters(
128208
struct xfs_scrub *sc)
@@ -140,6 +220,7 @@ xchk_setup_fscounters(
140220
sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS);
141221
if (!sc->buf)
142222
return -ENOMEM;
223+
sc->buf_cleanup = xchk_fscounters_cleanup;
143224
fsc = sc->buf;
144225
fsc->sc = sc;
145226

@@ -150,7 +231,18 @@ xchk_setup_fscounters(
150231
if (error)
151232
return error;
152233

153-
return xchk_trans_alloc(sc, 0);
234+
/*
235+
* Pause all writer activity in the filesystem while we're scrubbing to
236+
* reduce the likelihood of background perturbations to the counters
237+
* throwing off our calculations.
238+
*/
239+
if (sc->flags & XCHK_TRY_HARDER) {
240+
error = xchk_fscounters_freeze(sc);
241+
if (error)
242+
return error;
243+
}
244+
245+
return xfs_trans_alloc_empty(sc->mp, &sc->tp);
154246
}
155247

156248
/*
@@ -290,8 +382,7 @@ xchk_fscount_aggregate_agcounts(
290382
if (fsc->ifree > fsc->icount) {
291383
if (tries--)
292384
goto retry;
293-
xchk_set_incomplete(sc);
294-
return 0;
385+
return -EDEADLOCK;
295386
}
296387

297388
return 0;
@@ -367,6 +458,8 @@ xchk_fscount_count_frextents(
367458
* Otherwise, we /might/ have a problem. If the change in the summations is
368459
* more than we want to tolerate, the filesystem is probably busy and we should
369460
* just send back INCOMPLETE and see if userspace will try again.
461+
*
462+
* If we're repairing then we require an exact match.
370463
*/
371464
static inline bool
372465
xchk_fscount_within_range(
@@ -396,21 +489,7 @@ xchk_fscount_within_range(
396489
if (expected >= min_value && expected <= max_value)
397490
return true;
398491

399-
/*
400-
* If the difference between the two summations is too large, the fs
401-
* might just be busy and so we'll mark the scrub incomplete. Return
402-
* true here so that we don't mark the counter corrupt.
403-
*
404-
* XXX: In the future when userspace can grant scrub permission to
405-
* quiesce the filesystem to solve the outsized variance problem, this
406-
* check should be moved up and the return code changed to signal to
407-
* userspace that we need quiesce permission.
408-
*/
409-
if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) {
410-
xchk_set_incomplete(sc);
411-
return true;
412-
}
413-
492+
/* Everything else is bad. */
414493
return false;
415494
}
416495

@@ -422,6 +501,7 @@ xchk_fscounters(
422501
struct xfs_mount *mp = sc->mp;
423502
struct xchk_fscounters *fsc = sc->buf;
424503
int64_t icount, ifree, fdblocks, frextents;
504+
bool try_again = false;
425505
int error;
426506

427507
/* Snapshot the percpu counters. */
@@ -431,9 +511,26 @@ xchk_fscounters(
431511
frextents = percpu_counter_sum(&mp->m_frextents);
432512

433513
/* No negative values, please! */
434-
if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0)
514+
if (icount < 0 || ifree < 0)
435515
xchk_set_corrupt(sc);
436516

517+
/*
518+
* If the filesystem is not frozen, the counter summation calls above
519+
* can race with xfs_mod_freecounter, which subtracts a requested space
520+
* reservation from the counter and undoes the subtraction if that made
521+
* the counter go negative. Therefore, it's possible to see negative
522+
* values here, and we should only flag that as a corruption if we
523+
* froze the fs. This is much more likely to happen with frextents
524+
* since there are no reserved pools.
525+
*/
526+
if (fdblocks < 0 || frextents < 0) {
527+
if (!fsc->frozen)
528+
return -EDEADLOCK;
529+
530+
xchk_set_corrupt(sc);
531+
return 0;
532+
}
533+
437534
/* See if icount is obviously wrong. */
438535
if (icount < fsc->icount_min || icount > fsc->icount_max)
439536
xchk_set_corrupt(sc);
@@ -446,12 +543,6 @@ xchk_fscounters(
446543
if (frextents > mp->m_sb.sb_rextents)
447544
xchk_set_corrupt(sc);
448545

449-
/*
450-
* XXX: We can't quiesce percpu counter updates, so exit early.
451-
* This can be re-enabled when we gain exclusive freeze functionality.
452-
*/
453-
return 0;
454-
455546
/*
456547
* If ifree exceeds icount by more than the minimum variance then
457548
* something's probably wrong with the counters.
@@ -463,8 +554,6 @@ xchk_fscounters(
463554
error = xchk_fscount_aggregate_agcounts(sc, fsc);
464555
if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
465556
return error;
466-
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
467-
return 0;
468557

469558
/* Count the free extents counter for rt volumes. */
470559
error = xchk_fscount_count_frextents(sc, fsc);
@@ -473,20 +562,45 @@ xchk_fscounters(
473562
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
474563
return 0;
475564

476-
/* Compare the in-core counters with whatever we counted. */
477-
if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
478-
xchk_set_corrupt(sc);
565+
/*
566+
* Compare the in-core counters with whatever we counted. If the fs is
567+
* frozen, we treat the discrepancy as a corruption because the freeze
568+
* should have stabilized the counter values. Otherwise, we need
569+
* userspace to call us back having granted us freeze permission.
570+
*/
571+
if (!xchk_fscount_within_range(sc, icount, &mp->m_icount,
572+
fsc->icount)) {
573+
if (fsc->frozen)
574+
xchk_set_corrupt(sc);
575+
else
576+
try_again = true;
577+
}
479578

480-
if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree))
481-
xchk_set_corrupt(sc);
579+
if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) {
580+
if (fsc->frozen)
581+
xchk_set_corrupt(sc);
582+
else
583+
try_again = true;
584+
}
482585

483586
if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
484-
fsc->fdblocks))
485-
xchk_set_corrupt(sc);
587+
fsc->fdblocks)) {
588+
if (fsc->frozen)
589+
xchk_set_corrupt(sc);
590+
else
591+
try_again = true;
592+
}
486593

487594
if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
488-
fsc->frextents))
489-
xchk_set_corrupt(sc);
595+
fsc->frextents)) {
596+
if (fsc->frozen)
597+
xchk_set_corrupt(sc);
598+
else
599+
try_again = true;
600+
}
601+
602+
if (try_again)
603+
return -EDEADLOCK;
490604

491605
return 0;
492606
}

fs/xfs/scrub/scrub.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,10 @@ xchk_teardown(
184184
xchk_irele(sc, sc->ip);
185185
sc->ip = NULL;
186186
}
187-
if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
187+
if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
188+
sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
188189
mnt_drop_write_file(sc->file);
190+
}
189191
if (sc->buf) {
190192
if (sc->buf_cleanup)
191193
sc->buf_cleanup(sc->buf);
@@ -505,6 +507,8 @@ xfs_scrub_metadata(
505507
error = mnt_want_write_file(sc->file);
506508
if (error)
507509
goto out_sc;
510+
511+
sc->flags |= XCHK_HAVE_FREEZE_PROT;
508512
}
509513

510514
/* Set up for the operation. */

fs/xfs/scrub/scrub.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ struct xfs_scrub {
106106

107107
/* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
108108
#define XCHK_TRY_HARDER (1U << 0) /* can't get resources, try again */
109+
#define XCHK_HAVE_FREEZE_PROT (1U << 1) /* do we have freeze protection? */
109110
#define XCHK_FSGATES_DRAIN (1U << 2) /* defer ops draining enabled */
110111
#define XCHK_NEED_DRAIN (1U << 3) /* scrub needs to drain defer ops */
111112
#define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */

fs/xfs/scrub/trace.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
9898

9999
#define XFS_SCRUB_STATE_STRINGS \
100100
{ XCHK_TRY_HARDER, "try_harder" }, \
101+
{ XCHK_HAVE_FREEZE_PROT, "nofreeze" }, \
101102
{ XCHK_FSGATES_DRAIN, "fsgates_drain" }, \
102103
{ XCHK_NEED_DRAIN, "need_drain" }, \
103104
{ XREP_ALREADY_FIXED, "already_fixed" }
@@ -693,6 +694,31 @@ TRACE_EVENT(xchk_fscounters_within_range,
693694
__entry->old_value)
694695
)
695696

697+
DECLARE_EVENT_CLASS(xchk_fsfreeze_class,
698+
TP_PROTO(struct xfs_scrub *sc, int error),
699+
TP_ARGS(sc, error),
700+
TP_STRUCT__entry(
701+
__field(dev_t, dev)
702+
__field(unsigned int, type)
703+
__field(int, error)
704+
),
705+
TP_fast_assign(
706+
__entry->dev = sc->mp->m_super->s_dev;
707+
__entry->type = sc->sm->sm_type;
708+
__entry->error = error;
709+
),
710+
TP_printk("dev %d:%d type %s error %d",
711+
MAJOR(__entry->dev), MINOR(__entry->dev),
712+
__print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
713+
__entry->error)
714+
);
715+
#define DEFINE_XCHK_FSFREEZE_EVENT(name) \
716+
DEFINE_EVENT(xchk_fsfreeze_class, name, \
717+
TP_PROTO(struct xfs_scrub *sc, int error), \
718+
TP_ARGS(sc, error))
719+
DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsfreeze);
720+
DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsthaw);
721+
696722
TRACE_EVENT(xchk_refcount_incorrect,
697723
TP_PROTO(struct xfs_perag *pag, const struct xfs_refcount_irec *irec,
698724
xfs_nlink_t seen),

0 commit comments

Comments
 (0)