1
- // SPDX-License-Identifier: GPL-2.0+
1
+ // SPDX-License-Identifier: GPL-2.0-or-later
2
2
/*
3
3
* Copyright (C) 2019-2023 Oracle. All Rights Reserved.
4
4
* Author: Darrick J. Wong <djwong@kernel.org>
8
8
#include "xfs_shared.h"
9
9
#include "xfs_format.h"
10
10
#include "xfs_trans_resv.h"
11
+ #include "xfs_log_format.h"
12
+ #include "xfs_trans.h"
11
13
#include "xfs_mount.h"
12
14
#include "xfs_alloc.h"
13
15
#include "xfs_ialloc.h"
16
18
#include "xfs_ag.h"
17
19
#include "xfs_rtalloc.h"
18
20
#include "xfs_inode.h"
21
+ #include "xfs_icache.h"
19
22
#include "scrub/scrub.h"
20
23
#include "scrub/common.h"
21
24
#include "scrub/trace.h"
@@ -53,6 +56,7 @@ struct xchk_fscounters {
53
56
uint64_t frextents ;
54
57
unsigned long long icount_min ;
55
58
unsigned long long icount_max ;
59
+ bool frozen ;
56
60
};
57
61
58
62
/*
@@ -123,6 +127,82 @@ xchk_fscount_warmup(
123
127
return error ;
124
128
}
125
129
130
+ static inline int
131
+ xchk_fsfreeze (
132
+ struct xfs_scrub * sc )
133
+ {
134
+ int error ;
135
+
136
+ error = freeze_super (sc -> mp -> m_super , FREEZE_HOLDER_KERNEL );
137
+ trace_xchk_fsfreeze (sc , error );
138
+ return error ;
139
+ }
140
+
141
+ static inline int
142
+ xchk_fsthaw (
143
+ struct xfs_scrub * sc )
144
+ {
145
+ int error ;
146
+
147
+ /* This should always succeed, we have a kernel freeze */
148
+ error = thaw_super (sc -> mp -> m_super , FREEZE_HOLDER_KERNEL );
149
+ trace_xchk_fsthaw (sc , error );
150
+ return error ;
151
+ }
152
+
153
+ /*
154
+ * We couldn't stabilize the filesystem long enough to sample all the variables
155
+ * that comprise the summary counters and compare them to the percpu counters.
156
+ * We need to disable all writer threads, which means taking the first two
157
+ * freeze levels to put userspace to sleep, and the third freeze level to
158
+ * prevent background threads from starting new transactions. Take one level
159
+ * more to prevent other callers from unfreezing the filesystem while we run.
160
+ */
161
+ STATIC int
162
+ xchk_fscounters_freeze (
163
+ struct xfs_scrub * sc )
164
+ {
165
+ struct xchk_fscounters * fsc = sc -> buf ;
166
+ int error = 0 ;
167
+
168
+ if (sc -> flags & XCHK_HAVE_FREEZE_PROT ) {
169
+ sc -> flags &= ~XCHK_HAVE_FREEZE_PROT ;
170
+ mnt_drop_write_file (sc -> file );
171
+ }
172
+
173
+ /* Try to grab a kernel freeze. */
174
+ while ((error = xchk_fsfreeze (sc )) == - EBUSY ) {
175
+ if (xchk_should_terminate (sc , & error ))
176
+ return error ;
177
+
178
+ delay (HZ / 10 );
179
+ }
180
+ if (error )
181
+ return error ;
182
+
183
+ fsc -> frozen = true;
184
+ return 0 ;
185
+ }
186
+
187
+ /* Thaw the filesystem after checking or repairing fscounters. */
188
+ STATIC void
189
+ xchk_fscounters_cleanup (
190
+ void * buf )
191
+ {
192
+ struct xchk_fscounters * fsc = buf ;
193
+ struct xfs_scrub * sc = fsc -> sc ;
194
+ int error ;
195
+
196
+ if (!fsc -> frozen )
197
+ return ;
198
+
199
+ error = xchk_fsthaw (sc );
200
+ if (error )
201
+ xfs_emerg (sc -> mp , "still frozen after scrub, err=%d" , error );
202
+ else
203
+ fsc -> frozen = false;
204
+ }
205
+
126
206
int
127
207
xchk_setup_fscounters (
128
208
struct xfs_scrub * sc )
@@ -140,6 +220,7 @@ xchk_setup_fscounters(
140
220
sc -> buf = kzalloc (sizeof (struct xchk_fscounters ), XCHK_GFP_FLAGS );
141
221
if (!sc -> buf )
142
222
return - ENOMEM ;
223
+ sc -> buf_cleanup = xchk_fscounters_cleanup ;
143
224
fsc = sc -> buf ;
144
225
fsc -> sc = sc ;
145
226
@@ -150,7 +231,18 @@ xchk_setup_fscounters(
150
231
if (error )
151
232
return error ;
152
233
153
- return xchk_trans_alloc (sc , 0 );
234
+ /*
235
+ * Pause all writer activity in the filesystem while we're scrubbing to
236
+ * reduce the likelihood of background perturbations to the counters
237
+ * throwing off our calculations.
238
+ */
239
+ if (sc -> flags & XCHK_TRY_HARDER ) {
240
+ error = xchk_fscounters_freeze (sc );
241
+ if (error )
242
+ return error ;
243
+ }
244
+
245
+ return xfs_trans_alloc_empty (sc -> mp , & sc -> tp );
154
246
}
155
247
156
248
/*
@@ -290,8 +382,7 @@ xchk_fscount_aggregate_agcounts(
290
382
if (fsc -> ifree > fsc -> icount ) {
291
383
if (tries -- )
292
384
goto retry ;
293
- xchk_set_incomplete (sc );
294
- return 0 ;
385
+ return - EDEADLOCK ;
295
386
}
296
387
297
388
return 0 ;
@@ -367,6 +458,8 @@ xchk_fscount_count_frextents(
367
458
* Otherwise, we /might/ have a problem. If the change in the summations is
368
459
* more than we want to tolerate, the filesystem is probably busy and we should
369
460
* just send back INCOMPLETE and see if userspace will try again.
461
+ *
462
+ * If we're repairing then we require an exact match.
370
463
*/
371
464
static inline bool
372
465
xchk_fscount_within_range (
@@ -396,21 +489,7 @@ xchk_fscount_within_range(
396
489
if (expected >= min_value && expected <= max_value )
397
490
return true;
398
491
399
- /*
400
- * If the difference between the two summations is too large, the fs
401
- * might just be busy and so we'll mark the scrub incomplete. Return
402
- * true here so that we don't mark the counter corrupt.
403
- *
404
- * XXX: In the future when userspace can grant scrub permission to
405
- * quiesce the filesystem to solve the outsized variance problem, this
406
- * check should be moved up and the return code changed to signal to
407
- * userspace that we need quiesce permission.
408
- */
409
- if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE ) {
410
- xchk_set_incomplete (sc );
411
- return true;
412
- }
413
-
492
+ /* Everything else is bad. */
414
493
return false;
415
494
}
416
495
@@ -422,6 +501,7 @@ xchk_fscounters(
422
501
struct xfs_mount * mp = sc -> mp ;
423
502
struct xchk_fscounters * fsc = sc -> buf ;
424
503
int64_t icount , ifree , fdblocks , frextents ;
504
+ bool try_again = false;
425
505
int error ;
426
506
427
507
/* Snapshot the percpu counters. */
@@ -431,9 +511,26 @@ xchk_fscounters(
431
511
frextents = percpu_counter_sum (& mp -> m_frextents );
432
512
433
513
/* No negative values, please! */
434
- if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0 )
514
+ if (icount < 0 || ifree < 0 )
435
515
xchk_set_corrupt (sc );
436
516
517
+ /*
518
+ * If the filesystem is not frozen, the counter summation calls above
519
+ * can race with xfs_mod_freecounter, which subtracts a requested space
520
+ * reservation from the counter and undoes the subtraction if that made
521
+ * the counter go negative. Therefore, it's possible to see negative
522
+ * values here, and we should only flag that as a corruption if we
523
+ * froze the fs. This is much more likely to happen with frextents
524
+ * since there are no reserved pools.
525
+ */
526
+ if (fdblocks < 0 || frextents < 0 ) {
527
+ if (!fsc -> frozen )
528
+ return - EDEADLOCK ;
529
+
530
+ xchk_set_corrupt (sc );
531
+ return 0 ;
532
+ }
533
+
437
534
/* See if icount is obviously wrong. */
438
535
if (icount < fsc -> icount_min || icount > fsc -> icount_max )
439
536
xchk_set_corrupt (sc );
@@ -446,12 +543,6 @@ xchk_fscounters(
446
543
if (frextents > mp -> m_sb .sb_rextents )
447
544
xchk_set_corrupt (sc );
448
545
449
- /*
450
- * XXX: We can't quiesce percpu counter updates, so exit early.
451
- * This can be re-enabled when we gain exclusive freeze functionality.
452
- */
453
- return 0 ;
454
-
455
546
/*
456
547
* If ifree exceeds icount by more than the minimum variance then
457
548
* something's probably wrong with the counters.
@@ -463,8 +554,6 @@ xchk_fscounters(
463
554
error = xchk_fscount_aggregate_agcounts (sc , fsc );
464
555
if (!xchk_process_error (sc , 0 , XFS_SB_BLOCK (mp ), & error ))
465
556
return error ;
466
- if (sc -> sm -> sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE )
467
- return 0 ;
468
557
469
558
/* Count the free extents counter for rt volumes. */
470
559
error = xchk_fscount_count_frextents (sc , fsc );
@@ -473,20 +562,45 @@ xchk_fscounters(
473
562
if (sc -> sm -> sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE )
474
563
return 0 ;
475
564
476
- /* Compare the in-core counters with whatever we counted. */
477
- if (!xchk_fscount_within_range (sc , icount , & mp -> m_icount , fsc -> icount ))
478
- xchk_set_corrupt (sc );
565
+ /*
566
+ * Compare the in-core counters with whatever we counted. If the fs is
567
+ * frozen, we treat the discrepancy as a corruption because the freeze
568
+ * should have stabilized the counter values. Otherwise, we need
569
+ * userspace to call us back having granted us freeze permission.
570
+ */
571
+ if (!xchk_fscount_within_range (sc , icount , & mp -> m_icount ,
572
+ fsc -> icount )) {
573
+ if (fsc -> frozen )
574
+ xchk_set_corrupt (sc );
575
+ else
576
+ try_again = true;
577
+ }
479
578
480
- if (!xchk_fscount_within_range (sc , ifree , & mp -> m_ifree , fsc -> ifree ))
481
- xchk_set_corrupt (sc );
579
+ if (!xchk_fscount_within_range (sc , ifree , & mp -> m_ifree , fsc -> ifree )) {
580
+ if (fsc -> frozen )
581
+ xchk_set_corrupt (sc );
582
+ else
583
+ try_again = true;
584
+ }
482
585
483
586
if (!xchk_fscount_within_range (sc , fdblocks , & mp -> m_fdblocks ,
484
- fsc -> fdblocks ))
485
- xchk_set_corrupt (sc );
587
+ fsc -> fdblocks )) {
588
+ if (fsc -> frozen )
589
+ xchk_set_corrupt (sc );
590
+ else
591
+ try_again = true;
592
+ }
486
593
487
594
if (!xchk_fscount_within_range (sc , frextents , & mp -> m_frextents ,
488
- fsc -> frextents ))
489
- xchk_set_corrupt (sc );
595
+ fsc -> frextents )) {
596
+ if (fsc -> frozen )
597
+ xchk_set_corrupt (sc );
598
+ else
599
+ try_again = true;
600
+ }
601
+
602
+ if (try_again )
603
+ return - EDEADLOCK ;
490
604
491
605
return 0 ;
492
606
}
0 commit comments