Skip to content

Commit 220c8d5

Browse files
author
Chandan Babu R
committed
Merge tag 'scrub-bmap-fixes-6.6_2023-08-10' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.6-mergeA
xfs: fixes for the block mapping checker This series amends the file extent map checking code so that nonexistent cow/attr forks get the ENOENT return they're supposed to; and fixes some incorrect logic about the presence of a cow fork vs. reflink iflag. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Chandan Babu R <chandan.babu@oracle.com> * tag 'scrub-bmap-fixes-6.6_2023-08-10' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux: xfs: don't check reflink iflag state when checking cow fork xfs: simplify returns in xchk_bmap xfs: rewrite xchk_inode_is_allocated to work properly xfs: hide xfs_inode_is_allocated in scrub common code
2 parents 939c9de + e27a136 commit 220c8d5

File tree

7 files changed

+193
-62
lines changed

7 files changed

+193
-62
lines changed

fs/xfs/scrub/bmap.c

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ xchk_bmap(
841841

842842
/* Non-existent forks can be ignored. */
843843
if (!ifp)
844-
goto out;
844+
return -ENOENT;
845845

846846
info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
847847
info.whichfork = whichfork;
@@ -850,10 +850,10 @@ xchk_bmap(
850850

851851
switch (whichfork) {
852852
case XFS_COW_FORK:
853-
/* No CoW forks on non-reflink inodes/filesystems. */
854-
if (!xfs_is_reflink_inode(ip)) {
853+
/* No CoW forks on non-reflink filesystems. */
854+
if (!xfs_has_reflink(mp)) {
855855
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
856-
goto out;
856+
return 0;
857857
}
858858
break;
859859
case XFS_ATTR_FORK:
@@ -873,31 +873,31 @@ xchk_bmap(
873873
/* No mappings to check. */
874874
if (whichfork == XFS_COW_FORK)
875875
xchk_fblock_set_corrupt(sc, whichfork, 0);
876-
goto out;
876+
return 0;
877877
case XFS_DINODE_FMT_EXTENTS:
878878
break;
879879
case XFS_DINODE_FMT_BTREE:
880880
if (whichfork == XFS_COW_FORK) {
881881
xchk_fblock_set_corrupt(sc, whichfork, 0);
882-
goto out;
882+
return 0;
883883
}
884884

885885
error = xchk_bmap_btree(sc, whichfork, &info);
886886
if (error)
887-
goto out;
887+
return error;
888888
break;
889889
default:
890890
xchk_fblock_set_corrupt(sc, whichfork, 0);
891-
goto out;
891+
return 0;
892892
}
893893

894894
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
895-
goto out;
895+
return 0;
896896

897897
/* Find the offset of the last extent in the mapping. */
898898
error = xfs_bmap_last_offset(ip, &endoff, whichfork);
899899
if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
900-
goto out;
900+
return error;
901901

902902
/*
903903
* Scrub extent records. We use a special iterator function here that
@@ -910,12 +910,12 @@ xchk_bmap(
910910
while (xchk_bmap_iext_iter(&info, &irec)) {
911911
if (xchk_should_terminate(sc, &error) ||
912912
(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
913-
goto out;
913+
return 0;
914914

915915
if (irec.br_startoff >= endoff) {
916916
xchk_fblock_set_corrupt(sc, whichfork,
917917
irec.br_startoff);
918-
goto out;
918+
return 0;
919919
}
920920

921921
if (isnullstartblock(irec.br_startblock))
@@ -928,10 +928,10 @@ xchk_bmap(
928928
if (xchk_bmap_want_check_rmaps(&info)) {
929929
error = xchk_bmap_check_rmaps(sc, whichfork);
930930
if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
931-
goto out;
931+
return error;
932932
}
933-
out:
934-
return error;
933+
934+
return 0;
935935
}
936936

937937
/* Scrub an inode's data fork. */
@@ -955,8 +955,5 @@ int
955955
xchk_bmap_cow(
956956
struct xfs_scrub *sc)
957957
{
958-
if (!xfs_is_reflink_inode(sc->ip))
959-
return -ENOENT;
960-
961958
return xchk_bmap(sc, XFS_COW_FORK);
962959
}

fs/xfs/scrub/common.c

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,3 +1230,155 @@ xchk_fsgates_enable(
12301230

12311231
sc->flags |= scrub_fsgates;
12321232
}
1233+
1234+
/*
1235+
* Decide if this is this a cached inode that's also allocated. The caller
1236+
* must hold a reference to an AG and the AGI buffer lock to prevent inodes
1237+
* from being allocated or freed.
1238+
*
1239+
* Look up an inode by number in the given file system. If the inode number
1240+
* is invalid, return -EINVAL. If the inode is not in cache, return -ENODATA.
1241+
* If the inode is being reclaimed, return -ENODATA because we know the inode
1242+
* cache cannot be updating the ondisk metadata.
1243+
*
1244+
* Otherwise, the incore inode is the one we want, and it is either live,
1245+
* somewhere in the inactivation machinery, or reclaimable. The inode is
1246+
* allocated if i_mode is nonzero. In all three cases, the cached inode will
1247+
* be more up to date than the ondisk inode buffer, so we must use the incore
1248+
* i_mode.
1249+
*/
1250+
int
1251+
xchk_inode_is_allocated(
1252+
struct xfs_scrub *sc,
1253+
xfs_agino_t agino,
1254+
bool *inuse)
1255+
{
1256+
struct xfs_mount *mp = sc->mp;
1257+
struct xfs_perag *pag = sc->sa.pag;
1258+
xfs_ino_t ino;
1259+
struct xfs_inode *ip;
1260+
int error;
1261+
1262+
/* caller must hold perag reference */
1263+
if (pag == NULL) {
1264+
ASSERT(pag != NULL);
1265+
return -EINVAL;
1266+
}
1267+
1268+
/* caller must have AGI buffer */
1269+
if (sc->sa.agi_bp == NULL) {
1270+
ASSERT(sc->sa.agi_bp != NULL);
1271+
return -EINVAL;
1272+
}
1273+
1274+
/* reject inode numbers outside existing AGs */
1275+
ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino);
1276+
if (!xfs_verify_ino(mp, ino))
1277+
return -EINVAL;
1278+
1279+
error = -ENODATA;
1280+
rcu_read_lock();
1281+
ip = radix_tree_lookup(&pag->pag_ici_root, agino);
1282+
if (!ip) {
1283+
/* cache miss */
1284+
goto out_rcu;
1285+
}
1286+
1287+
/*
1288+
* If the inode number doesn't match, the incore inode got reused
1289+
* during an RCU grace period and the radix tree hasn't been updated.
1290+
* This isn't the inode we want.
1291+
*/
1292+
spin_lock(&ip->i_flags_lock);
1293+
if (ip->i_ino != ino)
1294+
goto out_skip;
1295+
1296+
trace_xchk_inode_is_allocated(ip);
1297+
1298+
/*
1299+
* We have an incore inode that matches the inode we want, and the
1300+
* caller holds the perag structure and the AGI buffer. Let's check
1301+
* our assumptions below:
1302+
*/
1303+
1304+
#ifdef DEBUG
1305+
/*
1306+
* (1) If the incore inode is live (i.e. referenced from the dcache),
1307+
* it will not be INEW, nor will it be in the inactivation or reclaim
1308+
* machinery. The ondisk inode had better be allocated. This is the
1309+
* most trivial case.
1310+
*/
1311+
if (!(ip->i_flags & (XFS_NEED_INACTIVE | XFS_INEW | XFS_IRECLAIMABLE |
1312+
XFS_INACTIVATING))) {
1313+
/* live inode */
1314+
ASSERT(VFS_I(ip)->i_mode != 0);
1315+
}
1316+
1317+
/*
1318+
* If the incore inode is INEW, there are several possibilities:
1319+
*
1320+
* (2) For a file that is being created, note that we allocate the
1321+
* ondisk inode before allocating, initializing, and adding the incore
1322+
* inode to the radix tree.
1323+
*
1324+
* (3) If the incore inode is being recycled, the inode has to be
1325+
* allocated because we don't allow freed inodes to be recycled.
1326+
* Recycling doesn't touch i_mode.
1327+
*/
1328+
if (ip->i_flags & XFS_INEW) {
1329+
/* created on disk already or recycling */
1330+
ASSERT(VFS_I(ip)->i_mode != 0);
1331+
}
1332+
1333+
/*
1334+
* (4) If the inode is queued for inactivation (NEED_INACTIVE) but
1335+
* inactivation has not started (!INACTIVATING), it is still allocated.
1336+
*/
1337+
if ((ip->i_flags & XFS_NEED_INACTIVE) &&
1338+
!(ip->i_flags & XFS_INACTIVATING)) {
1339+
/* definitely before difree */
1340+
ASSERT(VFS_I(ip)->i_mode != 0);
1341+
}
1342+
#endif
1343+
1344+
/*
1345+
* If the incore inode is undergoing inactivation (INACTIVATING), there
1346+
* are two possibilities:
1347+
*
1348+
* (5) It is before the point where it would get freed ondisk, in which
1349+
* case i_mode is still nonzero.
1350+
*
1351+
* (6) It has already been freed, in which case i_mode is zero.
1352+
*
1353+
* We don't take the ILOCK here, but difree and dialloc update the AGI,
1354+
* and we've taken the AGI buffer lock, which prevents that from
1355+
* happening.
1356+
*/
1357+
1358+
/*
1359+
* (7) Inodes undergoing inactivation (INACTIVATING) or queued for
1360+
* reclaim (IRECLAIMABLE) could be allocated or free. i_mode still
1361+
* reflects the ondisk state.
1362+
*/
1363+
1364+
/*
1365+
* (8) If the inode is in IFLUSHING, it's safe to query i_mode because
1366+
* the flush code uses i_mode to format the ondisk inode.
1367+
*/
1368+
1369+
/*
1370+
* (9) If the inode is in IRECLAIM and was reachable via the radix
1371+
* tree, it still has the same i_mode as it did before it entered
1372+
* reclaim. The inode object is still alive because we hold the RCU
1373+
* read lock.
1374+
*/
1375+
1376+
*inuse = VFS_I(ip)->i_mode != 0;
1377+
error = 0;
1378+
1379+
out_skip:
1380+
spin_unlock(&ip->i_flags_lock);
1381+
out_rcu:
1382+
rcu_read_unlock();
1383+
return error;
1384+
}

fs/xfs/scrub/common.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,4 +203,7 @@ static inline bool xchk_need_intent_drain(struct xfs_scrub *sc)
203203

204204
void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks);
205205

206+
int xchk_inode_is_allocated(struct xfs_scrub *sc, xfs_agino_t agino,
207+
bool *inuse);
208+
206209
#endif /* __XFS_SCRUB_COMMON_H__ */

fs/xfs/scrub/ialloc.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,8 +328,7 @@ xchk_iallocbt_check_cluster_ifree(
328328
goto out;
329329
}
330330

331-
error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp, fsino,
332-
&ino_inuse);
331+
error = xchk_inode_is_allocated(bs->sc, agino, &ino_inuse);
333332
if (error == -ENODATA) {
334333
/* Not cached, just read the disk buffer */
335334
freemask_ok = irec_free ^ !!(dip->di_mode);

fs/xfs/scrub/trace.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,28 @@ TRACE_EVENT(xchk_iallocbt_check_cluster,
640640
__entry->cluster_ino)
641641
)
642642

643+
TRACE_EVENT(xchk_inode_is_allocated,
644+
TP_PROTO(struct xfs_inode *ip),
645+
TP_ARGS(ip),
646+
TP_STRUCT__entry(
647+
__field(dev_t, dev)
648+
__field(xfs_ino_t, ino)
649+
__field(unsigned long, iflags)
650+
__field(umode_t, mode)
651+
),
652+
TP_fast_assign(
653+
__entry->dev = VFS_I(ip)->i_sb->s_dev;
654+
__entry->ino = ip->i_ino;
655+
__entry->iflags = ip->i_flags;
656+
__entry->mode = VFS_I(ip)->i_mode;
657+
),
658+
TP_printk("dev %d:%d ino 0x%llx iflags 0x%lx mode 0x%x",
659+
MAJOR(__entry->dev), MINOR(__entry->dev),
660+
__entry->ino,
661+
__entry->iflags,
662+
__entry->mode)
663+
);
664+
643665
TRACE_EVENT(xchk_fscounters_calc,
644666
TP_PROTO(struct xfs_mount *mp, uint64_t icount, uint64_t ifree,
645667
uint64_t fdblocks, uint64_t delalloc),

fs/xfs/xfs_icache.c

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -802,44 +802,6 @@ xfs_iget(
802802
return error;
803803
}
804804

805-
/*
806-
* "Is this a cached inode that's also allocated?"
807-
*
808-
* Look up an inode by number in the given file system. If the inode is
809-
* in cache and isn't in purgatory, return 1 if the inode is allocated
810-
* and 0 if it is not. For all other cases (not in cache, being torn
811-
* down, etc.), return a negative error code.
812-
*
813-
* The caller has to prevent inode allocation and freeing activity,
814-
* presumably by locking the AGI buffer. This is to ensure that an
815-
* inode cannot transition from allocated to freed until the caller is
816-
* ready to allow that. If the inode is in an intermediate state (new,
817-
* reclaimable, or being reclaimed), -EAGAIN will be returned; if the
818-
* inode is not in the cache, -ENOENT will be returned. The caller must
819-
* deal with these scenarios appropriately.
820-
*
821-
* This is a specialized use case for the online scrubber; if you're
822-
* reading this, you probably want xfs_iget.
823-
*/
824-
int
825-
xfs_icache_inode_is_allocated(
826-
struct xfs_mount *mp,
827-
struct xfs_trans *tp,
828-
xfs_ino_t ino,
829-
bool *inuse)
830-
{
831-
struct xfs_inode *ip;
832-
int error;
833-
834-
error = xfs_iget(mp, tp, ino, XFS_IGET_INCORE, 0, &ip);
835-
if (error)
836-
return error;
837-
838-
*inuse = !!(VFS_I(ip)->i_mode);
839-
xfs_irele(ip);
840-
return 0;
841-
}
842-
843805
/*
844806
* Grab the inode for reclaim exclusively.
845807
*

fs/xfs/xfs_icache.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,6 @@ void xfs_inode_set_cowblocks_tag(struct xfs_inode *ip);
7171
void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
7272

7373
void xfs_blockgc_worker(struct work_struct *work);
74-
75-
int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
76-
xfs_ino_t ino, bool *inuse);
77-
7874
void xfs_blockgc_stop(struct xfs_mount *mp);
7975
void xfs_blockgc_start(struct xfs_mount *mp);
8076

0 commit comments

Comments
 (0)