Skip to content

Commit abf7c81

Browse files
author
Chandan Babu R
committed
Merge tag 'fix-iunlink-list-6.6_2023-09-12' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.6-fixesA
xfs: reload entire iunlink lists

This is the second part of correcting XFS to reload the incore unlinked inode list from the ondisk contents. Whereas part one tackled failures from regular filesystem calls, this part takes on the problem of needing to reload the entire incore unlinked inode list on account of somebody loading an inode that's in the /middle/ of an unlinked list. This happens during quotacheck, bulkstat, or even opening a file by handle.

In this case we don't know the length of the list that we're reloading, so we don't want to create a new unbounded memory load while holding resources locked. Instead, we'll target UNTRUSTED iget calls to reload the entire bucket.

Note that this changes the definition of the incore unlinked inode list slightly -- i_prev_unlinked == 0 now means "not on the incore list".

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'fix-iunlink-list-6.6_2023-09-12' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: make inode unlinked bucket recovery work with quotacheck
  xfs: reload entire unlinked bucket lists
  xfs: use i_prev_unlinked to distinguish inodes that are not on the unlinked list
2 parents fffcdcc + 49813a2 commit abf7c81

File tree

9 files changed

+195
-9
lines changed

9 files changed

+195
-9
lines changed

fs/xfs/xfs_attr_inactive.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,6 @@ xfs_attr_inactive(
333333
int error = 0;
334334

335335
mp = dp->i_mount;
336-
ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
337336

338337
xfs_ilock(dp, lock_mode);
339338
if (!xfs_inode_has_attr_fork(dp))

fs/xfs/xfs_export.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,12 @@ xfs_nfs_get_inode(
146146
return ERR_PTR(error);
147147
}
148148

149+
error = xfs_inode_reload_unlinked(ip);
150+
if (error) {
151+
xfs_irele(ip);
152+
return ERR_PTR(error);
153+
}
154+
149155
if (VFS_I(ip)->i_generation != generation) {
150156
xfs_irele(ip);
151157
return ERR_PTR(-ESTALE);

fs/xfs/xfs_icache.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ xfs_inode_alloc(
113113
INIT_LIST_HEAD(&ip->i_ioend_list);
114114
spin_lock_init(&ip->i_ioend_lock);
115115
ip->i_next_unlinked = NULLAGINO;
116-
ip->i_prev_unlinked = NULLAGINO;
116+
ip->i_prev_unlinked = 0;
117117

118118
return ip;
119119
}

fs/xfs/xfs_inode.c

Lines changed: 111 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1742,9 +1742,13 @@ xfs_inactive(
17421742
ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
17431743
truncate = 1;
17441744

1745-
error = xfs_qm_dqattach(ip);
1746-
if (error)
1747-
goto out;
1745+
if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) {
1746+
xfs_qm_dqdetach(ip);
1747+
} else {
1748+
error = xfs_qm_dqattach(ip);
1749+
if (error)
1750+
goto out;
1751+
}
17481752

17491753
if (S_ISLNK(VFS_I(ip)->i_mode))
17501754
error = xfs_inactive_symlink(ip);
@@ -1962,6 +1966,8 @@ xfs_iunlink_reload_next(
19621966
trace_xfs_iunlink_reload_next(next_ip);
19631967
rele:
19641968
ASSERT(!(VFS_I(next_ip)->i_state & I_DONTCACHE));
1969+
if (xfs_is_quotacheck_running(mp) && next_ip)
1970+
xfs_iflags_set(next_ip, XFS_IQUOTAUNCHECKED);
19651971
xfs_irele(next_ip);
19661972
return error;
19671973
}
@@ -2014,6 +2020,7 @@ xfs_iunlink_insert_inode(
20142020
}
20152021

20162022
/* Point the head of the list to point to this inode. */
2023+
ip->i_prev_unlinked = NULLAGINO;
20172024
return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
20182025
}
20192026

@@ -2116,7 +2123,7 @@ xfs_iunlink_remove_inode(
21162123
}
21172124

21182125
ip->i_next_unlinked = NULLAGINO;
2119-
ip->i_prev_unlinked = NULLAGINO;
2126+
ip->i_prev_unlinked = 0;
21202127
return error;
21212128
}
21222129

@@ -3605,3 +3612,103 @@ xfs_iunlock2_io_mmap(
36053612
if (ip1 != ip2)
36063613
inode_unlock(VFS_I(ip1));
36073614
}
3615+
3616+
/*
3617+
* Reload the incore inode list for this inode. Caller should ensure that
3618+
* the link count cannot change, either by taking ILOCK_SHARED or otherwise
3619+
* preventing other threads from executing.
3620+
*/
3621+
int
3622+
xfs_inode_reload_unlinked_bucket(
3623+
struct xfs_trans *tp,
3624+
struct xfs_inode *ip)
3625+
{
3626+
struct xfs_mount *mp = tp->t_mountp;
3627+
struct xfs_buf *agibp;
3628+
struct xfs_agi *agi;
3629+
struct xfs_perag *pag;
3630+
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
3631+
xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
3632+
xfs_agino_t prev_agino, next_agino;
3633+
unsigned int bucket;
3634+
bool foundit = false;
3635+
int error;
3636+
3637+
/* Grab the first inode in the list */
3638+
pag = xfs_perag_get(mp, agno);
3639+
error = xfs_ialloc_read_agi(pag, tp, &agibp);
3640+
xfs_perag_put(pag);
3641+
if (error)
3642+
return error;
3643+
3644+
bucket = agino % XFS_AGI_UNLINKED_BUCKETS;
3645+
agi = agibp->b_addr;
3646+
3647+
trace_xfs_inode_reload_unlinked_bucket(ip);
3648+
3649+
xfs_info_ratelimited(mp,
3650+
"Found unrecovered unlinked inode 0x%x in AG 0x%x. Initiating list recovery.",
3651+
agino, agno);
3652+
3653+
prev_agino = NULLAGINO;
3654+
next_agino = be32_to_cpu(agi->agi_unlinked[bucket]);
3655+
while (next_agino != NULLAGINO) {
3656+
struct xfs_inode *next_ip = NULL;
3657+
3658+
if (next_agino == agino) {
3659+
/* Found this inode, set its backlink. */
3660+
next_ip = ip;
3661+
next_ip->i_prev_unlinked = prev_agino;
3662+
foundit = true;
3663+
}
3664+
if (!next_ip) {
3665+
/* Inode already in memory. */
3666+
next_ip = xfs_iunlink_lookup(pag, next_agino);
3667+
}
3668+
if (!next_ip) {
3669+
/* Inode not in memory, reload. */
3670+
error = xfs_iunlink_reload_next(tp, agibp, prev_agino,
3671+
next_agino);
3672+
if (error)
3673+
break;
3674+
3675+
next_ip = xfs_iunlink_lookup(pag, next_agino);
3676+
}
3677+
if (!next_ip) {
3678+
/* No incore inode at all? We reloaded it... */
3679+
ASSERT(next_ip != NULL);
3680+
error = -EFSCORRUPTED;
3681+
break;
3682+
}
3683+
3684+
prev_agino = next_agino;
3685+
next_agino = next_ip->i_next_unlinked;
3686+
}
3687+
3688+
xfs_trans_brelse(tp, agibp);
3689+
/* Should have found this inode somewhere in the iunlinked bucket. */
3690+
if (!error && !foundit)
3691+
error = -EFSCORRUPTED;
3692+
return error;
3693+
}
3694+
3695+
/* Decide if this inode is missing its unlinked list and reload it. */
3696+
int
3697+
xfs_inode_reload_unlinked(
3698+
struct xfs_inode *ip)
3699+
{
3700+
struct xfs_trans *tp;
3701+
int error;
3702+
3703+
error = xfs_trans_alloc_empty(ip->i_mount, &tp);
3704+
if (error)
3705+
return error;
3706+
3707+
xfs_ilock(ip, XFS_ILOCK_SHARED);
3708+
if (xfs_inode_unlinked_incomplete(ip))
3709+
error = xfs_inode_reload_unlinked_bucket(tp, ip);
3710+
xfs_iunlock(ip, XFS_ILOCK_SHARED);
3711+
xfs_trans_cancel(tp);
3712+
3713+
return error;
3714+
}

fs/xfs/xfs_inode.h

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,21 @@ typedef struct xfs_inode {
6868
uint64_t i_diflags2; /* XFS_DIFLAG2_... */
6969
struct timespec64 i_crtime; /* time created */
7070

71-
/* unlinked list pointers */
71+
/*
72+
* Unlinked list pointers. These point to the next and previous inodes
73+
* in the AGI unlinked bucket list, respectively. These fields can
74+
* only be updated with the AGI locked.
75+
*
76+
* i_next_unlinked caches di_next_unlinked.
77+
*/
7278
xfs_agino_t i_next_unlinked;
79+
80+
/*
81+
* If the inode is not on an unlinked list, this field is zero. If the
82+
* inode is the first element in an unlinked list, this field is
83+
* NULLAGINO. Otherwise, i_prev_unlinked points to the previous inode
84+
* in the unlinked list.
85+
*/
7386
xfs_agino_t i_prev_unlinked;
7487

7588
/* VFS inode */
@@ -81,6 +94,11 @@ typedef struct xfs_inode {
8194
struct list_head i_ioend_list;
8295
} xfs_inode_t;
8396

97+
static inline bool xfs_inode_on_unlinked_list(const struct xfs_inode *ip)
98+
{
99+
return ip->i_prev_unlinked != 0;
100+
}
101+
84102
static inline bool xfs_inode_has_attr_fork(struct xfs_inode *ip)
85103
{
86104
return ip->i_forkoff > 0;
@@ -326,6 +344,9 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
326344
*/
327345
#define XFS_INACTIVATING (1 << 13)
328346

347+
/* Quotacheck is running but inode has not been added to quota counts. */
348+
#define XFS_IQUOTAUNCHECKED (1 << 14)
349+
329350
/* All inode state flags related to inode reclaim. */
330351
#define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \
331352
XFS_IRECLAIM | \
@@ -340,7 +361,7 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
340361
#define XFS_IRECLAIM_RESET_FLAGS \
341362
(XFS_IRECLAIMABLE | XFS_IRECLAIM | \
342363
XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \
343-
XFS_INACTIVATING)
364+
XFS_INACTIVATING | XFS_IQUOTAUNCHECKED)
344365

345366
/*
346367
* Flags for inode locking.
@@ -575,4 +596,13 @@ void xfs_end_io(struct work_struct *work);
575596
int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
576597
void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
577598

599+
static inline bool
600+
xfs_inode_unlinked_incomplete(
601+
struct xfs_inode *ip)
602+
{
603+
return VFS_I(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip);
604+
}
605+
int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip);
606+
int xfs_inode_reload_unlinked(struct xfs_inode *ip);
607+
578608
#endif /* __XFS_INODE_H__ */

fs/xfs/xfs_itable.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,15 @@ xfs_bulkstat_one_int(
8080
if (error)
8181
goto out;
8282

83+
if (xfs_inode_unlinked_incomplete(ip)) {
84+
error = xfs_inode_reload_unlinked_bucket(tp, ip);
85+
if (error) {
86+
xfs_iunlock(ip, XFS_ILOCK_SHARED);
87+
xfs_irele(ip);
88+
return error;
89+
}
90+
}
91+
8392
ASSERT(ip != NULL);
8493
ASSERT(ip->i_imap.im_blkno != 0);
8594
inode = VFS_I(ip);

fs/xfs/xfs_mount.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,8 @@ __XFS_HAS_FEAT(nouuid, NOUUID)
405405
#define XFS_OPSTATE_WARNED_SHRINK 8
406406
/* Kernel has logged a warning about logged xattr updates being used. */
407407
#define XFS_OPSTATE_WARNED_LARP 9
408+
/* Mount time quotacheck is running */
409+
#define XFS_OPSTATE_QUOTACHECK_RUNNING 10
408410

409411
#define __XFS_IS_OPSTATE(name, NAME) \
410412
static inline bool xfs_is_ ## name (struct xfs_mount *mp) \
@@ -427,6 +429,11 @@ __XFS_IS_OPSTATE(inode32, INODE32)
427429
__XFS_IS_OPSTATE(readonly, READONLY)
428430
__XFS_IS_OPSTATE(inodegc_enabled, INODEGC_ENABLED)
429431
__XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED)
432+
#ifdef CONFIG_XFS_QUOTA
433+
__XFS_IS_OPSTATE(quotacheck_running, QUOTACHECK_RUNNING)
434+
#else
435+
# define xfs_is_quotacheck_running(mp) (false)
436+
#endif
430437

431438
static inline bool
432439
xfs_should_warn(struct xfs_mount *mp, long nr)
@@ -444,7 +451,8 @@ xfs_should_warn(struct xfs_mount *mp, long nr)
444451
{ (1UL << XFS_OPSTATE_BLOCKGC_ENABLED), "blockgc" }, \
445452
{ (1UL << XFS_OPSTATE_WARNED_SCRUB), "wscrub" }, \
446453
{ (1UL << XFS_OPSTATE_WARNED_SHRINK), "wshrink" }, \
447-
{ (1UL << XFS_OPSTATE_WARNED_LARP), "wlarp" }
454+
{ (1UL << XFS_OPSTATE_WARNED_LARP), "wlarp" }, \
455+
{ (1UL << XFS_OPSTATE_QUOTACHECK_RUNNING), "quotacheck" }
448456

449457
/*
450458
* Max and min values for mount-option defined I/O

fs/xfs/xfs_qm.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,6 +1160,10 @@ xfs_qm_dqusage_adjust(
11601160
if (error)
11611161
return error;
11621162

1163+
error = xfs_inode_reload_unlinked(ip);
1164+
if (error)
1165+
goto error0;
1166+
11631167
ASSERT(ip->i_delayed_blks == 0);
11641168

11651169
if (XFS_IS_REALTIME_INODE(ip)) {
@@ -1173,6 +1177,7 @@ xfs_qm_dqusage_adjust(
11731177
}
11741178

11751179
nblks = (xfs_qcnt_t)ip->i_nblocks - rtblks;
1180+
xfs_iflags_clear(ip, XFS_IQUOTAUNCHECKED);
11761181

11771182
/*
11781183
* Add the (disk blocks and inode) resources occupied by this
@@ -1319,8 +1324,10 @@ xfs_qm_quotacheck(
13191324
flags |= XFS_PQUOTA_CHKD;
13201325
}
13211326

1327+
xfs_set_quotacheck_running(mp);
13221328
error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
13231329
NULL);
1330+
xfs_clear_quotacheck_running(mp);
13241331

13251332
/*
13261333
* On error, the inode walk may have partially populated the dquot

fs/xfs/xfs_trace.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3849,6 +3849,26 @@ TRACE_EVENT(xfs_iunlink_reload_next,
38493849
__entry->next_agino)
38503850
);
38513851

3852+
TRACE_EVENT(xfs_inode_reload_unlinked_bucket,
3853+
TP_PROTO(struct xfs_inode *ip),
3854+
TP_ARGS(ip),
3855+
TP_STRUCT__entry(
3856+
__field(dev_t, dev)
3857+
__field(xfs_agnumber_t, agno)
3858+
__field(xfs_agino_t, agino)
3859+
),
3860+
TP_fast_assign(
3861+
__entry->dev = ip->i_mount->m_super->s_dev;
3862+
__entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino);
3863+
__entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino);
3864+
),
3865+
TP_printk("dev %d:%d agno 0x%x agino 0x%x bucket %u",
3866+
MAJOR(__entry->dev), MINOR(__entry->dev),
3867+
__entry->agno,
3868+
__entry->agino,
3869+
__entry->agino % XFS_AGI_UNLINKED_BUCKETS)
3870+
);
3871+
38523872
DECLARE_EVENT_CLASS(xfs_ag_inode_class,
38533873
TP_PROTO(struct xfs_inode *ip),
38543874
TP_ARGS(ip),

0 commit comments

Comments
 (0)