Skip to content

Commit 68b957f

Browse files
author
Darrick J. Wong
committed
xfs: load uncached unlinked inodes into memory on demand
shrikanth hegde reports that filesystems fail shortly after mount with the following failure:

    WARNING: CPU: 56 PID: 12450 at fs/xfs/xfs_inode.c:1839 xfs_iunlink_lookup+0x58/0x80 [xfs]

This of course is the WARN_ON_ONCE in xfs_iunlink_lookup:

    ip = radix_tree_lookup(&pag->pag_ici_root, agino);
    if (WARN_ON_ONCE(!ip || !ip->i_ino)) { ... }

From diagnostic data collected by the bug reporters, it would appear that we cleanly mounted a filesystem that contained unlinked inodes. Unlinked inodes are only processed as a final step of log recovery, which means that clean mounts do not process the unlinked list at all.

Prior to the introduction of the incore unlinked lists, this wasn't a problem because the unlink code would (very expensively) traverse the entire ondisk metadata iunlink chain to keep things up to date. However, the incore unlinked list code complains when it realizes that it is out of sync with the ondisk metadata and shuts down the fs, which is bad.

Ritesh proposed to solve this problem by unconditionally parsing the unlinked lists at mount time, but this imposes a mount time cost for every filesystem to catch something that should be very infrequent. Instead, let's target the places where we can encounter a next_unlinked pointer that refers to an inode that is not in cache, and load it into cache.

Note: This patch does not address the problem of iget loading an inode from the middle of the iunlink list and needing to set i_prev_unlinked correctly.

Reported-by: shrikanth hegde <sshegde@linux.vnet.ibm.com>
Triaged-by: Ritesh Harjani <ritesh.list@gmail.com>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
1 parent 3c919b0 commit 68b957f

File tree

2 files changed

+100
-5
lines changed

2 files changed

+100
-5
lines changed

fs/xfs/xfs_inode.c

Lines changed: 75 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1828,12 +1828,17 @@ xfs_iunlink_lookup(
18281828

18291829
rcu_read_lock();
18301830
ip = radix_tree_lookup(&pag->pag_ici_root, agino);
1831+
if (!ip) {
1832+
/* Caller can handle inode not being in memory. */
1833+
rcu_read_unlock();
1834+
return NULL;
1835+
}
18311836

18321837
/*
1833-
* Inode not in memory or in RCU freeing limbo should not happen.
1834-
* Warn about this and let the caller handle the failure.
1838+
* Inode in RCU freeing limbo should not happen. Warn about this and
1839+
* let the caller handle the failure.
18351840
*/
1836-
if (WARN_ON_ONCE(!ip || !ip->i_ino)) {
1841+
if (WARN_ON_ONCE(!ip->i_ino)) {
18371842
rcu_read_unlock();
18381843
return NULL;
18391844
}
@@ -1842,7 +1847,10 @@ xfs_iunlink_lookup(
18421847
return ip;
18431848
}
18441849

1845-
/* Update the prev pointer of the next agino. */
1850+
/*
1851+
* Update the prev pointer of the next agino. Returns -ENOLINK if the inode
1852+
* is not in cache.
1853+
*/
18461854
static int
18471855
xfs_iunlink_update_backref(
18481856
struct xfs_perag *pag,
@@ -1857,7 +1865,8 @@ xfs_iunlink_update_backref(
18571865

18581866
ip = xfs_iunlink_lookup(pag, next_agino);
18591867
if (!ip)
1860-
return -EFSCORRUPTED;
1868+
return -ENOLINK;
1869+
18611870
ip->i_prev_unlinked = prev_agino;
18621871
return 0;
18631872
}
@@ -1901,6 +1910,62 @@ xfs_iunlink_update_bucket(
19011910
return 0;
19021911
}
19031912

1913+
/*
1914+
* Load the inode @next_agino into the cache and set its prev_unlinked pointer
1915+
* to @prev_agino. Caller must hold the AGI to synchronize with other changes
1916+
* to the unlinked list.
1917+
*/
1918+
STATIC int
1919+
xfs_iunlink_reload_next(
1920+
struct xfs_trans *tp,
1921+
struct xfs_buf *agibp,
1922+
xfs_agino_t prev_agino,
1923+
xfs_agino_t next_agino)
1924+
{
1925+
struct xfs_perag *pag = agibp->b_pag;
1926+
struct xfs_mount *mp = pag->pag_mount;
1927+
struct xfs_inode *next_ip = NULL;
1928+
xfs_ino_t ino;
1929+
int error;
1930+
1931+
ASSERT(next_agino != NULLAGINO);
1932+
1933+
#ifdef DEBUG
1934+
rcu_read_lock();
1935+
next_ip = radix_tree_lookup(&pag->pag_ici_root, next_agino);
1936+
ASSERT(next_ip == NULL);
1937+
rcu_read_unlock();
1938+
#endif
1939+
1940+
xfs_info_ratelimited(mp,
1941+
"Found unrecovered unlinked inode 0x%x in AG 0x%x. Initiating recovery.",
1942+
next_agino, pag->pag_agno);
1943+
1944+
/*
1945+
* Use an untrusted lookup just to be cautious in case the AGI has been
1946+
* corrupted and now points at a free inode. That shouldn't happen,
1947+
* but we'd rather shut down now since we're already running in a weird
1948+
* situation.
1949+
*/
1950+
ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, next_agino);
1951+
error = xfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, 0, &next_ip);
1952+
if (error)
1953+
return error;
1954+
1955+
/* If this is not an unlinked inode, something is very wrong. */
1956+
if (VFS_I(next_ip)->i_nlink != 0) {
1957+
error = -EFSCORRUPTED;
1958+
goto rele;
1959+
}
1960+
1961+
next_ip->i_prev_unlinked = prev_agino;
1962+
trace_xfs_iunlink_reload_next(next_ip);
1963+
rele:
1964+
ASSERT(!(VFS_I(next_ip)->i_state & I_DONTCACHE));
1965+
xfs_irele(next_ip);
1966+
return error;
1967+
}
1968+
19041969
static int
19051970
xfs_iunlink_insert_inode(
19061971
struct xfs_trans *tp,
@@ -1932,6 +1997,8 @@ xfs_iunlink_insert_inode(
19321997
* inode.
19331998
*/
19341999
error = xfs_iunlink_update_backref(pag, agino, next_agino);
2000+
if (error == -ENOLINK)
2001+
error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino);
19352002
if (error)
19362003
return error;
19372004

@@ -2026,6 +2093,9 @@ xfs_iunlink_remove_inode(
20262093
*/
20272094
error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,
20282095
ip->i_next_unlinked);
2096+
if (error == -ENOLINK)
2097+
error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked,
2098+
ip->i_next_unlinked);
20292099
if (error)
20302100
return error;
20312101

fs/xfs/xfs_trace.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3824,6 +3824,31 @@ TRACE_EVENT(xfs_iunlink_update_dinode,
38243824
__entry->new_ptr)
38253825
);
38263826

3827+
TRACE_EVENT(xfs_iunlink_reload_next,
3828+
TP_PROTO(struct xfs_inode *ip),
3829+
TP_ARGS(ip),
3830+
TP_STRUCT__entry(
3831+
__field(dev_t, dev)
3832+
__field(xfs_agnumber_t, agno)
3833+
__field(xfs_agino_t, agino)
3834+
__field(xfs_agino_t, prev_agino)
3835+
__field(xfs_agino_t, next_agino)
3836+
),
3837+
TP_fast_assign(
3838+
__entry->dev = ip->i_mount->m_super->s_dev;
3839+
__entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino);
3840+
__entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino);
3841+
__entry->prev_agino = ip->i_prev_unlinked;
3842+
__entry->next_agino = ip->i_next_unlinked;
3843+
),
3844+
TP_printk("dev %d:%d agno 0x%x agino 0x%x prev_unlinked 0x%x next_unlinked 0x%x",
3845+
MAJOR(__entry->dev), MINOR(__entry->dev),
3846+
__entry->agno,
3847+
__entry->agino,
3848+
__entry->prev_agino,
3849+
__entry->next_agino)
3850+
);
3851+
38273852
DECLARE_EVENT_CLASS(xfs_ag_inode_class,
38283853
TP_PROTO(struct xfs_inode *ip),
38293854
TP_ARGS(ip),

0 commit comments

Comments
 (0)