Skip to content

Commit 65552b0

Browse files
author
Darrick J. Wong
committed
xfs: take the ILOCK when readdir inspects directory mapping data
I was poking around in the directory code while diagnosing online fsck bugs, and noticed that xfs_readdir doesn't actually take the directory ILOCK when it calls xfs_dir2_isblock. xfs_dir_open most probably loaded the data fork mappings and the VFS took i_rwsem (aka IOLOCK_SHARED) so we're protected against writer threads, but we really need to follow the locking model like we do in other places. To avoid unnecessarily cycling the ILOCK for fairly small directories, change the block/leaf _getdents functions to consume the ILOCK hold that the parent readdir function took to decide on a _getdents implementation. It is ok to cycle the ILOCK in readdir because the VFS takes the IOLOCK in the appropriate mode during lookups and writes, and we don't want to be holding the ILOCK when we copy directory entries to userspace in case there's a page fault. We really only need it to protect against data fork lookups, like we do for other files. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com>
1 parent 7e937bb commit 65552b0

File tree

1 file changed

+34
-19
lines changed

1 file changed

+34
-19
lines changed

fs/xfs/xfs_dir2_readdir.c

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -138,15 +138,15 @@ xfs_dir2_sf_getdents(
138138
STATIC int
139139
xfs_dir2_block_getdents(
140140
struct xfs_da_args *args,
141-
struct dir_context *ctx)
141+
struct dir_context *ctx,
142+
unsigned int *lock_mode)
142143
{
143144
struct xfs_inode *dp = args->dp; /* incore directory inode */
144145
struct xfs_buf *bp; /* buffer for block */
145146
int error; /* error return value */
146147
int wantoff; /* starting block offset */
147148
xfs_off_t cook;
148149
struct xfs_da_geometry *geo = args->geo;
149-
int lock_mode;
150150
unsigned int offset, next_offset;
151151
unsigned int end;
152152

@@ -156,12 +156,13 @@ xfs_dir2_block_getdents(
156156
if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
157157
return 0;
158158

159-
lock_mode = xfs_ilock_data_map_shared(dp);
160159
error = xfs_dir3_block_read(args->trans, dp, &bp);
161-
xfs_iunlock(dp, lock_mode);
162160
if (error)
163161
return error;
164162

163+
xfs_iunlock(dp, *lock_mode);
164+
*lock_mode = 0;
165+
165166
/*
166167
* Extract the byte offset we start at from the seek pointer.
167168
* We'll skip entries before this.
@@ -344,7 +345,8 @@ STATIC int
344345
xfs_dir2_leaf_getdents(
345346
struct xfs_da_args *args,
346347
struct dir_context *ctx,
347-
size_t bufsize)
348+
size_t bufsize,
349+
unsigned int *lock_mode)
348350
{
349351
struct xfs_inode *dp = args->dp;
350352
struct xfs_mount *mp = dp->i_mount;
@@ -356,7 +358,6 @@ xfs_dir2_leaf_getdents(
356358
xfs_dir2_off_t curoff; /* current overall offset */
357359
int length; /* temporary length value */
358360
int byteoff; /* offset in current block */
359-
int lock_mode;
360361
unsigned int offset = 0;
361362
int error = 0; /* error return value */
362363

@@ -390,13 +391,16 @@ xfs_dir2_leaf_getdents(
390391
bp = NULL;
391392
}
392393

393-
lock_mode = xfs_ilock_data_map_shared(dp);
394+
if (*lock_mode == 0)
395+
*lock_mode = xfs_ilock_data_map_shared(dp);
394396
error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff,
395397
&rablk, &bp);
396-
xfs_iunlock(dp, lock_mode);
397398
if (error || !bp)
398399
break;
399400

401+
xfs_iunlock(dp, *lock_mode);
402+
*lock_mode = 0;
403+
400404
xfs_dir3_data_check(dp, bp);
401405
/*
402406
* Find our position in the block.
@@ -496,7 +500,7 @@ xfs_dir2_leaf_getdents(
496500
*
497501
* If supplied, the transaction collects locked dir buffers to avoid
498502
* nested buffer deadlocks. This function does not dirty the
499-
* transaction. The caller should ensure that the inode is locked
503+
* transaction. The caller must hold the IOLOCK (shared or exclusive)
500504
* before calling this function.
501505
*/
502506
int
@@ -507,29 +511,40 @@ xfs_readdir(
507511
size_t bufsize)
508512
{
509513
struct xfs_da_args args = { NULL };
510-
int rval;
511-
int v;
514+
unsigned int lock_mode;
515+
int isblock;
516+
int error;
512517

513518
trace_xfs_readdir(dp);
514519

515520
if (xfs_is_shutdown(dp->i_mount))
516521
return -EIO;
517522

518523
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
524+
ASSERT(xfs_isilocked(dp, XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
519525
XFS_STATS_INC(dp->i_mount, xs_dir_getdents);
520526

521527
args.dp = dp;
522528
args.geo = dp->i_mount->m_dir_geo;
523529
args.trans = tp;
524530

525531
if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
526-
rval = xfs_dir2_sf_getdents(&args, ctx);
527-
else if ((rval = xfs_dir2_isblock(&args, &v)))
528-
;
529-
else if (v)
530-
rval = xfs_dir2_block_getdents(&args, ctx);
531-
else
532-
rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
532+
return xfs_dir2_sf_getdents(&args, ctx);
533533

534-
return rval;
534+
lock_mode = xfs_ilock_data_map_shared(dp);
535+
error = xfs_dir2_isblock(&args, &isblock);
536+
if (error)
537+
goto out_unlock;
538+
539+
if (isblock) {
540+
error = xfs_dir2_block_getdents(&args, ctx, &lock_mode);
541+
goto out_unlock;
542+
}
543+
544+
error = xfs_dir2_leaf_getdents(&args, ctx, bufsize, &lock_mode);
545+
546+
out_unlock:
547+
if (lock_mode)
548+
xfs_iunlock(dp, lock_mode);
549+
return error;
535550
}

0 commit comments

Comments
 (0)