Skip to content

Commit 3c59366

Browse files
neilbrownTrond Myklebust
authored andcommitted
NFS: don't unhash dentry during unlink/rename
NFS unlink() (and rename over existing target) must determine if the file is open, and must perform a "silly rename" instead of an unlink (or before rename) if it is. Otherwise the client might hold a file open which has been removed on the server. Consequently if it determines that the file isn't open, it must block any subsequent opens until the unlink/rename has been completed on the server. This is currently achieved by unhashing the dentry. This forces any open attempt to the slow-path for lookup which will block on i_rwsem on the directory until the unlink/rename completes. A future patch will change the VFS to only get a shared lock on i_rwsem for unlink, so this will no longer work. Instead we introduce an explicit interlock. A special value is stored in dentry->d_fsdata while the unlink/rename is running and ->d_revalidate blocks while that value is present. When ->d_revalidate unblocks, the dentry will be invalid. This closes the race without requiring exclusion on i_rwsem. d_fsdata is already used in two different ways. 1/ an IS_ROOT directory dentry might have a "devname" stored in d_fsdata. Such a dentry doesn't have a name and so cannot be the target of unlink or rename. For safety we check if an old devname is still stored, and remove it if it is. 2/ a dentry with DCACHE_NFSFS_RENAMED set will have a 'struct nfs_unlinkdata' stored in d_fsdata. While this is set maydelete() will fail, so an unlink or rename will never proceed on such a dentry. Neither of these can be in effect when a dentry is the target of unlink or rename. So we can expect d_fsdata to be NULL, and store a special value ((void*)1) which is given the name NFS_FSDATA_BLOCKED to indicate that any lookup will be blocked. The d_count() is incremented under d_lock() when a lookup finds the dentry, so we check d_count() is low, and set NFS_FSDATA_BLOCKED under the same lock to avoid any races. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
1 parent 2135e5d commit 3c59366

File tree

2 files changed

+63
-18
lines changed

2 files changed

+63
-18
lines changed

fs/nfs/dir.c

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,6 +1782,8 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
17821782
int ret;
17831783

17841784
if (flags & LOOKUP_RCU) {
1785+
if (dentry->d_fsdata == NFS_FSDATA_BLOCKED)
1786+
return -ECHILD;
17851787
parent = READ_ONCE(dentry->d_parent);
17861788
dir = d_inode_rcu(parent);
17871789
if (!dir)
@@ -1790,6 +1792,9 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
17901792
if (parent != READ_ONCE(dentry->d_parent))
17911793
return -ECHILD;
17921794
} else {
1795+
/* Wait for unlink to complete */
1796+
wait_var_event(&dentry->d_fsdata,
1797+
dentry->d_fsdata != NFS_FSDATA_BLOCKED);
17931798
parent = dget_parent(dentry);
17941799
ret = reval(d_inode(parent), dentry, flags);
17951800
dput(parent);
@@ -2458,7 +2463,6 @@ static int nfs_safe_remove(struct dentry *dentry)
24582463
int nfs_unlink(struct inode *dir, struct dentry *dentry)
24592464
{
24602465
int error;
2461-
int need_rehash = 0;
24622466

24632467
dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
24642468
dir->i_ino, dentry);
@@ -2473,15 +2477,25 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
24732477
error = nfs_sillyrename(dir, dentry);
24742478
goto out;
24752479
}
2476-
if (!d_unhashed(dentry)) {
2477-
__d_drop(dentry);
2478-
need_rehash = 1;
2479-
}
2480+
/* We must prevent any concurrent open until the unlink
2481+
* completes. ->d_revalidate will wait for ->d_fsdata
2482+
* to clear. We set it here to ensure no lookup succeeds until
2483+
* the unlink is complete on the server.
2484+
*/
2485+
error = -ETXTBSY;
2486+
if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
2487+
WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED))
2488+
goto out;
2489+
if (dentry->d_fsdata)
2490+
/* old devname */
2491+
kfree(dentry->d_fsdata);
2492+
dentry->d_fsdata = NFS_FSDATA_BLOCKED;
2493+
24802494
spin_unlock(&dentry->d_lock);
24812495
error = nfs_safe_remove(dentry);
24822496
nfs_dentry_remove_handle_error(dir, dentry, error);
2483-
if (need_rehash)
2484-
d_rehash(dentry);
2497+
dentry->d_fsdata = NULL;
2498+
wake_up_var(&dentry->d_fsdata);
24852499
out:
24862500
trace_nfs_unlink_exit(dir, dentry, error);
24872501
return error;
@@ -2588,6 +2602,15 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
25882602
}
25892603
EXPORT_SYMBOL_GPL(nfs_link);
25902604

2605+
static void
2606+
nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
2607+
{
2608+
struct dentry *new_dentry = data->new_dentry;
2609+
2610+
new_dentry->d_fsdata = NULL;
2611+
wake_up_var(&new_dentry->d_fsdata);
2612+
}
2613+
25912614
/*
25922615
* RENAME
25932616
* FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -2618,8 +2641,9 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
26182641
{
26192642
struct inode *old_inode = d_inode(old_dentry);
26202643
struct inode *new_inode = d_inode(new_dentry);
2621-
struct dentry *dentry = NULL, *rehash = NULL;
2644+
struct dentry *dentry = NULL;
26222645
struct rpc_task *task;
2646+
bool must_unblock = false;
26232647
int error = -EBUSY;
26242648

26252649
if (flags)
@@ -2637,18 +2661,27 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
26372661
* the new target.
26382662
*/
26392663
if (new_inode && !S_ISDIR(new_inode->i_mode)) {
2640-
/*
2641-
* To prevent any new references to the target during the
2642-
* rename, we unhash the dentry in advance.
2664+
/* We must prevent any concurrent open until the unlink
2665+
* completes. ->d_revalidate will wait for ->d_fsdata
2666+
* to clear. We set it here to ensure no lookup succeeds until
2667+
* the unlink is complete on the server.
26432668
*/
2644-
if (!d_unhashed(new_dentry)) {
2645-
d_drop(new_dentry);
2646-
rehash = new_dentry;
2669+
error = -ETXTBSY;
2670+
if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
2671+
WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED))
2672+
goto out;
2673+
if (new_dentry->d_fsdata) {
2674+
/* old devname */
2675+
kfree(new_dentry->d_fsdata);
2676+
new_dentry->d_fsdata = NULL;
26472677
}
26482678

2679+
spin_lock(&new_dentry->d_lock);
26492680
if (d_count(new_dentry) > 2) {
26502681
int err;
26512682

2683+
spin_unlock(&new_dentry->d_lock);
2684+
26522685
/* copy the target dentry's name */
26532686
dentry = d_alloc(new_dentry->d_parent,
26542687
&new_dentry->d_name);
@@ -2661,14 +2694,19 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
26612694
goto out;
26622695

26632696
new_dentry = dentry;
2664-
rehash = NULL;
26652697
new_inode = NULL;
2698+
} else {
2699+
new_dentry->d_fsdata = NFS_FSDATA_BLOCKED;
2700+
must_unblock = true;
2701+
spin_unlock(&new_dentry->d_lock);
26662702
}
2703+
26672704
}
26682705

26692706
if (S_ISREG(old_inode->i_mode))
26702707
nfs_sync_inode(old_inode);
2671-
task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
2708+
task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
2709+
must_unblock ? nfs_unblock_rename : NULL);
26722710
if (IS_ERR(task)) {
26732711
error = PTR_ERR(task);
26742712
goto out;
@@ -2692,8 +2730,6 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
26922730
spin_unlock(&old_inode->i_lock);
26932731
}
26942732
out:
2695-
if (rehash)
2696-
d_rehash(rehash);
26972733
trace_nfs_rename_exit(old_dir, old_dentry,
26982734
new_dir, new_dentry, error);
26992735
if (!error) {

include/linux/nfs_fs.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,15 @@ nfs_fileid_to_ino_t(u64 fileid)
617617

618618
#define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
619619

620+
/* We need to block new opens while a file is being unlinked.
621+
* If it is opened *before* we decide to unlink, we will silly-rename
622+
* instead. If it is opened *after*, then we need to create or will fail.
623+
* If we allow the two to race, we could end up with a file that is open
624+
* but deleted on the server resulting in ESTALE.
625+
* So use ->d_fsdata to record when the unlink is happening
626+
* and block dentry revalidation while it is set.
627+
*/
628+
#define NFS_FSDATA_BLOCKED ((void*)1)
620629

621630
# undef ifdebug
622631
# ifdef NFS_DEBUG

0 commit comments

Comments
 (0)