Skip to content

Commit a8b0026

Browse files
author
Al Viro
committed
rename(): avoid a deadlock in the case of parents having no common ancestor
... and fix the directory locking documentation and proof of correctness. Holding ->s_vfs_rename_mutex *almost* prevents ->d_parent changes; the case where we really don't want it is splicing the root of disconnected tree to somewhere. In other words, ->s_vfs_rename_mutex is sufficient to stabilize "X is an ancestor of Y" only if X and Y are already in the same tree. Otherwise it can go from false to true, and one can construct a deadlock on that. Make lock_two_directories() report an error in such case and update the callers of lock_rename()/lock_rename_child() to handle such errors. And yes, such conditions are not impossible to create ;-/ Reviewed-by: Jan Kara <jack@suse.cz> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent dbd4540 commit a8b0026

File tree

11 files changed

+313
-118
lines changed

11 files changed

+313
-118
lines changed

Documentation/filesystems/directory-locking.rst

Lines changed: 242 additions & 104 deletions
Large diffs are not rendered by default.

Documentation/filesystems/porting.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,3 +1079,12 @@ On same-directory ->rename() the (tautological) update of .. is not protected
10791079
by any locks; just don't do it if the old parent is the same as the new one.
10801080
We really can't lock two subdirectories in same-directory rename - not without
10811081
deadlocks.
1082+
1083+
---
1084+
1085+
**mandatory**
1086+
1087+
lock_rename() and lock_rename_child() may fail in cross-directory case, if
1088+
their arguments do not have a common ancestor. In that case ERR_PTR(-EXDEV)
1089+
is returned, with no locks taken. In-tree users updated; out-of-tree ones
1090+
would need to do so.

fs/cachefiles/namei.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,8 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
305305

306306
/* do the multiway lock magic */
307307
trap = lock_rename(cache->graveyard, dir);
308+
if (IS_ERR(trap))
309+
return PTR_ERR(trap);
308310

309311
/* do some checks before getting the grave dentry */
310312
if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) {

fs/ecryptfs/inode.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,8 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
599599
target_inode = d_inode(new_dentry);
600600

601601
trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
602+
if (IS_ERR(trap))
603+
return PTR_ERR(trap);
602604
dget(lower_new_dentry);
603605
rc = -EINVAL;
604606
if (lower_old_dentry->d_parent != lower_old_dir_dentry)

fs/namei.c

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3014,21 +3014,37 @@ static inline int may_create(struct mnt_idmap *idmap,
30143014
return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
30153015
}
30163016

3017+
// p1 != p2, both are on the same filesystem, ->s_vfs_rename_mutex is held
30173018
static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
30183019
{
3019-
struct dentry *p;
3020+
struct dentry *p = p1, *q = p2, *r;
30203021

3021-
p = d_ancestor(p2, p1);
3022-
if (p) {
3022+
while ((r = p->d_parent) != p2 && r != p)
3023+
p = r;
3024+
if (r == p2) {
3025+
// p is a child of p2 and an ancestor of p1 or p1 itself
30233026
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
30243027
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2);
30253028
return p;
30263029
}
3027-
3028-
p = d_ancestor(p1, p2);
3029-
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
3030-
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
3031-
return p;
3030+
// p is the root of connected component that contains p1
3031+
// p2 does not occur on the path from p to p1
3032+
while ((r = q->d_parent) != p1 && r != p && r != q)
3033+
q = r;
3034+
if (r == p1) {
3035+
// q is a child of p1 and an ancestor of p2 or p2 itself
3036+
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
3037+
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
3038+
return q;
3039+
} else if (likely(r == p)) {
3040+
// both p2 and p1 are descendents of p
3041+
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
3042+
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
3043+
return NULL;
3044+
} else { // no common ancestor at the time we'd been called
3045+
mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
3046+
return ERR_PTR(-EXDEV);
3047+
}
30323048
}
30333049

30343050
/*
@@ -4947,6 +4963,10 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
49474963

49484964
retry_deleg:
49494965
trap = lock_rename(new_path.dentry, old_path.dentry);
4966+
if (IS_ERR(trap)) {
4967+
error = PTR_ERR(trap);
4968+
goto exit_lock_rename;
4969+
}
49504970

49514971
old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry,
49524972
lookup_flags);
@@ -5014,6 +5034,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
50145034
dput(old_dentry);
50155035
exit3:
50165036
unlock_rename(new_path.dentry, old_path.dentry);
5037+
exit_lock_rename:
50175038
if (delegated_inode) {
50185039
error = break_deleg_wait(&delegated_inode);
50195040
if (!error)

fs/nfsd/vfs.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1813,6 +1813,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
18131813
}
18141814

18151815
trap = lock_rename(tdentry, fdentry);
1816+
if (IS_ERR(trap)) {
1817+
err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
1818+
goto out;
1819+
}
18161820
err = fh_fill_pre_attrs(ffhp);
18171821
if (err != nfs_ok)
18181822
goto out_unlock;

fs/overlayfs/copy_up.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
722722
struct inode *inode;
723723
struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
724724
struct path path = { .mnt = ovl_upper_mnt(ofs) };
725-
struct dentry *temp, *upper;
725+
struct dentry *temp, *upper, *trap;
726726
struct ovl_cu_creds cc;
727727
int err;
728728
struct ovl_cattr cattr = {
@@ -760,9 +760,11 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
760760
* If temp was moved, abort without the cleanup.
761761
*/
762762
ovl_start_write(c->dentry);
763-
if (lock_rename(c->workdir, c->destdir) != NULL ||
764-
temp->d_parent != c->workdir) {
763+
trap = lock_rename(c->workdir, c->destdir);
764+
if (trap || temp->d_parent != c->workdir) {
765765
err = -EIO;
766+
if (IS_ERR(trap))
767+
goto out;
766768
goto unlock;
767769
} else if (err) {
768770
goto cleanup;
@@ -803,6 +805,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
803805
ovl_set_flag(OVL_WHITEOUTS, inode);
804806
unlock:
805807
unlock_rename(c->workdir, c->destdir);
808+
out:
806809
ovl_end_write(c->dentry);
807810

808811
return err;

fs/overlayfs/dir.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,6 +1180,10 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
11801180
}
11811181

11821182
trap = lock_rename(new_upperdir, old_upperdir);
1183+
if (IS_ERR(trap)) {
1184+
err = PTR_ERR(trap);
1185+
goto out_revert_creds;
1186+
}
11831187

11841188
olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir,
11851189
old->d_name.len);

fs/overlayfs/super.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -439,8 +439,10 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
439439
bool ok = false;
440440

441441
if (workdir != upperdir) {
442-
ok = (lock_rename(workdir, upperdir) == NULL);
443-
unlock_rename(workdir, upperdir);
442+
struct dentry *trap = lock_rename(workdir, upperdir);
443+
if (!IS_ERR(trap))
444+
unlock_rename(workdir, upperdir);
445+
ok = (trap == NULL);
444446
}
445447
return ok;
446448
}

fs/overlayfs/util.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1198,12 +1198,17 @@ void ovl_nlink_end(struct dentry *dentry)
11981198

11991199
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
12001200
{
1201+
struct dentry *trap;
1202+
12011203
/* Workdir should not be the same as upperdir */
12021204
if (workdir == upperdir)
12031205
goto err;
12041206

12051207
/* Workdir should not be subdir of upperdir and vice versa */
1206-
if (lock_rename(workdir, upperdir) != NULL)
1208+
trap = lock_rename(workdir, upperdir);
1209+
if (IS_ERR(trap))
1210+
goto err;
1211+
if (trap)
12071212
goto err_unlock;
12081213

12091214
return 0;

0 commit comments

Comments
 (0)