Skip to content

Commit 6ce8b2c

Browse files
committed
Merge tag 'fuse-update-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
Pull fuse updates from Miklos Szeredi: - Add passthrough mode for regular file I/O. This allows performing read and write (also via memory maps) on a backing file without incurring the overhead of roundtrips to userspace. For now this is only allowed to privileged servers, but this limitation will go away in the future (Amir Goldstein) - Fix interaction of direct I/O mode with memory maps (Bernd Schubert) - Export filesystem tags through sysfs for virtiofs (Stefan Hajnoczi) - Allow resending queued requests for server crash recovery (Zhao Chen) - Misc fixes and cleanups * tag 'fuse-update-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (38 commits) fuse: get rid of ff->readdir.lock fuse: remove unneeded lock which protecting update of congestion_threshold fuse: Fix missing FOLL_PIN for direct-io fuse: remove an unnecessary if statement fuse: Track process write operations in both direct and writethrough modes fuse: Use the high bit of request ID for indicating resend requests fuse: Introduce a new notification type for resend pending requests fuse: add support for explicit export disabling fuse: __kuid_val/__kgid_val helpers in fuse_fill_attr_from_inode() fuse: fix typo for fuse_permission comment fuse: Convert fuse_writepage_locked to take a folio fuse: Remove fuse_writepage virtio_fs: remove duplicate check if queue is broken fuse: use FUSE_ROOT_ID in fuse_get_root_inode() fuse: don't unhash root fuse: fix root lookup with nonzero generation fuse: replace remaining make_bad_inode() with fuse_make_bad() virtiofs: drop __exit from virtio_fs_sysfs_exit() fuse: implement passthrough for mmap fuse: implement splice read/write passthrough ...
2 parents 68bf6bf + cdf6ac2 commit 6ce8b2c

File tree

14 files changed

+1422
-277
lines changed

14 files changed

+1422
-277
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
What: /sys/fs/virtiofs/<n>/tag
2+
Date: Feb 2024
3+
Contact: virtio-fs@lists.linux.dev
4+
Description:
5+
[RO] The mount "tag" that can be used to mount this filesystem.
6+
7+
What: /sys/fs/virtiofs/<n>/device
8+
Date: Feb 2024
9+
Contact: virtio-fs@lists.linux.dev
10+
Description:
11+
Symlink to the virtio device that exports this filesystem.

fs/fuse/Kconfig

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,14 @@ config FUSE_DAX
5252

5353
If you want to allow mounting a Virtio Filesystem with the "dax"
5454
option, answer Y.
55+
56+
config FUSE_PASSTHROUGH
57+
bool "FUSE passthrough operations support"
58+
default y
59+
depends on FUSE_FS
60+
select FS_STACK
61+
help
62+
This allows bypassing the FUSE server by mapping specific FUSE operations
63+
to be performed directly on a backing file.
64+
65+
If you want to allow passthrough operations, answer Y.

fs/fuse/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ obj-$(CONFIG_CUSE) += cuse.o
88
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
99

1010
fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
11+
fuse-y += iomode.o
1112
fuse-$(CONFIG_FUSE_DAX) += dax.o
13+
fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
1214

1315
virtiofs-y := virtio_fs.o

fs/fuse/control.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
174174
if (!fc)
175175
goto out;
176176

177-
down_read(&fc->killsb);
178-
spin_lock(&fc->bg_lock);
179-
fc->congestion_threshold = val;
180-
spin_unlock(&fc->bg_lock);
181-
up_read(&fc->killsb);
177+
WRITE_ONCE(fc->congestion_threshold, val);
182178
fuse_conn_put(fc);
183179
out:
184180
return ret;

fs/fuse/dev.c

Lines changed: 131 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1775,6 +1775,61 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
17751775
return err;
17761776
}
17771777

1778+
/*
1779+
* Resend all requests on the processing queues.
1780+
*
1781+
* When a FUSE daemon panics and fails over, it is possible for some inflight
1782+
* requests to be lost and never returned. As a result, applications awaiting
1783+
* replies would become stuck forever. To address this, we can use notification
1784+
* to trigger resending of these pending requests to the FUSE daemon, ensuring
1785+
* they are properly processed again.
1786+
*
1787+
* Please note that this strategy is applicable only to idempotent requests or
1788+
* if the FUSE daemon takes careful measures to avoid processing duplicated
1789+
* non-idempotent requests.
1790+
*/
1791+
static void fuse_resend(struct fuse_conn *fc)
1792+
{
1793+
struct fuse_dev *fud;
1794+
struct fuse_req *req, *next;
1795+
struct fuse_iqueue *fiq = &fc->iq;
1796+
LIST_HEAD(to_queue);
1797+
unsigned int i;
1798+
1799+
spin_lock(&fc->lock);
1800+
if (!fc->connected) {
1801+
spin_unlock(&fc->lock);
1802+
return;
1803+
}
1804+
1805+
list_for_each_entry(fud, &fc->devices, entry) {
1806+
struct fuse_pqueue *fpq = &fud->pq;
1807+
1808+
spin_lock(&fpq->lock);
1809+
for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
1810+
list_splice_tail_init(&fpq->processing[i], &to_queue);
1811+
spin_unlock(&fpq->lock);
1812+
}
1813+
spin_unlock(&fc->lock);
1814+
1815+
list_for_each_entry_safe(req, next, &to_queue, list) {
1816+
__set_bit(FR_PENDING, &req->flags);
1817+
/* mark the request as resend request */
1818+
req->in.h.unique |= FUSE_UNIQUE_RESEND;
1819+
}
1820+
1821+
spin_lock(&fiq->lock);
1822+
/* iq and pq requests are both oldest to newest */
1823+
list_splice(&to_queue, &fiq->pending);
1824+
fiq->ops->wake_pending_and_unlock(fiq);
1825+
}
1826+
1827+
static int fuse_notify_resend(struct fuse_conn *fc)
1828+
{
1829+
fuse_resend(fc);
1830+
return 0;
1831+
}
1832+
17781833
static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
17791834
unsigned int size, struct fuse_copy_state *cs)
17801835
{
@@ -1800,6 +1855,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
18001855
case FUSE_NOTIFY_DELETE:
18011856
return fuse_notify_delete(fc, size, cs);
18021857

1858+
case FUSE_NOTIFY_RESEND:
1859+
return fuse_notify_resend(fc);
1860+
18031861
default:
18041862
fuse_copy_finish(cs);
18051863
return -EINVAL;
@@ -2251,43 +2309,91 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
22512309
return 0;
22522310
}
22532311

2254-
static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
2255-
unsigned long arg)
2312+
static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
22562313
{
22572314
int res;
22582315
int oldfd;
22592316
struct fuse_dev *fud = NULL;
22602317
struct fd f;
22612318

2319+
if (get_user(oldfd, argp))
2320+
return -EFAULT;
2321+
2322+
f = fdget(oldfd);
2323+
if (!f.file)
2324+
return -EINVAL;
2325+
2326+
/*
2327+
* Check against file->f_op because CUSE
2328+
* uses the same ioctl handler.
2329+
*/
2330+
if (f.file->f_op == file->f_op)
2331+
fud = fuse_get_dev(f.file);
2332+
2333+
res = -EINVAL;
2334+
if (fud) {
2335+
mutex_lock(&fuse_mutex);
2336+
res = fuse_device_clone(fud->fc, file);
2337+
mutex_unlock(&fuse_mutex);
2338+
}
2339+
2340+
fdput(f);
2341+
return res;
2342+
}
2343+
2344+
static long fuse_dev_ioctl_backing_open(struct file *file,
2345+
struct fuse_backing_map __user *argp)
2346+
{
2347+
struct fuse_dev *fud = fuse_get_dev(file);
2348+
struct fuse_backing_map map;
2349+
2350+
if (!fud)
2351+
return -EPERM;
2352+
2353+
if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
2354+
return -EOPNOTSUPP;
2355+
2356+
if (copy_from_user(&map, argp, sizeof(map)))
2357+
return -EFAULT;
2358+
2359+
return fuse_backing_open(fud->fc, &map);
2360+
}
2361+
2362+
static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp)
2363+
{
2364+
struct fuse_dev *fud = fuse_get_dev(file);
2365+
int backing_id;
2366+
2367+
if (!fud)
2368+
return -EPERM;
2369+
2370+
if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
2371+
return -EOPNOTSUPP;
2372+
2373+
if (get_user(backing_id, argp))
2374+
return -EFAULT;
2375+
2376+
return fuse_backing_close(fud->fc, backing_id);
2377+
}
2378+
2379+
static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
2380+
unsigned long arg)
2381+
{
2382+
void __user *argp = (void __user *)arg;
2383+
22622384
switch (cmd) {
22632385
case FUSE_DEV_IOC_CLONE:
2264-
if (get_user(oldfd, (__u32 __user *)arg))
2265-
return -EFAULT;
2386+
return fuse_dev_ioctl_clone(file, argp);
22662387

2267-
f = fdget(oldfd);
2268-
if (!f.file)
2269-
return -EINVAL;
2388+
case FUSE_DEV_IOC_BACKING_OPEN:
2389+
return fuse_dev_ioctl_backing_open(file, argp);
2390+
2391+
case FUSE_DEV_IOC_BACKING_CLOSE:
2392+
return fuse_dev_ioctl_backing_close(file, argp);
22702393

2271-
/*
2272-
* Check against file->f_op because CUSE
2273-
* uses the same ioctl handler.
2274-
*/
2275-
if (f.file->f_op == file->f_op)
2276-
fud = fuse_get_dev(f.file);
2277-
2278-
res = -EINVAL;
2279-
if (fud) {
2280-
mutex_lock(&fuse_mutex);
2281-
res = fuse_device_clone(fud->fc, file);
2282-
mutex_unlock(&fuse_mutex);
2283-
}
2284-
fdput(f);
2285-
break;
22862394
default:
2287-
res = -ENOTTY;
2288-
break;
2395+
return -ENOTTY;
22892396
}
2290-
return res;
22912397
}
22922398

22932399
const struct file_operations fuse_dev_operations = {

fs/fuse/dir.c

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
391391
err = -EIO;
392392
if (fuse_invalid_attr(&outarg->attr))
393393
goto out_put_forget;
394+
if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
395+
pr_warn_once("root generation should be zero\n");
396+
outarg->generation = 0;
397+
}
394398

395399
*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
396400
&outarg->attr, ATTR_TIMEOUT(outarg),
@@ -615,7 +619,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
615619
FUSE_ARGS(args);
616620
struct fuse_forget_link *forget;
617621
struct fuse_create_in inarg;
618-
struct fuse_open_out outopen;
622+
struct fuse_open_out *outopenp;
619623
struct fuse_entry_out outentry;
620624
struct fuse_inode *fi;
621625
struct fuse_file *ff;
@@ -630,7 +634,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
630634
goto out_err;
631635

632636
err = -ENOMEM;
633-
ff = fuse_file_alloc(fm);
637+
ff = fuse_file_alloc(fm, true);
634638
if (!ff)
635639
goto out_put_forget_req;
636640

@@ -659,8 +663,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
659663
args.out_numargs = 2;
660664
args.out_args[0].size = sizeof(outentry);
661665
args.out_args[0].value = &outentry;
662-
args.out_args[1].size = sizeof(outopen);
663-
args.out_args[1].value = &outopen;
666+
/* Store outarg for fuse_finish_open() */
667+
outopenp = &ff->args->open_outarg;
668+
args.out_args[1].size = sizeof(*outopenp);
669+
args.out_args[1].value = outopenp;
664670

665671
err = get_create_ext(&args, dir, entry, mode);
666672
if (err)
@@ -676,9 +682,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
676682
fuse_invalid_attr(&outentry.attr))
677683
goto out_free_ff;
678684

679-
ff->fh = outopen.fh;
685+
ff->fh = outopenp->fh;
680686
ff->nodeid = outentry.nodeid;
681-
ff->open_flags = outopen.open_flags;
687+
ff->open_flags = outopenp->open_flags;
682688
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
683689
&outentry.attr, ATTR_TIMEOUT(&outentry), 0);
684690
if (!inode) {
@@ -692,13 +698,15 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
692698
d_instantiate(entry, inode);
693699
fuse_change_entry_timeout(entry, &outentry);
694700
fuse_dir_changed(dir);
695-
err = finish_open(file, entry, generic_file_open);
701+
err = generic_file_open(inode, file);
702+
if (!err) {
703+
file->private_data = ff;
704+
err = finish_open(file, entry, fuse_finish_open);
705+
}
696706
if (err) {
697707
fi = get_fuse_inode(inode);
698708
fuse_sync_release(fi, ff, flags);
699709
} else {
700-
file->private_data = ff;
701-
fuse_finish_open(inode, file);
702710
if (fm->fc->atomic_o_trunc && trunc)
703711
truncate_pagecache(inode, 0);
704712
else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
@@ -1210,7 +1218,7 @@ static int fuse_do_statx(struct inode *inode, struct file *file,
12101218
if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
12111219
((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
12121220
inode_wrong_type(inode, sx->mode)))) {
1213-
make_bad_inode(inode);
1221+
fuse_make_bad(inode);
12141222
return -EIO;
12151223
}
12161224

@@ -1485,7 +1493,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
14851493
*
14861494
* 1) Local access checking ('default_permissions' mount option) based
14871495
* on file mode. This is the plain old disk filesystem permission
1488-
* modell.
1496+
* model.
14891497
*
14901498
* 2) "Remote" access checking, where server is responsible for
14911499
* checking permission in each inode operation. An exception to this
@@ -1630,7 +1638,30 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
16301638

16311639
static int fuse_dir_open(struct inode *inode, struct file *file)
16321640
{
1633-
return fuse_open_common(inode, file, true);
1641+
struct fuse_mount *fm = get_fuse_mount(inode);
1642+
int err;
1643+
1644+
if (fuse_is_bad(inode))
1645+
return -EIO;
1646+
1647+
err = generic_file_open(inode, file);
1648+
if (err)
1649+
return err;
1650+
1651+
err = fuse_do_open(fm, get_node_id(inode), file, true);
1652+
if (!err) {
1653+
struct fuse_file *ff = file->private_data;
1654+
1655+
/*
1656+
* Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1657+
* directories for backward compatibility, though it's unlikely
1658+
* to be useful.
1659+
*/
1660+
if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1661+
nonseekable_open(inode, file);
1662+
}
1663+
1664+
return err;
16341665
}
16351666

16361667
static int fuse_dir_release(struct inode *inode, struct file *file)

0 commit comments

Comments
 (0)