Commit 69456535 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

 - Support directly accessing host page cache from virtiofs. This can
   improve I/O performance for various workloads, as well as reducing
   the memory requirement by eliminating double caching. Thanks to Vivek
   Goyal for doing most of the work on this.

 - Allow automatic submounting inside virtiofs. This allows unique
   st_dev/ st_ino values to be assigned inside the guest to files
   residing on different filesystems on the host. Thanks to Max Reitz
   for the patches.

 - Fix an old use after free bug found by Pradeep P V K.

* tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (25 commits)
  virtiofs: calculate number of scatter-gather elements accurately
  fuse: connection remove fix
  fuse: implement crossmounts
  fuse: Allow fuse_fill_super_common() for submounts
  fuse: split fuse_mount off of fuse_conn
  fuse: drop fuse_conn parameter where possible
  fuse: store fuse_conn in fuse_req
  fuse: add submount support to <uapi/linux/fuse.h>
  fuse: fix page dereference after free
  virtiofs: add logic to free up a memory range
  virtiofs: maintain a list of busy elements
  virtiofs: serialize truncate/punch_hole and dax fault path
  virtiofs: define dax address space operations
  virtiofs: add DAX mmap support
  virtiofs: implement dax read/write operations
  virtiofs: introduce setupmapping/removemapping commands
  virtiofs: implement FUSE_INIT map_alignment field
  virtiofs: keep a list of free dax memory ranges
  virtiofs: add a mount option to enable dax
  virtiofs: set up virtio_fs dax_device
  ...
parents 922a763a 42d3e2d0
......@@ -47,7 +47,7 @@ filesystems. A good example is sshfs: a secure network filesystem
using the sftp protocol.
The userspace library and utilities are available from the
`FUSE homepage: <http://fuse.sourceforge.net/>`_
`FUSE homepage: <https://github.com/libfuse/>`_
Filesystem type
===============
......
......@@ -7238,7 +7238,7 @@ FUSE: FILESYSTEM IN USERSPACE
M: Miklos Szeredi <miklos@szeredi.hu>
L: linux-fsdevel@vger.kernel.org
S: Maintained
W: http://fuse.sourceforge.net/
W: https://github.com/libfuse/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git
F: Documentation/filesystems/fuse.rst
F: fs/fuse/
......
......@@ -46,7 +46,8 @@ EXPORT_SYMBOL_GPL(dax_read_unlock);
int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
pgoff_t *pgoff)
{
phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
sector_t start_sect = bdev ? get_start_sect(bdev) : 0;
phys_addr_t phys_off = (start_sect + sector) * 512;
if (pgoff)
*pgoff = PHYS_PFN(phys_off);
......
......@@ -559,8 +559,11 @@ static void *grab_mapping_entry(struct xa_state *xas,
}
/**
* dax_layout_busy_page - find first pinned page in @mapping
* dax_layout_busy_page_range - find first pinned page in @mapping
* @mapping: address space to scan for a page with ref count > 1
* @start: Starting offset. Page containing 'start' is included.
* @end: End offset. Page containing 'end' is included. If 'end' is LLONG_MAX,
* pages from 'start' till the end of file are included.
*
* DAX requires ZONE_DEVICE mapped pages. These pages are never
* 'onlined' to the page allocator so they are considered idle when
......@@ -573,12 +576,15 @@ static void *grab_mapping_entry(struct xa_state *xas,
* to be able to run unmap_mapping_range() and subsequently not race
* mapping_mapped() becoming true.
*/
struct page *dax_layout_busy_page(struct address_space *mapping)
struct page *dax_layout_busy_page_range(struct address_space *mapping,
loff_t start, loff_t end)
{
XA_STATE(xas, &mapping->i_pages, 0);
void *entry;
unsigned int scanned = 0;
struct page *page = NULL;
pgoff_t start_idx = start >> PAGE_SHIFT;
pgoff_t end_idx;
XA_STATE(xas, &mapping->i_pages, start_idx);
/*
* In the 'limited' case get_user_pages() for dax is disabled.
......@@ -589,6 +595,11 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
if (!dax_mapping(mapping) || !mapping_mapped(mapping))
return NULL;
/* If end == LLONG_MAX, all pages from start to till end of file */
if (end == LLONG_MAX)
end_idx = ULONG_MAX;
else
end_idx = end >> PAGE_SHIFT;
/*
* If we race get_user_pages_fast() here either we'll see the
* elevated page count in the iteration and wait, or
......@@ -596,15 +607,15 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
* against is no longer mapped in the page tables and bail to the
* get_user_pages() slow path. The slow path is protected by
* pte_lock() and pmd_lock(). New references are not taken without
* holding those locks, and unmap_mapping_range() will not zero the
* holding those locks, and unmap_mapping_pages() will not zero the
* pte or pmd without holding the respective lock, so we are
* guaranteed to either see new references or prevent new
* references from being established.
*/
unmap_mapping_range(mapping, 0, 0, 0);
unmap_mapping_pages(mapping, start_idx, end_idx - start_idx + 1, 0);
xas_lock_irq(&xas);
xas_for_each(&xas, entry, ULONG_MAX) {
xas_for_each(&xas, entry, end_idx) {
if (WARN_ON_ONCE(!xa_is_value(entry)))
continue;
if (unlikely(dax_is_locked(entry)))
......@@ -625,6 +636,12 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
xas_unlock_irq(&xas);
return page;
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page_range);
struct page *dax_layout_busy_page(struct address_space *mapping)
{
return dax_layout_busy_page_range(mapping, 0, LLONG_MAX);
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page);
static int __dax_invalidate_entry(struct address_space *mapping,
......
......@@ -8,7 +8,7 @@ config FUSE_FS
There's also a companion library: libfuse2. This library is available
from the FUSE homepage:
<http://fuse.sourceforge.net/>
<https://github.com/libfuse/>
although chances are your distribution already has that library
installed if you've installed the "fuse" package itself.
......@@ -38,3 +38,17 @@ config VIRTIO_FS
If you want to share files between guests or with the host, answer Y
or M.
config FUSE_DAX
bool "Virtio Filesystem Direct Host Memory Access support"
default y
select INTERVAL_TREE
depends on VIRTIO_FS
depends on FS_DAX
depends on DAX_DRIVER
help
This allows bypassing guest page cache and allows mapping host page
cache directly in guest address space.
If you want to allow mounting a Virtio Filesystem with the "dax"
option, answer Y.
......@@ -7,5 +7,7 @@ obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
virtiofs-y += virtio_fs.o
fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
fuse-$(CONFIG_FUSE_DAX) += dax.o
virtiofs-y := virtio_fs.o
......@@ -164,6 +164,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
{
unsigned val;
struct fuse_conn *fc;
struct fuse_mount *fm;
ssize_t ret;
ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
......@@ -174,18 +175,27 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
if (!fc)
goto out;
down_read(&fc->killsb);
spin_lock(&fc->bg_lock);
fc->congestion_threshold = val;
if (fc->sb) {
/*
* Get any fuse_mount belonging to this fuse_conn; s_bdi is
* shared between all of them
*/
if (!list_empty(&fc->mounts)) {
fm = list_first_entry(&fc->mounts, struct fuse_mount, fc_entry);
if (fc->num_background < fc->congestion_threshold) {
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
} else {
set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
}
}
spin_unlock(&fc->bg_lock);
up_read(&fc->killsb);
fuse_conn_put(fc);
out:
return ret;
......
......@@ -57,6 +57,7 @@
struct cuse_conn {
struct list_head list; /* linked on cuse_conntbl */
struct fuse_mount fm; /* Dummy mount referencing fc */
struct fuse_conn fc; /* fuse connection */
struct cdev *cdev; /* associated character device */
struct device *dev; /* device representing @cdev */
......@@ -134,7 +135,7 @@ static int cuse_open(struct inode *inode, struct file *file)
* Generic permission check is already done against the chrdev
* file, proceed to open.
*/
rc = fuse_do_open(&cc->fc, 0, file, 0);
rc = fuse_do_open(&cc->fm, 0, file, 0);
if (rc)
fuse_conn_put(&cc->fc);
return rc;
......@@ -143,10 +144,10 @@ static int cuse_open(struct inode *inode, struct file *file)
static int cuse_release(struct inode *inode, struct file *file)
{
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
struct fuse_mount *fm = ff->fm;
fuse_sync_release(NULL, ff, file->f_flags);
fuse_conn_put(fc);
fuse_conn_put(fm->fc);
return 0;
}
......@@ -155,7 +156,7 @@ static long cuse_file_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fuse_file *ff = file->private_data;
struct cuse_conn *cc = fc_to_cc(ff->fc);
struct cuse_conn *cc = fc_to_cc(ff->fm->fc);
unsigned int flags = 0;
if (cc->unrestricted_ioctl)
......@@ -168,7 +169,7 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fuse_file *ff = file->private_data;
struct cuse_conn *cc = fc_to_cc(ff->fc);
struct cuse_conn *cc = fc_to_cc(ff->fm->fc);
unsigned int flags = FUSE_IOCTL_COMPAT;
if (cc->unrestricted_ioctl)
......@@ -313,9 +314,10 @@ struct cuse_init_args {
* required data structures for it. Please read the comment at the
* top of this file for high level overview.
*/
static void cuse_process_init_reply(struct fuse_conn *fc,
static void cuse_process_init_reply(struct fuse_mount *fm,
struct fuse_args *args, int error)
{
struct fuse_conn *fc = fm->fc;
struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args);
struct fuse_args_pages *ap = &ia->ap;
struct cuse_conn *cc = fc_to_cc(fc), *pos;
......@@ -424,7 +426,7 @@ static int cuse_send_init(struct cuse_conn *cc)
{
int rc;
struct page *page;
struct fuse_conn *fc = &cc->fc;
struct fuse_mount *fm = &cc->fm;
struct cuse_init_args *ia;
struct fuse_args_pages *ap;
......@@ -460,7 +462,7 @@ static int cuse_send_init(struct cuse_conn *cc)
ia->desc.length = ap->args.out_args[1].size;
ap->args.end = cuse_process_init_reply;
rc = fuse_simple_background(fc, &ap->args, GFP_KERNEL);
rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
if (rc) {
kfree(ia);
err_free_page:
......@@ -506,7 +508,8 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
* Limit the cuse channel to requests that can
* be represented in file->f_cred->user_ns.
*/
fuse_conn_init(&cc->fc, file->f_cred->user_ns, &fuse_dev_fiq_ops, NULL);
fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns,
&fuse_dev_fiq_ops, NULL);
fud = fuse_dev_alloc_install(&cc->fc);
if (!fud) {
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -252,7 +252,7 @@ static int fuse_direntplus_link(struct file *file,
static void fuse_force_forget(struct file *file, u64 nodeid)
{
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_forget_in inarg;
FUSE_ARGS(args);
......@@ -266,7 +266,7 @@ static void fuse_force_forget(struct file *file, u64 nodeid)
args.force = true;
args.noreply = true;
fuse_simple_request(fc, &args);
fuse_simple_request(fm, &args);
/* ignore errors */
}
......@@ -320,7 +320,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
ssize_t res;
struct page *page;
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_io_args ia = {};
struct fuse_args_pages *ap = &ia.ap;
struct fuse_page_desc desc = { .length = PAGE_SIZE };
......@@ -337,7 +337,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
ap->pages = &page;
ap->descs = &desc;
if (plus) {
attr_version = fuse_get_attr_version(fc);
attr_version = fuse_get_attr_version(fm->fc);
fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
FUSE_READDIRPLUS);
} else {
......@@ -345,7 +345,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
FUSE_READDIR);
}
locked = fuse_lock_inode(inode);
res = fuse_simple_request(fc, &ap->args);
res = fuse_simple_request(fm, &ap->args);
fuse_unlock_inode(inode, locked);
if (res >= 0) {
if (!res) {
......
This diff is collapsed.
......@@ -14,12 +14,12 @@
int fuse_setxattr(struct inode *inode, const char *name, const void *value,
size_t size, int flags)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_setxattr_in inarg;
int err;
if (fc->no_setxattr)
if (fm->fc->no_setxattr)
return -EOPNOTSUPP;
memset(&inarg, 0, sizeof(inarg));
......@@ -34,9 +34,9 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value,
args.in_args[1].value = name;
args.in_args[2].size = size;
args.in_args[2].value = value;
err = fuse_simple_request(fc, &args);
err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) {
fc->no_setxattr = 1;
fm->fc->no_setxattr = 1;
err = -EOPNOTSUPP;
}
if (!err) {
......@@ -49,13 +49,13 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value,
ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
size_t size)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_getxattr_in inarg;
struct fuse_getxattr_out outarg;
ssize_t ret;
if (fc->no_getxattr)
if (fm->fc->no_getxattr)
return -EOPNOTSUPP;
memset(&inarg, 0, sizeof(inarg));
......@@ -77,11 +77,11 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
}
ret = fuse_simple_request(fc, &args);
ret = fuse_simple_request(fm, &args);
if (!ret && !size)
ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX);
if (ret == -ENOSYS) {
fc->no_getxattr = 1;
fm->fc->no_getxattr = 1;
ret = -EOPNOTSUPP;
}
return ret;
......@@ -107,16 +107,16 @@ static int fuse_verify_xattr_list(char *list, size_t size)
ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
{
struct inode *inode = d_inode(entry);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_getxattr_in inarg;
struct fuse_getxattr_out outarg;
ssize_t ret;
if (!fuse_allow_current_process(fc))
if (!fuse_allow_current_process(fm->fc))
return -EACCES;
if (fc->no_listxattr)
if (fm->fc->no_listxattr)
return -EOPNOTSUPP;
memset(&inarg, 0, sizeof(inarg));
......@@ -136,13 +136,13 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
}
ret = fuse_simple_request(fc, &args);
ret = fuse_simple_request(fm, &args);
if (!ret && !size)
ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX);
if (ret > 0 && size)
ret = fuse_verify_xattr_list(list, ret);
if (ret == -ENOSYS) {
fc->no_listxattr = 1;
fm->fc->no_listxattr = 1;
ret = -EOPNOTSUPP;
}
return ret;
......@@ -150,11 +150,11 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
int fuse_removexattr(struct inode *inode, const char *name)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
int err;
if (fc->no_removexattr)
if (fm->fc->no_removexattr)
return -EOPNOTSUPP;
args.opcode = FUSE_REMOVEXATTR;
......@@ -162,9 +162,9 @@ int fuse_removexattr(struct inode *inode, const char *name)
args.in_numargs = 1;
args.in_args[0].size = strlen(name) + 1;
args.in_args[0].value = name;
err = fuse_simple_request(fc, &args);
err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) {
fc->no_removexattr = 1;
fm->fc->no_removexattr = 1;
err = -EOPNOTSUPP;
}
if (!err) {
......
......@@ -149,6 +149,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
struct dax_device *dax_dev, struct writeback_control *wbc);
struct page *dax_layout_busy_page(struct address_space *mapping);
struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
dax_entry_t dax_lock_page(struct page *page);
void dax_unlock_page(struct page *page, dax_entry_t cookie);
#else
......@@ -179,6 +180,11 @@ static inline struct page *dax_layout_busy_page(struct address_space *mapping)
return NULL;
}
static inline struct page *dax_layout_busy_page_range(struct address_space *mapping, pgoff_t start, pgoff_t nr_pages)
{
return NULL;
}
static inline int dax_writeback_mapping_range(struct address_space *mapping,
struct dax_device *dax_dev, struct writeback_control *wbc)
{
......
......@@ -172,6 +172,9 @@
* - add FUSE_WRITE_KILL_PRIV flag
* - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
* - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
*
* 7.32
* - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS
*/
#ifndef _LINUX_FUSE_H
......@@ -207,7 +210,7 @@
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
#define FUSE_KERNEL_MINOR_VERSION 31
#define FUSE_KERNEL_MINOR_VERSION 32
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
......@@ -231,7 +234,7 @@ struct fuse_attr {
uint32_t gid;
uint32_t rdev;
uint32_t blksize;
uint32_t padding;
uint32_t flags;
};
struct fuse_kstatfs {
......@@ -313,7 +316,10 @@ struct fuse_file_lock {
* FUSE_CACHE_SYMLINKS: cache READLINK responses
* FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
* FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
* FUSE_MAP_ALIGNMENT: map_alignment field is valid
* FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for
* foffset and moffset fields in struct
* fuse_setupmapping_out and fuse_removemapping_one.
* FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
......@@ -342,6 +348,7 @@ struct fuse_file_lock {
#define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
#define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
#define FUSE_MAP_ALIGNMENT (1 << 26)
#define FUSE_SUBMOUNTS (1 << 27)
/**
* CUSE INIT request/reply flags
......@@ -417,6 +424,13 @@ struct fuse_file_lock {
*/
#define FUSE_FSYNC_FDATASYNC (1 << 0)
/**
* fuse_attr flags
*
* FUSE_ATTR_SUBMOUNT: Object is a submount root
*/
#define FUSE_ATTR_SUBMOUNT (1 << 0)
enum fuse_opcode {
FUSE_LOOKUP = 1,
FUSE_FORGET = 2, /* no reply */
......@@ -892,4 +906,34 @@ struct fuse_copy_file_range_in {
uint64_t flags;
};
#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
#define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1)
struct fuse_setupmapping_in {
/* An already open handle */
uint64_t fh;
/* Offset into the file to start the mapping */
uint64_t foffset;
/* Length of mapping required */
uint64_t len;
/* Flags, FUSE_SETUPMAPPING_FLAG_* */
uint64_t flags;
/* Offset in Memory Window */
uint64_t moffset;
};
struct fuse_removemapping_in {
/* number of fuse_removemapping_one follows */
uint32_t count;
};
struct fuse_removemapping_one {
/* Offset into the dax window start the unmapping */
uint64_t moffset;
/* Length of mapping required */
uint64_t len;
};
#define FUSE_REMOVEMAPPING_MAX_ENTRY \
(PAGE_SIZE / sizeof(struct fuse_removemapping_one))
#endif /* _LINUX_FUSE_H */
......@@ -16,4 +16,7 @@ struct virtio_fs_config {
__le32 num_request_queues;
} __attribute__((packed));
/* For the id field in virtio_pci_shm_cap */
#define VIRTIO_FS_SHMCAP_ID_CACHE 0
#endif /* _UAPI_LINUX_VIRTIO_FS_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment