Commit 1881fba8 authored by Omar Sandoval's avatar Omar Sandoval Committed by David Sterba

btrfs: add BTRFS_IOC_ENCODED_READ ioctl

There are 4 main cases:

1. Inline extents: we copy the data straight out of the extent buffer.
2. Hole/preallocated extents: we fill in zeroes.
3. Regular, uncompressed extents: we read the sectors we need directly
   from disk.
4. Regular, compressed extents: we read the entire compressed extent
   from disk and indicate what subset of the decompressed extent is in
   the file.

This initial implementation simplifies a few things that can be improved
in the future:

- Cases 1, 3, and 4 allocate temporary memory to read into before
  copying out to userspace.
- We don't do read repair, because it turns out that read repair is
  currently broken for compressed data.
- We hold the inode lock during the operation.

Note that we don't need to hold the mmap lock. We may race with
btrfs_page_mkwrite() and read the old data from before the page was
dirtied:

btrfs_page_mkwrite         btrfs_encoded_read
---------------------------------------------------
(enter)                    (enter)
                           btrfs_wait_ordered_range
lock_extent_bits
btrfs_page_set_dirty
unlock_extent_cached
(exit)
                           lock_extent_bits
                           read extent (dirty page hasn't been flushed,
                                        so this is the old data)
                           unlock_extent_cached
                           (exit)

we read the old data from before the page was dirtied. But, that's true
even if we were to hold the mmap lock:

btrfs_page_mkwrite               btrfs_encoded_read
-------------------------------------------------------------------
(enter)                          (enter)
                                 btrfs_inode_lock(BTRFS_ILOCK_MMAP)
down_read(i_mmap_lock) (blocked)
                                 btrfs_wait_ordered_range
                                 lock_extent_bits
				 read extent (page hasn't been dirtied,
                                              so this is the old data)
                                 unlock_extent_cached
                                 btrfs_inode_unlock(BTRFS_ILOCK_MMAP)
down_read(i_mmap_lock) returns
lock_extent_bits
btrfs_page_set_dirty
unlock_extent_cached

In other words, this is inherently racy, so it's fine that we return the
old data in this tiny window.
Signed-off-by: default avatarOmar Sandoval <osandov@fb.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent dcb77a9a
......@@ -49,6 +49,7 @@ extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
struct btrfs_ordered_sum;
struct btrfs_ref;
struct btrfs_bio;
struct btrfs_ioctl_encoded_io_args;
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
......@@ -3305,6 +3306,9 @@ int btrfs_writepage_cow_fixup(struct page *page);
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
struct page *page, u64 start,
u64 end, bool uptodate);
ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
struct btrfs_ioctl_encoded_io_args *encoded);
extern const struct dentry_operations btrfs_dentry_operations;
extern const struct iomap_ops btrfs_dio_iomap_ops;
extern const struct iomap_dio_ops btrfs_dio_ops;
......
This diff is collapsed.
......@@ -28,6 +28,7 @@
#include <linux/iversion.h>
#include <linux/fileattr.h>
#include <linux/fsverity.h>
#include <linux/sched/xacct.h>
#include "ctree.h"
#include "disk-io.h"
#include "export.h"
......@@ -88,6 +89,22 @@ struct btrfs_ioctl_send_args_32 {
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
struct btrfs_ioctl_send_args_32)
struct btrfs_ioctl_encoded_io_args_32 {
compat_uptr_t iov;
compat_ulong_t iovcnt;
__s64 offset;
__u64 flags;
__u64 len;
__u64 unencoded_len;
__u64 unencoded_offset;
__u32 compression;
__u32 encryption;
__u8 reserved[64];
};
#define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \
struct btrfs_ioctl_encoded_io_args_32)
#endif
/* Mask out flags that are inappropriate for the given type of inode. */
......@@ -5195,6 +5212,89 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat
return ret;
}
static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
bool compat)
{
struct btrfs_ioctl_encoded_io_args args = { 0 };
size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
flags);
size_t copy_end;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
loff_t pos;
struct kiocb kiocb;
ssize_t ret;
if (!capable(CAP_SYS_ADMIN)) {
ret = -EPERM;
goto out_acct;
}
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_encoded_io_args_32 args32;
copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
flags);
if (copy_from_user(&args32, argp, copy_end)) {
ret = -EFAULT;
goto out_acct;
}
args.iov = compat_ptr(args32.iov);
args.iovcnt = args32.iovcnt;
args.offset = args32.offset;
args.flags = args32.flags;
#else
return -ENOTTY;
#endif
} else {
copy_end = copy_end_kernel;
if (copy_from_user(&args, argp, copy_end)) {
ret = -EFAULT;
goto out_acct;
}
}
if (args.flags != 0) {
ret = -EINVAL;
goto out_acct;
}
ret = import_iovec(READ, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
&iov, &iter);
if (ret < 0)
goto out_acct;
if (iov_iter_count(&iter) == 0) {
ret = 0;
goto out_iov;
}
pos = args.offset;
ret = rw_verify_area(READ, file, &pos, args.len);
if (ret < 0)
goto out_iov;
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = pos;
ret = btrfs_encoded_read(&kiocb, &iter, &args);
if (ret >= 0) {
fsnotify_access(file);
if (copy_to_user(argp + copy_end,
(char *)&args + copy_end_kernel,
sizeof(args) - copy_end_kernel))
ret = -EFAULT;
}
out_iov:
kfree(iov);
out_acct:
if (ret > 0)
add_rchar(current, ret);
inc_syscr(current);
return ret;
}
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
......@@ -5339,6 +5439,12 @@ long btrfs_ioctl(struct file *file, unsigned int
return fsverity_ioctl_enable(file, (const void __user *)argp);
case FS_IOC_MEASURE_VERITY:
return fsverity_ioctl_measure(file, argp);
case BTRFS_IOC_ENCODED_READ:
return btrfs_ioctl_encoded_read(file, argp, false);
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
case BTRFS_IOC_ENCODED_READ_32:
return btrfs_ioctl_encoded_read(file, argp, true);
#endif
}
return -ENOTTY;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment