Commit 7ed641be authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "Filipe is doing a careful pass through fsync problems, and these are
  the fixes so far.  I'll have one more for rc6 that we're still
  testing.

  My big commit is fixing up some inode hash races that Al Viro found
  (thanks Al)"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: use insert_inode_locked4 for inode creation
  Btrfs: fix fsync data loss after a ranged fsync
  Btrfs: kfree()ing ERR_PTRs
  Btrfs: fix crash while doing a ranged fsync
  Btrfs: fix corruption after write/fsync failure + fsync + log recovery
  Btrfs: fix autodefrag with compression
parents 9925cc13 b0d5d10f
...@@ -1966,7 +1966,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ...@@ -1966,7 +1966,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
btrfs_init_log_ctx(&ctx); btrfs_init_log_ctx(&ctx);
ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
if (ret < 0) { if (ret < 0) {
/* Fallthrough and commit/free transaction. */ /* Fallthrough and commit/free transaction. */
ret = 1; ret = 1;
......
This diff is collapsed.
...@@ -1019,8 +1019,10 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) ...@@ -1019,8 +1019,10 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
return false; return false;
next = defrag_lookup_extent(inode, em->start + em->len); next = defrag_lookup_extent(inode, em->start + em->len);
if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE || if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
(em->block_start + em->block_len == next->block_start)) ret = false;
else if ((em->block_start + em->block_len == next->block_start) &&
(em->block_len > 128 * 1024 && next->block_len > 128 * 1024))
ret = false; ret = false;
free_extent_map(next); free_extent_map(next);
...@@ -1055,7 +1057,6 @@ static int should_defrag_range(struct inode *inode, u64 start, int thresh, ...@@ -1055,7 +1057,6 @@ static int should_defrag_range(struct inode *inode, u64 start, int thresh,
} }
next_mergeable = defrag_check_next_extent(inode, em); next_mergeable = defrag_check_next_extent(inode, em);
/* /*
* we hit a real extent, if it is big or the next extent is not a * we hit a real extent, if it is big or the next extent is not a
* real extent, don't bother defragging it * real extent, don't bother defragging it
...@@ -1702,7 +1703,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ...@@ -1702,7 +1703,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY |
BTRFS_SUBVOL_QGROUP_INHERIT)) { BTRFS_SUBVOL_QGROUP_INHERIT)) {
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
goto out; goto free_args;
} }
if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
...@@ -1712,27 +1713,31 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ...@@ -1712,27 +1713,31 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
if (vol_args->size > PAGE_CACHE_SIZE) { if (vol_args->size > PAGE_CACHE_SIZE) {
ret = -EINVAL; ret = -EINVAL;
goto out; goto free_args;
} }
inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
if (IS_ERR(inherit)) { if (IS_ERR(inherit)) {
ret = PTR_ERR(inherit); ret = PTR_ERR(inherit);
goto out; goto free_args;
} }
} }
ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
vol_args->fd, subvol, ptr, vol_args->fd, subvol, ptr,
readonly, inherit); readonly, inherit);
if (ret)
goto free_inherit;
if (ret == 0 && ptr && if (ptr && copy_to_user(arg +
copy_to_user(arg +
offsetof(struct btrfs_ioctl_vol_args_v2, offsetof(struct btrfs_ioctl_vol_args_v2,
transid), ptr, sizeof(*ptr))) transid),
ptr, sizeof(*ptr)))
ret = -EFAULT; ret = -EFAULT;
out:
kfree(vol_args); free_inherit:
kfree(inherit); kfree(inherit);
free_args:
kfree(vol_args);
return ret; return ret;
} }
...@@ -2652,7 +2657,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) ...@@ -2652,7 +2657,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
vol_args = memdup_user(arg, sizeof(*vol_args)); vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args)) { if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args); ret = PTR_ERR(vol_args);
goto out; goto err_drop;
} }
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
...@@ -2670,6 +2675,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) ...@@ -2670,6 +2675,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
out: out:
kfree(vol_args); kfree(vol_args);
err_drop:
mnt_drop_write_file(file); mnt_drop_write_file(file);
return ret; return ret;
} }
......
...@@ -95,7 +95,9 @@ ...@@ -95,7 +95,9 @@
static int btrfs_log_inode(struct btrfs_trans_handle *trans, static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode, struct btrfs_root *root, struct inode *inode,
int inode_only); int inode_only,
const loff_t start,
const loff_t end);
static int link_to_fixup_dir(struct btrfs_trans_handle *trans, static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
struct btrfs_path *path, u64 objectid); struct btrfs_path *path, u64 objectid);
...@@ -3859,7 +3861,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, ...@@ -3859,7 +3861,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
*/ */
static int btrfs_log_inode(struct btrfs_trans_handle *trans, static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode, struct btrfs_root *root, struct inode *inode,
int inode_only) int inode_only,
const loff_t start,
const loff_t end)
{ {
struct btrfs_path *path; struct btrfs_path *path;
struct btrfs_path *dst_path; struct btrfs_path *dst_path;
...@@ -3876,6 +3880,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ...@@ -3876,6 +3880,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
int ins_nr; int ins_nr;
bool fast_search = false; bool fast_search = false;
u64 ino = btrfs_ino(inode); u64 ino = btrfs_ino(inode);
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
path = btrfs_alloc_path(); path = btrfs_alloc_path();
if (!path) if (!path)
...@@ -4049,13 +4054,35 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ...@@ -4049,13 +4054,35 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
goto out_unlock; goto out_unlock;
} }
} else if (inode_only == LOG_INODE_ALL) { } else if (inode_only == LOG_INODE_ALL) {
struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *em, *n; struct extent_map *em, *n;
write_lock(&tree->lock); write_lock(&em_tree->lock);
list_for_each_entry_safe(em, n, &tree->modified_extents, list) /*
* We can't just remove every em if we're called for a ranged
* fsync - that is, one that doesn't cover the whole possible
* file range (0 to LLONG_MAX). This is because we can have
* em's that fall outside the range we're logging and therefore
* their ordered operations haven't completed yet
* (btrfs_finish_ordered_io() not invoked yet). This means we
* didn't get their respective file extent item in the fs/subvol
* tree yet, and need to let the next fast fsync (one which
* consults the list of modified extent maps) find the em so
* that it logs a matching file extent item and waits for the
* respective ordered operation to complete (if it's still
* running).
*
* Removing every em outside the range we're logging would make
* the next fast fsync not log their matching file extent items,
* therefore making us lose data after a log replay.
*/
list_for_each_entry_safe(em, n, &em_tree->modified_extents,
list) {
const u64 mod_end = em->mod_start + em->mod_len - 1;
if (em->mod_start >= start && mod_end <= end)
list_del_init(&em->list); list_del_init(&em->list);
write_unlock(&tree->lock); }
write_unlock(&em_tree->lock);
} }
if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
...@@ -4065,8 +4092,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ...@@ -4065,8 +4092,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
goto out_unlock; goto out_unlock;
} }
} }
write_lock(&em_tree->lock);
/*
* If we're doing a ranged fsync and there are still modified extents
* in the list, we must run on the next fsync call as it might cover
* those extents (a full fsync or an fsync for other range).
*/
if (list_empty(&em_tree->modified_extents)) {
BTRFS_I(inode)->logged_trans = trans->transid; BTRFS_I(inode)->logged_trans = trans->transid;
BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; BTRFS_I(inode)->last_log_commit =
BTRFS_I(inode)->last_sub_trans;
}
write_unlock(&em_tree->lock);
out_unlock: out_unlock:
if (unlikely(err)) if (unlikely(err))
btrfs_put_logged_extents(&logged_list); btrfs_put_logged_extents(&logged_list);
...@@ -4161,7 +4199,10 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, ...@@ -4161,7 +4199,10 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
*/ */
static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode, struct btrfs_root *root, struct inode *inode,
struct dentry *parent, int exists_only, struct dentry *parent,
const loff_t start,
const loff_t end,
int exists_only,
struct btrfs_log_ctx *ctx) struct btrfs_log_ctx *ctx)
{ {
int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
...@@ -4207,7 +4248,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, ...@@ -4207,7 +4248,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret) if (ret)
goto end_no_trans; goto end_no_trans;
ret = btrfs_log_inode(trans, root, inode, inode_only); ret = btrfs_log_inode(trans, root, inode, inode_only, start, end);
if (ret) if (ret)
goto end_trans; goto end_trans;
...@@ -4235,7 +4276,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, ...@@ -4235,7 +4276,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (BTRFS_I(inode)->generation > if (BTRFS_I(inode)->generation >
root->fs_info->last_trans_committed) { root->fs_info->last_trans_committed) {
ret = btrfs_log_inode(trans, root, inode, inode_only); ret = btrfs_log_inode(trans, root, inode, inode_only,
0, LLONG_MAX);
if (ret) if (ret)
goto end_trans; goto end_trans;
} }
...@@ -4269,13 +4311,15 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, ...@@ -4269,13 +4311,15 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
*/ */
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry, struct btrfs_root *root, struct dentry *dentry,
const loff_t start,
const loff_t end,
struct btrfs_log_ctx *ctx) struct btrfs_log_ctx *ctx)
{ {
struct dentry *parent = dget_parent(dentry); struct dentry *parent = dget_parent(dentry);
int ret; int ret;
ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent,
0, ctx); start, end, 0, ctx);
dput(parent); dput(parent);
return ret; return ret;
...@@ -4512,6 +4556,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, ...@@ -4512,6 +4556,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
root->fs_info->last_trans_committed)) root->fs_info->last_trans_committed))
return 0; return 0;
return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL); return btrfs_log_inode_parent(trans, root, inode, parent, 0,
LLONG_MAX, 1, NULL);
} }
...@@ -59,6 +59,8 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, ...@@ -59,6 +59,8 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
int btrfs_recover_log_trees(struct btrfs_root *tree_root); int btrfs_recover_log_trees(struct btrfs_root *tree_root);
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry, struct btrfs_root *root, struct dentry *dentry,
const loff_t start,
const loff_t end,
struct btrfs_log_ctx *ctx); struct btrfs_log_ctx *ctx);
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment