Commit 5cea7647 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "There are some new user features and the usual load of invisible
  enhancements or cleanups.

  New features:

   - extend mount options to specify zlib compression level, -o
     compress=zlib:9

   - v2 of ioctl "extent to inode mapping", addressing a usecase where
     we want to retrieve more but inaccurate results and do the
     postprocessing in userspace, aiding defragmentation or
     deduplication tools

   - populate compression heuristics logic, do data sampling and try to
     guess compressibility by: looking for repeated patterns, counting
     unique byte values and distribution, calculating Shannon entropy;
     this will need more benchmarking and possibly fine tuning, but the
     base should be good enough

   - enable indexing for btrfs as lower filesystem in overlayfs

   - speedup page cache readahead during send on large files

  Internal enhancements:

   - more sanity checks of b-tree items when reading them from disk

   - more EINVAL/EUCLEAN fixups, missing BLK_STS_* conversion, other
     errno or error handling fixes

   - remove some homegrown IO-related logic, that's been obsoleted by
     core block layer changes (batching, plug/unplug, own counters)

   - add ref-verify, optional debugging feature to verify extent
     reference accounting

   - simplify code handling outstanding extents, make it more clear
     where and how the accounting is done

   - make delalloc reservations per-inode, simplify the code and make
     the logic more straightforward

   - extensive cleanup of delayed refs code

  Notable fixes:

   - fix send ioctl on 32bit with 64bit kernel"

* 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (102 commits)
  btrfs: Fix bug for misused dev_t when lookup in dev state hash table.
  Btrfs: heuristic: add Shannon entropy calculation
  Btrfs: heuristic: add byte core set calculation
  Btrfs: heuristic: add byte set calculation
  Btrfs: heuristic: add detection of repeated data patterns
  Btrfs: heuristic: implement sampling logic
  Btrfs: heuristic: add bucket and sample counters and other defines
  Btrfs: compression: separate heuristic/compression workspaces
  btrfs: move btrfs_truncate_block out of trans handle
  btrfs: don't call btrfs_start_delalloc_roots in flushoncommit
  btrfs: track refs in a rb_tree instead of a list
  btrfs: add a comp_refs() helper
  btrfs: switch args for comp_*_refs
  btrfs: make the delalloc block rsv per inode
  btrfs: add tracepoints for outstanding extents mods
  Btrfs: rework outstanding_extents
  btrfs: increase output size for LOGICAL_INO_V2 ioctl
  btrfs: add a flags argument to LOGICAL_INO and call it LOGICAL_INO_V2
  btrfs: add a flag to iterate_inodes_from_logical to find all extent refs for uncompressed extents
  btrfs: send: remove unused code
  ...
parents 808eb24e d28e649a
......@@ -91,3 +91,14 @@ config BTRFS_ASSERT
any of the assertions trip. This is meant for btrfs developers only.
If unsure, say N.
config BTRFS_FS_REF_VERIFY
bool "Btrfs with the ref verify tool compiled in"
depends on BTRFS_FS
default n
help
Enable run-time extent reference verification instrumentation. This
is meant to be used by btrfs developers for tracking down extent
reference problems or verifying they didn't break something.
If unsure, say N.
......@@ -10,10 +10,11 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
uuid-tree.o props.o hash.o free-space-tree.o
uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
......
......@@ -67,7 +67,7 @@ struct btrfs_workqueue {
static void normal_work_helper(struct btrfs_work *work);
#define BTRFS_WORK_HELPER(name) \
void btrfs_##name(struct work_struct *arg) \
noinline_for_stack void btrfs_##name(struct work_struct *arg) \
{ \
struct btrfs_work *work = container_of(arg, struct btrfs_work, \
normal_work); \
......
......@@ -40,12 +40,14 @@ static int check_extent_in_eb(const struct btrfs_key *key,
const struct extent_buffer *eb,
const struct btrfs_file_extent_item *fi,
u64 extent_item_pos,
struct extent_inode_elem **eie)
struct extent_inode_elem **eie,
bool ignore_offset)
{
u64 offset = 0;
struct extent_inode_elem *e;
if (!btrfs_file_extent_compression(eb, fi) &&
if (!ignore_offset &&
!btrfs_file_extent_compression(eb, fi) &&
!btrfs_file_extent_encryption(eb, fi) &&
!btrfs_file_extent_other_encoding(eb, fi)) {
u64 data_offset;
......@@ -84,7 +86,8 @@ static void free_inode_elem_list(struct extent_inode_elem *eie)
static int find_extent_in_eb(const struct extent_buffer *eb,
u64 wanted_disk_byte, u64 extent_item_pos,
struct extent_inode_elem **eie)
struct extent_inode_elem **eie,
bool ignore_offset)
{
u64 disk_byte;
struct btrfs_key key;
......@@ -113,7 +116,7 @@ static int find_extent_in_eb(const struct extent_buffer *eb,
if (disk_byte != wanted_disk_byte)
continue;
ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie);
ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie, ignore_offset);
if (ret < 0)
return ret;
}
......@@ -419,7 +422,7 @@ static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
struct ulist *parents, struct prelim_ref *ref,
int level, u64 time_seq, const u64 *extent_item_pos,
u64 total_refs)
u64 total_refs, bool ignore_offset)
{
int ret = 0;
int slot;
......@@ -472,7 +475,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
if (extent_item_pos) {
ret = check_extent_in_eb(&key, eb, fi,
*extent_item_pos,
&eie);
&eie, ignore_offset);
if (ret < 0)
break;
}
......@@ -510,7 +513,8 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u64 time_seq,
struct prelim_ref *ref, struct ulist *parents,
const u64 *extent_item_pos, u64 total_refs)
const u64 *extent_item_pos, u64 total_refs,
bool ignore_offset)
{
struct btrfs_root *root;
struct btrfs_key root_key;
......@@ -581,7 +585,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
}
ret = add_all_parents(root, path, parents, ref, level, time_seq,
extent_item_pos, total_refs);
extent_item_pos, total_refs, ignore_offset);
out:
path->lowest_level = 0;
btrfs_release_path(path);
......@@ -616,7 +620,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u64 time_seq,
struct preftrees *preftrees,
const u64 *extent_item_pos, u64 total_refs,
struct share_check *sc)
struct share_check *sc, bool ignore_offset)
{
int err;
int ret = 0;
......@@ -661,7 +665,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
}
err = resolve_indirect_ref(fs_info, path, time_seq, ref,
parents, extent_item_pos,
total_refs);
total_refs, ignore_offset);
/*
* we can only tolerate ENOENT,otherwise,we should catch error
* and return directly.
......@@ -769,6 +773,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
struct btrfs_key key;
struct btrfs_key tmp_op_key;
struct btrfs_key *op_key = NULL;
struct rb_node *n;
int count;
int ret = 0;
......@@ -778,7 +783,9 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
}
spin_lock(&head->lock);
list_for_each_entry(node, &head->ref_list, list) {
for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
node = rb_entry(n, struct btrfs_delayed_ref_node,
ref_node);
if (node->seq > seq)
continue;
......@@ -1107,13 +1114,17 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
*
* Otherwise this returns 0 for success and <0 for an error.
*
* If ignore_offset is set to false, only extent refs whose offsets match
* extent_item_pos are returned. If true, every extent ref is returned
* and extent_item_pos is ignored.
*
* FIXME some caching might speed things up
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist *refs,
struct ulist *roots, const u64 *extent_item_pos,
struct share_check *sc)
struct share_check *sc, bool ignore_offset)
{
struct btrfs_key key;
struct btrfs_path *path;
......@@ -1178,7 +1189,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
refcount_inc(&head->node.refs);
refcount_inc(&head->refs);
spin_unlock(&delayed_refs->lock);
btrfs_release_path(path);
......@@ -1189,7 +1200,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
*/
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(&head->node);
btrfs_put_delayed_ref_head(head);
goto again;
}
spin_unlock(&delayed_refs->lock);
......@@ -1235,7 +1246,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root));
ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
extent_item_pos, total_refs, sc);
extent_item_pos, total_refs, sc, ignore_offset);
if (ret)
goto out;
......@@ -1282,7 +1293,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
ret = find_extent_in_eb(eb, bytenr,
*extent_item_pos, &eie);
*extent_item_pos, &eie, ignore_offset);
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
if (ret < 0)
......@@ -1350,7 +1361,7 @@ static void free_leaf_list(struct ulist *blocks)
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **leafs,
const u64 *extent_item_pos)
const u64 *extent_item_pos, bool ignore_offset)
{
int ret;
......@@ -1359,7 +1370,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
return -ENOMEM;
ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
*leafs, NULL, extent_item_pos, NULL);
*leafs, NULL, extent_item_pos, NULL, ignore_offset);
if (ret < 0 && ret != -ENOENT) {
free_leaf_list(*leafs);
return ret;
......@@ -1383,7 +1394,8 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
*/
static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots)
u64 time_seq, struct ulist **roots,
bool ignore_offset)
{
struct ulist *tmp;
struct ulist_node *node = NULL;
......@@ -1402,7 +1414,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
ULIST_ITER_INIT(&uiter);
while (1) {
ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
tmp, *roots, NULL, NULL);
tmp, *roots, NULL, NULL, ignore_offset);
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
ulist_free(*roots);
......@@ -1421,14 +1433,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots)
u64 time_seq, struct ulist **roots,
bool ignore_offset)
{
int ret;
if (!trans)
down_read(&fs_info->commit_root_sem);
ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
time_seq, roots);
time_seq, roots, ignore_offset);
if (!trans)
up_read(&fs_info->commit_root_sem);
return ret;
......@@ -1483,7 +1496,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
ULIST_ITER_INIT(&uiter);
while (1) {
ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
roots, NULL, &shared);
roots, NULL, &shared, false);
if (ret == BACKREF_FOUND_SHARED) {
/* this is the only condition under which we return 1 */
ret = 1;
......@@ -1877,7 +1890,8 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
u64 extent_item_objectid, u64 extent_item_pos,
int search_commit_root,
iterate_extent_inodes_t *iterate, void *ctx)
iterate_extent_inodes_t *iterate, void *ctx,
bool ignore_offset)
{
int ret;
struct btrfs_trans_handle *trans = NULL;
......@@ -1903,14 +1917,15 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
tree_mod_seq_elem.seq, &refs,
&extent_item_pos);
&extent_item_pos, ignore_offset);
if (ret)
goto out;
ULIST_ITER_INIT(&ref_uiter);
while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val,
tree_mod_seq_elem.seq, &roots);
tree_mod_seq_elem.seq, &roots,
ignore_offset);
if (ret)
break;
ULIST_ITER_INIT(&root_uiter);
......@@ -1943,7 +1958,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
iterate_extent_inodes_t *iterate, void *ctx)
iterate_extent_inodes_t *iterate, void *ctx,
bool ignore_offset)
{
int ret;
u64 extent_item_pos;
......@@ -1961,7 +1977,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
extent_item_pos = logical - found_key.objectid;
ret = iterate_extent_inodes(fs_info, found_key.objectid,
extent_item_pos, search_commit_root,
iterate, ctx);
iterate, ctx, ignore_offset);
return ret;
}
......
......@@ -43,17 +43,19 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
u64 extent_item_objectid,
u64 extent_offset, int search_commit_root,
iterate_extent_inodes_t *iterate, void *ctx);
iterate_extent_inodes_t *iterate, void *ctx,
bool ignore_offset);
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
iterate_extent_inodes_t *iterate, void *ctx);
iterate_extent_inodes_t *iterate, void *ctx,
bool ignore_offset);
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots);
u64 time_seq, struct ulist **roots, bool ignore_offset);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
struct extent_buffer *eb_in, u64 parent,
......
......@@ -36,14 +36,13 @@
#define BTRFS_INODE_ORPHAN_META_RESERVED 1
#define BTRFS_INODE_DUMMY 2
#define BTRFS_INODE_IN_DEFRAG 3
#define BTRFS_INODE_DELALLOC_META_RESERVED 4
#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
#define BTRFS_INODE_NEEDS_FULL_SYNC 7
#define BTRFS_INODE_COPY_EVERYTHING 8
#define BTRFS_INODE_IN_DELALLOC_LIST 9
#define BTRFS_INODE_READDIO_NEED_LOCK 10
#define BTRFS_INODE_HAS_PROPS 11
#define BTRFS_INODE_HAS_ORPHAN_ITEM 4
#define BTRFS_INODE_HAS_ASYNC_EXTENT 5
#define BTRFS_INODE_NEEDS_FULL_SYNC 6
#define BTRFS_INODE_COPY_EVERYTHING 7
#define BTRFS_INODE_IN_DELALLOC_LIST 8
#define BTRFS_INODE_READDIO_NEED_LOCK 9
#define BTRFS_INODE_HAS_PROPS 10
/* in memory btrfs inode */
struct btrfs_inode {
......@@ -176,7 +175,8 @@ struct btrfs_inode {
* of extent items we've reserved metadata for.
*/
unsigned outstanding_extents;
unsigned reserved_extents;
struct btrfs_block_rsv block_rsv;
/*
* Cached values of inode properties
......@@ -267,6 +267,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
return false;
}
static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
int mod)
{
lockdep_assert_held(&inode->lock);
inode->outstanding_extents += mod;
if (btrfs_is_free_space_inode(inode))
return;
trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode),
mod);
}
static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
{
int ret = 0;
......
......@@ -613,7 +613,7 @@ static void btrfsic_dev_state_hashtable_add(
struct btrfsic_dev_state_hashtable *h)
{
const unsigned int hashval =
(((unsigned int)((uintptr_t)ds->bdev)) &
(((unsigned int)((uintptr_t)ds->bdev->bd_dev)) &
(BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
list_add(&ds->collision_resolving_node, h->table + hashval);
......@@ -2803,7 +2803,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
mutex_lock(&btrfsic_mutex);
/* since btrfsic_submit_bio() is also called before
* btrfsic_mount(), this might return NULL */
dev_state = btrfsic_dev_state_lookup(bio_dev(bio));
dev_state = btrfsic_dev_state_lookup(bio_dev(bio) + bio->bi_partno);
if (NULL != dev_state &&
(bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) {
unsigned int i = 0;
......@@ -2913,7 +2913,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
state = kvzalloc(sizeof(*state), GFP_KERNEL);
if (!state) {
pr_info("btrfs check-integrity: allocation failed!\n");
return -1;
return -ENOMEM;
}
if (!btrfsic_is_initialized) {
......@@ -2945,7 +2945,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
if (NULL == ds) {
pr_info("btrfs check-integrity: kmalloc() failed!\n");
mutex_unlock(&btrfsic_mutex);
return -1;
return -ENOMEM;
}
ds->bdev = device->bdev;
ds->state = state;
......
This diff is collapsed.
......@@ -76,7 +76,7 @@ struct compressed_bio {
void btrfs_init_compress(void);
void btrfs_exit_compress(void);
int btrfs_compress_pages(int type, struct address_space *mapping,
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
u64 start, struct page **pages,
unsigned long *out_pages,
unsigned long *total_in,
......@@ -95,6 +95,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
unsigned btrfs_compress_str2level(const char *str);
enum btrfs_compression_type {
BTRFS_COMPRESS_NONE = 0,
BTRFS_COMPRESS_ZLIB = 1,
......@@ -124,6 +126,8 @@ struct btrfs_compress_op {
struct page *dest_page,
unsigned long start_byte,
size_t srclen, size_t destlen);
void (*set_level)(struct list_head *ws, unsigned int type);
};
extern const struct btrfs_compress_op btrfs_zlib_compress;
......
......@@ -192,7 +192,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
* tree until you end up with a lock on the root. A locked buffer
* is returned, with a reference held.
*/
static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
......@@ -5496,8 +5496,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
goto out;
} else if (left_end_reached) {
if (right_level == 0) {
ret = changed_cb(left_root, right_root,
left_path, right_path,
ret = changed_cb(left_path, right_path,
&right_key,
BTRFS_COMPARE_TREE_DELETED,
ctx);
......@@ -5508,8 +5507,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
continue;
} else if (right_end_reached) {
if (left_level == 0) {
ret = changed_cb(left_root, right_root,
left_path, right_path,
ret = changed_cb(left_path, right_path,
&left_key,
BTRFS_COMPARE_TREE_NEW,
ctx);
......@@ -5523,8 +5521,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
if (left_level == 0 && right_level == 0) {
cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
if (cmp < 0) {
ret = changed_cb(left_root, right_root,
left_path, right_path,
ret = changed_cb(left_path, right_path,
&left_key,
BTRFS_COMPARE_TREE_NEW,
ctx);
......@@ -5532,8 +5529,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
goto out;
advance_left = ADVANCE;
} else if (cmp > 0) {
ret = changed_cb(left_root, right_root,
left_path, right_path,
ret = changed_cb(left_path, right_path,
&right_key,
BTRFS_COMPARE_TREE_DELETED,
ctx);
......@@ -5550,8 +5546,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
result = BTRFS_COMPARE_TREE_CHANGED;
else
result = BTRFS_COMPARE_TREE_SAME;
ret = changed_cb(left_root, right_root,
left_path, right_path,
ret = changed_cb(left_path, right_path,
&left_key, result, ctx);
if (ret < 0)
goto out;
......
......@@ -523,7 +523,7 @@ struct btrfs_caching_control {
};
/* Once caching_thread() finds this much free space, it will wake up waiters. */
#define CACHING_CTL_WAKE_UP (1024 * 1024 * 2)
#define CACHING_CTL_WAKE_UP SZ_2M
struct btrfs_io_ctl {
void *cur, *orig;
......@@ -763,8 +763,6 @@ struct btrfs_fs_info {
* delayed dir index item
*/
struct btrfs_block_rsv global_block_rsv;
/* block reservation for delay allocation */
struct btrfs_block_rsv delalloc_block_rsv;
/* block reservation for metadata operations */
struct btrfs_block_rsv trans_block_rsv;
/* block reservation for chunk tree */
......@@ -790,6 +788,7 @@ struct btrfs_fs_info {
*/
unsigned long pending_changes;
unsigned long compress_type:4;
unsigned int compress_level;
int commit_interval;
/*
* It is a suggestive number, the read side is safe even it gets a
......@@ -878,9 +877,6 @@ struct btrfs_fs_info {
rwlock_t tree_mod_log_lock;
struct rb_root tree_mod_log;
atomic_t nr_async_submits;
atomic_t async_submit_draining;
atomic_t nr_async_bios;
atomic_t async_delalloc_pages;
atomic_t open_ioctl_trans;
......@@ -1100,6 +1096,11 @@ struct btrfs_fs_info {
u32 nodesize;
u32 sectorsize;
u32 stripesize;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
spinlock_t ref_verify_lock;
struct rb_root block_tree;
#endif
};
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
......@@ -1338,6 +1339,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26)
#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
#define BTRFS_MOUNT_REF_VERIFY (1 << 28)
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
#define BTRFS_DEFAULT_MAX_INLINE (2048)
......@@ -2639,7 +2641,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct extent_buffer *buf,
u64 parent, int last_ref);
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 root_objectid, u64 owner,
struct btrfs_root *root, u64 owner,
u64 offset, u64 ram_bytes,
struct btrfs_key *ins);
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
......@@ -2658,7 +2660,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 flags,
int level, int is_data);
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset);
......@@ -2670,7 +2672,7 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset);
......@@ -2744,6 +2746,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
u64 *qgroup_reserved, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_space(struct inode *inode,
......@@ -2751,6 +2755,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
unsigned short type);
void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv,
unsigned short type);
void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
......@@ -2809,6 +2816,7 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
const struct btrfs_key *new_key);
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *key, int lowest_level,
u64 min_trans);
......@@ -2821,9 +2829,7 @@ enum btrfs_compare_tree_result {
BTRFS_COMPARE_TREE_CHANGED,
BTRFS_COMPARE_TREE_SAME,
};
typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root,
struct btrfs_root *right_root,
struct btrfs_path *left_path,
typedef int (*btrfs_changed_cb_t)(struct btrfs_path *left_path,
struct btrfs_path *right_path,
struct btrfs_key *key,
enum btrfs_compare_tree_result result,
......
......@@ -581,36 +581,12 @@ static int btrfs_delayed_inode_reserve_metadata(
struct btrfs_block_rsv *dst_rsv;
u64 num_bytes;
int ret;
bool release = false;
src_rsv = trans->block_rsv;
dst_rsv = &fs_info->delayed_block_rsv;
num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
/*
* If our block_rsv is the delalloc block reserve then check and see if
* we have our extra reservation for updating the inode. If not fall
* through and try to reserve space quickly.
*
* We used to try and steal from the delalloc block rsv or the global
* reserve, but we'd steal a full reservation, which isn't kind. We are
* here through delalloc which means we've likely just cowed down close
* to the leaf that contains the inode, so we would steal less just
* doing the fallback inode update, so if we do end up having to steal
* from the global block rsv we hopefully only steal one or two blocks
* worth which is less likely to hurt us.
*/
if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
spin_lock(&inode->lock);
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&inode->runtime_flags))
release = true;
else
src_rsv = NULL;
spin_unlock(&inode->lock);
}
/*
* btrfs_dirty_inode will update the inode under btrfs_join_transaction
* which doesn't reserve space for speed. This is a problem since we
......@@ -618,7 +594,7 @@ static int btrfs_delayed_inode_reserve_metadata(
* space.
*
* Now if src_rsv == delalloc_block_rsv we'll let it just steal since
* we're accounted for.
* we always reserve enough to update the inode item.
*/
if (!src_rsv || (!trans->bytes_reserved &&
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
......@@ -643,32 +619,12 @@ static int btrfs_delayed_inode_reserve_metadata(
}
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
/*
* Migrate only takes a reservation, it doesn't touch the size of the
* block_rsv. This is to simplify people who don't normally have things
* migrated from their block rsv. If they go to release their
* reservation, that will decrease the size as well, so if migrate
* reduced size we'd end up with a negative size. But for the
* delalloc_meta_reserved stuff we will only know to drop 1 reservation,
* but we could in fact do this reserve/migrate dance several times
* between the time we did the original reservation and we'd clean it
* up. So to take care of this, release the space for the meta
* reservation here. I think it may be time for a documentation page on
* how block rsvs. work.
*/
if (!ret) {
trace_btrfs_space_reservation(fs_info, "delayed_inode",
btrfs_ino(inode), num_bytes, 1);
node->bytes_reserved = num_bytes;
}
if (release) {
trace_btrfs_space_reservation(fs_info, "delalloc",
btrfs_ino(inode), num_bytes, 0);
btrfs_block_rsv_release(fs_info, src_rsv, num_bytes);
}
return ret;
}
......
This diff is collapsed.
......@@ -26,18 +26,8 @@
#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
/*
* XXX: Qu: I really hate the design that ref_head and tree/data ref shares the
* same ref_node structure.
* Ref_head is in a higher logic level than tree/data ref, and duplicated
* bytenr/num_bytes in ref_node is really a waste or memory, they should be
* referred from ref_head.
* This gets more disgusting after we use list to store tree/data ref in
* ref_head. Must clean this mess up later.
*/
struct btrfs_delayed_ref_node {
/*data/tree ref use list, stored in ref_head->ref_list. */
struct list_head list;
struct rb_node ref_node;
/*
* If action is BTRFS_ADD_DELAYED_REF, also link this node to
* ref_head->ref_add_list, then we do not need to iterate the
......@@ -91,8 +81,9 @@ struct btrfs_delayed_extent_op {
* reference count modifications we've queued up.
*/
struct btrfs_delayed_ref_head {
struct btrfs_delayed_ref_node node;
u64 bytenr;
u64 num_bytes;
refcount_t refs;
/*
* the mutex is held while running the refs, and it is also
* held when checking the sum of reference modifications.
......@@ -100,7 +91,7 @@ struct btrfs_delayed_ref_head {
struct mutex mutex;
spinlock_t lock;
struct list_head ref_list;
struct rb_root ref_tree;
/* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
struct list_head ref_add_list;
......@@ -115,6 +106,14 @@ struct btrfs_delayed_ref_head {
*/
int total_ref_mod;
/*
* This is the current outstanding mod references for this bytenr. This
* is used with lookup_extent_info to get an accurate reference count
* for a bytenr, so it is adjusted as delayed refs are run so that any
* on disk reference count + ref_mod is accurate.
*/
int ref_mod;
/*
* For qgroup reserved space freeing.
*
......@@ -234,15 +233,18 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
case BTRFS_SHARED_DATA_REF_KEY:
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
break;
case 0:
kmem_cache_free(btrfs_delayed_ref_head_cachep, ref);
break;
default:
BUG();
}
}
}
static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head)
{
if (refcount_dec_and_test(&head->refs))
kmem_cache_free(btrfs_delayed_ref_head_cachep, head);
}
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
......@@ -282,36 +284,18 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
/*
* a node might live in a head or a regular ref, this lets you
* test for the proper type to use.
*/
static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
{
return node->is_head;
}
/*
* helper functions to cast a node into its container
*/
static inline struct btrfs_delayed_tree_ref *
btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
{
WARN_ON(btrfs_delayed_ref_is_head(node));
return container_of(node, struct btrfs_delayed_tree_ref, node);
}
static inline struct btrfs_delayed_data_ref *
btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
{
WARN_ON(btrfs_delayed_ref_is_head(node));
return container_of(node, struct btrfs_delayed_data_ref, node);
}
static inline struct btrfs_delayed_ref_head *
btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
{
WARN_ON(!btrfs_delayed_ref_is_head(node));
return container_of(node, struct btrfs_delayed_ref_head, node);
}
#endif
This diff is collapsed.
This diff is collapsed.
......@@ -110,7 +110,6 @@ struct extent_page_data {
struct bio *bio;
struct extent_io_tree *tree;
get_extent_t *get_extent;
unsigned long bio_flags;
/* tells writepage not to lock the state bits for this range
* it still does the unlocking
......@@ -2762,8 +2761,8 @@ static int merge_bio(struct extent_io_tree *tree, struct page *page,
*/
static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
struct writeback_control *wbc,
struct page *page, sector_t sector,
size_t size, unsigned long offset,
struct page *page, u64 offset,
size_t size, unsigned long pg_offset,
struct block_device *bdev,
struct bio **bio_ret,
bio_end_io_t end_io_func,
......@@ -2777,6 +2776,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
int contig = 0;
int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
size_t page_size = min_t(size_t, size, PAGE_SIZE);
sector_t sector = offset >> 9;
if (bio_ret && *bio_ret) {
bio = *bio_ret;
......@@ -2787,8 +2787,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
if (prev_bio_flags != bio_flags || !contig ||
force_bio_submit ||
merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
bio_add_page(bio, page, page_size, offset) < page_size) {
merge_bio(tree, page, pg_offset, page_size, bio, bio_flags) ||
bio_add_page(bio, page, page_size, pg_offset) < page_size) {
ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
if (ret < 0) {
*bio_ret = NULL;
......@@ -2802,8 +2802,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
}
}
bio = btrfs_bio_alloc(bdev, (u64)sector << 9);
bio_add_page(bio, page, page_size, offset);
bio = btrfs_bio_alloc(bdev, offset);
bio_add_page(bio, page, page_size, pg_offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
bio->bi_write_hint = page->mapping->host->i_write_hint;
......@@ -2893,7 +2893,6 @@ static int __do_readpage(struct extent_io_tree *tree,
u64 last_byte = i_size_read(inode);
u64 block_start;
u64 cur_end;
sector_t sector;
struct extent_map *em;
struct block_device *bdev;
int ret = 0;
......@@ -2929,6 +2928,7 @@ static int __do_readpage(struct extent_io_tree *tree,
}
while (cur <= end) {
bool force_bio_submit = false;
u64 offset;
if (cur >= last_byte) {
char *userpage;
......@@ -2968,9 +2968,9 @@ static int __do_readpage(struct extent_io_tree *tree,
iosize = ALIGN(iosize, blocksize);
if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
disk_io_size = em->block_len;
sector = em->block_start >> 9;
offset = em->block_start;
} else {
sector = (em->block_start + extent_offset) >> 9;
offset = em->block_start + extent_offset;
disk_io_size = iosize;
}
bdev = em->bdev;
......@@ -3063,8 +3063,8 @@ static int __do_readpage(struct extent_io_tree *tree,
}
ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
page, sector, disk_io_size, pg_offset,
bdev, bio,
page, offset, disk_io_size,
pg_offset, bdev, bio,
end_bio_extent_readpage, mirror_num,
*bio_flags,
this_bio_flag,
......@@ -3325,7 +3325,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
u64 extent_offset;
u64 block_start;
u64 iosize;
sector_t sector;
struct extent_map *em;
struct block_device *bdev;
size_t pg_offset = 0;
......@@ -3368,6 +3367,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
while (cur <= end) {
u64 em_end;
u64 offset;
if (cur >= i_size) {
if (tree->ops && tree->ops->writepage_end_io_hook)
......@@ -3389,7 +3389,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
BUG_ON(end < cur);
iosize = min(em_end - cur, end - cur + 1);
iosize = ALIGN(iosize, blocksize);
sector = (em->block_start + extent_offset) >> 9;
offset = em->block_start + extent_offset;
bdev = em->bdev;
block_start = em->block_start;
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
......@@ -3432,7 +3432,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
}
ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
page, sector, iosize, pg_offset,
page, offset, iosize, pg_offset,
bdev, &epd->bio,
end_bio_extent_writepage,
0, 0, 0, false);
......@@ -3716,7 +3716,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
u64 offset = eb->start;
u32 nritems;
unsigned long i, num_pages;
unsigned long bio_flags = 0;
unsigned long start, end;
unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
int ret = 0;
......@@ -3724,8 +3723,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
num_pages = num_extent_pages(eb->start, eb->len);
atomic_set(&eb->io_pages, num_pages);
if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
bio_flags = EXTENT_BIO_TREE_LOG;
/* set btree blocks beyond nritems with 0 to avoid stale content. */
nritems = btrfs_header_nritems(eb);
......@@ -3749,11 +3746,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
clear_page_dirty_for_io(p);
set_page_writeback(p);
ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
p, offset >> 9, PAGE_SIZE, 0, bdev,
p, offset, PAGE_SIZE, 0, bdev,
&epd->bio,
end_bio_extent_buffer_writepage,
0, epd->bio_flags, bio_flags, false);
epd->bio_flags = bio_flags;
0, 0, 0, false);
if (ret) {
set_btree_ioerr(p);
if (PageWriteback(p))
......@@ -3790,7 +3786,6 @@ int btree_write_cache_pages(struct address_space *mapping,
.tree = tree,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
.bio_flags = 0,
};
int ret = 0;
int done = 0;
......@@ -4063,7 +4058,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
if (epd->bio) {
int ret;
ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
ret = submit_one_bio(epd->bio, 0, 0);
BUG_ON(ret < 0); /* -ENOMEM */
epd->bio = NULL;
}
......@@ -4086,7 +4081,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.get_extent = get_extent,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
.bio_flags = 0,
};
ret = __extent_writepage(page, wbc, &epd);
......@@ -4111,7 +4105,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.get_extent = get_extent,
.extent_locked = 1,
.sync_io = mode == WB_SYNC_ALL,
.bio_flags = 0,
};
struct writeback_control wbc_writepages = {
.sync_mode = mode,
......@@ -4151,7 +4144,6 @@ int extent_writepages(struct extent_io_tree *tree,
.get_extent = get_extent,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
.bio_flags = 0,
};
ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd,
......
......@@ -34,7 +34,6 @@
* type for this bio
*/
#define EXTENT_BIO_COMPRESSED 1
#define EXTENT_BIO_TREE_LOG 2
#define EXTENT_BIO_FLAG_SHIFT 16
/* these are bit numbers for test/set bit */
......
......@@ -856,7 +856,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
if (update_refs && disk_bytenr > 0) {
ret = btrfs_inc_extent_ref(trans, fs_info,
ret = btrfs_inc_extent_ref(trans, root,
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
new_key.objectid,
......@@ -940,7 +940,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
extent_end = ALIGN(extent_end,
fs_info->sectorsize);
} else if (update_refs && disk_bytenr > 0) {
ret = btrfs_free_extent(trans, fs_info,
ret = btrfs_free_extent(trans, root,
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
key.objectid, key.offset -
......@@ -1234,7 +1234,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
extent_end - split);
btrfs_mark_buffer_dirty(leaf);
ret = btrfs_inc_extent_ref(trans, fs_info, bytenr, num_bytes,
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
if (ret) {
......@@ -1268,7 +1268,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
extent_end = other_end;
del_slot = path->slots[0] + 1;
del_nr++;
ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
if (ret) {
......@@ -1288,7 +1288,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
key.offset = other_start;
del_slot = path->slots[0];
del_nr++;
ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
if (ret) {
......@@ -1590,7 +1590,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
int ret = 0;
bool only_release_metadata = false;
bool force_page_uptodate = false;
bool need_unlock;
nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
PAGE_SIZE / (sizeof(struct page *)));
......@@ -1613,6 +1612,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
size_t copied;
size_t dirty_sectors;
size_t num_sectors;
int extents_locked;
WARN_ON(num_pages > nrptrs);
......@@ -1656,6 +1656,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
}
}
WARN_ON(reserve_bytes == 0);
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
reserve_bytes);
if (ret) {
......@@ -1669,7 +1670,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
}
release_bytes = reserve_bytes;
need_unlock = false;
again:
/*
* This is going to setup the pages array with the number of
......@@ -1679,19 +1679,23 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
ret = prepare_pages(inode, pages, num_pages,
pos, write_bytes,
force_page_uptodate);
if (ret)
if (ret) {
btrfs_delalloc_release_extents(BTRFS_I(inode),
reserve_bytes);
break;
}
ret = lock_and_cleanup_extent_if_need(BTRFS_I(inode), pages,
extents_locked = lock_and_cleanup_extent_if_need(
BTRFS_I(inode), pages,
num_pages, pos, write_bytes, &lockstart,
&lockend, &cached_state);
if (ret < 0) {
if (ret == -EAGAIN)
if (extents_locked < 0) {
if (extents_locked == -EAGAIN)
goto again;
btrfs_delalloc_release_extents(BTRFS_I(inode),
reserve_bytes);
ret = extents_locked;
break;
} else if (ret > 0) {
need_unlock = true;
ret = 0;
}
copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
......@@ -1718,23 +1722,10 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
PAGE_SIZE);
}
/*
* If we had a short copy we need to release the excess delaloc
* bytes we reserved. We need to increment outstanding_extents
* because btrfs_delalloc_release_space and
* btrfs_delalloc_release_metadata will decrement it, but
* we still have an outstanding extent for the chunk we actually
* managed to copy.
*/
if (num_sectors > dirty_sectors) {
/* release everything except the sectors we dirtied */
release_bytes -= dirty_sectors <<
fs_info->sb->s_blocksize_bits;
if (copied > 0) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
if (only_release_metadata) {
btrfs_delalloc_release_metadata(BTRFS_I(inode),
release_bytes);
......@@ -1756,10 +1747,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
if (copied > 0)
ret = btrfs_dirty_pages(inode, pages, dirty_pages,
pos, copied, NULL);
if (need_unlock)
if (extents_locked)
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state,
GFP_NOFS);
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
if (ret) {
btrfs_drop_pages(pages, num_pages);
break;
......@@ -2046,7 +2038,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
int ret = 0, err;
bool full_sync = 0;
bool full_sync = false;
u64 len;
/*
......
......@@ -1286,12 +1286,8 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
u64 start, end;
int ret;
start = block_group->key.objectid;
end = block_group->key.objectid + block_group->key.offset;
block_group->needs_free_space = 0;
ret = add_new_free_space_info(trans, fs_info, block_group, path);
......
......@@ -500,11 +500,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret) {
btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc);
btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
goto out_put;
}
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
out_put:
iput(inode);
out_release:
......
This diff is collapsed.
......@@ -86,6 +86,19 @@ struct btrfs_ioctl_received_subvol_args_32 {
struct btrfs_ioctl_received_subvol_args_32)
#endif
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_send_args_32 {
__s64 send_fd; /* in */
__u64 clone_sources_count; /* in */
compat_uptr_t clone_sources; /* in */
__u64 parent_root; /* in */
__u64 flags; /* in */
__u64 reserved[4]; /* in */
} __attribute__ ((__packed__));
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
struct btrfs_ioctl_send_args_32)
#endif
static int btrfs_clone(struct inode *src, struct inode *inode,
u64 off, u64 olen, u64 olen_aligned, u64 destoff,
......@@ -609,23 +622,6 @@ static noinline int create_subvol(struct inode *dir,
return ret;
}
static void btrfs_wait_for_no_snapshotting_writes(struct btrfs_root *root)
{
s64 writers;
DEFINE_WAIT(wait);
do {
prepare_to_wait(&root->subv_writers->wait, &wait,
TASK_UNINTERRUPTIBLE);
writers = percpu_counter_sum(&root->subv_writers->counter);
if (writers)
schedule();
finish_wait(&root->subv_writers->wait, &wait);
} while (writers);
}
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct dentry *dentry,
u64 *async_transid, bool readonly,
......@@ -654,7 +650,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
atomic_inc(&root->will_be_snapshotted);
smp_mb__after_atomic();
btrfs_wait_for_no_snapshotting_writes(root);
/* wait for no snapshot writes */
wait_event(root->subv_writers->wait,
percpu_counter_sum(&root->subv_writers->counter) == 0);
ret = btrfs_start_delalloc_inodes(root, 0);
if (ret)
......@@ -1219,6 +1217,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
unlock_page(pages[i]);
put_page(pages[i]);
}
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved);
return i_done;
out:
......@@ -1229,6 +1228,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
btrfs_delalloc_release_space(inode, data_reserved,
start_index << PAGE_SHIFT,
page_cnt << PAGE_SHIFT);
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved);
return ret;
......@@ -1420,21 +1420,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
filemap_flush(inode->i_mapping);
}
if (do_compress) {
/* the filemap_flush will queue IO into the worker threads, but
* we have to make sure the IO is actually started and that
* ordered extents get created before we return
*/
atomic_inc(&fs_info->async_submit_draining);
while (atomic_read(&fs_info->nr_async_submits) ||
atomic_read(&fs_info->async_delalloc_pages)) {
wait_event(fs_info->async_submit_wait,
(atomic_read(&fs_info->nr_async_submits) == 0 &&
atomic_read(&fs_info->async_delalloc_pages) == 0));
}
atomic_dec(&fs_info->async_submit_draining);
}
if (range->compress_type == BTRFS_COMPRESS_LZO) {
btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
} else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
......@@ -1842,8 +1827,13 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
ret = btrfs_update_root(trans, fs_info->tree_root,
&root->root_key, &root->root_item);
if (ret < 0) {
btrfs_end_transaction(trans);
goto out_reset;
}
ret = btrfs_commit_transaction(trans);
btrfs_commit_transaction(trans);
out_reset:
if (ret)
btrfs_set_root_flags(&root->root_item, root_flags);
......@@ -2179,7 +2169,7 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
inode = file_inode(file);
ret = search_ioctl(inode, &args.key, &buf_size,
(char *)(&uarg->buf[0]));
(char __user *)(&uarg->buf[0]));
if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
ret = -EFAULT;
else if (ret == -EOVERFLOW &&
......@@ -3706,7 +3696,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
if (disko) {
inode_add_bytes(inode, datal);
ret = btrfs_inc_extent_ref(trans,
fs_info,
root,
disko, diskl, 0,
root->root_key.objectid,
btrfs_ino(BTRFS_I(inode)),
......@@ -4129,10 +4119,12 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_space_info *dest_orig;
struct btrfs_ioctl_space_info __user *user_dest;
struct btrfs_space_info *info;
u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
BTRFS_BLOCK_GROUP_SYSTEM,
BTRFS_BLOCK_GROUP_METADATA,
BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
static const u64 types[] = {
BTRFS_BLOCK_GROUP_DATA,
BTRFS_BLOCK_GROUP_SYSTEM,
BTRFS_BLOCK_GROUP_METADATA,
BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA
};
int num_types = 4;
int alloc_size;
int ret = 0;
......@@ -4504,8 +4496,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
ipath->fspath->val[i] = rel_ptr;
}
ret = copy_to_user((void *)(unsigned long)ipa->fspath,
(void *)(unsigned long)ipath->fspath, size);
ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
ipath->fspath, size);
if (ret) {
ret = -EFAULT;
goto out;
......@@ -4540,13 +4532,14 @@ static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
}
static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
void __user *arg)
void __user *arg, int version)
{
int ret = 0;
int size;
struct btrfs_ioctl_logical_ino_args *loi;
struct btrfs_data_container *inodes = NULL;
struct btrfs_path *path = NULL;
bool ignore_offset;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
......@@ -4555,13 +4548,30 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
if (IS_ERR(loi))
return PTR_ERR(loi);
if (version == 1) {
ignore_offset = false;
size = min_t(u32, loi->size, SZ_64K);
} else {
/* All reserved bits must be 0 for now */
if (memchr_inv(loi->reserved, 0, sizeof(loi->reserved))) {
ret = -EINVAL;
goto out_loi;
}
/* Only accept flags we have defined so far */
if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) {
ret = -EINVAL;
goto out_loi;
}
ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
size = min_t(u32, loi->size, SZ_16M);
}
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
size = min_t(u32, loi->size, SZ_64K);
inodes = init_data_container(size);
if (IS_ERR(inodes)) {
ret = PTR_ERR(inodes);
......@@ -4570,20 +4580,21 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
}
ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
build_ino_list, inodes);
build_ino_list, inodes, ignore_offset);
if (ret == -EINVAL)
ret = -ENOENT;
if (ret < 0)
goto out;
ret = copy_to_user((void *)(unsigned long)loi->inodes,
(void *)(unsigned long)inodes, size);
ret = copy_to_user((void __user *)(unsigned long)loi->inodes, inodes,
size);
if (ret)
ret = -EFAULT;
out:
btrfs_free_path(path);
kvfree(inodes);
out_loi:
kfree(loi);
return ret;
......@@ -5160,15 +5171,11 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
root->root_key.objectid);
if (ret < 0 && ret != -EEXIST) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out;
}
}
ret = btrfs_commit_transaction(trans);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
}
out:
up_write(&fs_info->subvol_sem);
mnt_drop_write_file(file);
......@@ -5490,6 +5497,41 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
return ret;
}
static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
{
struct btrfs_ioctl_send_args *arg;
int ret;
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_send_args_32 args32;
ret = copy_from_user(&args32, argp, sizeof(args32));
if (ret)
return -EFAULT;
arg = kzalloc(sizeof(*arg), GFP_KERNEL);
if (!arg)
return -ENOMEM;
arg->send_fd = args32.send_fd;
arg->clone_sources_count = args32.clone_sources_count;
arg->clone_sources = compat_ptr(args32.clone_sources);
arg->parent_root = args32.parent_root;
arg->flags = args32.flags;
memcpy(arg->reserved, args32.reserved,
sizeof(args32.reserved));
#else
return -ENOTTY;
#endif
} else {
arg = memdup_user(argp, sizeof(*arg));
if (IS_ERR(arg))
return PTR_ERR(arg);
}
ret = btrfs_ioctl_send(file, arg);
kfree(arg);
return ret;
}
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
......@@ -5554,7 +5596,9 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_INO_PATHS:
return btrfs_ioctl_ino_to_path(root, argp);
case BTRFS_IOC_LOGICAL_INO:
return btrfs_ioctl_logical_to_ino(fs_info, argp);
return btrfs_ioctl_logical_to_ino(fs_info, argp, 1);
case BTRFS_IOC_LOGICAL_INO_V2:
return btrfs_ioctl_logical_to_ino(fs_info, argp, 2);
case BTRFS_IOC_SPACE_INFO:
return btrfs_ioctl_space_info(fs_info, argp);
case BTRFS_IOC_SYNC: {
......@@ -5595,7 +5639,11 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_set_received_subvol_32(file, argp);
#endif
case BTRFS_IOC_SEND:
return btrfs_ioctl_send(file, argp);
return _btrfs_ioctl_send(file, argp, false);
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
case BTRFS_IOC_SEND_32:
return _btrfs_ioctl_send(file, argp, true);
#endif
case BTRFS_IOC_GET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(fs_info, argp);
case BTRFS_IOC_QUOTA_CTL:
......
......@@ -430,10 +430,15 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
return ret;
}
static void lzo_set_level(struct list_head *ws, unsigned int type)
{
}
const struct btrfs_compress_op btrfs_lzo_compress = {
.alloc_workspace = lzo_alloc_workspace,
.free_workspace = lzo_free_workspace,
.compress_pages = lzo_compress_pages,
.decompress_bio = lzo_decompress_bio,
.decompress = lzo_decompress,
.set_level = lzo_set_level,
};
......@@ -242,6 +242,15 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
}
spin_unlock(&root->ordered_extent_lock);
/*
* We don't need the count_max_extents here, we can assume that all of
* that work has been done at higher layers, so this is truly the
* smallest the extent is going to get.
*/
spin_lock(&BTRFS_I(inode)->lock);
btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock);
return 0;
}
......@@ -591,11 +600,19 @@ void btrfs_remove_ordered_extent(struct inode *inode,
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ordered_inode_tree *tree;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
struct btrfs_root *root = btrfs_inode->root;
struct rb_node *node;
bool dec_pending_ordered = false;
tree = &BTRFS_I(inode)->ordered_tree;
/* This is paired with btrfs_add_ordered_extent. */
spin_lock(&btrfs_inode->lock);
btrfs_mod_outstanding_extents(btrfs_inode, -1);
spin_unlock(&btrfs_inode->lock);
if (root != fs_info->tree_root)
btrfs_delalloc_release_metadata(btrfs_inode, entry->len);
tree = &btrfs_inode->ordered_tree;
spin_lock_irq(&tree->lock);
node = &entry->rb_node;
rb_erase(node, &tree->tree);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -226,10 +226,6 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
struct btrfs_root *root;
int err = 0;
int ret;
bool can_recover = true;
if (sb_rdonly(fs_info->sb))
can_recover = false;
path = btrfs_alloc_path();
if (!path)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment