Commit 27eb427b authored by Linus Torvalds

Merge branch 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "We have a lot of subvolume quota improvements in here, along with big
  piles of cleanups from Dave Sterba and Anand Jain and others.

  Josef pitched in a batch of allocator fixes based on production use
  here at FB.  We found that mount -o ssd_spread greatly improved our
  performance on hardware raid5/6, but it exposed some CPU bottlenecks
  in the allocator.  These patches make a huge difference"

* 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (100 commits)
  Btrfs: fix hole punching when using the no-holes feature
  Btrfs: find_free_extent: Do not erroneously skip LOOP_CACHING_WAIT state
  btrfs: Fix a data space underflow warning
  btrfs: qgroup: Fix a rebase bug which will cause qgroup double free
  btrfs: qgroup: Fix a race in delayed_ref which leads to abort trans
  btrfs: clear PF_NOFREEZE in cleaner_kthread()
  btrfs: qgroup: Don't copy extent buffer to do qgroup rescan
  btrfs: add balance filters limits, stripes and usage to supported mask
  btrfs: extend balance filter usage to take minimum and maximum
  btrfs: add balance filter for stripes
  btrfs: extend balance filter limit to take minimum and maximum
  btrfs: fix use after free iterating extrefs
  btrfs: check unsupported filters in balance arguments
  Btrfs: fix regression running delayed references when using qgroups
  Btrfs: fix regression when running delayed references
  Btrfs: don't do extra bitmap search in one bit case
  Btrfs: keep track of largest extent in bitmaps
  Btrfs: don't keep trying to build clusters if we are fragmented
  Btrfs: cut down on loops through the allocator
  Btrfs: don't continue setting up space cache when enospc
  ...
parents 71300980 2959a32a
......@@ -362,6 +362,12 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
goto out;
}
if (btrfs_test_is_dummy_root(root)) {
srcu_read_unlock(&fs_info->subvol_srcu, index);
ret = -ENOENT;
goto out;
}
if (path->search_commit_root)
root_level = btrfs_header_level(root->commit_root);
else if (time_seq == (u64)-1)
......
......@@ -667,7 +667,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
if (NULL == selected_super) {
printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
return -1;
return -ENOMEM;
}
list_for_each_entry(device, dev_head, dev_list) {
......@@ -845,8 +845,8 @@ static int btrfsic_process_superblock_dev_mirror(
superblock_tmp->never_written = 0;
superblock_tmp->mirror_num = 1 + superblock_mirror_num;
if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
" @%llu (%s/%llu/%d)\n",
btrfs_info_in_rcu(device->dev_root->fs_info,
"new initial S-block (bdev %p, %s) @%llu (%s/%llu/%d)",
superblock_bdev,
rcu_str_deref(device->name), dev_bytenr,
dev_state->name, dev_bytenr,
......@@ -1660,7 +1660,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
sizeof(*block_ctx->pagev)) *
num_pages, GFP_NOFS);
if (!block_ctx->mem_to_free)
return -1;
return -ENOMEM;
block_ctx->datav = block_ctx->mem_to_free;
block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
for (i = 0; i < num_pages; i++) {
......
......@@ -745,11 +745,13 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
return ret;
}
static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
/*
 * Workspace bookkeeping for compression, one array slot per compression
 * type.  find_workspace() and free_workspace() index the array with
 * (type - 1); btrfs_init_compress() initializes the list, lock, counter
 * and wait queue of every slot.
 */
static struct {
/* workspaces that are currently idle and can be handed out again */
struct list_head idle_ws;
/* taken by find_workspace()/free_workspace() around idle_ws and num_ws updates */
spinlock_t ws_lock;
/* entries on idle_ws: bumped on list_add, dropped when find_workspace() removes one */
int num_ws;
/*
 * Workspaces allocated and not yet freed.  Parking a workspace on idle_ws
 * does not decrement this; only actually freeing one (or a failed
 * allocation) does.
 */
atomic_t alloc_ws;
/* find_workspace() sleeps here while alloc_ws exceeds the online CPU count */
wait_queue_head_t ws_wait;
} btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
static const struct btrfs_compress_op * const btrfs_compress_op[] = {
&btrfs_zlib_compress,
......@@ -761,10 +763,10 @@ void __init btrfs_init_compress(void)
int i;
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
INIT_LIST_HEAD(&comp_idle_workspace[i]);
spin_lock_init(&comp_workspace_lock[i]);
atomic_set(&comp_alloc_workspace[i], 0);
init_waitqueue_head(&comp_workspace_wait[i]);
INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
spin_lock_init(&btrfs_comp_ws[i].ws_lock);
atomic_set(&btrfs_comp_ws[i].alloc_ws, 0);
init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);
}
}
......@@ -778,38 +780,38 @@ static struct list_head *find_workspace(int type)
int cpus = num_online_cpus();
int idx = type - 1;
struct list_head *idle_workspace = &comp_idle_workspace[idx];
spinlock_t *workspace_lock = &comp_workspace_lock[idx];
atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
int *num_workspace = &comp_num_workspace[idx];
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
atomic_t *alloc_ws = &btrfs_comp_ws[idx].alloc_ws;
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
int *num_ws = &btrfs_comp_ws[idx].num_ws;
again:
spin_lock(workspace_lock);
if (!list_empty(idle_workspace)) {
workspace = idle_workspace->next;
spin_lock(ws_lock);
if (!list_empty(idle_ws)) {
workspace = idle_ws->next;
list_del(workspace);
(*num_workspace)--;
spin_unlock(workspace_lock);
(*num_ws)--;
spin_unlock(ws_lock);
return workspace;
}
if (atomic_read(alloc_workspace) > cpus) {
if (atomic_read(alloc_ws) > cpus) {
DEFINE_WAIT(wait);
spin_unlock(workspace_lock);
prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
spin_unlock(ws_lock);
prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
if (atomic_read(alloc_ws) > cpus && !*num_ws)
schedule();
finish_wait(workspace_wait, &wait);
finish_wait(ws_wait, &wait);
goto again;
}
atomic_inc(alloc_workspace);
spin_unlock(workspace_lock);
atomic_inc(alloc_ws);
spin_unlock(ws_lock);
workspace = btrfs_compress_op[idx]->alloc_workspace();
if (IS_ERR(workspace)) {
atomic_dec(alloc_workspace);
wake_up(workspace_wait);
atomic_dec(alloc_ws);
wake_up(ws_wait);
}
return workspace;
}
......@@ -821,27 +823,30 @@ static struct list_head *find_workspace(int type)
static void free_workspace(int type, struct list_head *workspace)
{
int idx = type - 1;
struct list_head *idle_workspace = &comp_idle_workspace[idx];
spinlock_t *workspace_lock = &comp_workspace_lock[idx];
atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
int *num_workspace = &comp_num_workspace[idx];
spin_lock(workspace_lock);
if (*num_workspace < num_online_cpus()) {
list_add(workspace, idle_workspace);
(*num_workspace)++;
spin_unlock(workspace_lock);
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
atomic_t *alloc_ws = &btrfs_comp_ws[idx].alloc_ws;
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
int *num_ws = &btrfs_comp_ws[idx].num_ws;
spin_lock(ws_lock);
if (*num_ws < num_online_cpus()) {
list_add(workspace, idle_ws);
(*num_ws)++;
spin_unlock(ws_lock);
goto wake;
}
spin_unlock(workspace_lock);
spin_unlock(ws_lock);
btrfs_compress_op[idx]->free_workspace(workspace);
atomic_dec(alloc_workspace);
atomic_dec(alloc_ws);
wake:
/*
* Make sure counter is updated before we wake up waiters.
*/
smp_mb();
if (waitqueue_active(workspace_wait))
wake_up(workspace_wait);
if (waitqueue_active(ws_wait))
wake_up(ws_wait);
}
/*
......@@ -853,11 +858,11 @@ static void free_workspaces(void)
int i;
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
while (!list_empty(&comp_idle_workspace[i])) {
workspace = comp_idle_workspace[i].next;
while (!list_empty(&btrfs_comp_ws[i].idle_ws)) {
workspace = btrfs_comp_ws[i].idle_ws.next;
list_del(workspace);
btrfs_compress_op[i]->free_workspace(workspace);
atomic_dec(&comp_alloc_workspace[i]);
atomic_dec(&btrfs_comp_ws[i].alloc_ws);
}
}
}
......
......@@ -1011,7 +1011,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
return ret;
if (refs == 0) {
ret = -EROFS;
btrfs_std_error(root->fs_info, ret);
btrfs_std_error(root->fs_info, ret, NULL);
return ret;
}
} else {
......@@ -1927,7 +1927,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
child = read_node_slot(root, mid, 0);
if (!child) {
ret = -EROFS;
btrfs_std_error(root->fs_info, ret);
btrfs_std_error(root->fs_info, ret, NULL);
goto enospc;
}
......@@ -2030,7 +2030,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
*/
if (!left) {
ret = -EROFS;
btrfs_std_error(root->fs_info, ret);
btrfs_std_error(root->fs_info, ret, NULL);
goto enospc;
}
wret = balance_node_right(trans, root, mid, left);
......@@ -4940,8 +4940,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
{
struct extent_buffer *leaf;
struct btrfs_item *item;
int last_off;
int dsize = 0;
u32 last_off;
u32 dsize = 0;
int ret = 0;
int wret;
int i;
......
This diff is collapsed.
......@@ -463,6 +463,10 @@ static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
static void finish_one_item(struct btrfs_delayed_root *delayed_root)
{
int seq = atomic_inc_return(&delayed_root->items_seq);
/*
* atomic_dec_return implies a barrier for waitqueue_active
*/
if ((atomic_dec_return(&delayed_root->items) <
BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
waitqueue_active(&delayed_root->wait))
......
This diff is collapsed.
......@@ -68,7 +68,6 @@ struct btrfs_delayed_ref_node {
unsigned int action:8;
unsigned int type:8;
unsigned int no_quota:1;
/* is this node still in the rbtree? */
unsigned int is_head:1;
unsigned int in_tree:1;
......@@ -112,6 +111,17 @@ struct btrfs_delayed_ref_head {
*/
int total_ref_mod;
/*
* For qgroup reserved space freeing.
*
* ref_root and reserved will be recorded after
* BTRFS_ADD_DELAYED_EXTENT is called.
* And will be used to free reserved qgroup space at
* run_delayed_refs() time.
*/
u64 qgroup_ref_root;
u64 qgroup_reserved;
/*
* when a new extent is allocated, it is just reserved in memory
* The actual extent isn't inserted into the extent allocation tree
......@@ -233,15 +243,16 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
int no_quota);
struct btrfs_delayed_extent_op *extent_op);
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op,
int no_quota);
u64 owner, u64 offset, u64 reserved, int action,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 ref_root, u64 bytenr, u64 num_bytes);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
......
......@@ -327,19 +327,6 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
args->start.tgtdev_name[0] == '\0')
return -EINVAL;
/*
* Here we commit the transaction to make sure commit_total_bytes
* of all the devices are updated.
*/
trans = btrfs_attach_transaction(root);
if (!IS_ERR(trans)) {
ret = btrfs_commit_transaction(trans, root);
if (ret)
return ret;
} else if (PTR_ERR(trans) != -ENOENT) {
return PTR_ERR(trans);
}
/* the disk copy procedure reuses the scrub code */
mutex_lock(&fs_info->volume_mutex);
ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid,
......@@ -356,6 +343,19 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
if (ret)
return ret;
/*
* Here we commit the transaction to make sure commit_total_bytes
* of all the devices are updated.
*/
trans = btrfs_attach_transaction(root);
if (!IS_ERR(trans)) {
ret = btrfs_commit_transaction(trans, root);
if (ret)
return ret;
} else if (PTR_ERR(trans) != -ENOENT) {
return PTR_ERR(trans);
}
btrfs_dev_replace_lock(dev_replace);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
......@@ -375,12 +375,8 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
WARN_ON(!tgt_device);
dev_replace->tgtdev = tgt_device;
ret = btrfs_kobj_add_device(tgt_device->fs_devices, tgt_device);
if (ret)
btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret);
printk_in_rcu(KERN_INFO
"BTRFS: dev_replace from %s (devid %llu) to %s started\n",
btrfs_info_in_rcu(root->fs_info,
"dev_replace from %s (devid %llu) to %s started",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
src_device->devid,
......@@ -401,6 +397,10 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
btrfs_dev_replace_unlock(dev_replace);
ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
if (ret)
btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret);
btrfs_wait_ordered_roots(root->fs_info, -1);
/* force writing the updated state information to disk */
......@@ -454,8 +454,7 @@ static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info)
static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info)
{
clear_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
if (waitqueue_active(&fs_info->replace_wait))
wake_up(&fs_info->replace_wait);
wake_up(&fs_info->replace_wait);
}
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
......@@ -523,8 +522,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
src_device,
tgt_device);
} else {
printk_in_rcu(KERN_ERR
"BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n",
btrfs_err_in_rcu(root->fs_info,
"btrfs_scrub_dev(%s, %llu, %s) failed %d",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
src_device->devid,
......@@ -540,8 +539,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
return scrub_ret;
}
printk_in_rcu(KERN_INFO
"BTRFS: dev_replace from %s (devid %llu) to %s finished\n",
btrfs_info_in_rcu(root->fs_info,
"dev_replace from %s (devid %llu) to %s finished",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
src_device->devid,
......@@ -586,7 +585,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&uuid_mutex);
/* replace the sysfs entry */
btrfs_kobj_rm_device(fs_info->fs_devices, src_device);
btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
btrfs_rm_dev_replace_free_srcdev(fs_info, src_device);
/* write back the superblocks */
......@@ -809,8 +808,8 @@ static int btrfs_dev_replace_kthread(void *data)
progress = status_args->status.progress_1000;
kfree(status_args);
progress = div_u64(progress, 10);
printk_in_rcu(KERN_INFO
"BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
btrfs_info_in_rcu(fs_info,
"continuing dev_replace from %s (devid %llu) to %s @%u%%",
dev_replace->srcdev->missing ? "<missing disk>" :
rcu_str_deref(dev_replace->srcdev->name),
dev_replace->srcdev->devid,
......
This diff is collapsed.
......@@ -60,6 +60,8 @@ void close_ctree(struct btrfs_root *root);
int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
struct buffer_head **bh_ret);
int btrfs_commit_super(struct btrfs_root *root);
struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
u64 bytenr);
......
This diff is collapsed.
This diff is collapsed.
......@@ -2,6 +2,7 @@
#define __EXTENTIO__
#include <linux/rbtree.h>
#include "ulist.h"
/* bits for the extent state */
#define EXTENT_DIRTY (1U << 0)
......@@ -18,6 +19,7 @@
#define EXTENT_NEED_WAIT (1U << 13)
#define EXTENT_DAMAGED (1U << 14)
#define EXTENT_NORESERVE (1U << 15)
#define EXTENT_QGROUP_RESERVED (1U << 16)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
......@@ -161,6 +163,17 @@ struct extent_buffer {
#endif
};
/*
 * Structure to record how many bytes and which ranges are set/cleared.
 *
 * A pointer to one of these is the final argument of
 * set_record_extent_bits() and clear_record_extent_bits().
 */
struct extent_changeset {
/* How many bytes are set/cleared in this operation */
u64 bytes_changed;
/*
 * Changed ranges, held in a ulist (declared in "ulist.h").
 * NOTE(review): who allocates and frees this ulist is not visible
 * here -- confirm against the callers.
 */
struct ulist *range_changed;
};
static inline void extent_set_compress_type(unsigned long *bio_flags,
int compress_type)
{
......@@ -210,11 +223,17 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *cached_state);
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask);
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask,
struct extent_changeset *changeset);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask);
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask);
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask,
struct extent_changeset *changeset);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
......
This diff is collapsed.
......@@ -450,9 +450,9 @@ static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
gen = io_ctl->cur;
if (le64_to_cpu(*gen) != generation) {
printk_ratelimited(KERN_ERR "BTRFS: space cache generation "
"(%Lu) does not match inode (%Lu)\n", *gen,
generation);
btrfs_err_rl(io_ctl->root->fs_info,
"space cache generation (%llu) does not match inode (%llu)",
*gen, generation);
io_ctl_unmap_page(io_ctl);
return -EIO;
}
......@@ -506,8 +506,8 @@ static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
PAGE_CACHE_SIZE - offset);
btrfs_csum_final(crc, (char *)&crc);
if (val != crc) {
printk_ratelimited(KERN_ERR "BTRFS: csum mismatch on free "
"space cache\n");
btrfs_err_rl(io_ctl->root->fs_info,
"csum mismatch on free space cache");
io_ctl_unmap_page(io_ctl);
return -EIO;
}
......@@ -1215,7 +1215,7 @@ int btrfs_wait_cache_io(struct btrfs_root *root,
* @offset - the offset for the key we'll insert
*
* This function writes out a free space cache struct to disk for quick recovery
* on mount. This will return 0 if it was successfull in writing the cache out,
* on mount. This will return 0 if it was successful in writing the cache out,
* or an errno if it was not.
*/
static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
......@@ -1730,7 +1730,7 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
*/
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *bitmap_info, u64 *offset,
u64 *bytes)
u64 *bytes, bool for_alloc)
{
unsigned long found_bits = 0;
unsigned long max_bits = 0;
......@@ -1738,11 +1738,26 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
unsigned long next_zero;
unsigned long extent_bits;
/*
* Skip searching the bitmap if we don't have a contiguous section that
* is large enough for this allocation.
*/
if (for_alloc &&
bitmap_info->max_extent_size &&
bitmap_info->max_extent_size < *bytes) {
*bytes = bitmap_info->max_extent_size;
return -1;
}
i = offset_to_bit(bitmap_info->offset, ctl->unit,
max_t(u64, *offset, bitmap_info->offset));
bits = bytes_to_bits(*bytes, ctl->unit);
for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
if (for_alloc && bits == 1) {
found_bits = 1;
break;
}
next_zero = find_next_zero_bit(bitmap_info->bitmap,
BITS_PER_BITMAP, i);
extent_bits = next_zero - i;
......@@ -1762,6 +1777,7 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
}
*bytes = (u64)(max_bits) * ctl->unit;
bitmap_info->max_extent_size = *bytes;
return -1;
}
......@@ -1813,7 +1829,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
if (entry->bitmap) {
u64 size = *bytes;
ret = search_bitmap(ctl, entry, &tmp, &size);
ret = search_bitmap(ctl, entry, &tmp, &size, true);
if (!ret) {
*offset = tmp;
*bytes = size;
......@@ -1874,7 +1890,8 @@ static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
search_start = *offset;
search_bytes = ctl->unit;
search_bytes = min(search_bytes, end - search_start + 1);
ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
false);
if (ret < 0 || search_start != *offset)
return -EINVAL;
......@@ -1919,7 +1936,7 @@ static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
search_start = *offset;
search_bytes = ctl->unit;
ret = search_bitmap(ctl, bitmap_info, &search_start,
&search_bytes);
&search_bytes, false);
if (ret < 0 || search_start != *offset)
return -EAGAIN;
......@@ -1943,6 +1960,12 @@ static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
bitmap_set_bits(ctl, info, offset, bytes_to_set);
/*
* We set some bytes, we have no idea what the max extent size is
* anymore.
*/
info->max_extent_size = 0;
return bytes_to_set;
}
......@@ -1951,12 +1974,19 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
struct btrfs_block_group_cache *block_group = ctl->private;
bool forced = false;
#ifdef CONFIG_BTRFS_DEBUG
if (btrfs_should_fragment_free_space(block_group->fs_info->extent_root,
block_group))
forced = true;
#endif
/*
* If we are below the extents threshold then we can add this as an
* extent, and don't have to deal with the bitmap
*/
if (ctl->free_extents < ctl->extents_thresh) {
if (!forced && ctl->free_extents < ctl->extents_thresh) {
/*
* If this block group has some small extents we don't want to
* use up all of our free slots in the cache with them, we want
......@@ -2661,7 +2691,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
search_start = min_start;
search_bytes = bytes;
err = search_bitmap(ctl, entry, &search_start, &search_bytes);
err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
if (err) {
if (search_bytes > *max_extent_size)
*max_extent_size = search_bytes;
......@@ -2775,6 +2805,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
unsigned long want_bits;
unsigned long min_bits;
unsigned long found_bits;
unsigned long max_bits = 0;
unsigned long start = 0;
unsigned long total_found = 0;
int ret;
......@@ -2784,6 +2815,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
want_bits = bytes_to_bits(bytes, ctl->unit);
min_bits = bytes_to_bits(min_bytes, ctl->unit);
/*
* Don't bother looking for a cluster in this bitmap if it's heavily
* fragmented.
*/
if (entry->max_extent_size &&
entry->max_extent_size < cont1_bytes)
return -ENOSPC;
again:
found_bits = 0;
for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
......@@ -2791,13 +2829,19 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
BITS_PER_BITMAP, i);
if (next_zero - i >= min_bits) {
found_bits = next_zero - i;
if (found_bits > max_bits)
max_bits = found_bits;
break;
}
if (next_zero - i > max_bits)
max_bits = next_zero - i;
i = next_zero;
}
if (!found_bits)
if (!found_bits) {
entry->max_extent_size = (u64)max_bits * ctl->unit;
return -ENOSPC;
}
if (!total_found) {
start = i;
......@@ -3056,6 +3100,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
spin_lock_init(&cluster->refill_lock);
cluster->root = RB_ROOT;
cluster->max_size = 0;
cluster->fragmented = false;
INIT_LIST_HEAD(&cluster->block_group_list);
cluster->block_group = NULL;
}
......@@ -3223,7 +3268,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
}
bytes = minlen;
ret2 = search_bitmap(ctl, entry, &start, &bytes);
ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
if (ret2 || start >= end) {
spin_unlock(&ctl->tree_lock);
mutex_unlock(&ctl->cache_writeout_mutex);
......@@ -3376,7 +3421,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
u64 count = 1;
int ret;
ret = search_bitmap(ctl, entry, &offset, &count);
ret = search_bitmap(ctl, entry, &offset, &count, true);
/* Logic error; Should be empty if it can't find anything */
ASSERT(!ret);
......@@ -3532,6 +3577,7 @@ int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
spin_lock(&ctl->tree_lock);
info->offset = offset;
info->bytes = bytes;
info->max_extent_size = 0;
ret = link_free_space(ctl, info);
spin_unlock(&ctl->tree_lock);
if (ret)
......@@ -3559,6 +3605,7 @@ int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
}
bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
bytes -= bytes_added;
offset += bytes_added;
spin_unlock(&ctl->tree_lock);
......@@ -3602,7 +3649,7 @@ int test_check_exists(struct btrfs_block_group_cache *cache,
bit_off = offset;
bit_bytes = ctl->unit;
ret = search_bitmap(ctl, info, &bit_off, &bit_bytes);
ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
if (!ret) {
if (bit_off == offset) {
ret = 1;
......
......@@ -23,6 +23,7 @@ struct btrfs_free_space {
struct rb_node offset_index;
u64 offset;
u64 bytes;
u64 max_extent_size;
unsigned long *bitmap;
struct list_head list;
};
......
......@@ -157,7 +157,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
*/
if (!btrfs_find_name_in_ext_backref(path, ref_objectid,
name, name_len, &extref)) {
btrfs_std_error(root->fs_info, -ENOENT);
btrfs_std_error(root->fs_info, -ENOENT, NULL);
ret = -EROFS;
goto out;
}
......
......@@ -488,17 +488,17 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
/* Just to make sure we have enough space */
prealloc += 8 * PAGE_CACHE_SIZE;
ret = btrfs_delalloc_reserve_space(inode, prealloc);
ret = btrfs_delalloc_reserve_space(inode, 0, prealloc);
if (ret)
goto out_put;
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret) {
btrfs_delalloc_release_space(inode, prealloc);
btrfs_delalloc_release_space(inode, 0, prealloc);
goto out_put;
}
btrfs_free_reserved_data_space(inode, prealloc);
btrfs_free_reserved_data_space(inode, 0, prealloc);
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
out_put:
......
This diff is collapsed.
This diff is collapsed.
......@@ -79,6 +79,9 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
write_lock(&eb->lock);
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
/*
* atomic_dec_and_test implies a barrier for waitqueue_active
*/
if (atomic_dec_and_test(&eb->blocking_writers) &&
waitqueue_active(&eb->write_lock_wq))
wake_up(&eb->write_lock_wq);
......@@ -86,6 +89,9 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
read_lock(&eb->lock);
atomic_inc(&eb->spinning_readers);
/*
* atomic_dec_and_test implies a barrier for waitqueue_active
*/
if (atomic_dec_and_test(&eb->blocking_readers) &&
waitqueue_active(&eb->read_lock_wq))
wake_up(&eb->read_lock_wq);
......@@ -229,6 +235,9 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
/*
* atomic_dec_and_test implies a barrier for waitqueue_active
*/
if (atomic_dec_and_test(&eb->blocking_readers) &&
waitqueue_active(&eb->read_lock_wq))
wake_up(&eb->read_lock_wq);
......@@ -280,6 +289,9 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
if (blockers) {
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_dec(&eb->blocking_writers);
/*
* Make sure counter is updated before we wake up waiters.
*/
smp_mb();
if (waitqueue_active(&eb->write_lock_wq))
wake_up(&eb->write_lock_wq);
......
......@@ -345,6 +345,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
if (entry->bytes_left == 0) {
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
/*
* Implicit memory barrier after test_and_set_bit
*/
if (waitqueue_active(&entry->wait))
wake_up(&entry->wait);
} else {
......@@ -409,6 +412,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
if (entry->bytes_left == 0) {
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
/*
* Implicit memory barrier after test_and_set_bit
*/
if (waitqueue_active(&entry->wait))
wake_up(&entry->wait);
} else {
......@@ -484,15 +490,16 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
spin_lock_irq(&log->log_extents_lock[index]);
while (!list_empty(&log->logged_list[index])) {
struct inode *inode;
ordered = list_first_entry(&log->logged_list[index],
struct btrfs_ordered_extent,
log_list);
list_del_init(&ordered->log_list);
inode = ordered->inode;
spin_unlock_irq(&log->log_extents_lock[index]);
if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
!test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
struct inode *inode = ordered->inode;
u64 start = ordered->file_offset;
u64 end = ordered->file_offset + ordered->len - 1;
......@@ -503,20 +510,25 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
&ordered->flags));
/*
* If our ordered extent completed it means it updated the
* fs/subvol and csum trees already, so no need to make the
* current transaction's commit wait for it, as we end up
* holding memory unnecessarily and delaying the inode's iput
* until the transaction commit (we schedule an iput for the
* inode when the ordered extent's refcount drops to 0), which
* prevents it from being evictable until the transaction
* commits.
* In order to keep us from losing our ordered extent
* information when committing the transaction we have to make
* sure that any logged extents are completed when we go to
* commit the transaction. To do this we simply increase the
* current transactions pending_ordered counter and decrement it
* when the ordered extent completes.
*/
if (test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags))
btrfs_put_ordered_extent(ordered);
else
list_add_tail(&ordered->trans_list, &trans->ordered);
if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
struct btrfs_ordered_inode_tree *tree;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock_irq(&tree->lock);
if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
atomic_inc(&trans->transaction->pending_ordered);
}
spin_unlock_irq(&tree->lock);
}
btrfs_put_ordered_extent(ordered);
spin_lock_irq(&log->log_extents_lock[index]);
}
spin_unlock_irq(&log->log_extents_lock[index]);
......@@ -578,6 +590,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_inode_tree *tree;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct rb_node *node;
bool dec_pending_ordered = false;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock_irq(&tree->lock);
......@@ -587,8 +600,37 @@ void btrfs_remove_ordered_extent(struct inode *inode,
if (tree->last == node)
tree->last = NULL;
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
if (test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags))
dec_pending_ordered = true;
spin_unlock_irq(&tree->lock);
/*
* The current running transaction is waiting on us, we need to let it
* know that we're complete and wake it up.
*/
if (dec_pending_ordered) {
struct btrfs_transaction *trans;
/*
* The checks for trans are just a formality, it should be set,
* but if it isn't we don't want to deref/assert under the spin
* lock, so be nice and check if trans is set, but ASSERT() so
* if it isn't set a developer will notice.
*/
spin_lock(&root->fs_info->trans_lock);
trans = root->fs_info->running_transaction;
if (trans)
atomic_inc(&trans->use_count);
spin_unlock(&root->fs_info->trans_lock);
ASSERT(trans);
if (trans) {
if (atomic_dec_and_test(&trans->pending_ordered))
wake_up(&trans->pending_wait);
btrfs_put_transaction(trans);
}
}
spin_lock(&root->ordered_extent_lock);
list_del_init(&entry->root_extent_list);
root->nr_ordered_extents--;
......
......@@ -73,6 +73,8 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent
* in the logging code. */
#define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to
* complete in the current transaction. */
struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset;
......
......@@ -49,18 +49,16 @@ static struct prop_handler prop_handlers[] = {
.extract = prop_compression_extract,
.inheritable = 1
},
{
.xattr_name = NULL
}
};
void __init btrfs_props_init(void)
{
struct prop_handler *p;
int i;
hash_init(prop_handlers_ht);
for (p = &prop_handlers[0]; p->xattr_name; p++) {
for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
struct prop_handler *p = &prop_handlers[i];
u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
hash_add(prop_handlers_ht, &p->node, h);
......@@ -301,15 +299,16 @@ static int inherit_props(struct btrfs_trans_handle *trans,
struct inode *inode,
struct inode *parent)
{
const struct prop_handler *h;
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
int i;
if (!test_bit(BTRFS_INODE_HAS_PROPS,
&BTRFS_I(parent)->runtime_flags))
return 0;
for (h = &prop_handlers[0]; h->xattr_name; h++) {
for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
const struct prop_handler *h = &prop_handlers[i];
const char *value;
u64 num_bytes;
......
......@@ -1652,10 +1652,6 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
}
}
/* For exclusive extent, free its reserved bytes too */
if (nr_old_roots == 0 && nr_new_roots == 1 &&
cur_new_count == nr_new_roots)
qg->reserved -= num_bytes;
if (dirty)
qgroup_dirty(fs_info, qg);
}
......@@ -2035,7 +2031,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
return ret;
}
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
......@@ -2116,14 +2112,13 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
return ret;
}
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
struct btrfs_fs_info *fs_info = root->fs_info;
struct ulist_node *unode;
struct ulist_iterator uiter;
u64 ref_root = root->root_key.objectid;
int ret = 0;
if (!is_fstree(ref_root))
......@@ -2169,6 +2164,11 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
spin_unlock(&fs_info->qgroup_lock);
}
/*
 * Convenience wrapper around btrfs_qgroup_free_refroot() for callers that
 * hold a btrfs_root: forwards @root's fs_info and objectid together with
 * @num_bytes.
 */
static inline void qgroup_free(struct btrfs_root *root, u64 num_bytes)
{
	/*
	 * The callee returns void; "return <void expression>;" is not valid
	 * ISO C in a void function, so just call it.
	 */
	btrfs_qgroup_free_refroot(root->fs_info, root->objectid, num_bytes);
}
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
{
if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
......@@ -2188,10 +2188,10 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
*/
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
struct btrfs_trans_handle *trans,
struct extent_buffer *scratch_leaf)
struct btrfs_trans_handle *trans)
{
struct btrfs_key found;
struct extent_buffer *scratch_leaf = NULL;
struct ulist *roots = NULL;
struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
u64 num_bytes;
......@@ -2229,7 +2229,15 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
if (!scratch_leaf) {
ret = -ENOMEM;
mutex_unlock(&fs_info->qgroup_rescan_lock);
goto out;
}
extent_buffer_get(scratch_leaf);
btrfs_tree_read_lock(scratch_leaf);
btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK);
slot = path->slots[0];
btrfs_release_path(path);
mutex_unlock(&fs_info->qgroup_rescan_lock);
......@@ -2255,6 +2263,10 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
goto out;
}
out:
if (scratch_leaf) {
btrfs_tree_read_unlock_blocking(scratch_leaf);
free_extent_buffer(scratch_leaf);
}
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
return ret;
......@@ -2266,16 +2278,12 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
qgroup_rescan_work);
struct btrfs_path *path;
struct btrfs_trans_handle *trans = NULL;
struct extent_buffer *scratch_leaf = NULL;
int err = -ENOMEM;
int ret = 0;
path = btrfs_alloc_path();
if (!path)
goto out;
scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
if (!scratch_leaf)
goto out;
err = 0;
while (!err) {
......@@ -2287,8 +2295,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
if (!fs_info->quota_enabled) {
err = -EINTR;
} else {
err = qgroup_rescan_leaf(fs_info, path, trans,
scratch_leaf);
err = qgroup_rescan_leaf(fs_info, path, trans);
}
if (err > 0)
btrfs_commit_transaction(trans, fs_info->fs_root);
......@@ -2297,7 +2304,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
}
out:
kfree(scratch_leaf);
btrfs_free_path(path);
mutex_lock(&fs_info->qgroup_rescan_lock);
......@@ -2486,3 +2492,190 @@ btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
btrfs_queue_work(fs_info->qgroup_rescan_workers,
&fs_info->qgroup_rescan_work);
}
/*
 * Reserve qgroup space for range [start, start + len).
 *
 * This function either reserves space from the related qgroups or does
 * nothing if the range is already reserved: EXTENT_QGROUP_RESERVED is set on
 * the range in the inode's io_tree and only the byte count reported back in
 * the changeset is passed to qgroup_reserve().
 *
 * Does nothing and returns 0 when quota is disabled, the root is not a fs
 * tree, or @len is 0.
 *
 * Return 0 for successful reserve
 * Return <0 for error (including -EQUOT, and -ENOMEM if the changeset ulist
 * cannot be allocated)
 *
 * NOTE: this function may sleep for memory allocation.
 */
int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_changeset changeset;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int ret;

	if (!root->fs_info->quota_enabled || !is_fstree(root->objectid) ||
	    len == 0)
		return 0;

	changeset.bytes_changed = 0;
	changeset.range_changed = ulist_alloc(GFP_NOFS);
	/*
	 * Nothing has been modified yet, so bail out before the ulist is
	 * handed to the io_tree helper or walked in the cleanup path.
	 */
	if (!changeset.range_changed)
		return -ENOMEM;

	ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
			start + len - 1, EXTENT_QGROUP_RESERVED, GFP_NOFS,
			&changeset);
	trace_btrfs_qgroup_reserve_data(inode, start, len,
					changeset.bytes_changed,
					QGROUP_RESERVE);
	if (ret < 0)
		goto cleanup;

	ret = qgroup_reserve(root, changeset.bytes_changed);
	if (ret < 0)
		goto cleanup;

	ulist_free(changeset.range_changed);
	return ret;

cleanup:
	/* cleanup already reserved ranges */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(changeset.range_changed, &uiter)))
		clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
				 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
				 GFP_NOFS);
	ulist_free(changeset.range_changed);
	return ret;
}
/*
 * Common helper for btrfs_qgroup_free_data() and btrfs_qgroup_release_data().
 *
 * Clears EXTENT_QGROUP_RESERVED on [start, start + len) in the inode's
 * io_tree while recording how many bytes were actually changed.  When @free
 * is non-zero those bytes are additionally passed to qgroup_free() for the
 * inode's root.
 *
 * Returns -ENOMEM if the changeset ulist cannot be allocated, otherwise the
 * result of clear_record_extent_bits().
 */
static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
				       int free)
{
	struct extent_changeset changed;
	int err;

	changed.bytes_changed = 0;
	changed.range_changed = ulist_alloc(GFP_NOFS);
	if (!changed.range_changed)
		return -ENOMEM;

	err = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
			start + len - 1, EXTENT_QGROUP_RESERVED, GFP_NOFS,
			&changed);
	if (err >= 0) {
		/* Only account and trace when the bits were cleared. */
		if (free)
			qgroup_free(BTRFS_I(inode)->root,
				    changed.bytes_changed);
		trace_btrfs_qgroup_release_data(inode, start, len,
				changed.bytes_changed,
				free ? QGROUP_FREE : QGROUP_RELEASE);
	}

	ulist_free(changed.range_changed);
	return err;
}
/*
 * Free a reserved space range from the io_tree and the related qgroups.
 *
 * Call this when a range of pages gets invalidated before reaching disk, or
 * on an error cleanup path.
 *
 * For data that has been written to disk, use btrfs_qgroup_release_data()
 * instead.
 *
 * NOTE: This function may sleep for memory allocation.
 */
int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len)
{
	/* Non-zero "free" flag: the helper also calls qgroup_free(). */
	const int free_rsv = 1;

	return __btrfs_qgroup_release_data(inode, start, len, free_rsv);
}
/*
 * Release a reserved space range from the io_tree only.
 *
 * Call this when a range of pages has been written to disk and the
 * corresponding FILE_EXTENT has been inserted into the corresponding root.
 *
 * Because the new qgroup accounting framework only updates qgroup numbers at
 * commit_transaction() time, the reserved space must not be freed from the
 * related qgroups here.
 *
 * The range still has to be released from the io_tree so that further writes
 * can be COWed.
 *
 * NOTE: This function may sleep for memory allocation.
 */
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
{
	/* Zero "free" flag: the helper skips qgroup_free(). */
	const int free_rsv = 0;

	return __btrfs_qgroup_release_data(inode, start, len, free_rsv);
}
/*
 * Reserve @num_bytes through qgroup_reserve() for @root and, on success, add
 * the amount to root->qgroup_meta_rsv.
 *
 * Returns 0 without doing anything when quota is disabled, @root is not a fs
 * tree, or @num_bytes is 0.  @num_bytes must be a multiple of root->nodesize
 * (enforced with BUG_ON).  Otherwise returns the qgroup_reserve() result.
 */
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes)
{
	int err;

	if (!root->fs_info->quota_enabled)
		return 0;
	if (!is_fstree(root->objectid))
		return 0;
	if (num_bytes == 0)
		return 0;

	BUG_ON(num_bytes != round_down(num_bytes, root->nodesize));

	err = qgroup_reserve(root, num_bytes);
	/* Only track the reservation once it has actually been granted. */
	if (err >= 0)
		atomic_add(num_bytes, &root->qgroup_meta_rsv);

	return err;
}
/*
 * Atomically reset root->qgroup_meta_rsv to 0 and pass whatever amount was
 * recorded there to qgroup_free().
 *
 * No-op when quota is disabled or @root is not a fs tree.
 */
void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
{
	int outstanding;

	if (!root->fs_info->quota_enabled)
		return;
	if (!is_fstree(root->objectid))
		return;

	/* xchg so the counter is read and cleared in a single step. */
	outstanding = atomic_xchg(&root->qgroup_meta_rsv, 0);
	if (outstanding != 0)
		qgroup_free(root, outstanding);
}
/*
 * Subtract @num_bytes from root->qgroup_meta_rsv and pass the same amount to
 * qgroup_free().
 *
 * No-op when quota is disabled or @root is not a fs tree.  @num_bytes must be
 * a multiple of root->nodesize (enforced with BUG_ON); a warning is raised if
 * the counter currently holds less than @num_bytes.
 */
void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
{
	if (!root->fs_info->quota_enabled)
		return;
	if (!is_fstree(root->objectid))
		return;

	BUG_ON(num_bytes != round_down(num_bytes, root->nodesize));

	/* Freeing more than was recorded would drive the counter negative. */
	WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes);

	atomic_sub(num_bytes, &root->qgroup_meta_rsv);
	qgroup_free(root, num_bytes);
}
/*
* Check qgroup reserved space leaking, normally at destory inode
* time
*/
void btrfs_qgroup_check_reserved_leak(struct inode *inode)
{
struct extent_changeset changeset;
struct ulist_node *unode;
struct ulist_iterator iter;
int ret;
changeset.bytes_changed = 0;
changeset.range_changed = ulist_alloc(GFP_NOFS);
if (WARN_ON(!changeset.range_changed))
return;
ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_QGROUP_RESERVED, GFP_NOFS, &changeset);
WARN_ON(ret < 0);
if (WARN_ON(changeset.bytes_changed)) {
ULIST_ITER_INIT(&iter);
while ((unode = ulist_next(changeset.range_changed, &iter))) {
btrfs_warn(BTRFS_I(inode)->root->fs_info,
"leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
inode->i_ino, unode->val, unode->aux);
}
qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
}
ulist_free(changeset.range_changed);
}
......@@ -33,6 +33,13 @@ struct btrfs_qgroup_extent_record {
struct ulist *old_roots;
};
/*
* For qgroup event trace points only
*/
#define QGROUP_RESERVE (1<<0)
#define QGROUP_RELEASE (1<<1)
#define QGROUP_FREE (1<<2)
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
......@@ -71,9 +78,18 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit);
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes);
/*
 * Delayed-ref flavour of the qgroup free path: calls
 * btrfs_qgroup_free_refroot() and then emits the
 * btrfs_qgroup_free_delayed_ref trace event for @ref_root and @num_bytes.
 *
 * TODO: Add a proper trace point for it. btrfs_qgroup_free() is called from
 * everywhere, so it cannot provide a good trace for the delayed ref case.
 */
static inline void
btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info, u64 ref_root,
			      u64 num_bytes)
{
	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
	trace_btrfs_qgroup_free_delayed_ref(ref_root, num_bytes);
}
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
......@@ -81,4 +97,13 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
u64 rfer, u64 excl);
#endif
/* New io_tree based accurate qgroup reserve API */
int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len);
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len);
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes);
void btrfs_qgroup_free_meta_all(struct btrfs_root *root);
void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes);
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
#endif /* __BTRFS_QGROUP__ */
......@@ -810,7 +810,11 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
}
goto done_nolock;
} else if (waitqueue_active(&h->wait)) {
/*
* The barrier for this waitqueue_active is not needed,
* we're protected by h->lock and can't miss a wakeup.
*/
} else if (waitqueue_active(&h->wait)) {
spin_unlock(&rbio->bio_list_lock);
spin_unlock_irqrestore(&h->lock, flags);
wake_up(&h->wait);
......
......@@ -569,7 +569,7 @@ static int reada_add_block(struct reada_control *rc, u64 logical,
rec = kzalloc(sizeof(*rec), GFP_NOFS);
if (!rec) {
reada_extent_put(root->fs_info, re);
return -1;
return -ENOMEM;
}
rec->rc = rc;
......@@ -918,6 +918,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
u64 start;
u64 generation;
int level;
int ret;
struct extent_buffer *node;
static struct btrfs_key max_key = {
.objectid = (u64)-1,
......@@ -943,9 +944,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
generation = btrfs_header_generation(node);
free_extent_buffer(node);
if (reada_add_block(rc, start, &max_key, level, generation)) {
ret = reada_add_block(rc, start, &max_key, level, generation);
if (ret) {
kfree(rc);
return ERR_PTR(-ENOMEM);
return ERR_PTR(ret);
}
reada_start_machine(root->fs_info);
......
......@@ -1716,7 +1716,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
num_bytes, parent,
btrfs_header_owner(leaf),
key.objectid, key.offset, 1);
key.objectid, key.offset);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
break;
......@@ -1724,7 +1724,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
parent, btrfs_header_owner(leaf),
key.objectid, key.offset, 1);
key.objectid, key.offset);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
break;
......@@ -1900,23 +1900,21 @@ int replace_path(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
path->nodes[level]->start,
src->root_key.objectid, level - 1, 0,
1);
src->root_key.objectid, level - 1, 0);
BUG_ON(ret);
ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
0, dest->root_key.objectid, level - 1,
0, 1);
0);
BUG_ON(ret);
ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
path->nodes[level]->start,
src->root_key.objectid, level - 1, 0,
1);
src->root_key.objectid, level - 1, 0);
BUG_ON(ret);
ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
0, dest->root_key.objectid, level - 1,
0, 1);
0);
BUG_ON(ret);
btrfs_unlock_up_safe(path, 0);
......@@ -2418,7 +2416,7 @@ void merge_reloc_roots(struct reloc_control *rc)
}
out:
if (ret) {
btrfs_std_error(root->fs_info, ret);
btrfs_std_error(root->fs_info, ret, NULL);
if (!list_empty(&reloc_roots))
free_reloc_roots(&reloc_roots);
......@@ -2745,7 +2743,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
node->eb->start, blocksize,
upper->eb->start,
btrfs_header_owner(upper->eb),
node->level, 0, 1);
node->level, 0);
BUG_ON(ret);
ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
......@@ -3034,8 +3032,8 @@ int prealloc_file_extent_cluster(struct inode *inode,
BUG_ON(cluster->start != cluster->boundary[0]);
mutex_lock(&inode->i_mutex);
ret = btrfs_check_data_free_space(inode, cluster->end +
1 - cluster->start, 0);
ret = btrfs_check_data_free_space(inode, cluster->start,
cluster->end + 1 - cluster->start);
if (ret)
goto out;
......@@ -3056,8 +3054,8 @@ int prealloc_file_extent_cluster(struct inode *inode,
break;
nr++;
}
btrfs_free_reserved_data_space(inode, cluster->end +
1 - cluster->start);
btrfs_free_reserved_data_space(inode, cluster->start,
cluster->end + 1 - cluster->start);
out:
mutex_unlock(&inode->i_mutex);
return ret;
......
......@@ -45,12 +45,13 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
if (!need_reset && btrfs_root_generation(item)
!= btrfs_root_generation_v2(item)) {
if (btrfs_root_generation_v2(item) != 0) {
printk(KERN_WARNING "BTRFS: mismatching "
btrfs_warn(eb->fs_info,
"mismatching "
"generation and generation_v2 "
"found in root item. This root "
"was probably mounted with an "
"older kernel. Resetting all "
"new fields.\n");
"new fields.");
}
need_reset = 1;
}
......@@ -141,7 +142,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
int ret;
int slot;
unsigned long ptr;
int old_len;
u32 old_len;
path = btrfs_alloc_path();
if (!path)
......@@ -283,7 +284,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
trans = btrfs_join_transaction(tree_root);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
btrfs_error(tree_root->fs_info, err,
btrfs_std_error(tree_root->fs_info, err,
"Failed to start trans to delete "
"orphan item");
break;
......@@ -292,7 +293,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
root_key.objectid);
btrfs_end_transaction(trans, tree_root);
if (err) {
btrfs_error(tree_root->fs_info, err,
btrfs_std_error(tree_root->fs_info, err,
"Failed to delete root orphan "
"item");
break;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -82,9 +82,9 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
extern const char * const btrfs_feature_set_names[3];
extern struct kobj_type space_info_ktype;
extern struct kobj_type btrfs_raid_ktype;
int btrfs_kobj_add_device(struct btrfs_fs_devices *fs_devices,
int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *one_device);
int btrfs_kobj_rm_device(struct btrfs_fs_devices *fs_devices,
int btrfs_sysfs_rm_device_link(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *one_device);
int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
struct kobject *parent);
......
......@@ -19,6 +19,7 @@
#include <linux/slab.h>
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../disk-io.h"
#include "../free-space-cache.h"
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
......@@ -35,6 +36,12 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
kfree(cache);
return NULL;
}
cache->fs_info = btrfs_alloc_dummy_fs_info();
if (!cache->fs_info) {
kfree(cache->free_space_ctl);
kfree(cache);
return NULL;
}
cache->key.objectid = 0;
cache->key.offset = 1024 * 1024 * 1024;
......@@ -879,7 +886,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
int btrfs_test_free_space_cache(void)
{
struct btrfs_block_group_cache *cache;
int ret;
struct btrfs_root *root = NULL;
int ret = -ENOMEM;
test_msg("Running btrfs free space cache tests\n");
......@@ -889,6 +897,17 @@ int btrfs_test_free_space_cache(void)
return 0;
}
root = btrfs_alloc_dummy_root();
if (!root)
goto out;
root->fs_info = btrfs_alloc_dummy_fs_info();
if (!root->fs_info)
goto out;
root->fs_info->extent_root = root;
cache->fs_info = root->fs_info;
ret = test_extents(cache);
if (ret)
goto out;
......@@ -904,6 +923,7 @@ int btrfs_test_free_space_cache(void)
__btrfs_remove_free_space_cache(cache->free_space_ctl);
kfree(cache->free_space_ctl);
kfree(cache);
btrfs_free_dummy_root(root);
test_msg("Free space cache tests finished\n");
return ret;
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment