Commit b478b2ba authored by Chris Mason's avatar Chris Mason

Merge branch 'qgroup' of git://git.jan-o-sch.net/btrfs-unstable into for-linus

Conflicts:
	fs/btrfs/ioctl.c
	fs/btrfs/ioctl.h
	fs/btrfs/transaction.c
	fs/btrfs/transaction.h
Signed-off-by: default avatarChris Mason <chris.mason@fusionio.com>
parents 67c9684f 6f72c7e2
......@@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o
reada.o backref.o ulist.o qgroup.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
......@@ -773,9 +773,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist *refs, struct ulist *roots,
const u64 *extent_item_pos)
u64 time_seq, struct ulist *refs,
struct ulist *roots, const u64 *extent_item_pos)
{
struct btrfs_key key;
struct btrfs_path *path;
......@@ -837,7 +836,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
btrfs_put_delayed_ref(&head->node);
goto again;
}
ret = __add_delayed_refs(head, delayed_ref_seq,
ret = __add_delayed_refs(head, time_seq,
&prefs_delayed);
mutex_unlock(&head->mutex);
if (ret) {
......@@ -981,8 +980,7 @@ static void free_leaf_list(struct ulist *blocks)
*/
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist **leafs,
u64 time_seq, struct ulist **leafs,
const u64 *extent_item_pos)
{
struct ulist *tmp;
......@@ -997,7 +995,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
return -ENOMEM;
}
ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
ret = find_parent_nodes(trans, fs_info, bytenr,
time_seq, *leafs, tmp, extent_item_pos);
ulist_free(tmp);
......@@ -1024,8 +1022,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
*/
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist **roots)
u64 time_seq, struct ulist **roots)
{
struct ulist *tmp;
struct ulist_node *node = NULL;
......@@ -1043,7 +1040,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
ULIST_ITER_INIT(&uiter);
while (1) {
ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
ret = find_parent_nodes(trans, fs_info, bytenr,
time_seq, tmp, *roots, NULL);
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
......@@ -1376,11 +1373,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
struct ulist *roots = NULL;
struct ulist_node *ref_node = NULL;
struct ulist_node *root_node = NULL;
struct seq_list seq_elem = {};
struct seq_list tree_mod_seq_elem = {};
struct ulist_iterator ref_uiter;
struct ulist_iterator root_uiter;
struct btrfs_delayed_ref_root *delayed_refs = NULL;
pr_debug("resolving all inodes for extent %llu\n",
extent_item_objectid);
......@@ -1391,16 +1386,11 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
trans = btrfs_join_transaction(fs_info->extent_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
btrfs_get_delayed_seq(delayed_refs, &seq_elem);
spin_unlock(&delayed_refs->lock);
btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
}
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
seq_elem.seq, tree_mod_seq_elem.seq, &refs,
tree_mod_seq_elem.seq, &refs,
&extent_item_pos);
if (ret)
goto out;
......@@ -1408,7 +1398,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
ULIST_ITER_INIT(&ref_uiter);
while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
seq_elem.seq,
tree_mod_seq_elem.seq, &roots);
if (ret)
break;
......@@ -1431,7 +1420,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
out:
if (!search_commit_root) {
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
btrfs_put_delayed_seq(delayed_refs, &seq_elem);
btrfs_end_transaction(trans, fs_info->extent_root);
}
......
......@@ -58,8 +58,7 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist **roots);
u64 time_seq, struct ulist **roots);
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
......
......@@ -321,7 +321,7 @@ struct tree_mod_root {
struct tree_mod_elem {
struct rb_node node;
u64 index; /* shifted logical */
struct seq_list elem;
u64 seq;
enum mod_log_op op;
/* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
......@@ -341,20 +341,50 @@ struct tree_mod_elem {
struct tree_mod_root old_root;
};
static inline void
__get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem)
static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info)
{
elem->seq = atomic_inc_return(&fs_info->tree_mod_seq);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
read_lock(&fs_info->tree_mod_log_lock);
}
static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info)
{
read_unlock(&fs_info->tree_mod_log_lock);
}
static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info)
{
write_lock(&fs_info->tree_mod_log_lock);
}
void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
{
write_unlock(&fs_info->tree_mod_log_lock);
}
/*
* This adds a new blocker to the tree mod log's blocker list if the @elem
* passed does not already have a sequence number set. So when a caller expects
* to record tree modifications, it should ensure to set elem->seq to zero
* before calling btrfs_get_tree_mod_seq.
* Returns a fresh, unused tree log modification sequence number, even if no new
* blocker was added.
*/
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem)
{
elem->flags = 1;
u64 seq;
tree_mod_log_write_lock(fs_info);
spin_lock(&fs_info->tree_mod_seq_lock);
__get_tree_mod_seq(fs_info, elem);
if (!elem->seq) {
elem->seq = btrfs_inc_tree_mod_seq(fs_info);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
}
seq = btrfs_inc_tree_mod_seq(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
tree_mod_log_write_unlock(fs_info);
return seq;
}
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
......@@ -371,41 +401,46 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
if (!seq_putting)
return;
BUG_ON(!(elem->flags & 1));
spin_lock(&fs_info->tree_mod_seq_lock);
list_del(&elem->list);
elem->seq = 0;
list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) {
if (cur_elem->seq < min_seq) {
if (seq_putting > cur_elem->seq) {
/*
* blocker with lower sequence number exists, we
* cannot remove anything from the log
*/
goto out;
spin_unlock(&fs_info->tree_mod_seq_lock);
return;
}
min_seq = cur_elem->seq;
}
}
spin_unlock(&fs_info->tree_mod_seq_lock);
/*
* we removed the lowest blocker from the blocker list, so there may be
* more processible delayed refs.
*/
wake_up(&fs_info->tree_mod_seq_wait);
/*
* anything that's lower than the lowest existing (read: blocked)
* sequence number can be removed from the tree.
*/
write_lock(&fs_info->tree_mod_log_lock);
tree_mod_log_write_lock(fs_info);
tm_root = &fs_info->tree_mod_log;
for (node = rb_first(tm_root); node; node = next) {
next = rb_next(node);
tm = container_of(node, struct tree_mod_elem, node);
if (tm->elem.seq > min_seq)
if (tm->seq > min_seq)
continue;
rb_erase(node, tm_root);
list_del(&tm->elem.list);
kfree(tm);
}
write_unlock(&fs_info->tree_mod_log_lock);
out:
spin_unlock(&fs_info->tree_mod_seq_lock);
tree_mod_log_write_unlock(fs_info);
}
/*
......@@ -423,11 +458,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
struct rb_node **new;
struct rb_node *parent = NULL;
struct tree_mod_elem *cur;
int ret = 0;
BUG_ON(!tm || !tm->elem.seq);
BUG_ON(!tm || !tm->seq);
write_lock(&fs_info->tree_mod_log_lock);
tm_root = &fs_info->tree_mod_log;
new = &tm_root->rb_node;
while (*new) {
......@@ -437,88 +470,81 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
new = &((*new)->rb_left);
else if (cur->index > tm->index)
new = &((*new)->rb_right);
else if (cur->elem.seq < tm->elem.seq)
else if (cur->seq < tm->seq)
new = &((*new)->rb_left);
else if (cur->elem.seq > tm->elem.seq)
else if (cur->seq > tm->seq)
new = &((*new)->rb_right);
else {
kfree(tm);
ret = -EEXIST;
goto unlock;
return -EEXIST;
}
}
rb_link_node(&tm->node, parent, new);
rb_insert_color(&tm->node, tm_root);
unlock:
write_unlock(&fs_info->tree_mod_log_lock);
return ret;
return 0;
}
/*
* Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
* returns zero with the tree_mod_log_lock acquired. The caller must hold
* this until all tree mod log insertions are recorded in the rb tree and then
* call tree_mod_log_write_unlock() to release.
*/
static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb) {
smp_mb();
if (list_empty(&(fs_info)->tree_mod_seq_list))
return 1;
if (!eb)
return 0;
if (btrfs_header_level(eb) == 0)
if (eb && btrfs_header_level(eb) == 0)
return 1;
tree_mod_log_write_lock(fs_info);
if (list_empty(&fs_info->tree_mod_seq_list)) {
/*
* someone emptied the list while we were waiting for the lock.
* we must not add to the list when no blocker exists.
*/
tree_mod_log_write_unlock(fs_info);
return 1;
}
return 0;
}
/*
* This allocates memory and gets a tree modification sequence number when
* needed.
* This allocates memory and gets a tree modification sequence number.
*
* Returns 0 when no sequence number is needed, < 0 on error.
* Returns 1 when a sequence number was added. In this case,
* fs_info->tree_mod_seq_lock was acquired and must be released by the caller
* after inserting into the rb tree.
* Returns <0 on error.
* Returns >0 (the added sequence number) on success.
*/
static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
struct tree_mod_elem **tm_ret)
{
struct tree_mod_elem *tm;
int seq;
if (tree_mod_dont_log(fs_info, NULL))
return 0;
tm = *tm_ret = kzalloc(sizeof(*tm), flags);
if (!tm)
return -ENOMEM;
tm->elem.flags = 0;
spin_lock(&fs_info->tree_mod_seq_lock);
if (list_empty(&fs_info->tree_mod_seq_list)) {
/*
* someone emptied the list while we were waiting for the lock.
* we must not add to the list, because no blocker exists. items
* are removed from the list only when the existing blocker is
* removed from the list.
* once we switch from spin locks to something different, we should
* honor the flags parameter here.
*/
kfree(tm);
seq = 0;
spin_unlock(&fs_info->tree_mod_seq_lock);
} else {
__get_tree_mod_seq(fs_info, &tm->elem);
seq = tm->elem.seq;
}
tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC);
if (!tm)
return -ENOMEM;
return seq;
tm->seq = btrfs_inc_tree_mod_seq(fs_info);
return tm->seq;
}
static noinline int
tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
static inline int
__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int slot,
enum mod_log_op op, gfp_t flags)
{
struct tree_mod_elem *tm;
int ret;
struct tree_mod_elem *tm;
ret = tree_mod_alloc(fs_info, flags, &tm);
if (ret <= 0)
if (ret < 0)
return ret;
tm->index = eb->start >> PAGE_CACHE_SHIFT;
......@@ -530,8 +556,22 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
tm->slot = slot;
tm->generation = btrfs_node_ptr_generation(eb, slot);
ret = __tree_mod_log_insert(fs_info, tm);
spin_unlock(&fs_info->tree_mod_seq_lock);
return __tree_mod_log_insert(fs_info, tm);
}
static noinline int
tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int slot,
enum mod_log_op op, gfp_t flags)
{
int ret;
if (tree_mod_dont_log(fs_info, eb))
return 0;
ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
tree_mod_log_write_unlock(fs_info);
return ret;
}
......@@ -542,6 +582,14 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS);
}
static noinline int
tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int slot,
enum mod_log_op op)
{
return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS);
}
static noinline int
tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int dst_slot, int src_slot,
......@@ -555,14 +603,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
return 0;
for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot,
MOD_LOG_KEY_REMOVE_WHILE_MOVING);
BUG_ON(ret < 0);
}
ret = tree_mod_alloc(fs_info, flags, &tm);
if (ret <= 0)
return ret;
if (ret < 0)
goto out;
tm->index = eb->start >> PAGE_CACHE_SHIFT;
tm->slot = src_slot;
......@@ -571,10 +619,26 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
tm->op = MOD_LOG_MOVE_KEYS;
ret = __tree_mod_log_insert(fs_info, tm);
spin_unlock(&fs_info->tree_mod_seq_lock);
out:
tree_mod_log_write_unlock(fs_info);
return ret;
}
static inline void
__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
{
int i;
u32 nritems;
int ret;
nritems = btrfs_header_nritems(eb);
for (i = nritems - 1; i >= 0; i--) {
ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
MOD_LOG_KEY_REMOVE_WHILE_FREEING);
BUG_ON(ret < 0);
}
}
static noinline int
tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
struct extent_buffer *old_root,
......@@ -583,9 +647,14 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
struct tree_mod_elem *tm;
int ret;
if (tree_mod_dont_log(fs_info, NULL))
return 0;
__tree_mod_log_free_eb(fs_info, old_root);
ret = tree_mod_alloc(fs_info, flags, &tm);
if (ret <= 0)
return ret;
if (ret < 0)
goto out;
tm->index = new_root->start >> PAGE_CACHE_SHIFT;
tm->old_root.logical = old_root->start;
......@@ -594,7 +663,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
tm->op = MOD_LOG_ROOT_REPLACE;
ret = __tree_mod_log_insert(fs_info, tm);
spin_unlock(&fs_info->tree_mod_seq_lock);
out:
tree_mod_log_write_unlock(fs_info);
return ret;
}
......@@ -608,7 +678,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
struct tree_mod_elem *found = NULL;
u64 index = start >> PAGE_CACHE_SHIFT;
read_lock(&fs_info->tree_mod_log_lock);
tree_mod_log_read_lock(fs_info);
tm_root = &fs_info->tree_mod_log;
node = tm_root->rb_node;
while (node) {
......@@ -617,18 +687,18 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
node = node->rb_left;
} else if (cur->index > index) {
node = node->rb_right;
} else if (cur->elem.seq < min_seq) {
} else if (cur->seq < min_seq) {
node = node->rb_left;
} else if (!smallest) {
/* we want the node with the highest seq */
if (found)
BUG_ON(found->elem.seq > cur->elem.seq);
BUG_ON(found->seq > cur->seq);
found = cur;
node = node->rb_left;
} else if (cur->elem.seq > min_seq) {
} else if (cur->seq > min_seq) {
/* we want the node with the smallest seq */
if (found)
BUG_ON(found->elem.seq < cur->elem.seq);
BUG_ON(found->seq < cur->seq);
found = cur;
node = node->rb_right;
} else {
......@@ -636,7 +706,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
break;
}
}
read_unlock(&fs_info->tree_mod_log_lock);
tree_mod_log_read_unlock(fs_info);
return found;
}
......@@ -664,7 +734,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
return __tree_mod_log_search(fs_info, start, min_seq, 0);
}
static inline void
static noinline void
tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
struct extent_buffer *src, unsigned long dst_offset,
unsigned long src_offset, int nr_items)
......@@ -675,18 +745,23 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
if (tree_mod_dont_log(fs_info, NULL))
return;
if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) {
tree_mod_log_write_unlock(fs_info);
return;
}
/* speed this up by single seq for all operations? */
for (i = 0; i < nr_items; i++) {
ret = tree_mod_log_insert_key(fs_info, src, i + src_offset,
ret = tree_mod_log_insert_key_locked(fs_info, src,
i + src_offset,
MOD_LOG_KEY_REMOVE);
BUG_ON(ret < 0);
ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset,
ret = tree_mod_log_insert_key_locked(fs_info, dst,
i + dst_offset,
MOD_LOG_KEY_ADD);
BUG_ON(ret < 0);
}
tree_mod_log_write_unlock(fs_info);
}
static inline void
......@@ -699,7 +774,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
BUG_ON(ret < 0);
}
static inline void
static noinline void
tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int slot, int atomic)
......@@ -712,30 +787,22 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
BUG_ON(ret < 0);
}
static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb)
static noinline void
tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
{
int i;
int ret;
u32 nritems;
if (tree_mod_dont_log(fs_info, eb))
return;
nritems = btrfs_header_nritems(eb);
for (i = nritems - 1; i >= 0; i--) {
ret = tree_mod_log_insert_key(fs_info, eb, i,
MOD_LOG_KEY_REMOVE_WHILE_FREEING);
BUG_ON(ret < 0);
}
__tree_mod_log_free_eb(fs_info, eb);
tree_mod_log_write_unlock(fs_info);
}
static inline void
static noinline void
tree_mod_log_set_root_pointer(struct btrfs_root *root,
struct extent_buffer *new_root_node)
{
int ret;
tree_mod_log_free_eb(root->fs_info, root->node);
ret = tree_mod_log_insert_root(root->fs_info, root->node,
new_root_node, GFP_NOFS);
BUG_ON(ret < 0);
......@@ -1069,7 +1136,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
unsigned long p_size = sizeof(struct btrfs_key_ptr);
n = btrfs_header_nritems(eb);
while (tm && tm->elem.seq >= time_seq) {
while (tm && tm->seq >= time_seq) {
/*
* all the operations are recorded with the operator used for
* the modification. as we're going backwards, we do the
......@@ -2721,6 +2788,78 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
return ret;
}
/*
* helper to use instead of search slot if no exact match is needed but
* instead the next or previous item should be returned.
* When find_higher is true, the next higher item is returned, the next lower
* otherwise.
* When return_any and find_higher are both true, and no higher item is found,
* return the next lower instead.
* When return_any is true and find_higher is false, and no lower item is found,
* return the next higher instead.
* It returns 0 if any item is found, 1 if none is found (tree empty), and
* < 0 on error
*/
int btrfs_search_slot_for_read(struct btrfs_root *root,
struct btrfs_key *key, struct btrfs_path *p,
int find_higher, int return_any)
{
int ret;
struct extent_buffer *leaf;
again:
ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
if (ret <= 0)
return ret;
/*
* a return value of 1 means the path is at the position where the
* item should be inserted. Normally this is the next bigger item,
* but in case the previous item is the last in a leaf, path points
* to the first free slot in the previous leaf, i.e. at an invalid
* item.
*/
leaf = p->nodes[0];
if (find_higher) {
if (p->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, p);
if (ret <= 0)
return ret;
if (!return_any)
return 1;
/*
* no higher item found, return the next
* lower instead
*/
return_any = 0;
find_higher = 0;
btrfs_release_path(p);
goto again;
}
} else {
if (p->slots[0] >= btrfs_header_nritems(leaf)) {
/* we're sitting on an invalid slot */
if (p->slots[0] == 0) {
ret = btrfs_prev_leaf(root, p);
if (ret <= 0)
return ret;
if (!return_any)
return 1;
/*
* no lower item found, return the next
* higher instead
*/
return_any = 0;
find_higher = 1;
btrfs_release_path(p);
goto again;
}
--p->slots[0];
}
}
return 0;
}
/*
* adjust the pointers going up the tree, starting at level
* making sure the right key of each node is points to 'key'.
......
......@@ -91,6 +91,9 @@ struct btrfs_ordered_sum;
/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL
/* holds quota configuration and tracking */
#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
/* orhpan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID -5ULL
......@@ -883,6 +886,72 @@ struct btrfs_block_group_item {
__le64 flags;
} __attribute__ ((__packed__));
/*
* is subvolume quota turned on?
*/
#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0)
/*
* SCANNING is set during the initialization phase
*/
#define BTRFS_QGROUP_STATUS_FLAG_SCANNING (1ULL << 1)
/*
* Some qgroup entries are known to be out of date,
* either because the configuration has changed in a way that
* makes a rescan necessary, or because the fs has been mounted
* with a non-qgroup-aware version.
* Turning qouta off and on again makes it inconsistent, too.
*/
#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2)
#define BTRFS_QGROUP_STATUS_VERSION 1
struct btrfs_qgroup_status_item {
__le64 version;
/*
* the generation is updated during every commit. As older
* versions of btrfs are not aware of qgroups, it will be
* possible to detect inconsistencies by checking the
* generation on mount time
*/
__le64 generation;
/* flag definitions see above */
__le64 flags;
/*
* only used during scanning to record the progress
* of the scan. It contains a logical address
*/
__le64 scan;
} __attribute__ ((__packed__));
struct btrfs_qgroup_info_item {
__le64 generation;
__le64 rfer;
__le64 rfer_cmpr;
__le64 excl;
__le64 excl_cmpr;
} __attribute__ ((__packed__));
/* flags definition for qgroup limits */
#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0)
#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1)
#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2)
#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3)
#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4)
#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5)
struct btrfs_qgroup_limit_item {
/*
* only updated when any of the other values change
*/
__le64 flags;
__le64 max_rfer;
__le64 max_excl;
__le64 rsv_rfer;
__le64 rsv_excl;
} __attribute__ ((__packed__));
struct btrfs_space_info {
u64 flags;
......@@ -1030,6 +1099,13 @@ struct btrfs_block_group_cache {
struct list_head cluster_list;
};
/* delayed seq elem */
struct seq_list {
struct list_head list;
u64 seq;
};
/* fs_info */
struct reloc_control;
struct btrfs_device;
struct btrfs_fs_devices;
......@@ -1044,6 +1120,7 @@ struct btrfs_fs_info {
struct btrfs_root *dev_root;
struct btrfs_root *fs_root;
struct btrfs_root *csum_root;
struct btrfs_root *quota_root;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
......@@ -1144,6 +1221,8 @@ struct btrfs_fs_info {
spinlock_t tree_mod_seq_lock;
atomic_t tree_mod_seq;
struct list_head tree_mod_seq_list;
struct seq_list tree_mod_seq_elem;
wait_queue_head_t tree_mod_seq_wait;
/* this protects tree_mod_log */
rwlock_t tree_mod_log_lock;
......@@ -1298,6 +1377,29 @@ struct btrfs_fs_info {
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
u32 check_integrity_print_mask;
#endif
/*
* quota information
*/
unsigned int quota_enabled:1;
/*
* quota_enabled only changes state after a commit. This holds the
* next state.
*/
unsigned int pending_quota_state:1;
/* is qgroup tracking in a consistent state? */
u64 qgroup_flags;
/* holds configuration and tracking. Protected by qgroup_lock */
struct rb_root qgroup_tree;
spinlock_t qgroup_lock;
/* list of dirty qgroups to be written at next commit */
struct list_head dirty_qgroups;
/* used by btrfs_qgroup_record_ref for an efficient tree traversal */
u64 qgroup_seq;
/* filesystem state */
u64 fs_state;
......@@ -1527,6 +1629,30 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_DEV_ITEM_KEY 216
#define BTRFS_CHUNK_ITEM_KEY 228
/*
* Records the overall state of the qgroups.
* There's only one instance of this key present,
* (0, BTRFS_QGROUP_STATUS_KEY, 0)
*/
#define BTRFS_QGROUP_STATUS_KEY 240
/*
* Records the currently used space of the qgroup.
* One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
*/
#define BTRFS_QGROUP_INFO_KEY 242
/*
* Contains the user configured limits for the qgroup.
* One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
*/
#define BTRFS_QGROUP_LIMIT_KEY 244
/*
* Records the child-parent relationship of qgroups. For
* each relation, 2 keys are present:
* (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
* (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
*/
#define BTRFS_QGROUP_RELATION_KEY 246
#define BTRFS_BALANCE_ITEM_KEY 248
/*
......@@ -2508,6 +2634,49 @@ static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb,
sizeof(val));
}
/* btrfs_qgroup_status_item */
BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item,
generation, 64);
BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
version, 64);
BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
flags, 64);
BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item,
scan, 64);
/* btrfs_qgroup_info_item */
BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
generation, 64);
BTRFS_SETGET_FUNCS(qgroup_info_rfer, struct btrfs_qgroup_info_item, rfer, 64);
BTRFS_SETGET_FUNCS(qgroup_info_rfer_cmpr, struct btrfs_qgroup_info_item,
rfer_cmpr, 64);
BTRFS_SETGET_FUNCS(qgroup_info_excl, struct btrfs_qgroup_info_item, excl, 64);
BTRFS_SETGET_FUNCS(qgroup_info_excl_cmpr, struct btrfs_qgroup_info_item,
excl_cmpr, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation,
struct btrfs_qgroup_info_item, generation, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer, struct btrfs_qgroup_info_item,
rfer, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer_cmpr,
struct btrfs_qgroup_info_item, rfer_cmpr, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl, struct btrfs_qgroup_info_item,
excl, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl_cmpr,
struct btrfs_qgroup_info_item, excl_cmpr, 64);
/* btrfs_qgroup_limit_item */
BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item,
flags, 64);
BTRFS_SETGET_FUNCS(qgroup_limit_max_rfer, struct btrfs_qgroup_limit_item,
max_rfer, 64);
BTRFS_SETGET_FUNCS(qgroup_limit_max_excl, struct btrfs_qgroup_limit_item,
max_excl, 64);
BTRFS_SETGET_FUNCS(qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item,
rsv_rfer, 64);
BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item,
rsv_excl, 64);
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
{
return sb->s_fs_info;
......@@ -2703,6 +2872,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
......@@ -2753,6 +2924,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
ins_len, int cow);
int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
struct btrfs_path *p, u64 time_seq);
int btrfs_search_slot_for_read(struct btrfs_root *root,
struct btrfs_key *key, struct btrfs_path *p,
int find_higher, int return_any);
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *parent,
int start_slot, int cache_only, u64 *last_ret,
......@@ -2835,11 +3009,22 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->chunk_root);
kfree(fs_info->dev_root);
kfree(fs_info->csum_root);
kfree(fs_info->quota_root);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
kfree(fs_info);
}
/* tree mod log functions from ctree.c */
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
{
return atomic_inc_return(&fs_info->tree_mod_seq);
}
/* root-item.c */
int btrfs_find_root_ref(struct btrfs_root *tree_root,
struct btrfs_path *path,
......@@ -3198,17 +3383,49 @@ void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err);
/* delayed seq elem */
struct seq_list {
/* qgroup.c */
struct qgroup_update {
struct list_head list;
u64 seq;
u32 flags;
struct btrfs_delayed_ref_node *node;
struct btrfs_delayed_extent_op *extent_op;
};
void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_quota_rescan(struct btrfs_fs_info *fs_info);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
char *name);
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid);
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit);
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
static inline int is_fstree(u64 rootid)
{
......
......@@ -233,22 +233,26 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
return 0;
}
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
{
struct seq_list *elem;
int ret = 0;
assert_spin_locked(&delayed_refs->lock);
if (list_empty(&delayed_refs->seq_head))
return 0;
elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list);
spin_lock(&fs_info->tree_mod_seq_lock);
if (!list_empty(&fs_info->tree_mod_seq_list)) {
elem = list_first_entry(&fs_info->tree_mod_seq_list,
struct seq_list, list);
if (seq >= elem->seq) {
pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n",
seq, elem->seq, delayed_refs);
return 1;
pr_debug("holding back delayed_ref %llu, lowest is "
"%llu (%p)\n", seq, elem->seq, delayed_refs);
ret = 1;
}
return 0;
}
spin_unlock(&fs_info->tree_mod_seq_lock);
return ret;
}
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
......@@ -525,8 +529,8 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
if (is_fstree(ref_root))
seq = inc_delayed_seq(delayed_refs);
if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_tree_ref(ref);
......@@ -584,8 +588,8 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
if (is_fstree(ref_root))
seq = inc_delayed_seq(delayed_refs);
if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_data_ref(ref);
......@@ -658,10 +662,12 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
if (!is_fstree(ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
if (!need_ref_seq(for_cow, ref_root) &&
waitqueue_active(&fs_info->tree_mod_seq_wait))
wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0;
}
......@@ -707,10 +713,12 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
if (!is_fstree(ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
if (!need_ref_seq(for_cow, ref_root) &&
waitqueue_active(&fs_info->tree_mod_seq_wait))
wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0;
}
......@@ -736,8 +744,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data);
if (waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
if (waitqueue_active(&fs_info->tree_mod_seq_wait))
wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
......
......@@ -139,26 +139,6 @@ struct btrfs_delayed_ref_root {
int flushing;
u64 run_delayed_start;
/*
* seq number of delayed refs. We need to know if a backref was being
* added before the currently processed ref or afterwards.
*/
u64 seq;
/*
* seq_list holds a list of all seq numbers that are currently being
* added to the list. While walking backrefs (btrfs_find_all_roots,
* qgroups), which might take some time, no newer ref must be processed,
* as it might influence the outcome of the walk.
*/
struct list_head seq_head;
/*
* when the only refs we have in the list must not be processed, we want
* to wait for more refs to show up or for the end of backref walking.
*/
wait_queue_head_t seq_wait;
};
static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
......@@ -195,34 +175,28 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 search_start);
static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
{
assert_spin_locked(&delayed_refs->lock);
++delayed_refs->seq;
return delayed_refs->seq;
}
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
static inline void
btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
struct seq_list *elem)
/*
* delayed refs with a ref_seq > 0 must be held back during backref walking.
* this only applies to items in one of the fs-trees. for_cow items never need
* to be held back, so they won't get a ref_seq number.
*/
static inline int need_ref_seq(int for_cow, u64 rootid)
{
assert_spin_locked(&delayed_refs->lock);
elem->seq = delayed_refs->seq;
list_add_tail(&elem->list, &delayed_refs->seq_head);
}
if (for_cow)
return 0;
static inline void
btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
struct seq_list *elem)
{
spin_lock(&delayed_refs->lock);
list_del(&elem->list);
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
}
if (rootid == BTRFS_FS_TREE_OBJECTID)
return 1;
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
return 1;
return 0;
}
/*
* a node might live in a head or a regular ref, this lets you
......
......@@ -1225,6 +1225,82 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
return root;
}
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 objectid)
{
struct extent_buffer *leaf;
struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_root *root;
struct btrfs_key key;
int ret = 0;
u64 bytenr;
root = btrfs_alloc_root(fs_info);
if (!root)
return ERR_PTR(-ENOMEM);
__setup_root(tree_root->nodesize, tree_root->leafsize,
tree_root->sectorsize, tree_root->stripesize,
root, fs_info, objectid);
root->root_key.objectid = objectid;
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
root->root_key.offset = 0;
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
goto fail;
}
bytenr = leaf->start;
memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_bytenr(leaf, leaf->start);
btrfs_set_header_generation(leaf, trans->transid);
btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(leaf, objectid);
root->node = leaf;
write_extent_buffer(leaf, fs_info->fsid,
(unsigned long)btrfs_header_fsid(leaf),
BTRFS_FSID_SIZE);
write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
(unsigned long)btrfs_header_chunk_tree_uuid(leaf),
BTRFS_UUID_SIZE);
btrfs_mark_buffer_dirty(leaf);
root->commit_root = btrfs_root_node(root);
root->track_dirty = 1;
root->root_item.flags = 0;
root->root_item.byte_limit = 0;
btrfs_set_root_bytenr(&root->root_item, leaf->start);
btrfs_set_root_generation(&root->root_item, trans->transid);
btrfs_set_root_level(&root->root_item, 0);
btrfs_set_root_refs(&root->root_item, 1);
btrfs_set_root_used(&root->root_item, leaf->len);
btrfs_set_root_last_snapshot(&root->root_item, 0);
btrfs_set_root_dirid(&root->root_item, 0);
root->root_item.drop_level = 0;
key.objectid = objectid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = 0;
ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item);
if (ret)
goto fail;
btrfs_tree_unlock(leaf);
fail:
if (ret)
return ERR_PTR(ret);
return root;
}
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
......@@ -1396,6 +1472,9 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
return fs_info->dev_root;
if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
return fs_info->csum_root;
if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
return fs_info->quota_root ? fs_info->quota_root :
ERR_PTR(-ENOENT);
again:
spin_lock(&fs_info->fs_roots_radix_lock);
root = radix_tree_lookup(&fs_info->fs_roots_radix,
......@@ -1823,6 +1902,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
free_extent_buffer(info->extent_root->commit_root);
free_extent_buffer(info->csum_root->node);
free_extent_buffer(info->csum_root->commit_root);
if (info->quota_root) {
free_extent_buffer(info->quota_root->node);
free_extent_buffer(info->quota_root->commit_root);
}
info->tree_root->node = NULL;
info->tree_root->commit_root = NULL;
......@@ -1832,6 +1915,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
info->extent_root->commit_root = NULL;
info->csum_root->node = NULL;
info->csum_root->commit_root = NULL;
if (info->quota_root) {
info->quota_root->node = NULL;
info->quota_root->commit_root = NULL;
}
if (chunk_root) {
free_extent_buffer(info->chunk_root->node);
......@@ -1862,6 +1949,7 @@ int open_ctree(struct super_block *sb,
struct btrfs_root *csum_root;
struct btrfs_root *chunk_root;
struct btrfs_root *dev_root;
struct btrfs_root *quota_root;
struct btrfs_root *log_tree_root;
int ret;
int err = -EINVAL;
......@@ -1873,9 +1961,10 @@ int open_ctree(struct super_block *sb,
csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info);
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info);
quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info);
if (!tree_root || !extent_root || !csum_root ||
!chunk_root || !dev_root) {
!chunk_root || !dev_root || !quota_root) {
err = -ENOMEM;
goto fail;
}
......@@ -1944,6 +2033,8 @@ int open_ctree(struct super_block *sb,
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
init_waitqueue_head(&fs_info->tree_mod_seq_wait);
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
spin_lock_init(&fs_info->reada_lock);
......@@ -2032,6 +2123,13 @@ int open_ctree(struct super_block *sb,
init_rwsem(&fs_info->cleanup_work_sem);
init_rwsem(&fs_info->subvol_sem);
spin_lock_init(&fs_info->qgroup_lock);
fs_info->qgroup_tree = RB_ROOT;
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
fs_info->qgroup_seq = 1;
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
......@@ -2356,6 +2454,17 @@ int open_ctree(struct super_block *sb,
goto recovery_tree_root;
csum_root->track_dirty = 1;
ret = find_and_setup_root(tree_root, fs_info,
BTRFS_QUOTA_TREE_OBJECTID, quota_root);
if (ret) {
kfree(quota_root);
quota_root = fs_info->quota_root = NULL;
} else {
quota_root->track_dirty = 1;
fs_info->quota_enabled = 1;
fs_info->pending_quota_state = 1;
}
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
......@@ -2415,6 +2524,9 @@ int open_ctree(struct super_block *sb,
" integrity check module %s\n", sb->s_id);
}
#endif
ret = btrfs_read_qgroup_config(fs_info);
if (ret)
goto fail_trans_kthread;
/* do not make disk changes in broken FS */
if (btrfs_super_log_root(disk_super) != 0 &&
......@@ -2425,7 +2537,7 @@ int open_ctree(struct super_block *sb,
printk(KERN_WARNING "Btrfs log replay required "
"on RO media\n");
err = -EIO;
goto fail_trans_kthread;
goto fail_qgroup;
}
blocksize =
btrfs_level_size(tree_root,
......@@ -2434,7 +2546,7 @@ int open_ctree(struct super_block *sb,
log_tree_root = btrfs_alloc_root(fs_info);
if (!log_tree_root) {
err = -ENOMEM;
goto fail_trans_kthread;
goto fail_qgroup;
}
__setup_root(nodesize, leafsize, sectorsize, stripesize,
......@@ -2474,7 +2586,7 @@ int open_ctree(struct super_block *sb,
printk(KERN_WARNING
"btrfs: failed to recover relocation\n");
err = -EINVAL;
goto fail_trans_kthread;
goto fail_qgroup;
}
}
......@@ -2484,10 +2596,10 @@ int open_ctree(struct super_block *sb,
fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
if (!fs_info->fs_root)
goto fail_trans_kthread;
goto fail_qgroup;
if (IS_ERR(fs_info->fs_root)) {
err = PTR_ERR(fs_info->fs_root);
goto fail_trans_kthread;
goto fail_qgroup;
}
if (sb->s_flags & MS_RDONLY)
......@@ -2511,6 +2623,8 @@ int open_ctree(struct super_block *sb,
return 0;
fail_qgroup:
btrfs_free_qgroup_config(fs_info);
fail_trans_kthread:
kthread_stop(fs_info->transaction_kthread);
fail_cleaner:
......@@ -3109,6 +3223,8 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 2;
smp_mb();
btrfs_free_qgroup_config(root->fs_info);
if (fs_info->delalloc_bytes) {
printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
(unsigned long long)fs_info->delalloc_bytes);
......@@ -3128,6 +3244,10 @@ int close_ctree(struct btrfs_root *root)
free_extent_buffer(fs_info->dev_root->commit_root);
free_extent_buffer(fs_info->csum_root->node);
free_extent_buffer(fs_info->csum_root->commit_root);
if (fs_info->quota_root) {
free_extent_buffer(fs_info->quota_root->node);
free_extent_buffer(fs_info->quota_root->commit_root);
}
btrfs_free_block_groups(fs_info);
......@@ -3258,7 +3378,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
}
static int btree_lock_page_hook(struct page *page, void *data,
int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *))
{
struct inode *inode = page->mapping->host;
......
......@@ -89,6 +89,12 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
int btrfs_cleanup_transaction(struct btrfs_root *root);
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
struct btrfs_root *root);
void btrfs_abort_devices(struct btrfs_root *root);
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 objectid);
int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *));
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_init_lockdep(void);
......
......@@ -34,6 +34,8 @@
#include "locking.h"
#include "free-space-cache.h"
#undef SCRAMBLE_DELAYED_REFS
/*
* control flags for do_chunk_alloc's force field
* CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
......@@ -2217,6 +2219,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_head *locked_ref = NULL;
struct btrfs_delayed_extent_op *extent_op;
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
int count = 0;
int must_insert_reserved = 0;
......@@ -2255,7 +2258,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ref = select_delayed_ref(locked_ref);
if (ref && ref->seq &&
btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
/*
* there are still refs with lower seq numbers in the
* process of being added. Don't run this ref yet.
......@@ -2337,7 +2340,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
}
next:
do_chunk_alloc(trans, root->fs_info->extent_root,
do_chunk_alloc(trans, fs_info->extent_root,
2 * 1024 * 1024,
btrfs_get_alloc_profile(root, 0),
CHUNK_ALLOC_NO_FORCE);
......@@ -2347,21 +2350,99 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
return count;
}
static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
unsigned long num_refs,
struct list_head *first_seq)
{
spin_unlock(&delayed_refs->lock);
pr_debug("waiting for more refs (num %ld, first %p)\n",
num_refs, first_seq);
wait_event(delayed_refs->seq_wait,
wait_event(fs_info->tree_mod_seq_wait,
num_refs != delayed_refs->num_entries ||
delayed_refs->seq_head.next != first_seq);
fs_info->tree_mod_seq_list.next != first_seq);
pr_debug("done waiting for more refs (num %ld, first %p)\n",
delayed_refs->num_entries, delayed_refs->seq_head.next);
delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
spin_lock(&delayed_refs->lock);
}
#ifdef SCRAMBLE_DELAYED_REFS
/*
* Normally delayed refs get processed in ascending bytenr order. This
* correlates in most cases to the order added. To expose dependencies on this
* order, we start to process the tree in the middle instead of the beginning
*/
static u64 find_middle(struct rb_root *root)
{
struct rb_node *n = root->rb_node;
struct btrfs_delayed_ref_node *entry;
int alt = 1;
u64 middle;
u64 first = 0, last = 0;
n = rb_first(root);
if (n) {
entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
first = entry->bytenr;
}
n = rb_last(root);
if (n) {
entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
last = entry->bytenr;
}
n = root->rb_node;
while (n) {
entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
WARN_ON(!entry->in_tree);
middle = entry->bytenr;
if (alt)
n = n->rb_left;
else
n = n->rb_right;
alt = 1 - alt;
}
return middle;
}
#endif
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct qgroup_update *qgroup_update;
int ret = 0;
if (list_empty(&trans->qgroup_ref_list) !=
!trans->delayed_ref_elem.seq) {
/* list without seq or seq without list */
printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n",
list_empty(&trans->qgroup_ref_list) ? "" : " not",
trans->delayed_ref_elem.seq);
BUG();
}
if (!trans->delayed_ref_elem.seq)
return 0;
while (!list_empty(&trans->qgroup_ref_list)) {
qgroup_update = list_first_entry(&trans->qgroup_ref_list,
struct qgroup_update, list);
list_del(&qgroup_update->list);
if (!ret)
ret = btrfs_qgroup_account_ref(
trans, fs_info, qgroup_update->node,
qgroup_update->extent_op);
kfree(qgroup_update);
}
btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
return ret;
}
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
......@@ -2398,11 +2479,18 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0),
CHUNK_ALLOC_NO_FORCE);
btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
delayed_refs = &trans->transaction->delayed_refs;
INIT_LIST_HEAD(&cluster);
again:
consider_waiting = 0;
spin_lock(&delayed_refs->lock);
#ifdef SCRAMBLE_DELAYED_REFS
delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
if (count == 0) {
count = delayed_refs->num_entries * 2;
run_most = 1;
......@@ -2437,7 +2525,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
num_refs = delayed_refs->num_entries;
first_seq = root->fs_info->tree_mod_seq_list.next;
} else {
wait_for_more_refs(delayed_refs,
wait_for_more_refs(root->fs_info, delayed_refs,
num_refs, first_seq);
/*
* after waiting, things have changed. we
......@@ -2502,6 +2590,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
}
out:
spin_unlock(&delayed_refs->lock);
assert_qgroups_uptodate(trans);
return 0;
}
......@@ -4479,6 +4568,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
csum_bytes = BTRFS_I(inode)->csum_bytes;
spin_unlock(&BTRFS_I(inode)->lock);
if (root->fs_info->quota_enabled) {
ret = btrfs_qgroup_reserve(root, num_bytes +
nr_extents * root->leafsize);
if (ret)
return ret;
}
ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
if (ret) {
u64 to_free = 0;
......@@ -4557,6 +4653,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), to_free, 0);
if (root->fs_info->quota_enabled) {
btrfs_qgroup_free(root, num_bytes +
dropped * root->leafsize);
}
btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
to_free);
}
......@@ -5193,8 +5294,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
rb_erase(&head->node.rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
if (waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
wake_up(&root->fs_info->tree_mod_seq_wait);
/*
* we don't take a ref on the node because we're removing it from the
......
......@@ -336,7 +336,8 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
static noinline int create_subvol(struct btrfs_root *root,
struct dentry *dentry,
char *name, int namelen,
u64 *async_transid)
u64 *async_transid,
struct btrfs_qgroup_inherit **inherit)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
......@@ -368,6 +369,11 @@ static noinline int create_subvol(struct btrfs_root *root,
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid,
inherit ? *inherit : NULL);
if (ret)
goto fail;
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
......@@ -484,7 +490,7 @@ static noinline int create_subvol(struct btrfs_root *root,
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
char *name, int namelen, u64 *async_transid,
bool readonly)
bool readonly, struct btrfs_qgroup_inherit **inherit)
{
struct inode *inode;
struct btrfs_pending_snapshot *pending_snapshot;
......@@ -502,6 +508,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
pending_snapshot->readonly = readonly;
if (inherit) {
pending_snapshot->inherit = *inherit;
*inherit = NULL; /* take responsibility to free it */
}
trans = btrfs_start_transaction(root->fs_info->extent_root, 5);
if (IS_ERR(trans)) {
......@@ -635,7 +645,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
static noinline int btrfs_mksubvol(struct path *parent,
char *name, int namelen,
struct btrfs_root *snap_src,
u64 *async_transid, bool readonly)
u64 *async_transid, bool readonly,
struct btrfs_qgroup_inherit **inherit)
{
struct inode *dir = parent->dentry->d_inode;
struct dentry *dentry;
......@@ -662,11 +673,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
goto out_up_read;
if (snap_src) {
error = create_snapshot(snap_src, dentry,
name, namelen, async_transid, readonly);
error = create_snapshot(snap_src, dentry, name, namelen,
async_transid, readonly, inherit);
} else {
error = create_subvol(BTRFS_I(dir)->root, dentry,
name, namelen, async_transid);
name, namelen, async_transid, inherit);
}
if (!error)
fsnotify_mkdir(dir, dentry);
......@@ -1375,11 +1386,9 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
}
static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
char *name,
unsigned long fd,
int subvol,
u64 *transid,
bool readonly)
char *name, unsigned long fd, int subvol,
u64 *transid, bool readonly,
struct btrfs_qgroup_inherit **inherit)
{
struct file *src_file;
int namelen;
......@@ -1403,7 +1412,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
if (subvol) {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
NULL, transid, readonly);
NULL, transid, readonly, inherit);
} else {
struct inode *src_inode;
src_file = fget(fd);
......@@ -1422,7 +1431,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
}
ret = btrfs_mksubvol(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
transid, readonly);
transid, readonly, inherit);
fput(src_file);
}
out_drop_write:
......@@ -1444,7 +1453,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
vol_args->fd, subvol,
NULL, false);
NULL, false, NULL);
kfree(vol_args);
return ret;
......@@ -1458,6 +1467,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
u64 transid = 0;
u64 *ptr = NULL;
bool readonly = false;
struct btrfs_qgroup_inherit *inherit = NULL;
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
......@@ -1465,7 +1475,8 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
if (vol_args->flags &
~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) {
~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY |
BTRFS_SUBVOL_QGROUP_INHERIT)) {
ret = -EOPNOTSUPP;
goto out;
}
......@@ -1474,10 +1485,21 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
ptr = &transid;
if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
readonly = true;
if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
if (vol_args->size > PAGE_CACHE_SIZE) {
ret = -EINVAL;
goto out;
}
inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
if (IS_ERR(inherit)) {
ret = PTR_ERR(inherit);
goto out;
}
}
ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
vol_args->fd, subvol,
ptr, readonly);
vol_args->fd, subvol, ptr,
readonly, &inherit);
if (ret == 0 && ptr &&
copy_to_user(arg +
......@@ -1486,6 +1508,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
ret = -EFAULT;
out:
kfree(vol_args);
kfree(inherit);
return ret;
}
......@@ -3401,6 +3424,183 @@ static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
return ret;
}
static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg)
{
struct btrfs_ioctl_quota_ctl_args *sa;
struct btrfs_trans_handle *trans = NULL;
int ret;
int err;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
sa = memdup_user(arg, sizeof(*sa));
if (IS_ERR(sa))
return PTR_ERR(sa);
if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
}
switch (sa->cmd) {
case BTRFS_QUOTA_CTL_ENABLE:
ret = btrfs_quota_enable(trans, root->fs_info);
break;
case BTRFS_QUOTA_CTL_DISABLE:
ret = btrfs_quota_disable(trans, root->fs_info);
break;
case BTRFS_QUOTA_CTL_RESCAN:
ret = btrfs_quota_rescan(root->fs_info);
break;
default:
ret = -EINVAL;
break;
}
if (copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
if (trans) {
err = btrfs_commit_transaction(trans, root);
if (err && !ret)
ret = err;
}
out:
kfree(sa);
return ret;
}
static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg)
{
struct btrfs_ioctl_qgroup_assign_args *sa;
struct btrfs_trans_handle *trans;
int ret;
int err;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
sa = memdup_user(arg, sizeof(*sa));
if (IS_ERR(sa))
return PTR_ERR(sa);
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
/* FIXME: check if the IDs really exist */
if (sa->assign) {
ret = btrfs_add_qgroup_relation(trans, root->fs_info,
sa->src, sa->dst);
} else {
ret = btrfs_del_qgroup_relation(trans, root->fs_info,
sa->src, sa->dst);
}
err = btrfs_end_transaction(trans, root);
if (err && !ret)
ret = err;
out:
kfree(sa);
return ret;
}
static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg)
{
struct btrfs_ioctl_qgroup_create_args *sa;
struct btrfs_trans_handle *trans;
int ret;
int err;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
sa = memdup_user(arg, sizeof(*sa));
if (IS_ERR(sa))
return PTR_ERR(sa);
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
/* FIXME: check if the IDs really exist */
if (sa->create) {
ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid,
NULL);
} else {
ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
}
err = btrfs_end_transaction(trans, root);
if (err && !ret)
ret = err;
out:
kfree(sa);
return ret;
}
static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg)
{
struct btrfs_ioctl_qgroup_limit_args *sa;
struct btrfs_trans_handle *trans;
int ret;
int err;
u64 qgroupid;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
sa = memdup_user(arg, sizeof(*sa));
if (IS_ERR(sa))
return PTR_ERR(sa);
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
qgroupid = sa->qgroupid;
if (!qgroupid) {
/* take the current subvol as qgroup */
qgroupid = root->root_key.objectid;
}
/* FIXME: check if the IDs really exist */
ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim);
err = btrfs_end_transaction(trans, root);
if (err && !ret)
ret = err;
out:
kfree(sa);
return ret;
}
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
......@@ -3422,6 +3622,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_snap_create_v2(file, argp, 0);
case BTRFS_IOC_SUBVOL_CREATE:
return btrfs_ioctl_snap_create(file, argp, 1);
case BTRFS_IOC_SUBVOL_CREATE_V2:
return btrfs_ioctl_snap_create_v2(file, argp, 1);
case BTRFS_IOC_SNAP_DESTROY:
return btrfs_ioctl_snap_destroy(file, argp);
case BTRFS_IOC_SUBVOL_GETFLAGS:
......@@ -3485,6 +3687,14 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_balance_progress(root, argp);
case BTRFS_IOC_GET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(root, argp);
case BTRFS_IOC_QUOTA_CTL:
return btrfs_ioctl_quota_ctl(root, argp);
case BTRFS_IOC_QGROUP_ASSIGN:
return btrfs_ioctl_qgroup_assign(root, argp);
case BTRFS_IOC_QGROUP_CREATE:
return btrfs_ioctl_qgroup_create(root, argp);
case BTRFS_IOC_QGROUP_LIMIT:
return btrfs_ioctl_qgroup_limit(root, argp);
}
return -ENOTTY;
......
......@@ -32,15 +32,46 @@ struct btrfs_ioctl_vol_args {
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
#define BTRFS_FSID_SIZE 16
#define BTRFS_UUID_SIZE 16
#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
struct btrfs_qgroup_limit {
__u64 flags;
__u64 max_rfer;
__u64 max_excl;
__u64 rsv_rfer;
__u64 rsv_excl;
};
struct btrfs_qgroup_inherit {
__u64 flags;
__u64 num_qgroups;
__u64 num_ref_copies;
__u64 num_excl_copies;
struct btrfs_qgroup_limit lim;
__u64 qgroups[0];
};
struct btrfs_ioctl_qgroup_limit_args {
__u64 qgroupid;
struct btrfs_qgroup_limit lim;
};
#define BTRFS_SUBVOL_NAME_MAX 4039
struct btrfs_ioctl_vol_args_v2 {
__s64 fd;
__u64 transid;
__u64 flags;
union {
struct {
__u64 size;
struct btrfs_qgroup_inherit __user *qgroup_inherit;
};
__u64 unused[4];
};
char name[BTRFS_SUBVOL_NAME_MAX + 1];
};
......@@ -299,6 +330,25 @@ struct btrfs_ioctl_get_dev_stats {
__u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
};
#define BTRFS_QUOTA_CTL_ENABLE 1
#define BTRFS_QUOTA_CTL_DISABLE 2
#define BTRFS_QUOTA_CTL_RESCAN 3
struct btrfs_ioctl_quota_ctl_args {
__u64 cmd;
__u64 status;
};
struct btrfs_ioctl_qgroup_assign_args {
__u64 assign;
__u64 src;
__u64 dst;
};
struct btrfs_ioctl_qgroup_create_args {
__u64 create;
__u64 qgroupid;
};
#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
......@@ -343,6 +393,8 @@ struct btrfs_ioctl_get_dev_stats {
#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
struct btrfs_ioctl_vol_args_v2)
#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \
struct btrfs_ioctl_vol_args_v2)
#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
......@@ -365,6 +417,14 @@ struct btrfs_ioctl_get_dev_stats {
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \
struct btrfs_ioctl_quota_ctl_args)
#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \
struct btrfs_ioctl_qgroup_assign_args)
#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \
struct btrfs_ioctl_qgroup_create_args)
#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
struct btrfs_ioctl_qgroup_limit_args)
#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
struct btrfs_ioctl_get_dev_stats)
#endif
/*
* Copyright (C) 2011 STRATO. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "locking.h"
#include "ulist.h"
#include "ioctl.h"
#include "backref.h"
/* TODO XXX FIXME
* - subvol delete -> delete when ref goes to 0? delete limits also?
* - reorganize keys
* - compressed
* - sync
* - rescan
* - copy also limits on subvol creation
* - limit
* - caches fuer ulists
* - performance benchmarks
* - check all ioctl parameters
*/
/*
* one struct for each qgroup, organized in fs_info->qgroup_tree.
*/
struct btrfs_qgroup {
u64 qgroupid;
/*
* state
*/
u64 rfer; /* referenced */
u64 rfer_cmpr; /* referenced compressed */
u64 excl; /* exclusive */
u64 excl_cmpr; /* exclusive compressed */
/*
* limits
*/
u64 lim_flags; /* which limits are set */
u64 max_rfer;
u64 max_excl;
u64 rsv_rfer;
u64 rsv_excl;
/*
* reservation tracking
*/
u64 reserved;
/*
* lists
*/
struct list_head groups; /* groups this group is member of */
struct list_head members; /* groups that are members of this group */
struct list_head dirty; /* dirty groups */
struct rb_node node; /* tree of qgroups */
/*
* temp variables for accounting operations
*/
u64 tag;
u64 refcnt;
};
/*
* glue structure to represent the relations between qgroups.
*/
struct btrfs_qgroup_list {
struct list_head next_group;
struct list_head next_member;
struct btrfs_qgroup *group;
struct btrfs_qgroup *member;
};
/* must be called with qgroup_lock held */
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
u64 qgroupid)
{
struct rb_node *n = fs_info->qgroup_tree.rb_node;
struct btrfs_qgroup *qgroup;
while (n) {
qgroup = rb_entry(n, struct btrfs_qgroup, node);
if (qgroup->qgroupid < qgroupid)
n = n->rb_left;
else if (qgroup->qgroupid > qgroupid)
n = n->rb_right;
else
return qgroup;
}
return NULL;
}
/* must be called with qgroup_lock held */
static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
u64 qgroupid)
{
struct rb_node **p = &fs_info->qgroup_tree.rb_node;
struct rb_node *parent = NULL;
struct btrfs_qgroup *qgroup;
while (*p) {
parent = *p;
qgroup = rb_entry(parent, struct btrfs_qgroup, node);
if (qgroup->qgroupid < qgroupid)
p = &(*p)->rb_left;
else if (qgroup->qgroupid > qgroupid)
p = &(*p)->rb_right;
else
return qgroup;
}
qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
if (!qgroup)
return ERR_PTR(-ENOMEM);
qgroup->qgroupid = qgroupid;
INIT_LIST_HEAD(&qgroup->groups);
INIT_LIST_HEAD(&qgroup->members);
INIT_LIST_HEAD(&qgroup->dirty);
rb_link_node(&qgroup->node, parent, p);
rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
return qgroup;
}
/* must be called with qgroup_lock held */
static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
{
struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
struct btrfs_qgroup_list *list;
if (!qgroup)
return -ENOENT;
rb_erase(&qgroup->node, &fs_info->qgroup_tree);
list_del(&qgroup->dirty);
while (!list_empty(&qgroup->groups)) {
list = list_first_entry(&qgroup->groups,
struct btrfs_qgroup_list, next_group);
list_del(&list->next_group);
list_del(&list->next_member);
kfree(list);
}
while (!list_empty(&qgroup->members)) {
list = list_first_entry(&qgroup->members,
struct btrfs_qgroup_list, next_member);
list_del(&list->next_group);
list_del(&list->next_member);
kfree(list);
}
kfree(qgroup);
return 0;
}
/* must be called with qgroup_lock held */
static int add_relation_rb(struct btrfs_fs_info *fs_info,
u64 memberid, u64 parentid)
{
struct btrfs_qgroup *member;
struct btrfs_qgroup *parent;
struct btrfs_qgroup_list *list;
member = find_qgroup_rb(fs_info, memberid);
parent = find_qgroup_rb(fs_info, parentid);
if (!member || !parent)
return -ENOENT;
list = kzalloc(sizeof(*list), GFP_ATOMIC);
if (!list)
return -ENOMEM;
list->group = parent;
list->member = member;
list_add_tail(&list->next_group, &member->groups);
list_add_tail(&list->next_member, &parent->members);
return 0;
}
/* must be called with qgroup_lock held */
static int del_relation_rb(struct btrfs_fs_info *fs_info,
u64 memberid, u64 parentid)
{
struct btrfs_qgroup *member;
struct btrfs_qgroup *parent;
struct btrfs_qgroup_list *list;
member = find_qgroup_rb(fs_info, memberid);
parent = find_qgroup_rb(fs_info, parentid);
if (!member || !parent)
return -ENOENT;
list_for_each_entry(list, &member->groups, next_group) {
if (list->group == parent) {
list_del(&list->next_group);
list_del(&list->next_member);
kfree(list);
return 0;
}
}
return -ENOENT;
}
/*
* The full config is read in one go, only called from open_ctree()
* It doesn't use any locking, as at this point we're still single-threaded
*/
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
{
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_root *quota_root = fs_info->quota_root;
struct btrfs_path *path = NULL;
struct extent_buffer *l;
int slot;
int ret = 0;
u64 flags = 0;
if (!fs_info->quota_enabled)
return 0;
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
/* default this to quota off, in case no status key is found */
fs_info->qgroup_flags = 0;
/*
* pass 1: read status, all qgroup infos and limits
*/
key.objectid = 0;
key.type = 0;
key.offset = 0;
ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
if (ret)
goto out;
while (1) {
struct btrfs_qgroup *qgroup;
slot = path->slots[0];
l = path->nodes[0];
btrfs_item_key_to_cpu(l, &found_key, slot);
if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
struct btrfs_qgroup_status_item *ptr;
ptr = btrfs_item_ptr(l, slot,
struct btrfs_qgroup_status_item);
if (btrfs_qgroup_status_version(l, ptr) !=
BTRFS_QGROUP_STATUS_VERSION) {
printk(KERN_ERR
"btrfs: old qgroup version, quota disabled\n");
goto out;
}
if (btrfs_qgroup_status_generation(l, ptr) !=
fs_info->generation) {
flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
printk(KERN_ERR
"btrfs: qgroup generation mismatch, "
"marked as inconsistent\n");
}
fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
ptr);
/* FIXME read scan element */
goto next1;
}
if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
found_key.type != BTRFS_QGROUP_LIMIT_KEY)
goto next1;
qgroup = find_qgroup_rb(fs_info, found_key.offset);
if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
(!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
printk(KERN_ERR "btrfs: inconsitent qgroup config\n");
flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
}
if (!qgroup) {
qgroup = add_qgroup_rb(fs_info, found_key.offset);
if (IS_ERR(qgroup)) {
ret = PTR_ERR(qgroup);
goto out;
}
}
switch (found_key.type) {
case BTRFS_QGROUP_INFO_KEY: {
struct btrfs_qgroup_info_item *ptr;
ptr = btrfs_item_ptr(l, slot,
struct btrfs_qgroup_info_item);
qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
/* generation currently unused */
break;
}
case BTRFS_QGROUP_LIMIT_KEY: {
struct btrfs_qgroup_limit_item *ptr;
ptr = btrfs_item_ptr(l, slot,
struct btrfs_qgroup_limit_item);
qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
break;
}
}
next1:
ret = btrfs_next_item(quota_root, path);
if (ret < 0)
goto out;
if (ret)
break;
}
btrfs_release_path(path);
/*
* pass 2: read all qgroup relations
*/
key.objectid = 0;
key.type = BTRFS_QGROUP_RELATION_KEY;
key.offset = 0;
ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
if (ret)
goto out;
while (1) {
slot = path->slots[0];
l = path->nodes[0];
btrfs_item_key_to_cpu(l, &found_key, slot);
if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
goto next2;
if (found_key.objectid > found_key.offset) {
/* parent <- member, not needed to build config */
/* FIXME should we omit the key completely? */
goto next2;
}
ret = add_relation_rb(fs_info, found_key.objectid,
found_key.offset);
if (ret)
goto out;
next2:
ret = btrfs_next_item(quota_root, path);
if (ret < 0)
goto out;
if (ret)
break;
}
out:
fs_info->qgroup_flags |= flags;
if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
}
btrfs_free_path(path);
return ret < 0 ? ret : 0;
}
/*
* This is only called from close_ctree() or open_ctree(), both in single-
* treaded paths. Clean up the in-memory structures. No locking needed.
*/
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
struct rb_node *n;
struct btrfs_qgroup *qgroup;
struct btrfs_qgroup_list *list;
while ((n = rb_first(&fs_info->qgroup_tree))) {
qgroup = rb_entry(n, struct btrfs_qgroup, node);
rb_erase(n, &fs_info->qgroup_tree);
WARN_ON(!list_empty(&qgroup->dirty));
while (!list_empty(&qgroup->groups)) {
list = list_first_entry(&qgroup->groups,
struct btrfs_qgroup_list,
next_group);
list_del(&list->next_group);
list_del(&list->next_member);
kfree(list);
}
while (!list_empty(&qgroup->members)) {
list = list_first_entry(&qgroup->members,
struct btrfs_qgroup_list,
next_member);
list_del(&list->next_group);
list_del(&list->next_member);
kfree(list);
}
kfree(qgroup);
}
}
static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
struct btrfs_root *quota_root,
u64 src, u64 dst)
{
int ret;
struct btrfs_path *path;
struct btrfs_key key;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = src;
key.type = BTRFS_QGROUP_RELATION_KEY;
key.offset = dst;
ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
return ret;
}
static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
struct btrfs_root *quota_root,
u64 src, u64 dst)
{
int ret;
struct btrfs_path *path;
struct btrfs_key key;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = src;
key.type = BTRFS_QGROUP_RELATION_KEY;
key.offset = dst;
ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
if (ret < 0)
goto out;
if (ret > 0) {
ret = -ENOENT;
goto out;
}
ret = btrfs_del_item(trans, quota_root, path);
out:
btrfs_free_path(path);
return ret;
}
static int add_qgroup_item(struct btrfs_trans_handle *trans,
struct btrfs_root *quota_root, u64 qgroupid)
{
int ret;
struct btrfs_path *path;
struct btrfs_qgroup_info_item *qgroup_info;
struct btrfs_qgroup_limit_item *qgroup_limit;
struct extent_buffer *leaf;
struct btrfs_key key;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = 0;
key.type = BTRFS_QGROUP_INFO_KEY;
key.offset = qgroupid;
ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
sizeof(*qgroup_info));
if (ret)
goto out;
leaf = path->nodes[0];
qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_qgroup_info_item);
btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
key.type = BTRFS_QGROUP_LIMIT_KEY;
ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
sizeof(*qgroup_limit));
if (ret)
goto out;
leaf = path->nodes[0];
qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_qgroup_limit_item);
btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
btrfs_mark_buffer_dirty(leaf);
ret = 0;
out:
btrfs_free_path(path);
return ret;
}
static int del_qgroup_item(struct btrfs_trans_handle *trans,
struct btrfs_root *quota_root, u64 qgroupid)
{
int ret;
struct btrfs_path *path;
struct btrfs_key key;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = 0;
key.type = BTRFS_QGROUP_INFO_KEY;
key.offset = qgroupid;
ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
if (ret < 0)
goto out;
if (ret > 0) {
ret = -ENOENT;
goto out;
}
ret = btrfs_del_item(trans, quota_root, path);
if (ret)
goto out;
btrfs_release_path(path);
key.type = BTRFS_QGROUP_LIMIT_KEY;
ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
if (ret < 0)
goto out;
if (ret > 0) {
ret = -ENOENT;
goto out;
}
ret = btrfs_del_item(trans, quota_root, path);
out:
btrfs_free_path(path);
return ret;
}
static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 qgroupid,
u64 flags, u64 max_rfer, u64 max_excl,
u64 rsv_rfer, u64 rsv_excl)
{
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *l;
struct btrfs_qgroup_limit_item *qgroup_limit;
int ret;
int slot;
key.objectid = 0;
key.type = BTRFS_QGROUP_LIMIT_KEY;
key.offset = qgroupid;
path = btrfs_alloc_path();
BUG_ON(!path);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
if (ret)
goto out;
l = path->nodes[0];
slot = path->slots[0];
qgroup_limit = btrfs_item_ptr(l, path->slots[0],
struct btrfs_qgroup_limit_item);
btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
btrfs_mark_buffer_dirty(l);
out:
btrfs_free_path(path);
return ret;
}
static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_qgroup *qgroup)
{
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *l;
struct btrfs_qgroup_info_item *qgroup_info;
int ret;
int slot;
key.objectid = 0;
key.type = BTRFS_QGROUP_INFO_KEY;
key.offset = qgroup->qgroupid;
path = btrfs_alloc_path();
BUG_ON(!path);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
if (ret)
goto out;
l = path->nodes[0];
slot = path->slots[0];
qgroup_info = btrfs_item_ptr(l, path->slots[0],
struct btrfs_qgroup_info_item);
btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
btrfs_mark_buffer_dirty(l);
out:
btrfs_free_path(path);
return ret;
}
static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_root *root)
{
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *l;
struct btrfs_qgroup_status_item *ptr;
int ret;
int slot;
key.objectid = 0;
key.type = BTRFS_QGROUP_STATUS_KEY;
key.offset = 0;
path = btrfs_alloc_path();
BUG_ON(!path);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
if (ret)
goto out;
l = path->nodes[0];
slot = path->slots[0];
ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
/* XXX scan */
btrfs_mark_buffer_dirty(l);
out:
btrfs_free_path(path);
return ret;
}
/*
* called with qgroup_lock held
*/
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_path *path;
struct btrfs_key key;
int ret;
if (!root)
return -EINVAL;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
while (1) {
key.objectid = 0;
key.offset = 0;
key.type = 0;
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0) {
if (path->slots[0] == 0)
break;
path->slots[0]--;
} else if (ret < 0) {
break;
}
ret = btrfs_del_item(trans, root, path);
if (ret)
goto out;
btrfs_release_path(path);
}
ret = 0;
out:
root->fs_info->pending_quota_state = 0;
btrfs_free_path(path);
return ret;
}
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *quota_root;
struct btrfs_path *path = NULL;
struct btrfs_qgroup_status_item *ptr;
struct extent_buffer *leaf;
struct btrfs_key key;
int ret = 0;
spin_lock(&fs_info->qgroup_lock);
if (fs_info->quota_root) {
fs_info->pending_quota_state = 1;
spin_unlock(&fs_info->qgroup_lock);
goto out;
}
spin_unlock(&fs_info->qgroup_lock);
/*
* initially create the quota tree
*/
quota_root = btrfs_create_tree(trans, fs_info,
BTRFS_QUOTA_TREE_OBJECTID);
if (IS_ERR(quota_root)) {
ret = PTR_ERR(quota_root);
goto out;
}
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = 0;
key.type = BTRFS_QGROUP_STATUS_KEY;
key.offset = 0;
ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
sizeof(*ptr));
if (ret)
goto out;
leaf = path->nodes[0];
ptr = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_qgroup_status_item);
btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
btrfs_set_qgroup_status_scan(leaf, ptr, 0);
btrfs_mark_buffer_dirty(leaf);
spin_lock(&fs_info->qgroup_lock);
fs_info->quota_root = quota_root;
fs_info->pending_quota_state = 1;
spin_unlock(&fs_info->qgroup_lock);
out:
btrfs_free_path(path);
return ret;
}
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_root *quota_root;
int ret = 0;
spin_lock(&fs_info->qgroup_lock);
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
quota_root = fs_info->quota_root;
fs_info->quota_root = NULL;
btrfs_free_qgroup_config(fs_info);
spin_unlock(&fs_info->qgroup_lock);
if (!quota_root)
return -EINVAL;
ret = btrfs_clean_quota_tree(trans, quota_root);
if (ret)
goto out;
ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
if (ret)
goto out;
list_del(&quota_root->dirty_list);
btrfs_tree_lock(quota_root->node);
clean_tree_block(trans, tree_root, quota_root->node);
btrfs_tree_unlock(quota_root->node);
btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
free_extent_buffer(quota_root->node);
free_extent_buffer(quota_root->commit_root);
kfree(quota_root);
out:
return ret;
}
int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
{
/* FIXME */
return 0;
}
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
struct btrfs_root *quota_root;
int ret = 0;
quota_root = fs_info->quota_root;
if (!quota_root)
return -EINVAL;
ret = add_qgroup_relation_item(trans, quota_root, src, dst);
if (ret)
return ret;
ret = add_qgroup_relation_item(trans, quota_root, dst, src);
if (ret) {
del_qgroup_relation_item(trans, quota_root, src, dst);
return ret;
}
spin_lock(&fs_info->qgroup_lock);
ret = add_relation_rb(quota_root->fs_info, src, dst);
spin_unlock(&fs_info->qgroup_lock);
return ret;
}
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
struct btrfs_root *quota_root;
int ret = 0;
int err;
quota_root = fs_info->quota_root;
if (!quota_root)
return -EINVAL;
ret = del_qgroup_relation_item(trans, quota_root, src, dst);
err = del_qgroup_relation_item(trans, quota_root, dst, src);
if (err && !ret)
ret = err;
spin_lock(&fs_info->qgroup_lock);
del_relation_rb(fs_info, src, dst);
spin_unlock(&fs_info->qgroup_lock);
return ret;
}
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
int ret = 0;
quota_root = fs_info->quota_root;
if (!quota_root)
return -EINVAL;
ret = add_qgroup_item(trans, quota_root, qgroupid);
spin_lock(&fs_info->qgroup_lock);
qgroup = add_qgroup_rb(fs_info, qgroupid);
spin_unlock(&fs_info->qgroup_lock);
if (IS_ERR(qgroup))
ret = PTR_ERR(qgroup);
return ret;
}
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid)
{
struct btrfs_root *quota_root;
int ret = 0;
quota_root = fs_info->quota_root;
if (!quota_root)
return -EINVAL;
ret = del_qgroup_item(trans, quota_root, qgroupid);
spin_lock(&fs_info->qgroup_lock);
del_qgroup_rb(quota_root->fs_info, qgroupid);
spin_unlock(&fs_info->qgroup_lock);
return ret;
}
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
struct btrfs_qgroup_limit *limit)
{
struct btrfs_root *quota_root = fs_info->quota_root;
struct btrfs_qgroup *qgroup;
int ret = 0;
if (!quota_root)
return -EINVAL;
ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
limit->flags, limit->max_rfer,
limit->max_excl, limit->rsv_rfer,
limit->rsv_excl);
if (ret) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
printk(KERN_INFO "unable to update quota limit for %llu\n",
(unsigned long long)qgroupid);
}
spin_lock(&fs_info->qgroup_lock);
qgroup = find_qgroup_rb(fs_info, qgroupid);
if (!qgroup) {
ret = -ENOENT;
goto unlock;
}
qgroup->lim_flags = limit->flags;
qgroup->max_rfer = limit->max_rfer;
qgroup->max_excl = limit->max_excl;
qgroup->rsv_rfer = limit->rsv_rfer;
qgroup->rsv_excl = limit->rsv_excl;
unlock:
spin_unlock(&fs_info->qgroup_lock);
return ret;
}
static void qgroup_dirty(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup)
{
if (list_empty(&qgroup->dirty))
list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}
/*
* btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
* the modification into a list that's later used by btrfs_end_transaction to
* pass the recorded modifications on to btrfs_qgroup_account_ref.
*/
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op)
{
struct qgroup_update *u;
BUG_ON(!trans->delayed_ref_elem.seq);
u = kmalloc(sizeof(*u), GFP_NOFS);
if (!u)
return -ENOMEM;
u->node = node;
u->extent_op = extent_op;
list_add_tail(&u->list, &trans->qgroup_ref_list);
return 0;
}
/*
* btrfs_qgroup_account_ref is called for every ref that is added to or deleted
* from the fs. First, all roots referencing the extent are searched, and
* then the space is accounted accordingly to the different roots. The
* accounting algorithm works in 3 steps documented inline.
*/
int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_key ins;
struct btrfs_root *quota_root;
u64 ref_root;
struct btrfs_qgroup *qgroup;
struct ulist_node *unode;
struct ulist *roots = NULL;
struct ulist *tmp = NULL;
struct ulist_iterator uiter;
u64 seq;
int ret = 0;
int sgn;
if (!fs_info->quota_enabled)
return 0;
BUG_ON(!fs_info->quota_root);
ins.objectid = node->bytenr;
ins.offset = node->num_bytes;
ins.type = BTRFS_EXTENT_ITEM_KEY;
if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
struct btrfs_delayed_tree_ref *ref;
ref = btrfs_delayed_node_to_tree_ref(node);
ref_root = ref->root;
} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
node->type == BTRFS_SHARED_DATA_REF_KEY) {
struct btrfs_delayed_data_ref *ref;
ref = btrfs_delayed_node_to_data_ref(node);
ref_root = ref->root;
} else {
BUG();
}
if (!is_fstree(ref_root)) {
/*
* non-fs-trees are not being accounted
*/
return 0;
}
switch (node->action) {
case BTRFS_ADD_DELAYED_REF:
case BTRFS_ADD_DELAYED_EXTENT:
sgn = 1;
break;
case BTRFS_DROP_DELAYED_REF:
sgn = -1;
break;
case BTRFS_UPDATE_DELAYED_HEAD:
return 0;
default:
BUG();
}
/*
* the delayed ref sequence number we pass depends on the direction of
* the operation. for add operations, we pass (node->seq - 1) to skip
* the delayed ref's current sequence number, because we need the state
* of the tree before the add operation. for delete operations, we pass
* (node->seq) to include the delayed ref's current sequence number,
* because we need the state of the tree after the delete operation.
*/
ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
sgn > 0 ? node->seq - 1 : node->seq, &roots);
if (ret < 0)
goto out;
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
if (!quota_root)
goto unlock;
qgroup = find_qgroup_rb(fs_info, ref_root);
if (!qgroup)
goto unlock;
/*
* step 1: for each old ref, visit all nodes once and inc refcnt
*/
tmp = ulist_alloc(GFP_ATOMIC);
if (!tmp) {
ret = -ENOMEM;
goto unlock;
}
seq = fs_info->qgroup_seq;
fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(roots, &uiter))) {
struct ulist_node *tmp_unode;
struct ulist_iterator tmp_uiter;
struct btrfs_qgroup *qg;
qg = find_qgroup_rb(fs_info, unode->val);
if (!qg)
continue;
ulist_reinit(tmp);
/* XXX id not needed */
ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC);
ULIST_ITER_INIT(&tmp_uiter);
while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
struct btrfs_qgroup_list *glist;
qg = (struct btrfs_qgroup *)tmp_unode->aux;
if (qg->refcnt < seq)
qg->refcnt = seq + 1;
else
++qg->refcnt;
list_for_each_entry(glist, &qg->groups, next_group) {
ulist_add(tmp, glist->group->qgroupid,
(unsigned long)glist->group,
GFP_ATOMIC);
}
}
}
/*
* step 2: walk from the new root
*/
ulist_reinit(tmp);
ulist_add(tmp, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC);
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(tmp, &uiter))) {
struct btrfs_qgroup *qg;
struct btrfs_qgroup_list *glist;
qg = (struct btrfs_qgroup *)unode->aux;
if (qg->refcnt < seq) {
/* not visited by step 1 */
qg->rfer += sgn * node->num_bytes;
qg->rfer_cmpr += sgn * node->num_bytes;
if (roots->nnodes == 0) {
qg->excl += sgn * node->num_bytes;
qg->excl_cmpr += sgn * node->num_bytes;
}
qgroup_dirty(fs_info, qg);
}
WARN_ON(qg->tag >= seq);
qg->tag = seq;
list_for_each_entry(glist, &qg->groups, next_group) {
ulist_add(tmp, glist->group->qgroupid,
(unsigned long)glist->group, GFP_ATOMIC);
}
}
/*
* step 3: walk again from old refs
*/
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(roots, &uiter))) {
struct btrfs_qgroup *qg;
struct ulist_node *tmp_unode;
struct ulist_iterator tmp_uiter;
qg = find_qgroup_rb(fs_info, unode->val);
if (!qg)
continue;
ulist_reinit(tmp);
ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC);
ULIST_ITER_INIT(&tmp_uiter);
while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
struct btrfs_qgroup_list *glist;
qg = (struct btrfs_qgroup *)tmp_unode->aux;
if (qg->tag == seq)
continue;
if (qg->refcnt - seq == roots->nnodes) {
qg->excl -= sgn * node->num_bytes;
qg->excl_cmpr -= sgn * node->num_bytes;
qgroup_dirty(fs_info, qg);
}
list_for_each_entry(glist, &qg->groups, next_group) {
ulist_add(tmp, glist->group->qgroupid,
(unsigned long)glist->group,
GFP_ATOMIC);
}
}
}
ret = 0;
unlock:
spin_unlock(&fs_info->qgroup_lock);
out:
ulist_free(roots);
ulist_free(tmp);
return ret;
}
/*
* called from commit_transaction. Writes all changed qgroups to disk.
*/
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *quota_root = fs_info->quota_root;
int ret = 0;
if (!quota_root)
goto out;
fs_info->quota_enabled = fs_info->pending_quota_state;
spin_lock(&fs_info->qgroup_lock);
while (!list_empty(&fs_info->dirty_qgroups)) {
struct btrfs_qgroup *qgroup;
qgroup = list_first_entry(&fs_info->dirty_qgroups,
struct btrfs_qgroup, dirty);
list_del_init(&qgroup->dirty);
spin_unlock(&fs_info->qgroup_lock);
ret = update_qgroup_info_item(trans, quota_root, qgroup);
if (ret)
fs_info->qgroup_flags |=
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
spin_lock(&fs_info->qgroup_lock);
}
if (fs_info->quota_enabled)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
else
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
spin_unlock(&fs_info->qgroup_lock);
ret = update_qgroup_status_item(trans, fs_info, quota_root);
if (ret)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
out:
return ret;
}
/*
* copy the acounting information between qgroups. This is necessary when a
* snapshot or a subvolume is created
*/
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit)
{
int ret = 0;
int i;
u64 *i_qgroups;
struct btrfs_root *quota_root = fs_info->quota_root;
struct btrfs_qgroup *srcgroup;
struct btrfs_qgroup *dstgroup;
u32 level_size = 0;
if (!fs_info->quota_enabled)
return 0;
if (!quota_root)
return -EINVAL;
/*
* create a tracking group for the subvol itself
*/
ret = add_qgroup_item(trans, quota_root, objectid);
if (ret)
goto out;
if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
ret = update_qgroup_limit_item(trans, quota_root, objectid,
inherit->lim.flags,
inherit->lim.max_rfer,
inherit->lim.max_excl,
inherit->lim.rsv_rfer,
inherit->lim.rsv_excl);
if (ret)
goto out;
}
if (srcid) {
struct btrfs_root *srcroot;
struct btrfs_key srckey;
int srcroot_level;
srckey.objectid = srcid;
srckey.type = BTRFS_ROOT_ITEM_KEY;
srckey.offset = (u64)-1;
srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
if (IS_ERR(srcroot)) {
ret = PTR_ERR(srcroot);
goto out;
}
rcu_read_lock();
srcroot_level = btrfs_header_level(srcroot->node);
level_size = btrfs_level_size(srcroot, srcroot_level);
rcu_read_unlock();
}
/*
* add qgroup to all inherited groups
*/
if (inherit) {
i_qgroups = (u64 *)(inherit + 1);
for (i = 0; i < inherit->num_qgroups; ++i) {
ret = add_qgroup_relation_item(trans, quota_root,
objectid, *i_qgroups);
if (ret)
goto out;
ret = add_qgroup_relation_item(trans, quota_root,
*i_qgroups, objectid);
if (ret)
goto out;
++i_qgroups;
}
}
spin_lock(&fs_info->qgroup_lock);
dstgroup = add_qgroup_rb(fs_info, objectid);
if (!dstgroup)
goto unlock;
if (srcid) {
srcgroup = find_qgroup_rb(fs_info, srcid);
if (!srcgroup)
goto unlock;
dstgroup->rfer = srcgroup->rfer - level_size;
dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
srcgroup->excl = level_size;
srcgroup->excl_cmpr = level_size;
qgroup_dirty(fs_info, dstgroup);
qgroup_dirty(fs_info, srcgroup);
}
if (!inherit)
goto unlock;
i_qgroups = (u64 *)(inherit + 1);
for (i = 0; i < inherit->num_qgroups; ++i) {
ret = add_relation_rb(quota_root->fs_info, objectid,
*i_qgroups);
if (ret)
goto unlock;
++i_qgroups;
}
for (i = 0; i < inherit->num_ref_copies; ++i) {
struct btrfs_qgroup *src;
struct btrfs_qgroup *dst;
src = find_qgroup_rb(fs_info, i_qgroups[0]);
dst = find_qgroup_rb(fs_info, i_qgroups[1]);
if (!src || !dst) {
ret = -EINVAL;
goto unlock;
}
dst->rfer = src->rfer - level_size;
dst->rfer_cmpr = src->rfer_cmpr - level_size;
i_qgroups += 2;
}
for (i = 0; i < inherit->num_excl_copies; ++i) {
struct btrfs_qgroup *src;
struct btrfs_qgroup *dst;
src = find_qgroup_rb(fs_info, i_qgroups[0]);
dst = find_qgroup_rb(fs_info, i_qgroups[1]);
if (!src || !dst) {
ret = -EINVAL;
goto unlock;
}
dst->excl = src->excl + level_size;
dst->excl_cmpr = src->excl_cmpr + level_size;
i_qgroups += 2;
}
unlock:
spin_unlock(&fs_info->qgroup_lock);
out:
return ret;
}
/*
* reserve some space for a qgroup and all its parents. The reservation takes
* place with start_transaction or dealloc_reserve, similar to ENOSPC
* accounting. If not enough space is available, EDQUOT is returned.
* We assume that the requested space is new for all qgroups.
*/
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
struct btrfs_fs_info *fs_info = root->fs_info;
u64 ref_root = root->root_key.objectid;
int ret = 0;
struct ulist *ulist = NULL;
struct ulist_node *unode;
struct ulist_iterator uiter;
if (!is_fstree(ref_root))
return 0;
if (num_bytes == 0)
return 0;
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
if (!quota_root)
goto out;
qgroup = find_qgroup_rb(fs_info, ref_root);
if (!qgroup)
goto out;
/*
* in a first step, we check all affected qgroups if any limits would
* be exceeded
*/
ulist = ulist_alloc(GFP_ATOMIC);
ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC);
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(ulist, &uiter))) {
struct btrfs_qgroup *qg;
struct btrfs_qgroup_list *glist;
qg = (struct btrfs_qgroup *)unode->aux;
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
qg->reserved + qg->rfer + num_bytes >
qg->max_rfer)
ret = -EDQUOT;
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
qg->reserved + qg->excl + num_bytes >
qg->max_excl)
ret = -EDQUOT;
list_for_each_entry(glist, &qg->groups, next_group) {
ulist_add(ulist, glist->group->qgroupid,
(unsigned long)glist->group, GFP_ATOMIC);
}
}
if (ret)
goto out;
/*
* no limits exceeded, now record the reservation into all qgroups
*/
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(ulist, &uiter))) {
struct btrfs_qgroup *qg;
qg = (struct btrfs_qgroup *)unode->aux;
qg->reserved += num_bytes;
}
out:
spin_unlock(&fs_info->qgroup_lock);
ulist_free(ulist);
return ret;
}
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
struct btrfs_fs_info *fs_info = root->fs_info;
struct ulist *ulist = NULL;
struct ulist_node *unode;
struct ulist_iterator uiter;
u64 ref_root = root->root_key.objectid;
if (!is_fstree(ref_root))
return;
if (num_bytes == 0)
return;
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
if (!quota_root)
goto out;
qgroup = find_qgroup_rb(fs_info, ref_root);
if (!qgroup)
goto out;
ulist = ulist_alloc(GFP_ATOMIC);
ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC);
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(ulist, &uiter))) {
struct btrfs_qgroup *qg;
struct btrfs_qgroup_list *glist;
qg = (struct btrfs_qgroup *)unode->aux;
qg->reserved -= num_bytes;
list_for_each_entry(glist, &qg->groups, next_group) {
ulist_add(ulist, glist->group->qgroupid,
(unsigned long)glist->group, GFP_ATOMIC);
}
}
out:
spin_unlock(&fs_info->qgroup_lock);
ulist_free(ulist);
}
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
{
if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
return;
printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n",
trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
trans->delayed_ref_elem.seq);
BUG();
}
......@@ -38,7 +38,6 @@ void put_transaction(struct btrfs_transaction *transaction)
if (atomic_dec_and_test(&transaction->use_count)) {
BUG_ON(!list_empty(&transaction->list));
WARN_ON(transaction->delayed_refs.root.rb_node);
WARN_ON(!list_empty(&transaction->delayed_refs.seq_head));
memset(transaction, 0, sizeof(*transaction));
kmem_cache_free(btrfs_transaction_cachep, transaction);
}
......@@ -126,7 +125,6 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
cur_trans->delayed_refs.num_heads = 0;
cur_trans->delayed_refs.flushing = 0;
cur_trans->delayed_refs.run_delayed_start = 0;
cur_trans->delayed_refs.seq = 1;
/*
* although the tree mod log is per file system and not per transaction,
......@@ -145,10 +143,8 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
}
atomic_set(&fs_info->tree_mod_seq, 0);
init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
spin_lock_init(&cur_trans->commit_lock);
spin_lock_init(&cur_trans->delayed_refs.lock);
INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
......@@ -299,6 +295,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
struct btrfs_transaction *cur_trans;
u64 num_bytes = 0;
int ret;
u64 qgroup_reserved = 0;
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
return ERR_PTR(-EROFS);
......@@ -317,6 +314,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
* the appropriate flushing if need be.
*/
if (num_items > 0 && root != root->fs_info->chunk_root) {
if (root->fs_info->quota_enabled &&
is_fstree(root->root_key.objectid)) {
qgroup_reserved = num_items * root->leafsize;
ret = btrfs_qgroup_reserve(root, qgroup_reserved);
if (ret)
return ERR_PTR(ret);
}
num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
ret = btrfs_block_rsv_add(root,
&root->fs_info->trans_block_rsv,
......@@ -349,12 +354,16 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
h->transaction = cur_trans;
h->blocks_used = 0;
h->bytes_reserved = 0;
h->root = root;
h->delayed_ref_updates = 0;
h->use_count = 1;
h->adding_csums = 0;
h->block_rsv = NULL;
h->orig_rsv = NULL;
h->aborted = 0;
h->qgroup_reserved = qgroup_reserved;
h->delayed_ref_elem.seq = 0;
INIT_LIST_HEAD(&h->qgroup_ref_list);
smp_mb();
if (cur_trans->blocked && may_wait_transaction(root, type)) {
......@@ -505,6 +514,24 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
return 0;
}
/*
* do the qgroup accounting as early as possible
*/
err = btrfs_delayed_refs_qgroup_accounting(trans, info);
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
/*
* the same root has to be passed to start_transaction and
* end_transaction. Subvolume quota depends on this.
*/
WARN_ON(trans->root != root);
if (trans->qgroup_reserved) {
btrfs_qgroup_free(root, trans->qgroup_reserved);
trans->qgroup_reserved = 0;
}
while (count < 2) {
unsigned long cur = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
......@@ -559,6 +586,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
err = -EIO;
}
assert_qgroups_uptodate(trans);
memset(trans, 0, sizeof(*trans));
kmem_cache_free(btrfs_trans_handle_cachep, trans);
......@@ -777,6 +805,13 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
ret = btrfs_run_dev_stats(trans, root->fs_info);
BUG_ON(ret);
ret = btrfs_run_qgroups(trans, root->fs_info);
BUG_ON(ret);
/* run_qgroups might have added some more refs */
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
BUG_ON(ret);
while (!list_empty(&fs_info->dirty_cowonly_roots)) {
next = fs_info->dirty_cowonly_roots.next;
list_del_init(next);
......@@ -949,6 +984,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
}
ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid,
objectid, pending->inherit);
kfree(pending->inherit);
if (ret) {
pending->error = ret;
goto fail;
}
key.objectid = objectid;
key.offset = (u64)-1;
key.type = BTRFS_ROOT_ITEM_KEY;
......@@ -1344,6 +1387,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
if (ret)
goto cleanup_transaction;
/*
* running the delayed items may have added new refs. account
* them now so that they hinder processing of more delayed refs
* as little as possible.
*/
btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
/*
* rename don't use btrfs_join_transaction, so, once we
* set the transaction to blocked above, we aren't going
......@@ -1456,6 +1506,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
root->fs_info->chunk_root->node);
switch_commit_root(root->fs_info->chunk_root);
assert_qgroups_uptodate(trans);
update_super_roots(root);
if (!root->fs_info->log_root_recovering) {
......
......@@ -20,6 +20,7 @@
#define __BTRFS_TRANSACTION__
#include "btrfs_inode.h"
#include "delayed-ref.h"
#include "ctree.h"
struct btrfs_transaction {
u64 transid;
......@@ -49,6 +50,7 @@ struct btrfs_transaction {
struct btrfs_trans_handle {
u64 transid;
u64 bytes_reserved;
u64 qgroup_reserved;
unsigned long use_count;
unsigned long blocks_reserved;
unsigned long blocks_used;
......@@ -58,12 +60,21 @@ struct btrfs_trans_handle {
struct btrfs_block_rsv *orig_rsv;
int aborted;
int adding_csums;
/*
* this root is only needed to validate that the root passed to
* start_transaction is the same as the one passed to end_transaction.
* Subvolume quota depends on this
*/
struct btrfs_root *root;
struct seq_list delayed_ref_elem;
struct list_head qgroup_ref_list;
};
struct btrfs_pending_snapshot {
struct dentry *dentry;
struct btrfs_root *root;
struct btrfs_root *snap;
struct btrfs_qgroup_inherit *inherit;
/* block reservation for the operation */
struct btrfs_block_rsv block_rsv;
/* extra metadata reseration for relocation */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment