Commit 097b8a7c authored by Jan Schmidt

Btrfs: join tree mod log code with the code holding back delayed refs

We've got two mechanisms both required for reliable backref resolving (tree
mod log and holding back delayed refs). You cannot make use of one without
the other. So instead of requiring the user of this mechanism to set up both
correctly, we join them into a single interface.

Additionally, we stop inserting non-blockers into fs_info->tree_mod_seq_list
as we did before, which was of no value.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
parent cf538830
......@@ -773,9 +773,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist *refs, struct ulist *roots,
const u64 *extent_item_pos)
u64 time_seq, struct ulist *refs,
struct ulist *roots, const u64 *extent_item_pos)
{
struct btrfs_key key;
struct btrfs_path *path;
......@@ -837,7 +836,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
btrfs_put_delayed_ref(&head->node);
goto again;
}
ret = __add_delayed_refs(head, delayed_ref_seq,
ret = __add_delayed_refs(head, time_seq,
&prefs_delayed);
mutex_unlock(&head->mutex);
if (ret) {
......@@ -981,8 +980,7 @@ static void free_leaf_list(struct ulist *blocks)
*/
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist **leafs,
u64 time_seq, struct ulist **leafs,
const u64 *extent_item_pos)
{
struct ulist *tmp;
......@@ -997,7 +995,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
return -ENOMEM;
}
ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
ret = find_parent_nodes(trans, fs_info, bytenr,
time_seq, *leafs, tmp, extent_item_pos);
ulist_free(tmp);
......@@ -1024,8 +1022,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
*/
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist **roots)
u64 time_seq, struct ulist **roots)
{
struct ulist *tmp;
struct ulist_node *node = NULL;
......@@ -1043,7 +1040,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
ULIST_ITER_INIT(&uiter);
while (1) {
ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
ret = find_parent_nodes(trans, fs_info, bytenr,
time_seq, tmp, *roots, NULL);
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
......@@ -1376,11 +1373,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
struct ulist *roots = NULL;
struct ulist_node *ref_node = NULL;
struct ulist_node *root_node = NULL;
struct seq_list seq_elem = {};
struct seq_list tree_mod_seq_elem = {};
struct ulist_iterator ref_uiter;
struct ulist_iterator root_uiter;
struct btrfs_delayed_ref_root *delayed_refs = NULL;
pr_debug("resolving all inodes for extent %llu\n",
extent_item_objectid);
......@@ -1391,16 +1386,11 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
trans = btrfs_join_transaction(fs_info->extent_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
btrfs_get_delayed_seq(delayed_refs, &seq_elem);
spin_unlock(&delayed_refs->lock);
btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
}
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
seq_elem.seq, tree_mod_seq_elem.seq, &refs,
tree_mod_seq_elem.seq, &refs,
&extent_item_pos);
if (ret)
goto out;
......@@ -1408,8 +1398,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
ULIST_ITER_INIT(&ref_uiter);
while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
seq_elem.seq,
tree_mod_seq_elem.seq, &roots);
tree_mod_seq_elem.seq, &roots);
if (ret)
break;
ULIST_ITER_INIT(&root_uiter);
......@@ -1431,7 +1420,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
out:
if (!search_commit_root) {
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
btrfs_put_delayed_seq(delayed_refs, &seq_elem);
btrfs_end_transaction(trans, fs_info->extent_root);
}
......
......@@ -58,8 +58,7 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist **roots);
u64 time_seq, struct ulist **roots);
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
......
This diff is collapsed.
......@@ -1030,6 +1030,13 @@ struct btrfs_block_group_cache {
struct list_head cluster_list;
};
/*
 * Sequence list element: carries one sequence number and the linkage needed
 * to queue it on a list of such elements (fs_info->tree_mod_seq_list is
 * walked with this type).
 */
struct seq_list {
/* list linkage; the owning list is read with list_first_entry() */
struct list_head list;
/* sequence number; a delayed ref's seq is compared against this value */
u64 seq;
};
/* fs_info */
struct reloc_control;
struct btrfs_device;
struct btrfs_fs_devices;
......@@ -1144,6 +1151,8 @@ struct btrfs_fs_info {
spinlock_t tree_mod_seq_lock;
atomic_t tree_mod_seq;
struct list_head tree_mod_seq_list;
struct seq_list tree_mod_seq_elem;
wait_queue_head_t tree_mod_seq_wait;
/* this protects tree_mod_log */
rwlock_t tree_mod_log_lock;
......@@ -2798,6 +2807,16 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info);
}
/* tree mod log functions from ctree.c */
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
/*
 * Atomically advance fs_info->tree_mod_seq by one and hand back the value
 * the counter holds after the increment.
 */
static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
{
	u64 next_seq = atomic_inc_return(&fs_info->tree_mod_seq);

	return next_seq;
}
/* root-item.c */
int btrfs_find_root_ref(struct btrfs_root *tree_root,
struct btrfs_path *path,
......@@ -3157,18 +3176,6 @@ void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err);
/*
 * Sequence list element: carries one sequence number and the linkage needed
 * to queue it on a list of such elements (e.g. delayed_refs->seq_head).
 */
struct seq_list {
/* list linkage; the owning list is read with list_first_entry() */
struct list_head list;
/* sequence number recorded when the element was registered */
u64 seq;
/* NOTE(review): no reader or writer of flags is visible here - confirm use */
u32 flags;
};
void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
static inline int is_fstree(u64 rootid)
{
if (rootid == BTRFS_FS_TREE_OBJECTID ||
......
......@@ -233,22 +233,26 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
return 0;
}
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
{
struct seq_list *elem;
assert_spin_locked(&delayed_refs->lock);
if (list_empty(&delayed_refs->seq_head))
return 0;
elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list);
if (seq >= elem->seq) {
pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n",
seq, elem->seq, delayed_refs);
return 1;
int ret = 0;
spin_lock(&fs_info->tree_mod_seq_lock);
if (!list_empty(&fs_info->tree_mod_seq_list)) {
elem = list_first_entry(&fs_info->tree_mod_seq_list,
struct seq_list, list);
if (seq >= elem->seq) {
pr_debug("holding back delayed_ref %llu, lowest is "
"%llu (%p)\n", seq, elem->seq, delayed_refs);
ret = 1;
}
}
return 0;
spin_unlock(&fs_info->tree_mod_seq_lock);
return ret;
}
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
......@@ -526,7 +530,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->in_tree = 1;
if (is_fstree(ref_root))
seq = inc_delayed_seq(delayed_refs);
seq = btrfs_inc_tree_mod_seq(fs_info);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_tree_ref(ref);
......@@ -585,7 +589,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->in_tree = 1;
if (is_fstree(ref_root))
seq = inc_delayed_seq(delayed_refs);
seq = btrfs_inc_tree_mod_seq(fs_info);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_data_ref(ref);
......@@ -659,8 +663,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
num_bytes, parent, ref_root, level, action,
for_cow);
if (!is_fstree(ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
waitqueue_active(&fs_info->tree_mod_seq_wait))
wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
......@@ -708,8 +712,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
if (!is_fstree(ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
waitqueue_active(&fs_info->tree_mod_seq_wait))
wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
......@@ -736,8 +740,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data);
if (waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
if (waitqueue_active(&fs_info->tree_mod_seq_wait))
wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
......
......@@ -139,26 +139,6 @@ struct btrfs_delayed_ref_root {
int flushing;
u64 run_delayed_start;
/*
* seq number of delayed refs. We need to know if a backref was being
* added before the currently processed ref or afterwards.
*/
u64 seq;
/*
* seq_list holds a list of all seq numbers that are currently being
* added to the list. While walking backrefs (btrfs_find_all_roots,
* qgroups), which might take some time, no newer ref must be processed,
* as it might influence the outcome of the walk.
*/
struct list_head seq_head;
/*
* when the only refs we have in the list must not be processed, we want
* to wait for more refs to show up or for the end of backref walking.
*/
wait_queue_head_t seq_wait;
};
static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
......@@ -195,33 +175,8 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 search_start);
/*
 * Bump the delayed ref sequence counter and return the new value.
 * The caller must hold delayed_refs->lock; this is asserted.
 */
static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
{
	assert_spin_locked(&delayed_refs->lock);

	return ++delayed_refs->seq;
}
/*
 * Register elem as a sequence holder: stamp it with the current delayed ref
 * sequence number and append it to the tail of delayed_refs->seq_head.
 * The caller must hold delayed_refs->lock; this is asserted.
 */
static inline void
btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
struct seq_list *elem)
{
assert_spin_locked(&delayed_refs->lock);
/* copy the counter as it stands now; it is not incremented here */
elem->seq = delayed_refs->seq;
list_add_tail(&elem->list, &delayed_refs->seq_head);
}
/*
 * Unregister elem: unlink it from the list it is queued on and wake up
 * anyone sleeping on delayed_refs->seq_wait. Takes and releases
 * delayed_refs->lock itself.
 */
static inline void
btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
struct seq_list *elem)
{
spin_lock(&delayed_refs->lock);
list_del(&elem->list);
/* the wake-up is issued while the lock is still held */
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
}
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
/*
......
......@@ -1944,6 +1944,8 @@ int open_ctree(struct super_block *sb,
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
init_waitqueue_head(&fs_info->tree_mod_seq_wait);
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
spin_lock_init(&fs_info->reada_lock);
......
......@@ -2217,6 +2217,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_head *locked_ref = NULL;
struct btrfs_delayed_extent_op *extent_op;
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
int count = 0;
int must_insert_reserved = 0;
......@@ -2255,7 +2256,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ref = select_delayed_ref(locked_ref);
if (ref && ref->seq &&
btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
/*
* there are still refs with lower seq numbers in the
* process of being added. Don't run this ref yet.
......@@ -2337,7 +2338,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
}
next:
do_chunk_alloc(trans, root->fs_info->extent_root,
do_chunk_alloc(trans, fs_info->extent_root,
2 * 1024 * 1024,
btrfs_get_alloc_profile(root, 0),
CHUNK_ALLOC_NO_FORCE);
......@@ -2347,18 +2348,19 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
return count;
}
static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
unsigned long num_refs,
struct list_head *first_seq)
{
spin_unlock(&delayed_refs->lock);
pr_debug("waiting for more refs (num %ld, first %p)\n",
num_refs, first_seq);
wait_event(delayed_refs->seq_wait,
wait_event(fs_info->tree_mod_seq_wait,
num_refs != delayed_refs->num_entries ||
delayed_refs->seq_head.next != first_seq);
fs_info->tree_mod_seq_list.next != first_seq);
pr_debug("done waiting for more refs (num %ld, first %p)\n",
delayed_refs->num_entries, delayed_refs->seq_head.next);
delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
spin_lock(&delayed_refs->lock);
}
......@@ -2403,6 +2405,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
again:
consider_waiting = 0;
spin_lock(&delayed_refs->lock);
if (count == 0) {
count = delayed_refs->num_entries * 2;
run_most = 1;
......@@ -2437,7 +2440,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
num_refs = delayed_refs->num_entries;
first_seq = root->fs_info->tree_mod_seq_list.next;
} else {
wait_for_more_refs(delayed_refs,
wait_for_more_refs(root->fs_info, delayed_refs,
num_refs, first_seq);
/*
* after waiting, things have changed. we
......@@ -5190,8 +5193,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
rb_erase(&head->node.rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
if (waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
wake_up(&root->fs_info->tree_mod_seq_wait);
/*
* we don't take a ref on the node because we're removing it from the
......
......@@ -38,7 +38,6 @@ void put_transaction(struct btrfs_transaction *transaction)
if (atomic_dec_and_test(&transaction->use_count)) {
BUG_ON(!list_empty(&transaction->list));
WARN_ON(transaction->delayed_refs.root.rb_node);
WARN_ON(!list_empty(&transaction->delayed_refs.seq_head));
memset(transaction, 0, sizeof(*transaction));
kmem_cache_free(btrfs_transaction_cachep, transaction);
}
......@@ -126,7 +125,6 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
cur_trans->delayed_refs.num_heads = 0;
cur_trans->delayed_refs.flushing = 0;
cur_trans->delayed_refs.run_delayed_start = 0;
cur_trans->delayed_refs.seq = 1;
/*
* although the tree mod log is per file system and not per transaction,
......@@ -145,10 +143,8 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
}
atomic_set(&fs_info->tree_mod_seq, 0);
init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
spin_lock_init(&cur_trans->commit_lock);
spin_lock_init(&cur_trans->delayed_refs.lock);
INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment