Commit 6333bd2f, authored and committed by Kent Overstreet

bcachefs: Improve handling of extents in bch2_trans_update()

The transaction update/commit path cares about whether it's inserting
extents or regular keys; extents require extra passes (handling of
overlapping extents) but sometimes we want to skip all that. This
clarifies things by adding a new member to btree_insert_entry specifying
whether the key being inserted is an extent, instead of overloading
BTREE_ITER_IS_EXTENTS.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 2436cb9f
...@@ -335,7 +335,11 @@ struct bkey_cached { ...@@ -335,7 +335,11 @@ struct bkey_cached {
struct btree_insert_entry { struct btree_insert_entry {
unsigned trigger_flags; unsigned trigger_flags;
u8 bkey_type;
u8 btree_id;
u8 level;
unsigned trans_triggers_run:1; unsigned trans_triggers_run:1;
unsigned is_extent:1;
struct bkey_i *k; struct bkey_i *k;
struct btree_iter *iter; struct btree_iter *iter;
}; };
...@@ -589,19 +593,20 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter) ...@@ -589,19 +593,20 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter)
return btree_node_type_is_extents(btree_iter_key_type(iter)); return btree_node_type_is_extents(btree_iter_key_type(iter));
} }
#define BTREE_NODE_TYPE_HAS_TRIGGERS \ #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \
((1U << BKEY_TYPE_extents)| \ ((1U << BKEY_TYPE_extents)| \
(1U << BKEY_TYPE_alloc)| \
(1U << BKEY_TYPE_inodes)| \ (1U << BKEY_TYPE_inodes)| \
(1U << BKEY_TYPE_reflink)| \
(1U << BKEY_TYPE_stripes)| \ (1U << BKEY_TYPE_stripes)| \
(1U << BKEY_TYPE_reflink)| \
(1U << BKEY_TYPE_btree)) (1U << BKEY_TYPE_btree))
#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ #define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \
((1U << BKEY_TYPE_extents)| \ ((1U << BKEY_TYPE_alloc)| \
(1U << BKEY_TYPE_inodes)| \ (1U << BKEY_TYPE_stripes))
(1U << BKEY_TYPE_stripes)| \
(1U << BKEY_TYPE_reflink)) #define BTREE_NODE_TYPE_HAS_TRIGGERS \
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
enum btree_trigger_flags { enum btree_trigger_flags {
__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */ __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
......
...@@ -21,6 +21,14 @@ ...@@ -21,6 +21,14 @@
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include <linux/sort.h> #include <linux/sort.h>
/*
 * Three-way comparison of two pending update entries.
 *
 * The comparisons are chained with the GNU `?:` operator, so the result is
 * the first non-zero value of, in order:
 *   1. cmp_int() on btree_id,
 *   2. cmp_int() on level, with the sign inverted (note the leading '-'),
 *   3. bkey_cmp() on the position (k->k.p) of the key being inserted.
 * If all three compare equal the result is 0.
 *
 * NOTE(review): presumably cmp_int()/bkey_cmp() follow the usual
 * negative/zero/positive convention, which would make this order entries by
 * ascending btree_id, then descending level, then ascending key position --
 * confirm against their definitions.
 */
static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
const struct btree_insert_entry *r)
{
return cmp_int(l->btree_id, r->btree_id) ?:
-cmp_int(l->level, r->level) ?:
bkey_cmp(l->k->k.p, r->k->k.p);
}
static inline bool same_leaf_as_prev(struct btree_trans *trans, static inline bool same_leaf_as_prev(struct btree_trans *trans,
struct btree_insert_entry *i) struct btree_insert_entry *i)
{ {
...@@ -211,15 +219,15 @@ static bool btree_insert_key_leaf(struct btree_trans *trans, ...@@ -211,15 +219,15 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
/* Normal update interface: */ /* Normal update interface: */
static inline void btree_insert_entry_checks(struct btree_trans *trans, static inline void btree_insert_entry_checks(struct btree_trans *trans,
struct btree_iter *iter, struct btree_insert_entry *i)
struct bkey_i *insert)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
BUG_ON(bkey_cmp(insert->k.p, iter->real_pos));
BUG_ON(bch2_debug_check_bkeys && BUG_ON(bch2_debug_check_bkeys &&
bch2_bkey_invalid(c, bkey_i_to_s_c(insert), bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type));
__btree_node_type(iter->level, iter->btree_id))); BUG_ON(bkey_cmp(i->k->k.p, i->iter->real_pos));
BUG_ON(i->level != i->iter->level);
BUG_ON(i->btree_id != i->iter->btree_id);
} }
static noinline int static noinline int
...@@ -332,19 +340,6 @@ static inline void do_btree_insert_one(struct btree_trans *trans, ...@@ -332,19 +340,6 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
} }
} }
/*
 * Returns true when the bit numbered iter->btree_id is set in the
 * BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS mask.
 *
 * (Shown on the removed side of this diff: call sites in the new code test
 * the mask against the update entry's bkey_type instead.)
 */
static inline bool iter_has_trans_triggers(struct btree_iter *iter)
{
return BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << iter->btree_id);
}
/*
 * Returns true when the bit numbered iter->btree_id is set in a mask built
 * from:
 *   - every bit of BTREE_NODE_TYPE_HAS_TRIGGERS that is not also in
 *     BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS, plus
 *   - the bit for BTREE_ID_stripes, which is OR'ed in explicitly.
 *
 * Note that the mask combines BTREE_NODE_TYPE_* masks with a BTREE_ID_*
 * bit and is then indexed by a btree id.
 *
 * (Shown on the removed side of this diff: the new code tests
 * BTREE_NODE_TYPE_HAS_MEM_TRIGGERS against the entry's bkey_type instead.)
 */
static inline bool iter_has_nontrans_triggers(struct btree_iter *iter)
{
return (((BTREE_NODE_TYPE_HAS_TRIGGERS &
~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS)) |
(1U << BTREE_ID_stripes)) &
(1U << iter->btree_id);
}
static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter) static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter)
{ {
__bch2_btree_iter_unlock(iter); __bch2_btree_iter_unlock(iter);
...@@ -405,7 +400,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, ...@@ -405,7 +400,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
return ret; return ret;
} }
if (btree_node_type_needs_gc(i->iter->btree_id)) if (btree_node_type_needs_gc(i->bkey_type))
marking = true; marking = true;
} }
...@@ -459,7 +454,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, ...@@ -459,7 +454,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
} }
trans_for_each_update(trans, i) trans_for_each_update(trans, i)
if (iter_has_nontrans_triggers(i->iter)) if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type))
bch2_mark_update(trans, i->iter, i->k, bch2_mark_update(trans, i->iter, i->k,
&fs_usage->u, i->trigger_flags); &fs_usage->u, i->trigger_flags);
...@@ -531,7 +526,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, ...@@ -531,7 +526,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
trans_for_each_update2(trans, i) trans_for_each_update2(trans, i)
btree_insert_entry_checks(trans, i->iter, i->k); btree_insert_entry_checks(trans, i);
bch2_btree_trans_verify_locks(trans); bch2_btree_trans_verify_locks(trans);
trans_for_each_update2(trans, i) trans_for_each_update2(trans, i)
...@@ -696,69 +691,64 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans) ...@@ -696,69 +691,64 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
return 0; return 0;
} }
static inline int btree_iter_pos_cmp(const struct btree_iter *l, static int __bch2_trans_update2(struct btree_trans *trans,
const struct btree_iter *r) struct btree_insert_entry n)
{ {
return cmp_int(l->btree_id, r->btree_id) ?: struct btree_insert_entry *i;
bkey_cmp(l->pos, r->pos);
}
static int bch2_trans_update2(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *insert)
{
struct btree_insert_entry *i, n = (struct btree_insert_entry) {
.iter = iter, .k = insert
};
int ret;
btree_insert_entry_checks(trans, n.iter, n.k); btree_insert_entry_checks(trans, &n);
EBUG_ON(trans->nr_updates2 >= BTREE_ITER_MAX); EBUG_ON(trans->nr_updates2 >= BTREE_ITER_MAX);
ret = bch2_btree_iter_traverse(iter); n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
if (unlikely(ret))
return ret;
BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
trans_for_each_update2(trans, i) { trans_for_each_update2(trans, i)
if (btree_iter_pos_cmp(n.iter, i->iter) == 0) { if (btree_insert_entry_cmp(&n, i) <= 0)
*i = n;
return 0;
}
if (btree_iter_pos_cmp(n.iter, i->iter) <= 0)
break; break;
}
if (i < trans->updates2 + trans->nr_updates2 &&
!btree_insert_entry_cmp(&n, i))
*i = n;
else
array_insert_item(trans->updates2, trans->nr_updates2, array_insert_item(trans->updates2, trans->nr_updates2,
i - trans->updates2, n); i - trans->updates2, n);
return 0; return 0;
} }
static int extent_update_to_keys(struct btree_trans *trans, static int bch2_trans_update2(struct btree_trans *trans,
struct btree_iter *orig_iter, struct btree_iter *iter,
struct bkey_i *insert) struct bkey_i *insert)
{ {
struct btree_iter *iter; return __bch2_trans_update2(trans, (struct btree_insert_entry) {
.bkey_type = __btree_node_type(iter->level, iter->btree_id),
.btree_id = iter->btree_id,
.level = iter->level,
.iter = iter,
.k = insert,
});
}
static int extent_update_to_keys(struct btree_trans *trans,
struct btree_insert_entry n)
{
int ret; int ret;
ret = bch2_extent_can_insert(trans, orig_iter, insert); if (bkey_deleted(&n.k->k))
return 0;
ret = bch2_extent_can_insert(trans, n.iter, n.k);
if (ret) if (ret)
return ret; return ret;
if (bkey_deleted(&insert->k)) n.iter = bch2_trans_copy_iter(trans, n.iter);
return 0;
iter = bch2_trans_copy_iter(trans, orig_iter); n.iter->flags |= BTREE_ITER_INTENT;
__bch2_btree_iter_set_pos(n.iter, n.k->k.p, false);
n.is_extent = false;
iter->flags |= BTREE_ITER_INTENT; ret = __bch2_trans_update2(trans, n);
__bch2_btree_iter_set_pos(iter, insert->k.p, false); bch2_trans_iter_put(trans, n.iter);
ret = bch2_trans_update2(trans, iter, insert);
bch2_trans_iter_put(trans, iter);
return ret; return ret;
} }
...@@ -868,7 +858,7 @@ int __bch2_trans_commit(struct btree_trans *trans) ...@@ -868,7 +858,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
if (btree_iter_type(i->iter) != BTREE_ITER_CACHED && if (btree_iter_type(i->iter) != BTREE_ITER_CACHED &&
!(i->trigger_flags & BTREE_TRIGGER_NORUN)) !(i->trigger_flags & BTREE_TRIGGER_NORUN))
bch2_btree_key_cache_verify_clean(trans, bch2_btree_key_cache_verify_clean(trans,
i->iter->btree_id, i->iter->pos); i->btree_id, i->k->k.p);
#endif #endif
/* /*
...@@ -879,24 +869,7 @@ int __bch2_trans_commit(struct btree_trans *trans) ...@@ -879,24 +869,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
trans_trigger_run = false; trans_trigger_run = false;
trans_for_each_update(trans, i) { trans_for_each_update(trans, i) {
ret = bch2_btree_iter_traverse(i->iter); if ((BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) &&
if (unlikely(ret)) {
trace_trans_restart_traverse(trans->ip);
goto out;
}
/*
* We're not using bch2_btree_iter_upgrade here because
* we know trans->nounlock can't be set:
*/
if (unlikely(!btree_node_intent_locked(i->iter, i->iter->level) &&
!__bch2_btree_iter_upgrade(i->iter, i->iter->level + 1))) {
trace_trans_restart_upgrade(trans->ip);
ret = -EINTR;
goto out;
}
if (iter_has_trans_triggers(i->iter) &&
!i->trans_triggers_run) { !i->trans_triggers_run) {
i->trans_triggers_run = true; i->trans_triggers_run = true;
trans_trigger_run = true; trans_trigger_run = true;
...@@ -914,39 +887,46 @@ int __bch2_trans_commit(struct btree_trans *trans) ...@@ -914,39 +887,46 @@ int __bch2_trans_commit(struct btree_trans *trans)
/* Turn extents updates into keys: */ /* Turn extents updates into keys: */
trans_for_each_update(trans, i) trans_for_each_update(trans, i)
if (i->iter->flags & BTREE_ITER_IS_EXTENTS) { if (i->is_extent) {
struct bpos start = bkey_start_pos(&i->k->k); struct bpos start = bkey_start_pos(&i->k->k);
while (i + 1 < trans->updates + trans->nr_updates && while (i + 1 < trans->updates + trans->nr_updates &&
i[0].iter->btree_id == i[1].iter->btree_id && i[0].btree_id == i[1].btree_id &&
!bkey_cmp(i[0].k->k.p, bkey_start_pos(&i[1].k->k))) !bkey_cmp(i[0].k->k.p, bkey_start_pos(&i[1].k->k)))
i++; i++;
ret = extent_handle_overwrites(trans, i->iter->btree_id, ret = extent_handle_overwrites(trans, i->btree_id,
start, i->k->k.p); start, i->k->k.p);
if (ret) if (ret)
goto out; goto out;
} }
trans_for_each_update(trans, i) { trans_for_each_update(trans, i) {
if (i->iter->flags & BTREE_ITER_IS_EXTENTS) { ret = i->is_extent
ret = extent_update_to_keys(trans, i->iter, i->k); ? extent_update_to_keys(trans, *i)
} else { : __bch2_trans_update2(trans, *i);
ret = bch2_trans_update2(trans, i->iter, i->k);
}
if (ret) if (ret)
goto out; goto out;
} }
trans_for_each_update2(trans, i) { trans_for_each_update2(trans, i) {
BUG_ON(i->iter->locks_want < 1);
ret = bch2_btree_iter_traverse(i->iter); ret = bch2_btree_iter_traverse(i->iter);
if (unlikely(ret)) { if (unlikely(ret)) {
trace_trans_restart_traverse(trans->ip); trace_trans_restart_traverse(trans->ip);
goto out; goto out;
} }
/*
* We're not using bch2_btree_iter_upgrade here because
* we know trans->nounlock can't be set:
*/
if (unlikely(!btree_node_intent_locked(i->iter, i->iter->level) &&
!__bch2_btree_iter_upgrade(i->iter, i->iter->level + 1))) {
trace_trans_restart_upgrade(trans->ip);
ret = -EINTR;
goto out;
}
u64s = jset_u64s(i->k->k.u64s); u64s = jset_u64s(i->k->k.u64s);
if (btree_iter_type(i->iter) == BTREE_ITER_CACHED && if (btree_iter_type(i->iter) == BTREE_ITER_CACHED &&
likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)))
...@@ -989,57 +969,78 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, ...@@ -989,57 +969,78 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_i *k, enum btree_trigger_flags flags) struct bkey_i *k, enum btree_trigger_flags flags)
{ {
struct btree_insert_entry *i, n = (struct btree_insert_entry) { struct btree_insert_entry *i, n = (struct btree_insert_entry) {
.trigger_flags = flags, .iter = iter, .k = k .trigger_flags = flags,
.bkey_type = __btree_node_type(iter->level, iter->btree_id),
.btree_id = iter->btree_id,
.level = iter->level,
.is_extent = (iter->flags & BTREE_ITER_IS_EXTENTS) != 0,
.iter = iter,
.k = k
}; };
BUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
#ifdef CONFIG_BCACHEFS_DEBUG #ifdef CONFIG_BCACHEFS_DEBUG
BUG_ON(bkey_cmp(iter->pos, BUG_ON(bkey_cmp(iter->pos,
(iter->flags & BTREE_ITER_IS_EXTENTS) n.is_extent ? bkey_start_pos(&k->k) : k->k.p));
? bkey_start_pos(&k->k)
: k->k.p));
trans_for_each_update(trans, i) { trans_for_each_update(trans, i) {
BUG_ON(bkey_cmp(i->iter->pos, BUG_ON(bkey_cmp(i->iter->pos,
(i->iter->flags & BTREE_ITER_IS_EXTENTS) i->is_extent ? bkey_start_pos(&i->k->k) : i->k->k.p));
? bkey_start_pos(&i->k->k)
: i->k->k.p));
BUG_ON(i != trans->updates && BUG_ON(i != trans->updates &&
btree_iter_pos_cmp(i[-1].iter, i[0].iter) >= 0); btree_insert_entry_cmp(i - 1, i) >= 0);
} }
#endif #endif
iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
if (btree_node_type_is_extents(iter->btree_id)) { if (n.is_extent) {
iter->pos_after_commit = k->k.p; iter->pos_after_commit = k->k.p;
iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT; iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT;
} }
/* /*
* Pending updates are kept sorted: first, find position of new update: * Pending updates are kept sorted: first, find position of new update,
* then delete/trim any updates the new update overwrites:
*/ */
if (!n.is_extent) {
trans_for_each_update(trans, i) trans_for_each_update(trans, i)
if (btree_iter_pos_cmp(iter, i->iter) <= 0) if (btree_insert_entry_cmp(&n, i) <= 0)
break; break;
/* if (i < trans->updates + trans->nr_updates &&
* Now delete/trim any updates the new update overwrites: !btree_insert_entry_cmp(&n, i))
*/ *i = n;
if (i > trans->updates && else
i[-1].iter->btree_id == iter->btree_id && array_insert_item(trans->updates, trans->nr_updates,
bkey_cmp(iter->pos, i[-1].k->k.p) < 0) i - trans->updates, n);
bch2_cut_back(n.iter->pos, i[-1].k); } else {
trans_for_each_update(trans, i)
if (btree_insert_entry_cmp(&n, i) < 0)
break;
while (i < trans->updates + trans->nr_updates && while (i > trans->updates &&
iter->btree_id == i->iter->btree_id && i[-1].btree_id == n.btree_id &&
bkey_cmp(n.k->k.p, i->k->k.p) >= 0) bkey_cmp(bkey_start_pos(&n.k->k),
bkey_start_pos(&i[-1].k->k)) <= 0) {
--i;
array_remove_item(trans->updates, trans->nr_updates, array_remove_item(trans->updates, trans->nr_updates,
i - trans->updates); i - trans->updates);
}
if (i > trans->updates &&
i[-1].btree_id == n.btree_id &&
bkey_cmp(bkey_start_pos(&n.k->k), i[-1].k->k.p) < 0)
bch2_cut_back(bkey_start_pos(&n.k->k), i[-1].k);
if (i < trans->updates + trans->nr_updates && if (i < trans->updates + trans->nr_updates &&
iter->btree_id == i->iter->btree_id && i->btree_id == n.btree_id &&
bkey_cmp(n.k->k.p, i->iter->pos) > 0) { bkey_cmp(n.k->k.p, bkey_start_pos(&i->k->k)) > 0) {
/* We don't handle splitting extents here: */
BUG_ON(bkey_cmp(bkey_start_pos(&n.k->k),
bkey_start_pos(&i->k->k)) > 0);
/* /*
* When we have an extent that overwrites the start of another * When we have an extent that overwrites the start of another
* update, trimming that extent will mean the iterator's * update, trimming that extent will mean the iterator's
...@@ -1059,10 +1060,10 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, ...@@ -1059,10 +1060,10 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
bch2_btree_iter_set_pos(i->iter, n.k->k.p); bch2_btree_iter_set_pos(i->iter, n.k->k.p);
} }
EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
array_insert_item(trans->updates, trans->nr_updates, array_insert_item(trans->updates, trans->nr_updates,
i - trans->updates, n); i - trans->updates, n);
}
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment