Commit c4accde4 authored by Kent Overstreet

bcachefs: Ensure srcu lock is not held too long

The SRCU read lock that btree_trans takes exists to make it safe for
bch2_trans_relock() to deref pointers to btree nodes/key cache items we
don't have locked, but as a side effect it blocks reclaim from freeing
those items.

Thus, it's important to not hold it for too long: we need to
differentiate between bch2_trans_unlock() calls that will be only for a
short duration, and ones that will be for an unbounded duration.

This introduces bch2_trans_unlock_long(), to be used mainly by the data
move paths.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 6dfa10ab
...@@ -1109,6 +1109,9 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans, ...@@ -1109,6 +1109,9 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
if (unlikely(ret)) if (unlikely(ret))
goto out; goto out;
if (unlikely(!trans->srcu_held))
bch2_trans_srcu_lock(trans);
/* /*
* Ensure we obey path->should_be_locked: if it's set, we can't unlock * Ensure we obey path->should_be_locked: if it's set, we can't unlock
* and re-traverse the path without a transaction restart: * and re-traverse the path without a transaction restart:
...@@ -2830,8 +2833,9 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) ...@@ -2830,8 +2833,9 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
return p; return p;
} }
static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans) void bch2_trans_srcu_unlock(struct btree_trans *trans)
{ {
if (trans->srcu_held) {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_path *path; struct btree_path *path;
...@@ -2840,8 +2844,17 @@ static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans) ...@@ -2840,8 +2844,17 @@ static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset); path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); trans->srcu_held = false;
}
}
void bch2_trans_srcu_lock(struct btree_trans *trans)
{
if (!trans->srcu_held) {
trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier);
trans->srcu_lock_time = jiffies; trans->srcu_lock_time = jiffies;
trans->srcu_held = true;
}
} }
/** /**
...@@ -2895,8 +2908,9 @@ u32 bch2_trans_begin(struct btree_trans *trans) ...@@ -2895,8 +2908,9 @@ u32 bch2_trans_begin(struct btree_trans *trans)
} }
trans->last_begin_time = now; trans->last_begin_time = now;
if (unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10)))) if (unlikely(trans->srcu_held &&
bch2_trans_reset_srcu_lock(trans); time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
bch2_trans_srcu_unlock(trans);
trans->last_begin_ip = _RET_IP_; trans->last_begin_ip = _RET_IP_;
if (trans->restarted) { if (trans->restarted) {
...@@ -2983,6 +2997,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) ...@@ -2983,6 +2997,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies; trans->srcu_lock_time = jiffies;
trans->srcu_held = true;
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) { if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
struct btree_trans *pos; struct btree_trans *pos;
...@@ -3059,6 +3074,7 @@ void bch2_trans_put(struct btree_trans *trans) ...@@ -3059,6 +3074,7 @@ void bch2_trans_put(struct btree_trans *trans)
check_btree_paths_leaked(trans); check_btree_paths_leaked(trans);
if (trans->srcu_held)
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
bch2_journal_preres_put(&c->journal, &trans->journal_preres); bch2_journal_preres_put(&c->journal, &trans->journal_preres);
......
...@@ -274,6 +274,7 @@ void bch2_path_put(struct btree_trans *, struct btree_path *, bool); ...@@ -274,6 +274,7 @@ void bch2_path_put(struct btree_trans *, struct btree_path *, bool);
int bch2_trans_relock(struct btree_trans *); int bch2_trans_relock(struct btree_trans *);
int bch2_trans_relock_notrace(struct btree_trans *); int bch2_trans_relock_notrace(struct btree_trans *);
void bch2_trans_unlock(struct btree_trans *); void bch2_trans_unlock(struct btree_trans *);
void bch2_trans_unlock_long(struct btree_trans *);
bool bch2_trans_locked(struct btree_trans *); bool bch2_trans_locked(struct btree_trans *);
static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count) static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count)
...@@ -579,6 +580,9 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, ...@@ -579,6 +580,9 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
__bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, \ __bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, \
KEY_TYPE_##_type, sizeof(*_val), _val) KEY_TYPE_##_type, sizeof(*_val), _val)
void bch2_trans_srcu_unlock(struct btree_trans *);
void bch2_trans_srcu_lock(struct btree_trans *);
u32 bch2_trans_begin(struct btree_trans *); u32 bch2_trans_begin(struct btree_trans *);
/* /*
......
...@@ -753,6 +753,12 @@ void bch2_trans_unlock(struct btree_trans *trans) ...@@ -753,6 +753,12 @@ void bch2_trans_unlock(struct btree_trans *trans)
__bch2_btree_path_unlock(trans, path); __bch2_btree_path_unlock(trans, path);
} }
/*
 * Unlock variant for unlocks of unbounded duration (mainly the data move
 * paths): calls bch2_trans_unlock() and then also drops the transaction's
 * SRCU read lock via bch2_trans_srcu_unlock().
 *
 * Holding the SRCU read lock blocks reclaim from freeing btree nodes/key
 * cache items, so it must not stay held across a long wait.
 * bch2_btree_path_traverse_one() retakes it when it finds it not held.
 */
void bch2_trans_unlock_long(struct btree_trans *trans)
{
bch2_trans_unlock(trans);
bch2_trans_srcu_unlock(trans);
}
bool bch2_trans_locked(struct btree_trans *trans) bool bch2_trans_locked(struct btree_trans *trans)
{ {
struct btree_path *path; struct btree_path *path;
......
...@@ -426,6 +426,7 @@ struct btree_trans { ...@@ -426,6 +426,7 @@ struct btree_trans {
u8 nr_updates; u8 nr_updates;
u8 nr_wb_updates; u8 nr_wb_updates;
u8 wb_updates_size; u8 wb_updates_size;
bool srcu_held:1;
bool used_mempool:1; bool used_mempool:1;
bool in_traverse_all:1; bool in_traverse_all:1;
bool paths_sorted:1; bool paths_sorted:1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment