Commit dfd41fb9, authored and committed by Kent Overstreet

bcachefs: Fix race between btree updates & journal replay

Add a flag to indicate whether a journal replay key has been
overwritten, and set/test it with appropriate btree locks held.

This fixes a race between the allocator - invalidating buckets, and
doing btree updates - and journal replay, which before this patch could
clobber the allocator thread's update with an older version of the key
from the journal.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent 528b18e6
...@@ -561,6 +561,7 @@ struct journal_keys { ...@@ -561,6 +561,7 @@ struct journal_keys {
enum btree_id btree_id:8; enum btree_id btree_id:8;
unsigned level:8; unsigned level:8;
bool allocated; bool allocated;
bool overwritten;
struct bkey_i *k; struct bkey_i *k;
u32 journal_seq; u32 journal_seq;
u32 journal_offset; u32 journal_offset;
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "journal.h" #include "journal.h"
#include "journal_reclaim.h" #include "journal_reclaim.h"
#include "keylist.h" #include "keylist.h"
#include "recovery.h"
#include "subvolume.h" #include "subvolume.h"
#include "replicas.h" #include "replicas.h"
#include "trace.h" #include "trace.h"
...@@ -625,6 +626,14 @@ static inline int trans_lock_write(struct btree_trans *trans) ...@@ -625,6 +626,14 @@ static inline int trans_lock_write(struct btree_trans *trans)
return btree_trans_restart(trans); return btree_trans_restart(trans);
} }
static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
{
struct btree_insert_entry *i;
trans_for_each_update(trans, i)
bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p);
}
/* /*
* Get journal reservation, take write locks, and attempt to do btree update(s): * Get journal reservation, take write locks, and attempt to do btree update(s):
*/ */
...@@ -702,6 +711,9 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, ...@@ -702,6 +711,9 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip); ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);
if (!ret && unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
bch2_drop_overwrites_from_journal(trans);
trans_for_each_update(trans, i) trans_for_each_update(trans, i)
if (!same_leaf_as_prev(trans, i)) if (!same_leaf_as_prev(trans, i))
bch2_btree_node_unlock_write_inlined(trans, i->path, bch2_btree_node_unlock_write_inlined(trans, i->path,
......
...@@ -185,6 +185,19 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id, ...@@ -185,6 +185,19 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
return bch2_journal_key_insert(c, id, level, &whiteout); return bch2_journal_key_insert(c, id, level, &whiteout);
} }
/*
 * Flag the journal key at (@btree, @level, @pos) as overwritten.
 *
 * The slot returned by journal_key_search() is only flagged when it is within
 * the array and matches the btree id, level and position exactly; otherwise
 * this is a no-op.
 */
void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
				  unsigned level, struct bpos pos)
{
	struct journal_keys *keys = &c->journal_keys;
	size_t idx = journal_key_search(keys, btree, level, pos);

	/* Nothing to flag unless the search landed on an exact match. */
	if (idx >= keys->nr ||
	    keys->d[idx].btree_id != btree ||
	    keys->d[idx].level != level ||
	    bpos_cmp(keys->d[idx].k->k.p, pos))
		return;

	keys->d[idx].overwritten = true;
}
static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter) static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
{ {
struct journal_key *k = iter->idx - iter->keys->nr struct journal_key *k = iter->idx - iter->keys->nr
...@@ -539,8 +552,16 @@ static int __bch2_journal_replay_key(struct btree_trans *trans, ...@@ -539,8 +552,16 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, k->level, BTREE_MAX_DEPTH, k->level,
iter_flags); iter_flags);
ret = bch2_btree_iter_traverse(&iter) ?: ret = bch2_btree_iter_traverse(&iter);
bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN); if (ret)
goto out;
/* Must be checked with btree locked: */
if (k->overwritten)
goto out;
ret = bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN);
out:
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
return ret; return ret;
} }
......
...@@ -37,6 +37,8 @@ int bch2_journal_key_insert(struct bch_fs *, enum btree_id, ...@@ -37,6 +37,8 @@ int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
unsigned, struct bkey_i *); unsigned, struct bkey_i *);
int bch2_journal_key_delete(struct bch_fs *, enum btree_id, int bch2_journal_key_delete(struct bch_fs *, enum btree_id,
unsigned, struct bpos); unsigned, struct bpos);
void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id,
unsigned, struct bpos);
void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment