Commit 3e5d6c59, authored by Kent Overstreet and committed by Kent Overstreet

bcachefs: Use journal preres for deferred btree updates

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 68ef94a6
...@@ -960,6 +960,7 @@ static int bch2_invalidate_one_bucket2(struct bch_fs *c, struct bch_dev *ca, ...@@ -960,6 +960,7 @@ static int bch2_invalidate_one_bucket2(struct bch_fs *c, struct bch_dev *ca,
BTREE_INSERT_NOFAIL| BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE| BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE| BTREE_INSERT_USE_ALLOC_RESERVE|
BTREE_INSERT_JOURNAL_RESERVED|
flags, flags,
BTREE_INSERT_ENTRY(iter, &a->k_i)); BTREE_INSERT_ENTRY(iter, &a->k_i));
if (ret == -EINTR) if (ret == -EINTR)
......
...@@ -246,10 +246,11 @@ struct btree_iter { ...@@ -246,10 +246,11 @@ struct btree_iter {
#define BTREE_ITER_MAX 8 #define BTREE_ITER_MAX 8
struct deferred_update { struct deferred_update {
struct journal_preres res;
struct journal_entry_pin journal; struct journal_entry_pin journal;
spinlock_t lock; spinlock_t lock;
unsigned gen; unsigned dirty:1;
u8 allocated_u64s; u8 allocated_u64s;
enum btree_id btree_id; enum btree_id btree_id;
......
...@@ -27,6 +27,7 @@ struct btree_insert { ...@@ -27,6 +27,7 @@ struct btree_insert {
struct bch_fs *c; struct bch_fs *c;
struct disk_reservation *disk_res; struct disk_reservation *disk_res;
struct journal_res journal_res; struct journal_res journal_res;
struct journal_preres journal_preres;
u64 *journal_seq; u64 *journal_seq;
unsigned flags; unsigned flags;
bool did_work; bool did_work;
...@@ -82,6 +83,7 @@ enum { ...@@ -82,6 +83,7 @@ enum {
__BTREE_INSERT_USE_RESERVE, __BTREE_INSERT_USE_RESERVE,
__BTREE_INSERT_USE_ALLOC_RESERVE, __BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY, __BTREE_INSERT_JOURNAL_REPLAY,
__BTREE_INSERT_JOURNAL_RESERVED,
__BTREE_INSERT_NOMARK, __BTREE_INSERT_NOMARK,
__BTREE_INSERT_NOWAIT, __BTREE_INSERT_NOWAIT,
__BTREE_INSERT_GC_LOCK_HELD, __BTREE_INSERT_GC_LOCK_HELD,
...@@ -112,6 +114,8 @@ enum { ...@@ -112,6 +114,8 @@ enum {
/* Insert is for journal replay - don't get journal reservations: */ /* Insert is for journal replay - don't get journal reservations: */
#define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY) #define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY)
#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED)
/* Don't call bch2_mark_key: */ /* Don't call bch2_mark_key: */
#define BTREE_INSERT_NOMARK (1 << __BTREE_INSERT_NOMARK) #define BTREE_INSERT_NOMARK (1 << __BTREE_INSERT_NOMARK)
......
...@@ -18,6 +18,9 @@ ...@@ -18,6 +18,9 @@
#include <linux/sort.h> #include <linux/sort.h>
static bool btree_trans_relock(struct btree_insert *);
static void btree_trans_unlock(struct btree_insert *);
/* Inserting into a given leaf node (last stage of insert): */ /* Inserting into a given leaf node (last stage of insert): */
/* Handle overwrites and do insert, for non extents: */ /* Handle overwrites and do insert, for non extents: */
...@@ -246,9 +249,9 @@ static void deferred_update_flush(struct journal *j, ...@@ -246,9 +249,9 @@ static void deferred_update_flush(struct journal *j,
struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct deferred_update *d = struct deferred_update *d =
container_of(pin, struct deferred_update, journal); container_of(pin, struct deferred_update, journal);
struct journal_preres res = { 0 };
u64 tmp[32]; u64 tmp[32];
struct bkey_i *k = (void *) tmp; struct bkey_i *k = (void *) tmp;
unsigned gen;
int ret; int ret;
if (d->allocated_u64s > ARRAY_SIZE(tmp)) { if (d->allocated_u64s > ARRAY_SIZE(tmp)) {
...@@ -258,26 +261,32 @@ static void deferred_update_flush(struct journal *j, ...@@ -258,26 +261,32 @@ static void deferred_update_flush(struct journal *j,
} }
spin_lock(&d->lock); spin_lock(&d->lock);
gen = d->gen; if (d->dirty) {
BUG_ON(jset_u64s(d->k.k.u64s) > d->res.u64s);
swap(res, d->res);
if (journal_pin_active(&d->journal)) {
BUG_ON(d->k.k.u64s > d->allocated_u64s); BUG_ON(d->k.k.u64s > d->allocated_u64s);
bkey_copy(k, &d->k);
bkey_copy(k, &d->k);
d->dirty = false;
spin_unlock(&d->lock); spin_unlock(&d->lock);
ret = bch2_btree_insert(c, d->btree_id, k, NULL, NULL, ret = bch2_btree_insert(c, d->btree_id, k, NULL, NULL,
BTREE_INSERT_NOFAIL); BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_JOURNAL_RESERVED);
bch2_fs_fatal_err_on(ret && !bch2_journal_error(j), bch2_fs_fatal_err_on(ret && !bch2_journal_error(j),
c, "error flushing deferred btree update: %i", ret); c, "error flushing deferred btree update: %i", ret);
spin_lock(&d->lock); spin_lock(&d->lock);
} }
if (gen == d->gen) if (!d->dirty)
bch2_journal_pin_drop(j, &d->journal); bch2_journal_pin_drop(j, &d->journal);
spin_unlock(&d->lock); spin_unlock(&d->lock);
bch2_journal_preres_put(j, &res);
if (k != (void *) tmp) if (k != (void *) tmp)
kfree(k); kfree(k);
} }
...@@ -289,6 +298,7 @@ btree_insert_key_deferred(struct btree_insert *trans, ...@@ -289,6 +298,7 @@ btree_insert_key_deferred(struct btree_insert *trans,
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct journal *j = &c->journal; struct journal *j = &c->journal;
struct deferred_update *d = insert->d; struct deferred_update *d = insert->d;
int difference;
BUG_ON(trans->flags & BTREE_INSERT_JOURNAL_REPLAY); BUG_ON(trans->flags & BTREE_INSERT_JOURNAL_REPLAY);
BUG_ON(insert->k->u64s > d->allocated_u64s); BUG_ON(insert->k->u64s > d->allocated_u64s);
...@@ -296,12 +306,21 @@ btree_insert_key_deferred(struct btree_insert *trans, ...@@ -296,12 +306,21 @@ btree_insert_key_deferred(struct btree_insert *trans,
__btree_journal_key(trans, d->btree_id, insert->k); __btree_journal_key(trans, d->btree_id, insert->k);
spin_lock(&d->lock); spin_lock(&d->lock);
d->gen++; BUG_ON(jset_u64s(insert->k->u64s) >
trans->journal_preres.u64s);
difference = jset_u64s(insert->k->u64s) - d->res.u64s;
if (difference > 0) {
trans->journal_preres.u64s -= difference;
d->res.u64s += difference;
}
bkey_copy(&d->k, insert->k); bkey_copy(&d->k, insert->k);
spin_unlock(&d->lock); d->dirty = true;
bch2_journal_pin_update(j, trans->journal_res.seq, &d->journal, bch2_journal_pin_update(j, trans->journal_res.seq, &d->journal,
deferred_update_flush); deferred_update_flush);
spin_unlock(&d->lock);
return BTREE_INSERT_OK; return BTREE_INSERT_OK;
} }
...@@ -520,13 +539,16 @@ static inline int do_btree_insert_at(struct btree_insert *trans, ...@@ -520,13 +539,16 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
} }
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) { if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
unsigned flags = (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
? JOURNAL_RES_GET_RESERVED : 0;
u64s = 0; u64s = 0;
trans_for_each_entry(trans, i) trans_for_each_entry(trans, i)
u64s += jset_u64s(i->k->k.u64s); u64s += jset_u64s(i->k->k.u64s);
ret = bch2_journal_res_get(&c->journal, ret = bch2_journal_res_get(&c->journal,
&trans->journal_res, u64s, &trans->journal_res, u64s,
JOURNAL_RES_GET_NONBLOCK); flags|JOURNAL_RES_GET_NONBLOCK);
if (likely(!ret)) if (likely(!ret))
goto got_journal_res; goto got_journal_res;
if (ret != -EAGAIN) if (ret != -EAGAIN)
...@@ -537,7 +559,7 @@ static inline int do_btree_insert_at(struct btree_insert *trans, ...@@ -537,7 +559,7 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
ret = bch2_journal_res_get(&c->journal, ret = bch2_journal_res_get(&c->journal,
&trans->journal_res, u64s, &trans->journal_res, u64s,
JOURNAL_RES_GET_CHECK); flags|JOURNAL_RES_GET_CHECK);
if (ret) if (ret)
return ret; return ret;
...@@ -587,6 +609,10 @@ static inline int do_btree_insert_at(struct btree_insert *trans, ...@@ -587,6 +609,10 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
} }
} }
out: out:
BUG_ON(ret &&
(trans->flags & BTREE_INSERT_JOURNAL_RESERVED) &&
trans->journal_res.ref);
multi_unlock_write(trans); multi_unlock_write(trans);
bch2_journal_res_put(&c->journal, &trans->journal_res); bch2_journal_res_put(&c->journal, &trans->journal_res);
...@@ -628,7 +654,7 @@ int __bch2_btree_insert_at(struct btree_insert *trans) ...@@ -628,7 +654,7 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_insert_entry *i; struct btree_insert_entry *i;
struct btree_iter *linked; struct btree_iter *linked;
unsigned flags; unsigned flags, u64s = 0;
int ret; int ret;
BUG_ON(!trans->nr); BUG_ON(!trans->nr);
...@@ -639,11 +665,39 @@ int __bch2_btree_insert_at(struct btree_insert *trans) ...@@ -639,11 +665,39 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
if (trans->flags & BTREE_INSERT_GC_LOCK_HELD) if (trans->flags & BTREE_INSERT_GC_LOCK_HELD)
lockdep_assert_held(&c->gc_lock); lockdep_assert_held(&c->gc_lock);
memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
bubble_sort(trans->entries, trans->nr, btree_trans_cmp); bubble_sort(trans->entries, trans->nr, btree_trans_cmp);
trans_for_each_entry(trans, i) trans_for_each_entry(trans, i)
btree_insert_entry_checks(c, i); btree_insert_entry_checks(c, i);
trans_for_each_entry(trans, i)
if (i->deferred)
u64s += jset_u64s(i->k->k.u64s);
if (u64s) {
ret = bch2_journal_preres_get(&c->journal,
&trans->journal_preres, u64s,
JOURNAL_RES_GET_NONBLOCK);
if (!ret)
goto got_journal_preres;
if (ret != -EAGAIN)
return ret;
btree_trans_unlock(trans);
ret = bch2_journal_preres_get(&c->journal,
&trans->journal_preres, u64s, 0);
if (ret)
return ret;
if (!btree_trans_relock(trans)) {
trans_restart(" (iter relock after journal preres get blocked)");
bch2_journal_preres_put(&c->journal, &trans->journal_preres);
return -EINTR;
}
}
got_journal_preres:
if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) && if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
!percpu_ref_tryget(&c->writes))) !percpu_ref_tryget(&c->writes)))
return -EROFS; return -EROFS;
...@@ -675,6 +729,8 @@ int __bch2_btree_insert_at(struct btree_insert *trans) ...@@ -675,6 +729,8 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
trans_for_each_iter(trans, i) trans_for_each_iter(trans, i)
bch2_btree_iter_downgrade(i->iter); bch2_btree_iter_downgrade(i->iter);
out: out:
bch2_journal_preres_put(&c->journal, &trans->journal_preres);
if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW))) if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
percpu_ref_put(&c->writes); percpu_ref_put(&c->writes);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment