Commit 2252aa27, authored and committed by Kent Overstreet

bcachefs: btree gc refactoring

prep work for erasure coding
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent abce30b7
......@@ -19,17 +19,6 @@ static inline enum bkey_type bkey_type(unsigned level, enum btree_id id)
return level ? BKEY_TYPE_BTREE : (enum bkey_type) id;
}
/*
 * Returns true only for BKEY_TYPE_BTREE and BKEY_TYPE_EXTENTS; every other
 * key type yields false.
 */
static inline bool btree_type_has_ptrs(enum bkey_type type)
{
	return type == BKEY_TYPE_BTREE ||
	       type == BKEY_TYPE_EXTENTS;
}
struct bch_fs;
struct btree;
struct bkey;
......
......@@ -18,6 +18,7 @@
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "journal_io.h"
#include "keylist.h"
#include "move.h"
#include "replicas.h"
......@@ -32,6 +33,23 @@
#include <linux/rcupdate.h>
#include <linux/sched/task.h>
/*
 * Unconditionally store @new_pos into c->gc_pos.
 *
 * The assignment is wrapped in a write-side section of the c->gc_pos_lock
 * seqcount, and preemption is disabled for the duration of the update. No
 * ordering check against the previous position is made here.
 */
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
preempt_disable();
write_seqcount_begin(&c->gc_pos_lock);
c->gc_pos = new_pos;
write_seqcount_end(&c->gc_pos_lock);
preempt_enable();
}
/*
 * Checked variant of __gc_pos_set(): BUG()s unless
 * gc_pos_cmp(new_pos, c->gc_pos) is positive, then stores @new_pos as the
 * current GC position.
 */
static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
/* a position that compares equal to or below the current one is fatal */
BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
__gc_pos_set(c, new_pos);
}
/* range_checks - for validating min/max pos of each btree node: */
struct range_checks {
struct range_level {
struct bpos min;
......@@ -91,6 +109,19 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b,
}
}
/* marking of btree keys/nodes: */
/*
 * Returns true only for BKEY_TYPE_BTREE and BKEY_TYPE_EXTENTS; all other key
 * types report false.
 */
static bool bkey_type_needs_gc(enum bkey_type type)
{
	return type == BKEY_TYPE_BTREE ||
	       type == BKEY_TYPE_EXTENTS;
}
u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
{
const struct bch_extent_ptr *ptr;
......@@ -113,38 +144,7 @@ u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
return max_stale;
}
/*
* For runtime mark and sweep:
*/
/*
 * Mark a single key via bch2_mark_key().
 *
 * @type selects how the key is accounted:
 *  - BKEY_TYPE_BTREE:   marked as BCH_DATA_BTREE with c->opts.btree_node_size
 *                       sectors; returns 0.
 *  - BKEY_TYPE_EXTENTS: marked as BCH_DATA_USER with k.k->size sectors;
 *                       returns bch2_btree_key_recalc_oldest_gen() for the key.
 *  - anything else:     BUG().
 *
 * @flags is OR'd with BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE and
 * BCH_BUCKET_MARK_GC_LOCK_HELD before being handed to bch2_mark_key().
 */
static u8 bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
			   struct bkey_s_c k, unsigned flags)
{
	struct gc_pos pos = { 0 };

	if (type == BKEY_TYPE_BTREE) {
		bch2_mark_key(c, k, c->opts.btree_node_size,
			      BCH_DATA_BTREE, pos, NULL,
			      0, flags|
			      BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
			      BCH_BUCKET_MARK_GC_LOCK_HELD);
		return 0;
	}

	if (type == BKEY_TYPE_EXTENTS) {
		bch2_mark_key(c, k, k.k->size, BCH_DATA_USER, pos, NULL,
			      0, flags|
			      BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
			      BCH_BUCKET_MARK_GC_LOCK_HELD);
		return bch2_btree_key_recalc_oldest_gen(c, k);
	}

	/* no other key type may be passed in */
	BUG();
	return 0;
}
int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
static int bch2_btree_mark_ptrs_initial(struct bch_fs *c, enum bkey_type type,
struct bkey_s_c k)
{
enum bch_data_type data_type = type == BKEY_TYPE_BTREE
......@@ -199,54 +199,90 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
}
}
atomic64_set(&c->key_version,
max_t(u64, k.k->version.lo,
atomic64_read(&c->key_version)));
bch2_gc_mark_key(c, type, k, BCH_BUCKET_MARK_NOATOMIC);
if (k.k->version.lo > atomic64_read(&c->key_version))
atomic64_set(&c->key_version, k.k->version.lo);
fsck_err:
return ret;
}
static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b)
/*
* For runtime mark and sweep:
*/
/*
 * Mark a single key via bch2_mark_key().
 *
 * Only BKEY_TYPE_BTREE and BKEY_TYPE_EXTENTS keys are processed; any other
 * @type is a no-op returning 0.
 *
 * When @initial is set, bch2_btree_mark_ptrs_initial() runs on the key first
 * (a negative result aborts and is returned), and BCH_BUCKET_MARK_NOATOMIC is
 * added to the mark flags.
 *
 * Returns: a negative error from bch2_btree_mark_ptrs_initial(); for extents,
 * the value of bch2_btree_key_recalc_oldest_gen(); for btree keys, the
 * (non-negative) result of the initial pass, or 0 when @initial is false.
 */
static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
			    struct bkey_s_c k, bool initial)
{
	const bool is_btree	= type == BKEY_TYPE_BTREE;
	const bool is_extents	= type == BKEY_TYPE_EXTENTS;
	unsigned flags = BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
		BCH_BUCKET_MARK_GC_LOCK_HELD;
	struct gc_pos pos = { 0 };
	int ret = 0;

	if (!is_btree && !is_extents)
		return 0;

	if (initial) {
		flags |= BCH_BUCKET_MARK_NOATOMIC;

		ret = bch2_btree_mark_ptrs_initial(c, type, k);
		if (ret < 0)
			return ret;
	}

	if (is_btree) {
		bch2_mark_key(c, k, c->opts.btree_node_size,
			      BCH_DATA_BTREE, pos, NULL,
			      0, flags);
		return ret;
	}

	bch2_mark_key(c, k, k.k->size, BCH_DATA_USER, pos, NULL,
		      0, flags);
	return bch2_btree_key_recalc_oldest_gen(c, k);
}
static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
bool initial)
{
enum bkey_type type = btree_node_type(b);
struct btree_node_iter iter;
struct bkey unpacked;
struct bkey_s_c k;
u8 stale = 0;
int ret;
if (!bkey_type_needs_gc(type))
return 0;
if (btree_node_has_ptrs(b))
for_each_btree_node_key_unpack(b, k, &iter,
&unpacked) {
bch2_bkey_debugcheck(c, b, k);
stale = max(stale, bch2_gc_mark_key(c, type, k, 0));
}
return stale;
}
ret = bch2_gc_mark_key(c, type, k, initial);
if (ret < 0)
return ret;
/*
 * Unconditionally store @new_pos into c->gc_pos, inside a write-side section
 * of the c->gc_pos_lock seqcount and with preemption disabled for the
 * duration of the update.
 */
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
preempt_disable();
write_seqcount_begin(&c->gc_pos_lock);
c->gc_pos = new_pos;
write_seqcount_end(&c->gc_pos_lock);
preempt_enable();
}
stale = max_t(u8, stale, ret);
}
static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
__gc_pos_set(c, new_pos);
return stale;
}
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
bool initial)
{
struct btree_iter iter;
struct btree *b;
struct range_checks r;
unsigned depth = btree_id == BTREE_ID_EXTENTS ? 0 : 1;
unsigned depth = bkey_type_needs_gc(btree_id) ? 0 : 1;
unsigned max_stale;
int ret = 0;
......@@ -257,8 +293,11 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
/*
* if expensive_debug_checks is on, run range_checks on all leaf nodes:
*
* and on startup, we have to read every btree node (XXX: only if it was
* an unclean shutdown)
*/
if (expensive_debug_checks(c))
if (initial || expensive_debug_checks(c))
depth = 0;
btree_node_range_checks_init(&r, depth);
......@@ -269,10 +308,11 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
bch2_verify_btree_nr_keys(b);
max_stale = btree_gc_mark_node(c, b);
max_stale = btree_gc_mark_node(c, b, initial);
gc_pos_set(c, gc_pos_btree_node(b));
if (!initial) {
if (max_stale > 64)
bch2_btree_node_rewrite(c, &iter,
b->data->keys.seq,
......@@ -285,6 +325,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
b->data->keys.seq,
BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD);
}
bch2_btree_iter_cond_resched(&iter);
}
......@@ -296,13 +337,47 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
b = c->btree_roots[btree_id].b;
if (!btree_node_fake(b))
bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), 0);
bch2_gc_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key), initial);
gc_pos_set(c, gc_pos_btree_root(b->btree_id));
mutex_unlock(&c->btree_root_lock);
return 0;
}
/*
 * Run bch2_gc_btree() over every btree id and, when a journal list is
 * supplied, additionally mark the keys found in the journal entries.
 *
 * @journal: list of struct journal_replay entries, or NULL to skip the
 *           journal pass.
 * @initial: forwarded to bch2_gc_btree() and bch2_gc_mark_key().
 *
 * For each btree whose leaf key type satisfies bkey_type_needs_gc(), every
 * journal key whose (level, btree_id) maps to that same key type is passed to
 * bch2_gc_mark_key().
 *
 * Returns 0 on success, the first nonzero result of bch2_gc_btree(), or the
 * first negative result of bch2_gc_mark_key(). Non-negative results from
 * bch2_gc_mark_key() are ignored here.
 */
static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
			  bool initial)
{
	unsigned i;

	for (i = 0; i < BTREE_ID_NR; i++) {
		enum bkey_type type = bkey_type(0, i);
		struct journal_replay *r;
		/* single declaration: no inner variable shadowing this one */
		int ret;

		ret = bch2_gc_btree(c, i, initial);
		if (ret)
			return ret;

		if (!journal || !bkey_type_needs_gc(type))
			continue;

		list_for_each_entry(r, journal, list) {
			struct bkey_i *k, *n;
			struct jset_entry *j;

			for_each_jset_key(k, n, j, &r->j) {
				if (type == bkey_type(j->level, j->btree_id)) {
					ret = bch2_gc_mark_key(c, type,
						bkey_i_to_s_c(k), initial);
					if (ret < 0)
						return ret;
				}
			}
		}
	}

	return 0;
}
static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca,
u64 start, u64 end,
enum bch_data_type type,
......@@ -525,6 +600,7 @@ void bch2_gc(struct bch_fs *c)
struct bch_dev *ca;
u64 start_time = local_clock();
unsigned i;
int ret;
/*
* Walk _all_ references to buckets, and recompute them:
......@@ -560,15 +636,12 @@ void bch2_gc(struct bch_fs *c)
bch2_mark_superblocks(c);
/* Walk btree: */
for (i = 0; i < BTREE_ID_NR; i++) {
int ret = bch2_gc_btree(c, i);
ret = bch2_gc_btrees(c, NULL, false);
if (ret) {
bch_err(c, "btree gc failed: %d", ret);
set_bit(BCH_FS_GC_FAILURE, &c->flags);
goto out;
}
}
bch2_mark_pending_btree_node_frees(c);
bch2_mark_allocator_buckets(c);
......@@ -1009,58 +1082,9 @@ int bch2_gc_thread_start(struct bch_fs *c)
/* Initial GC computes bucket marks during startup */
/*
 * Initial GC pass over one btree.
 *
 * Sets the GC position to the start of btree @id, marks the root node's key
 * (unless the root is a fake node), then walks every node of the btree:
 * each node is range-checked, and for nodes that have pointers every key is
 * passed to bch2_btree_mark_key_initial().
 *
 * Returns 0 if the btree has no root, the error from marking the root key,
 * or, after the walk, the result of bch2_btree_iter_unlock() if nonzero and
 * otherwise the first error from marking a key (0 on success).
 */
static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
{
struct btree_iter iter;
struct btree *b;
struct range_checks r;
int ret = 0;
/* depth 0: range checks are initialised for all levels down to the leaves */
btree_node_range_checks_init(&r, 0);
gc_pos_set(c, gc_pos_btree(id, POS_MIN, 0));
/* nothing to do for a btree without a root */
if (!c->btree_roots[id].b)
return 0;
b = c->btree_roots[id].b;
/* the root's own key is marked first; fake roots are skipped */
if (!btree_node_fake(b))
ret = bch2_btree_mark_key_initial(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key));
/* early return is safe here: the iterator has not been set up yet */
if (ret)
return ret;
/*
* We have to hit every btree node before starting journal replay, in
* order for the journal seq blacklist machinery to work:
*/
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
btree_node_range_checks(c, b, &r);
if (btree_node_has_ptrs(b)) {
struct btree_node_iter node_iter;
struct bkey unpacked;
struct bkey_s_c k;
for_each_btree_node_key_unpack(b, k, &node_iter,
&unpacked) {
ret = bch2_btree_mark_key_initial(c,
btree_node_type(b), k);
/* leave through err so the iterator is still unlocked */
if (ret)
goto err;
}
}
bch2_btree_iter_cond_resched(&iter);
}
err:
/* an unlock error takes precedence over ret */
return bch2_btree_iter_unlock(&iter) ?: ret;
}
int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
{
unsigned iter = 0;
enum btree_id id;
int ret = 0;
down_write(&c->gc_lock);
......@@ -1069,13 +1093,7 @@ int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
bch2_mark_superblocks(c);
for (id = 0; id < BTREE_ID_NR; id++) {
ret = bch2_initial_gc_btree(c, id);
if (ret)
goto err;
}
ret = bch2_journal_mark(c, journal);
ret = bch2_gc_btrees(c, journal, true);
if (ret)
goto err;
......
......@@ -12,8 +12,6 @@ void bch2_gc_thread_stop(struct bch_fs *);
int bch2_gc_thread_start(struct bch_fs *);
int bch2_initial_gc(struct bch_fs *, struct list_head *);
u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *, struct bkey_s_c);
int bch2_btree_mark_key_initial(struct bch_fs *, enum bkey_type,
struct bkey_s_c);
void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
/*
......
......@@ -415,11 +415,6 @@ static inline const struct bkey_ops *btree_node_ops(struct btree *b)
return &bch2_bkey_ops[btree_node_type(b)];
}
static inline bool btree_node_has_ptrs(struct btree *b)
{
return btree_type_has_ptrs(btree_node_type(b));
}
static inline bool btree_node_is_extents(struct btree *b)
{
return btree_node_type(b) == BKEY_TYPE_EXTENTS;
......
......@@ -355,10 +355,6 @@ static inline bool journal_flushes_device(struct bch_dev *ca)
return true;
}
int bch2_journal_mark(struct bch_fs *, struct list_head *);
void bch2_journal_entries_free(struct list_head *);
int bch2_journal_replay(struct bch_fs *, struct list_head *);
static inline void bch2_journal_set_replay_done(struct journal *j)
{
BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
......
......@@ -852,28 +852,6 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
/* journal replay: */
int bch2_journal_mark(struct bch_fs *c, struct list_head *list)
{
struct bkey_i *k, *n;
struct jset_entry *j;
struct journal_replay *r;
int ret;
list_for_each_entry(r, list, list)
for_each_jset_key(k, n, j, &r->j) {
enum bkey_type type = bkey_type(j->level, j->btree_id);
struct bkey_s_c k_s_c = bkey_i_to_s_c(k);
if (btree_type_has_ptrs(type)) {
ret = bch2_btree_mark_key_initial(c, type, k_s_c);
if (ret)
return ret;
}
}
return 0;
}
int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
{
struct journal *j = &c->journal;
......
......@@ -37,6 +37,8 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
int bch2_journal_set_seq(struct bch_fs *c, u64, u64);
int bch2_journal_read(struct bch_fs *, struct list_head *);
void bch2_journal_entries_free(struct list_head *);
int bch2_journal_replay(struct bch_fs *, struct list_head *);
int bch2_journal_entry_sectors(struct journal *);
void bch2_journal_write(struct closure *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment