Commit 1889ad5a, authored and committed by Kent Overstreet

bcachefs: Add code to scan for/rewrite old btree nodes

This adds a new data job type to scan for btree nodes in the old extent
format, and rewrite them.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 85674154
...@@ -171,10 +171,11 @@ struct bch_ioctl_disk_set_state { ...@@ -171,10 +171,11 @@ struct bch_ioctl_disk_set_state {
}; };
enum bch_data_ops { enum bch_data_ops {
BCH_DATA_OP_SCRUB = 0, BCH_DATA_OP_SCRUB = 0,
BCH_DATA_OP_REREPLICATE = 1, BCH_DATA_OP_REREPLICATE = 1,
BCH_DATA_OP_MIGRATE = 2, BCH_DATA_OP_MIGRATE = 2,
BCH_DATA_OP_NR = 3, BCH_DATA_OP_REWRITE_OLD_NODES = 3,
BCH_DATA_OP_NR = 4,
}; };
/* /*
...@@ -187,11 +188,13 @@ enum bch_data_ops { ...@@ -187,11 +188,13 @@ enum bch_data_ops {
* job. The file descriptor is O_CLOEXEC. * job. The file descriptor is O_CLOEXEC.
*/ */
struct bch_ioctl_data { struct bch_ioctl_data {
__u32 op; __u16 op;
__u8 start_btree;
__u8 end_btree;
__u32 flags; __u32 flags;
struct bpos start; struct bpos start_pos;
struct bpos end; struct bpos end_pos;
union { union {
struct { struct {
......
...@@ -920,6 +920,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -920,6 +920,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
unsigned u64s; unsigned u64s;
int ret, retry_read = 0, write = READ; int ret, retry_read = 0, write = READ;
b->version_ondisk = U16_MAX;
iter = mempool_alloc(&c->fill_iter, GFP_NOIO); iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
sort_iter_init(iter, b); sort_iter_init(iter, b);
iter->size = (btree_blocks(c) + 1) * 2; iter->size = (btree_blocks(c) + 1) * 2;
...@@ -1000,6 +1002,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -1000,6 +1002,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
sectors = vstruct_sectors(bne, c->block_bits); sectors = vstruct_sectors(bne, c->block_bits);
} }
b->version_ondisk = min(b->version_ondisk,
le16_to_cpu(i->version));
ret = validate_bset(c, ca, b, i, sectors, ret = validate_bset(c, ca, b, i, sectors,
READ, have_retry); READ, have_retry);
if (ret) if (ret)
......
...@@ -76,6 +76,7 @@ struct btree { ...@@ -76,6 +76,7 @@ struct btree {
u16 written; u16 written;
u8 nsets; u8 nsets;
u8 nr_key_bits; u8 nr_key_bits;
u16 version_ondisk;
struct bkey_format format; struct bkey_format format;
......
...@@ -286,6 +286,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev ...@@ -286,6 +286,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
bch2_bset_init_first(b, &b->data->keys); bch2_bset_init_first(b, &b->data->keys);
b->c.level = level; b->c.level = level;
b->c.btree_id = as->btree_id; b->c.btree_id = as->btree_id;
b->version_ondisk = c->sb.version;
memset(&b->nr, 0, sizeof(b->nr)); memset(&b->nr, 0, sizeof(b->nr));
b->data->magic = cpu_to_le64(bset_magic(c)); b->data->magic = cpu_to_le64(bset_magic(c));
......
...@@ -531,7 +531,7 @@ static int __bch2_move_data(struct bch_fs *c, ...@@ -531,7 +531,7 @@ static int __bch2_move_data(struct bch_fs *c,
stats->data_type = BCH_DATA_user; stats->data_type = BCH_DATA_user;
stats->btree_id = btree_id; stats->btree_id = btree_id;
stats->pos = POS_MIN; stats->pos = start;
iter = bch2_trans_get_iter(&trans, btree_id, start, iter = bch2_trans_get_iter(&trans, btree_id, start,
BTREE_ITER_PREFETCH); BTREE_ITER_PREFETCH);
...@@ -646,14 +646,15 @@ static int __bch2_move_data(struct bch_fs *c, ...@@ -646,14 +646,15 @@ static int __bch2_move_data(struct bch_fs *c,
} }
int bch2_move_data(struct bch_fs *c, int bch2_move_data(struct bch_fs *c,
enum btree_id start_btree_id, struct bpos start_pos,
enum btree_id end_btree_id, struct bpos end_pos,
struct bch_ratelimit *rate, struct bch_ratelimit *rate,
struct write_point_specifier wp, struct write_point_specifier wp,
struct bpos start,
struct bpos end,
move_pred_fn pred, void *arg, move_pred_fn pred, void *arg,
struct bch_move_stats *stats) struct bch_move_stats *stats)
{ {
struct moving_context ctxt = { .stats = stats }; struct moving_context ctxt = { .stats = stats };
enum btree_id id;
int ret; int ret;
closure_init_stack(&ctxt.cl); closure_init_stack(&ctxt.cl);
...@@ -662,10 +663,23 @@ int bch2_move_data(struct bch_fs *c, ...@@ -662,10 +663,23 @@ int bch2_move_data(struct bch_fs *c,
stats->data_type = BCH_DATA_user; stats->data_type = BCH_DATA_user;
ret = __bch2_move_data(c, &ctxt, rate, wp, start, end, for (id = start_btree_id;
pred, arg, stats, BTREE_ID_EXTENTS) ?: id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
__bch2_move_data(c, &ctxt, rate, wp, start, end, id++) {
pred, arg, stats, BTREE_ID_REFLINK); stats->btree_id = id;
if (id != BTREE_ID_EXTENTS &&
id != BTREE_ID_REFLINK)
continue;
ret = __bch2_move_data(c, &ctxt, rate, wp,
id == start_btree_id ? start_pos : POS_MIN,
id == end_btree_id ? end_pos : POS_MAX,
pred, arg, stats, id);
if (ret)
break;
}
move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
closure_sync(&ctxt.cl); closure_sync(&ctxt.cl);
...@@ -679,16 +693,22 @@ int bch2_move_data(struct bch_fs *c, ...@@ -679,16 +693,22 @@ int bch2_move_data(struct bch_fs *c,
return ret; return ret;
} }
/*
 * Predicate type taken by bch2_move_btree(): it is invoked once per btree
 * node visited as pred(c, arg, b, &io_opts, &data_opts), and the enum
 * data_cmd it returns is switched on by the caller (DATA_SKIP moves on to
 * the next node). Unlike move_pred_fn, it receives the struct btree itself
 * rather than a struct bkey_s_c.
 */
typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *,
struct btree *, struct bch_io_opts *,
struct data_opts *);
static int bch2_move_btree(struct bch_fs *c, static int bch2_move_btree(struct bch_fs *c,
move_pred_fn pred, enum btree_id start_btree_id, struct bpos start_pos,
void *arg, enum btree_id end_btree_id, struct bpos end_pos,
move_btree_pred pred, void *arg,
struct bch_move_stats *stats) struct bch_move_stats *stats)
{ {
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct btree_trans trans; struct btree_trans trans;
struct btree_iter *iter; struct btree_iter *iter;
struct btree *b; struct btree *b;
unsigned id; enum btree_id id;
struct data_opts data_opts; struct data_opts data_opts;
enum data_cmd cmd; enum data_cmd cmd;
int ret = 0; int ret = 0;
...@@ -697,16 +717,24 @@ static int bch2_move_btree(struct bch_fs *c, ...@@ -697,16 +717,24 @@ static int bch2_move_btree(struct bch_fs *c,
stats->data_type = BCH_DATA_btree; stats->data_type = BCH_DATA_btree;
for (id = 0; id < BTREE_ID_NR; id++) { for (id = start_btree_id;
id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
id++) {
stats->btree_id = id; stats->btree_id = id;
for_each_btree_node(&trans, iter, id, POS_MIN, for_each_btree_node(&trans, iter, id,
id == start_btree_id ? start_pos : POS_MIN,
BTREE_ITER_PREFETCH, b) { BTREE_ITER_PREFETCH, b) {
if (kthread && kthread_should_stop())
goto out;
if ((cmp_int(id, end_btree_id) ?:
bkey_cmp(b->key.k.p, end_pos)) > 0)
break;
stats->pos = iter->pos; stats->pos = iter->pos;
switch ((cmd = pred(c, arg, switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) {
bkey_i_to_s_c(&b->key),
&io_opts, &data_opts))) {
case DATA_SKIP: case DATA_SKIP:
goto next; goto next;
case DATA_SCRUB: case DATA_SCRUB:
...@@ -726,7 +754,7 @@ static int bch2_move_btree(struct bch_fs *c, ...@@ -726,7 +754,7 @@ static int bch2_move_btree(struct bch_fs *c,
ret = bch2_trans_iter_free(&trans, iter) ?: ret; ret = bch2_trans_iter_free(&trans, iter) ?: ret;
} }
out:
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
return ret; return ret;
...@@ -785,6 +813,38 @@ static enum data_cmd migrate_pred(struct bch_fs *c, void *arg, ...@@ -785,6 +813,38 @@ static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
return DATA_REWRITE; return DATA_REWRITE;
} }
/*
 * Adapter so that rereplicate_pred() can be used as a per-node predicate:
 * the node's own key (b->key) is converted with bkey_i_to_s_c() and handed
 * to rereplicate_pred(); every other argument is forwarded untouched and
 * the result is returned as-is.
 */
static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg,
					    struct btree *b,
					    struct bch_io_opts *io_opts,
					    struct data_opts *data_opts)
{
	return rereplicate_pred(c, arg,
				bkey_i_to_s_c(&b->key),
				io_opts, data_opts);
}
/*
 * Adapter so that migrate_pred() can be used as a per-node predicate: the
 * node's own key (b->key) is converted with bkey_i_to_s_c() and handed to
 * migrate_pred(); every other argument is forwarded untouched and the
 * result is returned as-is.
 */
static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg,
					struct btree *b,
					struct bch_io_opts *io_opts,
					struct data_opts *data_opts)
{
	return migrate_pred(c, arg,
			    bkey_i_to_s_c(&b->key),
			    io_opts, data_opts);
}
/*
 * Per-node predicate for the "rewrite old nodes" job.
 *
 * Returns DATA_SKIP when the node's version_ondisk equals c->sb.version and
 * btree_node_need_rewrite(b) is false. Otherwise fills in *data_opts
 * (target 0, nr_replicas 1, btree_insert_flags 0) and returns DATA_REWRITE.
 *
 * @arg and @io_opts are not used here.
 */
static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
					    struct btree *b,
					    struct bch_io_opts *io_opts,
					    struct data_opts *data_opts)
{
	/* Version matches and nothing requests a rewrite: leave the node. */
	if (b->version_ondisk == c->sb.version &&
	    !btree_node_need_rewrite(b))
		return DATA_SKIP;

	data_opts->btree_insert_flags	= 0;
	data_opts->nr_replicas		= 1;
	data_opts->target		= 0;

	return DATA_REWRITE;
}
int bch2_data_job(struct bch_fs *c, int bch2_data_job(struct bch_fs *c,
struct bch_move_stats *stats, struct bch_move_stats *stats,
struct bch_ioctl_data op) struct bch_ioctl_data op)
...@@ -796,17 +856,20 @@ int bch2_data_job(struct bch_fs *c, ...@@ -796,17 +856,20 @@ int bch2_data_job(struct bch_fs *c,
stats->data_type = BCH_DATA_journal; stats->data_type = BCH_DATA_journal;
ret = bch2_journal_flush_device_pins(&c->journal, -1); ret = bch2_journal_flush_device_pins(&c->journal, -1);
ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret; ret = bch2_move_btree(c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
rereplicate_btree_pred, c, stats) ?: ret;
closure_wait_event(&c->btree_interior_update_wait, closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c)); !bch2_btree_interior_updates_nr_pending(c));
ret = bch2_replicas_gc2(c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret;
ret = bch2_move_data(c, NULL, ret = bch2_move_data(c,
writepoint_hashed((unsigned long) current), op.start_btree, op.start_pos,
op.start, op.end_btree, op.end_pos,
op.end, NULL, writepoint_hashed((unsigned long) current),
rereplicate_pred, c, stats) ?: ret; rereplicate_pred, c, stats) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret;
break; break;
...@@ -817,16 +880,32 @@ int bch2_data_job(struct bch_fs *c, ...@@ -817,16 +880,32 @@ int bch2_data_job(struct bch_fs *c,
stats->data_type = BCH_DATA_journal; stats->data_type = BCH_DATA_journal;
ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret; ret = bch2_move_btree(c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
migrate_btree_pred, &op, stats) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret;
ret = bch2_move_data(c, NULL, ret = bch2_move_data(c,
writepoint_hashed((unsigned long) current), op.start_btree, op.start_pos,
op.start, op.end_btree, op.end_pos,
op.end, NULL, writepoint_hashed((unsigned long) current),
migrate_pred, &op, stats) ?: ret; migrate_pred, &op, stats) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret;
break; break;
case BCH_DATA_OP_REWRITE_OLD_NODES:
ret = bch2_move_btree(c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
rewrite_old_nodes_pred, &op, stats) ?: ret;
if (!ret) {
mutex_lock(&c->sb_lock);
c->disk_sb.sb->version_min = c->disk_sb.sb->version;
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
}
break;
default: default:
ret = -EINVAL; ret = -EINVAL;
} }
......
...@@ -52,9 +52,11 @@ typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, ...@@ -52,9 +52,11 @@ typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
struct bkey_s_c, struct bkey_s_c,
struct bch_io_opts *, struct data_opts *); struct bch_io_opts *, struct data_opts *);
int bch2_move_data(struct bch_fs *, struct bch_ratelimit *, int bch2_move_data(struct bch_fs *,
enum btree_id, struct bpos,
enum btree_id, struct bpos,
struct bch_ratelimit *,
struct write_point_specifier, struct write_point_specifier,
struct bpos, struct bpos,
move_pred_fn, void *, move_pred_fn, void *,
struct bch_move_stats *); struct bch_move_stats *);
......
...@@ -219,9 +219,11 @@ static int bch2_copygc(struct bch_fs *c) ...@@ -219,9 +219,11 @@ static int bch2_copygc(struct bch_fs *c)
sizeof(h->data[0]), sizeof(h->data[0]),
bucket_offset_cmp, NULL); bucket_offset_cmp, NULL);
ret = bch2_move_data(c, &c->copygc_pd.rate, ret = bch2_move_data(c,
0, POS_MIN,
BTREE_ID_NR, POS_MAX,
&c->copygc_pd.rate,
writepoint_ptr(&c->copygc_write_point), writepoint_ptr(&c->copygc_write_point),
POS_MIN, POS_MAX,
copygc_pred, NULL, copygc_pred, NULL,
&move_stats); &move_stats);
......
...@@ -239,10 +239,11 @@ static int bch2_rebalance_thread(void *arg) ...@@ -239,10 +239,11 @@ static int bch2_rebalance_thread(void *arg)
rebalance_work_reset(c); rebalance_work_reset(c);
bch2_move_data(c, bch2_move_data(c,
0, POS_MIN,
BTREE_ID_NR, POS_MAX,
/* ratelimiting disabled for now */ /* ratelimiting disabled for now */
NULL, /* &r->pd.rate, */ NULL, /* &r->pd.rate, */
writepoint_ptr(&c->rebalance_write_point), writepoint_ptr(&c->rebalance_write_point),
POS_MIN, POS_MAX,
rebalance_pred, NULL, rebalance_pred, NULL,
&r->move_stats); &r->move_stats);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment