Commit b40901b0 authored by Kent Overstreet

bcachefs: New erasure coding shutdown path

This implements a new shutdown path for erasure coding, which is needed
for the upcoming BCH_WRITE_WAIT_FOR_EC write path.

The process is:
 - Cancel new stripes being built up
 - Close out/cancel open buckets on write points or the partial list
   that are for stripes
 - Shutdown rebalance/copygc
 - Then wait for in flight new stripes to finish

With BCH_WRITE_WAIT_FOR_EC, move ops will be waiting on stripes to fill
up before they complete; the new ec shutdown path is needed for shutting
down copygc/rebalance without deadlocking.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent b9fa375b
...@@ -2158,44 +2158,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca) ...@@ -2158,44 +2158,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
*/ */
bch2_recalc_capacity(c); bch2_recalc_capacity(c);
/* Next, close write points that point to this device... */ bch2_open_buckets_stop(c, ca, false);
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
bch2_writepoint_stop(c, ca, &c->write_points[i]);
bch2_writepoint_stop(c, ca, &c->copygc_write_point);
bch2_writepoint_stop(c, ca, &c->rebalance_write_point);
bch2_writepoint_stop(c, ca, &c->btree_write_point);
mutex_lock(&c->btree_reserve_cache_lock);
while (c->btree_reserve_cache_nr) {
struct btree_alloc *a =
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
bch2_open_buckets_put(c, &a->ob);
}
mutex_unlock(&c->btree_reserve_cache_lock);
spin_lock(&c->freelist_lock);
i = 0;
while (i < c->open_buckets_partial_nr) {
struct open_bucket *ob =
c->open_buckets + c->open_buckets_partial[i];
if (ob->dev == ca->dev_idx) {
--c->open_buckets_partial_nr;
swap(c->open_buckets_partial[i],
c->open_buckets_partial[c->open_buckets_partial_nr]);
ob->on_partial_list = false;
spin_unlock(&c->freelist_lock);
bch2_open_bucket_put(c, ob);
spin_lock(&c->freelist_lock);
} else {
i++;
}
}
spin_unlock(&c->freelist_lock);
bch2_ec_stop_dev(c, ca);
/* /*
* Wake up threads that were blocked on allocation, so they can notice * Wake up threads that were blocked on allocation, so they can notice
......
...@@ -1023,45 +1023,96 @@ static int open_bucket_add_buckets(struct btree_trans *trans, ...@@ -1023,45 +1023,96 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
return ret < 0 ? ret : 0; return ret < 0 ? ret : 0;
} }
void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca, static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c,
struct open_buckets *obs) struct bch_dev *ca, bool ec)
{ {
struct open_buckets ptrs = { .nr = 0 }; if (ec) {
struct open_bucket *ob, *ob2; return ob->ec != NULL;
unsigned i, j; } else if (ca) {
bool drop = ob->dev == ca->dev_idx;
open_bucket_for_each(c, obs, ob, i) { struct open_bucket *ob2;
bool drop = !ca || ob->dev == ca->dev_idx; unsigned i;
if (!drop && ob->ec) { if (!drop && ob->ec) {
mutex_lock(&ob->ec->lock); mutex_lock(&ob->ec->lock);
for (j = 0; j < ob->ec->new_stripe.key.v.nr_blocks; j++) { for (i = 0; i < ob->ec->new_stripe.key.v.nr_blocks; i++) {
if (!ob->ec->blocks[j]) if (!ob->ec->blocks[i])
continue; continue;
ob2 = c->open_buckets + ob->ec->blocks[j]; ob2 = c->open_buckets + ob->ec->blocks[i];
drop |= ob2->dev == ca->dev_idx; drop |= ob2->dev == ca->dev_idx;
} }
mutex_unlock(&ob->ec->lock); mutex_unlock(&ob->ec->lock);
} }
if (drop) return drop;
bch2_open_bucket_put(c, ob); } else {
else return true;
ob_push(c, &ptrs, ob);
} }
*obs = ptrs;
} }
void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca, static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
struct write_point *wp) bool ec, struct write_point *wp)
{ {
struct open_buckets ptrs = { .nr = 0 };
struct open_bucket *ob;
unsigned i;
mutex_lock(&wp->lock); mutex_lock(&wp->lock);
bch2_open_buckets_stop_dev(c, ca, &wp->ptrs); open_bucket_for_each(c, &wp->ptrs, ob, i)
if (should_drop_bucket(ob, c, ca, ec))
bch2_open_bucket_put(c, ob);
else
ob_push(c, &ptrs, ob);
wp->ptrs = ptrs;
mutex_unlock(&wp->lock); mutex_unlock(&wp->lock);
} }
/*
 * bch2_open_buckets_stop - release open buckets held by the write points,
 * the btree reserve cache and the partial list.
 *
 * @c:  filesystem
 * @ca: device to drop buckets for, or NULL
 * @ec: if true, select only buckets attached to an erasure coded stripe
 *
 * Which buckets are released from the write points and the partial list is
 * decided by should_drop_bucket():
 *  - @ec true:          every bucket with ob->ec set
 *  - @ec false, @ca:    buckets on @ca, plus buckets whose stripe has a
 *                       block on @ca
 *  - @ec false, !@ca:   every bucket
 *
 * The btree reserve cache is emptied unconditionally, whatever @ca and @ec
 * are.
 *
 * NOTE(review): should_drop_bucket() takes the mutex ob->ec->lock when @ec is
 * false, @ca is non-NULL and the bucket has ob->ec set; the partial list walk
 * below calls it with the freelist_lock spinlock held. Confirm whether a
 * bucket on the partial list can have ob->ec set on that path.
 */
void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
bool ec)
{
unsigned i;
/* Close out matching buckets on the hashed and the dedicated write points */
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
bch2_writepoint_stop(c, ca, ec, &c->write_points[i]);
bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point);
bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point);
bch2_writepoint_stop(c, ca, ec, &c->btree_write_point);
/* Drain the whole btree reserve cache, newest entry first */
mutex_lock(&c->btree_reserve_cache_lock);
while (c->btree_reserve_cache_nr) {
struct btree_alloc *a =
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
bch2_open_buckets_put(c, &a->ob);
}
mutex_unlock(&c->btree_reserve_cache_lock);
/*
 * Walk the partial list, removing matching entries by swapping them with
 * the last live entry and shrinking the list.
 */
spin_lock(&c->freelist_lock);
i = 0;
while (i < c->open_buckets_partial_nr) {
struct open_bucket *ob =
c->open_buckets + c->open_buckets_partial[i];
if (should_drop_bucket(ob, c, ca, ec)) {
--c->open_buckets_partial_nr;
swap(c->open_buckets_partial[i],
c->open_buckets_partial[c->open_buckets_partial_nr]);
ob->on_partial_list = false;
/*
 * freelist_lock is released around the put (presumably because
 * bch2_open_bucket_put() takes it itself - confirm).
 */
spin_unlock(&c->freelist_lock);
bch2_open_bucket_put(c, ob);
spin_lock(&c->freelist_lock);
/* i is not advanced: slot i now holds the entry swapped in from the tail */
} else {
i++;
}
}
spin_unlock(&c->freelist_lock);
/* @ca may be NULL here; it is passed through unchanged */
bch2_ec_stop_dev(c, ca);
}
static inline struct hlist_head *writepoint_hash(struct bch_fs *c, static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
unsigned long write_point) unsigned long write_point)
{ {
...@@ -1107,8 +1158,7 @@ static bool try_increase_writepoints(struct bch_fs *c) ...@@ -1107,8 +1158,7 @@ static bool try_increase_writepoints(struct bch_fs *c)
return true; return true;
} }
static bool try_decrease_writepoints(struct bch_fs *c, static bool try_decrease_writepoints(struct bch_fs *c, unsigned old_nr)
unsigned old_nr)
{ {
struct write_point *wp; struct write_point *wp;
...@@ -1129,7 +1179,7 @@ static bool try_decrease_writepoints(struct bch_fs *c, ...@@ -1129,7 +1179,7 @@ static bool try_decrease_writepoints(struct bch_fs *c,
hlist_del_rcu(&wp->node); hlist_del_rcu(&wp->node);
mutex_unlock(&c->write_points_hash_lock); mutex_unlock(&c->write_points_hash_lock);
bch2_writepoint_stop(c, NULL, wp); bch2_writepoint_stop(c, NULL, false, wp);
return true; return true;
} }
......
...@@ -202,11 +202,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *, ...@@ -202,11 +202,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
struct bkey_i *, unsigned, bool); struct bkey_i *, unsigned, bool);
void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *); void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *, void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *, bool);
struct open_buckets *);
void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
struct write_point *);
static inline struct write_point_specifier writepoint_hashed(unsigned long v) static inline struct write_point_specifier writepoint_hashed(unsigned long v)
{ {
......
...@@ -655,7 +655,6 @@ typedef struct { ...@@ -655,7 +655,6 @@ typedef struct {
x(fallocate) \ x(fallocate) \
x(discard) \ x(discard) \
x(invalidate) \ x(invalidate) \
x(move) \
x(delete_dead_snapshots) \ x(delete_dead_snapshots) \
x(snapshot_delete_pagecache) \ x(snapshot_delete_pagecache) \
x(sysfs) x(sysfs)
...@@ -958,14 +957,14 @@ struct bch_fs { ...@@ -958,14 +957,14 @@ struct bch_fs {
struct list_head ec_stripe_new_list; struct list_head ec_stripe_new_list;
struct mutex ec_stripe_new_lock; struct mutex ec_stripe_new_lock;
wait_queue_head_t ec_stripe_new_wait;
struct work_struct ec_stripe_create_work; struct work_struct ec_stripe_create_work;
u64 ec_stripe_hint; u64 ec_stripe_hint;
struct bio_set ec_bioset;
struct work_struct ec_stripe_delete_work; struct work_struct ec_stripe_delete_work;
struct llist_head ec_stripe_delete_list;
struct bio_set ec_bioset;
/* REFLINK */ /* REFLINK */
u64 reflink_hint; u64 reflink_hint;
......
...@@ -252,6 +252,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, ...@@ -252,6 +252,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
bch2_trans_commit(trans, &op->res, bch2_trans_commit(trans, &op->res,
NULL, NULL,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL| BTREE_INSERT_NOFAIL|
m->data_opts.btree_insert_flags); m->data_opts.btree_insert_flags);
if (!ret) { if (!ret) {
......
...@@ -989,6 +989,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b ...@@ -989,6 +989,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
while (1) { while (1) {
ret = commit_do(trans, NULL, NULL, ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL, BTREE_INSERT_NOFAIL,
ec_stripe_update_extent(trans, bucket_pos, bucket.gen, ec_stripe_update_extent(trans, bucket_pos, bucket.gen,
s, &bp_offset)); s, &bp_offset));
...@@ -1127,7 +1128,9 @@ static void ec_stripe_create(struct ec_stripe_new *s) ...@@ -1127,7 +1128,9 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err; goto err;
} }
ret = bch2_trans_do(c, &s->res, NULL, BTREE_INSERT_NOFAIL, ret = bch2_trans_do(c, &s->res, NULL,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL,
ec_stripe_key_update(&trans, &s->new_stripe.key, ec_stripe_key_update(&trans, &s->new_stripe.key,
!s->have_existing_stripe)); !s->have_existing_stripe));
if (ret) { if (ret) {
...@@ -1409,6 +1412,11 @@ struct ec_stripe_head *__bch2_ec_stripe_head_get(struct btree_trans *trans, ...@@ -1409,6 +1412,11 @@ struct ec_stripe_head *__bch2_ec_stripe_head_get(struct btree_trans *trans,
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
if (test_bit(BCH_FS_GOING_RO, &c->flags)) {
h = ERR_PTR(-EROFS);
goto found;
}
list_for_each_entry(h, &c->ec_stripe_head_list, list) list_for_each_entry(h, &c->ec_stripe_head_list, list)
if (h->target == target && if (h->target == target &&
h->algo == algo && h->algo == algo &&
...@@ -1753,7 +1761,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, ...@@ -1753,7 +1761,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
return ERR_PTR(ret); return ERR_PTR(ret);
} }
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
{ {
struct ec_stripe_head *h; struct ec_stripe_head *h;
struct open_bucket *ob; struct open_bucket *ob;
...@@ -1761,11 +1769,13 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) ...@@ -1761,11 +1769,13 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
mutex_lock(&c->ec_stripe_head_lock); mutex_lock(&c->ec_stripe_head_lock);
list_for_each_entry(h, &c->ec_stripe_head_list, list) { list_for_each_entry(h, &c->ec_stripe_head_list, list) {
mutex_lock(&h->lock); mutex_lock(&h->lock);
if (!h->s) if (!h->s)
goto unlock; goto unlock;
if (!ca)
goto found;
for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++) { for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++) {
if (!h->s->blocks[i]) if (!h->s->blocks[i])
continue; continue;
...@@ -1784,6 +1794,32 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) ...@@ -1784,6 +1794,32 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
mutex_unlock(&c->ec_stripe_head_lock); mutex_unlock(&c->ec_stripe_head_lock);
} }
/*
 * bch2_ec_stop_dev - per-device entry point for stopping in-progress stripes.
 *
 * Forwards @ca to __bch2_ec_stop(), which walks the stripe heads and, for a
 * non-NULL @ca, scans the blocks of each head's stripe that is being built.
 * A NULL @ca is passed through as-is.
 */
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
{
	__bch2_ec_stop(c, ca);
}
/*
 * bch2_fs_ec_stop - filesystem-wide variant of bch2_ec_stop_dev().
 *
 * Calls __bch2_ec_stop() with a NULL device, so every stripe head that has a
 * stripe in progress is selected without the per-device block scan.
 */
void bch2_fs_ec_stop(struct bch_fs *c)
{
	__bch2_ec_stop(c, NULL);
}
/*
 * Wait condition for bch2_fs_ec_flush(): returns true once
 * c->ec_stripe_new_list is empty. The list is sampled with
 * ec_stripe_new_lock held.
 */
static bool bch2_fs_ec_flush_done(struct bch_fs *c)
{
	bool no_stripes_pending;

	mutex_lock(&c->ec_stripe_new_lock);
	no_stripes_pending = list_empty(&c->ec_stripe_new_list);
	mutex_unlock(&c->ec_stripe_new_lock);

	return no_stripes_pending;
}
/*
 * bch2_fs_ec_flush - block until no new stripes remain on
 * c->ec_stripe_new_list.
 *
 * Sleeps on c->ec_stripe_new_wait, using bch2_fs_ec_flush_done() as the wait
 * condition. The code that wakes this waitqueue is not in this function.
 *
 * NOTE(review): the condition takes a mutex; wait_event() evaluates its
 * condition after the task state has been changed, which may trigger a
 * "blocking ops when !TASK_RUNNING" debug warning - confirm.
 */
void bch2_fs_ec_flush(struct bch_fs *c)
{
	wait_event(c->ec_stripe_new_wait, bch2_fs_ec_flush_done(c));
}
int bch2_stripes_read(struct bch_fs *c) int bch2_stripes_read(struct bch_fs *c)
{ {
struct btree_trans trans; struct btree_trans trans;
...@@ -1915,14 +1951,22 @@ void bch2_fs_ec_exit(struct bch_fs *c) ...@@ -1915,14 +1951,22 @@ void bch2_fs_ec_exit(struct bch_fs *c)
void bch2_fs_ec_init_early(struct bch_fs *c) void bch2_fs_ec_init_early(struct bch_fs *c)
{ {
spin_lock_init(&c->ec_stripes_new_lock);
mutex_init(&c->ec_stripes_heap_lock);
INIT_LIST_HEAD(&c->ec_stripe_head_list);
mutex_init(&c->ec_stripe_head_lock);
INIT_LIST_HEAD(&c->ec_stripe_new_list);
mutex_init(&c->ec_stripe_new_lock);
init_waitqueue_head(&c->ec_stripe_new_wait);
INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work); INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work);
INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work); INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work);
} }
int bch2_fs_ec_init(struct bch_fs *c) int bch2_fs_ec_init(struct bch_fs *c)
{ {
spin_lock_init(&c->ec_stripes_new_lock);
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio), return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
BIOSET_NEED_BVECS); BIOSET_NEED_BVECS);
} }
...@@ -245,8 +245,8 @@ static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s, ...@@ -245,8 +245,8 @@ static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s,
} }
void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
void bch2_fs_ec_stop(struct bch_fs *);
void bch2_ec_flush_new_stripes(struct bch_fs *); void bch2_fs_ec_flush(struct bch_fs *);
int bch2_stripes_read(struct bch_fs *); int bch2_stripes_read(struct bch_fs *);
......
...@@ -705,7 +705,8 @@ static void bch2_write_done(struct closure *cl) ...@@ -705,7 +705,8 @@ static void bch2_write_done(struct closure *cl)
struct bch_fs *c = op->c; struct bch_fs *c = op->c;
bch2_disk_reservation_put(c, &op->res); bch2_disk_reservation_put(c, &op->res);
bch2_write_ref_put(c, BCH_WRITE_REF_write); if (!(op->flags & BCH_WRITE_MOVE))
bch2_write_ref_put(c, BCH_WRITE_REF_write);
bch2_keylist_free(&op->insert_keys, op->inline_keys); bch2_keylist_free(&op->insert_keys, op->inline_keys);
bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time); bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
...@@ -1842,7 +1843,12 @@ void bch2_write(struct closure *cl) ...@@ -1842,7 +1843,12 @@ void bch2_write(struct closure *cl)
goto err; goto err;
} }
if (c->opts.nochanges || if (c->opts.nochanges) {
op->error = -BCH_ERR_erofs_no_writes;
goto err;
}
if (!(op->flags & BCH_WRITE_MOVE) &&
!bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) { !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
op->error = -BCH_ERR_erofs_no_writes; op->error = -BCH_ERR_erofs_no_writes;
goto err; goto err;
......
...@@ -59,7 +59,6 @@ struct moving_io { ...@@ -59,7 +59,6 @@ struct moving_io {
static void move_free(struct moving_io *io) static void move_free(struct moving_io *io)
{ {
struct moving_context *ctxt = io->write.ctxt; struct moving_context *ctxt = io->write.ctxt;
struct bch_fs *c = ctxt->c;
if (io->b) if (io->b)
atomic_dec(&io->b->count); atomic_dec(&io->b->count);
...@@ -71,7 +70,6 @@ static void move_free(struct moving_io *io) ...@@ -71,7 +70,6 @@ static void move_free(struct moving_io *io)
wake_up(&ctxt->wait); wake_up(&ctxt->wait);
mutex_unlock(&ctxt->lock); mutex_unlock(&ctxt->lock);
bch2_write_ref_put(c, BCH_WRITE_REF_move);
kfree(io); kfree(io);
} }
...@@ -280,9 +278,6 @@ static int bch2_move_extent(struct btree_trans *trans, ...@@ -280,9 +278,6 @@ static int bch2_move_extent(struct btree_trans *trans,
return 0; return 0;
} }
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_move))
return -BCH_ERR_erofs_no_writes;
/* /*
* Before memory allocations & taking nocow locks in * Before memory allocations & taking nocow locks in
* bch2_data_update_init(): * bch2_data_update_init():
...@@ -378,7 +373,6 @@ static int bch2_move_extent(struct btree_trans *trans, ...@@ -378,7 +373,6 @@ static int bch2_move_extent(struct btree_trans *trans,
err_free: err_free:
kfree(io); kfree(io);
err: err:
bch2_write_ref_put(c, BCH_WRITE_REF_move);
trace_and_count(c, move_extent_alloc_mem_fail, k.k); trace_and_count(c, move_extent_alloc_mem_fail, k.k);
return ret; return ret;
} }
......
...@@ -205,9 +205,12 @@ static void __bch2_fs_read_only(struct bch_fs *c) ...@@ -205,9 +205,12 @@ static void __bch2_fs_read_only(struct bch_fs *c)
unsigned i, clean_passes = 0; unsigned i, clean_passes = 0;
u64 seq = 0; u64 seq = 0;
bch2_fs_ec_stop(c);
bch2_open_buckets_stop(c, NULL, true);
bch2_rebalance_stop(c); bch2_rebalance_stop(c);
bch2_copygc_stop(c); bch2_copygc_stop(c);
bch2_gc_thread_stop(c); bch2_gc_thread_stop(c);
bch2_fs_ec_flush(c);
bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu", bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu",
journal_cur_seq(&c->journal)); journal_cur_seq(&c->journal));
...@@ -700,15 +703,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) ...@@ -700,15 +703,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
INIT_LIST_HEAD(&c->fsck_errors); INIT_LIST_HEAD(&c->fsck_errors);
mutex_init(&c->fsck_error_lock); mutex_init(&c->fsck_error_lock);
INIT_LIST_HEAD(&c->ec_stripe_head_list);
mutex_init(&c->ec_stripe_head_lock);
INIT_LIST_HEAD(&c->ec_stripe_new_list);
mutex_init(&c->ec_stripe_new_lock);
mutex_init(&c->ec_stripes_heap_lock);
seqcount_init(&c->gc_pos_lock); seqcount_init(&c->gc_pos_lock);
seqcount_init(&c->usage_lock); seqcount_init(&c->usage_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment