Commit 74ed7e56 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Don't let copygc buckets be stolen by other threads

Add an alloc_reserve field to struct open_bucket so that each open bucket remembers which reserve it was allocated from, and only let a bucket on the partial list be reused by an allocation from the same or a stricter reserve: buckets allocated for copygc can then no longer be stolen by foreground writes.

And assorted other copygc fixes: copygc writes now use BCH_WRITE_ALLOC_NOWAIT instead of warning when the freelist is empty, bch2_copygc() now returns an error when the copygc reserve has run out (which stops the copygc thread), and copygc is stopped while a device is being made read only, since going read only shrinks the copygc reserve.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 3d080aa5
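
For readers skimming the diff, here is the core idea as a minimal standalone sketch (illustrative names, not the kernel code; it assumes, as in bcachefs' enum alloc_reserve, that stricter reserves are numerically smaller, which is what makes the "reserve <= ob->alloc_reserve" check added in bch2_bucket_alloc() work):

/* sketch.c - illustrative only, not the kernel code */
#include <stdbool.h>
#include <stdio.h>

/* Assumption: stricter reserves compare lower. */
enum alloc_reserve { RESERVE_BTREE, RESERVE_MOVINGGC, RESERVE_NONE };

struct open_bucket {
        enum alloc_reserve alloc_reserve;   /* reserve this bucket came from */
};

/*
 * A partially-used open bucket may only be reused by a request at least
 * as strict as the reserve it was allocated from, so a bucket allocated
 * for copygc can't be stolen by an ordinary foreground write:
 */
static bool may_take_partial(enum alloc_reserve request,
                             const struct open_bucket *ob)
{
        return request <= ob->alloc_reserve;
}

int main(void)
{
        struct open_bucket copygc_bucket = { .alloc_reserve = RESERVE_MOVINGGC };

        printf("foreground write may steal it: %d\n",
               may_take_partial(RESERVE_NONE, &copygc_bucket));     /* 0 */
        printf("copygc may reuse it:          %d\n",
               may_take_partial(RESERVE_MOVINGGC, &copygc_bucket)); /* 1 */
        return 0;
}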
@@ -518,7 +518,9 @@ static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca)
 			ca->inc_gen_really_needs_gc);
 
 		if (available > fifo_free(&ca->free_inc) ||
-		    (available && !fifo_full(&ca->free[RESERVE_BTREE])))
+		    (available &&
+		     (!fifo_full(&ca->free[RESERVE_BTREE]) ||
+		      !fifo_full(&ca->free[RESERVE_MOVINGGC]))))
 			break;
 
 		up_read(&c->gc_lock);
...
@@ -144,12 +144,13 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
 }
 
 static void open_bucket_free_unused(struct bch_fs *c,
-				    struct open_bucket *ob,
-				    bool may_realloc)
+				    struct write_point *wp,
+				    struct open_bucket *ob)
 {
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+	bool may_realloc = wp->type == BCH_DATA_user;
 
-	BUG_ON(ca->open_buckets_partial_nr >=
+	BUG_ON(ca->open_buckets_partial_nr >
 	       ARRAY_SIZE(ca->open_buckets_partial));
 
 	if (ca->open_buckets_partial_nr <
@@ -228,13 +229,22 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 	spin_lock(&c->freelist_lock);
 
-	if (may_alloc_partial &&
-	    ca->open_buckets_partial_nr) {
-		ob = c->open_buckets +
-			ca->open_buckets_partial[--ca->open_buckets_partial_nr];
-		ob->on_partial_list = false;
-		spin_unlock(&c->freelist_lock);
-		return ob;
+	if (may_alloc_partial) {
+		int i;
+
+		for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) {
+			ob = c->open_buckets + ca->open_buckets_partial[i];
+
+			if (reserve <= ob->alloc_reserve) {
+				array_remove_item(ca->open_buckets_partial,
+						  ca->open_buckets_partial_nr,
+						  i);
+				ob->on_partial_list = false;
+				ob->alloc_reserve = reserve;
+				spin_unlock(&c->freelist_lock);
+				return ob;
+			}
+		}
 	}
 
 	if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) {
@@ -291,6 +301,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 	ob->valid	= true;
 	ob->sectors_free = ca->mi.bucket_size;
+	ob->alloc_reserve = reserve;
 	ob->ptr		= (struct bch_extent_ptr) {
 		.type	= 1 << BCH_EXTENT_ENTRY_ptr,
 		.gen	= buckets->b[bucket].mark.gen,
@@ -835,9 +846,6 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
 alloc_done:
 	BUG_ON(!ret && nr_effective < nr_replicas);
 
-	WARN_ON(reserve == RESERVE_MOVINGGC &&
-		ret == FREELIST_EMPTY);
-
 	if (erasure_code && !ec_open_bucket(c, &ptrs))
 		pr_debug("failed to get ec bucket: ret %u", ret);
@@ -850,7 +858,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
 	/* Free buckets we didn't use: */
 	open_bucket_for_each(c, &wp->ptrs, ob, i)
-		open_bucket_free_unused(c, ob, wp->type == BCH_DATA_user);
+		open_bucket_free_unused(c, wp, ob);
 
 	wp->ptrs = ptrs;
@@ -869,8 +877,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
 		if (ptrs.nr < ARRAY_SIZE(ptrs.v))
 			ob_push(c, &ptrs, ob);
 		else
-			open_bucket_free_unused(c, ob,
-					wp->type == BCH_DATA_user);
+			open_bucket_free_unused(c, wp, ob);
 	wp->ptrs = ptrs;
 
 	mutex_unlock(&wp->lock);
@@ -938,6 +945,13 @@ void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
 	bch2_open_buckets_put(c, &ptrs);
 }
 
+static inline void writepoint_init(struct write_point *wp,
+				   enum bch_data_type type)
+{
+	mutex_init(&wp->lock);
+	wp->type = type;
+}
+
 void bch2_fs_allocator_foreground_init(struct bch_fs *c)
 {
 	struct open_bucket *ob;
...
@@ -133,13 +133,6 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp
 	return (struct write_point_specifier) { .v = (unsigned long) wp };
 }
 
-static inline void writepoint_init(struct write_point *wp,
-				   enum bch_data_type type)
-{
-	mutex_init(&wp->lock);
-	wp->type = type;
-}
-
 void bch2_fs_allocator_foreground_init(struct bch_fs *);
 
 #endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
@@ -66,6 +66,7 @@ struct open_bucket {
 	u8			type;
 	unsigned		valid:1;
 	unsigned		on_partial_list:1;
+	int			alloc_reserve:3;
 	unsigned		sectors_free;
 	struct bch_extent_ptr	ptr;
 	struct ec_stripe_new	*ec;
...
@@ -954,8 +954,10 @@ int bch2_gc_gens(struct bch_fs *c)
 	for (i = 0; i < BTREE_ID_NR; i++)
 		if (btree_node_type_needs_gc(i)) {
 			ret = bch2_gc_btree_gens(c, i);
-			if (ret)
+			if (ret) {
+				bch_err(c, "error recalculating oldest_gen: %i", ret);
 				goto err;
+			}
 		}
 
 	for_each_member_device(ca, c, i) {
@@ -966,6 +968,8 @@ int bch2_gc_gens(struct bch_fs *c)
 			g->oldest_gen = g->gc_gen;
 		up_read(&ca->bucket_lock);
 	}
+
+	c->gc_count++;
 err:
 	up_read(&c->gc_lock);
 	return ret;
...
@@ -248,6 +248,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
 	if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) {
 		m->op.alloc_reserve = RESERVE_MOVINGGC;
+		m->op.flags |= BCH_WRITE_ALLOC_NOWAIT;
 	} else {
 		/* XXX: this should probably be passed in */
 		m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
...
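
Why BCH_WRITE_ALLOC_NOWAIT here (our reading of the change): copygc runs precisely when free space is tight, so a copygc write that sleeps waiting for the allocator could deadlock against the allocator waiting for copygc to free buckets. Instead the write either gets a bucket from the copygc reserve immediately or fails, and bch2_copygc() below now treats an exhausted reserve as a fatal "stuck" condition. A minimal sketch of the non-blocking pattern, with hypothetical names:

/* nowait_alloc.c - illustrative pattern only, not the bcachefs API */
#include <errno.h>
#include <stdio.h>

struct reserve { unsigned free_buckets; };

/* Non-blocking allocation: return an error instead of sleeping. */
static int bucket_alloc_nowait(struct reserve *r)
{
        if (!r->free_buckets)
                return -ENOSPC; /* caller handles "stuck"; never waits */
        r->free_buckets--;
        return 0;
}

int main(void)
{
        struct reserve copygc_reserve = { .free_buckets = 1 };

        printf("first alloc:  %d\n", bucket_alloc_nowait(&copygc_reserve)); /* 0 */
        printf("second alloc: %d\n", bucket_alloc_nowait(&copygc_reserve)); /* nonzero (-ENOSPC) */
        return 0;
}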
@@ -12,6 +12,7 @@
 #include "buckets.h"
 #include "clock.h"
 #include "disk_groups.h"
+#include "error.h"
 #include "extents.h"
 #include "eytzinger.h"
 #include "io.h"
@@ -104,7 +105,6 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
 	if (dev_idx < 0)
 		return DATA_SKIP;
 
-	/* XXX: use io_opts for this inode */
 	data_opts->target		= io_opts->background_target;
 	data_opts->btree_insert_flags	= BTREE_INSERT_USE_RESERVE;
 	data_opts->rewrite_dev		= dev_idx;
@@ -123,7 +123,7 @@ static bool have_copygc_reserve(struct bch_dev *ca)
 	return ret;
 }
 
-static void bch2_copygc(struct bch_fs *c)
+static int bch2_copygc(struct bch_fs *c)
 {
 	copygc_heap *h = &c->copygc_heap;
 	struct copygc_heap_entry e, *i;
@@ -153,7 +153,7 @@ static void bch2_copygc(struct bch_fs *c)
 		free_heap(&c->copygc_heap);
 		if (!init_heap(&c->copygc_heap, heap_size, GFP_KERNEL)) {
 			bch_err(c, "error allocating copygc heap");
-			return;
+			return 0;
 		}
 	}
@@ -178,6 +178,7 @@ static void bch2_copygc(struct bch_fs *c)
 				continue;
 
 			e = (struct copygc_heap_entry) {
+				.dev		= dev_idx,
 				.gen		= m.gen,
 				.sectors	= bucket_sectors_used(m),
 				.offset		= bucket_to_sector(ca, b),
@@ -187,6 +188,11 @@ static void bch2_copygc(struct bch_fs *c)
 		up_read(&ca->bucket_lock);
 	}
 
+	if (!sectors_reserved) {
+		bch2_fs_fatal_error(c, "stuck, ran out of copygc reserve!");
+		return -1;
+	}
+
 	for (i = h->data; i < h->data + h->used; i++)
 		sectors_to_move += i->sectors;
@@ -198,7 +204,7 @@ static void bch2_copygc(struct bch_fs *c)
 	buckets_to_move = h->used;
 
 	if (!buckets_to_move)
-		return;
+		return 0;
 
 	eytzinger0_sort(h->data, h->used,
 			sizeof(h->data[0]),
@@ -214,10 +220,17 @@ static void bch2_copygc(struct bch_fs *c)
 		down_read(&ca->bucket_lock);
 		buckets = bucket_array(ca);
 		for (i = h->data; i < h->data + h->used; i++) {
-			size_t b = sector_to_bucket(ca, i->offset);
-			struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
+			struct bucket_mark m;
+			size_t b;
 
-			if (i->gen == m.gen && bucket_sectors_used(m)) {
+			if (i->dev != dev_idx)
+				continue;
+
+			b = sector_to_bucket(ca, i->offset);
+			m = READ_ONCE(buckets->b[b].mark);
+
+			if (i->gen == m.gen &&
+			    bucket_sectors_used(m)) {
 				sectors_not_moved += bucket_sectors_used(m);
 				buckets_not_moved++;
 			}
@@ -237,6 +250,7 @@ static void bch2_copygc(struct bch_fs *c)
 	trace_copygc(c,
 		     atomic64_read(&move_stats.sectors_moved), sectors_not_moved,
 		     buckets_to_move, buckets_not_moved);
+	return 0;
 }
 
 /*
@@ -292,7 +306,8 @@ static int bch2_copygc_thread(void *arg)
 			continue;
 		}
 
-		bch2_copygc(c);
+		if (bch2_copygc(c))
+			break;
 	}
 
 	return 0;
@@ -323,8 +338,7 @@ int bch2_copygc_start(struct bch_fs *c)
 	if (bch2_fs_init_fault("copygc_start"))
 		return -ENOMEM;
 
-	t = kthread_create(bch2_copygc_thread, c,
-			   "bch_copygc[%s]", c->name);
+	t = kthread_create(bch2_copygc_thread, c, "bch_copygc");
 	if (IS_ERR(t))
 		return PTR_ERR(t);
...
@@ -1308,12 +1308,20 @@ static bool bch2_fs_may_start(struct bch_fs *c)
 static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
 {
+	/*
+	 * Device going read only means the copygc reserve get smaller, so we
+	 * don't want that happening while copygc is in progress:
+	 */
+	bch2_copygc_stop(c);
+
 	/*
 	 * The allocator thread itself allocates btree nodes, so stop it first:
 	 */
 	bch2_dev_allocator_stop(ca);
 	bch2_dev_allocator_remove(c, ca);
 	bch2_dev_journal_stop(&c->journal, ca);
+
+	bch2_copygc_start(c);
 }
 
 static const char *__bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
...