Commit 1dd7f9d9 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Rewrite journal_seq_blacklist machinery

Now, we store blacklisted journal sequence numbers in the superblock,
not the journal: this greatly simplifies the code, and more importantly
it's now implemented in a way that doesn't require all btree nodes to
be visited before starting the journal - instead, we unconditionally
blacklist the next 4 journal sequence numbers after an unclean
shutdown.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent ece254b2
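
The core idea: a blacklisted sequence number means "any bset claiming to come
from this journal entry is stale - ignore it". Below is a minimal freestanding
C sketch of that rule, for orientation only - in the kernel the ranges live in
struct bch_sb_field_journal_seq_blacklist and are queried through
bch2_journal_seq_is_blacklisted() via an Eytzinger-ordered lookup table, not a
linear scan as here:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* one blacklisted range, half-open: [start, end) */
    struct bl_range { uint64_t start, end; };

    static bool is_blacklisted(const struct bl_range *bl, unsigned nr,
                               uint64_t seq)
    {
        for (unsigned i = 0; i < nr; i++)
            if (seq >= bl[i].start && seq < bl[i].end)
                return true;
        return false;
    }

    int main(void)
    {
        /*
         * After an unclean shutdown, recovery blacklists the next 4
         * sequence numbers unconditionally - any bset claiming one of
         * them is stale and must be ignored:
         */
        uint64_t journal_seq = 100;    /* illustrative value */
        struct bl_range bl[] = { { journal_seq, journal_seq + 4 } };

        for (uint64_t seq = 99; seq < 105; seq++)
            printf("seq %llu: %s\n", (unsigned long long) seq,
                   is_blacklisted(bl, 1, seq) ? "ignore" : "ok");
        return 0;
    }

Because those four sequence numbers are blacklisted up front, recovery no
longer has to visit every btree node to discover stale bsets before the
journal can start.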
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -185,6 +185,7 @@
 #include <linux/closure.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
+#include <linux/math64.h>
 #include <linux/mutex.h>
 #include <linux/percpu-refcount.h>
 #include <linux/percpu-rwsem.h>
@@ -486,6 +487,7 @@ enum {
 	BCH_FS_RW,

 	/* shutdown: */
+	BCH_FS_STOPPING,
 	BCH_FS_EMERGENCY_RO,
 	BCH_FS_WRITE_DISABLE_COMPLETE,
@@ -511,6 +513,15 @@ struct bch_fs_pcpu {
 	u64			sectors_available;
 };

+struct journal_seq_blacklist_table {
+	size_t			nr;
+	struct journal_seq_blacklist_table_entry {
+		u64		start;
+		u64		end;
+		bool		dirty;
+	}			entries[0];
+};
+
 struct bch_fs {
 	struct closure		cl;
@@ -646,6 +657,11 @@ struct bch_fs {
 	struct io_clock		io_clock[2];

+	/* JOURNAL SEQ BLACKLIST */
+	struct journal_seq_blacklist_table *
+				journal_seq_blacklist_table;
+	struct work_struct	journal_seq_blacklist_gc_work;
+
 	/* ALLOCATOR */
 	spinlock_t		freelist_lock;
 	struct closure_waitlist	freelist_wait;
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -909,7 +909,8 @@ struct bch_sb_field {
 	x(quota,	4)		\
 	x(disk_groups,	5)		\
 	x(clean,	6)		\
-	x(replicas,	7)
+	x(replicas,	7)		\
+	x(journal_seq_blacklist, 8)

 enum bch_sb_field_type {
 #define x(f, nr)	BCH_SB_FIELD_##f = nr,
@@ -1124,6 +1125,20 @@ struct bch_sb_field_clean {
 	};
 };

+struct journal_seq_blacklist_entry {
+	__le64			start;
+	__le64			end;
+};
+
+struct bch_sb_field_journal_seq_blacklist {
+	struct bch_sb_field	field;
+
+	union {
+		struct journal_seq_blacklist_entry start[0];
+		__u64		_data[0];
+	};
+};
+
 /* Superblock: */

 /*
@@ -1279,6 +1294,7 @@ enum bch_sb_features {
 	BCH_FEATURE_ZSTD		= 2,
 	BCH_FEATURE_ATOMIC_NLINK	= 3, /* should have gone under compat */
 	BCH_FEATURE_EC			= 4,
+	BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3 = 5,
 	BCH_FEATURE_NR,
 };
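
How the entry count relates to the field size: a variable-length superblock
field records its total size in 64-bit words, and the number of blacklist
ranges falls out of the arithmetic. A standalone sketch with plain integer
types standing in for the kernel's vstruct helpers (the struct names here are
illustrative, mirroring blacklist_nr_entries()/sb_blacklist_u64s() from the
diff below):

    #include <stdint.h>
    #include <stdio.h>

    struct entry { uint64_t start, end; };    /* one blacklisted range */

    struct sb_field {        /* stand-in for struct bch_sb_field */
        uint32_t u64s;       /* total field size, in u64s, header included */
        uint32_t type;
    };

    static unsigned nr_entries(const struct sb_field *f)
    {
        /* bytes after the header, divided by the per-range size: */
        return (f->u64s * sizeof(uint64_t) - sizeof(*f)) /
                sizeof(struct entry);
    }

    static unsigned field_u64s(unsigned nr)
    {
        return (sizeof(struct sb_field) + nr * sizeof(struct entry)) /
                sizeof(uint64_t);
    }

    int main(void)
    {
        struct sb_field f = { .u64s = field_u64s(3) };

        printf("u64s=%u -> %u entries\n", f.u64s, nr_entries(&f)); /* 3 */
        return 0;
    }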
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -770,7 +770,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 	struct btree_node *sorted;
 	struct bkey_packed *k;
 	struct bset *i;
-	bool used_mempool;
+	bool used_mempool, blacklisted;
 	unsigned u64s;
 	int ret, retry_read = 0, write = READ;
@@ -844,20 +844,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 		b->written += sectors;

-		ret = bch2_journal_seq_should_ignore(c, le64_to_cpu(i->journal_seq), b);
-		if (ret < 0) {
-			btree_err(BTREE_ERR_FATAL, c, b, i,
-				  "insufficient memory");
-			goto err;
-		}
+		blacklisted = bch2_journal_seq_is_blacklisted(c,
+					le64_to_cpu(i->journal_seq),
+					true);

-		if (ret) {
-			btree_err_on(first,
-				     BTREE_ERR_FIXABLE, c, b, i,
-				     "first btree node bset has blacklisted journal seq");
-			if (!first)
-				continue;
-		}
+		btree_err_on(blacklisted && first,
+			     BTREE_ERR_FIXABLE, c, b, i,
+			     "first btree node bset has blacklisted journal seq");
+
+		if (blacklisted && !first)
+			continue;

 		bch2_btree_node_iter_large_push(iter, b,
 						i->start,
@@ -930,7 +925,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 out:
 	mempool_free(iter, &c->fill_iter);
 	return retry_read;
-err:
 fsck_err:
 	if (ret == BTREE_RETRY_READ) {
 		retry_read = 1;
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -1156,6 +1156,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
 	if (!btree_iter_node(iter, iter->level))
 		return NULL;

+	bch2_trans_cond_resched(iter->trans);
+
 	btree_iter_up(iter);

 	if (!bch2_btree_node_relock(iter, iter->level))
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -4,8 +4,6 @@
 #include "opts.h"

-#include <linux/math64.h>
-
 extern const char * const bch2_inode_opts[];

 const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c);
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -988,27 +988,57 @@ void bch2_fs_journal_stop(struct journal *j)
 	cancel_delayed_work_sync(&j->reclaim_work);
 }

-void bch2_fs_journal_start(struct journal *j)
+int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
+			  struct list_head *journal_entries)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	struct journal_seq_blacklist *bl;
-	u64 blacklist = 0;
+	struct journal_entry_pin_list *p;
+	struct journal_replay *i;
+	u64 last_seq = cur_seq, nr, seq;
+
+	if (!list_empty(journal_entries))
+		last_seq = le64_to_cpu(list_last_entry(journal_entries,
+						       struct journal_replay,
+						       list)->j.last_seq);
+
+	nr = cur_seq - last_seq;
+
+	if (nr + 1 > j->pin.size) {
+		free_fifo(&j->pin);
+		init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL);
+		if (!j->pin.data) {
+			bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
+			return -ENOMEM;
+		}
+	}
+
+	j->last_seq_ondisk	= last_seq;
+	j->pin.front		= last_seq;
+	j->pin.back		= cur_seq;
+	atomic64_set(&j->seq, cur_seq - 1);
+
+	fifo_for_each_entry_ptr(p, &j->pin, seq) {
+		INIT_LIST_HEAD(&p->list);
+		INIT_LIST_HEAD(&p->flushed);
+		atomic_set(&p->count, 0);
+		p->devs.nr = 0;
+	}

-	list_for_each_entry(bl, &j->seq_blacklist, list)
-		blacklist = max(blacklist, bl->end);
+	list_for_each_entry(i, journal_entries, list) {
+		seq = le64_to_cpu(i->j.seq);
+
+		BUG_ON(seq < last_seq || seq >= cur_seq);
+
+		p = journal_seq_pin(j, seq);
+
+		atomic_set(&p->count, 1);
+		p->devs = i->devs;
+	}

 	spin_lock(&j->lock);

 	set_bit(JOURNAL_STARTED, &j->flags);

-	while (journal_cur_seq(j) < blacklist)
-		journal_pin_new_entry(j, 0);
-
+	/*
+	 * __journal_entry_close() only inits the next journal entry when it
+	 * closes an open journal entry - the very first journal entry gets
+	 * initialized here:
+	 */
 	journal_pin_new_entry(j, 1);
 	bch2_journal_buf_init(j);
@@ -1017,12 +1047,7 @@ void bch2_fs_journal_start(struct journal *j)
 	bch2_journal_space_available(j);
 	spin_unlock(&j->lock);

-	/*
-	 * Adding entries to the next journal entry before allocating space on
-	 * disk for the next journal entry - this is ok, because these entries
-	 * only have to go down with the next journal entry we write:
-	 */
-	bch2_journal_seq_blacklist_write(j);
+	return 0;
 }

 /* init/exit: */
@@ -1090,8 +1115,6 @@ int bch2_fs_journal_init(struct journal *j)
 	INIT_DELAYED_WORK(&j->write_work, journal_write_work);
 	INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work);
 	init_waitqueue_head(&j->pin_flush_wait);
-	mutex_init(&j->blacklist_lock);
-	INIT_LIST_HEAD(&j->seq_blacklist);
 	mutex_init(&j->reclaim_lock);
 	mutex_init(&j->discard_lock);
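
The new bch2_fs_journal_start() sizes the pin FIFO so that every sequence
number in [last_seq, cur_seq] gets a slot, and FIFO slots are indexed by
sequence number against a power-of-two size. A rough standalone illustration -
the masking detail is an assumption about the FIFO's indexing, simplified from
the kernel's fifo macros:

    #include <stdint.h>
    #include <stdio.h>

    /* round up to a power of two, like the kernel's roundup_pow_of_two() */
    static uint64_t roundup_pow2(uint64_t n)
    {
        uint64_t r = 1;

        while (r < n)
            r <<= 1;
        return r;
    }

    int main(void)
    {
        uint64_t last_seq = 1000, cur_seq = 1023;    /* illustrative */
        uint64_t nr = cur_seq - last_seq;
        uint64_t size = roundup_pow2(nr + 1);

        printf("need %llu entries -> fifo size %llu\n",
               (unsigned long long) (nr + 1), (unsigned long long) size);

        /* journal_seq_pin(j, seq) then amounts to indexing seq & (size - 1): */
        uint64_t seq = 1010;
        printf("seq %llu -> slot %llu\n",
               (unsigned long long) seq,
               (unsigned long long) (seq & (size - 1)));
        return 0;
    }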
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -472,8 +472,10 @@ int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
 int bch2_dev_journal_alloc(struct bch_dev *);

 void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
+
 void bch2_fs_journal_stop(struct journal *);
-void bch2_fs_journal_start(struct journal *);
+int bch2_fs_journal_start(struct journal *, u64, struct list_head *);
+
 void bch2_dev_journal_exit(struct bch_dev *);
 int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
 void bch2_fs_journal_exit(struct journal *);
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -10,7 +10,6 @@
 #include "journal.h"
 #include "journal_io.h"
 #include "journal_reclaim.h"
-#include "journal_seq_blacklist.h"
 #include "replicas.h"
 #include "trace.h"
@@ -655,45 +654,11 @@ void bch2_journal_entries_free(struct list_head *list)
 	}
 }

-int bch2_journal_set_seq(struct bch_fs *c, u64 last_seq, u64 end_seq)
-{
-	struct journal *j = &c->journal;
-	struct journal_entry_pin_list *p;
-	u64 seq, nr = end_seq - last_seq + 1;
-
-	if (nr > j->pin.size) {
-		free_fifo(&j->pin);
-		init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL);
-		if (!j->pin.data) {
-			bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
-			return -ENOMEM;
-		}
-	}
-
-	atomic64_set(&j->seq, end_seq);
-	j->last_seq_ondisk = last_seq;
-
-	j->pin.front	= last_seq;
-	j->pin.back	= end_seq + 1;
-
-	fifo_for_each_entry_ptr(p, &j->pin, seq) {
-		INIT_LIST_HEAD(&p->list);
-		INIT_LIST_HEAD(&p->flushed);
-		atomic_set(&p->count, 0);
-		p->devs.nr = 0;
-	}
-
-	return 0;
-}
-
 int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 {
-	struct journal *j = &c->journal;
 	struct journal_list jlist;
 	struct journal_replay *i;
-	struct journal_entry_pin_list *p;
 	struct bch_dev *ca;
-	u64 cur_seq, end_seq;
 	unsigned iter;
 	size_t keys = 0, entries = 0;
 	bool degraded = false;
@@ -725,17 +690,12 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 	if (jlist.ret)
 		return jlist.ret;

-	if (list_empty(list)){
-		bch_err(c, "no journal entries found");
-		return BCH_FSCK_REPAIR_IMPOSSIBLE;
-	}
-
 	list_for_each_entry(i, list, list) {
+		struct jset_entry *entry;
+		struct bkey_i *k, *_n;
 		struct bch_replicas_padded replicas;
 		char buf[80];

-		bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
-
 		ret = jset_validate_entries(c, &i->j, READ);
 		if (ret)
 			goto fsck_err;
@@ -745,6 +705,8 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 		 * the devices - this is wrong:
 		 */

+		bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
+
 		if (!degraded &&
 		    (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
 		     fsck_err_on(!bch2_replicas_marked(c, &replicas.e, false), c,
@@ -755,68 +717,18 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 			if (ret)
 				return ret;
 		}
-	}
-
-	i = list_last_entry(list, struct journal_replay, list);
-
-	ret = bch2_journal_set_seq(c,
-				   le64_to_cpu(i->j.last_seq),
-				   le64_to_cpu(i->j.seq));
-	if (ret)
-		return ret;
-
-	mutex_lock(&j->blacklist_lock);
-
-	list_for_each_entry(i, list, list) {
-		p = journal_seq_pin(j, le64_to_cpu(i->j.seq));
-
-		atomic_set(&p->count, 1);
-		p->devs = i->devs;
-
-		if (bch2_journal_seq_blacklist_read(j, i)) {
-			mutex_unlock(&j->blacklist_lock);
-			return -ENOMEM;
-		}
-	}
-
-	mutex_unlock(&j->blacklist_lock);
-
-	cur_seq = journal_last_seq(j);
-	end_seq = le64_to_cpu(list_last_entry(list,
-				struct journal_replay, list)->j.seq);
-
-	list_for_each_entry(i, list, list) {
-		struct jset_entry *entry;
-		struct bkey_i *k, *_n;
-		bool blacklisted;
-
-		mutex_lock(&j->blacklist_lock);
-
-		while (cur_seq < le64_to_cpu(i->j.seq) &&
-		       bch2_journal_seq_blacklist_find(j, cur_seq))
-			cur_seq++;
-
-		blacklisted = bch2_journal_seq_blacklist_find(j,
-							      le64_to_cpu(i->j.seq));
-
-		mutex_unlock(&j->blacklist_lock);
-
-		fsck_err_on(blacklisted, c,
-			    "found blacklisted journal entry %llu",
-			    le64_to_cpu(i->j.seq));
-
-		fsck_err_on(le64_to_cpu(i->j.seq) != cur_seq, c,
-			    "journal entries %llu-%llu missing! (replaying %llu-%llu)",
-			    cur_seq, le64_to_cpu(i->j.seq) - 1,
-			    journal_last_seq(j), end_seq);
-
-		cur_seq = le64_to_cpu(i->j.seq) + 1;

 		for_each_jset_key(k, _n, entry, &i->j)
 			keys++;
 		entries++;
 	}

-	bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu",
-		 keys, entries, journal_cur_seq(j));
+	if (!list_empty(list)) {
+		i = list_last_entry(list, struct journal_replay, list);
+
+		bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu",
+			 keys, entries, le64_to_cpu(i->j.seq));
+	}
 fsck_err:
 	return ret;
 }
--- a/fs/bcachefs/journal_io.h
+++ b/fs/bcachefs/journal_io.h
@@ -35,7 +35,6 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
 	for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys)	\
 		vstruct_for_each_safe(entry, k, _n)

-int bch2_journal_set_seq(struct bch_fs *c, u64, u64);
 int bch2_journal_read(struct bch_fs *, struct list_head *);
 void bch2_journal_entries_free(struct list_head *);
 int bch2_journal_replay(struct bch_fs *, struct list_head *);
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
-#include "btree_update.h"
-#include "btree_update_interior.h"
-#include "error.h"
-#include "journal.h"
-#include "journal_io.h"
-#include "journal_reclaim.h"
+#include "btree_iter.h"
+#include "eytzinger.h"
 #include "journal_seq_blacklist.h"
+#include "super-io.h"

 /*
  * journal_seq_blacklist machinery:
@@ -37,327 +34,285 @@
  * record that it was blacklisted so that a) on recovery we don't think we have
  * missing journal entries and b) so that the btree code continues to ignore
  * that bset, until that btree node is rewritten.
- *
- * Blacklisted journal sequence numbers are themselves recorded as entries in
- * the journal.
  */

-/*
- * Called when journal needs to evict a blacklist entry to reclaim space: find
- * any btree nodes that refer to the blacklist journal sequence numbers, and
- * rewrite them:
- */
-static void journal_seq_blacklist_flush(struct journal *j,
-					struct journal_entry_pin *pin, u64 seq)
-{
-	struct bch_fs *c =
-		container_of(j, struct bch_fs, journal);
-	struct journal_seq_blacklist *bl =
-		container_of(pin, struct journal_seq_blacklist, pin);
-	struct blacklisted_node n;
-	struct closure cl;
-	unsigned i;
-	int ret;
-
-	closure_init_stack(&cl);
-
-	for (i = 0;; i++) {
-		struct btree_trans trans;
-		struct btree_iter *iter;
-		struct btree *b;
-
-		bch2_trans_init(&trans, c);
-
-		mutex_lock(&j->blacklist_lock);
-		if (i >= bl->nr_entries) {
-			mutex_unlock(&j->blacklist_lock);
-			break;
-		}
-		n = bl->entries[i];
-		mutex_unlock(&j->blacklist_lock);
-
-		iter = bch2_trans_get_node_iter(&trans, n.btree_id, n.pos,
-						0, 0, 0);
-
-		b = bch2_btree_iter_peek_node(iter);
-
-		/* The node might have already been rewritten: */
-
-		if (b->data->keys.seq == n.seq) {
-			ret = bch2_btree_node_rewrite(c, iter, n.seq, 0);
-			if (ret) {
-				bch2_trans_exit(&trans);
-				bch2_fs_fatal_error(c,
-					"error %i rewriting btree node with blacklisted journal seq",
-					ret);
-				bch2_journal_halt(j);
-				return;
-			}
-		}
-
-		bch2_trans_exit(&trans);
-	}
-
-	for (i = 0;; i++) {
-		struct btree_update *as;
-		struct pending_btree_node_free *d;
-
-		mutex_lock(&j->blacklist_lock);
-		if (i >= bl->nr_entries) {
-			mutex_unlock(&j->blacklist_lock);
-			break;
-		}
-		n = bl->entries[i];
-		mutex_unlock(&j->blacklist_lock);
-redo_wait:
-		mutex_lock(&c->btree_interior_update_lock);
-
-		/*
-		 * Is the node on the list of pending interior node updates -
-		 * being freed? If so, wait for that to finish:
-		 */
-		for_each_pending_btree_node_free(c, as, d)
-			if (n.seq == d->seq &&
-			    n.btree_id == d->btree_id &&
-			    !d->level &&
-			    !bkey_cmp(n.pos, d->key.k.p)) {
-				closure_wait(&as->wait, &cl);
-				mutex_unlock(&c->btree_interior_update_lock);
-				closure_sync(&cl);
-				goto redo_wait;
-			}
-
-		mutex_unlock(&c->btree_interior_update_lock);
-	}
-
-	mutex_lock(&j->blacklist_lock);
-
-	bch2_journal_pin_drop(j, &bl->pin);
-	list_del(&bl->list);
-	kfree(bl->entries);
-	kfree(bl);
-
-	mutex_unlock(&j->blacklist_lock);
-}
-
-/*
- * Determine if a particular sequence number is blacklisted - if so, return
- * blacklist entry:
- */
-struct journal_seq_blacklist *
-bch2_journal_seq_blacklist_find(struct journal *j, u64 seq)
-{
-	struct journal_seq_blacklist *bl;
-
-	lockdep_assert_held(&j->blacklist_lock);
-
-	list_for_each_entry(bl, &j->seq_blacklist, list)
-		if (seq >= bl->start && seq <= bl->end)
-			return bl;
-
-	return NULL;
-}
-
-/*
- * Allocate a new, in memory blacklist entry:
- */
-static struct journal_seq_blacklist *
-bch2_journal_seq_blacklisted_new(struct journal *j, u64 start, u64 end)
-{
-	struct journal_seq_blacklist *bl;
-
-	lockdep_assert_held(&j->blacklist_lock);
-
-	/*
-	 * When we start the journal, bch2_journal_start() will skip over @seq:
-	 */
-
-	bl = kzalloc(sizeof(*bl), GFP_KERNEL);
-	if (!bl)
-		return NULL;
-
-	bl->start	= start;
-	bl->end		= end;
-
-	list_add_tail(&bl->list, &j->seq_blacklist);
-	return bl;
-}
-
-/*
- * Returns true if @seq is newer than the most recent journal entry that got
- * written, and data corresponding to @seq should be ignored - also marks @seq
- * as blacklisted so that on future restarts the corresponding data will still
- * be ignored:
- */
-int bch2_journal_seq_should_ignore(struct bch_fs *c, u64 seq, struct btree *b)
-{
-	struct journal *j = &c->journal;
-	struct journal_seq_blacklist *bl = NULL;
-	struct blacklisted_node *n;
-	u64 journal_seq;
-	int ret = 0;
-
-	if (!seq)
-		return 0;
-
-	spin_lock(&j->lock);
-	journal_seq = journal_cur_seq(j);
-	spin_unlock(&j->lock);
-
-	/* Interier updates aren't journalled: */
-	BUG_ON(b->level);
-	BUG_ON(seq > journal_seq && test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags));
-
-	/*
-	 * Decrease this back to j->seq + 2 when we next rev the on disk format:
-	 * increasing it temporarily to work around bug in old kernels
-	 */
-	fsck_err_on(seq > journal_seq + 4, c,
-		    "bset journal seq too far in the future: %llu > %llu",
-		    seq, journal_seq);
-
-	if (seq <= journal_seq &&
-	    list_empty_careful(&j->seq_blacklist))
-		return 0;
-
-	mutex_lock(&j->blacklist_lock);
-
-	if (seq <= journal_seq) {
-		bl = bch2_journal_seq_blacklist_find(j, seq);
-		if (!bl)
-			goto out;
-	} else {
-		bch_verbose(c, "btree node %u:%llu:%llu has future journal sequence number %llu, blacklisting",
-			    b->btree_id, b->key.k.p.inode, b->key.k.p.offset, seq);
-
-		if (!j->new_blacklist) {
-			j->new_blacklist = bch2_journal_seq_blacklisted_new(j,
-						journal_seq + 1,
-						journal_seq + 1);
-			if (!j->new_blacklist) {
-				ret = -ENOMEM;
-				goto out;
-			}
-		}
-		bl = j->new_blacklist;
-		bl->end = max(bl->end, seq);
-	}
-
-	for (n = bl->entries; n < bl->entries + bl->nr_entries; n++)
-		if (b->data->keys.seq == n->seq &&
-		    b->btree_id == n->btree_id &&
-		    !bkey_cmp(b->key.k.p, n->pos))
-			goto found_entry;
-
-	if (!bl->nr_entries ||
-	    is_power_of_2(bl->nr_entries)) {
-		n = krealloc(bl->entries,
-			     max_t(size_t, bl->nr_entries * 2, 8) * sizeof(*n),
-			     GFP_KERNEL);
-		if (!n) {
-			ret = -ENOMEM;
-			goto out;
-		}
-		bl->entries = n;
-	}
-
-	bl->entries[bl->nr_entries++] = (struct blacklisted_node) {
-		.seq		= b->data->keys.seq,
-		.btree_id	= b->btree_id,
-		.pos		= b->key.k.p,
-	};
-found_entry:
-	ret = 1;
-out:
-fsck_err:
-	mutex_unlock(&j->blacklist_lock);
-	return ret;
-}
-
-static int __bch2_journal_seq_blacklist_read(struct journal *j,
-					     struct journal_replay *i,
-					     u64 start, u64 end)
-{
-	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	struct journal_seq_blacklist *bl;
-
-	bch_verbose(c, "blacklisting existing journal seq %llu-%llu",
-		    start, end);
-
-	bl = bch2_journal_seq_blacklisted_new(j, start, end);
-	if (!bl)
-		return -ENOMEM;
-
-	bch2_journal_pin_add(j, le64_to_cpu(i->j.seq), &bl->pin,
-			     journal_seq_blacklist_flush);
-	return 0;
-}
-
-/*
- * After reading the journal, find existing journal seq blacklist entries and
- * read them into memory:
- */
-int bch2_journal_seq_blacklist_read(struct journal *j,
-				    struct journal_replay *i)
-{
-	struct jset_entry *entry;
-	int ret = 0;
-
-	vstruct_for_each(&i->j, entry) {
-		switch (entry->type) {
-		case BCH_JSET_ENTRY_blacklist: {
-			struct jset_entry_blacklist *bl_entry =
-				container_of(entry, struct jset_entry_blacklist, entry);
-
-			ret = __bch2_journal_seq_blacklist_read(j, i,
-					le64_to_cpu(bl_entry->seq),
-					le64_to_cpu(bl_entry->seq));
-			break;
-		}
-		case BCH_JSET_ENTRY_blacklist_v2: {
-			struct jset_entry_blacklist_v2 *bl_entry =
-				container_of(entry, struct jset_entry_blacklist_v2, entry);
-
-			ret = __bch2_journal_seq_blacklist_read(j, i,
-					le64_to_cpu(bl_entry->start),
-					le64_to_cpu(bl_entry->end));
-			break;
-		}
-		}
-
-		if (ret)
-			break;
-	}
-
-	return ret;
-}
-
-/*
- * After reading the journal and walking the btree, we might have new journal
- * sequence numbers to blacklist - add entries to the next journal entry to be
- * written:
- */
-void bch2_journal_seq_blacklist_write(struct journal *j)
-{
-	struct journal_seq_blacklist *bl = j->new_blacklist;
-	struct jset_entry_blacklist_v2 *bl_entry;
-	struct jset_entry *entry;
-
-	if (!bl)
-		return;
-
-	entry = bch2_journal_add_entry_noreservation(journal_cur_buf(j),
-			(sizeof(*bl_entry) - sizeof(*entry)) / sizeof(u64));
-
-	bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry);
-	bl_entry->entry.type	= BCH_JSET_ENTRY_blacklist_v2;
-	bl_entry->start		= cpu_to_le64(bl->start);
-	bl_entry->end		= cpu_to_le64(bl->end);
-
-	bch2_journal_pin_add(j,
-			     journal_cur_seq(j),
-			     &bl->pin,
-			     journal_seq_blacklist_flush);
-
-	j->new_blacklist = NULL;
-}
+static unsigned
+blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
+{
+	return bl
+		? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
+		   sizeof(struct journal_seq_blacklist_entry))
+		: 0;
+}
+
+static unsigned sb_blacklist_u64s(unsigned nr)
+{
+	struct bch_sb_field_journal_seq_blacklist *bl;
+
+	return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
+}
+
+static struct bch_sb_field_journal_seq_blacklist *
+blacklist_entry_try_merge(struct bch_fs *c,
+			  struct bch_sb_field_journal_seq_blacklist *bl,
+			  unsigned i)
+{
+	unsigned nr = blacklist_nr_entries(bl);
+
+	if (le64_to_cpu(bl->start[i].end) >=
+	    le64_to_cpu(bl->start[i + 1].start)) {
+		bl->start[i].end = bl->start[i + 1].end;
+		--nr;
+		memmove(&bl->start[i],
+			&bl->start[i + 1],
+			sizeof(bl->start[0]) * (nr - i));
+
+		bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+							  sb_blacklist_u64s(nr));
+		BUG_ON(!bl);
+	}
+
+	return bl;
+}
+
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
+{
+	struct bch_sb_field_journal_seq_blacklist *bl;
+	unsigned i, nr;
+	int ret = 0;
+
+	mutex_lock(&c->sb_lock);
+	bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+	nr = blacklist_nr_entries(bl);
+
+	if (bl) {
+		for (i = 0; i < nr; i++) {
+			struct journal_seq_blacklist_entry *e =
+				bl->start + i;
+
+			if (start == le64_to_cpu(e->start) &&
+			    end == le64_to_cpu(e->end))
+				goto out;
+
+			if (start <= le64_to_cpu(e->start) &&
+			    end >= le64_to_cpu(e->end)) {
+				e->start = cpu_to_le64(start);
+				e->end = cpu_to_le64(end);
+
+				if (i + 1 < nr)
+					bl = blacklist_entry_try_merge(c,
+								bl, i);
+				if (i)
+					bl = blacklist_entry_try_merge(c,
+								bl, i - 1);
+				goto out_write_sb;
+			}
+		}
+	}
+
+	bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+						  sb_blacklist_u64s(nr + 1));
+	if (!bl) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	bl->start[nr].start	= cpu_to_le64(start);
+	bl->start[nr].end	= cpu_to_le64(end);
+out_write_sb:
+	c->disk_sb.sb->features[0] |=
+		1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3;
+
+	ret = bch2_write_super(c);
+out:
+	mutex_unlock(&c->sb_lock);
+
+	return ret;
+}
+
+static int journal_seq_blacklist_table_cmp(const void *_l,
+					   const void *_r, size_t size)
+{
+	const struct journal_seq_blacklist_table_entry *l = _l;
+	const struct journal_seq_blacklist_table_entry *r = _r;
+
+	return (l->start > r->start) - (l->start < r->start);
+}
+
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
+				     bool dirty)
+{
+	struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
+	struct journal_seq_blacklist_table_entry search = { .start = seq };
+	int idx;
+
+	if (!t)
+		return false;
+
+	idx = eytzinger0_find_le(t->entries, t->nr,
+				 sizeof(t->entries[0]),
+				 journal_seq_blacklist_table_cmp,
+				 &search);
+	if (idx < 0)
+		return false;
+
+	BUG_ON(t->entries[idx].start > seq);
+
+	if (seq >= t->entries[idx].end)
+		return false;
+
+	if (dirty)
+		t->entries[idx].dirty = true;
+	return true;
+}
+
+int bch2_blacklist_table_initialize(struct bch_fs *c)
+{
+	struct bch_sb_field_journal_seq_blacklist *bl =
+		bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+	struct journal_seq_blacklist_table *t;
+	unsigned i, nr = blacklist_nr_entries(bl);
+
+	BUG_ON(c->journal_seq_blacklist_table);
+
+	if (!bl)
+		return 0;
+
+	t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
+		    GFP_KERNEL);
+	if (!t)
+		return -ENOMEM;
+
+	t->nr = nr;
+
+	for (i = 0; i < nr; i++) {
+		t->entries[i].start	= le64_to_cpu(bl->start[i].start);
+		t->entries[i].end	= le64_to_cpu(bl->start[i].end);
+	}
+
+	eytzinger0_sort(t->entries,
+			t->nr,
+			sizeof(t->entries[0]),
+			journal_seq_blacklist_table_cmp,
+			NULL);
+
+	c->journal_seq_blacklist_table = t;
+	return 0;
+}
+
+static const char *
+bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
+				       struct bch_sb_field *f)
+{
+	struct bch_sb_field_journal_seq_blacklist *bl =
+		field_to_type(f, journal_seq_blacklist);
+	struct journal_seq_blacklist_entry *i;
+	unsigned nr = blacklist_nr_entries(bl);
+
+	for (i = bl->start; i < bl->start + nr; i++) {
+		if (le64_to_cpu(i->start) >=
+		    le64_to_cpu(i->end))
+			return "entry start >= end";
+
+		if (i + 1 < bl->start + nr &&
+		    le64_to_cpu(i[0].end) >
+		    le64_to_cpu(i[1].start))
+			return "entries out of order";
+	}
+
+	return NULL;
+}
+
+static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
+						  struct bch_sb *sb,
+						  struct bch_sb_field *f)
+{
+	struct bch_sb_field_journal_seq_blacklist *bl =
+		field_to_type(f, journal_seq_blacklist);
+	struct journal_seq_blacklist_entry *i;
+	unsigned nr = blacklist_nr_entries(bl);
+
+	for (i = bl->start; i < bl->start + nr; i++) {
+		if (i != bl->start)
+			pr_buf(out, " ");
+
+		pr_buf(out, "%llu-%llu",
+		       le64_to_cpu(i->start),
+		       le64_to_cpu(i->end));
+	}
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
+	.validate	= bch2_sb_journal_seq_blacklist_validate,
+	.to_text	= bch2_sb_journal_seq_blacklist_to_text
+};
+
+void bch2_blacklist_entries_gc(struct work_struct *work)
+{
+	struct bch_fs *c = container_of(work, struct bch_fs,
+					journal_seq_blacklist_gc_work);
+	struct journal_seq_blacklist_table *t;
+	struct bch_sb_field_journal_seq_blacklist *bl;
+	struct journal_seq_blacklist_entry *src, *dst;
+	struct btree_trans trans;
+	unsigned i, nr, new_nr;
+	int ret;
+
+	bch2_trans_init(&trans, c);
+
+	for (i = 0; i < BTREE_ID_NR; i++) {
+		struct btree_iter *iter;
+		struct btree *b;
+
+		for_each_btree_node(&trans, iter, i, POS_MIN,
+				    BTREE_ITER_PREFETCH, b)
+			if (test_bit(BCH_FS_STOPPING, &c->flags)) {
+				bch2_trans_exit(&trans);
+				return;
+			}
+		bch2_trans_iter_free(&trans, iter);
+	}
+
+	ret = bch2_trans_exit(&trans);
+	if (ret)
+		return;
+
+	mutex_lock(&c->sb_lock);
+	bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+	if (!bl)
+		goto out;
+
+	nr = blacklist_nr_entries(bl);
+	dst = bl->start;
+
+	t = c->journal_seq_blacklist_table;
+	BUG_ON(nr != t->nr);
+
+	for (src = bl->start, i = eytzinger0_first(t->nr);
+	     src < bl->start + nr;
+	     src++, i = eytzinger0_next(i, nr)) {
+		BUG_ON(t->entries[i].start	!= le64_to_cpu(src->start));
+		BUG_ON(t->entries[i].end	!= le64_to_cpu(src->end));
+
+		if (t->entries[i].dirty)
+			*dst++ = *src;
+	}
+
+	new_nr = dst - bl->start;
+
+	bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);
+
+	if (new_nr != nr) {
+		bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+				new_nr ? sb_blacklist_u64s(new_nr) : 0);
+		BUG_ON(new_nr && !bl);
+
+		if (!new_nr)
+			c->disk_sb.sb->features[0] &=
+				~(1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3);
+
+		bch2_write_super(c);
+	}
+out:
+	mutex_unlock(&c->sb_lock);
+}
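
bch2_journal_seq_is_blacklisted() does a "find the largest start <= seq"
lookup in an Eytzinger-ordered array, then checks the candidate range's end.
A self-contained sketch of that search over a plain uint64_t array -
simplified, since the kernel's eytzinger0_find_le() is generic over entry
size and comparator:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * 0-based Eytzinger (BFS) layout: the children of node i live at
     * 2*i + 1 and 2*i + 2, so the search walks one contiguous,
     * cache-friendly array instead of chasing pointers.
     */
    static int eytzinger_find_le(const uint64_t *tree, int nr, uint64_t key)
    {
        int i = 0, best = -1;

        while (i < nr) {
            if (tree[i] <= key) {
                best = i;           /* candidate; larger values are right */
                i = 2 * i + 2;
            } else {
                i = 2 * i + 1;
            }
        }
        return best;                /* -1 if every element is > key */
    }

    int main(void)
    {
        /* sorted keys {10,20,30,40,50,60,70} stored in BFS order: */
        const uint64_t tree[] = { 40, 20, 60, 10, 30, 50, 70 };
        int idx = eytzinger_find_le(tree, 7, 35);

        /*
         * idx points at the greatest start <= 35 (here: 30); the kernel
         * then checks seq < entries[idx].end to decide whether seq falls
         * inside a blacklisted range.
         */
        printf("found index %d, value %llu\n", idx,
               (unsigned long long) tree[idx]);
        return 0;
    }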
--- a/fs/bcachefs/journal_seq_blacklist.h
+++ b/fs/bcachefs/journal_seq_blacklist.h
@@ -2,13 +2,12 @@
 #ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
 #define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H

-struct journal_replay;
-
-struct journal_seq_blacklist *
-bch2_journal_seq_blacklist_find(struct journal *, u64);
-int bch2_journal_seq_should_ignore(struct bch_fs *, u64, struct btree *);
-int bch2_journal_seq_blacklist_read(struct journal *,
-				    struct journal_replay *);
-void bch2_journal_seq_blacklist_write(struct journal *);
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool);
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64);
+int bch2_blacklist_table_initialize(struct bch_fs *);
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist;
+
+void bch2_blacklist_entries_gc(struct work_struct *);

 #endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -54,24 +54,6 @@ struct journal_entry_pin {
 	u64			seq;
 };

-/* corresponds to a btree node with a blacklisted bset: */
-struct blacklisted_node {
-	__le64			seq;
-	enum btree_id		btree_id;
-	struct bpos		pos;
-};
-
-struct journal_seq_blacklist {
-	struct list_head	list;
-	u64			start;
-	u64			end;
-
-	struct journal_entry_pin pin;
-
-	struct blacklisted_node	*entries;
-	size_t			nr_entries;
-};
-
 struct journal_res {
 	bool			ref;
 	u8			idx;
@@ -222,10 +204,6 @@ struct journal {
 	u64			replay_journal_seq;

-	struct mutex		blacklist_lock;
-	struct list_head	seq_blacklist;
-	struct journal_seq_blacklist *new_blacklist;
-
 	struct write_point	wp;
 	spinlock_t		err_lock;
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -12,6 +12,7 @@
 #include "error.h"
 #include "fsck.h"
 #include "journal_io.h"
+#include "journal_seq_blacklist.h"
 #include "quota.h"
 #include "recovery.h"
 #include "replicas.h"
@@ -99,18 +100,49 @@ static int verify_superblock_clean(struct bch_fs *c,
 	return ret;
 }

+static int
+verify_journal_entries_not_blacklisted_or_missing(struct bch_fs *c,
+						  struct list_head *journal)
+{
+	struct journal_replay *i =
+		list_last_entry(journal, struct journal_replay, list);
+	u64 start_seq	= le64_to_cpu(i->j.last_seq);
+	u64 end_seq	= le64_to_cpu(i->j.seq);
+	u64 seq		= start_seq;
+	int ret = 0;
+
+	list_for_each_entry(i, journal, list) {
+		fsck_err_on(seq != le64_to_cpu(i->j.seq), c,
+			"journal entries %llu-%llu missing! (replaying %llu-%llu)",
+			seq, le64_to_cpu(i->j.seq) - 1,
+			start_seq, end_seq);
+
+		seq = le64_to_cpu(i->j.seq);
+
+		fsck_err_on(bch2_journal_seq_is_blacklisted(c, seq, false), c,
+			    "found blacklisted journal entry %llu", seq);
+
+		do {
+			seq++;
+		} while (bch2_journal_seq_is_blacklisted(c, seq, false));
+	}
+fsck_err:
+	return ret;
+}
+
 static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
 {
 	struct bch_sb_field_clean *clean, *sb_clean;
+	int ret;
+
+	if (!c->sb.clean)
+		return NULL;

 	mutex_lock(&c->sb_lock);
 	sb_clean = bch2_sb_get_clean(c->disk_sb.sb);
-	if (!sb_clean) {
+
+	if (fsck_err_on(!sb_clean, c,
+			"superblock marked clean but clean section not present")) {
+		SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+		c->sb.clean = false;
 		mutex_unlock(&c->sb_lock);
-		bch_err(c, "superblock marked clean but clean section not present");
 		return NULL;
 	}
@@ -128,6 +160,9 @@ static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
 	mutex_unlock(&c->sb_lock);

 	return clean;
+fsck_err:
+	mutex_unlock(&c->sb_lock);
+	return ERR_PTR(ret);
 }

 static int journal_replay_entry_early(struct bch_fs *c,
@@ -179,14 +214,32 @@ static int journal_replay_entry_early(struct bch_fs *c,
 			le64_to_cpu(u->v));
 		break;
 	}
+	case BCH_JSET_ENTRY_blacklist: {
+		struct jset_entry_blacklist *bl_entry =
+			container_of(entry, struct jset_entry_blacklist, entry);
+
+		ret = bch2_journal_seq_blacklist_add(c,
+				le64_to_cpu(bl_entry->seq),
+				le64_to_cpu(bl_entry->seq) + 1);
+		break;
+	}
+	case BCH_JSET_ENTRY_blacklist_v2: {
+		struct jset_entry_blacklist_v2 *bl_entry =
+			container_of(entry, struct jset_entry_blacklist_v2, entry);
+
+		ret = bch2_journal_seq_blacklist_add(c,
+				le64_to_cpu(bl_entry->start),
+				le64_to_cpu(bl_entry->end) + 1);
+		break;
+	}
 	}

 	return ret;
 }

-static int load_journal_metadata(struct bch_fs *c,
-				 struct bch_sb_field_clean *clean,
-				 struct list_head *journal)
+static int journal_replay_early(struct bch_fs *c,
+				struct bch_sb_field_clean *clean,
+				struct list_head *journal)
 {
 	struct jset_entry *entry;
 	int ret;
@@ -300,37 +353,76 @@ static bool journal_empty(struct list_head *journal)
 int bch2_fs_recovery(struct bch_fs *c)
 {
 	const char *err = "cannot allocate memory";
-	struct bch_sb_field_clean *clean;
+	struct bch_sb_field_clean *clean = NULL;
+	u64 journal_seq;
 	LIST_HEAD(journal);
 	int ret;

-	clean = read_superblock_clean(c);
-	if (clean)
+	if (c->sb.clean)
+		clean = read_superblock_clean(c);
+	ret = PTR_ERR_OR_ZERO(clean);
+	if (ret)
+		goto err;
+
+	if (c->sb.clean)
 		bch_info(c, "recovering from clean shutdown, journal seq %llu",
 			 le64_to_cpu(clean->journal_seq));

-	if (!clean || c->opts.fsck) {
+	if (!c->replicas.entries) {
+		bch_info(c, "building replicas info");
+		set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
+	}
+
+	if (!c->sb.clean || c->opts.fsck) {
+		struct jset *j;
+
 		ret = bch2_journal_read(c, &journal);
 		if (ret)
 			goto err;

-		ret = verify_superblock_clean(c, &clean,
-				&list_last_entry(&journal, struct journal_replay,
-						 list)->j);
+		fsck_err_on(c->sb.clean && !journal_empty(&journal), c,
+			    "filesystem marked clean but journal not empty");
+
+		if (!c->sb.clean && list_empty(&journal)){
+			bch_err(c, "no journal entries found");
+			ret = BCH_FSCK_REPAIR_IMPOSSIBLE;
+			goto err;
+		}
+
+		j = &list_last_entry(&journal, struct journal_replay, list)->j;
+
+		ret = verify_superblock_clean(c, &clean, j);
 		if (ret)
 			goto err;
+
+		journal_seq = le64_to_cpu(j->seq) + 1;
 	} else {
-		ret = bch2_journal_set_seq(c,
-					   le64_to_cpu(clean->journal_seq),
-					   le64_to_cpu(clean->journal_seq));
-		if (ret)
-			goto err;
+		journal_seq = le64_to_cpu(clean->journal_seq) + 1;
 	}

-	fsck_err_on(clean && !journal_empty(&journal), c,
-		    "filesystem marked clean but journal not empty");
+	ret = journal_replay_early(c, clean, &journal);
+	if (ret)
+		goto err;

-	ret = load_journal_metadata(c, clean, &journal);
+	if (!c->sb.clean) {
+		ret = bch2_journal_seq_blacklist_add(c,
+						     journal_seq,
+						     journal_seq + 4);
+		if (ret) {
+			bch_err(c, "error creating new journal seq blacklist entry");
+			goto err;
+		}
+
+		journal_seq += 4;
+	}
+
+	ret = bch2_blacklist_table_initialize(c);
+
+	ret = verify_journal_entries_not_blacklisted_or_missing(c, &journal);
+	if (ret)
+		goto err;
+
+	ret = bch2_fs_journal_start(&c->journal, journal_seq, &journal);
 	if (ret)
 		goto err;
@@ -351,11 +443,6 @@ int bch2_fs_recovery(struct bch_fs *c)

 	set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);

-	if (!c->replicas.entries) {
-		bch_info(c, "building replicas info");
-		set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
-	}
-
 	if (c->opts.fsck ||
 	    !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
 	    test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
@@ -377,13 +464,6 @@ int bch2_fs_recovery(struct bch_fs *c)
 	if (c->sb.encryption_type && !c->sb.clean)
 		atomic64_add(1 << 16, &c->key_version);

-	/*
-	 * bch2_fs_journal_start() can't happen sooner, or btree_gc_finish()
-	 * will give spurious errors about oldest_gen > bucket_gen -
-	 * this is a hack but oh well.
-	 */
-	bch2_fs_journal_start(&c->journal);
-
 	if (c->opts.noreplay)
 		goto out;
@@ -424,6 +504,10 @@ int bch2_fs_recovery(struct bch_fs *c)
 		SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
 	}
 	mutex_unlock(&c->sb_lock);
+
+	if (c->journal_seq_blacklist_table &&
+	    c->journal_seq_blacklist_table->nr > 128)
+		queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);
 out:
 	bch2_journal_entries_free(&journal);
 	kfree(clean);
@@ -472,7 +556,7 @@ int bch2_fs_initialize(struct bch_fs *c)
 	 * journal_res_get() will crash if called before this has
 	 * set up the journal.pin FIFO and journal.cur pointer:
 	 */
-	bch2_fs_journal_start(&c->journal);
+	bch2_fs_journal_start(&c->journal, 1, &journal);
 	bch2_journal_set_replay_done(&c->journal);

 	err = "error going read write";
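
verify_journal_entries_not_blacklisted_or_missing() walks the journal entries
in order, requiring every gap in sequence numbers to be covered by a blacklist
entry and no surviving entry to itself be blacklisted. A toy version of the
same invariant check - values and helper names here are made up for
illustration:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* stand-in for bch2_journal_seq_is_blacklisted() */
    static bool seq_is_blacklisted(uint64_t seq)
    {
        return seq >= 8 && seq < 12;    /* pretend [8,12) was blacklisted */
    }

    static bool verify_entries(const uint64_t *found, size_t nr,
                               uint64_t start_seq)
    {
        uint64_t seq = start_seq;

        for (size_t i = 0; i < nr; i++) {
            /* a gap not covered by the blacklist means lost entries: */
            if (found[i] != seq) {
                fprintf(stderr, "entries %llu-%llu missing!\n",
                        (unsigned long long) seq,
                        (unsigned long long) (found[i] - 1));
                return false;
            }

            /* a blacklisted entry should never have survived: */
            if (seq_is_blacklisted(found[i])) {
                fprintf(stderr, "found blacklisted entry %llu\n",
                        (unsigned long long) found[i]);
                return false;
            }

            /* advance, skipping over blacklisted sequence numbers: */
            do {
                seq++;
            } while (seq_is_blacklisted(seq));
        }
        return true;
    }

    int main(void)
    {
        const uint64_t found[] = { 5, 6, 7, 12, 13 };   /* 8-11 blacklisted */

        printf("journal consistent: %d\n",
               verify_entries(found, 5, 5));    /* prints 1 */
        return 0;
    }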
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -7,6 +7,7 @@
 #include "error.h"
 #include "io.h"
 #include "journal.h"
+#include "journal_seq_blacklist.h"
 #include "replicas.h"
 #include "quota.h"
 #include "super-io.h"
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -30,6 +30,7 @@
 #include "io.h"
 #include "journal.h"
 #include "journal_reclaim.h"
+#include "journal_seq_blacklist.h"
 #include "move.h"
 #include "migrate.h"
 #include "movinggc.h"
@@ -468,6 +469,7 @@ static void bch2_fs_free(struct bch_fs *c)
 	kfree(c->replicas.entries);
 	kfree(c->replicas_gc.entries);
 	kfree(rcu_dereference_protected(c->disk_groups, 1));
+	kfree(c->journal_seq_blacklist_table);

 	if (c->journal_reclaim_wq)
 		destroy_workqueue(c->journal_reclaim_wq);
@@ -496,6 +498,10 @@ void bch2_fs_stop(struct bch_fs *c)
 	bch_verbose(c, "shutting down");

+	set_bit(BCH_FS_STOPPING, &c->flags);
+
+	cancel_work_sync(&c->journal_seq_blacklist_gc_work);
+
 	for_each_member_device(ca, c, i)
 		if (ca->kobj.state_in_sysfs &&
 		    ca->disk_sb.bdev)
@@ -631,6 +637,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	spin_lock_init(&c->btree_write_error_lock);
 	INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work);

+	INIT_WORK(&c->journal_seq_blacklist_gc_work,
+		  bch2_blacklist_entries_gc);
+
 	INIT_LIST_HEAD(&c->fsck_errors);
 	mutex_init(&c->fsck_error_lock);