Commit 1dd7f9d9, authored and committed by Kent Overstreet

bcachefs: Rewrite journal_seq_blacklist machinery

Now, we store blacklisted journal sequence numbers in the superblock,
not the journal: this helps to greatly simplify the code, and more
importantly it's now implemented in a way that doesn't require all btree
nodes to be visited before starting the journal - instead, we
unconditionally blacklist the next 4 journal sequence numbers after an
unclean shutdown.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent ece254b2
...@@ -185,6 +185,7 @@ ...@@ -185,6 +185,7 @@
#include <linux/closure.h> #include <linux/closure.h>
#include <linux/kobject.h> #include <linux/kobject.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/math64.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/percpu-refcount.h> #include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h> #include <linux/percpu-rwsem.h>
...@@ -486,6 +487,7 @@ enum { ...@@ -486,6 +487,7 @@ enum {
BCH_FS_RW, BCH_FS_RW,
/* shutdown: */ /* shutdown: */
BCH_FS_STOPPING,
BCH_FS_EMERGENCY_RO, BCH_FS_EMERGENCY_RO,
BCH_FS_WRITE_DISABLE_COMPLETE, BCH_FS_WRITE_DISABLE_COMPLETE,
...@@ -511,6 +513,15 @@ struct bch_fs_pcpu { ...@@ -511,6 +513,15 @@ struct bch_fs_pcpu {
u64 sectors_available; u64 sectors_available;
}; };
/*
 * In-memory table of blacklisted journal sequence number ranges.
 *
 * struct bch_fs holds a pointer to one of these
 * (c->journal_seq_blacklist_table), and bch2_fs_free() releases it with
 * kfree(). The entries are stored inline after the header, so the table is
 * declared with a C99 flexible array member rather than a zero-length array.
 */
struct journal_seq_blacklist_table {
	/*
	 * Entry count; recovery queues the blacklist gc work when this
	 * exceeds 128.
	 */
	size_t			nr;
	struct journal_seq_blacklist_table_entry {
		/* Bounds of the blacklisted range, in CPU byte order. */
		u64		start;
		u64		end;
		/*
		 * NOTE(review): users of this flag are in the collapsed
		 * journal_seq_blacklist.c diff - confirm its meaning there.
		 */
		bool		dirty;
	}			entries[];
};
struct bch_fs { struct bch_fs {
struct closure cl; struct closure cl;
...@@ -646,6 +657,11 @@ struct bch_fs { ...@@ -646,6 +657,11 @@ struct bch_fs {
struct io_clock io_clock[2]; struct io_clock io_clock[2];
/* JOURNAL SEQ BLACKLIST */
struct journal_seq_blacklist_table *
journal_seq_blacklist_table;
struct work_struct journal_seq_blacklist_gc_work;
/* ALLOCATOR */ /* ALLOCATOR */
spinlock_t freelist_lock; spinlock_t freelist_lock;
struct closure_waitlist freelist_wait; struct closure_waitlist freelist_wait;
......
...@@ -909,7 +909,8 @@ struct bch_sb_field { ...@@ -909,7 +909,8 @@ struct bch_sb_field {
x(quota, 4) \ x(quota, 4) \
x(disk_groups, 5) \ x(disk_groups, 5) \
x(clean, 6) \ x(clean, 6) \
x(replicas, 7) x(replicas, 7) \
x(journal_seq_blacklist, 8)
enum bch_sb_field_type { enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr, #define x(f, nr) BCH_SB_FIELD_##f = nr,
...@@ -1124,6 +1125,20 @@ struct bch_sb_field_clean { ...@@ -1124,6 +1125,20 @@ struct bch_sb_field_clean {
}; };
}; };
/*
 * One range of blacklisted journal sequence numbers; both bounds are stored
 * as little-endian 64 bit integers.
 */
struct journal_seq_blacklist_entry {
/* First sequence number of the range. */
__le64 start;
/*
 * NOTE(review): presumably exclusive - callers of
 * bch2_journal_seq_blacklist_add() pass (seq, seq + 1) for a single sequence
 * number - confirm against the on-disk format.
 */
__le64 end;
};
/*
 * Superblock field holding the journal sequence number blacklist (field type
 * BCH_SB_FIELD_journal_seq_blacklist, number 8).
 *
 * The union gives two views of the payload that follows the common field
 * header: an array of blacklist entries, or raw 64 bit words.
 * NOTE(review): the number of entries is presumably derived from the size
 * recorded in the field header - confirm.
 */
struct bch_sb_field_journal_seq_blacklist {
/* Common superblock field header. */
struct bch_sb_field field;
union {
/* Payload viewed as blacklist ranges. */
struct journal_seq_blacklist_entry start[0];
/* Payload viewed as raw 64 bit words. */
__u64 _data[0];
};
};
/* Superblock: */ /* Superblock: */
/* /*
...@@ -1279,6 +1294,7 @@ enum bch_sb_features { ...@@ -1279,6 +1294,7 @@ enum bch_sb_features {
BCH_FEATURE_ZSTD = 2, BCH_FEATURE_ZSTD = 2,
BCH_FEATURE_ATOMIC_NLINK = 3, /* should have gone under compat */ BCH_FEATURE_ATOMIC_NLINK = 3, /* should have gone under compat */
BCH_FEATURE_EC = 4, BCH_FEATURE_EC = 4,
BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3 = 5,
BCH_FEATURE_NR, BCH_FEATURE_NR,
}; };
......
...@@ -770,7 +770,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry ...@@ -770,7 +770,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
struct btree_node *sorted; struct btree_node *sorted;
struct bkey_packed *k; struct bkey_packed *k;
struct bset *i; struct bset *i;
bool used_mempool; bool used_mempool, blacklisted;
unsigned u64s; unsigned u64s;
int ret, retry_read = 0, write = READ; int ret, retry_read = 0, write = READ;
...@@ -844,20 +844,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry ...@@ -844,20 +844,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
b->written += sectors; b->written += sectors;
ret = bch2_journal_seq_should_ignore(c, le64_to_cpu(i->journal_seq), b); blacklisted = bch2_journal_seq_is_blacklisted(c,
if (ret < 0) { le64_to_cpu(i->journal_seq),
btree_err(BTREE_ERR_FATAL, c, b, i, true);
"insufficient memory");
goto err;
}
if (ret) { btree_err_on(blacklisted && first,
btree_err_on(first,
BTREE_ERR_FIXABLE, c, b, i, BTREE_ERR_FIXABLE, c, b, i,
"first btree node bset has blacklisted journal seq"); "first btree node bset has blacklisted journal seq");
if (!first) if (blacklisted && !first)
continue; continue;
}
bch2_btree_node_iter_large_push(iter, b, bch2_btree_node_iter_large_push(iter, b,
i->start, i->start,
...@@ -930,7 +925,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry ...@@ -930,7 +925,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
out: out:
mempool_free(iter, &c->fill_iter); mempool_free(iter, &c->fill_iter);
return retry_read; return retry_read;
err:
fsck_err: fsck_err:
if (ret == BTREE_RETRY_READ) { if (ret == BTREE_RETRY_READ) {
retry_read = 1; retry_read = 1;
......
...@@ -1156,6 +1156,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth) ...@@ -1156,6 +1156,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
if (!btree_iter_node(iter, iter->level)) if (!btree_iter_node(iter, iter->level))
return NULL; return NULL;
bch2_trans_cond_resched(iter->trans);
btree_iter_up(iter); btree_iter_up(iter);
if (!bch2_btree_node_relock(iter, iter->level)) if (!bch2_btree_node_relock(iter, iter->level))
......
...@@ -4,8 +4,6 @@ ...@@ -4,8 +4,6 @@
#include "opts.h" #include "opts.h"
#include <linux/math64.h>
extern const char * const bch2_inode_opts[]; extern const char * const bch2_inode_opts[];
const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c); const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c);
......
...@@ -988,27 +988,57 @@ void bch2_fs_journal_stop(struct journal *j) ...@@ -988,27 +988,57 @@ void bch2_fs_journal_stop(struct journal *j)
cancel_delayed_work_sync(&j->reclaim_work); cancel_delayed_work_sync(&j->reclaim_work);
} }
void bch2_fs_journal_start(struct journal *j) int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
struct list_head *journal_entries)
{ {
struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_seq_blacklist *bl; struct journal_entry_pin_list *p;
u64 blacklist = 0; struct journal_replay *i;
u64 last_seq = cur_seq, nr, seq;
if (!list_empty(journal_entries))
last_seq = le64_to_cpu(list_last_entry(journal_entries,
struct journal_replay,
list)->j.last_seq);
nr = cur_seq - last_seq;
if (nr + 1 > j->pin.size) {
free_fifo(&j->pin);
init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL);
if (!j->pin.data) {
bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
return -ENOMEM;
}
}
j->last_seq_ondisk = last_seq;
j->pin.front = last_seq;
j->pin.back = cur_seq;
atomic64_set(&j->seq, cur_seq - 1);
fifo_for_each_entry_ptr(p, &j->pin, seq) {
INIT_LIST_HEAD(&p->list);
INIT_LIST_HEAD(&p->flushed);
atomic_set(&p->count, 0);
p->devs.nr = 0;
}
list_for_each_entry(i, journal_entries, list) {
seq = le64_to_cpu(i->j.seq);
list_for_each_entry(bl, &j->seq_blacklist, list) BUG_ON(seq < last_seq || seq >= cur_seq);
blacklist = max(blacklist, bl->end);
p = journal_seq_pin(j, seq);
atomic_set(&p->count, 1);
p->devs = i->devs;
}
spin_lock(&j->lock); spin_lock(&j->lock);
set_bit(JOURNAL_STARTED, &j->flags); set_bit(JOURNAL_STARTED, &j->flags);
while (journal_cur_seq(j) < blacklist)
journal_pin_new_entry(j, 0);
/*
* __journal_entry_close() only inits the next journal entry when it
* closes an open journal entry - the very first journal entry gets
* initialized here:
*/
journal_pin_new_entry(j, 1); journal_pin_new_entry(j, 1);
bch2_journal_buf_init(j); bch2_journal_buf_init(j);
...@@ -1017,12 +1047,7 @@ void bch2_fs_journal_start(struct journal *j) ...@@ -1017,12 +1047,7 @@ void bch2_fs_journal_start(struct journal *j)
bch2_journal_space_available(j); bch2_journal_space_available(j);
spin_unlock(&j->lock); spin_unlock(&j->lock);
/* return 0;
* Adding entries to the next journal entry before allocating space on
* disk for the next journal entry - this is ok, because these entries
* only have to go down with the next journal entry we write:
*/
bch2_journal_seq_blacklist_write(j);
} }
/* init/exit: */ /* init/exit: */
...@@ -1090,8 +1115,6 @@ int bch2_fs_journal_init(struct journal *j) ...@@ -1090,8 +1115,6 @@ int bch2_fs_journal_init(struct journal *j)
INIT_DELAYED_WORK(&j->write_work, journal_write_work); INIT_DELAYED_WORK(&j->write_work, journal_write_work);
INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work); INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work);
init_waitqueue_head(&j->pin_flush_wait); init_waitqueue_head(&j->pin_flush_wait);
mutex_init(&j->blacklist_lock);
INIT_LIST_HEAD(&j->seq_blacklist);
mutex_init(&j->reclaim_lock); mutex_init(&j->reclaim_lock);
mutex_init(&j->discard_lock); mutex_init(&j->discard_lock);
......
...@@ -472,8 +472,10 @@ int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, ...@@ -472,8 +472,10 @@ int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
int bch2_dev_journal_alloc(struct bch_dev *); int bch2_dev_journal_alloc(struct bch_dev *);
void bch2_dev_journal_stop(struct journal *, struct bch_dev *); void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
void bch2_fs_journal_stop(struct journal *); void bch2_fs_journal_stop(struct journal *);
void bch2_fs_journal_start(struct journal *); int bch2_fs_journal_start(struct journal *, u64, struct list_head *);
void bch2_dev_journal_exit(struct bch_dev *); void bch2_dev_journal_exit(struct bch_dev *);
int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *); int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
void bch2_fs_journal_exit(struct journal *); void bch2_fs_journal_exit(struct journal *);
......
...@@ -10,7 +10,6 @@ ...@@ -10,7 +10,6 @@
#include "journal.h" #include "journal.h"
#include "journal_io.h" #include "journal_io.h"
#include "journal_reclaim.h" #include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
#include "replicas.h" #include "replicas.h"
#include "trace.h" #include "trace.h"
...@@ -655,45 +654,11 @@ void bch2_journal_entries_free(struct list_head *list) ...@@ -655,45 +654,11 @@ void bch2_journal_entries_free(struct list_head *list)
} }
} }
/*
 * Reset the journal's sequence counters and pin FIFO so that they cover the
 * sequence numbers last_seq..end_seq inclusive.
 *
 * Returns 0 on success, or -ENOMEM if the pin FIFO had to be reallocated and
 * the allocation failed.
 */
int bch2_journal_set_seq(struct bch_fs *c, u64 last_seq, u64 end_seq)
{
struct journal *j = &c->journal;
struct journal_entry_pin_list *p;
/* nr: number of sequence numbers in the inclusive range */
u64 seq, nr = end_seq - last_seq + 1;
/* Grow the pin FIFO if it is too small to hold one slot per sequence number. */
if (nr > j->pin.size) {
free_fifo(&j->pin);
init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL);
if (!j->pin.data) {
bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
return -ENOMEM;
}
}
atomic64_set(&j->seq, end_seq);
j->last_seq_ondisk = last_seq;
/* The FIFO is indexed by sequence number: front is last_seq, back is one past end_seq. */
j->pin.front = last_seq;
j->pin.back = end_seq + 1;
/* Start every pin list in the range out empty: no pins, zero count, no devices. */
fifo_for_each_entry_ptr(p, &j->pin, seq) {
INIT_LIST_HEAD(&p->list);
INIT_LIST_HEAD(&p->flushed);
atomic_set(&p->count, 0);
p->devs.nr = 0;
}
return 0;
}
int bch2_journal_read(struct bch_fs *c, struct list_head *list) int bch2_journal_read(struct bch_fs *c, struct list_head *list)
{ {
struct journal *j = &c->journal;
struct journal_list jlist; struct journal_list jlist;
struct journal_replay *i; struct journal_replay *i;
struct journal_entry_pin_list *p;
struct bch_dev *ca; struct bch_dev *ca;
u64 cur_seq, end_seq;
unsigned iter; unsigned iter;
size_t keys = 0, entries = 0; size_t keys = 0, entries = 0;
bool degraded = false; bool degraded = false;
...@@ -725,17 +690,12 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list) ...@@ -725,17 +690,12 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
if (jlist.ret) if (jlist.ret)
return jlist.ret; return jlist.ret;
if (list_empty(list)){
bch_err(c, "no journal entries found");
return BCH_FSCK_REPAIR_IMPOSSIBLE;
}
list_for_each_entry(i, list, list) { list_for_each_entry(i, list, list) {
struct jset_entry *entry;
struct bkey_i *k, *_n;
struct bch_replicas_padded replicas; struct bch_replicas_padded replicas;
char buf[80]; char buf[80];
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
ret = jset_validate_entries(c, &i->j, READ); ret = jset_validate_entries(c, &i->j, READ);
if (ret) if (ret)
goto fsck_err; goto fsck_err;
...@@ -745,6 +705,8 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list) ...@@ -745,6 +705,8 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
* the devices - this is wrong: * the devices - this is wrong:
*/ */
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
if (!degraded && if (!degraded &&
(test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
fsck_err_on(!bch2_replicas_marked(c, &replicas.e, false), c, fsck_err_on(!bch2_replicas_marked(c, &replicas.e, false), c,
...@@ -755,68 +717,18 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list) ...@@ -755,68 +717,18 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
if (ret) if (ret)
return ret; return ret;
} }
}
i = list_last_entry(list, struct journal_replay, list);
ret = bch2_journal_set_seq(c,
le64_to_cpu(i->j.last_seq),
le64_to_cpu(i->j.seq));
if (ret)
return ret;
mutex_lock(&j->blacklist_lock);
list_for_each_entry(i, list, list) {
p = journal_seq_pin(j, le64_to_cpu(i->j.seq));
atomic_set(&p->count, 1);
p->devs = i->devs;
if (bch2_journal_seq_blacklist_read(j, i)) {
mutex_unlock(&j->blacklist_lock);
return -ENOMEM;
}
}
mutex_unlock(&j->blacklist_lock);
cur_seq = journal_last_seq(j);
end_seq = le64_to_cpu(list_last_entry(list,
struct journal_replay, list)->j.seq);
list_for_each_entry(i, list, list) {
struct jset_entry *entry;
struct bkey_i *k, *_n;
bool blacklisted;
mutex_lock(&j->blacklist_lock);
while (cur_seq < le64_to_cpu(i->j.seq) &&
bch2_journal_seq_blacklist_find(j, cur_seq))
cur_seq++;
blacklisted = bch2_journal_seq_blacklist_find(j,
le64_to_cpu(i->j.seq));
mutex_unlock(&j->blacklist_lock);
fsck_err_on(blacklisted, c,
"found blacklisted journal entry %llu",
le64_to_cpu(i->j.seq));
fsck_err_on(le64_to_cpu(i->j.seq) != cur_seq, c,
"journal entries %llu-%llu missing! (replaying %llu-%llu)",
cur_seq, le64_to_cpu(i->j.seq) - 1,
journal_last_seq(j), end_seq);
cur_seq = le64_to_cpu(i->j.seq) + 1;
for_each_jset_key(k, _n, entry, &i->j) for_each_jset_key(k, _n, entry, &i->j)
keys++; keys++;
entries++; entries++;
} }
if (!list_empty(list)) {
i = list_last_entry(list, struct journal_replay, list);
bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu", bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu",
keys, entries, journal_cur_seq(j)); keys, entries, le64_to_cpu(i->j.seq));
}
fsck_err: fsck_err:
return ret; return ret;
} }
......
...@@ -35,7 +35,6 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset, ...@@ -35,7 +35,6 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys) \ for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys) \
vstruct_for_each_safe(entry, k, _n) vstruct_for_each_safe(entry, k, _n)
int bch2_journal_set_seq(struct bch_fs *c, u64, u64);
int bch2_journal_read(struct bch_fs *, struct list_head *); int bch2_journal_read(struct bch_fs *, struct list_head *);
void bch2_journal_entries_free(struct list_head *); void bch2_journal_entries_free(struct list_head *);
int bch2_journal_replay(struct bch_fs *, struct list_head *); int bch2_journal_replay(struct bch_fs *, struct list_head *);
......
This diff is collapsed.
...@@ -2,13 +2,12 @@ ...@@ -2,13 +2,12 @@
#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H #ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H #define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
struct journal_replay; bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool);
int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64);
struct journal_seq_blacklist * int bch2_blacklist_table_initialize(struct bch_fs *);
bch2_journal_seq_blacklist_find(struct journal *, u64);
int bch2_journal_seq_should_ignore(struct bch_fs *, u64, struct btree *); extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist;
int bch2_journal_seq_blacklist_read(struct journal *,
struct journal_replay *); void bch2_blacklist_entries_gc(struct work_struct *);
void bch2_journal_seq_blacklist_write(struct journal *);
#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */ #endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */
...@@ -54,24 +54,6 @@ struct journal_entry_pin { ...@@ -54,24 +54,6 @@ struct journal_entry_pin {
u64 seq; u64 seq;
}; };
/*
 * Corresponds to a btree node with a blacklisted bset.
 *
 * NOTE(review): btree_id and pos presumably identify the node, and seq is
 * presumably the journal sequence number of the blacklisted bset - confirm
 * against the code that fills these in.
 */
struct blacklisted_node {
__le64 seq;
enum btree_id btree_id;
struct bpos pos;
};
/*
 * One blacklisted range of journal sequence numbers, kept on the journal's
 * seq_blacklist list.
 */
struct journal_seq_blacklist {
/* Linkage on journal->seq_blacklist. */
struct list_head list;
/* Bounds of the blacklisted range. */
u64 start;
u64 end;
/* NOTE(review): what this pin guards is not visible here - confirm. */
struct journal_entry_pin pin;
/*
 * NOTE(review): presumably an array of nr_entries btree nodes that had
 * blacklisted bsets in this range - confirm.
 */
struct blacklisted_node *entries;
size_t nr_entries;
};
struct journal_res { struct journal_res {
bool ref; bool ref;
u8 idx; u8 idx;
...@@ -222,10 +204,6 @@ struct journal { ...@@ -222,10 +204,6 @@ struct journal {
u64 replay_journal_seq; u64 replay_journal_seq;
struct mutex blacklist_lock;
struct list_head seq_blacklist;
struct journal_seq_blacklist *new_blacklist;
struct write_point wp; struct write_point wp;
spinlock_t err_lock; spinlock_t err_lock;
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include "error.h" #include "error.h"
#include "fsck.h" #include "fsck.h"
#include "journal_io.h" #include "journal_io.h"
#include "journal_seq_blacklist.h"
#include "quota.h" #include "quota.h"
#include "recovery.h" #include "recovery.h"
#include "replicas.h" #include "replicas.h"
...@@ -99,18 +100,49 @@ static int verify_superblock_clean(struct bch_fs *c, ...@@ -99,18 +100,49 @@ static int verify_superblock_clean(struct bch_fs *c,
return ret; return ret;
} }
/*
 * Check the journal entries read at startup for gaps and for blacklisted
 * sequence numbers.
 *
 * @journal is a list of struct journal_replay, walked in list order. The
 * expected first sequence number is the last_seq recorded in the final
 * entry. An fsck error is raised for every entry whose sequence number is
 * not the expected one (entries missing) and for every entry whose own
 * sequence number is blacklisted.
 *
 * Returns 0 if there is nothing to check or no error was raised; otherwise
 * whatever fsck_err_on() leaves in ret before jumping to fsck_err.
 */
static int
verify_journal_entries_not_blacklisted_or_missing(struct bch_fs *c,
						  struct list_head *journal)
{
	struct journal_replay *i;
	u64 start_seq, end_seq, seq;
	int ret = 0;

	/*
	 * The list is empty when the journal was never read (clean shutdown
	 * without fsck). list_last_entry() must not be used then: it would
	 * return a pointer computed from the list head itself, and reading
	 * ->j.last_seq / ->j.seq through it is an out of bounds access.
	 */
	if (list_empty(journal))
		return 0;

	i = list_last_entry(journal, struct journal_replay, list);
	start_seq	= le64_to_cpu(i->j.last_seq);
	end_seq		= le64_to_cpu(i->j.seq);
	seq		= start_seq;

	list_for_each_entry(i, journal, list) {
		fsck_err_on(seq != le64_to_cpu(i->j.seq), c,
			"journal entries %llu-%llu missing! (replaying %llu-%llu)",
			seq, le64_to_cpu(i->j.seq) - 1,
			start_seq, end_seq);

		seq = le64_to_cpu(i->j.seq);

		fsck_err_on(bch2_journal_seq_is_blacklisted(c, seq, false), c,
			    "found blacklisted journal entry %llu", seq);

		/*
		 * Advance to the next sequence number we expect to see,
		 * stepping over blacklisted ones so that they are not
		 * reported as missing.
		 */
		do {
			seq++;
		} while (bch2_journal_seq_is_blacklisted(c, seq, false));
	}
fsck_err:
	return ret;
}
static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c) static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
{ {
struct bch_sb_field_clean *clean, *sb_clean; struct bch_sb_field_clean *clean, *sb_clean;
int ret;
if (!c->sb.clean)
return NULL;
mutex_lock(&c->sb_lock); mutex_lock(&c->sb_lock);
sb_clean = bch2_sb_get_clean(c->disk_sb.sb); sb_clean = bch2_sb_get_clean(c->disk_sb.sb);
if (!sb_clean) {
if (fsck_err_on(!sb_clean, c,
"superblock marked clean but clean section not present")) {
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->sb.clean = false;
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
bch_err(c, "superblock marked clean but clean section not present");
return NULL; return NULL;
} }
...@@ -128,6 +160,9 @@ static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c) ...@@ -128,6 +160,9 @@ static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
return clean; return clean;
fsck_err:
mutex_unlock(&c->sb_lock);
return ERR_PTR(ret);
} }
static int journal_replay_entry_early(struct bch_fs *c, static int journal_replay_entry_early(struct bch_fs *c,
...@@ -179,12 +214,30 @@ static int journal_replay_entry_early(struct bch_fs *c, ...@@ -179,12 +214,30 @@ static int journal_replay_entry_early(struct bch_fs *c,
le64_to_cpu(u->v)); le64_to_cpu(u->v));
break; break;
} }
case BCH_JSET_ENTRY_blacklist: {
struct jset_entry_blacklist *bl_entry =
container_of(entry, struct jset_entry_blacklist, entry);
ret = bch2_journal_seq_blacklist_add(c,
le64_to_cpu(bl_entry->seq),
le64_to_cpu(bl_entry->seq) + 1);
break;
}
case BCH_JSET_ENTRY_blacklist_v2: {
struct jset_entry_blacklist_v2 *bl_entry =
container_of(entry, struct jset_entry_blacklist_v2, entry);
ret = bch2_journal_seq_blacklist_add(c,
le64_to_cpu(bl_entry->start),
le64_to_cpu(bl_entry->end) + 1);
break;
}
} }
return ret; return ret;
} }
static int load_journal_metadata(struct bch_fs *c, static int journal_replay_early(struct bch_fs *c,
struct bch_sb_field_clean *clean, struct bch_sb_field_clean *clean,
struct list_head *journal) struct list_head *journal)
{ {
...@@ -300,37 +353,76 @@ static bool journal_empty(struct list_head *journal) ...@@ -300,37 +353,76 @@ static bool journal_empty(struct list_head *journal)
int bch2_fs_recovery(struct bch_fs *c) int bch2_fs_recovery(struct bch_fs *c)
{ {
const char *err = "cannot allocate memory"; const char *err = "cannot allocate memory";
struct bch_sb_field_clean *clean; struct bch_sb_field_clean *clean = NULL;
u64 journal_seq;
LIST_HEAD(journal); LIST_HEAD(journal);
int ret; int ret;
if (c->sb.clean)
clean = read_superblock_clean(c); clean = read_superblock_clean(c);
if (clean) ret = PTR_ERR_OR_ZERO(clean);
if (ret)
goto err;
if (c->sb.clean)
bch_info(c, "recovering from clean shutdown, journal seq %llu", bch_info(c, "recovering from clean shutdown, journal seq %llu",
le64_to_cpu(clean->journal_seq)); le64_to_cpu(clean->journal_seq));
if (!clean || c->opts.fsck) { if (!c->replicas.entries) {
bch_info(c, "building replicas info");
set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
}
if (!c->sb.clean || c->opts.fsck) {
struct jset *j;
ret = bch2_journal_read(c, &journal); ret = bch2_journal_read(c, &journal);
if (ret) if (ret)
goto err; goto err;
ret = verify_superblock_clean(c, &clean, fsck_err_on(c->sb.clean && !journal_empty(&journal), c,
&list_last_entry(&journal, struct journal_replay, "filesystem marked clean but journal not empty");
list)->j);
if (!c->sb.clean && list_empty(&journal)){
bch_err(c, "no journal entries found");
ret = BCH_FSCK_REPAIR_IMPOSSIBLE;
goto err;
}
j = &list_last_entry(&journal, struct journal_replay, list)->j;
ret = verify_superblock_clean(c, &clean, j);
if (ret) if (ret)
goto err; goto err;
journal_seq = le64_to_cpu(j->seq) + 1;
} else { } else {
ret = bch2_journal_set_seq(c, journal_seq = le64_to_cpu(clean->journal_seq) + 1;
le64_to_cpu(clean->journal_seq), }
le64_to_cpu(clean->journal_seq));
ret = journal_replay_early(c, clean, &journal);
if (ret) if (ret)
goto err; goto err;
if (!c->sb.clean) {
ret = bch2_journal_seq_blacklist_add(c,
journal_seq,
journal_seq + 4);
if (ret) {
bch_err(c, "error creating new journal seq blacklist entry");
goto err;
} }
fsck_err_on(clean && !journal_empty(&journal), c, journal_seq += 4;
"filesystem marked clean but journal not empty"); }
ret = bch2_blacklist_table_initialize(c);
ret = load_journal_metadata(c, clean, &journal); ret = verify_journal_entries_not_blacklisted_or_missing(c, &journal);
if (ret)
goto err;
ret = bch2_fs_journal_start(&c->journal, journal_seq, &journal);
if (ret) if (ret)
goto err; goto err;
...@@ -351,11 +443,6 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -351,11 +443,6 @@ int bch2_fs_recovery(struct bch_fs *c)
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
if (!c->replicas.entries) {
bch_info(c, "building replicas info");
set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
}
if (c->opts.fsck || if (c->opts.fsck ||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) || !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) { test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
...@@ -377,13 +464,6 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -377,13 +464,6 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->sb.encryption_type && !c->sb.clean) if (c->sb.encryption_type && !c->sb.clean)
atomic64_add(1 << 16, &c->key_version); atomic64_add(1 << 16, &c->key_version);
/*
* bch2_fs_journal_start() can't happen sooner, or btree_gc_finish()
* will give spurious errors about oldest_gen > bucket_gen -
* this is a hack but oh well.
*/
bch2_fs_journal_start(&c->journal);
if (c->opts.noreplay) if (c->opts.noreplay)
goto out; goto out;
...@@ -424,6 +504,10 @@ int bch2_fs_recovery(struct bch_fs *c) ...@@ -424,6 +504,10 @@ int bch2_fs_recovery(struct bch_fs *c)
SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0); SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
} }
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
if (c->journal_seq_blacklist_table &&
c->journal_seq_blacklist_table->nr > 128)
queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);
out: out:
bch2_journal_entries_free(&journal); bch2_journal_entries_free(&journal);
kfree(clean); kfree(clean);
...@@ -472,7 +556,7 @@ int bch2_fs_initialize(struct bch_fs *c) ...@@ -472,7 +556,7 @@ int bch2_fs_initialize(struct bch_fs *c)
* journal_res_get() will crash if called before this has * journal_res_get() will crash if called before this has
* set up the journal.pin FIFO and journal.cur pointer: * set up the journal.pin FIFO and journal.cur pointer:
*/ */
bch2_fs_journal_start(&c->journal); bch2_fs_journal_start(&c->journal, 1, &journal);
bch2_journal_set_replay_done(&c->journal); bch2_journal_set_replay_done(&c->journal);
err = "error going read write"; err = "error going read write";
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "error.h" #include "error.h"
#include "io.h" #include "io.h"
#include "journal.h" #include "journal.h"
#include "journal_seq_blacklist.h"
#include "replicas.h" #include "replicas.h"
#include "quota.h" #include "quota.h"
#include "super-io.h" #include "super-io.h"
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "io.h" #include "io.h"
#include "journal.h" #include "journal.h"
#include "journal_reclaim.h" #include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
#include "move.h" #include "move.h"
#include "migrate.h" #include "migrate.h"
#include "movinggc.h" #include "movinggc.h"
...@@ -468,6 +469,7 @@ static void bch2_fs_free(struct bch_fs *c) ...@@ -468,6 +469,7 @@ static void bch2_fs_free(struct bch_fs *c)
kfree(c->replicas.entries); kfree(c->replicas.entries);
kfree(c->replicas_gc.entries); kfree(c->replicas_gc.entries);
kfree(rcu_dereference_protected(c->disk_groups, 1)); kfree(rcu_dereference_protected(c->disk_groups, 1));
kfree(c->journal_seq_blacklist_table);
if (c->journal_reclaim_wq) if (c->journal_reclaim_wq)
destroy_workqueue(c->journal_reclaim_wq); destroy_workqueue(c->journal_reclaim_wq);
...@@ -496,6 +498,10 @@ void bch2_fs_stop(struct bch_fs *c) ...@@ -496,6 +498,10 @@ void bch2_fs_stop(struct bch_fs *c)
bch_verbose(c, "shutting down"); bch_verbose(c, "shutting down");
set_bit(BCH_FS_STOPPING, &c->flags);
cancel_work_sync(&c->journal_seq_blacklist_gc_work);
for_each_member_device(ca, c, i) for_each_member_device(ca, c, i)
if (ca->kobj.state_in_sysfs && if (ca->kobj.state_in_sysfs &&
ca->disk_sb.bdev) ca->disk_sb.bdev)
...@@ -631,6 +637,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) ...@@ -631,6 +637,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
spin_lock_init(&c->btree_write_error_lock); spin_lock_init(&c->btree_write_error_lock);
INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work); INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work);
INIT_WORK(&c->journal_seq_blacklist_gc_work,
bch2_blacklist_entries_gc);
INIT_LIST_HEAD(&c->fsck_errors); INIT_LIST_HEAD(&c->fsck_errors);
mutex_init(&c->fsck_error_lock); mutex_init(&c->fsck_error_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment