Commit 280249b9, authored and committed by Kent Overstreet

bcachefs: Correctly order flushes and journal writes on multi device filesystems

All writes prior to a journal write need to be flushed before the
journal write itself happens. On single device filesystems, it suffices
to mark the write with REQ_PREFLUSH|REQ_FUA, but on multi device
filesystems we need to issue flushes to every device - and wait for them
to complete - before issuing the journal writes. Previously, we were
issuing flushes to every device, but we weren't waiting for them to
complete before issuing the journal writes.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent ed9d58a2
...@@ -509,9 +509,6 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, ...@@ -509,9 +509,6 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
n->submit_time = local_clock(); n->submit_time = local_clock();
n->bio.bi_iter.bi_sector = ptr->offset; n->bio.bi_iter.bi_sector = ptr->offset;
if (!journal_flushes_device(ca))
n->bio.bi_opf |= REQ_FUA;
if (likely(n->have_ioref)) { if (likely(n->have_ioref)) {
this_cpu_add(ca->io_done->sectors[WRITE][type], this_cpu_add(ca->io_done->sectors[WRITE][type],
bio_sectors(&n->bio)); bio_sectors(&n->bio));
......
...@@ -81,6 +81,7 @@ static void bch2_journal_buf_init(struct journal *j) ...@@ -81,6 +81,7 @@ static void bch2_journal_buf_init(struct journal *j)
bkey_extent_init(&buf->key); bkey_extent_init(&buf->key);
buf->noflush = false; buf->noflush = false;
buf->must_flush = false; buf->must_flush = false;
buf->separate_flush = false;
memset(buf->has_inode, 0, sizeof(buf->has_inode)); memset(buf->has_inode, 0, sizeof(buf->has_inode));
......
...@@ -496,11 +496,6 @@ static inline int bch2_journal_error(struct journal *j) ...@@ -496,11 +496,6 @@ static inline int bch2_journal_error(struct journal *j)
struct bch_dev; struct bch_dev;
/*
 * journal_flushes_device - report whether the journal flushes device @ca
 * @ca: device being queried (not inspected by this implementation)
 *
 * Unconditionally answers true, whatever device is passed in.
 */
static inline bool journal_flushes_device(struct bch_dev *ca)
{
	return true;
}
static inline void bch2_journal_set_replay_done(struct journal *j) static inline void bch2_journal_set_replay_done(struct journal *j)
{ {
BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags)); BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
......
...@@ -1188,6 +1188,51 @@ static void journal_write_endio(struct bio *bio) ...@@ -1188,6 +1188,51 @@ static void journal_write_endio(struct bio *bio)
percpu_ref_put(&ca->io_ref); percpu_ref_put(&ca->io_ref);
} }
/*
 * do_journal_write - submit the journal write bios for the pending buffer
 * @cl: the journal's io closure (embedded in struct journal as ->io)
 *
 * Walks every extent pointer in the key of the last unwritten journal
 * buffer and submits one write bio per referenced device, then continues
 * the closure at journal_write_done on system_highpri_wq.
 */
static void do_journal_write(struct closure *cl)
{
	struct journal *j = container_of(cl, struct journal, io);
	struct bch_fs *c = container_of(j, struct bch_fs, journal);
	struct journal_buf *buf = journal_last_unwritten_buf(j);
	unsigned nr_sectors = vstruct_sectors(buf->data, c->block_bits);
	struct bch_extent_ptr *ptr;

	extent_for_each_ptr(bkey_i_to_s_extent(&buf->key), ptr) {
		struct bch_dev *dev = bch_dev_bkey_exists(c, ptr->dev);
		struct bio *bio;

		/* Skip devices we cannot pin; the write to them is dropped. */
		if (!percpu_ref_tryget(&dev->io_ref)) {
			/* XXX: fix this */
			bch_err(c, "missing device for journal write\n");
			continue;
		}

		/* Account the sectors as journal writes on this device. */
		this_cpu_add(dev->io_done->sectors[WRITE][BCH_DATA_journal],
			     nr_sectors);

		/* Reuse the device's preallocated journal bio. */
		bio = dev->journal.bio;
		bio_reset(bio, dev->disk_sb.bdev,
			  REQ_OP_WRITE|REQ_SYNC|REQ_META);
		bio->bi_iter.bi_sector	= ptr->offset;
		bio->bi_end_io		= journal_write_endio;
		bio->bi_private		= dev;

		/*
		 * Flushing journal entries are always written with FUA.
		 * PREFLUSH is added only when the buffer is not marked
		 * separate_flush.
		 */
		if (!JSET_NO_FLUSH(buf->data)) {
			bio->bi_opf |= REQ_FUA;

			if (!buf->separate_flush)
				bio->bi_opf |= REQ_PREFLUSH;
		}

		bch2_bio_map(bio, buf->data, nr_sectors << 9);

		trace_journal_write(bio);
		closure_bio_submit(bio, cl);

		/* Record which journal sequence number this bucket now holds. */
		dev->journal.bucket_seq[dev->journal.cur_idx] =
			le64_to_cpu(buf->data->seq);
	}

	continue_at(cl, journal_write_done, system_highpri_wq);
	return;
}
void bch2_journal_write(struct closure *cl) void bch2_journal_write(struct closure *cl)
{ {
struct journal *j = container_of(cl, struct journal, io); struct journal *j = container_of(cl, struct journal, io);
...@@ -1197,9 +1242,8 @@ void bch2_journal_write(struct closure *cl) ...@@ -1197,9 +1242,8 @@ void bch2_journal_write(struct closure *cl)
struct jset_entry *start, *end; struct jset_entry *start, *end;
struct jset *jset; struct jset *jset;
struct bio *bio; struct bio *bio;
struct bch_extent_ptr *ptr;
bool validate_before_checksum = false; bool validate_before_checksum = false;
unsigned i, sectors, bytes, u64s; unsigned i, sectors, bytes, u64s, nr_rw_members = 0;
int ret; int ret;
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
...@@ -1329,45 +1373,28 @@ void bch2_journal_write(struct closure *cl) ...@@ -1329,45 +1373,28 @@ void bch2_journal_write(struct closure *cl)
if (c->opts.nochanges) if (c->opts.nochanges)
goto no_io; goto no_io;
extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) { for_each_rw_member(ca, c, i)
ca = bch_dev_bkey_exists(c, ptr->dev); nr_rw_members++;
if (!percpu_ref_tryget(&ca->io_ref)) {
/* XXX: fix this */
bch_err(c, "missing device for journal write\n");
continue;
}
this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
sectors);
bio = ca->journal.bio; if (nr_rw_members > 1)
bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META); w->separate_flush = true;
bio->bi_iter.bi_sector = ptr->offset;
bio->bi_end_io = journal_write_endio;
bio->bi_private = ca;
if (!JSET_NO_FLUSH(jset))
bio->bi_opf |= REQ_PREFLUSH|REQ_FUA;
bch2_bio_map(bio, jset, sectors << 9);
trace_journal_write(bio); if (!JSET_NO_FLUSH(jset) && w->separate_flush) {
closure_bio_submit(bio, cl); for_each_rw_member(ca, c, i) {
percpu_ref_get(&ca->io_ref);
ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(jset->seq); bio = ca->journal.bio;
bio_reset(bio, ca->disk_sb.bdev, REQ_OP_FLUSH);
bio->bi_end_io = journal_write_endio;
bio->bi_private = ca;
closure_bio_submit(bio, cl);
}
} }
if (!JSET_NO_FLUSH(jset)) { bch2_bucket_seq_cleanup(c);
for_each_rw_member(ca, c, i)
if (journal_flushes_device(ca) && continue_at(cl, do_journal_write, system_highpri_wq);
!bch2_bkey_has_device(bkey_i_to_s_c(&w->key), i)) { return;
percpu_ref_get(&ca->io_ref);
bio = ca->journal.bio;
bio_reset(bio, ca->disk_sb.bdev, REQ_OP_FLUSH);
bio->bi_end_io = journal_write_endio;
bio->bi_private = ca;
closure_bio_submit(bio, cl);
}
}
no_io: no_io:
bch2_bucket_seq_cleanup(c); bch2_bucket_seq_cleanup(c);
......
...@@ -31,6 +31,7 @@ struct journal_buf { ...@@ -31,6 +31,7 @@ struct journal_buf {
unsigned u64s_reserved; unsigned u64s_reserved;
bool noflush; /* write has already been kicked off, and was noflush */ bool noflush; /* write has already been kicked off, and was noflush */
bool must_flush; /* something wants a flush */ bool must_flush; /* something wants a flush */
bool separate_flush;
/* bloom filter: */ /* bloom filter: */
unsigned long has_inode[1024 / sizeof(unsigned long)]; unsigned long has_inode[1024 / sizeof(unsigned long)];
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment