Commit f81b648d authored by Kent Overstreet's avatar Kent Overstreet Committed by Kent Overstreet

bcachefs: Clean up, possibly fix page disk reservation accounting

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent b1ba2359
...@@ -454,12 +454,12 @@ struct bch_page_state { ...@@ -454,12 +454,12 @@ struct bch_page_state {
union { struct { union { struct {
/* existing data: */ /* existing data: */
unsigned sectors:PAGE_SECTOR_SHIFT + 1; unsigned sectors:PAGE_SECTOR_SHIFT + 1;
/* Uncompressed, fully allocated replicas: */
unsigned nr_replicas:4; unsigned nr_replicas:4;
unsigned compressed:1;
/* Owns PAGE_SECTORS sized reservation: */ /* Owns PAGE_SECTORS * replicas_reserved sized reservation: */
unsigned reserved:1; unsigned replicas_reserved:4;
unsigned reservation_replicas:4;
/* Owns PAGE_SECTORS sized quota reservation: */ /* Owns PAGE_SECTORS sized quota reservation: */
unsigned quota_reserved:1; unsigned quota_reserved:1;
...@@ -506,7 +506,7 @@ static inline struct bch_page_state *page_state(struct page *page) ...@@ -506,7 +506,7 @@ static inline struct bch_page_state *page_state(struct page *page)
static inline unsigned page_res_sectors(struct bch_page_state s) static inline unsigned page_res_sectors(struct bch_page_state s)
{ {
return s.reserved ? s.reservation_replicas * PAGE_SECTORS : 0; return s.replicas_reserved * PAGE_SECTORS;
} }
static void __bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode, static void __bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
...@@ -524,8 +524,10 @@ static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *i ...@@ -524,8 +524,10 @@ static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *i
{ {
struct bch_page_state s; struct bch_page_state s;
EBUG_ON(!PageLocked(page));
s = page_state_cmpxchg(page_state(page), s, { s = page_state_cmpxchg(page_state(page), s, {
s.reserved = 0; s.replicas_reserved = 0;
s.quota_reserved = 0; s.quota_reserved = 0;
}); });
...@@ -535,62 +537,46 @@ static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *i ...@@ -535,62 +537,46 @@ static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *i
static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *inode, static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
struct page *page, bool check_enospc) struct page *page, bool check_enospc)
{ {
struct bch_page_state *s = page_state(page), new, old; struct bch_page_state *s = page_state(page), new;
/* XXX: this should not be open coded */ /* XXX: this should not be open coded */
unsigned nr_replicas = inode->ei_inode.bi_data_replicas unsigned nr_replicas = inode->ei_inode.bi_data_replicas
? inode->ei_inode.bi_data_replicas - 1 ? inode->ei_inode.bi_data_replicas - 1
: c->opts.data_replicas; : c->opts.data_replicas;
struct disk_reservation disk_res;
struct disk_reservation disk_res = bch2_disk_reservation_init(c,
nr_replicas);
struct quota_res quota_res = { 0 }; struct quota_res quota_res = { 0 };
int ret = 0; int ret;
/* EBUG_ON(!PageLocked(page));
* XXX: this could likely be quite a bit simpler, page reservations
* _should_ only be manipulated with page locked:
*/
old = page_state_cmpxchg(s, new, { if (s->replicas_reserved < nr_replicas) {
if (new.reserved ret = bch2_disk_reservation_get(c, &disk_res, PAGE_SECTORS,
? (new.reservation_replicas < disk_res.nr_replicas) nr_replicas - s->replicas_reserved,
: (new.sectors < PAGE_SECTORS || !check_enospc ? BCH_DISK_RESERVATION_NOFAIL : 0);
new.nr_replicas < disk_res.nr_replicas ||
new.compressed)) {
int sectors = (disk_res.nr_replicas * PAGE_SECTORS -
page_res_sectors(new) -
disk_res.sectors);
if (sectors > 0) {
ret = bch2_disk_reservation_add(c, &disk_res, sectors,
!check_enospc
? BCH_DISK_RESERVATION_NOFAIL : 0);
if (unlikely(ret)) if (unlikely(ret))
goto err; return ret;
}
new.reserved = 1; page_state_cmpxchg(s, new, ({
new.reservation_replicas = disk_res.nr_replicas; BUG_ON(new.replicas_reserved +
disk_res.nr_replicas != nr_replicas);
new.replicas_reserved += disk_res.nr_replicas;
}));
} }
if (!new.quota_reserved && if (!s->quota_reserved &&
new.sectors + new.dirty_sectors < PAGE_SECTORS) { s->sectors + s->dirty_sectors < PAGE_SECTORS) {
ret = bch2_quota_reservation_add(c, inode, &quota_res, ret = bch2_quota_reservation_add(c, inode, &quota_res,
PAGE_SECTORS - quota_res.sectors, PAGE_SECTORS,
check_enospc); check_enospc);
if (unlikely(ret)) if (unlikely(ret))
goto err; return ret;
page_state_cmpxchg(s, new, ({
BUG_ON(new.quota_reserved);
new.quota_reserved = 1; new.quota_reserved = 1;
}));
} }
});
quota_res.sectors -= (new.quota_reserved - old.quota_reserved) * PAGE_SECTORS;
disk_res.sectors -= page_res_sectors(new) - page_res_sectors(old);
err:
bch2_quota_reservation_put(c, inode, &quota_res);
bch2_disk_reservation_put(c, &disk_res);
return ret; return ret;
} }
...@@ -600,6 +586,8 @@ static void bch2_clear_page_bits(struct page *page) ...@@ -600,6 +586,8 @@ static void bch2_clear_page_bits(struct page *page)
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_page_state s; struct bch_page_state s;
EBUG_ON(!PageLocked(page));
if (!PagePrivate(page)) if (!PagePrivate(page))
return; return;
...@@ -763,11 +751,8 @@ static void bch2_readpages_end_io(struct bio *bio) ...@@ -763,11 +751,8 @@ static void bch2_readpages_end_io(struct bio *bio)
static inline void page_state_init_for_read(struct page *page) static inline void page_state_init_for_read(struct page *page)
{ {
struct bch_page_state *s = page_state(page); SetPagePrivate(page);
page->private = 0;
BUG_ON(s->reserved);
s->sectors = 0;
s->compressed = 0;
} }
struct readpages_iter { struct readpages_iter {
...@@ -816,10 +801,13 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) ...@@ -816,10 +801,13 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
{ {
struct bvec_iter iter; struct bvec_iter iter;
struct bio_vec bv; struct bio_vec bv;
bool compressed = bch2_extent_is_compressed(k); unsigned nr_ptrs = !bch2_extent_is_compressed(k)
unsigned nr_ptrs = bch2_extent_nr_dirty_ptrs(k); ? bch2_extent_nr_dirty_ptrs(k)
: 0;
bio_for_each_segment(bv, bio, iter) { bio_for_each_segment(bv, bio, iter) {
/* brand new pages, don't need to be locked: */
struct bch_page_state *s = page_state(bv.bv_page); struct bch_page_state *s = page_state(bv.bv_page);
/* sectors in @k from the start of this page: */ /* sectors in @k from the start of this page: */
...@@ -827,14 +815,11 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) ...@@ -827,14 +815,11 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
unsigned page_sectors = min(bv.bv_len >> 9, k_sectors); unsigned page_sectors = min(bv.bv_len >> 9, k_sectors);
s->nr_replicas = !s->sectors s->nr_replicas = page_sectors == PAGE_SECTORS
? nr_ptrs ? nr_ptrs : 0;
: min_t(unsigned, s->nr_replicas, nr_ptrs);
BUG_ON(s->sectors + page_sectors > PAGE_SECTORS); BUG_ON(s->sectors + page_sectors > PAGE_SECTORS);
s->sectors += page_sectors; s->sectors += page_sectors;
s->compressed |= compressed;
} }
} }
...@@ -1163,7 +1148,7 @@ static int __bch2_writepage(struct folio *folio, ...@@ -1163,7 +1148,7 @@ static int __bch2_writepage(struct folio *folio,
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_writepage_state *w = data; struct bch_writepage_state *w = data;
struct bch_page_state new, old; struct bch_page_state new, old;
unsigned offset; unsigned offset, nr_replicas_this_write;
loff_t i_size = i_size_read(&inode->v); loff_t i_size = i_size_read(&inode->v);
pgoff_t end_index = i_size >> PAGE_SHIFT; pgoff_t end_index = i_size >> PAGE_SHIFT;
...@@ -1189,19 +1174,31 @@ static int __bch2_writepage(struct folio *folio, ...@@ -1189,19 +1174,31 @@ static int __bch2_writepage(struct folio *folio,
*/ */
zero_user_segment(page, offset, PAGE_SIZE); zero_user_segment(page, offset, PAGE_SIZE);
do_io: do_io:
EBUG_ON(!PageLocked(page));
/* Before unlocking the page, transfer reservation to w->io: */ /* Before unlocking the page, transfer reservation to w->io: */
old = page_state_cmpxchg(page_state(page), new, { old = page_state_cmpxchg(page_state(page), new, {
EBUG_ON(!new.reserved && /*
(new.sectors != PAGE_SECTORS || * If we didn't get a reservation, we can only write out the
new.compressed)); * number of (fully allocated) replicas that currently exist,
* and only if the entire page has been written:
*/
nr_replicas_this_write =
max_t(unsigned,
new.replicas_reserved,
(new.sectors == PAGE_SECTORS
? new.nr_replicas : 0));
BUG_ON(!nr_replicas_this_write);
if (new.reserved) new.nr_replicas = w->opts.compression
new.nr_replicas = new.reservation_replicas; ? 0
new.reserved = 0; : nr_replicas_this_write;
new.compressed |= w->opts.compression != 0; new.replicas_reserved = 0;
new.sectors += new.dirty_sectors; new.sectors += new.dirty_sectors;
BUG_ON(new.sectors != PAGE_SECTORS);
new.dirty_sectors = 0; new.dirty_sectors = 0;
}); });
...@@ -1210,21 +1207,20 @@ static int __bch2_writepage(struct folio *folio, ...@@ -1210,21 +1207,20 @@ static int __bch2_writepage(struct folio *folio,
unlock_page(page); unlock_page(page);
if (w->io && if (w->io &&
(w->io->op.op.res.nr_replicas != new.nr_replicas || (w->io->op.op.res.nr_replicas != nr_replicas_this_write ||
!bio_can_add_page_contig(&w->io->op.op.wbio.bio, page))) !bio_can_add_page_contig(&w->io->op.op.wbio.bio, page)))
bch2_writepage_do_io(w); bch2_writepage_do_io(w);
if (!w->io) if (!w->io)
bch2_writepage_io_alloc(c, w, inode, page, new.nr_replicas); bch2_writepage_io_alloc(c, w, inode, page,
nr_replicas_this_write);
w->io->new_sectors += new.sectors - old.sectors; w->io->new_sectors += new.sectors - old.sectors;
BUG_ON(inode != w->io->op.inode); BUG_ON(inode != w->io->op.inode);
BUG_ON(bio_add_page_contig(&w->io->op.op.wbio.bio, page)); BUG_ON(bio_add_page_contig(&w->io->op.op.wbio.bio, page));
if (old.reserved) w->io->op.op.res.sectors += old.replicas_reserved * PAGE_SECTORS;
w->io->op.op.res.sectors += old.reservation_replicas * PAGE_SECTORS;
w->io->op.new_i_size = i_size; w->io->op.new_i_size = i_size;
if (wbc->sync_mode == WB_SYNC_ALL) if (wbc->sync_mode == WB_SYNC_ALL)
...@@ -2606,6 +2602,8 @@ long bch2_fallocate_dispatch(struct file *file, int mode, ...@@ -2606,6 +2602,8 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
static bool folio_is_data(struct folio *folio) static bool folio_is_data(struct folio *folio)
{ {
EBUG_ON(!PageLocked(&folio->page));
/* XXX: should only have to check PageDirty */ /* XXX: should only have to check PageDirty */
return folio_test_private(folio) && return folio_test_private(folio) &&
(page_state(&folio->page)->sectors || (page_state(&folio->page)->sectors ||
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment