Commit ec4edd7b authored by Kent Overstreet

bcachefs: Prep work for variable size btree node buffers

bcachefs btree nodes are big - typically 256k - and btree roots are
pinned in memory. As we're now up to 18 btrees, mostly empty btree
roots add up to significant memory overhead.

And in the future we're going to start enforcing that certain btree node
boundaries exist, to solve lock contention issues - analogous to XFS's
AGIs.

Thus, we need to start allocating smaller btree node buffers when we
can. This patch changes code that refers to the filesystem constant
c->opts.btree_node_size to refer to the btree node buffer size -
btree_buf_bytes() - where appropriate.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 2acc59dd
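
For orientation, here is a minimal sketch of the distinction this patch introduces, lifted from the header hunk below that replaces btree_bytes(): the per-node buffer size is derived from the node's own byte_order, while the filesystem-wide maximum stays tied to the c->opts.btree_node_size option.

	/* per-node buffer size: set when the node's buffer is allocated */
	static inline size_t btree_buf_bytes(const struct btree *b)
	{
		return 1UL << b->byte_order;
	}

	/* filesystem-wide maximum: still derived from the mount option */
	static inline size_t btree_max_u64s(const struct bch_fs *c)
	{
		return (c->opts.btree_node_size - sizeof(struct btree_node)) / sizeof(u64);
	}

Callers that deal with a specific node's in-memory buffer (reads, bounce buffers, frees) switch to btree_buf_bytes(b); callers that genuinely want the filesystem constant keep using c->opts.btree_node_size.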
@@ -560,7 +560,7 @@ static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
 
 	si_meminfo(&i);
 	mem_bytes = i.totalram * i.mem_unit;
-	return div_u64(mem_bytes >> 1, btree_bytes(c));
+	return div_u64(mem_bytes >> 1, c->opts.btree_node_size);
 }
 
 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
...
@@ -2,6 +2,7 @@
 #ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
 #define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
 
+#include "btree_cache.h"
 #include "btree_iter.h"
 #include "btree_update.h"
 #include "buckets.h"
...
@@ -1204,11 +1204,6 @@ static inline unsigned block_sectors(const struct bch_fs *c)
 	return c->opts.block_size >> 9;
 }
 
-static inline size_t btree_sectors(const struct bch_fs *c)
-{
-	return c->opts.btree_node_size >> 9;
-}
-
 static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree)
 {
 	return c->btree_key_cache_btrees & (1U << btree);
...
@@ -823,13 +823,12 @@ void bch2_bset_init_first(struct btree *b, struct bset *i)
 	set_btree_bset(b, t, i);
 }
 
-void bch2_bset_init_next(struct bch_fs *c, struct btree *b,
-			 struct btree_node_entry *bne)
+void bch2_bset_init_next(struct btree *b, struct btree_node_entry *bne)
 {
 	struct bset *i = &bne->keys;
 	struct bset_tree *t;
 
-	BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c));
+	BUG_ON(bset_byte_offset(b, bne) >= btree_buf_bytes(b));
 	BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b)));
 	BUG_ON(b->nsets >= MAX_BSETS);
...
@@ -264,8 +264,7 @@ static inline struct bset *bset_next_set(struct btree *b,
 void bch2_btree_keys_init(struct btree *);
 
 void bch2_bset_init_first(struct btree *, struct bset *);
-void bch2_bset_init_next(struct bch_fs *, struct btree *,
-			 struct btree_node_entry *);
+void bch2_bset_init_next(struct btree *, struct btree_node_entry *);
 void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool);
 void bch2_bset_insert(struct btree *, struct btree_node_iter *,
...
@@ -60,7 +60,7 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
 
 	clear_btree_node_just_written(b);
 
-	kvpfree(b->data, btree_bytes(c));
+	kvpfree(b->data, btree_buf_bytes(b));
 	b->data = NULL;
 #ifdef __KERNEL__
 	kvfree(b->aux_data);
@@ -94,7 +94,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
 {
 	BUG_ON(b->data || b->aux_data);
 
-	b->data = kvpmalloc(btree_bytes(c), gfp);
+	b->data = kvpmalloc(btree_buf_bytes(b), gfp);
 	if (!b->data)
 		return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
 #ifdef __KERNEL__
@@ -107,7 +107,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
 	b->aux_data = NULL;
 #endif
 	if (!b->aux_data) {
-		kvpfree(b->data, btree_bytes(c));
+		kvpfree(b->data, btree_buf_bytes(b));
 		b->data = NULL;
 		return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
 	}
@@ -126,7 +126,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
 	bkey_btree_ptr_init(&b->key);
 	INIT_LIST_HEAD(&b->list);
 	INIT_LIST_HEAD(&b->write_blocked);
-	b->byte_order = ilog2(btree_bytes(c));
+	b->byte_order = ilog2(c->opts.btree_node_size);
 	return b;
 }
@@ -408,7 +408,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 	if (c->verify_data)
 		list_move(&c->verify_data->list, &bc->live);
 
-	kvpfree(c->verify_ondisk, btree_bytes(c));
+	kvpfree(c->verify_ondisk, c->opts.btree_node_size);
 
 	for (i = 0; i < btree_id_nr_alive(c); i++) {
 		struct btree_root *r = bch2_btree_id_root(c, i);
@@ -1192,7 +1192,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struc
 		   "	failed unpacked %zu\n",
 		   b->unpack_fn_len,
 		   b->nr.live_u64s * sizeof(u64),
-		   btree_bytes(c) - sizeof(struct btree_node),
+		   btree_buf_bytes(b) - sizeof(struct btree_node),
 		   b->nr.live_u64s * 100 / btree_max_u64s(c),
 		   b->sib_u64s[0],
 		   b->sib_u64s[1],
...
@@ -74,22 +74,27 @@ static inline bool btree_node_hashed(struct btree *b)
 	     _iter = 0; _iter < (_tbl)->size; _iter++)			\
 		rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash)
 
-static inline size_t btree_bytes(struct bch_fs *c)
+static inline size_t btree_buf_bytes(const struct btree *b)
 {
-	return c->opts.btree_node_size;
+	return 1UL << b->byte_order;
 }
 
-static inline size_t btree_max_u64s(struct bch_fs *c)
+static inline size_t btree_buf_max_u64s(const struct btree *b)
 {
-	return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64);
+	return (btree_buf_bytes(b) - sizeof(struct btree_node)) / sizeof(u64);
 }
 
-static inline size_t btree_pages(struct bch_fs *c)
+static inline size_t btree_max_u64s(const struct bch_fs *c)
 {
-	return btree_bytes(c) / PAGE_SIZE;
+	return (c->opts.btree_node_size - sizeof(struct btree_node)) / sizeof(u64);
 }
 
-static inline unsigned btree_blocks(struct bch_fs *c)
+static inline size_t btree_sectors(const struct bch_fs *c)
+{
+	return c->opts.btree_node_size >> SECTOR_SHIFT;
+}
+
+static inline unsigned btree_blocks(const struct bch_fs *c)
 {
 	return btree_sectors(c) >> c->block_bits;
 }
...
@@ -112,7 +112,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
 	unsigned flags = memalloc_nofs_save();
 	void *p;
 
-	BUG_ON(size > btree_bytes(c));
+	BUG_ON(size > c->opts.btree_node_size);
 	*used_mempool = false;
 	p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
@@ -174,8 +174,8 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
 
 	ptrs = ptrs_end = ((void *) new_whiteouts + bytes);
 
-	for (k = unwritten_whiteouts_start(c, b);
-	     k != unwritten_whiteouts_end(c, b);
+	for (k = unwritten_whiteouts_start(b);
+	     k != unwritten_whiteouts_end(b);
 	     k = bkey_p_next(k))
 		*--ptrs = k;
@@ -192,7 +192,7 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
 	verify_no_dups(b, new_whiteouts,
 		       (void *) ((u64 *) new_whiteouts + b->whiteout_u64s));
 
-	memcpy_u64s(unwritten_whiteouts_start(c, b),
+	memcpy_u64s(unwritten_whiteouts_start(b),
 		    new_whiteouts, b->whiteout_u64s);
 
 	btree_bounce_free(c, bytes, used_mempool, new_whiteouts);
@@ -313,7 +313,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
 	}
 
 	bytes = sorting_entire_node
-		? btree_bytes(c)
+		? btree_buf_bytes(b)
 		: __vstruct_bytes(struct btree_node, u64s);
 
 	out = btree_bounce_alloc(c, bytes, &used_mempool);
@@ -338,7 +338,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
 	if (sorting_entire_node) {
 		u64s = le16_to_cpu(out->keys.u64s);
 
-		BUG_ON(bytes != btree_bytes(c));
+		BUG_ON(bytes != btree_buf_bytes(b));
 
 		/*
 		 * Our temporary buffer is the same size as the btree node's
@@ -502,7 +502,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
 
 	bne = want_new_bset(c, b);
 	if (bne)
-		bch2_bset_init_next(c, b, bne);
+		bch2_bset_init_next(b, bne);
 
 	bch2_btree_build_aux_trees(b);
@@ -1160,7 +1160,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 			 ptr_written, b->written);
 	} else {
 		for (bne = write_block(b);
-		     bset_byte_offset(b, bne) < btree_bytes(c);
+		     bset_byte_offset(b, bne) < btree_buf_bytes(b);
 		     bne = (void *) bne + block_bytes(c))
 			btree_err_on(bne->keys.seq == b->data->keys.seq &&
 				     !bch2_journal_seq_is_blacklisted(c,
@@ -1172,7 +1172,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 				     "found bset signature after last bset");
 	}
 
-	sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
+	sorted = btree_bounce_alloc(c, btree_buf_bytes(b), &used_mempool);
 	sorted->keys.u64s = 0;
 
 	set_btree_bset(b, b->set, &b->data->keys);
@@ -1188,7 +1188,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
 	BUG_ON(b->nr.live_u64s != u64s);
 
-	btree_bounce_free(c, btree_bytes(c), used_mempool, sorted);
+	btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted);
 
 	if (updated_range)
 		bch2_btree_node_drop_keys_outside_node(b);
@@ -1284,7 +1284,7 @@ static void btree_node_read_work(struct work_struct *work)
 		rb->have_ioref		= bch2_dev_get_ioref(ca, READ);
 		bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
 		bio->bi_iter.bi_sector	= rb->pick.ptr.offset;
-		bio->bi_iter.bi_size	= btree_bytes(c);
+		bio->bi_iter.bi_size	= btree_buf_bytes(b);
 
 		if (rb->have_ioref) {
 			bio_set_dev(bio, ca->disk_sb.bdev);
@@ -1512,7 +1512,7 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
 	}
 
 	if (best >= 0) {
-		memcpy(b->data, ra->buf[best], btree_bytes(c));
+		memcpy(b->data, ra->buf[best], btree_buf_bytes(b));
 		ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error);
 	} else {
 		ret = -1;
@@ -1578,7 +1578,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
 	for (i = 0; i < ra->nr; i++) {
 		ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
 		ra->bio[i] = bio_alloc_bioset(NULL,
-					      buf_pages(ra->buf[i], btree_bytes(c)),
+					      buf_pages(ra->buf[i], btree_buf_bytes(b)),
 					      REQ_OP_READ|REQ_SYNC|REQ_META,
 					      GFP_NOFS,
 					      &c->btree_bio);
@@ -1598,7 +1598,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
 		rb->pick		= pick;
 		rb->bio.bi_iter.bi_sector = pick.ptr.offset;
 		rb->bio.bi_end_io	= btree_node_read_all_replicas_endio;
-		bch2_bio_map(&rb->bio, ra->buf[i], btree_bytes(c));
+		bch2_bio_map(&rb->bio, ra->buf[i], btree_buf_bytes(b));
 
 		if (rb->have_ioref) {
 			this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
@@ -1665,7 +1665,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
 	ca = bch_dev_bkey_exists(c, pick.ptr.dev);
 
 	bio = bio_alloc_bioset(NULL,
-			       buf_pages(b->data, btree_bytes(c)),
+			       buf_pages(b->data, btree_buf_bytes(b)),
 			       REQ_OP_READ|REQ_SYNC|REQ_META,
 			       GFP_NOFS,
 			       &c->btree_bio);
@@ -1679,7 +1679,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
 		INIT_WORK(&rb->work, btree_node_read_work);
 		bio->bi_iter.bi_sector	= pick.ptr.offset;
 		bio->bi_end_io		= btree_node_read_endio;
-		bch2_bio_map(bio, b->data, btree_bytes(c));
+		bch2_bio_map(bio, b->data, btree_buf_bytes(b));
 
 		if (rb->have_ioref) {
 			this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
@@ -2074,8 +2074,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
 	i->u64s		= 0;
 
 	sort_iter_add(&sort_iter.iter,
-		      unwritten_whiteouts_start(c, b),
-		      unwritten_whiteouts_end(c, b));
+		      unwritten_whiteouts_start(b),
+		      unwritten_whiteouts_end(b));
 	SET_BSET_SEPARATE_WHITEOUTS(i, false);
 
 	b->whiteout_u64s = 0;
@@ -2251,7 +2251,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
 
 	bne = want_new_bset(c, b);
 	if (bne)
-		bch2_bset_init_next(c, b, bne);
+		bch2_bset_init_next(b, bne);
 
 	bch2_btree_build_aux_trees(b);
...
@@ -139,8 +139,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
 	EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
 	EBUG_ON(bpos_lt(insert->k.p, b->data->min_key));
 	EBUG_ON(bpos_gt(insert->k.p, b->data->max_key));
-	EBUG_ON(insert->k.u64s >
-		bch_btree_keys_u64s_remaining(trans->c, b));
+	EBUG_ON(insert->k.u64s > bch2_btree_keys_u64s_remaining(b));
 	EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos));
 
 	k = bch2_btree_node_iter_peek_all(node_iter, b);
@@ -160,7 +159,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
 		k->type = KEY_TYPE_deleted;
 
 		if (k->needs_whiteout)
-			push_whiteout(trans->c, b, insert->k.p);
+			push_whiteout(b, insert->k.p);
 		k->needs_whiteout = false;
 
 		if (k >= btree_bset_last(b)->start) {
@@ -348,9 +347,7 @@ static noinline void journal_transaction_name(struct btree_trans *trans)
 static inline int btree_key_can_insert(struct btree_trans *trans,
 				       struct btree *b, unsigned u64s)
 {
-	struct bch_fs *c = trans->c;
-
-	if (!bch2_btree_node_insert_fits(c, b, u64s))
+	if (!bch2_btree_node_insert_fits(b, u64s))
 		return -BCH_ERR_btree_insert_btree_node_full;
 
 	return 0;
...
@@ -159,7 +159,7 @@ static bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
 {
 	size_t u64s = btree_node_u64s_with_format(nr, &b->format, new_f);
 
-	return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c);
+	return __vstruct_bytes(struct btree_node, u64s) < btree_buf_bytes(b);
 }
 
 /* Btree node freeing/allocation: */
@@ -1097,7 +1097,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 		 * Always check for space for two keys, even if we won't have to
 		 * split at prior level - it might have been a merge instead:
 		 */
-		if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
+		if (bch2_btree_node_insert_fits(path->l[update_level].b,
 						BKEY_BTREE_PTR_U64s_MAX * 2))
 			break;
@@ -1401,7 +1401,7 @@ static void __btree_split_node(struct btree_update *as,
 		unsigned u64s = nr_keys[i].nr_keys * n[i]->data->format.key_u64s +
 			nr_keys[i].val_u64s;
 
-		if (__vstruct_bytes(struct btree_node, u64s) > btree_bytes(as->c))
+		if (__vstruct_bytes(struct btree_node, u64s) > btree_buf_bytes(b))
 			n[i]->data->format = b->format;
 
 		btree_node_set_format(n[i], n[i]->data->format);
@@ -1703,7 +1703,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
 
 	bch2_btree_node_prep_for_write(trans, path, b);
 
-	if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
+	if (!bch2_btree_node_insert_fits(b, bch2_keylist_u64s(keys))) {
 		bch2_btree_node_unlock_write(trans, path, b);
 		goto split;
 	}
...
@@ -184,21 +184,19 @@ static inline void btree_node_reset_sib_u64s(struct btree *b)
 	b->sib_u64s[1] = b->nr.live_u64s;
 }
 
-static inline void *btree_data_end(struct bch_fs *c, struct btree *b)
+static inline void *btree_data_end(struct btree *b)
 {
-	return (void *) b->data + btree_bytes(c);
+	return (void *) b->data + btree_buf_bytes(b);
 }
 
-static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c,
-							    struct btree *b)
+static inline struct bkey_packed *unwritten_whiteouts_start(struct btree *b)
 {
-	return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s);
+	return (void *) ((u64 *) btree_data_end(b) - b->whiteout_u64s);
 }
 
-static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c,
-							  struct btree *b)
+static inline struct bkey_packed *unwritten_whiteouts_end(struct btree *b)
 {
-	return btree_data_end(c, b);
+	return btree_data_end(b);
 }
 
 static inline void *write_block(struct btree *b)
@@ -221,13 +219,11 @@ static inline bool bkey_written(struct btree *b, struct bkey_packed *k)
 	return __btree_addr_written(b, k);
 }
 
-static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
-						 struct btree *b,
-						 void *end)
+static inline ssize_t __bch2_btree_u64s_remaining(struct btree *b, void *end)
 {
 	ssize_t used = bset_byte_offset(b, end) / sizeof(u64) +
 		b->whiteout_u64s;
-	ssize_t total = c->opts.btree_node_size >> 3;
+	ssize_t total = btree_buf_bytes(b) >> 3;
 
 	/* Always leave one extra u64 for bch2_varint_decode: */
 	used++;
@@ -235,10 +231,9 @@ static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
 	return total - used;
 }
 
-static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
-						   struct btree *b)
+static inline size_t bch2_btree_keys_u64s_remaining(struct btree *b)
 {
-	ssize_t remaining = __bch_btree_u64s_remaining(c, b,
+	ssize_t remaining = __bch2_btree_u64s_remaining(b,
 			btree_bkey_last(b, bset_tree_last(b)));
 
 	BUG_ON(remaining < 0);
@@ -260,14 +255,13 @@ static inline unsigned btree_write_set_buffer(struct btree *b)
 	return 8 << BTREE_WRITE_SET_U64s_BITS;
 }
 
-static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
-						      struct btree *b)
+static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct btree *b)
 {
 	struct bset_tree *t = bset_tree_last(b);
 	struct btree_node_entry *bne = max(write_block(b),
 			(void *) btree_bkey_last(b, bset_tree_last(b)));
 	ssize_t remaining_space =
-		__bch_btree_u64s_remaining(c, b, bne->keys.start);
+		__bch2_btree_u64s_remaining(b, bne->keys.start);
 
 	if (unlikely(bset_written(b, bset(b, t)))) {
 		if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
@@ -281,12 +275,11 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
 	return NULL;
 }
 
-static inline void push_whiteout(struct bch_fs *c, struct btree *b,
-				 struct bpos pos)
+static inline void push_whiteout(struct btree *b, struct bpos pos)
 {
 	struct bkey_packed k;
 
-	BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s);
+	BUG_ON(bch2_btree_keys_u64s_remaining(b) < BKEY_U64s);
 	EBUG_ON(btree_node_just_written(b));
 
 	if (!bkey_pack_pos(&k, pos, b)) {
@@ -299,20 +292,19 @@ static inline void push_whiteout(struct bch_fs *c, struct btree *b,
 	k.needs_whiteout = true;
 
 	b->whiteout_u64s += k.u64s;
-	bkey_p_copy(unwritten_whiteouts_start(c, b), &k);
+	bkey_p_copy(unwritten_whiteouts_start(b), &k);
 }
 
 /*
  * write lock must be held on @b (else the dirty bset that we were going to
  * insert into could be written out from under us)
  */
-static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
-					       struct btree *b, unsigned u64s)
+static inline bool bch2_btree_node_insert_fits(struct btree *b, unsigned u64s)
 {
 	if (unlikely(btree_node_need_rewrite(b)))
 		return false;
 
-	return u64s <= bch_btree_keys_u64s_remaining(c, b);
+	return u64s <= bch2_btree_keys_u64s_remaining(b);
 }
 
 void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *);
...
@@ -125,13 +125,12 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
 			       struct btree_write_buffered_key *wb,
 			       bool *write_locked, size_t *fast)
 {
-	struct bch_fs *c = trans->c;
 	struct btree_path *path;
 	int ret;
 
 	EBUG_ON(!wb->journal_seq);
-	EBUG_ON(!c->btree_write_buffer.flushing.pin.seq);
-	EBUG_ON(c->btree_write_buffer.flushing.pin.seq > wb->journal_seq);
+	EBUG_ON(!trans->c->btree_write_buffer.flushing.pin.seq);
+	EBUG_ON(trans->c->btree_write_buffer.flushing.pin.seq > wb->journal_seq);
 
 	ret = bch2_btree_iter_traverse(iter);
 	if (ret)
@@ -155,7 +154,7 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
 		*write_locked = true;
 	}
 
-	if (unlikely(!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s))) {
+	if (unlikely(!bch2_btree_node_insert_fits(path->l[0].b, wb->k.k.u64s))) {
 		*write_locked = false;
 		return wb_flush_one_slowpath(trans, iter, wb);
 	}
...
@@ -44,19 +44,19 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
 		return false;
 
 	bio = bio_alloc_bioset(ca->disk_sb.bdev,
-			       buf_pages(n_sorted, btree_bytes(c)),
+			       buf_pages(n_sorted, btree_buf_bytes(b)),
 			       REQ_OP_READ|REQ_META,
 			       GFP_NOFS,
 			       &c->btree_bio);
 	bio->bi_iter.bi_sector	= pick.ptr.offset;
-	bch2_bio_map(bio, n_sorted, btree_bytes(c));
+	bch2_bio_map(bio, n_sorted, btree_buf_bytes(b));
 
 	submit_bio_wait(bio);
 
 	bio_put(bio);
 	percpu_ref_put(&ca->io_ref);
 
-	memcpy(n_ondisk, n_sorted, btree_bytes(c));
+	memcpy(n_ondisk, n_sorted, btree_buf_bytes(b));
 
 	v->written = 0;
 	if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error)
@@ -137,7 +137,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
 	mutex_lock(&c->verify_lock);
 
 	if (!c->verify_ondisk) {
-		c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
+		c->verify_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL);
 		if (!c->verify_ondisk)
 			goto out;
 	}
@@ -199,19 +199,19 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
 		return;
 	}
 
-	n_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
+	n_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL);
 	if (!n_ondisk) {
 		prt_printf(out, "memory allocation failure\n");
 		goto out;
 	}
 
 	bio = bio_alloc_bioset(ca->disk_sb.bdev,
-			       buf_pages(n_ondisk, btree_bytes(c)),
+			       buf_pages(n_ondisk, btree_buf_bytes(b)),
 			       REQ_OP_READ|REQ_META,
 			       GFP_NOFS,
 			       &c->btree_bio);
 	bio->bi_iter.bi_sector	= pick.ptr.offset;
-	bch2_bio_map(bio, n_ondisk, btree_bytes(c));
+	bch2_bio_map(bio, n_ondisk, btree_buf_bytes(b));
 
 	ret = submit_bio_wait(bio);
 	if (ret) {
@@ -293,7 +293,7 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
 out:
 	if (bio)
 		bio_put(bio);
-	kvpfree(n_ondisk, btree_bytes(c));
+	kvpfree(n_ondisk, btree_buf_bytes(b));
 	percpu_ref_put(&ca->io_ref);
 }
...
@@ -8,6 +8,7 @@
 
 #include "bcachefs.h"
 #include "bkey_methods.h"
+#include "btree_cache.h"
 #include "btree_gc.h"
 #include "btree_io.h"
 #include "btree_iter.h"
...
@@ -6,6 +6,7 @@
 #include "backpointers.h"
 #include "bkey_buf.h"
 #include "btree_gc.h"
+#include "btree_io.h"
 #include "btree_update.h"
 #include "btree_update_interior.h"
 #include "btree_write_buffer.h"
@@ -804,6 +805,8 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
 			if (!b)
 				goto next;
 
+			unsigned sectors = btree_ptr_sectors_written(&b->key);
+
 			ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
 			bch2_trans_iter_exit(trans, &iter);
 
@@ -813,11 +816,10 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
 				goto err;
 
 			if (ctxt->rate)
-				bch2_ratelimit_increment(ctxt->rate,
-							 c->opts.btree_node_size >> 9);
+				bch2_ratelimit_increment(ctxt->rate, sectors);
 			if (ctxt->stats) {
-				atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen);
-				atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved);
+				atomic64_add(sectors, &ctxt->stats->sectors_seen);
+				atomic64_add(sectors, &ctxt->stats->sectors_moved);
 			}
 		}
 next:
...
@@ -883,7 +883,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	    !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
 	    !(c->online_reserved = alloc_percpu(u64)) ||
 	    mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
-					btree_bytes(c)) ||
+					c->opts.btree_node_size) ||
 	    mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
 	    !(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits,
 					      sizeof(u64), GFP_KERNEL))) {
...
@@ -248,7 +248,7 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
 
 	mutex_lock(&c->btree_cache.lock);
 	list_for_each_entry(b, &c->btree_cache.live, list)
-		ret += btree_bytes(c);
+		ret += btree_buf_bytes(b);
 	mutex_unlock(&c->btree_cache.lock);
 	return ret;
...
@@ -1013,7 +1013,7 @@ TRACE_EVENT(trans_restart_split_race,
 		__entry->level		= b->c.level;
 		__entry->written	= b->written;
 		__entry->blocks		= btree_blocks(trans->c);
-		__entry->u64s_remaining	= bch_btree_keys_u64s_remaining(trans->c, b);
+		__entry->u64s_remaining	= bch2_btree_keys_u64s_remaining(b);
 	),
 
 	TP_printk("%s %pS l=%u written %u/%u u64s remaining %u",
...