Commit 74d8fc2b authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'bcachefs-2023-12-19' of https://evilpiepirate.org/git/bcachefs

Pull more bcachefs fixes from Kent Overstreet:

 - Fix a deadlock in the data move path with nocow locks (vs. update in
   place writes); when trylock failed we were incorrectly waiting for in
   flight ios to flush.

 - Fix reporting of NFS file handle length

 - Fix early error path in bch2_fs_alloc() - list head wasn't being
   initialized early enough

 - Make sure correct (hardware accelerated) crc modules get loaded

 - Fix a rare overflow in the btree split path, when the packed bkey
   format grows and all the keys have no value (LRU btree).

 - Fix error handling in the sector allocator

   This was causing writes to spuriously fail in multidevice setups, and
   another bug meant that the errors weren't being logged, only reported
   via fsync.

* tag 'bcachefs-2023-12-19' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: Fix bch2_alloc_sectors_start_trans() error handling
  bcachefs; guard against overflow in btree node split
  bcachefs: btree_node_u64s_with_format() takes nr keys
  bcachefs: print explicit recovery pass message only once
  bcachefs: improve modprobe support by providing softdeps
  bcachefs: fix invalid memory access in bch2_fs_alloc() error path
  bcachefs: Fix determining required file handle length
  bcachefs: Fix nocow locks deadlock
parents ac1c13e2 247ce5f1
......@@ -1374,8 +1374,17 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
goto alloc_done;
/* Don't retry from all devices if we're out of open buckets: */
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
goto allocate_blocking;
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) {
int ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, watermark,
flags, cl);
if (!ret ||
bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
goto alloc_done;
}
/*
* Only try to allocate cache (durability = 0 devices) from the
......@@ -1389,7 +1398,6 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
&have_cache, watermark,
flags, cl);
} else {
allocate_blocking:
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
......
......@@ -3214,10 +3214,9 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
mempool_exit(&c->btree_trans_pool);
}
int bch2_fs_btree_iter_init(struct bch_fs *c)
void bch2_fs_btree_iter_init_early(struct bch_fs *c)
{
struct btree_transaction_stats *s;
int ret;
for (s = c->btree_transaction_stats;
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
......@@ -3228,6 +3227,11 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
INIT_LIST_HEAD(&c->btree_trans_list);
seqmutex_init(&c->btree_trans_lock);
}
int bch2_fs_btree_iter_init(struct bch_fs *c)
{
int ret;
c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf);
if (!c->btree_trans_bufs)
......
......@@ -938,6 +938,7 @@ unsigned bch2_trans_get_fn_idx(const char *);
void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);
void bch2_fs_btree_iter_exit(struct bch_fs *);
void bch2_fs_btree_iter_init_early(struct bch_fs *);
int bch2_fs_btree_iter_init(struct bch_fs *);
#endif /* _BCACHEFS_BTREE_ITER_H */
......@@ -99,7 +99,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
/* Calculate ideal packed bkey format for new btree nodes: */
void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b)
static void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b)
{
struct bkey_packed *k;
struct bset_tree *t;
......@@ -125,21 +125,20 @@ static struct bkey_format bch2_btree_calc_format(struct btree *b)
return bch2_bkey_format_done(&s);
}
static size_t btree_node_u64s_with_format(struct btree *b,
static size_t btree_node_u64s_with_format(struct btree_nr_keys nr,
struct bkey_format *old_f,
struct bkey_format *new_f)
{
struct bkey_format *old_f = &b->format;
/* stupid integer promotion rules */
ssize_t delta =
(((int) new_f->key_u64s - old_f->key_u64s) *
(int) b->nr.packed_keys) +
(int) nr.packed_keys) +
(((int) new_f->key_u64s - BKEY_U64s) *
(int) b->nr.unpacked_keys);
(int) nr.unpacked_keys);
BUG_ON(delta + b->nr.live_u64s < 0);
BUG_ON(delta + nr.live_u64s < 0);
return b->nr.live_u64s + delta;
return nr.live_u64s + delta;
}
/**
......@@ -147,16 +146,18 @@ static size_t btree_node_u64s_with_format(struct btree *b,
*
* @c: filesystem handle
* @b: btree node to rewrite
* @nr: number of keys for new node (i.e. b->nr)
* @new_f: bkey format to translate keys to
*
* Returns: true if all re-packed keys will be able to fit in a new node.
*
* Assumes all keys will successfully pack with the new format.
*/
bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
static bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
struct btree_nr_keys nr,
struct bkey_format *new_f)
{
size_t u64s = btree_node_u64s_with_format(b, new_f);
size_t u64s = btree_node_u64s_with_format(nr, &b->format, new_f);
return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c);
}
......@@ -391,7 +392,7 @@ static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
* The keys might expand with the new format - if they wouldn't fit in
* the btree node anymore, use the old format for now:
*/
if (!bch2_btree_node_format_fits(as->c, b, &format))
if (!bch2_btree_node_format_fits(as->c, b, b->nr, &format))
format = b->format;
SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
......@@ -1345,8 +1346,11 @@ static void __btree_split_node(struct btree_update *as,
struct bkey_packed *out[2];
struct bkey uk;
unsigned u64s, n1_u64s = (b->nr.live_u64s * 3) / 5;
struct { unsigned nr_keys, val_u64s; } nr_keys[2];
int i;
memset(&nr_keys, 0, sizeof(nr_keys));
for (i = 0; i < 2; i++) {
BUG_ON(n[i]->nsets != 1);
......@@ -1368,6 +1372,9 @@ static void __btree_split_node(struct btree_update *as,
if (!i)
n1_pos = uk.p;
bch2_bkey_format_add_key(&format[i], &uk);
nr_keys[i].nr_keys++;
nr_keys[i].val_u64s += bkeyp_val_u64s(&b->format, k);
}
btree_set_min(n[0], b->data->min_key);
......@@ -1380,6 +1387,12 @@ static void __btree_split_node(struct btree_update *as,
bch2_bkey_format_add_pos(&format[i], n[i]->data->max_key);
n[i]->data->format = bch2_bkey_format_done(&format[i]);
unsigned u64s = nr_keys[i].nr_keys * n[i]->data->format.key_u64s +
nr_keys[i].val_u64s;
if (__vstruct_bytes(struct btree_node, u64s) > btree_bytes(as->c))
n[i]->data->format = b->format;
btree_node_set_format(n[i], n[i]->data->format);
}
......@@ -1822,8 +1835,8 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_bkey_format_add_pos(&new_s, next->data->max_key);
new_f = bch2_bkey_format_done(&new_s);
sib_u64s = btree_node_u64s_with_format(b, &new_f) +
btree_node_u64s_with_format(m, &new_f);
sib_u64s = btree_node_u64s_with_format(b->nr, &b->format, &new_f) +
btree_node_u64s_with_format(m->nr, &m->format, &new_f);
if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) {
sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c);
......
......@@ -6,10 +6,6 @@
#include "btree_locking.h"
#include "btree_update.h"
void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *);
bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *,
struct bkey_format *);
#define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES)
#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
......
......@@ -560,7 +560,8 @@ int bch2_data_update_init(struct btree_trans *trans,
move_ctxt_wait_event(ctxt,
(locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0)) ||
!atomic_read(&ctxt->read_sectors));
(!atomic_read(&ctxt->read_sectors) &&
!atomic_read(&ctxt->write_sectors)));
if (!locked)
bch2_bucket_nocow_lock(&c->nocow_locks,
......
......@@ -1143,24 +1143,33 @@ static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len,
{
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_inode_info *dir = to_bch_ei(vdir);
if (*len < sizeof(struct bcachefs_fid_with_parent) / sizeof(u32))
return FILEID_INVALID;
int min_len;
if (!S_ISDIR(inode->v.i_mode) && dir) {
struct bcachefs_fid_with_parent *fid = (void *) fh;
min_len = sizeof(*fid) / sizeof(u32);
if (*len < min_len) {
*len = min_len;
return FILEID_INVALID;
}
fid->fid = bch2_inode_to_fid(inode);
fid->dir = bch2_inode_to_fid(dir);
*len = sizeof(*fid) / sizeof(u32);
*len = min_len;
return FILEID_BCACHEFS_WITH_PARENT;
} else {
struct bcachefs_fid *fid = (void *) fh;
min_len = sizeof(*fid) / sizeof(u32);
if (*len < min_len) {
*len = min_len;
return FILEID_INVALID;
}
*fid = bch2_inode_to_fid(inode);
*len = sizeof(*fid) / sizeof(u32);
*len = min_len;
return FILEID_BCACHEFS_WITHOUT_PARENT;
}
}
......
......@@ -10,6 +10,9 @@ extern const char * const bch2_recovery_passes[];
static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c,
enum bch_recovery_pass pass)
{
if (c->recovery_passes_explicit & BIT_ULL(pass))
return 0;
bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
bch2_recovery_passes[pass], pass,
bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
......
......@@ -72,6 +72,12 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
MODULE_DESCRIPTION("bcachefs filesystem");
MODULE_SOFTDEP("pre: crc32c");
MODULE_SOFTDEP("pre: crc64");
MODULE_SOFTDEP("pre: sha256");
MODULE_SOFTDEP("pre: chacha20");
MODULE_SOFTDEP("pre: poly1305");
MODULE_SOFTDEP("pre: xxhash");
#define KTYPE(type) \
static const struct attribute_group type ## _group = { \
......@@ -714,6 +720,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_copygc_init(c);
bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
bch2_fs_btree_iter_init_early(c);
bch2_fs_btree_interior_update_init_early(c);
bch2_fs_allocator_background_init(c);
bch2_fs_allocator_foreground_init(c);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment