Commit c9d01179 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'bcachefs-2023-11-5' of https://evilpiepirate.org/git/bcachefs

Pull more bcachefs updates from Kent Overstreet:
 "Here's the second big bcachefs pull request. This brings your tree up
  to date with my master branch, which is what existing bcachefs users
  are currently running.

  New features:
   - rebalance_work btree (and metadata version 1.3): the rebalance
     thread no longer has to scan to find extents that need processing -
     big scalability improvement.
   - sb_errors superblock section: this adds counters for each fsck
     error type, since filesystem creation, along with the date of the
     most recent error. It'll get us better bug reports (since users do
     not typically report errors that fsck was able to fix), and I might
     add telemetry for this in the future.

  Fixes include:
   - multiple snapshot deletion fixes
   - members_v2 fixups
   - deleted_inodes btree fixes
   - copygc thread no longer spins when a device is full but has no
     fragmented buckets (i.e. rebalance needs to move data around
     instead)
   - a fix for a memory reclaim issue with the btree key cache: we're
     now careful not to hold the srcu read lock that blocks key cache
     reclaim for too long
   - an early allocator locking fix, from Brian
   - endianness fixes, from Brian
   - CONFIG_BCACHEFS_DEBUG_TRANSACTIONS no longer defaults to y, a big
     performance improvement on multithreaded workloads"

* tag 'bcachefs-2023-11-5' of https://evilpiepirate.org/git/bcachefs: (70 commits)
  bcachefs: Improve stripe checksum error message
  bcachefs: Simplify, fix bch2_backpointer_get_key()
  bcachefs: kill thing_it_points_to arg to backpointer_not_found()
  bcachefs: bch2_ec_read_extent() now takes btree_trans
  bcachefs: bch2_stripe_to_text() now prints ptr gens
  bcachefs: Don't iterate over journal entries just for btree roots
  bcachefs: Break up bch2_journal_write()
  bcachefs: Replace ERANGE with private error codes
  bcachefs: bkey_copy() is no longer a macro
  bcachefs: x-macro-ify inode flags enum
  bcachefs: Convert bch2_fs_open() to darray
  bcachefs: Move __bch2_members_v2_get_mut to sb-members.h
  bcachefs: bch2_prt_datetime()
  bcachefs: CONFIG_BCACHEFS_DEBUG_TRANSACTIONS no longer defaults to y
  bcachefs: Add a comment for BTREE_INSERT_NOJOURNAL usage
  bcachefs: rebalance_work btree is not a snapshots btree
  bcachefs: Add missing printk newlines
  bcachefs: Fix recovery when forced to use JSET_NO_FLUSH journal entry
  bcachefs: .get_parent() should return an error pointer
  bcachefs: Fix bch2_delete_dead_inodes()
  ...
parents be3ca57c c7046ed0
......@@ -24,7 +24,6 @@ config BCACHEFS_FS
select XXHASH
select SRCU
select SYMBOLIC_ERRNAME
select MEAN_AND_VARIANCE
help
The bcachefs filesystem - a modern, copy on write filesystem, with
support for multiple devices, compression, checksumming, etc.
......@@ -42,7 +41,6 @@ config BCACHEFS_POSIX_ACL
config BCACHEFS_DEBUG_TRANSACTIONS
bool "bcachefs runtime info"
depends on BCACHEFS_FS
default y
help
This makes the list of running btree transactions available in debugfs.
......@@ -78,7 +76,7 @@ config BCACHEFS_NO_LATENCY_ACCT
config MEAN_AND_VARIANCE_UNIT_TEST
tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS
depends on KUNIT
select MEAN_AND_VARIANCE
depends on BCACHEFS_FS
default KUNIT_ALL_TESTS
help
This option enables the kunit tests for mean_and_variance module.
......
......@@ -70,6 +70,7 @@ bcachefs-y := \
reflink.o \
replicas.o \
sb-clean.o \
sb-errors.o \
sb-members.o \
siphash.o \
six.o \
......
This diff is collapsed.
......@@ -149,13 +149,13 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s
int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
int bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
int bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
int bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
int bch2_alloc_v4_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
void bch2_alloc_v4_swab(struct bkey_s);
void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
......@@ -193,7 +193,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
.min_val_size = 48, \
})
int bch2_bucket_gens_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
......@@ -249,6 +249,7 @@ int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64);
int bch2_fs_freespace_init(struct bch_fs *);
void bch2_recalc_capacity(struct bch_fs *);
u64 bch2_min_rw_member_capacity(struct bch_fs *);
void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
......
......@@ -399,12 +399,23 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
struct bucket_alloc_state *s,
struct closure *cl)
{
struct btree_iter iter;
struct bkey_s_c k;
struct btree_iter iter, citer;
struct bkey_s_c k, ck;
struct open_bucket *ob = NULL;
u64 alloc_start = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
u64 alloc_cursor = max(alloc_start, READ_ONCE(ca->alloc_cursor));
u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
u64 alloc_start = max(first_bucket, READ_ONCE(ca->alloc_cursor));
u64 alloc_cursor = alloc_start;
int ret;
/*
* Scan with an uncached iterator to avoid polluting the key cache. An
* uncached iter will return a cached key if one exists, but if not
* there is no other underlying protection for the associated key cache
* slot. To avoid racing bucket allocations, look up the cached key slot
* of any likely allocation candidate before attempting to proceed with
* the allocation. This provides proper exclusion on the associated
* bucket.
*/
again:
for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor),
BTREE_ITER_SLOTS, k, ret) {
......@@ -419,25 +430,38 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
continue;
a = bch2_alloc_to_v4(k, &a_convert);
if (a->data_type != BCH_DATA_free)
continue;
/* now check the cached key to serialize concurrent allocs of the bucket */
ck = bch2_bkey_get_iter(trans, &citer, BTREE_ID_alloc, k.k->p, BTREE_ITER_CACHED);
ret = bkey_err(ck);
if (ret)
break;
a = bch2_alloc_to_v4(ck, &a_convert);
if (a->data_type != BCH_DATA_free)
goto next;
s->buckets_seen++;
ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl);
next:
citer.path->preserve = false;
bch2_trans_iter_exit(trans, &citer);
if (ob)
break;
}
bch2_trans_iter_exit(trans, &iter);
alloc_cursor = iter.pos.offset;
ca->alloc_cursor = alloc_cursor;
if (!ob && ret)
ob = ERR_PTR(ret);
if (!ob && alloc_cursor > alloc_start) {
alloc_cursor = alloc_start;
if (!ob && alloc_start > first_bucket) {
alloc_cursor = alloc_start = first_bucket;
goto again;
}
......
......@@ -5,6 +5,7 @@
#include "backpointers.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "error.h"
......@@ -37,25 +38,26 @@ static bool extent_matches_bp(struct bch_fs *c,
return false;
}
int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k,
int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags,
struct printbuf *err)
{
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
int ret = 0;
if (!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset))) {
prt_str(err, "backpointer at wrong pos");
return -BCH_ERR_invalid_bkey;
}
return 0;
bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
c, err,
backpointer_pos_wrong,
"backpointer at wrong pos");
fsck_err:
return ret;
}
void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
{
prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
bch2_btree_ids[bp->btree_id],
bch2_btree_id_str(bp->btree_id),
bp->level,
(u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
(u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
......@@ -76,7 +78,7 @@ void bch2_backpointer_swab(struct bkey_s k)
{
struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
bp.v->bucket_offset = swab32(bp.v->bucket_offset);
bp.v->bucket_offset = swab40(bp.v->bucket_offset);
bp.v->bucket_len = swab32(bp.v->bucket_len);
bch2_bpos_swab(&bp.v->pos);
}
......@@ -219,18 +221,22 @@ int bch2_get_next_backpointer(struct btree_trans *trans,
static void backpointer_not_found(struct btree_trans *trans,
struct bpos bp_pos,
struct bch_backpointer bp,
struct bkey_s_c k,
const char *thing_it_points_to)
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
/*
* If we're using the btree write buffer, the backpointer we were
* looking at may have already been deleted - failure to find what it
* pointed to is not an error:
*/
if (likely(!bch2_backpointers_no_use_write_buffer))
return;
prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
thing_it_points_to);
bp.level ? "btree node" : "extent");
prt_printf(&buf, "bucket: ");
bch2_bpos_to_text(&buf, bucket);
prt_printf(&buf, "\n ");
......@@ -256,56 +262,37 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
struct bch_backpointer bp,
unsigned iter_flags)
{
struct bch_fs *c = trans->c;
struct btree_root *r = bch2_btree_id_root(c, bp.btree_id);
struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
struct bkey_s_c k;
bch2_trans_node_iter_init(trans, iter,
bp.btree_id,
bp.pos,
0,
min(bp.level, r->level),
iter_flags);
k = bch2_btree_iter_peek_slot(iter);
if (bkey_err(k)) {
bch2_trans_iter_exit(trans, iter);
return k;
}
if (bp.level == r->level + 1)
k = bkey_i_to_s_c(&r->key);
if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
return k;
bch2_trans_iter_exit(trans, iter);
if (likely(!bp.level)) {
struct bch_fs *c = trans->c;
struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
struct bkey_s_c k;
bch2_trans_node_iter_init(trans, iter,
bp.btree_id,
bp.pos,
0, 0,
iter_flags);
k = bch2_btree_iter_peek_slot(iter);
if (bkey_err(k)) {
bch2_trans_iter_exit(trans, iter);
return k;
}
if (unlikely(bch2_backpointers_no_use_write_buffer)) {
if (bp.level) {
struct btree *b;
if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
return k;
/*
* If a backpointer for a btree node wasn't found, it may be
* because it was overwritten by a new btree node that hasn't
* been written out yet - backpointer_get_node() checks for
* this:
*/
b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);
if (!IS_ERR_OR_NULL(b))
return bkey_i_to_s_c(&b->key);
bch2_trans_iter_exit(trans, iter);
backpointer_not_found(trans, bp_pos, bp, k);
return bkey_s_c_null;
} else {
struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);
if (IS_ERR_OR_NULL(b)) {
bch2_trans_iter_exit(trans, iter);
if (IS_ERR(b))
return bkey_s_c_err(PTR_ERR(b));
return bkey_s_c_null;
return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
}
backpointer_not_found(trans, bp_pos, bp, k, "extent");
return bkey_i_to_s_c(&b->key);
}
return bkey_s_c_null;
}
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
......@@ -329,6 +316,8 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
if (IS_ERR(b))
goto err;
BUG_ON(b->c.level != bp.level - 1);
if (b && extent_matches_bp(c, bp.btree_id, bp.level,
bkey_i_to_s_c(&b->key),
bucket, bp))
......@@ -337,8 +326,7 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
if (b && btree_node_will_make_reachable(b)) {
b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
} else {
backpointer_not_found(trans, bp_pos, bp,
bkey_i_to_s_c(&b->key), "btree node");
backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
b = NULL;
}
err:
......@@ -356,6 +344,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_
int ret = 0;
if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
backpointer_to_missing_device,
"backpointer for missing device:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = bch2_btree_delete_at(trans, bp_iter, 0);
......@@ -369,6 +358,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_
goto out;
if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
backpointer_to_missing_alloc,
"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
alloc_iter.pos.inode, alloc_iter.pos.offset,
(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
......@@ -453,14 +443,14 @@ static int check_bp_exists(struct btree_trans *trans,
return ret;
missing:
prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
bch2_btree_ids[bp.btree_id], bp.level);
bch2_btree_id_str(bp.btree_id), bp.level);
bch2_bkey_val_to_text(&buf, c, orig_k);
prt_printf(&buf, "\nbp pos ");
bch2_bpos_to_text(&buf, bp_iter.pos);
if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers ||
c->opts.reconstruct_alloc ||
fsck_err(c, "%s", buf.buf))
fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
goto out;
......@@ -793,7 +783,9 @@ static int check_one_backpointer(struct btree_trans *trans,
}
if (fsck_err_on(!k.k, c,
"backpointer for missing extent\n %s",
backpointer_to_missing_ptr,
"backpointer for missing %s\n %s",
bp.v->level ? "btree node" : "extent",
(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
goto out;
......
......@@ -7,7 +7,16 @@
#include "buckets.h"
#include "super.h"
int bch2_backpointer_invalid(const struct bch_fs *, struct bkey_s_c k,
static inline u64 swab40(u64 x)
{
return (((x & 0x00000000ffULL) << 32)|
((x & 0x000000ff00ULL) << 16)|
((x & 0x0000ff0000ULL) >> 0)|
((x & 0x00ff000000ULL) >> 16)|
((x & 0xff00000000ULL) >> 32));
}
int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k,
enum bkey_invalid_flags, struct printbuf *);
void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *);
void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
......
......@@ -2,20 +2,9 @@
#ifndef _BCACHEFS_BBPOS_H
#define _BCACHEFS_BBPOS_H
#include "bbpos_types.h"
#include "bkey_methods.h"
struct bbpos {
enum btree_id btree;
struct bpos pos;
};
static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos)
{
return (struct bbpos) { btree, pos };
}
#define BBPOS_MIN BBPOS(0, POS_MIN)
#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX)
#include "btree_cache.h"
static inline int bbpos_cmp(struct bbpos l, struct bbpos r)
{
......@@ -40,7 +29,7 @@ static inline struct bbpos bbpos_successor(struct bbpos pos)
static inline void bch2_bbpos_to_text(struct printbuf *out, struct bbpos pos)
{
prt_str(out, bch2_btree_ids[pos.btree]);
prt_str(out, bch2_btree_id_str(pos.btree));
prt_char(out, ':');
bch2_bpos_to_text(out, pos.pos);
}
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BBPOS_TYPES_H
#define _BCACHEFS_BBPOS_TYPES_H
struct bbpos {
enum btree_id btree;
struct bpos pos;
};
static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos)
{
return (struct bbpos) { btree, pos };
}
#define BBPOS_MIN BBPOS(0, POS_MIN)
#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX)
#endif /* _BCACHEFS_BBPOS_TYPES_H */
......@@ -209,6 +209,7 @@
#include "nocow_locking_types.h"
#include "opts.h"
#include "recovery_types.h"
#include "sb-errors_types.h"
#include "seqmutex.h"
#include "util.h"
......@@ -418,6 +419,7 @@ enum bch_time_stats {
#include "buckets_types.h"
#include "buckets_waiting_for_journal_types.h"
#include "clock_types.h"
#include "disk_groups_types.h"
#include "ec_types.h"
#include "journal_types.h"
#include "keylist_types.h"
......@@ -463,6 +465,7 @@ enum gc_phase {
GC_PHASE_BTREE_snapshot_trees,
GC_PHASE_BTREE_deleted_inodes,
GC_PHASE_BTREE_logged_ops,
GC_PHASE_BTREE_rebalance_work,
GC_PHASE_PENDING_DELETE,
};
......@@ -500,6 +503,8 @@ struct bch_dev {
* Committed by bch2_write_super() -> bch_fs_mi_update()
*/
struct bch_member_cpu mi;
atomic64_t errors[BCH_MEMBER_ERROR_NR];
__uuid_t uuid;
char name[BDEVNAME_SIZE];
......@@ -578,7 +583,7 @@ enum {
BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */
BCH_FS_NEED_ANOTHER_GC,
BCH_FS_HAVE_DELETED_SNAPSHOTS,
BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS,
/* errors: */
BCH_FS_ERROR,
......@@ -938,9 +943,6 @@ struct bch_fs {
struct list_head moving_context_list;
struct mutex moving_context_lock;
struct list_head data_progress_list;
struct mutex data_progress_lock;
/* REBALANCE */
struct bch_fs_rebalance rebalance;
......@@ -991,11 +993,6 @@ struct bch_fs {
struct bio_set dio_read_bioset;
struct bio_set nocow_flush_bioset;
/* ERRORS */
struct list_head fsck_errors;
struct mutex fsck_error_lock;
bool fsck_alloc_err;
/* QUOTAS */
struct bch_memquota_type quotas[QTYP_NR];
......@@ -1044,6 +1041,14 @@ struct bch_fs {
struct bch2_time_stats times[BCH_TIME_STAT_NR];
struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
/* ERRORS */
struct list_head fsck_error_msgs;
struct mutex fsck_error_msgs_lock;
bool fsck_alloc_msgs_err;
bch_sb_errors_cpu fsck_error_counts;
struct mutex fsck_error_counts_lock;
};
extern struct wait_queue_head bch2_read_only_wait;
......
......@@ -613,31 +613,17 @@ struct bch_extent_stripe_ptr {
#endif
};
struct bch_extent_reservation {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 type:6,
unused:22,
replicas:4,
generation:32;
#elif defined (__BIG_ENDIAN_BITFIELD)
__u64 generation:32,
replicas:4,
unused:22,
type:6;
#endif
};
struct bch_extent_rebalance {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 type:7,
unused:33,
compression:8,
__u64 type:6,
unused:34,
compression:8, /* enum bch_compression_opt */
target:16;
#elif defined (__BIG_ENDIAN_BITFIELD)
__u64 target:16,
compression:8,
unused:33,
type:7;
unused:34,
type:6;
#endif
};
......@@ -838,34 +824,30 @@ enum inode_opt_id {
Inode_opt_nr,
};
enum {
/*
* User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
* flags)
*/
__BCH_INODE_SYNC = 0,
__BCH_INODE_IMMUTABLE = 1,
__BCH_INODE_APPEND = 2,
__BCH_INODE_NODUMP = 3,
__BCH_INODE_NOATIME = 4,
__BCH_INODE_I_SIZE_DIRTY = 5, /* obsolete */
__BCH_INODE_I_SECTORS_DIRTY = 6, /* obsolete */
__BCH_INODE_UNLINKED = 7,
__BCH_INODE_BACKPTR_UNTRUSTED = 8,
/* bits 20+ reserved for packed fields below: */
};
#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC)
#define BCH_INODE_IMMUTABLE (1 << __BCH_INODE_IMMUTABLE)
#define BCH_INODE_APPEND (1 << __BCH_INODE_APPEND)
#define BCH_INODE_NODUMP (1 << __BCH_INODE_NODUMP)
#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME)
#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED)
#define BCH_INODE_BACKPTR_UNTRUSTED (1 << __BCH_INODE_BACKPTR_UNTRUSTED)
#define BCH_INODE_FLAGS() \
x(sync, 0) \
x(immutable, 1) \
x(append, 2) \
x(nodump, 3) \
x(noatime, 4) \
x(i_size_dirty, 5) \
x(i_sectors_dirty, 6) \
x(unlinked, 7) \
x(backptr_untrusted, 8)
/* bits 20+ reserved for packed fields below: */
enum bch_inode_flags {
#define x(t, n) BCH_INODE_##t = 1U << n,
BCH_INODE_FLAGS()
#undef x
};
enum __bch_inode_flags {
#define x(t, n) __BCH_INODE_##t = n,
BCH_INODE_FLAGS()
#undef x
};
LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31);
......@@ -1232,7 +1214,8 @@ struct bch_sb_field {
x(journal_seq_blacklist, 8) \
x(journal_v2, 9) \
x(counters, 10) \
x(members_v2, 11)
x(members_v2, 11) \
x(errors, 12)
enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr,
......@@ -1282,6 +1265,18 @@ enum bch_iops_measurement {
BCH_IOPS_NR
};
#define BCH_MEMBER_ERROR_TYPES() \
x(read, 0) \
x(write, 1) \
x(checksum, 2)
enum bch_member_error_type {
#define x(t, n) BCH_MEMBER_ERROR_##t = n,
BCH_MEMBER_ERROR_TYPES()
#undef x
BCH_MEMBER_ERROR_NR
};
struct bch_member {
__uuid_t uuid;
__le64 nbuckets; /* device size */
......@@ -1292,6 +1287,9 @@ struct bch_member {
__le64 flags;
__le32 iops[4];
__le64 errors[BCH_MEMBER_ERROR_NR];
__le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
__le64 errors_reset_time;
};
#define BCH_MEMBER_V1_BYTES 56
......@@ -1615,11 +1613,20 @@ struct journal_seq_blacklist_entry {
struct bch_sb_field_journal_seq_blacklist {
struct bch_sb_field field;
struct journal_seq_blacklist_entry start[];
};
struct journal_seq_blacklist_entry start[0];
__u64 _data[];
struct bch_sb_field_errors {
struct bch_sb_field field;
struct bch_sb_field_error_entry {
__le64 v;
__le64 last_error_time;
} entries[];
};
LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16);
LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
/* Superblock: */
/*
......@@ -1682,7 +1689,9 @@ struct bch_sb_field_journal_seq_blacklist {
x(snapshot_skiplists, BCH_VERSION(1, 1), \
BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \
x(deleted_inodes, BCH_VERSION(1, 2), \
BIT_ULL(BCH_RECOVERY_PASS_check_inodes))
BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \
x(rebalance_work, BCH_VERSION(1, 3), \
BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
......@@ -1693,7 +1702,7 @@ enum bcachefs_metadata_version {
};
static const __maybe_unused
unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_major_minor;
unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work;
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
......@@ -2247,7 +2256,8 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6);
enum btree_id_flags {
BTREE_ID_EXTENTS = BIT(0),
BTREE_ID_SNAPSHOTS = BIT(1),
BTREE_ID_DATA = BIT(2),
BTREE_ID_SNAPSHOT_FIELD = BIT(2),
BTREE_ID_DATA = BIT(3),
};
#define BCH_BTREE_IDS() \
......@@ -2302,11 +2312,13 @@ enum btree_id_flags {
BIT_ULL(KEY_TYPE_bucket_gens)) \
x(snapshot_trees, 15, 0, \
BIT_ULL(KEY_TYPE_snapshot_tree)) \
x(deleted_inodes, 16, BTREE_ID_SNAPSHOTS, \
x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \
BIT_ULL(KEY_TYPE_set)) \
x(logged_ops, 17, 0, \
BIT_ULL(KEY_TYPE_logged_op_truncate)| \
BIT_ULL(KEY_TYPE_logged_op_finsert))
BIT_ULL(KEY_TYPE_logged_op_finsert)) \
x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \
BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))
enum btree_id {
#define x(name, nr, ...) BTREE_ID_##name = nr,
......
......@@ -92,19 +92,15 @@ enum bkey_lr_packed {
#define bkey_lr_packed(_l, _r) \
((_l)->format + ((_r)->format << 1))
#define bkey_copy(_dst, _src) \
do { \
BUILD_BUG_ON(!type_is(_dst, struct bkey_i *) && \
!type_is(_dst, struct bkey_packed *)); \
BUILD_BUG_ON(!type_is(_src, struct bkey_i *) && \
!type_is(_src, struct bkey_packed *)); \
EBUG_ON((u64 *) (_dst) > (u64 *) (_src) && \
(u64 *) (_dst) < (u64 *) (_src) + \
((struct bkey *) (_src))->u64s); \
\
memcpy_u64s_small((_dst), (_src), \
((struct bkey *) (_src))->u64s); \
} while (0)
static inline void bkey_p_copy(struct bkey_packed *dst, const struct bkey_packed *src)
{
memcpy_u64s_small(dst, src, src->u64s);
}
static inline void bkey_copy(struct bkey_i *dst, const struct bkey_i *src)
{
memcpy_u64s_small(dst, src, src->k.u64s);
}
struct btree;
......
......@@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "backpointers.h"
#include "bkey_methods.h"
#include "btree_cache.h"
#include "btree_types.h"
#include "alloc_background.h"
#include "dirent.h"
......@@ -25,7 +26,7 @@ const char * const bch2_bkey_types[] = {
NULL
};
static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags, struct printbuf *err)
{
return 0;
......@@ -39,23 +40,24 @@ static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
.key_invalid = deleted_key_invalid, \
})
static int empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags, struct printbuf *err)
{
if (bkey_val_bytes(k.k)) {
prt_printf(err, "incorrect value size (%zu != 0)",
bkey_val_bytes(k.k));
return -BCH_ERR_invalid_bkey;
}
return 0;
int ret = 0;
bkey_fsck_err_on(bkey_val_bytes(k.k), c, err,
bkey_val_size_nonzero,
"incorrect value size (%zu != 0)",
bkey_val_bytes(k.k));
fsck_err:
return ret;
}
#define bch2_bkey_ops_error ((struct bkey_ops) { \
.key_invalid = empty_val_key_invalid, \
})
static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k,
static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags, struct printbuf *err)
{
return 0;
......@@ -70,7 +72,7 @@ static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k,
.key_invalid = empty_val_key_invalid, \
})
static int key_type_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k,
static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags, struct printbuf *err)
{
return 0;
......@@ -91,18 +93,6 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c,
.val_to_text = key_type_inline_data_to_text, \
})
static int key_type_set_invalid(const struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags, struct printbuf *err)
{
if (bkey_val_bytes(k.k)) {
prt_printf(err, "incorrect value size (%zu != %zu)",
bkey_val_bytes(k.k), sizeof(struct bch_cookie));
return -BCH_ERR_invalid_bkey;
}
return 0;
}
static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
{
bch2_key_resize(l.k, l.k->size + r.k->size);
......@@ -110,7 +100,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_
}
#define bch2_bkey_ops_set ((struct bkey_ops) { \
.key_invalid = key_type_set_invalid, \
.key_invalid = empty_val_key_invalid, \
.key_merge = key_type_set_merge, \
})
......@@ -128,84 +118,95 @@ int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k,
struct printbuf *err)
{
const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
int ret = 0;
if (bkey_val_bytes(k.k) < ops->min_val_size) {
prt_printf(err, "bad val size (%zu < %u)",
bkey_val_bytes(k.k), ops->min_val_size);
return -BCH_ERR_invalid_bkey;
}
bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, c, err,
bkey_val_size_too_small,
"bad val size (%zu < %u)",
bkey_val_bytes(k.k), ops->min_val_size);
if (!ops->key_invalid)
return 0;
return ops->key_invalid(c, k, flags, err);
ret = ops->key_invalid(c, k, flags, err);
fsck_err:
return ret;
}
static u64 bch2_key_types_allowed[] = {
#define x(name, nr, flags, keys) [BKEY_TYPE_##name] = BIT_ULL(KEY_TYPE_deleted)|keys,
BCH_BTREE_IDS()
#undef x
[BKEY_TYPE_btree] =
BIT_ULL(KEY_TYPE_deleted)|
BIT_ULL(KEY_TYPE_btree_ptr)|
BIT_ULL(KEY_TYPE_btree_ptr_v2),
#define x(name, nr, flags, keys) [BKEY_TYPE_##name] = BIT_ULL(KEY_TYPE_deleted)|keys,
BCH_BTREE_IDS()
#undef x
};
const char *bch2_btree_node_type_str(enum btree_node_type type)
{
return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1);
}
int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
enum btree_node_type type,
enum bkey_invalid_flags flags,
struct printbuf *err)
{
if (k.k->u64s < BKEY_U64s) {
prt_printf(err, "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s);
return -BCH_ERR_invalid_bkey;
}
int ret = 0;
if (flags & BKEY_INVALID_COMMIT &&
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type))) {
prt_printf(err, "invalid key type for btree %s (%s)",
bch2_btree_ids[type], bch2_bkey_types[k.k->type]);
return -BCH_ERR_invalid_bkey;
}
bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err,
bkey_u64s_too_small,
"u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s);
if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
if (k.k->size == 0) {
prt_printf(err, "size == 0");
return -BCH_ERR_invalid_bkey;
}
if (type >= BKEY_TYPE_NR)
return 0;
if (k.k->size > k.k->p.offset) {
prt_printf(err, "size greater than offset (%u > %llu)",
k.k->size, k.k->p.offset);
return -BCH_ERR_invalid_bkey;
}
bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) &&
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err,
bkey_invalid_type_for_btree,
"invalid key type for btree %s (%s)",
bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]);
if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
bkey_fsck_err_on(k.k->size == 0, c, err,
bkey_extent_size_zero,
"size == 0");
bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err,
bkey_extent_size_greater_than_offset,
"size greater than offset (%u > %llu)",
k.k->size, k.k->p.offset);
} else {
if (k.k->size) {
prt_printf(err, "size != 0");
return -BCH_ERR_invalid_bkey;
}
bkey_fsck_err_on(k.k->size, c, err,
bkey_size_nonzero,
"size != 0");
}
if (type != BKEY_TYPE_btree) {
if (!btree_type_has_snapshots((enum btree_id) type) &&
k.k->p.snapshot) {
prt_printf(err, "nonzero snapshot");
return -BCH_ERR_invalid_bkey;
}
if (btree_type_has_snapshots((enum btree_id) type) &&
!k.k->p.snapshot) {
prt_printf(err, "snapshot == 0");
return -BCH_ERR_invalid_bkey;
enum btree_id btree = type - 1;
if (btree_type_has_snapshots(btree)) {
bkey_fsck_err_on(!k.k->p.snapshot, c, err,
bkey_snapshot_zero,
"snapshot == 0");
} else if (!btree_type_has_snapshot_field(btree)) {
bkey_fsck_err_on(k.k->p.snapshot, c, err,
bkey_snapshot_nonzero,
"nonzero snapshot");
} else {
/*
* btree uses snapshot field but it's not required to be
* nonzero
*/
}
if (bkey_eq(k.k->p, POS_MAX)) {
prt_printf(err, "key at POS_MAX");
return -BCH_ERR_invalid_bkey;
}
bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err,
bkey_at_pos_max,
"key at POS_MAX");
}
return 0;
fsck_err:
return ret;
}
int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
......@@ -217,20 +218,20 @@ int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
bch2_bkey_val_invalid(c, k, flags, err);
}
int bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k,
struct printbuf *err)
int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b,
struct bkey_s_c k, struct printbuf *err)
{
if (bpos_lt(k.k->p, b->data->min_key)) {
prt_printf(err, "key before start of btree node");
return -BCH_ERR_invalid_bkey;
}
int ret = 0;
if (bpos_gt(k.k->p, b->data->max_key)) {
prt_printf(err, "key past end of btree node");
return -BCH_ERR_invalid_bkey;
}
bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err,
bkey_before_start_of_btree_node,
"key before start of btree node");
return 0;
bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err,
bkey_after_end_of_btree_node,
"key past end of btree node");
fsck_err:
return ret;
}
void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)
......
......@@ -21,7 +21,7 @@ extern const struct bkey_ops bch2_bkey_null_ops;
* being read or written; more aggressive checks can be enabled when rw == WRITE.
*/
struct bkey_ops {
int (*key_invalid)(const struct bch_fs *c, struct bkey_s_c k,
int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags, struct printbuf *err);
void (*val_to_text)(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
......@@ -55,7 +55,8 @@ int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
enum bkey_invalid_flags, struct printbuf *);
int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
enum bkey_invalid_flags, struct printbuf *);
int bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c, struct printbuf *);
int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *,
struct bkey_s_c, struct printbuf *);
void bch2_bpos_to_text(struct printbuf *, struct bpos);
void bch2_bkey_to_text(struct printbuf *, const struct bkey *);
......@@ -119,16 +120,6 @@ enum btree_update_flags {
#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC)
#define BTREE_TRIGGER_WANTS_OLD_AND_NEW \
((1U << KEY_TYPE_alloc)| \
(1U << KEY_TYPE_alloc_v2)| \
(1U << KEY_TYPE_alloc_v3)| \
(1U << KEY_TYPE_alloc_v4)| \
(1U << KEY_TYPE_stripe)| \
(1U << KEY_TYPE_inode)| \
(1U << KEY_TYPE_inode_v2)| \
(1U << KEY_TYPE_snapshot))
static inline int bch2_trans_mark_key(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_i *new,
......
......@@ -106,7 +106,7 @@ bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
while ((k = sort_iter_peek(iter))) {
if (!bkey_deleted(k) &&
!should_drop_next_key(iter)) {
bkey_copy(out, k);
bkey_p_copy(out, k);
btree_keys_account_key_add(&nr, 0, out);
out = bkey_p_next(out);
}
......@@ -137,7 +137,7 @@ bch2_sort_repack(struct bset *dst, struct btree *src,
continue;
if (!transform)
bkey_copy(out, in);
bkey_p_copy(out, in);
else if (bch2_bkey_transform(out_f, out, bkey_packed(in)
? in_f : &bch2_bkey_format_current, in))
out->format = KEY_FORMAT_LOCAL_BTREE;
......@@ -191,7 +191,7 @@ unsigned bch2_sort_keys(struct bkey_packed *dst,
memcpy_u64s_small(out, in, bkeyp_key_u64s(f, in));
set_bkeyp_val_u64s(f, out, 0);
} else {
bkey_copy(out, in);
bkey_p_copy(out, in);
}
out->needs_whiteout |= needs_whiteout;
out = bkey_p_next(out);
......
......@@ -472,7 +472,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
mutex_init(&c->verify_lock);
shrink = shrinker_alloc(0, "%s/btree_cache", c->name);
shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
if (!shrink)
goto err;
bc->shrink = shrink;
......@@ -785,12 +785,12 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
"btree node header doesn't match ptr\n"
"btree %s level %u\n"
"ptr: ",
bch2_btree_ids[b->c.btree_id], b->c.level);
bch2_btree_id_str(b->c.btree_id), b->c.level);
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
prt_printf(&buf, "\nheader: btree %s level %llu\n"
"min ",
bch2_btree_ids[BTREE_NODE_ID(b->data)],
bch2_btree_id_str(BTREE_NODE_ID(b->data)),
BTREE_NODE_LEVEL(b->data));
bch2_bpos_to_text(&buf, b->data->min_key);
......@@ -1153,8 +1153,21 @@ void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
six_unlock_intent(&b->c.lock);
}
void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
const struct btree *b)
const char *bch2_btree_id_str(enum btree_id btree)
{
return btree < BTREE_ID_NR ? __bch2_btree_ids[btree] : "(unknown)";
}
void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
{
prt_printf(out, "%s level %u/%u\n ",
bch2_btree_id_str(b->c.btree_id),
b->c.level,
bch2_btree_id_root(c, b->c.btree_id)->level);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
}
void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
{
struct bset_stats stats;
......
......@@ -123,8 +123,9 @@ static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b)
return bch2_btree_id_root(c, b->c.btree_id)->b;
}
void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *,
const struct btree *);
const char *bch2_btree_id_str(enum btree_id);
void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs *);
#endif /* _BCACHEFS_BTREE_CACHE_H */
This diff is collapsed.
This diff is collapsed.
......@@ -257,7 +257,7 @@ static void bch2_btree_iter_verify(struct btree_iter *iter)
BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
!btree_type_has_snapshots(iter->btree_id));
!btree_type_has_snapshot_field(iter->btree_id));
if (iter->update_path)
bch2_btree_path_verify(trans, iter->update_path);
......@@ -362,7 +362,7 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
bch2_bpos_to_text(&buf, pos);
panic("not locked: %s %s%s\n",
bch2_btree_ids[id], buf.buf,
bch2_btree_id_str(id), buf.buf,
key_cache ? " cached" : "");
}
......@@ -1109,6 +1109,9 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
if (unlikely(ret))
goto out;
if (unlikely(!trans->srcu_held))
bch2_trans_srcu_lock(trans);
/*
* Ensure we obey path->should_be_locked: if it's set, we can't unlock
* and re-traverse the path without a transaction restart:
......@@ -1371,7 +1374,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
struct bkey_s_c old = { &i->old_k, i->old_v };
prt_printf(buf, "update: btree=%s cached=%u %pS",
bch2_btree_ids[i->btree_id],
bch2_btree_id_str(i->btree_id),
i->cached,
(void *) i->ip_allocated);
prt_newline(buf);
......@@ -1387,7 +1390,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
trans_for_each_wb_update(trans, wb) {
prt_printf(buf, "update: btree=%s wb=1 %pS",
bch2_btree_ids[wb->btree],
bch2_btree_id_str(wb->btree),
(void *) i->ip_allocated);
prt_newline(buf);
......@@ -1416,7 +1419,7 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path)
path->idx, path->ref, path->intent_ref,
path->preserve ? 'P' : ' ',
path->should_be_locked ? 'S' : ' ',
bch2_btree_ids[path->btree_id],
bch2_btree_id_str(path->btree_id),
path->level);
bch2_bpos_to_text(out, path->pos);
......@@ -1523,6 +1526,7 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans,
path->ref = 0;
path->intent_ref = 0;
path->nodes_locked = 0;
path->alloc_seq++;
btree_path_list_add(trans, pos, path);
trans->paths_sorted = false;
......@@ -1598,7 +1602,7 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
locks_want = min(locks_want, BTREE_MAX_DEPTH);
if (locks_want > path->locks_want)
bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want);
bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL);
return path;
}
......@@ -2829,18 +2833,36 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
return p;
}
static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
static inline void check_srcu_held_too_long(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
struct btree_path *path;
WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
"btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
(jiffies - trans->srcu_lock_time) / HZ);
}
trans_for_each_path(trans, path)
if (path->cached && !btree_node_locked(path, 0))
path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
void bch2_trans_srcu_unlock(struct btree_trans *trans)
{
if (trans->srcu_held) {
struct bch_fs *c = trans->c;
struct btree_path *path;
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies;
trans_for_each_path(trans, path)
if (path->cached && !btree_node_locked(path, 0))
path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
check_srcu_held_too_long(trans);
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
trans->srcu_held = false;
}
}
void bch2_trans_srcu_lock(struct btree_trans *trans)
{
if (!trans->srcu_held) {
trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier);
trans->srcu_lock_time = jiffies;
trans->srcu_held = true;
}
}
/**
......@@ -2894,8 +2916,9 @@ u32 bch2_trans_begin(struct btree_trans *trans)
}
trans->last_begin_time = now;
if (unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
bch2_trans_reset_srcu_lock(trans);
if (unlikely(trans->srcu_held &&
time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
bch2_trans_srcu_unlock(trans);
trans->last_begin_ip = _RET_IP_;
if (trans->restarted) {
......@@ -2980,8 +3003,9 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
trans->wb_updates_size = s->wb_updates_size;
}
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies;
trans->srcu_held = true;
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
struct btree_trans *pos;
......@@ -3025,7 +3049,7 @@ static void check_btree_paths_leaked(struct btree_trans *trans)
trans_for_each_path(trans, path)
if (path->ref)
printk(KERN_ERR " btree %s %pS\n",
bch2_btree_ids[path->btree_id],
bch2_btree_id_str(path->btree_id),
(void *) path->ip_allocated);
/* Be noisy about this: */
bch2_fatal_error(c);
......@@ -3058,7 +3082,10 @@ void bch2_trans_put(struct btree_trans *trans)
check_btree_paths_leaked(trans);
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
if (trans->srcu_held) {
check_srcu_held_too_long(trans);
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
}
bch2_journal_preres_put(&c->journal, &trans->journal_preres);
......@@ -3100,7 +3127,7 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out,
prt_tab(out);
prt_printf(out, "%px %c l=%u %s:", b, b->cached ? 'c' : 'b',
b->level, bch2_btree_ids[b->btree_id]);
b->level, bch2_btree_id_str(b->btree_id));
bch2_bpos_to_text(out, btree_node_pos(b));
prt_tab(out);
......@@ -3130,7 +3157,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
path->idx,
path->cached ? 'c' : 'b',
path->level,
bch2_btree_ids[path->btree_id]);
bch2_btree_id_str(path->btree_id));
bch2_bpos_to_text(out, path->pos);
prt_newline(out);
......
......@@ -274,6 +274,7 @@ void bch2_path_put(struct btree_trans *, struct btree_path *, bool);
int bch2_trans_relock(struct btree_trans *);
int bch2_trans_relock_notrace(struct btree_trans *);
void bch2_trans_unlock(struct btree_trans *);
void bch2_trans_unlock_long(struct btree_trans *);
bool bch2_trans_locked(struct btree_trans *);
static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count)
......@@ -411,11 +412,11 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans,
flags |= BTREE_ITER_ALL_SNAPSHOTS|__BTREE_ITER_ALL_SNAPSHOTS;
if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) &&
btree_node_type_is_extents(btree_id))
btree_id_is_extents(btree_id))
flags |= BTREE_ITER_IS_EXTENTS;
if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
!btree_type_has_snapshots(btree_id))
!btree_type_has_snapshot_field(btree_id))
flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) &&
......@@ -579,6 +580,9 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
__bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, \
KEY_TYPE_##_type, sizeof(*_val), _val)
void bch2_trans_srcu_unlock(struct btree_trans *);
void bch2_trans_srcu_lock(struct btree_trans *);
u32 bch2_trans_begin(struct btree_trans *);
/*
......
......@@ -324,7 +324,7 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
ck = bkey_cached_reuse(bc);
if (unlikely(!ck)) {
bch_err(c, "error allocating memory for key cache item, btree %s",
bch2_btree_ids[path->btree_id]);
bch2_btree_id_str(path->btree_id));
return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create);
}
......@@ -407,7 +407,7 @@ static int btree_key_cache_fill(struct btree_trans *trans,
new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
if (!new_k) {
bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
bch2_btree_ids[ck->key.btree_id], new_u64s);
bch2_btree_id_str(ck->key.btree_id), new_u64s);
ret = -BCH_ERR_ENOMEM_btree_key_cache_fill;
goto err;
}
......@@ -509,7 +509,7 @@ bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree
* path->uptodate yet:
*/
if (!path->locks_want &&
!__bch2_btree_path_upgrade(trans, path, 1)) {
!__bch2_btree_path_upgrade(trans, path, 1, NULL)) {
trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_);
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade);
goto err;
......@@ -1038,7 +1038,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
bc->table_init_done = true;
shrink = shrinker_alloc(0, "%s/btree_key_cache", c->name);
shrink = shrinker_alloc(0, "%s-btree_key_cache", c->name);
if (!shrink)
return -BCH_ERR_ENOMEM_fs_btree_cache_init;
bc->shrink = shrink;
......
......@@ -431,7 +431,8 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
static inline bool btree_path_get_locks(struct btree_trans *trans,
struct btree_path *path,
bool upgrade)
bool upgrade,
struct get_locks_fail *f)
{
unsigned l = path->level;
int fail_idx = -1;
......@@ -442,8 +443,14 @@ static inline bool btree_path_get_locks(struct btree_trans *trans,
if (!(upgrade
? bch2_btree_node_upgrade(trans, path, l)
: bch2_btree_node_relock(trans, path, l)))
fail_idx = l;
: bch2_btree_node_relock(trans, path, l))) {
fail_idx = l;
if (f) {
f->l = l;
f->b = path->l[l].b;
}
}
l++;
} while (l < path->locks_want);
......@@ -584,7 +591,9 @@ __flatten
bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
struct btree_path *path, unsigned long trace_ip)
{
return btree_path_get_locks(trans, path, false);
struct get_locks_fail f;
return btree_path_get_locks(trans, path, false, &f);
}
int __bch2_btree_path_relock(struct btree_trans *trans,
......@@ -600,22 +609,24 @@ int __bch2_btree_path_relock(struct btree_trans *trans,
bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
struct btree_path *path,
unsigned new_locks_want)
unsigned new_locks_want,
struct get_locks_fail *f)
{
EBUG_ON(path->locks_want >= new_locks_want);
path->locks_want = new_locks_want;
return btree_path_get_locks(trans, path, true);
return btree_path_get_locks(trans, path, true, f);
}
bool __bch2_btree_path_upgrade(struct btree_trans *trans,
struct btree_path *path,
unsigned new_locks_want)
unsigned new_locks_want,
struct get_locks_fail *f)
{
struct btree_path *linked;
if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want))
if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f))
return true;
/*
......@@ -644,7 +655,7 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans,
linked->btree_id == path->btree_id &&
linked->locks_want < new_locks_want) {
linked->locks_want = new_locks_want;
btree_path_get_locks(trans, linked, true);
btree_path_get_locks(trans, linked, true, NULL);
}
return false;
......@@ -656,6 +667,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
{
unsigned l;
if (trans->restarted)
return;
EBUG_ON(path->locks_want < new_locks_want);
path->locks_want = new_locks_want;
......@@ -674,6 +688,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
}
bch2_btree_path_verify_locks(path);
path->downgrade_seq++;
trace_path_downgrade(trans, _RET_IP_, path);
}
/* Btree transaction locking: */
......@@ -682,6 +699,9 @@ void bch2_trans_downgrade(struct btree_trans *trans)
{
struct btree_path *path;
if (trans->restarted)
return;
trans_for_each_path(trans, path)
bch2_btree_path_downgrade(trans, path);
}
......@@ -733,6 +753,12 @@ void bch2_trans_unlock(struct btree_trans *trans)
__bch2_btree_path_unlock(trans, path);
}
void bch2_trans_unlock_long(struct btree_trans *trans)
{
bch2_trans_unlock(trans);
bch2_trans_srcu_unlock(trans);
}
bool bch2_trans_locked(struct btree_trans *trans)
{
struct btree_path *path;
......
......@@ -355,26 +355,36 @@ static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans,
/* upgrade */
struct get_locks_fail {
unsigned l;
struct btree *b;
};
bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *,
struct btree_path *, unsigned);
struct btree_path *, unsigned,
struct get_locks_fail *);
bool __bch2_btree_path_upgrade(struct btree_trans *,
struct btree_path *, unsigned);
struct btree_path *, unsigned,
struct get_locks_fail *);
static inline int bch2_btree_path_upgrade(struct btree_trans *trans,
struct btree_path *path,
unsigned new_locks_want)
{
struct get_locks_fail f;
unsigned old_locks_want = path->locks_want;
new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
if (path->locks_want < new_locks_want
? __bch2_btree_path_upgrade(trans, path, new_locks_want)
? __bch2_btree_path_upgrade(trans, path, new_locks_want, &f)
: path->uptodate == BTREE_ITER_UPTODATE)
return 0;
trace_and_count(trans->c, trans_restart_upgrade, trans, _THIS_IP_, path,
old_locks_want, new_locks_want);
old_locks_want, new_locks_want, &f);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
}
......
......@@ -269,6 +269,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
BUG_ON(i->level != i->path->level);
BUG_ON(i->btree_id != i->path->btree_id);
EBUG_ON(!i->level &&
btree_type_has_snapshots(i->btree_id) &&
!(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
i->k->k.p.snapshot &&
......@@ -349,7 +350,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
if (!new_k) {
bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
bch2_btree_ids[path->btree_id], new_u64s);
bch2_btree_id_str(path->btree_id), new_u64s);
return -BCH_ERR_ENOMEM_btree_key_cache_insert;
}
......@@ -379,11 +380,10 @@ static int run_one_mem_trigger(struct btree_trans *trans,
if (unlikely(flags & BTREE_TRIGGER_NORUN))
return 0;
if (!btree_node_type_needs_gc((enum btree_node_type) i->btree_id))
if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id)))
return 0;
if (old_ops->atomic_trigger == new_ops->atomic_trigger &&
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
if (old_ops->atomic_trigger == new_ops->atomic_trigger) {
ret = bch2_mark_key(trans, i->btree_id, i->level,
old, bkey_i_to_s_c(new),
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
......@@ -425,8 +425,7 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
if (!i->insert_trigger_run &&
!i->overwrite_trigger_run &&
old_ops->trans_trigger == new_ops->trans_trigger &&
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
old_ops->trans_trigger == new_ops->trans_trigger) {
i->overwrite_trigger_run = true;
i->insert_trigger_run = true;
return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k,
......@@ -683,7 +682,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
BCH_JSET_ENTRY_overwrite,
i->btree_id, i->level,
i->old_k.u64s);
bkey_reassemble(&entry->start[0],
bkey_reassemble((struct bkey_i *) entry->start,
(struct bkey_s_c) { &i->old_k, i->old_v });
}
......@@ -691,7 +690,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
BCH_JSET_ENTRY_btree_keys,
i->btree_id, i->level,
i->k->k.u64s);
bkey_copy(&entry->start[0], i->k);
bkey_copy((struct bkey_i *) entry->start, i->k);
}
trans_for_each_wb_update(trans, wb) {
......@@ -699,7 +698,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
BCH_JSET_ENTRY_btree_keys,
wb->btree, 0,
wb->k.k.u64s);
bkey_copy(&entry->start[0], &wb->k);
bkey_copy((struct bkey_i *) entry->start, &wb->k);
}
if (trans->journal_seq)
......@@ -776,12 +775,12 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans
bch2_journal_key_overwritten(trans->c, wb->btree, 0, wb->k.k.p);
}
static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, unsigned flags,
static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans,
enum bkey_invalid_flags flags,
struct btree_insert_entry *i,
struct printbuf *err)
{
struct bch_fs *c = trans->c;
int rw = (flags & BTREE_INSERT_JOURNAL_REPLAY) ? READ : WRITE;
printbuf_reset(err);
prt_printf(err, "invalid bkey on insert from %s -> %ps",
......@@ -792,8 +791,7 @@ static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, un
bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k));
prt_newline(err);
bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
i->bkey_type, rw, err);
bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err);
bch2_print_string_as_lines(KERN_ERR, err->buf);
bch2_inconsistent_error(c);
......@@ -864,12 +862,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
*/
bch2_journal_res_put(&c->journal, &trans->journal_res);
if (unlikely(ret))
return ret;
bch2_trans_downgrade(trans);
return 0;
return ret;
}
static int journal_reclaim_wait_done(struct bch_fs *c)
......@@ -1034,7 +1027,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
i->bkey_type, invalid_flags, &buf)))
ret = bch2_trans_commit_bkey_invalid(trans, flags, i, &buf);
ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf);
btree_insert_entry_checks(trans, i);
printbuf_exit(&buf);
......@@ -1138,6 +1131,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
bch2_write_ref_put(c, BCH_WRITE_REF_trans);
out_reset:
if (!ret)
bch2_trans_downgrade(trans);
bch2_trans_reset_updates(trans);
return ret;
......
......@@ -228,6 +228,8 @@ struct btree_path {
u8 sorted_idx;
u8 ref;
u8 intent_ref;
u32 alloc_seq;
u32 downgrade_seq;
/* btree_iter_copy starts here: */
struct bpos pos;
......@@ -424,6 +426,7 @@ struct btree_trans {
u8 nr_updates;
u8 nr_wb_updates;
u8 wb_updates_size;
bool srcu_held:1;
bool used_mempool:1;
bool in_traverse_all:1;
bool paths_sorted:1;
......@@ -636,16 +639,17 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i)
}
enum btree_node_type {
#define x(kwd, val, ...) BKEY_TYPE_##kwd = val,
BKEY_TYPE_btree,
#define x(kwd, val, ...) BKEY_TYPE_##kwd = val + 1,
BCH_BTREE_IDS()
#undef x
BKEY_TYPE_btree,
BKEY_TYPE_NR
};
/* Type of a key in btree @id at level @level: */
static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id)
{
return level ? BKEY_TYPE_btree : (enum btree_node_type) id;
return level ? BKEY_TYPE_btree : (unsigned) id + 1;
}
/* Type of keys @b contains: */
......@@ -654,19 +658,21 @@ static inline enum btree_node_type btree_node_type(struct btree *b)
return __btree_node_type(b->c.level, b->c.btree_id);
}
const char *bch2_btree_node_type_str(enum btree_node_type);
#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \
(BIT(BKEY_TYPE_extents)| \
BIT(BKEY_TYPE_alloc)| \
BIT(BKEY_TYPE_inodes)| \
BIT(BKEY_TYPE_stripes)| \
BIT(BKEY_TYPE_reflink)| \
BIT(BKEY_TYPE_btree))
(BIT_ULL(BKEY_TYPE_extents)| \
BIT_ULL(BKEY_TYPE_alloc)| \
BIT_ULL(BKEY_TYPE_inodes)| \
BIT_ULL(BKEY_TYPE_stripes)| \
BIT_ULL(BKEY_TYPE_reflink)| \
BIT_ULL(BKEY_TYPE_btree))
#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \
(BIT(BKEY_TYPE_alloc)| \
BIT(BKEY_TYPE_inodes)| \
BIT(BKEY_TYPE_stripes)| \
BIT(BKEY_TYPE_snapshots))
(BIT_ULL(BKEY_TYPE_alloc)| \
BIT_ULL(BKEY_TYPE_inodes)| \
BIT_ULL(BKEY_TYPE_stripes)| \
BIT_ULL(BKEY_TYPE_snapshots))
#define BTREE_NODE_TYPE_HAS_TRIGGERS \
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
......@@ -674,13 +680,13 @@ static inline enum btree_node_type btree_node_type(struct btree *b)
static inline bool btree_node_type_needs_gc(enum btree_node_type type)
{
return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type);
return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type);
}
static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
const unsigned mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << nr)
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
BCH_BTREE_IDS()
#undef x
;
......@@ -690,7 +696,7 @@ static inline bool btree_node_type_is_extents(enum btree_node_type type)
static inline bool btree_id_is_extents(enum btree_id btree)
{
return btree_node_type_is_extents((enum btree_node_type) btree);
return btree_node_type_is_extents(__btree_node_type(0, btree));
}
static inline bool btree_type_has_snapshots(enum btree_id id)
......@@ -704,6 +710,17 @@ static inline bool btree_type_has_snapshots(enum btree_id id)
return (1U << id) & mask;
}
static inline bool btree_type_has_snapshot_field(enum btree_id id)
{
const unsigned mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
BCH_BTREE_IDS()
#undef x
;
return (1U << id) & mask;
}
static inline bool btree_type_has_ptrs(enum btree_id id)
{
const unsigned mask = 0
......
......@@ -1274,14 +1274,14 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
btree_node_type(b), WRITE, &buf) ?:
bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert), &buf)) {
bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) {
printbuf_reset(&buf);
prt_printf(&buf, "inserting invalid bkey\n ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
prt_printf(&buf, "\n ");
bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
btree_node_type(b), WRITE, &buf);
bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert), &buf);
bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf);
bch2_fs_inconsistent(c, "%s", buf.buf);
dump_stack();
......@@ -1987,7 +1987,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
out:
if (new_path)
bch2_path_put(trans, new_path, true);
bch2_btree_path_downgrade(trans, iter->path);
bch2_trans_downgrade(trans);
return ret;
err:
bch2_btree_node_free_never_used(as, trans, n);
......@@ -2411,30 +2411,24 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry
r->level = entry->level;
r->alive = true;
bkey_copy(&r->key, &entry->start[0]);
bkey_copy(&r->key, (struct bkey_i *) entry->start);
mutex_unlock(&c->btree_root_lock);
}
struct jset_entry *
bch2_btree_roots_to_journal_entries(struct bch_fs *c,
struct jset_entry *start,
struct jset_entry *end)
struct jset_entry *end,
unsigned long skip)
{
struct jset_entry *entry;
unsigned long have = 0;
unsigned i;
for (entry = start; entry < end; entry = vstruct_next(entry))
if (entry->type == BCH_JSET_ENTRY_btree_root)
__set_bit(entry->btree_id, &have);
mutex_lock(&c->btree_root_lock);
for (i = 0; i < btree_id_nr_alive(c); i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
if (r->alive && !test_bit(i, &have)) {
if (r->alive && !test_bit(i, &skip)) {
journal_entry_set(end, BCH_JSET_ENTRY_btree_root,
i, r->level, &r->key, r->key.k.u64s);
end = vstruct_next(end);
......
......@@ -271,7 +271,7 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
struct btree_node_entry *bne = max(write_block(b),
(void *) btree_bkey_last(b, bset_tree_last(b)));
ssize_t remaining_space =
__bch_btree_u64s_remaining(c, b, &bne->keys.start[0]);
__bch_btree_u64s_remaining(c, b, bne->keys.start);
if (unlikely(bset_written(b, bset(b, t)))) {
if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
......@@ -303,7 +303,7 @@ static inline void push_whiteout(struct bch_fs *c, struct btree *b,
k.needs_whiteout = true;
b->whiteout_u64s += k.u64s;
bkey_copy(unwritten_whiteouts_start(c, b), &k);
bkey_p_copy(unwritten_whiteouts_start(c, b), &k);
}
/*
......@@ -325,7 +325,7 @@ bool bch2_btree_interior_updates_flush(struct bch_fs *);
void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *);
struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,
struct jset_entry *, struct jset_entry *);
struct jset_entry *, unsigned long);
void bch2_do_pending_node_rewrites(struct bch_fs *);
void bch2_free_pending_node_rewrites(struct bch_fs *);
......
This diff is collapsed.
......@@ -339,12 +339,27 @@ int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct
int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
#define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\
({ \
int ret = 0; \
\
if (_old.k->type) \
ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT); \
if (!ret && _new.k->type) \
ret = _fn(_trans, _btree_id, _level, _new, _flags & ~BTREE_TRIGGER_OVERWRITE); \
ret; \
})
#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) \
mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, bkey_i_to_s_c(_new), _flags)
void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *);
int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,
size_t, enum bch_data_type, unsigned);
int bch2_trans_mark_dev_sb(struct bch_fs *, struct bch_dev *);
int bch2_trans_mark_dev_sbs(struct bch_fs *);
static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b)
{
......
......@@ -332,8 +332,8 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
struct bch_ioctl_data_event e = {
.type = BCH_DATA_EVENT_PROGRESS,
.p.data_type = ctx->stats.data_type,
.p.btree_id = ctx->stats.btree_id,
.p.pos = ctx->stats.pos,
.p.btree_id = ctx->stats.pos.btree,
.p.pos = ctx->stats.pos.pos,
.p.sectors_done = atomic64_read(&ctx->stats.sectors_seen),
.p.sectors_total = bch2_fs_usage_read_short(c).used,
};
......
......@@ -697,14 +697,32 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
return ret;
}
void bch2_compression_opt_to_text(struct printbuf *out, u64 v)
{
struct bch_compression_opt opt = bch2_compression_decode(v);
if (opt.type < BCH_COMPRESSION_OPT_NR)
prt_str(out, bch2_compression_opts[opt.type]);
else
prt_printf(out, "(unknown compression opt %u)", opt.type);
if (opt.level)
prt_printf(out, ":%u", opt.level);
}
void bch2_opt_compression_to_text(struct printbuf *out,
struct bch_fs *c,
struct bch_sb *sb,
u64 v)
{
struct bch_compression_opt opt = bch2_compression_decode(v);
return bch2_compression_opt_to_text(out, v);
}
prt_str(out, bch2_compression_opts[opt.type]);
if (opt.level)
prt_printf(out, ":%u", opt.level);
int bch2_opt_compression_validate(u64 v, struct printbuf *err)
{
if (!bch2_compression_opt_valid(v)) {
prt_printf(err, "invalid compression opt %llu", v);
return -BCH_ERR_invalid_sb_opt_compression;
}
return 0;
}
......@@ -4,12 +4,18 @@
#include "extents_types.h"
static const unsigned __bch2_compression_opt_to_type[] = {
#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t,
BCH_COMPRESSION_OPTS()
#undef x
};
struct bch_compression_opt {
u8 type:4,
level:4;
};
static inline struct bch_compression_opt bch2_compression_decode(unsigned v)
static inline struct bch_compression_opt __bch2_compression_decode(unsigned v)
{
return (struct bch_compression_opt) {
.type = v & 15,
......@@ -17,17 +23,25 @@ static inline struct bch_compression_opt bch2_compression_decode(unsigned v)
};
}
static inline bool bch2_compression_opt_valid(unsigned v)
{
struct bch_compression_opt opt = __bch2_compression_decode(v);
return opt.type < ARRAY_SIZE(__bch2_compression_opt_to_type) && !(!opt.type && opt.level);
}
static inline struct bch_compression_opt bch2_compression_decode(unsigned v)
{
return bch2_compression_opt_valid(v)
? __bch2_compression_decode(v)
: (struct bch_compression_opt) { 0 };
}
static inline unsigned bch2_compression_encode(struct bch_compression_opt opt)
{
return opt.type|(opt.level << 4);
}
static const unsigned __bch2_compression_opt_to_type[] = {
#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t,
BCH_COMPRESSION_OPTS()
#undef x
};
static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
{
return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
......@@ -44,12 +58,16 @@ int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned);
void bch2_fs_compress_exit(struct bch_fs *);
int bch2_fs_compress_init(struct bch_fs *);
void bch2_compression_opt_to_text(struct printbuf *, u64);
int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
int bch2_opt_compression_validate(u64, struct printbuf *);
#define bch2_opt_compression (struct bch_opt_fn) { \
.parse = bch2_opt_compression_parse, \
.to_text = bch2_opt_compression_to_text, \
.parse = bch2_opt_compression_parse, \
.to_text = bch2_opt_compression_to_text, \
.validate = bch2_opt_compression_validate, \
}
#endif /* _BCACHEFS_COMPRESS_H */
......@@ -69,9 +69,15 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more,
_ret; \
})
#define darray_remove_item(_d, _pos) \
array_remove_item((_d)->data, (_d)->nr, (_pos) - (_d)->data)
#define darray_for_each(_d, _i) \
for (_i = (_d).data; _i < (_d).data + (_d).nr; _i++)
#define darray_for_each_reverse(_d, _i) \
for (_i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i)
#define darray_init(_d) \
do { \
(_d)->data = NULL; \
......
......@@ -13,6 +13,7 @@
#include "keylist.h"
#include "move.h"
#include "nocow_locking.h"
#include "rebalance.h"
#include "subvolume.h"
#include "trace.h"
......@@ -161,11 +162,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
if (((1U << i) & m->data_opts.rewrite_ptrs) &&
(ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
!ptr->cached) {
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr);
/*
* See comment below:
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr);
*/
rewrites_found |= 1U << i;
}
i++;
......@@ -211,14 +208,8 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
if (!p.ptr.cached &&
durability - ptr_durability >= m->op.opts.data_replicas) {
durability -= ptr_durability;
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), &entry->ptr);
/*
* Currently, we're dropping unneeded replicas
* instead of marking them as cached, since
* cached data in stripe buckets prevents them
* from being reused:
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr);
*/
goto restart_drop_extra_replicas;
}
}
......@@ -251,11 +242,11 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, bkey_start_pos(&insert->k)) ?:
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, insert->k.p);
if (ret)
goto err;
ret = bch2_trans_update(trans, &iter, insert,
k.k->p, insert->k.p) ?:
bch2_bkey_set_needs_rebalance(c, insert,
op->opts.background_target,
op->opts.background_compression) ?:
bch2_trans_update(trans, &iter, insert,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
bch2_trans_commit(trans, &op->res,
NULL,
......@@ -281,11 +272,11 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
}
continue;
nowork:
if (m->ctxt && m->ctxt->stats) {
if (m->stats && m->stats) {
BUG_ON(k.k->p.offset <= iter.pos.offset);
atomic64_inc(&m->ctxt->stats->keys_raced);
atomic64_inc(&m->stats->keys_raced);
atomic64_add(k.k->p.offset - iter.pos.offset,
&m->ctxt->stats->sectors_raced);
&m->stats->sectors_raced);
}
this_cpu_inc(c->counters[BCH_COUNTER_move_extent_fail]);
......@@ -439,6 +430,8 @@ int bch2_data_update_init(struct btree_trans *trans,
bch2_bkey_buf_reassemble(&m->k, c, k);
m->btree_id = btree_id;
m->data_opts = data_opts;
m->ctxt = ctxt;
m->stats = ctxt ? ctxt->stats : NULL;
bch2_write_op_init(&m->op, c, io_opts);
m->op.pos = bkey_start_pos(k.k);
......@@ -487,7 +480,7 @@ int bch2_data_update_init(struct btree_trans *trans,
if (c->opts.nocow_enabled) {
if (ctxt) {
move_ctxt_wait_event(ctxt, trans,
move_ctxt_wait_event(ctxt,
(locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0)) ||
!atomic_read(&ctxt->read_sectors));
......
......@@ -23,6 +23,7 @@ struct data_update {
struct bkey_buf k;
struct data_update_opts data_opts;
struct moving_context *ctxt;
struct bch_move_stats *stats;
struct bch_write_op op;
};
......
......@@ -517,7 +517,7 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *
prt_printf(out, "%px btree=%s l=%u ",
b,
bch2_btree_ids[b->c.btree_id],
bch2_btree_id_str(b->c.btree_id),
b->c.level);
prt_newline(out);
......@@ -919,18 +919,18 @@ void bch2_fs_debug_init(struct bch_fs *c)
bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
bd++) {
bd->id = bd - c->btree_debug;
debugfs_create_file(bch2_btree_ids[bd->id],
debugfs_create_file(bch2_btree_id_str(bd->id),
0400, c->btree_debug_dir, bd,
&btree_debug_ops);
snprintf(name, sizeof(name), "%s-formats",
bch2_btree_ids[bd->id]);
bch2_btree_id_str(bd->id));
debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
&btree_format_debug_ops);
snprintf(name, sizeof(name), "%s-bfloat-failed",
bch2_btree_ids[bd->id]);
bch2_btree_id_str(bd->id));
debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
&bfloat_failed_debug_ops);
......
......@@ -97,61 +97,51 @@ const struct bch_hash_desc bch2_dirent_hash_desc = {
.is_visible = dirent_is_visible,
};
int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k,
int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags,
struct printbuf *err)
{
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
struct qstr d_name = bch2_dirent_get_name(d);
int ret = 0;
if (!d_name.len) {
prt_printf(err, "empty name");
return -BCH_ERR_invalid_bkey;
}
bkey_fsck_err_on(!d_name.len, c, err,
dirent_empty_name,
"empty name");
if (bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len)) {
prt_printf(err, "value too big (%zu > %u)",
bkey_val_u64s(k.k), dirent_val_u64s(d_name.len));
return -BCH_ERR_invalid_bkey;
}
bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err,
dirent_val_too_big,
"value too big (%zu > %u)",
bkey_val_u64s(k.k), dirent_val_u64s(d_name.len));
/*
* Check new keys don't exceed the max length
* (older keys may be larger.)
*/
if ((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX) {
prt_printf(err, "dirent name too big (%u > %u)",
d_name.len, BCH_NAME_MAX);
return -BCH_ERR_invalid_bkey;
}
if (d_name.len != strnlen(d_name.name, d_name.len)) {
prt_printf(err, "dirent has stray data after name's NUL");
return -BCH_ERR_invalid_bkey;
}
if (d_name.len == 1 && !memcmp(d_name.name, ".", 1)) {
prt_printf(err, "invalid name");
return -BCH_ERR_invalid_bkey;
}
if (d_name.len == 2 && !memcmp(d_name.name, "..", 2)) {
prt_printf(err, "invalid name");
return -BCH_ERR_invalid_bkey;
}
if (memchr(d_name.name, '/', d_name.len)) {
prt_printf(err, "invalid name");
return -BCH_ERR_invalid_bkey;
}
if (d.v->d_type != DT_SUBVOL &&
le64_to_cpu(d.v->d_inum) == d.k->p.inode) {
prt_printf(err, "dirent points to own directory");
return -BCH_ERR_invalid_bkey;
}
return 0;
bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX, c, err,
dirent_name_too_long,
"dirent name too big (%u > %u)",
d_name.len, BCH_NAME_MAX);
bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err,
dirent_name_embedded_nul,
"dirent has stray data after name's NUL");
bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) ||
(d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err,
dirent_name_dot_or_dotdot,
"invalid name");
bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err,
dirent_name_has_slash,
"name with /");
bkey_fsck_err_on(d.v->d_type != DT_SUBVOL &&
le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err,
dirent_to_itself,
"dirent points to own directory");
fsck_err:
return ret;
}
void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
......
......@@ -7,7 +7,7 @@
enum bkey_invalid_flags;
extern const struct bch_hash_desc bch2_dirent_hash_desc;
int bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
......
......@@ -175,6 +175,7 @@ int bch2_sb_disk_groups_to_cpu(struct bch_fs *c)
dst->deleted = BCH_GROUP_DELETED(src);
dst->parent = BCH_GROUP_PARENT(src);
memcpy(dst->label, src->label, sizeof(dst->label));
}
for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
......@@ -382,7 +383,57 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name)
return v;
}
void bch2_disk_path_to_text(struct printbuf *out, struct bch_sb *sb, unsigned v)
void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
{
struct bch_disk_groups_cpu *groups;
struct bch_disk_group_cpu *g;
unsigned nr = 0;
u16 path[32];
out->atomic++;
rcu_read_lock();
groups = rcu_dereference(c->disk_groups);
if (!groups)
goto invalid;
while (1) {
if (nr == ARRAY_SIZE(path))
goto invalid;
if (v >= groups->nr)
goto invalid;
g = groups->entries + v;
if (g->deleted)
goto invalid;
path[nr++] = v;
if (!g->parent)
break;
v = g->parent - 1;
}
while (nr) {
v = path[--nr];
g = groups->entries + v;
prt_printf(out, "%.*s", (int) sizeof(g->label), g->label);
if (nr)
prt_printf(out, ".");
}
out:
rcu_read_unlock();
out->atomic--;
return;
invalid:
prt_printf(out, "invalid label %u", v);
goto out;
}
void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
{
struct bch_sb_field_disk_groups *groups =
bch2_sb_field_get(sb, disk_groups);
......@@ -493,10 +544,7 @@ int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res,
return -EINVAL;
}
void bch2_opt_target_to_text(struct printbuf *out,
struct bch_fs *c,
struct bch_sb *sb,
u64 v)
void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
{
struct target t = target_decode(v);
......@@ -504,47 +552,69 @@ void bch2_opt_target_to_text(struct printbuf *out,
case TARGET_NULL:
prt_printf(out, "none");
break;
case TARGET_DEV:
if (c) {
struct bch_dev *ca;
rcu_read_lock();
ca = t.dev < c->sb.nr_devices
? rcu_dereference(c->devs[t.dev])
: NULL;
if (ca && percpu_ref_tryget(&ca->io_ref)) {
prt_printf(out, "/dev/%pg", ca->disk_sb.bdev);
percpu_ref_put(&ca->io_ref);
} else if (ca) {
prt_printf(out, "offline device %u", t.dev);
} else {
prt_printf(out, "invalid device %u", t.dev);
}
rcu_read_unlock();
case TARGET_DEV: {
struct bch_dev *ca;
rcu_read_lock();
ca = t.dev < c->sb.nr_devices
? rcu_dereference(c->devs[t.dev])
: NULL;
if (ca && percpu_ref_tryget(&ca->io_ref)) {
prt_printf(out, "/dev/%pg", ca->disk_sb.bdev);
percpu_ref_put(&ca->io_ref);
} else if (ca) {
prt_printf(out, "offline device %u", t.dev);
} else {
struct bch_member m = bch2_sb_member_get(sb, t.dev);
if (bch2_dev_exists(sb, t.dev)) {
prt_printf(out, "Device ");
pr_uuid(out, m.uuid.b);
prt_printf(out, " (%u)", t.dev);
} else {
prt_printf(out, "Bad device %u", t.dev);
}
prt_printf(out, "invalid device %u", t.dev);
}
rcu_read_unlock();
break;
}
case TARGET_GROUP:
if (c) {
mutex_lock(&c->sb_lock);
bch2_disk_path_to_text(out, c->disk_sb.sb, t.group);
mutex_unlock(&c->sb_lock);
bch2_disk_path_to_text(out, c, t.group);
break;
default:
BUG();
}
}
void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
{
struct target t = target_decode(v);
switch (t.type) {
case TARGET_NULL:
prt_printf(out, "none");
break;
case TARGET_DEV: {
struct bch_member m = bch2_sb_member_get(sb, t.dev);
if (bch2_dev_exists(sb, t.dev)) {
prt_printf(out, "Device ");
pr_uuid(out, m.uuid.b);
prt_printf(out, " (%u)", t.dev);
} else {
bch2_disk_path_to_text(out, sb, t.group);
prt_printf(out, "Bad device %u", t.dev);
}
break;
}
case TARGET_GROUP:
bch2_disk_path_to_text_sb(out, sb, t.group);
break;
default:
BUG();
}
}
void bch2_opt_target_to_text(struct printbuf *out,
struct bch_fs *c,
struct bch_sb *sb,
u64 v)
{
if (c)
bch2_target_to_text(out, c, v);
else
bch2_target_to_text_sb(out, sb, v);
}
......@@ -2,6 +2,8 @@
#ifndef _BCACHEFS_DISK_GROUPS_H
#define _BCACHEFS_DISK_GROUPS_H
#include "disk_groups_types.h"
extern const struct bch_sb_field_ops bch_sb_field_ops_disk_groups;
static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups)
......@@ -83,7 +85,10 @@ int bch2_disk_path_find(struct bch_sb_handle *, const char *);
/* Exported for userspace bcachefs-tools: */
int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *);
void bch2_disk_path_to_text(struct printbuf *, struct bch_sb *, unsigned);
void bch2_disk_path_to_text(struct printbuf *, struct bch_fs *, unsigned);
void bch2_disk_path_to_text_sb(struct printbuf *, struct bch_sb *, unsigned);
void bch2_target_to_text(struct printbuf *out, struct bch_fs *, unsigned);
int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_DISK_GROUPS_TYPES_H
#define _BCACHEFS_DISK_GROUPS_TYPES_H
struct bch_disk_group_cpu {
bool deleted;
u16 parent;
u8 label[BCH_SB_LABEL_SIZE];
struct bch_devs_mask devs;
};
struct bch_disk_groups_cpu {
struct rcu_head rcu;
unsigned nr;
struct bch_disk_group_cpu entries[] __counted_by(nr);
};
#endif /* _BCACHEFS_DISK_GROUPS_TYPES_H */
......@@ -105,29 +105,26 @@ struct ec_bio {
/* Stripes btree keys: */
int bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k,
int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags,
struct printbuf *err)
{
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
int ret = 0;
if (bkey_eq(k.k->p, POS_MIN)) {
prt_printf(err, "stripe at POS_MIN");
return -BCH_ERR_invalid_bkey;
}
if (k.k->p.inode) {
prt_printf(err, "nonzero inode field");
return -BCH_ERR_invalid_bkey;
}
bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) ||
bpos_gt(k.k->p, POS(0, U32_MAX)), c, err,
stripe_pos_bad,
"stripe at bad pos");
if (bkey_val_u64s(k.k) < stripe_val_u64s(s)) {
prt_printf(err, "incorrect value size (%zu < %u)",
bkey_val_u64s(k.k), stripe_val_u64s(s));
return -BCH_ERR_invalid_bkey;
}
bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err,
stripe_val_size_bad,
"incorrect value size (%zu < %u)",
bkey_val_u64s(k.k), stripe_val_u64s(s));
return bch2_bkey_ptrs_invalid(c, k, flags, err);
ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
fsck_err:
return ret;
}
void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
......@@ -153,6 +150,7 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset);
if (i < nr_data)
prt_printf(out, "#%u", stripe_blockcount_get(s, i));
prt_printf(out, " gen %u", ptr->gen);
if (ptr_stale(ca, ptr))
prt_printf(out, " stale");
}
......@@ -306,16 +304,21 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
struct bch_csum got = ec_block_checksum(buf, i, offset);
if (bch2_crc_cmp(want, got)) {
struct printbuf buf2 = PRINTBUF;
struct printbuf err = PRINTBUF;
struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev);
prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n",
want.hi, want.lo,
got.hi, got.lo,
bch2_csum_types[v->csum_type]);
prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i);
bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key));
bch_err_ratelimited(ca, "%s", err.buf);
printbuf_exit(&err);
bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&buf->key));
bch_err_ratelimited(c,
"stripe checksum error for %ps at %u:%u: csum type %u, expected %llx got %llx\n%s",
(void *) _RET_IP_, i, j, v->csum_type,
want.lo, got.lo, buf2.buf);
printbuf_exit(&buf2);
clear_bit(i, buf->valid);
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
break;
}
......@@ -373,7 +376,11 @@ static void ec_block_endio(struct bio *bio)
struct bch_dev *ca = ec_bio->ca;
struct closure *cl = bio->bi_private;
if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s error: %s",
if (bch2_dev_io_err_on(bio->bi_status, ca,
bio_data_dir(bio)
? BCH_MEMBER_ERROR_write
: BCH_MEMBER_ERROR_read,
"erasure coding %s error: %s",
bio_data_dir(bio) ? "write" : "read",
bch2_blk_status_to_str(bio->bi_status)))
clear_bit(ec_bio->idx, ec_bio->buf->valid);
......@@ -474,14 +481,10 @@ static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
return ret;
}
static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
{
return bch2_trans_run(c, get_stripe_key_trans(trans, idx, stripe));
}
/* recovery read path: */
int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio)
{
struct bch_fs *c = trans->c;
struct ec_stripe_buf *buf;
struct closure cl;
struct bch_stripe *v;
......@@ -496,7 +499,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
if (!buf)
return -BCH_ERR_ENOMEM_ec_read_extent;
ret = get_stripe_key(c, rbio->pick.ec.idx, buf);
ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf));
if (ret) {
bch_err_ratelimited(c,
"error doing reconstruct read: error %i looking up stripe", ret);
......
......@@ -8,7 +8,7 @@
enum bkey_invalid_flags;
int bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
......@@ -199,7 +199,7 @@ struct ec_stripe_head {
struct ec_stripe_new *s;
};
int bch2_ec_read_extent(struct bch_fs *, struct bch_read_bio *);
int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *);
void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *);
......
......@@ -3,6 +3,8 @@
#define _BCACHEFS_ERRCODE_H
#define BCH_ERRCODES() \
x(ERANGE, ERANGE_option_too_small) \
x(ERANGE, ERANGE_option_too_big) \
x(ENOMEM, ENOMEM_stripe_buf) \
x(ENOMEM, ENOMEM_replicas_table) \
x(ENOMEM, ENOMEM_cpu_replicas) \
......@@ -213,6 +215,8 @@
x(BCH_ERR_invalid_sb, invalid_sb_crypt) \
x(BCH_ERR_invalid_sb, invalid_sb_clean) \
x(BCH_ERR_invalid_sb, invalid_sb_quota) \
x(BCH_ERR_invalid_sb, invalid_sb_errors) \
x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \
x(BCH_ERR_invalid, invalid_bkey) \
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
x(EIO, btree_node_read_err) \
......
......@@ -56,8 +56,9 @@ void bch2_io_error_work(struct work_struct *work)
up_write(&c->state_lock);
}
void bch2_io_error(struct bch_dev *ca)
void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type)
{
atomic64_inc(&ca->errors[type]);
//queue_work(system_long_wq, &ca->io_error_work);
}
......@@ -116,31 +117,34 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
if (test_bit(BCH_FS_FSCK_DONE, &c->flags))
return NULL;
list_for_each_entry(s, &c->fsck_errors, list)
list_for_each_entry(s, &c->fsck_error_msgs, list)
if (s->fmt == fmt) {
/*
* move it to the head of the list: repeated fsck errors
* are common
*/
list_move(&s->list, &c->fsck_errors);
list_move(&s->list, &c->fsck_error_msgs);
return s;
}
s = kzalloc(sizeof(*s), GFP_NOFS);
if (!s) {
if (!c->fsck_alloc_err)
if (!c->fsck_alloc_msgs_err)
bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
c->fsck_alloc_err = true;
c->fsck_alloc_msgs_err = true;
return NULL;
}
INIT_LIST_HEAD(&s->list);
s->fmt = fmt;
list_add(&s->list, &c->fsck_errors);
list_add(&s->list, &c->fsck_error_msgs);
return s;
}
int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
int bch2_fsck_err(struct bch_fs *c,
enum bch_fsck_flags flags,
enum bch_sb_error_id err,
const char *fmt, ...)
{
struct fsck_err_state *s = NULL;
va_list args;
......@@ -148,11 +152,13 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
struct printbuf buf = PRINTBUF, *out = &buf;
int ret = -BCH_ERR_fsck_ignore;
bch2_sb_error_count(c, err);
va_start(args, fmt);
prt_vprintf(out, fmt, args);
va_end(args);
mutex_lock(&c->fsck_error_lock);
mutex_lock(&c->fsck_error_msgs_lock);
s = fsck_err_get(c, fmt);
if (s) {
/*
......@@ -162,7 +168,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
*/
if (s->last_msg && !strcmp(buf.buf, s->last_msg)) {
ret = s->ret;
mutex_unlock(&c->fsck_error_lock);
mutex_unlock(&c->fsck_error_msgs_lock);
printbuf_exit(&buf);
return ret;
}
......@@ -257,7 +263,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
if (s)
s->ret = ret;
mutex_unlock(&c->fsck_error_lock);
mutex_unlock(&c->fsck_error_msgs_lock);
printbuf_exit(&buf);
......@@ -278,9 +284,9 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
{
struct fsck_err_state *s, *n;
mutex_lock(&c->fsck_error_lock);
mutex_lock(&c->fsck_error_msgs_lock);
list_for_each_entry_safe(s, n, &c->fsck_errors, list) {
list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) {
if (s->ratelimited && s->last_msg)
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg);
......@@ -289,5 +295,5 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
kfree(s);
}
mutex_unlock(&c->fsck_error_lock);
mutex_unlock(&c->fsck_error_msgs_lock);
}
......@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <linux/printk.h>
#include "sb-errors.h"
struct bch_dev;
struct bch_fs;
......@@ -101,18 +102,26 @@ struct fsck_err_state {
char *last_msg;
};
#define FSCK_CAN_FIX (1 << 0)
#define FSCK_CAN_IGNORE (1 << 1)
#define FSCK_NEED_FSCK (1 << 2)
#define FSCK_NO_RATELIMIT (1 << 3)
enum bch_fsck_flags {
FSCK_CAN_FIX = 1 << 0,
FSCK_CAN_IGNORE = 1 << 1,
FSCK_NEED_FSCK = 1 << 2,
FSCK_NO_RATELIMIT = 1 << 3,
};
#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err)
__printf(3, 4) __cold
int bch2_fsck_err(struct bch_fs *, unsigned, const char *, ...);
__printf(4, 5) __cold
int bch2_fsck_err(struct bch_fs *,
enum bch_fsck_flags,
enum bch_sb_error_id,
const char *, ...);
void bch2_flush_fsck_errs(struct bch_fs *);
#define __fsck_err(c, _flags, msg, ...) \
#define __fsck_err(c, _flags, _err_type, ...) \
({ \
int _ret = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__); \
int _ret = bch2_fsck_err(c, _flags, BCH_FSCK_ERR_##_err_type, \
__VA_ARGS__); \
\
if (_ret != -BCH_ERR_fsck_fix && \
_ret != -BCH_ERR_fsck_ignore) { \
......@@ -127,26 +136,53 @@ void bch2_flush_fsck_errs(struct bch_fs *);
/* XXX: mark in superblock that filesystem contains errors, if we ignore: */
#define __fsck_err_on(cond, c, _flags, ...) \
(unlikely(cond) ? __fsck_err(c, _flags, ##__VA_ARGS__) : false)
#define __fsck_err_on(cond, c, _flags, _err_type, ...) \
(unlikely(cond) ? __fsck_err(c, _flags, _err_type, __VA_ARGS__) : false)
#define need_fsck_err_on(cond, c, _err_type, ...) \
__fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__)
#define need_fsck_err(c, _err_type, ...) \
__fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__)
#define mustfix_fsck_err(c, _err_type, ...) \
__fsck_err(c, FSCK_CAN_FIX, _err_type, __VA_ARGS__)
#define mustfix_fsck_err_on(cond, c, _err_type, ...) \
__fsck_err_on(cond, c, FSCK_CAN_FIX, _err_type, __VA_ARGS__)
#define need_fsck_err_on(cond, c, ...) \
__fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__)
#define fsck_err(c, _err_type, ...) \
__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__)
#define need_fsck_err(c, ...) \
__fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__)
#define fsck_err_on(cond, c, _err_type, ...) \
__fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__)
#define mustfix_fsck_err(c, ...) \
__fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__)
static inline void bch2_bkey_fsck_err(struct bch_fs *c,
struct printbuf *err_msg,
enum bch_sb_error_id err_type,
const char *fmt, ...)
{
va_list args;
#define mustfix_fsck_err_on(cond, c, ...) \
__fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__)
va_start(args, fmt);
prt_vprintf(err_msg, fmt, args);
va_end(args);
#define fsck_err(c, ...) \
__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__)
}
#define fsck_err_on(cond, c, ...) \
__fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__)
#define bkey_fsck_err(c, _err_msg, _err_type, ...) \
do { \
prt_printf(_err_msg, __VA_ARGS__); \
bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \
ret = -BCH_ERR_invalid_bkey; \
goto fsck_err; \
} while (0)
#define bkey_fsck_err_on(cond, ...) \
do { \
if (unlikely(cond)) \
bkey_fsck_err(__VA_ARGS__); \
} while (0)
/*
* Fatal errors: these don't indicate a bug, but we can't continue running in RW
......@@ -179,26 +215,26 @@ do { \
void bch2_io_error_work(struct work_struct *);
/* Does the error handling without logging a message */
void bch2_io_error(struct bch_dev *);
void bch2_io_error(struct bch_dev *, enum bch_member_error_type);
#define bch2_dev_io_err_on(cond, ca, ...) \
#define bch2_dev_io_err_on(cond, ca, _type, ...) \
({ \
bool _ret = (cond); \
\
if (_ret) { \
bch_err_dev_ratelimited(ca, __VA_ARGS__); \
bch2_io_error(ca); \
bch2_io_error(ca, _type); \
} \
_ret; \
})
#define bch2_dev_inum_io_err_on(cond, ca, ...) \
#define bch2_dev_inum_io_err_on(cond, ca, _type, ...) \
({ \
bool _ret = (cond); \
\
if (_ret) { \
bch_err_inum_offset_ratelimited(ca, __VA_ARGS__); \
bch2_io_error(ca); \
bch2_io_error(ca, _type); \
} \
_ret; \
})
......
This diff is collapsed.
......@@ -89,6 +89,18 @@ static inline void __extent_entry_insert(struct bkey_i *k,
memcpy_u64s_small(dst, new, extent_entry_u64s(new));
}
static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
{
union bch_extent_entry *next = extent_entry_next(entry);
/* stripes have ptrs, but their layout doesn't work with this code */
BUG_ON(k.k->type == KEY_TYPE_stripe);
memmove_u64s_down(entry, next,
(u64 *) bkey_val_end(k) - (u64 *) next);
k.k->u64s -= (u64 *) next - (u64 *) entry;
}
static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
{
return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
......@@ -190,6 +202,11 @@ static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc)
crc.compression_type != BCH_COMPRESSION_TYPE_incompressible);
}
static inline bool crc_is_encoded(struct bch_extent_crc_unpacked crc)
{
return crc.csum_type != BCH_CSUM_none || crc_is_compressed(crc);
}
/* bkey_ptrs: generically over any key type that has ptrs */
struct bkey_ptrs_c {
......@@ -383,12 +400,12 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
/* KEY_TYPE_btree_ptr: */
int bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
int bch2_btree_ptr_v2_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
......@@ -428,7 +445,7 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
/* KEY_TYPE_reservation: */
int bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
......@@ -688,11 +705,19 @@ void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
int bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
void bch2_ptr_swab(struct bkey_s);
const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c);
unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c,
unsigned, unsigned);
bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c);
int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *,
unsigned, unsigned);
/* Generic extent code: */
enum bch_extent_overlap {
......@@ -737,22 +762,4 @@ static inline void bch2_key_resize(struct bkey *k, unsigned new_size)
k->size = new_size;
}
/*
* In extent_sort_fix_overlapping(), insert_fixup_extent(),
* extent_merge_inline() - we're modifying keys in place that are packed. To do
* that we have to unpack the key, modify the unpacked key - then this
* copies/repacks the unpacked to the original as necessary.
*/
static inline void extent_save(struct btree *b, struct bkey_packed *dst,
struct bkey *src)
{
struct bkey_format *f = &b->format;
struct bkey_i *dst_unpacked;
if ((dst_unpacked = packed_to_bkey(dst)))
dst_unpacked->k = *src;
else
BUG_ON(!bch2_bkey_pack_key(dst, src, f));
}
#endif /* _BCACHEFS_EXTENTS_H */
......@@ -51,7 +51,7 @@ int bch2_create_trans(struct btree_trans *trans,
bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
if (flags & BCH_CREATE_TMPFILE)
new_inode->bi_flags |= BCH_INODE_UNLINKED;
new_inode->bi_flags |= BCH_INODE_unlinked;
ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu);
if (ret)
......
......@@ -389,6 +389,21 @@ static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs
return ret;
}
/*
* Determine when a writepage io is full. We have to limit writepage bios to a
* single page per bvec (i.e. 1MB with 4k pages) because that is the limit to
* what the bounce path in bch2_write_extent() can handle. In theory we could
* loosen this restriction for non-bounce I/O, but we don't have that context
* here. Ideally, we can up this limit and make it configurable in the future
* when the bounce path can be enhanced to accommodate larger source bios.
*/
static inline bool bch_io_full(struct bch_writepage_io *io, unsigned len)
{
struct bio *bio = &io->op.wbio.bio;
return bio_full(bio, len) ||
(bio->bi_iter.bi_size + len > BIO_MAX_VECS * PAGE_SIZE);
}
static void bch2_writepage_io_done(struct bch_write_op *op)
{
struct bch_writepage_io *io =
......@@ -606,9 +621,7 @@ static int __bch2_writepage(struct folio *folio,
if (w->io &&
(w->io->op.res.nr_replicas != nr_replicas_this_write ||
bio_full(&w->io->op.wbio.bio, sectors << 9) ||
w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >=
(BIO_MAX_VECS * PAGE_SIZE) ||
bch_io_full(w->io, sectors << 9) ||
bio_end_sector(&w->io->op.wbio.bio) != sector))
bch2_writepage_do_io(w);
......
......@@ -113,6 +113,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
} else {
atomic_set(&dio->cl.remaining,
CLOSURE_REMAINING_INITIALIZER + 1);
dio->cl.closure_get_happened = true;
}
dio->req = req;
......
......@@ -45,13 +45,13 @@ static int bch2_inode_flags_set(struct btree_trans *trans,
unsigned newflags = s->flags;
unsigned oldflags = bi->bi_flags & s->mask;
if (((newflags ^ oldflags) & (BCH_INODE_APPEND|BCH_INODE_IMMUTABLE)) &&
if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) &&
!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
if (!S_ISREG(bi->bi_mode) &&
!S_ISDIR(bi->bi_mode) &&
(newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags)
(newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags)
return -EINVAL;
if (s->set_projinherit) {
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -4,6 +4,7 @@
int bch2_check_inodes(struct bch_fs *);
int bch2_check_extents(struct bch_fs *);
int bch2_check_indirect_extents(struct bch_fs *);
int bch2_check_dirents(struct bch_fs *);
int bch2_check_xattrs(struct bch_fs *);
int bch2_check_root(struct bch_fs *);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -3,7 +3,7 @@
#define _BCACHEFS_IO_MISC_H
int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
unsigned, struct bch_io_opts, s64 *,
u64, struct bch_io_opts, s64 *,
struct write_point_specifier);
int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
subvol_inum, u64, s64 *);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -534,6 +534,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
unsigned nr);
int bch2_dev_journal_alloc(struct bch_dev *);
int bch2_fs_journal_alloc(struct bch_fs *);
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment