Commit 999a36b5 authored by Linus Torvalds

Merge tag 'bcachefs-2024-01-10' of https://evilpiepirate.org/git/bcachefs

Pull bcachefs updates from Kent Overstreet:

 - btree write buffer rewrite: instead of adding keys to the btree write
   buffer at transaction commit time, we now journal them with a
   different journal entry type and copy them from the journal to the
   write buffer just prior to journal write.

   This reduces the number of atomic operations on shared cachelines in
   the transaction commit path and is a significant performance
   improvement on some workloads: multithreaded 4k random writes went
   from ~650k iops to ~850k iops. (A toy model of the new flow is
   sketched after this list.)

 - Bring back optimistic spinning for six locks: the new implementation
   doesn't use osq locks; instead we add to the lock waitlist as normal,
   and then spin on the lock_acquired bit in the waitlist entry, _not_
   the lock itself. (A toy model of this scheme is sketched after this
   list.)

 - New ioctls:

    - BCH_IOCTL_DEV_USAGE_V2, which allows for new data types

    - BCH_IOCTL_FSCK_OFFLINE, which runs the kernel implementation of
      fsck without mounting: useful for transparently running the
      kernel's fsck from the 'bcachefs fsck' userspace tool when the
      kernel implementation is a better match for the on-disk
      filesystem. (A hedged usage sketch follows this list.)

    - BCH_IOCTL_FSCK_ONLINE: online fsck. Not all passes are supported
      yet, but the passes that are supported are fully featured - errors
      may be corrected as normal.

   The new ioctls use the new 'thread_with_file' abstraction for kicking
   off a kthread that's tied to a file descriptor returned to userspace
   via the ioctl.

 - btree_paths within a btree_trans are now dynamically growable,
   instead of being limited to 64. This is important for the
   check_directory_structure phase of fsck, and also fixes some issues
   we were having with btree path overflow in the reflink btree.

 - Trigger refactoring; prep work for the upcoming disk space accounting
   rewrite

 - Numerous bugfixes :)
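
The following is a minimal user-space toy model of the write buffer flow
described in the first bullet above - it is not bcachefs code, and the names
(entry types, structs, functions) are made up for illustration; the real
journal entry type is BCH_JSET_ENTRY_write_buffer_keys, added in
bcachefs_format.h below. The point it sketches: the commit path only appends
tagged entries to its own journal reservation, and the keys are copied into
the shared write buffer in one pass just before the journal write:

  #include <stdio.h>
  #include <stddef.h>

  enum entry_type { ENTRY_BTREE_KEYS, ENTRY_WRITE_BUFFER_KEYS };

  struct entry        { enum entry_type type; int key; };
  struct journal_buf  { struct entry e[64]; size_t nr; };
  struct write_buffer { int keys[64]; size_t nr; };

  /* transaction commit: only our own journal reservation is touched,
   * no shared write buffer cachelines */
  static void commit_write_buffer_key(struct journal_buf *j, int key)
  {
          j->e[j->nr++] = (struct entry) { ENTRY_WRITE_BUFFER_KEYS, key };
  }

  /* just before the journal write: copy the tagged entries into the
   * write buffer in one pass */
  static void journal_write_prep(struct journal_buf *j, struct write_buffer *wb)
  {
          for (size_t i = 0; i < j->nr; i++)
                  if (j->e[i].type == ENTRY_WRITE_BUFFER_KEYS)
                          wb->keys[wb->nr++] = j->e[i].key;
  }

  int main(void)
  {
          struct journal_buf  j  = { .nr = 0 };
          struct write_buffer wb = { .nr = 0 };

          for (int k = 0; k < 5; k++)
                  commit_write_buffer_key(&j, k);

          journal_write_prep(&j, &wb);
          printf("%zu keys copied to the write buffer\n", wb.nr);
          return 0;
  }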
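
Likewise, a user-space toy model of the six-lock optimistic spin idea - not
the bcachefs six lock implementation. The wait list handling is simplified
(a plain mutex-protected LIFO list), and the real code only spins while the
lock owner is actually running on a CPU, sleeping otherwise. What it shows is
the contended path spinning on the lock_acquired flag in the waiter's own
wait-list entry rather than on the lock word:

  #include <pthread.h>
  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stddef.h>

  struct waiter {
          atomic_bool     lock_acquired;  /* set by the unlock path */
          struct waiter   *next;
  };

  struct toylock {
          pthread_mutex_t wait_lock;      /* protects held + waiters only */
          bool            held;
          struct waiter   *waiters;
  };

  #define TOYLOCK_INIT { PTHREAD_MUTEX_INITIALIZER, false, NULL }

  static void toylock_lock(struct toylock *l)
  {
          struct waiter w = { .lock_acquired = false, .next = NULL };

          pthread_mutex_lock(&l->wait_lock);
          if (!l->held) {
                  l->held = true;
                  pthread_mutex_unlock(&l->wait_lock);
                  return;
          }
          w.next = l->waiters;            /* add ourselves to the wait list */
          l->waiters = &w;
          pthread_mutex_unlock(&l->wait_lock);

          /* spin on our own wait-list entry, _not_ on the lock itself: */
          while (!atomic_load_explicit(&w.lock_acquired, memory_order_acquire))
                  ;       /* real code: only while the owner is on CPU, else sleep */
  }

  static void toylock_unlock(struct toylock *l)
  {
          pthread_mutex_lock(&l->wait_lock);
          struct waiter *w = l->waiters;
          if (w) {
                  l->waiters = w->next;
                  /* ownership passes straight to the waiter; held stays true */
                  atomic_store_explicit(&w->lock_acquired, true, memory_order_release);
          } else {
                  l->held = false;
          }
          pthread_mutex_unlock(&l->wait_lock);
  }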
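
Finally, a hedged sketch of driving the new offline fsck ioctl from
userspace. The struct layout and ioctl number are copied from the
bcachefs_ioctl.h hunk in the diff below; the control device path
(/dev/bcachefs-ctl, as used by bcachefs-tools), the option string, and the
assumption that the ioctl returns the fsck thread's file descriptor as its
return value are all assumptions for illustration, not guarantees:

  #include <fcntl.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <sys/ioctl.h>
  #include <unistd.h>
  #include <linux/types.h>

  struct bch_ioctl_fsck_offline {
          __u64   flags;
          __u64   opts;           /* string */
          __u64   nr_devs;
          __u64   devs[];
  };

  #define BCH_IOCTL_FSCK_OFFLINE  _IOW(0xbc, 19, struct bch_ioctl_fsck_offline)

  int main(int argc, char **argv)
  {
          if (argc < 2) {
                  fprintf(stderr, "usage: %s <device>...\n", argv[0]);
                  return 1;
          }

          int nr_devs = argc - 1;
          struct bch_ioctl_fsck_offline *arg =
                  calloc(1, sizeof(*arg) + nr_devs * sizeof(__u64));

          arg->opts    = (__u64)(uintptr_t) "fix_errors=ask";   /* example option string */
          arg->nr_devs = nr_devs;
          for (int i = 0; i < nr_devs; i++)
                  arg->devs[i] = (__u64)(uintptr_t) argv[i + 1];

          int ctl = open("/dev/bcachefs-ctl", O_RDWR);          /* assumed control node */
          if (ctl < 0) {
                  perror("open /dev/bcachefs-ctl");
                  return 1;
          }

          /* assumption: the ioctl kicks off the fsck kthread and returns a
           * file descriptor; fsck output is then streamed from that fd */
          int fsck_fd = ioctl(ctl, BCH_IOCTL_FSCK_OFFLINE, arg);
          if (fsck_fd < 0) {
                  perror("BCH_IOCTL_FSCK_OFFLINE");
                  return 1;
          }

          char buf[4096];
          ssize_t n;
          while ((n = read(fsck_fd, buf, sizeof(buf))) > 0)
                  fwrite(buf, 1, n, stdout);

          close(fsck_fd);
          close(ctl);
          free(arg);
          return 0;
  }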

* tag 'bcachefs-2024-01-10' of https://evilpiepirate.org/git/bcachefs: (226 commits)
  bcachefs: eytzinger0_find() search should be const
  bcachefs: move "ptrs not changing" optimization to bch2_trigger_extent()
  bcachefs: fix simulateously upgrading & downgrading
  bcachefs: Restart recovery passes more reliably
  bcachefs: bch2_dump_bset() doesn't choke on u64s == 0
  bcachefs: improve checksum error messages
  bcachefs: improve validate_bset_keys()
  bcachefs: print sb magic when relevant
  bcachefs: __bch2_sb_field_to_text()
  bcachefs: %pg is banished
  bcachefs: Improve would_deadlock trace event
  bcachefs: fsck_err()s don't need to manually check c->sb.version anymore
  bcachefs: Upgrades now specify errors to fix, like downgrades
  bcachefs: no thread_with_file in userspace
  bcachefs: Don't autofix errors we can't fix
  bcachefs: add missing bch2_latency_acct() call
  bcachefs: increase max_active on io_complete_wq
  bcachefs: add time_stats for btree_node_read_done()
  bcachefs: don't clear accessed bit in btree node fill
  bcachefs: Add an option to control btree node prefetching
  ...
parents 84e9a2d5 169de419
@@ -3502,7 +3502,7 @@ F:	drivers/net/hamradio/baycom*
 BCACHE (BLOCK LAYER CACHE)
 M:	Coly Li <colyli@suse.de>
-M:	Kent Overstreet <kent.overstreet@gmail.com>
+M:	Kent Overstreet <kent.overstreet@linux.dev>
 L:	linux-bcache@vger.kernel.org
 S:	Maintained
 W:	http://bcache.evilpiepirate.org
......
@@ -23,6 +23,8 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features);
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
 DEFINE_STATIC_KEY_FALSE(kvm_guest);
+EXPORT_SYMBOL_GPL(kvm_guest);
+
 int __init check_kvm_guest(void)
 {
 	struct device_node *hyper_node;
......
@@ -50,14 +50,6 @@ config BCACHEFS_POSIX_ACL
 	depends on BCACHEFS_FS
 	select FS_POSIX_ACL
 
-config BCACHEFS_DEBUG_TRANSACTIONS
-	bool "bcachefs runtime info"
-	depends on BCACHEFS_FS
-	help
-	  This makes the list of running btree transactions available in debugfs.
-	  This is a highly useful debugging feature but does add a small amount of overhead.
-
 config BCACHEFS_DEBUG
 	bool "bcachefs debugging"
 	depends on BCACHEFS_FS
@@ -85,6 +77,16 @@ config BCACHEFS_NO_LATENCY_ACCT
 	help
 	  This disables device latency tracking and time stats, only for performance testing
 
+config BCACHEFS_SIX_OPTIMISTIC_SPIN
+	bool "Optimistic spinning for six locks"
+	depends on BCACHEFS_FS
+	depends on SMP
+	default y
+	help
+	  Instead of immediately sleeping when attempting to take a six lock that
+	  is held by another thread, spin for a short while, as long as the
+	  thread owning the lock is running.
+
 config MEAN_AND_VARIANCE_UNIT_TEST
 	tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS
 	depends on KUNIT
......
@@ -82,6 +82,7 @@ bcachefs-y := \
 	super-io.o \
 	sysfs.o \
 	tests.o \
+	thread_with_file.o \
 	trace.o \
 	two_state_shared_lock.o \
 	util.o \
......
...@@ -71,6 +71,24 @@ static inline enum bch_data_type bucket_data_type(enum bch_data_type data_type) ...@@ -71,6 +71,24 @@ static inline enum bch_data_type bucket_data_type(enum bch_data_type data_type)
return data_type == BCH_DATA_stripe ? BCH_DATA_user : data_type; return data_type == BCH_DATA_stripe ? BCH_DATA_user : data_type;
} }
static inline unsigned bch2_bucket_sectors(struct bch_alloc_v4 a)
{
return a.dirty_sectors + a.cached_sectors;
}
static inline unsigned bch2_bucket_sectors_dirty(struct bch_alloc_v4 a)
{
return a.dirty_sectors;
}
static inline unsigned bch2_bucket_sectors_fragmented(struct bch_dev *ca,
struct bch_alloc_v4 a)
{
int d = bch2_bucket_sectors_dirty(a);
return d ? max(0, ca->mi.bucket_size - d) : 0;
}
static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
{ {
return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
...@@ -90,10 +108,11 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, ...@@ -90,10 +108,11 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
struct bch_dev *ca) struct bch_dev *ca)
{ {
if (!data_type_movable(a.data_type) || if (!data_type_movable(a.data_type) ||
a.dirty_sectors >= ca->mi.bucket_size) !bch2_bucket_sectors_fragmented(ca, a))
return 0; return 0;
return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size); u64 d = bch2_bucket_sectors_dirty(a);
return div_u64(d * (1ULL << 31), ca->mi.bucket_size);
} }
static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a) static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a)
...@@ -163,24 +182,21 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); ...@@ -163,24 +182,21 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_alloc ((struct bkey_ops) { \ #define bch2_bkey_ops_alloc ((struct bkey_ops) { \
.key_invalid = bch2_alloc_v1_invalid, \ .key_invalid = bch2_alloc_v1_invalid, \
.val_to_text = bch2_alloc_to_text, \ .val_to_text = bch2_alloc_to_text, \
.trans_trigger = bch2_trans_mark_alloc, \ .trigger = bch2_trigger_alloc, \
.atomic_trigger = bch2_mark_alloc, \
.min_val_size = 8, \ .min_val_size = 8, \
}) })
#define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \ #define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \
.key_invalid = bch2_alloc_v2_invalid, \ .key_invalid = bch2_alloc_v2_invalid, \
.val_to_text = bch2_alloc_to_text, \ .val_to_text = bch2_alloc_to_text, \
.trans_trigger = bch2_trans_mark_alloc, \ .trigger = bch2_trigger_alloc, \
.atomic_trigger = bch2_mark_alloc, \
.min_val_size = 8, \ .min_val_size = 8, \
}) })
#define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \ #define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \
.key_invalid = bch2_alloc_v3_invalid, \ .key_invalid = bch2_alloc_v3_invalid, \
.val_to_text = bch2_alloc_to_text, \ .val_to_text = bch2_alloc_to_text, \
.trans_trigger = bch2_trans_mark_alloc, \ .trigger = bch2_trigger_alloc, \
.atomic_trigger = bch2_mark_alloc, \
.min_val_size = 16, \ .min_val_size = 16, \
}) })
...@@ -188,8 +204,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); ...@@ -188,8 +204,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
.key_invalid = bch2_alloc_v4_invalid, \ .key_invalid = bch2_alloc_v4_invalid, \
.val_to_text = bch2_alloc_to_text, \ .val_to_text = bch2_alloc_to_text, \
.swab = bch2_alloc_v4_swab, \ .swab = bch2_alloc_v4_swab, \
.trans_trigger = bch2_trans_mark_alloc, \ .trigger = bch2_trigger_alloc, \
.atomic_trigger = bch2_mark_alloc, \
.min_val_size = 48, \ .min_val_size = 48, \
}) })
...@@ -213,8 +228,8 @@ static inline bool bkey_is_alloc(const struct bkey *k) ...@@ -213,8 +228,8 @@ static inline bool bkey_is_alloc(const struct bkey *k)
int bch2_alloc_read(struct bch_fs *); int bch2_alloc_read(struct bch_fs *);
int bch2_trans_mark_alloc(struct btree_trans *, enum btree_id, unsigned, int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, struct bkey_i *, unsigned); struct bkey_s_c, struct bkey_s, unsigned);
int bch2_check_alloc_info(struct bch_fs *); int bch2_check_alloc_info(struct bch_fs *);
int bch2_check_alloc_to_lru_refs(struct bch_fs *); int bch2_check_alloc_to_lru_refs(struct bch_fs *);
void bch2_do_discards(struct bch_fs *); void bch2_do_discards(struct bch_fs *);
......
...@@ -69,11 +69,8 @@ const char * const bch2_watermarks[] = { ...@@ -69,11 +69,8 @@ const char * const bch2_watermarks[] = {
void bch2_reset_alloc_cursors(struct bch_fs *c) void bch2_reset_alloc_cursors(struct bch_fs *c)
{ {
struct bch_dev *ca;
unsigned i;
rcu_read_lock(); rcu_read_lock();
for_each_member_device_rcu(ca, c, i, NULL) for_each_member_device_rcu(c, ca, NULL)
ca->alloc_cursor = 0; ca->alloc_cursor = 0;
rcu_read_unlock(); rcu_read_unlock();
} }
...@@ -239,9 +236,8 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * ...@@ -239,9 +236,8 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
if (cl) if (cl)
closure_wait(&c->open_buckets_wait, cl); closure_wait(&c->open_buckets_wait, cl);
if (!c->blocked_allocate_open_bucket) track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket],
c->blocked_allocate_open_bucket = local_clock(); &c->blocked_allocate_open_bucket, true);
spin_unlock(&c->freelist_lock); spin_unlock(&c->freelist_lock);
return ERR_PTR(-BCH_ERR_open_buckets_empty); return ERR_PTR(-BCH_ERR_open_buckets_empty);
} }
...@@ -267,19 +263,11 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * ...@@ -267,19 +263,11 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
ca->nr_open_buckets++; ca->nr_open_buckets++;
bch2_open_bucket_hash_add(c, ob); bch2_open_bucket_hash_add(c, ob);
if (c->blocked_allocate_open_bucket) { track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket],
bch2_time_stats_update( &c->blocked_allocate_open_bucket, false);
&c->times[BCH_TIME_blocked_allocate_open_bucket],
c->blocked_allocate_open_bucket);
c->blocked_allocate_open_bucket = 0;
}
if (c->blocked_allocate) { track_event_change(&c->times[BCH_TIME_blocked_allocate],
bch2_time_stats_update( &c->blocked_allocate, false);
&c->times[BCH_TIME_blocked_allocate],
c->blocked_allocate);
c->blocked_allocate = 0;
}
spin_unlock(&c->freelist_lock); spin_unlock(&c->freelist_lock);
return ob; return ob;
...@@ -377,9 +365,9 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc ...@@ -377,9 +365,9 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl); ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl);
if (!ob) if (!ob)
iter.path->preserve = false; set_btree_iter_dontneed(&iter);
err: err:
if (iter.trans && iter.path) if (iter.path)
set_btree_iter_dontneed(&iter); set_btree_iter_dontneed(&iter);
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
printbuf_exit(&buf); printbuf_exit(&buf);
...@@ -447,7 +435,7 @@ bch2_bucket_alloc_early(struct btree_trans *trans, ...@@ -447,7 +435,7 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl); ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl);
next: next:
citer.path->preserve = false; set_btree_iter_dontneed(&citer);
bch2_trans_iter_exit(trans, &citer); bch2_trans_iter_exit(trans, &citer);
if (ob) if (ob)
break; break;
...@@ -502,7 +490,7 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, ...@@ -502,7 +490,7 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
ob = try_alloc_bucket(trans, ca, watermark, ob = try_alloc_bucket(trans, ca, watermark,
alloc_cursor, s, k, cl); alloc_cursor, s, k, cl);
if (ob) { if (ob) {
iter.path->preserve = false; set_btree_iter_dontneed(&iter);
break; break;
} }
} }
...@@ -567,8 +555,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, ...@@ -567,8 +555,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
goto again; goto again;
} }
if (!c->blocked_allocate) track_event_change(&c->times[BCH_TIME_blocked_allocate],
c->blocked_allocate = local_clock(); &c->blocked_allocate, true);
ob = ERR_PTR(-BCH_ERR_freelist_empty); ob = ERR_PTR(-BCH_ERR_freelist_empty);
goto err; goto err;
...@@ -697,11 +685,9 @@ static int add_new_bucket(struct bch_fs *c, ...@@ -697,11 +685,9 @@ static int add_new_bucket(struct bch_fs *c,
bch_dev_bkey_exists(c, ob->dev)->mi.durability; bch_dev_bkey_exists(c, ob->dev)->mi.durability;
BUG_ON(*nr_effective >= nr_replicas); BUG_ON(*nr_effective >= nr_replicas);
BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
__clear_bit(ob->dev, devs_may_alloc->d); __clear_bit(ob->dev, devs_may_alloc->d);
*nr_effective += (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS) *nr_effective += durability;
? durability : 1;
*have_cache |= !durability; *have_cache |= !durability;
ob_push(c, ptrs, ob); ob_push(c, ptrs, ob);
...@@ -972,8 +958,8 @@ static int __open_bucket_add_buckets(struct btree_trans *trans, ...@@ -972,8 +958,8 @@ static int __open_bucket_add_buckets(struct btree_trans *trans,
devs = target_rw_devs(c, wp->data_type, target); devs = target_rw_devs(c, wp->data_type, target);
/* Don't allocate from devices we already have pointers to: */ /* Don't allocate from devices we already have pointers to: */
for (i = 0; i < devs_have->nr; i++) darray_for_each(*devs_have, i)
__clear_bit(devs_have->devs[i], devs.d); __clear_bit(*i, devs.d);
open_bucket_for_each(c, ptrs, ob, i) open_bucket_for_each(c, ptrs, ob, i)
__clear_bit(ob->dev, devs.d); __clear_bit(ob->dev, devs.d);
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include "bbpos.h" #include "bbpos.h"
#include "alloc_background.h" #include "alloc_background.h"
#include "backpointers.h" #include "backpointers.h"
#include "bkey_buf.h"
#include "btree_cache.h" #include "btree_cache.h"
#include "btree_update.h" #include "btree_update.h"
#include "btree_update_interior.h" #include "btree_update_interior.h"
...@@ -136,15 +137,30 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, ...@@ -136,15 +137,30 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
} }
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
struct bkey_i_backpointer *bp_k, struct bpos bucket,
struct bch_backpointer bp, struct bch_backpointer bp,
struct bkey_s_c orig_k, struct bkey_s_c orig_k,
bool insert) bool insert)
{ {
struct btree_iter bp_iter; struct btree_iter bp_iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_i_backpointer *bp_k;
int ret; int ret;
bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
ret = PTR_ERR_OR_ZERO(bp_k);
if (ret)
return ret;
bkey_backpointer_init(&bp_k->k_i);
bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
bp_k->v = bp;
if (!insert) {
bp_k->k.type = KEY_TYPE_deleted;
set_bkey_val_u64s(&bp_k->k, 0);
}
k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
bp_k->k.p, bp_k->k.p,
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
...@@ -375,39 +391,32 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ ...@@ -375,39 +391,32 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_
/* verify that every backpointer has a corresponding alloc key */ /* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c) int bch2_check_btree_backpointers(struct bch_fs *c)
{ {
struct btree_iter iter; int ret = bch2_trans_run(c,
struct bkey_s_c k;
int ret;
ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, for_each_btree_key_commit(trans, iter,
BTREE_ID_backpointers, POS_MIN, 0, k, BTREE_ID_backpointers, POS_MIN, 0, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_check_btree_backpointer(trans, &iter, k))); bch2_check_btree_backpointer(trans, &iter, k)));
if (ret)
bch_err_fn(c, ret); bch_err_fn(c, ret);
return ret; return ret;
} }
struct bpos_level {
unsigned level;
struct bpos pos;
};
static int check_bp_exists(struct btree_trans *trans, static int check_bp_exists(struct btree_trans *trans,
struct bpos bucket, struct bpos bucket,
struct bch_backpointer bp, struct bch_backpointer bp,
struct bkey_s_c orig_k, struct bkey_s_c orig_k,
struct bpos bucket_start, struct bpos bucket_start,
struct bpos bucket_end, struct bpos bucket_end,
struct bpos_level *last_flushed) struct bkey_buf *last_flushed)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter bp_iter = { NULL }; struct btree_iter bp_iter = { NULL };
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
struct bkey_s_c bp_k; struct bkey_s_c bp_k;
struct bkey_buf tmp;
int ret; int ret;
bch2_bkey_buf_init(&tmp);
if (bpos_lt(bucket, bucket_start) || if (bpos_lt(bucket, bucket_start) ||
bpos_gt(bucket, bucket_end)) bpos_gt(bucket, bucket_end))
return 0; return 0;
...@@ -424,13 +433,22 @@ static int check_bp_exists(struct btree_trans *trans, ...@@ -424,13 +433,22 @@ static int check_bp_exists(struct btree_trans *trans,
if (bp_k.k->type != KEY_TYPE_backpointer || if (bp_k.k->type != KEY_TYPE_backpointer ||
memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
if (last_flushed->level != bp.level || if (!bpos_eq(orig_k.k->p, last_flushed->k->k.p) ||
!bpos_eq(last_flushed->pos, orig_k.k->p)) { bkey_bytes(orig_k.k) != bkey_bytes(&last_flushed->k->k) ||
last_flushed->level = bp.level; memcmp(orig_k.v, &last_flushed->k->v, bkey_val_bytes(orig_k.k))) {
last_flushed->pos = orig_k.k->p; bch2_bkey_buf_reassemble(&tmp, c, orig_k);
if (bp.level) {
bch2_trans_unlock(trans);
bch2_btree_interior_updates_flush(c);
}
ret = bch2_btree_write_buffer_flush_sync(trans) ?: ret = bch2_btree_write_buffer_flush_sync(trans);
-BCH_ERR_transaction_restart_write_buffer_flush; if (ret)
goto err;
bch2_bkey_buf_copy(last_flushed, c, tmp.k);
ret = -BCH_ERR_transaction_restart_write_buffer_flush;
goto out; goto out;
} }
goto missing; goto missing;
...@@ -439,6 +457,7 @@ static int check_bp_exists(struct btree_trans *trans, ...@@ -439,6 +457,7 @@ static int check_bp_exists(struct btree_trans *trans,
err: err:
fsck_err: fsck_err:
bch2_trans_iter_exit(trans, &bp_iter); bch2_trans_iter_exit(trans, &bp_iter);
bch2_bkey_buf_exit(&tmp, c);
printbuf_exit(&buf); printbuf_exit(&buf);
return ret; return ret;
missing: missing:
...@@ -448,8 +467,7 @@ static int check_bp_exists(struct btree_trans *trans, ...@@ -448,8 +467,7 @@ static int check_bp_exists(struct btree_trans *trans,
prt_printf(&buf, "\nbp pos "); prt_printf(&buf, "\nbp pos ");
bch2_bpos_to_text(&buf, bp_iter.pos); bch2_bpos_to_text(&buf, bp_iter.pos);
if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers || if (c->opts.reconstruct_alloc ||
c->opts.reconstruct_alloc ||
fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf)) fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true); ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
...@@ -457,25 +475,18 @@ static int check_bp_exists(struct btree_trans *trans, ...@@ -457,25 +475,18 @@ static int check_bp_exists(struct btree_trans *trans,
} }
static int check_extent_to_backpointers(struct btree_trans *trans, static int check_extent_to_backpointers(struct btree_trans *trans,
struct btree_iter *iter, enum btree_id btree, unsigned level,
struct bpos bucket_start, struct bpos bucket_start,
struct bpos bucket_end, struct bpos bucket_end,
struct bpos_level *last_flushed) struct bkey_buf *last_flushed,
struct bkey_s_c k)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs; struct bkey_ptrs_c ptrs;
const union bch_extent_entry *entry; const union bch_extent_entry *entry;
struct extent_ptr_decoded p; struct extent_ptr_decoded p;
struct bkey_s_c k;
int ret; int ret;
k = bch2_btree_iter_peek_all_levels(iter);
ret = bkey_err(k);
if (ret)
return ret;
if (!k.k)
return 0;
ptrs = bch2_bkey_ptrs_c(k); ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
struct bpos bucket_pos; struct bpos bucket_pos;
...@@ -484,7 +495,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans, ...@@ -484,7 +495,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
if (p.ptr.cached) if (p.ptr.cached)
continue; continue;
bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level, bch2_extent_ptr_to_bp(c, btree, level,
k, p, &bucket_pos, &bp); k, p, &bucket_pos, &bp);
ret = check_bp_exists(trans, bucket_pos, bp, k, ret = check_bp_exists(trans, bucket_pos, bp, k,
...@@ -501,44 +512,33 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans, ...@@ -501,44 +512,33 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans,
enum btree_id btree_id, enum btree_id btree_id,
struct bpos bucket_start, struct bpos bucket_start,
struct bpos bucket_end, struct bpos bucket_end,
struct bpos_level *last_flushed) struct bkey_buf *last_flushed,
int *level)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_root *r = bch2_btree_id_root(c, btree_id);
struct btree_iter iter; struct btree_iter iter;
struct btree *b; struct btree *b;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_ptrs_c ptrs;
struct extent_ptr_decoded p;
const union bch_extent_entry *entry;
int ret; int ret;
retry:
bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, r->level, 0); bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
b = bch2_btree_iter_peek_node(&iter); b = bch2_btree_iter_peek_node(&iter);
ret = PTR_ERR_OR_ZERO(b); ret = PTR_ERR_OR_ZERO(b);
if (ret) if (ret)
goto err; goto err;
BUG_ON(b != btree_node_root(c, b)); if (b != btree_node_root(c, b)) {
bch2_trans_iter_exit(trans, &iter);
k = bkey_i_to_s_c(&b->key); goto retry;
ptrs = bch2_bkey_ptrs_c(k); }
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
struct bpos bucket_pos;
struct bch_backpointer bp;
if (p.ptr.cached)
continue;
bch2_extent_ptr_to_bp(c, iter.btree_id, b->c.level + 1, *level = b->c.level;
k, p, &bucket_pos, &bp);
ret = check_bp_exists(trans, bucket_pos, bp, k, k = bkey_i_to_s_c(&b->key);
ret = check_extent_to_backpointers(trans, btree_id, b->c.level + 1,
bucket_start, bucket_end, bucket_start, bucket_end,
last_flushed); last_flushed, k);
if (ret)
goto err;
}
err: err:
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
return ret; return ret;
...@@ -616,43 +616,60 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, ...@@ -616,43 +616,60 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter iter; struct btree_iter iter;
enum btree_id btree_id; enum btree_id btree_id;
struct bpos_level last_flushed = { UINT_MAX, POS_MIN }; struct bkey_s_c k;
struct bkey_buf last_flushed;
int ret = 0; int ret = 0;
for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { bch2_bkey_buf_init(&last_flushed);
unsigned depth = btree_type_has_ptrs(btree_id) ? 0 : 1; bkey_init(&last_flushed.k->k);
bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) {
depth, int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
BTREE_ITER_ALL_LEVELS|
BTREE_ITER_PREFETCH);
do {
ret = commit_do(trans, NULL, NULL, ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_LAZY_RW| BCH_TRANS_COMMIT_no_enospc,
BTREE_INSERT_NOFAIL, check_btree_root_to_backpointers(trans, btree_id,
check_extent_to_backpointers(trans, &iter,
bucket_start, bucket_end, bucket_start, bucket_end,
&last_flushed)); &last_flushed, &level));
if (ret) if (ret)
break; return ret;
} while (!bch2_btree_iter_advance(&iter));
bch2_trans_iter_exit(trans, &iter);
if (ret) while (level >= depth) {
bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
level,
BTREE_ITER_PREFETCH);
while (1) {
bch2_trans_begin(trans);
k = bch2_btree_iter_peek(&iter);
if (!k.k)
break; break;
ret = bkey_err(k) ?:
ret = commit_do(trans, NULL, NULL, check_extent_to_backpointers(trans, btree_id, level,
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL,
check_btree_root_to_backpointers(trans, btree_id,
bucket_start, bucket_end, bucket_start, bucket_end,
&last_flushed)); &last_flushed, k) ?:
bch2_trans_commit(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
ret = 0;
continue;
}
if (ret) if (ret)
break; break;
if (bpos_eq(iter.pos, SPOS_MAX))
break;
bch2_btree_iter_advance(&iter);
} }
bch2_trans_iter_exit(trans, &iter);
if (ret)
return ret; return ret;
--level;
}
}
bch2_bkey_buf_exit(&last_flushed, c);
return 0;
} }
static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c, static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,
...@@ -746,7 +763,6 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) ...@@ -746,7 +763,6 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
} }
bch2_trans_put(trans); bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret); bch_err_fn(c, ret);
return ret; return ret;
} }
...@@ -801,13 +817,11 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, ...@@ -801,13 +817,11 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
struct bbpos start, struct bbpos start,
struct bbpos end) struct bbpos end)
{ {
struct btree_iter iter;
struct bkey_s_c k;
struct bpos last_flushed_pos = SPOS_MAX; struct bpos last_flushed_pos = SPOS_MAX;
return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
POS_MIN, BTREE_ITER_PREFETCH, k, POS_MIN, BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_one_backpointer(trans, start, end, check_one_backpointer(trans, start, end,
bkey_s_c_to_backpointer(k), bkey_s_c_to_backpointer(k),
&last_flushed_pos)); &last_flushed_pos));
...@@ -854,7 +868,6 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c) ...@@ -854,7 +868,6 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
} }
bch2_trans_put(trans); bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret); bch_err_fn(c, ret);
return ret; return ret;
} }
...@@ -63,7 +63,7 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c, ...@@ -63,7 +63,7 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
return ret; return ret;
} }
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bkey_i_backpointer *, int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bpos bucket,
struct bch_backpointer, struct bkey_s_c, bool); struct bch_backpointer, struct bkey_s_c, bool);
static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
...@@ -72,28 +72,21 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, ...@@ -72,28 +72,21 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
struct bkey_s_c orig_k, struct bkey_s_c orig_k,
bool insert) bool insert)
{ {
struct bch_fs *c = trans->c; if (unlikely(bch2_backpointers_no_use_write_buffer))
struct bkey_i_backpointer *bp_k; return bch2_bucket_backpointer_mod_nowritebuffer(trans, bucket, bp, orig_k, insert);
int ret;
bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); struct bkey_i_backpointer bp_k;
ret = PTR_ERR_OR_ZERO(bp_k);
if (ret)
return ret;
bkey_backpointer_init(&bp_k->k_i); bkey_backpointer_init(&bp_k.k_i);
bp_k->k.p = bucket_pos_to_bp(c, bucket, bp.bucket_offset); bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
bp_k->v = bp; bp_k.v = bp;
if (!insert) { if (!insert) {
bp_k->k.type = KEY_TYPE_deleted; bp_k.k.type = KEY_TYPE_deleted;
set_bkey_val_u64s(&bp_k->k, 0); set_bkey_val_u64s(&bp_k.k, 0);
} }
if (unlikely(bch2_backpointers_no_use_write_buffer)) return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i);
return bch2_bucket_backpointer_mod_nowritebuffer(trans, bp_k, bp, orig_k, insert);
return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i);
} }
static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level, static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level,
......
...@@ -307,6 +307,13 @@ struct bkey_i { ...@@ -307,6 +307,13 @@ struct bkey_i {
struct bch_val v; struct bch_val v;
}; };
#define POS_KEY(_pos) \
((struct bkey) { \
.u64s = BKEY_U64s, \
.format = KEY_FORMAT_CURRENT, \
.p = _pos, \
})
#define KEY(_inode, _offset, _size) \ #define KEY(_inode, _offset, _size) \
((struct bkey) { \ ((struct bkey) { \
.u64s = BKEY_U64s, \ .u64s = BKEY_U64s, \
...@@ -1296,6 +1303,7 @@ struct bch_member { ...@@ -1296,6 +1303,7 @@ struct bch_member {
__le64 errors[BCH_MEMBER_ERROR_NR]; __le64 errors[BCH_MEMBER_ERROR_NR];
__le64 errors_at_reset[BCH_MEMBER_ERROR_NR]; __le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
__le64 errors_reset_time; __le64 errors_reset_time;
__le64 seq;
}; };
#define BCH_MEMBER_V1_BYTES 56 #define BCH_MEMBER_V1_BYTES 56
...@@ -1442,7 +1450,7 @@ struct bch_sb_field_replicas_v0 { ...@@ -1442,7 +1450,7 @@ struct bch_sb_field_replicas_v0 {
struct bch_replicas_entry_v0 entries[]; struct bch_replicas_entry_v0 entries[];
} __packed __aligned(8); } __packed __aligned(8);
struct bch_replicas_entry { struct bch_replicas_entry_v1 {
__u8 data_type; __u8 data_type;
__u8 nr_devs; __u8 nr_devs;
__u8 nr_required; __u8 nr_required;
...@@ -1454,7 +1462,7 @@ struct bch_replicas_entry { ...@@ -1454,7 +1462,7 @@ struct bch_replicas_entry {
struct bch_sb_field_replicas { struct bch_sb_field_replicas {
struct bch_sb_field field; struct bch_sb_field field;
struct bch_replicas_entry entries[]; struct bch_replicas_entry_v1 entries[];
} __packed __aligned(8); } __packed __aligned(8);
/* BCH_SB_FIELD_quota: */ /* BCH_SB_FIELD_quota: */
...@@ -1571,7 +1579,9 @@ struct bch_sb_field_disk_groups { ...@@ -1571,7 +1579,9 @@ struct bch_sb_field_disk_groups {
x(write_super, 73) \ x(write_super, 73) \
x(trans_restart_would_deadlock_recursion_limit, 74) \ x(trans_restart_would_deadlock_recursion_limit, 74) \
x(trans_restart_write_buffer_flush, 75) \ x(trans_restart_write_buffer_flush, 75) \
x(trans_restart_split_race, 76) x(trans_restart_split_race, 76) \
x(write_buffer_flush_slowpath, 77) \
x(write_buffer_flush_sync, 78)
enum bch_persistent_counters { enum bch_persistent_counters {
#define x(t, n, ...) BCH_COUNTER_##t, #define x(t, n, ...) BCH_COUNTER_##t,
...@@ -1662,69 +1672,41 @@ struct bch_sb_field_downgrade { ...@@ -1662,69 +1672,41 @@ struct bch_sb_field_downgrade {
#define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10))) #define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10)))
#define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0) #define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0)
#define RECOVERY_PASS_ALL_FSCK (1ULL << 63)
/* /*
* field 1: version name * field 1: version name
* field 2: BCH_VERSION(major, minor) * field 2: BCH_VERSION(major, minor)
* field 3: recovery passess required on upgrade * field 3: recovery passess required on upgrade
*/ */
#define BCH_METADATA_VERSIONS() \ #define BCH_METADATA_VERSIONS() \
x(bkey_renumber, BCH_VERSION(0, 10), \ x(bkey_renumber, BCH_VERSION(0, 10)) \
RECOVERY_PASS_ALL_FSCK) \ x(inode_btree_change, BCH_VERSION(0, 11)) \
x(inode_btree_change, BCH_VERSION(0, 11), \ x(snapshot, BCH_VERSION(0, 12)) \
RECOVERY_PASS_ALL_FSCK) \ x(inode_backpointers, BCH_VERSION(0, 13)) \
x(snapshot, BCH_VERSION(0, 12), \ x(btree_ptr_sectors_written, BCH_VERSION(0, 14)) \
RECOVERY_PASS_ALL_FSCK) \ x(snapshot_2, BCH_VERSION(0, 15)) \
x(inode_backpointers, BCH_VERSION(0, 13), \ x(reflink_p_fix, BCH_VERSION(0, 16)) \
RECOVERY_PASS_ALL_FSCK) \ x(subvol_dirent, BCH_VERSION(0, 17)) \
x(btree_ptr_sectors_written, BCH_VERSION(0, 14), \ x(inode_v2, BCH_VERSION(0, 18)) \
RECOVERY_PASS_ALL_FSCK) \ x(freespace, BCH_VERSION(0, 19)) \
x(snapshot_2, BCH_VERSION(0, 15), \ x(alloc_v4, BCH_VERSION(0, 20)) \
BIT_ULL(BCH_RECOVERY_PASS_fs_upgrade_for_subvolumes)| \ x(new_data_types, BCH_VERSION(0, 21)) \
BIT_ULL(BCH_RECOVERY_PASS_initialize_subvolumes)| \ x(backpointers, BCH_VERSION(0, 22)) \
RECOVERY_PASS_ALL_FSCK) \ x(inode_v3, BCH_VERSION(0, 23)) \
x(reflink_p_fix, BCH_VERSION(0, 16), \ x(unwritten_extents, BCH_VERSION(0, 24)) \
BIT_ULL(BCH_RECOVERY_PASS_fix_reflink_p)) \ x(bucket_gens, BCH_VERSION(0, 25)) \
x(subvol_dirent, BCH_VERSION(0, 17), \ x(lru_v2, BCH_VERSION(0, 26)) \
RECOVERY_PASS_ALL_FSCK) \ x(fragmentation_lru, BCH_VERSION(0, 27)) \
x(inode_v2, BCH_VERSION(0, 18), \ x(no_bps_in_alloc_keys, BCH_VERSION(0, 28)) \
RECOVERY_PASS_ALL_FSCK) \ x(snapshot_trees, BCH_VERSION(0, 29)) \
x(freespace, BCH_VERSION(0, 19), \ x(major_minor, BCH_VERSION(1, 0)) \
RECOVERY_PASS_ALL_FSCK) \ x(snapshot_skiplists, BCH_VERSION(1, 1)) \
x(alloc_v4, BCH_VERSION(0, 20), \ x(deleted_inodes, BCH_VERSION(1, 2)) \
RECOVERY_PASS_ALL_FSCK) \ x(rebalance_work, BCH_VERSION(1, 3)) \
x(new_data_types, BCH_VERSION(0, 21), \ x(member_seq, BCH_VERSION(1, 4))
RECOVERY_PASS_ALL_FSCK) \
x(backpointers, BCH_VERSION(0, 22), \
RECOVERY_PASS_ALL_FSCK) \
x(inode_v3, BCH_VERSION(0, 23), \
RECOVERY_PASS_ALL_FSCK) \
x(unwritten_extents, BCH_VERSION(0, 24), \
RECOVERY_PASS_ALL_FSCK) \
x(bucket_gens, BCH_VERSION(0, 25), \
BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \
RECOVERY_PASS_ALL_FSCK) \
x(lru_v2, BCH_VERSION(0, 26), \
RECOVERY_PASS_ALL_FSCK) \
x(fragmentation_lru, BCH_VERSION(0, 27), \
RECOVERY_PASS_ALL_FSCK) \
x(no_bps_in_alloc_keys, BCH_VERSION(0, 28), \
RECOVERY_PASS_ALL_FSCK) \
x(snapshot_trees, BCH_VERSION(0, 29), \
RECOVERY_PASS_ALL_FSCK) \
x(major_minor, BCH_VERSION(1, 0), \
0) \
x(snapshot_skiplists, BCH_VERSION(1, 1), \
BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \
x(deleted_inodes, BCH_VERSION(1, 2), \
BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \
x(rebalance_work, BCH_VERSION(1, 3), \
BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))
enum bcachefs_metadata_version { enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9, bcachefs_metadata_version_min = 9,
#define x(t, n, upgrade_passes) bcachefs_metadata_version_##t = n, #define x(t, n) bcachefs_metadata_version_##t = n,
BCH_METADATA_VERSIONS() BCH_METADATA_VERSIONS()
#undef x #undef x
bcachefs_metadata_version_max bcachefs_metadata_version_max
...@@ -1786,7 +1768,8 @@ struct bch_sb { ...@@ -1786,7 +1768,8 @@ struct bch_sb {
__le32 time_base_hi; __le32 time_base_hi;
__le32 time_precision; __le32 time_precision;
__le64 flags[8]; __le64 flags[7];
__le64 write_time;
__le64 features[2]; __le64 features[2];
__le64 compat[2]; __le64 compat[2];
...@@ -2153,7 +2136,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb) ...@@ -2153,7 +2136,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
x(clock, 7) \ x(clock, 7) \
x(dev_usage, 8) \ x(dev_usage, 8) \
x(log, 9) \ x(log, 9) \
x(overwrite, 10) x(overwrite, 10) \
x(write_buffer_keys, 11)
enum { enum {
#define x(f, nr) BCH_JSET_ENTRY_##f = nr, #define x(f, nr) BCH_JSET_ENTRY_##f = nr,
...@@ -2162,6 +2146,19 @@ enum { ...@@ -2162,6 +2146,19 @@ enum {
BCH_JSET_ENTRY_NR BCH_JSET_ENTRY_NR
}; };
static inline bool jset_entry_is_key(struct jset_entry *e)
{
switch (e->type) {
case BCH_JSET_ENTRY_btree_keys:
case BCH_JSET_ENTRY_btree_root:
case BCH_JSET_ENTRY_overwrite:
case BCH_JSET_ENTRY_write_buffer_keys:
return true;
}
return false;
}
/* /*
* Journal sequence numbers can be blacklisted: bsets record the max sequence * Journal sequence numbers can be blacklisted: bsets record the max sequence
* number of all the journal entries they contain updates for, so that on * number of all the journal entries they contain updates for, so that on
...@@ -2203,7 +2200,7 @@ struct jset_entry_usage { ...@@ -2203,7 +2200,7 @@ struct jset_entry_usage {
struct jset_entry_data_usage { struct jset_entry_data_usage {
struct jset_entry entry; struct jset_entry entry;
__le64 v; __le64 v;
struct bch_replicas_entry r; struct bch_replicas_entry_v1 r;
} __packed; } __packed;
struct jset_entry_clock { struct jset_entry_clock {
...@@ -2224,7 +2221,7 @@ struct jset_entry_dev_usage { ...@@ -2224,7 +2221,7 @@ struct jset_entry_dev_usage {
__le32 dev; __le32 dev;
__u32 pad; __u32 pad;
__le64 buckets_ec; __le64 _buckets_ec; /* No longer used */
__le64 _buckets_unavailable; /* No longer used */ __le64 _buckets_unavailable; /* No longer used */
struct jset_entry_dev_usage_type d[]; struct jset_entry_dev_usage_type d[];
...@@ -2239,7 +2236,7 @@ static inline unsigned jset_entry_dev_usage_nr_types(struct jset_entry_dev_usage ...@@ -2239,7 +2236,7 @@ static inline unsigned jset_entry_dev_usage_nr_types(struct jset_entry_dev_usage
struct jset_entry_log { struct jset_entry_log {
struct jset_entry entry; struct jset_entry entry;
u8 d[]; u8 d[];
} __packed; } __packed __aligned(8);
/* /*
* On disk format for a journal entry: * On disk format for a journal entry:
......
...@@ -81,6 +81,11 @@ struct bch_ioctl_incremental { ...@@ -81,6 +81,11 @@ struct bch_ioctl_incremental {
#define BCH_IOCTL_SUBVOLUME_CREATE _IOW(0xbc, 16, struct bch_ioctl_subvolume) #define BCH_IOCTL_SUBVOLUME_CREATE _IOW(0xbc, 16, struct bch_ioctl_subvolume)
#define BCH_IOCTL_SUBVOLUME_DESTROY _IOW(0xbc, 17, struct bch_ioctl_subvolume) #define BCH_IOCTL_SUBVOLUME_DESTROY _IOW(0xbc, 17, struct bch_ioctl_subvolume)
#define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc, 18, struct bch_ioctl_dev_usage_v2)
#define BCH_IOCTL_FSCK_OFFLINE _IOW(0xbc, 19, struct bch_ioctl_fsck_offline)
#define BCH_IOCTL_FSCK_ONLINE _IOW(0xbc, 20, struct bch_ioctl_fsck_online)
/* ioctl below act on a particular file, not the filesystem as a whole: */ /* ioctl below act on a particular file, not the filesystem as a whole: */
#define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *) #define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *)
...@@ -173,12 +178,18 @@ struct bch_ioctl_disk_set_state { ...@@ -173,12 +178,18 @@ struct bch_ioctl_disk_set_state {
__u64 dev; __u64 dev;
}; };
#define BCH_DATA_OPS() \
x(scrub, 0) \
x(rereplicate, 1) \
x(migrate, 2) \
x(rewrite_old_nodes, 3) \
x(drop_extra_replicas, 4)
enum bch_data_ops { enum bch_data_ops {
BCH_DATA_OP_SCRUB = 0, #define x(t, n) BCH_DATA_OP_##t = n,
BCH_DATA_OP_REREPLICATE = 1, BCH_DATA_OPS()
BCH_DATA_OP_MIGRATE = 2, #undef x
BCH_DATA_OP_REWRITE_OLD_NODES = 3, BCH_DATA_OP_NR
BCH_DATA_OP_NR = 4,
}; };
/* /*
...@@ -237,7 +248,7 @@ struct bch_ioctl_data_event { ...@@ -237,7 +248,7 @@ struct bch_ioctl_data_event {
struct bch_replicas_usage { struct bch_replicas_usage {
__u64 sectors; __u64 sectors;
struct bch_replicas_entry r; struct bch_replicas_entry_v1 r;
} __packed; } __packed;
static inline struct bch_replicas_usage * static inline struct bch_replicas_usage *
...@@ -268,7 +279,7 @@ struct bch_ioctl_fs_usage { ...@@ -268,7 +279,7 @@ struct bch_ioctl_fs_usage {
__u32 replica_entries_bytes; __u32 replica_entries_bytes;
__u32 pad; __u32 pad;
struct bch_replicas_usage replicas[0]; struct bch_replicas_usage replicas[];
}; };
/* /*
...@@ -292,7 +303,20 @@ struct bch_ioctl_dev_usage { ...@@ -292,7 +303,20 @@ struct bch_ioctl_dev_usage {
__u64 buckets; __u64 buckets;
__u64 sectors; __u64 sectors;
__u64 fragmented; __u64 fragmented;
} d[BCH_DATA_NR]; } d[10];
};
struct bch_ioctl_dev_usage_v2 {
__u64 dev;
__u32 flags;
__u8 state;
__u8 nr_data_types;
__u8 pad[6];
__u32 bucket_size;
__u64 nr_buckets;
struct bch_ioctl_dev_usage_type d[];
}; };
/* /*
...@@ -365,4 +389,24 @@ struct bch_ioctl_subvolume { ...@@ -365,4 +389,24 @@ struct bch_ioctl_subvolume {
#define BCH_SUBVOL_SNAPSHOT_CREATE (1U << 0) #define BCH_SUBVOL_SNAPSHOT_CREATE (1U << 0)
#define BCH_SUBVOL_SNAPSHOT_RO (1U << 1) #define BCH_SUBVOL_SNAPSHOT_RO (1U << 1)
/*
* BCH_IOCTL_FSCK_OFFLINE: run fsck from the 'bcachefs fsck' userspace command,
* but with the kernel's implementation of fsck:
*/
struct bch_ioctl_fsck_offline {
__u64 flags;
__u64 opts; /* string */
__u64 nr_devs;
__u64 devs[] __counted_by(nr_devs);
};
/*
* BCH_IOCTL_FSCK_ONLINE: run fsck from the 'bcachefs fsck' userspace command,
* but with the kernel's implementation of fsck:
*/
struct bch_ioctl_fsck_online {
__u64 flags;
__u64 opts; /* string */
};
#endif /* _BCACHEFS_IOCTL_H */ #endif /* _BCACHEFS_IOCTL_H */
...@@ -28,10 +28,8 @@ struct bkey_ops { ...@@ -28,10 +28,8 @@ struct bkey_ops {
void (*swab)(struct bkey_s); void (*swab)(struct bkey_s);
bool (*key_normalize)(struct bch_fs *, struct bkey_s); bool (*key_normalize)(struct bch_fs *, struct bkey_s);
bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c); bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
int (*trans_trigger)(struct btree_trans *, enum btree_id, unsigned, int (*trigger)(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, struct bkey_i *, unsigned); struct bkey_s_c, struct bkey_s, unsigned);
int (*atomic_trigger)(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, struct bkey_s_c, unsigned);
void (*compat)(enum btree_id id, unsigned version, void (*compat)(enum btree_id id, unsigned version,
unsigned big_endian, int write, unsigned big_endian, int write,
struct bkey_s); struct bkey_s);
...@@ -78,61 +76,63 @@ static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct b ...@@ -78,61 +76,63 @@ static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct b
bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
static inline int bch2_mark_key(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type);
return ops->atomic_trigger
? ops->atomic_trigger(trans, btree, level, old, new, flags)
: 0;
}
enum btree_update_flags { enum btree_update_flags {
__BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END, __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END,
__BTREE_UPDATE_NOJOURNAL, __BTREE_UPDATE_NOJOURNAL,
__BTREE_UPDATE_PREJOURNAL,
__BTREE_UPDATE_KEY_CACHE_RECLAIM, __BTREE_UPDATE_KEY_CACHE_RECLAIM,
__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */ __BTREE_TRIGGER_NORUN,
__BTREE_TRIGGER_TRANSACTIONAL,
__BTREE_TRIGGER_INSERT, __BTREE_TRIGGER_INSERT,
__BTREE_TRIGGER_OVERWRITE, __BTREE_TRIGGER_OVERWRITE,
__BTREE_TRIGGER_GC, __BTREE_TRIGGER_GC,
__BTREE_TRIGGER_BUCKET_INVALIDATE, __BTREE_TRIGGER_BUCKET_INVALIDATE,
__BTREE_TRIGGER_NOATOMIC,
}; };
#define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) #define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
#define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL) #define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL)
#define BTREE_UPDATE_PREJOURNAL (1U << __BTREE_UPDATE_PREJOURNAL)
#define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM) #define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
/* Don't run triggers at all */
#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN) #define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN)
/*
* If set, we're running transactional triggers as part of a transaction commit:
* triggers may generate new updates
*
* If cleared, and either BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE are set,
* we're running atomic triggers during a transaction commit: we have our
* journal reservation, we're holding btree node write locks, and we know the
* transaction is going to commit (returning an error here is a fatal error,
* causing us to go emergency read-only)
*/
#define BTREE_TRIGGER_TRANSACTIONAL (1U << __BTREE_TRIGGER_TRANSACTIONAL)
/* @new is entering the btree */
#define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT) #define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT)
/* @old is leaving the btree */
#define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE) #define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE)
/* We're in gc/fsck: running triggers to recalculate e.g. disk usage */
#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC) #define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC)
/* signal from bucket invalidate path to alloc trigger */
#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) #define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC)
static inline int bch2_trans_mark_key(struct btree_trans *trans, static inline int bch2_key_trigger(struct btree_trans *trans,
enum btree_id btree_id, unsigned level, enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_i *new, struct bkey_s_c old, struct bkey_s new,
unsigned flags) unsigned flags)
{ {
const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new->k.type); const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type);
return ops->trans_trigger return ops->trigger
? ops->trans_trigger(trans, btree_id, level, old, new, flags) ? ops->trigger(trans, btree, level, old, new, flags)
: 0; : 0;
} }
static inline int bch2_trans_mark_old(struct btree_trans *trans, static inline int bch2_key_trigger_old(struct btree_trans *trans,
enum btree_id btree_id, unsigned level, enum btree_id btree_id, unsigned level,
struct bkey_s_c old, unsigned flags) struct bkey_s_c old, unsigned flags)
{ {
...@@ -141,20 +141,20 @@ static inline int bch2_trans_mark_old(struct btree_trans *trans, ...@@ -141,20 +141,20 @@ static inline int bch2_trans_mark_old(struct btree_trans *trans,
bkey_init(&deleted.k); bkey_init(&deleted.k);
deleted.k.p = old.k->p; deleted.k.p = old.k->p;
return bch2_trans_mark_key(trans, btree_id, level, old, &deleted, return bch2_key_trigger(trans, btree_id, level, old, bkey_i_to_s(&deleted),
BTREE_TRIGGER_OVERWRITE|flags); BTREE_TRIGGER_OVERWRITE|flags);
} }
static inline int bch2_trans_mark_new(struct btree_trans *trans, static inline int bch2_key_trigger_new(struct btree_trans *trans,
enum btree_id btree_id, unsigned level, enum btree_id btree_id, unsigned level,
struct bkey_i *new, unsigned flags) struct bkey_s new, unsigned flags)
{ {
struct bkey_i deleted; struct bkey_i deleted;
bkey_init(&deleted.k); bkey_init(&deleted.k);
deleted.k.p = new->k.p; deleted.k.p = new.k->p;
return bch2_trans_mark_key(trans, btree_id, level, bkey_i_to_s_c(&deleted), new, return bch2_key_trigger(trans, btree_id, level, bkey_i_to_s_c(&deleted), new,
BTREE_TRIGGER_INSERT|flags); BTREE_TRIGGER_INSERT|flags);
} }
......
@@ -68,6 +68,12 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b,
 	     _k = _n) {
 		_n = bkey_p_next(_k);
 
+		if (!_k->u64s) {
+			printk(KERN_ERR "block %u key %5zu - u64s 0? aieee!\n", set,
+			       _k->_data - i->_data);
+			break;
+		}
+
 		k = bkey_disassemble(b, _k, &uk);
 
 		printbuf_reset(&buf);
......
...@@ -500,19 +500,21 @@ void bch2_fs_btree_cache_init_early(struct btree_cache *bc) ...@@ -500,19 +500,21 @@ void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
* cannibalize_bucket() will take. This means every time we unlock the root of * cannibalize_bucket() will take. This means every time we unlock the root of
* the btree, we need to release this lock if we have it held. * the btree, we need to release this lock if we have it held.
*/ */
void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c) void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans)
{ {
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache; struct btree_cache *bc = &c->btree_cache;
if (bc->alloc_lock == current) { if (bc->alloc_lock == current) {
trace_and_count(c, btree_cache_cannibalize_unlock, c); trace_and_count(c, btree_cache_cannibalize_unlock, trans);
bc->alloc_lock = NULL; bc->alloc_lock = NULL;
closure_wake_up(&bc->alloc_wait); closure_wake_up(&bc->alloc_wait);
} }
} }
int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl) int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure *cl)
{ {
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache; struct btree_cache *bc = &c->btree_cache;
struct task_struct *old; struct task_struct *old;
...@@ -521,7 +523,7 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl) ...@@ -521,7 +523,7 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
goto success; goto success;
if (!cl) { if (!cl) {
trace_and_count(c, btree_cache_cannibalize_lock_fail, c); trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock; return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock;
} }
...@@ -535,11 +537,11 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl) ...@@ -535,11 +537,11 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
goto success; goto success;
} }
trace_and_count(c, btree_cache_cannibalize_lock_fail, c); trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
return -BCH_ERR_btree_cache_cannibalize_lock_blocked; return -BCH_ERR_btree_cache_cannibalize_lock_blocked;
success: success:
trace_and_count(c, btree_cache_cannibalize_lock, c); trace_and_count(c, btree_cache_cannibalize_lock, trans);
return 0; return 0;
} }
...@@ -673,7 +675,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea ...@@ -673,7 +675,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
mutex_unlock(&bc->lock); mutex_unlock(&bc->lock);
trace_and_count(c, btree_cache_cannibalize, c); trace_and_count(c, btree_cache_cannibalize, trans);
goto out; goto out;
} }
...@@ -717,12 +719,6 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, ...@@ -717,12 +719,6 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
if (IS_ERR(b)) if (IS_ERR(b))
return b; return b;
/*
* Btree nodes read in from disk should not have the accessed bit set
* initially, so that linear scans don't thrash the cache:
*/
clear_btree_node_accessed(b);
bkey_copy(&b->key, k); bkey_copy(&b->key, k);
if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) { if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
/* raced with another fill: */ /* raced with another fill: */
...@@ -749,7 +745,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, ...@@ -749,7 +745,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
if (path && sync) if (path && sync)
bch2_trans_unlock_noassert(trans); bch2_trans_unlock_noassert(trans);
bch2_btree_node_read(c, b, sync); bch2_btree_node_read(trans, b, sync);
if (!sync) if (!sync)
return NULL; return NULL;
...@@ -1039,7 +1035,7 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, ...@@ -1039,7 +1035,7 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
goto retry; goto retry;
if (IS_ERR(b) && if (IS_ERR(b) &&
!bch2_btree_cache_cannibalize_lock(c, NULL)) !bch2_btree_cache_cannibalize_lock(trans, NULL))
goto retry; goto retry;
if (IS_ERR(b)) if (IS_ERR(b))
...@@ -1087,7 +1083,7 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, ...@@ -1087,7 +1083,7 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
btree_check_header(c, b); btree_check_header(c, b);
out: out:
bch2_btree_cache_cannibalize_unlock(c); bch2_btree_cache_cannibalize_unlock(trans);
return b; return b;
} }
......
@@ -17,8 +17,8 @@ int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
 int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
 				unsigned, enum btree_id);
 
-void bch2_btree_cache_cannibalize_unlock(struct bch_fs *);
-int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);
+void bch2_btree_cache_cannibalize_unlock(struct btree_trans *);
+int bch2_btree_cache_cannibalize_lock(struct btree_trans *, struct closure *);
 
 struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
 struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool);
......
@@ -130,7 +130,7 @@ void bch2_btree_init_next(struct btree_trans *, struct btree *);
 int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *,
 			      struct btree *, bool, bool *);
 
-void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
+void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
 
 int bch2_btree_root_read(struct bch_fs *, enum btree_id,
 			 const struct bkey_i *, unsigned);
......
@@ -73,6 +73,7 @@ static size_t bch2_journal_key_search(struct journal_keys *keys,
 	return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos));
 }
+/* Returns first non-overwritten key >= search key: */
 struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id,
 					   unsigned level, struct bpos pos,
 					   struct bpos end_pos, size_t *idx)
@@ -86,12 +87,26 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree
 	if (!*idx)
 		*idx = __bch2_journal_key_search(keys, btree_id, level, pos);
+	while (*idx &&
+	       __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) {
+		--(*idx);
+		iters++;
+		if (iters == 10) {
+			*idx = 0;
+			goto search;
+		}
+	}
 	while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
 		if (__journal_key_cmp(btree_id, level, end_pos, k) < 0)
 			return NULL;
-		if (__journal_key_cmp(btree_id, level, pos, k) <= 0 &&
-		    !k->overwritten)
+		if (k->overwritten) {
+			(*idx)++;
+			continue;
+		}
+		if (__journal_key_cmp(btree_id, level, pos, k) <= 0)
 			return k->k;
 		(*idx)++;
@@ -162,7 +177,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
 	struct journal_keys *keys = &c->journal_keys;
 	size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
-	BUG_ON(test_bit(BCH_FS_RW, &c->flags));
+	BUG_ON(test_bit(BCH_FS_rw, &c->flags));
 	if (idx < keys->size &&
 	    journal_key_cmp(&n, &keys->d[idx]) == 0) {
@@ -452,9 +467,7 @@ static void __journal_keys_sort(struct journal_keys *keys)
 	src = dst = keys->d;
 	while (src < keys->d + keys->nr) {
 		while (src + 1 < keys->d + keys->nr &&
-		       src[0].btree_id == src[1].btree_id &&
-		       src[0].level == src[1].level &&
-		       bpos_eq(src[0].k->k.p, src[1].k->k.p))
+		       !journal_key_cmp(src, src + 1))
			src++;
 		*dst++ = *src++;
...
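The reworked bch2_journal_keys_peek_upto() above skips keys flagged as overwritten while scanning forward from *idx, and bails out once it passes end_pos. Below is a minimal standalone sketch of that scan pattern, using simplified integer positions and hypothetical demo_* names rather than the kernel's bpos/journal_key types:

#include <stdbool.h>
#include <stddef.h>

struct demo_key {
	int	pos;		/* stand-in for struct bpos */
	bool	overwritten;	/* set when a later update supersedes this key */
};

/* Scan forward from *idx; return the first live key with pos <= k->pos <= end_pos. */
static struct demo_key *demo_peek_upto(struct demo_key *keys, size_t nr,
				       int pos, int end_pos, size_t *idx)
{
	for (; *idx < nr; (*idx)++) {
		struct demo_key *k = &keys[*idx];

		if (k->pos > end_pos)	/* scanned past the range: nothing left */
			return NULL;
		if (k->overwritten)	/* overwritten entries are skipped */
			continue;
		if (k->pos >= pos)	/* first live key at or after the search pos */
			return k;
	}
	return NULL;
}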
@@ -31,8 +31,6 @@ int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *,
 bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned,
 			struct btree_insert_entry *);
-int bch2_btree_key_cache_flush(struct btree_trans *,
-			       enum btree_id, struct bpos);
 void bch2_btree_key_cache_drop(struct btree_trans *,
 			       struct btree_path *);
...
@@ -122,10 +122,7 @@ static void btree_trans_lock_hold_time_update(struct btree_trans *trans,
 					      struct btree_path *path, unsigned level)
 {
 #ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
-	struct btree_transaction_stats *s = btree_trans_stats(trans);
-
-	if (s)
-		__bch2_time_stats_update(&s->lock_hold_times,
+	__bch2_time_stats_update(&btree_trans_stats(trans)->lock_hold_times,
 					 path->l[level].lock_taken_time,
 					 local_clock());
 #endif
@@ -175,6 +172,7 @@ bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_pat
 						    struct btree *b)
 {
 	struct btree_path *linked;
+	unsigned i;
 	EBUG_ON(path->l[b->c.level].b != b);
 	EBUG_ON(path->l[b->c.level].lock_seq != six_lock_seq(&b->c.lock));
@@ -182,7 +180,7 @@ bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_pat
 	mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
-	trans_for_each_path_with_node(trans, b, linked)
+	trans_for_each_path_with_node(trans, b, linked, i)
 		linked->l[b->c.level].lock_seq++;
 	six_unlock_write(&b->c.lock);
@@ -242,8 +240,9 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
 					     enum btree_node_locked_type want)
 {
 	struct btree_path *path;
+	unsigned i;
-	trans_for_each_path(trans, path)
+	trans_for_each_path(trans, path, i)
 		if (&path->l[level].b->c == b &&
 		    btree_node_locked_type(path, level) >= want) {
 			six_lock_increment(&b->lock, (enum six_lock_type) want);
@@ -263,7 +262,6 @@ static inline int btree_node_lock(struct btree_trans *trans,
 	int ret = 0;
 	EBUG_ON(level >= BTREE_MAX_DEPTH);
-	EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
 	if (likely(six_trylock_type(&b->lock, type)) ||
 	    btree_node_lock_increment(trans, b, level, (enum btree_node_locked_type) type) ||
...
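The locking helpers above now thread an explicit index variable through the path-iteration macros (trans_for_each_path(trans, path, i) and trans_for_each_path_with_node(trans, b, linked, i)). A simplified sketch of what an index-carrying iterator over an array can look like; demo_for_each_entry and its arguments are illustrative, not the actual bcachefs macros:

/*
 * Illustrative macro only: walk an array of 'nr' entries, exposing both
 * the element pointer and its index to the loop body. Carrying the index
 * keeps the caller's notion of "current position" meaningful even when a
 * bare pointer into the array is not a stable thing to hold on to.
 */
#define demo_for_each_entry(arr, nr, entry, i)				\
	for ((i) = 0;							\
	     (i) < (nr) && ((entry) = &(arr)[(i)], true);		\
	     (i)++)

/* Usage:
 *	struct demo_elem *e;
 *	unsigned i;
 *
 *	demo_for_each_entry(elems, nr_elems, e, i)
 *		process(e, i);
 */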
@@ -117,16 +117,17 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
 						  struct btree *,
 						  struct bkey_format);
-int bch2_btree_split_leaf(struct btree_trans *, struct btree_path *, unsigned);
+int bch2_btree_split_leaf(struct btree_trans *, btree_path_idx_t, unsigned);
-int __bch2_foreground_maybe_merge(struct btree_trans *, struct btree_path *,
+int __bch2_foreground_maybe_merge(struct btree_trans *, btree_path_idx_t,
 				  unsigned, unsigned, enum btree_node_sibling);
 static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans,
-					struct btree_path *path,
+					btree_path_idx_t path_idx,
 					unsigned level, unsigned flags,
 					enum btree_node_sibling sib)
 {
+	struct btree_path *path = trans->paths + path_idx;
 	struct btree *b;
 	EBUG_ON(!btree_node_locked(path, level));
@@ -135,11 +136,11 @@ static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans,
 	if (b->sib_u64s[sib] > trans->c->btree_foreground_merge_threshold)
 		return 0;
-	return __bch2_foreground_maybe_merge(trans, path, level, flags, sib);
+	return __bch2_foreground_maybe_merge(trans, path_idx, level, flags, sib);
 }
 static inline int bch2_foreground_maybe_merge(struct btree_trans *trans,
-					      struct btree_path *path,
+					      btree_path_idx_t path,
 					      unsigned level,
 					      unsigned flags)
 {
...
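These prototypes switch from passing struct btree_path * to passing a btree_path_idx_t, and the inline helper resolves the index with trans->paths + path_idx before using it. A hedged sketch of that handle-then-resolve pattern, with hypothetical demo_* names rather than the bcachefs definitions:

typedef unsigned demo_path_idx_t;

struct demo_path {
	unsigned level;
};

struct demo_trans {
	struct demo_path	*paths;	/* backing array; may be reallocated */
	unsigned		 nr;
};

/* Resolve the small index handle to a pointer only at the point of use. */
static inline struct demo_path *demo_resolve(struct demo_trans *trans,
					     demo_path_idx_t idx)
{
	return trans->paths + idx;
}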
@@ -33,8 +33,6 @@ struct bucket_gens {
 };
 struct bch_dev_usage {
-	u64			buckets_ec;
-
 	struct {
 		u64		buckets;
 		u64		sectors; /* _compressed_ sectors: */
...
@@ -31,6 +31,7 @@ static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c,
 	}
 }
+int bch2_run_online_recovery_passes(struct bch_fs *);
 u64 bch2_fsck_recovery_passes(void);
 int bch2_fs_recovery(struct bch_fs *);
...