Commit 350175bf authored by Kent Overstreet

bcachefs: Improved nocow locking

This improves the nocow lock table so that hash table entries have
multiple locks, and locks specify which bucket they're for - i.e. we can
now resolve hash collisions.

This is important because the allocator has to skip buckets that are
locked in the nocow lock table, and previously hash collisions would
cause it to spuriously skip unlocked buckets.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent f3a37e76
...@@ -23,6 +23,16 @@ static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos) ...@@ -23,6 +23,16 @@ static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos)
pos.offset < ca->mi.nbuckets; pos.offset < ca->mi.nbuckets;
} }
/*
 * Pack a bucket position into one u64: bucket.inode goes into the bits from
 * 48 upwards and bucket.offset is ORed into the result unchanged.
 *
 * The offset is not masked here, so this presumably relies on it fitting in
 * the low 48 bits -- confirm against the callers.
 */
static inline u64 bucket_to_u64(struct bpos bucket)
{
	u64 high_bits = bucket.inode << 48;

	return high_bits | bucket.offset;
}
/*
 * Split a packed u64 back into a position: everything from bit 48 upwards
 * becomes the first POS() argument, the low 48 bits become the second.
 */
static inline struct bpos u64_to_bucket(u64 bucket)
{
	u64 high_part = bucket >> 48;
	u64 low_part = bucket & ((1ULL << 48) - 1);

	return POS(high_part, low_part);
}
static inline u8 alloc_gc_gen(struct bch_alloc_v4 a) static inline u8 alloc_gc_gen(struct bch_alloc_v4 a)
{ {
return a.gen - a.oldest_gen; return a.gen - a.oldest_gen;
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "io.h" #include "io.h"
#include "journal.h" #include "journal.h"
#include "movinggc.h" #include "movinggc.h"
#include "nocow_locking.h"
#include "trace.h" #include "trace.h"
#include <linux/math64.h> #include <linux/math64.h>
......
...@@ -206,7 +206,7 @@ ...@@ -206,7 +206,7 @@
#include "bcachefs_format.h" #include "bcachefs_format.h"
#include "errcode.h" #include "errcode.h"
#include "fifo.h" #include "fifo.h"
#include "nocow_locking.h" #include "nocow_locking_types.h"
#include "opts.h" #include "opts.h"
#include "util.h" #include "util.h"
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "io.h" #include "io.h"
#include "keylist.h" #include "keylist.h"
#include "move.h" #include "move.h"
#include "nocow_locking.h"
#include "subvolume.h" #include "subvolume.h"
#include "trace.h" #include "trace.h"
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "journal.h" #include "journal.h"
#include "keylist.h" #include "keylist.h"
#include "move.h" #include "move.h"
#include "nocow_locking.h"
#include "rebalance.h" #include "rebalance.h"
#include "subvolume.h" #include "subvolume.h"
#include "super.h" #include "super.h"
...@@ -1469,7 +1470,7 @@ static void bch2_nocow_write(struct bch_write_op *op) ...@@ -1469,7 +1470,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
struct { struct {
struct bpos b; struct bpos b;
unsigned gen; unsigned gen;
two_state_lock_t *l; struct nocow_lock_bucket *l;
} buckets[BCH_REPLICAS_MAX]; } buckets[BCH_REPLICAS_MAX];
unsigned nr_buckets = 0; unsigned nr_buckets = 0;
u32 snapshot; u32 snapshot;
...@@ -1516,7 +1517,8 @@ static void bch2_nocow_write(struct bch_write_op *op) ...@@ -1516,7 +1517,8 @@ static void bch2_nocow_write(struct bch_write_op *op)
buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr); buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr);
buckets[nr_buckets].gen = ptr->gen; buckets[nr_buckets].gen = ptr->gen;
buckets[nr_buckets].l = buckets[nr_buckets].l =
bucket_nocow_lock(&c->nocow_locks, buckets[nr_buckets].b); bucket_nocow_lock(&c->nocow_locks,
bucket_to_u64(buckets[nr_buckets].b));
prefetch(buckets[nr_buckets].l); prefetch(buckets[nr_buckets].l);
nr_buckets++; nr_buckets++;
...@@ -1538,11 +1540,12 @@ static void bch2_nocow_write(struct bch_write_op *op) ...@@ -1538,11 +1540,12 @@ static void bch2_nocow_write(struct bch_write_op *op)
for (i = 0; i < nr_buckets; i++) { for (i = 0; i < nr_buckets; i++) {
struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode); struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode);
two_state_lock_t *l = buckets[i].l; struct nocow_lock_bucket *l = buckets[i].l;
bool stale; bool stale;
if (!bch2_two_state_trylock(l, BUCKET_NOCOW_LOCK_UPDATE)) __bch2_bucket_nocow_lock(&c->nocow_locks, l,
__bch2_bucket_nocow_lock(&c->nocow_locks, l, BUCKET_NOCOW_LOCK_UPDATE); bucket_to_u64(buckets[i].b),
BUCKET_NOCOW_LOCK_UPDATE);
rcu_read_lock(); rcu_read_lock();
stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen); stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen);
...@@ -2984,11 +2987,6 @@ void bch2_fs_io_exit(struct bch_fs *c) ...@@ -2984,11 +2987,6 @@ void bch2_fs_io_exit(struct bch_fs *c)
int bch2_fs_io_init(struct bch_fs *c) int bch2_fs_io_init(struct bch_fs *c)
{ {
unsigned i;
for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++)
two_state_lock_init(&c->nocow_locks.l[i]);
if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio), if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
BIOSET_NEED_BVECS) || BIOSET_NEED_BVECS) ||
bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio), bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
......
...@@ -4,12 +4,116 @@ ...@@ -4,12 +4,116 @@
#include "nocow_locking.h" #include "nocow_locking.h"
#include "util.h" #include "util.h"
#include <linux/closure.h>
/*
 * Report whether @bucket currently has a nonzero lock count in table @t.
 *
 * Looks up the hash table entry for the packed bucket id and scans its slots
 * for one that both names this bucket and has a nonzero counter. The entry's
 * spinlock is not taken; the slots are only read.
 */
bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, struct bpos bucket)
{
	u64 key = bucket_to_u64(bucket);
	struct nocow_lock_bucket *entry = bucket_nocow_lock(t, key);
	unsigned slot = 0;

	while (slot < ARRAY_SIZE(entry->b)) {
		if (entry->b[slot] == key && atomic_read(&entry->l[slot]))
			return true;
		slot++;
	}

	return false;
}
/*
 * Drop one reference on the lock for @bucket in table @t.
 *
 * @flags must select the same direction that was used when locking: nonzero
 * removes +1 from the slot counter, zero removes -1. When the counter reaches
 * zero, waiters on the entry's waitlist are woken.
 *
 * Hits BUG() if no slot in the entry names this bucket.
 */
void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags)
{
	u64 key = bucket_to_u64(bucket);
	struct nocow_lock_bucket *entry = bucket_nocow_lock(t, key);
	int delta = flags ? 1 : -1;
	unsigned slot;

	for (slot = 0; slot < ARRAY_SIZE(entry->b); slot++) {
		if (entry->b[slot] != key)
			continue;

		/* Only the first slot naming this bucket is touched. */
		if (atomic_sub_return(delta, &entry->l[slot]) == 0)
			closure_wake_up(&entry->wait);
		return;
	}

	BUG();
}
/*
 * Make one non-sleeping attempt to take the lock for @dev_bucket in hash
 * table entry @l.
 *
 * Each slot i pairs a bucket id (l->b[i]) with a signed counter (l->l[i]).
 * A nonzero @flags adds +1 to the counter and a zero @flags adds -1, so the
 * sign of a nonzero counter records which of the two states holds the slot.
 *
 * Returns true if the counter was adjusted. Returns false if the slot for
 * @dev_bucket is currently held in the opposite state, or if no slot names
 * @dev_bucket and every slot has a nonzero counter.
 *
 * NOTE(review): there is no visible guard against the counter wrapping when
 * many holders take the same state -- confirm whether that can matter.
 */
bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l,
u64 dev_bucket, int flags)
{
int v, lock_val = flags ? 1 : -1;
unsigned i;
/* l->lock covers slot lookup, slot claiming and the counter update. */
spin_lock(&l->lock);
/* First choice: a slot that already names this bucket (its count may be 0). */
for (i = 0; i < ARRAY_SIZE(l->b); i++)
if (l->b[i] == dev_bucket)
goto got_entry;
/* Otherwise claim the first slot whose counter is zero and relabel it. */
for (i = 0; i < ARRAY_SIZE(l->b); i++)
if (!atomic_read(&l->l[i])) {
l->b[i] = dev_bucket;
goto take_lock;
}
/* Reached by falling through (no usable slot) or from the state check below. */
fail:
spin_unlock(&l->lock);
return false;
got_entry:
v = atomic_read(&l->l[i]);
/* Refuse if the existing count has the opposite sign to the one requested. */
if (lock_val > 0 ? v < 0 : v > 0)
goto fail;
take_lock:
atomic_add(lock_val, &l->l[i]);
spin_unlock(&l->lock);
return true;
}
void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t, void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
two_state_lock_t *l, int flags) struct nocow_lock_bucket *l,
u64 dev_bucket, int flags)
{
if (!__bch2_bucket_nocow_trylock(l, dev_bucket, flags)) {
struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks);
u64 start_time = local_clock();
__closure_wait_event(&l->wait, __bch2_bucket_nocow_trylock(l, dev_bucket, flags));
bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time);
}
}
void bch2_nocow_locks_to_text(struct printbuf *out, struct bucket_nocow_lock_table *t)
{ {
struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks); unsigned i, nr_zero = 0;
u64 start_time = local_clock(); struct nocow_lock_bucket *l;
for (l = t->l; l < t->l + ARRAY_SIZE(t->l); l++) {
unsigned v = 0;
for (i = 0; i < ARRAY_SIZE(l->l); i++)
v |= atomic_read(&l->l[i]);
if (!v) {
nr_zero++;
continue;
}
if (nr_zero)
prt_printf(out, "(%u empty entries)\n", nr_zero);
nr_zero = 0;
for (i = 0; i < ARRAY_SIZE(l->l); i++)
if (atomic_read(&l->l[i]))
prt_printf(out, "%llu: %i ", l->b[i], atomic_read(&l->l[i]));
prt_newline(out);
}
if (nr_zero)
prt_printf(out, "(%u empty entries)\n", nr_zero);
}
int bch2_fs_nocow_locking_init(struct bch_fs *c)
{
unsigned i;
for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++)
spin_lock_init(&c->nocow_locks.l[i].lock);
__bch2_two_state_lock(l, flags & BUCKET_NOCOW_LOCK_UPDATE); return 0;
bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time);
} }
...@@ -2,54 +2,48 @@ ...@@ -2,54 +2,48 @@
#ifndef _BCACHEFS_NOCOW_LOCKING_H #ifndef _BCACHEFS_NOCOW_LOCKING_H
#define _BCACHEFS_NOCOW_LOCKING_H #define _BCACHEFS_NOCOW_LOCKING_H
#include "bcachefs_format.h" #include "bcachefs.h"
#include "two_state_shared_lock.h" #include "alloc_background.h"
#include "nocow_locking_types.h"
#include <linux/hash.h> #include <linux/hash.h>
#define BUCKET_NOCOW_LOCKS_BITS 10 static inline struct nocow_lock_bucket *bucket_nocow_lock(struct bucket_nocow_lock_table *t,
#define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS) u64 dev_bucket)
struct bucket_nocow_lock_table {
two_state_lock_t l[BUCKET_NOCOW_LOCKS];
};
#define BUCKET_NOCOW_LOCK_UPDATE (1 << 0)
static inline two_state_lock_t *bucket_nocow_lock(struct bucket_nocow_lock_table *t,
struct bpos bucket)
{ {
u64 dev_bucket = bucket.inode << 56 | bucket.offset;
unsigned h = hash_64(dev_bucket, BUCKET_NOCOW_LOCKS_BITS); unsigned h = hash_64(dev_bucket, BUCKET_NOCOW_LOCKS_BITS);
return t->l + (h & (BUCKET_NOCOW_LOCKS - 1)); return t->l + (h & (BUCKET_NOCOW_LOCKS - 1));
} }
static inline bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, #define BUCKET_NOCOW_LOCK_UPDATE (1 << 0)
struct bpos bucket)
{
two_state_lock_t *l = bucket_nocow_lock(t, bucket);
return atomic_long_read(&l->v) != 0; bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *, struct bpos);
} void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *, struct bpos, int);
bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *, u64, int);
void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *,
struct nocow_lock_bucket *, u64, int);
static inline void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
struct bpos bucket, int flags) struct bpos bucket, int flags)
{ {
two_state_lock_t *l = bucket_nocow_lock(t, bucket); u64 dev_bucket = bucket_to_u64(bucket);
struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
bch2_two_state_unlock(l, flags & BUCKET_NOCOW_LOCK_UPDATE); __bch2_bucket_nocow_lock(t, l, dev_bucket, flags);
} }
void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *, two_state_lock_t *, int); static inline bool bch2_bucket_nocow_trylock(struct bucket_nocow_lock_table *t,
static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
struct bpos bucket, int flags) struct bpos bucket, int flags)
{ {
two_state_lock_t *l = bucket_nocow_lock(t, bucket); u64 dev_bucket = bucket_to_u64(bucket);
struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
if (!bch2_two_state_trylock(l, flags & BUCKET_NOCOW_LOCK_UPDATE)) return __bch2_bucket_nocow_trylock(l, dev_bucket, flags);
__bch2_bucket_nocow_lock(t, l, flags);
} }
void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *);
int bch2_fs_nocow_locking_init(struct bch_fs *);
#endif /* _BCACHEFS_NOCOW_LOCKING_H */ #endif /* _BCACHEFS_NOCOW_LOCKING_H */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_NOCOW_LOCKING_TYPES_H
#define _BCACHEFS_NOCOW_LOCKING_TYPES_H
#define BUCKET_NOCOW_LOCKS_BITS 10
#define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS)
/*
 * Number of (bucket id, counter) slots in each hash table entry.
 *
 * The locking code walks the slots with ARRAY_SIZE() of one array while
 * indexing the other, so b[] and l[] must always have the same length; both
 * are sized from this single constant to keep them in step.
 */
#define NOCOW_LOCK_BUCKET_SLOTS	4

/*
 * One entry of the nocow lock hash table.
 *
 * @wait: waitlist that is woken when a slot's counter drops to zero
 * @lock: spinlock taken while a slot is looked up, claimed and incremented
 * @b:    packed bucket id that each slot currently refers to
 * @l:    signed lock counter for the matching slot in @b; zero means unused
 */
struct nocow_lock_bucket {
	struct closure_waitlist	wait;
	spinlock_t		lock;
	u64			b[NOCOW_LOCK_BUCKET_SLOTS];
	atomic_t		l[NOCOW_LOCK_BUCKET_SLOTS];
} __aligned(SMP_CACHE_BYTES);
/*
 * The nocow lock table: a fixed array of BUCKET_NOCOW_LOCKS hash table
 * entries, each a struct nocow_lock_bucket.
 */
struct bucket_nocow_lock_table {
struct nocow_lock_bucket l[BUCKET_NOCOW_LOCKS];
};
#endif /* _BCACHEFS_NOCOW_LOCKING_TYPES_H */
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "move.h" #include "move.h"
#include "migrate.h" #include "migrate.h"
#include "movinggc.h" #include "movinggc.h"
#include "nocow_locking.h"
#include "quota.h" #include "quota.h"
#include "rebalance.h" #include "rebalance.h"
#include "recovery.h" #include "recovery.h"
...@@ -821,6 +822,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) ...@@ -821,6 +822,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_btree_write_buffer_init(c) ?: bch2_fs_btree_write_buffer_init(c) ?:
bch2_fs_subvolumes_init(c) ?: bch2_fs_subvolumes_init(c) ?:
bch2_fs_io_init(c) ?: bch2_fs_io_init(c) ?:
bch2_fs_nocow_locking_init(c) ?:
bch2_fs_encryption_init(c) ?: bch2_fs_encryption_init(c) ?:
bch2_fs_compress_init(c) ?: bch2_fs_compress_init(c) ?:
bch2_fs_ec_init(c) ?: bch2_fs_ec_init(c) ?:
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "journal.h" #include "journal.h"
#include "keylist.h" #include "keylist.h"
#include "move.h" #include "move.h"
#include "nocow_locking.h"
#include "opts.h" #include "opts.h"
#include "rebalance.h" #include "rebalance.h"
#include "replicas.h" #include "replicas.h"
...@@ -477,22 +478,8 @@ SHOW(bch2_fs) ...@@ -477,22 +478,8 @@ SHOW(bch2_fs)
bch2_write_refs_to_text(out, c); bch2_write_refs_to_text(out, c);
#endif #endif
if (attr == &sysfs_nocow_lock_table) { if (attr == &sysfs_nocow_lock_table)
int i, count = 1; bch2_nocow_locks_to_text(out, &c->nocow_locks);
long last, curr = 0;
last = atomic_long_read(&c->nocow_locks.l[0].v);
for (i = 1; i < BUCKET_NOCOW_LOCKS; i++) {
curr = atomic_long_read(&c->nocow_locks.l[i].v);
if (last != curr) {
prt_printf(out, "%li: %d\n", last, count);
count = 1;
last = curr;
} else
count++;
}
prt_printf(out, "%li: %d\n", last, count);
}
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment