Commit e34820f9 authored by Mingzhe Zou's avatar Mingzhe Zou Committed by Jens Axboe

bcache: fixup lock c->root error

We had a problem with io hung because it was waiting for c->root to
release the lock.

crash> cache_set.root -l cache_set.list ffffa03fde4c0050
  root = 0xffff802ef454c800
crash> btree -o 0xffff802ef454c800 | grep rw_semaphore
  [ffff802ef454c858] struct rw_semaphore lock;
crash> struct rw_semaphore ffff802ef454c858
struct rw_semaphore {
  count = {
    counter = -4294967297
  },
  wait_list = {
    next = 0xffff00006786fc28,
    prev = 0xffff00005d0efac8
  },
  wait_lock = {
    raw_lock = {
      {
        val = {
          counter = 0
        },
        {
          locked = 0 '\000',
          pending = 0 '\000'
        },
        {
          locked_pending = 0,
          tail = 0
        }
      }
    }
  },
  osq = {
    tail = {
      counter = 0
    }
  },
  owner = 0xffffa03fdc586603
}

The "counter = -4294967297" means that lock count is -1 and a write lock
is being attempted. Then, we found that there is a btree with a counter
of 1 in btree_cache_freeable.

crash> cache_set -l cache_set.list ffffa03fde4c0050 -o|grep btree_cache
  [ffffa03fde4c1140] struct list_head btree_cache;
  [ffffa03fde4c1150] struct list_head btree_cache_freeable;
  [ffffa03fde4c1160] struct list_head btree_cache_freed;
  [ffffa03fde4c1170] unsigned int btree_cache_used;
  [ffffa03fde4c1178] wait_queue_head_t btree_cache_wait;
  [ffffa03fde4c1190] struct task_struct *btree_cache_alloc_lock;
crash> list -H ffffa03fde4c1140|wc -l
973
crash> list -H ffffa03fde4c1150|wc -l
1123
crash> cache_set.btree_cache_used -l cache_set.list ffffa03fde4c0050
  btree_cache_used = 2097
crash> list -s btree -l btree.list -H ffffa03fde4c1140|grep -E -A2 "^  lock = {" > btree_cache.txt
crash> list -s btree -l btree.list -H ffffa03fde4c1150|grep -E -A2 "^  lock = {" > btree_cache_freeable.txt
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# pwd
/var/crash/127.0.0.1-2023-08-04-16:40:28
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache.txt|grep counter|grep -v "counter = 0"
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache_freeable.txt|grep counter|grep -v "counter = 0"
      counter = 1

We found that this is a bug in bch_sectors_dirty_init() when locking c->root:
    (1). Thread X has locked c->root(A) write.
    (2). Thread Y failed to lock c->root(A), waiting for the lock(c->root A).
    (3). Thread X bch_btree_set_root() changes c->root from A to B.
    (4). Thread X releases the lock(c->root A).
    (5). Thread Y successfully locks c->root(A).
    (6). Thread Y releases the lock(c->root B).

        down_write locked ---(1)----------------------┐
                |                                     |
                |   down_read waiting ---(2)----┐     |
                |           |               ┌-------------┐ ┌-------------┐
        bch_btree_set_root ===(3)========>> | c->root   A | | c->root   B |
                |           |               └-------------┘ └-------------┘
            up_write ---(4)---------------------┘     |            |
                            |                         |            |
                    down_read locked ---(5)-----------┘            |
                            |                                      |
                        up_read ---(6)-----------------------------┘

Since c->root may change, the correct steps to lock c->root should be
the same as bch_root_usage(), compare after locking.

static unsigned int bch_root_usage(struct cache_set *c)
{
        unsigned int bytes = 0;
        struct bkey *k;
        struct btree *b;
        struct btree_iter iter;

        goto lock_root;

        do {
                rw_unlock(false, b);
lock_root:
                b = c->root;
                rw_lock(false, b, b->level);
        } while (b != c->root);

        for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
                bytes += bkey_bytes(k);

        rw_unlock(false, b);

        return (bytes * 100) / btree_bytes(c);
}

Fixes: b144e45f ("bcache: make bch_sectors_dirty_init() to be multithreaded")
Signed-off-by: default avatarMingzhe Zou <mingzhe.zou@easystack.cn>
Cc:  <stable@vger.kernel.org>
Signed-off-by: default avatarColy Li <colyli@suse.de>
Link: https://lore.kernel.org/r/20231120052503.6122-7-colyli@suse.deSigned-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 7cc47e64
...@@ -977,14 +977,22 @@ static int bch_btre_dirty_init_thread_nr(void) ...@@ -977,14 +977,22 @@ static int bch_btre_dirty_init_thread_nr(void)
void bch_sectors_dirty_init(struct bcache_device *d) void bch_sectors_dirty_init(struct bcache_device *d)
{ {
int i; int i;
struct btree *b = NULL;
struct bkey *k = NULL; struct bkey *k = NULL;
struct btree_iter iter; struct btree_iter iter;
struct sectors_dirty_init op; struct sectors_dirty_init op;
struct cache_set *c = d->c; struct cache_set *c = d->c;
struct bch_dirty_init_state state; struct bch_dirty_init_state state;
retry_lock:
b = c->root;
rw_lock(0, b, b->level);
if (b != c->root) {
rw_unlock(0, b);
goto retry_lock;
}
/* Just count root keys if no leaf node */ /* Just count root keys if no leaf node */
rw_lock(0, c->root, c->root->level);
if (c->root->level == 0) { if (c->root->level == 0) {
bch_btree_op_init(&op.op, -1); bch_btree_op_init(&op.op, -1);
op.inode = d->id; op.inode = d->id;
...@@ -997,7 +1005,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) ...@@ -997,7 +1005,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
sectors_dirty_init_fn(&op.op, c->root, k); sectors_dirty_init_fn(&op.op, c->root, k);
} }
rw_unlock(0, c->root); rw_unlock(0, b);
return; return;
} }
...@@ -1033,7 +1041,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) ...@@ -1033,7 +1041,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
out: out:
/* Must wait for all threads to stop. */ /* Must wait for all threads to stop. */
wait_event(state.wait, atomic_read(&state.started) == 0); wait_event(state.wait, atomic_read(&state.started) == 0);
rw_unlock(0, c->root); rw_unlock(0, b);
} }
void bch_cached_dev_writeback_init(struct cached_dev *dc) void bch_cached_dev_writeback_init(struct cached_dev *dc)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment