Commit 1d18b5ca authored by Kent Overstreet's avatar Kent Overstreet

bcachefs: __bch2_read(): call trans_begin() on every loop iter

perusal of /sys/kernel/debug/bcachefs/*/btree_transaction_stats shows
that the read path has been acculumalating unneeded paths on the reflink
btree, which we don't want.

The solution is to call bch2_trans_begin(), which drops paths not used
on previous loop iteration.

bch2_readahead:
  Max mem used: 0
  Transaction duration:
    count:      194235
                           since mount        recent
    duration of events
      min:                      150 ns
      max:                        9 ms
      total:                    838 ms
      mean:                       4 us          6 us
      stddev:                    34 us          7 us
    time between events
      min:                       10 ns
      max:                       15 h
      mean:                       2 s          12 s
      stddev:                     2 s           3 ms
  Maximum allocated btree paths (193):
    path: idx  2 ref 0:0 P   btree=extents l=0 pos 270943112:392:U32_MAX locks 0
    path: idx  3 ref 1:0   S btree=extents l=0 pos 270943112:24578:U32_MAX locks 1
    path: idx  4 ref 0:0 P   btree=reflink l=0 pos 0:24773509:0 locks 0
    path: idx  5 ref 0:0 P S btree=reflink l=0 pos 0:24773631:0 locks 1
    path: idx  6 ref 0:0 P S btree=reflink l=0 pos 0:24773759:0 locks 1
    path: idx  7 ref 0:0 P S btree=reflink l=0 pos 0:24773887:0 locks 1
    path: idx  8 ref 0:0 P S btree=reflink l=0 pos 0:24774015:0 locks 1
    path: idx  9 ref 0:0 P S btree=reflink l=0 pos 0:24774143:0 locks 1
    path: idx 10 ref 0:0 P S btree=reflink l=0 pos 0:24774271:0 locks 1
<many more reflink paths>
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 114f530e
......@@ -1147,34 +1147,27 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
struct btree_iter iter;
struct bkey_buf sk;
struct bkey_s_c k;
u32 snapshot;
int ret;
BUG_ON(flags & BCH_READ_NODECODE);
bch2_bkey_buf_init(&sk);
retry:
bch2_trans_begin(trans);
iter = (struct btree_iter) { NULL };
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
SPOS(inum.inum, bvec_iter.bi_sector, snapshot),
POS(inum.inum, bvec_iter.bi_sector),
BTREE_ITER_slots);
while (1) {
unsigned bytes, sectors, offset_into_extent;
enum btree_id data_btree = BTREE_ID_extents;
/*
* read_extent -> io_time_reset may cause a transaction restart
* without returning an error, we need to check for that here:
*/
ret = bch2_trans_relock(trans);
bch2_trans_begin(trans);
u32 snapshot;
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
break;
goto err;
bch2_btree_iter_set_snapshot(&iter, snapshot);
bch2_btree_iter_set_pos(&iter,
POS(inum.inum, bvec_iter.bi_sector));
......@@ -1182,7 +1175,7 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret)
break;
goto err;
offset_into_extent = iter.pos.offset -
bkey_start_offset(k.k);
......@@ -1193,7 +1186,7 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
ret = bch2_read_indirect_extent(trans, &data_btree,
&offset_into_extent, &sk);
if (ret)
break;
goto err;
k = bkey_i_to_s_c(sk.k);
......@@ -1213,7 +1206,7 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
data_btree, k,
offset_into_extent, failed, flags);
if (ret)
break;
goto err;
if (flags & BCH_READ_LAST_FRAGMENT)
break;
......@@ -1223,16 +1216,16 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
ret = btree_trans_too_many_iters(trans);
if (ret)
goto err;
err:
if (ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
ret != READ_RETRY &&
ret != READ_RETRY_AVOID)
break;
}
err:
bch2_trans_iter_exit(trans, &iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
ret == READ_RETRY ||
ret == READ_RETRY_AVOID)
goto retry;
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
bch2_bkey_buf_exit(&sk, c);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment