// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
4
#include "alloc_foreground.h"
5
#include "bkey_buf.h"
Kent Overstreet's avatar
Kent Overstreet committed
6 7
#include "btree_gc.h"
#include "btree_update.h"
8
#include "btree_update_interior.h"
Kent Overstreet's avatar
Kent Overstreet committed
9
#include "buckets.h"
10
#include "disk_groups.h"
Kent Overstreet's avatar
Kent Overstreet committed
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
#include "inode.h"
#include "io.h"
#include "journal_reclaim.h"
#include "keylist.h"
#include "move.h"
#include "replicas.h"
#include "super-io.h"
#include "trace.h"

#include <linux/ioprio.h>
#include <linux/kthread.h>

/*
 * Throttle threshold: bch2_move_extent() waits until a moving_context's
 * in-flight read and write sector counts are each below this value.
 * NOTE(review): despite the name, the visible users compare it against
 * per-context counters, not per-device ones - confirm intent.
 */
#define SECTORS_IN_FLIGHT_PER_DEVICE	2048

/*
 * Per-extent move state: one read (rbio) followed by one write (write.op) of
 * the same data. Allocated by bch2_move_extent() with room for the trailing
 * bio_vec array, which both bios are initialised to use.
 */
struct moving_io {
	/* entry on moving_context.reads, in submission order */
	struct list_head	list;
	struct closure		cl;
	/* set by move_read_endio() once the read has finished */
	bool			read_completed;

	/* amounts added to the context's in-flight counters for this io */
	unsigned		read_sectors;
	unsigned		write_sectors;

	struct bch_read_bio	rbio;

	struct migrate_write	write;
	/* Must be last since it is variable size */
	struct bio_vec		bi_inline_vecs[];
};

/*
 * Shared state for one bch2_move_data() run: tracks outstanding moving_ios
 * and the amount of IO currently in flight.
 */
struct moving_context {
	/* Closure for waiting on all reads and writes to complete */
	struct closure		cl;

	/* counters updated as extents are moved or raced */
	struct bch_move_stats	*stats;

	/* moving_ios whose write has not been issued yet (see do_pending_writes()) */
	struct list_head	reads;

	/* in flight sectors: */
	atomic_t		read_sectors;
	atomic_t		write_sectors;

	/* woken from move_read_endio() and move_free() */
	wait_queue_head_t	wait;
};

55
int bch2_migrate_index_update(struct bch_write_op *op)
Kent Overstreet's avatar
Kent Overstreet committed
56 57
{
	struct bch_fs *c = op->c;
58 59
	struct btree_trans trans;
	struct btree_iter *iter;
Kent Overstreet's avatar
Kent Overstreet committed
60 61 62
	struct migrate_write *m =
		container_of(op, struct migrate_write, op);
	struct keylist *keys = &op->insert_keys;
63
	struct bkey_buf _new, _insert;
Kent Overstreet's avatar
Kent Overstreet committed
64 65
	int ret = 0;

66 67 68 69
	bch2_bkey_buf_init(&_new);
	bch2_bkey_buf_init(&_insert);
	bch2_bkey_buf_realloc(&_insert, c, U8_MAX);

70
	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
71

Kent Overstreet's avatar
Kent Overstreet committed
72
	iter = bch2_trans_get_iter(&trans, m->btree_id,
73 74
				   bkey_start_pos(&bch2_keylist_front(keys)->k),
				   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
Kent Overstreet's avatar
Kent Overstreet committed
75 76

	while (1) {
77
		struct bkey_s_c k;
Kent Overstreet's avatar
Kent Overstreet committed
78
		struct bkey_i *insert;
79
		struct bkey_i_extent *new;
80 81
		const union bch_extent_entry *entry;
		struct extent_ptr_decoded p;
Kent Overstreet's avatar
Kent Overstreet committed
82
		bool did_work = false;
83 84
		bool extending = false, should_check_enospc;
		s64 i_sectors_delta = 0, disk_sectors_delta = 0;
Kent Overstreet's avatar
Kent Overstreet committed
85

86
		bch2_trans_begin(&trans);
87 88

		k = bch2_btree_iter_peek_slot(iter);
89
		ret = bkey_err(k);
90 91
		if (ret)
			goto err;
92 93

		new = bkey_i_to_extent(bch2_keylist_front(keys));
Kent Overstreet's avatar
Kent Overstreet committed
94 95

		if (bversion_cmp(k.k->version, new->k.version) ||
96
		    !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
Kent Overstreet's avatar
Kent Overstreet committed
97 98
			goto nomatch;

99 100
		bkey_reassemble(_insert.k, k);
		insert = _insert.k;
Kent Overstreet's avatar
Kent Overstreet committed
101

102 103
		bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys));
		new = bkey_i_to_extent(_new.k);
104
		bch2_cut_front(iter->pos, &new->k_i);
Kent Overstreet's avatar
Kent Overstreet committed
105

106 107 108
		bch2_cut_front(iter->pos,	insert);
		bch2_cut_back(new->k.p,		insert);
		bch2_cut_back(insert->k.p,	&new->k_i);
Kent Overstreet's avatar
Kent Overstreet committed
109

110 111 112 113 114 115 116 117 118 119 120 121 122
		if (m->data_cmd == DATA_REWRITE) {
			struct bch_extent_ptr *new_ptr, *old_ptr = (void *)
				bch2_bkey_has_device(bkey_i_to_s_c(insert),
						     m->data_opts.rewrite_dev);
			if (!old_ptr)
				goto nomatch;

			if (old_ptr->cached)
				extent_for_each_ptr(extent_i_to_s(new), new_ptr)
					new_ptr->cached = true;

			bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
		}
Kent Overstreet's avatar
Kent Overstreet committed
123

124
		extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
Kent Overstreet's avatar
Kent Overstreet committed
125
			if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
Kent Overstreet's avatar
Kent Overstreet committed
126 127 128 129 130 131 132 133
				/*
				 * raced with another move op? extent already
				 * has a pointer to the device we just wrote
				 * data to
				 */
				continue;
			}

Kent Overstreet's avatar
Kent Overstreet committed
134
			bch2_extent_ptr_decoded_append(insert, &p);
Kent Overstreet's avatar
Kent Overstreet committed
135 136 137 138 139 140
			did_work = true;
		}

		if (!did_work)
			goto nomatch;

Kent Overstreet's avatar
Kent Overstreet committed
141
		bch2_bkey_narrow_crcs(insert,
Kent Overstreet's avatar
Kent Overstreet committed
142
				(struct bch_extent_crc_unpacked) { 0 });
Kent Overstreet's avatar
Kent Overstreet committed
143 144 145 146
		bch2_extent_normalize(c, bkey_i_to_s(insert));
		bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert),
					       op->opts.background_target,
					       op->opts.data_replicas);
Kent Overstreet's avatar
Kent Overstreet committed
147

148 149 150 151 152 153 154
		ret = bch2_sum_sector_overwrites(&trans, iter, insert,
						 &extending,
						 &should_check_enospc,
						 &i_sectors_delta,
						 &disk_sectors_delta);
		if (ret)
			goto err;
155

156
		if (disk_sectors_delta > (s64) op->res.sectors) {
Kent Overstreet's avatar
Kent Overstreet committed
157
			ret = bch2_disk_reservation_add(c, &op->res,
158 159 160
						disk_sectors_delta - op->res.sectors,
						!should_check_enospc
						? BCH_DISK_RESERVATION_NOFAIL : 0);
Kent Overstreet's avatar
Kent Overstreet committed
161 162 163 164
			if (ret)
				goto out;
		}

165 166
		ret   = bch2_trans_update(&trans, iter, insert, 0) ?:
			bch2_trans_commit(&trans, &op->res,
167
				op_journal_seq(op),
Kent Overstreet's avatar
Kent Overstreet committed
168
				BTREE_INSERT_NOFAIL|
169
				m->data_opts.btree_insert_flags);
170
err:
Kent Overstreet's avatar
Kent Overstreet committed
171 172 173 174 175 176 177
		if (!ret)
			atomic_long_inc(&c->extent_migrate_done);
		if (ret == -EINTR)
			ret = 0;
		if (ret)
			break;
next:
178
		while (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) >= 0) {
Kent Overstreet's avatar
Kent Overstreet committed
179 180 181 182 183 184
			bch2_keylist_pop_front(keys);
			if (bch2_keylist_empty(keys))
				goto out;
		}
		continue;
nomatch:
185 186 187
		if (m->ctxt) {
			BUG_ON(k.k->p.offset <= iter->pos.offset);
			atomic64_inc(&m->ctxt->stats->keys_raced);
188
			atomic64_add(k.k->p.offset - iter->pos.offset,
Kent Overstreet's avatar
Kent Overstreet committed
189
				     &m->ctxt->stats->sectors_raced);
190
		}
Kent Overstreet's avatar
Kent Overstreet committed
191 192
		atomic_long_inc(&c->extent_migrate_raced);
		trace_move_race(&new->k);
193
		bch2_btree_iter_advance(iter);
Kent Overstreet's avatar
Kent Overstreet committed
194 195 196
		goto next;
	}
out:
197
	bch2_trans_iter_put(&trans, iter);
198
	bch2_trans_exit(&trans);
199 200
	bch2_bkey_buf_exit(&_insert, c);
	bch2_bkey_buf_exit(&_new, c);
201
	BUG_ON(ret == -EINTR);
Kent Overstreet's avatar
Kent Overstreet committed
202 203 204 205 206 207 208 209 210
	return ret;
}

/*
 * Carry the result of a completed read over to the write that follows it:
 * record which pointer was read (used later to detect races in
 * bch2_migrate_index_update()) and copy position, version and checksum/
 * compression info into the write op.
 */
void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio)
{
	/* write bio must own pages: */
	BUG_ON(!m->op.wbio.bio.bi_vcnt);

	m->ptr		= rbio->pick.ptr;
	m->offset	= rbio->data_pos.offset - rbio->pick.crc.offset;
	m->op.devs_have	= rbio->devs_have;
	m->op.pos	= rbio->data_pos;
	m->op.version	= rbio->version;
	m->op.crc	= rbio->pick.crc;
	/* write size is the compressed size, in bytes (sectors << 9) */
	m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;

	if (bch2_csum_type_is_encryption(m->op.crc.csum_type)) {
		m->op.nonce	= m->op.crc.nonce + m->op.crc.offset;
		m->op.csum_type = m->op.crc.csum_type;
	}

	/* the device being rewritten no longer counts as holding the data */
	if (m->data_cmd == DATA_REWRITE)
		bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev);
}

int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
			    struct write_point_specifier wp,
			    struct bch_io_opts io_opts,
			    enum data_cmd data_cmd,
			    struct data_opts data_opts,
Kent Overstreet's avatar
Kent Overstreet committed
232
			    enum btree_id btree_id,
Kent Overstreet's avatar
Kent Overstreet committed
233 234
			    struct bkey_s_c k)
{
235 236 237
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
Kent Overstreet's avatar
Kent Overstreet committed
238 239
	int ret;

Kent Overstreet's avatar
Kent Overstreet committed
240
	m->btree_id	= btree_id;
Kent Overstreet's avatar
Kent Overstreet committed
241 242 243 244 245
	m->data_cmd	= data_cmd;
	m->data_opts	= data_opts;
	m->nr_ptrs_reserved = 0;

	bch2_write_op_init(&m->op, c, io_opts);
246 247 248 249 250 251 252 253

	if (!bch2_bkey_is_incompressible(k))
		m->op.compression_type =
			bch2_compression_opt_to_type[io_opts.background_compression ?:
						     io_opts.compression];
	else
		m->op.incompressible = true;

Kent Overstreet's avatar
Kent Overstreet committed
254 255 256
	m->op.target	= data_opts.target,
	m->op.write_point = wp;

257
	if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) {
Kent Overstreet's avatar
Kent Overstreet committed
258
		m->op.alloc_reserve = RESERVE_MOVINGGC;
259
		m->op.flags |= BCH_WRITE_ALLOC_NOWAIT;
260 261 262 263
	} else {
		/* XXX: this should probably be passed in */
		m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
	}
Kent Overstreet's avatar
Kent Overstreet committed
264

265
	m->op.flags |= BCH_WRITE_PAGES_STABLE|
Kent Overstreet's avatar
Kent Overstreet committed
266
		BCH_WRITE_PAGES_OWNED|
267
		BCH_WRITE_DATA_ENCODED|
268 269
		BCH_WRITE_FROM_INTERNAL|
		BCH_WRITE_MOVE;
Kent Overstreet's avatar
Kent Overstreet committed
270

271 272
	m->op.nr_replicas	= data_opts.nr_replicas;
	m->op.nr_replicas_required = data_opts.nr_replicas;
Kent Overstreet's avatar
Kent Overstreet committed
273 274 275

	switch (data_cmd) {
	case DATA_ADD_REPLICAS: {
276 277 278 279 280 281
		/*
		 * DATA_ADD_REPLICAS is used for moving data to a different
		 * device in the background, and due to compression the new copy
		 * might take up more space than the old copy:
		 */
#if 0
Kent Overstreet's avatar
Kent Overstreet committed
282
		int nr = (int) io_opts.data_replicas -
283
			bch2_bkey_nr_ptrs_allocated(k);
284 285
#endif
		int nr = (int) io_opts.data_replicas;
Kent Overstreet's avatar
Kent Overstreet committed
286 287 288 289 290 291 292 293 294 295 296

		if (nr > 0) {
			m->op.nr_replicas = m->nr_ptrs_reserved = nr;

			ret = bch2_disk_reservation_get(c, &m->op.res,
					k.k->size, m->op.nr_replicas, 0);
			if (ret)
				return ret;
		}
		break;
	}
297 298 299
	case DATA_REWRITE: {
		unsigned compressed_sectors = 0;

Kent Overstreet's avatar
Kent Overstreet committed
300
		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
301 302 303
			if (p.ptr.dev == data_opts.rewrite_dev &&
			    !p.ptr.cached &&
			    crc_is_compressed(p.crc))
304 305 306 307
				compressed_sectors += p.crc.compressed_size;

		if (compressed_sectors) {
			ret = bch2_disk_reservation_add(c, &m->op.res,
308
					k.k->size * m->op.nr_replicas,
309 310 311 312
					BCH_DISK_RESERVATION_NOFAIL);
			if (ret)
				return ret;
		}
Kent Overstreet's avatar
Kent Overstreet committed
313
		break;
314
	}
Kent Overstreet's avatar
Kent Overstreet committed
315 316 317 318 319 320 321 322 323 324 325
	case DATA_PROMOTE:
		m->op.flags	|= BCH_WRITE_ALLOC_NOWAIT;
		m->op.flags	|= BCH_WRITE_CACHED;
		break;
	default:
		BUG();
	}

	return 0;
}

326
static void move_free(struct moving_io *io)
Kent Overstreet's avatar
Kent Overstreet committed
327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
{
	struct moving_context *ctxt = io->write.ctxt;
	struct bvec_iter_all iter;
	struct bio_vec *bv;

	bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);

	bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter)
		if (bv->bv_page)
			__free_page(bv->bv_page);

	wake_up(&ctxt->wait);

	kfree(io);
}

343
static void move_write_done(struct bch_write_op *op)
Kent Overstreet's avatar
Kent Overstreet committed
344
{
345 346
	struct moving_io *io = container_of(op, struct moving_io, write.op);
	struct moving_context *ctxt = io->write.ctxt;
Kent Overstreet's avatar
Kent Overstreet committed
347 348

	atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors);
349 350
	move_free(io);
	closure_put(&ctxt->cl);
Kent Overstreet's avatar
Kent Overstreet committed
351 352
}

353
static void move_write(struct moving_io *io)
Kent Overstreet's avatar
Kent Overstreet committed
354 355
{
	if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) {
356
		move_free(io);
Kent Overstreet's avatar
Kent Overstreet committed
357 358 359
		return;
	}

360
	closure_get(&io->write.ctxt->cl);
Kent Overstreet's avatar
Kent Overstreet committed
361
	atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
362 363 364

	bch2_migrate_read_done(&io->write, &io->rbio);
	closure_call(&io->write.op.cl, bch2_write, NULL, NULL);
Kent Overstreet's avatar
Kent Overstreet committed
365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
}

/*
 * Return the moving_io at the head of @ctxt->reads if its read has finished,
 * otherwise NULL. Only the head is looked at, so later entries whose reads
 * finished first are not returned until the head is done.
 */
static inline struct moving_io *next_pending_write(struct moving_context *ctxt)
{
	struct moving_io *head;

	head = list_first_entry_or_null(&ctxt->reads, struct moving_io, list);
	if (!head || !head->read_completed)
		return NULL;

	return head;
}

/*
 * bi_end_io for the read half of a move: un-account the read, flag it as
 * complete, wake the submitter if a write can now be issued, and drop the
 * context reference taken by bch2_move_extent().
 */
static void move_read_endio(struct bio *bio)
{
	struct moving_io *mio = container_of(bio, struct moving_io, rbio.bio);
	struct moving_context *mc = mio->write.ctxt;

	atomic_sub(mio->read_sectors, &mc->read_sectors);
	mio->read_completed = true;

	/* only worth a wakeup if the head of the reads list is now issuable */
	if (next_pending_write(mc))
		wake_up(&mc->wait);

	closure_put(&mc->cl);
}

/*
 * Issue writes for every moving_io at the front of @ctxt->reads whose read
 * has completed, removing each from the list first.
 */
static void do_pending_writes(struct moving_context *ctxt)
{
	struct moving_io *io;

	while ((io = next_pending_write(ctxt))) {
		list_del(&io->list);
		move_write(io);
	}
}

/*
 * Issue any writes whose reads have completed, then, unless @_cond already
 * holds, sleep on the context's waitqueue until either another write becomes
 * issuable or @_cond is true. Repeats until @_cond holds.
 *
 * @_cond is evaluated more than once, so it must be free of side effects.
 */
#define move_ctxt_wait_event(_ctxt, _cond)			\
do {								\
	do_pending_writes(_ctxt);				\
								\
	if (_cond)						\
		break;						\
	__wait_event((_ctxt)->wait,				\
		     next_pending_write(_ctxt) || (_cond));	\
} while (1)

/*
 * Wait for write progress: returns once no write sectors are in flight, or
 * once the in-flight write sector count differs from its value on entry.
 */
static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
{
	unsigned baseline;

	baseline = atomic_read(&ctxt->write_sectors);

	move_ctxt_wait_event(ctxt,
		!atomic_read(&ctxt->write_sectors) ||
		atomic_read(&ctxt->write_sectors) != baseline);
}

418
static int bch2_move_extent(struct btree_trans *trans,
Kent Overstreet's avatar
Kent Overstreet committed
419 420 421
			    struct moving_context *ctxt,
			    struct write_point_specifier wp,
			    struct bch_io_opts io_opts,
Kent Overstreet's avatar
Kent Overstreet committed
422
			    enum btree_id btree_id,
423
			    struct bkey_s_c k,
Kent Overstreet's avatar
Kent Overstreet committed
424 425 426
			    enum data_cmd data_cmd,
			    struct data_opts data_opts)
{
427
	struct bch_fs *c = trans->c;
428
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
Kent Overstreet's avatar
Kent Overstreet committed
429
	struct moving_io *io;
430 431
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
432
	unsigned sectors = k.k->size, pages;
Kent Overstreet's avatar
Kent Overstreet committed
433 434 435 436 437 438 439 440 441 442 443
	int ret = -ENOMEM;

	move_ctxt_wait_event(ctxt,
		atomic_read(&ctxt->write_sectors) <
		SECTORS_IN_FLIGHT_PER_DEVICE);

	move_ctxt_wait_event(ctxt,
		atomic_read(&ctxt->read_sectors) <
		SECTORS_IN_FLIGHT_PER_DEVICE);

	/* write path might have to decompress data: */
444
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
445
		sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);
Kent Overstreet's avatar
Kent Overstreet committed
446 447 448 449 450 451 452 453

	pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
	io = kzalloc(sizeof(struct moving_io) +
		     sizeof(struct bio_vec) * pages, GFP_KERNEL);
	if (!io)
		goto err;

	io->write.ctxt		= ctxt;
454 455
	io->read_sectors	= k.k->size;
	io->write_sectors	= k.k->size;
Kent Overstreet's avatar
Kent Overstreet committed
456 457 458 459 460 461 462 463 464

	bio_init(&io->write.op.wbio.bio, NULL, io->bi_inline_vecs, pages, 0);
	bio_set_prio(&io->write.op.wbio.bio,
		     IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

	if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9,
				 GFP_KERNEL))
		goto err_free;

465 466
	io->rbio.c		= c;
	io->rbio.opts		= io_opts;
Kent Overstreet's avatar
Kent Overstreet committed
467 468 469 470 471 472
	bio_init(&io->rbio.bio, NULL, io->bi_inline_vecs, pages, 0);
	io->rbio.bio.bi_vcnt = pages;
	bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
	io->rbio.bio.bi_iter.bi_size = sectors << 9;

	io->rbio.bio.bi_opf		= REQ_OP_READ;
473
	io->rbio.bio.bi_iter.bi_sector	= bkey_start_offset(k.k);
Kent Overstreet's avatar
Kent Overstreet committed
474 475 476
	io->rbio.bio.bi_end_io		= move_read_endio;

	ret = bch2_migrate_write_init(c, &io->write, wp, io_opts,
Kent Overstreet's avatar
Kent Overstreet committed
477
				      data_cmd, data_opts, btree_id, k);
Kent Overstreet's avatar
Kent Overstreet committed
478 479 480
	if (ret)
		goto err_free_pages;

481 482
	io->write.op.end_io = move_write_done;

Kent Overstreet's avatar
Kent Overstreet committed
483
	atomic64_inc(&ctxt->stats->keys_moved);
484
	atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
Kent Overstreet's avatar
Kent Overstreet committed
485

486
	trace_move_extent(k.k);
Kent Overstreet's avatar
Kent Overstreet committed
487 488 489 490 491 492 493 494 495

	atomic_add(io->read_sectors, &ctxt->read_sectors);
	list_add_tail(&io->list, &ctxt->reads);

	/*
	 * dropped by move_read_endio() - guards against use after free of
	 * ctxt when doing wakeup
	 */
	closure_get(&ctxt->cl);
496 497 498
	bch2_read_extent(trans, &io->rbio,
			 bkey_start_pos(k.k),
			 btree_id, k, 0,
Kent Overstreet's avatar
Kent Overstreet committed
499 500 501 502 503 504 505 506
			 BCH_READ_NODECODE|
			 BCH_READ_LAST_FRAGMENT);
	return 0;
err_free_pages:
	bio_free_pages(&io->write.op.wbio.bio);
err_free:
	kfree(io);
err:
507
	trace_move_alloc_fail(k.k);
Kent Overstreet's avatar
Kent Overstreet committed
508 509 510
	return ret;
}

511 512 513 514 515 516 517 518 519 520 521 522 523 524
static int lookup_inode(struct btree_trans *trans, struct bpos pos,
			struct bch_inode_unpacked *inode)
{
	struct btree_iter *iter;
	struct bkey_s_c k;
	int ret;

	iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, pos,
				   BTREE_ITER_ALL_SNAPSHOTS);
	k = bch2_btree_iter_peek(iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

525 526 527 528 529
	if (!k.k || bkey_cmp(k.k->p, pos)) {
		ret = -ENOENT;
		goto err;
	}

530 531 532 533 534 535 536 537 538 539 540 541
	ret = k.k->type == KEY_TYPE_inode ? 0 : -EIO;
	if (ret)
		goto err;

	ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
	if (ret)
		goto err;
err:
	bch2_trans_iter_put(trans, iter);
	return ret;
}

Kent Overstreet's avatar
Kent Overstreet committed
542 543 544 545 546 547 548 549 550
/*
 * Walk one btree from @start up to (not including keys starting at or after)
 * @end and, for each direct-data extent for which @pred returns a move
 * command, start moving it with bch2_move_extent().
 *
 * @rate, if non-NULL, throttles the walk. When run from a kthread the walk
 * stops as soon as kthread_should_stop() is true.
 *
 * Returns 0 at the end of the range, a btree iteration error, the non-zero
 * value of kthread_should_stop() when asked to stop, or the error from
 * bch2_trans_exit().
 */
static int __bch2_move_data(struct bch_fs *c,
		struct moving_context *ctxt,
		struct bch_ratelimit *rate,
		struct write_point_specifier wp,
		struct bpos start,
		struct bpos end,
		move_pred_fn pred, void *arg,
		struct bch_move_stats *stats,
		enum btree_id btree_id)
{
	bool kthread = (current->flags & PF_KTHREAD) != 0;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct bkey_buf sk;
	struct btree_trans trans;
	struct btree_iter *iter;
	struct bkey_s_c k;
	struct data_opts data_opts;
	enum data_cmd data_cmd;
	u64 delay, cur_inum = U64_MAX;
	int ret = 0, ret2;

	bch2_bkey_buf_init(&sk);
	bch2_trans_init(&trans, c, 0, 0);

	stats->data_type = BCH_DATA_user;
	stats->btree_id	= btree_id;
	stats->pos	= start;

	iter = bch2_trans_get_iter(&trans, btree_id, start,
				   BTREE_ITER_PREFETCH);

	if (rate)
		bch2_ratelimit_reset(rate);

	while (1) {
		/* rate limiting, stop and freezer checks */
		do {
			delay = rate ? bch2_ratelimit_delay(rate) : 0;

			if (delay) {
				bch2_trans_unlock(&trans);
				set_current_state(TASK_INTERRUPTIBLE);
			}

			if (kthread && (ret = kthread_should_stop())) {
				__set_current_state(TASK_RUNNING);
				goto out;
			}

			if (delay)
				schedule_timeout(delay);

			if (unlikely(freezing(current))) {
				bch2_trans_unlock(&trans);
				move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
				try_to_freeze();
			}
		} while (delay);

		bch2_trans_begin(&trans);

		k = bch2_btree_iter_peek(iter);

		stats->pos = iter->pos;

		if (!k.k)
			break;
		ret = bkey_err(k);
		if (ret)
			break;
		if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
			break;

		if (!bkey_extent_is_direct_data(k.k))
			goto next_nondata;

		/* refresh per-inode IO options when we cross into a new inode */
		if (btree_id == BTREE_ID_extents &&
		    cur_inum != k.k->p.inode) {
			struct bch_inode_unpacked inode;
			int lookup_ret;

			io_opts = bch2_opts_to_inode_opts(c->opts);

			/*
			 * A failed lookup is not fatal: we carry on with the
			 * filesystem-wide options. Keep the result out of
			 * @ret so it cannot be returned if the walk then ends
			 * on the !k.k check above.
			 */
			lookup_ret = lookup_inode(&trans,
					SPOS(0, k.k->p.inode, k.k->p.snapshot),
					&inode);
			if (lookup_ret == -EINTR)
				continue;

			if (!lookup_ret)
				bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode));

			cur_inum = k.k->p.inode;
		}

		switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {
		case DATA_SKIP:
			goto next;
		case DATA_SCRUB:
			BUG();
		case DATA_ADD_REPLICAS:
		case DATA_REWRITE:
		case DATA_PROMOTE:
			break;
		default:
			BUG();
		}

		/* unlock before doing IO: */
		bch2_bkey_buf_reassemble(&sk, c, k);
		k = bkey_i_to_s_c(sk.k);
		bch2_trans_unlock(&trans);

		ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
					data_cmd, data_opts);
		if (ret2) {
			if (ret2 == -EINTR) {
				bch2_trans_begin(&trans);
				continue;
			}

			if (ret2 == -ENOMEM) {
				/* memory allocation failure, wait for some IO to finish */
				bch2_move_ctxt_wait_for_io(ctxt);
				continue;
			}

			/* XXX signal failure */
			goto next;
		}

		if (rate)
			bch2_ratelimit_increment(rate, k.k->size);
next:
		atomic64_add(k.k->size * bch2_bkey_nr_ptrs_allocated(k),
			     &stats->sectors_seen);
next_nondata:
		bch2_btree_iter_advance(iter);
		bch2_trans_cond_resched(&trans);
	}
out:

	bch2_trans_iter_put(&trans, iter);
	ret = bch2_trans_exit(&trans) ?: ret;
	bch2_bkey_buf_exit(&sk, c);

	return ret;
}

689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712
/* Zero @stats and set its name to @name (truncated to fit stats->name). */
inline void bch_move_stats_init(struct bch_move_stats *stats, char *name)
{
	memset(stats, 0, sizeof(*stats));
	scnprintf(stats->name, sizeof(stats->name), "%s", name);
}

/* Add @stats to the filesystem's data progress list, under its lock. */
static inline void progress_list_add(struct bch_fs *c,
				     struct bch_move_stats *stats)
{
	struct mutex *lock = &c->data_progress_lock;

	mutex_lock(lock);
	list_add(&stats->list, &c->data_progress_list);
	mutex_unlock(lock);
}

/* Remove @stats from the filesystem's data progress list, under its lock. */
static inline void progress_list_del(struct bch_fs *c,
				     struct bch_move_stats *stats)
{
	struct mutex *lock = &c->data_progress_lock;

	mutex_lock(lock);
	list_del(&stats->list);
	mutex_unlock(lock);
}

Kent Overstreet's avatar
Kent Overstreet committed
713
/*
 * Move user data in the extents and reflink btrees between
 * (@start_btree_id, @start_pos) and (@end_btree_id, @end_pos), as selected
 * by @pred. Other btree ids in the range are skipped.
 *
 * Waits for all reads and writes it started before returning.
 *
 * Returns 0 on success (including when no btree in the range was walked), or
 * the first error returned by __bch2_move_data().
 */
int bch2_move_data(struct bch_fs *c,
		   enum btree_id start_btree_id, struct bpos start_pos,
		   enum btree_id end_btree_id,   struct bpos end_pos,
		   struct bch_ratelimit *rate,
		   struct write_point_specifier wp,
		   move_pred_fn pred, void *arg,
		   struct bch_move_stats *stats)
{
	struct moving_context ctxt = { .stats = stats };
	enum btree_id id;
	/* must start at 0: the loop below may never call __bch2_move_data() */
	int ret = 0;

	progress_list_add(c, stats);
	closure_init_stack(&ctxt.cl);
	INIT_LIST_HEAD(&ctxt.reads);
	init_waitqueue_head(&ctxt.wait);

	stats->data_type = BCH_DATA_user;

	for (id = start_btree_id;
	     id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
	     id++) {
		stats->btree_id = id;

		if (id != BTREE_ID_extents &&
		    id != BTREE_ID_reflink)
			continue;

		/* only the first and last btree are bounded by the given positions */
		ret = __bch2_move_data(c, &ctxt, rate, wp,
				       id == start_btree_id ? start_pos : POS_MIN,
				       id == end_btree_id   ? end_pos   : POS_MAX,
				       pred, arg, stats, id);
		if (ret)
			break;
	}


	/* flush remaining writes, then wait for everything in flight */
	move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
	closure_sync(&ctxt.cl);

	EBUG_ON(atomic_read(&ctxt.write_sectors));

	trace_move_data(c,
			atomic64_read(&stats->sectors_moved),
			atomic64_read(&stats->keys_moved));

	progress_list_del(c, stats);
	return ret;
}

763 764 765 766
/*
 * Predicate used by bch2_move_btree(): called once per btree node, returns
 * the data_cmd to apply to that node and may fill in the data_opts.
 */
typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *,
					 struct btree *, struct bch_io_opts *,
					 struct data_opts *);

Kent Overstreet's avatar
Kent Overstreet committed
767
/*
 * Walk btree nodes from (@start_btree_id, @start_pos) to
 * (@end_btree_id, @end_pos) and rewrite every node for which @pred returns
 * DATA_ADD_REPLICAS or DATA_REWRITE.
 *
 * A failed node rewrite does not stop the walk; the error is remembered and
 * returned at the end. When run from a kthread the walk stops once
 * kthread_should_stop() is true.
 *
 * Returns 0 on success or a negative error code, which is also logged.
 */
static int bch2_move_btree(struct bch_fs *c,
			   enum btree_id start_btree_id, struct bpos start_pos,
			   enum btree_id end_btree_id,   struct bpos end_pos,
			   move_btree_pred pred, void *arg,
			   struct bch_move_stats *stats)
{
	bool kthread = (current->flags & PF_KTHREAD) != 0;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct btree_trans trans;
	struct btree_iter *iter;
	struct btree *b;
	enum btree_id id;
	struct data_opts data_opts;
	enum data_cmd cmd;
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);
	progress_list_add(c, stats);

	stats->data_type = BCH_DATA_btree;

	for (id = start_btree_id;
	     id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
	     id++) {
		stats->btree_id = id;

		for_each_btree_node(&trans, iter, id,
				    id == start_btree_id ? start_pos : POS_MIN,
				    BTREE_ITER_PREFETCH, b) {
			if (kthread && kthread_should_stop())
				break;

			/* past (end_btree_id, end_pos)? */
			if ((cmp_int(id, end_btree_id) ?:
			     bpos_cmp(b->key.k.p, end_pos)) > 0)
				break;

			stats->pos = iter->pos;

			switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) {
			case DATA_SKIP:
				goto next;
			case DATA_SCRUB:
				BUG();
			case DATA_ADD_REPLICAS:
			case DATA_REWRITE:
				break;
			default:
				BUG();
			}

			/* a new error replaces an earlier one; success keeps it */
			ret = bch2_btree_node_rewrite(&trans, iter,
					b->data->keys.seq, 0) ?: ret;
next:
			bch2_trans_cond_resched(&trans);
		}

		ret = bch2_trans_iter_free(&trans, iter) ?: ret;
		if (kthread && kthread_should_stop())
			break;
	}

	bch2_trans_exit(&trans);

	if (ret)
		bch_err(c, "error %i in bch2_move_btree", ret);

	progress_list_del(c, stats);
	return ret;
}

/* Compiled out: predicate that would select every key for DATA_SCRUB. */
#if 0
static enum data_cmd scrub_pred(struct bch_fs *c, void *arg,
				struct bkey_s_c k,
				struct bch_io_opts *io_opts,
				struct data_opts *data_opts)
{
	return DATA_SCRUB;
}
#endif

static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
848
				      struct bkey_s_c k,
Kent Overstreet's avatar
Kent Overstreet committed
849 850 851
				      struct bch_io_opts *io_opts,
				      struct data_opts *data_opts)
{
852 853 854 855 856 857 858 859 860 861 862
	unsigned nr_good = bch2_bkey_durability(c, k);
	unsigned replicas = 0;

	switch (k.k->type) {
	case KEY_TYPE_btree_ptr:
		replicas = c->opts.metadata_replicas;
		break;
	case KEY_TYPE_extent:
		replicas = io_opts->data_replicas;
		break;
	}
Kent Overstreet's avatar
Kent Overstreet committed
863 864 865 866 867

	if (!nr_good || nr_good >= replicas)
		return DATA_SKIP;

	data_opts->target		= 0;
868
	data_opts->nr_replicas		= 1;
869
	data_opts->btree_insert_flags	= 0;
Kent Overstreet's avatar
Kent Overstreet committed
870 871 872 873
	return DATA_ADD_REPLICAS;
}

static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
874
				  struct bkey_s_c k,
Kent Overstreet's avatar
Kent Overstreet committed
875 876 877 878 879
				  struct bch_io_opts *io_opts,
				  struct data_opts *data_opts)
{
	struct bch_ioctl_data *op = arg;

880
	if (!bch2_bkey_has_device(k, op->migrate.dev))
Kent Overstreet's avatar
Kent Overstreet committed
881 882 883
		return DATA_SKIP;

	data_opts->target		= 0;
884
	data_opts->nr_replicas		= 1;
Kent Overstreet's avatar
Kent Overstreet committed
885 886 887 888 889
	data_opts->btree_insert_flags	= 0;
	data_opts->rewrite_dev		= op->migrate.dev;
	return DATA_REWRITE;
}

890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905
static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg,
					    struct btree *b,
					    struct bch_io_opts *io_opts,
					    struct data_opts *data_opts)
{
	return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}

static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg,
					struct btree *b,
					struct bch_io_opts *io_opts,
					struct data_opts *data_opts)
{
	return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}

906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929
/*
 * Check whether packed key format @f can describe field values that do not
 * fit in the current unpacked key format (bch2_bkey_format_current).
 *
 * For each of the f->nr_fields fields this returns true if:
 *  - the packed field is wider than the corresponding unpacked field,
 *  - the packed field is exactly as wide and has a nonzero field offset, or
 *  - field offset plus the largest packed value, truncated to the unpacked
 *    width, ends up below the field offset (i.e. the sum wrapped).
 *
 * Returns false when no field trips any of these checks.
 */
static bool bformat_needs_redo(struct bkey_format *f)
{
	unsigned i;

	for (i = 0; i < f->nr_fields; i++) {
		unsigned packed_bits = f->bits_per_field[i];
		unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
		/*
		 * All-ones mask of unpacked_bits bits; the shift is split in
		 * two so a 64 bit field does not shift by the full type width.
		 * NOTE(review): assumes unpacked_bits is never 0 - confirm
		 * against the definition of bch2_bkey_format_current.
		 */
		u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
		u64 field_offset = le64_to_cpu(f->field_offset[i]);
		u64 packed_max;

		if (packed_bits > unpacked_bits)
			return true;

		if ((packed_bits == unpacked_bits) && field_offset)
			return true;

		/*
		 * Largest value a packed_bits wide field can hold.  The
		 * previous (1ULL << bits) - 1 form shifted by 64 for a 64 bit
		 * field, which is undefined behaviour; use the same split
		 * shift as above, and treat a zero width field as holding
		 * only 0.
		 */
		packed_max = packed_bits
			? ~((~0ULL << 1) << (packed_bits - 1))
			: 0;

		if (((field_offset + packed_max) & unpacked_mask) <
		    field_offset)
			return true;
	}

	return false;
}

930 931 932 933 934 935
/*
 * Btree-node move predicate used by bch2_scan_old_btree_nodes().
 *
 * A node is selected for rewrite when any of the following holds (checked in
 * this order, stopping at the first hit):
 *  - its on-disk version differs from the superblock version (c->sb.version),
 *  - btree_node_need_rewrite() is set for it,
 *  - bformat_needs_redo() flags its key format.
 *
 * For a selected node @data_opts is filled in (no target, one replica, no
 * extra btree insert flags) and DATA_REWRITE is returned; otherwise
 * DATA_SKIP.  @arg and @io_opts are not used.
 */
static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
					    struct btree *b,
					    struct bch_io_opts *io_opts,
					    struct data_opts *data_opts)
{
	bool needs_rewrite = b->version_ondisk != c->sb.version ||
			     btree_node_need_rewrite(b) ||
			     bformat_needs_redo(&b->format);

	if (!needs_rewrite)
		return DATA_SKIP;

	data_opts->target		= 0;
	data_opts->nr_replicas		= 1;
	data_opts->btree_insert_flags	= 0;
	return DATA_REWRITE;
}

947 948 949 950 951 952
int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
{
	int ret;

	ret = bch2_move_btree(c,
			      0,		POS_MIN,
953
			      BTREE_ID_NR,	SPOS_MAX,
954 955 956
			      rewrite_old_nodes_pred, c, stats);
	if (!ret) {
		mutex_lock(&c->sb_lock);
957 958
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
959 960 961 962 963 964 965 966
		c->disk_sb.sb->version_min = c->disk_sb.sb->version;
		bch2_write_super(c);
		mutex_unlock(&c->sb_lock);
	}

	return ret;
}

Kent Overstreet's avatar
Kent Overstreet committed
967 968 969 970 971 972 973 974
int bch2_data_job(struct bch_fs *c,
		  struct bch_move_stats *stats,
		  struct bch_ioctl_data op)
{
	int ret = 0;

	switch (op.op) {
	case BCH_DATA_OP_REREPLICATE:
975
		bch_move_stats_init(stats, "rereplicate");
976
		stats->data_type = BCH_DATA_journal;
Kent Overstreet's avatar
Kent Overstreet committed
977 978
		ret = bch2_journal_flush_device_pins(&c->journal, -1);

979 980 981 982
		ret = bch2_move_btree(c,
				      op.start_btree,	op.start_pos,
				      op.end_btree,	op.end_pos,
				      rereplicate_btree_pred, c, stats) ?: ret;
983

984 985
		closure_wait_event(&c->btree_interior_update_wait,
				   !bch2_btree_interior_updates_nr_pending(c));
986

987
		ret = bch2_replicas_gc2(c) ?: ret;
Kent Overstreet's avatar
Kent Overstreet committed
988

989 990 991 992
		ret = bch2_move_data(c,
				     op.start_btree,	op.start_pos,
				     op.end_btree,	op.end_pos,
				     NULL, writepoint_hashed((unsigned long) current),
Kent Overstreet's avatar
Kent Overstreet committed
993
				     rereplicate_pred, c, stats) ?: ret;
994
		ret = bch2_replicas_gc2(c) ?: ret;
Kent Overstreet's avatar
Kent Overstreet committed
995 996 997 998 999
		break;
	case BCH_DATA_OP_MIGRATE:
		if (op.migrate.dev >= c->sb.nr_devices)
			return -EINVAL;

1000
		bch_move_stats_init(stats, "migrate");
1001
		stats->data_type = BCH_DATA_journal;
Kent Overstreet's avatar
Kent Overstreet committed
1002 1003
		ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);

1004 1005 1006 1007
		ret = bch2_move_btree(c,
				      op.start_btree,	op.start_pos,
				      op.end_btree,	op.end_pos,
				      migrate_btree_pred, &op, stats) ?: ret;
1008
		ret = bch2_replicas_gc2(c) ?: ret;
Kent Overstreet's avatar
Kent Overstreet committed
1009

1010 1011 1012 1013
		ret = bch2_move_data(c,
				     op.start_btree,	op.start_pos,
				     op.end_btree,	op.end_pos,
				     NULL, writepoint_hashed((unsigned long) current),
Kent Overstreet's avatar
Kent Overstreet committed
1014
				     migrate_pred, &op, stats) ?: ret;
1015
		ret = bch2_replicas_gc2(c) ?: ret;
Kent Overstreet's avatar
Kent Overstreet committed
1016
		break;
1017
	case BCH_DATA_OP_REWRITE_OLD_NODES:
1018
		bch_move_stats_init(stats, "rewrite_old_nodes");
1019
		ret = bch2_scan_old_btree_nodes(c, stats);
1020
		break;
Kent Overstreet's avatar
Kent Overstreet committed
1021 1022 1023 1024 1025 1026
	default:
		ret = -EINVAL;
	}

	return ret;
}