super.c 69.1 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
Chris Mason's avatar
Chris Mason committed
2 3 4 5
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

6
#include <linux/blkdev.h>
7 8 9 10 11 12
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
13
#include <linux/seq_file.h>
14 15
#include <linux/string.h>
#include <linux/backing-dev.h>
16
#include <linux/mount.h>
Chris Mason's avatar
Chris Mason committed
17
#include <linux/writeback.h>
Chris Mason's avatar
Chris Mason committed
18
#include <linux/statfs.h>
Chris Mason's avatar
Chris Mason committed
19
#include <linux/compat.h>
20
#include <linux/parser.h>
21
#include <linux/ctype.h>
22
#include <linux/namei.h>
23
#include <linux/miscdevice.h>
24
#include <linux/magic.h>
25
#include <linux/slab.h>
26
#include <linux/cleancache.h>
27
#include <linux/ratelimit.h>
28
#include <linux/crc32c.h>
29
#include <linux/btrfs.h>
30
#include "delayed-inode.h"
31
#include "ctree.h"
Chris Mason's avatar
Chris Mason committed
32
#include "disk-io.h"
33
#include "transaction.h"
34
#include "btrfs_inode.h"
Chris Mason's avatar
Chris Mason committed
35
#include "print-tree.h"
36
#include "props.h"
Josef Bacik's avatar
Josef Bacik committed
37
#include "xattr.h"
38
#include "volumes.h"
Balaji Rao's avatar
Balaji Rao committed
39
#include "export.h"
40
#include "compression.h"
41
#include "rcu-string.h"
42
#include "dev-replace.h"
43
#include "free-space-cache.h"
44
#include "backref.h"
45
#include "space-info.h"
46
#include "sysfs.h"
47
#include "tests/btrfs-tests.h"
48
#include "block-group.h"
49
#include "discard.h"
50

51
#include "qgroup.h"
52 53 54
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>

55
static const struct super_operations btrfs_super_ops;
56 57 58 59 60 61

/*
 * Types for mounting the default subvolume and a subvolume explicitly
 * requested by subvol=/path. That way the callchain is straightforward and we
 * don't have to play tricks with the mount options and recursive calls to
 * btrfs_mount.
 *
 * The new btrfs_root_fs_type also serves as a tag for the bdev_holder.
 */
65
static struct file_system_type btrfs_fs_type;
66
static struct file_system_type btrfs_root_fs_type;
Chris Mason's avatar
Chris Mason committed
67

68 69
static int btrfs_remount(struct super_block *sb, int *flags, char *data);

70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
/*
 * Generally the error codes correspond to their respective errors, but there
 * are a few special cases.
 *
 * EUCLEAN: Any sort of corruption that we encounter.  The tree-checker for
 *          instance will return EUCLEAN if any of the blocks are corrupted in
 *          a way that is problematic.  We want to reserve EUCLEAN for these
 *          sort of corruptions.
 *
 * EROFS: If we check BTRFS_FS_STATE_ERROR and fail out with a return error, we
 *        need to use EROFS for this case.  We will have no idea of the
 *        original failure, that will have been reported at the time we tripped
 *        over the error.  Each subsequent error that doesn't have any context
 *        of the original error should use EROFS when handling BTRFS_FS_STATE_ERROR.
 */
85
/*
 * Translate a negative errno value into a short description suitable for
 * log messages.  Values without a dedicated entry are reported as "unknown".
 *
 * The returned pointer refers to a string literal; callers must neither
 * modify nor free it.
 */
const char * __attribute_const__ btrfs_decode_error(int errno)
{
	/* Only string literals are stored here, so keep the pointer const. */
	const char *errstr = "unknown";

	switch (errno) {
	case -ENOENT:		/* -2 */
		errstr = "No such entry";
		break;
	case -EIO:		/* -5 */
		errstr = "IO failure";
		break;
	case -ENOMEM:		/* -12 */
		errstr = "Out of memory";
		break;
	case -EEXIST:		/* -17 */
		errstr = "Object already exists";
		break;
	case -ENOSPC:		/* -28 */
		errstr = "No space left";
		break;
	case -EROFS:		/* -30 */
		errstr = "Readonly filesystem";
		break;
	case -EOPNOTSUPP:	/* -95 */
		errstr = "Operation not supported";
		break;
	case -EUCLEAN:		/* -117 */
		errstr = "Filesystem corrupted";
		break;
	case -EDQUOT:		/* -122 */
		errstr = "Quota exceeded";
		break;
	}

	return errstr;
}

/*
123
 * __btrfs_handle_fs_error decodes expected errors from the caller and
124
 * invokes the appropriate error response.
125
 */
126
__cold
127
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
128
		       unsigned int line, int errno, const char *fmt, ...)
129 130
{
	struct super_block *sb = fs_info->sb;
131
#ifdef CONFIG_PRINTK
132
	const char *errstr;
133
#endif
134 135 136

	/*
	 * Special case: if the error is EROFS, and we're already
137
	 * under SB_RDONLY, then it is safe here.
138
	 */
139
	if (errno == -EROFS && sb_rdonly(sb))
140 141
  		return;

142
#ifdef CONFIG_PRINTK
143
	errstr = btrfs_decode_error(errno);
144
	if (fmt) {
145 146 147 148 149 150
		struct va_format vaf;
		va_list args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
151

152
		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
153
			sb->s_id, function, line, errno, errstr, &vaf);
154
		va_end(args);
155
	} else {
156
		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
157
			sb->s_id, function, line, errno, errstr);
158
	}
159
#endif
160

161 162 163 164 165 166
	/*
	 * Today we only save the error info to memory.  Long term we'll
	 * also send it down to the disk
	 */
	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);

167
	/* Don't go through full error handling during mount */
168 169 170 171 172 173
	if (!(sb->s_flags & SB_BORN))
		return;

	if (sb_rdonly(sb))
		return;

174 175
	btrfs_discard_stop(fs_info);

176 177 178 179 180 181 182 183
	/* btrfs handle error by forcing the filesystem readonly */
	sb->s_flags |= SB_RDONLY;
	btrfs_info(fs_info, "forced readonly");
	/*
	 * Note that a running device replace operation is not canceled here
	 * although there is no way to update the progress. It would add the
	 * risk of a deadlock, therefore the canceling is omitted. The only
	 * penalty is that some I/O remains active until the procedure
184
	 * completes. The next time when the filesystem is mounted writable
185 186
	 * again, the device replace operation continues.
	 */
187
}
188

189
#ifdef CONFIG_PRINTK
190
/* Human-readable names of the printk levels, indexed by level digit 0-7. */
static const char * const logtypes[] = {
	[0] = "emergency",
	[1] = "alert",
	[2] = "critical",
	[3] = "error",
	[4] = "warning",
	[5] = "notice",
	[6] = "info",
	[7] = "debug",
};

201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216

/*
 * One ratelimit state per log level, indexed like logtypes[], so that a
 * flood of less important messages doesn't cause more important ones to be
 * dropped.  Every level uses the default interval with a burst of 100.
 */
static struct ratelimit_state printk_limits[] = {
	[0] = RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
	[1] = RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
	[2] = RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
	[3] = RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
	[4] = RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
	[5] = RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
	[6] = RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
	[7] = RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
};

217
/*
 * Print a message as "BTRFS <level name> (device <id>): <message>".
 *
 * Leading printk level headers in @fmt are consumed.  A numeric level
 * ('0'..'7') selects both the level name and the per-level ratelimit state;
 * without one the "warning" slot (index 4) is used.  A NULL @fs_info is
 * reported as device "<unknown>".
 */
void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
	struct va_format vaf;
	va_list args;
	int kern_level;
	int idx = 4;	/* slot in logtypes[] and printk_limits[] */

	va_start(args, fmt);

	while ((kern_level = printk_get_level(fmt)) != 0) {
		const char *rest = printk_skip_level(fmt);
		size_t hdr_len = rest - fmt;

		/* Only numeric levels are remembered; other headers are dropped. */
		if (kern_level >= '0' && kern_level <= '7') {
			memcpy(lvl, fmt, hdr_len);
			lvl[hdr_len] = '\0';
			idx = kern_level - '0';
		}
		fmt = rest;
	}

	vaf.fmt = fmt;
	vaf.va = &args;

	if (__ratelimit(&printk_limits[idx]))
		printk("%sBTRFS %s (device %s): %pV\n", lvl, logtypes[idx],
			fs_info ? fs_info->sb->s_id : "<unknown>", &vaf);

	va_end(args);
}
#endif
250

251 252 253 254 255 256 257 258 259 260 261 262 263
/*
 * We only mark the transaction aborted and then set the file system read-only.
 * This will prevent new transactions from starting or trying to join this
 * one.
 *
 * This means that error recovery at the call site is limited to freeing
 * any local memory allocations and passing the error code up without
 * further cleanup. The transaction should complete as it normally would
 * in the call path but will return -EIO.
 *
 * We'll complete the cleanup in btrfs_end_transaction and
 * btrfs_commit_transaction.
 */
264
__cold
265
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
266
			       const char *function,
267 268
			       unsigned int line, int errno)
{
269 270
	struct btrfs_fs_info *fs_info = trans->fs_info;

271
	WRITE_ONCE(trans->aborted, errno);
272 273
	/* Nothing used. The other threads that have joined this
	 * transaction may be able to continue. */
274
	if (!trans->dirty && list_empty(&trans->new_bgs)) {
275 276
		const char *errstr;

277
		errstr = btrfs_decode_error(errno);
278
		btrfs_warn(fs_info,
279 280
		           "%s:%d: Aborting unused transaction(%s).",
		           function, line, errstr);
281
		return;
282
	}
283
	WRITE_ONCE(trans->transaction->aborted, errno);
284
	/* Wake up anybody who may be waiting on this transaction */
285 286 287
	wake_up(&fs_info->transaction_wait);
	wake_up(&fs_info->transaction_blocked_wait);
	__btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
288
}
Jeff Mahoney's avatar
Jeff Mahoney committed
289 290 291 292
/*
 * __btrfs_panic decodes unexpected, fatal errors from the caller,
 * issues an alert, and either panics or BUGs, depending on mount options.
 */
293
__cold
Jeff Mahoney's avatar
Jeff Mahoney committed
294 295 296 297 298 299 300
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
		   unsigned int line, int errno, const char *fmt, ...)
{
	char *s_id = "<unknown>";
	const char *errstr;
	struct va_format vaf = { .fmt = fmt };
	va_list args;
301

Jeff Mahoney's avatar
Jeff Mahoney committed
302 303
	if (fs_info)
		s_id = fs_info->sb->s_id;
304

Jeff Mahoney's avatar
Jeff Mahoney committed
305 306 307
	va_start(args, fmt);
	vaf.va = &args;

308
	errstr = btrfs_decode_error(errno);
309
	if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR)))
310 311
		panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
			s_id, function, line, &vaf, errno, errstr);
Jeff Mahoney's avatar
Jeff Mahoney committed
312

313 314
	btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
		   function, line, &vaf, errno, errstr);
Jeff Mahoney's avatar
Jeff Mahoney committed
315 316
	va_end(args);
	/* Caller calls BUG() */
317 318
}

319
/* Hand the btrfs state attached to @sb over to close_ctree(). */
static void btrfs_put_super(struct super_block *sb)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);

	close_ctree(fs_info);
}

324
/*
 * Mount option tokens returned by match_token().  Opt_err terminates the
 * match tables and is the result for an unrecognized option.
 */
enum {
	Opt_acl,
	Opt_noacl,
	Opt_clear_cache,
	Opt_commit_interval,
	Opt_compress,
	Opt_compress_force,
	Opt_compress_force_type,
	Opt_compress_type,
	Opt_degraded,
	Opt_device,
	Opt_fatal_errors,
	Opt_flushoncommit,
	Opt_noflushoncommit,
	Opt_inode_cache,
	Opt_noinode_cache,
	Opt_max_inline,
	Opt_barrier,
	Opt_nobarrier,
	Opt_datacow,
	Opt_nodatacow,
	Opt_datasum,
	Opt_nodatasum,
	Opt_defrag,
	Opt_nodefrag,
	Opt_discard,
	Opt_nodiscard,
	Opt_discard_mode,
	Opt_norecovery,
	Opt_ratio,
	Opt_rescan_uuid_tree,
	Opt_skip_balance,
	Opt_space_cache,
	Opt_no_space_cache,
	Opt_space_cache_version,
	Opt_ssd,
	Opt_nossd,
	Opt_ssd_spread,
	Opt_nossd_spread,
	Opt_subvol,
	Opt_subvol_empty,
	Opt_subvolid,
	Opt_thread_pool,
	Opt_treelog,
	Opt_notreelog,
	Opt_user_subvol_rm_allowed,

	/* Rescue options */
	Opt_rescue,
	Opt_usebackuproot,
	Opt_nologreplay,

	/* Deprecated options */
	Opt_recovery,

	/* Debugging options */
	Opt_check_integrity,
	Opt_check_integrity_including_extent_data,
	Opt_check_integrity_print_mask,
	Opt_enospc_debug,
	Opt_noenospc_debug,
#ifdef CONFIG_BTRFS_DEBUG
	Opt_fragment_data,
	Opt_fragment_metadata,
	Opt_fragment_all,
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
	Opt_ref_verify,
#endif
	Opt_err,
};

381
static const match_table_t tokens = {
382 383 384 385
	{Opt_acl, "acl"},
	{Opt_noacl, "noacl"},
	{Opt_clear_cache, "clear_cache"},
	{Opt_commit_interval, "commit=%u"},
386
	{Opt_compress, "compress"},
387
	{Opt_compress_type, "compress=%s"},
388
	{Opt_compress_force, "compress-force"},
389
	{Opt_compress_force_type, "compress-force=%s"},
390 391 392
	{Opt_degraded, "degraded"},
	{Opt_device, "device=%s"},
	{Opt_fatal_errors, "fatal_errors=%s"},
393
	{Opt_flushoncommit, "flushoncommit"},
394
	{Opt_noflushoncommit, "noflushoncommit"},
395 396 397 398 399 400 401 402 403 404 405
	{Opt_inode_cache, "inode_cache"},
	{Opt_noinode_cache, "noinode_cache"},
	{Opt_max_inline, "max_inline=%s"},
	{Opt_barrier, "barrier"},
	{Opt_nobarrier, "nobarrier"},
	{Opt_datacow, "datacow"},
	{Opt_nodatacow, "nodatacow"},
	{Opt_datasum, "datasum"},
	{Opt_nodatasum, "nodatasum"},
	{Opt_defrag, "autodefrag"},
	{Opt_nodefrag, "noautodefrag"},
406
	{Opt_discard, "discard"},
407
	{Opt_discard_mode, "discard=%s"},
408
	{Opt_nodiscard, "nodiscard"},
409 410 411 412
	{Opt_norecovery, "norecovery"},
	{Opt_ratio, "metadata_ratio=%u"},
	{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
	{Opt_skip_balance, "skip_balance"},
413
	{Opt_space_cache, "space_cache"},
414
	{Opt_no_space_cache, "nospace_cache"},
415 416 417 418 419 420
	{Opt_space_cache_version, "space_cache=%s"},
	{Opt_ssd, "ssd"},
	{Opt_nossd, "nossd"},
	{Opt_ssd_spread, "ssd_spread"},
	{Opt_nossd_spread, "nossd_spread"},
	{Opt_subvol, "subvol=%s"},
421
	{Opt_subvol_empty, "subvol="},
422 423 424 425 426 427
	{Opt_subvolid, "subvolid=%s"},
	{Opt_thread_pool, "thread_pool=%u"},
	{Opt_treelog, "treelog"},
	{Opt_notreelog, "notreelog"},
	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},

428 429 430 431 432 433 434
	/* Rescue options */
	{Opt_rescue, "rescue=%s"},
	/* Deprecated, with alias rescue=nologreplay */
	{Opt_nologreplay, "nologreplay"},
	/* Deprecated, with alias rescue=usebackuproot */
	{Opt_usebackuproot, "usebackuproot"},

435 436 437 438
	/* Deprecated options */
	{Opt_recovery, "recovery"},

	/* Debugging options */
439 440
	{Opt_check_integrity, "check_int"},
	{Opt_check_integrity_including_extent_data, "check_int_data"},
441
	{Opt_check_integrity_print_mask, "check_int_print_mask=%u"},
442 443
	{Opt_enospc_debug, "enospc_debug"},
	{Opt_noenospc_debug, "noenospc_debug"},
444 445 446 447
#ifdef CONFIG_BTRFS_DEBUG
	{Opt_fragment_data, "fragment=data"},
	{Opt_fragment_metadata, "fragment=metadata"},
	{Opt_fragment_all, "fragment=all"},
448 449 450
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
	{Opt_ref_verify, "ref_verify"},
451
#endif
Josef Bacik's avatar
Josef Bacik committed
452
	{Opt_err, NULL},
453 454
};

455 456 457 458 459 460
static const match_table_t rescue_tokens = {
	{Opt_usebackuproot, "usebackuproot"},
	{Opt_nologreplay, "nologreplay"},
	{Opt_err, NULL},
};

461 462 463 464 465 466 467 468 469 470 471
/*
 * Report an option that is only valid together with a read-only mount.
 *
 * Returns true, after logging an error naming @opt_name, when any bit of
 * @opt is set in fs_info->mount_opt; returns false otherwise.
 */
static bool check_ro_option(struct btrfs_fs_info *fs_info, unsigned long opt,
			    const char *opt_name)
{
	if (!(fs_info->mount_opt & opt))
		return false;

	btrfs_err(fs_info, "%s must be used with ro mount option",
		  opt_name);
	return true;
}

472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514
/*
 * Parse the value of the rescue= mount option, a ':' separated list of
 * rescue sub-options.  Empty list entries are skipped.
 *
 * Returns 0 on success, -ENOMEM if the option string cannot be duplicated,
 * or -EINVAL at the first unrecognized sub-option.
 */
static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
{
	char *opts;
	char *orig;
	char *p;
	substring_t args[MAX_OPT_ARGS];
	int ret = 0;

	/* strsep() writes into its input, so work on a private copy. */
	opts = kstrdup(options, GFP_KERNEL);
	if (!opts)
		return -ENOMEM;
	/* strsep() advances opts; remember the start for kfree(). */
	orig = opts;

	while ((p = strsep(&opts, ":")) != NULL) {
		int token;

		if (!*p)
			continue;
		token = match_token(p, rescue_tokens, args);
		switch (token) {
		case Opt_usebackuproot:
			btrfs_info(info,
				   "trying to use backup root at mount time");
			btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
			break;
		case Opt_nologreplay:
			btrfs_set_and_info(info, NOLOGREPLAY,
					   "disabling log replay at mount time");
			break;
		case Opt_err:
			/*
			 * This fails the option parsing, so report it at
			 * error level like unrecognized regular mount options.
			 */
			btrfs_err(info, "unrecognized rescue option '%s'", p);
			ret = -EINVAL;
			goto out;
		default:
			break;
		}

	}
out:
	kfree(orig);
	return ret;
}

515 516 517
/*
 * Regular mount options parser.  Everything that is needed only when
 * reading in a new superblock is parsed here.
518
 * XXX JDM: This needs to be cleaned up for remount.
519
 */
520
int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
521
			unsigned long new_flags)
522 523
{
	substring_t args[MAX_OPT_ARGS];
524
	char *p, *num;
525
	u64 cache_gen;
526
	int intarg;
527
	int ret = 0;
528 529
	char *compress_type;
	bool compress_force = false;
530
	enum btrfs_compression_type saved_compress_type;
531
	int saved_compress_level;
532 533
	bool saved_compress_force;
	int no_compress = 0;
534

535 536
	cache_gen = btrfs_super_cache_generation(info->super_copy);
	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
537 538
		btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
	else if (cache_gen)
539 540
		btrfs_set_opt(info->mount_opt, SPACE_CACHE);

541 542 543 544
	/*
	 * Even the options are empty, we still need to do extra check
	 * against new flags
	 */
545
	if (!options)
546
		goto check;
547

548
	while ((p = strsep(&options, ",")) != NULL) {
549 550 551 552 553 554
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
555
		case Opt_degraded:
556
			btrfs_info(info, "allowing degraded mounts");
557
			btrfs_set_opt(info->mount_opt, DEGRADED);
558
			break;
559
		case Opt_subvol:
560
		case Opt_subvol_empty:
561
		case Opt_subvolid:
562
		case Opt_device:
563
			/*
564 565
			 * These are parsed by btrfs_parse_subvol_options or
			 * btrfs_parse_device_options and can be ignored here.
566
			 */
567 568
			break;
		case Opt_nodatasum:
569
			btrfs_set_and_info(info, NODATASUM,
570
					   "setting nodatasum");
571
			break;
Qu Wenruo's avatar
Qu Wenruo committed
572
		case Opt_datasum:
573 574
			if (btrfs_test_opt(info, NODATASUM)) {
				if (btrfs_test_opt(info, NODATACOW))
575
					btrfs_info(info,
576
						   "setting datasum, datacow enabled");
577
				else
578
					btrfs_info(info, "setting datasum");
579
			}
Qu Wenruo's avatar
Qu Wenruo committed
580 581 582
			btrfs_clear_opt(info->mount_opt, NODATACOW);
			btrfs_clear_opt(info->mount_opt, NODATASUM);
			break;
583
		case Opt_nodatacow:
584 585 586
			if (!btrfs_test_opt(info, NODATACOW)) {
				if (!btrfs_test_opt(info, COMPRESS) ||
				    !btrfs_test_opt(info, FORCE_COMPRESS)) {
587
					btrfs_info(info,
588 589
						   "setting nodatacow, compression disabled");
				} else {
590
					btrfs_info(info, "setting nodatacow");
591
				}
592 593 594
			}
			btrfs_clear_opt(info->mount_opt, COMPRESS);
			btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
595 596
			btrfs_set_opt(info->mount_opt, NODATACOW);
			btrfs_set_opt(info->mount_opt, NODATASUM);
597
			break;
Qu Wenruo's avatar
Qu Wenruo committed
598
		case Opt_datacow:
599
			btrfs_clear_and_info(info, NODATACOW,
600
					     "setting datacow");
Qu Wenruo's avatar
Qu Wenruo committed
601
			break;
602
		case Opt_compress_force:
603 604
		case Opt_compress_force_type:
			compress_force = true;
605
			fallthrough;
606 607
		case Opt_compress:
		case Opt_compress_type:
608 609
			saved_compress_type = btrfs_test_opt(info,
							     COMPRESS) ?
610 611
				info->compress_type : BTRFS_COMPRESS_NONE;
			saved_compress_force =
612
				btrfs_test_opt(info, FORCE_COMPRESS);
613
			saved_compress_level = info->compress_level;
614 615
			if (token == Opt_compress ||
			    token == Opt_compress_force ||
616
			    strncmp(args[0].from, "zlib", 4) == 0) {
617
				compress_type = "zlib";
618

619
				info->compress_type = BTRFS_COMPRESS_ZLIB;
620 621 622 623 624 625 626 627 628
				info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;
				/*
				 * args[0] contains uninitialized data since
				 * for these tokens we don't expect any
				 * parameter.
				 */
				if (token != Opt_compress &&
				    token != Opt_compress_force)
					info->compress_level =
629 630 631
					  btrfs_compress_str2level(
							BTRFS_COMPRESS_ZLIB,
							args[0].from + 4);
632
				btrfs_set_opt(info->mount_opt, COMPRESS);
633 634
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
635
				no_compress = 0;
636
			} else if (strncmp(args[0].from, "lzo", 3) == 0) {
637 638
				compress_type = "lzo";
				info->compress_type = BTRFS_COMPRESS_LZO;
639
				info->compress_level = 0;
640
				btrfs_set_opt(info->mount_opt, COMPRESS);
641 642
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
643
				btrfs_set_fs_incompat(info, COMPRESS_LZO);
644
				no_compress = 0;
645
			} else if (strncmp(args[0].from, "zstd", 4) == 0) {
Nick Terrell's avatar
Nick Terrell committed
646 647
				compress_type = "zstd";
				info->compress_type = BTRFS_COMPRESS_ZSTD;
648 649 650 651
				info->compress_level =
					btrfs_compress_str2level(
							 BTRFS_COMPRESS_ZSTD,
							 args[0].from + 4);
Nick Terrell's avatar
Nick Terrell committed
652 653 654 655 656
				btrfs_set_opt(info->mount_opt, COMPRESS);
				btrfs_clear_opt(info->mount_opt, NODATACOW);
				btrfs_clear_opt(info->mount_opt, NODATASUM);
				btrfs_set_fs_incompat(info, COMPRESS_ZSTD);
				no_compress = 0;
657 658
			} else if (strncmp(args[0].from, "no", 2) == 0) {
				compress_type = "no";
659 660
				info->compress_level = 0;
				info->compress_type = 0;
661 662 663
				btrfs_clear_opt(info->mount_opt, COMPRESS);
				btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
				compress_force = false;
664
				no_compress++;
665 666 667 668 669 670
			} else {
				ret = -EINVAL;
				goto out;
			}

			if (compress_force) {
671
				btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
672
			} else {
673 674 675 676 677 678 679
				/*
				 * If we remount from compress-force=xxx to
				 * compress=xxx, we need clear FORCE_COMPRESS
				 * flag, otherwise, there is no way for users
				 * to disable forcible compression separately.
				 */
				btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
680
			}
681 682 683 684 685
			if (no_compress == 1) {
				btrfs_info(info, "use no compression");
			} else if ((info->compress_type != saved_compress_type) ||
				   (compress_force != saved_compress_force) ||
				   (info->compress_level != saved_compress_level)) {
686
				btrfs_info(info, "%s %s compression, level %d",
687
					   (compress_force) ? "force" : "use",
688
					   compress_type, info->compress_level);
689 690
			}
			compress_force = false;
691
			break;
692
		case Opt_ssd:
693
			btrfs_set_and_info(info, SSD,
694
					   "enabling ssd optimizations");
695
			btrfs_clear_opt(info->mount_opt, NOSSD);
696
			break;
697
		case Opt_ssd_spread:
698 699
			btrfs_set_and_info(info, SSD,
					   "enabling ssd optimizations");
700
			btrfs_set_and_info(info, SSD_SPREAD,
701
					   "using spread ssd allocation scheme");
702
			btrfs_clear_opt(info->mount_opt, NOSSD);
703
			break;
Chris Mason's avatar
Chris Mason committed
704
		case Opt_nossd:
705 706 707
			btrfs_set_opt(info->mount_opt, NOSSD);
			btrfs_clear_and_info(info, SSD,
					     "not using ssd optimizations");
708
			fallthrough;
709
		case Opt_nossd_spread:
710 711
			btrfs_clear_and_info(info, SSD_SPREAD,
					     "not using spread ssd allocation scheme");
Chris Mason's avatar
Chris Mason committed
712
			break;
713
		case Opt_barrier:
714
			btrfs_clear_and_info(info, NOBARRIER,
715
					     "turning on barriers");
716
			break;
717
		case Opt_nobarrier:
718
			btrfs_set_and_info(info, NOBARRIER,
719
					   "turning off barriers");
720
			break;
721
		case Opt_thread_pool:
722 723 724
			ret = match_int(&args[0], &intarg);
			if (ret) {
				goto out;
725
			} else if (intarg == 0) {
726 727 728
				ret = -EINVAL;
				goto out;
			}
729
			info->thread_pool_size = intarg;
730
			break;
731
		case Opt_max_inline:
732 733
			num = match_strdup(&args[0]);
			if (num) {
Akinobu Mita's avatar
Akinobu Mita committed
734
				info->max_inline = memparse(num, NULL);
735 736
				kfree(num);

737
				if (info->max_inline) {
738
					info->max_inline = min_t(u64,
739
						info->max_inline,
740
						info->sectorsize);
741
				}
742 743
				btrfs_info(info, "max_inline at %llu",
					   info->max_inline);
744 745 746
			} else {
				ret = -ENOMEM;
				goto out;
747 748
			}
			break;
Qu Wenruo's avatar
Qu Wenruo committed
749
		case Opt_acl:
750
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
751
			info->sb->s_flags |= SB_POSIXACL;
Qu Wenruo's avatar
Qu Wenruo committed
752
			break;
753
#else
754
			btrfs_err(info, "support for ACL not compiled in!");
755 756 757
			ret = -EINVAL;
			goto out;
#endif
Josef Bacik's avatar
Josef Bacik committed
758
		case Opt_noacl:
759
			info->sb->s_flags &= ~SB_POSIXACL;
Josef Bacik's avatar
Josef Bacik committed
760
			break;
Sage Weil's avatar
Sage Weil committed
761
		case Opt_notreelog:
762
			btrfs_set_and_info(info, NOTREELOG,
763
					   "disabling tree log");
Qu Wenruo's avatar
Qu Wenruo committed
764 765
			break;
		case Opt_treelog:
766
			btrfs_clear_and_info(info, NOTREELOG,
767
					     "enabling tree log");
Sage Weil's avatar
Sage Weil committed
768
			break;
769
		case Opt_norecovery:
770
		case Opt_nologreplay:
771 772
			btrfs_warn(info,
		"'nologreplay' is deprecated, use 'rescue=nologreplay' instead");
773
			btrfs_set_and_info(info, NOLOGREPLAY,
774 775
					   "disabling log replay at mount time");
			break;
776
		case Opt_flushoncommit:
777
			btrfs_set_and_info(info, FLUSHONCOMMIT,
778
					   "turning on flush-on-commit");
779
			break;
780
		case Opt_noflushoncommit:
781
			btrfs_clear_and_info(info, FLUSHONCOMMIT,
782
					     "turning off flush-on-commit");
783
			break;
784
		case Opt_ratio:
785
			ret = match_int(&args[0], &intarg);
786
			if (ret)
787
				goto out;
788 789 790
			info->metadata_ratio = intarg;
			btrfs_info(info, "metadata ratio %u",
				   info->metadata_ratio);
791
			break;
792
		case Opt_discard:
793 794 795 796 797 798 799 800 801 802 803 804 805 806
		case Opt_discard_mode:
			if (token == Opt_discard ||
			    strcmp(args[0].from, "sync") == 0) {
				btrfs_clear_opt(info->mount_opt, DISCARD_ASYNC);
				btrfs_set_and_info(info, DISCARD_SYNC,
						   "turning on sync discard");
			} else if (strcmp(args[0].from, "async") == 0) {
				btrfs_clear_opt(info->mount_opt, DISCARD_SYNC);
				btrfs_set_and_info(info, DISCARD_ASYNC,
						   "turning on async discard");
			} else {
				ret = -EINVAL;
				goto out;
			}
807
			break;
808
		case Opt_nodiscard:
809
			btrfs_clear_and_info(info, DISCARD_SYNC,
810
					     "turning off discard");
811 812
			btrfs_clear_and_info(info, DISCARD_ASYNC,
					     "turning off async discard");
813
			break;
814
		case Opt_space_cache:
815 816 817
		case Opt_space_cache_version:
			if (token == Opt_space_cache ||
			    strcmp(args[0].from, "v1") == 0) {
818
				btrfs_clear_opt(info->mount_opt,
819
						FREE_SPACE_TREE);
820
				btrfs_set_and_info(info, SPACE_CACHE,
821
					   "enabling disk space caching");
822
			} else if (strcmp(args[0].from, "v2") == 0) {
823
				btrfs_clear_opt(info->mount_opt,
824
						SPACE_CACHE);
825
				btrfs_set_and_info(info, FREE_SPACE_TREE,
826 827 828 829 830
						   "enabling free space tree");
			} else {
				ret = -EINVAL;
				goto out;
			}
831
			break;
832 833 834
		case Opt_rescan_uuid_tree:
			btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
			break;
835
		case Opt_no_space_cache:
836
			if (btrfs_test_opt(info, SPACE_CACHE)) {
837 838
				btrfs_clear_and_info(info, SPACE_CACHE,
					     "disabling disk space caching");
839
			}
840
			if (btrfs_test_opt(info, FREE_SPACE_TREE)) {
841 842
				btrfs_clear_and_info(info, FREE_SPACE_TREE,
					     "disabling free space tree");
843
			}
844
			break;
845
		case Opt_inode_cache:
846 847
			btrfs_warn(info,
	"the 'inode_cache' option is deprecated and will have no effect from 5.11");
848
			btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
849
					   "enabling inode map caching");
850 851
			break;
		case Opt_noinode_cache:
852
			btrfs_clear_pending_and_info(info, INODE_MAP_CACHE,
853
					     "disabling inode map caching");
854
			break;
855
		case Opt_clear_cache:
856
			btrfs_set_and_info(info, CLEAR_CACHE,
857
					   "force clearing of disk cache");
858
			break;
859 860 861
		case Opt_user_subvol_rm_allowed:
			btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
			break;
862 863 864
		case Opt_enospc_debug:
			btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
			break;
865 866 867
		case Opt_noenospc_debug:
			btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
			break;
868
		case Opt_defrag:
869
			btrfs_set_and_info(info, AUTO_DEFRAG,
870
					   "enabling auto defrag");
871
			break;
872
		case Opt_nodefrag:
873
			btrfs_clear_and_info(info, AUTO_DEFRAG,
874
					     "disabling auto defrag");
875
			break;
876
		case Opt_recovery:
877
		case Opt_usebackuproot:
878 879 880 881
			btrfs_warn(info,
			"'%s' is deprecated, use 'rescue=usebackuproot' instead",
				   token == Opt_recovery ? "recovery" :
				   "usebackuproot");
882
			btrfs_info(info,
883 884
				   "trying to use backup root at mount time");
			btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
885
			break;
886 887 888
		case Opt_skip_balance:
			btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
			break;
889 890
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
		case Opt_check_integrity_including_extent_data:
891
			btrfs_info(info,
892
				   "enabling check integrity including extent data");
893 894 895 896 897
			btrfs_set_opt(info->mount_opt,
				      CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
			break;
		case Opt_check_integrity:
898
			btrfs_info(info, "enabling check integrity");
899 900 901
			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
			break;
		case Opt_check_integrity_print_mask:
902
			ret = match_int(&args[0], &intarg);
903
			if (ret)
904
				goto out;
905 906 907
			info->check_integrity_print_mask = intarg;
			btrfs_info(info, "check_integrity_print_mask 0x%x",
				   info->check_integrity_print_mask);
908 909 910 911 912
			break;
#else
		case Opt_check_integrity_including_extent_data:
		case Opt_check_integrity:
		case Opt_check_integrity_print_mask:
913 914
			btrfs_err(info,
				  "support for check_integrity* not compiled in!");
915 916 917
			ret = -EINVAL;
			goto out;
#endif
Jeff Mahoney's avatar
Jeff Mahoney committed
918 919 920 921 922 923 924 925 926 927 928 929
		case Opt_fatal_errors:
			if (strcmp(args[0].from, "panic") == 0)
				btrfs_set_opt(info->mount_opt,
					      PANIC_ON_FATAL_ERROR);
			else if (strcmp(args[0].from, "bug") == 0)
				btrfs_clear_opt(info->mount_opt,
					      PANIC_ON_FATAL_ERROR);
			else {
				ret = -EINVAL;
				goto out;
			}
			break;
930 931 932
		case Opt_commit_interval:
			intarg = 0;
			ret = match_int(&args[0], &intarg);
933
			if (ret)
934
				goto out;
935
			if (intarg == 0) {
936
				btrfs_info(info,
937
					   "using default commit interval %us",
938
					   BTRFS_DEFAULT_COMMIT_INTERVAL);
939 940 941 942
				intarg = BTRFS_DEFAULT_COMMIT_INTERVAL;
			} else if (intarg > 300) {
				btrfs_warn(info, "excessive commit interval %d",
					   intarg);
943
			}
944
			info->commit_interval = intarg;
945
			break;
946 947 948 949 950
		case Opt_rescue:
			ret = parse_rescue_options(info, args[0].from);
			if (ret < 0)
				goto out;
			break;
951 952
#ifdef CONFIG_BTRFS_DEBUG
		case Opt_fragment_all:
953
			btrfs_info(info, "fragmenting all space");
954 955 956 957
			btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
			btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
			break;
		case Opt_fragment_metadata:
958
			btrfs_info(info, "fragmenting metadata");
959 960 961 962
			btrfs_set_opt(info->mount_opt,
				      FRAGMENT_METADATA);
			break;
		case Opt_fragment_data:
963
			btrfs_info(info, "fragmenting data");
964 965
			btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
			break;
966 967 968 969 970 971
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
		case Opt_ref_verify:
			btrfs_info(info, "doing ref verification");
			btrfs_set_opt(info->mount_opt, REF_VERIFY);
			break;
972
#endif
973
		case Opt_err:
974
			btrfs_err(info, "unrecognized mount option '%s'", p);
975 976
			ret = -EINVAL;
			goto out;
977
		default:
978
			break;
979 980
		}
	}
981
check:
982 983 984 985 986
	/* We're read-only, don't have to check. */
	if (new_flags & SB_RDONLY)
		goto out;

	if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay"))
987
		ret = -EINVAL;
988
out:
989
	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
990 991
	    !btrfs_test_opt(info, FREE_SPACE_TREE) &&
	    !btrfs_test_opt(info, CLEAR_CACHE)) {
992
		btrfs_err(info, "cannot disable free space tree");
993 994 995
		ret = -EINVAL;

	}
996
	if (!ret && btrfs_test_opt(info, SPACE_CACHE))
997
		btrfs_info(info, "disk space caching is enabled");
998
	if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
999
		btrfs_info(info, "using free space tree");
1000
	return ret;
1001 1002 1003 1004 1005 1006 1007 1008
}

/*
 * Parse mount options that are required early in the mount process.
 *
 * All other options will be parsed on much later in the mount process and
 * only when we need to allocate a new super block.
 */
1009 1010
static int btrfs_parse_device_options(const char *options, fmode_t flags,
				      void *holder)
1011 1012
{
	substring_t args[MAX_OPT_ARGS];
1013
	char *device_name, *opts, *orig, *p;
1014
	struct btrfs_device *device = NULL;
1015 1016
	int error = 0;

1017 1018
	lockdep_assert_held(&uuid_mutex);

1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043
	if (!options)
		return 0;

	/*
	 * strsep changes the string, duplicate it because btrfs_parse_options
	 * gets called later
	 */
	opts = kstrdup(options, GFP_KERNEL);
	if (!opts)
		return -ENOMEM;
	orig = opts;

	while ((p = strsep(&opts, ",")) != NULL) {
		int token;

		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		if (token == Opt_device) {
			device_name = match_strdup(&args[0]);
			if (!device_name) {
				error = -ENOMEM;
				goto out;
			}
1044 1045
			device = btrfs_scan_one_device(device_name, flags,
					holder);
1046
			kfree(device_name);
1047 1048
			if (IS_ERR(device)) {
				error = PTR_ERR(device);
1049
				goto out;
1050
			}
1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063
		}
	}

out:
	kfree(orig);
	return error;
}

/*
 * Parse mount options that are related to subvolume id
 *
 * The value is later passed to mount_subvol()
 */
1064 1065
static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
		u64 *subvol_objectid)
1066 1067 1068
{
	substring_t args[MAX_OPT_ARGS];
	char *opts, *orig, *p;
1069
	int error = 0;
1070
	u64 subvolid;
1071 1072

	if (!options)
1073
		return 0;
1074 1075

	/*
1076
	 * strsep changes the string, duplicate it because
1077
	 * btrfs_parse_device_options gets called later
1078 1079 1080 1081
	 */
	opts = kstrdup(options, GFP_KERNEL);
	if (!opts)
		return -ENOMEM;
1082
	orig = opts;
1083 1084 1085 1086 1087 1088 1089 1090 1091

	while ((p = strsep(&opts, ",")) != NULL) {
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_subvol:
1092
			kfree(*subvol_name);
1093
			*subvol_name = match_strdup(&args[0]);
1094 1095 1096 1097
			if (!*subvol_name) {
				error = -ENOMEM;
				goto out;
			}
1098
			break;
1099
		case Opt_subvolid:
1100 1101
			error = match_u64(&args[0], &subvolid);
			if (error)
1102
				goto out;
1103 1104 1105 1106 1107 1108

			/* we want the original fs_tree */
			if (subvolid == 0)
				subvolid = BTRFS_FS_TREE_OBJECTID;

			*subvol_objectid = subvolid;
1109
			break;
1110 1111 1112 1113 1114
		default:
			break;
		}
	}

1115
out:
1116
	kfree(orig);
1117
	return error;
1118 1119
}

1120 1121
/*
 * Build the path of a subvolume, relative to the top-level subvolume, from
 * its objectid.
 *
 * The path is assembled back to front in a PATH_MAX sized buffer: for every
 * subvolume on the way up, the name stored in its root backref is prepended,
 * followed by the names of the directories leading to it inside the parent
 * subvolume.  The top-level subvolume itself is reported as "/".
 *
 * Returns a kmalloc'ed string that the caller must kfree(), or an ERR_PTR()
 * (-ENOMEM, -ENOENT, -ENAMETOOLONG or a tree search error) on failure.
 */
char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
					  u64 subvol_objectid)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_root *fs_root = NULL;
	struct btrfs_root_ref *root_ref;
	struct btrfs_inode_ref *inode_ref;
	struct btrfs_key key;
	struct btrfs_path *path = NULL;
	char *name = NULL, *ptr;
	u64 dirid;
	int len;
	int ret;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto err;
	}
	path->leave_spinning = 1;

	name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!name) {
		ret = -ENOMEM;
		goto err;
	}
	/* Start with an empty string at the very end of the buffer */
	ptr = name + PATH_MAX - 1;
	ptr[0] = '\0';

	/*
	 * Walk up the subvolume trees in the tree of tree roots by root
	 * backrefs until we hit the top-level subvolume.
	 */
	while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
		key.objectid = subvol_objectid;
		key.type = BTRFS_ROOT_BACKREF_KEY;
		key.offset = (u64)-1;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0) {
			goto err;
		} else if (ret > 0) {
			ret = btrfs_previous_item(root, path, subvol_objectid,
						  BTRFS_ROOT_BACKREF_KEY);
			if (ret < 0) {
				goto err;
			} else if (ret > 0) {
				ret = -ENOENT;
				goto err;
			}
		}

		/* The backref key offset is the objectid of the parent root */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		subvol_objectid = key.offset;

		root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					  struct btrfs_root_ref);
		len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
		/*
		 * Make sure "/<name>" fits in front of what we have so far
		 * before moving ptr, so that ptr never points before name.
		 */
		if (len + 1 > ptr - name) {
			ret = -ENAMETOOLONG;
			goto err;
		}
		ptr -= len + 1;
		/* The name is stored right after the root_ref item */
		read_extent_buffer(path->nodes[0], ptr + 1,
				   (unsigned long)(root_ref + 1), len);
		ptr[0] = '/';
		dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
		btrfs_release_path(path);

		fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true);
		if (IS_ERR(fs_root)) {
			ret = PTR_ERR(fs_root);
			/* Don't hand an error pointer to btrfs_put_root() */
			fs_root = NULL;
			goto err;
		}

		/*
		 * Walk up the filesystem tree by inode refs until we hit the
		 * root directory.
		 */
		while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
			key.objectid = dirid;
			key.type = BTRFS_INODE_REF_KEY;
			key.offset = (u64)-1;

			ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
			if (ret < 0) {
				goto err;
			} else if (ret > 0) {
				ret = btrfs_previous_item(fs_root, path, dirid,
							  BTRFS_INODE_REF_KEY);
				if (ret < 0) {
					goto err;
				} else if (ret > 0) {
					ret = -ENOENT;
					goto err;
				}
			}

			/* The inode ref key offset is the parent directory */
			btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
			dirid = key.offset;

			inode_ref = btrfs_item_ptr(path->nodes[0],
						   path->slots[0],
						   struct btrfs_inode_ref);
			len = btrfs_inode_ref_name_len(path->nodes[0],
						       inode_ref);
			/* Same bounds check as for the subvolume name above */
			if (len + 1 > ptr - name) {
				ret = -ENAMETOOLONG;
				goto err;
			}
			ptr -= len + 1;
			read_extent_buffer(path->nodes[0], ptr + 1,
					   (unsigned long)(inode_ref + 1), len);
			ptr[0] = '/';
			btrfs_release_path(path);
		}
		btrfs_put_root(fs_root);
		fs_root = NULL;
	}

	btrfs_free_path(path);
	if (ptr == name + PATH_MAX - 1) {
		/* Nothing was prepended: this is the top-level subvolume */
		name[0] = '/';
		name[1] = '\0';
	} else {
		/* Move the assembled path, including the NUL, to the front */
		memmove(name, ptr, name + PATH_MAX - ptr);
	}
	return name;

err:
	btrfs_put_root(fs_root);
	btrfs_free_path(path);
	kfree(name);
	return ERR_PTR(ret);
}

/*
 * Look up the objectid of the default subvolume.
 *
 * @fs_info:  filesystem to query
 * @objectid: out; set to the objectid recorded in the "default" dir item, or
 *            to BTRFS_FS_TREE_OBJECTID when that item does not exist
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the error
 * returned by btrfs_lookup_dir_item().
 */
static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_dir_item *di;
	struct btrfs_path *path;
	struct btrfs_key location;
	u64 dir_id;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->leave_spinning = 1;

	/*
	 * Find the "default" dir item which points to the root item that we
	 * will mount by default if we haven't been given a specific subvolume
	 * to mount.
	 */
	dir_id = btrfs_super_root_dir(fs_info->super_copy);
	di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
	if (IS_ERR(di)) {
		btrfs_free_path(path);
		return PTR_ERR(di);
	}
	if (!di) {
		/*
		 * Ok the default dir item isn't there.  This is weird since
		 * it's always been there, but don't freak out, just try and
		 * mount the top-level subvolume.
		 */
		btrfs_free_path(path);
		*objectid = BTRFS_FS_TREE_OBJECTID;
		return 0;
	}

	/* Read the key out of the leaf before the path is released */
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
	btrfs_free_path(path);
	*objectid = location.objectid;
	return 0;
}

1298
static int btrfs_fill_super(struct super_block *sb,
1299
			    struct btrfs_fs_devices *fs_devices,
1300
			    void *data)
Chris Mason's avatar
Chris Mason committed
1301
{
1302
	struct inode *inode;
1303
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
Chris Mason's avatar
Chris Mason committed
1304
	int err;
1305

Chris Mason's avatar
Chris Mason committed
1306 1307 1308
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_magic = BTRFS_SUPER_MAGIC;
	sb->s_op = &btrfs_super_ops;
Al Viro's avatar
Al Viro committed
1309
	sb->s_d_op = &btrfs_dentry_operations;
Balaji Rao's avatar
Balaji Rao committed
1310
	sb->s_export_op = &btrfs_export_ops;
Josef Bacik's avatar
Josef Bacik committed
1311
	sb->s_xattr = btrfs_xattr_handlers;
Chris Mason's avatar
Chris Mason committed
1312
	sb->s_time_gran = 1;
1313
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
1314
	sb->s_flags |= SB_POSIXACL;
1315
#endif
1316
	sb->s_flags |= SB_I_VERSION;
1317
	sb->s_iflags |= SB_I_CGROUPWB;
1318 1319 1320 1321 1322 1323 1324

	err = super_setup_bdi(sb);
	if (err) {
		btrfs_err(fs_info, "super_setup_bdi failed");
		return err;
	}

1325 1326
	err = open_ctree(sb, fs_devices, (char *)data);
	if (err) {
1327
		btrfs_err(fs_info, "open_ctree failed");
1328
		return err;
1329 1330
	}

1331
	inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root);
1332 1333
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
Chris Mason's avatar
Chris Mason committed
1334
		goto fail_close;
Chris Mason's avatar
Chris Mason committed
1335 1336
	}

1337 1338
	sb->s_root = d_make_root(inode);
	if (!sb->s_root) {
Chris Mason's avatar
Chris Mason committed
1339 1340
		err = -ENOMEM;
		goto fail_close;
Chris Mason's avatar
Chris Mason committed
1341
	}
1342

1343
	cleancache_init_fs(sb);
1344
	sb->s_flags |= SB_ACTIVE;
Chris Mason's avatar
Chris Mason committed
1345
	return 0;
Chris Mason's avatar
Chris Mason committed
1346 1347

fail_close:
1348
	close_ctree(fs_info);
Chris Mason's avatar
Chris Mason committed
1349
	return err;
Chris Mason's avatar
Chris Mason committed
1350 1351
}

Sage Weil's avatar
Sage Weil committed
1352
int btrfs_sync_fs(struct super_block *sb, int wait)
Chris Mason's avatar
Chris Mason committed
1353 1354
{
	struct btrfs_trans_handle *trans;
1355 1356
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
Chris Mason's avatar
Chris Mason committed
1357

1358
	trace_btrfs_sync_fs(fs_info, wait);
1359

Chris Mason's avatar
Chris Mason committed
1360
	if (!wait) {
1361
		filemap_flush(fs_info->btree_inode->i_mapping);
Chris Mason's avatar
Chris Mason committed
1362 1363
		return 0;
	}
1364

1365
	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
1366

1367
	trans = btrfs_attach_transaction_barrier(root);
1368
	if (IS_ERR(trans)) {
1369
		/* no transaction, don't bother */
1370 1371 1372 1373 1374 1375 1376
		if (PTR_ERR(trans) == -ENOENT) {
			/*
			 * Exit unless we have some pending changes
			 * that need to go through commit
			 */
			if (fs_info->pending_changes == 0)
				return 0;
1377 1378 1379 1380 1381 1382
			/*
			 * A non-blocking test if the fs is frozen. We must not
			 * start a new transaction here otherwise a deadlock
			 * happens. The pending operations are delayed to the
			 * next commit after thawing.
			 */
1383 1384
			if (sb_start_write_trylock(sb))
				sb_end_write(sb);
1385 1386
			else
				return 0;
1387 1388
			trans = btrfs_start_transaction(root, 0);
		}
1389 1390
		if (IS_ERR(trans))
			return PTR_ERR(trans);
1391
	}
1392
	return btrfs_commit_transaction(trans);
1393 1394
}

1395 1396 1397 1398 1399 1400
/*
 * Emit one value of the "rescue=" mount option.
 *
 * The first value printed opens the option with ",rescue=", every following
 * one is appended with a ':' separator.  @printed tracks whether the option
 * has been opened already and is set to true on return.
 */
static void print_rescue_option(struct seq_file *seq, const char *s, bool *printed)
{
	if (*printed)
		seq_printf(seq, "%s%s", ":", s);
	else
		seq_printf(seq, "%s%s", ",rescue=", s);

	*printed = true;
}

1401
static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
1402
{
1403
	struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
1404
	const char *compress_type;
1405
	const char *subvol_name;
1406
	bool printed = false;
1407

1408
	if (btrfs_test_opt(info, DEGRADED))
1409
		seq_puts(seq, ",degraded");
1410
	if (btrfs_test_opt(info, NODATASUM))
1411
		seq_puts(seq, ",nodatasum");
1412
	if (btrfs_test_opt(info, NODATACOW))
1413
		seq_puts(seq, ",nodatacow");
1414
	if (btrfs_test_opt(info, NOBARRIER))
1415
		seq_puts(seq, ",nobarrier");
1416
	if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
1417
		seq_printf(seq, ",max_inline=%llu", info->max_inline);
1418 1419
	if (info->thread_pool_size !=  min_t(unsigned long,
					     num_online_cpus() + 2, 8))
1420
		seq_printf(seq, ",thread_pool=%u", info->thread_pool_size);
1421
	if (btrfs_test_opt(info, COMPRESS)) {
1422
		compress_type = btrfs_compress_type2str(info->compress_type);
1423
		if (btrfs_test_opt(info, FORCE_COMPRESS))
Tsutomu Itoh's avatar
Tsutomu Itoh committed
1424 1425 1426
			seq_printf(seq, ",compress-force=%s", compress_type);
		else
			seq_printf(seq, ",compress=%s", compress_type);
1427
		if (info->compress_level)
1428
			seq_printf(seq, ":%d", info->compress_level);
Tsutomu Itoh's avatar
Tsutomu Itoh committed
1429
	}
1430
	if (btrfs_test_opt(info, NOSSD))
Chris Mason's avatar
Chris Mason committed
1431
		seq_puts(seq, ",nossd");
1432
	if (btrfs_test_opt(info, SSD_SPREAD))
1433
		seq_puts(seq, ",ssd_spread");
1434
	else if (btrfs_test_opt(info, SSD))
1435
		seq_puts(seq, ",ssd");
1436
	if (btrfs_test_opt(info, NOTREELOG))
1437
		seq_puts(seq, ",notreelog");
1438
	if (btrfs_test_opt(info, NOLOGREPLAY))
1439
		print_rescue_option(seq, "nologreplay", &printed);
1440
	if (btrfs_test_opt(info, FLUSHONCOMMIT))
1441
		seq_puts(seq, ",flushoncommit");
1442
	if (btrfs_test_opt(info, DISCARD_SYNC))
1443
		seq_puts(seq, ",discard");
1444 1445
	if (btrfs_test_opt(info, DISCARD_ASYNC))
		seq_puts(seq, ",discard=async");
1446
	if (!(info->sb->s_flags & SB_POSIXACL))
1447
		seq_puts(seq, ",noacl");
1448
	if (btrfs_test_opt(info, SPACE_CACHE))
Tsutomu Itoh's avatar
Tsutomu Itoh committed
1449
		seq_puts(seq, ",space_cache");
1450
	else if (btrfs_test_opt(info, FREE_SPACE_TREE))
1451
		seq_puts(seq, ",space_cache=v2");
1452
	else
1453
		seq_puts(seq, ",nospace_cache");
1454
	if (btrfs_test_opt(info, RESCAN_UUID_TREE))
1455
		seq_puts(seq, ",rescan_uuid_tree");
1456
	if (btrfs_test_opt(info, CLEAR_CACHE))
Tsutomu Itoh's avatar
Tsutomu Itoh committed
1457
		seq_puts(seq, ",clear_cache");
1458
	if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED))
Tsutomu Itoh's avatar
Tsutomu Itoh committed
1459
		seq_puts(seq, ",user_subvol_rm_allowed");
1460
	if (btrfs_test_opt(info, ENOSPC_DEBUG))
1461
		seq_puts(seq, ",enospc_debug");
1462
	if (btrfs_test_opt(info, AUTO_DEFRAG))
1463
		seq_puts(seq, ",autodefrag");
1464
	if (btrfs_test_opt(info, INODE_MAP_CACHE))
1465
		seq_puts(seq, ",inode_cache");
1466
	if (btrfs_test_opt(info, SKIP_BALANCE))
1467
		seq_puts(seq, ",skip_balance");
1468
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1469
	if (btrfs_test_opt(info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
1470
		seq_puts(seq, ",check_int_data");
1471
	else if (btrfs_test_opt(info, CHECK_INTEGRITY))
1472 1473 1474 1475 1476 1477
		seq_puts(seq, ",check_int");
	if (info->check_integrity_print_mask)
		seq_printf(seq, ",check_int_print_mask=%d",
				info->check_integrity_print_mask);
#endif
	if (info->metadata_ratio)
1478
		seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio);
1479
	if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
Jeff Mahoney's avatar
Jeff Mahoney committed
1480
		seq_puts(seq, ",fatal_errors=panic");
1481
	if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
1482
		seq_printf(seq, ",commit=%u", info->commit_interval);
1483
#ifdef CONFIG_BTRFS_DEBUG
1484
	if (btrfs_test_opt(info, FRAGMENT_DATA))
1485
		seq_puts(seq, ",fragment=data");
1486
	if (btrfs_test_opt(info, FRAGMENT_METADATA))
1487 1488
		seq_puts(seq, ",fragment=metadata");
#endif
1489 1490
	if (btrfs_test_opt(info, REF_VERIFY))
		seq_puts(seq, ",ref_verify");
1491 1492
	seq_printf(seq, ",subvolid=%llu",
		  BTRFS_I(d_inode(dentry))->root->root_key.objectid);
1493 1494 1495 1496 1497 1498 1499
	subvol_name = btrfs_get_subvol_name_from_objectid(info,
			BTRFS_I(d_inode(dentry))->root->root_key.objectid);
	if (!IS_ERR(subvol_name)) {
		seq_puts(seq, ",subvol=");
		seq_escape(seq, subvol_name, " \t\n\\");
		kfree(subvol_name);
	}
1500 1501 1502
	return 0;
}

1503
static int btrfs_test_super(struct super_block *s, void *data)
1504
{
1505 1506
	struct btrfs_fs_info *p = data;
	struct btrfs_fs_info *fs_info = btrfs_sb(s);
1507

1508
	return fs_info->fs_devices == p->fs_devices;
1509 1510
}

1511 1512
/*
 * sget() set callback: set up the new super block with set_anon_super() and,
 * if that succeeds, attach @data as its s_fs_info.
 *
 * Returns the result of set_anon_super().
 */
static int btrfs_set_super(struct super_block *s, void *data)
{
	int err = set_anon_super(s, data);

	if (!err)
		s->s_fs_info = data;
	return err;
}

1519 1520 1521 1522 1523 1524 1525 1526 1527 1528
/*
 * Subvolumes are identified by ino 256 (BTRFS_FIRST_FREE_OBJECTID).
 *
 * Returns 1 if @inode is non-NULL and has that inode number, 0 otherwise.
 */
static inline int is_subvolume_inode(struct inode *inode)
{
	if (!inode)
		return 0;

	return inode->i_ino == BTRFS_FIRST_FREE_OBJECTID ? 1 : 0;
}

1529
/*
 * Resolve the subvolume to mount and return its dentry.
 *
 * @subvol_name:     path of the subvolume, or NULL to derive it from
 *                   @subvol_objectid
 * @subvol_objectid: id of the subvolume, or 0 to use the default subvolume
 *                   when no name was given
 * @mnt:             internal mount of the filesystem root
 *
 * This function consumes both @subvol_name (it is kfree()d) and the caller's
 * reference on @mnt on every path.
 *
 * Returns the dentry of the subvolume or an ERR_PTR().  -EINVAL is returned
 * when the resolved path is not a subvolume root or when it does not match
 * the requested @subvol_objectid.
 */
static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
				   struct vfsmount *mnt)
{
	struct dentry *root;
	int ret;

	if (!subvol_name) {
		if (!subvol_objectid) {
			ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
							  &subvol_objectid);
			if (ret) {
				root = ERR_PTR(ret);
				goto out;
			}
		}
		subvol_name = btrfs_get_subvol_name_from_objectid(
					btrfs_sb(mnt->mnt_sb), subvol_objectid);
		if (IS_ERR(subvol_name)) {
			root = ERR_CAST(subvol_name);
			/* Don't pass an error pointer to kfree() below */
			subvol_name = NULL;
			goto out;
		}

	}

	root = mount_subtree(mnt, subvol_name);
	/* mount_subtree() drops our reference on the vfsmount. */
	mnt = NULL;

	if (!IS_ERR(root)) {
		struct super_block *s = root->d_sb;
		struct btrfs_fs_info *fs_info = btrfs_sb(s);
		struct inode *root_inode = d_inode(root);
		u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;

		ret = 0;
		if (!is_subvolume_inode(root_inode)) {
			btrfs_err(fs_info, "'%s' is not a valid subvolume",
			       subvol_name);
			ret = -EINVAL;
		}
		if (subvol_objectid && root_objectid != subvol_objectid) {
			/*
			 * This will also catch a race condition where a
			 * subvolume which was passed by ID is renamed and
			 * another subvolume is renamed over the old location.
			 */
			btrfs_err(fs_info,
				  "subvol '%s' does not match subvolid %llu",
				  subvol_name, subvol_objectid);
			ret = -EINVAL;
		}
		if (ret) {
			dput(root);
			root = ERR_PTR(ret);
			deactivate_locked_super(s);
		}
	}

out:
	/* mnt is NULL here once mount_subtree() has consumed it */
	mntput(mnt);
	kfree(subvol_name);
	return root;
}
1593

1594 1595 1596 1597 1598 1599
/*
 * Find a superblock for the given device / mount point.
 *
 * Note: This is based on mount_bdev from fs/super.c with a few additions
 *       for multiple device setup.  Make sure to keep it in sync.
 */
1600 1601 1602 1603 1604
static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
		int flags, const char *device_name, void *data)
{
	struct block_device *bdev = NULL;
	struct super_block *s;
1605
	struct btrfs_device *device = NULL;
1606 1607
	struct btrfs_fs_devices *fs_devices = NULL;
	struct btrfs_fs_info *fs_info = NULL;
1608
	void *new_sec_opts = NULL;
1609 1610 1611 1612 1613 1614 1615
	fmode_t mode = FMODE_READ;
	int error = 0;

	if (!(flags & SB_RDONLY))
		mode |= FMODE_WRITE;

	if (data) {
1616
		error = security_sb_eat_lsm_opts(data, &new_sec_opts);
1617 1618 1619 1620 1621 1622 1623
		if (error)
			return ERR_PTR(error);
	}

	/*
	 * Setup a dummy root and fs_info for test/set super.  This is because
	 * we don't actually fill this stuff out until open_ctree, but we need
1624 1625 1626 1627
	 * then open_ctree will properly initialize the file system specific
	 * settings later.  btrfs_init_fs_info initializes the static elements
	 * of the fs_info (locks and such) to make cleanup easier if we find a
	 * superblock with our given fs_devices later on at sget() time.
1628
	 */
1629
	fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
1630 1631 1632 1633
	if (!fs_info) {
		error = -ENOMEM;
		goto error_sec_opts;
	}
1634
	btrfs_init_fs_info(fs_info);
1635 1636 1637 1638 1639 1640 1641 1642

	fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
	fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
	if (!fs_info->super_copy || !fs_info->super_for_commit) {
		error = -ENOMEM;
		goto error_fs_info;
	}

1643
	mutex_lock(&uuid_mutex);
1644
	error = btrfs_parse_device_options(data, mode, fs_type);
1645 1646
	if (error) {
		mutex_unlock(&uuid_mutex);
1647
		goto error_fs_info;
1648
	}
1649

1650 1651
	device = btrfs_scan_one_device(device_name, mode, fs_type);
	if (IS_ERR(device)) {
1652
		mutex_unlock(&uuid_mutex);
1653
		error = PTR_ERR(device);
1654
		goto error_fs_info;
1655
	}
1656

1657
	fs_devices = device->fs_devices;
1658 1659
	fs_info->fs_devices = fs_devices;

1660
	error = btrfs_open_devices(fs_devices, mode, fs_type);
1661
	mutex_unlock(&uuid_mutex);
1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679
	if (error)
		goto error_fs_info;

	if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
		error = -EACCES;
		goto error_close_devices;
	}

	bdev = fs_devices->latest_bdev;
	s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
		 fs_info);
	if (IS_ERR(s)) {
		error = PTR_ERR(s);
		goto error_close_devices;
	}

	if (s->s_root) {
		btrfs_close_devices(fs_devices);
1680
		btrfs_free_fs_info(fs_info);
1681 1682 1683 1684 1685
		if ((flags ^ s->s_flags) & SB_RDONLY)
			error = -EBUSY;
	} else {
		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
		btrfs_sb(s)->bdev_holder = fs_type;
1686 1687
		if (!strstr(crc32c_impl(), "generic"))
			set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
1688 1689
		error = btrfs_fill_super(s, fs_devices, data);
	}
1690
	if (!error)
1691
		error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL);
1692
	security_free_mnt_opts(&new_sec_opts);
1693 1694
	if (error) {
		deactivate_locked_super(s);
1695
		return ERR_PTR(error);
1696 1697 1698 1699 1700 1701 1702
	}

	return dget(s->s_root);

error_close_devices:
	btrfs_close_devices(fs_devices);
error_fs_info:
1703
	btrfs_free_fs_info(fs_info);
1704 1705 1706 1707
error_sec_opts:
	security_free_mnt_opts(&new_sec_opts);
	return ERR_PTR(error);
}
1708

1709
/*
1710
 * Mount function which is called by VFS layer.
1711
 *
1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729
 * In order to allow mounting a subvolume directly, btrfs uses mount_subtree()
 * which needs vfsmount* of device's root (/).  This means device's root has to
 * be mounted internally in any case.
 *
 * Operation flow:
 *   1. Parse subvol id related options for later use in mount_subvol().
 *
 *   2. Mount device's root (/) by calling vfs_kern_mount().
 *
 *      NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
 *      first place. In order to avoid calling btrfs_mount() again, we use
 *      different file_system_type which is not registered to VFS by
 *      register_filesystem() (btrfs_root_fs_type). As a result,
 *      btrfs_mount_root() is called. The return value will be used by
 *      mount_subtree() in mount_subvol().
 *
 *   3. Call mount_subvol() to get the dentry of subvolume. Since there is
 *      "btrfs subvolume set-default", mount_subvol() is called always.
1730
 */
Al Viro's avatar
Al Viro committed
1731
static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1732
		const char *device_name, void *data)
1733
{
1734 1735
	struct vfsmount *mnt_root;
	struct dentry *root;
1736 1737
	char *subvol_name = NULL;
	u64 subvol_objectid = 0;
1738 1739
	int error = 0;

1740 1741
	error = btrfs_parse_subvol_options(data, &subvol_name,
					&subvol_objectid);
1742 1743
	if (error) {
		kfree(subvol_name);
Al Viro's avatar
Al Viro committed
1744
		return ERR_PTR(error);
1745
	}
1746

1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757
	/* mount device's root (/) */
	mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data);
	if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
		if (flags & SB_RDONLY) {
			mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
				flags & ~SB_RDONLY, device_name, data);
		} else {
			mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
				flags | SB_RDONLY, device_name, data);
			if (IS_ERR(mnt_root)) {
				root = ERR_CAST(mnt_root);
1758
				kfree(subvol_name);
1759 1760
				goto out;
			}
1761

1762 1763 1764 1765 1766 1767
			down_write(&mnt_root->mnt_sb->s_umount);
			error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
			up_write(&mnt_root->mnt_sb->s_umount);
			if (error < 0) {
				root = ERR_PTR(error);
				mntput(mnt_root);
1768
				kfree(subvol_name);
1769 1770 1771
				goto out;
			}
		}
1772
	}
1773 1774
	if (IS_ERR(mnt_root)) {
		root = ERR_CAST(mnt_root);
1775
		kfree(subvol_name);
1776
		goto out;
1777
	}
1778

1779
	/* mount_subvol() will free subvol_name and mnt_root */
1780
	root = mount_subvol(subvol_name, subvol_objectid, mnt_root);
1781

1782 1783
out:
	return root;
1784
}
1785

1786
/*
 * Apply a new thread pool size to the filesystem's work queues.
 *
 * @fs_info:       filesystem whose work queues are resized
 * @new_pool_size: size to store in fs_info->thread_pool_size and to set as
 *                 the maximum of each work queue listed below
 * @old_pool_size: previous size; nothing is done when it equals the new one
 */
static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
				     u32 new_pool_size, u32 old_pool_size)
{
	if (new_pool_size == old_pool_size)
		return;

	fs_info->thread_pool_size = new_pool_size;

	/* Both sizes are u32, print them as unsigned */
	btrfs_info(fs_info, "resize thread pool %u -> %u",
	       old_pool_size, new_pool_size);

	btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_meta_write_workers,
				new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
	btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size);
	btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers,
				new_pool_size);
}

1812 1813 1814
static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
				       unsigned long old_opts, int flags)
{
Miao Xie's avatar
Miao Xie committed
1815 1816
	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
	    (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
1817
	     (flags & SB_RDONLY))) {
Miao Xie's avatar
Miao Xie committed
1818 1819 1820
		/* wait for any defraggers to finish */
		wait_event(fs_info->transaction_wait,
			   (atomic_read(&fs_info->defrag_running) == 0));
1821
		if (flags & SB_RDONLY)
Miao Xie's avatar
Miao Xie committed
1822 1823 1824 1825 1826 1827 1828 1829
			sync_filesystem(fs_info->sb);
	}
}

/*
 * Finish a remount by reacting to options that changed.
 *
 * @fs_info:  filesystem that was remounted; its mount_opt holds the new
 *            options
 * @old_opts: mount options in effect before the remount
 */
static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
					 unsigned long old_opts)
{
	/*
	 * We need to clean up all defraggable inodes if autodefrag has been
	 * disabled or the filesystem is read-only.
	 */
	if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
	    (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || sb_rdonly(fs_info->sb))) {
		btrfs_cleanup_defrag_inodes(fs_info);
	}

	/* If we toggled discard async */
	if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
	    btrfs_test_opt(fs_info, DISCARD_ASYNC))
		btrfs_discard_resume(fs_info);
	else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
		 !btrfs_test_opt(fs_info, DISCARD_ASYNC))
		btrfs_discard_cleanup(fs_info);
}

1847 1848
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
{
1849 1850
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
1851 1852 1853 1854
	unsigned old_flags = sb->s_flags;
	unsigned long old_opts = fs_info->mount_opt;
	unsigned long old_compress_type = fs_info->compress_type;
	u64 old_max_inline = fs_info->max_inline;
1855
	u32 old_thread_pool_size = fs_info->thread_pool_size;
1856
	u32 old_metadata_ratio = fs_info->metadata_ratio;
1857 1858
	int ret;

1859
	sync_filesystem(sb);
1860
	set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
Miao Xie's avatar
Miao Xie committed
1861

1862
	if (data) {
1863
		void *new_sec_opts = NULL;
1864

1865 1866
		ret = security_sb_eat_lsm_opts(data, &new_sec_opts);
		if (!ret)
1867
			ret = security_sb_remount(sb, new_sec_opts);
1868
		security_free_mnt_opts(&new_sec_opts);
1869 1870 1871 1872
		if (ret)
			goto restore;
	}

1873
	ret = btrfs_parse_options(fs_info, data, *flags);
1874
	if (ret)
1875
		goto restore;
1876

1877
	btrfs_remount_begin(fs_info, old_opts, *flags);
1878 1879 1880
	btrfs_resize_thread_pool(fs_info,
		fs_info->thread_pool_size, old_thread_pool_size);

1881
	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
Miao Xie's avatar
Miao Xie committed
1882
		goto out;
1883

1884
	if (*flags & SB_RDONLY) {
1885 1886 1887 1888
		/*
		 * this also happens on 'umount -rf' or on shutdown, when
		 * the filesystem is busy.
		 */
1889
		cancel_work_sync(&fs_info->async_reclaim_work);
1890
		cancel_work_sync(&fs_info->async_data_reclaim_work);
1891

1892 1893
		btrfs_discard_cleanup(fs_info);

1894 1895 1896 1897 1898
		/* wait for the uuid_scan task to finish */
		down(&fs_info->uuid_tree_rescan_sem);
		/* avoid complains from lockdep et al. */
		up(&fs_info->uuid_tree_rescan_sem);

1899
		sb->s_flags |= SB_RDONLY;
1900

1901
		/*
1902
		 * Setting SB_RDONLY will put the cleaner thread to
1903 1904 1905 1906 1907 1908 1909
		 * sleep at the next loop if it's already active.
		 * If it's already asleep, we'll leave unused block
		 * groups on disk until we're mounted read-write again
		 * unless we clean them up here.
		 */
		btrfs_delete_unused_bgs(fs_info);

1910 1911
		btrfs_dev_replace_suspend_for_unmount(fs_info);
		btrfs_scrub_cancel(fs_info);
1912
		btrfs_pause_balance(fs_info);
1913

1914
		ret = btrfs_commit_super(fs_info);
1915 1916
		if (ret)
			goto restore;
1917
	} else {
1918
		if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
1919
			btrfs_err(fs_info,
1920
				"Remounting read-write after error is not allowed");
1921 1922 1923
			ret = -EINVAL;
			goto restore;
		}
1924
		if (fs_info->fs_devices->rw_devices == 0) {
1925 1926
			ret = -EACCES;
			goto restore;
1927
		}
Yan Zheng's avatar
Yan Zheng committed
1928

1929
		if (!btrfs_check_rw_degradable(fs_info, NULL)) {
1930
			btrfs_warn(fs_info,
1931
		"too many missing devices, writable remount is not allowed");
1932 1933 1934 1935
			ret = -EACCES;
			goto restore;
		}

1936
		if (btrfs_super_log_root(fs_info->super_copy) != 0) {
1937 1938
			btrfs_warn(fs_info,
		"mount required to replay tree-log, cannot remount read-write");
1939 1940
			ret = -EINVAL;
			goto restore;
1941
		}
1942

1943
		ret = btrfs_cleanup_fs_roots(fs_info);
1944 1945
		if (ret)
			goto restore;
1946

1947
		/* recover relocation */
1948
		mutex_lock(&fs_info->cleaner_mutex);
1949
		ret = btrfs_recover_relocation(root);
1950
		mutex_unlock(&fs_info->cleaner_mutex);
1951 1952
		if (ret)
			goto restore;
1953

1954 1955 1956 1957
		ret = btrfs_resume_balance_async(fs_info);
		if (ret)
			goto restore;

1958 1959
		ret = btrfs_resume_dev_replace_async(fs_info);
		if (ret) {
1960
			btrfs_warn(fs_info, "failed to resume dev_replace");
1961 1962
			goto restore;
		}
1963

1964 1965
		btrfs_qgroup_rescan_resume(fs_info);

1966
		if (!fs_info->uuid_root) {
1967
			btrfs_info(fs_info, "creating UUID tree");
1968 1969
			ret = btrfs_create_uuid_tree(fs_info);
			if (ret) {
1970 1971 1972
				btrfs_warn(fs_info,
					   "failed to create the UUID tree %d",
					   ret);
1973 1974 1975
				goto restore;
			}
		}
1976
		sb->s_flags &= ~SB_RDONLY;
1977

1978
		set_bit(BTRFS_FS_OPEN, &fs_info->flags);
1979
	}
Miao Xie's avatar
Miao Xie committed
1980
out:
1981 1982 1983 1984 1985 1986
	/*
	 * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS,
	 * since the absence of the flag means it can be toggled off by remount.
	 */
	*flags |= SB_I_VERSION;

1987
	wake_up_process(fs_info->transaction_kthread);
Miao Xie's avatar
Miao Xie committed
1988
	btrfs_remount_cleanup(fs_info, old_opts);
1989 1990
	clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);

1991
	return 0;
1992 1993

restore:
1994
	/* We've hit an error - don't reset SB_RDONLY */
1995
	if (sb_rdonly(sb))
1996
		old_flags |= SB_RDONLY;
1997 1998 1999 2000
	sb->s_flags = old_flags;
	fs_info->mount_opt = old_opts;
	fs_info->compress_type = old_compress_type;
	fs_info->max_inline = old_max_inline;
2001 2002
	btrfs_resize_thread_pool(fs_info,
		old_thread_pool_size, fs_info->thread_pool_size);
2003
	fs_info->metadata_ratio = old_metadata_ratio;
Miao Xie's avatar
Miao Xie committed
2004
	btrfs_remount_cleanup(fs_info, old_opts);
2005 2006
	clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);

2007
	return ret;
2008 2009
}

2010
/* Used to sort the devices by max_avail(descending sort) */
2011
static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035
				       const void *dev_info2)
{
	if (((struct btrfs_device_info *)dev_info1)->max_avail >
	    ((struct btrfs_device_info *)dev_info2)->max_avail)
		return -1;
	else if (((struct btrfs_device_info *)dev_info1)->max_avail <
		 ((struct btrfs_device_info *)dev_info2)->max_avail)
		return 1;
	else
	return 0;
}

/*
 * Sort @devices in place so that the entry with the largest max_avail
 * (the max free extent size recorded for each device) comes first.
 *
 * @devices:    array of device info entries
 * @nr_devices: number of entries in @devices
 */
static inline void btrfs_descending_sort_devices(
					struct btrfs_device_info *devices,
					size_t nr_devices)
{
	const size_t entry_size = sizeof(*devices);

	/* NULL swap callback: sort() picks its own swap routine. */
	sort(devices, nr_devices, entry_size, btrfs_cmp_device_free_bytes,
	     NULL);
}

2036 2037 2038 2039
/*
 * The helper to calc the free space on the devices that can be used to store
 * file data.
 */
2040 2041
static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
					      u64 *free_bytes)
2042 2043 2044 2045 2046 2047 2048
{
	struct btrfs_device_info *devices_info;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	u64 type;
	u64 avail_space;
	u64 min_stripe_size;
2049
	int num_stripes = 1;
2050
	int i = 0, nr_devices;
2051
	const struct btrfs_raid_attr *rattr;
2052

2053
	/*
2054
	 * We aren't under the device list lock, so this is racy-ish, but good
2055 2056
	 * enough for our purposes.
	 */
2057
	nr_devices = fs_info->fs_devices->open_devices;
2058 2059 2060 2061 2062 2063 2064 2065 2066
	if (!nr_devices) {
		smp_mb();
		nr_devices = fs_info->fs_devices->open_devices;
		ASSERT(nr_devices);
		if (!nr_devices) {
			*free_bytes = 0;
			return 0;
		}
	}
2067

2068
	devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
2069
			       GFP_KERNEL);
2070 2071 2072
	if (!devices_info)
		return -ENOMEM;

2073
	/* calc min stripe number for data space allocation */
2074
	type = btrfs_data_alloc_profile(fs_info);
2075 2076
	rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)];

2077
	if (type & BTRFS_BLOCK_GROUP_RAID0)
2078
		num_stripes = nr_devices;
2079
	else if (type & BTRFS_BLOCK_GROUP_RAID1)
2080
		num_stripes = 2;
2081 2082
	else if (type & BTRFS_BLOCK_GROUP_RAID1C3)
		num_stripes = 3;
2083 2084
	else if (type & BTRFS_BLOCK_GROUP_RAID1C4)
		num_stripes = 4;
2085
	else if (type & BTRFS_BLOCK_GROUP_RAID10)
2086
		num_stripes = 4;
2087

2088 2089
	/* Adjust for more than 1 stripe per device */
	min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN;
2090

2091 2092
	rcu_read_lock();
	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
2093 2094
		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
						&device->dev_state) ||
2095 2096
		    !device->bdev ||
		    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
2097 2098
			continue;

2099 2100 2101
		if (i >= nr_devices)
			break;

2102 2103 2104
		avail_space = device->total_bytes - device->bytes_used;

		/* align with stripe_len */
2105
		avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN);
2106 2107

		/*
2108
		 * In order to avoid overwriting the superblock on the drive,
2109 2110
		 * btrfs starts at an offset of at least 1MB when doing chunk
		 * allocation.
2111 2112 2113
		 *
		 * This ensures we have at least min_stripe_size free space
		 * after excluding 1MB.
2114
		 */
2115
		if (avail_space <= SZ_1M + min_stripe_size)
2116 2117
			continue;

2118 2119
		avail_space -= SZ_1M;

2120 2121 2122 2123 2124
		devices_info[i].dev = device;
		devices_info[i].max_avail = avail_space;

		i++;
	}
2125
	rcu_read_unlock();
2126 2127 2128 2129 2130 2131 2132

	nr_devices = i;

	btrfs_descending_sort_devices(devices_info, nr_devices);

	i = nr_devices - 1;
	avail_space = 0;
2133 2134
	while (nr_devices >= rattr->devs_min) {
		num_stripes = min(num_stripes, nr_devices);
2135

2136 2137 2138 2139
		if (devices_info[i].max_avail >= min_stripe_size) {
			int j;
			u64 alloc_size;

2140
			avail_space += devices_info[i].max_avail * num_stripes;
2141
			alloc_size = devices_info[i].max_avail;
2142
			for (j = i + 1 - num_stripes; j <= i; j++)
2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153
				devices_info[j].max_avail -= alloc_size;
		}
		i--;
		nr_devices--;
	}

	kfree(devices_info);
	*free_bytes = avail_space;
	return 0;
}

2154 2155 2156 2157 2158 2159 2160
/*
 * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
 *
 * If there's a redundant raid level at DATA block groups, use the respective
 * multiplier to scale the sizes.
 *
 * Unused device space usage is based on simulating the chunk allocator
2161 2162 2163
 * algorithm that respects the device sizes and order of allocations.  This is
 * a close approximation of the actual use but there are other factors that may
 * change the result (like a new metadata chunk).
2164
 *
2165
 * If metadata is exhausted, f_bavail will be 0.
2166
 */
Chris Mason's avatar
Chris Mason committed
2167 2168
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
2169 2170
	struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
	struct btrfs_super_block *disk_super = fs_info->super_copy;
2171 2172
	struct btrfs_space_info *found;
	u64 total_used = 0;
2173
	u64 total_free_data = 0;
2174
	u64 total_free_meta = 0;
2175
	int bits = dentry->d_sb->s_blocksize_bits;
2176
	__be32 *fsid = (__be32 *)fs_info->fs_devices->fsid;
2177 2178
	unsigned factor = 1;
	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
2179
	int ret;
2180
	u64 thresh = 0;
2181
	int mixed = 0;
Chris Mason's avatar
Chris Mason committed
2182

2183
	list_for_each_entry(found, &fs_info->space_info, list) {
2184
		if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
2185 2186
			int i;

2187 2188 2189
			total_free_data += found->disk_total - found->disk_used;
			total_free_data -=
				btrfs_account_ro_block_groups_free_space(found);
2190 2191

			for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
2192 2193 2194
				if (!list_empty(&found->block_groups[i]))
					factor = btrfs_bg_type_to_factor(
						btrfs_raid_array[i].bg_flag);
2195
			}
2196
		}
2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207

		/*
		 * Metadata in mixed block goup profiles are accounted in data
		 */
		if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
			if (found->flags & BTRFS_BLOCK_GROUP_DATA)
				mixed = 1;
			else
				total_free_meta += found->disk_total -
					found->disk_used;
		}
2208

2209
		total_used += found->disk_used;
Josef Bacik's avatar
Josef Bacik committed
2210
	}
2211 2212 2213 2214 2215 2216 2217

	buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
	buf->f_blocks >>= bits;
	buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);

	/* Account global block reserve as used, it's in logical size already */
	spin_lock(&block_rsv->lock);
2218 2219 2220 2221 2222
	/* Mixed block groups accounting is not byte-accurate, avoid overflow */
	if (buf->f_bfree >= block_rsv->size >> bits)
		buf->f_bfree -= block_rsv->size >> bits;
	else
		buf->f_bfree = 0;
2223 2224
	spin_unlock(&block_rsv->lock);

2225
	buf->f_bavail = div_u64(total_free_data, factor);
2226
	ret = btrfs_calc_avail_data_space(fs_info, &total_free_data);
2227
	if (ret)
2228
		return ret;
2229
	buf->f_bavail += div_u64(total_free_data, factor);
2230
	buf->f_bavail = buf->f_bavail >> bits;
2231

2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244
	/*
	 * We calculate the remaining metadata space minus global reserve. If
	 * this is (supposedly) smaller than zero, there's no space. But this
	 * does not hold in practice, the exhausted state happens where's still
	 * some positive delta. So we apply some guesswork and compare the
	 * delta to a 4M threshold.  (Practically observed delta was ~2M.)
	 *
	 * We probably cannot calculate the exact threshold value because this
	 * depends on the internal reservations requested by various
	 * operations, so some operations that consume a few metadata will
	 * succeed even if the Avail is zero. But this is better than the other
	 * way around.
	 */
2245
	thresh = SZ_4M;
2246

2247 2248 2249 2250 2251 2252 2253 2254 2255
	/*
	 * We only want to claim there's no available space if we can no longer
	 * allocate chunks for our metadata profile and our global reserve will
	 * not fit in the free metadata space.  If we aren't ->full then we
	 * still can allocate chunks and thus are fine using the currently
	 * calculated f_bavail.
	 */
	if (!mixed && block_rsv->space_info->full &&
	    total_free_meta - thresh < block_rsv->size)
2256 2257
		buf->f_bavail = 0;

2258 2259 2260 2261
	buf->f_type = BTRFS_SUPER_MAGIC;
	buf->f_bsize = dentry->d_sb->s_blocksize;
	buf->f_namelen = BTRFS_NAME_LEN;

2262
	/* We treat it as constant endianness (it doesn't matter _which_)
2263
	   because we want the fsid to come out the same whether mounted
2264 2265 2266
	   on a big-endian or little-endian host */
	buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
	buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
2267
	/* Mask in the root object ID too, to disambiguate subvols */
2268 2269 2270 2271
	buf->f_fsid.val[0] ^=
		BTRFS_I(d_inode(dentry))->root->root_key.objectid >> 32;
	buf->f_fsid.val[1] ^=
		BTRFS_I(d_inode(dentry))->root->root_key.objectid;
2272

Chris Mason's avatar
Chris Mason committed
2273 2274
	return 0;
}
Chris Mason's avatar
Chris Mason committed
2275

Al Viro's avatar
Al Viro committed
2276 2277
/*
 * ->kill_sb callback: tear down the superblock, then free the btrfs
 * filesystem info that was attached to it.
 */
static void btrfs_kill_super(struct super_block *sb)
{
	/* Fetch the pointer first, it is freed after the sb is killed. */
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);

	kill_anon_super(sb);
	btrfs_free_fs_info(fs_info);
}

2283 2284 2285
static struct file_system_type btrfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "btrfs",
Al Viro's avatar
Al Viro committed
2286
	.mount		= btrfs_mount,
Al Viro's avatar
Al Viro committed
2287
	.kill_sb	= btrfs_kill_super,
2288
	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
2289
};
2290 2291 2292 2293 2294 2295 2296 2297 2298

/*
 * Second filesystem type with the same name as btrfs_fs_type, but mounting
 * through btrfs_mount_root().  It is also the type passed to
 * btrfs_scan_one_device() by the control ioctls.
 */
static struct file_system_type btrfs_root_fs_type = {
	.name		= "btrfs",
	.owner		= THIS_MODULE,
	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
	.mount		= btrfs_mount_root,
	.kill_sb	= btrfs_kill_super,
};

2299
/* Module alias for the "btrfs" filesystem name. */
MODULE_ALIAS_FS("btrfs");
2300

2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311
/*
 * open() handler of the control device.
 *
 * Always succeeds; the only thing it does is start the file off with an
 * empty private_data.
 */
static int btrfs_control_open(struct inode *inode, struct file *file)
{
	/*
	 * private_data of the control file holds the transaction once one
	 * has been started, and doubles as the "transaction in progress"
	 * marker, so it has to begin as NULL.
	 */
	file->private_data = NULL;

	return 0;
}

2312
/*
2313
 * Used by /dev/btrfs-control for devices ioctls.
2314
 */
2315 2316 2317 2318
static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
				unsigned long arg)
{
	struct btrfs_ioctl_vol_args *vol;
2319
	struct btrfs_device *device = NULL;
2320
	int ret = -ENOTTY;
2321

2322 2323 2324
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

Li Zefan's avatar
Li Zefan committed
2325 2326 2327
	vol = memdup_user((void __user *)arg, sizeof(*vol));
	if (IS_ERR(vol))
		return PTR_ERR(vol);
2328
	vol->name[BTRFS_PATH_NAME_MAX] = '\0';
2329

2330 2331
	switch (cmd) {
	case BTRFS_IOC_SCAN_DEV:
2332
		mutex_lock(&uuid_mutex);
2333 2334 2335
		device = btrfs_scan_one_device(vol->name, FMODE_READ,
					       &btrfs_root_fs_type);
		ret = PTR_ERR_OR_ZERO(device);
2336
		mutex_unlock(&uuid_mutex);
2337
		break;
2338 2339 2340
	case BTRFS_IOC_FORGET_DEV:
		ret = btrfs_forget_devices(vol->name);
		break;
Josef Bacik's avatar
Josef Bacik committed
2341
	case BTRFS_IOC_DEVICES_READY:
2342
		mutex_lock(&uuid_mutex);
2343 2344 2345
		device = btrfs_scan_one_device(vol->name, FMODE_READ,
					       &btrfs_root_fs_type);
		if (IS_ERR(device)) {
2346
			mutex_unlock(&uuid_mutex);
2347
			ret = PTR_ERR(device);
Josef Bacik's avatar
Josef Bacik committed
2348
			break;
2349
		}
2350 2351
		ret = !(device->fs_devices->num_devices ==
			device->fs_devices->total_devices);
2352
		mutex_unlock(&uuid_mutex);
Josef Bacik's avatar
Josef Bacik committed
2353
		break;
2354
	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
2355
		ret = btrfs_ioctl_get_supported_features((void __user*)arg);
2356
		break;
2357
	}
Li Zefan's avatar
Li Zefan committed
2358

2359
	kfree(vol);
2360
	return ret;
2361 2362
}

2363
static int btrfs_freeze(struct super_block *sb)
Yan's avatar
Yan committed
2364
{
2365
	struct btrfs_trans_handle *trans;
2366 2367
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
2368

2369
	set_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2370 2371 2372 2373 2374 2375
	/*
	 * We don't need a barrier here, we'll wait for any transaction that
	 * could be in progress on other threads (and do delayed iputs that
	 * we want to avoid on a frozen filesystem), or do the commit
	 * ourselves.
	 */
2376
	trans = btrfs_attach_transaction_barrier(root);
2377 2378 2379 2380 2381 2382
	if (IS_ERR(trans)) {
		/* no transaction, don't bother */
		if (PTR_ERR(trans) == -ENOENT)
			return 0;
		return PTR_ERR(trans);
	}
2383
	return btrfs_commit_transaction(trans);
Yan's avatar
Yan committed
2384 2385
}

2386 2387
static int btrfs_unfreeze(struct super_block *sb)
{
2388 2389 2390
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);

	clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2391 2392 2393
	return 0;
}

2394 2395 2396 2397 2398
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
	struct btrfs_device *dev, *first_dev = NULL;

2399 2400 2401 2402 2403
	/*
	 * Lightweight locking of the devices. We should not need
	 * device_list_mutex here as we only read the device data and the list
	 * is protected by RCU.  Even if a device is deleted during the list
	 * traversals, we'll get valid data, the freeing callback will wait at
2404
	 * least until the rcu_read_unlock.
2405 2406
	 */
	rcu_read_lock();
2407 2408 2409 2410 2411 2412 2413
	list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) {
		if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
			continue;
		if (!dev->name)
			continue;
		if (!first_dev || dev->devid < first_dev->devid)
			first_dev = dev;
2414 2415
	}

2416 2417 2418
	if (first_dev)
		seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
	else
2419
		WARN_ON(1);
2420
	rcu_read_unlock();
2421 2422 2423
	return 0;
}

2424
static const struct super_operations btrfs_super_ops = {
2425
	.drop_inode	= btrfs_drop_inode,
Al Viro's avatar
Al Viro committed
2426
	.evict_inode	= btrfs_evict_inode,
Chris Mason's avatar
Chris Mason committed
2427
	.put_super	= btrfs_put_super,
2428
	.sync_fs	= btrfs_sync_fs,
2429
	.show_options	= btrfs_show_options,
2430
	.show_devname	= btrfs_show_devname,
2431 2432
	.alloc_inode	= btrfs_alloc_inode,
	.destroy_inode	= btrfs_destroy_inode,
Al Viro's avatar
Al Viro committed
2433
	.free_inode	= btrfs_free_inode,
Chris Mason's avatar
Chris Mason committed
2434
	.statfs		= btrfs_statfs,
2435
	.remount_fs	= btrfs_remount,
2436
	.freeze_fs	= btrfs_freeze,
2437
	.unfreeze_fs	= btrfs_unfreeze,
Chris Mason's avatar
Chris Mason committed
2438
};
2439 2440

static const struct file_operations btrfs_ctl_fops = {
2441
	.open = btrfs_control_open,
2442
	.unlocked_ioctl	 = btrfs_control_ioctl,
2443
	.compat_ioctl = compat_ptr_ioctl,
2444
	.owner	 = THIS_MODULE,
2445
	.llseek = noop_llseek,
2446 2447 2448
};

static struct miscdevice btrfs_misc = {
2449
	.minor		= BTRFS_MINOR,
2450 2451 2452 2453
	.name		= "btrfs-control",
	.fops		= &btrfs_ctl_fops
};

2454 2455 2456
/*
 * Module aliases for the control device, by misc minor number and by
 * device node name.
 */
MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
MODULE_ALIAS("devname:btrfs-control");

2457
/* Register the control misc device; returns misc_register()'s result. */
static int __init btrfs_interface_init(void)
{
	return misc_register(&btrfs_misc);
}

2462
static __cold void btrfs_interface_exit(void)
2463
{
2464
	misc_deregister(&btrfs_misc);
2465 2466
}

2467
/*
 * Log a one-line banner with the crc32c implementation in use and the
 * optional debugging features that were compiled in.
 */
static void __init btrfs_print_mod_info(void)
{
	/* Built by string-literal concatenation of the enabled options. */
	static const char options[] = ""
#ifdef CONFIG_BTRFS_DEBUG
			", debug=on"
#endif
#ifdef CONFIG_BTRFS_ASSERT
			", assert=on"
#endif
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
			", integrity-checker=on"
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
			", ref-verify=on"
#endif
			;
	pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
}

2486 2487
/*
 * Module init: set up every btrfs subsystem, run the sanity tests and
 * register the filesystem type.
 *
 * Returns 0 on success.  If a step fails, the steps that already succeeded
 * are undone in reverse order through the label chain at the end of the
 * function and the error of the failing step is returned.
 */
static int __init init_btrfs_fs(void)
{
	int err;

	btrfs_props_init();

	err = btrfs_init_sysfs();
	if (err)
		return err;

	btrfs_init_compress();

	err = btrfs_init_cachep();
	if (err)
		goto free_compress;

	err = extent_io_init();
	if (err)
		goto free_cachep;

	err = extent_state_cache_init();
	if (err)
		goto free_extent_io;

	err = extent_map_init();
	if (err)
		goto free_extent_state_cache;

	err = ordered_data_init();
	if (err)
		goto free_extent_map;

	err = btrfs_delayed_inode_init();
	if (err)
		goto free_ordered_data;

	err = btrfs_auto_defrag_init();
	if (err)
		goto free_delayed_inode;

	err = btrfs_delayed_ref_init();
	if (err)
		goto free_auto_defrag;

	err = btrfs_prelim_ref_init();
	if (err)
		goto free_delayed_ref;

	err = btrfs_end_io_wq_init();
	if (err)
		goto free_prelim_ref;

	err = btrfs_interface_init();
	if (err)
		goto free_end_io_wq;

	btrfs_init_lockdep();

	btrfs_print_mod_info();

	err = btrfs_run_sanity_tests();
	if (err)
		goto unregister_ioctl;

	err = register_filesystem(&btrfs_fs_type);
	if (err)
		goto unregister_ioctl;

	return 0;

unregister_ioctl:
	btrfs_interface_exit();
free_end_io_wq:
	btrfs_end_io_wq_exit();
free_prelim_ref:
	btrfs_prelim_ref_exit();
free_delayed_ref:
	btrfs_delayed_ref_exit();
free_auto_defrag:
	btrfs_auto_defrag_exit();
free_delayed_inode:
	btrfs_delayed_inode_exit();
free_ordered_data:
	ordered_data_exit();
free_extent_map:
	extent_map_exit();
free_extent_state_cache:
	extent_state_cache_exit();
free_extent_io:
	extent_io_exit();
free_cachep:
	btrfs_destroy_cachep();
free_compress:
	btrfs_exit_compress();
	btrfs_exit_sysfs();

	return err;
}

/*
 * Module exit: tear down what init_btrfs_fs() set up, unregister the
 * filesystem type and clean up the fs uuids.
 */
static void __exit exit_btrfs_fs(void)
{
	btrfs_destroy_cachep();
	btrfs_delayed_ref_exit();
	btrfs_auto_defrag_exit();
	btrfs_delayed_inode_exit();
	btrfs_prelim_ref_exit();
	ordered_data_exit();
	extent_map_exit();
	extent_state_cache_exit();
	extent_io_exit();
	btrfs_interface_exit();
	btrfs_end_io_wq_exit();
	unregister_filesystem(&btrfs_fs_type);
	btrfs_exit_sysfs();
	btrfs_cleanup_fs_uuids();
	btrfs_exit_compress();
}

2604
late_initcall(init_btrfs_fs);
2605 2606 2607
module_exit(exit_btrfs_fs)

MODULE_LICENSE("GPL");
2608
MODULE_SOFTDEP("pre: crc32c");
2609
MODULE_SOFTDEP("pre: xxhash64");
2610
MODULE_SOFTDEP("pre: sha256");
2611
MODULE_SOFTDEP("pre: blake2b-256");