// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/super.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  super.c contains code to handle: - mount structures
 *                                   - super-block tables
 *                                   - filesystem drivers list
 *                                   - mount system call
 *                                   - umount system call
 *                                   - ustat system call
 *
 * GK 2/5/95  -  Changed to support mounting the root fs via NFS
 *
 *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
 *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
 *  Added options to /proc/mounts:
 *    Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
 *  Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
 *  Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
 */

#include <linux/export.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/writeback.h>		/* for the emergency remount stuff */
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/rculist_bl.h>
#include <linux/fscrypt.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
#include <linux/user_namespace.h>
#include <linux/fs_context.h>
#include <uapi/linux/mount.h>
#include "internal.h"

static int thaw_super_locked(struct super_block *sb);

static LIST_HEAD(super_blocks);
static DEFINE_SPINLOCK(sb_lock);

static char *sb_writers_name[SB_FREEZE_LEVELS] = {
	"sb_writers",
	"sb_pagefaults",
	"sb_internal",
};

/*
 * One thing we have to be careful of with a per-sb shrinker is that we don't
 * drop the last active reference to the superblock from within the shrinker.
 * If that happens we could trigger unregistering the shrinker from within the
 * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
 * take a passive reference to the superblock to avoid this from occurring.
 */
static unsigned long super_cache_scan(struct shrinker *shrink,
				      struct shrink_control *sc)
{
	struct super_block *sb;
	long	fs_objects = 0;
	long	total_objects;
	long	freed = 0;
	long	dentries;
	long	inodes;

	sb = container_of(shrink, struct super_block, s_shrink);

	/*
	 * Deadlock avoidance.  We may hold various FS locks, and we don't want
	 * to recurse into the FS that called us in clear_inode() and friends..
	 */
	if (!(sc->gfp_mask & __GFP_FS))
		return SHRINK_STOP;

	if (!trylock_super(sb))
		return SHRINK_STOP;

	if (sb->s_op->nr_cached_objects)
		fs_objects = sb->s_op->nr_cached_objects(sb, sc);

	inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
	dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
	total_objects = dentries + inodes + fs_objects + 1;
	if (!total_objects)
		total_objects = 1;

	/* proportion the scan between the caches */
	dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
	inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
	fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects);

	/*
	 * prune the dcache first as the icache is pinned by it, then
	 * prune the icache, followed by the filesystem specific caches
	 *
	 * Ensure that we always scan at least one object - memcg kmem
	 * accounting uses this to fully empty the caches.
	 */
	sc->nr_to_scan = dentries + 1;
	freed = prune_dcache_sb(sb, sc);
	sc->nr_to_scan = inodes + 1;
	freed += prune_icache_sb(sb, sc);

	if (fs_objects) {
		sc->nr_to_scan = fs_objects + 1;
		freed += sb->s_op->free_cached_objects(sb, sc);
	}

	up_read(&sb->s_umount);
	return freed;
}

static unsigned long super_cache_count(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct super_block *sb;
	long	total_objects = 0;

	sb = container_of(shrink, struct super_block, s_shrink);

	/*
	 * We don't call trylock_super() here as it is a scalability bottleneck,
	 * so we're exposed to partial setup state. The shrinker rwsem does not
	 * protect filesystem operations backing list_lru_shrink_count() or
	 * s_op->nr_cached_objects(). Counts can change between
	 * super_cache_count and super_cache_scan, so we really don't need locks
	 * here.
	 *
	 * However, if we are currently mounting the superblock, the underlying
	 * filesystem might be in a state of partial construction and hence it
	 * is dangerous to access it.  trylock_super() uses a SB_BORN check to
	 * avoid this situation, so do the same here. The memory barrier is
	 * matched with the one in mount_fs() as we don't hold locks here.
	 */
	if (!(sb->s_flags & SB_BORN))
		return 0;
	smp_rmb();

	if (sb->s_op && sb->s_op->nr_cached_objects)
		total_objects = sb->s_op->nr_cached_objects(sb, sc);

	total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
	total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);

	if (!total_objects)
		return SHRINK_EMPTY;

	total_objects = vfs_pressure_ratio(total_objects);
	return total_objects;
}

static void destroy_super_work(struct work_struct *work)
{
	struct super_block *s = container_of(work, struct super_block,
							destroy_work);
	int i;

	for (i = 0; i < SB_FREEZE_LEVELS; i++)
		percpu_free_rwsem(&s->s_writers.rw_sem[i]);
	kfree(s);
}

static void destroy_super_rcu(struct rcu_head *head)
{
	struct super_block *s = container_of(head, struct super_block, rcu);
	INIT_WORK(&s->destroy_work, destroy_super_work);
	schedule_work(&s->destroy_work);
}

/* Free a superblock that has never been seen by anyone */
static void destroy_unused_super(struct super_block *s)
{
	if (!s)
		return;
	up_write(&s->s_umount);
	list_lru_destroy(&s->s_dentry_lru);
	list_lru_destroy(&s->s_inode_lru);
	security_sb_free(s);
	put_user_ns(s->s_user_ns);
	kfree(s->s_subtype);
	free_prealloced_shrinker(&s->s_shrink);
	/* no delays needed */
	destroy_super_work(&s->destroy_work);
}

/**
 *	alloc_super	-	create new superblock
 *	@type:	filesystem type superblock should belong to
 *	@flags: the mount flags
 *	@user_ns: User namespace for the super_block
 *
 *	Allocates and initializes a new &struct super_block.  alloc_super()
 *	returns a pointer to the new superblock or %NULL if allocation failed.
 */
static struct super_block *alloc_super(struct file_system_type *type, int flags,
				       struct user_namespace *user_ns)
{
	struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
	static const struct super_operations default_op;
	int i;

	if (!s)
		return NULL;

	INIT_LIST_HEAD(&s->s_mounts);
	s->s_user_ns = get_user_ns(user_ns);
	init_rwsem(&s->s_umount);
	lockdep_set_class(&s->s_umount, &type->s_umount_key);
	/*
	 * sget() can have s_umount recursion.
	 *
	 * When it cannot find a suitable sb, it allocates a new
	 * one (this one), and tries again to find a suitable old
	 * one.
	 *
	 * In case that succeeds, it will acquire the s_umount
	 * lock of the old one. Since these are clearly distinct
	 * locks, and this object isn't exposed yet, there's no
	 * risk of deadlocks.
	 *
	 * Annotate this by putting this lock in a different
	 * subclass.
	 */
	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);

	if (security_sb_alloc(s))
		goto fail;

	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
		if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
					sb_writers_name[i],
					&type->s_writers_key[i]))
			goto fail;
	}
	init_waitqueue_head(&s->s_writers.wait_unfrozen);
	s->s_bdi = &noop_backing_dev_info;
	s->s_flags = flags;
	if (s->s_user_ns != &init_user_ns)
		s->s_iflags |= SB_I_NODEV;
	INIT_HLIST_NODE(&s->s_instances);
	INIT_HLIST_BL_HEAD(&s->s_roots);
	mutex_init(&s->s_sync_lock);
	INIT_LIST_HEAD(&s->s_inodes);
	spin_lock_init(&s->s_inode_list_lock);
	INIT_LIST_HEAD(&s->s_inodes_wb);
	spin_lock_init(&s->s_inode_wblist_lock);

	s->s_count = 1;
	atomic_set(&s->s_active, 1);
	mutex_init(&s->s_vfs_rename_mutex);
	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
	init_rwsem(&s->s_dquot.dqio_sem);
	s->s_maxbytes = MAX_NON_LFS;
	s->s_op = &default_op;
	s->s_time_gran = 1000000000;
	s->s_time_min = TIME64_MIN;
	s->s_time_max = TIME64_MAX;

	s->s_shrink.seeks = DEFAULT_SEEKS;
	s->s_shrink.scan_objects = super_cache_scan;
	s->s_shrink.count_objects = super_cache_count;
	s->s_shrink.batch = 1024;
	s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
	if (prealloc_shrinker(&s->s_shrink))
		goto fail;
	if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink))
		goto fail;
	if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink))
		goto fail;
	return s;

fail:
	destroy_unused_super(s);
	return NULL;
}

/* Superblock refcounting  */

/*
 * Drop a superblock's refcount.  The caller must hold sb_lock.
 */
static void __put_super(struct super_block *s)
{
	if (!--s->s_count) {
		list_del_init(&s->s_list);
		WARN_ON(s->s_dentry_lru.node);
		WARN_ON(s->s_inode_lru.node);
		WARN_ON(!list_empty(&s->s_mounts));
		security_sb_free(s);
		fscrypt_sb_free(s);
		put_user_ns(s->s_user_ns);
		kfree(s->s_subtype);
		call_rcu(&s->rcu, destroy_super_rcu);
	}
}

/**
 *	put_super	-	drop a temporary reference to superblock
 *	@sb: superblock in question
 *
 *	Drops a temporary reference, frees superblock if there's no
 *	references left.
 */
void put_super(struct super_block *sb)
{
	spin_lock(&sb_lock);
	__put_super(sb);
	spin_unlock(&sb_lock);
}


/**
 *	deactivate_locked_super	-	drop an active reference to superblock
 *	@s: superblock to deactivate
 *
 *	Drops an active reference to superblock, converting it into a temporary
 *	one if there are no other active references left.  In that case we
 *	tell fs driver to shut it down and drop the temporary reference we
 *	had just acquired.
 *
 *	Caller holds exclusive lock on superblock; that lock is released.
 */
void deactivate_locked_super(struct super_block *s)
{
	struct file_system_type *fs = s->s_type;
	if (atomic_dec_and_test(&s->s_active)) {
		unregister_shrinker(&s->s_shrink);
		fs->kill_sb(s);

		/*
		 * Since list_lru_destroy() may sleep, we cannot call it from
		 * put_super(), where we hold the sb_lock. Therefore we destroy
		 * the lru lists right now.
		 */
		list_lru_destroy(&s->s_dentry_lru);
		list_lru_destroy(&s->s_inode_lru);

		put_filesystem(fs);
		put_super(s);
	} else {
		up_write(&s->s_umount);
	}
}

EXPORT_SYMBOL(deactivate_locked_super);

/**
 *	deactivate_super	-	drop an active reference to superblock
 *	@s: superblock to deactivate
 *
 *	Variant of deactivate_locked_super(), except that superblock is *not*
 *	locked by caller.  If we are going to drop the final active reference,
 *	lock will be acquired prior to that.
 */
void deactivate_super(struct super_block *s)
{
	if (!atomic_add_unless(&s->s_active, -1, 1)) {
		down_write(&s->s_umount);
		deactivate_locked_super(s);
	}
}

EXPORT_SYMBOL(deactivate_super);

/**
 *	grab_super - acquire an active reference
 *	@s: reference we are trying to make active
 *
 *	Tries to acquire an active reference.  grab_super() is used when we
 * 	had just found a superblock in super_blocks or fs_type->fs_supers
 *	and want to turn it into a full-blown active reference.  grab_super()
 *	is called with sb_lock held and drops it.  Returns 1 in case of
 *	success, 0 if we failed (superblock contents were already dead or
 *	dying when grab_super() had been called).  Note that this is only
 *	called for superblocks not in rundown mode (== ones still on ->fs_supers
 *	of their type), so increment of ->s_count is OK here.
 */
static int grab_super(struct super_block *s) __releases(sb_lock)
{
	s->s_count++;
	spin_unlock(&sb_lock);
	down_write(&s->s_umount);
	if ((s->s_flags & SB_BORN) && atomic_inc_not_zero(&s->s_active)) {
		put_super(s);
		return 1;
	}
	up_write(&s->s_umount);
	put_super(s);
	return 0;
}

/*
 *	trylock_super - try to grab ->s_umount shared
 *	@sb: reference we are trying to grab
 *
 *	Try to prevent fs shutdown.  This is used in places where we
 *	cannot take an active reference but we need to ensure that the
 *	filesystem is not shut down while we are working on it. It returns
 *	false if we cannot acquire s_umount or if we lose the race and
 *	filesystem already got into shutdown, and returns true with the s_umount
 *	lock held in read mode in case of success. On successful return,
 *	the caller must drop the s_umount lock when done.
 *
 *	Note that unlike get_super() et.al. this one does *not* bump ->s_count.
 *	The reason why it's safe is that we are OK with doing trylock instead
 *	of down_read().  There's a couple of places that are OK with that, but
 *	it's very much not a general-purpose interface.
412
 */
413
bool trylock_super(struct super_block *sb)
414 415
{
	if (down_read_trylock(&sb->s_umount)) {
416
		if (!hlist_unhashed(&sb->s_instances) &&
417
		    sb->s_root && (sb->s_flags & SB_BORN))
418 419 420 421 422 423 424
			return true;
		up_read(&sb->s_umount);
	}

	return false;
}

/**
 *	retire_super	-	prevents superblock from being reused
 *	@sb: superblock to retire
 *
 *	The function marks superblock to be ignored in superblock test, which
 *	prevents it from being reused for any new mounts.  If the superblock has
 *	a private bdi, it also unregisters it, but doesn't reduce the refcount
 *	of the superblock to prevent potential races.  The refcount is reduced
 *	by generic_shutdown_super().  The function can not be called
 *	concurrently with generic_shutdown_super().  It is safe to call the
 *	function multiple times, subsequent calls have no effect.
 *
 *	The marker will affect the re-use only for block-device-based
 *	superblocks.  Other superblocks will still get marked if this function
 *	is used, but that will not affect their reusability.
 */
void retire_super(struct super_block *sb)
{
	WARN_ON(!sb->s_bdev);
	down_write(&sb->s_umount);
	if (sb->s_iflags & SB_I_PERSB_BDI) {
		bdi_unregister(sb->s_bdi);
		sb->s_iflags &= ~SB_I_PERSB_BDI;
	}
	sb->s_iflags |= SB_I_RETIRED;
	up_write(&sb->s_umount);
}
EXPORT_SYMBOL(retire_super);
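
/*
 * Illustrative sketch (not part of the original file): a filesystem that has
 * shut itself down after an unrecoverable error can retire its superblock so
 * that a later mount of the same block device gets a fresh superblock rather
 * than reusing the dead one.  examplefs_force_shutdown() is a hypothetical
 * wrapper; the filesystem-specific teardown is elided.
 */
static void examplefs_force_shutdown(struct super_block *sb)
{
	/* ... filesystem-specific shutdown: fail new I/O, log the error ... */

	/* make sure no new mount of the same bdev reuses this superblock */
	retire_super(sb);
}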

/**
 *	generic_shutdown_super	-	common helper for ->kill_sb()
 *	@sb: superblock to kill
 *
 *	generic_shutdown_super() does all fs-independent work on superblock
 *	shutdown.  Typical ->kill_sb() should pick all fs-specific objects
 *	that need destruction out of superblock, call generic_shutdown_super()
 *	and release aforementioned objects.  Note: dentries and inodes _are_
 *	taken care of and do not need specific handling.
 *
 *	Upon calling this function, the filesystem may no longer alter or
 *	rearrange the set of dentries belonging to this super_block, nor may it
 *	change the attachments of dentries to inodes.
 */
void generic_shutdown_super(struct super_block *sb)
{
	const struct super_operations *sop = sb->s_op;

	if (sb->s_root) {
		shrink_dcache_for_umount(sb);
		sync_filesystem(sb);
		sb->s_flags &= ~SB_ACTIVE;

		cgroup_writeback_umount();

		/* evict all inodes with zero refcount */
		evict_inodes(sb);
		/* only nonzero refcount inodes can have marks */
		fsnotify_sb_delete(sb);
		security_sb_delete(sb);

		if (sb->s_dio_done_wq) {
			destroy_workqueue(sb->s_dio_done_wq);
			sb->s_dio_done_wq = NULL;
		}

		if (sop->put_super)
			sop->put_super(sb);

		if (!list_empty(&sb->s_inodes)) {
			printk("VFS: Busy inodes after unmount of %s. "
			   "Self-destruct in 5 seconds.  Have a nice day...\n",
			   sb->s_id);
		}
	}
	spin_lock(&sb_lock);
	/* should be initialized for __put_super_and_need_restart() */
	hlist_del_init(&sb->s_instances);
	spin_unlock(&sb_lock);
	up_write(&sb->s_umount);
	if (sb->s_bdi != &noop_backing_dev_info) {
		if (sb->s_iflags & SB_I_PERSB_BDI)
			bdi_unregister(sb->s_bdi);
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
}

EXPORT_SYMBOL(generic_shutdown_super);

bool mount_capable(struct fs_context *fc)
{
	if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT))
		return capable(CAP_SYS_ADMIN);
	else
		return ns_capable(fc->user_ns, CAP_SYS_ADMIN);
}

/**
 * sget_fc - Find or create a superblock
 * @fc:	Filesystem context.
 * @test: Comparison callback
 * @set: Setup callback
 *
 * Find or create a superblock using the parameters stored in the filesystem
 * context and the two callback functions.
 *
 * If an extant superblock is matched, then that will be returned with an
 * elevated reference count that the caller must transfer or discard.
 *
 * If no match is made, a new superblock will be allocated and basic
 * initialisation will be performed (s_type, s_fs_info and s_id will be set and
 * the set() callback will be invoked), the superblock will be published and it
 * will be returned in a partially constructed state with SB_BORN and SB_ACTIVE
 * as yet unset.
 */
struct super_block *sget_fc(struct fs_context *fc,
			    int (*test)(struct super_block *, struct fs_context *),
			    int (*set)(struct super_block *, struct fs_context *))
{
	struct super_block *s = NULL;
	struct super_block *old;
	struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns;
	int err;

retry:
	spin_lock(&sb_lock);
	if (test) {
		hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) {
			if (test(old, fc))
				goto share_extant_sb;
		}
	}
	if (!s) {
		spin_unlock(&sb_lock);
		s = alloc_super(fc->fs_type, fc->sb_flags, user_ns);
		if (!s)
			return ERR_PTR(-ENOMEM);
		goto retry;
	}

	s->s_fs_info = fc->s_fs_info;
	err = set(s, fc);
	if (err) {
		s->s_fs_info = NULL;
		spin_unlock(&sb_lock);
		destroy_unused_super(s);
		return ERR_PTR(err);
	}
	fc->s_fs_info = NULL;
	s->s_type = fc->fs_type;
	s->s_iflags |= fc->s_iflags;
	strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id));
	list_add_tail(&s->s_list, &super_blocks);
	hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
	spin_unlock(&sb_lock);
	get_filesystem(s->s_type);
	register_shrinker_prepared(&s->s_shrink);
	return s;

share_extant_sb:
	if (user_ns != old->s_user_ns) {
		spin_unlock(&sb_lock);
		destroy_unused_super(s);
		return ERR_PTR(-EBUSY);
	}
	if (!grab_super(old))
		goto retry;
	destroy_unused_super(s);
	return old;
}
EXPORT_SYMBOL(sget_fc);
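
/*
 * Illustrative sketch (not part of the original file): a filesystem on the
 * new mount API that keys its superblocks by an opaque pointer carried in
 * fc->s_fs_info can drive sget_fc() directly from its ->get_tree() hook.
 * The examplefs_* names are hypothetical; set_anon_super_fc() is declared in
 * <linux/fs_context.h>.
 */
static int examplefs_test_super(struct super_block *sb, struct fs_context *fc)
{
	return sb->s_fs_info == fc->s_fs_info;	/* same key => share the sb */
}

static int examplefs_fill_super(struct super_block *sb, struct fs_context *fc)
{
	/* allocate the root inode, set sb->s_op and sb->s_root, ... */
	return 0;
}

static int examplefs_get_tree(struct fs_context *fc)
{
	struct super_block *sb;
	int err;

	sb = sget_fc(fc, examplefs_test_super, set_anon_super_fc);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	if (!sb->s_root) {
		/* freshly allocated superblock: finish constructing it */
		err = examplefs_fill_super(sb, fc);
		if (err) {
			deactivate_locked_super(sb);
			return err;
		}
		sb->s_flags |= SB_ACTIVE;
	}

	fc->root = dget(sb->s_root);
	return 0;
}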

/**
 *	sget	-	find or create a superblock
 *	@type:	  filesystem type superblock should belong to
 *	@test:	  comparison callback
 *	@set:	  setup callback
 *	@flags:	  mount flags
 *	@data:	  argument to each of them
 */
struct super_block *sget(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			int flags,
			void *data)
{
	struct user_namespace *user_ns = current_user_ns();
	struct super_block *s = NULL;
	struct super_block *old;
	int err;

	/* We don't yet pass the user namespace of the parent
	 * mount through to here so always use &init_user_ns
	 * until that changes.
	 */
	if (flags & SB_SUBMOUNT)
		user_ns = &init_user_ns;

retry:
	spin_lock(&sb_lock);
	if (test) {
		hlist_for_each_entry(old, &type->fs_supers, s_instances) {
			if (!test(old, data))
				continue;
			if (user_ns != old->s_user_ns) {
				spin_unlock(&sb_lock);
				destroy_unused_super(s);
				return ERR_PTR(-EBUSY);
			}
			if (!grab_super(old))
				goto retry;
			destroy_unused_super(s);
			return old;
		}
	}
	if (!s) {
		spin_unlock(&sb_lock);
		s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns);
		if (!s)
			return ERR_PTR(-ENOMEM);
		goto retry;
	}

	err = set(s, data);
	if (err) {
		spin_unlock(&sb_lock);
		destroy_unused_super(s);
		return ERR_PTR(err);
	}
	s->s_type = type;
	strlcpy(s->s_id, type->name, sizeof(s->s_id));
	list_add_tail(&s->s_list, &super_blocks);
	hlist_add_head(&s->s_instances, &type->fs_supers);
	spin_unlock(&sb_lock);
	get_filesystem(type);
	register_shrinker_prepared(&s->s_shrink);
	return s;
}
EXPORT_SYMBOL(sget);

void drop_super(struct super_block *sb)
{
	up_read(&sb->s_umount);
	put_super(sb);
}

EXPORT_SYMBOL(drop_super);

void drop_super_exclusive(struct super_block *sb)
{
	up_write(&sb->s_umount);
	put_super(sb);
}
EXPORT_SYMBOL(drop_super_exclusive);

static void __iterate_supers(void (*f)(struct super_block *))
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		sb->s_count++;
		spin_unlock(&sb_lock);

		f(sb);

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}
/**
 *	iterate_supers - call function for all active superblocks
 *	@f: function to call
 *	@arg: argument to pass to it
 *
 *	Scans the superblock list and calls given function, passing it
 *	locked superblock and given argument.
 */
void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		sb->s_count++;
		spin_unlock(&sb_lock);

		down_read(&sb->s_umount);
		if (sb->s_root && (sb->s_flags & SB_BORN))
			f(sb, arg);
		up_read(&sb->s_umount);

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}
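
/*
 * Illustrative sketch (not part of the original file): a typical caller of
 * iterate_supers().  The callback is invoked with ->s_umount held shared and
 * only for superblocks that are fully born; sync_one_sb_example() is a
 * hypothetical name, but the shape mirrors what fs/sync.c does for sync(2).
 */
static void sync_one_sb_example(struct super_block *sb, void *arg)
{
	if (!sb_rdonly(sb))
		sync_filesystem(sb);
}

static void sync_all_supers_example(void)
{
	iterate_supers(sync_one_sb_example, NULL);
}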

/**
 *	iterate_supers_type - call function for superblocks of given type
 *	@type: fs type
 *	@f: function to call
 *	@arg: argument to pass to it
 *
 *	Scans the superblock list and calls given function, passing it
 *	locked superblock and given argument.
 */
void iterate_supers_type(struct file_system_type *type,
	void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
		sb->s_count++;
		spin_unlock(&sb_lock);

		down_read(&sb->s_umount);
		if (sb->s_root && (sb->s_flags & SB_BORN))
			f(sb, arg);
		up_read(&sb->s_umount);

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}

EXPORT_SYMBOL(iterate_supers_type);

/**
 * get_super - get the superblock of a device
 * @bdev: device to get the superblock for
 *
 * Scans the superblock list and finds the superblock of the file system
 * mounted on the device given. %NULL is returned if no match is found.
 */
struct super_block *get_super(struct block_device *bdev)
{
	struct super_block *sb;

Linus Torvalds's avatar
		return NULL;

	spin_lock(&sb_lock);
rescan:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_bdev == bdev) {
			sb->s_count++;
			spin_unlock(&sb_lock);
			down_read(&sb->s_umount);
			/* still alive? */
			if (sb->s_root && (sb->s_flags & SB_BORN))
				return sb;
			up_read(&sb->s_umount);
			/* nope, got unmounted */
			spin_lock(&sb_lock);
			__put_super(sb);
			goto rescan;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

/**
 * get_active_super - get an active reference to the superblock of a device
 * @bdev: device to get the superblock for
 *
 * Scans the superblock list and finds the superblock of the file system
 * mounted on the device given.  Returns the superblock with an active
 * reference or %NULL if none was found.
 */
struct super_block *get_active_super(struct block_device *bdev)
{
	struct super_block *sb;

	if (!bdev)
		return NULL;

restart:
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_bdev == bdev) {
			if (!grab_super(sb))
				goto restart;
			up_write(&sb->s_umount);
			return sb;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

struct super_block *user_get_super(dev_t dev, bool excl)
{
	struct super_block *sb;

	spin_lock(&sb_lock);
rescan:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_dev ==  dev) {
			sb->s_count++;
			spin_unlock(&sb_lock);
			if (excl)
				down_write(&sb->s_umount);
			else
				down_read(&sb->s_umount);
			/* still alive? */
			if (sb->s_root && (sb->s_flags & SB_BORN))
				return sb;
			if (excl)
				up_write(&sb->s_umount);
			else
				up_read(&sb->s_umount);
			/* nope, got unmounted */
			spin_lock(&sb_lock);
			__put_super(sb);
			goto rescan;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

/**
 * reconfigure_super - asks filesystem to change superblock parameters
 * @fc: The superblock and configuration
 *
 * Alters the configuration parameters of a live superblock.
 */
int reconfigure_super(struct fs_context *fc)
{
	struct super_block *sb = fc->root->d_sb;
	int retval;
	bool remount_ro = false;
	bool force = fc->sb_flags & SB_FORCE;

	if (fc->sb_flags_mask & ~MS_RMT_MASK)
		return -EINVAL;
	if (sb->s_writers.frozen != SB_UNFROZEN)
		return -EBUSY;

	retval = security_sb_remount(sb, fc->security);
	if (retval)
		return retval;

	if (fc->sb_flags_mask & SB_RDONLY) {
#ifdef CONFIG_BLOCK
		if (!(fc->sb_flags & SB_RDONLY) && sb->s_bdev &&
		    bdev_read_only(sb->s_bdev))
			return -EACCES;
#endif

		remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb);
	}

	if (remount_ro) {
		if (!hlist_empty(&sb->s_pins)) {
			up_write(&sb->s_umount);
			group_pin_kill(&sb->s_pins);
			down_write(&sb->s_umount);
			if (!sb->s_root)
				return 0;
			if (sb->s_writers.frozen != SB_UNFROZEN)
				return -EBUSY;
			remount_ro = !sb_rdonly(sb);
		}
	}
	shrink_dcache_sb(sb);

	/* If we are reconfiguring to RDONLY and current sb is read/write,
	 * make sure there are no files open for writing.
	 */
	if (remount_ro) {
		if (force) {
			sb->s_readonly_remount = 1;
			smp_wmb();
		} else {
			retval = sb_prepare_remount_readonly(sb);
			if (retval)
				return retval;
		}
	}

	if (fc->ops->reconfigure) {
		retval = fc->ops->reconfigure(fc);
		if (retval) {
			if (!force)
				goto cancel_readonly;
			/* If forced remount, go ahead despite any errors */
			WARN(1, "forced remount of a %s fs returned %i\n",
			     sb->s_type->name, retval);
		}
	}

	WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) |
				 (fc->sb_flags & fc->sb_flags_mask)));
	/* Needs to be ordered wrt mnt_is_readonly() */
	smp_wmb();
	sb->s_readonly_remount = 0;

	/*
	 * Some filesystems modify their metadata via some other path than the
	 * bdev buffer cache (eg. use a private mapping, or directories in
	 * pagecache, etc). Also file data modifications go via their own
	 * mappings. So If we try to mount readonly then copy the filesystem
	 * from bdev, we could get stale data, so invalidate it to give a best
	 * effort at coherency.
	 */
	if (remount_ro && sb->s_bdev)
		invalidate_bdev(sb->s_bdev);
	return 0;

cancel_readonly:
	sb->s_readonly_remount = 0;
	return retval;
}

static void do_emergency_remount_callback(struct super_block *sb)
{
	down_write(&sb->s_umount);
	if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
	    !sb_rdonly(sb)) {
		struct fs_context *fc;

		fc = fs_context_for_reconfigure(sb->s_root,
					SB_RDONLY | SB_FORCE, SB_RDONLY);
		if (!IS_ERR(fc)) {
			if (parse_monolithic_mount_data(fc, NULL) == 0)
				(void)reconfigure_super(fc);
			put_fs_context(fc);
		}
	}
	up_write(&sb->s_umount);
}

static void do_emergency_remount(struct work_struct *work)
{
	__iterate_supers(do_emergency_remount_callback);
	kfree(work);
	printk("Emergency Remount complete\n");
}

void emergency_remount(void)
{
	struct work_struct *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (work) {
		INIT_WORK(work, do_emergency_remount);
		schedule_work(work);
	}
}

static void do_thaw_all_callback(struct super_block *sb)
{
	down_write(&sb->s_umount);
	if (sb->s_root && sb->s_flags & SB_BORN) {
		emergency_thaw_bdev(sb);
		thaw_super_locked(sb);
	} else {
		up_write(&sb->s_umount);
	}
}

static void do_thaw_all(struct work_struct *work)
{
	__iterate_supers(do_thaw_all_callback);
	kfree(work);
	printk(KERN_WARNING "Emergency Thaw complete\n");
}

/**
 * emergency_thaw_all -- forcibly thaw every frozen filesystem
 *
 * Used for emergency unfreeze of all filesystems via SysRq
 */
void emergency_thaw_all(void)
{
	struct work_struct *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (work) {
		INIT_WORK(work, do_thaw_all);
		schedule_work(work);
	}
}

static DEFINE_IDA(unnamed_dev_ida);

/**
 * get_anon_bdev - Allocate a block device for filesystems which don't have one.
 * @p: Pointer to a dev_t.
 *
 * Filesystems which don't use real block devices can call this function
 * to allocate a virtual block device.
 *
 * Context: Any context.  Frequently called while holding sb_lock.
 * Return: 0 on success, -EMFILE if there are no anonymous bdevs left
 * or -ENOMEM if memory allocation failed.
 */
int get_anon_bdev(dev_t *p)
{
	int dev;

	/*
	 * Many userspace utilities consider an FSID of 0 invalid.
	 * Always return at least 1 from get_anon_bdev.
	 */
	dev = ida_alloc_range(&unnamed_dev_ida, 1, (1 << MINORBITS) - 1,
			GFP_ATOMIC);
	if (dev == -ENOSPC)
		dev = -EMFILE;
	if (dev < 0)
		return dev;

	*p = MKDEV(0, dev);
	return 0;
}
EXPORT_SYMBOL(get_anon_bdev);

void free_anon_bdev(dev_t dev)
{
	ida_free(&unnamed_dev_ida, MINOR(dev));
}
EXPORT_SYMBOL(free_anon_bdev);

int set_anon_super(struct super_block *s, void *data)
{
	return get_anon_bdev(&s->s_dev);
}
EXPORT_SYMBOL(set_anon_super);

void kill_anon_super(struct super_block *sb)
{
	dev_t dev = sb->s_dev;
	generic_shutdown_super(sb);
	free_anon_bdev(dev);
}
EXPORT_SYMBOL(kill_anon_super);

void kill_litter_super(struct super_block *sb)
{
	if (sb->s_root)
		d_genocide(sb->s_root);
	kill_anon_super(sb);
}
EXPORT_SYMBOL(kill_litter_super);

int set_anon_super_fc(struct super_block *sb, struct fs_context *fc)
{
	return set_anon_super(sb, NULL);
}
EXPORT_SYMBOL(set_anon_super_fc);

static int test_keyed_super(struct super_block *sb, struct fs_context *fc)
{
	return sb->s_fs_info == fc->s_fs_info;
}

static int test_single_super(struct super_block *s, struct fs_context *fc)
{
	return 1;
}

/**
 * vfs_get_super - Get a superblock with a search key set in s_fs_info.
 * @fc: The filesystem context holding the parameters
 * @keying: How to distinguish superblocks
 * @fill_super: Helper to initialise a new superblock
 *
 * Search for a superblock and create a new one if not found.  The search
 * criterion is controlled by @keying.  If the search fails, a new superblock
 * is created and @fill_super() is called to initialise it.
 *
 * @keying can take one of a number of values:
 *
 * (1) vfs_get_single_super - Only one superblock of this type may exist on the
 *     system.  This is typically used for special system filesystems.
 *
 * (2) vfs_get_keyed_super - Multiple superblocks may exist, but they must have
 *     distinct keys (where the key is in s_fs_info).  Searching for the same
 *     key again will turn up the superblock for that key.
 *
 * (3) vfs_get_independent_super - Multiple superblocks may exist and are
 *     unkeyed.  Each call will get a new superblock.
 *
 * A permissions check is made by sget_fc() unless we're getting a superblock
 * for a kernel-internal mount or a submount.
 */
int vfs_get_super(struct fs_context *fc,
		  enum vfs_get_super_keying keying,
		  int (*fill_super)(struct super_block *sb,
				    struct fs_context *fc))
{
	int (*test)(struct super_block *, struct fs_context *);
	struct super_block *sb;
	int err;

	switch (keying) {
	case vfs_get_single_super:
	case vfs_get_single_reconf_super:
		test = test_single_super;
		break;
	case vfs_get_keyed_super:
		test = test_keyed_super;
		break;
	case vfs_get_independent_super:
		test = NULL;
		break;
	default:
		BUG();
	}

	sb = sget_fc(fc, test, set_anon_super_fc);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	if (!sb->s_root) {
		err = fill_super(sb, fc);
		if (err)
			goto error;

		sb->s_flags |= SB_ACTIVE;
		fc->root = dget(sb->s_root);
	} else {
		fc->root = dget(sb->s_root);
		if (keying == vfs_get_single_reconf_super) {
			err = reconfigure_super(fc);
			if (err < 0) {
				dput(fc->root);
				fc->root = NULL;
				goto error;
			}
		}
	}

	return 0;

error:
	deactivate_locked_super(sb);
	return err;
}
EXPORT_SYMBOL(vfs_get_super);

int get_tree_nodev(struct fs_context *fc,
		  int (*fill_super)(struct super_block *sb,
				    struct fs_context *fc))
{
	return vfs_get_super(fc, vfs_get_independent_super, fill_super);
}
EXPORT_SYMBOL(get_tree_nodev);
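
/*
 * Illustrative sketch (not part of the original file): the smallest user of
 * get_tree_nodev() -- an in-memory filesystem where every mount gets its own
 * anonymous superblock.  The examplefs_nodev_* names are hypothetical.
 */
static int examplefs_nodev_fill_super(struct super_block *sb,
				      struct fs_context *fc)
{
	/* set sb->s_op and sb->s_root here, e.g. via simple_fill_super() */
	return 0;
}

static int examplefs_nodev_get_tree(struct fs_context *fc)
{
	return get_tree_nodev(fc, examplefs_nodev_fill_super);
}

static const struct fs_context_operations examplefs_nodev_ctx_ops = {
	.get_tree	= examplefs_nodev_get_tree,
};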

int get_tree_single(struct fs_context *fc,
		  int (*fill_super)(struct super_block *sb,
				    struct fs_context *fc))
{
	return vfs_get_super(fc, vfs_get_single_super, fill_super);
}
EXPORT_SYMBOL(get_tree_single);

int get_tree_single_reconf(struct fs_context *fc,
		  int (*fill_super)(struct super_block *sb,
				    struct fs_context *fc))
{
	return vfs_get_super(fc, vfs_get_single_reconf_super, fill_super);
}
EXPORT_SYMBOL(get_tree_single_reconf);

int get_tree_keyed(struct fs_context *fc,
		  int (*fill_super)(struct super_block *sb,
				    struct fs_context *fc),
		void *key)
{
	fc->s_fs_info = key;
	return vfs_get_super(fc, vfs_get_keyed_super, fill_super);
}
EXPORT_SYMBOL(get_tree_keyed);

#ifdef CONFIG_BLOCK

static int set_bdev_super(struct super_block *s, void *data)
{
	s->s_bdev = data;
	s->s_dev = s->s_bdev->bd_dev;
	s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);

	if (bdev_stable_writes(s->s_bdev))
		s->s_iflags |= SB_I_STABLE_WRITES;
	return 0;
}

static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
{
	return set_bdev_super(s, fc->sget_key);
}

static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
{
	return !(s->s_iflags & SB_I_RETIRED) && s->s_bdev == fc->sget_key;
}

/**
 * get_tree_bdev - Get a superblock based on a single block device
 * @fc: The filesystem context holding the parameters
 * @fill_super: Helper to initialise a new superblock
 */
int get_tree_bdev(struct fs_context *fc,
		int (*fill_super)(struct super_block *,
				  struct fs_context *))
{
	struct block_device *bdev;
	struct super_block *s;
	fmode_t mode = FMODE_READ | FMODE_EXCL;
	int error = 0;

	if (!(fc->sb_flags & SB_RDONLY))
		mode |= FMODE_WRITE;

	if (!fc->source)
		return invalf(fc, "No source specified");

	bdev = blkdev_get_by_path(fc->source, mode, fc->fs_type);
	if (IS_ERR(bdev)) {
		errorf(fc, "%s: Can't open blockdev", fc->source);
		return PTR_ERR(bdev);
	}

	/* Once the superblock is inserted into the list by sget_fc(), s_umount
	 * will protect the lockfs code from trying to start a snapshot while
	 * we are mounting
	 */
	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (bdev->bd_fsfreeze_count > 0) {
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
		blkdev_put(bdev, mode);
		return -EBUSY;
	}

	fc->sb_flags |= SB_NOSEC;
	fc->sget_key = bdev;
	s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	if (IS_ERR(s)) {
		blkdev_put(bdev, mode);
		return PTR_ERR(s);
	}

	if (s->s_root) {
		/* Don't summarily change the RO/RW state. */
		if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
			warnf(fc, "%pg: Can't mount, would change RO state", bdev);
			deactivate_locked_super(s);
			blkdev_put(bdev, mode);
			return -EBUSY;
		}

		/*
		 * s_umount nests inside open_mutex during
		 * __invalidate_device().  blkdev_put() acquires
		 * open_mutex and can't be called under s_umount.  Drop
		 * s_umount temporarily.  This is safe as we're
		 * holding an active reference.
		 */
		up_write(&s->s_umount);
		blkdev_put(bdev, mode);
		down_write(&s->s_umount);
	} else {
		s->s_mode = mode;
		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
		sb_set_blocksize(s, block_size(bdev));
		error = fill_super(s, fc);
		if (error) {
			deactivate_locked_super(s);
			return error;
		}

		s->s_flags |= SB_ACTIVE;
		bdev->bd_super = s;
	}

	BUG_ON(fc->root);
	fc->root = dget(s->s_root);
	return 0;
}
EXPORT_SYMBOL(get_tree_bdev);
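
/*
 * Illustrative sketch (not part of the original file): a block-device-backed
 * filesystem converted to the new mount API forwards its ->get_tree() hook
 * to get_tree_bdev().  examplefs_bdev_fill_super() is hypothetical and would
 * read the on-disk superblock from sb->s_bdev.
 */
static int examplefs_bdev_fill_super(struct super_block *sb,
				     struct fs_context *fc)
{
	/* read and validate the on-disk super, set up sb->s_op and sb->s_root */
	return 0;
}

static int examplefs_bdev_get_tree(struct fs_context *fc)
{
	return get_tree_bdev(fc, examplefs_bdev_fill_super);
}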

static int test_bdev_super(struct super_block *s, void *data)
{
	return !(s->s_iflags & SB_I_RETIRED) && (void *)s->s_bdev == data;
}

struct dentry *mount_bdev(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct block_device *bdev;
	struct super_block *s;
	fmode_t mode = FMODE_READ | FMODE_EXCL;
	int error = 0;

	if (!(flags & SB_RDONLY))
		mode |= FMODE_WRITE;

	bdev = blkdev_get_by_path(dev_name, mode, fs_type);
	if (IS_ERR(bdev))
		return ERR_CAST(bdev);

	/*
	 * once the super is inserted into the list by sget, s_umount
	 * will protect the lockfs code from trying to start a snapshot
	 * while we are mounting
	 */
	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (bdev->bd_fsfreeze_count > 0) {
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		error = -EBUSY;
		goto error_bdev;
	}
	s = sget(fs_type, test_bdev_super, set_bdev_super, flags | SB_NOSEC,
		 bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	if (IS_ERR(s))
		goto error_s;

	if (s->s_root) {
		if ((flags ^ s->s_flags) & SB_RDONLY) {
			deactivate_locked_super(s);
			error = -EBUSY;
			goto error_bdev;
		}

		/*
		 * s_umount nests inside open_mutex during
		 * __invalidate_device().  blkdev_put() acquires
		 * open_mutex and can't be called under s_umount.  Drop
		 * s_umount temporarily.  This is safe as we're
		 * holding an active reference.
		 */
		up_write(&s->s_umount);
		blkdev_put(bdev, mode);
		down_write(&s->s_umount);
	} else {
		s->s_mode = mode;
		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
		sb_set_blocksize(s, block_size(bdev));
		error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
		if (error) {
			deactivate_locked_super(s);
			goto error;
		}

		s->s_flags |= SB_ACTIVE;
		bdev->bd_super = s;
	}

	return dget(s->s_root);

error_s:
	error = PTR_ERR(s);
error_bdev:
	blkdev_put(bdev, mode);
error:
	return ERR_PTR(error);
}
EXPORT_SYMBOL(mount_bdev);
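
/*
 * Illustrative sketch (not part of the original file): the traditional
 * (pre-fs_context) pattern for a block-device filesystem built around
 * mount_bdev() and kill_block_super().  The examplefs_legacy_* names are
 * hypothetical.
 */
static int examplefs_legacy_fill_super(struct super_block *sb, void *data,
				       int silent)
{
	/* parse mount options in @data, read the on-disk super, set s_root */
	return 0;
}

static struct dentry *examplefs_legacy_mount(struct file_system_type *fs_type,
		int flags, const char *dev_name, void *data)
{
	return mount_bdev(fs_type, flags, dev_name, data,
			  examplefs_legacy_fill_super);
}

static struct file_system_type examplefs_legacy_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "examplefs",
	.mount		= examplefs_legacy_mount,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,
};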

void kill_block_super(struct super_block *sb)
{
	struct block_device *bdev = sb->s_bdev;
	fmode_t mode = sb->s_mode;

	bdev->bd_super = NULL;
	generic_shutdown_super(sb);
	sync_blockdev(bdev);
	WARN_ON_ONCE(!(mode & FMODE_EXCL));
	blkdev_put(bdev, mode | FMODE_EXCL);
}

EXPORT_SYMBOL(kill_block_super);
#endif

struct dentry *mount_nodev(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	int error;
	struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);

	if (IS_ERR(s))
		return ERR_CAST(s);

	error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
	if (error) {
		deactivate_locked_super(s);
		return ERR_PTR(error);
	}
	s->s_flags |= SB_ACTIVE;
	return dget(s->s_root);
}
EXPORT_SYMBOL(mount_nodev);

int reconfigure_single(struct super_block *s,
		       int flags, void *data)
{
	struct fs_context *fc;
	int ret;

	/* The caller really need to be passing fc down into mount_single(),
	 * then a chunk of this can be removed.  [Bollocks -- AV]
	 * Better yet, reconfiguration shouldn't happen, but rather the second
	 * mount should be rejected if the parameters are not compatible.
	 */
	fc = fs_context_for_reconfigure(s->s_root, flags, MS_RMT_MASK);
	if (IS_ERR(fc))
		return PTR_ERR(fc);

	ret = parse_monolithic_mount_data(fc, data);
	if (ret < 0)
		goto out;

	ret = reconfigure_super(fc);
out:
	put_fs_context(fc);
	return ret;
}

static int compare_single(struct super_block *s, void *p)
{
	return 1;
}

struct dentry *mount_single(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct super_block *s;
	int error;

	s = sget(fs_type, compare_single, set_anon_super, flags, NULL);
	if (IS_ERR(s))
		return ERR_CAST(s);
	if (!s->s_root) {
		error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
		if (!error)
			s->s_flags |= SB_ACTIVE;
	} else {
		error = reconfigure_single(s, flags, data);
	}
	if (unlikely(error)) {
		deactivate_locked_super(s);
		return ERR_PTR(error);
	}
	return dget(s->s_root);
}
EXPORT_SYMBOL(mount_single);

/**
 * vfs_get_tree - Get the mountable root
 * @fc: The superblock configuration context.
 *
 * The filesystem is invoked to get or create a superblock which can then later
 * be used for mounting.  The filesystem places a pointer to the root to be
 * used for mounting in @fc->root.
 */
int vfs_get_tree(struct fs_context *fc)
{
	struct super_block *sb;
	int error;

	if (fc->root)
		return -EBUSY;

	/* Get the mountable root in fc->root, with a ref on the root and a ref
	 * on the superblock.
	 */
	error = fc->ops->get_tree(fc);
	if (error < 0)
		return error;

	if (!fc->root) {
		pr_err("Filesystem %s get_tree() didn't set fc->root\n",
		       fc->fs_type->name);
		/* We don't know what the locking state of the superblock is -
		 * if there is a superblock.
		 */
		BUG();
	}

	sb = fc->root->d_sb;
	WARN_ON(!sb->s_bdi);

	/*
	 * Write barrier is for super_cache_count(). We place it before setting
	 * SB_BORN as the data dependency between the two functions is the
	 * superblock structure contents that we just set up, not the SB_BORN
	 * flag.
	 */
	smp_wmb();
	sb->s_flags |= SB_BORN;

	error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL);
	if (unlikely(error)) {
		fc_drop_locked(fc);
		return error;
	}

	/*
	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
	 * but s_maxbytes was an unsigned long long for many releases. Throw
	 * this warning for a little while to try and catch filesystems that
	 * violate this rule.
	 */
	WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
		"negative value (%lld)\n", fc->fs_type->name, sb->s_maxbytes);

	return 0;
}
EXPORT_SYMBOL(vfs_get_tree);

/*
 * Setup private BDI for given superblock. It gets automatically cleaned up
 * in generic_shutdown_super().
 */
int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
{
	struct backing_dev_info *bdi;
	int err;
	va_list args;

	bdi = bdi_alloc(NUMA_NO_NODE);
	if (!bdi)
		return -ENOMEM;

	va_start(args, fmt);
	err = bdi_register_va(bdi, fmt, args);
	va_end(args);
	if (err) {
		bdi_put(bdi);
		return err;
	}
	WARN_ON(sb->s_bdi != &noop_backing_dev_info);
	sb->s_bdi = bdi;
	sb->s_iflags |= SB_I_PERSB_BDI;

	return 0;
}
EXPORT_SYMBOL(super_setup_bdi_name);

/*
 * Setup private BDI for given superblock. It gets automatically cleaned up
 * in generic_shutdown_super().
 */
int super_setup_bdi(struct super_block *sb)
{
	static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);

	return super_setup_bdi_name(sb, "%.28s-%ld", sb->s_type->name,
				    atomic_long_inc_return(&bdi_seq));
}
EXPORT_SYMBOL(super_setup_bdi);
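
/*
 * Illustrative sketch (not part of the original file): a network or virtual
 * filesystem that wants its own writeback context calls super_setup_bdi()
 * (or the _name variant) early in fill_super; examplefs_netfs_fill_super()
 * and the readahead tweak are hypothetical.
 */
static int examplefs_netfs_fill_super(struct super_block *sb,
				      struct fs_context *fc)
{
	int err;

	err = super_setup_bdi(sb);
	if (err)
		return err;
	sb->s_bdi->ra_pages = 0;	/* e.g. let the fs do its own readahead */

	/* ... the rest of the superblock setup ... */
	return 0;
}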

/**
 * sb_wait_write - wait until all writers to given file system finish
 * @sb: the super for which we wait
 * @level: type of writers we wait for (normal vs page fault)
 *
 * This function waits until there are no writers of given type to given file
 * system.
 */
static void sb_wait_write(struct super_block *sb, int level)
{
	percpu_down_write(sb->s_writers.rw_sem + level-1);
}

/*
 * We are going to return to userspace and forget about these locks, the
 * ownership goes to the caller of thaw_super() which does unlock().
 */
static void lockdep_sb_freeze_release(struct super_block *sb)
{
	int level;

	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
		percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
}

/*
 * Tell lockdep we are holding these locks before we call ->unfreeze_fs(sb).
 */
static void lockdep_sb_freeze_acquire(struct super_block *sb)
{
	int level;

	for (level = 0; level < SB_FREEZE_LEVELS; ++level)
		percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
}

static void sb_freeze_unlock(struct super_block *sb, int level)
{
	for (level--; level >= 0; level--)
		percpu_up_write(sb->s_writers.rw_sem + level);
}

/**
 * freeze_super - lock the filesystem and force it into a consistent state
 * @sb: the super to lock
 *
 * Syncs the super to make sure the filesystem is consistent and calls the fs's
 * freeze_fs.  Subsequent calls to this without first thawing the fs will return
 * -EBUSY.
 *
 * During this function, sb->s_writers.frozen goes through these values:
 *
 * SB_UNFROZEN: File system is normal, all writes progress as usual.
 *
 * SB_FREEZE_WRITE: The file system is in the process of being frozen.  New
 * writes should be blocked, though page faults are still allowed. We wait for
 * all writes to complete and then proceed to the next stage.
 *
 * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
 * but internal fs threads can still modify the filesystem (although they
 * should not dirty new pages or inodes), writeback can run etc. After waiting
 * for all running page faults we sync the filesystem which will clean all
 * dirty pages and inodes (no new dirty pages or inodes can be created when
 * sync is running).
 *
 * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
 * modification are blocked (e.g. XFS preallocation truncation on inode
 * reclaim). This is usually implemented by blocking new transactions for
 * filesystems that have them and need this additional guard. After all
 * internal writers are finished we call ->freeze_fs() to finish filesystem
 * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
 * mostly auxiliary for filesystems to verify they do not modify frozen fs.
 *
 * sb->s_writers.frozen is protected by sb->s_umount.
 */
int freeze_super(struct super_block *sb)
{
	int ret;

	atomic_inc(&sb->s_active);
	down_write(&sb->s_umount);
	if (sb->s_writers.frozen != SB_UNFROZEN) {
		deactivate_locked_super(sb);
		return -EBUSY;
	}

	if (!(sb->s_flags & SB_BORN)) {
		up_write(&sb->s_umount);
		return 0;	/* sic - it's "nothing to do" */
	}

	if (sb_rdonly(sb)) {
		/* Nothing to do really... */
		sb->s_writers.frozen = SB_FREEZE_COMPLETE;
		up_write(&sb->s_umount);
		return 0;
	}

	sb->s_writers.frozen = SB_FREEZE_WRITE;
	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
	up_write(&sb->s_umount);
	sb_wait_write(sb, SB_FREEZE_WRITE);
	down_write(&sb->s_umount);

	/* Now we go and block page faults... */
	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
	sb_wait_write(sb, SB_FREEZE_PAGEFAULT);

	/* All writers are done so after syncing there won't be dirty data */
	ret = sync_filesystem(sb);
	if (ret) {
		sb->s_writers.frozen = SB_UNFROZEN;
		sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
		wake_up(&sb->s_writers.wait_unfrozen);
		deactivate_locked_super(sb);
		return ret;
	}

	/* Now wait for internal filesystem counter */
	sb->s_writers.frozen = SB_FREEZE_FS;
	sb_wait_write(sb, SB_FREEZE_FS);

	if (sb->s_op->freeze_fs) {
		ret = sb->s_op->freeze_fs(sb);
		if (ret) {
			printk(KERN_ERR
				"VFS:Filesystem freeze failed\n");
			sb->s_writers.frozen = SB_UNFROZEN;
			sb_freeze_unlock(sb, SB_FREEZE_FS);
			wake_up(&sb->s_writers.wait_unfrozen);
			deactivate_locked_super(sb);
			return ret;
		}
	}
	/*
	 * For debugging purposes so that fs can warn if it sees write activity
	 * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
	 */
	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
	lockdep_sb_freeze_release(sb);
	up_write(&sb->s_umount);
	return 0;
}
EXPORT_SYMBOL(freeze_super);

static int thaw_super_locked(struct super_block *sb)
{
	int error;

	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
		up_write(&sb->s_umount);
		return -EINVAL;
	}

	if (sb_rdonly(sb)) {
		sb->s_writers.frozen = SB_UNFROZEN;
		goto out;
	}

	lockdep_sb_freeze_acquire(sb);

	if (sb->s_op->unfreeze_fs) {
		error = sb->s_op->unfreeze_fs(sb);
		if (error) {
			printk(KERN_ERR
				"VFS:Filesystem thaw failed\n");
			lockdep_sb_freeze_release(sb);
			up_write(&sb->s_umount);
			return error;
		}
	}

	sb->s_writers.frozen = SB_UNFROZEN;
	sb_freeze_unlock(sb, SB_FREEZE_FS);
out:
	wake_up(&sb->s_writers.wait_unfrozen);
	deactivate_locked_super(sb);
	return 0;
}

/**
 * thaw_super -- unlock filesystem
 * @sb: the super to thaw
 *
 * Unlocks the filesystem and marks it writeable again after freeze_super().
 */
int thaw_super(struct super_block *sb)
{
	down_write(&sb->s_umount);
	return thaw_super_locked(sb);
}
EXPORT_SYMBOL(thaw_super);
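
/*
 * Illustrative sketch (not part of the original file): freeze_super() and
 * thaw_super() bracket an externally driven snapshot, which is how the
 * FIFREEZE/FITHAW ioctls use them.  example_snapshot_bdev() is hypothetical
 * and elides the actual snapshot mechanics.
 */
static int example_snapshot_bdev(struct super_block *sb)
{
	int ret;

	ret = freeze_super(sb);		/* sync and block all writers */
	if (ret)
		return ret;

	/* ... take the block-level snapshot while the fs is quiescent ... */

	return thaw_super(sb);		/* unblock writers again */
}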