// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bio.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/statfs.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/time.h>
#include <linux/wait.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/kernel.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "dir.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "recovery.h"
#include "rgrp.h"
#include "super.h"
#include "trans.h"
#include "util.h"
#include "sys.h"
#include "xattr.h"
#include "lops.h"

enum dinode_demise {
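	/* Fate of an evicted inode's dinode, as decided by evict_should_delete() */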
	SHOULD_DELETE_DINODE,
	SHOULD_NOT_DELETE_DINODE,
	SHOULD_DEFER_EVICTION,
};

/**
 * gfs2_jindex_free - Clear all the journal index information
 * @sdp: The GFS2 superblock
 *
 */

void gfs2_jindex_free(struct gfs2_sbd *sdp)
{
	struct list_head list;
	struct gfs2_jdesc *jd;

	spin_lock(&sdp->sd_jindex_spin);
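	/*
	 * Splice the entire jindex list onto a local list head so that the
	 * journal descriptors can be freed without holding the spinlock.
	 */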
	list_add(&list, &sdp->sd_jindex_list);
	list_del_init(&sdp->sd_jindex_list);
	sdp->sd_journals = 0;
	spin_unlock(&sdp->sd_jindex_spin);

	sdp->sd_jdesc = NULL;
	while (!list_empty(&list)) {
		jd = list_first_entry(&list, struct gfs2_jdesc, jd_list);
		gfs2_free_journal_extents(jd);
		list_del(&jd->jd_list);
		iput(jd->jd_inode);
		jd->jd_inode = NULL;
		kfree(jd);
	}
}

static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
{
	struct gfs2_jdesc *jd;

	list_for_each_entry(jd, head, jd_list) {
		if (jd->jd_jid == jid)
			return jd;
	}
	return NULL;
}

struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
{
	struct gfs2_jdesc *jd;

	spin_lock(&sdp->sd_jindex_spin);
	jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
	spin_unlock(&sdp->sd_jindex_spin);

	return jd;
}

int gfs2_jdesc_check(struct gfs2_jdesc *jd)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	u64 size = i_size_read(jd->jd_inode);

	if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, BIT(30)))
		return -EIO;

	jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift;

	if (gfs2_write_alloc_required(ip, 0, size)) {
		gfs2_consist_inode(ip);
		return -EIO;
	}

	return 0;
}

/**
 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
 * @sdp: the filesystem
 *
 * Returns: errno
 */

int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
	struct gfs2_glock *j_gl = ip->i_gl;
	struct gfs2_log_header_host head;
	int error;

	j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
	if (gfs2_withdrawn(sdp))
		return -EIO;

	error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
	if (error || gfs2_withdrawn(sdp))
		return error;

	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
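		/*
		 * The log header was not written by a clean unmount, so the
		 * journal may still contain live transactions; going
		 * read-write before recovery could corrupt the filesystem.
		 */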
		gfs2_consist(sdp);
		return -EIO;
	}

	/*  Initialize the head of the log  */
	sdp->sd_log_sequence = head.lh_sequence + 1;
	gfs2_log_pointers_init(sdp, head.lh_blkno);

	error = gfs2_quota_init(sdp);
	if (!error && !gfs2_withdrawn(sdp))
		set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
	return error;
}

void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
{
	const struct gfs2_statfs_change *str = buf;

	sc->sc_total = be64_to_cpu(str->sc_total);
	sc->sc_free = be64_to_cpu(str->sc_free);
	sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
}

void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
{
	struct gfs2_statfs_change *str = buf;

	str->sc_total = cpu_to_be64(sc->sc_total);
	str->sc_free = cpu_to_be64(sc->sc_free);
	str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
}

int gfs2_statfs_init(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct buffer_head *m_bh;
	struct gfs2_holder gh;
	int error;

	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
				   &gh);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
	if (error)
		goto out;

	if (sdp->sd_args.ar_spectator) {
		spin_lock(&sdp->sd_statfs_spin);
		gfs2_statfs_change_in(m_sc, m_bh->b_data +
				      sizeof(struct gfs2_dinode));
		spin_unlock(&sdp->sd_statfs_spin);
	} else {
		spin_lock(&sdp->sd_statfs_spin);
		gfs2_statfs_change_in(m_sc, m_bh->b_data +
				      sizeof(struct gfs2_dinode));
		gfs2_statfs_change_in(l_sc, sdp->sd_sc_bh->b_data +
				      sizeof(struct gfs2_dinode));
		spin_unlock(&sdp->sd_statfs_spin);

	}

	brelse(m_bh);
out:
	gfs2_glock_dq_uninit(&gh);
	return error;
}

void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
			s64 dinodes)
{
	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	s64 x, y;
	int need_sync = 0;

	gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh);

	spin_lock(&sdp->sd_statfs_spin);
	l_sc->sc_total += total;
	l_sc->sc_free += free;
	l_sc->sc_dinodes += dinodes;
	gfs2_statfs_change_out(l_sc, sdp->sd_sc_bh->b_data +
			       sizeof(struct gfs2_dinode));
	if (sdp->sd_args.ar_statfs_percent) {
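		/*
		 * Request a statfs sync once the accumulated local change
		 * reaches ar_statfs_percent percent of the master free-block
		 * count, in either direction.
		 */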
		x = 100 * l_sc->sc_free;
		y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent;
		if (x >= y || x <= -y)
			need_sync = 1;
	}
	spin_unlock(&sdp->sd_statfs_spin);

	if (need_sync)
		gfs2_wake_up_statfs(sdp);
}

void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh)
{
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;

	gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh);
	gfs2_trans_add_meta(m_ip->i_gl, m_bh);

	spin_lock(&sdp->sd_statfs_spin);
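	/*
	 * Fold the local statfs changes into the master counters and zero
	 * out the local change file, all within the same transaction.
	 */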
	m_sc->sc_total += l_sc->sc_total;
	m_sc->sc_free += l_sc->sc_free;
	m_sc->sc_dinodes += l_sc->sc_dinodes;
	memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
	memset(sdp->sd_sc_bh->b_data + sizeof(struct gfs2_dinode),
	       0, sizeof(struct gfs2_statfs_change));
	gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
	spin_unlock(&sdp->sd_statfs_spin);
}

int gfs2_statfs_sync(struct super_block *sb, int type)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct gfs2_holder gh;
	struct buffer_head *m_bh;
	int error;

	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
				   &gh);
	if (error)
		goto out;

	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
	if (error)
		goto out_unlock;

	spin_lock(&sdp->sd_statfs_spin);
	gfs2_statfs_change_in(m_sc, m_bh->b_data +
			      sizeof(struct gfs2_dinode));
	if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
		spin_unlock(&sdp->sd_statfs_spin);
		goto out_bh;
	}
	spin_unlock(&sdp->sd_statfs_spin);

	error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
	if (error)
		goto out_bh;

	update_statfs(sdp, m_bh);
	sdp->sd_statfs_force_sync = 0;

	gfs2_trans_end(sdp);

out_bh:
	brelse(m_bh);
out_unlock:
	gfs2_glock_dq_uninit(&gh);
out:
	return error;
}

struct lfcc {
	struct list_head list;
	struct gfs2_holder gh;
};

/**
 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
 *                            journals are clean
 * @sdp: the file system
 *
 * Returns: errno
 */

static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip;
	struct gfs2_jdesc *jd;
	struct lfcc *lfcc;
	LIST_HEAD(list);
	struct gfs2_log_header_host lh;
	int error;

	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
		lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
		if (!lfcc) {
			error = -ENOMEM;
			goto out;
		}
		ip = GFS2_I(jd->jd_inode);
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &lfcc->gh);
		if (error) {
			kfree(lfcc);
			goto out;
		}
		list_add(&lfcc->list, &list);
	}

	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE,
				   LM_FLAG_NOEXP, &sdp->sd_freeze_gh);
	if (error)
		goto out;

	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
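		/*
		 * A clean journal ends with a log header that has the
		 * UNMOUNT flag set; anything else means the journal is dirty
		 * or in use, so the freeze cannot proceed (-EBUSY below).
		 */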
		error = gfs2_jdesc_check(jd);
		if (error)
			break;
		error = gfs2_find_jhead(jd, &lh, false);
		if (error)
			break;
		if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
			error = -EBUSY;
			break;
		}
	}

	if (error)
		gfs2_freeze_unlock(&sdp->sd_freeze_gh);

out:
	while (!list_empty(&list)) {
		lfcc = list_first_entry(&list, struct lfcc, list);
		list_del(&lfcc->list);
		gfs2_glock_dq_uninit(&lfcc->gh);
		kfree(lfcc);
	}
	return error;
}

void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
{
	struct gfs2_dinode *str = buf;
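
	/* The on-disk dinode is big-endian; convert each in-core field. */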

	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
	str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
	str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
	str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
	str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
	str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
	str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);

	str->di_goal_meta = cpu_to_be64(ip->i_goal);
	str->di_goal_data = cpu_to_be64(ip->i_goal);
	str->di_generation = cpu_to_be64(ip->i_generation);

	str->di_flags = cpu_to_be32(ip->i_diskflags);
	str->di_height = cpu_to_be16(ip->i_height);
	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
					     !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
					     GFS2_FORMAT_DE : 0);
	str->di_depth = cpu_to_be16(ip->i_depth);
	str->di_entries = cpu_to_be32(ip->i_entries);

	str->di_eattr = cpu_to_be64(ip->i_eattr);
	str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
	str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
	str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
}

/**
 * gfs2_write_inode - Make sure the inode is stable on the disk
 * @inode: The inode
 * @wbc: The writeback control structure
 *
 * Returns: errno
 */

static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
	struct backing_dev_info *bdi = inode_to_bdi(metamapping->host);
	int ret = 0;
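	/*
	 * Journaled-data inodes must always be flushed through the log;
	 * otherwise a log flush is only needed for data-integrity
	 * (WB_SYNC_ALL) writeback.
	 */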
	bool flush_all = (wbc->sync_mode == WB_SYNC_ALL || gfs2_is_jdata(ip));

	if (flush_all)
		gfs2_log_flush(GFS2_SB(inode), ip->i_gl,
			       GFS2_LOG_HEAD_FLUSH_NORMAL |
			       GFS2_LFC_WRITE_INODE);
	if (bdi->wb.dirty_exceeded)
		gfs2_ail1_flush(sdp, wbc);
	else
		filemap_fdatawrite(metamapping);
	if (flush_all)
		ret = filemap_fdatawait(metamapping);
	if (ret)
		mark_inode_dirty_sync(inode);
	else {
		spin_lock(&inode->i_lock);
		if (!(inode->i_flags & I_DIRTY))
			gfs2_ordered_del_inode(ip);
		spin_unlock(&inode->i_lock);
	}
	return ret;
}

/**
 * gfs2_dirty_inode - check for atime updates
 * @inode: The inode in question
 * @flags: The type of dirty
 *
 * Unfortunately it can be called under any combination of inode
 * glock and transaction lock, so we have to check carefully.
 *
 * At the moment this deals only with atime - it should be possible
 * to expand that role in future, once a review of the locking has
 * been carried out.
 */

static void gfs2_dirty_inode(struct inode *inode, int flags)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *bh;
	struct gfs2_holder gh;
	int need_unlock = 0;
	int need_endtrans = 0;
	int ret;

	if (unlikely(gfs2_withdrawn(sdp)))
		return;
	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
		if (ret) {
			fs_err(sdp, "dirty_inode: glock %d\n", ret);
			gfs2_dump_glock(NULL, ip->i_gl, true);
			return;
		}
		need_unlock = 1;
	} else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
		return;

	if (current->journal_info == NULL) {
		ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
		if (ret) {
			fs_err(sdp, "dirty_inode: gfs2_trans_begin %d\n", ret);
			goto out;
		}
		need_endtrans = 1;
	}

	ret = gfs2_meta_inode_buffer(ip, &bh);
	if (ret == 0) {
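		/* Re-encode the in-core inode into its on-disk dinode block */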
		gfs2_trans_add_meta(ip->i_gl, bh);
		gfs2_dinode_out(ip, bh->b_data);
		brelse(bh);
	}

	if (need_endtrans)
		gfs2_trans_end(sdp);
out:
	if (need_unlock)
		gfs2_glock_dq_uninit(&gh);
}

/**
 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
 * @sdp: the filesystem
 *
 * Returns: errno
 */

void gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
	int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

	gfs2_flush_delete_work(sdp);
	if (!log_write_allowed && current == sdp->sd_quotad_process)
		fs_warn(sdp, "The quotad daemon is withdrawing.\n");
	else if (sdp->sd_quotad_process)
		kthread_stop(sdp->sd_quotad_process);
	sdp->sd_quotad_process = NULL;

	if (!log_write_allowed && current == sdp->sd_logd_process)
		fs_warn(sdp, "The logd daemon is withdrawing.\n");
	else if (sdp->sd_logd_process)
		kthread_stop(sdp->sd_logd_process);
	sdp->sd_logd_process = NULL;

	if (log_write_allowed) {
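		/*
		 * The journal is still live: push out pending quota and
		 * statfs changes and write a final shutdown log header.
		 */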
		gfs2_quota_sync(sdp->sd_vfs, 0);
		gfs2_statfs_sync(sdp->sd_vfs, 0);

		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
			       GFS2_LFC_MAKE_FS_RO);
		wait_event_timeout(sdp->sd_log_waitq,
				   gfs2_log_is_empty(sdp),
				   HZ * 5);
		gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp));
	} else {
		wait_event_timeout(sdp->sd_log_waitq,
				   gfs2_log_is_empty(sdp),
				   HZ * 5);
	}
	gfs2_quota_cleanup(sdp);

	if (!log_write_allowed)
		sdp->sd_vfs->s_flags |= SB_RDONLY;
}

/**
 * gfs2_put_super - Unmount the filesystem
 * @sb: The VFS superblock
 *
 */

static void gfs2_put_super(struct super_block *sb)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_jdesc *jd;

	/* No more recovery requests */
	set_bit(SDF_NORECOVERY, &sdp->sd_flags);
	smp_mb();

	/* Wait on outstanding recovery */
restart:
	spin_lock(&sdp->sd_jindex_spin);
	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
		if (!test_bit(JDF_RECOVERY, &jd->jd_flags))
			continue;
		spin_unlock(&sdp->sd_jindex_spin);
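		/*
		 * Drop the spinlock before sleeping; the list may change
		 * under us, so rescan it from the start afterwards.
		 */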
		wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
			    TASK_UNINTERRUPTIBLE);
		goto restart;
	}
	spin_unlock(&sdp->sd_jindex_spin);

	if (!sb_rdonly(sb)) {
		gfs2_make_fs_ro(sdp);
	}
	WARN_ON(gfs2_withdrawing(sdp));

	/*  At this point, we're through modifying the disk  */

	/*  Release stuff  */

	iput(sdp->sd_jindex);
	iput(sdp->sd_statfs_inode);
	iput(sdp->sd_rindex);
	iput(sdp->sd_quota_inode);

	gfs2_glock_put(sdp->sd_rename_gl);
	gfs2_glock_put(sdp->sd_freeze_gl);

	if (!sdp->sd_args.ar_spectator) {
		if (gfs2_holder_initialized(&sdp->sd_journal_gh))
			gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
		if (gfs2_holder_initialized(&sdp->sd_jinode_gh))
			gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
		brelse(sdp->sd_sc_bh);
		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
		free_local_statfs_inodes(sdp);
		iput(sdp->sd_qc_inode);
	}

	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
	gfs2_clear_rgrpd(sdp);
	gfs2_jindex_free(sdp);
	/*  Take apart glock structures and buffer lists  */
	gfs2_gl_hash_clear(sdp);
	truncate_inode_pages_final(&sdp->sd_aspace);
	gfs2_delete_debugfs_file(sdp);
	/*  Unmount the locking protocol  */
	gfs2_lm_unmount(sdp);

	/*  At this point, we're through participating in the lockspace  */
	gfs2_sys_fs_del(sdp);
	free_sbd(sdp);
}

/**
 * gfs2_sync_fs - sync the filesystem
 * @sb: the superblock
 * @wait: true to wait for completion
 *
 * Flushes the log to disk.
 */

static int gfs2_sync_fs(struct super_block *sb, int wait)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;

	gfs2_quota_sync(sb, -1);
	if (wait)
		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
			       GFS2_LFC_SYNC_FS);
	return sdp->sd_log_error;
}

void gfs2_freeze_func(struct work_struct *work)
{
	int error;
	struct gfs2_holder freeze_gh;
	struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
	struct super_block *sb = sdp->sd_vfs;

	atomic_inc(&sb->s_active);
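	/* Retake the freeze glock; a failure here forces a withdraw */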
	error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
	if (error) {
		gfs2_assert_withdraw(sdp, 0);
	} else {
		atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
		error = thaw_super(sb);
		if (error) {
			fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n",
				error);
			gfs2_assert_withdraw(sdp, 0);
		}
		gfs2_freeze_unlock(&freeze_gh);
	}
	deactivate_super(sb);
	clear_bit_unlock(SDF_FS_FROZEN, &sdp->sd_flags);
	wake_up_bit(&sdp->sd_flags, SDF_FS_FROZEN);
}

/**
 * gfs2_freeze - prevent further writes to the filesystem
 * @sb: the VFS structure for the filesystem
 *
 */

static int gfs2_freeze(struct super_block *sb)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	int error;

	mutex_lock(&sdp->sd_freeze_mutex);
	if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) {
		error = -EBUSY;
		goto out;
	}

	for (;;) {
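		/*
		 * Keep retrying until every journal is clean or a fatal
		 * error occurs; -EBUSY means a journal is dirty and must
		 * be recovered first.
		 */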
		if (gfs2_withdrawn(sdp)) {
			error = -EINVAL;
			goto out;
		}

		error = gfs2_lock_fs_check_clean(sdp);
		if (!error)
			break;

		if (error == -EBUSY)
			fs_err(sdp, "waiting for recovery before freeze\n");
		else if (error == -EIO) {
			fs_err(sdp, "Fatal IO error: cannot freeze gfs2 due "
			       "to recovery error.\n");
			goto out;
		} else {
			fs_err(sdp, "error freezing FS: %d\n", error);
		}
		fs_err(sdp, "retrying...\n");
		msleep(1000);
	}
	set_bit(SDF_FS_FROZEN, &sdp->sd_flags);
out:
	mutex_unlock(&sdp->sd_freeze_mutex);
	return error;
}

/**
 * gfs2_unfreeze - reallow writes to the filesystem
 * @sb: the VFS structure for the filesystem
 *
 */

static int gfs2_unfreeze(struct super_block *sb)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;

	mutex_lock(&sdp->sd_freeze_mutex);
	if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
	    !gfs2_holder_initialized(&sdp->sd_freeze_gh)) {
		mutex_unlock(&sdp->sd_freeze_mutex);
		return -EINVAL;
	}

	gfs2_freeze_unlock(&sdp->sd_freeze_gh);
	mutex_unlock(&sdp->sd_freeze_mutex);
	return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE);
}

/**
 * statfs_slow_fill - fill in the sc for a given RG
 * @rgd: the RG
 * @sc: the sc structure
 *
 * Returns: 0 on success
 */

static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
			    struct gfs2_statfs_change_host *sc)
{
	gfs2_rgrp_verify(rgd);
	sc->sc_total += rgd->rd_data;
	sc->sc_free += rgd->rd_free;
	sc->sc_dinodes += rgd->rd_dinodes;
	return 0;
}

/**
 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
 * @sdp: the filesystem
 * @sc: the sc info that will be returned
 *
 * Any error (other than a signal) will cause this routine to fall back
 * to the synchronous version.
 *
 * FIXME: This really shouldn't busy wait like this.
 *
 * Returns: errno
 */

static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
{
	struct gfs2_rgrpd *rgd_next;
	struct gfs2_holder *gha, *gh;
	unsigned int slots = 64;
	unsigned int x;
	int done;
	int error = 0, err;

	memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
	gha = kmalloc_array(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
	if (!gha)
		return -ENOMEM;
	for (x = 0; x < slots; x++)
		gfs2_holder_mark_uninitialized(gha + x);

	rgd_next = gfs2_rgrpd_get_first(sdp);

	for (;;) {
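		/*
		 * Poll the outstanding asynchronous rgrp lock requests:
		 * harvest those that have completed and add their stats,
		 * then refill the free slots with the next resource groups.
		 */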
		done = 1;

		for (x = 0; x < slots; x++) {
			gh = gha + x;

			if (gfs2_holder_initialized(gh) && gfs2_glock_poll(gh)) {
				err = gfs2_glock_wait(gh);
				if (err) {
					gfs2_holder_uninit(gh);
					error = err;
				} else {
					if (!error) {
						struct gfs2_rgrpd *rgd =
							gfs2_glock2rgrp(gh->gh_gl);

						error = statfs_slow_fill(rgd, sc);
					}
					gfs2_glock_dq_uninit(gh);
				}
			}

			if (gfs2_holder_initialized(gh))
				done = 0;
			else if (rgd_next && !error) {
				error = gfs2_glock_nq_init(rgd_next->rd_gl,
							   LM_ST_SHARED,
							   GL_ASYNC,
							   gh);
				rgd_next = gfs2_rgrpd_get_next(rgd_next);
				done = 0;
			}

			if (signal_pending(current))
				error = -ERESTARTSYS;
		}

		if (done)
			break;

		yield();
	}

	kfree(gha);
	return error;
}

/**
 * gfs2_statfs_i - Do a statfs
 * @sdp: the filesystem
 * @sc: the sc structure
 *
 * Returns: errno
 */

static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
{
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;

	spin_lock(&sdp->sd_statfs_spin);

	*sc = *m_sc;
	sc->sc_total += l_sc->sc_total;
	sc->sc_free += l_sc->sc_free;
	sc->sc_dinodes += l_sc->sc_dinodes;

	spin_unlock(&sdp->sd_statfs_spin);

	if (sc->sc_free < 0)
		sc->sc_free = 0;
	if (sc->sc_free > sc->sc_total)
		sc->sc_free = sc->sc_total;
	if (sc->sc_dinodes < 0)
		sc->sc_dinodes = 0;

	return 0;
}

/**
 * gfs2_statfs - Gather and return stats about the filesystem
 * @dentry: A dentry on the filesystem to stat
 * @buf: The buffer
 *
 * Returns: 0 on success or error code
 */

static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_statfs_change_host sc;
	int error;

	error = gfs2_rindex_update(sdp);
	if (error)
		return error;

	if (gfs2_tune_get(sdp, gt_statfs_slow))
		error = gfs2_statfs_slow(sdp, &sc);
	else
		error = gfs2_statfs_i(sdp, &sc);

	if (error)
		return error;

	buf->f_type = GFS2_MAGIC;
	buf->f_bsize = sdp->sd_sb.sb_bsize;
	buf->f_blocks = sc.sc_total;
	buf->f_bfree = sc.sc_free;
	buf->f_bavail = sc.sc_free;
	buf->f_files = sc.sc_dinodes + sc.sc_free;
	buf->f_ffree = sc.sc_free;
	buf->f_namelen = GFS2_FNAMESIZE;

	return 0;
}

/**
 * gfs2_drop_inode - Drop an inode (test for remote unlink)
 * @inode: The inode to drop
 *
 * If we've received a callback on an iopen lock then it's because a
 * remote node tried to deallocate the inode but failed due to this node
 * still having the inode open. Here we mark the link count zero
 * since we know that it must have reached zero if the GLF_DEMOTE flag
 * is set on the iopen glock. If we didn't do a disk read since the
 * remote node removed the final link then we might otherwise miss
 * this event. This check ensures that this node will deallocate the
 * inode's blocks, or alternatively pass the baton on to another
 * node for later deallocation.
 */

static int gfs2_drop_inode(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);

	if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) &&
	    inode->i_nlink &&
	    gfs2_holder_initialized(&ip->i_iopen_gh)) {
		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
		if (test_bit(GLF_DEMOTE, &gl->gl_flags))
			clear_nlink(inode);
	}

	/*
	 * When under memory pressure when an inode's link count has dropped to
	 * zero, defer deleting the inode to the delete workqueue.  This avoids
	 * calling into DLM under memory pressure, which can deadlock.
	 */
	if (!inode->i_nlink &&
	    unlikely(current->flags & PF_MEMALLOC) &&
	    gfs2_holder_initialized(&ip->i_iopen_gh)) {
		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;

		gfs2_glock_hold(gl);
		if (!gfs2_queue_delete_work(gl, 0))
			gfs2_glock_queue_put(gl);
		return 0;
	}

	return generic_drop_inode(inode);
}

static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
{
	do {
		if (d1 == d2)
			return 1;
		d1 = d1->d_parent;
	} while (!IS_ROOT(d1));
	return 0;
}

/**
 * gfs2_show_options - Show mount options for /proc/mounts
 * @s: seq_file structure
 * @root: root of this (sub)tree
 *
 * Returns: 0 on success or error code
 */

static int gfs2_show_options(struct seq_file *s, struct dentry *root)
{
	struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
	struct gfs2_args *args = &sdp->sd_args;
	int val;

	if (is_ancestor(root, sdp->sd_master_dir))
		seq_puts(s, ",meta");
	if (args->ar_lockproto[0])
		seq_show_option(s, "lockproto", args->ar_lockproto);
	if (args->ar_locktable[0])
		seq_show_option(s, "locktable", args->ar_locktable);
	if (args->ar_hostdata[0])
		seq_show_option(s, "hostdata", args->ar_hostdata);
	if (args->ar_spectator)
		seq_puts(s, ",spectator");
	if (args->ar_localflocks)
		seq_puts(s, ",localflocks");
	if (args->ar_debug)
		seq_puts(s, ",debug");
	if (args->ar_posix_acl)
		seq_puts(s, ",acl");
	if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
		char *state;
		switch (args->ar_quota) {
		case GFS2_QUOTA_OFF:
			state = "off";
			break;
		case GFS2_QUOTA_ACCOUNT:
			state = "account";
			break;
		case GFS2_QUOTA_ON:
			state = "on";
			break;
		default:
			state = "unknown";
			break;
		}
		seq_printf(s, ",quota=%s", state);
	}
	if (args->ar_suiddir)
		seq_puts(s, ",suiddir");
	if (args->ar_data != GFS2_DATA_DEFAULT) {
		char *state;
		switch (args->ar_data) {
		case GFS2_DATA_WRITEBACK:
			state = "writeback";
			break;
		case GFS2_DATA_ORDERED:
			state = "ordered";
			break;
		default:
			state = "unknown";
			break;
		}
		seq_printf(s, ",data=%s", state);
	}
	if (args->ar_discard)
		seq_puts(s, ",discard");
	val = sdp->sd_tune.gt_logd_secs;
	if (val != 30)
		seq_printf(s, ",commit=%d", val);
	val = sdp->sd_tune.gt_statfs_quantum;
	if (val != 30)
		seq_printf(s, ",statfs_quantum=%d", val);
	else if (sdp->sd_tune.gt_statfs_slow)
		seq_puts(s, ",statfs_quantum=0");
	val = sdp->sd_tune.gt_quota_quantum;
	if (val != 60)
		seq_printf(s, ",quota_quantum=%d", val);
	if (args->ar_statfs_percent)
		seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent);
	if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
		const char *state;

		switch (args->ar_errors) {
		case GFS2_ERRORS_WITHDRAW:
			state = "withdraw";
			break;
		case GFS2_ERRORS_PANIC:
			state = "panic";
			break;
		default:
			state = "unknown";
			break;
		}
		seq_printf(s, ",errors=%s", state);
	}
	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
		seq_puts(s, ",nobarrier");
	if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
		seq_puts(s, ",demote_interface_used");
	if (args->ar_rgrplvb)
		seq_puts(s, ",rgrplvb");
	if (args->ar_loccookie)
		seq_puts(s, ",loccookie");
	return 0;
}

static void gfs2_final_release_pages(struct gfs2_inode *ip)
{
	struct inode *inode = &ip->i_inode;
	struct gfs2_glock *gl = ip->i_gl;

	truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
	truncate_inode_pages(&inode->i_data, 0);

	if (atomic_read(&gl->gl_revokes) == 0) {
		clear_bit(GLF_LFLUSH, &gl->gl_flags);
		clear_bit(GLF_DIRTY, &gl->gl_flags);
	}
}

static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
1091
	struct gfs2_holder gh;
1092 1093 1094
	int error;

	if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
1095
		gfs2_consist_inode(ip);
1096 1097 1098
		return -EIO;
	}

1099 1100 1101
	error = gfs2_rindex_update(sdp);
	if (error)
		return error;
1102

1103
	error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1104
	if (error)
1105
		return error;
1106

1107
	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1108 1109 1110
	if (!rgd) {
		gfs2_consist_inode(ip);
		error = -EIO;
1111
		goto out_qs;
1112 1113
	}

1114 1115
	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
				   LM_FLAG_NODE_SCOPE, &gh);
1116
	if (error)
1117
		goto out_qs;
1118

1119 1120
	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
				 sdp->sd_jdesc->jd_blocks);
1121 1122 1123 1124 1125 1126 1127 1128 1129 1130
	if (error)
		goto out_rg_gunlock;

	gfs2_free_di(rgd, ip);

	gfs2_final_release_pages(ip);

	gfs2_trans_end(sdp);

out_rg_gunlock:
1131
	gfs2_glock_dq_uninit(&gh);
1132 1133 1134 1135 1136
out_qs:
	gfs2_quota_unhold(ip);
	return error;
}

/**
 * gfs2_glock_put_eventually
 * @gl:	The glock to put
 *
 * When under memory pressure, trigger a deferred glock put to make sure we
 * won't call into DLM and deadlock.  Otherwise, put the glock directly.
 */

static void gfs2_glock_put_eventually(struct gfs2_glock *gl)
{
	if (current->flags & PF_MEMALLOC)
		gfs2_glock_queue_put(gl);
	else
		gfs2_glock_put(gl);
}

static bool gfs2_upgrade_iopen_glock(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_holder *gh = &ip->i_iopen_gh;
	long timeout = 5 * HZ;
	int error;

	gh->gh_flags |= GL_NOCACHE;
	gfs2_glock_dq_wait(gh);

	/*
	 * If there are no other lock holders, we'll get the lock immediately.
	 * Otherwise, the other nodes holding the lock will be notified about
	 * our locking request.  If they don't have the inode open, they'll
1168 1169 1170 1171 1172 1173
	 * evict the cached inode and release the lock.  Otherwise, if they
	 * poke the inode glock, we'll take this as an indication that they
	 * still need the iopen glock and that they'll take care of deleting
	 * the inode when they're done.  As a last resort, if another node
	 * keeps holding the iopen glock without showing any activity on the
	 * inode glock, we'll eventually time out.
1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191
	 *
	 * Note that we're passing the LM_FLAG_TRY_1CB flag to the first
	 * locking request as an optimization to notify lock holders as soon as
	 * possible.  Without that flag, they'd be notified implicitly by the
	 * second locking request.
	 */

	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, gh);
	error = gfs2_glock_nq(gh);
	if (error != GLR_TRYFAILED)
		return !error;

	gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh);
	error = gfs2_glock_nq(gh);
	if (error)
		return false;

	timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
1192 1193
		!test_bit(HIF_WAIT, &gh->gh_iflags) ||
		test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags),
1194 1195 1196 1197 1198 1199 1200 1201
		timeout);
	if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
		gfs2_glock_dq(gh);
		return false;
	}
	return true;
}

/**
 * evict_should_delete - determine whether the inode is eligible for deletion
 * @inode: The inode to evict
1205
 * @gh: The glock holder structure
1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246
 *
 * This function determines whether the evicted inode is eligible to be deleted
 * and locks the inode glock.
 *
 * Returns: the fate of the dinode
 */
static enum dinode_demise evict_should_delete(struct inode *inode,
					      struct gfs2_holder *gh)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct super_block *sb = inode->i_sb;
	struct gfs2_sbd *sdp = sb->s_fs_info;
	int ret;

	if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
		BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
		goto should_delete;
	}

	if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags))
		return SHOULD_DEFER_EVICTION;

	/* Deletes should never happen under memory pressure anymore.  */
	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
		return SHOULD_DEFER_EVICTION;

	/* Must not read inode block until block type has been verified */
	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh);
	if (unlikely(ret)) {
		glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
		ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
		return SHOULD_DEFER_EVICTION;
	}

	if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino))
		return SHOULD_NOT_DELETE_DINODE;
	ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
	if (ret)
		return SHOULD_NOT_DELETE_DINODE;

1247
	if (test_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags)) {
1248
		ret = gfs2_instantiate(gh);
1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269
		if (ret)
			return SHOULD_NOT_DELETE_DINODE;
	}

	/*
	 * The inode may have been recreated in the meantime.
	 */
	if (inode->i_nlink)
		return SHOULD_NOT_DELETE_DINODE;

should_delete:
	if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
	    test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
		if (!gfs2_upgrade_iopen_glock(inode)) {
			gfs2_holder_uninit(&ip->i_iopen_gh);
			return SHOULD_NOT_DELETE_DINODE;
		}
	}
	return SHOULD_DELETE_DINODE;
}

/**
 * evict_unlinked_inode - delete the pieces of an unlinked evicted inode
 * @inode: The inode to evict
 */
static int evict_unlinked_inode(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	int ret;

	if (S_ISDIR(inode->i_mode) &&
	    (ip->i_diskflags & GFS2_DIF_EXHASH)) {
		ret = gfs2_dir_exhash_dealloc(ip);
		if (ret)
			goto out;
	}

	if (ip->i_eattr) {
		ret = gfs2_ea_dealloc(ip);
		if (ret)
			goto out;
	}

	if (!gfs2_is_stuffed(ip)) {
		ret = gfs2_file_dealloc(ip);
		if (ret)
			goto out;
	}

	/* We're about to clear the bitmap for the dinode, but as soon as we
	   do, gfs2_create_inode can create another inode at the same block
	   location and try to set gl_object again. We clear gl_object here so
	   that subsequent inode creates don't see an old gl_object. */
	glock_clear_object(ip->i_gl, ip);
	ret = gfs2_dinode_dealloc(ip);
	gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
out:
	return ret;
}

/*
 * evict_linked_inode - evict an inode whose dinode has not been unlinked
 * @inode: The inode to evict
 */
static int evict_linked_inode(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct address_space *metamapping;
	int ret;

	gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
		       GFS2_LFC_EVICT_INODE);
	metamapping = gfs2_glock2aspace(ip->i_gl);
	if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
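		/* Write back and wait on any dirty glock metadata pages */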
		filemap_fdatawrite(metamapping);
		filemap_fdatawait(metamapping);
	}
	write_inode_now(inode, 1);
	gfs2_ail_flush(ip->i_gl, 0);

	ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
	if (ret)
		return ret;

	/* Needs to be done before glock release & also in a transaction */
	truncate_inode_pages(&inode->i_data, 0);
	truncate_inode_pages(metamapping, 0);
	gfs2_trans_end(sdp);
	return 0;
}

/**
 * gfs2_evict_inode - Remove an inode from cache
 * @inode: The inode to evict
 *
 * There are three cases to consider:
 * 1. i_nlink == 0, we are final opener (and must deallocate)
 * 2. i_nlink == 0, we are not the final opener (and cannot deallocate)
 * 3. i_nlink > 0
 *
 * If the fs is read only, then we have to treat all cases as per #3
 * since we are unable to do any deallocation. The inode will be
 * deallocated by the next read/write node to attempt an allocation
 * in the same resource group
 *
 * We have to (at the moment) hold the inode's main lock to cover
 * the gap between unlocking the shared lock on the iopen lock and
 * taking the exclusive lock. I'd rather do a shared -> exclusive
 * conversion on the iopen lock, but we can change that later. This
 * is safe, just less efficient.
 */

static void gfs2_evict_inode(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int ret;

	if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) {
		clear_inode(inode);
		return;
	}

	if (inode->i_nlink || sb_rdonly(sb))
		goto out;

	gfs2_holder_mark_uninitialized(&gh);
	ret = evict_should_delete(inode, &gh);
	if (ret == SHOULD_DEFER_EVICTION)
		goto out;
	if (ret == SHOULD_DELETE_DINODE)
		ret = evict_unlinked_inode(inode);
	else
		ret = evict_linked_inode(inode);

	if (gfs2_rs_active(&ip->i_res))
		gfs2_rs_deltree(&ip->i_res);

	if (gfs2_holder_initialized(&gh)) {
		glock_clear_object(ip->i_gl, ip);
		gfs2_glock_dq_uninit(&gh);
	}
	if (ret && ret != GLR_TRYFAILED && ret != -EROFS)
		fs_warn(sdp, "gfs2_evict_inode: %d\n", ret);
out:
	truncate_inode_pages_final(&inode->i_data);
	if (ip->i_qadata)
		gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
	gfs2_rs_delete(ip, NULL);
	gfs2_ordered_del_inode(ip);
	clear_inode(inode);
	gfs2_dir_hash_inval(ip);
	if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
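		/*
		 * Tear down the iopen glock; the final put is deferred under
		 * memory pressure so that we never call into DLM from the
		 * memory reclaim path.
		 */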
		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;

		glock_clear_object(gl, ip);
		if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
			ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
			gfs2_glock_dq(&ip->i_iopen_gh);
		}
		gfs2_glock_hold(gl);
		gfs2_holder_uninit(&ip->i_iopen_gh);
		gfs2_glock_put_eventually(gl);
	}
	if (ip->i_gl) {
		glock_clear_object(ip->i_gl, ip);
		wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
		gfs2_glock_add_to_lru(ip->i_gl);
		gfs2_glock_put_eventually(ip->i_gl);
		ip->i_gl = NULL;
	}
}

static struct inode *gfs2_alloc_inode(struct super_block *sb)
{
	struct gfs2_inode *ip;

	ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
	if (!ip)
		return NULL;
	ip->i_flags = 0;
	ip->i_gl = NULL;
	gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);
	memset(&ip->i_res, 0, sizeof(ip->i_res));
	RB_CLEAR_NODE(&ip->i_res.rs_node);
	ip->i_rahead = 0;
	return &ip->i_inode;
}

static void gfs2_free_inode(struct inode *inode)
{
	kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode));
}

void free_local_statfs_inodes(struct gfs2_sbd *sdp)
{
	struct local_statfs_inode *lsi, *safe;

	/* Run through the statfs inodes list to iput and free memory */
	list_for_each_entry_safe(lsi, safe, &sdp->sd_sc_inodes_list, si_list) {
		if (lsi->si_jid == sdp->sd_jdesc->jd_jid)
			sdp->sd_sc_inode = NULL; /* belongs to this node */
		if (lsi->si_sc_inode)
			iput(lsi->si_sc_inode);
		list_del(&lsi->si_list);
		kfree(lsi);
	}
}

struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
				      unsigned int index)
{
	struct local_statfs_inode *lsi;

	/* Return the local (per node) statfs inode in the
	 * sdp->sd_sc_inodes_list corresponding to the 'index'. */
	list_for_each_entry(lsi, &sdp->sd_sc_inodes_list, si_list) {
		if (lsi->si_jid == index)
			return lsi->si_sc_inode;
	}
	return NULL;
}

const struct super_operations gfs2_super_ops = {
	.alloc_inode		= gfs2_alloc_inode,
	.free_inode		= gfs2_free_inode,
	.write_inode		= gfs2_write_inode,
	.dirty_inode		= gfs2_dirty_inode,
	.evict_inode		= gfs2_evict_inode,
	.put_super		= gfs2_put_super,
	.sync_fs		= gfs2_sync_fs,
	.freeze_super		= gfs2_freeze,
	.thaw_super		= gfs2_unfreeze,
	.statfs			= gfs2_statfs,
	.drop_inode		= gfs2_drop_inode,
	.show_options		= gfs2_show_options,
};