Merge tag 'gfs2-v6.10-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2

Pull gfs2 updates from Andreas Gruenbacher: "Fixes and cleanups: - Revise the glock reference counting model and LRU list handling to be more sensible - Several quota related fixes: clean up the quota code, add some missing locking, and work around the on-disk corruption that the reverted patch "gfs2: ignore negated quota changes" causes - Clean up the glock demote logic in glock_work_func()" * tag 'gfs2-v6.10-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2: (29 commits) gfs2: Clean up glock demote logic gfs2: Revert "check for no eligible quota changes" gfs2: Be more careful with the quota sync generation gfs2: Get rid of some unnecessary quota locking gfs2: Add some missing quota locking gfs2: Fold qd_fish into gfs2_quota_sync gfs2: quota need_sync cleanup gfs2: Fix and clean up function do_qc gfs2: Revert "Add quota_change type" gfs2: Revert "ignore negated quota changes" gfs2: qd_check_sync cleanups gfs2: Revert "introduce qd_bh_get_or_undo" gfs2: Check quota consistency on mount gfs2: Minor gfs2_quota_init error path cleanup gfs2: Get rid of demote_ok checks Revert "GFS2: Don't add all glocks to the lru" gfs2: Revise glock reference counting model gfs2: Switch to a per-filesystem glock workqueue gfs2: Report when glocks cannot be freed for a long time gfs2: gfs2_glock_get cleanup ...

Merge tag 'gfs2-v6.10-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2
Pull gfs2 updates from Andreas Gruenbacher: "Fixes and cleanups: - Revise the glock reference counting model and LRU list handling to be more sensible - Several quota related fixes: clean up the quota code, add some missing locking, and work around the on-disk corruption that the reverted patch "gfs2: ignore negated quota changes" causes - Clean up the glock demote logic in glock_work_func()" * tag 'gfs2-v6.10-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2: (29 commits) gfs2: Clean up glock demote logic gfs2: Revert "check for no eligible quota changes" gfs2: Be more careful with the quota sync generation gfs2: Get rid of some unnecessary quota locking gfs2: Add some missing quota locking gfs2: Fold qd_fish into gfs2_quota_sync gfs2: quota need_sync cleanup gfs2: Fix and clean up function do_qc gfs2: Revert "Add quota_change type" gfs2: Revert "ignore negated quota changes" gfs2: qd_check_sync cleanups gfs2: Revert "introduce qd_bh_get_or_undo" gfs2: Check quota consistency on mount gfs2: Minor gfs2_quota_init error path cleanup gfs2: Get rid of demote_ok checks Revert "GFS2: Don't add all glocks to the lru" gfs2: Revise glock reference counting model gfs2: Switch to a per-filesystem glock workqueue gfs2: Report when glocks cannot be freed for a long time gfs2: gfs2_glock_get cleanup ...
6706415b · Linus Torvalds · f097ef0e · f75efefb · 6706415b · 6706415b
Commit 6706415b authored Jul 17, 2024 by Linus Torvalds
11 changed files
--- a/Documentation/filesystems/gfs2-glocks.rst
+++ b/Documentation/filesystems/gfs2-glocks.rst
@@ -40,14 +40,14 @@ shared lock mode, SH. In GFS2 the DF mode is used exclusively for direct I/O
 operations. The glocks are basically a lock plus some routines which deal
 with cache management. The following rules apply for the cache:

-==========      ==========   ==============   ==========   ==============
-Glock mode      Cache data   Cache Metadata   Dirty Data   Dirty Metadata
-==========      ==========   ==============   ==========   ==============
-    UN             No              No             No            No
-    SH             Yes             Yes            No            No
-    DF             No              Yes            No            No
-    EX             Yes             Yes            Yes           Yes
-==========      ==========   ==============   ==========   ==============
+==========      ==============   ==========   ==========   ==============
+Glock mode      Cache Metadata   Cache data   Dirty Data   Dirty Metadata
+==========      ==============   ==========   ==========   ==============
+    UN                No            No            No            No
+    DF                Yes           No            No            No
+    SH                Yes           Yes           No            No
+    EX                Yes           Yes           Yes           Yes
+==========      ==============   ==========   ==========   ==============

 These rules are implemented using the various glock operations which
 are defined for each type of glock. Not all types of glocks use
@@ -55,23 +55,22 @@ all the modes. Only inode glocks use the DF mode for example.

 Table of glock operations and per type constants:

-=============      =============================================================
+==============     =============================================================
 Field              Purpose
-=============      =============================================================
-go_xmote_th        Called before remote state change (e.g. to sync dirty data)
+==============     =============================================================
+go_sync            Called before remote state change (e.g. to sync dirty data)
 go_xmote_bh        Called after remote state change (e.g. to refill cache)
 go_inval           Called if remote state change requires invalidating the cache
-go_demote_ok       Returns boolean value of whether its ok to demote a glock
-                   (e.g. checks timeout, and that there is no cached data)
-go_lock            Called for the first local holder of a lock
-go_unlock          Called on the final local unlock of a lock
+go_instantiate     Called when a glock has been acquired
+go_held            Called every time a glock holder is acquired
 go_dump            Called to print content of object for debugfs file, or on
                   error to dump glock to the log.
-go_type            The type of the glock, ``LM_TYPE_*``
 go_callback	   Called if the DLM sends a callback to drop this lock
+go_unlocked        Called when a glock is unlocked (dlm_unlock())
+go_type            The type of the glock, ``LM_TYPE_*``
 go_flags	   GLOF_ASPACE is set, if the glock has an address space
                   associated with it
-=============      =============================================================
+==============     =============================================================

 The minimum hold time for each lock is the time after a remote lock
 grant for which we ignore remote demote requests. This is in order to
@@ -82,26 +81,24 @@ to by multiple nodes. By delaying the demotion in response to a
 remote callback, that gives the userspace program time to make
 some progress before the pages are unmapped.

-There is a plan to try and remove the go_lock and go_unlock callbacks
-if possible, in order to try and speed up the fast path though the locking.
-Also, eventually we hope to make the glock "EX" mode locally shared
-such that any local locking will be done with the i_mutex as required
-rather than via the glock.
+Eventually, we hope to make the glock "EX" mode locally shared such that any
+local locking will be done with the i_mutex as required rather than via the
+glock.

 Locking rules for glock operations:

-=============    ======================    =============================
+==============   ======================    =============================
 Operation        GLF_LOCK bit lock held    gl_lockref.lock spinlock held
-=============    ======================    =============================
-go_xmote_th           Yes                       No
+==============   ======================    =============================
+go_sync               Yes                       No
 go_xmote_bh           Yes                       No
 go_inval              Yes                       No
-go_demote_ok          Sometimes                 Yes
-go_lock               Yes                       No
-go_unlock             Yes                       No
+go_instantiate        No                        No
+go_held               No                        No
 go_dump               Sometimes                 Yes
 go_callback           Sometimes (N/A)           Yes
-=============    ======================    =============================
+go_unlocked           Yes                       No
+==============   ======================    =============================

 .. Note::


--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -250,7 +250,6 @@ void gfs2_flush_delete_work(struct gfs2_sbd *sdp);
 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
 void gfs2_gl_dq_holders(struct gfs2_sbd *sdp);
 void gfs2_glock_thaw(struct gfs2_sbd *sdp);
-void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
 void gfs2_glock_free(struct gfs2_glock *gl);
 void gfs2_glock_free_later(struct gfs2_glock *gl);


--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -385,23 +385,6 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
 	gfs2_clear_glop_pending(ip);
 }

-/**
- * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
- * @gl: the glock
- *
- * Returns: 1 if it's ok
- */
-
-static int inode_go_demote_ok(const struct gfs2_glock *gl)
-{
-	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
-
-	if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
-		return 0;
-
-	return 1;
-}
-
 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
@@ -648,21 +631,21 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
 }

 /**
- * inode_go_free - wake up anyone waiting for dlm's unlock ast to free it
- * @gl: glock being freed
+ * inode_go_unlocked - wake up anyone waiting for dlm's unlock ast
+ * @gl: glock being unlocked
 *
 * For now, this is only used for the journal inode glock. In withdraw
- * situations, we need to wait for the glock to be freed so that we know
+ * situations, we need to wait for the glock to be unlocked so that we know
 * other nodes may proceed with recovery / journal replay.
 */
-static void inode_go_free(struct gfs2_glock *gl)
+static void inode_go_unlocked(struct gfs2_glock *gl)
 {
 	/* Note that we cannot reference gl_object because it's already set
 	 * to NULL by this point in its lifecycle. */
-	if (!test_bit(GLF_FREEING, &gl->gl_flags))
+	if (!test_bit(GLF_UNLOCKED, &gl->gl_flags))
 		return;
-	clear_bit_unlock(GLF_FREEING, &gl->gl_flags);
-	wake_up_bit(&gl->gl_flags, GLF_FREEING);
+	clear_bit_unlock(GLF_UNLOCKED, &gl->gl_flags);
+	wake_up_bit(&gl->gl_flags, GLF_UNLOCKED);
 }

 /**
@@ -722,13 +705,12 @@ const struct gfs2_glock_operations gfs2_meta_glops = {
 const struct gfs2_glock_operations gfs2_inode_glops = {
 	.go_sync = inode_go_sync,
 	.go_inval = inode_go_inval,
-	.go_demote_ok = inode_go_demote_ok,
 	.go_instantiate = inode_go_instantiate,
 	.go_held = inode_go_held,
 	.go_dump = inode_go_dump,
 	.go_type = LM_TYPE_INODE,
-	.go_flags = GLOF_ASPACE | GLOF_LRU | GLOF_LVB,
-	.go_free = inode_go_free,
+	.go_flags = GLOF_ASPACE | GLOF_LVB,
+	.go_unlocked = inode_go_unlocked,
 };

 const struct gfs2_glock_operations gfs2_rgrp_glops = {
@@ -751,13 +733,13 @@ const struct gfs2_glock_operations gfs2_iopen_glops = {
 	.go_type = LM_TYPE_IOPEN,
 	.go_callback = iopen_go_callback,
 	.go_dump = inode_go_dump,
-	.go_flags = GLOF_LRU | GLOF_NONDISK,
+	.go_flags = GLOF_NONDISK,
 	.go_subclass = 1,
 };

 const struct gfs2_glock_operations gfs2_flock_glops = {
 	.go_type = LM_TYPE_FLOCK,
-	.go_flags = GLOF_LRU | GLOF_NONDISK,
+	.go_flags = GLOF_NONDISK,
 };

 const struct gfs2_glock_operations gfs2_nondisk_glops = {
@@ -768,7 +750,7 @@ const struct gfs2_glock_operations gfs2_nondisk_glops = {

 const struct gfs2_glock_operations gfs2_quota_glops = {
 	.go_type = LM_TYPE_QUOTA,
-	.go_flags = GLOF_LVB | GLOF_LRU | GLOF_NONDISK,
+	.go_flags = GLOF_LVB | GLOF_NONDISK,
 };

 const struct gfs2_glock_operations gfs2_journal_glops = {

--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -218,19 +218,17 @@ struct gfs2_glock_operations {
 	int (*go_sync) (struct gfs2_glock *gl);
 	int (*go_xmote_bh)(struct gfs2_glock *gl);
 	void (*go_inval) (struct gfs2_glock *gl, int flags);
-	int (*go_demote_ok) (const struct gfs2_glock *gl);
 	int (*go_instantiate) (struct gfs2_glock *gl);
 	int (*go_held)(struct gfs2_holder *gh);
 	void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl,
 			const char *fs_id_buf);
 	void (*go_callback)(struct gfs2_glock *gl, bool remote);
-	void (*go_free)(struct gfs2_glock *gl);
+	void (*go_unlocked)(struct gfs2_glock *gl);
 	const int go_subclass;
 	const int go_type;
 	const unsigned long go_flags;
 #define GLOF_ASPACE 1 /* address space attached */
 #define GLOF_LVB    2 /* Lock Value Block attached */
-#define GLOF_LRU    4 /* LRU managed */
 #define GLOF_NONDISK   8 /* not I/O related */
 };

@@ -322,14 +320,14 @@ enum {
 	GLF_DIRTY			= 6,
 	GLF_LFLUSH			= 7,
 	GLF_INVALIDATE_IN_PROGRESS	= 8,
-	GLF_REPLY_PENDING		= 9,
+	GLF_HAVE_REPLY			= 9,
 	GLF_INITIAL			= 10,
-	GLF_FROZEN			= 11,
+	GLF_HAVE_FROZEN_REPLY		= 11,
 	GLF_INSTANTIATE_IN_PROG		= 12, /* instantiate happening now */
 	GLF_LRU				= 13,
 	GLF_OBJECT			= 14, /* Used only for tracing */
 	GLF_BLOCKING			= 15,
-	GLF_FREEING			= 16, /* Wait for glock to be freed */
+	GLF_UNLOCKED			= 16, /* Wait for glock to be unlocked */
 	GLF_TRY_TO_EVICT		= 17, /* iopen glocks only */
 	GLF_VERIFY_EVICT		= 18, /* iopen glocks only */
 };
@@ -772,6 +770,7 @@ struct gfs2_sbd {

 	/* Workqueue stuff */

+	struct workqueue_struct *sd_glock_wq;
 	struct workqueue_struct *sd_delete_wq;

 	/* Daemon stuff */
@@ -783,7 +782,6 @@ struct gfs2_sbd {

 	struct list_head sd_quota_list;
 	atomic_t sd_quota_count;
-	struct mutex sd_quota_mutex;
 	struct mutex sd_quota_sync_mutex;
 	wait_queue_head_t sd_quota_wait;


--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -134,8 +134,8 @@ static void gdlm_ast(void *arg)

 	switch (gl->gl_lksb.sb_status) {
 	case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
-		if (gl->gl_ops->go_free)
-			gl->gl_ops->go_free(gl);
+		if (gl->gl_ops->go_unlocked)
+			gl->gl_ops->go_unlocked(gl);
 		gfs2_glock_free(gl);
 		return;
 	case -DLM_ECANCEL: /* Cancel while getting lock */
@@ -163,11 +163,21 @@ static void gdlm_ast(void *arg)
 			BUG();
 	}

-	set_bit(GLF_INITIAL, &gl->gl_flags);
+	/*
+	 * The GLF_INITIAL flag is initially set for new glocks.  Upon the
+	 * first successful new (non-conversion) request, we clear this flag to
+	 * indicate that a DLM lock exists and that gl->gl_lksb.sb_lkid is the
+	 * identifier to use for identifying it.
+	 *
+	 * Any failed initial requests do not create a DLM lock, so we ignore
+	 * the gl->gl_lksb.sb_lkid values that come with such requests.
+	 */
+
+	clear_bit(GLF_INITIAL, &gl->gl_flags);
 	gfs2_glock_complete(gl, ret);
 	return;
 out:
-	if (!test_bit(GLF_INITIAL, &gl->gl_flags))
+	if (test_bit(GLF_INITIAL, &gl->gl_flags))
 		gl->gl_lksb.sb_lkid = 0;
 	gfs2_glock_complete(gl, ret);
 }
@@ -239,7 +249,7 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
 			BUG();
 	}

-	if (gl->gl_lksb.sb_lkid != 0) {
+	if (!test_bit(GLF_INITIAL, &gl->gl_flags)) {
 		lkf |= DLM_LKF_CONVERT;
 		if (test_bit(GLF_BLOCKING, &gl->gl_flags))
 			lkf |= DLM_LKF_QUECVT;
@@ -270,14 +280,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
 	lkf = make_flags(gl, flags, req);
 	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
 	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
-	if (gl->gl_lksb.sb_lkid) {
-		gfs2_update_request_times(gl);
-	} else {
+	if (test_bit(GLF_INITIAL, &gl->gl_flags)) {
 		memset(strname, ' ', GDLM_STRNAME_BYTES - 1);
 		strname[GDLM_STRNAME_BYTES - 1] = '\0';
 		gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type);
 		gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number);
 		gl->gl_dstamp = ktime_get_real();
+	} else {
+		gfs2_update_request_times(gl);
 	}
 	/*
 	 * Submit the actual lock request.
@@ -301,7 +311,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl)

 	BUG_ON(!__lockref_is_dead(&gl->gl_lockref));

-	if (gl->gl_lksb.sb_lkid == 0) {
+	if (test_bit(GLF_INITIAL, &gl->gl_flags)) {
 		gfs2_glock_free(gl);
 		return;
 	}

--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -103,7 +103,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 	init_completion(&sdp->sd_journal_ready);

 	INIT_LIST_HEAD(&sdp->sd_quota_list);
-	mutex_init(&sdp->sd_quota_mutex);
 	mutex_init(&sdp->sd_quota_sync_mutex);
 	init_waitqueue_head(&sdp->sd_quota_wait);
 	spin_lock_init(&sdp->sd_bitmap_lock);
@@ -1188,11 +1187,17 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)

 	snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name);

+	error = -ENOMEM;
+	sdp->sd_glock_wq = alloc_workqueue("gfs2-glock/%s",
+			WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0,
+			sdp->sd_fsname);
+	if (!sdp->sd_glock_wq)
+		goto fail_free;
+
 	sdp->sd_delete_wq = alloc_workqueue("gfs2-delete/%s",
 			WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, sdp->sd_fsname);
-	error = -ENOMEM;
 	if (!sdp->sd_delete_wq)
-		goto fail_free;
+		goto fail_glock_wq;

 	error = gfs2_sys_fs_add(sdp);
 	if (error)
@@ -1301,6 +1306,8 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
 	gfs2_sys_fs_del(sdp);
 fail_delete_wq:
 	destroy_workqueue(sdp->sd_delete_wq);
+fail_glock_wq:
+	destroy_workqueue(sdp->sd_glock_wq);
 fail_free:
 	free_sbd(sdp);
 	sb->s_fs_info = NULL;

--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1524,7 +1524,6 @@ static void gfs2_evict_inode(struct inode *inode)
 	if (ip->i_gl) {
 		glock_clear_object(ip->i_gl, ip);
 		wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
-		gfs2_glock_add_to_lru(ip->i_gl);
 		gfs2_glock_put_eventually(ip->i_gl);
 		rcu_assign_pointer(ip->i_gl, NULL);
 	}

--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -53,9 +53,9 @@
 	{(1UL << GLF_DIRTY),			"y" },		\
 	{(1UL << GLF_LFLUSH),			"f" },		\
 	{(1UL << GLF_INVALIDATE_IN_PROGRESS),	"i" },		\
-	{(1UL << GLF_REPLY_PENDING),		"r" },		\
-	{(1UL << GLF_INITIAL),			"I" },		\
-	{(1UL << GLF_FROZEN),			"F" },		\
+	{(1UL << GLF_HAVE_REPLY),		"r" },		\
+	{(1UL << GLF_INITIAL),			"a" },		\
+	{(1UL << GLF_HAVE_FROZEN_REPLY),	"F" },		\
 	{(1UL << GLF_LRU),			"L" },		\
 	{(1UL << GLF_OBJECT),			"o" },		\
 	{(1UL << GLF_BLOCKING),			"b" })

--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -99,12 +99,12 @@ int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
 */
 int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp)
 {
+	int flags = LM_FLAG_NOEXP | GL_EXACT;
 	int error;

-	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
-				   LM_FLAG_NOEXP | GL_EXACT,
+	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags,
 				   &sdp->sd_freeze_gh);
-	if (error)
+	if (error && error != GLR_TRYFAILED)
 		fs_err(sdp, "can't lock the freeze glock: %d\n", error);
 	return error;
 }
@@ -206,9 +206,9 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
 	 * on other nodes to be successful, otherwise we remain the owner of
 	 * the glock as far as dlm is concerned.
 	 */
-	if (i_gl->gl_ops->go_free) {
-		set_bit(GLF_FREEING, &i_gl->gl_flags);
-		wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
+	if (i_gl->gl_ops->go_unlocked) {
+		set_bit(GLF_UNLOCKED, &i_gl->gl_flags);
+		wait_on_bit(&i_gl->gl_flags, GLF_UNLOCKED, TASK_UNINTERRUPTIBLE);
 	}

 	/*