Merge tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux

Pull file locking updates from Jeff Layton: "File locking related changes for v4.5 (pile #1) Highlights: - new Kconfig option to allow disabling mandatory locking (which is racy anyway) - new tracepoints for setlk and close codepaths - fix for a long-standing bug in code that handles races between setting a POSIX lock and close()" * tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux: locks: rename __posix_lock_file to posix_lock_inode locks: prink more detail when there are leaked locks locks: pass inode pointer to locks_free_lock_context locks: sprinkle some tracepoints around the file locking code locks: don't check for race with close when setting OFD lock locks: fix unlock when fcntl_setlk races with a close fs: make locks.c explicitly non-modular locks: use list_first_entry_or_null() locks: Don't allow mounts in user namespaces to enable mandatory locking locks: Allow disabling mandatory locking at compile time

Merge tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux
Pull file locking updates from Jeff Layton: "File locking related changes for v4.5 (pile #1) Highlights: - new Kconfig option to allow disabling mandatory locking (which is racy anyway) - new tracepoints for setlk and close codepaths - fix for a long-standing bug in code that handles races between setting a POSIX lock and close()" * tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux: locks: rename __posix_lock_file to posix_lock_inode locks: prink more detail when there are leaked locks locks: pass inode pointer to locks_free_lock_context locks: sprinkle some tracepoints around the file locking code locks: don't check for race with close when setting OFD lock locks: fix unlock when fcntl_setlk races with a close fs: make locks.c explicitly non-modular locks: use list_first_entry_or_null() locks: Don't allow mounts in user namespaces to enable mandatory locking locks: Allow disabling mandatory locking at compile time
065019a3 · Linus Torvalds · 4f31d774 · b4d629a3 · 065019a3 · 065019a3
Commit 065019a3 authored Jan 12, 2016 by Linus Torvalds
6 changed files
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -73,6 +73,16 @@ config FILE_LOCKING
          for filesystems like NFS and for the flock() system
          call. Disabling this option saves about 11k.

+config MANDATORY_FILE_LOCKING
+	bool "Enable Mandatory file locking"
+	depends on FILE_LOCKING
+	default y
+	help
+	  This option enables files appropriately marked files on appropriely
+	  mounted filesystems to support mandatory locking.
+
+	  To the best of my knowledge this is dead code that no one cares about.
+
 source "fs/notify/Kconfig"

 source "fs/quota/Kconfig"

--- a/fs/inode.c
+++ b/fs/inode.c
@@ -225,7 +225,7 @@ void __destroy_inode(struct inode *inode)
 	inode_detach_wb(inode);
 	security_inode_free(inode);
 	fsnotify_inode_delete(inode);
-	locks_free_lock_context(inode->i_flctx);
+	locks_free_lock_context(inode);
 	if (!inode->i_nlink) {
 		WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
 		atomic_long_dec(&inode->i_sb->s_remove_count);

--- a/fs/locks.c
+++ b/fs/locks.c
@@ -119,7 +119,6 @@
 #include <linux/fdtable.h>
 #include <linux/fs.h>
 #include <linux/init.h>
-#include <linux/module.h>
 #include <linux/security.h>
 #include <linux/slab.h>
 #include <linux/syscalls.h>
@@ -230,16 +229,44 @@ locks_get_lock_context(struct inode *inode, int type)
 		ctx = smp_load_acquire(&inode->i_flctx);
 	}
 out:
+	trace_locks_get_lock_context(inode, type, ctx);
 	return ctx;
 }

+static void
+locks_dump_ctx_list(struct list_head *list, char *list_type)
+{
+	struct file_lock *fl;
+
+	list_for_each_entry(fl, list, fl_list) {
+		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
+	}
+}
+
+static void
+locks_check_ctx_lists(struct inode *inode)
+{
+	struct file_lock_context *ctx = inode->i_flctx;
+
+	if (unlikely(!list_empty(&ctx->flc_flock) ||
+		     !list_empty(&ctx->flc_posix) ||
+		     !list_empty(&ctx->flc_lease))) {
+		pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
+			MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
+			inode->i_ino);
+		locks_dump_ctx_list(&ctx->flc_flock, "FLOCK");
+		locks_dump_ctx_list(&ctx->flc_posix, "POSIX");
+		locks_dump_ctx_list(&ctx->flc_lease, "LEASE");
+	}
+}
+
 void
-locks_free_lock_context(struct file_lock_context *ctx)
+locks_free_lock_context(struct inode *inode)
 {
-	if (ctx) {
-		WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
-		WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
-		WARN_ON_ONCE(!list_empty(&ctx->flc_lease));
+	struct file_lock_context *ctx = inode->i_flctx;
+
+	if (unlikely(ctx)) {
+		locks_check_ctx_lists(inode);
 		kmem_cache_free(flctx_cache, ctx);
 	}
 }
@@ -934,7 +961,8 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request)
 	return error;
 }

-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
+static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+			    struct file_lock *conflock)
 {
 	struct file_lock *fl, *tmp;
 	struct file_lock *new_fl = NULL;
@@ -1142,6 +1170,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
 	if (new_fl2)
 		locks_free_lock(new_fl2);
 	locks_dispose_list(&dispose);
+	trace_posix_lock_inode(inode, request, error);
+
 	return error;
 }

@@ -1162,7 +1192,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
 int posix_lock_file(struct file *filp, struct file_lock *fl,
 			struct file_lock *conflock)
 {
-	return __posix_lock_file(file_inode(filp), fl, conflock);
+	return posix_lock_inode(file_inode(filp), fl, conflock);
 }
 EXPORT_SYMBOL(posix_lock_file);

@@ -1178,7 +1208,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
 	int error;
 	might_sleep ();
 	for (;;) {
-		error = __posix_lock_file(inode, fl, NULL);
+		error = posix_lock_inode(inode, fl, NULL);
 		if (error != FILE_LOCK_DEFERRED)
 			break;
 		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1191,6 +1221,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
 	return error;
 }

+#ifdef CONFIG_MANDATORY_FILE_LOCKING
 /**
 * locks_mandatory_locked - Check for an active lock
 * @file: the file to check
@@ -1260,7 +1291,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
 		if (filp) {
 			fl.fl_owner = filp;
 			fl.fl_flags &= ~FL_SLEEP;
-			error = __posix_lock_file(inode, &fl, NULL);
+			error = posix_lock_inode(inode, &fl, NULL);
 			if (!error)
 				break;
 		}
@@ -1268,7 +1299,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
 		if (sleep)
 			fl.fl_flags |= FL_SLEEP;
 		fl.fl_owner = current->files;
-		error = __posix_lock_file(inode, &fl, NULL);
+		error = posix_lock_inode(inode, &fl, NULL);
 		if (error != FILE_LOCK_DEFERRED)
 			break;
 		error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
@@ -1289,6 +1320,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
 }

 EXPORT_SYMBOL(locks_mandatory_area);
+#endif /* CONFIG_MANDATORY_FILE_LOCKING */

 static void lease_clear_pending(struct file_lock *fl, int arg)
 {
@@ -1503,12 +1535,10 @@ void lease_get_mtime(struct inode *inode, struct timespec *time)
 	ctx = smp_load_acquire(&inode->i_flctx);
 	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
 		spin_lock(&ctx->flc_lock);
-		if (!list_empty(&ctx->flc_lease)) {
-			fl = list_first_entry(&ctx->flc_lease,
-						struct file_lock, fl_list);
-			if (fl->fl_type == F_WRLCK)
-				has_lease = true;
-		}
+		fl = list_first_entry_or_null(&ctx->flc_lease,
+					      struct file_lock, fl_list);
+		if (fl && (fl->fl_type == F_WRLCK))
+			has_lease = true;
 		spin_unlock(&ctx->flc_lock);
 	}

@@ -2165,6 +2195,8 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (file_lock == NULL)
 		return -ENOLCK;

+	inode = file_inode(filp);
+
 	/*
 	 * This might block, so we do it before checking the inode.
 	 */
@@ -2172,8 +2204,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (copy_from_user(&flock, l, sizeof(flock)))
 		goto out;

-	inode = file_inode(filp);
-
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
 	 */
@@ -2182,7 +2212,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		goto out;
 	}

-again:
 	error = flock_to_posix_lock(filp, file_lock, &flock);
 	if (error)
 		goto out;
@@ -2221,23 +2250,29 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	error = do_lock_file_wait(filp, cmd, file_lock);

 	/*
-	 * Attempt to detect a close/fcntl race and recover by
-	 * releasing the lock that was just acquired.
-	 */
-	/*
-	 * we need that spin_lock here - it prevents reordering between
-	 * update of i_flctx->flc_posix and check for it done in close().
-	 * rcu_read_lock() wouldn't do.
+	 * Attempt to detect a close/fcntl race and recover by releasing the
+	 * lock that was just acquired. There is no need to do that when we're
+	 * unlocking though, or for OFD locks.
 	 */
-	spin_lock(&current->files->file_lock);
-	f = fcheck(fd);
-	spin_unlock(&current->files->file_lock);
-	if (!error && f != filp && flock.l_type != F_UNLCK) {
-		flock.l_type = F_UNLCK;
-		goto again;
+	if (!error && file_lock->fl_type != F_UNLCK &&
+	    !(file_lock->fl_flags & FL_OFDLCK)) {
+		/*
+		 * We need that spin_lock here - it prevents reordering between
+		 * update of i_flctx->flc_posix and check for it done in
+		 * close(). rcu_read_lock() wouldn't do.
+		 */
+		spin_lock(&current->files->file_lock);
+		f = fcheck(fd);
+		spin_unlock(&current->files->file_lock);
+		if (f != filp) {
+			file_lock->fl_type = F_UNLCK;
+			error = do_lock_file_wait(filp, cmd, file_lock);
+			WARN_ON_ONCE(error);
+			error = -EBADF;
+		}
 	}
-
 out:
+	trace_fcntl_setlk(inode, file_lock, error);
 	locks_free_lock(file_lock);
 	return error;
 }
@@ -2322,7 +2357,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		goto out;
 	}

-again:
 	error = flock64_to_posix_lock(filp, file_lock, &flock);
 	if (error)
 		goto out;
@@ -2361,17 +2395,27 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 	error = do_lock_file_wait(filp, cmd, file_lock);

 	/*
-	 * Attempt to detect a close/fcntl race and recover by
-	 * releasing the lock that was just acquired.
+	 * Attempt to detect a close/fcntl race and recover by releasing the
+	 * lock that was just acquired. There is no need to do that when we're
+	 * unlocking though, or for OFD locks.
 	 */
-	spin_lock(&current->files->file_lock);
-	f = fcheck(fd);
-	spin_unlock(&current->files->file_lock);
-	if (!error && f != filp && flock.l_type != F_UNLCK) {
-		flock.l_type = F_UNLCK;
-		goto again;
+	if (!error && file_lock->fl_type != F_UNLCK &&
+	    !(file_lock->fl_flags & FL_OFDLCK)) {
+		/*
+		 * We need that spin_lock here - it prevents reordering between
+		 * update of i_flctx->flc_posix and check for it done in
+		 * close(). rcu_read_lock() wouldn't do.
+		 */
+		spin_lock(&current->files->file_lock);
+		f = fcheck(fd);
+		spin_unlock(&current->files->file_lock);
+		if (f != filp) {
+			file_lock->fl_type = F_UNLCK;
+			error = do_lock_file_wait(filp, cmd, file_lock);
+			WARN_ON_ONCE(error);
+			error = -EBADF;
+		}
 	}
-
 out:
 	locks_free_lock(file_lock);
 	return error;
@@ -2385,6 +2429,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 */
 void locks_remove_posix(struct file *filp, fl_owner_t owner)
 {
+	int error;
 	struct file_lock lock;
 	struct file_lock_context *ctx;

@@ -2407,10 +2452,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
 	lock.fl_ops = NULL;
 	lock.fl_lmops = NULL;

-	vfs_lock_file(filp, F_SETLK, &lock, NULL);
+	error = vfs_lock_file(filp, F_SETLK, &lock, NULL);

 	if (lock.fl_ops && lock.fl_ops->fl_release_private)
 		lock.fl_ops->fl_release_private(&lock);
+	trace_locks_remove_posix(file_inode(filp), &lock, error);
 }

 EXPORT_SYMBOL(locks_remove_posix);
@@ -2706,7 +2752,7 @@ static int __init proc_locks_init(void)
 	proc_create("locks", 0, NULL, &proc_locks_operations);
 	return 0;
 }
-module_init(proc_locks_init);
+fs_initcall(proc_locks_init);
 #endif

 static int __init filelock_init(void)

--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1584,6 +1584,14 @@ static inline bool may_mount(void)
 	return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
 }

+static inline bool may_mandlock(void)
+{
+#ifndef	CONFIG_MANDATORY_FILE_LOCKING
+	return false;
+#endif
+	return capable(CAP_SYS_ADMIN);
+}
+
 /*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
@@ -2677,6 +2685,8 @@ long do_mount(const char *dev_name, const char __user *dir_name,
 				   type_page, flags, data_page);
 	if (!retval && !may_mount())
 		retval = -EPERM;
+	if (!retval && (flags & MS_MANDLOCK) && !may_mandlock())
+		retval = -EPERM;
 	if (retval)
 		goto dput_out;


--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1043,7 +1043,7 @@ extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
 extern int fcntl_getlease(struct file *filp);

 /* fs/locks.c */
-void locks_free_lock_context(struct file_lock_context *ctx);
+void locks_free_lock_context(struct inode *inode);
 void locks_free_lock(struct file_lock *fl);
 extern void locks_init_lock(struct file_lock *);
 extern struct file_lock * locks_alloc_lock(void);
@@ -1104,7 +1104,7 @@ static inline int fcntl_getlease(struct file *filp)
 }

 static inline void
-locks_free_lock_context(struct file_lock_context *ctx)
+locks_free_lock_context(struct inode *inode)
 {
 }

@@ -2030,7 +2030,7 @@ extern struct kobject *fs_kobj;
 #define FLOCK_VERIFY_READ  1
 #define FLOCK_VERIFY_WRITE 2

-#ifdef CONFIG_FILE_LOCKING
+#ifdef CONFIG_MANDATORY_FILE_LOCKING
 extern int locks_mandatory_locked(struct file *);
 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);

@@ -2075,6 +2075,45 @@ static inline int locks_verify_truncate(struct inode *inode,
 	return 0;
 }

+#else /* !CONFIG_MANDATORY_FILE_LOCKING */
+
+static inline int locks_mandatory_locked(struct file *file)
+{
+	return 0;
+}
+
+static inline int locks_mandatory_area(int rw, struct inode *inode,
+				       struct file *filp, loff_t offset,
+				       size_t count)
+{
+	return 0;
+}
+
+static inline int __mandatory_lock(struct inode *inode)
+{
+	return 0;
+}
+
+static inline int mandatory_lock(struct inode *inode)
+{
+	return 0;
+}
+
+static inline int locks_verify_locked(struct file *file)
+{
+	return 0;
+}
+
+static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
+					size_t size)
+{
+	return 0;
+}
+
+#endif /* CONFIG_MANDATORY_FILE_LOCKING */
+
+
+#ifdef CONFIG_FILE_LOCKING
 static inline int break_lease(struct inode *inode, unsigned int mode)
 {
 	/*
@@ -2136,39 +2175,6 @@ static inline int break_layout(struct inode *inode, bool wait)
 }

 #else /* !CONFIG_FILE_LOCKING */
-static inline int locks_mandatory_locked(struct file *file)
-{
-	return 0;
-}
-
-static inline int locks_mandatory_area(int rw, struct inode *inode,
-				       struct file *filp, loff_t offset,
-				       size_t count)
-{
-	return 0;
-}
-
-static inline int __mandatory_lock(struct inode *inode)
-{
-	return 0;
-}
-
-static inline int mandatory_lock(struct inode *inode)
-{
-	return 0;
-}
-
-static inline int locks_verify_locked(struct file *file)
-{
-	return 0;
-}
-
-static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
-					size_t size)
-{
-	return 0;
-}
-
 static inline int break_lease(struct inode *inode, unsigned int mode)
 {
 	return 0;

--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -34,6 +34,83 @@
 			{ F_WRLCK, "F_WRLCK" },		\
 			{ F_UNLCK, "F_UNLCK" })

+TRACE_EVENT(locks_get_lock_context,
+	TP_PROTO(struct inode *inode, int type, struct file_lock_context *ctx),
+
+	TP_ARGS(inode, type, ctx),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, i_ino)
+		__field(dev_t, s_dev)
+		__field(unsigned char, type)
+		__field(struct file_lock_context *, ctx)
+	),
+
+	TP_fast_assign(
+		__entry->s_dev = inode->i_sb->s_dev;
+		__entry->i_ino = inode->i_ino;
+		__entry->type = type;
+		__entry->ctx = ctx;
+	),
+
+	TP_printk("dev=0x%x:0x%x ino=0x%lx type=%s ctx=%p",
+		  MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+		  __entry->i_ino, show_fl_type(__entry->type), __entry->ctx)
+);
+
+DECLARE_EVENT_CLASS(filelock_lock,
+	TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+
+	TP_ARGS(inode, fl, ret),
+
+	TP_STRUCT__entry(
+		__field(struct file_lock *, fl)
+		__field(unsigned long, i_ino)
+		__field(dev_t, s_dev)
+		__field(struct file_lock *, fl_next)
+		__field(fl_owner_t, fl_owner)
+		__field(unsigned int, fl_pid)
+		__field(unsigned int, fl_flags)
+		__field(unsigned char, fl_type)
+		__field(loff_t, fl_start)
+		__field(loff_t, fl_end)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		__entry->fl = fl ? fl : NULL;
+		__entry->s_dev = inode->i_sb->s_dev;
+		__entry->i_ino = inode->i_ino;
+		__entry->fl_next = fl ? fl->fl_next : NULL;
+		__entry->fl_owner = fl ? fl->fl_owner : NULL;
+		__entry->fl_pid = fl ? fl->fl_pid : 0;
+		__entry->fl_flags = fl ? fl->fl_flags : 0;
+		__entry->fl_type = fl ? fl->fl_type : 0;
+		__entry->fl_start = fl ? fl->fl_start : 0;
+		__entry->fl_end = fl ? fl->fl_end : 0;
+		__entry->ret = ret;
+	),
+
+	TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d",
+		__entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+		__entry->i_ino, __entry->fl_next, __entry->fl_owner,
+		__entry->fl_pid, show_fl_flags(__entry->fl_flags),
+		show_fl_type(__entry->fl_type),
+		__entry->fl_start, __entry->fl_end, __entry->ret)
+);
+
+DEFINE_EVENT(filelock_lock, posix_lock_inode,
+		TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+		TP_ARGS(inode, fl, ret));
+
+DEFINE_EVENT(filelock_lock, fcntl_setlk,
+		TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+		TP_ARGS(inode, fl, ret));
+
+DEFINE_EVENT(filelock_lock, locks_remove_posix,
+		TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+		TP_ARGS(inode, fl, ret));
+
 DECLARE_EVENT_CLASS(filelock_lease,

 	TP_PROTO(struct inode *inode, struct file_lock *fl),