Commit 065019a3 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux

Pull file locking updates from Jeff Layton:
 "File locking related changes for v4.5 (pile #1)

  Highlights:
   - new Kconfig option to allow disabling mandatory locking (which is
     racy anyway)
   - new tracepoints for setlk and close codepaths
   - fix for a long-standing bug in code that handles races between
     setting a POSIX lock and close()"

* tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux:
  locks: rename __posix_lock_file to posix_lock_inode
  locks: prink more detail when there are leaked locks
  locks: pass inode pointer to locks_free_lock_context
  locks: sprinkle some tracepoints around the file locking code
  locks: don't check for race with close when setting OFD lock
  locks: fix unlock when fcntl_setlk races with a close
  fs: make locks.c explicitly non-modular
  locks: use list_first_entry_or_null()
  locks: Don't allow mounts in user namespaces to enable mandatory locking
  locks: Allow disabling mandatory locking at compile time
parents 4f31d774 b4d629a3
......@@ -73,6 +73,16 @@ config FILE_LOCKING
for filesystems like NFS and for the flock() system
call. Disabling this option saves about 11k.
config MANDATORY_FILE_LOCKING
bool "Enable Mandatory file locking"
depends on FILE_LOCKING
default y
help
This option enables files appropriately marked files on appropriely
mounted filesystems to support mandatory locking.
To the best of my knowledge this is dead code that no one cares about.
source "fs/notify/Kconfig"
source "fs/quota/Kconfig"
......
......@@ -225,7 +225,7 @@ void __destroy_inode(struct inode *inode)
inode_detach_wb(inode);
security_inode_free(inode);
fsnotify_inode_delete(inode);
locks_free_lock_context(inode->i_flctx);
locks_free_lock_context(inode);
if (!inode->i_nlink) {
WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
atomic_long_dec(&inode->i_sb->s_remove_count);
......
......@@ -119,7 +119,6 @@
#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
......@@ -230,16 +229,44 @@ locks_get_lock_context(struct inode *inode, int type)
ctx = smp_load_acquire(&inode->i_flctx);
}
out:
trace_locks_get_lock_context(inode, type, ctx);
return ctx;
}
static void
locks_dump_ctx_list(struct list_head *list, char *list_type)
{
struct file_lock *fl;
list_for_each_entry(fl, list, fl_list) {
pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
}
}
static void
locks_check_ctx_lists(struct inode *inode)
{
struct file_lock_context *ctx = inode->i_flctx;
if (unlikely(!list_empty(&ctx->flc_flock) ||
!list_empty(&ctx->flc_posix) ||
!list_empty(&ctx->flc_lease))) {
pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
inode->i_ino);
locks_dump_ctx_list(&ctx->flc_flock, "FLOCK");
locks_dump_ctx_list(&ctx->flc_posix, "POSIX");
locks_dump_ctx_list(&ctx->flc_lease, "LEASE");
}
}
void
locks_free_lock_context(struct file_lock_context *ctx)
locks_free_lock_context(struct inode *inode)
{
if (ctx) {
WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
WARN_ON_ONCE(!list_empty(&ctx->flc_lease));
struct file_lock_context *ctx = inode->i_flctx;
if (unlikely(ctx)) {
locks_check_ctx_lists(inode);
kmem_cache_free(flctx_cache, ctx);
}
}
......@@ -934,7 +961,8 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request)
return error;
}
static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
static int posix_lock_inode(struct inode *inode, struct file_lock *request,
struct file_lock *conflock)
{
struct file_lock *fl, *tmp;
struct file_lock *new_fl = NULL;
......@@ -1142,6 +1170,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
if (new_fl2)
locks_free_lock(new_fl2);
locks_dispose_list(&dispose);
trace_posix_lock_inode(inode, request, error);
return error;
}
......@@ -1162,7 +1192,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
int posix_lock_file(struct file *filp, struct file_lock *fl,
struct file_lock *conflock)
{
return __posix_lock_file(file_inode(filp), fl, conflock);
return posix_lock_inode(file_inode(filp), fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
......@@ -1178,7 +1208,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
int error;
might_sleep ();
for (;;) {
error = __posix_lock_file(inode, fl, NULL);
error = posix_lock_inode(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
......@@ -1191,6 +1221,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
return error;
}
#ifdef CONFIG_MANDATORY_FILE_LOCKING
/**
* locks_mandatory_locked - Check for an active lock
* @file: the file to check
......@@ -1260,7 +1291,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
if (filp) {
fl.fl_owner = filp;
fl.fl_flags &= ~FL_SLEEP;
error = __posix_lock_file(inode, &fl, NULL);
error = posix_lock_inode(inode, &fl, NULL);
if (!error)
break;
}
......@@ -1268,7 +1299,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
if (sleep)
fl.fl_flags |= FL_SLEEP;
fl.fl_owner = current->files;
error = __posix_lock_file(inode, &fl, NULL);
error = posix_lock_inode(inode, &fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
......@@ -1289,6 +1320,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
}
EXPORT_SYMBOL(locks_mandatory_area);
#endif /* CONFIG_MANDATORY_FILE_LOCKING */
static void lease_clear_pending(struct file_lock *fl, int arg)
{
......@@ -1503,12 +1535,10 @@ void lease_get_mtime(struct inode *inode, struct timespec *time)
ctx = smp_load_acquire(&inode->i_flctx);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
if (!list_empty(&ctx->flc_lease)) {
fl = list_first_entry(&ctx->flc_lease,
struct file_lock, fl_list);
if (fl->fl_type == F_WRLCK)
has_lease = true;
}
fl = list_first_entry_or_null(&ctx->flc_lease,
struct file_lock, fl_list);
if (fl && (fl->fl_type == F_WRLCK))
has_lease = true;
spin_unlock(&ctx->flc_lock);
}
......@@ -2165,6 +2195,8 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (file_lock == NULL)
return -ENOLCK;
inode = file_inode(filp);
/*
* This might block, so we do it before checking the inode.
*/
......@@ -2172,8 +2204,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (copy_from_user(&flock, l, sizeof(flock)))
goto out;
inode = file_inode(filp);
/* Don't allow mandatory locks on files that may be memory mapped
* and shared.
*/
......@@ -2182,7 +2212,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
goto out;
}
again:
error = flock_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
......@@ -2221,23 +2250,29 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
error = do_lock_file_wait(filp, cmd, file_lock);
/*
* Attempt to detect a close/fcntl race and recover by
* releasing the lock that was just acquired.
*/
/*
* we need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in close().
* rcu_read_lock() wouldn't do.
* Attempt to detect a close/fcntl race and recover by releasing the
* lock that was just acquired. There is no need to do that when we're
* unlocking though, or for OFD locks.
*/
spin_lock(&current->files->file_lock);
f = fcheck(fd);
spin_unlock(&current->files->file_lock);
if (!error && f != filp && flock.l_type != F_UNLCK) {
flock.l_type = F_UNLCK;
goto again;
if (!error && file_lock->fl_type != F_UNLCK &&
!(file_lock->fl_flags & FL_OFDLCK)) {
/*
* We need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in
* close(). rcu_read_lock() wouldn't do.
*/
spin_lock(&current->files->file_lock);
f = fcheck(fd);
spin_unlock(&current->files->file_lock);
if (f != filp) {
file_lock->fl_type = F_UNLCK;
error = do_lock_file_wait(filp, cmd, file_lock);
WARN_ON_ONCE(error);
error = -EBADF;
}
}
out:
trace_fcntl_setlk(inode, file_lock, error);
locks_free_lock(file_lock);
return error;
}
......@@ -2322,7 +2357,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
goto out;
}
again:
error = flock64_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
......@@ -2361,17 +2395,27 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
error = do_lock_file_wait(filp, cmd, file_lock);
/*
* Attempt to detect a close/fcntl race and recover by
* releasing the lock that was just acquired.
* Attempt to detect a close/fcntl race and recover by releasing the
* lock that was just acquired. There is no need to do that when we're
* unlocking though, or for OFD locks.
*/
spin_lock(&current->files->file_lock);
f = fcheck(fd);
spin_unlock(&current->files->file_lock);
if (!error && f != filp && flock.l_type != F_UNLCK) {
flock.l_type = F_UNLCK;
goto again;
if (!error && file_lock->fl_type != F_UNLCK &&
!(file_lock->fl_flags & FL_OFDLCK)) {
/*
* We need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in
* close(). rcu_read_lock() wouldn't do.
*/
spin_lock(&current->files->file_lock);
f = fcheck(fd);
spin_unlock(&current->files->file_lock);
if (f != filp) {
file_lock->fl_type = F_UNLCK;
error = do_lock_file_wait(filp, cmd, file_lock);
WARN_ON_ONCE(error);
error = -EBADF;
}
}
out:
locks_free_lock(file_lock);
return error;
......@@ -2385,6 +2429,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
*/
void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
int error;
struct file_lock lock;
struct file_lock_context *ctx;
......@@ -2407,10 +2452,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
lock.fl_ops = NULL;
lock.fl_lmops = NULL;
vfs_lock_file(filp, F_SETLK, &lock, NULL);
error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
if (lock.fl_ops && lock.fl_ops->fl_release_private)
lock.fl_ops->fl_release_private(&lock);
trace_locks_remove_posix(file_inode(filp), &lock, error);
}
EXPORT_SYMBOL(locks_remove_posix);
......@@ -2706,7 +2752,7 @@ static int __init proc_locks_init(void)
proc_create("locks", 0, NULL, &proc_locks_operations);
return 0;
}
module_init(proc_locks_init);
fs_initcall(proc_locks_init);
#endif
static int __init filelock_init(void)
......
......@@ -1584,6 +1584,14 @@ static inline bool may_mount(void)
return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}
static inline bool may_mandlock(void)
{
#ifndef CONFIG_MANDATORY_FILE_LOCKING
return false;
#endif
return capable(CAP_SYS_ADMIN);
}
/*
* Now umount can handle mount points as well as block devices.
* This is important for filesystems which use unnamed block devices.
......@@ -2677,6 +2685,8 @@ long do_mount(const char *dev_name, const char __user *dir_name,
type_page, flags, data_page);
if (!retval && !may_mount())
retval = -EPERM;
if (!retval && (flags & MS_MANDLOCK) && !may_mandlock())
retval = -EPERM;
if (retval)
goto dput_out;
......
......@@ -1043,7 +1043,7 @@ extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
extern int fcntl_getlease(struct file *filp);
/* fs/locks.c */
void locks_free_lock_context(struct file_lock_context *ctx);
void locks_free_lock_context(struct inode *inode);
void locks_free_lock(struct file_lock *fl);
extern void locks_init_lock(struct file_lock *);
extern struct file_lock * locks_alloc_lock(void);
......@@ -1104,7 +1104,7 @@ static inline int fcntl_getlease(struct file *filp)
}
static inline void
locks_free_lock_context(struct file_lock_context *ctx)
locks_free_lock_context(struct inode *inode)
{
}
......@@ -2030,7 +2030,7 @@ extern struct kobject *fs_kobj;
#define FLOCK_VERIFY_READ 1
#define FLOCK_VERIFY_WRITE 2
#ifdef CONFIG_FILE_LOCKING
#ifdef CONFIG_MANDATORY_FILE_LOCKING
extern int locks_mandatory_locked(struct file *);
extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
......@@ -2075,6 +2075,45 @@ static inline int locks_verify_truncate(struct inode *inode,
return 0;
}
#else /* !CONFIG_MANDATORY_FILE_LOCKING */
static inline int locks_mandatory_locked(struct file *file)
{
return 0;
}
static inline int locks_mandatory_area(int rw, struct inode *inode,
struct file *filp, loff_t offset,
size_t count)
{
return 0;
}
static inline int __mandatory_lock(struct inode *inode)
{
return 0;
}
static inline int mandatory_lock(struct inode *inode)
{
return 0;
}
static inline int locks_verify_locked(struct file *file)
{
return 0;
}
static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
size_t size)
{
return 0;
}
#endif /* CONFIG_MANDATORY_FILE_LOCKING */
#ifdef CONFIG_FILE_LOCKING
static inline int break_lease(struct inode *inode, unsigned int mode)
{
/*
......@@ -2136,39 +2175,6 @@ static inline int break_layout(struct inode *inode, bool wait)
}
#else /* !CONFIG_FILE_LOCKING */
static inline int locks_mandatory_locked(struct file *file)
{
return 0;
}
static inline int locks_mandatory_area(int rw, struct inode *inode,
struct file *filp, loff_t offset,
size_t count)
{
return 0;
}
static inline int __mandatory_lock(struct inode *inode)
{
return 0;
}
static inline int mandatory_lock(struct inode *inode)
{
return 0;
}
static inline int locks_verify_locked(struct file *file)
{
return 0;
}
static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
size_t size)
{
return 0;
}
static inline int break_lease(struct inode *inode, unsigned int mode)
{
return 0;
......
......@@ -34,6 +34,83 @@
{ F_WRLCK, "F_WRLCK" }, \
{ F_UNLCK, "F_UNLCK" })
TRACE_EVENT(locks_get_lock_context,
TP_PROTO(struct inode *inode, int type, struct file_lock_context *ctx),
TP_ARGS(inode, type, ctx),
TP_STRUCT__entry(
__field(unsigned long, i_ino)
__field(dev_t, s_dev)
__field(unsigned char, type)
__field(struct file_lock_context *, ctx)
),
TP_fast_assign(
__entry->s_dev = inode->i_sb->s_dev;
__entry->i_ino = inode->i_ino;
__entry->type = type;
__entry->ctx = ctx;
),
TP_printk("dev=0x%x:0x%x ino=0x%lx type=%s ctx=%p",
MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
__entry->i_ino, show_fl_type(__entry->type), __entry->ctx)
);
DECLARE_EVENT_CLASS(filelock_lock,
TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
TP_ARGS(inode, fl, ret),
TP_STRUCT__entry(
__field(struct file_lock *, fl)
__field(unsigned long, i_ino)
__field(dev_t, s_dev)
__field(struct file_lock *, fl_next)
__field(fl_owner_t, fl_owner)
__field(unsigned int, fl_pid)
__field(unsigned int, fl_flags)
__field(unsigned char, fl_type)
__field(loff_t, fl_start)
__field(loff_t, fl_end)
__field(int, ret)
),
TP_fast_assign(
__entry->fl = fl ? fl : NULL;
__entry->s_dev = inode->i_sb->s_dev;
__entry->i_ino = inode->i_ino;
__entry->fl_next = fl ? fl->fl_next : NULL;
__entry->fl_owner = fl ? fl->fl_owner : NULL;
__entry->fl_pid = fl ? fl->fl_pid : 0;
__entry->fl_flags = fl ? fl->fl_flags : 0;
__entry->fl_type = fl ? fl->fl_type : 0;
__entry->fl_start = fl ? fl->fl_start : 0;
__entry->fl_end = fl ? fl->fl_end : 0;
__entry->ret = ret;
),
TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d",
__entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
__entry->i_ino, __entry->fl_next, __entry->fl_owner,
__entry->fl_pid, show_fl_flags(__entry->fl_flags),
show_fl_type(__entry->fl_type),
__entry->fl_start, __entry->fl_end, __entry->ret)
);
DEFINE_EVENT(filelock_lock, posix_lock_inode,
TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
TP_ARGS(inode, fl, ret));
DEFINE_EVENT(filelock_lock, fcntl_setlk,
TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
TP_ARGS(inode, fl, ret));
DEFINE_EVENT(filelock_lock, locks_remove_posix,
TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
TP_ARGS(inode, fl, ret));
DECLARE_EVENT_CLASS(filelock_lease,
TP_PROTO(struct inode *inode, struct file_lock *fl),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment