Commit 8c8946f5 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.infradead.org/users/eparis/notify

* 'for-linus' of git://git.infradead.org/users/eparis/notify: (132 commits)
  fanotify: use both marks when possible
  fsnotify: pass both the vfsmount mark and inode mark
  fsnotify: walk the inode and vfsmount lists simultaneously
  fsnotify: rework ignored mark flushing
  fsnotify: remove global fsnotify groups lists
  fsnotify: remove group->mask
  fsnotify: remove the global masks
  fsnotify: cleanup should_send_event
  fanotify: use the mark in handler functions
  audit: use the mark in handler functions
  dnotify: use the mark in handler functions
  inotify: use the mark in handler functions
  fsnotify: send fsnotify_mark to groups in event handling functions
  fsnotify: Exchange list heads instead of moving elements
  fsnotify: srcu to protect read side of inode and vfsmount locks
  fsnotify: use an explicit flag to indicate fsnotify_destroy_mark has been called
  fsnotify: use _rcu functions for mark list traversal
  fsnotify: place marks on object in order of group memory address
  vfs/fsnotify: fsnotify_close can delay the final work in fput
  fsnotify: store struct file not struct path
  ...

Fix up trivial delete/modify conflict in fs/notify/inotify/inotify.c.
parents 5f248c9c 1968f5ee
......@@ -360,14 +360,6 @@ When: 2.6.33
Why: Should be implemented in userspace, policy daemon.
Who: Johannes Berg <johannes@sipsolutions.net>
---------------------------
What: CONFIG_INOTIFY
When: 2.6.33
Why: last user (audit) will be converted to the newer more generic
and more easily maintained fsnotify subsystem
Who: Eric Paris <eparis@redhat.com>
----------------------------
What: sound-slot/service-* module aliases and related clutters in
......
......@@ -842,4 +842,6 @@ ia32_sys_call_table:
.quad compat_sys_rt_tgsigqueueinfo /* 335 */
.quad sys_perf_event_open
.quad compat_sys_recvmmsg
.quad sys_fanotify_init
.quad sys32_fanotify_mark
ia32_syscall_end:
......@@ -546,3 +546,12 @@ asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
((u64)len_hi << 32) | len_lo);
}
asmlinkage long sys32_fanotify_mark(int fanotify_fd, unsigned int flags,
u32 mask_lo, u32 mask_hi,
int fd, const char __user *pathname)
{
return sys_fanotify_mark(fanotify_fd, flags,
((u64)mask_hi << 32) | mask_lo,
fd, pathname);
}
......@@ -80,4 +80,7 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
/* ia32/ipc32.c */
asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32);
asmlinkage long sys32_fanotify_mark(int, unsigned int, u32, u32, int,
const char __user *);
#endif /* _ASM_X86_SYS_IA32_H */
......@@ -343,10 +343,12 @@
#define __NR_rt_tgsigqueueinfo 335
#define __NR_perf_event_open 336
#define __NR_recvmmsg 337
#define __NR_fanotify_init 338
#define __NR_fanotify_mark 339
#ifdef __KERNEL__
#define NR_syscalls 338
#define NR_syscalls 340
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
......
......@@ -663,6 +663,10 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
#define __NR_recvmmsg 299
__SYSCALL(__NR_recvmmsg, sys_recvmmsg)
#define __NR_fanotify_init 300
__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
#define __NR_fanotify_mark 301
__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
......
......@@ -337,3 +337,5 @@ ENTRY(sys_call_table)
.long sys_rt_tgsigqueueinfo /* 335 */
.long sys_perf_event_open
.long sys_recvmmsg
.long sys_fanotify_init
.long sys_fanotify_mark
......@@ -1193,11 +1193,10 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
if (iov != iovstack)
kfree(iov);
if ((ret + (type == READ)) > 0) {
struct dentry *dentry = file->f_path.dentry;
if (type == READ)
fsnotify_access(dentry);
fsnotify_access(file);
else
fsnotify_modify(dentry);
fsnotify_modify(file);
}
return ret;
}
......
......@@ -128,7 +128,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
goto exit;
fsnotify_open(file->f_path.dentry);
fsnotify_open(file);
error = -ENOEXEC;
if(file->f_op) {
......@@ -683,7 +683,7 @@ struct file *open_exec(const char *name)
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
goto exit;
fsnotify_open(file->f_path.dentry);
fsnotify_open(file);
err = deny_write_access(file);
if (err)
......
......@@ -230,6 +230,15 @@ static void __fput(struct file *file)
might_sleep();
fsnotify_close(file);
/*
* fsnotify_create_event may have taken one or more references on this
* file. If it did so it left one reference for us to drop to make sure
* its calls to fput could not prematurely destroy the file.
*/
if (atomic_long_read(&file->f_count))
return fput(file);
/*
* The function eventpoll_release() should be the first called
* in the file cleanup chain.
......
......@@ -20,7 +20,6 @@
#include <linux/pagemap.h>
#include <linux/cdev.h>
#include <linux/bootmem.h>
#include <linux/inotify.h>
#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/async.h>
......@@ -264,12 +263,8 @@ void inode_init_once(struct inode *inode)
INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
i_size_ordered_init(inode);
#ifdef CONFIG_INOTIFY
INIT_LIST_HEAD(&inode->inotify_watches);
mutex_init(&inode->inotify_mutex);
#endif
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries);
INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
#endif
}
EXPORT_SYMBOL(inode_init_once);
......@@ -413,7 +408,6 @@ int invalidate_inodes(struct super_block *sb)
down_write(&iprune_sem);
spin_lock(&inode_lock);
inotify_unmount_inodes(&sb->s_inodes);
fsnotify_unmount_inodes(&sb->s_inodes);
busy = invalidate_list(&sb->s_inodes, &throw_away);
spin_unlock(&inode_lock);
......
......@@ -2633,7 +2633,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
{
int error;
int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
const char *old_name;
const unsigned char *old_name;
if (old_dentry->d_inode == new_dentry->d_inode)
return 0;
......
......@@ -29,6 +29,7 @@
#include <linux/log2.h>
#include <linux/idr.h>
#include <linux/fs_struct.h>
#include <linux/fsnotify.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include "pnode.h"
......@@ -150,6 +151,9 @@ struct vfsmount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_share);
INIT_LIST_HEAD(&mnt->mnt_slave_list);
INIT_LIST_HEAD(&mnt->mnt_slave);
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
#endif
#ifdef CONFIG_SMP
mnt->mnt_writers = alloc_percpu(int);
if (!mnt->mnt_writers)
......@@ -610,6 +614,7 @@ static inline void __mntput(struct vfsmount *mnt)
* provides barriers, so count_mnt_writers() below is safe. AV
*/
WARN_ON(count_mnt_writers(mnt));
fsnotify_vfsmount_delete(mnt);
dput(mnt->mnt_root);
free_vfsmnt(mnt);
deactivate_super(sb);
......
......@@ -934,7 +934,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
nfsdstats.io_read += host_err;
*count = host_err;
err = 0;
fsnotify_access(file->f_path.dentry);
fsnotify_access(file);
} else
err = nfserrno(host_err);
out:
......@@ -1045,7 +1045,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
goto out_nfserr;
*cnt = host_err;
nfsdstats.io_write += host_err;
fsnotify_modify(file->f_path.dentry);
fsnotify_modify(file);
/* clear setuid/setgid flag after write */
if (inode->i_mode & (S_ISUID | S_ISGID))
......
......@@ -3,3 +3,4 @@ config FSNOTIFY
source "fs/notify/dnotify/Kconfig"
source "fs/notify/inotify/Kconfig"
source "fs/notify/fanotify/Kconfig"
obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o
obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o \
mark.o vfsmount_mark.o
obj-y += dnotify/
obj-y += inotify/
obj-y += fanotify/
This diff is collapsed.
config FANOTIFY
bool "Filesystem wide access notification"
select FSNOTIFY
select ANON_INODES
default n
---help---
Say Y here to enable fanotify suport. fanotify is a file access
notification system which differs from inotify in that it sends
and open file descriptor to the userspace listener along with
the event.
If unsure, say Y.
config FANOTIFY_ACCESS_PERMISSIONS
bool "fanotify permissions checking"
depends on FANOTIFY
depends on SECURITY
default n
---help---
Say Y here is you want fanotify listeners to be able to make permissions
decisions concerning filesystem events. This is used by some fanotify
listeners which need to scan files before allowing the system access to
use those files. This is used by some anti-malware vendors and by some
hierarchical storage managent systems.
If unsure, say N.
obj-$(CONFIG_FANOTIFY) += fanotify.o fanotify_user.o
#include <linux/fanotify.h>
#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kernel.h> /* UINT_MAX */
#include <linux/mount.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/wait.h>
static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
{
pr_debug("%s: old=%p new=%p\n", __func__, old, new);
if (old->to_tell == new->to_tell &&
old->data_type == new->data_type &&
old->tgid == new->tgid) {
switch (old->data_type) {
case (FSNOTIFY_EVENT_FILE):
if ((old->file->f_path.mnt == new->file->f_path.mnt) &&
(old->file->f_path.dentry == new->file->f_path.dentry))
return true;
case (FSNOTIFY_EVENT_NONE):
return true;
default:
BUG();
};
}
return false;
}
/* and the list better be locked by something too! */
static struct fsnotify_event *fanotify_merge(struct list_head *list,
struct fsnotify_event *event)
{
struct fsnotify_event_holder *test_holder;
struct fsnotify_event *test_event = NULL;
struct fsnotify_event *new_event;
pr_debug("%s: list=%p event=%p\n", __func__, list, event);
list_for_each_entry_reverse(test_holder, list, event_list) {
if (should_merge(test_holder->event, event)) {
test_event = test_holder->event;
break;
}
}
if (!test_event)
return NULL;
fsnotify_get_event(test_event);
/* if they are exactly the same we are done */
if (test_event->mask == event->mask)
return test_event;
/*
* if the refcnt == 2 this is the only queue
* for this event and so we can update the mask
* in place.
*/
if (atomic_read(&test_event->refcnt) == 2) {
test_event->mask |= event->mask;
return test_event;
}
new_event = fsnotify_clone_event(test_event);
/* done with test_event */
fsnotify_put_event(test_event);
/* couldn't allocate memory, merge was not possible */
if (unlikely(!new_event))
return ERR_PTR(-ENOMEM);
/* build new event and replace it on the list */
new_event->mask = (test_event->mask | event->mask);
fsnotify_replace_event(test_holder, new_event);
/* we hold a reference on new_event from clone_event */
return new_event;
}
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
static int fanotify_get_response_from_access(struct fsnotify_group *group,
struct fsnotify_event *event)
{
int ret;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
wait_event(group->fanotify_data.access_waitq, event->response);
/* userspace responded, convert to something usable */
spin_lock(&event->lock);
switch (event->response) {
case FAN_ALLOW:
ret = 0;
break;
case FAN_DENY:
default:
ret = -EPERM;
}
event->response = 0;
spin_unlock(&event->lock);
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
group, event, ret);
return ret;
}
#endif
static int fanotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *fanotify_mark,
struct fsnotify_event *event)
{
int ret = 0;
struct fsnotify_event *notify_event = NULL;
BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
if (IS_ERR(notify_event))
return PTR_ERR(notify_event);
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (event->mask & FAN_ALL_PERM_EVENTS) {
/* if we merged we need to wait on the new event */
if (notify_event)
event = notify_event;
ret = fanotify_get_response_from_access(group, event);
}
#endif
if (notify_event)
fsnotify_put_event(notify_event);
return ret;
}
static bool fanotify_should_send_event(struct fsnotify_group *group,
struct inode *to_tell,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmnt_mark,
__u32 event_mask, void *data, int data_type)
{
__u32 marks_mask, marks_ignored_mask;
pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
"mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
inode_mark, vfsmnt_mark, event_mask, data, data_type);
pr_debug("%s: group=%p vfsmount_mark=%p inode_mark=%p mask=%x\n",
__func__, group, vfsmnt_mark, inode_mark, event_mask);
/* sorry, fanotify only gives a damn about files and dirs */
if (!S_ISREG(to_tell->i_mode) &&
!S_ISDIR(to_tell->i_mode))
return false;
/* if we don't have enough info to send an event to userspace say no */
if (data_type != FSNOTIFY_EVENT_FILE)
return false;
if (inode_mark && vfsmnt_mark) {
marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
} else if (inode_mark) {
/*
* if the event is for a child and this inode doesn't care about
* events on the child, don't send it!
*/
if ((event_mask & FS_EVENT_ON_CHILD) &&
!(inode_mark->mask & FS_EVENT_ON_CHILD))
return false;
marks_mask = inode_mark->mask;
marks_ignored_mask = inode_mark->ignored_mask;
} else if (vfsmnt_mark) {
marks_mask = vfsmnt_mark->mask;
marks_ignored_mask = vfsmnt_mark->ignored_mask;
} else {
BUG();
}
if (event_mask & marks_mask & ~marks_ignored_mask)
return true;
return false;
}
const struct fsnotify_ops fanotify_fsnotify_ops = {
.handle_event = fanotify_handle_event,
.should_send_event = fanotify_should_send_event,
.free_group_priv = NULL,
.free_event_priv = NULL,
.freeing_mark = NULL,
};
This diff is collapsed.
......@@ -21,6 +21,7 @@
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/srcu.h>
#include <linux/fsnotify_backend.h>
......@@ -35,6 +36,11 @@ void __fsnotify_inode_delete(struct inode *inode)
}
EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
{
fsnotify_clear_marks_by_mount(mnt);
}
/*
* Given an inode, first check if we care what happens to our children. Inotify
* and dnotify both tell their parents about events. If we care about any event
......@@ -78,13 +84,16 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
}
/* Notify this dentry's parent about a child's events. */
void __fsnotify_parent(struct dentry *dentry, __u32 mask)
void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
{
struct dentry *parent;
struct inode *p_inode;
bool send = false;
bool should_update_children = false;
if (!dentry)
dentry = file->f_path.dentry;
if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
return;
......@@ -115,8 +124,12 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
* specifies these are events which came from a child. */
mask |= FS_EVENT_ON_CHILD;
fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
dentry->d_name.name, 0);
if (file)
fsnotify(p_inode, mask, file, FSNOTIFY_EVENT_FILE,
dentry->d_name.name, 0);
else
fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
dentry->d_name.name, 0);
dput(parent);
}
......@@ -127,63 +140,181 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
}
EXPORT_SYMBOL_GPL(__fsnotify_parent);
static int send_to_group(struct inode *to_tell, struct vfsmount *mnt,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
__u32 mask, void *data,
int data_is, u32 cookie,
const unsigned char *file_name,
struct fsnotify_event **event)
{
struct fsnotify_group *group = inode_mark->group;
__u32 inode_test_mask = (mask & ~FS_EVENT_ON_CHILD);
__u32 vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD);
pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p"
" data_is=%d cookie=%d event=%p\n", __func__, group, to_tell,
mnt, inode_mark, mask, data, data_is, cookie, *event);
/* clear ignored on inode modification */
if (mask & FS_MODIFY) {
if (inode_mark &&
!(inode_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
inode_mark->ignored_mask = 0;
if (vfsmount_mark &&
!(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
vfsmount_mark->ignored_mask = 0;
}
/* does the inode mark tell us to do something? */
if (inode_mark) {
inode_test_mask &= inode_mark->mask;
inode_test_mask &= ~inode_mark->ignored_mask;
}
/* does the vfsmount_mark tell us to do something? */
if (vfsmount_mark) {
vfsmount_test_mask &= vfsmount_mark->mask;
vfsmount_test_mask &= ~vfsmount_mark->ignored_mask;
if (inode_mark)
vfsmount_test_mask &= ~inode_mark->ignored_mask;
}
if (!inode_test_mask && !vfsmount_test_mask)
return 0;
if (group->ops->should_send_event(group, to_tell, inode_mark,
vfsmount_mark, mask, data,
data_is) == false)
return 0;
if (!*event) {
*event = fsnotify_create_event(to_tell, mask, data,
data_is, file_name,
cookie, GFP_KERNEL);
if (!*event)
return -ENOMEM;
}
return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event);
}
/*
* This is the main call to fsnotify. The VFS calls into hook specific functions
* in linux/fsnotify.h. Those functions then in turn call here. Here will call
* out to all of the registered fsnotify_group. Those groups can then use the
* notification event in whatever means they feel necessary.
*/
void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name, u32 cookie)
int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
const unsigned char *file_name, u32 cookie)
{
struct fsnotify_group *group;
struct hlist_node *inode_node, *vfsmount_node;
struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
struct fsnotify_group *inode_group, *vfsmount_group;
struct fsnotify_event *event = NULL;
int idx;
struct vfsmount *mnt;
int idx, ret = 0;
bool used_inode = false, used_vfsmount = false;
/* global tests shouldn't care about events on child only the specific event */
__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
if (list_empty(&fsnotify_groups))
return;
if (data_is == FSNOTIFY_EVENT_FILE)
mnt = ((struct file *)data)->f_path.mnt;
else
mnt = NULL;
if (!(test_mask & fsnotify_mask))
return;
if (!(test_mask & to_tell->i_fsnotify_mask))
return;
/*
* SRCU!! the groups list is very very much read only and the path is
* very hot. The VAST majority of events are not going to need to do
* anything other than walk the list so it's crazy to pre-allocate.
* if this is a modify event we may need to clear the ignored masks
* otherwise return if neither the inode nor the vfsmount care about
* this type of event.
*/
idx = srcu_read_lock(&fsnotify_grp_srcu);
list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
if (test_mask & group->mask) {
if (!group->ops->should_send_event(group, to_tell, mask))
continue;
if (!event) {
event = fsnotify_create_event(to_tell, mask, data,
data_is, file_name, cookie,
GFP_KERNEL);
/* shit, we OOM'd and now we can't tell, maybe
* someday someone else will want to do something
* here */
if (!event)
break;
}
group->ops->handle_event(group, event);
if (!(mask & FS_MODIFY) &&
!(test_mask & to_tell->i_fsnotify_mask) &&
!(mnt && test_mask & mnt->mnt_fsnotify_mask))
return 0;
idx = srcu_read_lock(&fsnotify_mark_srcu);
if ((mask & FS_MODIFY) ||
(test_mask & to_tell->i_fsnotify_mask))
inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first,
&fsnotify_mark_srcu);
else
inode_node = NULL;
if (mnt) {
if ((mask & FS_MODIFY) ||
(test_mask & mnt->mnt_fsnotify_mask))
vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first,
&fsnotify_mark_srcu);
else
vfsmount_node = NULL;
} else {
mnt = NULL;
vfsmount_node = NULL;
}
while (inode_node || vfsmount_node) {
if (inode_node) {
inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
struct fsnotify_mark, i.i_list);
inode_group = inode_mark->group;
} else
inode_group = (void *)-1;
if (vfsmount_node) {
vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu),
struct fsnotify_mark, m.m_list);
vfsmount_group = vfsmount_mark->group;
} else
vfsmount_group = (void *)-1;
if (inode_group < vfsmount_group) {
/* handle inode */
send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
data_is, cookie, file_name, &event);
used_inode = true;
} else if (vfsmount_group < inode_group) {
send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
data_is, cookie, file_name, &event);
used_vfsmount = true;
} else {
send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
mask, data, data_is, cookie, file_name,
&event);
used_vfsmount = true;
used_inode = true;
}
if (used_inode)
inode_node = srcu_dereference(inode_node->next,
&fsnotify_mark_srcu);
if (used_vfsmount)
vfsmount_node = srcu_dereference(vfsmount_node->next,
&fsnotify_mark_srcu);
}
srcu_read_unlock(&fsnotify_grp_srcu, idx);
srcu_read_unlock(&fsnotify_mark_srcu, idx);
/*
* fsnotify_create_event() took a reference so the event can't be cleaned
* up while we are still trying to add it to lists, drop that one.
*/
if (event)
fsnotify_put_event(event);
return ret;
}
EXPORT_SYMBOL_GPL(fsnotify);
static __init int fsnotify_init(void)
{
return init_srcu_struct(&fsnotify_grp_srcu);
int ret;
BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret)
panic("initializing fsnotify_mark_srcu");
return 0;
}
subsys_initcall(fsnotify_init);
core_initcall(fsnotify_init);
......@@ -6,21 +6,34 @@
#include <linux/srcu.h>
#include <linux/types.h>
/* protects reads of fsnotify_groups */
extern struct srcu_struct fsnotify_grp_srcu;
/* all groups which receive fsnotify events */
extern struct list_head fsnotify_groups;
/* all bitwise OR of all event types (FS_*) for all fsnotify_groups */
extern __u32 fsnotify_mask;
/* destroy all events sitting in this groups notification queue */
extern void fsnotify_flush_notify(struct fsnotify_group *group);
/* protects reads of inode and vfsmount marks list */
extern struct srcu_struct fsnotify_mark_srcu;
extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
__u32 mask);
/* add a mark to an inode */
extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct inode *inode,
int allow_dups);
/* add a mark to a vfsmount */
extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct vfsmount *mnt,
int allow_dups);
/* final kfree of a group */
extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
/* vfsmount specific destruction of a mark */
extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark);
/* inode specific destruction of a mark */
extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark);
/* run the list of all marks associated with inode and flag them to be freed */
extern void fsnotify_clear_marks_by_inode(struct inode *inode);
/* run the list of all marks associated with vfsmount and flag them to be freed */
extern void fsnotify_clear_marks_by_mount(struct vfsmount *mnt);
/*
* update the dentry->d_flags of all of inode's children to indicate if inode cares
* about events that happen to its children.
......
......@@ -28,64 +28,6 @@
#include <asm/atomic.h>
/* protects writes to fsnotify_groups and fsnotify_mask */
static DEFINE_MUTEX(fsnotify_grp_mutex);
/* protects reads while running the fsnotify_groups list */
struct srcu_struct fsnotify_grp_srcu;
/* all groups registered to receive filesystem notifications */
LIST_HEAD(fsnotify_groups);
/* bitwise OR of all events (FS_*) interesting to some group on this system */
__u32 fsnotify_mask;
/*
* When a new group registers or changes it's set of interesting events
* this function updates the fsnotify_mask to contain all interesting events
*/
void fsnotify_recalc_global_mask(void)
{
struct fsnotify_group *group;
__u32 mask = 0;
int idx;
idx = srcu_read_lock(&fsnotify_grp_srcu);
list_for_each_entry_rcu(group, &fsnotify_groups, group_list)
mask |= group->mask;
srcu_read_unlock(&fsnotify_grp_srcu, idx);
fsnotify_mask = mask;
}
/*
* Update the group->mask by running all of the marks associated with this
* group and finding the bitwise | of all of the mark->mask. If we change
* the group->mask we need to update the global mask of events interesting
* to the system.
*/
void fsnotify_recalc_group_mask(struct fsnotify_group *group)
{
__u32 mask = 0;
__u32 old_mask = group->mask;
struct fsnotify_mark_entry *entry;
spin_lock(&group->mark_lock);
list_for_each_entry(entry, &group->mark_entries, g_list)
mask |= entry->mask;
spin_unlock(&group->mark_lock);
group->mask = mask;
if (old_mask != mask)
fsnotify_recalc_global_mask();
}
/*
* Take a reference to a group so things found under the fsnotify_grp_mutex
* can't get freed under us
*/
static void fsnotify_get_group(struct fsnotify_group *group)
{
atomic_inc(&group->refcnt);
}
/*
* Final freeing of a group
*/
......@@ -110,145 +52,53 @@ void fsnotify_final_destroy_group(struct fsnotify_group *group)
*/
static void fsnotify_destroy_group(struct fsnotify_group *group)
{
/* clear all inode mark entries for this group */
/* clear all inode marks for this group */
fsnotify_clear_marks_by_group(group);
synchronize_srcu(&fsnotify_mark_srcu);
/* past the point of no return, matches the initial value of 1 */
if (atomic_dec_and_test(&group->num_marks))
fsnotify_final_destroy_group(group);
}
/*
* Remove this group from the global list of groups that will get events
* this can be done even if there are still references and things still using
* this group. This just stops the group from getting new events.
*/
static void __fsnotify_evict_group(struct fsnotify_group *group)
{
BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
if (group->on_group_list)
list_del_rcu(&group->group_list);
group->on_group_list = 0;
}
/*
* Called when a group is no longer interested in getting events. This can be
* used if a group is misbehaving or if for some reason a group should no longer
* get any filesystem events.
*/
void fsnotify_evict_group(struct fsnotify_group *group)
{
mutex_lock(&fsnotify_grp_mutex);
__fsnotify_evict_group(group);
mutex_unlock(&fsnotify_grp_mutex);
}
/*
* Drop a reference to a group. Free it if it's through.
*/
void fsnotify_put_group(struct fsnotify_group *group)
{
if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex))
return;
/*
* OK, now we know that there's no other users *and* we hold mutex,
* so no new references will appear
*/
__fsnotify_evict_group(group);
/*
* now it's off the list, so the only thing we might care about is
* srcu access....
*/
mutex_unlock(&fsnotify_grp_mutex);
synchronize_srcu(&fsnotify_grp_srcu);
/* and now it is really dead. _Nothing_ could be seeing it */
fsnotify_recalc_global_mask();
fsnotify_destroy_group(group);
}
/*
* Simply run the fsnotify_groups list and find a group which matches
* the given parameters. If a group is found we take a reference to that
* group.
*/
static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask,
const struct fsnotify_ops *ops)
{
struct fsnotify_group *group_iter;
struct fsnotify_group *group = NULL;
BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) {
if (group_iter->group_num == group_num) {
if ((group_iter->mask == mask) &&
(group_iter->ops == ops)) {
fsnotify_get_group(group_iter);
group = group_iter;
} else
group = ERR_PTR(-EEXIST);
}
}
return group;
if (atomic_dec_and_test(&group->refcnt))
fsnotify_destroy_group(group);
}
/*
* Either finds an existing group which matches the group_num, mask, and ops or
* creates a new group and adds it to the global group list. In either case we
* take a reference for the group returned.
* Create a new fsnotify_group and hold a reference for the group returned.
*/
struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
const struct fsnotify_ops *ops)
struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
{
struct fsnotify_group *group, *tgroup;
struct fsnotify_group *group;
/* very low use, simpler locking if we just always alloc */
group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
group = kzalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
if (!group)
return ERR_PTR(-ENOMEM);
/* set to 0 when there a no external references to this group */
atomic_set(&group->refcnt, 1);
group->on_group_list = 0;
group->group_num = group_num;
group->mask = mask;
/*
* hits 0 when there are no external references AND no marks for
* this group
*/
atomic_set(&group->num_marks, 1);
mutex_init(&group->notification_mutex);
INIT_LIST_HEAD(&group->notification_list);
init_waitqueue_head(&group->notification_waitq);
group->q_len = 0;
group->max_events = UINT_MAX;
spin_lock_init(&group->mark_lock);
atomic_set(&group->num_marks, 0);
INIT_LIST_HEAD(&group->mark_entries);
INIT_LIST_HEAD(&group->marks_list);
group->ops = ops;
mutex_lock(&fsnotify_grp_mutex);
tgroup = fsnotify_find_group(group_num, mask, ops);
if (tgroup) {
/* group already exists */
mutex_unlock(&fsnotify_grp_mutex);
/* destroy the new one we made */
fsnotify_put_group(group);
return tgroup;
}
/* group not found, add a new one */
list_add_rcu(&group->group_list, &fsnotify_groups);
group->on_group_list = 1;
/* being on the fsnotify_groups list holds one num_marks */
atomic_inc(&group->num_marks);
mutex_unlock(&fsnotify_grp_mutex);
if (mask)
fsnotify_recalc_global_mask();
return group;
}
This diff is collapsed.
config INOTIFY
bool "Inotify file change notification support"
default n
---help---
Say Y here to enable legacy in kernel inotify support. Inotify is a
file change notification system. It is a replacement for dnotify.
This option only provides the legacy inotify in kernel API. There
are no in tree kernel users of this interface since it is deprecated.
You only need this if you are loading an out of tree kernel module
that uses inotify.
For more information, see <file:Documentation/filesystems/inotify.txt>
If unsure, say N.
config INOTIFY_USER
bool "Inotify support for userspace"
select ANON_INODES
......
obj-$(CONFIG_INOTIFY) += inotify.o
obj-$(CONFIG_INOTIFY_USER) += inotify_fsnotify.o inotify_user.o
This diff is collapsed.
......@@ -9,13 +9,12 @@ struct inotify_event_private_data {
int wd;
};
struct inotify_inode_mark_entry {
/* fsnotify_mark_entry MUST be the first thing */
struct fsnotify_mark_entry fsn_entry;
struct inotify_inode_mark {
struct fsnotify_mark fsn_mark;
int wd;
};
extern void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group);
extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
......
......@@ -22,6 +22,7 @@
* General Public License for more details.
*/
#include <linux/dcache.h> /* d_unlinked */
#include <linux/fs.h> /* struct inode */
#include <linux/fsnotify_backend.h>
#include <linux/inotify.h>
......@@ -32,26 +33,84 @@
#include "inotify.h"
static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
/*
* Check if 2 events contain the same information. We do not compare private data
* but at this moment that isn't a problem for any know fsnotify listeners.
*/
static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
{
if ((old->mask == new->mask) &&
(old->to_tell == new->to_tell) &&
(old->data_type == new->data_type) &&
(old->name_len == new->name_len)) {
switch (old->data_type) {
case (FSNOTIFY_EVENT_INODE):
/* remember, after old was put on the wait_q we aren't
* allowed to look at the inode any more, only thing
* left to check was if the file_name is the same */
if (!old->name_len ||
!strcmp(old->file_name, new->file_name))
return true;
break;
case (FSNOTIFY_EVENT_FILE):
if ((old->file->f_path.mnt == new->file->f_path.mnt) &&
(old->file->f_path.dentry == new->file->f_path.dentry))
return true;
break;
case (FSNOTIFY_EVENT_NONE):
if (old->mask & FS_Q_OVERFLOW)
return true;
else if (old->mask & FS_IN_IGNORED)
return false;
return true;
};
}
return false;
}
static struct fsnotify_event *inotify_merge(struct list_head *list,
struct fsnotify_event *event)
{
struct fsnotify_mark_entry *entry;
struct inotify_inode_mark_entry *ientry;
struct fsnotify_event_holder *last_holder;
struct fsnotify_event *last_event;
/* and the list better be locked by something too */
spin_lock(&event->lock);
last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
last_event = last_holder->event;
if (event_compare(last_event, event))
fsnotify_get_event(last_event);
else
last_event = NULL;
spin_unlock(&event->lock);
return last_event;
}
static int inotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
struct fsnotify_event *event)
{
struct inotify_inode_mark *i_mark;
struct inode *to_tell;
struct inotify_event_private_data *event_priv;
struct fsnotify_event_private_data *fsn_event_priv;
int wd, ret;
struct fsnotify_event *added_event;
int wd, ret = 0;
BUG_ON(vfsmount_mark);
pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group,
event, event->to_tell, event->mask);
to_tell = event->to_tell;
spin_lock(&to_tell->i_lock);
entry = fsnotify_find_mark_entry(group, to_tell);
spin_unlock(&to_tell->i_lock);
/* race with watch removal? We already passes should_send */
if (unlikely(!entry))
return 0;
ientry = container_of(entry, struct inotify_inode_mark_entry,
fsn_entry);
wd = ientry->wd;
i_mark = container_of(inode_mark, struct inotify_inode_mark,
fsn_mark);
wd = i_mark->wd;
event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
if (unlikely(!event_priv))
......@@ -62,48 +121,40 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
fsn_event_priv->group = group;
event_priv->wd = wd;
ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
if (ret) {
added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
if (added_event) {
inotify_free_event_priv(fsn_event_priv);
/* EEXIST says we tail matched, EOVERFLOW isn't something
* to report up the stack. */
if ((ret == -EEXIST) ||
(ret == -EOVERFLOW))
ret = 0;
if (!IS_ERR(added_event))
fsnotify_put_event(added_event);
else
ret = PTR_ERR(added_event);
}
/*
* If we hold the entry until after the event is on the queue
* IN_IGNORED won't be able to pass this event in the queue
*/
fsnotify_put_mark(entry);
if (inode_mark->mask & IN_ONESHOT)
fsnotify_destroy_mark(inode_mark);
return ret;
}
static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group)
{
inotify_ignored_and_remove_idr(entry, group);
inotify_ignored_and_remove_idr(fsn_mark, group);
}
static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask)
static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
__u32 mask, void *data, int data_type)
{
struct fsnotify_mark_entry *entry;
bool send;
spin_lock(&inode->i_lock);
entry = fsnotify_find_mark_entry(group, inode);
spin_unlock(&inode->i_lock);
if (!entry)
return false;
if ((inode_mark->mask & FS_EXCL_UNLINK) &&
(data_type == FSNOTIFY_EVENT_FILE)) {
struct file *file = data;
mask = (mask & ~FS_EVENT_ON_CHILD);
send = (entry->mask & mask);
/* find took a reference */
fsnotify_put_mark(entry);
if (d_unlinked(file->f_path.dentry))
return false;
}
return send;
return true;
}
/*
......@@ -115,18 +166,18 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
*/
static int idr_callback(int id, void *p, void *data)
{
struct fsnotify_mark_entry *entry;
struct inotify_inode_mark_entry *ientry;
struct fsnotify_mark *fsn_mark;
struct inotify_inode_mark *i_mark;
static bool warned = false;
if (warned)
return 0;
warned = true;
entry = p;
ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
fsn_mark = p;
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in "
WARN(1, "inotify closing but id=%d for fsn_mark=%p in group=%p still in "
"idr. Probably leaking memory\n", id, p, data);
/*
......@@ -135,9 +186,9 @@ static int idr_callback(int id, void *p, void *data)
* out why we got here and the panic is no worse than the original
* BUG() that was here.
*/
if (entry)
printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n",
entry->group, entry->inode, ientry->wd);
if (fsn_mark)
printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n",
fsn_mark->group, fsn_mark->i.inode, i_mark->wd);
return 0;
}
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -29,6 +29,7 @@
#include <linux/falloc.h>
#include <linux/fs_struct.h>
#include <linux/ima.h>
#include <linux/dnotify.h>
#include "internal.h"
......@@ -887,7 +888,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
put_unused_fd(fd);
fd = PTR_ERR(f);
} else {
fsnotify_open(f->f_path.dentry);
fsnotify_open(f);
fd_install(fd, f);
}
}
......
This diff is collapsed.
This diff is collapsed.
......@@ -210,6 +210,7 @@ unifdef-y += ethtool.h
unifdef-y += eventpoll.h
unifdef-y += signalfd.h
unifdef-y += ext2_fs.h
unifdef-y += fanotify.h
unifdef-y += fb.h
unifdef-y += fcntl.h
unifdef-y += filter.h
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment