Commit e375780b authored by Linus Torvalds

Merge tag 'fsnotify_for_v5.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull fsnotify updates from Jan Kara:
 "The biggest part of this is support for fsnotify inode marks that
  don't pin inodes in memory but rather get evicted together with the
  inode (they are useful if userspace needs to exclude receipt of events
  from potentially large subtrees using fanotify ignore marks).

  There is also a fix for more consistent handling of events sent to
  parent and a fix of sparse(1) complaints"

* tag 'fsnotify_for_v5.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  fanotify: fix incorrect fmode_t casts
  fsnotify: consistent behavior for parent not watching children
  fsnotify: introduce mark type iterator
  fanotify: enable "evictable" inode marks
  fanotify: use fsnotify group lock helpers
  fanotify: implement "evictable" inode marks
  fanotify: factor out helper fanotify_mark_update_flags()
  fanotify: create helper fanotify_mark_user_flags()
  fsnotify: allow adding an inode mark without pinning inode
  dnotify: use fsnotify group lock helpers
  nfsd: use fsnotify group lock helpers
  audit: use fsnotify group lock helpers
  inotify: use fsnotify group lock helpers
  fsnotify: create helpers for group mark_mutex lock
  fsnotify: make allow_dups a property of the group
  fsnotify: pass flags argument to fsnotify_alloc_group()
  fsnotify: fix wrong lockdep annotations
  inotify: move control flags from mask to mark flags
  inotify: show inotify mask flags in proc fdinfo
parents 8b728edc dccd8557
...@@ -119,14 +119,14 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) ...@@ -119,14 +119,14 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
struct inode *inode = nf->nf_inode; struct inode *inode = nf->nf_inode;
do { do {
mutex_lock(&nfsd_file_fsnotify_group->mark_mutex); fsnotify_group_lock(nfsd_file_fsnotify_group);
mark = fsnotify_find_mark(&inode->i_fsnotify_marks, mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
nfsd_file_fsnotify_group); nfsd_file_fsnotify_group);
if (mark) { if (mark) {
nfm = nfsd_file_mark_get(container_of(mark, nfm = nfsd_file_mark_get(container_of(mark,
struct nfsd_file_mark, struct nfsd_file_mark,
nfm_mark)); nfm_mark));
mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); fsnotify_group_unlock(nfsd_file_fsnotify_group);
if (nfm) { if (nfm) {
fsnotify_put_mark(mark); fsnotify_put_mark(mark);
break; break;
...@@ -134,8 +134,9 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) ...@@ -134,8 +134,9 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
/* Avoid soft lockup race with nfsd_file_mark_put() */ /* Avoid soft lockup race with nfsd_file_mark_put() */
fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group); fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
fsnotify_put_mark(mark); fsnotify_put_mark(mark);
} else } else {
mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); fsnotify_group_unlock(nfsd_file_fsnotify_group);
}
/* allocate a new nfm */ /* allocate a new nfm */
new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL); new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
...@@ -678,7 +679,8 @@ nfsd_file_cache_init(void) ...@@ -678,7 +679,8 @@ nfsd_file_cache_init(void)
goto out_shrinker; goto out_shrinker;
} }
nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops); nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
FSNOTIFY_GROUP_NOFS);
if (IS_ERR(nfsd_file_fsnotify_group)) { if (IS_ERR(nfsd_file_fsnotify_group)) {
pr_err("nfsd: unable to create fsnotify group: %ld\n", pr_err("nfsd: unable to create fsnotify group: %ld\n",
PTR_ERR(nfsd_file_fsnotify_group)); PTR_ERR(nfsd_file_fsnotify_group));
......
...@@ -168,7 +168,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) ...@@ -168,7 +168,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
return; return;
dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
mutex_lock(&dnotify_group->mark_mutex); fsnotify_group_lock(dnotify_group);
spin_lock(&fsn_mark->lock); spin_lock(&fsn_mark->lock);
prev = &dn_mark->dn; prev = &dn_mark->dn;
...@@ -191,7 +191,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) ...@@ -191,7 +191,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
free = true; free = true;
} }
mutex_unlock(&dnotify_group->mark_mutex); fsnotify_group_unlock(dnotify_group);
if (free) if (free)
fsnotify_free_mark(fsn_mark); fsnotify_free_mark(fsn_mark);
...@@ -324,7 +324,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) ...@@ -324,7 +324,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
new_dn_mark->dn = NULL; new_dn_mark->dn = NULL;
/* this is needed to prevent the fcntl/close race described below */ /* this is needed to prevent the fcntl/close race described below */
mutex_lock(&dnotify_group->mark_mutex); fsnotify_group_lock(dnotify_group);
/* add the new_fsn_mark or find an old one. */ /* add the new_fsn_mark or find an old one. */
fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group);
...@@ -334,7 +334,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) ...@@ -334,7 +334,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
} else { } else {
error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0); error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0);
if (error) { if (error) {
mutex_unlock(&dnotify_group->mark_mutex); fsnotify_group_unlock(dnotify_group);
goto out_err; goto out_err;
} }
spin_lock(&new_fsn_mark->lock); spin_lock(&new_fsn_mark->lock);
...@@ -383,7 +383,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) ...@@ -383,7 +383,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
if (destroy) if (destroy)
fsnotify_detach_mark(fsn_mark); fsnotify_detach_mark(fsn_mark);
mutex_unlock(&dnotify_group->mark_mutex); fsnotify_group_unlock(dnotify_group);
if (destroy) if (destroy)
fsnotify_free_mark(fsn_mark); fsnotify_free_mark(fsn_mark);
fsnotify_put_mark(fsn_mark); fsnotify_put_mark(fsn_mark);
...@@ -401,7 +401,8 @@ static int __init dnotify_init(void) ...@@ -401,7 +401,8 @@ static int __init dnotify_init(void)
SLAB_PANIC|SLAB_ACCOUNT); SLAB_PANIC|SLAB_ACCOUNT);
dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);
dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops); dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops,
FSNOTIFY_GROUP_NOFS);
if (IS_ERR(dnotify_group)) if (IS_ERR(dnotify_group))
panic("unable to allocate fsnotify group for dnotify\n"); panic("unable to allocate fsnotify group for dnotify\n");
dnotify_sysctl_init(); dnotify_sysctl_init();
......
...@@ -319,12 +319,8 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, ...@@ -319,12 +319,8 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
return 0; return 0;
} }
fsnotify_foreach_iter_type(type) { fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
if (!fsnotify_iter_should_report_type(iter_info, type)) /* Apply ignore mask regardless of mark's ISDIR flag */
continue;
mark = iter_info->marks[type];
/* Apply ignore mask regardless of ISDIR and ON_CHILD flags */
marks_ignored_mask |= mark->ignored_mask; marks_ignored_mask |= mark->ignored_mask;
/* /*
...@@ -334,14 +330,6 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, ...@@ -334,14 +330,6 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR)) if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR))
continue; continue;
/*
* If the event is on a child and this mark is on a parent not
* watching children, don't send it!
*/
if (type == FSNOTIFY_ITER_TYPE_PARENT &&
!(mark->mask & FS_EVENT_ON_CHILD))
continue;
marks_mask |= mark->mask; marks_mask |= mark->mask;
/* Record the mark types of this group that matched the event */ /* Record the mark types of this group that matched the event */
...@@ -849,16 +837,14 @@ static struct fanotify_event *fanotify_alloc_event( ...@@ -849,16 +837,14 @@ static struct fanotify_event *fanotify_alloc_event(
*/ */
static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
{ {
struct fsnotify_mark *mark;
int type; int type;
__kernel_fsid_t fsid = {}; __kernel_fsid_t fsid = {};
fsnotify_foreach_iter_type(type) { fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
struct fsnotify_mark_connector *conn; struct fsnotify_mark_connector *conn;
if (!fsnotify_iter_should_report_type(iter_info, type)) conn = READ_ONCE(mark->connector);
continue;
conn = READ_ONCE(iter_info->marks[type]->connector);
/* Mark is just getting destroyed or created? */ /* Mark is just getting destroyed or created? */
if (!conn) if (!conn)
continue; continue;
......
...@@ -490,3 +490,15 @@ static inline unsigned int fanotify_event_hash_bucket( ...@@ -490,3 +490,15 @@ static inline unsigned int fanotify_event_hash_bucket(
{ {
return event->hash & FANOTIFY_HTABLE_MASK; return event->hash & FANOTIFY_HTABLE_MASK;
} }
/*
 * Translate a mark's internal fsnotify flags into FAN_MARK_* flag bits:
 * FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY maps to FAN_MARK_IGNORED_SURV_MODIFY
 * and FSNOTIFY_MARK_FLAG_NO_IREF maps to FAN_MARK_EVICTABLE.
 * Returns 0 when neither flag is set on the mark.
 */
static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
{
unsigned int mflags = 0;
if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
if (mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)
mflags |= FAN_MARK_EVICTABLE;
return mflags;
}
...@@ -264,7 +264,7 @@ static int create_fd(struct fsnotify_group *group, struct path *path, ...@@ -264,7 +264,7 @@ static int create_fd(struct fsnotify_group *group, struct path *path,
* originally opened O_WRONLY. * originally opened O_WRONLY.
*/ */
new_file = dentry_open(path, new_file = dentry_open(path,
group->fanotify_data.f_flags | FMODE_NONOTIFY, group->fanotify_data.f_flags | __FMODE_NONOTIFY,
current_cred()); current_cred());
if (IS_ERR(new_file)) { if (IS_ERR(new_file)) {
/* /*
...@@ -1035,10 +1035,10 @@ static int fanotify_remove_mark(struct fsnotify_group *group, ...@@ -1035,10 +1035,10 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
__u32 removed; __u32 removed;
int destroy_mark; int destroy_mark;
mutex_lock(&group->mark_mutex); fsnotify_group_lock(group);
fsn_mark = fsnotify_find_mark(connp, group); fsn_mark = fsnotify_find_mark(connp, group);
if (!fsn_mark) { if (!fsn_mark) {
mutex_unlock(&group->mark_mutex); fsnotify_group_unlock(group);
return -ENOENT; return -ENOENT;
} }
...@@ -1048,7 +1048,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group, ...@@ -1048,7 +1048,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
fsnotify_recalc_mask(fsn_mark->connector); fsnotify_recalc_mask(fsn_mark->connector);
if (destroy_mark) if (destroy_mark)
fsnotify_detach_mark(fsn_mark); fsnotify_detach_mark(fsn_mark);
mutex_unlock(&group->mark_mutex); fsnotify_group_unlock(group);
if (destroy_mark) if (destroy_mark)
fsnotify_free_mark(fsn_mark); fsnotify_free_mark(fsn_mark);
...@@ -1081,47 +1081,63 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, ...@@ -1081,47 +1081,63 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
flags, umask); flags, umask);
} }
static void fanotify_mark_add_ignored_mask(struct fsnotify_mark *fsn_mark, static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
__u32 mask, unsigned int flags, unsigned int fan_flags)
__u32 *removed)
{ {
fsn_mark->ignored_mask |= mask; bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE);
bool recalc = false;
/* /*
* Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to * Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to
* the removal of the FS_MODIFY bit in calculated mask if it was set * the removal of the FS_MODIFY bit in calculated mask if it was set
* because of an ignored mask that is now going to survive FS_MODIFY. * because of an ignored mask that is now going to survive FS_MODIFY.
*/ */
if ((flags & FAN_MARK_IGNORED_SURV_MODIFY) && if ((fan_flags & FAN_MARK_IGNORED_MASK) &&
(fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
!(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) { !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) {
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
if (!(fsn_mark->mask & FS_MODIFY)) if (!(fsn_mark->mask & FS_MODIFY))
*removed = FS_MODIFY; recalc = true;
} }
if (fsn_mark->connector->type != FSNOTIFY_OBJ_TYPE_INODE ||
want_iref == !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
return recalc;
/*
* NO_IREF may be removed from a mark, but not added.
* When removed, fsnotify_recalc_mask() will take the inode ref.
*/
WARN_ON_ONCE(!want_iref);
fsn_mark->flags &= ~FSNOTIFY_MARK_FLAG_NO_IREF;
return true;
} }
static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
__u32 mask, unsigned int flags, __u32 mask, unsigned int fan_flags)
__u32 *removed)
{ {
__u32 oldmask, newmask; bool recalc;
spin_lock(&fsn_mark->lock); spin_lock(&fsn_mark->lock);
oldmask = fsnotify_calc_mask(fsn_mark); if (!(fan_flags & FAN_MARK_IGNORED_MASK))
if (!(flags & FAN_MARK_IGNORED_MASK)) {
fsn_mark->mask |= mask; fsn_mark->mask |= mask;
} else { else
fanotify_mark_add_ignored_mask(fsn_mark, mask, flags, removed); fsn_mark->ignored_mask |= mask;
}
newmask = fsnotify_calc_mask(fsn_mark); recalc = fsnotify_calc_mask(fsn_mark) &
~fsnotify_conn_mask(fsn_mark->connector);
recalc |= fanotify_mark_update_flags(fsn_mark, fan_flags);
spin_unlock(&fsn_mark->lock); spin_unlock(&fsn_mark->lock);
return newmask & ~oldmask; return recalc;
} }
static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp, fsnotify_connp_t *connp,
unsigned int obj_type, unsigned int obj_type,
unsigned int fan_flags,
__kernel_fsid_t *fsid) __kernel_fsid_t *fsid)
{ {
struct ucounts *ucounts = group->fanotify_data.ucounts; struct ucounts *ucounts = group->fanotify_data.ucounts;
...@@ -1144,6 +1160,9 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, ...@@ -1144,6 +1160,9 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
} }
fsnotify_init_mark(mark, group); fsnotify_init_mark(mark, group);
if (fan_flags & FAN_MARK_EVICTABLE)
mark->flags |= FSNOTIFY_MARK_FLAG_NO_IREF;
ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0, fsid); ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0, fsid);
if (ret) { if (ret) {
fsnotify_put_mark(mark); fsnotify_put_mark(mark);
...@@ -1170,39 +1189,49 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group) ...@@ -1170,39 +1189,49 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
static int fanotify_add_mark(struct fsnotify_group *group, static int fanotify_add_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp, unsigned int obj_type, fsnotify_connp_t *connp, unsigned int obj_type,
__u32 mask, unsigned int flags, __u32 mask, unsigned int fan_flags,
__kernel_fsid_t *fsid) __kernel_fsid_t *fsid)
{ {
struct fsnotify_mark *fsn_mark; struct fsnotify_mark *fsn_mark;
__u32 added, removed = 0; bool recalc;
int ret = 0; int ret = 0;
mutex_lock(&group->mark_mutex); fsnotify_group_lock(group);
fsn_mark = fsnotify_find_mark(connp, group); fsn_mark = fsnotify_find_mark(connp, group);
if (!fsn_mark) { if (!fsn_mark) {
fsn_mark = fanotify_add_new_mark(group, connp, obj_type, fsid); fsn_mark = fanotify_add_new_mark(group, connp, obj_type,
fan_flags, fsid);
if (IS_ERR(fsn_mark)) { if (IS_ERR(fsn_mark)) {
mutex_unlock(&group->mark_mutex); fsnotify_group_unlock(group);
return PTR_ERR(fsn_mark); return PTR_ERR(fsn_mark);
} }
} }
/*
* Non evictable mark cannot be downgraded to evictable mark.
*/
if (fan_flags & FAN_MARK_EVICTABLE &&
!(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) {
ret = -EEXIST;
goto out;
}
/* /*
* Error events are pre-allocated per group, only if strictly * Error events are pre-allocated per group, only if strictly
* needed (i.e. FAN_FS_ERROR was requested). * needed (i.e. FAN_FS_ERROR was requested).
*/ */
if (!(flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) { if (!(fan_flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) {
ret = fanotify_group_init_error_pool(group); ret = fanotify_group_init_error_pool(group);
if (ret) if (ret)
goto out; goto out;
} }
added = fanotify_mark_add_to_mask(fsn_mark, mask, flags, &removed); recalc = fanotify_mark_add_to_mask(fsn_mark, mask, fan_flags);
if (removed || (added & ~fsnotify_conn_mask(fsn_mark->connector))) if (recalc)
fsnotify_recalc_mask(fsn_mark->connector); fsnotify_recalc_mask(fsn_mark->connector);
out: out:
mutex_unlock(&group->mark_mutex); fsnotify_group_unlock(group);
fsnotify_put_mark(fsn_mark); fsnotify_put_mark(fsn_mark);
return ret; return ret;
...@@ -1348,14 +1377,15 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) ...@@ -1348,14 +1377,15 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
(!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID)))
return -EINVAL; return -EINVAL;
f_flags = O_RDWR | FMODE_NONOTIFY; f_flags = O_RDWR | __FMODE_NONOTIFY;
if (flags & FAN_CLOEXEC) if (flags & FAN_CLOEXEC)
f_flags |= O_CLOEXEC; f_flags |= O_CLOEXEC;
if (flags & FAN_NONBLOCK) if (flags & FAN_NONBLOCK)
f_flags |= O_NONBLOCK; f_flags |= O_NONBLOCK;
/* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
group = fsnotify_alloc_user_group(&fanotify_fsnotify_ops); group = fsnotify_alloc_group(&fanotify_fsnotify_ops,
FSNOTIFY_GROUP_USER | FSNOTIFY_GROUP_NOFS);
if (IS_ERR(group)) { if (IS_ERR(group)) {
return PTR_ERR(group); return PTR_ERR(group);
} }
...@@ -1597,6 +1627,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, ...@@ -1597,6 +1627,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
mark_type != FAN_MARK_FILESYSTEM) mark_type != FAN_MARK_FILESYSTEM)
goto fput_and_out; goto fput_and_out;
/*
* Evictable is only relevant for inode marks, because only inode object
* can be evicted on memory pressure.
*/
if (flags & FAN_MARK_EVICTABLE &&
mark_type != FAN_MARK_INODE)
goto fput_and_out;
/* /*
* Events that do not carry enough information to report * Events that do not carry enough information to report
* event->fd require a group that supports reporting fid. Those * event->fd require a group that supports reporting fid. Those
...@@ -1762,7 +1800,7 @@ static int __init fanotify_user_setup(void) ...@@ -1762,7 +1800,7 @@ static int __init fanotify_user_setup(void)
BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 10);
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
SLAB_PANIC|SLAB_ACCOUNT); SLAB_PANIC|SLAB_ACCOUNT);
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/exportfs.h> #include <linux/exportfs.h>
#include "inotify/inotify.h" #include "inotify/inotify.h"
#include "fanotify/fanotify.h"
#include "fdinfo.h" #include "fdinfo.h"
#include "fsnotify.h" #include "fsnotify.h"
...@@ -28,13 +29,13 @@ static void show_fdinfo(struct seq_file *m, struct file *f, ...@@ -28,13 +29,13 @@ static void show_fdinfo(struct seq_file *m, struct file *f,
struct fsnotify_group *group = f->private_data; struct fsnotify_group *group = f->private_data;
struct fsnotify_mark *mark; struct fsnotify_mark *mark;
mutex_lock(&group->mark_mutex); fsnotify_group_lock(group);
list_for_each_entry(mark, &group->marks_list, g_list) { list_for_each_entry(mark, &group->marks_list, g_list) {
show(m, mark); show(m, mark);
if (seq_has_overflowed(m)) if (seq_has_overflowed(m))
break; break;
} }
mutex_unlock(&group->mark_mutex); fsnotify_group_unlock(group);
} }
#if defined(CONFIG_EXPORTFS) #if defined(CONFIG_EXPORTFS)
...@@ -83,16 +84,9 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) ...@@ -83,16 +84,9 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
inode = igrab(fsnotify_conn_inode(mark->connector)); inode = igrab(fsnotify_conn_inode(mark->connector));
if (inode) { if (inode) {
/* seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:0 ",
* IN_ALL_EVENTS represents all of the mask bits
* that we expose to userspace. There is at
* least one bit (FS_EVENT_ON_CHILD) which is
* used only internally to the kernel.
*/
u32 mask = mark->mask & IN_ALL_EVENTS;
seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ",
inode_mark->wd, inode->i_ino, inode->i_sb->s_dev, inode_mark->wd, inode->i_ino, inode->i_sb->s_dev,
mask, mark->ignored_mask); inotify_mark_user_mask(mark));
show_mark_fhandle(m, inode); show_mark_fhandle(m, inode);
seq_putc(m, '\n'); seq_putc(m, '\n');
iput(inode); iput(inode);
...@@ -110,12 +104,9 @@ void inotify_show_fdinfo(struct seq_file *m, struct file *f) ...@@ -110,12 +104,9 @@ void inotify_show_fdinfo(struct seq_file *m, struct file *f)
static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
{ {
unsigned int mflags = 0; unsigned int mflags = fanotify_mark_user_flags(mark);
struct inode *inode; struct inode *inode;
if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
if (mark->connector->type == FSNOTIFY_OBJ_TYPE_INODE) { if (mark->connector->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = igrab(fsnotify_conn_inode(mark->connector)); inode = igrab(fsnotify_conn_inode(mark->connector));
if (!inode) if (!inode)
......
...@@ -253,7 +253,7 @@ static int fsnotify_handle_inode_event(struct fsnotify_group *group, ...@@ -253,7 +253,7 @@ static int fsnotify_handle_inode_event(struct fsnotify_group *group,
if (WARN_ON_ONCE(!inode && !dir)) if (WARN_ON_ONCE(!inode && !dir))
return 0; return 0;
if ((inode_mark->mask & FS_EXCL_UNLINK) && if ((inode_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) &&
path && d_unlinked(path->dentry)) path && d_unlinked(path->dentry))
return 0; return 0;
...@@ -290,22 +290,15 @@ static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask, ...@@ -290,22 +290,15 @@ static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask,
} }
if (parent_mark) { if (parent_mark) {
/* ret = fsnotify_handle_inode_event(group, parent_mark, mask,
* parent_mark indicates that the parent inode is watching data, data_type, dir, name, 0);
* children and interested in this event, which is an event if (ret)
* possible on child. But is *this mark* watching children and return ret;
* interested in this event?
*/
if (parent_mark->mask & FS_EVENT_ON_CHILD) {
ret = fsnotify_handle_inode_event(group, parent_mark, mask,
data, data_type, dir, name, 0);
if (ret)
return ret;
}
if (!inode_mark)
return 0;
} }
if (!inode_mark)
return 0;
if (mask & FS_EVENT_ON_CHILD) { if (mask & FS_EVENT_ON_CHILD) {
/* /*
* Some events can be sent on both parent dir and child marks * Some events can be sent on both parent dir and child marks
...@@ -335,31 +328,23 @@ static int send_to_group(__u32 mask, const void *data, int data_type, ...@@ -335,31 +328,23 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
struct fsnotify_mark *mark; struct fsnotify_mark *mark;
int type; int type;
if (WARN_ON(!iter_info->report_mask)) if (!iter_info->report_mask)
return 0; return 0;
/* clear ignored on inode modification */ /* clear ignored on inode modification */
if (mask & FS_MODIFY) { if (mask & FS_MODIFY) {
fsnotify_foreach_iter_type(type) { fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
if (!fsnotify_iter_should_report_type(iter_info, type)) if (!(mark->flags &
continue; FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
mark = iter_info->marks[type];
if (mark &&
!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
mark->ignored_mask = 0; mark->ignored_mask = 0;
} }
} }
fsnotify_foreach_iter_type(type) { /* Are any of the group marks interested in this event? */
if (!fsnotify_iter_should_report_type(iter_info, type)) fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
continue; group = mark->group;
mark = iter_info->marks[type]; marks_mask |= mark->mask;
/* does the object mark tell us to do something? */ marks_ignored_mask |= mark->ignored_mask;
if (mark) {
group = mark->group;
marks_mask |= mark->mask;
marks_ignored_mask |= mark->ignored_mask;
}
} }
pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n", pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
...@@ -403,11 +388,11 @@ static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) ...@@ -403,11 +388,11 @@ static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark)
/* /*
* iter_info is a multi head priority queue of marks. * iter_info is a multi head priority queue of marks.
* Pick a subset of marks from queue heads, all with the * Pick a subset of marks from queue heads, all with the same group
* same group and set the report_mask for selected subset. * and set the report_mask to a subset of the selected marks.
* Returns the report_mask of the selected subset. * Returns false if there are no more groups to iterate.
*/ */
static unsigned int fsnotify_iter_select_report_types( static bool fsnotify_iter_select_report_types(
struct fsnotify_iter_info *iter_info) struct fsnotify_iter_info *iter_info)
{ {
struct fsnotify_group *max_prio_group = NULL; struct fsnotify_group *max_prio_group = NULL;
...@@ -423,30 +408,48 @@ static unsigned int fsnotify_iter_select_report_types( ...@@ -423,30 +408,48 @@ static unsigned int fsnotify_iter_select_report_types(
} }
if (!max_prio_group) if (!max_prio_group)
return 0; return false;
/* Set the report mask for marks from same group as max prio group */ /* Set the report mask for marks from same group as max prio group */
iter_info->current_group = max_prio_group;
iter_info->report_mask = 0; iter_info->report_mask = 0;
fsnotify_foreach_iter_type(type) { fsnotify_foreach_iter_type(type) {
mark = iter_info->marks[type]; mark = iter_info->marks[type];
if (mark && if (mark && mark->group == iter_info->current_group) {
fsnotify_compare_groups(max_prio_group, mark->group) == 0) /*
* FSNOTIFY_ITER_TYPE_PARENT indicates that this inode
* is watching children and interested in this event,
* which is an event possible on child.
* But is *this mark* watching children?
*/
if (type == FSNOTIFY_ITER_TYPE_PARENT &&
!(mark->mask & FS_EVENT_ON_CHILD))
continue;
fsnotify_iter_set_report_type(iter_info, type); fsnotify_iter_set_report_type(iter_info, type);
}
} }
return iter_info->report_mask; return true;
} }
/* /*
* Pop from iter_info multi head queue, the marks that were iterated in the * Pop from iter_info multi head queue, the marks that belong to the group of
* current iteration step. * current iteration step.
*/ */
static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
{ {
struct fsnotify_mark *mark;
int type; int type;
/*
* We cannot use fsnotify_foreach_iter_mark_type() here because we
* may need to advance a mark of type X that belongs to current_group
* but was not selected for reporting.
*/
fsnotify_foreach_iter_type(type) { fsnotify_foreach_iter_type(type) {
if (fsnotify_iter_should_report_type(iter_info, type)) mark = iter_info->marks[type];
if (mark && mark->group == iter_info->current_group)
iter_info->marks[type] = iter_info->marks[type] =
fsnotify_next_mark(iter_info->marks[type]); fsnotify_next_mark(iter_info->marks[type]);
} }
...@@ -581,7 +584,7 @@ static __init int fsnotify_init(void) ...@@ -581,7 +584,7 @@ static __init int fsnotify_init(void)
{ {
int ret; int ret;
BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25); BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23);
ret = init_srcu_struct(&fsnotify_mark_srcu); ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret) if (ret)
......
...@@ -112,8 +112,10 @@ void fsnotify_put_group(struct fsnotify_group *group) ...@@ -112,8 +112,10 @@ void fsnotify_put_group(struct fsnotify_group *group)
EXPORT_SYMBOL_GPL(fsnotify_put_group); EXPORT_SYMBOL_GPL(fsnotify_put_group);
static struct fsnotify_group *__fsnotify_alloc_group( static struct fsnotify_group *__fsnotify_alloc_group(
const struct fsnotify_ops *ops, gfp_t gfp) const struct fsnotify_ops *ops,
int flags, gfp_t gfp)
{ {
static struct lock_class_key nofs_marks_lock;
struct fsnotify_group *group; struct fsnotify_group *group;
group = kzalloc(sizeof(struct fsnotify_group), gfp); group = kzalloc(sizeof(struct fsnotify_group), gfp);
...@@ -133,6 +135,17 @@ static struct fsnotify_group *__fsnotify_alloc_group( ...@@ -133,6 +135,17 @@ static struct fsnotify_group *__fsnotify_alloc_group(
INIT_LIST_HEAD(&group->marks_list); INIT_LIST_HEAD(&group->marks_list);
group->ops = ops; group->ops = ops;
group->flags = flags;
/*
* For most backends, eviction of inode with a mark is not expected,
* because marks hold a refcount on the inode against eviction.
*
* Use a different lockdep class for groups that support evictable
* inode marks, because with evictable marks, mark_mutex is NOT
* fs-reclaim safe - the mutex is taken when evicting inodes.
*/
if (flags & FSNOTIFY_GROUP_NOFS)
lockdep_set_class(&group->mark_mutex, &nofs_marks_lock);
return group; return group;
} }
...@@ -140,20 +153,15 @@ static struct fsnotify_group *__fsnotify_alloc_group( ...@@ -140,20 +153,15 @@ static struct fsnotify_group *__fsnotify_alloc_group(
/* /*
* Create a new fsnotify_group and hold a reference for the group returned. * Create a new fsnotify_group and hold a reference for the group returned.
*/ */
struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops,
int flags)
{ {
return __fsnotify_alloc_group(ops, GFP_KERNEL); gfp_t gfp = (flags & FSNOTIFY_GROUP_USER) ? GFP_KERNEL_ACCOUNT :
} GFP_KERNEL;
EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
/* return __fsnotify_alloc_group(ops, flags, gfp);
* Create a new fsnotify_group and hold a reference for the group returned.
*/
struct fsnotify_group *fsnotify_alloc_user_group(const struct fsnotify_ops *ops)
{
return __fsnotify_alloc_group(ops, GFP_KERNEL_ACCOUNT);
} }
EXPORT_SYMBOL_GPL(fsnotify_alloc_user_group); EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
int fsnotify_fasync(int fd, struct file *file, int on) int fsnotify_fasync(int fd, struct file *file, int on)
{ {
......
...@@ -22,6 +22,25 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse) ...@@ -22,6 +22,25 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
return container_of(fse, struct inotify_event_info, fse); return container_of(fse, struct inotify_event_info, fse);
} }
/*
* INOTIFY_USER_MASK represents all of the mask bits that we expose to
* userspace. There is at least one bit (FS_EVENT_ON_CHILD) which is
* used only internally to the kernel.
*/
#define INOTIFY_USER_MASK (IN_ALL_EVENTS)
/*
 * Rebuild the inotify mask of @fsn_mark in its user-visible form: the bits of
 * mark->mask covered by INOTIFY_USER_MASK, with IN_EXCL_UNLINK and IN_ONESHOT
 * added back when the matching FSNOTIFY_MARK_FLAG_* bit is set in mark->flags.
 */
static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark)
{
	const unsigned int mark_flags = fsn_mark->flags;
	__u32 user_mask = fsn_mark->mask & INOTIFY_USER_MASK;

	/* These two control bits are kept in mark flags, not in the mask. */
	user_mask |= (mark_flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) ?
			IN_EXCL_UNLINK : 0;
	user_mask |= (mark_flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT) ?
			IN_ONESHOT : 0;

	return user_mask;
}
extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group); struct fsnotify_group *group);
extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark,
......
...@@ -122,7 +122,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, ...@@ -122,7 +122,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
fsnotify_destroy_event(group, fsn_event); fsnotify_destroy_event(group, fsn_event);
} }
if (inode_mark->mask & IN_ONESHOT) if (inode_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT)
fsnotify_destroy_mark(inode_mark, group); fsnotify_destroy_mark(inode_mark, group);
return 0; return 0;
......
...@@ -110,11 +110,26 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) ...@@ -110,11 +110,26 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
mask |= FS_EVENT_ON_CHILD; mask |= FS_EVENT_ON_CHILD;
/* mask off the flags used to open the fd */ /* mask off the flags used to open the fd */
mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)); mask |= (arg & INOTIFY_USER_MASK);
return mask; return mask;
} }
#define INOTIFY_MARK_FLAGS \
(FSNOTIFY_MARK_FLAG_EXCL_UNLINK | FSNOTIFY_MARK_FLAG_IN_ONESHOT)
/*
 * Map the IN_EXCL_UNLINK and IN_ONESHOT bits of @arg onto the matching
 * FSNOTIFY_MARK_FLAG_* mark flags. All other bits of @arg are ignored.
 *
 * Returns the resulting mark flags (0 when neither bit is set).
 */
static inline unsigned int inotify_arg_to_flags(u32 arg)
{
	const unsigned int excl_unlink = (arg & IN_EXCL_UNLINK) ?
			FSNOTIFY_MARK_FLAG_EXCL_UNLINK : 0;
	const unsigned int oneshot = (arg & IN_ONESHOT) ?
			FSNOTIFY_MARK_FLAG_IN_ONESHOT : 0;

	return excl_unlink | oneshot;
}
static inline u32 inotify_mask_to_arg(__u32 mask) static inline u32 inotify_mask_to_arg(__u32 mask)
{ {
return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED | return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
...@@ -526,13 +541,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, ...@@ -526,13 +541,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
struct fsnotify_mark *fsn_mark; struct fsnotify_mark *fsn_mark;
struct inotify_inode_mark *i_mark; struct inotify_inode_mark *i_mark;
__u32 old_mask, new_mask; __u32 old_mask, new_mask;
__u32 mask; int replace = !(arg & IN_MASK_ADD);
int add = (arg & IN_MASK_ADD);
int create = (arg & IN_MASK_CREATE); int create = (arg & IN_MASK_CREATE);
int ret; int ret;
mask = inotify_arg_to_mask(inode, arg);
fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
if (!fsn_mark) if (!fsn_mark)
return -ENOENT; return -ENOENT;
...@@ -545,10 +557,12 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, ...@@ -545,10 +557,12 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
spin_lock(&fsn_mark->lock); spin_lock(&fsn_mark->lock);
old_mask = fsn_mark->mask; old_mask = fsn_mark->mask;
if (add) if (replace) {
fsn_mark->mask |= mask; fsn_mark->mask = 0;
else fsn_mark->flags &= ~INOTIFY_MARK_FLAGS;
fsn_mark->mask = mask; }
fsn_mark->mask |= inotify_arg_to_mask(inode, arg);
fsn_mark->flags |= inotify_arg_to_flags(arg);
new_mask = fsn_mark->mask; new_mask = fsn_mark->mask;
spin_unlock(&fsn_mark->lock); spin_unlock(&fsn_mark->lock);
...@@ -579,19 +593,17 @@ static int inotify_new_watch(struct fsnotify_group *group, ...@@ -579,19 +593,17 @@ static int inotify_new_watch(struct fsnotify_group *group,
u32 arg) u32 arg)
{ {
struct inotify_inode_mark *tmp_i_mark; struct inotify_inode_mark *tmp_i_mark;
__u32 mask;
int ret; int ret;
struct idr *idr = &group->inotify_data.idr; struct idr *idr = &group->inotify_data.idr;
spinlock_t *idr_lock = &group->inotify_data.idr_lock; spinlock_t *idr_lock = &group->inotify_data.idr_lock;
mask = inotify_arg_to_mask(inode, arg);
tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
if (unlikely(!tmp_i_mark)) if (unlikely(!tmp_i_mark))
return -ENOMEM; return -ENOMEM;
fsnotify_init_mark(&tmp_i_mark->fsn_mark, group); fsnotify_init_mark(&tmp_i_mark->fsn_mark, group);
tmp_i_mark->fsn_mark.mask = mask; tmp_i_mark->fsn_mark.mask = inotify_arg_to_mask(inode, arg);
tmp_i_mark->fsn_mark.flags = inotify_arg_to_flags(arg);
tmp_i_mark->wd = -1; tmp_i_mark->wd = -1;
ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark); ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
...@@ -628,13 +640,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod ...@@ -628,13 +640,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod
{ {
int ret = 0; int ret = 0;
mutex_lock(&group->mark_mutex); fsnotify_group_lock(group);
/* try to update an existing watch with the new arg */ /* try to update an existing watch with the new arg */
ret = inotify_update_existing_watch(group, inode, arg); ret = inotify_update_existing_watch(group, inode, arg);
/* no mark present, try to add a new one */ /* no mark present, try to add a new one */
if (ret == -ENOENT) if (ret == -ENOENT)
ret = inotify_new_watch(group, inode, arg); ret = inotify_new_watch(group, inode, arg);
mutex_unlock(&group->mark_mutex); fsnotify_group_unlock(group);
return ret; return ret;
} }
...@@ -644,7 +656,8 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) ...@@ -644,7 +656,8 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
struct fsnotify_group *group; struct fsnotify_group *group;
struct inotify_event_info *oevent; struct inotify_event_info *oevent;
group = fsnotify_alloc_user_group(&inotify_fsnotify_ops); group = fsnotify_alloc_group(&inotify_fsnotify_ops,
FSNOTIFY_GROUP_USER);
if (IS_ERR(group)) if (IS_ERR(group))
return group; return group;
...@@ -845,9 +858,7 @@ static int __init inotify_user_setup(void) ...@@ -845,9 +858,7 @@ static int __init inotify_user_setup(void)
BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22); BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22);
......
...@@ -116,20 +116,64 @@ __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn) ...@@ -116,20 +116,64 @@ __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn)
return *fsnotify_conn_mask_p(conn); return *fsnotify_conn_mask_p(conn);
} }
static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) static void fsnotify_get_inode_ref(struct inode *inode)
{
ihold(inode);
atomic_long_inc(&inode->i_sb->s_fsnotify_connectors);
}
/*
* Grab or drop inode reference for the connector if needed.
*
* When it's time to drop the reference, we only clear the HAS_IREF flag and
* return the inode object. fsnotify_drop_object() will be responsible for doing
* iput() outside of spinlocks. This happens when last mark that wanted iref is
* detached.
*/
static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn,
					  bool want_iref)
{
	bool holds_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF;
	struct inode *unpinned;

	/* Connectors of any other object type are left untouched. */
	if (conn->type != FSNOTIFY_OBJ_TYPE_INODE)
		return NULL;

	/* The HAS_IREF state already matches what the caller asked for. */
	if (holds_iref == want_iref)
		return NULL;

	if (!want_iref) {
		/*
		 * Unpin inode after detach of last mark that wanted iref:
		 * only the flag is cleared here and the inode is handed back
		 * to the caller.
		 */
		unpinned = fsnotify_conn_inode(conn);
		conn->flags &= ~FSNOTIFY_CONN_FLAG_HAS_IREF;
		return unpinned;
	}

	/* Pin inode if any mark wants inode refcount held */
	fsnotify_get_inode_ref(fsnotify_conn_inode(conn));
	conn->flags |= FSNOTIFY_CONN_FLAG_HAS_IREF;

	return NULL;
}
static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{ {
u32 new_mask = 0; u32 new_mask = 0;
bool want_iref = false;
struct fsnotify_mark *mark; struct fsnotify_mark *mark;
assert_spin_locked(&conn->lock); assert_spin_locked(&conn->lock);
/* We can get detached connector here when inode is getting unlinked. */ /* We can get detached connector here when inode is getting unlinked. */
if (!fsnotify_valid_obj_type(conn->type)) if (!fsnotify_valid_obj_type(conn->type))
return; return NULL;
hlist_for_each_entry(mark, &conn->list, obj_list) { hlist_for_each_entry(mark, &conn->list, obj_list) {
if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
new_mask |= fsnotify_calc_mask(mark); continue;
new_mask |= fsnotify_calc_mask(mark);
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE &&
!(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
want_iref = true;
} }
*fsnotify_conn_mask_p(conn) = new_mask; *fsnotify_conn_mask_p(conn) = new_mask;
return fsnotify_update_iref(conn, want_iref);
} }
/* /*
...@@ -169,12 +213,6 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) ...@@ -169,12 +213,6 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
} }
} }
static void fsnotify_get_inode_ref(struct inode *inode)
{
ihold(inode);
atomic_long_inc(&inode->i_sb->s_fsnotify_connectors);
}
static void fsnotify_put_inode_ref(struct inode *inode) static void fsnotify_put_inode_ref(struct inode *inode)
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
...@@ -213,6 +251,10 @@ static void *fsnotify_detach_connector_from_object( ...@@ -213,6 +251,10 @@ static void *fsnotify_detach_connector_from_object(
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = fsnotify_conn_inode(conn); inode = fsnotify_conn_inode(conn);
inode->i_fsnotify_mask = 0; inode->i_fsnotify_mask = 0;
/* Unpin inode when detaching from connector */
if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF))
inode = NULL;
} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
...@@ -274,7 +316,8 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) ...@@ -274,7 +316,8 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
objp = fsnotify_detach_connector_from_object(conn, &type); objp = fsnotify_detach_connector_from_object(conn, &type);
free_conn = true; free_conn = true;
} else { } else {
__fsnotify_recalc_mask(conn); objp = __fsnotify_recalc_mask(conn);
type = conn->type;
} }
WRITE_ONCE(mark->connector, NULL); WRITE_ONCE(mark->connector, NULL);
spin_unlock(&conn->lock); spin_unlock(&conn->lock);
...@@ -398,9 +441,7 @@ void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) ...@@ -398,9 +441,7 @@ void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
*/ */
void fsnotify_detach_mark(struct fsnotify_mark *mark) void fsnotify_detach_mark(struct fsnotify_mark *mark)
{ {
struct fsnotify_group *group = mark->group; fsnotify_group_assert_locked(mark->group);
WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) && WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
refcount_read(&mark->refcnt) < 1 + refcount_read(&mark->refcnt) < 1 +
!!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));
...@@ -452,9 +493,9 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) ...@@ -452,9 +493,9 @@ void fsnotify_free_mark(struct fsnotify_mark *mark)
void fsnotify_destroy_mark(struct fsnotify_mark *mark, void fsnotify_destroy_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group) struct fsnotify_group *group)
{ {
mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); fsnotify_group_lock(group);
fsnotify_detach_mark(mark); fsnotify_detach_mark(mark);
mutex_unlock(&group->mark_mutex); fsnotify_group_unlock(group);
fsnotify_free_mark(mark); fsnotify_free_mark(mark);
} }
EXPORT_SYMBOL_GPL(fsnotify_destroy_mark); EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
...@@ -499,7 +540,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, ...@@ -499,7 +540,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
unsigned int obj_type, unsigned int obj_type,
__kernel_fsid_t *fsid) __kernel_fsid_t *fsid)
{ {
struct inode *inode = NULL;
struct fsnotify_mark_connector *conn; struct fsnotify_mark_connector *conn;
conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL); conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
...@@ -507,6 +547,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, ...@@ -507,6 +547,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
return -ENOMEM; return -ENOMEM;
spin_lock_init(&conn->lock); spin_lock_init(&conn->lock);
INIT_HLIST_HEAD(&conn->list); INIT_HLIST_HEAD(&conn->list);
conn->flags = 0;
conn->type = obj_type; conn->type = obj_type;
conn->obj = connp; conn->obj = connp;
/* Cache fsid of filesystem containing the object */ /* Cache fsid of filesystem containing the object */
...@@ -517,10 +558,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, ...@@ -517,10 +558,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
conn->fsid.val[0] = conn->fsid.val[1] = 0; conn->fsid.val[0] = conn->fsid.val[1] = 0;
conn->flags = 0; conn->flags = 0;
} }
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = fsnotify_conn_inode(conn);
fsnotify_get_inode_ref(inode);
}
fsnotify_get_sb_connectors(conn); fsnotify_get_sb_connectors(conn);
/* /*
...@@ -529,8 +566,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, ...@@ -529,8 +566,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
*/ */
if (cmpxchg(connp, NULL, conn)) { if (cmpxchg(connp, NULL, conn)) {
/* Someone else created list structure for us */ /* Someone else created list structure for us */
if (inode)
fsnotify_put_inode_ref(inode);
fsnotify_put_sb_connectors(conn); fsnotify_put_sb_connectors(conn);
kmem_cache_free(fsnotify_mark_connector_cachep, conn); kmem_cache_free(fsnotify_mark_connector_cachep, conn);
} }
...@@ -574,7 +609,7 @@ static struct fsnotify_mark_connector *fsnotify_grab_connector( ...@@ -574,7 +609,7 @@ static struct fsnotify_mark_connector *fsnotify_grab_connector(
static int fsnotify_add_mark_list(struct fsnotify_mark *mark, static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, fsnotify_connp_t *connp,
unsigned int obj_type, unsigned int obj_type,
int allow_dups, __kernel_fsid_t *fsid) int add_flags, __kernel_fsid_t *fsid)
{ {
struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark *lmark, *last = NULL;
struct fsnotify_mark_connector *conn; struct fsnotify_mark_connector *conn;
...@@ -633,7 +668,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, ...@@ -633,7 +668,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
if ((lmark->group == mark->group) && if ((lmark->group == mark->group) &&
(lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
!allow_dups) { !(mark->group->flags & FSNOTIFY_GROUP_DUPS)) {
err = -EEXIST; err = -EEXIST;
goto out_err; goto out_err;
} }
...@@ -668,12 +703,12 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, ...@@ -668,12 +703,12 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
*/ */
int fsnotify_add_mark_locked(struct fsnotify_mark *mark, int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, unsigned int obj_type, fsnotify_connp_t *connp, unsigned int obj_type,
int allow_dups, __kernel_fsid_t *fsid) int add_flags, __kernel_fsid_t *fsid)
{ {
struct fsnotify_group *group = mark->group; struct fsnotify_group *group = mark->group;
int ret = 0; int ret = 0;
BUG_ON(!mutex_is_locked(&group->mark_mutex)); fsnotify_group_assert_locked(group);
/* /*
* LOCKING ORDER!!!! * LOCKING ORDER!!!!
...@@ -688,12 +723,11 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, ...@@ -688,12 +723,11 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_get_mark(mark); /* for g_list */ fsnotify_get_mark(mark); /* for g_list */
spin_unlock(&mark->lock); spin_unlock(&mark->lock);
ret = fsnotify_add_mark_list(mark, connp, obj_type, allow_dups, fsid); ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags, fsid);
if (ret) if (ret)
goto err; goto err;
if (mark->mask || mark->ignored_mask) fsnotify_recalc_mask(mark->connector);
fsnotify_recalc_mask(mark->connector);
return ret; return ret;
err: err:
...@@ -708,15 +742,15 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, ...@@ -708,15 +742,15 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
} }
int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
unsigned int obj_type, int allow_dups, unsigned int obj_type, int add_flags,
__kernel_fsid_t *fsid) __kernel_fsid_t *fsid)
{ {
int ret; int ret;
struct fsnotify_group *group = mark->group; struct fsnotify_group *group = mark->group;
mutex_lock(&group->mark_mutex); fsnotify_group_lock(group);
ret = fsnotify_add_mark_locked(mark, connp, obj_type, allow_dups, fsid); ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags, fsid);
mutex_unlock(&group->mark_mutex); fsnotify_group_unlock(group);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(fsnotify_add_mark); EXPORT_SYMBOL_GPL(fsnotify_add_mark);
...@@ -770,24 +804,24 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, ...@@ -770,24 +804,24 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
* move marks to free to to_free list in one go and then free marks in * move marks to free to to_free list in one go and then free marks in
* to_free list one by one. * to_free list one by one.
*/ */
mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); fsnotify_group_lock(group);
list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
if (mark->connector->type == obj_type) if (mark->connector->type == obj_type)
list_move(&mark->g_list, &to_free); list_move(&mark->g_list, &to_free);
} }
mutex_unlock(&group->mark_mutex); fsnotify_group_unlock(group);
clear: clear:
while (1) { while (1) {
mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); fsnotify_group_lock(group);
if (list_empty(head)) { if (list_empty(head)) {
mutex_unlock(&group->mark_mutex); fsnotify_group_unlock(group);
break; break;
} }
mark = list_first_entry(head, struct fsnotify_mark, g_list); mark = list_first_entry(head, struct fsnotify_mark, g_list);
fsnotify_get_mark(mark); fsnotify_get_mark(mark);
fsnotify_detach_mark(mark); fsnotify_detach_mark(mark);
mutex_unlock(&group->mark_mutex); fsnotify_group_unlock(group);
fsnotify_free_mark(mark); fsnotify_free_mark(mark);
fsnotify_put_mark(mark); fsnotify_put_mark(mark);
} }
......
...@@ -66,6 +66,7 @@ ...@@ -66,6 +66,7 @@
FAN_MARK_ONLYDIR | \ FAN_MARK_ONLYDIR | \
FAN_MARK_IGNORED_MASK | \ FAN_MARK_IGNORED_MASK | \
FAN_MARK_IGNORED_SURV_MODIFY | \ FAN_MARK_IGNORED_SURV_MODIFY | \
FAN_MARK_EVICTABLE | \
FAN_MARK_FLUSH) FAN_MARK_FLUSH)
/* /*
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <linux/user_namespace.h> #include <linux/user_namespace.h>
#include <linux/refcount.h> #include <linux/refcount.h>
#include <linux/mempool.h> #include <linux/mempool.h>
#include <linux/sched/mm.h>
/* /*
* IN_* from inotify.h lines up EXACTLY with FS_*, this is so we can easily * IN_* from inotify.h lines up EXACTLY with FS_*, this is so we can easily
...@@ -55,7 +56,6 @@ ...@@ -55,7 +56,6 @@
#define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */
#define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */
#define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */
/* /*
* Set on inode mark that cares about things that happen to its children. * Set on inode mark that cares about things that happen to its children.
* Always set for dnotify and inotify. * Always set for dnotify and inotify.
...@@ -66,7 +66,6 @@ ...@@ -66,7 +66,6 @@
#define FS_RENAME 0x10000000 /* File was renamed */ #define FS_RENAME 0x10000000 /* File was renamed */
#define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ #define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */
#define FS_ISDIR 0x40000000 /* event occurred against dir */ #define FS_ISDIR 0x40000000 /* event occurred against dir */
#define FS_IN_ONESHOT 0x80000000 /* only send event once */
#define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO)
...@@ -106,8 +105,7 @@ ...@@ -106,8 +105,7 @@
FS_ERROR) FS_ERROR)
/* Extra flags that may be reported with event or control handling of events */ /* Extra flags that may be reported with event or control handling of events */
#define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ #define ALL_FSNOTIFY_FLAGS (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT)
FS_DN_MULTISHOT | FS_EVENT_ON_CHILD)
#define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS) #define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS)
...@@ -213,6 +211,12 @@ struct fsnotify_group { ...@@ -213,6 +211,12 @@ struct fsnotify_group {
unsigned int priority; unsigned int priority;
bool shutdown; /* group is being shut down, don't queue more events */ bool shutdown; /* group is being shut down, don't queue more events */
#define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */
#define FSNOTIFY_GROUP_DUPS 0x02 /* allow multiple marks per object */
#define FSNOTIFY_GROUP_NOFS 0x04 /* group lock is not direct reclaim safe */
int flags;
unsigned int owner_flags; /* stored flags of mark_mutex owner */
/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
struct mutex mark_mutex; /* protect marks_list */ struct mutex mark_mutex; /* protect marks_list */
atomic_t user_waits; /* Number of tasks waiting for user atomic_t user_waits; /* Number of tasks waiting for user
...@@ -253,6 +257,31 @@ struct fsnotify_group { ...@@ -253,6 +257,31 @@ struct fsnotify_group {
}; };
}; };
/*
* These helpers are used to prevent deadlock when reclaiming inodes with
* evictable marks of the same group that is allocating a new mark.
*/
/*
 * Take @group->mark_mutex. For groups flagged FSNOTIFY_GROUP_NOFS, also call
 * memalloc_nofs_save() while holding the mutex and keep its return value in
 * group->owner_flags.
 */
static inline void fsnotify_group_lock(struct fsnotify_group *group)
{
	mutex_lock(&group->mark_mutex);

	/* Groups without the NOFS flag need nothing beyond the mutex. */
	if (!(group->flags & FSNOTIFY_GROUP_NOFS))
		return;

	group->owner_flags = memalloc_nofs_save();
}
/*
 * Release @group->mark_mutex. For groups flagged FSNOTIFY_GROUP_NOFS, the
 * value stored in group->owner_flags is first passed to
 * memalloc_nofs_restore(), while the mutex is still held.
 */
static inline void fsnotify_group_unlock(struct fsnotify_group *group)
{
	const bool nofs_group = group->flags & FSNOTIFY_GROUP_NOFS;

	/* owner_flags must be consumed before the mutex is dropped. */
	if (nofs_group)
		memalloc_nofs_restore(group->owner_flags);

	mutex_unlock(&group->mark_mutex);
}
/*
 * Sanity check for code that requires @group to be locked: warn (once per
 * check) if mark_mutex is not locked, and, for FSNOTIFY_GROUP_NOFS groups,
 * if the current task does not have PF_MEMALLOC_NOFS set.
 */
static inline void fsnotify_group_assert_locked(struct fsnotify_group *group)
{
	WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
	WARN_ON_ONCE((group->flags & FSNOTIFY_GROUP_NOFS) &&
		     !(current->flags & PF_MEMALLOC_NOFS));
}
/* When calling fsnotify tell it if the data is a path or inode */ /* When calling fsnotify tell it if the data is a path or inode */
enum fsnotify_data_type { enum fsnotify_data_type {
FSNOTIFY_EVENT_NONE, FSNOTIFY_EVENT_NONE,
...@@ -370,6 +399,7 @@ static inline bool fsnotify_valid_obj_type(unsigned int obj_type) ...@@ -370,6 +399,7 @@ static inline bool fsnotify_valid_obj_type(unsigned int obj_type)
struct fsnotify_iter_info { struct fsnotify_iter_info {
struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT]; struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT];
struct fsnotify_group *current_group;
unsigned int report_mask; unsigned int report_mask;
int srcu_idx; int srcu_idx;
}; };
...@@ -386,20 +416,31 @@ static inline void fsnotify_iter_set_report_type( ...@@ -386,20 +416,31 @@ static inline void fsnotify_iter_set_report_type(
iter_info->report_mask |= (1U << iter_type); iter_info->report_mask |= (1U << iter_type);
} }
static inline void fsnotify_iter_set_report_type_mark( static inline struct fsnotify_mark *fsnotify_iter_mark(
struct fsnotify_iter_info *iter_info, int iter_type, struct fsnotify_iter_info *iter_info, int iter_type)
struct fsnotify_mark *mark)
{ {
iter_info->marks[iter_type] = mark; if (fsnotify_iter_should_report_type(iter_info, iter_type))
iter_info->report_mask |= (1U << iter_type); return iter_info->marks[iter_type];
return NULL;
}
/*
 * Starting at iterator type @type, find the first type for which
 * fsnotify_iter_mark() yields a non-NULL mark.
 *
 * On success the mark is stored in *@markp and its type is returned. When no
 * such type remains, FSNOTIFY_ITER_TYPE_COUNT (or @type itself, if it was
 * already past the end) is returned; *@markp then holds the result of the
 * last lookup, or is untouched if no lookup was made.
 */
static inline int fsnotify_iter_step(struct fsnotify_iter_info *iter, int type,
				     struct fsnotify_mark **markp)
{
	for (; type < FSNOTIFY_ITER_TYPE_COUNT; type++) {
		*markp = fsnotify_iter_mark(iter, type);
		if (*markp)
			return type;
	}

	return type;
}
#define FSNOTIFY_ITER_FUNCS(name, NAME) \ #define FSNOTIFY_ITER_FUNCS(name, NAME) \
static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \
struct fsnotify_iter_info *iter_info) \ struct fsnotify_iter_info *iter_info) \
{ \ { \
return (iter_info->report_mask & (1U << FSNOTIFY_ITER_TYPE_##NAME)) ? \ return fsnotify_iter_mark(iter_info, FSNOTIFY_ITER_TYPE_##NAME); \
iter_info->marks[FSNOTIFY_ITER_TYPE_##NAME] : NULL; \
} }
FSNOTIFY_ITER_FUNCS(inode, INODE) FSNOTIFY_ITER_FUNCS(inode, INODE)
...@@ -409,6 +450,11 @@ FSNOTIFY_ITER_FUNCS(sb, SB) ...@@ -409,6 +450,11 @@ FSNOTIFY_ITER_FUNCS(sb, SB)
#define fsnotify_foreach_iter_type(type) \ #define fsnotify_foreach_iter_type(type) \
for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++) for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++)
/*
 * Loop header that visits only the iterator types for which
 * fsnotify_iter_step() finds a non-NULL mark. On every pass of the loop body,
 * @type is the iterator type and @mark the mark found for it. The step call
 * sits in the loop condition (via the comma operator), so @type is advanced
 * to the next type with a mark before each bounds check.
 */
#define fsnotify_foreach_iter_mark_type(iter, mark, type) \
for (type = 0; \
type = fsnotify_iter_step(iter, type, &mark), \
type < FSNOTIFY_ITER_TYPE_COUNT; \
type++)
/* /*
* fsnotify_connp_t is what we embed in objects which connector can be attached * fsnotify_connp_t is what we embed in objects which connector can be attached
...@@ -427,6 +473,7 @@ struct fsnotify_mark_connector { ...@@ -427,6 +473,7 @@ struct fsnotify_mark_connector {
spinlock_t lock; spinlock_t lock;
unsigned short type; /* Type of object [lock] */ unsigned short type; /* Type of object [lock] */
#define FSNOTIFY_CONN_FLAG_HAS_FSID 0x01 #define FSNOTIFY_CONN_FLAG_HAS_FSID 0x01
#define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02
unsigned short flags; /* flags [lock] */ unsigned short flags; /* flags [lock] */
__kernel_fsid_t fsid; /* fsid of filesystem containing object */ __kernel_fsid_t fsid; /* fsid of filesystem containing object */
union { union {
...@@ -473,9 +520,15 @@ struct fsnotify_mark { ...@@ -473,9 +520,15 @@ struct fsnotify_mark {
struct fsnotify_mark_connector *connector; struct fsnotify_mark_connector *connector;
/* Events types to ignore [mark->lock, group->mark_mutex] */ /* Events types to ignore [mark->lock, group->mark_mutex] */
__u32 ignored_mask; __u32 ignored_mask;
#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x01 /* General fsnotify mark flags */
#define FSNOTIFY_MARK_FLAG_ALIVE 0x02 #define FSNOTIFY_MARK_FLAG_ALIVE 0x0001
#define FSNOTIFY_MARK_FLAG_ATTACHED 0x04 #define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002
/* inotify mark flags */
#define FSNOTIFY_MARK_FLAG_EXCL_UNLINK 0x0010
#define FSNOTIFY_MARK_FLAG_IN_ONESHOT 0x0020
/* fanotify mark flags */
#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100
#define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200
unsigned int flags; /* flags [mark->lock] */ unsigned int flags; /* flags [mark->lock] */
}; };
...@@ -541,8 +594,9 @@ static inline void fsnotify_update_flags(struct dentry *dentry) ...@@ -541,8 +594,9 @@ static inline void fsnotify_update_flags(struct dentry *dentry)
/* called from fsnotify listeners, such as fanotify or dnotify */ /* called from fsnotify listeners, such as fanotify or dnotify */
/* create a new group */ /* create a new group */
extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops); extern struct fsnotify_group *fsnotify_alloc_group(
extern struct fsnotify_group *fsnotify_alloc_user_group(const struct fsnotify_ops *ops); const struct fsnotify_ops *ops,
int flags);
/* get reference to a group */ /* get reference to a group */
extern void fsnotify_get_group(struct fsnotify_group *group); extern void fsnotify_get_group(struct fsnotify_group *group);
/* drop reference on a group from fsnotify_alloc_group */ /* drop reference on a group from fsnotify_alloc_group */
...@@ -635,26 +689,26 @@ extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, ...@@ -635,26 +689,26 @@ extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn,
/* attach the mark to the object */ /* attach the mark to the object */
extern int fsnotify_add_mark(struct fsnotify_mark *mark, extern int fsnotify_add_mark(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, unsigned int obj_type, fsnotify_connp_t *connp, unsigned int obj_type,
int allow_dups, __kernel_fsid_t *fsid); int add_flags, __kernel_fsid_t *fsid);
extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, fsnotify_connp_t *connp,
unsigned int obj_type, int allow_dups, unsigned int obj_type, int add_flags,
__kernel_fsid_t *fsid); __kernel_fsid_t *fsid);
/* attach the mark to the inode */ /* attach the mark to the inode */
static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
struct inode *inode, struct inode *inode,
int allow_dups) int add_flags)
{ {
return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, return fsnotify_add_mark(mark, &inode->i_fsnotify_marks,
FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL); FSNOTIFY_OBJ_TYPE_INODE, add_flags, NULL);
} }
static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark,
struct inode *inode, struct inode *inode,
int allow_dups) int add_flags)
{ {
return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks,
FSNOTIFY_OBJ_TYPE_INODE, allow_dups, FSNOTIFY_OBJ_TYPE_INODE, add_flags,
NULL); NULL);
} }
......
...@@ -82,6 +82,7 @@ ...@@ -82,6 +82,7 @@
#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 #define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040
#define FAN_MARK_FLUSH 0x00000080 #define FAN_MARK_FLUSH 0x00000080
/* FAN_MARK_FILESYSTEM is 0x00000100 */ /* FAN_MARK_FILESYSTEM is 0x00000100 */
#define FAN_MARK_EVICTABLE 0x00000200
/* These are NOT bitwise flags. Both bits can be used together. */ /* These are NOT bitwise flags. Both bits can be used together. */
#define FAN_MARK_INODE 0x00000000 #define FAN_MARK_INODE 0x00000000
......
...@@ -100,7 +100,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa ...@@ -100,7 +100,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
audit_update_mark(audit_mark, dentry->d_inode); audit_update_mark(audit_mark, dentry->d_inode);
audit_mark->rule = krule; audit_mark->rule = krule;
ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, true); ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, 0);
if (ret < 0) { if (ret < 0) {
fsnotify_put_mark(&audit_mark->mark); fsnotify_put_mark(&audit_mark->mark);
audit_mark = ERR_PTR(ret); audit_mark = ERR_PTR(ret);
...@@ -181,7 +181,8 @@ static const struct fsnotify_ops audit_mark_fsnotify_ops = { ...@@ -181,7 +181,8 @@ static const struct fsnotify_ops audit_mark_fsnotify_ops = {
static int __init audit_fsnotify_init(void) static int __init audit_fsnotify_init(void)
{ {
audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops); audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops,
FSNOTIFY_GROUP_DUPS);
if (IS_ERR(audit_fsnotify_group)) { if (IS_ERR(audit_fsnotify_group)) {
audit_fsnotify_group = NULL; audit_fsnotify_group = NULL;
audit_panic("cannot create audit fsnotify group"); audit_panic("cannot create audit fsnotify group");
......
...@@ -351,7 +351,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark) ...@@ -351,7 +351,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark)
struct audit_chunk *new; struct audit_chunk *new;
int size; int size;
mutex_lock(&audit_tree_group->mark_mutex); fsnotify_group_lock(audit_tree_group);
/* /*
* mark_mutex stabilizes chunk attached to the mark so we can check * mark_mutex stabilizes chunk attached to the mark so we can check
* whether it didn't change while we've dropped hash_lock. * whether it didn't change while we've dropped hash_lock.
...@@ -368,7 +368,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark) ...@@ -368,7 +368,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark)
replace_mark_chunk(mark, NULL); replace_mark_chunk(mark, NULL);
spin_unlock(&hash_lock); spin_unlock(&hash_lock);
fsnotify_detach_mark(mark); fsnotify_detach_mark(mark);
mutex_unlock(&audit_tree_group->mark_mutex); fsnotify_group_unlock(audit_tree_group);
audit_mark_put_chunk(chunk); audit_mark_put_chunk(chunk);
fsnotify_free_mark(mark); fsnotify_free_mark(mark);
return; return;
...@@ -385,12 +385,12 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark) ...@@ -385,12 +385,12 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark)
*/ */
replace_chunk(new, chunk); replace_chunk(new, chunk);
spin_unlock(&hash_lock); spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex); fsnotify_group_unlock(audit_tree_group);
audit_mark_put_chunk(chunk); audit_mark_put_chunk(chunk);
return; return;
out_mutex: out_mutex:
mutex_unlock(&audit_tree_group->mark_mutex); fsnotify_group_unlock(audit_tree_group);
} }
/* Call with group->mark_mutex held, releases it */ /* Call with group->mark_mutex held, releases it */
...@@ -400,19 +400,19 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) ...@@ -400,19 +400,19 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
struct audit_chunk *chunk = alloc_chunk(1); struct audit_chunk *chunk = alloc_chunk(1);
if (!chunk) { if (!chunk) {
mutex_unlock(&audit_tree_group->mark_mutex); fsnotify_group_unlock(audit_tree_group);
return -ENOMEM; return -ENOMEM;
} }
mark = alloc_mark(); mark = alloc_mark();
if (!mark) { if (!mark) {
mutex_unlock(&audit_tree_group->mark_mutex); fsnotify_group_unlock(audit_tree_group);
kfree(chunk); kfree(chunk);
return -ENOMEM; return -ENOMEM;
} }
if (fsnotify_add_inode_mark_locked(mark, inode, 0)) { if (fsnotify_add_inode_mark_locked(mark, inode, 0)) {
mutex_unlock(&audit_tree_group->mark_mutex); fsnotify_group_unlock(audit_tree_group);
fsnotify_put_mark(mark); fsnotify_put_mark(mark);
kfree(chunk); kfree(chunk);
return -ENOSPC; return -ENOSPC;
...@@ -422,7 +422,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) ...@@ -422,7 +422,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
if (tree->goner) { if (tree->goner) {
spin_unlock(&hash_lock); spin_unlock(&hash_lock);
fsnotify_detach_mark(mark); fsnotify_detach_mark(mark);
mutex_unlock(&audit_tree_group->mark_mutex); fsnotify_group_unlock(audit_tree_group);
fsnotify_free_mark(mark); fsnotify_free_mark(mark);
fsnotify_put_mark(mark); fsnotify_put_mark(mark);
kfree(chunk); kfree(chunk);
...@@ -444,7 +444,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) ...@@ -444,7 +444,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
*/ */
insert_hash(chunk); insert_hash(chunk);
spin_unlock(&hash_lock); spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex); fsnotify_group_unlock(audit_tree_group);
/* /*
* Drop our initial reference. When mark we point to is getting freed, * Drop our initial reference. When mark we point to is getting freed,
* we get notification through ->freeing_mark callback and cleanup * we get notification through ->freeing_mark callback and cleanup
...@@ -462,7 +462,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) ...@@ -462,7 +462,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
struct audit_node *p; struct audit_node *p;
int n; int n;
mutex_lock(&audit_tree_group->mark_mutex); fsnotify_group_lock(audit_tree_group);
mark = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group); mark = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
if (!mark) if (!mark)
return create_chunk(inode, tree); return create_chunk(inode, tree);
...@@ -478,7 +478,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) ...@@ -478,7 +478,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
for (n = 0; n < old->count; n++) { for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) { if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock); spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex); fsnotify_group_unlock(audit_tree_group);
fsnotify_put_mark(mark); fsnotify_put_mark(mark);
return 0; return 0;
} }
...@@ -487,7 +487,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) ...@@ -487,7 +487,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
chunk = alloc_chunk(old->count + 1); chunk = alloc_chunk(old->count + 1);
if (!chunk) { if (!chunk) {
mutex_unlock(&audit_tree_group->mark_mutex); fsnotify_group_unlock(audit_tree_group);
fsnotify_put_mark(mark); fsnotify_put_mark(mark);
return -ENOMEM; return -ENOMEM;
} }
...@@ -495,7 +495,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) ...@@ -495,7 +495,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
spin_lock(&hash_lock); spin_lock(&hash_lock);
if (tree->goner) { if (tree->goner) {
spin_unlock(&hash_lock); spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex); fsnotify_group_unlock(audit_tree_group);
fsnotify_put_mark(mark); fsnotify_put_mark(mark);
kfree(chunk); kfree(chunk);
return 0; return 0;
...@@ -515,7 +515,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) ...@@ -515,7 +515,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
*/ */
replace_chunk(chunk, old); replace_chunk(chunk, old);
spin_unlock(&hash_lock); spin_unlock(&hash_lock);
mutex_unlock(&audit_tree_group->mark_mutex); fsnotify_group_unlock(audit_tree_group);
fsnotify_put_mark(mark); /* pair to fsnotify_find_mark */ fsnotify_put_mark(mark); /* pair to fsnotify_find_mark */
audit_mark_put_chunk(old); audit_mark_put_chunk(old);
...@@ -1044,12 +1044,12 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *mark, ...@@ -1044,12 +1044,12 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *mark,
{ {
struct audit_chunk *chunk; struct audit_chunk *chunk;
mutex_lock(&mark->group->mark_mutex); fsnotify_group_lock(mark->group);
spin_lock(&hash_lock); spin_lock(&hash_lock);
chunk = mark_chunk(mark); chunk = mark_chunk(mark);
replace_mark_chunk(mark, NULL); replace_mark_chunk(mark, NULL);
spin_unlock(&hash_lock); spin_unlock(&hash_lock);
mutex_unlock(&mark->group->mark_mutex); fsnotify_group_unlock(mark->group);
if (chunk) { if (chunk) {
evict_chunk(chunk); evict_chunk(chunk);
audit_mark_put_chunk(chunk); audit_mark_put_chunk(chunk);
...@@ -1074,7 +1074,7 @@ static int __init audit_tree_init(void) ...@@ -1074,7 +1074,7 @@ static int __init audit_tree_init(void)
audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC); audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC);
audit_tree_group = fsnotify_alloc_group(&audit_tree_ops); audit_tree_group = fsnotify_alloc_group(&audit_tree_ops, 0);
if (IS_ERR(audit_tree_group)) if (IS_ERR(audit_tree_group))
audit_panic("cannot initialize fsnotify group for rectree watches"); audit_panic("cannot initialize fsnotify group for rectree watches");
......
...@@ -493,7 +493,7 @@ static const struct fsnotify_ops audit_watch_fsnotify_ops = { ...@@ -493,7 +493,7 @@ static const struct fsnotify_ops audit_watch_fsnotify_ops = {
static int __init audit_watch_init(void) static int __init audit_watch_init(void)
{ {
audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops); audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops, 0);
if (IS_ERR(audit_watch_group)) { if (IS_ERR(audit_watch_group)) {
audit_watch_group = NULL; audit_watch_group = NULL;
audit_panic("cannot create audit fsnotify group"); audit_panic("cannot create audit fsnotify group");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment