Commit 3644286f authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'fsnotify_for_v5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull fsnotify updates from Jan Kara:

 - support for limited fanotify functionality for unprivileged users

 - faster merging of fanotify events

 - a few smaller fsnotify improvements

* tag 'fsnotify_for_v5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  shmem: allow reporting fanotify events with file handles on tmpfs
  fs: introduce a wrapper uuid_to_fsid()
  fanotify_user: use upper_32_bits() to verify mask
  fanotify: support limited functionality for unprivileged users
  fanotify: configurable limits via sysfs
  fanotify: limit number of event merge attempts
  fsnotify: use hash table for faster events merge
  fanotify: mix event info and pid into merge key hash
  fanotify: reduce event objectid to 29-bit hash
  fsnotify: allow fsnotify_{peek,remove}_first_event with empty queue
parents 767fcbc8 59cda49e
...@@ -1399,7 +1399,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) ...@@ -1399,7 +1399,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
struct super_block *sb = dentry->d_sb; struct super_block *sb = dentry->d_sb;
struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_sb_info *sbi = EXT2_SB(sb);
struct ext2_super_block *es = sbi->s_es; struct ext2_super_block *es = sbi->s_es;
u64 fsid;
spin_lock(&sbi->s_lock); spin_lock(&sbi->s_lock);
...@@ -1453,9 +1452,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) ...@@ -1453,9 +1452,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
buf->f_ffree = ext2_count_free_inodes(sb); buf->f_ffree = ext2_count_free_inodes(sb);
es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
buf->f_namelen = EXT2_NAME_LEN; buf->f_namelen = EXT2_NAME_LEN;
fsid = le64_to_cpup((void *)es->s_uuid) ^ buf->f_fsid = uuid_to_fsid(es->s_uuid);
le64_to_cpup((void *)es->s_uuid + sizeof(u64));
buf->f_fsid = u64_to_fsid(fsid);
spin_unlock(&sbi->s_lock); spin_unlock(&sbi->s_lock);
return 0; return 0;
} }
......
...@@ -6153,7 +6153,6 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -6153,7 +6153,6 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es; struct ext4_super_block *es = sbi->s_es;
ext4_fsblk_t overhead = 0, resv_blocks; ext4_fsblk_t overhead = 0, resv_blocks;
u64 fsid;
s64 bfree; s64 bfree;
resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters)); resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
...@@ -6174,9 +6173,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -6174,9 +6173,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = le32_to_cpu(es->s_inodes_count); buf->f_files = le32_to_cpu(es->s_inodes_count);
buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
buf->f_namelen = EXT4_NAME_LEN; buf->f_namelen = EXT4_NAME_LEN;
fsid = le64_to_cpup((void *)es->s_uuid) ^ buf->f_fsid = uuid_to_fsid(es->s_uuid);
le64_to_cpup((void *)es->s_uuid + sizeof(u64));
buf->f_fsid = u64_to_fsid(fsid);
#ifdef CONFIG_QUOTA #ifdef CONFIG_QUOTA
if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) && if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/audit.h> #include <linux/audit.h>
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/statfs.h> #include <linux/statfs.h>
#include <linux/stringhash.h>
#include "fanotify.h" #include "fanotify.h"
...@@ -22,12 +23,24 @@ static bool fanotify_path_equal(struct path *p1, struct path *p2) ...@@ -22,12 +23,24 @@ static bool fanotify_path_equal(struct path *p1, struct path *p2)
return p1->mnt == p2->mnt && p1->dentry == p2->dentry; return p1->mnt == p2->mnt && p1->dentry == p2->dentry;
} }
static unsigned int fanotify_hash_path(const struct path *path)
{
return hash_ptr(path->dentry, FANOTIFY_EVENT_HASH_BITS) ^
hash_ptr(path->mnt, FANOTIFY_EVENT_HASH_BITS);
}
static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1, static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
__kernel_fsid_t *fsid2) __kernel_fsid_t *fsid2)
{ {
return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1]; return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
} }
static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid)
{
return hash_32(fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^
hash_32(fsid->val[1], FANOTIFY_EVENT_HASH_BITS);
}
static bool fanotify_fh_equal(struct fanotify_fh *fh1, static bool fanotify_fh_equal(struct fanotify_fh *fh1,
struct fanotify_fh *fh2) struct fanotify_fh *fh2)
{ {
...@@ -38,6 +51,16 @@ static bool fanotify_fh_equal(struct fanotify_fh *fh1, ...@@ -38,6 +51,16 @@ static bool fanotify_fh_equal(struct fanotify_fh *fh1,
!memcmp(fanotify_fh_buf(fh1), fanotify_fh_buf(fh2), fh1->len); !memcmp(fanotify_fh_buf(fh1), fanotify_fh_buf(fh2), fh1->len);
} }
static unsigned int fanotify_hash_fh(struct fanotify_fh *fh)
{
long salt = (long)fh->type | (long)fh->len << 8;
/*
* full_name_hash() works long by long, so it handles fh buf optimally.
*/
return full_name_hash((void *)salt, fanotify_fh_buf(fh), fh->len);
}
static bool fanotify_fid_event_equal(struct fanotify_fid_event *ffe1, static bool fanotify_fid_event_equal(struct fanotify_fid_event *ffe1,
struct fanotify_fid_event *ffe2) struct fanotify_fid_event *ffe2)
{ {
...@@ -88,16 +111,12 @@ static bool fanotify_name_event_equal(struct fanotify_name_event *fne1, ...@@ -88,16 +111,12 @@ static bool fanotify_name_event_equal(struct fanotify_name_event *fne1,
return fanotify_info_equal(info1, info2); return fanotify_info_equal(info1, info2);
} }
static bool fanotify_should_merge(struct fsnotify_event *old_fsn, static bool fanotify_should_merge(struct fanotify_event *old,
struct fsnotify_event *new_fsn) struct fanotify_event *new)
{ {
struct fanotify_event *old, *new; pr_debug("%s: old=%p new=%p\n", __func__, old, new);
pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn);
old = FANOTIFY_E(old_fsn);
new = FANOTIFY_E(new_fsn);
if (old_fsn->objectid != new_fsn->objectid || if (old->hash != new->hash ||
old->type != new->type || old->pid != new->pid) old->type != new->type || old->pid != new->pid)
return false; return false;
...@@ -129,14 +148,20 @@ static bool fanotify_should_merge(struct fsnotify_event *old_fsn, ...@@ -129,14 +148,20 @@ static bool fanotify_should_merge(struct fsnotify_event *old_fsn,
return false; return false;
} }
/* Limit event merges to limit CPU overhead per event */
#define FANOTIFY_MAX_MERGE_EVENTS 128
/* and the list better be locked by something too! */ /* and the list better be locked by something too! */
static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) static int fanotify_merge(struct fsnotify_group *group,
struct fsnotify_event *event)
{ {
struct fsnotify_event *test_event; struct fanotify_event *old, *new = FANOTIFY_E(event);
struct fanotify_event *new; unsigned int bucket = fanotify_event_hash_bucket(group, new);
struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket];
int i = 0;
pr_debug("%s: list=%p event=%p\n", __func__, list, event); pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
new = FANOTIFY_E(event); group, event, bucket);
/* /*
* Don't merge a permission event with any other event so that we know * Don't merge a permission event with any other event so that we know
...@@ -146,9 +171,11 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) ...@@ -146,9 +171,11 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
if (fanotify_is_perm_event(new->mask)) if (fanotify_is_perm_event(new->mask))
return 0; return 0;
list_for_each_entry_reverse(test_event, list, list) { hlist_for_each_entry(old, hlist, merge_list) {
if (fanotify_should_merge(test_event, event)) { if (++i > FANOTIFY_MAX_MERGE_EVENTS)
FANOTIFY_E(test_event)->mask |= new->mask; break;
if (fanotify_should_merge(old, new)) {
old->mask |= new->mask;
return 1; return 1;
} }
} }
...@@ -184,8 +211,11 @@ static int fanotify_get_response(struct fsnotify_group *group, ...@@ -184,8 +211,11 @@ static int fanotify_get_response(struct fsnotify_group *group,
return ret; return ret;
} }
/* Event not yet reported? Just remove it. */ /* Event not yet reported? Just remove it. */
if (event->state == FAN_EVENT_INIT) if (event->state == FAN_EVENT_INIT) {
fsnotify_remove_queued_event(group, &event->fae.fse); fsnotify_remove_queued_event(group, &event->fae.fse);
/* Permission events are not supposed to be hashed */
WARN_ON_ONCE(!hlist_unhashed(&event->fae.merge_list));
}
/* /*
* Event may be also answered in case signal delivery raced * Event may be also answered in case signal delivery raced
* with wakeup. In that case we have nothing to do besides * with wakeup. In that case we have nothing to do besides
...@@ -329,7 +359,8 @@ static int fanotify_encode_fh_len(struct inode *inode) ...@@ -329,7 +359,8 @@ static int fanotify_encode_fh_len(struct inode *inode)
* Return 0 on failure to encode. * Return 0 on failure to encode.
*/ */
static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
unsigned int fh_len, gfp_t gfp) unsigned int fh_len, unsigned int *hash,
gfp_t gfp)
{ {
int dwords, type = 0; int dwords, type = 0;
char *ext_buf = NULL; char *ext_buf = NULL;
...@@ -372,6 +403,9 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, ...@@ -372,6 +403,9 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
fh->type = type; fh->type = type;
fh->len = fh_len; fh->len = fh_len;
/* Mix fh into event merge key */
*hash ^= fanotify_hash_fh(fh);
return FANOTIFY_FH_HDR_LEN + fh_len; return FANOTIFY_FH_HDR_LEN + fh_len;
out_err: out_err:
...@@ -425,6 +459,7 @@ static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data, ...@@ -425,6 +459,7 @@ static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data,
} }
static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
unsigned int *hash,
gfp_t gfp) gfp_t gfp)
{ {
struct fanotify_path_event *pevent; struct fanotify_path_event *pevent;
...@@ -435,6 +470,7 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, ...@@ -435,6 +470,7 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH; pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH;
pevent->path = *path; pevent->path = *path;
*hash ^= fanotify_hash_path(path);
path_get(path); path_get(path);
return &pevent->fae; return &pevent->fae;
...@@ -460,6 +496,7 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path, ...@@ -460,6 +496,7 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
__kernel_fsid_t *fsid, __kernel_fsid_t *fsid,
unsigned int *hash,
gfp_t gfp) gfp_t gfp)
{ {
struct fanotify_fid_event *ffe; struct fanotify_fid_event *ffe;
...@@ -470,16 +507,18 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, ...@@ -470,16 +507,18 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
ffe->fae.type = FANOTIFY_EVENT_TYPE_FID; ffe->fae.type = FANOTIFY_EVENT_TYPE_FID;
ffe->fsid = *fsid; ffe->fsid = *fsid;
*hash ^= fanotify_hash_fsid(fsid);
fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id), fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id),
gfp); hash, gfp);
return &ffe->fae; return &ffe->fae;
} }
static struct fanotify_event *fanotify_alloc_name_event(struct inode *id, static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
__kernel_fsid_t *fsid, __kernel_fsid_t *fsid,
const struct qstr *file_name, const struct qstr *name,
struct inode *child, struct inode *child,
unsigned int *hash,
gfp_t gfp) gfp_t gfp)
{ {
struct fanotify_name_event *fne; struct fanotify_name_event *fne;
...@@ -492,24 +531,30 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id, ...@@ -492,24 +531,30 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
size = sizeof(*fne) + FANOTIFY_FH_HDR_LEN + dir_fh_len; size = sizeof(*fne) + FANOTIFY_FH_HDR_LEN + dir_fh_len;
if (child_fh_len) if (child_fh_len)
size += FANOTIFY_FH_HDR_LEN + child_fh_len; size += FANOTIFY_FH_HDR_LEN + child_fh_len;
if (file_name) if (name)
size += file_name->len + 1; size += name->len + 1;
fne = kmalloc(size, gfp); fne = kmalloc(size, gfp);
if (!fne) if (!fne)
return NULL; return NULL;
fne->fae.type = FANOTIFY_EVENT_TYPE_FID_NAME; fne->fae.type = FANOTIFY_EVENT_TYPE_FID_NAME;
fne->fsid = *fsid; fne->fsid = *fsid;
*hash ^= fanotify_hash_fsid(fsid);
info = &fne->info; info = &fne->info;
fanotify_info_init(info); fanotify_info_init(info);
dfh = fanotify_info_dir_fh(info); dfh = fanotify_info_dir_fh(info);
info->dir_fh_totlen = fanotify_encode_fh(dfh, id, dir_fh_len, 0); info->dir_fh_totlen = fanotify_encode_fh(dfh, id, dir_fh_len, hash, 0);
if (child_fh_len) { if (child_fh_len) {
ffh = fanotify_info_file_fh(info); ffh = fanotify_info_file_fh(info);
info->file_fh_totlen = fanotify_encode_fh(ffh, child, child_fh_len, 0); info->file_fh_totlen = fanotify_encode_fh(ffh, child,
child_fh_len, hash, 0);
}
if (name) {
long salt = name->len;
fanotify_info_copy_name(info, name);
*hash ^= full_name_hash((void *)salt, name->name, name->len);
} }
if (file_name)
fanotify_info_copy_name(info, file_name);
pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n", pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n",
__func__, id->i_ino, size, dir_fh_len, child_fh_len, __func__, id->i_ino, size, dir_fh_len, child_fh_len,
...@@ -533,6 +578,9 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, ...@@ -533,6 +578,9 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
struct mem_cgroup *old_memcg; struct mem_cgroup *old_memcg;
struct inode *child = NULL; struct inode *child = NULL;
bool name_event = false; bool name_event = false;
unsigned int hash = 0;
bool ondir = mask & FAN_ONDIR;
struct pid *pid;
if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) {
/* /*
...@@ -540,8 +588,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, ...@@ -540,8 +588,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
* report the child fid for events reported on a non-dir child * report the child fid for events reported on a non-dir child
* in addition to reporting the parent fid and maybe child name. * in addition to reporting the parent fid and maybe child name.
*/ */
if ((fid_mode & FAN_REPORT_FID) && if ((fid_mode & FAN_REPORT_FID) && id != dirid && !ondir)
id != dirid && !(mask & FAN_ONDIR))
child = id; child = id;
id = dirid; id = dirid;
...@@ -562,8 +609,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, ...@@ -562,8 +609,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
if (!(fid_mode & FAN_REPORT_NAME)) { if (!(fid_mode & FAN_REPORT_NAME)) {
name_event = !!child; name_event = !!child;
file_name = NULL; file_name = NULL;
} else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || !ondir) {
!(mask & FAN_ONDIR)) {
name_event = true; name_event = true;
} }
} }
...@@ -586,26 +632,25 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, ...@@ -586,26 +632,25 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
event = fanotify_alloc_perm_event(path, gfp); event = fanotify_alloc_perm_event(path, gfp);
} else if (name_event && (file_name || child)) { } else if (name_event && (file_name || child)) {
event = fanotify_alloc_name_event(id, fsid, file_name, child, event = fanotify_alloc_name_event(id, fsid, file_name, child,
gfp); &hash, gfp);
} else if (fid_mode) { } else if (fid_mode) {
event = fanotify_alloc_fid_event(id, fsid, gfp); event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
} else { } else {
event = fanotify_alloc_path_event(path, gfp); event = fanotify_alloc_path_event(path, &hash, gfp);
} }
if (!event) if (!event)
goto out; goto out;
/*
* Use the victim inode instead of the watching inode as the id for
* event queue, so event reported on parent is merged with event
* reported on child when both directory and child watches exist.
*/
fanotify_init_event(event, (unsigned long)id, mask);
if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
event->pid = get_pid(task_pid(current)); pid = get_pid(task_pid(current));
else else
event->pid = get_pid(task_tgid(current)); pid = get_pid(task_tgid(current));
/* Mix event info, FAN_ONDIR flag and pid into event merge key */
hash ^= hash_long((unsigned long)pid | ondir, FANOTIFY_EVENT_HASH_BITS);
fanotify_init_event(event, hash, mask);
event->pid = pid;
out: out:
set_active_memcg(old_memcg); set_active_memcg(old_memcg);
...@@ -645,6 +690,24 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) ...@@ -645,6 +690,24 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
return fsid; return fsid;
} }
/*
* Add an event to hash table for faster merge.
*/
static void fanotify_insert_event(struct fsnotify_group *group,
struct fsnotify_event *fsn_event)
{
struct fanotify_event *event = FANOTIFY_E(fsn_event);
unsigned int bucket = fanotify_event_hash_bucket(group, event);
struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket];
assert_spin_locked(&group->notification_lock);
pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
group, event, bucket);
hlist_add_head(&event->merge_list, hlist);
}
static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
const void *data, int data_type, const void *data, int data_type,
struct inode *dir, struct inode *dir,
...@@ -715,7 +778,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, ...@@ -715,7 +778,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
} }
fsn_event = &event->fse; fsn_event = &event->fse;
ret = fsnotify_add_event(group, fsn_event, fanotify_merge); ret = fsnotify_add_event(group, fsn_event, fanotify_merge,
fanotify_is_hashed_event(mask) ?
fanotify_insert_event : NULL);
if (ret) { if (ret) {
/* Permission events shouldn't be merged */ /* Permission events shouldn't be merged */
BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
...@@ -736,11 +801,10 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, ...@@ -736,11 +801,10 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
static void fanotify_free_group_priv(struct fsnotify_group *group) static void fanotify_free_group_priv(struct fsnotify_group *group)
{ {
struct user_struct *user; kfree(group->fanotify_data.merge_hash);
if (group->fanotify_data.ucounts)
user = group->fanotify_data.user; dec_ucount(group->fanotify_data.ucounts,
atomic_dec(&user->fanotify_listeners); UCOUNT_FANOTIFY_GROUPS);
free_uid(user);
} }
static void fanotify_free_path_event(struct fanotify_event *event) static void fanotify_free_path_event(struct fanotify_event *event)
...@@ -796,6 +860,13 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) ...@@ -796,6 +860,13 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event)
} }
} }
static void fanotify_freeing_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group)
{
if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS))
dec_ucount(group->fanotify_data.ucounts, UCOUNT_FANOTIFY_MARKS);
}
static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
{ {
kmem_cache_free(fanotify_mark_cache, fsn_mark); kmem_cache_free(fanotify_mark_cache, fsn_mark);
...@@ -805,5 +876,6 @@ const struct fsnotify_ops fanotify_fsnotify_ops = { ...@@ -805,5 +876,6 @@ const struct fsnotify_ops fanotify_fsnotify_ops = {
.handle_event = fanotify_handle_event, .handle_event = fanotify_handle_event,
.free_group_priv = fanotify_free_group_priv, .free_group_priv = fanotify_free_group_priv,
.free_event = fanotify_free_event, .free_event = fanotify_free_event,
.freeing_mark = fanotify_freeing_mark,
.free_mark = fanotify_free_mark, .free_mark = fanotify_free_mark,
}; };
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/path.h> #include <linux/path.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/exportfs.h> #include <linux/exportfs.h>
#include <linux/hashtable.h>
extern struct kmem_cache *fanotify_mark_cache; extern struct kmem_cache *fanotify_mark_cache;
extern struct kmem_cache *fanotify_fid_event_cachep; extern struct kmem_cache *fanotify_fid_event_cachep;
...@@ -115,6 +116,11 @@ static inline void fanotify_info_init(struct fanotify_info *info) ...@@ -115,6 +116,11 @@ static inline void fanotify_info_init(struct fanotify_info *info)
info->name_len = 0; info->name_len = 0;
} }
static inline unsigned int fanotify_info_len(struct fanotify_info *info)
{
return info->dir_fh_totlen + info->file_fh_totlen + info->name_len;
}
static inline void fanotify_info_copy_name(struct fanotify_info *info, static inline void fanotify_info_copy_name(struct fanotify_info *info,
const struct qstr *name) const struct qstr *name)
{ {
...@@ -135,19 +141,31 @@ enum fanotify_event_type { ...@@ -135,19 +141,31 @@ enum fanotify_event_type {
FANOTIFY_EVENT_TYPE_PATH, FANOTIFY_EVENT_TYPE_PATH,
FANOTIFY_EVENT_TYPE_PATH_PERM, FANOTIFY_EVENT_TYPE_PATH_PERM,
FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */ FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
__FANOTIFY_EVENT_TYPE_NUM
}; };
#define FANOTIFY_EVENT_TYPE_BITS \
(ilog2(__FANOTIFY_EVENT_TYPE_NUM - 1) + 1)
#define FANOTIFY_EVENT_HASH_BITS \
(32 - FANOTIFY_EVENT_TYPE_BITS)
struct fanotify_event { struct fanotify_event {
struct fsnotify_event fse; struct fsnotify_event fse;
struct hlist_node merge_list; /* List for hashed merge */
u32 mask; u32 mask;
enum fanotify_event_type type; struct {
unsigned int type : FANOTIFY_EVENT_TYPE_BITS;
unsigned int hash : FANOTIFY_EVENT_HASH_BITS;
};
struct pid *pid; struct pid *pid;
}; };
static inline void fanotify_init_event(struct fanotify_event *event, static inline void fanotify_init_event(struct fanotify_event *event,
unsigned long id, u32 mask) unsigned int hash, u32 mask)
{ {
fsnotify_init_event(&event->fse, id); fsnotify_init_event(&event->fse);
INIT_HLIST_NODE(&event->merge_list);
event->hash = hash;
event->mask = mask; event->mask = mask;
event->pid = NULL; event->pid = NULL;
} }
...@@ -284,3 +302,25 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event) ...@@ -284,3 +302,25 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event)
else else
return NULL; return NULL;
} }
/*
* Use 128 size hash table to speed up events merge.
*/
#define FANOTIFY_HTABLE_BITS (7)
#define FANOTIFY_HTABLE_SIZE (1 << FANOTIFY_HTABLE_BITS)
#define FANOTIFY_HTABLE_MASK (FANOTIFY_HTABLE_SIZE - 1)
/*
* Permission events and overflow event do not get merged - don't hash them.
*/
static inline bool fanotify_is_hashed_event(u32 mask)
{
return !fanotify_is_perm_event(mask) && !(mask & FS_Q_OVERFLOW);
}
static inline unsigned int fanotify_event_hash_bucket(
struct fsnotify_group *group,
struct fanotify_event *event)
{
return event->hash & FANOTIFY_HTABLE_MASK;
}
...@@ -27,8 +27,61 @@ ...@@ -27,8 +27,61 @@
#include "fanotify.h" #include "fanotify.h"
#define FANOTIFY_DEFAULT_MAX_EVENTS 16384 #define FANOTIFY_DEFAULT_MAX_EVENTS 16384
#define FANOTIFY_DEFAULT_MAX_MARKS 8192 #define FANOTIFY_OLD_DEFAULT_MAX_MARKS 8192
#define FANOTIFY_DEFAULT_MAX_LISTENERS 128 #define FANOTIFY_DEFAULT_MAX_GROUPS 128
/*
* Legacy fanotify marks limits (8192) is per group and we introduced a tunable
* limit of marks per user, similar to inotify. Effectively, the legacy limit
* of fanotify marks per user is <max marks per group> * <max groups per user>.
* This default limit (1M) also happens to match the increased limit of inotify
* max_user_watches since v5.10.
*/
#define FANOTIFY_DEFAULT_MAX_USER_MARKS \
(FANOTIFY_OLD_DEFAULT_MAX_MARKS * FANOTIFY_DEFAULT_MAX_GROUPS)
/*
* Most of the memory cost of adding an inode mark is pinning the marked inode.
* The size of the filesystem inode struct is not uniform across filesystems,
* so double the size of a VFS inode is used as a conservative approximation.
*/
#define INODE_MARK_COST (2 * sizeof(struct inode))
/* configurable via /proc/sys/fs/fanotify/ */
static int fanotify_max_queued_events __read_mostly;
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
struct ctl_table fanotify_table[] = {
{
.procname = "max_user_groups",
.data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS],
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
{
.procname = "max_user_marks",
.data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS],
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
{
.procname = "max_queued_events",
.data = &fanotify_max_queued_events,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO
},
{ }
};
#endif /* CONFIG_SYSCTL */
/* /*
* All flags that may be specified in parameter event_f_flags of fanotify_init. * All flags that may be specified in parameter event_f_flags of fanotify_init.
...@@ -89,6 +142,23 @@ static int fanotify_event_info_len(unsigned int fid_mode, ...@@ -89,6 +142,23 @@ static int fanotify_event_info_len(unsigned int fid_mode,
return info_len; return info_len;
} }
/*
* Remove an hashed event from merge hash table.
*/
static void fanotify_unhash_event(struct fsnotify_group *group,
struct fanotify_event *event)
{
assert_spin_locked(&group->notification_lock);
pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
group, event, fanotify_event_hash_bucket(group, event));
if (WARN_ON_ONCE(hlist_unhashed(&event->merge_list)))
return;
hlist_del_init(&event->merge_list);
}
/* /*
* Get an fanotify notification event if one exists and is small * Get an fanotify notification event if one exists and is small
* enough to fit in "count". Return an error pointer if the count * enough to fit in "count". Return an error pointer if the count
...@@ -100,26 +170,34 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group, ...@@ -100,26 +170,34 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group,
{ {
size_t event_size = FAN_EVENT_METADATA_LEN; size_t event_size = FAN_EVENT_METADATA_LEN;
struct fanotify_event *event = NULL; struct fanotify_event *event = NULL;
struct fsnotify_event *fsn_event;
unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
pr_debug("%s: group=%p count=%zd\n", __func__, group, count); pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
spin_lock(&group->notification_lock); spin_lock(&group->notification_lock);
if (fsnotify_notify_queue_is_empty(group)) fsn_event = fsnotify_peek_first_event(group);
if (!fsn_event)
goto out; goto out;
if (fid_mode) { event = FANOTIFY_E(fsn_event);
event_size += fanotify_event_info_len(fid_mode, if (fid_mode)
FANOTIFY_E(fsnotify_peek_first_event(group))); event_size += fanotify_event_info_len(fid_mode, event);
}
if (event_size > count) { if (event_size > count) {
event = ERR_PTR(-EINVAL); event = ERR_PTR(-EINVAL);
goto out; goto out;
} }
event = FANOTIFY_E(fsnotify_remove_first_event(group));
/*
* Held the notification_lock the whole time, so this is the
* same event we peeked above.
*/
fsnotify_remove_first_event(group);
if (fanotify_is_perm_event(event->mask)) if (fanotify_is_perm_event(event->mask))
FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED; FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED;
if (fanotify_is_hashed_event(event->mask))
fanotify_unhash_event(group, event);
out: out:
spin_unlock(&group->notification_lock); spin_unlock(&group->notification_lock);
return event; return event;
...@@ -341,6 +419,14 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, ...@@ -341,6 +419,14 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
metadata.reserved = 0; metadata.reserved = 0;
metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
metadata.pid = pid_vnr(event->pid); metadata.pid = pid_vnr(event->pid);
/*
* For an unprivileged listener, event->pid can be used to identify the
* events generated by the listener process itself, without disclosing
* the pids of other processes.
*/
if (!capable(CAP_SYS_ADMIN) &&
task_tgid(current) != event->pid)
metadata.pid = 0;
if (path && path->mnt && path->dentry) { if (path && path->mnt && path->dentry) {
fd = create_fd(group, path, &f); fd = create_fd(group, path, &f);
...@@ -573,6 +659,7 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t ...@@ -573,6 +659,7 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t
static int fanotify_release(struct inode *ignored, struct file *file) static int fanotify_release(struct inode *ignored, struct file *file)
{ {
struct fsnotify_group *group = file->private_data; struct fsnotify_group *group = file->private_data;
struct fsnotify_event *fsn_event;
/* /*
* Stop new events from arriving in the notification queue. since * Stop new events from arriving in the notification queue. since
...@@ -601,13 +688,12 @@ static int fanotify_release(struct inode *ignored, struct file *file) ...@@ -601,13 +688,12 @@ static int fanotify_release(struct inode *ignored, struct file *file)
* dequeue them and set the response. They will be freed once the * dequeue them and set the response. They will be freed once the
* response is consumed and fanotify_get_response() returns. * response is consumed and fanotify_get_response() returns.
*/ */
while (!fsnotify_notify_queue_is_empty(group)) { while ((fsn_event = fsnotify_remove_first_event(group))) {
struct fanotify_event *event; struct fanotify_event *event = FANOTIFY_E(fsn_event);
event = FANOTIFY_E(fsnotify_remove_first_event(group));
if (!(event->mask & FANOTIFY_PERM_EVENTS)) { if (!(event->mask & FANOTIFY_PERM_EVENTS)) {
spin_unlock(&group->notification_lock); spin_unlock(&group->notification_lock);
fsnotify_destroy_event(group, &event->fse); fsnotify_destroy_event(group, fsn_event);
} else { } else {
finish_permission_event(group, FANOTIFY_PERM(event), finish_permission_event(group, FANOTIFY_PERM(event),
FAN_ALLOW); FAN_ALLOW);
...@@ -822,24 +908,38 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, ...@@ -822,24 +908,38 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
unsigned int type, unsigned int type,
__kernel_fsid_t *fsid) __kernel_fsid_t *fsid)
{ {
struct ucounts *ucounts = group->fanotify_data.ucounts;
struct fsnotify_mark *mark; struct fsnotify_mark *mark;
int ret; int ret;
if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) /*
* Enforce per user marks limits per user in all containing user ns.
* A group with FAN_UNLIMITED_MARKS does not contribute to mark count
* in the limited groups account.
*/
if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) &&
!inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS))
return ERR_PTR(-ENOSPC); return ERR_PTR(-ENOSPC);
mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
if (!mark) if (!mark) {
return ERR_PTR(-ENOMEM); ret = -ENOMEM;
goto out_dec_ucounts;
}
fsnotify_init_mark(mark, group); fsnotify_init_mark(mark, group);
ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid); ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid);
if (ret) { if (ret) {
fsnotify_put_mark(mark); fsnotify_put_mark(mark);
return ERR_PTR(ret); goto out_dec_ucounts;
} }
return mark; return mark;
out_dec_ucounts:
if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS))
dec_ucount(ucounts, UCOUNT_FANOTIFY_MARKS);
return ERR_PTR(ret);
} }
...@@ -919,20 +1019,41 @@ static struct fsnotify_event *fanotify_alloc_overflow_event(void) ...@@ -919,20 +1019,41 @@ static struct fsnotify_event *fanotify_alloc_overflow_event(void)
return &oevent->fse; return &oevent->fse;
} }
static struct hlist_head *fanotify_alloc_merge_hash(void)
{
struct hlist_head *hash;
hash = kmalloc(sizeof(struct hlist_head) << FANOTIFY_HTABLE_BITS,
GFP_KERNEL_ACCOUNT);
if (!hash)
return NULL;
__hash_init(hash, FANOTIFY_HTABLE_SIZE);
return hash;
}
/* fanotify syscalls */ /* fanotify syscalls */
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{ {
struct fsnotify_group *group; struct fsnotify_group *group;
int f_flags, fd; int f_flags, fd;
struct user_struct *user;
unsigned int fid_mode = flags & FANOTIFY_FID_BITS; unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
unsigned int class = flags & FANOTIFY_CLASS_BITS; unsigned int class = flags & FANOTIFY_CLASS_BITS;
pr_debug("%s: flags=%x event_f_flags=%x\n", pr_debug("%s: flags=%x event_f_flags=%x\n",
__func__, flags, event_f_flags); __func__, flags, event_f_flags);
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN)) {
return -EPERM; /*
* An unprivileged user can setup an fanotify group with
* limited functionality - an unprivileged group is limited to
* notification events with file handles and it cannot use
* unlimited queue/marks.
*/
if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode)
return -EPERM;
}
#ifdef CONFIG_AUDITSYSCALL #ifdef CONFIG_AUDITSYSCALL
if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT)) if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT))
...@@ -963,12 +1084,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) ...@@ -963,12 +1084,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID)) if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID))
return -EINVAL; return -EINVAL;
user = get_current_user();
if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
free_uid(user);
return -EMFILE;
}
f_flags = O_RDWR | FMODE_NONOTIFY; f_flags = O_RDWR | FMODE_NONOTIFY;
if (flags & FAN_CLOEXEC) if (flags & FAN_CLOEXEC)
f_flags |= O_CLOEXEC; f_flags |= O_CLOEXEC;
...@@ -978,15 +1093,27 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) ...@@ -978,15 +1093,27 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
/* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
group = fsnotify_alloc_user_group(&fanotify_fsnotify_ops); group = fsnotify_alloc_user_group(&fanotify_fsnotify_ops);
if (IS_ERR(group)) { if (IS_ERR(group)) {
free_uid(user);
return PTR_ERR(group); return PTR_ERR(group);
} }
group->fanotify_data.user = user; /* Enforce groups limits per user in all containing user ns */
group->fanotify_data.ucounts = inc_ucount(current_user_ns(),
current_euid(),
UCOUNT_FANOTIFY_GROUPS);
if (!group->fanotify_data.ucounts) {
fd = -EMFILE;
goto out_destroy_group;
}
group->fanotify_data.flags = flags; group->fanotify_data.flags = flags;
atomic_inc(&user->fanotify_listeners);
group->memcg = get_mem_cgroup_from_mm(current->mm); group->memcg = get_mem_cgroup_from_mm(current->mm);
group->fanotify_data.merge_hash = fanotify_alloc_merge_hash();
if (!group->fanotify_data.merge_hash) {
fd = -ENOMEM;
goto out_destroy_group;
}
group->overflow_event = fanotify_alloc_overflow_event(); group->overflow_event = fanotify_alloc_overflow_event();
if (unlikely(!group->overflow_event)) { if (unlikely(!group->overflow_event)) {
fd = -ENOMEM; fd = -ENOMEM;
...@@ -1019,16 +1146,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) ...@@ -1019,16 +1146,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
goto out_destroy_group; goto out_destroy_group;
group->max_events = UINT_MAX; group->max_events = UINT_MAX;
} else { } else {
group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; group->max_events = fanotify_max_queued_events;
} }
if (flags & FAN_UNLIMITED_MARKS) { if (flags & FAN_UNLIMITED_MARKS) {
fd = -EPERM; fd = -EPERM;
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
goto out_destroy_group; goto out_destroy_group;
group->fanotify_data.max_marks = UINT_MAX;
} else {
group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
} }
if (flags & FAN_ENABLE_AUDIT) { if (flags & FAN_ENABLE_AUDIT) {
...@@ -1126,7 +1250,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, ...@@ -1126,7 +1250,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
__func__, fanotify_fd, flags, dfd, pathname, mask); __func__, fanotify_fd, flags, dfd, pathname, mask);
/* we only use the lower 32 bits as of right now. */ /* we only use the lower 32 bits as of right now. */
if (mask & ((__u64)0xffffffff << 32)) if (upper_32_bits(mask))
return -EINVAL; return -EINVAL;
if (flags & ~FANOTIFY_MARK_FLAGS) if (flags & ~FANOTIFY_MARK_FLAGS)
...@@ -1180,6 +1304,15 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, ...@@ -1180,6 +1304,15 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
goto fput_and_out; goto fput_and_out;
group = f.file->private_data; group = f.file->private_data;
/*
* An unprivileged user is not allowed to watch a mount point nor
* a filesystem.
*/
ret = -EPERM;
if (!capable(CAP_SYS_ADMIN) &&
mark_type != FAN_MARK_INODE)
goto fput_and_out;
/* /*
* group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
* allowed to set permissions events. * allowed to set permissions events.
...@@ -1312,6 +1445,21 @@ SYSCALL32_DEFINE6(fanotify_mark, ...@@ -1312,6 +1445,21 @@ SYSCALL32_DEFINE6(fanotify_mark,
*/ */
static int __init fanotify_user_setup(void) static int __init fanotify_user_setup(void)
{ {
struct sysinfo si;
int max_marks;
si_meminfo(&si);
/*
* Allow up to 1% of addressable memory to be accounted for per user
* marks limited to the range [8192, 1048576]. mount and sb marks are
* a lot cheaper than inode marks, but there is no reason for a user
* to have many of those, so calculate by the cost of inode marks.
*/
max_marks = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) /
INODE_MARK_COST;
max_marks = clamp(max_marks, FANOTIFY_OLD_DEFAULT_MAX_MARKS,
FANOTIFY_DEFAULT_MAX_USER_MARKS);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
...@@ -1326,6 +1474,11 @@ static int __init fanotify_user_setup(void) ...@@ -1326,6 +1474,11 @@ static int __init fanotify_user_setup(void)
KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
} }
fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS;
init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =
FANOTIFY_DEFAULT_MAX_GROUPS;
init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS] = max_marks;
return 0; return 0;
} }
device_initcall(fanotify_user_setup); device_initcall(fanotify_user_setup);
...@@ -144,7 +144,8 @@ void fanotify_show_fdinfo(struct seq_file *m, struct file *f) ...@@ -144,7 +144,8 @@ void fanotify_show_fdinfo(struct seq_file *m, struct file *f)
struct fsnotify_group *group = f->private_data; struct fsnotify_group *group = f->private_data;
seq_printf(m, "fanotify flags:%x event-flags:%x\n", seq_printf(m, "fanotify flags:%x event-flags:%x\n",
group->fanotify_data.flags, group->fanotify_data.f_flags); group->fanotify_data.flags,
group->fanotify_data.f_flags);
show_fdinfo(m, f, fanotify_fdinfo); show_fdinfo(m, f, fanotify_fdinfo);
} }
......
...@@ -122,7 +122,6 @@ static struct fsnotify_group *__fsnotify_alloc_group( ...@@ -122,7 +122,6 @@ static struct fsnotify_group *__fsnotify_alloc_group(
/* set to 0 when there a no external references to this group */ /* set to 0 when there a no external references to this group */
refcount_set(&group->refcnt, 1); refcount_set(&group->refcnt, 1);
atomic_set(&group->num_marks, 0);
atomic_set(&group->user_waits, 0); atomic_set(&group->user_waits, 0);
spin_lock_init(&group->notification_lock); spin_lock_init(&group->notification_lock);
......
...@@ -46,9 +46,10 @@ static bool event_compare(struct fsnotify_event *old_fsn, ...@@ -46,9 +46,10 @@ static bool event_compare(struct fsnotify_event *old_fsn,
return false; return false;
} }
static int inotify_merge(struct list_head *list, static int inotify_merge(struct fsnotify_group *group,
struct fsnotify_event *event) struct fsnotify_event *event)
{ {
struct list_head *list = &group->notification_list;
struct fsnotify_event *last_event; struct fsnotify_event *last_event;
last_event = list_entry(list->prev, struct fsnotify_event, list); last_event = list_entry(list->prev, struct fsnotify_event, list);
...@@ -107,7 +108,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, ...@@ -107,7 +108,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
mask &= ~IN_ISDIR; mask &= ~IN_ISDIR;
fsn_event = &event->fse; fsn_event = &event->fse;
fsnotify_init_event(fsn_event, 0); fsnotify_init_event(fsn_event);
event->mask = mask; event->mask = mask;
event->wd = i_mark->wd; event->wd = i_mark->wd;
event->sync_cookie = cookie; event->sync_cookie = cookie;
...@@ -115,7 +116,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, ...@@ -115,7 +116,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
if (len) if (len)
strcpy(event->name, name->name); strcpy(event->name, name->name);
ret = fsnotify_add_event(group, fsn_event, inotify_merge); ret = fsnotify_add_event(group, fsn_event, inotify_merge, NULL);
if (ret) { if (ret) {
/* Our event wasn't used in the end. Free it. */ /* Our event wasn't used in the end. Free it. */
fsnotify_destroy_event(group, fsn_event); fsnotify_destroy_event(group, fsn_event);
......
...@@ -146,10 +146,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, ...@@ -146,10 +146,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
size_t event_size = sizeof(struct inotify_event); size_t event_size = sizeof(struct inotify_event);
struct fsnotify_event *event; struct fsnotify_event *event;
if (fsnotify_notify_queue_is_empty(group))
return NULL;
event = fsnotify_peek_first_event(group); event = fsnotify_peek_first_event(group);
if (!event)
return NULL;
pr_debug("%s: group=%p event=%p\n", __func__, group, event); pr_debug("%s: group=%p event=%p\n", __func__, group, event);
...@@ -642,7 +641,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) ...@@ -642,7 +641,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
group->overflow_event = &oevent->fse; group->overflow_event = &oevent->fse;
fsnotify_init_event(group->overflow_event, 0); fsnotify_init_event(group->overflow_event);
oevent->mask = FS_Q_OVERFLOW; oevent->mask = FS_Q_OVERFLOW;
oevent->wd = -1; oevent->wd = -1;
oevent->sync_cookie = 0; oevent->sync_cookie = 0;
......
...@@ -391,8 +391,6 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) ...@@ -391,8 +391,6 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark)
list_del_init(&mark->g_list); list_del_init(&mark->g_list);
spin_unlock(&mark->lock); spin_unlock(&mark->lock);
atomic_dec(&group->num_marks);
/* Drop mark reference acquired in fsnotify_add_mark_locked() */ /* Drop mark reference acquired in fsnotify_add_mark_locked() */
fsnotify_put_mark(mark); fsnotify_put_mark(mark);
} }
...@@ -656,7 +654,6 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, ...@@ -656,7 +654,6 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED; mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;
list_add(&mark->g_list, &group->marks_list); list_add(&mark->g_list, &group->marks_list);
atomic_inc(&group->num_marks);
fsnotify_get_mark(mark); /* for g_list */ fsnotify_get_mark(mark); /* for g_list */
spin_unlock(&mark->lock); spin_unlock(&mark->lock);
...@@ -674,7 +671,6 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, ...@@ -674,7 +671,6 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
FSNOTIFY_MARK_FLAG_ATTACHED); FSNOTIFY_MARK_FLAG_ATTACHED);
list_del_init(&mark->g_list); list_del_init(&mark->g_list);
spin_unlock(&mark->lock); spin_unlock(&mark->lock);
atomic_dec(&group->num_marks);
fsnotify_put_mark(mark); fsnotify_put_mark(mark);
return ret; return ret;
......
...@@ -47,13 +47,6 @@ u32 fsnotify_get_cookie(void) ...@@ -47,13 +47,6 @@ u32 fsnotify_get_cookie(void)
} }
EXPORT_SYMBOL_GPL(fsnotify_get_cookie); EXPORT_SYMBOL_GPL(fsnotify_get_cookie);
/* return true if the notify queue is empty, false otherwise */
bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
{
assert_spin_locked(&group->notification_lock);
return list_empty(&group->notification_list) ? true : false;
}
void fsnotify_destroy_event(struct fsnotify_group *group, void fsnotify_destroy_event(struct fsnotify_group *group,
struct fsnotify_event *event) struct fsnotify_event *event)
{ {
...@@ -75,16 +68,22 @@ void fsnotify_destroy_event(struct fsnotify_group *group, ...@@ -75,16 +68,22 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
} }
/* /*
* Add an event to the group notification queue. The group can later pull this * Try to add an event to the notification queue.
* event off the queue to deal with. The function returns 0 if the event was * The group can later pull this event off the queue to deal with.
* added to the queue, 1 if the event was merged with some other queued event, * The group can use the @merge hook to merge the event with a queued event.
* The group can use the @insert hook to insert the event into hash table.
* The function returns:
* 0 if the event was added to a queue
* 1 if the event was merged with some other queued event
* 2 if the event was not queued - either the queue of events has overflown * 2 if the event was not queued - either the queue of events has overflown
* or the group is shutting down. * or the group is shutting down.
*/ */
int fsnotify_add_event(struct fsnotify_group *group, int fsnotify_add_event(struct fsnotify_group *group,
struct fsnotify_event *event, struct fsnotify_event *event,
int (*merge)(struct list_head *, int (*merge)(struct fsnotify_group *,
struct fsnotify_event *)) struct fsnotify_event *),
void (*insert)(struct fsnotify_group *,
struct fsnotify_event *))
{ {
int ret = 0; int ret = 0;
struct list_head *list = &group->notification_list; struct list_head *list = &group->notification_list;
...@@ -111,7 +110,7 @@ int fsnotify_add_event(struct fsnotify_group *group, ...@@ -111,7 +110,7 @@ int fsnotify_add_event(struct fsnotify_group *group,
} }
if (!list_empty(list) && merge) { if (!list_empty(list) && merge) {
ret = merge(list, event); ret = merge(group, event);
if (ret) { if (ret) {
spin_unlock(&group->notification_lock); spin_unlock(&group->notification_lock);
return ret; return ret;
...@@ -121,6 +120,8 @@ int fsnotify_add_event(struct fsnotify_group *group, ...@@ -121,6 +120,8 @@ int fsnotify_add_event(struct fsnotify_group *group,
queue: queue:
group->q_len++; group->q_len++;
list_add_tail(&event->list, list); list_add_tail(&event->list, list);
if (insert)
insert(group, event);
spin_unlock(&group->notification_lock); spin_unlock(&group->notification_lock);
wake_up(&group->notification_waitq); wake_up(&group->notification_waitq);
...@@ -141,33 +142,36 @@ void fsnotify_remove_queued_event(struct fsnotify_group *group, ...@@ -141,33 +142,36 @@ void fsnotify_remove_queued_event(struct fsnotify_group *group,
} }
/* /*
* Remove and return the first event from the notification list. It is the * Return the first event on the notification list without removing it.
* responsibility of the caller to destroy the obtained event * Returns NULL if the list is empty.
*/ */
struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group) struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group)
{ {
struct fsnotify_event *event;
assert_spin_locked(&group->notification_lock); assert_spin_locked(&group->notification_lock);
pr_debug("%s: group=%p\n", __func__, group); if (fsnotify_notify_queue_is_empty(group))
return NULL;
event = list_first_entry(&group->notification_list, return list_first_entry(&group->notification_list,
struct fsnotify_event, list); struct fsnotify_event, list);
fsnotify_remove_queued_event(group, event);
return event;
} }
/* /*
* This will not remove the event, that must be done with * Remove and return the first event from the notification list. It is the
* fsnotify_remove_first_event() * responsibility of the caller to destroy the obtained event
*/ */
struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group) struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
{ {
assert_spin_locked(&group->notification_lock); struct fsnotify_event *event = fsnotify_peek_first_event(group);
return list_first_entry(&group->notification_list, if (!event)
struct fsnotify_event, list); return NULL;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
fsnotify_remove_queued_event(group, event);
return event;
} }
/* /*
......
...@@ -1177,7 +1177,6 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -1177,7 +1177,6 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct super_block *sb = dentry->d_sb; struct super_block *sb = dentry->d_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
enum zonefs_ztype t; enum zonefs_ztype t;
u64 fsid;
buf->f_type = ZONEFS_MAGIC; buf->f_type = ZONEFS_MAGIC;
buf->f_bsize = sb->s_blocksize; buf->f_bsize = sb->s_blocksize;
...@@ -1200,9 +1199,7 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -1200,9 +1199,7 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
spin_unlock(&sbi->s_lock); spin_unlock(&sbi->s_lock);
fsid = le64_to_cpup((void *)sbi->s_uuid.b) ^ buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b);
le64_to_cpup((void *)sbi->s_uuid.b + sizeof(u64));
buf->f_fsid = u64_to_fsid(fsid);
return 0; return 0;
} }
......
...@@ -2,8 +2,11 @@ ...@@ -2,8 +2,11 @@
#ifndef _LINUX_FANOTIFY_H #ifndef _LINUX_FANOTIFY_H
#define _LINUX_FANOTIFY_H #define _LINUX_FANOTIFY_H
#include <linux/sysctl.h>
#include <uapi/linux/fanotify.h> #include <uapi/linux/fanotify.h>
extern struct ctl_table fanotify_table[]; /* for sysctl */
#define FAN_GROUP_FLAG(group, flag) \ #define FAN_GROUP_FLAG(group, flag) \
((group)->fanotify_data.flags & (flag)) ((group)->fanotify_data.flags & (flag))
...@@ -15,15 +18,38 @@ ...@@ -15,15 +18,38 @@
* these constant, the programs may break if re-compiled with new uapi headers * these constant, the programs may break if re-compiled with new uapi headers
* and then run on an old kernel. * and then run on an old kernel.
*/ */
#define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \
/* Group classes where permission events are allowed */
#define FANOTIFY_PERM_CLASSES (FAN_CLASS_CONTENT | \
FAN_CLASS_PRE_CONTENT) FAN_CLASS_PRE_CONTENT)
#define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FANOTIFY_PERM_CLASSES)
#define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME) #define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME)
#define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | FANOTIFY_FID_BITS | \ /*
FAN_REPORT_TID | \ * fanotify_init() flags that require CAP_SYS_ADMIN.
FAN_CLOEXEC | FAN_NONBLOCK | \ * We do not allow unprivileged groups to request permission events.
FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS) * We do not allow unprivileged groups to get other process pid in events.
* We do not allow unprivileged groups to use unlimited resources.
*/
#define FANOTIFY_ADMIN_INIT_FLAGS (FANOTIFY_PERM_CLASSES | \
FAN_REPORT_TID | \
FAN_UNLIMITED_QUEUE | \
FAN_UNLIMITED_MARKS)
/*
* fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
* FAN_CLASS_NOTIF is the only class we allow for unprivileged group.
* We do not allow unprivileged groups to get file descriptors in events,
* so one of the flags for reporting file handles is required.
*/
#define FANOTIFY_USER_INIT_FLAGS (FAN_CLASS_NOTIF | \
FANOTIFY_FID_BITS | \
FAN_CLOEXEC | FAN_NONBLOCK)
#define FANOTIFY_INIT_FLAGS (FANOTIFY_ADMIN_INIT_FLAGS | \
FANOTIFY_USER_INIT_FLAGS)
#define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \
FAN_MARK_FILESYSTEM) FAN_MARK_FILESYSTEM)
......
...@@ -167,7 +167,6 @@ struct fsnotify_ops { ...@@ -167,7 +167,6 @@ struct fsnotify_ops {
*/ */
struct fsnotify_event { struct fsnotify_event {
struct list_head list; struct list_head list;
unsigned long objectid; /* identifier for queue merges */
}; };
/* /*
...@@ -207,9 +206,6 @@ struct fsnotify_group { ...@@ -207,9 +206,6 @@ struct fsnotify_group {
/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
struct mutex mark_mutex; /* protect marks_list */ struct mutex mark_mutex; /* protect marks_list */
atomic_t num_marks; /* 1 for each mark and 1 for not being
* past the point of no return when freeing
* a group */
atomic_t user_waits; /* Number of tasks waiting for user atomic_t user_waits; /* Number of tasks waiting for user
* response */ * response */
struct list_head marks_list; /* all inode marks for this group */ struct list_head marks_list; /* all inode marks for this group */
...@@ -234,13 +230,14 @@ struct fsnotify_group { ...@@ -234,13 +230,14 @@ struct fsnotify_group {
#endif #endif
#ifdef CONFIG_FANOTIFY #ifdef CONFIG_FANOTIFY
struct fanotify_group_private_data { struct fanotify_group_private_data {
/* Hash table of events for merge */
struct hlist_head *merge_hash;
/* allows a group to block waiting for a userspace response */ /* allows a group to block waiting for a userspace response */
struct list_head access_list; struct list_head access_list;
wait_queue_head_t access_waitq; wait_queue_head_t access_waitq;
int flags; /* flags from fanotify_init() */ int flags; /* flags from fanotify_init() */
int f_flags; /* event_f_flags from fanotify_init() */ int f_flags; /* event_f_flags from fanotify_init() */
unsigned int max_marks; struct ucounts *ucounts;
struct user_struct *user;
} fanotify_data; } fanotify_data;
#endif /* CONFIG_FANOTIFY */ #endif /* CONFIG_FANOTIFY */
}; };
...@@ -487,15 +484,23 @@ extern void fsnotify_destroy_event(struct fsnotify_group *group, ...@@ -487,15 +484,23 @@ extern void fsnotify_destroy_event(struct fsnotify_group *group,
/* attach the event to the group notification queue */ /* attach the event to the group notification queue */
extern int fsnotify_add_event(struct fsnotify_group *group, extern int fsnotify_add_event(struct fsnotify_group *group,
struct fsnotify_event *event, struct fsnotify_event *event,
int (*merge)(struct list_head *, int (*merge)(struct fsnotify_group *,
struct fsnotify_event *)); struct fsnotify_event *),
void (*insert)(struct fsnotify_group *,
struct fsnotify_event *));
/* Queue overflow event to a notification group */ /* Queue overflow event to a notification group */
static inline void fsnotify_queue_overflow(struct fsnotify_group *group) static inline void fsnotify_queue_overflow(struct fsnotify_group *group)
{ {
fsnotify_add_event(group, group->overflow_event, NULL); fsnotify_add_event(group, group->overflow_event, NULL, NULL);
}
static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
{
assert_spin_locked(&group->notification_lock);
return list_empty(&group->notification_list);
} }
/* true if the group notification queue is empty */
extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
/* return, but do not dequeue the first event on the notification queue */ /* return, but do not dequeue the first event on the notification queue */
extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group); extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group);
...@@ -576,11 +581,9 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark); ...@@ -576,11 +581,9 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark);
extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info);
extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info);
static inline void fsnotify_init_event(struct fsnotify_event *event, static inline void fsnotify_init_event(struct fsnotify_event *event)
unsigned long objectid)
{ {
INIT_LIST_HEAD(&event->list); INIT_LIST_HEAD(&event->list);
event->objectid = objectid;
} }
#else #else
......
...@@ -14,9 +14,6 @@ struct user_struct { ...@@ -14,9 +14,6 @@ struct user_struct {
refcount_t __count; /* reference count */ refcount_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */ atomic_t processes; /* How many processes does this user have? */
atomic_t sigpending; /* How many pending signals does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */
#ifdef CONFIG_FANOTIFY
atomic_t fanotify_listeners;
#endif
#ifdef CONFIG_EPOLL #ifdef CONFIG_EPOLL
atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
#endif #endif
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <linux/types.h> #include <linux/types.h>
#include <asm/statfs.h> #include <asm/statfs.h>
#include <asm/byteorder.h>
struct kstatfs { struct kstatfs {
long f_type; long f_type;
...@@ -50,4 +51,11 @@ static inline __kernel_fsid_t u64_to_fsid(u64 v) ...@@ -50,4 +51,11 @@ static inline __kernel_fsid_t u64_to_fsid(u64 v)
return (__kernel_fsid_t){.val = {(u32)v, (u32)(v>>32)}}; return (__kernel_fsid_t){.val = {(u32)v, (u32)(v>>32)}};
} }
/* Fold 16 bytes uuid to 64 bit fsid */
static inline __kernel_fsid_t uuid_to_fsid(__u8 *uuid)
{
return u64_to_fsid(le64_to_cpup((void *)uuid) ^
le64_to_cpup((void *)(uuid + sizeof(u64))));
}
#endif #endif
...@@ -49,6 +49,10 @@ enum ucount_type { ...@@ -49,6 +49,10 @@ enum ucount_type {
#ifdef CONFIG_INOTIFY_USER #ifdef CONFIG_INOTIFY_USER
UCOUNT_INOTIFY_INSTANCES, UCOUNT_INOTIFY_INSTANCES,
UCOUNT_INOTIFY_WATCHES, UCOUNT_INOTIFY_WATCHES,
#endif
#ifdef CONFIG_FANOTIFY
UCOUNT_FANOTIFY_GROUPS,
UCOUNT_FANOTIFY_MARKS,
#endif #endif
UCOUNT_COUNTS, UCOUNT_COUNTS,
}; };
......
...@@ -148,6 +148,9 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); ...@@ -148,6 +148,9 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
#ifdef CONFIG_INOTIFY_USER #ifdef CONFIG_INOTIFY_USER
#include <linux/inotify.h> #include <linux/inotify.h>
#endif #endif
#ifdef CONFIG_FANOTIFY
#include <linux/fanotify.h>
#endif
#ifdef CONFIG_PROC_SYSCTL #ifdef CONFIG_PROC_SYSCTL
...@@ -3164,7 +3167,14 @@ static struct ctl_table fs_table[] = { ...@@ -3164,7 +3167,14 @@ static struct ctl_table fs_table[] = {
.mode = 0555, .mode = 0555,
.child = inotify_table, .child = inotify_table,
}, },
#endif #endif
#ifdef CONFIG_FANOTIFY
{
.procname = "fanotify",
.mode = 0555,
.child = fanotify_table,
},
#endif
#ifdef CONFIG_EPOLL #ifdef CONFIG_EPOLL
{ {
.procname = "epoll", .procname = "epoll",
......
...@@ -73,6 +73,10 @@ static struct ctl_table user_table[] = { ...@@ -73,6 +73,10 @@ static struct ctl_table user_table[] = {
#ifdef CONFIG_INOTIFY_USER #ifdef CONFIG_INOTIFY_USER
UCOUNT_ENTRY("max_inotify_instances"), UCOUNT_ENTRY("max_inotify_instances"),
UCOUNT_ENTRY("max_inotify_watches"), UCOUNT_ENTRY("max_inotify_watches"),
#endif
#ifdef CONFIG_FANOTIFY
UCOUNT_ENTRY("max_fanotify_groups"),
UCOUNT_ENTRY("max_fanotify_marks"),
#endif #endif
{ } { }
}; };
......
...@@ -2846,6 +2846,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -2846,6 +2846,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_ffree = sbinfo->free_inodes; buf->f_ffree = sbinfo->free_inodes;
} }
/* else leave those fields 0 like simple_statfs */ /* else leave those fields 0 like simple_statfs */
buf->f_fsid = uuid_to_fsid(dentry->d_sb->s_uuid.b);
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment