Commit 59eda0e0 authored by Al Viro

new fs_pin killing logics

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent fdab684d
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/sched.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/fs_pin.h> #include <linux/fs_pin.h>
#include "internal.h" #include "internal.h"
...@@ -12,6 +13,10 @@ void pin_remove(struct fs_pin *pin) ...@@ -12,6 +13,10 @@ void pin_remove(struct fs_pin *pin)
hlist_del(&pin->m_list); hlist_del(&pin->m_list);
hlist_del(&pin->s_list); hlist_del(&pin->s_list);
spin_unlock(&pin_lock); spin_unlock(&pin_lock);
spin_lock_irq(&pin->wait.lock);
pin->done = 1;
wake_up_locked(&pin->wait);
spin_unlock_irq(&pin->wait.lock);
} }
void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p)
...@@ -28,19 +33,58 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m) ...@@ -28,19 +33,58 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m)
pin_insert_group(pin, m, &m->mnt_sb->s_pins); pin_insert_group(pin, m, &m->mnt_sb->s_pins);
} }
/*
 * pin_kill - kill a pin, or wait for a kill that is already under way.
 * @p: pin to kill; may be NULL, in which case only the RCU read lock is
 *     dropped.
 *
 * Every return path below calls rcu_read_unlock() without a matching
 * rcu_read_lock() of its own, so the caller must enter with the RCU read
 * lock held and must treat it as released on return.
 *
 * p->done is used as a three-state flag, protected by p->wait.lock:
 *    0  - nobody has started killing this pin;
 *   -1  - a kill is in progress (set here before calling p->kill());
 *   >0  - the pin has been removed (pin_remove() stores 1 and wakes
 *         the waiters on p->wait).
 */
void pin_kill(struct fs_pin *p)
{
wait_queue_t wait;
if (!p) {
rcu_read_unlock();
return;
}
init_wait(&wait);
spin_lock_irq(&p->wait.lock);
if (likely(!p->done)) {
/* We are the first: claim the kill and run the callback unlocked. */
p->done = -1;
spin_unlock_irq(&p->wait.lock);
rcu_read_unlock();
p->kill(p);
return;
}
if (p->done > 0) {
/* Already removed; nothing to do and nothing to wait for. */
spin_unlock_irq(&p->wait.lock);
rcu_read_unlock();
return;
}
/* done < 0: somebody else is killing it; queue ourselves and sleep. */
__add_wait_queue(&p->wait, &wait);
while (1) {
/* Task state is set while still holding wait.lock, before unlocking. */
set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock_irq(&p->wait.lock);
rcu_read_unlock();
schedule();
rcu_read_lock();
/*
 * If our entry is no longer on the queue we are finished and do not
 * touch *p again.  NOTE(review): presumably the waker dequeued us
 * (init_wait()'s auto-remove wake function) - confirm.
 */
if (likely(list_empty(&wait.task_list)))
break;
/* OK, we know p couldn't have been freed yet */
spin_lock_irq(&p->wait.lock);
if (p->done > 0) {
spin_unlock_irq(&p->wait.lock);
break;
}
/* Not done yet: loop again with wait.lock still held. */
}
rcu_read_unlock();
}
void mnt_pin_kill(struct mount *m) void mnt_pin_kill(struct mount *m)
{ {
while (1) { while (1) {
struct hlist_node *p; struct hlist_node *p;
struct fs_pin *pin;
rcu_read_lock(); rcu_read_lock();
p = ACCESS_ONCE(m->mnt_pins.first); p = ACCESS_ONCE(m->mnt_pins.first);
if (!p) { if (!p) {
rcu_read_unlock(); rcu_read_unlock();
break; break;
} }
pin = hlist_entry(p, struct fs_pin, m_list); pin_kill(hlist_entry(p, struct fs_pin, m_list));
pin->kill(pin);
} }
} }
...@@ -48,14 +92,12 @@ void group_pin_kill(struct hlist_head *p) ...@@ -48,14 +92,12 @@ void group_pin_kill(struct hlist_head *p)
{ {
while (1) { while (1) {
struct hlist_node *q; struct hlist_node *q;
struct fs_pin *pin;
rcu_read_lock(); rcu_read_lock();
q = ACCESS_ONCE(p->first); q = ACCESS_ONCE(p->first);
if (!q) { if (!q) {
rcu_read_unlock(); rcu_read_unlock();
break; break;
} }
pin = hlist_entry(q, struct fs_pin, s_list); pin_kill(hlist_entry(q, struct fs_pin, s_list));
pin->kill(pin);
} }
} }
#include <linux/fs.h> #include <linux/wait.h>
struct fs_pin { struct fs_pin {
wait_queue_head_t wait;
int done;
struct hlist_node s_list; struct hlist_node s_list;
struct hlist_node m_list; struct hlist_node m_list;
void (*kill)(struct fs_pin *); void (*kill)(struct fs_pin *);
}; };
struct vfsmount;
/*
 * init_fs_pin - set up the wait queue and kill callback of a pin.
 * @p:    pin to initialise
 * @kill: callback stored in p->kill
 *
 * Only ->wait and ->kill are written here; ->done, ->s_list and ->m_list
 * are left as the caller provided them.  The caller visible in this diff
 * (acct_on) allocates the containing object with kzalloc(), so they start
 * out zeroed there.
 */
static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *))
{
init_waitqueue_head(&p->wait);
p->kill = kill;
}
void pin_remove(struct fs_pin *); void pin_remove(struct fs_pin *);
void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *); void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *);
void pin_insert(struct fs_pin *, struct vfsmount *); void pin_insert(struct fs_pin *, struct vfsmount *);
void pin_kill(struct fs_pin *);
...@@ -19,7 +19,7 @@ struct pidmap { ...@@ -19,7 +19,7 @@ struct pidmap {
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
#define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE) #define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE)
struct bsd_acct_struct; struct fs_pin;
struct pid_namespace { struct pid_namespace {
struct kref kref; struct kref kref;
...@@ -37,7 +37,7 @@ struct pid_namespace { ...@@ -37,7 +37,7 @@ struct pid_namespace {
struct dentry *proc_thread_self; struct dentry *proc_thread_self;
#endif #endif
#ifdef CONFIG_BSD_PROCESS_ACCT #ifdef CONFIG_BSD_PROCESS_ACCT
struct bsd_acct_struct *bacct; struct fs_pin *bacct;
#endif #endif
struct user_namespace *user_ns; struct user_namespace *user_ns;
struct work_struct proc_work; struct work_struct proc_work;
......
...@@ -76,7 +76,6 @@ int acct_parm[3] = {4, 2, 30}; ...@@ -76,7 +76,6 @@ int acct_parm[3] = {4, 2, 30};
/* /*
* External references and all of the globals. * External references and all of the globals.
*/ */
static void do_acct_process(struct bsd_acct_struct *acct);
struct bsd_acct_struct { struct bsd_acct_struct {
struct fs_pin pin; struct fs_pin pin;
...@@ -91,6 +90,8 @@ struct bsd_acct_struct { ...@@ -91,6 +90,8 @@ struct bsd_acct_struct {
struct completion done; struct completion done;
}; };
static void do_acct_process(struct bsd_acct_struct *acct);
/* /*
* Check the amount of free space and suspend/resume accordingly. * Check the amount of free space and suspend/resume accordingly.
*/ */
...@@ -132,13 +133,18 @@ static void acct_put(struct bsd_acct_struct *p) ...@@ -132,13 +133,18 @@ static void acct_put(struct bsd_acct_struct *p)
kfree_rcu(p, rcu); kfree_rcu(p, rcu);
} }
/*
 * to_acct - map an fs_pin back to the bsd_acct_struct that embeds it.
 * @p: pointer to the ->pin member of a bsd_acct_struct, or NULL
 *
 * Returns the containing bsd_acct_struct, or NULL when @p is NULL (the
 * NULL check keeps container_of() from producing a bogus non-NULL
 * pointer).
 */
static inline struct bsd_acct_struct *to_acct(struct fs_pin *p)
{
return p ? container_of(p, struct bsd_acct_struct, pin) : NULL;
}
static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
{ {
struct bsd_acct_struct *res; struct bsd_acct_struct *res;
again: again:
smp_rmb(); smp_rmb();
rcu_read_lock(); rcu_read_lock();
res = ACCESS_ONCE(ns->bacct); res = to_acct(ACCESS_ONCE(ns->bacct));
if (!res) { if (!res) {
rcu_read_unlock(); rcu_read_unlock();
return NULL; return NULL;
...@@ -150,7 +156,7 @@ static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) ...@@ -150,7 +156,7 @@ static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
} }
rcu_read_unlock(); rcu_read_unlock();
mutex_lock(&res->lock); mutex_lock(&res->lock);
if (!res->ns) { if (res != to_acct(ACCESS_ONCE(ns->bacct))) {
mutex_unlock(&res->lock); mutex_unlock(&res->lock);
acct_put(res); acct_put(res);
goto again; goto again;
...@@ -158,6 +164,19 @@ static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) ...@@ -158,6 +164,19 @@ static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
return res; return res;
} }
/*
 * acct_pin_kill - fs_pin kill callback for a BSD accounting object.
 * @pin: the ->pin member of the bsd_acct_struct being shut down
 *
 * Installed as the pin's ->kill by acct_on() through init_fs_pin().
 * Under acct->lock it calls do_acct_process(), schedules acct->work and
 * waits on acct->done, then detaches the object from its pid namespace.
 * After unlocking, it removes the pin and drops a reference.
 */
static void acct_pin_kill(struct fs_pin *pin)
{
struct bsd_acct_struct *acct = to_acct(pin);
mutex_lock(&acct->lock);
do_acct_process(acct);
/* acct->work is set up with close_work, which completes acct->done. */
schedule_work(&acct->work);
wait_for_completion(&acct->done);
/* Clear ns->bacct only if it still points at this pin. */
cmpxchg(&acct->ns->bacct, pin, NULL);
mutex_unlock(&acct->lock);
/* pin_remove() sets pin->done to 1 and wakes waiters on pin->wait. */
pin_remove(pin);
acct_put(acct);
}
static void close_work(struct work_struct *work) static void close_work(struct work_struct *work)
{ {
struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
...@@ -168,49 +187,13 @@ static void close_work(struct work_struct *work) ...@@ -168,49 +187,13 @@ static void close_work(struct work_struct *work)
complete(&acct->done); complete(&acct->done);
} }
/*
 * acct_kill - shut down an accounting object.
 * @acct: object to shut down, or NULL (in which case this is a no-op)
 *
 * NOTE(review): this looks like the pre-commit implementation that the
 * pin_kill()-based code in this diff replaces - confirm against the hunk.
 *
 * acct->lock is unlocked below without being taken in this function, so
 * a non-NULL @acct must arrive with its mutex already held by the caller.
 */
static void acct_kill(struct bsd_acct_struct *acct)
{
if (acct) {
/* Remember the namespace; acct->ns is cleared further down. */
struct pid_namespace *ns = acct->ns;
do_acct_process(acct);
INIT_WORK(&acct->work, close_work);
init_completion(&acct->done);
schedule_work(&acct->work);
/* close_work signals acct->done with complete(). */
wait_for_completion(&acct->done);
pin_remove(&acct->pin);
/* Clear ns->bacct only if it still points at this object. */
cmpxchg(&ns->bacct, acct, NULL);
acct->ns = NULL;
atomic_long_dec(&acct->count);
mutex_unlock(&acct->lock);
acct_put(acct);
}
}
/*
 * acct_pin_kill - fs_pin kill callback for a BSD accounting object.
 * @pin: the ->pin member of the bsd_acct_struct to shut down
 *
 * NOTE(review): this looks like the pre-commit version of the callback -
 * confirm against the hunk.
 *
 * Both paths below call rcu_read_unlock() without a matching lock here,
 * so the callback is entered with the RCU read lock held and releases it.
 */
static void acct_pin_kill(struct fs_pin *pin)
{
struct bsd_acct_struct *acct;
acct = container_of(pin, struct bsd_acct_struct, pin);
/* Take a reference; if the count has already hit zero, give up. */
if (!atomic_long_inc_not_zero(&acct->count)) {
rcu_read_unlock();
cpu_relax();
return;
}
rcu_read_unlock();
mutex_lock(&acct->lock);
/* acct_kill() clears ->ns, so NULL here means it was already killed. */
if (!acct->ns) {
mutex_unlock(&acct->lock);
acct_put(acct);
acct = NULL;
}
/* acct_kill(NULL) does nothing; otherwise it unlocks acct->lock. */
acct_kill(acct);
}
static int acct_on(struct filename *pathname) static int acct_on(struct filename *pathname)
{ {
struct file *file; struct file *file;
struct vfsmount *mnt, *internal; struct vfsmount *mnt, *internal;
struct pid_namespace *ns = task_active_pid_ns(current); struct pid_namespace *ns = task_active_pid_ns(current);
struct bsd_acct_struct *acct, *old; struct bsd_acct_struct *acct;
struct fs_pin *old;
int err; int err;
acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
...@@ -252,18 +235,20 @@ static int acct_on(struct filename *pathname) ...@@ -252,18 +235,20 @@ static int acct_on(struct filename *pathname)
file->f_path.mnt = internal; file->f_path.mnt = internal;
atomic_long_set(&acct->count, 1); atomic_long_set(&acct->count, 1);
acct->pin.kill = acct_pin_kill; init_fs_pin(&acct->pin, acct_pin_kill);
acct->file = file; acct->file = file;
acct->needcheck = jiffies; acct->needcheck = jiffies;
acct->ns = ns; acct->ns = ns;
mutex_init(&acct->lock); mutex_init(&acct->lock);
INIT_WORK(&acct->work, close_work);
init_completion(&acct->done);
mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */
pin_insert(&acct->pin, mnt); pin_insert(&acct->pin, mnt);
old = acct_get(ns); rcu_read_lock();
ns->bacct = acct; old = xchg(&ns->bacct, &acct->pin);
acct_kill(old);
mutex_unlock(&acct->lock); mutex_unlock(&acct->lock);
pin_kill(old);
mnt_drop_write(mnt); mnt_drop_write(mnt);
mntput(mnt); mntput(mnt);
return 0; return 0;
...@@ -299,7 +284,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) ...@@ -299,7 +284,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
mutex_unlock(&acct_on_mutex); mutex_unlock(&acct_on_mutex);
putname(tmp); putname(tmp);
} else { } else {
acct_kill(acct_get(task_active_pid_ns(current))); rcu_read_lock();
pin_kill(task_active_pid_ns(current)->bacct);
} }
return error; return error;
...@@ -307,7 +293,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) ...@@ -307,7 +293,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
void acct_exit_ns(struct pid_namespace *ns) void acct_exit_ns(struct pid_namespace *ns)
{ {
acct_kill(acct_get(ns)); rcu_read_lock();
pin_kill(ns->bacct);
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment