Commit 16698c49, authored by Rik van Riel, committed by Linus Torvalds

[PATCH] rlimit-based mlocks for unprivileged users

Here is the last agreed-on patch that lets normal users mlock pages up to
their rlimit.  This patch addresses all the issues brought up by Chris and
Andrea.

From: Chris Wright <chrisw@osdl.org>

Couple more nits.

The default lockable amount is one page now (in the first patch it was 0).  Why
don't we keep it as 0, with the CAP_IPC_LOCK overrides in place?  That way
nothing is changed from user perspective, and the rest of the policy can be
done by userspace as it should.

This patch breaks in one scenario.  When ulimit == 0, process has
CAP_IPC_LOCK, and does SHM_LOCK.  The subsequent unlock or destroy will
corrupt the locked_shm count.

It's also inconsistent in handling user_can_mlock/CAP_IPC_LOCK interaction
between shm_lock and shm_hugetlb.

SHM_HUGETLB can now only be done by the shm_group or CAP_IPC_LOCK.
Not any can_do_mlock() user.

Double check of can_do_mlock isn't needed in SHM_LOCK path.

Interface names user_can_mlock and user_subtract_mlock could be better.

Incremental update below.  Ran some simple sanity tests on this plus my
patch below and didn't find any problems.

* Make default RLIM_MEMLOCK limit 0.
* Move CAP_IPC_LOCK check into user_can_mlock to be consistent
  and fix bug with ulimit == 0 && CAP_IPC_LOCK with SHM_LOCK.
* Allow can_do_mlock() user to try SHM_HUGETLB setup.
* Remove unnecessary extra can_do_mlock() test in shmem_lock().
* Rename user_can_mlock to user_shm_lock and user_subtract_mlock
  to user_shm_unlock.
* Use user instead of current->user to fit in 80 cols on SHM_LOCK.
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent e4f8c4aa
......@@ -720,12 +720,13 @@ static unsigned long hugetlbfs_counter(void)
static int can_do_hugetlb_shm(void)
{
return likely(capable(CAP_IPC_LOCK) ||
in_group_p(sysctl_hugetlb_shm_group));
in_group_p(sysctl_hugetlb_shm_group) ||
can_do_mlock());
}
struct file *hugetlb_zero_setup(size_t size)
{
int error;
int error = -ENOMEM;
struct file *file;
struct inode *inode;
struct dentry *dentry, *root;
......@@ -738,6 +739,9 @@ struct file *hugetlb_zero_setup(size_t size)
if (!is_hugepage_mem_enough(size))
return ERR_PTR(-ENOMEM);
if (!user_shm_lock(size, current->user))
return ERR_PTR(-ENOMEM);
root = hugetlbfs_vfsmount->mnt_root;
snprintf(buf, 16, "%lu", hugetlbfs_counter());
quick_string.name = buf;
......@@ -745,7 +749,7 @@ struct file *hugetlb_zero_setup(size_t size)
quick_string.hash = 0;
dentry = d_alloc(root, &quick_string);
if (!dentry)
return ERR_PTR(-ENOMEM);
goto out_shm_unlock;
error = -ENFILE;
file = get_empty_filp();
......@@ -772,6 +776,8 @@ struct file *hugetlb_zero_setup(size_t size)
put_filp(file);
out_dentry:
dput(dentry);
out_shm_unlock:
user_shm_unlock(size, current->user);
return ERR_PTR(error);
}
......
......@@ -41,7 +41,7 @@
{INR_OPEN, INR_OPEN}, /* RLIMIT_NOFILE */ \
{LONG_MAX, LONG_MAX}, /* RLIMIT_AS */ \
{LONG_MAX, LONG_MAX}, /* RLIMIT_NPROC */ \
{LONG_MAX, LONG_MAX}, /* RLIMIT_MEMLOCK */ \
{0, 0 }, /* RLIMIT_MEMLOCK */ \
{LONG_MAX, LONG_MAX}, /* RLIMIT_LOCKS */ \
{MAX_SIGPENDING, MAX_SIGPENDING}, /* RLIMIT_SIGPENDING */ \
{MQ_BYTES_MAX, MQ_BYTES_MAX}, /* RLIMIT_MSGQUEUE */ \
......
......@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING}, \
......
......@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING}, \
......
......@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
......
......@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
......
......@@ -40,7 +40,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
......
......@@ -46,7 +46,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
......
......@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
......
......@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
......
......@@ -36,7 +36,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
......
......@@ -45,7 +45,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
......
......@@ -47,7 +47,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
......
......@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
......
......@@ -44,7 +44,7 @@
{ 0, RLIM_INFINITY}, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{INR_OPEN, INR_OPEN}, {0, 0}, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{0, 0}, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{MAX_SIGPENDING, MAX_SIGPENDING}, \
......
......@@ -43,7 +43,7 @@
{ 0, RLIM_INFINITY}, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{INR_OPEN, INR_OPEN}, {0, 0}, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{0, 0 }, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{MAX_SIGPENDING, MAX_SIGPENDING}, \
......
......@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
......
......@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
......
......@@ -496,9 +496,20 @@ int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new);
struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
unsigned long addr);
struct file *shmem_file_setup(char * name, loff_t size, unsigned long flags);
void shmem_lock(struct file * file, int lock);
int shmem_lock(struct file *file, int lock, struct user_struct *user);
int shmem_zero_setup(struct vm_area_struct *);
/*
 * May the current task attempt to lock memory at all?
 *
 * Returns 1 if the task has CAP_IPC_LOCK or if its RLIMIT_MEMLOCK soft
 * limit is non-zero, and 0 otherwise.  Only this coarse permission is
 * decided here; no requested size is compared against the limit.
 */
static inline int can_do_mlock(void)
{
	/* The capability is tested first and short-circuits the rlimit test. */
	return capable(CAP_IPC_LOCK) ||
		current->rlim[RLIMIT_MEMLOCK].rlim_cur != 0;
}
extern int user_shm_lock(size_t, struct user_struct *);
extern void user_shm_unlock(size_t, struct user_struct *);
/*
* Parameter block passed down to zap_pte_range in exceptional cases.
*/
......
......@@ -316,6 +316,7 @@ struct user_struct {
atomic_t sigpending; /* How many pending signals does this user have? */
/* protected by mq_lock */
unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
unsigned long locked_shm; /* How many pages of mlocked shm ? */
/* Hash table maintenance information */
struct list_head uidhash_list;
......
......@@ -84,6 +84,7 @@ struct shmid_kernel /* private to the kernel */
time_t shm_ctim;
pid_t shm_cprid;
pid_t shm_lprid;
struct user_struct *mlock_user;
};
/* shm_mode upper byte flags */
......
......@@ -114,7 +114,10 @@ static void shm_destroy (struct shmid_kernel *shp)
shm_rmid (shp->id);
shm_unlock(shp);
if (!is_file_hugepages(shp->shm_file))
shmem_lock(shp->shm_file, 0);
shmem_lock(shp->shm_file, 0, shp->mlock_user);
else
user_shm_unlock(shp->shm_file->f_dentry->d_inode->i_size,
shp->mlock_user);
fput (shp->shm_file);
security_shm_free(shp);
ipc_rcu_putref(shp);
......@@ -190,6 +193,7 @@ static int newseg (key_t key, int shmflg, size_t size)
shp->shm_perm.key = key;
shp->shm_flags = (shmflg & S_IRWXUGO);
shp->mlock_user = NULL;
shp->shm_perm.security = NULL;
error = security_shm_alloc(shp);
......@@ -198,9 +202,11 @@ static int newseg (key_t key, int shmflg, size_t size)
return error;
}
if (shmflg & SHM_HUGETLB)
if (shmflg & SHM_HUGETLB) {
/* hugetlb_zero_setup takes care of mlock user accounting */
file = hugetlb_zero_setup(size);
else {
shp->mlock_user = current->user;
} else {
sprintf (name, "SYSV%08x", key);
file = shmem_file_setup(name, size, VM_ACCOUNT);
}
......@@ -504,14 +510,11 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
case SHM_LOCK:
case SHM_UNLOCK:
{
/* Allow superuser to lock segment in memory */
/* Should the pages be faulted in here or leave it to user? */
/* need to determine interaction with current->swappable */
if (!capable(CAP_IPC_LOCK)) {
/* Allow superuser to lock segment in memory */
if (!can_do_mlock() && cmd == SHM_LOCK) {
err = -EPERM;
goto out;
}
shp = shm_lock(shmid);
if(shp==NULL) {
err = -EINVAL;
......@@ -526,13 +529,18 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
goto out_unlock;
if(cmd==SHM_LOCK) {
if (!is_file_hugepages(shp->shm_file))
shmem_lock(shp->shm_file, 1);
struct user_struct * user = current->user;
if (!is_file_hugepages(shp->shm_file)) {
err = shmem_lock(shp->shm_file, 1, user);
if (!err) {
shp->shm_flags |= SHM_LOCKED;
} else {
if (!is_file_hugepages(shp->shm_file))
shmem_lock(shp->shm_file, 0);
shp->mlock_user = user;
}
}
} else if (!is_file_hugepages(shp->shm_file)) {
shmem_lock(shp->shm_file, 0, shp->mlock_user);
shp->shm_flags &= ~SHM_LOCKED;
shp->mlock_user = NULL;
}
shm_unlock(shp);
goto out;
......
......@@ -32,7 +32,8 @@ struct user_struct root_user = {
.processes = ATOMIC_INIT(1),
.files = ATOMIC_INIT(0),
.sigpending = ATOMIC_INIT(0),
.mq_bytes = 0
.mq_bytes = 0,
.locked_shm = 0,
};
/*
......@@ -113,6 +114,7 @@ struct user_struct * alloc_uid(uid_t uid)
atomic_set(&new->sigpending, 0);
new->mq_bytes = 0;
new->locked_shm = 0;
/*
* Before adding this, check whether we raced
......
......@@ -60,7 +60,7 @@ static int do_mlock(unsigned long start, size_t len, int on)
struct vm_area_struct * vma, * next;
int error;
if (on && !capable(CAP_IPC_LOCK))
if (on && !can_do_mlock())
return -EPERM;
len = PAGE_ALIGN(len);
end = start + len;
......@@ -118,7 +118,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len)
lock_limit >>= PAGE_SHIFT;
/* check against resource limits */
if (locked <= lock_limit)
if ( (locked <= lock_limit) || capable(CAP_IPC_LOCK))
error = do_mlock(start, len, 1);
up_write(&current->mm->mmap_sem);
return error;
......@@ -142,7 +142,7 @@ static int do_mlockall(int flags)
unsigned int def_flags;
struct vm_area_struct * vma;
if (!capable(CAP_IPC_LOCK))
if (!can_do_mlock())
return -EPERM;
def_flags = 0;
......@@ -177,7 +177,7 @@ asmlinkage long sys_mlockall(int flags)
lock_limit >>= PAGE_SHIFT;
ret = -ENOMEM;
if (current->mm->total_vm <= lock_limit)
if ((current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK))
ret = do_mlockall(flags);
out:
up_write(&current->mm->mmap_sem);
......@@ -193,3 +193,36 @@ asmlinkage long sys_munlockall(void)
up_write(&current->mm->mmap_sem);
return ret;
}
/*
 * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
 * shm segments) get accounted against the user_struct instead.
 */
static spinlock_t shmlock_user_lock = SPIN_LOCK_UNLOCKED;

/*
 * Number of pages charged for a segment of @size bytes.
 *
 * Rounds up, so a partial last page is charged as a whole page; a plain
 * right shift would charge nothing for a segment smaller than one page.
 * Written without "size + page_size - 1" so a huge @size cannot wrap.
 * Lock and unlock must both use this helper so the counter stays balanced.
 */
static inline unsigned long shm_lock_pages(size_t size)
{
	unsigned long pages = size >> PAGE_SHIFT;

	if (size & ((1UL << PAGE_SHIFT) - 1))
		pages++;
	return pages;
}

/*
 * Charge @size bytes of locked shm against @user.
 *
 * The charge is allowed when the user's total stays within the calling
 * task's RLIMIT_MEMLOCK soft limit (converted to pages), or when the
 * caller has CAP_IPC_LOCK.  On success the page count is added to
 * user->locked_shm and a reference is taken on @user with get_uid();
 * user_shm_unlock() drops both again.
 *
 * Returns 1 if the charge was made, 0 if it was refused.
 */
int user_shm_lock(size_t size, struct user_struct *user)
{
	unsigned long lock_limit, locked;
	int allowed = 0;

	spin_lock(&shmlock_user_lock);
	locked = shm_lock_pages(size);
	lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
	lock_limit >>= PAGE_SHIFT;
	if (locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
		goto out;
	get_uid(user);
	user->locked_shm += locked;
	allowed = 1;
out:
	spin_unlock(&shmlock_user_lock);
	return allowed;
}

/*
 * Undo a successful user_shm_lock() of the same @size against @user:
 * subtract the page count from user->locked_shm and drop the reference
 * on @user with free_uid().
 */
void user_shm_unlock(size_t size, struct user_struct *user)
{
	spin_lock(&shmlock_user_lock);
	user->locked_shm -= shm_lock_pages(size);
	spin_unlock(&shmlock_user_lock);
	free_uid(user);
}
......@@ -796,15 +796,17 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_LOCKED) {
if (!capable(CAP_IPC_LOCK))
if (!can_do_mlock())
return -EPERM;
vm_flags |= VM_LOCKED;
}
/* mlock MCL_FUTURE? */
if (vm_flags & VM_LOCKED) {
unsigned long locked = mm->locked_vm << PAGE_SHIFT;
unsigned long locked, lock_limit;
locked = mm->locked_vm << PAGE_SHIFT;
lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
locked += len;
if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return -EAGAIN;
}
......@@ -1625,9 +1627,11 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
* mlock MCL_FUTURE?
*/
if (mm->def_flags & VM_LOCKED) {
unsigned long locked = mm->locked_vm << PAGE_SHIFT;
unsigned long locked, lock_limit;
locked = mm->locked_vm << PAGE_SHIFT;
lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
locked += len;
if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return -EAGAIN;
}
......
......@@ -324,10 +324,12 @@ unsigned long do_mremap(unsigned long addr,
goto out;
}
if (vma->vm_flags & VM_LOCKED) {
unsigned long locked = current->mm->locked_vm << PAGE_SHIFT;
unsigned long locked, lock_limit;
locked = current->mm->locked_vm << PAGE_SHIFT;
lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
locked += new_len - old_len;
ret = -EAGAIN;
if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
goto out;
}
ret = -ENOMEM;
......
......@@ -1151,17 +1151,26 @@ shmem_get_policy(struct vm_area_struct *vma, unsigned long addr)
}
#endif
void shmem_lock(struct file *file, int lock)
int shmem_lock(struct file *file, int lock, struct user_struct *user)
{
struct inode *inode = file->f_dentry->d_inode;
struct shmem_inode_info *info = SHMEM_I(inode);
int retval = -ENOMEM;
spin_lock(&info->lock);
if (lock)
if (lock && !(info->flags & VM_LOCKED)) {
if (!user_shm_lock(inode->i_size, user))
goto out_nomem;
info->flags |= VM_LOCKED;
else
}
if (!lock && (info->flags & VM_LOCKED) && user) {
user_shm_unlock(inode->i_size, user);
info->flags &= ~VM_LOCKED;
}
retval = 0;
out_nomem:
spin_unlock(&info->lock);
return retval;
}
static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment