Commit b6dfc3f2 authored by Serge Hallyn, committed by Linus Torvalds

[PATCH] merge *_vm_enough_memory()s into a common helper

The vm_enough_memory functionality was replicated in three separate places,
and not always kept in sync.  It also used capable() for authorization checks.
This caused any process which ends up checking for this permission to have
PF_SUPERPRIV set (inappropriately), and caused poor dependencies between
stacked modules, since each LSM was generically asked to moderate
capable(CAP_SYS_ADMIN) without knowing why.
Signed-off-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Chris Wright <chrisw@osdl.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 7f01bafb
...@@ -711,6 +711,7 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma, ...@@ -711,6 +711,7 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
} }
/* mmap.c */ /* mmap.c */
/* Overcommit check helper for LSMs; cap_sys_admin is 1 if the caller has admin privileges, 0 otherwise. */
extern int __vm_enough_memory(long pages, int cap_sys_admin);
extern void vma_adjust(struct vm_area_struct *vma, unsigned long start, extern void vma_adjust(struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert);
extern struct vm_area_struct *vma_merge(struct mm_struct *, extern struct vm_area_struct *vma_merge(struct mm_struct *,
......
...@@ -61,10 +61,98 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */ ...@@ -61,10 +61,98 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */
int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
atomic_t vm_committed_space = ATOMIC_INIT(0); atomic_t vm_committed_space = ATOMIC_INIT(0);
/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies, which are set via the
 * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
 *
 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
 * Additional code 2002 Jul 20 by Robert Love.
 *
 * @pages:         number of pages the caller wants to commit.
 * @cap_sys_admin: 1 if the process has admin privileges, 0 otherwise.
 *                 Unprivileged callers leave roughly 3% (1/32) of each
 *                 pool untouched.
 *
 * The pages are charged with vm_acct_memory() up front; every failure
 * path undoes that charge with vm_unacct_memory() before returning
 * -ENOMEM, so on success the charge stays in place.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */
int __vm_enough_memory(long pages, int cap_sys_admin)
{
	unsigned long free, allowed;

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		unsigned long n;

		free = get_page_cache_size();
		free += nr_swap_pages;

		/*
		 * Any slabs which are created with the
		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
		 * which are reclaimable, under pressure.  The dentry
		 * cache and most inode caches should fall into this
		 */
		free += atomic_read(&slab_reclaim_pages);

		/*
		 * Leave the last 3% for root
		 */
		if (!cap_sys_admin)
			free -= free / 32;

		if (free > pages)
			return 0;

		/*
		 * nr_free_pages() is very expensive on large systems,
		 * only call if we're about to fail.
		 */
		n = nr_free_pages();
		if (!cap_sys_admin)
			n -= n / 32;
		free += n;

		if (free > pages)
			return 0;

		vm_unacct_memory(pages);
		return -ENOMEM;
	}

	/* Strict accounting: the limit is a ratio of non-hugetlb RAM plus swap. */
	allowed = (totalram_pages - hugetlb_total_pages())
		* sysctl_overcommit_ratio / 100;

	/*
	 * Leave the last 3% for root
	 */
	if (!cap_sys_admin)
		allowed -= allowed / 32;
	allowed += total_swap_pages;

	/*
	 * Don't let a single process grow too big:
	 * leave 3% of the size of this process for other processes.
	 * A task without an mm (e.g. a kernel thread) has no size to
	 * reserve against, and dereferencing a NULL mm would oops.
	 */
	if (current->mm)
		allowed -= current->mm->total_vm / 32;

	/*
	 * Compare as signed values: vm_committed_space can be transiently
	 * negative, and promoting it to unsigned long would make it look
	 * enormous and wrongly fail the allocation.
	 */
	if (atomic_read(&vm_committed_space) < (long)allowed)
		return 0;

	vm_unacct_memory(pages);

	return -ENOMEM;
}
EXPORT_SYMBOL(sysctl_overcommit_memory); EXPORT_SYMBOL(sysctl_overcommit_memory);
EXPORT_SYMBOL(sysctl_overcommit_ratio); EXPORT_SYMBOL(sysctl_overcommit_ratio);
EXPORT_SYMBOL(sysctl_max_map_count); EXPORT_SYMBOL(sysctl_max_map_count);
EXPORT_SYMBOL(vm_committed_space); EXPORT_SYMBOL(vm_committed_space);
EXPORT_SYMBOL(__vm_enough_memory);
/* /*
* Requires inode->i_mapping->i_mmap_lock * Requires inode->i_mapping->i_mmap_lock
......
...@@ -316,86 +316,13 @@ int cap_syslog (int type) ...@@ -316,86 +316,13 @@ int cap_syslog (int type)
return 0; return 0;
} }
/*
* Check that a process has enough memory to allocate a new virtual
* mapping. 0 means there is enough memory for the allocation to
* succeed and -ENOMEM implies there is not.
*
* We currently support three overcommit policies, which are set via the
* vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
*
* Strict overcommit modes added 2002 Feb 26 by Alan Cox.
* Additional code 2002 Jul 20 by Robert Love.
*/
int cap_vm_enough_memory(long pages) int cap_vm_enough_memory(long pages)
{ {
unsigned long free, allowed; int cap_sys_admin = 0;
vm_acct_memory(pages);
/*
* Sometimes we want to use more memory than we have
*/
if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
return 0;
if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
unsigned long n;
free = get_page_cache_size();
free += nr_swap_pages;
/*
* Any slabs which are created with the
* SLAB_RECLAIM_ACCOUNT flag claim to have contents
* which are reclaimable, under pressure. The dentry
* cache and most inode caches should fall into this
*/
free += atomic_read(&slab_reclaim_pages);
/*
* Leave the last 3% for root
*/
if (!capable(CAP_SYS_ADMIN))
free -= free / 32;
if (free > pages)
return 0;
/*
* nr_free_pages() is very expensive on large systems,
* only call if we're about to fail.
*/
n = nr_free_pages();
if (!capable(CAP_SYS_ADMIN))
n -= n / 32;
free += n;
if (free > pages)
return 0;
vm_unacct_memory(pages);
return -ENOMEM;
}
allowed = (totalram_pages - hugetlb_total_pages())
* sysctl_overcommit_ratio / 100;
/*
* Leave the last 3% for root
*/
if (!capable(CAP_SYS_ADMIN))
allowed -= allowed / 32;
allowed += total_swap_pages;
/* Don't let a single process grow too big:
leave 3% of the size of this process for other processes */
allowed -= current->mm->total_vm / 32;
if (atomic_read(&vm_committed_space) < allowed)
return 0;
vm_unacct_memory(pages);
return -ENOMEM; if (cap_capable(current, CAP_SYS_ADMIN) == 0)
cap_sys_admin = 1;
return __vm_enough_memory(pages, cap_sys_admin);
} }
EXPORT_SYMBOL(cap_capable); EXPORT_SYMBOL(cap_capable);
......
...@@ -108,69 +108,13 @@ static int dummy_settime(struct timespec *ts, struct timezone *tz) ...@@ -108,69 +108,13 @@ static int dummy_settime(struct timespec *ts, struct timezone *tz)
return 0; return 0;
} }
/*
* Check that a process has enough memory to allocate a new virtual
* mapping. 0 means there is enough memory for the allocation to
* succeed and -ENOMEM implies there is not.
*
* We currently support three overcommit policies, which are set via the
* vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
*/
static int dummy_vm_enough_memory(long pages) static int dummy_vm_enough_memory(long pages)
{ {
unsigned long free, allowed; int cap_sys_admin = 0;
vm_acct_memory(pages);
/*
* Sometimes we want to use more memory than we have
*/
if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
return 0;
if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
free = get_page_cache_size();
free += nr_free_pages();
free += nr_swap_pages;
/*
* Any slabs which are created with the
* SLAB_RECLAIM_ACCOUNT flag claim to have contents
* which are reclaimable, under pressure. The dentry
* cache and most inode caches should fall into this
*/
free += atomic_read(&slab_reclaim_pages);
/*
* Leave the last 3% for root
*/
if (current->euid)
free -= free / 32;
if (free > pages)
return 0;
vm_unacct_memory(pages);
return -ENOMEM;
}
allowed = (totalram_pages - hugetlb_total_pages())
* sysctl_overcommit_ratio / 100;
allowed += total_swap_pages;
/* Leave the last 3% for root */
if (current->euid)
allowed -= allowed / 32;
/* Don't let a single process grow too big:
leave 3% of the size of this process for other processes */
allowed -= current->mm->total_vm / 32;
if (atomic_read(&vm_committed_space) < allowed)
return 0;
vm_unacct_memory(pages);
return -ENOMEM; if (dummy_capable(current, CAP_SYS_ADMIN) == 0)
cap_sys_admin = 1;
return __vm_enough_memory(pages, cap_sys_admin);
} }
static int dummy_bprm_alloc_security (struct linux_binprm *bprm) static int dummy_bprm_alloc_security (struct linux_binprm *bprm)
......
...@@ -1515,69 +1515,29 @@ static int selinux_syslog(int type) ...@@ -1515,69 +1515,29 @@ static int selinux_syslog(int type)
* mapping. 0 means there is enough memory for the allocation to * mapping. 0 means there is enough memory for the allocation to
* succeed and -ENOMEM implies there is not. * succeed and -ENOMEM implies there is not.
* *
* We currently support three overcommit policies, which are set via the * Note that secondary_ops->capable and task_has_perm_noaudit return 0
* vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting * if the capability is granted, but __vm_enough_memory requires 1 if
* the capability is granted.
* *
* Strict overcommit modes added 2002 Feb 26 by Alan Cox. * Do not audit the selinux permission check, as this is applied to all
* Additional code 2002 Jul 20 by Robert Love. * processes that allocate mappings.
*/ */
static int selinux_vm_enough_memory(long pages) static int selinux_vm_enough_memory(long pages)
{ {
unsigned long free, allowed; int rc, cap_sys_admin = 0;
int rc;
struct task_security_struct *tsec = current->security; struct task_security_struct *tsec = current->security;
vm_acct_memory(pages); rc = secondary_ops->capable(current, CAP_SYS_ADMIN);
if (rc == 0)
/* rc = avc_has_perm_noaudit(tsec->sid, tsec->sid,
* Sometimes we want to use more memory than we have SECCLASS_CAPABILITY,
*/ CAP_TO_MASK(CAP_SYS_ADMIN),
if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) NULL);
return 0;
if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
free = get_page_cache_size();
free += nr_free_pages();
free += nr_swap_pages;
/*
* Any slabs which are created with the
* SLAB_RECLAIM_ACCOUNT flag claim to have contents
* which are reclaimable, under pressure. The dentry
* cache and most inode caches should fall into this
*/
free += atomic_read(&slab_reclaim_pages);
/*
* Leave the last 3% for privileged processes.
* Don't audit the check, as it is applied to all processes
* that allocate mappings.
*/
rc = secondary_ops->capable(current, CAP_SYS_ADMIN);
if (!rc) {
rc = avc_has_perm_noaudit(tsec->sid, tsec->sid,
SECCLASS_CAPABILITY,
CAP_TO_MASK(CAP_SYS_ADMIN), NULL);
}
if (rc)
free -= free / 32;
if (free > pages)
return 0;
vm_unacct_memory(pages);
return -ENOMEM;
}
allowed = (totalram_pages - hugetlb_total_pages())
* sysctl_overcommit_ratio / 100;
allowed += total_swap_pages;
if (atomic_read(&vm_committed_space) < allowed)
return 0;
vm_unacct_memory(pages); if (rc == 0)
cap_sys_admin = 1;
return -ENOMEM; return __vm_enough_memory(pages, cap_sys_admin);
} }
/* binprm security operations */ /* binprm security operations */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment