Commit be1d0e0e authored by Andrea Arcangeli, committed by Linus Torvalds

[PATCH] mm: oom-killer tunable

With <garloff@suse.de>

This is protect-pids, a patch to allow the admin to tune the oom killer.
The tweak is inherited between parent and child so it's easy to write a
wrapper for complex apps.

I made used_math a char in light of later patches. The current patch
breaks alpha, but future patches will fix it.
Signed-off-by: Andrea Arcangeli <andrea@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent a10fdbe6
...@@ -72,6 +72,8 @@ enum pid_directory_inos { ...@@ -72,6 +72,8 @@ enum pid_directory_inos {
PROC_TGID_ATTR_FSCREATE, PROC_TGID_ATTR_FSCREATE,
#endif #endif
PROC_TGID_FD_DIR, PROC_TGID_FD_DIR,
PROC_TGID_OOM_SCORE,
PROC_TGID_OOM_ADJUST,
PROC_TID_INO, PROC_TID_INO,
PROC_TID_STATUS, PROC_TID_STATUS,
PROC_TID_MEM, PROC_TID_MEM,
...@@ -98,6 +100,8 @@ enum pid_directory_inos { ...@@ -98,6 +100,8 @@ enum pid_directory_inos {
PROC_TID_ATTR_FSCREATE, PROC_TID_ATTR_FSCREATE,
#endif #endif
PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
PROC_TID_OOM_SCORE,
PROC_TID_OOM_ADJUST,
}; };
struct pid_entry { struct pid_entry {
...@@ -133,6 +137,8 @@ static struct pid_entry tgid_base_stuff[] = { ...@@ -133,6 +137,8 @@ static struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_SCHEDSTATS #ifdef CONFIG_SCHEDSTATS
E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO), E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO),
#endif #endif
E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
{0,0,NULL,0} {0,0,NULL,0}
}; };
static struct pid_entry tid_base_stuff[] = { static struct pid_entry tid_base_stuff[] = {
...@@ -158,6 +164,8 @@ static struct pid_entry tid_base_stuff[] = { ...@@ -158,6 +164,8 @@ static struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_SCHEDSTATS #ifdef CONFIG_SCHEDSTATS
E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO), E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO),
#endif #endif
E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
{0,0,NULL,0} {0,0,NULL,0}
}; };
...@@ -384,6 +392,18 @@ static int proc_pid_schedstat(struct task_struct *task, char *buffer) ...@@ -384,6 +392,18 @@ static int proc_pid_schedstat(struct task_struct *task, char *buffer)
} }
#endif #endif
/*
 * badness() is the OOM killer's scoring routine; it is defined outside
 * this file and only declared here.
 */
unsigned long badness(struct task_struct *p, unsigned long uptime);

/*
 * Format the OOM score of @task into @buffer as an unsigned decimal
 * followed by a newline.  The score is badness() evaluated with the
 * seconds part of the current monotonic clock as the uptime.
 *
 * Returns the number of characters written, as reported by sprintf().
 */
static int proc_oom_score(struct task_struct *task, char *buffer)
{
	struct timespec now;

	do_posix_clock_monotonic_gettime(&now);
	return sprintf(buffer, "%lu\n", badness(task, now.tv_sec));
}
/************************************************************************/ /************************************************************************/
/* Here the fs part begins */ /* Here the fs part begins */
/************************************************************************/ /************************************************************************/
...@@ -657,6 +677,56 @@ static struct file_operations proc_mem_operations = { ...@@ -657,6 +677,56 @@ static struct file_operations proc_mem_operations = {
.open = mem_open, .open = mem_open,
}; };
/*
 * Read handler for the "oom_adj" proc file.
 *
 * Renders the task's oomkilladj value as a signed decimal followed by a
 * newline and copies the part of that text starting at *ppos to user
 * space, limited to @count bytes.
 *
 * Returns the number of bytes copied (0 once *ppos is at or past the end
 * of the text) or -EFAULT if the copy to user space fails.  *ppos is
 * advanced by the number of bytes copied.
 */
static ssize_t oom_adjust_read(struct file *file, char *buf,
				size_t count, loff_t *ppos)
{
	struct task_struct *tsk = proc_task(file->f_dentry->d_inode);
	loff_t pos = *ppos;
	int value = tsk->oomkilladj;
	char text[8];
	size_t text_len;

	text_len = sprintf(text, "%i\n", value);

	/* Everything has already been consumed: signal end of file. */
	if (pos >= text_len)
		return 0;

	/* Never hand out more than what is left after the current offset. */
	if (count > text_len - pos)
		count = text_len - pos;

	if (copy_to_user(buf, text + pos, count))
		return -EFAULT;

	*ppos = pos + count;
	return count;
}
/*
 * Write handler for the "oom_adj" proc file.
 *
 * Copies at most sizeof(buffer) - 1 bytes from user space, parses a
 * leading integer with simple_strtol() (base argument 0) and, when the
 * value lies in [-16, 15], stores it in the task's oomkilladj field.  A
 * single newline directly after the number is consumed as well.
 *
 * Returns the number of bytes consumed on success, or:
 *   -EPERM   the caller lacks CAP_SYS_RESOURCE
 *   -EFAULT  the user buffer could not be read
 *   -EIO     the input does not start with a number
 *   -EINVAL  the parsed value is outside [-16, 15]
 *
 * oomkilladj is only modified on success.
 */
static ssize_t oom_adjust_write(struct file *file, const char *buf,
				size_t count, loff_t *ppos)
{
	struct task_struct *task = proc_task(file->f_dentry->d_inode);
	char buffer[8], *end;
	int oom_adjust;

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;

	/* Zero-fill first so the copied text is always NUL-terminated. */
	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;

	oom_adjust = simple_strtol(buffer, &end, 0);

	/*
	 * Reject input with no leading number *before* touching the task.
	 * Otherwise garbage (or the unparsed tail that user space retries
	 * after a short write, e.g. the "abc" of "15abc") would be read as
	 * 0 and silently overwrite the current adjustment.
	 */
	if (end == buffer)
		return -EIO;
	if (oom_adjust < -16 || oom_adjust > 15)
		return -EINVAL;

	if (*end == '\n')
		end++;

	task->oomkilladj = oom_adjust;
	return end - buffer;
}
/*
 * File operations installed on the "oom_adj" proc entries: reads report
 * the current adjustment, writes update it.  Uses C99 designated
 * initializers, matching the other file_operations tables in this file.
 */
static struct file_operations proc_oom_adjust_operations = {
	.read		= oom_adjust_read,
	.write		= oom_adjust_write,
};
static struct inode_operations proc_mem_inode_operations = { static struct inode_operations proc_mem_inode_operations = {
.permission = proc_permission, .permission = proc_permission,
}; };
...@@ -1336,6 +1406,15 @@ static struct dentry *proc_pident_lookup(struct inode *dir, ...@@ -1336,6 +1406,15 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
ei->op.proc_read = proc_pid_schedstat; ei->op.proc_read = proc_pid_schedstat;
break; break;
#endif #endif
case PROC_TID_OOM_SCORE:
case PROC_TGID_OOM_SCORE:
inode->i_fop = &proc_info_file_operations;
ei->op.proc_read = proc_oom_score;
break;
case PROC_TID_OOM_ADJUST:
case PROC_TGID_OOM_ADJUST:
inode->i_fop = &proc_oom_adjust_operations;
break;
default: default:
printk("procfs: impossible type (%d)",p->type); printk("procfs: impossible type (%d)",p->type);
iput(inode); iput(inode);
......
...@@ -614,7 +614,19 @@ struct task_struct { ...@@ -614,7 +614,19 @@ struct task_struct {
struct key *process_keyring; /* keyring private to this process (CLONE_THREAD) */ struct key *process_keyring; /* keyring private to this process (CLONE_THREAD) */
struct key *thread_keyring; /* keyring private to this thread */ struct key *thread_keyring; /* keyring private to this thread */
#endif #endif
unsigned short used_math; /*
* Must be changed atomically so it shouldn't be
* a shareable bitflag.
*/
unsigned char used_math;
/*
* OOM kill score adjustment (bit shift).
* Cannot live together with used_math since
* used_math and oomkilladj can be changed at the
* same time, so they would race if they're in the
* same atomic block.
*/
short oomkilladj;
char comm[TASK_COMM_LEN]; char comm[TASK_COMM_LEN];
/* file system info */ /* file system info */
int link_count, total_link_count; int link_count, total_link_count;
......
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
* of least surprise ... (be careful when you change it) * of least surprise ... (be careful when you change it)
*/ */
static unsigned long badness(struct task_struct *p, unsigned long uptime) unsigned long badness(struct task_struct *p, unsigned long uptime)
{ {
unsigned long points, cpu_time, run_time, s; unsigned long points, cpu_time, run_time, s;
...@@ -99,6 +99,17 @@ static unsigned long badness(struct task_struct *p, unsigned long uptime) ...@@ -99,6 +99,17 @@ static unsigned long badness(struct task_struct *p, unsigned long uptime)
*/ */
if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO))
points /= 4; points /= 4;
/*
* Adjust the score by oomkilladj.
*/
if (p->oomkilladj) {
if (p->oomkilladj > 0)
points <<= p->oomkilladj;
else
points >>= -(p->oomkilladj);
}
#ifdef DEBUG #ifdef DEBUG
printk(KERN_DEBUG "OOMkill: task %d (%s) got %d points\n", printk(KERN_DEBUG "OOMkill: task %d (%s) got %d points\n",
p->pid, p->comm, points); p->pid, p->comm, points);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment