Commit fa0cbbf1 authored by David Rientjes's avatar David Rientjes Committed by Linus Torvalds

mm, oom: reintroduce /proc/pid/oom_adj

This is mostly a revert of 01dc52eb ("oom: remove deprecated oom_adj")
from Davidlohr Bueso.

It reintroduces /proc/pid/oom_adj for backwards compatibility with earlier
kernels.  It simply scales the value linearly when /proc/pid/oom_score_adj
is written.

The major difference is that its scheduled removal is no longer included
in Documentation/feature-removal-schedule.txt.  We do warn users with a
single printk, though, to suggest the more powerful and supported
/proc/pid/oom_score_adj interface.
Reported-by: default avatarArtem S. Tashkinov <t.artem@lycos.com>
Signed-off-by: default avatarDavid Rientjes <rientjes@google.com>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent f4bcd79c
...@@ -33,7 +33,7 @@ Table of Contents ...@@ -33,7 +33,7 @@ Table of Contents
2 Modifying System Parameters 2 Modifying System Parameters
3 Per-Process Parameters 3 Per-Process Parameters
3.1 /proc/<pid>/oom_score_adj - Adjust the oom-killer 3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj - Adjust the oom-killer
score score
3.2 /proc/<pid>/oom_score - Display current oom-killer score 3.2 /proc/<pid>/oom_score - Display current oom-killer score
3.3 /proc/<pid>/io - Display the IO accounting fields 3.3 /proc/<pid>/io - Display the IO accounting fields
...@@ -1320,10 +1320,10 @@ of the kernel. ...@@ -1320,10 +1320,10 @@ of the kernel.
CHAPTER 3: PER-PROCESS PARAMETERS CHAPTER 3: PER-PROCESS PARAMETERS
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
3.1 /proc/<pid>/oom_score_adj- Adjust the oom-killer score 3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj- Adjust the oom-killer score
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
This file can be used to adjust the badness heuristic used to select which These file can be used to adjust the badness heuristic used to select which
process gets killed in out of memory conditions. process gets killed in out of memory conditions.
The badness heuristic assigns a value to each candidate task ranging from 0 The badness heuristic assigns a value to each candidate task ranging from 0
...@@ -1361,6 +1361,12 @@ same system, cpuset, mempolicy, or memory controller resources to use at least ...@@ -1361,6 +1361,12 @@ same system, cpuset, mempolicy, or memory controller resources to use at least
equivalent to discounting 50% of the task's allowed memory from being considered equivalent to discounting 50% of the task's allowed memory from being considered
as scoring against the task. as scoring against the task.
For backwards compatibility with previous kernels, /proc/<pid>/oom_adj may also
be used to tune the badness score. Its acceptable values range from -16
(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17
(OOM_DISABLE) to disable oom killing entirely for that task. Its value is
scaled linearly with /proc/<pid>/oom_score_adj.
The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last
value set by a CAP_SYS_RESOURCE process. To reduce the value any lower value set by a CAP_SYS_RESOURCE process. To reduce the value any lower
requires CAP_SYS_RESOURCE. requires CAP_SYS_RESOURCE.
...@@ -1375,7 +1381,9 @@ minimal amount of work. ...@@ -1375,7 +1381,9 @@ minimal amount of work.
------------------------------------------------------------- -------------------------------------------------------------
This file can be used to check the current score used by the oom-killer is for This file can be used to check the current score used by the oom-killer is for
any given <pid>. any given <pid>. Use it together with /proc/<pid>/oom_score_adj to tune which
process should be killed in an out-of-memory situation.
3.3 /proc/<pid>/io - Display the IO accounting fields 3.3 /proc/<pid>/io - Display the IO accounting fields
------------------------------------------------------- -------------------------------------------------------
......
...@@ -873,6 +873,113 @@ static const struct file_operations proc_environ_operations = { ...@@ -873,6 +873,113 @@ static const struct file_operations proc_environ_operations = {
.release = mem_release, .release = mem_release,
}; };
static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
char buffer[PROC_NUMBUF];
int oom_adj = OOM_ADJUST_MIN;
size_t len;
unsigned long flags;
if (!task)
return -ESRCH;
if (lock_task_sighand(task, &flags)) {
if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
oom_adj = OOM_ADJUST_MAX;
else
oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
OOM_SCORE_ADJ_MAX;
unlock_task_sighand(task, &flags);
}
put_task_struct(task);
len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
return simple_read_from_buffer(buf, count, ppos, buffer, len);
}
static ssize_t oom_adj_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct task_struct *task;
char buffer[PROC_NUMBUF];
int oom_adj;
unsigned long flags;
int err;
memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count)) {
err = -EFAULT;
goto out;
}
err = kstrtoint(strstrip(buffer), 0, &oom_adj);
if (err)
goto out;
if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
oom_adj != OOM_DISABLE) {
err = -EINVAL;
goto out;
}
task = get_proc_task(file->f_path.dentry->d_inode);
if (!task) {
err = -ESRCH;
goto out;
}
task_lock(task);
if (!task->mm) {
err = -EINVAL;
goto err_task_lock;
}
if (!lock_task_sighand(task, &flags)) {
err = -ESRCH;
goto err_task_lock;
}
/*
* Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
* value is always attainable.
*/
if (oom_adj == OOM_ADJUST_MAX)
oom_adj = OOM_SCORE_ADJ_MAX;
else
oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
if (oom_adj < task->signal->oom_score_adj &&
!capable(CAP_SYS_RESOURCE)) {
err = -EACCES;
goto err_sighand;
}
/*
* /proc/pid/oom_adj is provided for legacy purposes, ask users to use
* /proc/pid/oom_score_adj instead.
*/
printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
current->comm, task_pid_nr(current), task_pid_nr(task),
task_pid_nr(task));
task->signal->oom_score_adj = oom_adj;
trace_oom_score_adj_update(task);
err_sighand:
unlock_task_sighand(task, &flags);
err_task_lock:
task_unlock(task);
put_task_struct(task);
out:
return err < 0 ? err : count;
}
static const struct file_operations proc_oom_adj_operations = {
.read = oom_adj_read,
.write = oom_adj_write,
.llseek = generic_file_llseek,
};
static ssize_t oom_score_adj_read(struct file *file, char __user *buf, static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos) size_t count, loff_t *ppos)
{ {
...@@ -2598,6 +2705,7 @@ static const struct pid_entry tgid_base_stuff[] = { ...@@ -2598,6 +2705,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("cgroup", S_IRUGO, proc_cgroup_operations), REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif #endif
INF("oom_score", S_IRUGO, proc_oom_score), INF("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL #ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
...@@ -2964,6 +3072,7 @@ static const struct pid_entry tid_base_stuff[] = { ...@@ -2964,6 +3072,7 @@ static const struct pid_entry tid_base_stuff[] = {
REG("cgroup", S_IRUGO, proc_cgroup_operations), REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif #endif
INF("oom_score", S_IRUGO, proc_oom_score), INF("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL #ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
......
...@@ -8,4 +8,13 @@ ...@@ -8,4 +8,13 @@
#define OOM_SCORE_ADJ_MIN (-1000) #define OOM_SCORE_ADJ_MIN (-1000)
#define OOM_SCORE_ADJ_MAX 1000 #define OOM_SCORE_ADJ_MAX 1000
/*
* /proc/<pid>/oom_adj set to -17 protects from the oom killer for legacy
* purposes.
*/
#define OOM_DISABLE (-17)
/* inclusive */
#define OOM_ADJUST_MIN (-16)
#define OOM_ADJUST_MAX 15
#endif /* _UAPI__INCLUDE_LINUX_OOM_H */ #endif /* _UAPI__INCLUDE_LINUX_OOM_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment