Commit 3431a940 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 AVX512 status update from Ingo Molnar:
 "This adds a new ABI that the main scheduler probably doesn't want to
  deal with but HPC job schedulers might want to use: the
  AVX512_elapsed_ms field in the new /proc/<pid>/arch_status task status
  file, which allows the user-space job scheduler to cluster such tasks,
  to avoid turbo frequency drops"

* 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  Documentation/filesystems/proc.txt: Add arch_status file
  x86/process: Add AVX-512 usage elapsed time to /proc/pid/arch_status
  proc: Add /proc/<pid>/arch_status
parents 5b7a2095 711486fd
......@@ -45,6 +45,7 @@ Table of Contents
3.9 /proc/<pid>/map_files - Information about memory mapped files
3.10 /proc/<pid>/timerslack_ns - Task timerslack value
3.11 /proc/<pid>/patch_state - Livepatch patch operation state
3.12 /proc/<pid>/arch_status - Task architecture specific information
4 Configuring procfs
4.1 Mount options
......@@ -1948,6 +1949,45 @@ patched. If the patch is being enabled, then the task has already been
patched. If the patch is being disabled, then the task hasn't been
unpatched yet.
3.12 /proc/<pid>/arch_status - task architecture specific status
-------------------------------------------------------------------
When CONFIG_PROC_PID_ARCH_STATUS is enabled, this file displays the
architecture specific status of the task.
Example
-------
$ cat /proc/6753/arch_status
AVX512_elapsed_ms: 8
Description
-----------
x86 specific entries:
---------------------
AVX512_elapsed_ms:
------------------
If AVX512 is supported on the machine, this entry shows the milliseconds
elapsed since the last time AVX512 usage was recorded. The recording
happens on a best effort basis when a task is scheduled out. This means
that the value depends on two factors:
1) The time which the task spent on the CPU without being scheduled
out. With CPU isolation and a single runnable task this can take
several seconds.
2) The time since the task was scheduled out last. Depending on the
reason for being scheduled out (time slice exhausted, syscall ...)
this can be arbitrary long time.
As a consequence the value cannot be considered precise and authoritative
information. The application which uses this information has to be aware
of the overall scenario on the system in order to determine whether a
task is a real AVX512 user or not. Precise information can be obtained
with performance counters.
A special value of '-1' indicates that no AVX512 usage was recorded, thus
the task is unlikely an AVX512 user, but depends on the workload and the
scheduling scenario, it also could be a false negative mentioned above.
------------------------------------------------------------------------------
Configuring procfs
......
......@@ -220,6 +220,7 @@ config X86
select USER_STACKTRACE_SUPPORT
select VIRT_TO_BUS
select X86_FEATURE_NAMES if PROC_FS
select PROC_PID_ARCH_STATUS if PROC_FS
config INSTRUCTION_DECODER
def_bool y
......
......@@ -8,6 +8,8 @@
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/pkeys.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
......@@ -1231,3 +1233,48 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
return 0;
}
#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
* Report the amount of time elapsed in millisecond since last AVX512
* use in the task.
*/
static void avx512_status(struct seq_file *m, struct task_struct *task)
{
unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
long delta;
if (!timestamp) {
/*
* Report -1 if no AVX512 usage
*/
delta = -1;
} else {
delta = (long)(jiffies - timestamp);
/*
* Cap to LONG_MAX if time difference > LONG_MAX
*/
if (delta < 0)
delta = LONG_MAX;
delta = jiffies_to_msecs(delta);
}
seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
seq_putc(m, '\n');
}
/*
* Report architecture specific information
*/
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
/*
* Report AVX512 state if the processor and build option supported.
*/
if (cpu_feature_enabled(X86_FEATURE_AVX512F))
avx512_status(m, task);
return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
......@@ -98,3 +98,7 @@ config PROC_CHILDREN
Say Y if you are running any user-space software which takes benefit from
this interface. For example, rkt is such a piece of software.
config PROC_PID_ARCH_STATUS
def_bool n
depends on PROC_FS
......@@ -3061,6 +3061,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_STACKLEAK_METRICS
ONE("stack_depth", S_IRUGO, proc_stack_depth),
#endif
#ifdef CONFIG_PROC_PID_ARCH_STATUS
ONE("arch_status", S_IRUGO, proc_pid_arch_status),
#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
......@@ -3448,6 +3451,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_LIVEPATCH
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
#ifdef CONFIG_PROC_PID_ARCH_STATUS
ONE("arch_status", S_IRUGO, proc_pid_arch_status),
#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
......
......@@ -75,6 +75,15 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
void *data);
extern struct pid *tgid_pidfd_to_pid(const struct file *file);
#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
* The architecture which selects CONFIG_PROC_PID_ARCH_STATUS must
* provide proc_pid_arch_status() definition.
*/
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task);
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
#else /* CONFIG_PROC_FS */
static inline void proc_root_init(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment