Commit f46b1652 authored by Daniel Bristot de Oliveira, committed by Steven Rostedt (VMware)

trace/hwlat: Implement the per-cpu mode

Implements the per-cpu mode, in which a sampling thread is created for
each CPU listed in "cpus" (and tracing_cpumask).

The per-cpu mode has the potential to speed up the hwlat detection by
running on multiple CPUs at the same time, at the cost of higher CPU
usage with irqs disabled. Use with care.
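
As an illustration (not part of this patch; it assumes tracefs mounted at
/sys/kernel/tracing and a kernel with the hwlat tracer built in), the mode
is selected before the tracer is enabled:

  # cd /sys/kernel/tracing
  # echo 3 > tracing_cpumask           # e.g., sample only CPUs 0 and 1
  # echo per-cpu > hwlat_detector/mode
  # echo hwlat > current_tracer        # creates one hwlatd/N thread per CPU
  # cat trace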

[
  Changed get_cpu_data() to static.
Reported-by: kernel test robot <lkp@intel.com>
]

Link: https://lkml.kernel.org/r/ec06d0ab340e8460d293772faba19ad8a5c371aa.1624372313.git.bristot@redhat.com

Cc: Phil Auld <pauld@redhat.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Kate Carcia <kcarcia@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexandre Chartre <alexandre.chartre@oracle.com>
Cc: Clark Williams <williams@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
parent 7bb7d802
@@ -78,10 +78,11 @@ in /sys/kernel/tracing:
  - hwlat_detector/window	- amount of time between (width) runs (usecs)
  - hwlat_detector/mode	- the thread mode
 
-By default, the hwlat detector's kernel thread will migrate across each CPU
+By default, one hwlat detector's kernel thread will migrate across each CPU
 specified in cpumask at the beginning of a new window, in a round-robin
 fashion. This behavior can be changed by changing the thread mode,
 the available options are:
 
  - none:        do not force migration
  - round-robin: migrate across each CPU specified in cpumask [default]
+ - per-cpu:     create one thread for each cpu in tracing_cpumask
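
Illustrative note (not part of the patch): in per-cpu mode the sampling
threads show up as one task per allowed CPU, named hwlatd/N after the
snprintf() pattern in the code below, while the other modes run a single
hwlatd task:

  # ps -e | grep hwlatd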
@@ -54,9 +54,6 @@ static struct trace_array *hwlat_trace;
 #define DEFAULT_SAMPLE_WIDTH	500000			/* 0.5s */
 #define DEFAULT_LAT_THRESHOLD	10			/* 10us */
 
-/* sampling thread*/
-static struct task_struct *hwlat_kthread;
-
 static struct dentry *hwlat_sample_width;	/* sample width us */
 static struct dentry *hwlat_sample_window;	/* sample window us */
 static struct dentry *hwlat_thread_mode;	/* hwlat thread mode */
@@ -64,18 +61,26 @@ static struct dentry *hwlat_thread_mode; /* hwlat thread mode */
 enum {
 	MODE_NONE = 0,
 	MODE_ROUND_ROBIN,
+	MODE_PER_CPU,
 	MODE_MAX
 };
-static char *thread_mode_str[] = { "none", "round-robin" };
+static char *thread_mode_str[] = { "none", "round-robin", "per-cpu" };
 
 /* Save the previous tracing_thresh value */
 static unsigned long save_tracing_thresh;
 
-/* NMI timestamp counters */
-static u64 nmi_ts_start;
-static u64 nmi_total_ts;
-static int nmi_count;
-static int nmi_cpu;
+/* runtime kthread data */
+struct hwlat_kthread_data {
+	struct task_struct	*kthread;
+	/* NMI timestamp counters */
+	u64			nmi_ts_start;
+	u64			nmi_total_ts;
+	int			nmi_count;
+	int			nmi_cpu;
+};
+
+struct hwlat_kthread_data hwlat_single_cpu_data;
+DEFINE_PER_CPU(struct hwlat_kthread_data, hwlat_per_cpu_data);
 
 /* Tells NMIs to call back to the hwlat tracer to record timestamps */
 bool trace_hwlat_callback_enabled;
@@ -112,6 +117,14 @@ static struct hwlat_data {
 	.thread_mode		= MODE_ROUND_ROBIN
 };
 
+static struct hwlat_kthread_data *get_cpu_data(void)
+{
+	if (hwlat_data.thread_mode == MODE_PER_CPU)
+		return this_cpu_ptr(&hwlat_per_cpu_data);
+	else
+		return &hwlat_single_cpu_data;
+}
+
 static bool hwlat_busy;
 
 static void trace_hwlat_sample(struct hwlat_sample *sample)
@@ -149,7 +162,9 @@ static void trace_hwlat_sample(struct hwlat_sample *sample)
 
 void trace_hwlat_callback(bool enter)
 {
-	if (smp_processor_id() != nmi_cpu)
+	struct hwlat_kthread_data *kdata = get_cpu_data();
+
+	if (!kdata->kthread)
 		return;
 
 	/*
@@ -158,13 +173,13 @@ void trace_hwlat_callback(bool enter)
 	 */
 	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
 		if (enter)
-			nmi_ts_start = time_get();
+			kdata->nmi_ts_start = time_get();
 		else
-			nmi_total_ts += time_get() - nmi_ts_start;
+			kdata->nmi_total_ts += time_get() - kdata->nmi_ts_start;
 	}
 
 	if (enter)
-		nmi_count++;
+		kdata->nmi_count++;
 }
 
 /**
@@ -176,6 +191,7 @@ void trace_hwlat_callback(bool enter)
  */
 static int get_sample(void)
 {
+	struct hwlat_kthread_data *kdata = get_cpu_data();
 	struct trace_array *tr = hwlat_trace;
 	struct hwlat_sample s;
 	time_type start, t1, t2, last_t2;
@@ -188,9 +204,8 @@ static int get_sample(void)
 	do_div(thresh, NSEC_PER_USEC); /* modifies interval value */
 
-	nmi_cpu = smp_processor_id();
-	nmi_total_ts = 0;
-	nmi_count = 0;
+	kdata->nmi_total_ts = 0;
+	kdata->nmi_count = 0;
 	/* Make sure NMIs see this first */
 	barrier();
@@ -260,15 +275,15 @@ static int get_sample(void)
 		ret = 1;
 
 		/* We read in microseconds */
-		if (nmi_total_ts)
-			do_div(nmi_total_ts, NSEC_PER_USEC);
+		if (kdata->nmi_total_ts)
+			do_div(kdata->nmi_total_ts, NSEC_PER_USEC);
 
 		hwlat_data.count++;
 		s.seqnum = hwlat_data.count;
 		s.duration = sample;
 		s.outer_duration = outer_sample;
-		s.nmi_total_ts = nmi_total_ts;
-		s.nmi_count = nmi_count;
+		s.nmi_total_ts = kdata->nmi_total_ts;
+		s.nmi_count = kdata->nmi_count;
 		s.count = count;
 		trace_hwlat_sample(&s);
@@ -364,21 +379,40 @@ static int kthread_fn(void *data)
 }
 
 /*
- * start_kthread - Kick off the hardware latency sampling/detector kthread
+ * stop_single_kthread - Inform the hardware latency sampling/detector kthread to stop
+ *
+ * This kicks the running hardware latency sampling/detector kernel thread and
+ * tells it to stop sampling now. Use this on unload and at system shutdown.
+ */
+static void stop_single_kthread(void)
+{
+	struct hwlat_kthread_data *kdata = get_cpu_data();
+	struct task_struct *kthread = kdata->kthread;
+
+	if (!kthread)
+		return;
+
+	kthread_stop(kthread);
+	kdata->kthread = NULL;
+}
+
+/*
+ * start_single_kthread - Kick off the hardware latency sampling/detector kthread
  *
  * This starts the kernel thread that will sit and sample the CPU timestamp
  * counter (TSC or similar) and look for potential hardware latencies.
  */
-static int start_kthread(struct trace_array *tr)
+static int start_single_kthread(struct trace_array *tr)
 {
+	struct hwlat_kthread_data *kdata = get_cpu_data();
 	struct cpumask *current_mask = &save_cpumask;
 	struct task_struct *kthread;
 	int next_cpu;
 
-	if (hwlat_kthread)
+	if (kdata->kthread)
 		return 0;
 
 	kthread = kthread_create(kthread_fn, NULL, "hwlatd");
 	if (IS_ERR(kthread)) {
 		pr_err(BANNER "could not start sampling thread\n");
@@ -400,24 +434,97 @@ static int start_kthread(struct trace_array *tr)
 	sched_setaffinity(kthread->pid, current_mask);
 
-	hwlat_kthread = kthread;
+	kdata->kthread = kthread;
 	wake_up_process(kthread);
 
 	return 0;
 }
 
 /*
- * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
- *
- * This kicks the running hardware latency sampling/detector kernel thread and
- * tells it to stop sampling now. Use this on unload and at system shutdown.
+ * stop_cpu_kthread - Stop a hwlat cpu kthread
  */
-static void stop_kthread(void)
+static void stop_cpu_kthread(unsigned int cpu)
 {
-	if (!hwlat_kthread)
-		return;
-	kthread_stop(hwlat_kthread);
-	hwlat_kthread = NULL;
+	struct task_struct *kthread;
+
+	kthread = per_cpu(hwlat_per_cpu_data, cpu).kthread;
+	if (kthread)
+		kthread_stop(kthread);
+}
+
+/*
+ * stop_per_cpu_kthreads - Inform the hardware latency sampling/detector kthreads to stop
+ *
+ * This kicks the running hardware latency sampling/detector kernel threads and
+ * tells them to stop sampling now. Use this on unload and at system shutdown.
+ */
+static void stop_per_cpu_kthreads(void)
+{
+	unsigned int cpu;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		stop_cpu_kthread(cpu);
+	put_online_cpus();
+}
+
+/*
+ * start_cpu_kthread - Start a hwlat cpu kthread
+ */
+static int start_cpu_kthread(unsigned int cpu)
+{
+	struct task_struct *kthread;
+	char comm[24];
+
+	snprintf(comm, 24, "hwlatd/%d", cpu);
+
+	kthread = kthread_create_on_cpu(kthread_fn, NULL, cpu, comm);
+	if (IS_ERR(kthread)) {
+		pr_err(BANNER "could not start sampling thread\n");
+		return -ENOMEM;
+	}
+
+	per_cpu(hwlat_per_cpu_data, cpu).kthread = kthread;
+	wake_up_process(kthread);
+
+	return 0;
+}
+
+/*
+ * start_per_cpu_kthreads - Kick off the hardware latency sampling/detector kthreads
+ *
+ * This starts the kernel threads that will sit on potentially all cpus and
+ * sample the CPU timestamp counter (TSC or similar) and look for potential
+ * hardware latencies.
+ */
+static int start_per_cpu_kthreads(struct trace_array *tr)
+{
+	struct cpumask *current_mask = &save_cpumask;
+	unsigned int cpu;
+	int retval;
+
+	get_online_cpus();
+	/*
+	 * Run only on CPUs in which hwlat is allowed to run.
+	 */
+	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
+
+	for_each_online_cpu(cpu)
+		per_cpu(hwlat_per_cpu_data, cpu).kthread = NULL;
+
+	for_each_cpu(cpu, current_mask) {
+		retval = start_cpu_kthread(cpu);
+		if (retval)
+			goto out_error;
+	}
+	put_online_cpus();
+
+	return 0;
+
+out_error:
+	put_online_cpus();
+	stop_per_cpu_kthreads();
+
+	return retval;
 }
 
 /*
@@ -600,7 +707,8 @@ static void hwlat_tracer_stop(struct trace_array *tr);
  * The "none" sets the allowed cpumask for a single hwlatd thread at the
  * startup and lets the scheduler handle the migration. The default mode is
  * the "round-robin" one, in which a single hwlatd thread runs, migrating
- * among the allowed CPUs in a round-robin fashion.
+ * among the allowed CPUs in a round-robin fashion. The "per-cpu" mode
+ * creates one hwlatd thread per allowed CPU.
  */
 static ssize_t hwlat_mode_write(struct file *filp, const char __user *ubuf,
 				 size_t cnt, loff_t *ppos)
@@ -724,14 +832,20 @@ static void hwlat_tracer_start(struct trace_array *tr)
 {
 	int err;
 
-	err = start_kthread(tr);
+	if (hwlat_data.thread_mode == MODE_PER_CPU)
+		err = start_per_cpu_kthreads(tr);
+	else
+		err = start_single_kthread(tr);
 	if (err)
 		pr_err(BANNER "Cannot start hwlat kthread\n");
 }
 
 static void hwlat_tracer_stop(struct trace_array *tr)
 {
-	stop_kthread();
+	if (hwlat_data.thread_mode == MODE_PER_CPU)
+		stop_per_cpu_kthreads();
+	else
+		stop_single_kthread();
 }
 
 static int hwlat_tracer_init(struct trace_array *tr)
@@ -760,7 +874,7 @@ static int hwlat_tracer_init(struct trace_array *tr)
 
 static void hwlat_tracer_reset(struct trace_array *tr)
 {
-	stop_kthread();
+	hwlat_tracer_stop(tr);
 
 	/* the tracing threshold is static between runs */
 	last_tracing_thresh = tracing_thresh;
...