Commit fbf94829 authored by Ingo Molnar's avatar Ingo Molnar

perf sched: Implement the scheduling workload replay engine

Integrate the schedbench.c bits with the raw trace events
that we get from the perf machinery, and activate the
workload replayer/simulator.

Example of a captured 'make -j' workload:

$ perf sched

  run measurement overhead: 90 nsecs
  sleep measurement overhead: 2724743 nsecs
  the run test took 1000081 nsecs
  the sleep test took 2981111 nsecs
  version = 0.5
  ...
  nr_run_events:        70
  nr_sleep_events:      66
  nr_wakeup_events:     9
  target-less wakeups:  71
  multi-target wakeups: 47
  run events optimized: 139
  task      0 (                perf:      6607), nr_events: 2
  task      1 (                perf:      6608), nr_events: 6
  task      2 (                    :         0), nr_events: 1
  task      3 (                make:      6609), nr_events: 5
  task      4 (                  sh:      6610), nr_events: 4
  task      5 (                make:      6611), nr_events: 6
  task      6 (                  sh:      6612), nr_events: 4
  task      7 (                make:      6613), nr_events: 5
  task      8 (        migration/11:        25), nr_events: 1
  task      9 (        migration/13:        29), nr_events: 1
  task     10 (        migration/15:        33), nr_events: 1
  task     11 (         migration/9:        21), nr_events: 1
  task     12 (                  sh:      6614), nr_events: 4
  task     13 (                make:      6615), nr_events: 5
  task     14 (                  sh:      6616), nr_events: 4
  task     15 (                make:      6617), nr_events: 7
  task     16 (         migration/3:         9), nr_events: 1
  task     17 (         migration/5:        13), nr_events: 1
  task     18 (         migration/7:        17), nr_events: 1
  task     19 (         migration/1:         5), nr_events: 1
  task     20 (                  sh:      6618), nr_events: 4
  task     21 (                make:      6619), nr_events: 5
  task     22 (                  sh:      6620), nr_events: 4
  task     23 (                make:      6621), nr_events: 10
  task     24 (                  sh:      6623), nr_events: 3
  task     25 (                 gcc:      6624), nr_events: 4
  task     26 (                 gcc:      6625), nr_events: 4
  task     27 (                 gcc:      6626), nr_events: 5
  task     28 (            collect2:      6627), nr_events: 5
  task     29 (                  sh:      6622), nr_events: 1
  task     30 (                make:      6628), nr_events: 7
  task     31 (                  sh:      6630), nr_events: 4
  task     32 (                 gcc:      6631), nr_events: 4
  task     33 (                  sh:      6629), nr_events: 1
  task     34 (                 gcc:      6632), nr_events: 4
  task     35 (                 gcc:      6633), nr_events: 4
  task     36 (            collect2:      6634), nr_events: 4
  task     37 (                make:      6635), nr_events: 8
  task     38 (                  sh:      6637), nr_events: 4
  task     39 (                  sh:      6636), nr_events: 1
  task     40 (                 gcc:      6638), nr_events: 4
  task     41 (                 gcc:      6639), nr_events: 4
  task     42 (                 gcc:      6640), nr_events: 4
  task     43 (            collect2:      6641), nr_events: 4
  task     44 (                make:      6642), nr_events: 6
  task     45 (                  sh:      6643), nr_events: 5
  task     46 (                  sh:      6644), nr_events: 3
  task     47 (                  sh:      6645), nr_events: 4
  task     48 (                make:      6646), nr_events: 6
  task     49 (                  sh:      6647), nr_events: 3
  task     50 (                make:      6648), nr_events: 5
  task     51 (                  sh:      6649), nr_events: 5
  task     52 (                  sh:      6650), nr_events: 6
  task     53 (                make:      6651), nr_events: 4
  task     54 (                make:      6652), nr_events: 5
  task     55 (                make:      6653), nr_events: 4
  task     56 (                make:      6654), nr_events: 4
  task     57 (                make:      6655), nr_events: 5
  task     58 (                  sh:      6656), nr_events: 4
  task     59 (                 gcc:      6657), nr_events: 9
  task     60 (         ksoftirqd/3:        10), nr_events: 1
  task     61 (                 gcc:      6658), nr_events: 4
  task     62 (                make:      6659), nr_events: 5
  task     63 (                  sh:      6660), nr_events: 3
  task     64 (                 gcc:      6661), nr_events: 5
  task     65 (            collect2:      6662), nr_events: 4
  ------------------------------------------------------------
  #1  : 256.745, ravg: 256.74, cpu: 0.00 / 0.00
  #2  : 439.372, ravg: 275.01, cpu: 0.00 / 0.00
  #3  : 411.971, ravg: 288.70, cpu: 0.00 / 0.00
  #4  : 385.500, ravg: 298.38, cpu: 0.00 / 0.00
  #5  : 366.526, ravg: 305.20, cpu: 0.00 / 0.00
  #6  : 381.281, ravg: 312.81, cpu: 0.00 / 0.00
  #7  : 410.756, ravg: 322.60, cpu: 0.00 / 0.00
  #8  : 368.009, ravg: 327.14, cpu: 0.00 / 0.00
  #9  : 408.098, ravg: 335.24, cpu: 0.00 / 0.00
  #10 : 368.582, ravg: 338.57, cpu: 0.00 / 0.00

I.e. we successfully analyzed the trace, replayed it
via real threads and measured the replayed workload's
scheduling properties.

This is how it looked like in 'top' output:

   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
  7164 mingo     20   0 1434m 8080  888 R 57.0  0.1   0:02.04 :perf
  7165 mingo     20   0 1434m 8080  888 R 41.8  0.1   0:01.52 :perf
  7228 mingo     20   0 1434m 8080  888 R 39.8  0.1   0:01.44 :gcc
  7225 mingo     20   0 1434m 8080  888 R 33.8  0.1   0:01.26 :gcc
  7202 mingo     20   0 1434m 8080  888 R 31.2  0.1   0:01.16 :sh
  7222 mingo     20   0 1434m 8080  888 R 25.2  0.1   0:00.96 :sh
  7211 mingo     20   0 1434m 8080  888 R 21.9  0.1   0:00.82 :sh
  7213 mingo     20   0 1434m 8080  888 D 19.2  0.1   0:00.74 :sh
  7194 mingo     20   0 1434m 8080  888 D 18.6  0.1   0:00.72 :make

There's still various kinks in it - more patches to come.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent ec156764
...@@ -57,7 +57,7 @@ static u64 sample_type; ...@@ -57,7 +57,7 @@ static u64 sample_type;
#define BUG_ON(x) assert(!(x)) #define BUG_ON(x) assert(!(x))
#define DEBUG 1 #define DEBUG 0
typedef unsigned long long nsec_t; typedef unsigned long long nsec_t;
...@@ -238,15 +238,14 @@ static struct sched_event *last_event(struct task_desc *task) ...@@ -238,15 +238,14 @@ static struct sched_event *last_event(struct task_desc *task)
} }
static void static void
add_sched_event_run(struct task_desc *task, nsec_t timestamp, add_sched_event_run(struct task_desc *task, nsec_t timestamp, u64 duration)
unsigned long duration)
{ {
struct sched_event *event, *curr_event = last_event(task); struct sched_event *event, *curr_event = last_event(task);
/* /*
* optimize an existing RUN event by merging this one * optimize an existing RUN event by merging this one
* to it: * to it:
*/ */
if (curr_event && curr_event->type == SCHED_EVENT_RUN) { if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
nr_run_events_optimized++; nr_run_events_optimized++;
curr_event->duration += duration; curr_event->duration += duration;
...@@ -376,7 +375,7 @@ void parse_line(char *line) ...@@ -376,7 +375,7 @@ void parse_line(char *line)
dprintk("parsed: %s - %ld %Ld: %s - <%s %ld> (%ld %ld)\n", dprintk("parsed: %s - %ld %Ld: %s - <%s %ld> (%ld %ld)\n",
comm, comm,
pid, pid,
timestamp, timestamp,
func_str, func_str,
comm2, comm2,
pid2, pid2,
...@@ -429,7 +428,7 @@ static void add_cross_task_wakeups(void) ...@@ -429,7 +428,7 @@ static void add_cross_task_wakeups(void)
} }
static void static void
process_sched_event(struct task_desc *this_task, struct sched_event *event) process_sched_event(struct task_desc *this_task __used, struct sched_event *event)
{ {
int ret = 0; int ret = 0;
nsec_t now; nsec_t now;
...@@ -744,9 +743,9 @@ static void run_one_test(void) ...@@ -744,9 +743,9 @@ static void run_one_test(void)
#if 0 #if 0
/* /*
* rusage statistics done by the parent, these are less * rusage statistics done by the parent, these are less
* accurate than the sum_exec_runtime based statistics: * accurate than the sum_exec_runtime based statistics:
*/ */
printk(" [%0.2f / %0.2f]", printk(" [%0.2f / %0.2f]",
(double)parent_cpu_usage/1e6, (double)parent_cpu_usage/1e6,
(double)runavg_parent_cpu_usage/1e6); (double)runavg_parent_cpu_usage/1e6);
...@@ -798,16 +797,128 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) ...@@ -798,16 +797,128 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
return 0; return 0;
} }
static void process_sched_wakeup_event(struct event *event, struct trace_wakeup_event {
u32 size;
u16 common_type;
u8 common_flags;
u8 common_preempt_count;
u32 common_pid;
u32 common_tgid;
char comm[16];
u32 pid;
u32 prio;
u32 success;
u32 cpu;
};
static void
process_sched_wakeup_event(struct trace_wakeup_event *wakeup_event, struct event *event,
int cpu __used, u64 timestamp __used, struct thread *thread __used) int cpu __used, u64 timestamp __used, struct thread *thread __used)
{ {
struct task_desc *waker, *wakee;
printf("sched_wakeup event %p\n", event); printf("sched_wakeup event %p\n", event);
printf(" ... pid %d woke up %s/%d\n",
wakeup_event->common_pid,
wakeup_event->comm,
wakeup_event->pid);
waker = register_pid(wakeup_event->common_pid, "<unknown>");
wakee = register_pid(wakeup_event->pid, wakeup_event->comm);
add_sched_event_wakeup(waker, timestamp, wakee);
} }
static void process_sched_switch_event(struct event *event, struct trace_switch_event {
u32 size;
u16 common_type;
u8 common_flags;
u8 common_preempt_count;
u32 common_pid;
u32 common_tgid;
char prev_comm[16];
u32 prev_pid;
u32 prev_prio;
u64 prev_state;
char next_comm[16];
u32 next_pid;
u32 next_prio;
};
#define MAX_CPUS 4096
unsigned long cpu_last_switched[MAX_CPUS];
static void
process_sched_switch_event(struct trace_switch_event *switch_event, struct event *event,
int cpu __used, u64 timestamp __used, struct thread *thread __used) int cpu __used, u64 timestamp __used, struct thread *thread __used)
{ {
struct task_desc *prev, *next;
u64 timestamp0;
s64 delta;
printf("sched_switch event %p\n", event); printf("sched_switch event %p\n", event);
if (cpu >= MAX_CPUS || cpu < 0)
return;
timestamp0 = cpu_last_switched[cpu];
if (timestamp0)
delta = timestamp - timestamp0;
else
delta = 0;
if (delta < 0)
die("hm, delta: %Ld < 0 ?\n", delta);
printf(" ... switch from %s/%d to %s/%d [ran %Ld nsecs]\n",
switch_event->prev_comm, switch_event->prev_pid,
switch_event->next_comm, switch_event->next_pid,
delta);
prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
next = register_pid(switch_event->next_pid, switch_event->next_comm);
cpu_last_switched[cpu] = timestamp;
add_sched_event_run(prev, timestamp, delta);
}
struct trace_fork_event {
u32 size;
u16 common_type;
u8 common_flags;
u8 common_preempt_count;
u32 common_pid;
u32 common_tgid;
char parent_comm[16];
u32 parent_pid;
char child_comm[16];
u32 child_pid;
};
static void
process_sched_fork_event(struct trace_fork_event *fork_event, struct event *event,
int cpu __used, u64 timestamp __used, struct thread *thread __used)
{
printf("sched_fork event %p\n", event);
printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
register_pid(fork_event->parent_pid, fork_event->parent_comm);
register_pid(fork_event->child_pid, fork_event->child_comm);
}
static void process_sched_exit_event(struct event *event,
int cpu __used, u64 timestamp __used, struct thread *thread __used)
{
printf("sched_exit event %p\n", event);
} }
static void static void
...@@ -833,9 +944,15 @@ process_raw_event(event_t *raw_event, void *more_data, ...@@ -833,9 +944,15 @@ process_raw_event(event_t *raw_event, void *more_data,
raw_event->header.type, type, event->name); raw_event->header.type, type, event->name);
if (!strcmp(event->name, "sched_switch")) if (!strcmp(event->name, "sched_switch"))
process_sched_switch_event(event, cpu, timestamp, thread); process_sched_switch_event(more_data, event, cpu, timestamp, thread);
if (!strcmp(event->name, "sched_wakeup")) if (!strcmp(event->name, "sched_wakeup"))
process_sched_wakeup_event(event, cpu, timestamp, thread); process_sched_wakeup_event(more_data, event, cpu, timestamp, thread);
if (!strcmp(event->name, "sched_wakeup_new"))
process_sched_wakeup_event(more_data, event, cpu, timestamp, thread);
if (!strcmp(event->name, "sched_process_fork"))
process_sched_fork_event(more_data, event, cpu, timestamp, thread);
if (!strcmp(event->name, "sched_process_exit"))
process_sched_exit_event(event, cpu, timestamp, thread);
} }
static int static int
...@@ -1053,7 +1170,7 @@ static const struct option options[] = { ...@@ -1053,7 +1170,7 @@ static const struct option options[] = {
int cmd_sched(int argc, const char **argv, const char *prefix __used) int cmd_sched(int argc, const char **argv, const char *prefix __used)
{ {
long nr_iterations = LONG_MAX, i; long nr_iterations = 10, i;
symbol__init(); symbol__init();
page_size = getpagesize(); page_size = getpagesize();
...@@ -1068,8 +1185,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used) ...@@ -1068,8 +1185,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
usage_with_options(annotate_usage, options); usage_with_options(annotate_usage, options);
} }
// setup_pager();
setup_pager();
calibrate_run_measurement_overhead(); calibrate_run_measurement_overhead();
calibrate_sleep_measurement_overhead(); calibrate_sleep_measurement_overhead();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment