Commit 9c2136be, authored by Delyan Kratunov and committed by Peter Zijlstra

sched/tracing: Append prev_state to tp args instead

Commit fa2c3254 (sched/tracing: Don't re-read p->state when emitting
sched_switch event, 2022-01-20) added a new prev_state argument to the
sched_switch tracepoint, before the prev task_struct pointer.

This reordering of arguments broke BPF programs that use the raw
tracepoint (e.g. tp_btf programs). The type of the second argument has
changed and existing programs that assume a task_struct* argument
(e.g. for bpf_task_storage access) will now fail to verify.

If we instead append the new argument to the end, all existing programs
would continue to work and can conditionally extract the prev_state
argument on supported kernel versions.

Fixes: fa2c3254 (sched/tracing: Don't re-read p->state when emitting sched_switch event, 2022-01-20)
Signed-off-by: Delyan Kratunov <delyank@fb.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Link: https://lkml.kernel.org/r/c8a6930dfdd58a4a5755fc01732675472979732b.camel@fb.com
parent c5eb0a61
...@@ -222,11 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt, ...@@ -222,11 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt,
TRACE_EVENT(sched_switch, TRACE_EVENT(sched_switch,
TP_PROTO(bool preempt, TP_PROTO(bool preempt,
unsigned int prev_state,
struct task_struct *prev, struct task_struct *prev,
struct task_struct *next), struct task_struct *next,
unsigned int prev_state),
TP_ARGS(preempt, prev_state, prev, next), TP_ARGS(preempt, prev, next, prev_state),
TP_STRUCT__entry( TP_STRUCT__entry(
__array( char, prev_comm, TASK_COMM_LEN ) __array( char, prev_comm, TASK_COMM_LEN )
......
...@@ -6382,7 +6382,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) ...@@ -6382,7 +6382,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
migrate_disable_switch(rq, prev); migrate_disable_switch(rq, prev);
psi_sched_switch(prev, next, !task_on_rq_queued(prev)); psi_sched_switch(prev, next, !task_on_rq_queued(prev));
trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next); trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
/* Also unlocks the rq: */ /* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf); rq = context_switch(rq, prev, next, &rf);
......
...@@ -404,9 +404,9 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) ...@@ -404,9 +404,9 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
static void static void
ftrace_graph_probe_sched_switch(void *ignore, bool preempt, ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
unsigned int prev_state,
struct task_struct *prev, struct task_struct *prev,
struct task_struct *next) struct task_struct *next,
unsigned int prev_state)
{ {
unsigned long long timestamp; unsigned long long timestamp;
int index; int index;
......
...@@ -7420,9 +7420,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) ...@@ -7420,9 +7420,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
static void static void
ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
unsigned int prev_state,
struct task_struct *prev, struct task_struct *prev,
struct task_struct *next) struct task_struct *next,
unsigned int prev_state)
{ {
struct trace_array *tr = data; struct trace_array *tr = data;
struct trace_pid_list *pid_list; struct trace_pid_list *pid_list;
......
...@@ -773,9 +773,9 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable) ...@@ -773,9 +773,9 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable)
static void static void
event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
unsigned int prev_state,
struct task_struct *prev, struct task_struct *prev,
struct task_struct *next) struct task_struct *next,
unsigned int prev_state)
{ {
struct trace_array *tr = data; struct trace_array *tr = data;
struct trace_pid_list *no_pid_list; struct trace_pid_list *no_pid_list;
...@@ -799,9 +799,9 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, ...@@ -799,9 +799,9 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
static void static void
event_filter_pid_sched_switch_probe_post(void *data, bool preempt, event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
unsigned int prev_state,
struct task_struct *prev, struct task_struct *prev,
struct task_struct *next) struct task_struct *next,
unsigned int prev_state)
{ {
struct trace_array *tr = data; struct trace_array *tr = data;
struct trace_pid_list *no_pid_list; struct trace_pid_list *no_pid_list;
......
...@@ -1168,9 +1168,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) ...@@ -1168,9 +1168,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
*/ */
static void static void
trace_sched_switch_callback(void *data, bool preempt, trace_sched_switch_callback(void *data, bool preempt,
unsigned int prev_state,
struct task_struct *p, struct task_struct *p,
struct task_struct *n) struct task_struct *n,
unsigned int prev_state)
{ {
struct osnoise_variables *osn_var = this_cpu_osn_var(); struct osnoise_variables *osn_var = this_cpu_osn_var();
......
...@@ -22,8 +22,8 @@ static DEFINE_MUTEX(sched_register_mutex); ...@@ -22,8 +22,8 @@ static DEFINE_MUTEX(sched_register_mutex);
static void static void
probe_sched_switch(void *ignore, bool preempt, probe_sched_switch(void *ignore, bool preempt,
unsigned int prev_state, struct task_struct *prev, struct task_struct *next,
struct task_struct *prev, struct task_struct *next) unsigned int prev_state)
{ {
int flags; int flags;
......
...@@ -426,8 +426,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, ...@@ -426,8 +426,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
static void notrace static void notrace
probe_wakeup_sched_switch(void *ignore, bool preempt, probe_wakeup_sched_switch(void *ignore, bool preempt,
unsigned int prev_state, struct task_struct *prev, struct task_struct *next,
struct task_struct *prev, struct task_struct *next) unsigned int prev_state)
{ {
struct trace_array_cpu *data; struct trace_array_cpu *data;
u64 T0, T1, delta; u64 T0, T1, delta;
......
...@@ -25,11 +25,11 @@ TRACE_CUSTOM_EVENT(sched_switch, ...@@ -25,11 +25,11 @@ TRACE_CUSTOM_EVENT(sched_switch,
* that the custom event is using. * that the custom event is using.
*/ */
TP_PROTO(bool preempt, TP_PROTO(bool preempt,
unsigned int prev_state,
struct task_struct *prev, struct task_struct *prev,
struct task_struct *next), struct task_struct *next,
unsigned int prev_state),
TP_ARGS(preempt, prev_state, prev, next), TP_ARGS(preempt, prev, next, prev_state),
/* /*
* The next fields are where the customization happens. * The next fields are where the customization happens.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment