Commit 39e38362 authored by Linus Torvalds

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Thomas Gleixner:
 "Four patches which all address lock inversions and deadlocks in the
  perf core code and the Intel debug store"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86: Fix perf,x86,cpuhp deadlock
  perf/core: Fix ctx::mutex deadlock
  perf/core: Fix another perf,trace,cpuhp lock inversion
  perf/core: Fix lock inversion between perf,trace,cpuhp
parents 8c76e31a efe951d3
...@@ -372,10 +372,9 @@ static int alloc_pebs_buffer(int cpu) ...@@ -372,10 +372,9 @@ static int alloc_pebs_buffer(int cpu)
static void release_pebs_buffer(int cpu) static void release_pebs_buffer(int cpu)
{ {
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
struct debug_store *ds = hwev->ds;
void *cea; void *cea;
if (!ds || !x86_pmu.pebs) if (!x86_pmu.pebs)
return; return;
kfree(per_cpu(insn_buffer, cpu)); kfree(per_cpu(insn_buffer, cpu));
...@@ -384,7 +383,6 @@ static void release_pebs_buffer(int cpu) ...@@ -384,7 +383,6 @@ static void release_pebs_buffer(int cpu)
/* Clear the fixmap */ /* Clear the fixmap */
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
ds_clear_cea(cea, x86_pmu.pebs_buffer_size); ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
ds->pebs_buffer_base = 0;
dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size); dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
hwev->ds_pebs_vaddr = NULL; hwev->ds_pebs_vaddr = NULL;
} }
...@@ -419,16 +417,14 @@ static int alloc_bts_buffer(int cpu) ...@@ -419,16 +417,14 @@ static int alloc_bts_buffer(int cpu)
static void release_bts_buffer(int cpu) static void release_bts_buffer(int cpu)
{ {
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
struct debug_store *ds = hwev->ds;
void *cea; void *cea;
if (!ds || !x86_pmu.bts) if (!x86_pmu.bts)
return; return;
/* Clear the fixmap */ /* Clear the fixmap */
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
ds_clear_cea(cea, BTS_BUFFER_SIZE); ds_clear_cea(cea, BTS_BUFFER_SIZE);
ds->bts_buffer_base = 0;
dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE); dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
hwev->ds_bts_vaddr = NULL; hwev->ds_bts_vaddr = NULL;
} }
...@@ -454,16 +450,22 @@ void release_ds_buffers(void) ...@@ -454,16 +450,22 @@ void release_ds_buffers(void)
if (!x86_pmu.bts && !x86_pmu.pebs) if (!x86_pmu.bts && !x86_pmu.pebs)
return; return;
get_online_cpus(); for_each_possible_cpu(cpu)
for_each_online_cpu(cpu) release_ds_buffer(cpu);
for_each_possible_cpu(cpu) {
/*
* Again, ignore errors from offline CPUs, they will no longer
* observe cpu_hw_events.ds and not program the DS_AREA when
* they come up.
*/
fini_debug_store_on_cpu(cpu); fini_debug_store_on_cpu(cpu);
}
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
release_pebs_buffer(cpu); release_pebs_buffer(cpu);
release_bts_buffer(cpu); release_bts_buffer(cpu);
release_ds_buffer(cpu);
} }
put_online_cpus();
} }
void reserve_ds_buffers(void) void reserve_ds_buffers(void)
...@@ -483,8 +485,6 @@ void reserve_ds_buffers(void) ...@@ -483,8 +485,6 @@ void reserve_ds_buffers(void)
if (!x86_pmu.pebs) if (!x86_pmu.pebs)
pebs_err = 1; pebs_err = 1;
get_online_cpus();
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
if (alloc_ds_buffer(cpu)) { if (alloc_ds_buffer(cpu)) {
bts_err = 1; bts_err = 1;
...@@ -521,11 +521,14 @@ void reserve_ds_buffers(void) ...@@ -521,11 +521,14 @@ void reserve_ds_buffers(void)
if (x86_pmu.pebs && !pebs_err) if (x86_pmu.pebs && !pebs_err)
x86_pmu.pebs_active = 1; x86_pmu.pebs_active = 1;
for_each_online_cpu(cpu) for_each_possible_cpu(cpu) {
/*
* Ignores wrmsr_on_cpu() errors for offline CPUs they
* will get this call through intel_pmu_cpu_starting().
*/
init_debug_store_on_cpu(cpu); init_debug_store_on_cpu(cpu);
} }
}
put_online_cpus();
} }
/* /*
......
...@@ -1231,6 +1231,10 @@ static void put_ctx(struct perf_event_context *ctx) ...@@ -1231,6 +1231,10 @@ static void put_ctx(struct perf_event_context *ctx)
* perf_event_context::lock * perf_event_context::lock
* perf_event::mmap_mutex * perf_event::mmap_mutex
* mmap_sem * mmap_sem
*
* cpu_hotplug_lock
* pmus_lock
* cpuctx->mutex / perf_event_context::mutex
*/ */
static struct perf_event_context * static struct perf_event_context *
perf_event_ctx_lock_nested(struct perf_event *event, int nesting) perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
...@@ -4196,6 +4200,7 @@ int perf_event_release_kernel(struct perf_event *event) ...@@ -4196,6 +4200,7 @@ int perf_event_release_kernel(struct perf_event *event)
{ {
struct perf_event_context *ctx = event->ctx; struct perf_event_context *ctx = event->ctx;
struct perf_event *child, *tmp; struct perf_event *child, *tmp;
LIST_HEAD(free_list);
/* /*
* If we got here through err_file: fput(event_file); we will not have * If we got here through err_file: fput(event_file); we will not have
...@@ -4268,8 +4273,7 @@ int perf_event_release_kernel(struct perf_event *event) ...@@ -4268,8 +4273,7 @@ int perf_event_release_kernel(struct perf_event *event)
struct perf_event, child_list); struct perf_event, child_list);
if (tmp == child) { if (tmp == child) {
perf_remove_from_context(child, DETACH_GROUP); perf_remove_from_context(child, DETACH_GROUP);
list_del(&child->child_list); list_move(&child->child_list, &free_list);
free_event(child);
/* /*
* This matches the refcount bump in inherit_event(); * This matches the refcount bump in inherit_event();
* this can't be the last reference. * this can't be the last reference.
...@@ -4284,6 +4288,11 @@ int perf_event_release_kernel(struct perf_event *event) ...@@ -4284,6 +4288,11 @@ int perf_event_release_kernel(struct perf_event *event)
} }
mutex_unlock(&event->child_mutex); mutex_unlock(&event->child_mutex);
list_for_each_entry_safe(child, tmp, &free_list, child_list) {
list_del(&child->child_list);
free_event(child);
}
no_ctx: no_ctx:
put_event(event); /* Must be the 'last' reference */ put_event(event); /* Must be the 'last' reference */
return 0; return 0;
...@@ -8516,6 +8525,29 @@ perf_event_set_addr_filter(struct perf_event *event, char *filter_str) ...@@ -8516,6 +8525,29 @@ perf_event_set_addr_filter(struct perf_event *event, char *filter_str)
return ret; return ret;
} }
/*
* perf_tracepoint_set_filter - install a tracepoint filter string on @event.
*
* @event:      perf event whose tracepoint filter is being set
* @filter_str: filter expression, passed through unchanged
*
* Thin wrapper around ftrace_profile_set_filter() that releases
* event->ctx->mutex for the duration of the call and re-takes it before
* returning; the caller is therefore expected to hold that mutex on entry
* and still holds it on return.
*
* Returns the value returned by ftrace_profile_set_filter().
*/
static int
perf_tracepoint_set_filter(struct perf_event *event, char *filter_str)
{
/* Sampled once: the same ctx pointer is used for both unlock and re-lock. */
struct perf_event_context *ctx = event->ctx;
int ret;
/*
* Beware, here be dragons!!
*
* The tracepoint muck will deadlock against ctx->mutex, but the tracepoint
* stuff does not actually need it. So temporarily drop ctx->mutex. As per
* perf_event_ctx_lock() we already have a reference on ctx.
*
* This can result in event getting moved to a different ctx, but that
* does not affect the tracepoint state.
*/
mutex_unlock(&ctx->mutex);
ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
/* Re-acquire the mutex that was dropped above before handing control back. */
mutex_lock(&ctx->mutex);
return ret;
}
static int perf_event_set_filter(struct perf_event *event, void __user *arg) static int perf_event_set_filter(struct perf_event *event, void __user *arg)
{ {
char *filter_str; char *filter_str;
...@@ -8532,8 +8564,7 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg) ...@@ -8532,8 +8564,7 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)
if (IS_ENABLED(CONFIG_EVENT_TRACING) && if (IS_ENABLED(CONFIG_EVENT_TRACING) &&
event->attr.type == PERF_TYPE_TRACEPOINT) event->attr.type == PERF_TYPE_TRACEPOINT)
ret = ftrace_profile_set_filter(event, event->attr.config, ret = perf_tracepoint_set_filter(event, filter_str);
filter_str);
else if (has_addr_filter(event)) else if (has_addr_filter(event))
ret = perf_event_set_addr_filter(event, filter_str); ret = perf_event_set_addr_filter(event, filter_str);
...@@ -9168,7 +9199,13 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) ...@@ -9168,7 +9199,13 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
if (!try_module_get(pmu->module)) if (!try_module_get(pmu->module))
return -ENODEV; return -ENODEV;
if (event->group_leader != event) { /*
* A number of pmu->event_init() methods iterate the sibling_list to,
* for example, validate if the group fits on the PMU. Therefore,
* if this is a sibling event, acquire the ctx->mutex to protect
* the sibling_list.
*/
if (event->group_leader != event && pmu->task_ctx_nr != perf_sw_context) {
/* /*
* This ctx->mutex can nest when we're called through * This ctx->mutex can nest when we're called through
* inheritance. See the perf_event_ctx_lock_nested() comment. * inheritance. See the perf_event_ctx_lock_nested() comment.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment