Commit fa588151 authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar

perf: Optimize the hotpath by converting the perf output buffer to local_t

Since there is now only a single writer, we can use
local_t instead and avoid all these pesky LOCK insn.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent ef60777c
...@@ -485,6 +485,7 @@ struct perf_guest_info_callbacks { ...@@ -485,6 +485,7 @@ struct perf_guest_info_callbacks {
#include <linux/ftrace.h> #include <linux/ftrace.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <asm/atomic.h> #include <asm/atomic.h>
#include <asm/local.h>
#define PERF_MAX_STACK_DEPTH 255 #define PERF_MAX_STACK_DEPTH 255
...@@ -588,20 +589,18 @@ struct perf_mmap_data { ...@@ -588,20 +589,18 @@ struct perf_mmap_data {
#ifdef CONFIG_PERF_USE_VMALLOC #ifdef CONFIG_PERF_USE_VMALLOC
struct work_struct work; struct work_struct work;
#endif #endif
int data_order; int data_order; /* allocation order */
int nr_pages; /* nr of data pages */ int nr_pages; /* nr of data pages */
int writable; /* are we writable */ int writable; /* are we writable */
int nr_locked; /* nr pages mlocked */ int nr_locked; /* nr pages mlocked */
atomic_t poll; /* POLL_ for wakeups */ atomic_t poll; /* POLL_ for wakeups */
atomic_t events; /* event_id limit */
atomic_long_t head; /* write position */ local_t head; /* write position */
local_t nest; /* nested writers */
atomic_t wakeup; /* needs a wakeup */ local_t events; /* event limit */
atomic_t lost; /* nr records lost */ local_t wakeup; /* needs a wakeup */
local_t lost; /* nr records lost */
atomic_t nest; /* nested writers */
long watermark; /* wakeup watermark */ long watermark; /* wakeup watermark */
......
...@@ -2916,7 +2916,7 @@ static void perf_output_get_handle(struct perf_output_handle *handle) ...@@ -2916,7 +2916,7 @@ static void perf_output_get_handle(struct perf_output_handle *handle)
struct perf_mmap_data *data = handle->data; struct perf_mmap_data *data = handle->data;
preempt_disable(); preempt_disable();
atomic_inc(&data->nest); local_inc(&data->nest);
} }
static void perf_output_put_handle(struct perf_output_handle *handle) static void perf_output_put_handle(struct perf_output_handle *handle)
...@@ -2925,13 +2925,13 @@ static void perf_output_put_handle(struct perf_output_handle *handle) ...@@ -2925,13 +2925,13 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
unsigned long head; unsigned long head;
again: again:
head = atomic_long_read(&data->head); head = local_read(&data->head);
/* /*
* IRQ/NMI can happen here, which means we can miss a head update. * IRQ/NMI can happen here, which means we can miss a head update.
*/ */
if (!atomic_dec_and_test(&data->nest)) if (!local_dec_and_test(&data->nest))
return; return;
/* /*
...@@ -2945,12 +2945,12 @@ static void perf_output_put_handle(struct perf_output_handle *handle) ...@@ -2945,12 +2945,12 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
* Now check if we missed an update, rely on the (compiler) * Now check if we missed an update, rely on the (compiler)
* barrier in atomic_dec_and_test() to re-read data->head. * barrier in atomic_dec_and_test() to re-read data->head.
*/ */
if (unlikely(head != atomic_long_read(&data->head))) { if (unlikely(head != local_read(&data->head))) {
atomic_inc(&data->nest); local_inc(&data->nest);
goto again; goto again;
} }
if (atomic_xchg(&data->wakeup, 0)) if (local_xchg(&data->wakeup, 0))
perf_output_wakeup(handle); perf_output_wakeup(handle);
preempt_enable(); preempt_enable();
...@@ -3031,7 +3031,7 @@ int perf_output_begin(struct perf_output_handle *handle, ...@@ -3031,7 +3031,7 @@ int perf_output_begin(struct perf_output_handle *handle,
if (!data->nr_pages) if (!data->nr_pages)
goto out; goto out;
have_lost = atomic_read(&data->lost); have_lost = local_read(&data->lost);
if (have_lost) if (have_lost)
size += sizeof(lost_event); size += sizeof(lost_event);
...@@ -3045,24 +3045,24 @@ int perf_output_begin(struct perf_output_handle *handle, ...@@ -3045,24 +3045,24 @@ int perf_output_begin(struct perf_output_handle *handle,
*/ */
tail = ACCESS_ONCE(data->user_page->data_tail); tail = ACCESS_ONCE(data->user_page->data_tail);
smp_rmb(); smp_rmb();
offset = head = atomic_long_read(&data->head); offset = head = local_read(&data->head);
head += size; head += size;
if (unlikely(!perf_output_space(data, tail, offset, head))) if (unlikely(!perf_output_space(data, tail, offset, head)))
goto fail; goto fail;
} while (atomic_long_cmpxchg(&data->head, offset, head) != offset); } while (local_cmpxchg(&data->head, offset, head) != offset);
handle->offset = offset; handle->offset = offset;
handle->head = head; handle->head = head;
if (head - tail > data->watermark) if (head - tail > data->watermark)
atomic_inc(&data->wakeup); local_inc(&data->wakeup);
if (have_lost) { if (have_lost) {
lost_event.header.type = PERF_RECORD_LOST; lost_event.header.type = PERF_RECORD_LOST;
lost_event.header.misc = 0; lost_event.header.misc = 0;
lost_event.header.size = sizeof(lost_event); lost_event.header.size = sizeof(lost_event);
lost_event.id = event->id; lost_event.id = event->id;
lost_event.lost = atomic_xchg(&data->lost, 0); lost_event.lost = local_xchg(&data->lost, 0);
perf_output_put(handle, lost_event); perf_output_put(handle, lost_event);
} }
...@@ -3070,7 +3070,7 @@ int perf_output_begin(struct perf_output_handle *handle, ...@@ -3070,7 +3070,7 @@ int perf_output_begin(struct perf_output_handle *handle,
return 0; return 0;
fail: fail:
atomic_inc(&data->lost); local_inc(&data->lost);
perf_output_put_handle(handle); perf_output_put_handle(handle);
out: out:
rcu_read_unlock(); rcu_read_unlock();
...@@ -3086,10 +3086,10 @@ void perf_output_end(struct perf_output_handle *handle) ...@@ -3086,10 +3086,10 @@ void perf_output_end(struct perf_output_handle *handle)
int wakeup_events = event->attr.wakeup_events; int wakeup_events = event->attr.wakeup_events;
if (handle->sample && wakeup_events) { if (handle->sample && wakeup_events) {
int events = atomic_inc_return(&data->events); int events = local_inc_return(&data->events);
if (events >= wakeup_events) { if (events >= wakeup_events) {
atomic_sub(wakeup_events, &data->events); local_sub(wakeup_events, &data->events);
atomic_inc(&data->wakeup); local_inc(&data->wakeup);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment