Commit 92f22a38 authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar

perf_counter: update mmap() counter read

Paul noted that we don't need SMP barriers for the mmap() counter read
because its always on the same cpu (otherwise you can't access the hw
counter anyway).

So remove the SMP barriers and replace them with regular compiler
barriers.

Further, update the comment to include a race free method of reading
said hardware counter. The primary change is putting the pmc_read
inside the seq-loop, otherwise we can still race and read rubbish.
Noticed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Orig-LKML-Reference: <20090402091319.577951445@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 5872bdb8
@@ -167,30 +167,28 @@ struct perf_counter_mmap_page {
 	/*
 	 * Bits needed to read the hw counters in user-space.
 	 *
-	 * The index and offset should be read atomically using the seqlock:
-	 *
-	 *   __u32 seq, index;
-	 *   __s64 offset;
+	 *   u32 seq;
+	 *   s64 count;
 	 *
 	 *   again:
-	 *     rmb();
 	 *     seq = pc->lock;
-	 *
 	 *     if (unlikely(seq & 1)) {
 	 *       cpu_relax();
 	 *       goto again;
 	 *     }
 	 *
-	 *     index = pc->index;
-	 *     offset = pc->offset;
+	 *     if (pc->index) {
+	 *       count = pmc_read(pc->index - 1);
+	 *       count += pc->offset;
+	 *     } else
+	 *       goto regular_read;
 	 *
-	 *     rmb();
+	 *     barrier();
 	 *     if (pc->lock != seq)
 	 *       goto again;
 	 *
-	 * After this, index contains architecture specific counter index + 1,
-	 * so that 0 means unavailable, offset contains the value to be added
-	 * to the result of the raw timer read to obtain this counter's value.
+	 * NOTE: for obvious reason this only works on self-monitoring
+	 *       processes.
 	 */
 	__u32	lock;		/* seqlock for synchronization */
 	__u32	index;		/* hardware counter identifier */
...
@@ -1340,13 +1340,13 @@ void perf_counter_update_userpage(struct perf_counter *counter)
 	 */
 	preempt_disable();
 	++userpg->lock;
-	smp_wmb();
+	barrier();
 	userpg->index = counter->hw.idx;
 	userpg->offset = atomic64_read(&counter->count);
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		userpg->offset -= atomic64_read(&counter->hw.prev_count);
-	smp_wmb();
+	barrier();
 	++userpg->lock;
 	preempt_enable();
 unlock:
...
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment