perf evlist: Refcount mmaps

We need to know how many fds are using a perf mmap via
PERF_EVENT_IOC_SET_OUTPUT so that we know when to ditch an mmap, so
refcount it.

v2: Automatically unmap it when the refcount hits one, which will happen
when all fds are filtered by perf_evlist__filter_pollfd(), in later
patches.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jean Pihet <jean.pihet@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20140908153824.GG2773@kernel.org
Link: http://lkml.kernel.org/n/tip-cpv7v2lw0g74ucmxa39xdpms@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 1b85337d
...@@ -28,6 +28,8 @@ ...@@ -28,6 +28,8 @@
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) #define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);
void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
struct thread_map *threads) struct thread_map *threads)
{ {
...@@ -651,14 +653,36 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) ...@@ -651,14 +653,36 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
return event; return event;
} }
/*
 * perf_mmap__empty - tell whether every event in the ring buffer was consumed
 * @md: the ring buffer mmap to inspect
 *
 * Compares the head reported by perf_mmap__read_head() with md->prev, the
 * position that perf_evlist__mmap_consume() writes back as the tail.
 *
 * Returns true when both positions match, i.e. there is nothing left to
 * read.  The comparison must be an equality: perf_evlist__mmap_consume()
 * drops the last reference (and thus unmaps the buffer) when this returns
 * true, so answering "empty" while head != prev would throw away events
 * that are still pending.
 */
static bool perf_mmap__empty(struct perf_mmap *md)
{
	return perf_mmap__read_head(md) == md->prev;
}
static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
{
++evlist->mmap[idx].refcnt;
}
static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
BUG_ON(evlist->mmap[idx].refcnt == 0);
if (--evlist->mmap[idx].refcnt == 0)
__perf_evlist__munmap(evlist, idx);
}
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{ {
struct perf_mmap *md = &evlist->mmap[idx];
if (!evlist->overwrite) { if (!evlist->overwrite) {
struct perf_mmap *md = &evlist->mmap[idx];
unsigned int old = md->prev; unsigned int old = md->prev;
perf_mmap__write_tail(md, old); perf_mmap__write_tail(md, old);
} }
if (md->refcnt == 1 && perf_mmap__empty(md))
perf_evlist__mmap_put(evlist, idx);
} }
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
...@@ -666,6 +690,7 @@ static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) ...@@ -666,6 +690,7 @@ static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
if (evlist->mmap[idx].base != NULL) { if (evlist->mmap[idx].base != NULL) {
munmap(evlist->mmap[idx].base, evlist->mmap_len); munmap(evlist->mmap[idx].base, evlist->mmap_len);
evlist->mmap[idx].base = NULL; evlist->mmap[idx].base = NULL;
evlist->mmap[idx].refcnt = 0;
} }
} }
...@@ -699,6 +724,20 @@ struct mmap_params { ...@@ -699,6 +724,20 @@ struct mmap_params {
static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
struct mmap_params *mp, int fd) struct mmap_params *mp, int fd)
{ {
/*
* The last one will be done at perf_evlist__mmap_consume(), so that we
* make sure we don't prevent tools from consuming every last event in
* the ring buffer.
*
* I.e. we can get the POLLHUP meaning that the fd doesn't exist
* anymore, but the last events for it are still in the ring buffer,
* waiting to be consumed.
*
* Tools can choose to ignore this at their own discretion, but the
* evlist layer can't just drop it when filtering events in
* perf_evlist__filter_pollfd().
*/
evlist->mmap[idx].refcnt = 2;
evlist->mmap[idx].prev = 0; evlist->mmap[idx].prev = 0;
evlist->mmap[idx].mask = mp->mask; evlist->mmap[idx].mask = mp->mask;
evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot, evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
...@@ -734,10 +773,14 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, ...@@ -734,10 +773,14 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
} else { } else {
if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
return -1; return -1;
perf_evlist__mmap_get(evlist, idx);
} }
if (perf_evlist__add_pollfd(evlist, fd) < 0) if (perf_evlist__add_pollfd(evlist, fd) < 0) {
perf_evlist__mmap_put(evlist, idx);
return -1; return -1;
}
if ((evsel->attr.read_format & PERF_FORMAT_ID) && if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0)
......
...@@ -18,9 +18,15 @@ struct record_opts; ...@@ -18,9 +18,15 @@ struct record_opts;
#define PERF_EVLIST__HLIST_BITS 8 #define PERF_EVLIST__HLIST_BITS 8
#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
/**
* struct perf_mmap - perf's ring buffer mmap details
*
* @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
*/
struct perf_mmap { struct perf_mmap {
void *base; void *base;
int mask; int mask;
int refcnt;
unsigned int prev; unsigned int prev;
char event_copy[PERF_SAMPLE_MAX_SIZE]; char event_copy[PERF_SAMPLE_MAX_SIZE];
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment