Commit 9597f088 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'perf-tools-fixes-for-v6.0-2022-09-21' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - Fix polling of system-wide events related to mixing per-cpu and
   per-thread events.

 - Do not check whether /proc/modules is unchanged when copying
   /proc/kcore, as a change there doesn't get in the way of
   post-processing analysis.

 - Include program header in ELF files generated for JIT files, so that
   they can be opened by tools using elfutils libraries.

 - Enter namespaces when synthesizing build-ids.

 - Fix some bugs related to a recent cpu_map overhaul where we should
   be using an index and not the cpu number (a short sketch of that
   pitfall follows this list).

 - Fix BPF program ELF section name, using the naming expected by libbpf
   when using BPF counters in 'perf stat'.

 - Add a new test for perf stat cgroup BPF counter.

 - Adjust check on 'perf test wp' for older kernels, where the
   PERF_EVENT_IOC_MODIFY_ATTRIBUTES ioctl isn't supported.

 - Sync x86 cpufeatures with the kernel sources, no changes in tooling.
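
The cpu_map fixes above all stem from the same pitfall, so here is a
standalone sketch of it (the map contents below are hypothetical, not
from the perf sources): libperf cpu maps hold only the cpus actually
mapped, so whenever the map is sparse the map index and the cpu number
diverge, and any per-event storage sized by the map must be addressed
by index.

#include <stdio.h>

/* Minimal stand-in for a sparse cpu map: only cpus 0, 2 and 4 are
 * mapped, so the map index ("idx") and the cpu number diverge. */
int main(void)
{
	int map[] = { 0, 2, 4 };	/* cpu numbers held by the map */
	long counts[] = { 10, 20, 30 };	/* one slot per map entry */
	int nr = sizeof(map) / sizeof(map[0]);

	for (int idx = 0; idx < nr; idx++) {
		int cpu = map[idx];

		/* Storage sized by the map must be indexed by idx:
		 * counts[cpu] would read counts[4] for cpu 4, past
		 * the end of the array. */
		printf("idx %d -> cpu %d -> count %ld\n", idx, cpu, counts[idx]);
	}
	return 0;
}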

* tag 'perf-tools-fixes-for-v6.0-2022-09-21' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf tools: Honor namespace when synthesizing build-ids
  tools headers cpufeatures: Sync with the kernel sources
  perf kcore_copy: Do not check /proc/modules is unchanged
  libperf evlist: Fix polling of system-wide events
  perf record: Fix cpu mask bit setting for mixed mmaps
  perf test: Skip wp modify test on old kernels
  perf jit: Include program header in ELF files
  perf test: Add a new test for perf stat cgroup BPF counter
  perf stat: Use evsel->core.cpus to iterate cpus in BPF cgroup counters
  perf stat: Fix cpu map index in bperf cgroup code
  perf stat: Fix BPF program section name
parents dc164f4f 999e4eaa
@@ -457,7 +457,8 @@
 #define X86_BUG_ITLB_MULTIHIT		X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
 #define X86_BUG_SRBDS			X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
 #define X86_BUG_MMIO_STALE_DATA	X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_RETBLEED		X86_BUG(26) /* CPU is affected by RETBleed */
-#define X86_BUG_EIBRS_PBRSB		X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_MMIO_UNKNOWN		X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
+#define X86_BUG_RETBLEED		X86_BUG(27) /* CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB		X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */

 #endif /* _ASM_X86_CPUFEATURES_H */

@@ -441,6 +441,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
 	perf_evlist__for_each_entry(evlist, evsel) {
 		bool overwrite = evsel->attr.write_backward;
+		enum fdarray_flags flgs;
 		struct perf_mmap *map;
 		int *output, fd, cpu;
@@ -504,8 +505,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
 		revent = !overwrite ? POLLIN : 0;

-		if (!evsel->system_wide &&
-		    perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) {
+		flgs = evsel->system_wide ? fdarray_flag__nonfilterable : fdarray_flag__default;
+		if (perf_evlist__add_pollfd(evlist, fd, map, revent, flgs) < 0) {
 			perf_mmap__put(map);
 			return -1;
 		}

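For context on the polling fix above: libperf's fd array compacts the
poll set by dropping fds that reported no events, so a system-wide
event fd added with the default flag would not survive filtering.
Below is a minimal sketch of that rule with hypothetical names (the
real logic lives in libperf's fdarray code), not the perf sources:

#include <poll.h>

enum flag { FILTERABLE, NONFILTERABLE };

/* Sketch of the filtering rule: drop quiet, filterable entries and
 * keep everything else, returning the new size of the array. */
static int filter_fds(struct pollfd *fds, enum flag *flags, int nr)
{
	int kept = 0;

	for (int i = 0; i < nr; i++) {
		if (fds[i].revents == 0 && flags[i] == FILTERABLE)
			continue;	/* quiet and filterable: dropped */
		fds[kept] = fds[i];
		flags[kept] = flags[i];
		kept++;
	}
	return kept;	/* nonfilterable (system-wide) fds survive */
}
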
@@ -3371,6 +3371,8 @@ static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cp
 		return 0;

 	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+		if (cpu.cpu == -1)
+			continue;
 		/* Return ENODEV is input cpu is greater than max cpu */
 		if ((unsigned long)cpu.cpu > mask->nbits)
 			return -ENODEV;

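The cpu.cpu == -1 guard above skips the "any cpu" placeholder entry
that per-thread maps contribute when mixed with per-cpu mmaps: -1 is
not a valid bit index for the mask, and as an unsigned value it was
tripping the max-cpu check below and failing with ENODEV.
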
#!/bin/sh
# perf stat --bpf-counters --for-each-cgroup test
# SPDX-License-Identifier: GPL-2.0

set -e

test_cgroups=
if [ "$1" = "-v" ]; then
	verbose="1"
fi

# skip if --bpf-counters --for-each-cgroup is not supported
check_bpf_counter()
{
	if ! perf stat -a --bpf-counters --for-each-cgroup / true > /dev/null 2>&1; then
		if [ "${verbose}" = "1" ]; then
			echo "Skipping: --bpf-counters --for-each-cgroup not supported"
			perf --no-pager stat -a --bpf-counters --for-each-cgroup / true || true
		fi
		exit 2
	fi
}

# find two cgroups to measure
find_cgroups()
{
	# try usual systemd slices first
	if [ -d /sys/fs/cgroup/system.slice -a -d /sys/fs/cgroup/user.slice ]; then
		test_cgroups="system.slice,user.slice"
		return
	fi

	# try root and self cgroups
	local self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3)
	if [ -z ${self_cgrp} ]; then
		# cgroup v2 doesn't specify perf_event
		self_cgrp=$(grep ^0: /proc/self/cgroup | cut -d: -f3)
	fi

	if [ -z ${self_cgrp} ]; then
		test_cgroups="/"
	else
		test_cgroups="/,${self_cgrp}"
	fi
}

# As cgroup events are cpu-wide, we cannot simply compare the result.
# Just check if it runs without failure and has non-zero results.
check_system_wide_counted()
{
	local output

	output=$(perf stat -a --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, sleep 1 2>&1)
	if echo ${output} | grep -q -F "<not "; then
		echo "Some system-wide events are not counted"
		if [ "${verbose}" = "1" ]; then
			echo ${output}
		fi
		exit 1
	fi
}

check_cpu_list_counted()
{
	local output

	output=$(perf stat -C 1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1 2>&1)
	if echo ${output} | grep -q -F "<not "; then
		echo "Some CPU events are not counted"
		if [ "${verbose}" = "1" ]; then
			echo ${output}
		fi
		exit 1
	fi
}

check_bpf_counter
find_cgroups

check_system_wide_counted
check_cpu_list_counted

exit 0

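Assuming the script above is installed alongside perf's other shell
tests, it can be selected by a substring of its description, e.g.
"perf test -v for-each-cgroup"; its exit code 2 marks the test as
skipped when BPF counters are unsupported.
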
@@ -2,7 +2,9 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <errno.h>
 #include <sys/ioctl.h>
+#include <linux/compiler.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/kernel.h>
 #include "tests.h"
@@ -137,8 +139,7 @@ static int test__wp_rw(struct test_suite *test __maybe_unused,
 #endif
 }

-static int test__wp_modify(struct test_suite *test __maybe_unused,
-			   int subtest __maybe_unused)
+static int test__wp_modify(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 #if defined(__s390x__)
 	return TEST_SKIP;
@@ -160,6 +161,11 @@ static int test__wp_modify(struct test_suite *test __maybe_unused,
 	new_attr.disabled = 1;
 	ret = ioctl(fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr);
 	if (ret < 0) {
+		if (errno == ENOTTY) {
+			test->test_cases[subtest].skip_reason = "missing kernel support";
+			ret = TEST_SKIP;
+		}
+
 		pr_debug("ioctl(PERF_EVENT_IOC_MODIFY_ATTRIBUTES) failed\n");
 		close(fd);
 		return ret;

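Note the errno check: ENOTTY is what the kernel returns for an ioctl
request code it does not recognize, so it cleanly distinguishes "this
kernel predates PERF_EVENT_IOC_MODIFY_ATTRIBUTES" from a genuine
failure, and the test is reported as skipped instead of failed.
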
@@ -95,7 +95,7 @@ static int bperf_load_program(struct evlist *evlist)
 	perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) {
 		link = bpf_program__attach_perf_event(skel->progs.on_cgrp_switch,
-						      FD(cgrp_switch, cpu.cpu));
+						      FD(cgrp_switch, i));
 		if (IS_ERR(link)) {
 			pr_err("Failed to attach cgroup program\n");
 			err = PTR_ERR(link);
@@ -115,15 +115,15 @@ static int bperf_load_program(struct evlist *evlist)
 			evsel->cgrp = NULL;

 			/* open single copy of the events w/o cgroup */
-			err = evsel__open_per_cpu(evsel, evlist->core.all_cpus, -1);
+			err = evsel__open_per_cpu(evsel, evsel->core.cpus, -1);
 			if (err) {
 				pr_err("Failed to open first cgroup events\n");
 				goto out;
 			}

 			map_fd = bpf_map__fd(skel->maps.events);
-			perf_cpu_map__for_each_cpu(cpu, j, evlist->core.all_cpus) {
-				int fd = FD(evsel, cpu.cpu);
+			perf_cpu_map__for_each_cpu(cpu, j, evsel->core.cpus) {
+				int fd = FD(evsel, j);
 				__u32 idx = evsel->core.idx * total_cpus + cpu.cpu;

 				err = bpf_map_update_elem(map_fd, &idx, &fd,
@@ -269,7 +269,7 @@ static int bperf_cgrp__read(struct evsel *evsel)
 		goto out;
 	}

-	perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) {
+	perf_cpu_map__for_each_cpu(cpu, i, evsel->core.cpus) {
 		counts = perf_counts(evsel->counts, i, 0);
 		counts->val = values[cpu.cpu].counter;
 		counts->ena = values[cpu.cpu].enabled;

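All three hunks above are instances of the index-versus-cpu-number
pitfall sketched after the summary list: FD() slots and perf_counts()
entries are sized by the evsel's own cpu map and so are addressed by
the iteration index (i or j), while the values[] array read back from
the BPF map is still keyed by the absolute cpu number.
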
@@ -176,7 +176,7 @@ static int bperf_cgroup_count(void)
 }

 // This will be attached to cgroup-switches event for each cpu
-SEC("perf_events")
+SEC("perf_event")
 int BPF_PROG(on_cgrp_switch)
 {
 	return bperf_cgroup_count();

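The one-character rename above matters because libbpf derives the
program type from the ELF section name: "perf_event" is the name it
recognizes for BPF_PROG_TYPE_PERF_EVENT, and as of libbpf 1.0 unknown
section names make the load fail instead of being ignored. A minimal
standalone example of a correctly named program (an illustration, not
the perf source):

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* libbpf matches the "perf_event" section name and loads this as
 * BPF_PROG_TYPE_PERF_EVENT; "perf_events" would be rejected. */
SEC("perf_event")
int BPF_PROG(noop_sample)
{
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
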
@@ -253,6 +253,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
 	Elf_Data *d;
 	Elf_Scn *scn;
 	Elf_Ehdr *ehdr;
+	Elf_Phdr *phdr;
 	Elf_Shdr *shdr;
 	uint64_t eh_frame_base_offset;
 	char *strsym = NULL;
@@ -287,6 +288,19 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
 	ehdr->e_version = EV_CURRENT;
 	ehdr->e_shstrndx= unwinding ? 4 : 2; /* shdr index for section name */

+	/*
+	 * setup program header
+	 */
+	phdr = elf_newphdr(e, 1);
+	phdr[0].p_type = PT_LOAD;
+	phdr[0].p_offset = 0;
+	phdr[0].p_vaddr = 0;
+	phdr[0].p_paddr = 0;
+	phdr[0].p_filesz = csize;
+	phdr[0].p_memsz = csize;
+	phdr[0].p_flags = PF_X | PF_R;
+	phdr[0].p_align = 8;
+
 	/*
 	 * setup text section
 	 */

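The new program header is what elfutils-based consumers need: libdw
and friends translate addresses via PT_LOAD segments rather than
section headers, so the jitted DSOs perf emits could not be opened by
them before. As a quick check, listing the program headers of a jitted
object (for example with eu-readelf -l on one of the jitted-<pid>-<n>.so
files that perf inject -j produces) should now show a single
read/execute PT_LOAD covering the code.
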
@@ -53,8 +53,10 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent

 #if GEN_ELF_CLASS == ELFCLASS64
 #define elf_newehdr	elf64_newehdr
+#define elf_newphdr	elf64_newphdr
 #define elf_getshdr	elf64_getshdr
 #define Elf_Ehdr	Elf64_Ehdr
+#define Elf_Phdr	Elf64_Phdr
 #define Elf_Shdr	Elf64_Shdr
 #define Elf_Sym	Elf64_Sym
 #define ELF_ST_TYPE(a)	ELF64_ST_TYPE(a)
@@ -62,8 +64,10 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
 #define ELF_ST_VIS(a)	ELF64_ST_VISIBILITY(a)
 #else
 #define elf_newehdr	elf32_newehdr
+#define elf_newphdr	elf32_newphdr
 #define elf_getshdr	elf32_getshdr
 #define Elf_Ehdr	Elf32_Ehdr
+#define Elf_Phdr	Elf32_Phdr
 #define Elf_Shdr	Elf32_Shdr
 #define Elf_Sym	Elf32_Sym
 #define ELF_ST_TYPE(a)	ELF32_ST_TYPE(a)

@@ -2102,8 +2102,8 @@ static int kcore_copy__compare_file(const char *from_dir, const char *to_dir,
  * unusual. One significant peculiarity is that the mapping (start -> pgoff)
  * is not the same for the kernel map and the modules map. That happens because
  * the data is copied adjacently whereas the original kcore has gaps. Finally,
- * kallsyms and modules files are compared with their copies to check that
- * modules have not been loaded or unloaded while the copies were taking place.
+ * kallsyms file is compared with its copy to check that modules have not been
+ * loaded or unloaded while the copies were taking place.
  *
  * Return: %0 on success, %-1 on failure.
  */
@@ -2166,9 +2166,6 @@ int kcore_copy(const char *from_dir, const char *to_dir)
 		goto out_extract_close;
 	}

-	if (kcore_copy__compare_file(from_dir, to_dir, "modules"))
-		goto out_extract_close;
-
 	if (kcore_copy__compare_file(from_dir, to_dir, "kallsyms"))
 		goto out_extract_close;

@@ -367,13 +367,24 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
 					     bool is_kernel)
 {
 	struct build_id bid;
+	struct nsinfo *nsi;
+	struct nscookie nc;
 	int rc;

-	if (is_kernel)
+	if (is_kernel) {
 		rc = sysfs__read_build_id("/sys/kernel/notes", &bid);
-	else
-		rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1;
+		goto out;
+	}
+
+	nsi = nsinfo__new(event->pid);
+	nsinfo__mountns_enter(nsi, &nc);
+
+	rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1;
+
+	nsinfo__mountns_exit(&nc);
+	nsinfo__put(nsi);
+
+out:
 	if (rc == 0) {
 		memcpy(event->build_id, bid.data, sizeof(bid.data));
 		event->build_id_size = (u8) bid.size;
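
The nsinfo enter/exit pair above is needed because event->filename is
a path inside the target process's mount namespace, which may not be
visible from perf's own namespace. Stripped of perf's helpers, the
mechanism is roughly the following sketch (raw syscalls, error
handling elided):

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

/* Roughly what nsinfo__mountns_enter()/_exit() boil down to: join the
 * target's mount namespace so its paths resolve, then switch back. */
static int with_target_mountns(pid_t pid)
{
	char path[64];
	int self_fd, target_fd;

	self_fd = open("/proc/self/ns/mnt", O_RDONLY);
	snprintf(path, sizeof(path), "/proc/%d/ns/mnt", (int)pid);
	target_fd = open(path, O_RDONLY);

	setns(target_fd, CLONE_NEWNS);	/* enter target mount namespace */
	/* ... read the build-id from the target's file here ... */
	setns(self_fd, CLONE_NEWNS);	/* back to our own namespace */

	close(target_fd);
	close(self_fd);
	return 0;
}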