1. 14 Nov, 2022 1 commit
    • Namhyung Kim's avatar
      perf stat: Increase metric length to align outputs · 4ea0be1f
      Namhyung Kim authored
      When perf stat is called with very detailed events, the output doesn't
      align well like below:
      
        $ sudo perf stat -a -ddd sleep 1
      
         Performance counter stats for 'system wide':
      
                8,020.23 msec cpu-clock                        #    7.997 CPUs utilized
                   3,970      context-switches                 #  494.998 /sec
                     169      cpu-migrations                   #   21.072 /sec
                     586      page-faults                      #   73.065 /sec
             649,568,060      cycles                           #    0.081 GHz                      (30.42%)
             304,044,345      instructions                     #    0.47  insn per cycle           (38.40%)
              60,313,022      branches                         #    7.520 M/sec                    (38.89%)
               2,766,919      branch-misses                    #    4.59% of all branches          (39.26%)
              74,422,951      L1-dcache-loads                  #    9.279 M/sec                    (39.39%)
               8,025,568      L1-dcache-load-misses            #   10.78% of all L1-dcache accesses  (39.22%)
               3,314,995      LLC-loads                        #  413.329 K/sec                    (30.83%)
               1,225,619      LLC-load-misses                  #   36.97% of all LL-cache accesses  (30.45%)
         <not supported>      L1-icache-loads
              20,420,493      L1-icache-load-misses            #    0.00% of all L1-icache accesses  (30.29%)
              58,017,947      dTLB-loads                       #    7.234 M/sec                    (30.37%)
                 704,677      dTLB-load-misses                 #    1.21% of all dTLB cache accesses  (30.27%)
                 234,225      iTLB-loads                       #   29.204 K/sec                    (30.29%)
                 417,166      iTLB-load-misses                 #  178.10% of all iTLB cache accesses  (30.32%)
         <not supported>      L1-dcache-prefetches
         <not supported>      L1-dcache-prefetch-misses
      
             1.002947355 seconds time elapsed
      
      Increase the METRIC_LEN by 3 so that it can align properly.
      Signed-off-by: Namhyung Kim <namhyung@kernel.org>
      Acked-by: Ian Rogers <irogers@google.com>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Ingo Molnar <mingo@kernel.org>
      Cc: James Clark <james.clark@arm.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Kan Liang <kan.liang@linux.intel.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
      Link: https://lore.kernel.org/r/20221107213314.3239159-3-namhyung@kernel.org
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      4ea0be1f
  2. 10 Nov, 2022 5 commits
  3. 09 Nov, 2022 4 commits
  4. 07 Nov, 2022 1 commit
    • Arnaldo Carvalho de Melo's avatar
      perf trace: Add BPF augmenter to perf_event_open()'s 'struct perf_event_attr' arg · a9cd6c67
      Arnaldo Carvalho de Melo authored
      Using BPF for that, doing a cleverish reuse of perf_event_attr__fprintf(),
      that really needs to be turned into __snprintf(), etc.
      
      But since the plan is to go the BTF way, we will probably use libbpf's
      btf_dump__dump_type_data() instead.
      
      Example:
      
      [root@quaco ~]# perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,perf_event_open --max-events 10 perf stat --quiet sleep 0.001
      fg
           0.000 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x1, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3
           0.067 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x3, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
           0.120 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x4, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 5
           0.172 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x2, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 7
           0.190 perf_event_open(attr_uptr: { size: 128, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 8
           0.199 perf_event_open(attr_uptr: { size: 128, config: 0x1, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 9
           0.204 perf_event_open(attr_uptr: { size: 128, config: 0x4, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 10
           0.210 perf_event_open(attr_uptr: { size: 128, config: 0x5, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 11
      [root@quaco ~]#
      Suggested-by: Ian Rogers <irogers@google.com>
      Tested-by: Ian Rogers <irogers@google.com>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Link: https://lore.kernel.org/r/Y2V2Tpu+2vzJyon2@kernel.org
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      a9cd6c67
  5. 04 Nov, 2022 7 commits
    • Arnaldo Carvalho de Melo's avatar
      perf bpf: Rename perf_include_dir to libbpf_include_dir · b018899e
      Arnaldo Carvalho de Melo authored
      As this is where we expect to find bpf/bpf_helpers.h, etc.
      
      This needs more work to make it follow LIBBPF_DYNAMIC=1 usage, i.e. when
      not using the system libbpf it should use the headers in the in-kernel
      sources libbpf in tools/lib/bpf.
      
      We need to do that anyway to avoid this mixup of system libbpf and
      in-kernel files, so we'll get this sorted out that way.
      
      And this also may become moot as we move to using BPF skels for this
      feature.
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      b018899e
    • Arnaldo Carvalho de Melo's avatar
      perf examples bpf: Remove augmented_syscalls.c, the raw_syscalls one should be used instead · 3cd65616
      Arnaldo Carvalho de Melo authored
      The attempt at using BPF to copy syscall pointer arguments to show them
      like strace does started with sys_{enter,exit}_SYSCALL_NAME tracepoints,
      in tools/perf/examples/bpf/augmented_syscalls.c, but then achieving this
      result using raw_syscalls:{enter,exit} and BPF tail calls was deemed
      more flexible.
      
      The 'perf trace' codebase was adapted to using it while trying to
      continue supporting the old style per-syscall tracepoints, which at some
      point became too unwieldy and now isn't working properly.
      
      So let's scale back and concentrate on the augmented_raw_syscalls.c
      model on the way to using BPF skeletons.
      
      For the same reason remove the etcsnoop.c example, that used the
      old style per-tp syscalls just for the 'open' and 'openat' syscalls,
      looking at the pathnames starting with "/etc/", we should be able
      to do this later using filters, after we move to BPF skels.
      
      The augmented_raw_syscalls.c one continues to work, now with libbpf 1.0,
      after Ian's work on using the libbpf map style:
      
        # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,open* --max-events 4
           0.000 ping/194815 openat(dfd: CWD, filename: "/etc/hosts", flags: RDONLY|CLOEXEC) = 5
          20.225 systemd-oomd/972 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12
          20.285 abrt-dump-jour/1371 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21
          20.301 abrt-dump-jour/1370 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21
        #
      
      This is using this:
      
        # cat ~/.perfconfig
        [trace]
      	show_zeros = yes
      	show_duration = no
      	no_inherit = yes
      	args_alignment = 40
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      3cd65616
    • Ian Rogers's avatar
      perf bpf: Remove now unused BPF headers · cfddf0d4
      Ian Rogers authored
      Example code has migrated to use standard BPF header files, remove
      unnecessary perf equivalents. Update install step to not try to copy
      these.
      Signed-off-by: Ian Rogers <irogers@google.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Ingo Molnar <mingo@redhat.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Link: https://lore.kernel.org/r/20221103045437.163510-8-irogers@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      cfddf0d4
    • Ian Rogers's avatar
      perf trace: 5sec fix libbpf 1.0+ compatibility · 71811e8c
      Ian Rogers authored
      Avoid use of tools/perf/include/bpf/bpf.h and use the more regular BPF
      headers.
      
      Committer testing:
      
        # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/5sec.c sleep 5
             0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1474734416, rqtp: 5000000000)
        # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/5sec.c/max-stack=7/ sleep 5
             0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1474734416, rqtp: 5000000000)
                                               hrtimer_nanosleep ([kernel.kallsyms])
                                               common_nsleep ([kernel.kallsyms])
                                               __x64_sys_clock_nanosleep ([kernel.kallsyms])
                                               do_syscall_64 ([kernel.kallsyms])
                                               entry_SYSCALL_64_after_hwframe ([kernel.kallsyms])
                                               __GI___clock_nanosleep (/usr/lib64/libc.so.6)
                                               [0] ([unknown])
        #
      Signed-off-by: Ian Rogers <irogers@google.com>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Ingo Molnar <mingo@redhat.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Link: https://lore.kernel.org/r/20221103045437.163510-7-irogers@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      71811e8c
    • Ian Rogers's avatar
      perf trace: empty fix libbpf 1.0+ compatibility · baddab89
      Ian Rogers authored
      Avoid use of tools/perf/include/bpf/bpf.h and use the more regular BPF
      headers.  Add raw_syscalls:sys_enter to avoid the evlist being empty.
      
      Committer testing:
      
        # time perf trace -e ~acme/git/perf/tools/perf/examples/bpf/empty.c sleep 5
      
        real	0m5.697s
        user	0m0.217s
        sys	0m0.453s
        #
      
      I.e. it sets up everything successfully (use -v to see the details) and
      filters out all syscalls, then exits when the workload (sleep 5)
      finishes.
      Signed-off-by: Ian Rogers <irogers@google.com>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Ingo Molnar <mingo@redhat.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Link: https://lore.kernel.org/r/20221103045437.163510-6-irogers@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      baddab89
    • Ian Rogers's avatar
      perf trace: hello fix libbpf 1.0+ compatibility · 514607e3
      Ian Rogers authored
      Don't use deprecated and now broken map style. Avoid use of
      tools/perf/include/bpf/bpf.h and use the more regular BPF headers.
      
      Switch to raw_syscalls:sys_enter to avoid the evlist being empty and
      to fix output generation.
      
      Committer testing:
      
        # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/hello.c --call-graph=dwarf --max-events 5
           0.000 perf/206852 __bpf_stdout__(Hello, world)
                                             syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                             syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                             do_syscall_64 ([kernel.kallsyms])
                                             entry_SYSCALL_64_after_hwframe ([kernel.kallsyms])
                                             __GI___sched_setaffinity_new (/usr/lib64/libc.so.6)
           8.561 pipewire/2290 __bpf_stdout__(Hello, world)
                                             syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                             syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                             do_syscall_64 ([kernel.kallsyms])
                                             entry_SYSCALL_64_after_hwframe ([kernel.kallsyms])
                                             __libc_read (/usr/lib64/libc.so.6)
           8.571 pipewire/2290 __bpf_stdout__(Hello, world)
                                             syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                             syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                             do_syscall_64 ([kernel.kallsyms])
                                             entry_SYSCALL_64_after_hwframe ([kernel.kallsyms])
                                             __GI___ioctl (/usr/lib64/libc.so.6)
           8.586 pipewire/2290 __bpf_stdout__(Hello, world)
                                             syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                             syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                             do_syscall_64 ([kernel.kallsyms])
                                             entry_SYSCALL_64_after_hwframe ([kernel.kallsyms])
                                             __GI___write (/usr/lib64/libc.so.6)
           8.592 pipewire/2290 __bpf_stdout__(Hello, world)
                                             syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                             syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                             do_syscall_64 ([kernel.kallsyms])
                                             entry_SYSCALL_64_after_hwframe ([kernel.kallsyms])
                                             __timerfd_settime (/usr/lib64/libc.so.6)
        #
      Signed-off-by: Ian Rogers <irogers@google.com>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Ingo Molnar <mingo@redhat.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Link: https://lore.kernel.org/r/20221103045437.163510-5-irogers@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      514607e3
    • Ian Rogers's avatar
      perf trace: Raw augmented syscalls fix libbpf 1.0+ compatibility · 14e4b9f4
      Ian Rogers authored
      Don't use deprecated and now broken map style. Avoid use of
      tools/perf/include/bpf/bpf.h and use the more regular BPF headers.
      
      Committer notes:
      
      Add /usr/include to the include path so that bpf/bpf_helpers.h can be
      found, remove sys/socket.h, adding the sockaddr_storage definition, also
      remove stdbool.h, both were preventing building the
      augmented_raw_syscalls.c file with clang, revisit later.
      
      Testing it:
      
      Asking for syscalls that have string arguments:
      
        # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,string --max-events 10
           0.000 thermald/1144 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:2/energy_uj", flags: RDONLY) = 13
           0.158 thermald/1144 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj", flags: RDONLY) = 13
           0.215 thermald/1144 openat(dfd: CWD, filename: "/sys/class/thermal/thermal_zone3/temp", flags: RDONLY) = 13
          16.448 cgroupify/36478 openat(dfd: 4, filename: ".", flags: RDONLY|CLOEXEC|DIRECTORY|NONBLOCK) = 5
          16.468 cgroupify/36478 newfstatat(dfd: 5, filename: "", statbuf: 0x7fffca5b4130, flag: 4096) = 0
          16.473 systemd-oomd/972 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12
          16.499 systemd-oomd/972 newfstatat(dfd: 12, filename: "", statbuf: 0x7ffd2bc73cc0, flag: 4096) = 0
          16.516 abrt-dump-jour/1370 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21
          16.538 abrt-dump-jour/1370 newfstatat(dfd: 21, filename: "", statbuf: 0x7ffc651b8980, flag: 4096) = 0
          16.540 abrt-dump-jour/1371 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21
        #
      
      Networking syscalls:
      
        # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,sendto*,connect* --max-events 10
           0.000 isc-net-0005/1206 connect(fd: 512, uservaddr: { .family: INET, port: 53, addr: 23.211.132.65 }, addrlen: 16) = 0
           0.070 isc-net-0002/1203 connect(fd: 515, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:2::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable)
           0.031 isc-net-0006/1207 connect(fd: 513, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:2::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable)
           0.079 isc-net-0006/1207 sendto(fd: 3, buff: 0x7f73a40611b0, len: 106, flags: NOSIGNAL, addr: { .family: UNSPEC }, addr_len: NULL) = 106
           0.180 isc-net-0006/1207 connect(fd: 519, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:1::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable)
           0.211 isc-net-0006/1207 sendto(fd: 3, buff: 0x7f73a4061230, len: 106, flags: NOSIGNAL, addr: { .family: UNSPEC }, addr_len: NULL) = 106
           0.298 isc-net-0006/1207 connect(fd: 515, uservaddr: { .family: INET, port: 53, addr: 96.7.49.67 }, addrlen: 16) = 0
           0.109 isc-net-0004/1205 connect(fd: 518, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:2::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable)
           0.164 isc-net-0002/1203 sendto(fd: 3, buff: 0x7f73ac064300, len: 107, flags: NOSIGNAL, addr: { .family: UNSPEC }, addr_len: NULL) = 107
           0.247 isc-net-0002/1203 connect(fd: 522, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:1::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable)
        #
      Signed-off-by: Ian Rogers <irogers@google.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Ingo Molnar <mingo@redhat.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Link: https://lore.kernel.org/r/20221103045437.163510-2-irogers@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      14e4b9f4
  6. 03 Nov, 2022 11 commits
  7. 31 Oct, 2022 11 commits