• Arnaldo Carvalho de Melo's avatar
    perf report: Add --switch-on/--switch-off events · ef4b1a53
    Arnaldo Carvalho de Melo authored
    Since 'perf top' shares the histogram browser with 'perf report', the
    same explanation in the previous cset applies.
    
    An additional example uses a pair of SDT events available for systemtap:
    
      # perf probe --exec=/usr/bin/stap '%*:*'
      Added new events:
        sdt_stap:benchmark__thread__start (on %* in /usr/bin/stap)
        sdt_stap:benchmark   (on %* in /usr/bin/stap)
        sdt_stap:benchmark__thread__end (on %* in /usr/bin/stap)
        sdt_stap:pass6__start (on %* in /usr/bin/stap)
        sdt_stap:pass6__end  (on %* in /usr/bin/stap)
        sdt_stap:pass5__start (on %* in /usr/bin/stap)
        sdt_stap:pass5__end  (on %* in /usr/bin/stap)
        sdt_stap:pass0__start (on %* in /usr/bin/stap)
        sdt_stap:pass0__end  (on %* in /usr/bin/stap)
        sdt_stap:pass1a__start (on %* in /usr/bin/stap)
        sdt_stap:pass1b__start (on %* in /usr/bin/stap)
        sdt_stap:pass1__end  (on %* in /usr/bin/stap)
        sdt_stap:pass2__start (on %* in /usr/bin/stap)
        sdt_stap:pass2__end  (on %* in /usr/bin/stap)
        sdt_stap:pass3__start (on %* in /usr/bin/stap)
        sdt_stap:pass3__end  (on %* in /usr/bin/stap)
        sdt_stap:pass4__start (on %* in /usr/bin/stap)
        sdt_stap:pass4__end  (on %* in /usr/bin/stap)
        sdt_stap:benchmark__start (on %* in /usr/bin/stap)
        sdt_stap:benchmark__end (on %* in /usr/bin/stap)
        sdt_stap:cache__get  (on %* in /usr/bin/stap)
        sdt_stap:cache__clean (on %* in /usr/bin/stap)
        sdt_stap:cache__add__module (on %* in /usr/bin/stap)
        sdt_stap:cache__add__source (on %* in /usr/bin/stap)
        sdt_stap:stap_system__complete (on %* in /usr/bin/stap)
        sdt_stap:stap_system__start (on %* in /usr/bin/stap)
        sdt_stap:stap_system__spawn (on %* in /usr/bin/stap)
        sdt_stap:stap_system__fork (on %* in /usr/bin/stap)
        sdt_stap:intern_string (on %* in /usr/bin/stap)
        sdt_stap:client__start (on %* in /usr/bin/stap)
        sdt_stap:client__end (on %* in /usr/bin/stap)
    
      You can now use it in all perf tools, such as:
    
      	perf record -e sdt_stap:client__end -aR sleep 1
    
      #
    
    From these we'll use the two below when running systemtap's test suite:
    
      # perf record -e sdt_stap:pass2__*,cycles:P make installcheck > /dev/null
      ^C[ perf record: Woken up 8 times to write data ]
      [ perf record: Captured and wrote 2.691 MB perf.data (39638 samples) ]
      Terminated
      # perf script | grep sdt_stap
                  stap 28979 [000] 19424.302660: sdt_stap:pass2__start: (561b9a537de3) arg1=140730364262544
                  stap 28979 [000] 19424.333083:   sdt_stap:pass2__end: (561b9a53a9e1) arg1=140730364262544
                  stap 29045 [006] 19424.933460: sdt_stap:pass2__start: (563edddcede3) arg1=140722674883152
                  stap 29045 [006] 19424.963794:   sdt_stap:pass2__end: (563edddd19e1) arg1=140722674883152
      # perf script | grep cycles |  wc -l
      39634
      #
    
    Looking at the whole perf.data file:
    
      [root@quaco testsuite]# perf report | grep cycles:P -A25
      # Samples: 39K of event 'cycles:P'
      # Event count (approx.): 34044267368
      #
      # Overhead  Command  Shared Object         Symbol
      # ........  .......  ....................  ................................
      #
           3.50%  cc1      cc1                   [.] ht_lookup_with_hash
           3.04%  cc1      cc1                   [.] _cpp_lex_token
           2.11%  cc1      cc1                   [.] ggc_internal_alloc
           1.83%  cc1      cc1                   [.] cpp_get_token_with_location
           1.68%  cc1      libc-2.29.so          [.] _int_malloc
           1.41%  cc1      cc1                   [.] linemap_position_for_column
           1.25%  cc1      cc1                   [.] ggc_internal_cleared_alloc
           1.20%  cc1      cc1                   [.] c_lex_with_flags
           1.18%  cc1      cc1                   [.] get_combined_adhoc_loc
           1.05%  cc1      libc-2.29.so          [.] malloc
           1.01%  cc1      libc-2.29.so          [.] _int_free
           0.96%  stap     stap                  [.] std::_Hashtable<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::__detail::_Identity, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, stringtable_hash, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, true, true> >::_M_insert<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__detail::_AllocNode<std::allocator<std::__detail::_Hash_node<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, true> > > >
           0.78%  stap     stap                  [.] lexer::scan
           0.74%  cc1      cc1                   [.] _cpp_lex_direct
           0.70%  cc1      cc1                   [.] pop_scope
           0.70%  cc1      cc1                   [.] c_parser_declspecs
           0.69%  stap     libc-2.29.so          [.] _int_malloc
           0.68%  cc1      cc1                   [.] htab_find_slot
           0.68%  cc1      [kernel.vmlinux]      [k] prepare_exit_to_usermode
           0.64%  cc1      [kernel.vmlinux]      [k] clear_page_erms
      [root@quaco testsuite]#
    
    And now only what happens in slices demarcated by those start/end SDT
    events:
    
      [root@quaco testsuite]# perf report --switch-on=sdt_stap:pass2__start --switch-off=sdt_stap:pass2__end | grep cycles:P -A100
      # Samples: 240  of event 'cycles:P'
      # Event count (approx.): 206491934
      #
      # Overhead  Command  Shared Object        Symbol
      # ........  .......  ...................  ................................................
      #
          38.99%  stap     stap                 [.] systemtap_session::register_library_aliases
          19.47%  stap     stap                 [.] match_key::operator<
          15.01%  stap     libc-2.29.so         [.] __memcmp_avx2_movbe
           5.19%  stap     libc-2.29.so         [.] _int_malloc
           2.50%  stap     libstdc++.so.6.0.26  [.] std::_Rb_tree_insert_and_rebalance
           2.30%  stap     stap                 [.] match_node::build_no_more
           2.07%  stap     libc-2.29.so         [.] malloc
           1.66%  stap     stap                 [.] std::_Rb_tree<match_key, std::pair<match_key const, match_node*>, std::_Select1st<std::pair<match_key const, match_node*> >, std::less<match_key>, std::allocator<std::pair<match_key const, match_node*> > >::find
           1.66%  stap     stap                 [.] match_node::bind
           1.58%  stap     [kernel.vmlinux]     [k] prepare_exit_to_usermode
           1.17%  stap     [kernel.vmlinux]     [k] native_irq_return_iret
           0.87%  stap     stap                 [.] 0x0000000000032ec4
           0.77%  stap     libstdc++.so.6.0.26  [.] std::_Rb_tree_increment
           0.47%  stap     stap                 [.] std::vector<derived_probe_builder*, std::allocator<derived_probe_builder*> >::_M_realloc_insert<derived_probe_builder* const&>
           0.47%  stap     [kernel.vmlinux]     [k] get_page_from_freelist
           0.47%  stap     [kernel.vmlinux]     [k] swapgs_restore_regs_and_return_to_usermode
           0.47%  stap     [kernel.vmlinux]     [k] do_user_addr_fault
           0.46%  stap     [kernel.vmlinux]     [k] __pagevec_lru_add_fn
           0.46%  stap     stap                 [.] std::_Rb_tree<match_key, std::pair<match_key const, match_node*>, std::_Select1st<std::pair<match_key const, match_node*> >, std::less<match_key>, std::allocator<std::pair<match_key const, match_node*> > >::_M_emplace_unique<std::pair<match_key, match_node*> >
           0.42%  stap     libstdc++.so.6.0.26  [.] 0x00000000000c18fa
           0.40%  stap     [kernel.vmlinux]     [k] interrupt_entry
           0.40%  stap     [kernel.vmlinux]     [k] update_load_avg
           0.40%  stap     [kernel.vmlinux]     [k] __intel_pmu_disable_all
           0.40%  stap     [kernel.vmlinux]     [k] clear_page_erms
           0.39%  stap     [kernel.vmlinux]     [k] __mod_node_page_state
           0.39%  stap     [kernel.vmlinux]     [k] error_entry
           0.39%  stap     [kernel.vmlinux]     [k] sync_regs
           0.38%  stap     [kernel.vmlinux]     [k] __handle_mm_fault
           0.38%  stap     stap                 [.] derive_probes
    
      #
      # (Tip: System-wide collection from all CPUs: perf record -a)
      #
      [root@quaco testsuite]#
    
    Cc: Adrian Hunter <adrian.hunter@intel.com>
    Cc: Florian Weimer <fweimer@redhat.com>
    Cc: Jiri Olsa <jolsa@kernel.org>
    Cc: Namhyung Kim <namhyung@kernel.org>
    Cc: William Cohen <wcohen@redhat.com>
    Link: https://lkml.kernel.org/n/tip-408hvumcnyn93a0auihnawew@git.kernel.org
    Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
    ef4b1a53
perf-report.txt 18.9 KB