• Arnaldo Carvalho de Melo's avatar
    perf annotate: Handle samples not at objdump output addr boundaries · 48fb4fdd
    Arnaldo Carvalho de Melo authored
    Without this patch we get this for need_resched:
    
    [root@mica ~]# perf annotate need_resched
    
    ------------------------------------------------
     Percent |      Source code & Disassembly of vmlinux
    ------------------------------------------------
             :
             :
             :      Disassembly of section .text:
             :
             :      ffffffff810095ed <need_resched>:
             :              return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
             :      }
             :
             :      static inline int need_resched(void)
             :      {
        0.00 :      ffffffff810095ed:       55                      push   %rbp
             :              return unlikely(test_thread_flag(TIF_NEED_RESCHED));
        0.00 :      ffffffff810095ee:       be 03 00 00 00          mov    $0x3,%esi
             :
             :      static inline struct thread_info *current_thread_info(void)
             :      {
             :              struct thread_info *ti;
             :              ti = (void *)(percpu_read_stable(kernel_stack) +
        0.00 :      ffffffff810095f3:       65 48 8b 3c 25 48 b5    mov    %gs:0xb548,%rdi
        0.00 :      ffffffff810095fa:       00 00
             :              return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
             :      }
             :
             :      static inline int need_resched(void)
             :      {
        0.00 :      ffffffff810095fc:       48 89 e5                mov    %rsp,%rbp
             :              return unlikely(test_thread_flag(TIF_NEED_RESCHED));
        0.00 :      ffffffff810095ff:       48 81 ef d8 1f 00 00    sub    $0x1fd8,%rdi
        0.00 :      ffffffff81009606:       e8 9d ff ff ff          callq  ffffffff810095a8 <test_ti_thread_flag>
             :      }
        0.00 :      ffffffff8100960b:       c9                      leaveq
        0.00 :      ffffffff8100960c:       85 c0                   test   %eax,%eax
        0.00 :      ffffffff8100960e:       0f 95 c0                setne  %al
        0.00 :      ffffffff81009611:       0f b6 c0                movzbl %al,%eax
             :      Disassembly of section .vsyscall_0:
             :      Disassembly of section .vsyscall_fn:
             :      Disassembly of section .vsyscall_1:
             :      Disassembly of section .vsyscall_2:
             :      Disassembly of section .init.text:
             :      Disassembly of section .altinstr_replacement:
             :      Disassembly of section .exit.text:
    [root@mica ~]#
    
    But from the 'perf report' result we know that there are hits
    for need_resched on a 4-way machine that is mostly idle. So,
    after adding code to show what is in each hist offset and
    attributing IP hits that fall between objdump line addresses
    to the objdump line that contains them, we get, for the same
    perf.data file:
    
    [root@mica ~]# perf annotate -v need_resched
    
    ------------------------------------------------
     Percent |      Source code & Disassembly of vmlinux
    ------------------------------------------------
             :
             :
             :      Disassembly of section .text:
             :
             :      ffffffff810095ed <need_resched>:
             :              return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
             :      }
             :
             :      static inline int need_resched(void)
             :      {
        0.00 :      ffffffff810095ed:       55                      push   %rbp
             :              return unlikely(test_thread_flag(TIF_NEED_RESCHED));
       52.78 :      ffffffff810095ee:       be 03 00 00 00          mov    $0x3,%esi
             :
             :      static inline struct thread_info *current_thread_info(void)
             :      {
             :              struct thread_info *ti;
             :              ti = (void *)(percpu_read_stable(kernel_stack) +
        0.00 :      ffffffff810095f3:       65 48 8b 3c 25 48 b5    mov    %gs:0xb548,%rdi
        0.00 :      ffffffff810095fa:       00 00
             :              return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
             :      }
             :
             :      static inline int need_resched(void)
             :      {
        0.00 :      ffffffff810095fc:       48 89 e5                mov    %rsp,%rbp
             :              return unlikely(test_thread_flag(TIF_NEED_RESCHED));
        9.72 :      ffffffff810095ff:       48 81 ef d8 1f 00 00    sub    $0x1fd8,%rdi
        0.00 :      ffffffff81009606:       e8 9d ff ff ff          callq  ffffffff810095a8 <test_ti_thread_flag>
             :      }
        0.00 :      ffffffff8100960b:       c9                      leaveq
        0.00 :      ffffffff8100960c:       85 c0                   test   %eax,%eax
       37.50 :      ffffffff8100960e:       0f 95 c0                setne  %al
        0.00 :      ffffffff81009611:       0f b6 c0                movzbl %al,%eax
             :      Disassembly of section .vsyscall_0:
             :      Disassembly of section .vsyscall_fn:
             :      Disassembly of section .vsyscall_1:
             :      Disassembly of section .vsyscall_2:
             :      Disassembly of section .init.text:
             :      Disassembly of section .altinstr_replacement:
             :      Disassembly of section .exit.text:
    [root@mica ~]#
    
    And now 'perf annotate -v', verbose mode, will show the hits per
    precise IP, so that one can make sense of the attribution to
    each objdump line:
    
    [root@mica ~]# perf annotate -v need_resched
    Looking at the vmlinux_path (5 entries long)
    Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols
    annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614
    
    ------------------------------------------------
     Percent |      Source code & Disassembly of vmlinux
    ------------------------------------------------
                    ffffffff810095f1: 152
                    ffffffff81009603: 28
                    ffffffff8100960f: 55
                    ffffffff81009610: 53
                              h->sum: 288
    <SNIP same annotation>
    Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: David Miller <davem@davemloft.net>
    Cc: Frédéric Weisbecker <fweisbec@gmail.com>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Paul Mackerras <paulus@samba.org>
    LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    48fb4fdd
builtin-annotate.c 13.6 KB