1. 15 May, 2014 16 commits
    • Alexei Starovoitov's avatar
      seccomp: JIT compile seccomp filter · 8f577cad
      Alexei Starovoitov authored
      Take advantage of internal BPF JIT
      
      05-sim-long_jumps.c of libseccomp was used as micro-benchmark:
      
       seccomp_rule_add_exact(ctx,...
       seccomp_rule_add_exact(ctx,...
      
       rc = seccomp_load(ctx);
      
       for (i = 0; i < 10000000; i++)
          syscall(...);
      
      $ sudo sysctl net.core.bpf_jit_enable=1
      $ time ./bench
      real	0m2.769s
      user	0m1.136s
      sys	0m1.624s
      
      $ sudo sysctl net.core.bpf_jit_enable=0
      $ time ./bench
      real	0m5.825s
      user	0m1.268s
      sys	0m4.548s
      Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      8f577cad
    • Alexei Starovoitov's avatar
      net: filter: x86: internal BPF JIT · 62258278
      Alexei Starovoitov authored
      Maps all internal BPF instructions into x86_64 instructions.
      This patch replaces original BPF x64 JIT with internal BPF x64 JIT.
      sysctl net.core.bpf_jit_enable is reused as on/off switch.
      
      Performance:
      
      1. old BPF JIT and internal BPF JIT generate equivalent x86_64 code.
        No performance difference is observed for filters that were JIT-able before
      
      Example assembler code for BPF filter "tcpdump port 22"
      
      original BPF -> old JIT:            original BPF -> internal BPF -> new JIT:
         0:   push   %rbp                      0:     push   %rbp
         1:   mov    %rsp,%rbp                 1:     mov    %rsp,%rbp
         4:   sub    $0x60,%rsp                4:     sub    $0x228,%rsp
         8:   mov    %rbx,-0x8(%rbp)           b:     mov    %rbx,-0x228(%rbp) // prologue
                                              12:     mov    %r13,-0x220(%rbp)
                                              19:     mov    %r14,-0x218(%rbp)
                                              20:     mov    %r15,-0x210(%rbp)
                                              27:     xor    %eax,%eax         // clear A
         c:   xor    %ebx,%ebx                29:     xor    %r13,%r13         // clear X
         e:   mov    0x68(%rdi),%r9d          2c:     mov    0x68(%rdi),%r9d
        12:   sub    0x6c(%rdi),%r9d          30:     sub    0x6c(%rdi),%r9d
        16:   mov    0xd8(%rdi),%r8           34:     mov    0xd8(%rdi),%r10
                                              3b:     mov    %rdi,%rbx
        1d:   mov    $0xc,%esi                3e:     mov    $0xc,%esi
        22:   callq  0xffffffffe1021e15       43:     callq  0xffffffffe102bd75
        27:   cmp    $0x86dd,%eax             48:     cmp    $0x86dd,%rax
        2c:   jne    0x0000000000000069       4f:     jne    0x000000000000009a
        2e:   mov    $0x14,%esi               51:     mov    $0x14,%esi
        33:   callq  0xffffffffe1021e31       56:     callq  0xffffffffe102bd91
        38:   cmp    $0x84,%eax               5b:     cmp    $0x84,%rax
        3d:   je     0x0000000000000049       62:     je     0x0000000000000074
        3f:   cmp    $0x6,%eax                64:     cmp    $0x6,%rax
        42:   je     0x0000000000000049       68:     je     0x0000000000000074
        44:   cmp    $0x11,%eax               6a:     cmp    $0x11,%rax
        47:   jne    0x00000000000000c6       6e:     jne    0x0000000000000117
        49:   mov    $0x36,%esi               74:     mov    $0x36,%esi
        4e:   callq  0xffffffffe1021e15       79:     callq  0xffffffffe102bd75
        53:   cmp    $0x16,%eax               7e:     cmp    $0x16,%rax
        56:   je     0x00000000000000bf       82:     je     0x0000000000000110
        58:   mov    $0x38,%esi               88:     mov    $0x38,%esi
        5d:   callq  0xffffffffe1021e15       8d:     callq  0xffffffffe102bd75
        62:   cmp    $0x16,%eax               92:     cmp    $0x16,%rax
        65:   je     0x00000000000000bf       96:     je     0x0000000000000110
        67:   jmp    0x00000000000000c6       98:     jmp    0x0000000000000117
        69:   cmp    $0x800,%eax              9a:     cmp    $0x800,%rax
        6e:   jne    0x00000000000000c6       a1:     jne    0x0000000000000117
        70:   mov    $0x17,%esi               a3:     mov    $0x17,%esi
        75:   callq  0xffffffffe1021e31       a8:     callq  0xffffffffe102bd91
        7a:   cmp    $0x84,%eax               ad:     cmp    $0x84,%rax
        7f:   je     0x000000000000008b       b4:     je     0x00000000000000c2
        81:   cmp    $0x6,%eax                b6:     cmp    $0x6,%rax
        84:   je     0x000000000000008b       ba:     je     0x00000000000000c2
        86:   cmp    $0x11,%eax               bc:     cmp    $0x11,%rax
        89:   jne    0x00000000000000c6       c0:     jne    0x0000000000000117
        8b:   mov    $0x14,%esi               c2:     mov    $0x14,%esi
        90:   callq  0xffffffffe1021e15       c7:     callq  0xffffffffe102bd75
        95:   test   $0x1fff,%ax              cc:     test   $0x1fff,%rax
        99:   jne    0x00000000000000c6       d3:     jne    0x0000000000000117
                                              d5:     mov    %rax,%r14
        9b:   mov    $0xe,%esi                d8:     mov    $0xe,%esi
        a0:   callq  0xffffffffe1021e44       dd:     callq  0xffffffffe102bd91 // MSH
                                              e2:     and    $0xf,%eax
                                              e5:     shl    $0x2,%eax
                                              e8:     mov    %rax,%r13
                                              eb:     mov    %r14,%rax
                                              ee:     mov    %r13,%rsi
        a5:   lea    0xe(%rbx),%esi           f1:     add    $0xe,%esi
        a8:   callq  0xffffffffe1021e0d       f4:     callq  0xffffffffe102bd6d
        ad:   cmp    $0x16,%eax               f9:     cmp    $0x16,%rax
        b0:   je     0x00000000000000bf       fd:     je     0x0000000000000110
                                              ff:     mov    %r13,%rsi
        b2:   lea    0x10(%rbx),%esi         102:     add    $0x10,%esi
        b5:   callq  0xffffffffe1021e0d      105:     callq  0xffffffffe102bd6d
        ba:   cmp    $0x16,%eax              10a:     cmp    $0x16,%rax
        bd:   jne    0x00000000000000c6      10e:     jne    0x0000000000000117
        bf:   mov    $0xffff,%eax            110:     mov    $0xffff,%eax
        c4:   jmp    0x00000000000000c8      115:     jmp    0x000000000000011c
        c6:   xor    %eax,%eax               117:     mov    $0x0,%eax
        c8:   mov    -0x8(%rbp),%rbx         11c:     mov    -0x228(%rbp),%rbx // epilogue
        cc:   leaveq                         123:     mov    -0x220(%rbp),%r13
        cd:   retq                           12a:     mov    -0x218(%rbp),%r14
                                             131:     mov    -0x210(%rbp),%r15
                                             138:     leaveq
                                             139:     retq
      
      On fully cached SKBs both JITed functions take 12 nsec to execute.
      BPF interpreter executes the program in 30 nsec.
      
      The difference in generated assembler is due to the following:
      
      Old BPF implements LDX_MSH instruction via sk_load_byte_msh() helper function
      inside bpf_jit.S.
      New JIT removes the helper and does it explicitly, so ldx_msh cost
      is the same for both JITs, but generated code looks longer.
      
      New JIT has 4 registers to save, so prologue/epilogue are larger,
      but the cost is within noise on x64.
      
      Old JIT checks whether first insn clears A and if not emits 'xor %eax,%eax'.
      New JIT clears %rax unconditionally.
      
      2. old BPF JIT doesn't support ANC_NLATTR, ANC_PAY_OFFSET, ANC_RANDOM
        extensions. New JIT supports all BPF extensions.
        Performance of such filters improves 2-4 times depending on a filter.
        The longer the filter the higher performance gain.
        Synthetic benchmarks with many ancillary loads see 20x speedup
        which seems to be the maximum gain from JIT
      
      Notes:
      
      . net.core.bpf_jit_enable=2 + tools/net/bpf_jit_disasm is still functional
        and can be used to see generated assembler
      
      . there are two jit_compile() functions and code flow for classic filters is:
        sk_attach_filter() - load classic BPF
        bpf_jit_compile() - try to JIT from classic BPF
        sk_convert_filter() - convert classic to internal
        bpf_int_jit_compile() - JIT from internal BPF
      
        seccomp and tracing filters will just call bpf_int_jit_compile()
      Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      62258278
    • Alexei Starovoitov's avatar
      net: filter: x86: split bpf_jit_compile() · f3c2af7b
      Alexei Starovoitov authored
      Split bpf_jit_compile() into two functions to improve readability
      of for(pass++) loop. The change follows similar style of JIT compilers
      for arm, powerpc, s390
      
      The body of new do_jit() was not reformatted to reduce noise
      in this patch, since the following patch replaces most of it.
      
      Tested with BPF testsuite.
      Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      f3c2af7b
    • David S. Miller's avatar
      Merge branch 'ieee802154-next' · 9509b1c1
      David S. Miller authored
      Phoebe Buckheister says:
      
      ====================
      802154: some cleanups and fixes
      
      This series adds some definitions for 802.15.4 header fields that were missing,
      changes 6lowpan fragmentation to be aware of security headers and fixes
      802.15.4 datagram socket sendmsg(), which was entirely noncompliant to date.
      Also a few minor changes to mac_cb handling, mark a single-use function static,
      and correctly check for EMSGSIZE conditions during wpan_header_create.
      
      Changes since v1:
        * rename mac_cb_alloc to mac_cb_init
        * catch all error cases of sendmsg() instead of only !conn && msg_name
        * redo 6lowpan fragmentation to not clone lower layer headers
      ====================
      Signed-off-by: David S. Miller <davem@davemloft.net>
      9509b1c1
    • Phoebe Buckheister's avatar
      mac802154: make mac802154_wpan_open static · 6ef0023a
      Phoebe Buckheister authored
      This function is only used within the same translation unit, so mark it
      static.
      Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      6ef0023a
    • Phoebe Buckheister's avatar
      ieee802154: fix dgram socket sendmsg() · 1cc76e36
      Phoebe Buckheister authored
      802.15.4 datagram sockets do not currently have a compliant sendmsg().
      The destination address supplied is always ignored, and in unconnected
      mode, packets are broadcast instead of dropped with -EDESTADDRREQ. This
      patch fixes 802.15.4 dgram sockets to be compliant, i.e.
      
       !conn && !msg_name => -EDESTADDRREQ
       !conn &&  msg_name => send to msg_name
        conn && !msg_name => send to connected
        conn &&  msg_name => -EISCONN
      Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      1cc76e36
    • Phoebe Buckheister's avatar
      6lowpan: fix fragmentation · d4b2816d
      Phoebe Buckheister authored
      Currently, 6lowpan creates one 802.15.4 MAC header for the original
      packet the device was given by upper layers and reuses this header for
      all fragments, if fragmentation is required. This also reuses frame
      sequence numbers, which must not happen. 6lowpan also has issues with
      fragmentation in the presence of security headers, since those may imply
      the presence of trailing fields that are not accounted for by the
      fragmentation code right now.
      
      Fix both of these issues by properly allocating fragment skbs with
      headroom and tailroom as specified by the underlying device, creating
      one header for each skb instead of reusing the original header, and
      letting the underlying device do the rest.
      Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      d4b2816d
    • Phoebe Buckheister's avatar
      ieee802154: change _cb handling slightly · 32edc40a
      Phoebe Buckheister authored
      The current mac_cb handling of ieee802154 is rather awkward and limited.
      Decompose the single flags field into multiple fields with the meanings
      of each subfield of the flags field to make future extensions (for
      example, link-layer security) easier. Also don't set the frame sequence
      number in upper layers, since that's a thing the MAC is supposed to set
      on frame transmit - we set it on header creation, but assuming that
      upper layers do not blindly duplicate our headers, this is fine.
      Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      32edc40a
    • Phoebe Buckheister's avatar
      mac802154: account for all header parts during wpan header creation · 8c84296f
      Phoebe Buckheister authored
      The current WPAN header creation code checks for EMSGSIZE conditions,
      but does not account for the MIC field that link layer security may add
      at the end of the frame. Now that we can accurately calculate the
      maximum payload size of packets, use that to check for EMSGSIZE
      conditions.
      Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      8c84296f
    • Phoebe Buckheister's avatar
      ieee802154: add definitions for link-layer security and header functions · c3a6114f
      Phoebe Buckheister authored
      When dealing with 802.15.4, one often has to know the maximum payload
      size for a given packet. This depends on many factors, one of which is
      whether or not a security header is present in the frame. These
      definitions and functions provide an easy way for any upper layer to
      calculate the maximum payload size for a packet. The first obvious user
      for this is 6lowpan, which duplicates this calculation and gets it
      partially wrong because it ignores security headers.
      Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      c3a6114f
    • Markus Lottmann's avatar
      drivers: net: Register Micrel ksz884x network devices in PCI device tree. · 9dbccc30
      Markus Lottmann authored
      This unifies the behaviour with other network device drivers and
      allows for a matching of the PCI device path in UDEV rules.
      Signed-off-by: Markus Lottmann <markus.lottmann1986@gmail.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      9dbccc30
    • David S. Miller's avatar
      net: Fix CONFIG_SYSCTL ifdef test. · fcd77db0
      David S. Miller authored
      > include/net/ip.h:211:5: warning: "CONFIG_SYSCTL" is not defined [-Wundef]
      >  #if CONFIG_SYSCTL
      >      ^
      Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      fcd77db0
    • David S. Miller's avatar
      Merge branch 'cpsw_cleanups' · fd0d5e73
      David S. Miller authored
      George Cherian says:
      
      ====================
      TI CPSW Cleanup
      
      This series does some minimal cleanups.
      	-Conversion of pr_*() to dev_*()
      	-Convert kzalloc to devm_kzalloc.
      ====================
      Signed-off-by: David S. Miller <davem@davemloft.net>
      fd0d5e73
    • George Cherian's avatar
      drivers: net: davinci_cpdma: Convert kzalloc() to devm_kzalloc(). · e1943128
      George Cherian authored
      Convert kzalloc() to devm_kzalloc().
      Signed-off-by: George Cherian <george.cherian@ti.com>
      Reviewed-by: Felipe Balbi <balbi@ti.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      e1943128
    • George Cherian's avatar
      net: davinci_mdio: Convert pr_err() to dev_err() call · a92f40a9
      George Cherian authored
      Convert the lone pr_err() to dev_err() call.
      Signed-off-by: George Cherian <george.cherian@ti.com>
      Reviewed-by: Felipe Balbi <balbi@ti.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      a92f40a9
    • George Cherian's avatar
      driver net: cpsw: Convert pr_*() to dev_*() calls · 88c99ff6
      George Cherian authored
      Convert all pr_*() calls to dev_*() calls.
      No functional changes.
      Signed-off-by: George Cherian <george.cherian@ti.com>
      Reviewed-by: Felipe Balbi <balbi@ti.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      88c99ff6
  2. 14 May, 2014 24 commits