• Christophe Leroy's avatar
    powerpc: Force inlining of csum_add() · 4423eff7
    Christophe Leroy authored
    Commit 328e7e48 ("powerpc: force inlining of csum_partial() to
    avoid multiple csum_partial() with GCC10") inlined csum_partial().
    
    Now that csum_partial() is inlined, GCC outlines csum_add() when
    called by csum_partial().
    
    c064fb28 <csum_add>:
    c064fb28:	7c 63 20 14 	addc    r3,r3,r4
    c064fb2c:	7c 63 01 94 	addze   r3,r3
    c064fb30:	4e 80 00 20 	blr
    
    c0665fb8 <csum_add>:
    c0665fb8:	7c 63 20 14 	addc    r3,r3,r4
    c0665fbc:	7c 63 01 94 	addze   r3,r3
    c0665fc0:	4e 80 00 20 	blr
    
    c066719c:	7c 9a c0 2e 	lwzx    r4,r26,r24
    c06671a0:	38 60 00 00 	li      r3,0
    c06671a4:	7f 1a c2 14 	add     r24,r26,r24
    c06671a8:	4b ff ee 11 	bl      c0665fb8 <csum_add>
    c06671ac:	80 98 00 04 	lwz     r4,4(r24)
    c06671b0:	4b ff ee 09 	bl      c0665fb8 <csum_add>
    c06671b4:	80 98 00 08 	lwz     r4,8(r24)
    c06671b8:	4b ff ee 01 	bl      c0665fb8 <csum_add>
    c06671bc:	a0 98 00 0c 	lhz     r4,12(r24)
    c06671c0:	4b ff ed f9 	bl      c0665fb8 <csum_add>
    c06671c4:	7c 63 18 f8 	not     r3,r3
    c06671c8:	81 3f 00 68 	lwz     r9,104(r31)
    c06671cc:	81 5f 00 a0 	lwz     r10,160(r31)
    c06671d0:	7d 29 18 14 	addc    r9,r9,r3
    c06671d4:	7d 29 01 94 	addze   r9,r9
    c06671d8:	91 3f 00 68 	stw     r9,104(r31)
    c06671dc:	7d 1a 50 50 	subf    r8,r26,r10
    c06671e0:	83 01 00 10 	lwz     r24,16(r1)
    c06671e4:	83 41 00 18 	lwz     r26,24(r1)
    
    The sum with 0 is useless and should have been skipped.
    There is even one completely unused instance of csum_add().
    
    In file included from ./include/net/checksum.h:22,
                     from ./include/linux/skbuff.h:28,
                     from ./include/linux/icmp.h:16,
                     from net/ipv6/ip6_tunnel.c:23:
    ./arch/powerpc/include/asm/checksum.h: In function '__ip6_tnl_rcv':
    ./arch/powerpc/include/asm/checksum.h:94:22: warning: inlining failed in call to 'csum_add': call is unlikely and code size would grow [-Winline]
       94 | static inline __wsum csum_add(__wsum csum, __wsum addend)
          |                      ^~~~~~~~
    ./arch/powerpc/include/asm/checksum.h:172:31: note: called from here
      172 |                         sum = csum_add(sum, (__force __wsum)*(const u32 *)buff);
          |                               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ./arch/powerpc/include/asm/checksum.h:94:22: warning: inlining failed in call to 'csum_add': call is unlikely and code size would grow [-Winline]
       94 | static inline __wsum csum_add(__wsum csum, __wsum addend)
          |                      ^~~~~~~~
    ./arch/powerpc/include/asm/checksum.h:177:31: note: called from here
      177 |                         sum = csum_add(sum, (__force __wsum)
          |                               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      178 |                                             *(const u32 *)(buff + 4));
          |                                             ~~~~~~~~~~~~~~~~~~~~~~~~~
    ./arch/powerpc/include/asm/checksum.h:94:22: warning: inlining failed in call to 'csum_add': call is unlikely and code size would grow [-Winline]
       94 | static inline __wsum csum_add(__wsum csum, __wsum addend)
          |                      ^~~~~~~~
    ./arch/powerpc/include/asm/checksum.h:183:31: note: called from here
      183 |                         sum = csum_add(sum, (__force __wsum)
          |                               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      184 |                                             *(const u32 *)(buff + 8));
          |                                             ~~~~~~~~~~~~~~~~~~~~~~~~~
    ./arch/powerpc/include/asm/checksum.h:94:22: warning: inlining failed in call to 'csum_add': call is unlikely and code size would grow [-Winline]
       94 | static inline __wsum csum_add(__wsum csum, __wsum addend)
          |                      ^~~~~~~~
    ./arch/powerpc/include/asm/checksum.h:186:31: note: called from here
      186 |                         sum = csum_add(sum, (__force __wsum)
          |                               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      187 |                                             *(const u16 *)(buff + 12));
          |                                             ~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    Force inlining of csum_add().
    
         94c:	80 df 00 a0 	lwz     r6,160(r31)
         950:	7d 28 50 2e 	lwzx    r9,r8,r10
         954:	7d 48 52 14 	add     r10,r8,r10
         958:	80 aa 00 04 	lwz     r5,4(r10)
         95c:	80 ff 00 68 	lwz     r7,104(r31)
         960:	7d 29 28 14 	addc    r9,r9,r5
         964:	7d 29 01 94 	addze   r9,r9
         968:	7d 08 30 50 	subf    r8,r8,r6
         96c:	80 aa 00 08 	lwz     r5,8(r10)
         970:	a1 4a 00 0c 	lhz     r10,12(r10)
         974:	7d 29 28 14 	addc    r9,r9,r5
         978:	7d 29 01 94 	addze   r9,r9
         97c:	7d 29 50 14 	addc    r9,r9,r10
         980:	7d 29 01 94 	addze   r9,r9
         984:	7d 29 48 f8 	not     r9,r9
         988:	7c e7 48 14 	addc    r7,r7,r9
         98c:	7c e7 01 94 	addze   r7,r7
         990:	90 ff 00 68 	stw     r7,104(r31)
    
    In the non-inlined version, the first sum with 0 was performed.
    Here it is skipped.
    Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
    Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
    Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
    Link: https://lore.kernel.org/r/f7f4d4e364de6e473da874468b903da6e5d97adc.1620713272.git.christophe.leroy@csgroup.eu
    4423eff7
checksum.h 5.59 KB