Commit 0d74c42f authored by Joe Perches, committed by David S. Miller

ether_addr_equal: Optimize implementation, remove unused compare_ether_addr

Add a new check for CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS to reduce
the number of or's used in the ether_addr_equal comparison to very
slightly improve function performance.

Simplify the ether_addr_equal_64bits implementation.
Integrate and remove the zap_last_2bytes helper as it's now
used only once.

Remove the now unused compare_ether_addr function.

Update the unaligned-memory-access documentation to remove the
compare_ether_addr description and show how unaligned accesses
could occur with ether_addr_equal.
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 5cc208be
...@@ -137,24 +137,34 @@ Code that causes unaligned access ...@@ -137,24 +137,34 @@ Code that causes unaligned access
================================= =================================
With the above in mind, let's move onto a real life example of a function With the above in mind, let's move onto a real life example of a function
that can cause an unaligned memory access. The following function adapted that can cause an unaligned memory access. The following function taken
from include/linux/etherdevice.h is an optimized routine to compare two from include/linux/etherdevice.h is an optimized routine to compare two
ethernet MAC addresses for equality. ethernet MAC addresses for equality.
unsigned int compare_ether_addr(const u8 *addr1, const u8 *addr2) bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
{ {
const u16 *a = (const u16 *) addr1; #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
const u16 *b = (const u16 *) addr2; u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) |
((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4)));
return fold == 0;
#else
const u16 *a = (const u16 *)addr1;
const u16 *b = (const u16 *)addr2;
return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0;
#endif
} }
In the above function, the reference to a[0] causes 2 bytes (16 bits) to In the above function, when the hardware has efficient unaligned access
be read from memory starting at address addr1. Think about what would happen capability, there is no issue with this code. But when the hardware isn't
if addr1 was an odd address such as 0x10003. (Hint: it'd be an unaligned able to access memory on arbitrary boundaries, the reference to a[0] causes
access.) 2 bytes (16 bits) to be read from memory starting at address addr1.
Think about what would happen if addr1 was an odd address such as 0x10003.
(Hint: it'd be an unaligned access.)
Despite the potential unaligned access problems with the above function, it Despite the potential unaligned access problems with the above function, it
is included in the kernel anyway but is understood to only work on is included in the kernel anyway but is understood to only work normally on
16-bit-aligned addresses. It is up to the caller to ensure this alignment or 16-bit-aligned addresses. It is up to the caller to ensure this alignment or
not use this function at all. This alignment-unsafe function is still useful not use this function at all. This alignment-unsafe function is still useful
as it is a decent optimization for the cases when you can ensure alignment, as it is a decent optimization for the cases when you can ensure alignment,
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/random.h> #include <linux/random.h>
#include <asm/unaligned.h> #include <asm/unaligned.h>
#include <asm/bitsperlong.h>
#ifdef __KERNEL__ #ifdef __KERNEL__
__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
...@@ -210,41 +211,27 @@ static inline void eth_hw_addr_inherit(struct net_device *dst, ...@@ -210,41 +211,27 @@ static inline void eth_hw_addr_inherit(struct net_device *dst,
memcpy(dst->dev_addr, src->dev_addr, ETH_ALEN); memcpy(dst->dev_addr, src->dev_addr, ETH_ALEN);
} }
/**
* compare_ether_addr - Compare two Ethernet addresses
* @addr1: Pointer to a six-byte array containing the Ethernet address
* @addr2: Pointer other six-byte array containing the Ethernet address
*
* Compare two Ethernet addresses, returns 0 if equal, non-zero otherwise.
* Unlike memcmp(), it doesn't return a value suitable for sorting.
*/
static inline unsigned compare_ether_addr(const u8 *addr1, const u8 *addr2)
{
const u16 *a = (const u16 *) addr1;
const u16 *b = (const u16 *) addr2;
BUILD_BUG_ON(ETH_ALEN != 6);
return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
}
/** /**
* ether_addr_equal - Compare two Ethernet addresses * ether_addr_equal - Compare two Ethernet addresses
* @addr1: Pointer to a six-byte array containing the Ethernet address * @addr1: Pointer to a six-byte array containing the Ethernet address
* @addr2: Pointer to the other six-byte array containing the Ethernet address * @addr2: Pointer to the other six-byte array containing the Ethernet address
* *
* Compare two Ethernet addresses, returns true if equal * Compare two Ethernet addresses, returns true if equal
*
* Please note: addr1 & addr2 must both be aligned to u16.
*/ */
static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2) static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
{ {
return !compare_ether_addr(addr1, addr2); #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
} u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) |
((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4)));
static inline unsigned long zap_last_2bytes(unsigned long value) return fold == 0;
{
#ifdef __BIG_ENDIAN
return value >> 16;
#else #else
return value << 16; const u16 *a = (const u16 *)addr1;
const u16 *b = (const u16 *)addr2;
return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0;
#endif #endif
} }
...@@ -265,16 +252,14 @@ static inline unsigned long zap_last_2bytes(unsigned long value) ...@@ -265,16 +252,14 @@ static inline unsigned long zap_last_2bytes(unsigned long value)
static inline bool ether_addr_equal_64bits(const u8 addr1[6+2], static inline bool ether_addr_equal_64bits(const u8 addr1[6+2],
const u8 addr2[6+2]) const u8 addr2[6+2])
{ {
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
unsigned long fold = ((*(unsigned long *)addr1) ^ u64 fold = (*(const u64 *)addr1) ^ (*(const u64 *)addr2);
(*(unsigned long *)addr2));
if (sizeof(fold) == 8)
return zap_last_2bytes(fold) == 0;
fold |= zap_last_2bytes((*(unsigned long *)(addr1 + 4)) ^ #ifdef __BIG_ENDIAN
(*(unsigned long *)(addr2 + 4))); return (fold >> 16) == 0;
return fold == 0; #else
return (fold << 16) == 0;
#endif
#else #else
return ether_addr_equal(addr1, addr2); return ether_addr_equal(addr1, addr2);
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment