Commit 6c035ea0 authored by David S. Miller

Merge branch 'net-hash-tx'

Tom Herbert says:

====================
net: Improvements and applications of packet flow hash in transmit path

This patch series includes some patches which improve and make use
of skb->hash in the transmit path.

What is included:

- Infrastructure to save a precomputed hash in the sock structure.
  For connected TCP and UDP sockets we only need to compute the
  flow hash once and not once for every packet.
- Call skb_get_hash in get_xps_queue and __skb_tx_hash. This eliminates
  the awkward access to skb->sk->sk_hash in the lower transmit path.
- Move UDP source port generation into a common function in udp.h. This
  implementation is mostly based on vxlan_src_port.
- Use non-zero IPv6 flow labels in flow_dissector as port information
  for flow hash calculation.
- Implement automatic flow label generation on transmit (per RFC 6438).
- Don't repeatedly try to compute an L4 hash in skb_get_hash if we've
  already tried to find one in software stack calculation.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 081a20ff a3b18ddb
...@@ -1132,6 +1132,15 @@ flowlabel_consistency - BOOLEAN ...@@ -1132,6 +1132,15 @@ flowlabel_consistency - BOOLEAN
FALSE: disabled FALSE: disabled
Default: TRUE Default: TRUE
auto_flowlabels - BOOLEAN
Automatically generate flow labels based on a flow hash
of the packet. This allows intermediate devices, such as routers,
to identify packet flows for mechanisms like Equal Cost Multipath
Routing (see RFC 6438).
TRUE: enabled
FALSE: disabled
Default: false
anycast_src_echo_reply - BOOLEAN anycast_src_echo_reply - BOOLEAN
Controls the use of anycast addresses as source addresses for ICMPv6 Controls the use of anycast addresses as source addresses for ICMPv6
echo reply echo reply
......
...@@ -1570,25 +1570,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) ...@@ -1570,25 +1570,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false; return false;
} }
/* Select the UDP source port for an outgoing encapsulated packet.
 *
 * The skb flow hash is the preferred input, since it should spread
 * better and may already be available from hardware.  When no flow
 * hash can be obtained (skb_get_hash() returns 0), fall back to a
 * jhash over the first 2 * ETH_ALEN bytes at skb->data, seeded with
 * skb->protocol.
 *
 * The 32-bit hash is then scaled into the inclusive range
 * [port_min, port_max] and returned in network byte order.
 */
__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb)
{
	u32 hash = skb_get_hash(skb);
	unsigned int range = (port_max - port_min) + 1;
	u64 scaled;

	if (hash == 0)
		hash = jhash(skb->data, 2 * ETH_ALEN,
			     (__force u32) skb->protocol);

	/* Multiply-and-shift maps the hash onto [0, range) without a divide. */
	scaled = ((u64) hash * range) >> 32;

	return htons(scaled + port_min);
}
EXPORT_SYMBOL_GPL(vxlan_src_port);
static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb, static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
bool udp_csum) bool udp_csum)
{ {
...@@ -1807,7 +1788,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1807,7 +1788,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (tos == 1) if (tos == 1)
tos = ip_tunnel_get_dsfield(old_iph, skb); tos = ip_tunnel_get_dsfield(old_iph, skb);
src_port = vxlan_src_port(vxlan->port_min, vxlan->port_max, skb); src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->port_min,
vxlan->port_max, true);
if (dst->sa.sa_family == AF_INET) { if (dst->sa.sa_family == AF_INET) {
memset(&fl4, 0, sizeof(fl4)); memset(&fl4, 0, sizeof(fl4));
...@@ -2235,7 +2217,6 @@ static void vxlan_setup(struct net_device *dev) ...@@ -2235,7 +2217,6 @@ static void vxlan_setup(struct net_device *dev)
{ {
struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_dev *vxlan = netdev_priv(dev);
unsigned int h; unsigned int h;
int low, high;
eth_hw_addr_random(dev); eth_hw_addr_random(dev);
ether_setup(dev); ether_setup(dev);
...@@ -2272,9 +2253,6 @@ static void vxlan_setup(struct net_device *dev) ...@@ -2272,9 +2253,6 @@ static void vxlan_setup(struct net_device *dev)
vxlan->age_timer.function = vxlan_cleanup; vxlan->age_timer.function = vxlan_cleanup;
vxlan->age_timer.data = (unsigned long) vxlan; vxlan->age_timer.data = (unsigned long) vxlan;
inet_get_local_port_range(dev_net(dev), &low, &high);
vxlan->port_min = low;
vxlan->port_max = high;
vxlan->dst_port = htons(vxlan_port); vxlan->dst_port = htons(vxlan_port);
vxlan->dev = dev; vxlan->dev = dev;
......
...@@ -199,7 +199,8 @@ struct ipv6_pinfo { ...@@ -199,7 +199,8 @@ struct ipv6_pinfo {
* 010: prefer public address * 010: prefer public address
* 100: prefer care-of address * 100: prefer care-of address
*/ */
dontfrag:1; dontfrag:1,
autoflowlabel:1;
__u8 min_hopcount; __u8 min_hopcount;
__u8 tclass; __u8 tclass;
__be32 rcv_flowinfo; __be32 rcv_flowinfo;
......
...@@ -2486,7 +2486,7 @@ static inline int netif_set_xps_queue(struct net_device *dev, ...@@ -2486,7 +2486,7 @@ static inline int netif_set_xps_queue(struct net_device *dev,
* as a distribution range limit for the returned value. * as a distribution range limit for the returned value.
*/ */
static inline u16 skb_tx_hash(const struct net_device *dev, static inline u16 skb_tx_hash(const struct net_device *dev,
const struct sk_buff *skb) struct sk_buff *skb)
{ {
return __skb_tx_hash(dev, skb, dev->real_num_tx_queues); return __skb_tx_hash(dev, skb, dev->real_num_tx_queues);
} }
......
...@@ -455,6 +455,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, ...@@ -455,6 +455,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
* @ooo_okay: allow the mapping of a socket to a queue to be changed * @ooo_okay: allow the mapping of a socket to a queue to be changed
* @l4_hash: indicate hash is a canonical 4-tuple hash over transport * @l4_hash: indicate hash is a canonical 4-tuple hash over transport
* ports. * ports.
* @sw_hash: indicates hash was computed in software stack
* @wifi_acked_valid: wifi_acked was set * @wifi_acked_valid: wifi_acked was set
* @wifi_acked: whether frame was acked on wifi or not * @wifi_acked: whether frame was acked on wifi or not
* @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
...@@ -562,6 +563,7 @@ struct sk_buff { ...@@ -562,6 +563,7 @@ struct sk_buff {
__u8 pfmemalloc:1; __u8 pfmemalloc:1;
__u8 ooo_okay:1; __u8 ooo_okay:1;
__u8 l4_hash:1; __u8 l4_hash:1;
__u8 sw_hash:1;
__u8 wifi_acked_valid:1; __u8 wifi_acked_valid:1;
__u8 wifi_acked:1; __u8 wifi_acked:1;
__u8 no_fcs:1; __u8 no_fcs:1;
...@@ -575,7 +577,7 @@ struct sk_buff { ...@@ -575,7 +577,7 @@ struct sk_buff {
__u8 encap_hdr_csum:1; __u8 encap_hdr_csum:1;
__u8 csum_valid:1; __u8 csum_valid:1;
__u8 csum_complete_sw:1; __u8 csum_complete_sw:1;
/* 3/5 bit hole (depending on ndisc_nodetype presence) */ /* 2/4 bit hole (depending on ndisc_nodetype presence) */
kmemcheck_bitfield_end(flags2); kmemcheck_bitfield_end(flags2);
#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
...@@ -830,13 +832,14 @@ static inline void ...@@ -830,13 +832,14 @@ static inline void
skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type)
{ {
skb->l4_hash = (type == PKT_HASH_TYPE_L4); skb->l4_hash = (type == PKT_HASH_TYPE_L4);
skb->sw_hash = 0;
skb->hash = hash; skb->hash = hash;
} }
void __skb_get_hash(struct sk_buff *skb); void __skb_get_hash(struct sk_buff *skb);
static inline __u32 skb_get_hash(struct sk_buff *skb) static inline __u32 skb_get_hash(struct sk_buff *skb)
{ {
if (!skb->l4_hash) if (!skb->l4_hash && !skb->sw_hash)
__skb_get_hash(skb); __skb_get_hash(skb);
return skb->hash; return skb->hash;
...@@ -850,6 +853,7 @@ static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) ...@@ -850,6 +853,7 @@ static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
static inline void skb_clear_hash(struct sk_buff *skb) static inline void skb_clear_hash(struct sk_buff *skb)
{ {
skb->hash = 0; skb->hash = 0;
skb->sw_hash = 0;
skb->l4_hash = 0; skb->l4_hash = 0;
} }
...@@ -862,6 +866,7 @@ static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) ...@@ -862,6 +866,7 @@ static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb)
static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
{ {
to->hash = from->hash; to->hash = from->hash;
to->sw_hash = from->sw_hash;
to->l4_hash = from->l4_hash; to->l4_hash = from->l4_hash;
}; };
...@@ -3005,7 +3010,7 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) ...@@ -3005,7 +3010,7 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
return skb->queue_mapping != 0; return skb->queue_mapping != 0;
} }
u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
unsigned int num_tx_queues); unsigned int num_tx_queues);
static inline struct sec_path *skb_sec_path(struct sk_buff *skb) static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
......
...@@ -29,4 +29,5 @@ struct flow_keys { ...@@ -29,4 +29,5 @@ struct flow_keys {
bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow); bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow);
__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto); __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto);
u32 flow_hash_from_keys(struct flow_keys *keys);
#endif #endif
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include <net/route.h> #include <net/route.h>
#include <net/snmp.h> #include <net/snmp.h>
#include <net/flow.h> #include <net/flow.h>
#include <net/flow_keys.h>
struct sock; struct sock;
...@@ -353,6 +354,19 @@ static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) ...@@ -353,6 +354,19 @@ static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
skb->len, proto, 0); skb->len, proto, 0);
} }
static inline void inet_set_txhash(struct sock *sk)
{
struct inet_sock *inet = inet_sk(sk);
struct flow_keys keys;
keys.src = inet->inet_saddr;
keys.dst = inet->inet_daddr;
keys.port16[0] = inet->inet_sport;
keys.port16[1] = inet->inet_dport;
sk->sk_txhash = flow_hash_from_keys(&keys);
}
/* /*
* Map a multicast IP onto multicast MAC for type ethernet. * Map a multicast IP onto multicast MAC for type ethernet.
*/ */
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <net/if_inet6.h> #include <net/if_inet6.h>
#include <net/ndisc.h> #include <net/ndisc.h>
#include <net/flow.h> #include <net/flow.h>
#include <net/flow_keys.h>
#include <net/snmp.h> #include <net/snmp.h>
#define SIN6_LEN_RFC2133 24 #define SIN6_LEN_RFC2133 24
...@@ -684,6 +685,40 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, ...@@ -684,6 +685,40 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
return hlimit; return hlimit;
} }
static inline void ip6_set_txhash(struct sock *sk)
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct flow_keys keys;
keys.src = (__force __be32)ipv6_addr_hash(&np->saddr);
keys.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr);
keys.port16[0] = inet->inet_sport;
keys.port16[1] = inet->inet_dport;
sk->sk_txhash = flow_hash_from_keys(&keys);
}
/* Choose the flow label to place in an outgoing IPv6 header.
 *
 * @net:       namespace whose auto_flowlabels sysctl is consulted
 * @skb:       packet being sent; its flow hash seeds a generated label
 * @flowlabel: label supplied by the caller
 * @autolabel: caller's own request for an automatically generated label
 *
 * Returns @flowlabel unchanged when it is non-zero, or when neither
 * @autolabel nor the auto_flowlabels sysctl is set.  Otherwise returns a
 * label derived from skb_get_hash() and masked with IPV6_FLOWLABEL_MASK.
 */
static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
					__be32 flowlabel, bool autolabel)
{
	u32 hash;

	if (flowlabel || !(autolabel || net->ipv6.sysctl.auto_flowlabels))
		return flowlabel;

	/* skb_get_hash() yields a host-order value, so keep it in a plain
	 * u32 while doing arithmetic on it; it is only reinterpreted as
	 * __be32 at the final mask.
	 */
	hash = skb_get_hash(skb);

	/* Since this is being sent on the wire obfuscate hash a bit
	 * to minimize possibility that any useful information to an
	 * attacker is leaked. Only lower 20 bits are relevant.
	 * NOTE(review): the mask is applied to the raw host-order value,
	 * so which hash bits survive presumably depends on host byte
	 * order - confirm against the IPV6_FLOWLABEL_MASK definition.
	 */
	hash ^= hash >> 12;

	return (__force __be32)hash & IPV6_FLOWLABEL_MASK;
}
/* /*
* Header manipulation * Header manipulation
*/ */
......
...@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 { ...@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
int ip6_rt_mtu_expires; int ip6_rt_mtu_expires;
int ip6_rt_min_advmss; int ip6_rt_min_advmss;
int flowlabel_consistency; int flowlabel_consistency;
int auto_flowlabels;
int icmpv6_time; int icmpv6_time;
int anycast_src_echo_reply; int anycast_src_echo_reply;
int fwmark_reflect; int fwmark_reflect;
......
...@@ -273,6 +273,7 @@ struct cg_proto; ...@@ -273,6 +273,7 @@ struct cg_proto;
* @sk_rcvtimeo: %SO_RCVTIMEO setting * @sk_rcvtimeo: %SO_RCVTIMEO setting
* @sk_sndtimeo: %SO_SNDTIMEO setting * @sk_sndtimeo: %SO_SNDTIMEO setting
* @sk_rxhash: flow hash received from netif layer * @sk_rxhash: flow hash received from netif layer
* @sk_txhash: computed flow hash for use on transmit
* @sk_filter: socket filtering instructions * @sk_filter: socket filtering instructions
* @sk_protinfo: private area, net family specific, when not using slab * @sk_protinfo: private area, net family specific, when not using slab
* @sk_timer: sock cleanup timer * @sk_timer: sock cleanup timer
...@@ -347,6 +348,7 @@ struct sock { ...@@ -347,6 +348,7 @@ struct sock {
#ifdef CONFIG_RPS #ifdef CONFIG_RPS
__u32 sk_rxhash; __u32 sk_rxhash;
#endif #endif
__u32 sk_txhash;
#ifdef CONFIG_NET_RX_BUSY_POLL #ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sk_napi_id; unsigned int sk_napi_id;
unsigned int sk_ll_usec; unsigned int sk_ll_usec;
...@@ -1980,6 +1982,14 @@ static inline void sock_poll_wait(struct file *filp, ...@@ -1980,6 +1982,14 @@ static inline void sock_poll_wait(struct file *filp,
} }
} }
static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
{
if (sk->sk_txhash) {
skb->l4_hash = 1;
skb->hash = sk->sk_txhash;
}
}
/* /*
* Queue a received datagram if it will fit. Stream and sequenced * Queue a received datagram if it will fit. Stream and sequenced
* protocols can't normally use this as they need to fit buffers in * protocols can't normally use this as they need to fit buffers in
...@@ -1994,6 +2004,7 @@ static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) ...@@ -1994,6 +2004,7 @@ static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
skb_orphan(skb); skb_orphan(skb);
skb->sk = sk; skb->sk = sk;
skb->destructor = sock_wfree; skb->destructor = sock_wfree;
skb_set_hash_from_sk(skb, sk);
/* /*
* We used to take a refcount on sk, but following operation * We used to take a refcount on sk, but following operation
* is enough to guarantee sk_free() wont free this sock until * is enough to guarantee sk_free() wont free this sock until
......
...@@ -176,6 +176,35 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, ...@@ -176,6 +176,35 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
int (*)(const struct sock *, const struct sock *), int (*)(const struct sock *, const struct sock *),
unsigned int hash2_nulladdr); unsigned int hash2_nulladdr);
/* Derive a UDP source port for an encapsulated packet from its flow hash.
 *
 * @net:     namespace used to look up the default local port range
 * @skb:     packet being encapsulated
 * @min:     lowest port that may be returned
 * @max:     highest port that may be returned
 * @use_eth: caller asserts skb->data points at an Ethernet header, so the
 *           MAC addresses may be hashed when no flow hash is available
 *
 * When @min >= @max the range from inet_get_local_port_range() is used
 * instead.  Returns a port in network byte order; the range is inclusive
 * of both @min and @max.
 */
static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
				       int min, int max, bool use_eth)
{
	u32 hash;

	if (min >= max) {
		/* Use default range */
		inet_get_local_port_range(net, &min, &max);
	}

	hash = skb_get_hash(skb);
	if (unlikely(!hash) && use_eth) {
		/* Can't find a normal hash, caller has indicated an Ethernet
		 * packet so use that to compute a hash.
		 */
		hash = jhash(skb->data, 2 * ETH_ALEN,
			     (__force u32) skb->protocol);
	}

	/* Since this is being sent on the wire obfuscate hash a bit
	 * to minimize possibility that any useful information to an
	 * attacker is leaked. Only upper 16 bits are relevant in the
	 * computation for 16 bit port value.
	 */
	hash ^= hash << 16;

	/* Scale over (max - min) + 1 values so that @max itself can be
	 * selected; scaling by (max - min) alone never yields the top port.
	 */
	return htons((((u64) hash * ((max - min) + 1)) >> 32) + min);
}
/* net/ipv4/udp.c */ /* net/ipv4/udp.c */
void udp_v4_early_demux(struct sk_buff *skb); void udp_v4_early_demux(struct sk_buff *skb);
int udp_get_port(struct sock *sk, unsigned short snum, int udp_get_port(struct sock *sk, unsigned short snum,
......
...@@ -45,8 +45,6 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, ...@@ -45,8 +45,6 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet); __be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb);
/* IP header + UDP + VXLAN + Ethernet header */ /* IP header + UDP + VXLAN + Ethernet header */
#define VXLAN_HEADROOM (20 + 8 + 8 + 14) #define VXLAN_HEADROOM (20 + 8 + 8 + 14)
/* IPv6 header + UDP + VXLAN + Ethernet header */ /* IPv6 header + UDP + VXLAN + Ethernet header */
......
...@@ -233,6 +233,7 @@ struct in6_flowlabel_req { ...@@ -233,6 +233,7 @@ struct in6_flowlabel_req {
#if 0 /* not yet */ #if 0 /* not yet */
#define IPV6_USE_MIN_MTU 63 #define IPV6_USE_MIN_MTU 63
#endif #endif
#define IPV6_AUTOFLOWLABEL 64
/* /*
* Netfilter (1) * Netfilter (1)
......
...@@ -80,6 +80,8 @@ bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) ...@@ -80,6 +80,8 @@ bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
case htons(ETH_P_IPV6): { case htons(ETH_P_IPV6): {
const struct ipv6hdr *iph; const struct ipv6hdr *iph;
struct ipv6hdr _iph; struct ipv6hdr _iph;
__be32 flow_label;
ipv6: ipv6:
iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
if (!iph) if (!iph)
...@@ -89,6 +91,21 @@ bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) ...@@ -89,6 +91,21 @@ bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
nhoff += sizeof(struct ipv6hdr); nhoff += sizeof(struct ipv6hdr);
flow_label = ip6_flowlabel(iph);
if (flow_label) {
/* Awesome, IPv6 packet has a flow label so we can
* use that to represent the ports without any
* further dissection.
*/
flow->n_proto = proto;
flow->ip_proto = ip_proto;
flow->ports = flow_label;
flow->thoff = (u16)nhoff;
return true;
}
break; break;
} }
case htons(ETH_P_8021AD): case htons(ETH_P_8021AD):
...@@ -196,12 +213,33 @@ static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c) ...@@ -196,12 +213,33 @@ static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
return jhash_3words(a, b, c, hashrnd); return jhash_3words(a, b, c, hashrnd);
} }
static __always_inline u32 __flow_hash_1word(u32 a) static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
{ {
__flow_hash_secret_init(); u32 hash;
return jhash_1word(a, hashrnd);
/* get a consistent hash (same value on both flow directions) */
if (((__force u32)keys->dst < (__force u32)keys->src) ||
(((__force u32)keys->dst == (__force u32)keys->src) &&
((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
swap(keys->dst, keys->src);
swap(keys->port16[0], keys->port16[1]);
}
hash = __flow_hash_3words((__force u32)keys->dst,
(__force u32)keys->src,
(__force u32)keys->ports);
if (!hash)
hash = 1;
return hash;
} }
/*
 * flow_hash_from_keys: exported entry point that computes a flow hash
 * from an already-populated flow_keys by forwarding to
 * __flow_hash_from_keys() and returning its result.
 *
 * The helper may swap the src/dst and port fields of *keys to obtain a
 * direction-independent hash, so callers should expect *keys to be
 * modified.
 */
u32 flow_hash_from_keys(struct flow_keys *keys)
{
return __flow_hash_from_keys(keys);
}
EXPORT_SYMBOL(flow_hash_from_keys);
/* /*
* __skb_get_hash: calculate a flow hash based on src/dst addresses * __skb_get_hash: calculate a flow hash based on src/dst addresses
* and src/dst port numbers. Sets hash in skb to non-zero hash value * and src/dst port numbers. Sets hash in skb to non-zero hash value
...@@ -211,7 +249,6 @@ static __always_inline u32 __flow_hash_1word(u32 a) ...@@ -211,7 +249,6 @@ static __always_inline u32 __flow_hash_1word(u32 a)
void __skb_get_hash(struct sk_buff *skb) void __skb_get_hash(struct sk_buff *skb)
{ {
struct flow_keys keys; struct flow_keys keys;
u32 hash;
if (!skb_flow_dissect(skb, &keys)) if (!skb_flow_dissect(skb, &keys))
return; return;
...@@ -219,21 +256,9 @@ void __skb_get_hash(struct sk_buff *skb) ...@@ -219,21 +256,9 @@ void __skb_get_hash(struct sk_buff *skb)
if (keys.ports) if (keys.ports)
skb->l4_hash = 1; skb->l4_hash = 1;
/* get a consistent hash (same value on both flow directions) */ skb->sw_hash = 1;
if (((__force u32)keys.dst < (__force u32)keys.src) ||
(((__force u32)keys.dst == (__force u32)keys.src) &&
((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
swap(keys.dst, keys.src);
swap(keys.port16[0], keys.port16[1]);
}
hash = __flow_hash_3words((__force u32)keys.dst,
(__force u32)keys.src,
(__force u32)keys.ports);
if (!hash)
hash = 1;
skb->hash = hash; skb->hash = __flow_hash_from_keys(&keys);
} }
EXPORT_SYMBOL(__skb_get_hash); EXPORT_SYMBOL(__skb_get_hash);
...@@ -241,7 +266,7 @@ EXPORT_SYMBOL(__skb_get_hash); ...@@ -241,7 +266,7 @@ EXPORT_SYMBOL(__skb_get_hash);
* Returns a Tx hash based on the given packet descriptor a Tx queues' number * Returns a Tx hash based on the given packet descriptor a Tx queues' number
* to be used as a distribution range. * to be used as a distribution range.
*/ */
u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
unsigned int num_tx_queues) unsigned int num_tx_queues)
{ {
u32 hash; u32 hash;
...@@ -261,13 +286,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, ...@@ -261,13 +286,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
qcount = dev->tc_to_txq[tc].count; qcount = dev->tc_to_txq[tc].count;
} }
if (skb->sk && skb->sk->sk_hash) return (u16) (((u64)skb_get_hash(skb) * qcount) >> 32) + qoffset;
hash = skb->sk->sk_hash;
else
hash = (__force u16) skb->protocol;
hash = __flow_hash_1word(hash);
return (u16) (((u64) hash * qcount) >> 32) + qoffset;
} }
EXPORT_SYMBOL(__skb_tx_hash); EXPORT_SYMBOL(__skb_tx_hash);
...@@ -339,17 +358,10 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) ...@@ -339,17 +358,10 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
if (map) { if (map) {
if (map->len == 1) if (map->len == 1)
queue_index = map->queues[0]; queue_index = map->queues[0];
else { else
u32 hash;
if (skb->sk && skb->sk->sk_hash)
hash = skb->sk->sk_hash;
else
hash = (__force u16) skb->protocol ^
skb->hash;
hash = __flow_hash_1word(hash);
queue_index = map->queues[ queue_index = map->queues[
((u64)hash * map->len) >> 32]; ((u64)skb_get_hash(skb) * map->len) >> 32];
}
if (unlikely(queue_index >= dev->real_num_tx_queues)) if (unlikely(queue_index >= dev->real_num_tx_queues))
queue_index = -1; queue_index = -1;
} }
......
...@@ -76,6 +76,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ...@@ -76,6 +76,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr = fl4->daddr; inet->inet_daddr = fl4->daddr;
inet->inet_dport = usin->sin_port; inet->inet_dport = usin->sin_port;
sk->sk_state = TCP_ESTABLISHED; sk->sk_state = TCP_ESTABLISHED;
inet_set_txhash(sk);
inet->inet_id = jiffies; inet->inet_id = jiffies;
sk_dst_set(sk, &rt->dst); sk_dst_set(sk, &rt->dst);
......
...@@ -208,6 +208,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ...@@ -208,6 +208,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_dport = usin->sin_port; inet->inet_dport = usin->sin_port;
inet->inet_daddr = daddr; inet->inet_daddr = daddr;
inet_set_txhash(sk);
inet_csk(sk)->icsk_ext_hdr_len = 0; inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet_opt) if (inet_opt)
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
...@@ -1334,6 +1336,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ...@@ -1334,6 +1336,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->rcv_tos = ip_hdr(skb)->tos; newinet->rcv_tos = ip_hdr(skb)->tos;
inet_csk(newsk)->icsk_ext_hdr_len = 0; inet_csk(newsk)->icsk_ext_hdr_len = 0;
inet_set_txhash(newsk);
if (inet_opt) if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
newinet->inet_id = newtp->write_seq ^ jiffies; newinet->inet_id = newtp->write_seq ^ jiffies;
......
...@@ -916,6 +916,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, ...@@ -916,6 +916,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_orphan(skb); skb_orphan(skb);
skb->sk = sk; skb->sk = sk;
skb->destructor = tcp_wfree; skb->destructor = tcp_wfree;
skb_set_hash_from_sk(skb, sk);
atomic_add(skb->truesize, &sk->sk_wmem_alloc); atomic_add(skb->truesize, &sk->sk_wmem_alloc);
/* Build TCP header and checksum it. */ /* Build TCP header and checksum it. */
......
...@@ -765,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net) ...@@ -765,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.icmpv6_time = 1*HZ;
net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.flowlabel_consistency = 1;
net->ipv6.sysctl.auto_flowlabels = 0;
atomic_set(&net->ipv6.rt_genid, 0); atomic_set(&net->ipv6.rt_genid, 0);
err = ipv6_init_mibs(net); err = ipv6_init_mibs(net);
......
...@@ -199,6 +199,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ...@@ -199,6 +199,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
NULL); NULL);
sk->sk_state = TCP_ESTABLISHED; sk->sk_state = TCP_ESTABLISHED;
ip6_set_txhash(sk);
out: out:
fl6_sock_release(flowlabel); fl6_sock_release(flowlabel);
return err; return err;
......
...@@ -723,7 +723,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, ...@@ -723,7 +723,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
* Push down and install the IP header. * Push down and install the IP header.
*/ */
ipv6h = ipv6_hdr(skb); ipv6h = ipv6_hdr(skb);
ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->hop_limit = tunnel->parms.hop_limit;
ipv6h->nexthdr = proto; ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr; ipv6h->saddr = fl6->saddr;
...@@ -1174,7 +1175,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, ...@@ -1174,7 +1175,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
__be16 *p = (__be16 *)(ipv6h+1); __be16 *p = (__be16 *)(ipv6h+1);
ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel); ip6_flow_hdr(ipv6h, 0,
ip6_make_flowlabel(dev_net(dev), skb,
t->fl.u.ip6.flowlabel, false));
ipv6h->hop_limit = t->parms.hop_limit; ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->nexthdr = NEXTHDR_GRE;
ipv6h->saddr = t->parms.laddr; ipv6h->saddr = t->parms.laddr;
......
...@@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, ...@@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if (hlimit < 0) if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst); hlimit = ip6_dst_hoplimit(dst);
ip6_flow_hdr(hdr, tclass, fl6->flowlabel); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
np->autoflowlabel));
hdr->payload_len = htons(seg_len); hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto; hdr->nexthdr = proto;
...@@ -1569,7 +1570,9 @@ int ip6_push_pending_frames(struct sock *sk) ...@@ -1569,7 +1570,9 @@ int ip6_push_pending_frames(struct sock *sk)
skb_reset_network_header(skb); skb_reset_network_header(skb);
hdr = ipv6_hdr(skb); hdr = ipv6_hdr(skb);
ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); ip6_flow_hdr(hdr, np->cork.tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
np->autoflowlabel));
hdr->hop_limit = np->cork.hop_limit; hdr->hop_limit = np->cork.hop_limit;
hdr->nexthdr = proto; hdr->nexthdr = proto;
hdr->saddr = fl6->saddr; hdr->saddr = fl6->saddr;
......
...@@ -1046,7 +1046,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ...@@ -1046,7 +1046,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
skb_push(skb, sizeof(struct ipv6hdr)); skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb); skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb); ipv6h = ipv6_hdr(skb);
ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
ipv6h->hop_limit = t->parms.hop_limit; ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto; ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr; ipv6h->saddr = fl6->saddr;
......
...@@ -834,6 +834,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, ...@@ -834,6 +834,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
np->dontfrag = valbool; np->dontfrag = valbool;
retv = 0; retv = 0;
break; break;
case IPV6_AUTOFLOWLABEL:
np->autoflowlabel = valbool;
retv = 0;
break;
} }
release_sock(sk); release_sock(sk);
...@@ -1273,6 +1277,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, ...@@ -1273,6 +1277,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->dontfrag; val = np->dontfrag;
break; break;
case IPV6_AUTOFLOWLABEL:
val = np->autoflowlabel;
break;
default: default:
return -ENOPROTOOPT; return -ENOPROTOOPT;
} }
......
...@@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = { ...@@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
}, },
{
.procname = "auto_flowlabels",
.data = &init_net.ipv6.sysctl.auto_flowlabels,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ {
.procname = "fwmark_reflect", .procname = "fwmark_reflect",
.data = &init_net.ipv6.sysctl.fwmark_reflect, .data = &init_net.ipv6.sysctl.fwmark_reflect,
...@@ -74,6 +81,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ...@@ -74,6 +81,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; ipv6_table[0].data = &net->ipv6.sysctl.bindv6only;
ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply;
ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency;
ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels;
ipv6_route_table = ipv6_route_sysctl_init(net); ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table) if (!ipv6_route_table)
......
...@@ -198,6 +198,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ...@@ -198,6 +198,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_v6_daddr = usin->sin6_addr; sk->sk_v6_daddr = usin->sin6_addr;
np->flow_label = fl6.flowlabel; np->flow_label = fl6.flowlabel;
ip6_set_txhash(sk);
/* /*
* TCP over IPv4 * TCP over IPv4
*/ */
...@@ -1132,6 +1134,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ...@@ -1132,6 +1134,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
newsk->sk_bound_dev_if = ireq->ir_iif; newsk->sk_bound_dev_if = ireq->ir_iif;
ip6_set_txhash(newsk);
/* Now IPv6 options... /* Now IPv6 options...
First: no IPv4 options. First: no IPv4 options.
......
...@@ -143,8 +143,6 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) ...@@ -143,8 +143,6 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
struct rtable *rt; struct rtable *rt;
struct flowi4 fl; struct flowi4 fl;
__be16 src_port; __be16 src_port;
int port_min;
int port_max;
__be16 df; __be16 df;
int err; int err;
...@@ -172,8 +170,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) ...@@ -172,8 +170,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
skb->ignore_df = 1; skb->ignore_df = 1;
inet_get_local_port_range(net, &port_min, &port_max); src_port = udp_flow_src_port(net, skb, 0, 0, true);
src_port = vxlan_src_port(port_min, port_max, skb);
err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst, fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment