Commit dd58c635 authored by David S. Miller

Merge branch 'cls_flower'

Jiri Pirko says:

====================
introduce programmable flow dissector and cls_flower

Per Davem's request, I prepared this patchset which introduces programmable
flow dissector. For current users of flow_keys, there is a wrapper
skb_flow_dissect_flow_keys which maintains the previous behaviour.
For the purposes of cls_flower, a couple of new dissection keys were introduced.

Note that this dissector can be also eventually used by openvswitch code.

Also, as a next step, I plan to get rid of *skb_flow_get_ports(export)
and *__skb_get_poff as their functionality can be now implemented by
skb_flow_dissect as well.

v2->v3:
- remove TCA_FLOWER_POLICE attr suggested by Jamal

v1->v2:
- move __skb_tx_hash rather to dev.c as suggested by Alex
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 212da1fa 77b9900e
...@@ -76,7 +76,7 @@ ...@@ -76,7 +76,7 @@
#include <net/netns/generic.h> #include <net/netns/generic.h>
#include <net/pkt_sched.h> #include <net/pkt_sched.h>
#include <linux/rculist.h> #include <linux/rculist.h>
#include <net/flow_keys.h> #include <net/flow_dissector.h>
#include <net/switchdev.h> #include <net/switchdev.h>
#include <net/bonding.h> #include <net/bonding.h>
#include <net/bond_3ad.h> #include <net/bond_3ad.h>
...@@ -3051,16 +3051,16 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, ...@@ -3051,16 +3051,16 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
int noff, proto = -1; int noff, proto = -1;
if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
return skb_flow_dissect(skb, fk); return skb_flow_dissect_flow_keys(skb, fk);
fk->ports = 0; fk->ports.ports = 0;
noff = skb_network_offset(skb); noff = skb_network_offset(skb);
if (skb->protocol == htons(ETH_P_IP)) { if (skb->protocol == htons(ETH_P_IP)) {
if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
return false; return false;
iph = ip_hdr(skb); iph = ip_hdr(skb);
fk->src = iph->saddr; fk->addrs.src = iph->saddr;
fk->dst = iph->daddr; fk->addrs.dst = iph->daddr;
noff += iph->ihl << 2; noff += iph->ihl << 2;
if (!ip_is_fragment(iph)) if (!ip_is_fragment(iph))
proto = iph->protocol; proto = iph->protocol;
...@@ -3068,15 +3068,15 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, ...@@ -3068,15 +3068,15 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6)))) if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6))))
return false; return false;
iph6 = ipv6_hdr(skb); iph6 = ipv6_hdr(skb);
fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr); fk->addrs.src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); fk->addrs.dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
noff += sizeof(*iph6); noff += sizeof(*iph6);
proto = iph6->nexthdr; proto = iph6->nexthdr;
} else { } else {
return false; return false;
} }
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
fk->ports = skb_flow_get_ports(skb, noff, proto); fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
return true; return true;
} }
...@@ -3102,8 +3102,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) ...@@ -3102,8 +3102,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
hash = bond_eth_hash(skb); hash = bond_eth_hash(skb);
else else
hash = (__force u32)flow.ports; hash = (__force u32)flow.ports.ports;
hash ^= (__force u32)flow.dst ^ (__force u32)flow.src; hash ^= (__force u32)flow.addrs.dst ^ (__force u32)flow.addrs.src;
hash ^= (hash >> 16); hash ^= (hash >> 16);
hash ^= (hash >> 8); hash ^= (hash >> 8);
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include <linux/in.h> #include <linux/in.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <net/flow_keys.h> #include <net/flow_dissector.h>
#include "enic_res.h" #include "enic_res.h"
#include "enic_clsf.h" #include "enic_clsf.h"
...@@ -22,7 +22,7 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq) ...@@ -22,7 +22,7 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq)
int res; int res;
struct filter data; struct filter data;
switch (keys->ip_proto) { switch (keys->basic.ip_proto) {
case IPPROTO_TCP: case IPPROTO_TCP:
data.u.ipv4.protocol = PROTO_TCP; data.u.ipv4.protocol = PROTO_TCP;
break; break;
...@@ -33,10 +33,10 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq) ...@@ -33,10 +33,10 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq)
return -EPROTONOSUPPORT; return -EPROTONOSUPPORT;
}; };
data.type = FILTER_IPV4_5TUPLE; data.type = FILTER_IPV4_5TUPLE;
data.u.ipv4.src_addr = ntohl(keys->src); data.u.ipv4.src_addr = ntohl(keys->addrs.src);
data.u.ipv4.dst_addr = ntohl(keys->dst); data.u.ipv4.dst_addr = ntohl(keys->addrs.dst);
data.u.ipv4.src_port = ntohs(keys->port16[0]); data.u.ipv4.src_port = ntohs(keys->ports.src);
data.u.ipv4.dst_port = ntohs(keys->port16[1]); data.u.ipv4.dst_port = ntohs(keys->ports.dst);
data.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; data.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE;
spin_lock_bh(&enic->devcmd_lock); spin_lock_bh(&enic->devcmd_lock);
...@@ -158,11 +158,11 @@ static struct enic_rfs_fltr_node *htbl_key_search(struct hlist_head *h, ...@@ -158,11 +158,11 @@ static struct enic_rfs_fltr_node *htbl_key_search(struct hlist_head *h,
struct enic_rfs_fltr_node *tpos; struct enic_rfs_fltr_node *tpos;
hlist_for_each_entry(tpos, h, node) hlist_for_each_entry(tpos, h, node)
if (tpos->keys.src == k->src && if (tpos->keys.addrs.src == k->addrs.src &&
tpos->keys.dst == k->dst && tpos->keys.addrs.dst == k->addrs.dst &&
tpos->keys.ports == k->ports && tpos->keys.ports.ports == k->ports.ports &&
tpos->keys.ip_proto == k->ip_proto && tpos->keys.basic.ip_proto == k->basic.ip_proto &&
tpos->keys.n_proto == k->n_proto) tpos->keys.basic.n_proto == k->basic.n_proto)
return tpos; return tpos;
return NULL; return NULL;
} }
...@@ -177,9 +177,10 @@ int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, ...@@ -177,9 +177,10 @@ int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
int res, i; int res, i;
enic = netdev_priv(dev); enic = netdev_priv(dev);
res = skb_flow_dissect(skb, &keys); res = skb_flow_dissect_flow_keys(skb, &keys);
if (!res || keys.n_proto != htons(ETH_P_IP) || if (!res || keys.basic.n_proto != htons(ETH_P_IP) ||
(keys.ip_proto != IPPROTO_TCP && keys.ip_proto != IPPROTO_UDP)) (keys.basic.ip_proto != IPPROTO_TCP &&
keys.basic.ip_proto != IPPROTO_UDP))
return -EPROTONOSUPPORT; return -EPROTONOSUPPORT;
tbl_idx = skb_get_hash_raw(skb) & ENIC_RFS_FLW_MASK; tbl_idx = skb_get_hash_raw(skb) & ENIC_RFS_FLW_MASK;
......
...@@ -334,7 +334,7 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd) ...@@ -334,7 +334,7 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd)
n = htbl_fltr_search(enic, (u16)fsp->location); n = htbl_fltr_search(enic, (u16)fsp->location);
if (!n) if (!n)
return -EINVAL; return -EINVAL;
switch (n->keys.ip_proto) { switch (n->keys.basic.ip_proto) {
case IPPROTO_TCP: case IPPROTO_TCP:
fsp->flow_type = TCP_V4_FLOW; fsp->flow_type = TCP_V4_FLOW;
break; break;
...@@ -346,16 +346,16 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd) ...@@ -346,16 +346,16 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd)
break; break;
} }
fsp->h_u.tcp_ip4_spec.ip4src = n->keys.src; fsp->h_u.tcp_ip4_spec.ip4src = n->keys.addrs.src;
fsp->m_u.tcp_ip4_spec.ip4src = (__u32)~0; fsp->m_u.tcp_ip4_spec.ip4src = (__u32)~0;
fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.dst; fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.addrs.dst;
fsp->m_u.tcp_ip4_spec.ip4dst = (__u32)~0; fsp->m_u.tcp_ip4_spec.ip4dst = (__u32)~0;
fsp->h_u.tcp_ip4_spec.psrc = n->keys.port16[0]; fsp->h_u.tcp_ip4_spec.psrc = n->keys.ports.src;
fsp->m_u.tcp_ip4_spec.psrc = (__u16)~0; fsp->m_u.tcp_ip4_spec.psrc = (__u16)~0;
fsp->h_u.tcp_ip4_spec.pdst = n->keys.port16[1]; fsp->h_u.tcp_ip4_spec.pdst = n->keys.ports.dst;
fsp->m_u.tcp_ip4_spec.pdst = (__u16)~0; fsp->m_u.tcp_ip4_spec.pdst = (__u16)~0;
fsp->ring_cookie = n->rq_id; fsp->ring_cookie = n->rq_id;
......
...@@ -196,12 +196,12 @@ static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) ...@@ -196,12 +196,12 @@ static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
struct flow_keys flow; struct flow_keys flow;
int data_len; int data_len;
if (!skb_flow_dissect(skb, &flow) || if (!skb_flow_dissect_flow_keys(skb, &flow) ||
!(flow.n_proto == htons(ETH_P_IP) || !(flow.basic.n_proto == htons(ETH_P_IP) ||
flow.n_proto == htons(ETH_P_IPV6))) flow.basic.n_proto == htons(ETH_P_IPV6)))
return false; return false;
if (flow.ip_proto == IPPROTO_TCP) if (flow.basic.ip_proto == IPPROTO_TCP)
data_len = 12; data_len = 12;
else else
data_len = 8; data_len = 8;
......
...@@ -2832,6 +2832,9 @@ static inline int netif_set_xps_queue(struct net_device *dev, ...@@ -2832,6 +2832,9 @@ static inline int netif_set_xps_queue(struct net_device *dev,
} }
#endif #endif
u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
unsigned int num_tx_queues);
/* /*
* Returns a Tx hash for the given packet when dev->real_num_tx_queues is used * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used
* as a distribution range limit for the returned value. * as a distribution range limit for the returned value.
......
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
#include <linux/dma-mapping.h> #include <linux/dma-mapping.h>
#include <linux/netdev_features.h> #include <linux/netdev_features.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <net/flow_keys.h> #include <net/flow_dissector.h>
/* A. Checksumming of received packets by device. /* A. Checksumming of received packets by device.
* *
...@@ -918,7 +918,6 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) ...@@ -918,7 +918,6 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type)
skb->hash = hash; skb->hash = hash;
} }
void __skb_get_hash(struct sk_buff *skb);
static inline __u32 skb_get_hash(struct sk_buff *skb) static inline __u32 skb_get_hash(struct sk_buff *skb)
{ {
if (!skb->l4_hash && !skb->sw_hash) if (!skb->l4_hash && !skb->sw_hash)
...@@ -1936,8 +1935,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb, ...@@ -1936,8 +1935,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb,
if (skb_transport_header_was_set(skb)) if (skb_transport_header_was_set(skb))
return; return;
else if (skb_flow_dissect(skb, &keys)) else if (skb_flow_dissect_flow_keys(skb, &keys))
skb_set_transport_header(skb, keys.thoff); skb_set_transport_header(skb, keys.basic.thoff);
else else
skb_set_transport_header(skb, offset_hint); skb_set_transport_header(skb, offset_hint);
} }
...@@ -3300,9 +3299,6 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) ...@@ -3300,9 +3299,6 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
return skb->queue_mapping != 0; return skb->queue_mapping != 0;
} }
u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
unsigned int num_tx_queues);
static inline struct sec_path *skb_sec_path(struct sk_buff *skb) static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
{ {
#ifdef CONFIG_XFRM #ifdef CONFIG_XFRM
...@@ -3424,10 +3420,6 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, ...@@ -3424,10 +3420,6 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
unsigned int transport_len, unsigned int transport_len,
__sum16(*skb_chkf)(struct sk_buff *skb)); __sum16(*skb_chkf)(struct sk_buff *skb));
u32 skb_get_poff(const struct sk_buff *skb);
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
const struct flow_keys *keys, int hlen);
/** /**
* skb_head_is_locked - Determine if the skb->head is locked down * skb_head_is_locked - Determine if the skb->head is locked down
* @skb: skb to check * @skb: skb to check
......
#ifndef _NET_FLOW_DISSECTOR_H
#define _NET_FLOW_DISSECTOR_H
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/in6.h>
#include <uapi/linux/if_ether.h>
/**
 * struct flow_dissector_key_basic - basic protocol information of a flow
 * @thoff: Transport header offset
 * @n_proto: Network header protocol (eg. IPv4/IPv6), stored as __be16
 * @ip_proto: Transport header protocol (eg. TCP/UDP)
 *
 * skb_flow_dissector_init() BUG()s on a dissector that does not register
 * FLOW_DISSECTOR_KEY_BASIC, so __skb_flow_dissect() looks up this key
 * without first checking that it was requested.
 */
struct flow_dissector_key_basic {
u16 thoff;
__be16 n_proto;
u8 ip_proto;
};
/**
 * struct flow_dissector_key_addrs - 32-bit source/destination addresses
 * @src: source ip address in case of IPv4
 *       For IPv6 it contains 32bit hash of src address
 * @dst: destination ip address in case of IPv4
 *       For IPv6 it contains 32bit hash of dst address
 *
 * Target of both FLOW_DISSECTOR_KEY_IPV4_ADDRS and
 * FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS.
 */
struct flow_dissector_key_addrs {
/*
 * (src,dst) must stay adjacent and in the same order as in the IP header:
 * the IPv4 path of __skb_flow_dissect() fills both fields with a single
 * memcpy() starting at iph->saddr.
 */
__be32 src;
__be32 dst;
};
/**
 * struct flow_dissector_key_ports - transport header port numbers
 * @ports: both port numbers of the Transport header viewed as one 32-bit value
 * @src: source port number (first 16 bits of @ports)
 * @dst: destination port number (second 16 bits of @ports)
 *
 * For an IPv6 packet carrying a non-zero flow label, __skb_flow_dissect()
 * stores the flow label in @ports instead of dissecting any further.
 */
struct flow_dissector_key_ports {
union {
__be32 ports;
struct {
__be16 src;
__be16 dst;
};
};
};
/**
 * struct flow_dissector_key_ipv6_addrs - full IPv6 source/destination addresses
 * @src: source ip address
 * @dst: destination ip address
 *
 * Target of FLOW_DISSECTOR_KEY_IPV6_ADDRS.
 */
struct flow_dissector_key_ipv6_addrs {
/*
 * (src,dst) must stay adjacent and in the same order as in the IPv6 header:
 * __skb_flow_dissect() fills the whole struct with one memcpy() starting at
 * iph->saddr.
 */
struct in6_addr src;
struct in6_addr dst;
};
/**
 * struct flow_dissector_key_eth_addrs - Ethernet source/destination addresses
 * @dst: destination Ethernet address
 * @src: source Ethernet address
 *
 * Target of FLOW_DISSECTOR_KEY_ETH_ADDRS.
 */
struct flow_dissector_key_eth_addrs {
/*
 * (dst,src) must stay adjacent and in the same order as in the Ethernet
 * header: __skb_flow_dissect() fills the whole struct with one memcpy()
 * starting at eth->h_dest.
 */
unsigned char dst[ETH_ALEN];
unsigned char src[ETH_ALEN];
};
/*
 * Identifiers of the keys a flow dissector can extract. The trailing comment
 * on each entry names the structure the dissected value is written into.
 *
 * Each id is used both as a bit position in flow_dissector.used_keys and as
 * an index into flow_dissector.offset[]; FLOW_DISSECTOR_KEY_MAX is not a key,
 * it only sizes that array.
 */
enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */
FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */
FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */
FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
FLOW_DISSECTOR_KEY_MAX,
};
/*
 * One entry of the key list handed to skb_flow_dissector_init(): which key
 * to dissect and where its value lives inside the caller's target container.
 * skb_flow_dissector_init() BUG()s if @offset does not fit in an
 * unsigned short.
 */
struct flow_dissector_key {
enum flow_dissector_key_id key_id;
size_t offset; /* offset of the struct flow_dissector_key_*
within the target struct */
};
/*
 * Compiled description of what to dissect, built by
 * skb_flow_dissector_init() from an array of struct flow_dissector_key.
 * offset[] is indexed by enum flow_dissector_key_id and holds, for each key,
 * the byte offset of its value inside the target container.
 */
struct flow_dissector {
unsigned int used_keys; /* each bit represents presence of one key id */
unsigned short int offset[FLOW_DISSECTOR_KEY_MAX];
};
/*
 * Zero @flow_dissector, then register the @key_count entries of @key in it.
 * BUG()s if a key offset exceeds USHRT_MAX, if a key id is listed twice, or
 * if FLOW_DISSECTOR_KEY_BASIC is not among the registered keys.
 */
void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
const struct flow_dissector_key *key,
unsigned int key_count);
/*
 * Dissect the keys selected by @flow_dissector into @target_container.
 * The packet comes from @skb, or from the raw buffer described by @data,
 * @proto, @nhoff and @hlen; when @data is NULL those four are taken from
 * @skb instead. The caller must zero @target_container beforehand.
 */
bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
void *data, __be16 proto, int nhoff, int hlen);
/*
 * Dissect @skb itself with @flow_dissector into @target_container.
 *
 * No raw buffer is supplied, so __skb_flow_dissect() takes the data pointer,
 * protocol, network offset and header length from @skb. The caller must have
 * zeroed @target_container.
 */
static inline bool skb_flow_dissect(const struct sk_buff *skb,
				    struct flow_dissector *flow_dissector,
				    void *target_container)
{
	void *no_raw_buffer = NULL;

	return __skb_flow_dissect(skb, flow_dissector, target_container,
				  no_raw_buffer, 0, 0, 0);
}
/*
 * Target container used by skb_flow_dissect_flow_keys() and
 * skb_flow_dissect_flow_keys_buf(): addresses (IPv4, or 32-bit hashes for
 * IPv6), ports and the basic protocol information of one flow.
 */
struct flow_keys {
struct flow_dissector_key_addrs addrs;
struct flow_dissector_key_ports ports;
struct flow_dissector_key_basic basic;
};
extern struct flow_dissector flow_keys_dissector;
extern struct flow_dissector flow_keys_buf_dissector;
static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
struct flow_keys *flow)
{
memset(flow, 0, sizeof(*flow));
return __skb_flow_dissect(skb, &flow_keys_dissector, flow,
NULL, 0, 0, 0);
}
/*
 * Dissect a raw packet buffer (no skb) into a struct flow_keys using the
 * global flow_keys_buf_dissector.
 *
 * @data, @proto, @nhoff and @hlen describe the buffer and are passed through
 * unchanged. @flow is cleared first because __skb_flow_dissect() expects a
 * zeroed target container.
 */
static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow,
						  void *data, __be16 proto,
						  int nhoff, int hlen)
{
	struct flow_dissector *dissector = &flow_keys_buf_dissector;

	memset(flow, 0, sizeof(struct flow_keys));

	return __skb_flow_dissect(NULL, dissector, flow,
				  data, proto, nhoff, hlen);
}
__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
void *data, int hlen_proto);
/*
 * Fetch the upper-layer ports of @skb at transport offset @thoff for
 * protocol @ip_proto. No raw buffer is given, so __skb_flow_get_ports()
 * is called with a NULL data pointer and a zero length.
 */
static inline __be32 skb_flow_get_ports(const struct sk_buff *skb,
					int thoff, u8 ip_proto)
{
	__be32 both_ports;

	both_ports = __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
	return both_ports;
}
u32 flow_hash_from_keys(struct flow_keys *keys);
void __skb_get_hash(struct sk_buff *skb);
u32 skb_get_poff(const struct sk_buff *skb);
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
const struct flow_keys *keys, int hlen);
/* struct flow_keys_digest:
 *
 * This structure is used to hold a digest of the full flow keys. This is a
 * larger "hash" of a flow to allow definitively matching specific flows where
 * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so
 * that it can be used in CB of skb (see sch_choke for an example).
 */
#define FLOW_KEYS_DIGEST_LEN 16
struct flow_keys_digest {
u8 data[FLOW_KEYS_DIGEST_LEN];
};
/* Fill @digest from the flow keys in @flow. */
void make_flow_keys_digest(struct flow_keys_digest *digest,
const struct flow_keys *flow);
#endif
#ifndef _NET_FLOW_KEYS_H
#define _NET_FLOW_KEYS_H
/* struct flow_keys:
 * @src: source ip address in case of IPv4
 *       For IPv6 it contains 32bit hash of src address
 * @dst: destination ip address in case of IPv4
 *       For IPv6 it contains 32bit hash of dst address
 * @ports: port numbers of Transport header
 *         port16[0]: src port number
 *         port16[1]: dst port number
 * @thoff: Transport header offset
 * @n_proto: Network header protocol (eg. IPv4/IPv6)
 * @ip_proto: Transport header protocol (eg. TCP/UDP)
 * All the members, except thoff, are in network byte order.
 */
struct flow_keys {
/* (src,dst) must stay adjacent, in the same order as in the IP header */
__be32 src;
__be32 dst;
union {
__be32 ports;
__be16 port16[2];
};
u16 thoff;
__be16 n_proto;
u8 ip_proto;
};
bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
void *data, __be16 proto, int nhoff, int hlen);
/*
 * Dissect @skb into @flow. No raw buffer is supplied, so
 * __skb_flow_dissect() is called with a NULL data pointer and zeroed
 * protocol, offset and length arguments.
 */
static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
{
	void *no_raw_buffer = NULL;

	return __skb_flow_dissect(skb, flow, no_raw_buffer, 0, 0, 0);
}
__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
void *data, int hlen_proto);
/*
 * Fetch the upper-layer ports of @skb at transport offset @thoff for
 * protocol @ip_proto, without a raw buffer (NULL data, zero length).
 */
static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto)
{
	__be32 both_ports;

	both_ports = __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
	return both_ports;
}
u32 flow_hash_from_keys(struct flow_keys *keys);
unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len,
__be16 protocol);
/* struct flow_keys_digest:
 *
 * This structure is used to hold a digest of the full flow keys. This is a
 * larger "hash" of a flow to allow definitively matching specific flows where
 * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so
 * that it can be used in CB of skb (see sch_choke for an example).
 */
#define FLOW_KEYS_DIGEST_LEN 16
struct flow_keys_digest {
u8 data[FLOW_KEYS_DIGEST_LEN];
};
/* Fill @digest from the flow keys in @flow. */
void make_flow_keys_digest(struct flow_keys_digest *digest,
const struct flow_keys *flow);
#endif
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#include <net/route.h> #include <net/route.h>
#include <net/snmp.h> #include <net/snmp.h>
#include <net/flow.h> #include <net/flow.h>
#include <net/flow_keys.h> #include <net/flow_dissector.h>
struct sock; struct sock;
...@@ -360,10 +360,10 @@ static inline void inet_set_txhash(struct sock *sk) ...@@ -360,10 +360,10 @@ static inline void inet_set_txhash(struct sock *sk)
struct inet_sock *inet = inet_sk(sk); struct inet_sock *inet = inet_sk(sk);
struct flow_keys keys; struct flow_keys keys;
keys.src = inet->inet_saddr; keys.addrs.src = inet->inet_saddr;
keys.dst = inet->inet_daddr; keys.addrs.dst = inet->inet_daddr;
keys.port16[0] = inet->inet_sport; keys.ports.src = inet->inet_sport;
keys.port16[1] = inet->inet_dport; keys.ports.dst = inet->inet_dport;
sk->sk_txhash = flow_hash_from_keys(&keys); sk->sk_txhash = flow_hash_from_keys(&keys);
} }
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include <net/if_inet6.h> #include <net/if_inet6.h>
#include <net/ndisc.h> #include <net/ndisc.h>
#include <net/flow.h> #include <net/flow.h>
#include <net/flow_keys.h> #include <net/flow_dissector.h>
#include <net/snmp.h> #include <net/snmp.h>
#define SIN6_LEN_RFC2133 24 #define SIN6_LEN_RFC2133 24
...@@ -698,10 +698,10 @@ static inline void ip6_set_txhash(struct sock *sk) ...@@ -698,10 +698,10 @@ static inline void ip6_set_txhash(struct sock *sk)
struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk);
struct flow_keys keys; struct flow_keys keys;
keys.src = (__force __be32)ipv6_addr_hash(&np->saddr); keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr);
keys.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr);
keys.port16[0] = inet->inet_sport; keys.ports.src = inet->inet_sport;
keys.port16[1] = inet->inet_dport; keys.ports.dst = inet->inet_dport;
sk->sk_txhash = flow_hash_from_keys(&keys); sk->sk_txhash = flow_hash_from_keys(&keys);
} }
......
...@@ -409,6 +409,36 @@ enum { ...@@ -409,6 +409,36 @@ enum {
#define TCA_BPF_MAX (__TCA_BPF_MAX - 1) #define TCA_BPF_MAX (__TCA_BPF_MAX - 1)
/*
 * Flower classifier
 *
 * Attribute identifiers for the flower classifier. The trailing comment on
 * each TCA_FLOWER_KEY_* entry gives the size or type of its payload.
 * __TCA_FLOWER_MAX is a sentinel; TCA_FLOWER_MAX is the highest real
 * attribute number.
 */
enum {
TCA_FLOWER_UNSPEC,
TCA_FLOWER_CLASSID,
TCA_FLOWER_INDEV,
TCA_FLOWER_ACT,
TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_TYPE, /* be16 */
TCA_FLOWER_KEY_IP_PROTO, /* u8 */
TCA_FLOWER_KEY_IPV4_SRC, /* be32 */
TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */
TCA_FLOWER_KEY_IPV4_DST, /* be32 */
TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */
TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */
TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */
TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */
TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */
TCA_FLOWER_KEY_TCP_SRC, /* be16 */
TCA_FLOWER_KEY_TCP_DST, /* be16 */
TCA_FLOWER_KEY_UDP_SRC, /* be16 */
TCA_FLOWER_KEY_UDP_DST, /* be16 */
__TCA_FLOWER_MAX,
};
#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
/* Extended Matches */ /* Extended Matches */
struct tcf_ematch_tree_hdr { struct tcf_ematch_tree_hdr {
......
...@@ -2350,6 +2350,34 @@ void netif_device_attach(struct net_device *dev) ...@@ -2350,6 +2350,34 @@ void netif_device_attach(struct net_device *dev)
} }
EXPORT_SYMBOL(netif_device_attach); EXPORT_SYMBOL(netif_device_attach);
/*
* Returns a Tx hash based on the given packet descriptor a Tx queues' number
* to be used as a distribution range.
*/
u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
unsigned int num_tx_queues)
{
u32 hash;
u16 qoffset = 0;
u16 qcount = num_tx_queues;
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
while (unlikely(hash >= num_tx_queues))
hash -= num_tx_queues;
return hash;
}
if (dev->num_tc) {
u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
qoffset = dev->tc_to_txq[tc].offset;
qcount = dev->tc_to_txq[tc].count;
}
return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);
static void skb_warn_bad_offload(const struct sk_buff *skb) static void skb_warn_bad_offload(const struct sk_buff *skb)
{ {
static const netdev_features_t null_features = 0; static const netdev_features_t null_features = 0;
...@@ -2908,6 +2936,84 @@ int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb) ...@@ -2908,6 +2936,84 @@ int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
} }
EXPORT_SYMBOL(dev_loopback_xmit); EXPORT_SYMBOL(dev_loopback_xmit);
/*
 * Look up a Tx queue for @skb in the device's XPS maps.
 *
 * Returns the selected queue index, or -1 when CONFIG_XPS is disabled, the
 * device has no XPS maps, there is no map for the sending CPU, or the
 * selected queue is not below dev->real_num_tx_queues.
 */
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
	struct xps_dev_maps *maps;
	struct xps_map *cpu_map;
	int txq = -1;

	rcu_read_lock();

	maps = rcu_dereference(dev->xps_maps);
	if (!maps)
		goto out_unlock;

	/* sender_cpu is stored as CPU id + 1 (see netdev_pick_tx()) */
	cpu_map = rcu_dereference(maps->cpu_map[skb->sender_cpu - 1]);
	if (!cpu_map)
		goto out_unlock;

	/* Only compute the skb hash when there is a choice to make. */
	if (cpu_map->len == 1)
		txq = cpu_map->queues[0];
	else
		txq = cpu_map->queues[reciprocal_scale(skb_get_hash(skb),
						       cpu_map->len)];

	if (unlikely(txq >= dev->real_num_tx_queues))
		txq = -1;

out_unlock:
	rcu_read_unlock();
	return txq;
#else
	return -1;
#endif
}
/*
 * Default Tx queue selection for @skb on @dev.
 *
 * The queue index reported by sk_tx_queue_get() for the skb's socket is
 * reused when it is non-negative, below dev->real_num_tx_queues, and the
 * skb is not flagged ooo_okay. Otherwise a new queue is chosen via XPS,
 * falling back to skb_tx_hash(), and stored back on the socket if the
 * socket has a dst cache entry.
 */
static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	int cached = sk_tx_queue_get(sk);
	int chosen;

	if (cached >= 0 && !skb->ooo_okay &&
	    cached < dev->real_num_tx_queues)
		return cached;

	chosen = get_xps_queue(dev, skb);
	if (chosen < 0)
		chosen = skb_tx_hash(dev, skb);

	if (cached != chosen && sk &&
	    rcu_access_pointer(sk->sk_dst_cache))
		sk_tx_queue_set(sk, chosen);

	return chosen;
}
/*
 * Select the Tx queue for @skb on @dev, record it in the skb's queue
 * mapping and return the corresponding netdev_queue.
 *
 * With a single real Tx queue, index 0 is used. Otherwise the driver's
 * ndo_select_queue() is consulted when provided (with __netdev_pick_tx as
 * the fallback it may call), else __netdev_pick_tx() decides. The result is
 * passed through netdev_cap_txqueue() unless @accel_priv is set.
 */
struct netdev_queue *netdev_pick_tx(struct net_device *dev,
				    struct sk_buff *skb,
				    void *accel_priv)
{
	int txq = 0;

#ifdef CONFIG_XPS
	/* Remember the sending CPU as id + 1, keeping 0 as "not yet set". */
	if (skb->sender_cpu == 0)
		skb->sender_cpu = raw_smp_processor_id() + 1;
#endif

	if (dev->real_num_tx_queues != 1) {
		const struct net_device_ops *ops = dev->netdev_ops;

		txq = ops->ndo_select_queue ?
			ops->ndo_select_queue(dev, skb, accel_priv,
					      __netdev_pick_tx) :
			__netdev_pick_tx(dev, skb);

		if (!accel_priv)
			txq = netdev_cap_txqueue(dev, txq);
	}

	skb_set_queue_mapping(skb, txq);
	return netdev_get_tx_queue(dev, txq);
}
/** /**
* __dev_queue_xmit - transmit a buffer * __dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit * @skb: buffer to transmit
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <net/netlink.h> #include <net/netlink.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/flow_dissector.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/timer.h> #include <linux/timer.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
......
#include <linux/kernel.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/ip.h> #include <linux/ip.h>
...@@ -12,19 +13,57 @@ ...@@ -12,19 +13,57 @@
#include <linux/if_tunnel.h> #include <linux/if_tunnel.h>
#include <linux/if_pppox.h> #include <linux/if_pppox.h>
#include <linux/ppp_defs.h> #include <linux/ppp_defs.h>
#include <net/flow_keys.h> #include <linux/stddef.h>
#include <linux/if_ether.h>
#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h> #include <scsi/fc/fc_fcoe.h>
/* copy saddr & daddr, possibly using 64bit load/store static bool skb_flow_dissector_uses_key(struct flow_dissector *flow_dissector,
* Equivalent to : flow->src = iph->saddr; enum flow_dissector_key_id key_id)
* flow->dst = iph->daddr; {
*/ return flow_dissector->used_keys & (1 << key_id);
static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph) }
/* Mark @key_id as used by setting its bit in @flow_dissector->used_keys. */
static void skb_flow_dissector_set_key(struct flow_dissector *flow_dissector,
				       enum flow_dissector_key_id key_id)
{
	unsigned int key_bit = 1 << key_id;

	flow_dissector->used_keys = flow_dissector->used_keys | key_bit;
}
/*
 * Return the address inside @target_container where the value for @key_id
 * is stored, i.e. the container base plus the byte offset registered for
 * that key in @flow_dissector.
 */
static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
				       enum flow_dissector_key_id key_id,
				       void *target_container)
{
	char *base = target_container;
	unsigned short int key_offset = flow_dissector->offset[key_id];

	return base + key_offset;
}
void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
const struct flow_dissector_key *key,
unsigned int key_count)
{ {
BUILD_BUG_ON(offsetof(typeof(*flow), dst) != unsigned int i;
offsetof(typeof(*flow), src) + sizeof(flow->src));
memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); memset(flow_dissector, 0, sizeof(*flow_dissector));
for (i = 0; i < key_count; i++, key++) {
/* User should make sure that every key target offset is withing
* boundaries of unsigned short.
*/
BUG_ON(key->offset > USHRT_MAX);
BUG_ON(skb_flow_dissector_uses_key(flow_dissector,
key->key_id));
skb_flow_dissector_set_key(flow_dissector, key->key_id);
flow_dissector->offset[key->key_id] = key->offset;
}
/* Ensure that the dissector always includes basic key. That way
* we are able to avoid handling lack of it in fast path.
*/
BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_BASIC));
} }
EXPORT_SYMBOL(skb_flow_dissector_init);
/** /**
* __skb_flow_get_ports - extract the upper layer ports and return them * __skb_flow_get_ports - extract the upper layer ports and return them
...@@ -63,17 +102,27 @@ EXPORT_SYMBOL(__skb_flow_get_ports); ...@@ -63,17 +102,27 @@ EXPORT_SYMBOL(__skb_flow_get_ports);
/** /**
* __skb_flow_dissect - extract the flow_keys struct and return it * __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
* @flow_dissector: list of keys to dissect
* @target_container: target structure to put dissected values into
* @data: raw buffer pointer to the packet, if NULL use skb->data * @data: raw buffer pointer to the packet, if NULL use skb->data
* @proto: protocol for which to get the flow, if @data is NULL use skb->protocol * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
* @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
* @hlen: packet header length, if @data is NULL use skb_headlen(skb) * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
* *
* The function will try to retrieve the struct flow_keys from either the skbuff * The function will try to retrieve individual keys into target specified
* or a raw buffer specified by the rest parameters * by flow_dissector from either the skbuff or a raw buffer specified by the
* rest parameters.
*
* Caller must take care of zeroing target container memory.
*/ */
bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
void *data, __be16 proto, int nhoff, int hlen) void *data, __be16 proto, int nhoff, int hlen)
{ {
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_ports *key_ports;
u8 ip_proto; u8 ip_proto;
if (!data) { if (!data) {
...@@ -83,7 +132,23 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, ...@@ -83,7 +132,23 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
hlen = skb_headlen(skb); hlen = skb_headlen(skb);
} }
memset(flow, 0, sizeof(*flow)); /* It is ensured by skb_flow_dissector_init() that basic key will
* be always present.
*/
key_basic = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
if (skb_flow_dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct ethhdr *eth = eth_hdr(skb);
struct flow_dissector_key_eth_addrs *key_eth_addrs;
key_eth_addrs = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS,
target_container);
memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
}
again: again:
switch (proto) { switch (proto) {
...@@ -100,14 +165,13 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, ...@@ -100,14 +165,13 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
if (ip_is_fragment(iph)) if (ip_is_fragment(iph))
ip_proto = 0; ip_proto = 0;
/* skip the address processing if skb is NULL. The assumption if (!skb_flow_dissector_uses_key(flow_dissector,
* here is that if there is no skb we are not looking for flow FLOW_DISSECTOR_KEY_IPV4_ADDRS))
* info but lengths and protocols.
*/
if (!skb)
break; break;
key_addrs = skb_flow_dissector_target(flow_dissector,
iph_to_flow_copy_addrs(flow, iph); FLOW_DISSECTOR_KEY_IPV4_ADDRS,
target_container);
memcpy(key_addrs, &iph->saddr, sizeof(*key_addrs));
break; break;
} }
case htons(ETH_P_IPV6): { case htons(ETH_P_IPV6): {
...@@ -123,23 +187,47 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, ...@@ -123,23 +187,47 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
ip_proto = iph->nexthdr; ip_proto = iph->nexthdr;
nhoff += sizeof(struct ipv6hdr); nhoff += sizeof(struct ipv6hdr);
/* see comment above in IPv4 section */ if (skb_flow_dissector_uses_key(flow_dissector,
if (!skb) FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) {
break; key_addrs = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
target_container);
flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); key_addrs->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); key_addrs->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
goto flow_label;
}
if (skb_flow_dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs;
key_ipv6_addrs = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_IPV6_ADDRS,
target_container);
memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs));
goto flow_label;
}
break;
flow_label:
flow_label = ip6_flowlabel(iph); flow_label = ip6_flowlabel(iph);
if (flow_label) { if (flow_label) {
/* Awesome, IPv6 packet has a flow label so we can /* Awesome, IPv6 packet has a flow label so we can
* use that to represent the ports without any * use that to represent the ports without any
* further dissection. * further dissection.
*/ */
flow->n_proto = proto;
flow->ip_proto = ip_proto; key_basic->n_proto = proto;
flow->ports = flow_label; key_basic->ip_proto = ip_proto;
flow->thoff = (u16)nhoff; key_basic->thoff = (u16)nhoff;
if (!skb_flow_dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS))
break;
key_ports = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS,
target_container);
key_ports->ports = flow_label;
return true; return true;
} }
...@@ -186,14 +274,22 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, ...@@ -186,14 +274,22 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr) if (!hdr)
return false; return false;
flow->src = hdr->srcnode; key_basic->n_proto = proto;
flow->dst = 0; key_basic->thoff = (u16)nhoff;
flow->n_proto = proto;
flow->thoff = (u16)nhoff; if (skb_flow_dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) {
return true;
key_addrs = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
target_container);
key_addrs->src = hdr->srcnode;
key_addrs->dst = 0;
}
return true; return true;
} }
case htons(ETH_P_FCOE): case htons(ETH_P_FCOE):
flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); key_basic->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
/* fall through */ /* fall through */
default: default:
return false; return false;
...@@ -248,14 +344,24 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, ...@@ -248,14 +344,24 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
break; break;
} }
flow->n_proto = proto; /* It is ensured by skb_flow_dissector_init() that basic key will
flow->ip_proto = ip_proto; * be always present.
flow->thoff = (u16) nhoff; */
key_basic = skb_flow_dissector_target(flow_dissector,
/* unless skb is set we don't need to record port info */ FLOW_DISSECTOR_KEY_BASIC,
if (skb) target_container);
flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, key_basic->n_proto = proto;
key_basic->ip_proto = ip_proto;
key_basic->thoff = (u16) nhoff;
if (skb_flow_dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS)) {
key_ports = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS,
target_container);
key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
data, hlen); data, hlen);
}
return true; return true;
} }
...@@ -277,16 +383,16 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) ...@@ -277,16 +383,16 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
u32 hash; u32 hash;
/* get a consistent hash (same value on both flow directions) */ /* get a consistent hash (same value on both flow directions) */
if (((__force u32)keys->dst < (__force u32)keys->src) || if (((__force u32)keys->addrs.dst < (__force u32)keys->addrs.src) ||
(((__force u32)keys->dst == (__force u32)keys->src) && (((__force u32)keys->addrs.dst == (__force u32)keys->addrs.src) &&
((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) { ((__force u16)keys->ports.dst < (__force u16)keys->ports.src))) {
swap(keys->dst, keys->src); swap(keys->addrs.dst, keys->addrs.src);
swap(keys->port16[0], keys->port16[1]); swap(keys->ports.src, keys->ports.dst);
} }
hash = __flow_hash_3words((__force u32)keys->dst, hash = __flow_hash_3words((__force u32)keys->addrs.dst,
(__force u32)keys->src, (__force u32)keys->addrs.src,
(__force u32)keys->ports, (__force u32)keys->ports.ports,
keyval); keyval);
if (!hash) if (!hash)
hash = 1; hash = 1;
...@@ -304,7 +410,7 @@ EXPORT_SYMBOL(flow_hash_from_keys); ...@@ -304,7 +410,7 @@ EXPORT_SYMBOL(flow_hash_from_keys);
static inline u32 ___skb_get_hash(const struct sk_buff *skb, static inline u32 ___skb_get_hash(const struct sk_buff *skb,
struct flow_keys *keys, u32 keyval) struct flow_keys *keys, u32 keyval)
{ {
if (!skb_flow_dissect(skb, keys)) if (!skb_flow_dissect_flow_keys(skb, keys))
return 0; return 0;
return __flow_hash_from_keys(keys, keyval); return __flow_hash_from_keys(keys, keyval);
...@@ -329,16 +435,19 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, ...@@ -329,16 +435,19 @@ void make_flow_keys_digest(struct flow_keys_digest *digest,
memset(digest, 0, sizeof(*digest)); memset(digest, 0, sizeof(*digest));
data->n_proto = flow->n_proto; data->n_proto = flow->basic.n_proto;
data->ip_proto = flow->ip_proto; data->ip_proto = flow->basic.ip_proto;
data->ports = flow->ports; data->ports = flow->ports.ports;
data->src = flow->src; data->src = flow->addrs.src;
data->dst = flow->dst; data->dst = flow->addrs.dst;
} }
EXPORT_SYMBOL(make_flow_keys_digest); EXPORT_SYMBOL(make_flow_keys_digest);
/* /**
* __skb_get_hash: calculate a flow hash based on src/dst addresses * __skb_get_hash: calculate a flow hash
* @skb: sk_buff to calculate flow hash from
*
* This function calculates a flow hash based on src/dst addresses
* and src/dst port numbers. Sets hash in skb to non-zero hash value * and src/dst port numbers. Sets hash in skb to non-zero hash value
* on success, zero indicates no valid hash. Also, sets l4_hash in skb * on success, zero indicates no valid hash. Also, sets l4_hash in skb
* if hash is a canonical 4-tuple hash over transport ports. * if hash is a canonical 4-tuple hash over transport ports.
...@@ -353,12 +462,9 @@ void __skb_get_hash(struct sk_buff *skb) ...@@ -353,12 +462,9 @@ void __skb_get_hash(struct sk_buff *skb)
hash = ___skb_get_hash(skb, &keys, hashrnd); hash = ___skb_get_hash(skb, &keys, hashrnd);
if (!hash) if (!hash)
return; return;
if (keys.ports.ports)
if (keys.ports)
skb->l4_hash = 1; skb->l4_hash = 1;
skb->sw_hash = 1; skb->sw_hash = 1;
skb->hash = hash; skb->hash = hash;
} }
EXPORT_SYMBOL(__skb_get_hash); EXPORT_SYMBOL(__skb_get_hash);
...@@ -371,40 +477,12 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) ...@@ -371,40 +477,12 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
} }
EXPORT_SYMBOL(skb_get_hash_perturb); EXPORT_SYMBOL(skb_get_hash_perturb);
/*
* Returns a Tx hash based on the given packet descriptor a Tx queues' number
* to be used as a distribution range.
*/
u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
unsigned int num_tx_queues)
{
u32 hash;
u16 qoffset = 0;
u16 qcount = num_tx_queues;
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
while (unlikely(hash >= num_tx_queues))
hash -= num_tx_queues;
return hash;
}
if (dev->num_tc) {
u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
qoffset = dev->tc_to_txq[tc].offset;
qcount = dev->tc_to_txq[tc].count;
}
return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);
u32 __skb_get_poff(const struct sk_buff *skb, void *data, u32 __skb_get_poff(const struct sk_buff *skb, void *data,
const struct flow_keys *keys, int hlen) const struct flow_keys *keys, int hlen)
{ {
u32 poff = keys->thoff; u32 poff = keys->basic.thoff;
switch (keys->ip_proto) { switch (keys->basic.ip_proto) {
case IPPROTO_TCP: { case IPPROTO_TCP: {
/* access doff as u8 to avoid unaligned access */ /* access doff as u8 to avoid unaligned access */
const u8 *doff; const u8 *doff;
...@@ -445,7 +523,11 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data, ...@@ -445,7 +523,11 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
return poff; return poff;
} }
/* skb_get_poff() returns the offset to the payload as far as it could /**
* skb_get_poff - get the offset to the payload
* @skb: sk_buff to get the payload offset from
*
* The function will get the offset to the payload as far as it could
* be dissected. The main user is currently BPF, so that we can dynamically * be dissected. The main user is currently BPF, so that we can dynamically
* truncate packets without needing to push actual payload to the user * truncate packets without needing to push actual payload to the user
* space and can analyze headers only, instead. * space and can analyze headers only, instead.
...@@ -454,86 +536,52 @@ u32 skb_get_poff(const struct sk_buff *skb) ...@@ -454,86 +536,52 @@ u32 skb_get_poff(const struct sk_buff *skb)
{ {
struct flow_keys keys; struct flow_keys keys;
if (!skb_flow_dissect(skb, &keys)) if (!skb_flow_dissect_flow_keys(skb, &keys))
return 0; return 0;
return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
} }
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) static const struct flow_dissector_key flow_keys_dissector_keys[] = {
{ {
#ifdef CONFIG_XPS .key_id = FLOW_DISSECTOR_KEY_BASIC,
struct xps_dev_maps *dev_maps; .offset = offsetof(struct flow_keys, basic),
struct xps_map *map; },
int queue_index = -1; {
.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
rcu_read_lock(); .offset = offsetof(struct flow_keys, addrs),
dev_maps = rcu_dereference(dev->xps_maps); },
if (dev_maps) { {
map = rcu_dereference( .key_id = FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
dev_maps->cpu_map[skb->sender_cpu - 1]); .offset = offsetof(struct flow_keys, addrs),
if (map) { },
if (map->len == 1) {
queue_index = map->queues[0]; .key_id = FLOW_DISSECTOR_KEY_PORTS,
else .offset = offsetof(struct flow_keys, ports),
queue_index = map->queues[reciprocal_scale(skb_get_hash(skb), },
map->len)]; };
if (unlikely(queue_index >= dev->real_num_tx_queues))
queue_index = -1;
}
}
rcu_read_unlock();
return queue_index;
#else
return -1;
#endif
}
static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
{
struct sock *sk = skb->sk;
int queue_index = sk_tx_queue_get(sk);
if (queue_index < 0 || skb->ooo_okay ||
queue_index >= dev->real_num_tx_queues) {
int new_index = get_xps_queue(dev, skb);
if (new_index < 0)
new_index = skb_tx_hash(dev, skb);
if (queue_index != new_index && sk && static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
rcu_access_pointer(sk->sk_dst_cache)) {
sk_tx_queue_set(sk, new_index); .key_id = FLOW_DISSECTOR_KEY_BASIC,
.offset = offsetof(struct flow_keys, basic),
},
};
queue_index = new_index; struct flow_dissector flow_keys_dissector __read_mostly;
} EXPORT_SYMBOL(flow_keys_dissector);
return queue_index; struct flow_dissector flow_keys_buf_dissector __read_mostly;
}
struct netdev_queue *netdev_pick_tx(struct net_device *dev, static int __init init_default_flow_dissectors(void)
struct sk_buff *skb,
void *accel_priv)
{ {
int queue_index = 0; skb_flow_dissector_init(&flow_keys_dissector,
flow_keys_dissector_keys,
#ifdef CONFIG_XPS ARRAY_SIZE(flow_keys_dissector_keys));
if (skb->sender_cpu == 0) skb_flow_dissector_init(&flow_keys_buf_dissector,
skb->sender_cpu = raw_smp_processor_id() + 1; flow_keys_buf_dissector_keys,
#endif ARRAY_SIZE(flow_keys_buf_dissector_keys));
return 0;
if (dev->real_num_tx_queues != 1) {
const struct net_device_ops *ops = dev->netdev_ops;
if (ops->ndo_select_queue)
queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
__netdev_pick_tx);
else
queue_index = __netdev_pick_tx(dev, skb);
if (!accel_priv)
queue_index = netdev_cap_txqueue(dev, queue_index);
}
skb_set_queue_mapping(skb, queue_index);
return netdev_get_tx_queue(dev, queue_index);
} }
late_initcall_sync(init_default_flow_dissectors);
...@@ -58,6 +58,7 @@ ...@@ -58,6 +58,7 @@
#include <net/ipv6.h> #include <net/ipv6.h>
#include <net/ip.h> #include <net/ip.h>
#include <net/dsa.h> #include <net/dsa.h>
#include <net/flow_dissector.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
__setup("ether=", netdev_boot_setup); __setup("ether=", netdev_boot_setup);
...@@ -130,9 +131,9 @@ u32 eth_get_headlen(void *data, unsigned int len) ...@@ -130,9 +131,9 @@ u32 eth_get_headlen(void *data, unsigned int len)
return len; return len;
/* parse any remaining L2/L3 headers, check for L4 */ /* parse any remaining L2/L3 headers, check for L4 */
if (!__skb_flow_dissect(NULL, &keys, data, if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto,
eth->h_proto, sizeof(*eth), len)) sizeof(*eth), len))
return max_t(u32, keys.thoff, sizeof(*eth)); return max_t(u32, keys.basic.thoff, sizeof(*eth));
/* parse for any L4 headers */ /* parse for any L4 headers */
return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len); return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len);
......
...@@ -477,6 +477,16 @@ config NET_CLS_BPF ...@@ -477,6 +477,16 @@ config NET_CLS_BPF
To compile this code as a module, choose M here: the module will To compile this code as a module, choose M here: the module will
be called cls_bpf. be called cls_bpf.
config NET_CLS_FLOWER
tristate "Flower classifier"
select NET_CLS
---help---
If you say Y here, you will be able to classify packets based on
a configurable combination of packet keys and masks.
To compile this code as a module, choose M here: the module will
be called cls_flower.
config NET_EMATCH config NET_EMATCH
bool "Extended Matches" bool "Extended Matches"
select NET_CLS select NET_CLS
......
...@@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o ...@@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o
obj-$(CONFIG_NET_CLS_FLOWER) += cls_flower.o
obj-$(CONFIG_NET_EMATCH) += ematch.o obj-$(CONFIG_NET_EMATCH) += ematch.o
obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#include <net/pkt_cls.h> #include <net/pkt_cls.h>
#include <net/ip.h> #include <net/ip.h>
#include <net/route.h> #include <net/route.h>
#include <net/flow_keys.h> #include <net/flow_dissector.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack.h>
...@@ -68,35 +68,35 @@ static inline u32 addr_fold(void *addr) ...@@ -68,35 +68,35 @@ static inline u32 addr_fold(void *addr)
static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow) static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
{ {
if (flow->src) if (flow->addrs.src)
return ntohl(flow->src); return ntohl(flow->addrs.src);
return addr_fold(skb->sk); return addr_fold(skb->sk);
} }
static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{ {
if (flow->dst) if (flow->addrs.dst)
return ntohl(flow->dst); return ntohl(flow->addrs.dst);
return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
} }
static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
{ {
return flow->ip_proto; return flow->basic.ip_proto;
} }
static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
{ {
if (flow->ports) if (flow->ports.ports)
return ntohs(flow->port16[0]); return ntohs(flow->ports.src);
return addr_fold(skb->sk); return addr_fold(skb->sk);
} }
static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{ {
if (flow->ports) if (flow->ports.ports)
return ntohs(flow->port16[1]); return ntohs(flow->ports.dst);
return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
} }
...@@ -295,7 +295,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, ...@@ -295,7 +295,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
keymask = f->keymask; keymask = f->keymask;
if (keymask & FLOW_KEYS_NEEDED) if (keymask & FLOW_KEYS_NEEDED)
skb_flow_dissect(skb, &flow_keys); skb_flow_dissect_flow_keys(skb, &flow_keys);
for (n = 0; n < f->nkeys; n++) { for (n = 0; n < f->nkeys; n++) {
key = ffs(keymask) - 1; key = ffs(keymask) - 1;
......
/*
* net/sched/cls_flower.c Flower classifier
*
* Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rhashtable.h>
#include <linux/if_ether.h>
#include <linux/in6.h>
#include <linux/ip.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/flow_dissector.h>
/* Flat match key used by the flower classifier. One instance is built per
 * packet in fl_classify() and one per filter in fl_set_key(). The members
 * other than indev_ifindex are the targets the flow dissector writes into
 * (see fl_init_dissector()). The key is masked one long at a time by
 * fl_set_masked_key(), hence the alignment attribute on the struct.
 */
struct fl_flow_key {
/* Input device index: skb->skb_iif for packets, TCA_FLOWER_INDEV for filters */
int indev_ifindex;
/* Target of FLOW_DISSECTOR_KEY_BASIC (n_proto, ip_proto, ...) */
struct flow_dissector_key_basic basic;
/* Target of FLOW_DISSECTOR_KEY_ETH_ADDRS */
struct flow_dissector_key_eth_addrs eth;
/* IPv4 and IPv6 addresses share storage; basic.n_proto tells which one
 * fl_set_key() and fl_dump() treat as valid.
 */
union {
struct flow_dissector_key_addrs ipv4;
struct flow_dissector_key_ipv6_addrs ipv6;
};
/* Target of FLOW_DISSECTOR_KEY_PORTS (transport src/dst ports) */
struct flow_dissector_key_ports tp;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
/* Byte span of struct fl_flow_key that a mask actually covers. Both values
 * are offsets from the start of the key and are computed by
 * fl_mask_update_range(), which aligns them to sizeof(long).
 */
struct fl_flow_mask_range {
/* Offset of the first covered byte, rounded down to a long boundary */
unsigned short int start;
/* Offset one past the last covered byte, rounded up to a long boundary */
unsigned short int end;
};
/* A key-shaped bit mask together with the byte range its set bits span. */
struct fl_flow_mask {
/* Mask bits, laid out exactly like the key they are ANDed with */
struct fl_flow_key key;
/* Span of @key containing non-zero bytes; see fl_mask_update_range() */
struct fl_flow_mask_range range;
struct rcu_head rcu;
};
/* Per-tcf_proto state of the flower classifier, allocated in fl_init() and
 * published through tp->root.
 */
struct cls_fl_head {
/* Filters hashed by the masked range of their mkey; only initialized once
 * a mask has been assigned (see fl_check_assign_mask()).
 */
struct rhashtable ht;
/* The single mask shared by every filter of this instance */
struct fl_flow_mask mask;
/* Dissector configured by fl_init_dissector() for the assigned mask */
struct flow_dissector dissector;
/* Last handle value tried by fl_grab_new_handle() */
u32 hgen;
/* True once @mask, @ht and @dissector have been set up */
bool mask_assigned;
/* List of all cls_fl_filter objects, linked through cls_fl_filter.list */
struct list_head filters;
/* Copy of fl_ht_params with key_len/key_offset adjusted to @mask's range */
struct rhashtable_params ht_params;
/* Used by kfree_rcu() in fl_destroy() */
struct rcu_head rcu;
};
/* One installed flower filter. */
struct cls_fl_filter {
/* Linkage into cls_fl_head.ht */
struct rhash_head ht_node;
/* key ANDed with the head's mask; this is what the hashtable is keyed on
 * (fl_ht_params.key_offset points here).
 */
struct fl_flow_key mkey;
/* Extensions executed by tcf_exts_exec() on a match */
struct tcf_exts exts;
/* Classification result copied to the caller on a match */
struct tcf_result res;
/* Unmasked key as supplied by user space; used by fl_dump() */
struct fl_flow_key key;
/* Linkage into cls_fl_head.filters */
struct list_head list;
/* Filter handle looked up by fl_get() */
u32 handle;
/* Used by call_rcu() to defer fl_destroy_filter() */
struct rcu_head rcu;
};
static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
{
return mask->range.end - mask->range.start;
}
/*
 * fl_mask_update_range - recompute the byte span covered by a mask
 * @mask: mask whose ->key has been filled in; ->range is overwritten
 *
 * Finds the first and last non-zero byte of mask->key and stores the span in
 * mask->range, widened to sizeof(long) boundaries: start is rounded down and
 * end (exclusive) is rounded up, so the span can be processed a long at a
 * time. If no byte is set, first/last keep their initial values (0 and
 * size - 1) and the range is derived from those.
 */
static void fl_mask_update_range(struct fl_flow_mask *mask)
{
	const u8 *bytes = (const u8 *) &mask->key;
	size_t size = sizeof(mask->key);
	size_t i, first = 0, last = size - 1;
	bool found = false;

	for (i = 0; i < size; i++) {
		if (!bytes[i])
			continue;
		/* Use an explicit flag: offset 0 is a legitimate first byte, so
		 * "first == 0" cannot double as "nothing seen yet".
		 */
		if (!found) {
			first = i;
			found = true;
		}
		last = i;
	}
	mask->range.start = rounddown(first, sizeof(long));
	mask->range.end = roundup(last + 1, sizeof(long));
}
static void *fl_key_get_start(struct fl_flow_key *key,
const struct fl_flow_mask *mask)
{
return (u8 *) key + mask->range.start;
}
/*
 * Fill the masked range of @mkey with (@key & @mask->key), one long at a
 * time. Bytes of @mkey outside the mask's range are not touched.
 */
static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
			      struct fl_flow_mask *mask)
{
	const long *src = fl_key_get_start(key, mask);
	const long *bits = fl_key_get_start(&mask->key, mask);
	long *dst = fl_key_get_start(mkey, mask);
	const int span = fl_mask_range(mask);
	int off;

	for (off = 0; off < span; off += sizeof(long)) {
		size_t idx = off / sizeof(long);

		dst[idx] = src[idx] & bits[idx];
	}
}
/* Zero the part of @key that lies inside @mask's covered range. */
static void fl_clear_masked_range(struct fl_flow_key *key,
				  struct fl_flow_mask *mask)
{
	void *start = fl_key_get_start(key, mask);
	size_t len = fl_mask_range(mask);

	memset(start, 0, len);
}
/*
 * Classify @skb: build a key from the packet, mask it with the head's mask
 * and look the result up in the filter hashtable.
 *
 * Returns the result of tcf_exts_exec() for the matching filter (with *@res
 * set to that filter's result), or -1 if no filter matches.
 */
static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		       struct tcf_result *res)
{
	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
	struct fl_flow_mask *mask = &head->mask;
	struct fl_flow_key skb_key;
	struct fl_flow_key skb_mkey;
	struct cls_fl_filter *match;

	/* Only the masked range is ever read, so only that part is cleared */
	fl_clear_masked_range(&skb_key, mask);
	skb_key.indev_ifindex = skb->skb_iif;
	/* The dissector leaves n_proto unset for protocols it does not know,
	 * so seed it from the skb before dissecting.
	 */
	skb_key.basic.n_proto = skb->protocol;
	skb_flow_dissect(skb, &head->dissector, &skb_key);

	fl_set_masked_key(&skb_mkey, &skb_key, mask);

	match = rhashtable_lookup_fast(&head->ht,
				       fl_key_get_start(&skb_mkey, mask),
				       head->ht_params);
	if (!match)
		return -1;

	*res = match->res;
	return tcf_exts_exec(skb, &match->exts, res);
}
static int fl_init(struct tcf_proto *tp)
{
struct cls_fl_head *head;
head = kzalloc(sizeof(*head), GFP_KERNEL);
if (!head)
return -ENOBUFS;
INIT_LIST_HEAD_RCU(&head->filters);
rcu_assign_pointer(tp->root, head);
return 0;
}
/* RCU callback: release a filter's extensions and then the filter itself. */
static void fl_destroy_filter(struct rcu_head *head)
{
	struct cls_fl_filter *filter;

	filter = container_of(head, struct cls_fl_filter, rcu);
	tcf_exts_destroy(&filter->exts);
	kfree(filter);
}
/*
 * Tear down the classifier instance.
 *
 * Without @force the instance is only destroyed when it holds no filters;
 * otherwise false is returned and nothing is changed. On destruction every
 * filter is unlinked and freed via call_rcu(), tp->root is cleared, the
 * hashtable is destroyed if it was ever set up, and the head is freed via
 * kfree_rcu(). Returns true when the instance was destroyed.
 */
static bool fl_destroy(struct tcf_proto *tp, bool force)
{
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *cur, *tmp;

	if (!(force || list_empty(&head->filters)))
		return false;

	list_for_each_entry_safe(cur, tmp, &head->filters, list) {
		list_del_rcu(&cur->list);
		call_rcu(&cur->rcu, fl_destroy_filter);
	}

	RCU_INIT_POINTER(tp->root, NULL);
	/* The hashtable only exists once a mask has been assigned */
	if (head->mask_assigned)
		rhashtable_destroy(&head->ht);
	kfree_rcu(head, rcu);
	return true;
}
/*
 * Look up a filter by @handle with a linear walk of the filter list.
 * Returns the filter pointer cast to unsigned long, or 0 if none matches.
 */
static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
{
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *cur;

	list_for_each_entry(cur, &head->filters, list) {
		if (cur->handle != handle)
			continue;
		return (unsigned long) cur;
	}

	return 0;
}
static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC },
[TCA_FLOWER_CLASSID] = { .type = NLA_U32 },
[TCA_FLOWER_INDEV] = { .type = NLA_STRING,
.len = IFNAMSIZ },
[TCA_FLOWER_KEY_ETH_DST] = { .len = ETH_ALEN },
[TCA_FLOWER_KEY_ETH_DST_MASK] = { .len = ETH_ALEN },
[TCA_FLOWER_KEY_ETH_SRC] = { .len = ETH_ALEN },
[TCA_FLOWER_KEY_ETH_SRC_MASK] = { .len = ETH_ALEN },
[TCA_FLOWER_KEY_ETH_TYPE] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_IP_PROTO] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_IPV4_SRC] = { .type = NLA_U32 },
[TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NLA_U32 },
[TCA_FLOWER_KEY_IPV4_DST] = { .type = NLA_U32 },
[TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NLA_U32 },
[TCA_FLOWER_KEY_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
[TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
[TCA_FLOWER_KEY_IPV6_DST] = { .len = sizeof(struct in6_addr) },
[TCA_FLOWER_KEY_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
[TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
};
/*
 * Copy one key field and its mask out of the parsed attribute table.
 *
 * Does nothing when attribute @val_type is absent. Otherwise @len bytes of
 * the attribute payload are copied to @val. The mask comes from attribute
 * @mask_type when that is a real attribute and present; otherwise @mask is
 * set to all ones (exact match).
 */
static void fl_set_key_val(struct nlattr **tb,
			   void *val, int val_type,
			   void *mask, int mask_type, int len)
{
	struct nlattr *val_attr = tb[val_type];

	if (!val_attr)
		return;

	memcpy(val, nla_data(val_attr), len);

	if (mask_type != TCA_FLOWER_UNSPEC && tb[mask_type])
		memcpy(mask, nla_data(tb[mask_type]), len);
	else
		memset(mask, 0xff, len);
}
static int fl_set_key(struct net *net, struct nlattr **tb,
struct fl_flow_key *key, struct fl_flow_key *mask)
{
int err;
if (tb[TCA_FLOWER_INDEV]) {
err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
if (err < 0)
return err;
key->indev_ifindex = err;
mask->indev_ifindex = 0xffffffff;
}
fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
sizeof(key->eth.dst));
fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
sizeof(key->eth.src));
fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
&mask->basic.n_proto, TCA_FLOWER_UNSPEC,
sizeof(key->basic.n_proto));
if (key->basic.n_proto == htons(ETH_P_IP) ||
key->basic.n_proto == htons(ETH_P_IPV6)) {
fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
&mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
sizeof(key->basic.ip_proto));
}
if (key->basic.n_proto == htons(ETH_P_IP)) {
fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
&mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
sizeof(key->ipv4.src));
fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
&mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
sizeof(key->ipv4.dst));
} else if (key->basic.n_proto == htons(ETH_P_IPV6)) {
fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
&mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
sizeof(key->ipv6.src));
fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
&mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
sizeof(key->ipv6.dst));
}
if (key->basic.ip_proto == IPPROTO_TCP) {
fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
&mask->tp.src, TCA_FLOWER_UNSPEC,
sizeof(key->tp.src));
fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
&mask->tp.dst, TCA_FLOWER_UNSPEC,
sizeof(key->tp.dst));
} else if (key->basic.ip_proto == IPPROTO_UDP) {
fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
&mask->tp.src, TCA_FLOWER_UNSPEC,
sizeof(key->tp.src));
fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
&mask->tp.dst, TCA_FLOWER_UNSPEC,
sizeof(key->tp.dst));
}
return 0;
}
/*
 * Two masks are equal when they cover the same byte range and the mask bytes
 * inside that range are identical.
 */
static bool fl_mask_eq(struct fl_flow_mask *mask1,
		       struct fl_flow_mask *mask2)
{
	if (memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) != 0)
		return false;

	return memcmp(fl_key_get_start(&mask1->key, mask1),
		      fl_key_get_start(&mask2->key, mask2),
		      fl_mask_range(mask1)) == 0;
}
/* Template hashtable parameters. fl_init_hashtable() copies this into the
 * head, sets key_len to the mask's range length and advances key_offset by
 * the mask's range start, so only the masked span of mkey is hashed.
 */
static const struct rhashtable_params fl_ht_params = {
.key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
.head_offset = offsetof(struct cls_fl_filter, ht_node),
.automatic_shrinking = true,
};
static int fl_init_hashtable(struct cls_fl_head *head,
struct fl_flow_mask *mask)
{
head->ht_params = fl_ht_params;
head->ht_params.key_len = fl_mask_range(mask);
head->ht_params.key_offset += mask->range.start;
return rhashtable_init(&head->ht, &head->ht_params);
}
/* Byte offset, size and end offset (exclusive) of a struct fl_flow_key member */
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
#define FL_KEY_MEMBER_END_OFFSET(member)					\
	(FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))

/* True when @member's byte span touches the range covered by @mask */
#define FL_KEY_IN_RANGE(mask, member)						\
	(FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end &&			\
	 FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)

/*
 * Append a dissector key entry for @member to @keys and bump @cnt.
 * No trailing semicolon after while (0): the caller supplies it, which keeps
 * the macro usable as a single statement (e.g. in front of an else).
 */
#define FL_KEY_SET(keys, cnt, id, member)					\
	do {									\
		(keys)[cnt].key_id = id;					\
		(keys)[cnt].offset = FL_KEY_MEMBER_OFFSET(member);		\
		(cnt)++;							\
	} while (0)

/* Like FL_KEY_SET(), but only when @member lies within @mask's range */
#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member)			\
	do {									\
		if (FL_KEY_IN_RANGE(mask, member))				\
			FL_KEY_SET(keys, cnt, id, member);			\
	} while (0)
/*
 * Configure head->dissector for the given @mask. The basic key is always
 * requested; the Ethernet, IPv4, IPv6 and port keys are requested only when
 * their storage in struct fl_flow_key falls within the mask's range.
 */
static void fl_init_dissector(struct cls_fl_head *head,
			      struct fl_flow_mask *mask)
{
	struct flow_dissector_key wanted[FLOW_DISSECTOR_KEY_MAX];
	size_t nwanted = 0;

	/* Unconditional: the basic key is requested whatever the mask covers */
	FL_KEY_SET(wanted, nwanted, FLOW_DISSECTOR_KEY_BASIC, basic);

	FL_KEY_SET_IF_IN_RANGE(mask, wanted, nwanted,
			       FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
	FL_KEY_SET_IF_IN_RANGE(mask, wanted, nwanted,
			       FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	FL_KEY_SET_IF_IN_RANGE(mask, wanted, nwanted,
			       FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	FL_KEY_SET_IF_IN_RANGE(mask, wanted, nwanted,
			       FLOW_DISSECTOR_KEY_PORTS, tp);

	skb_flow_dissector_init(&head->dissector, wanted, nwanted);
}
/*
 * Make sure @mask is usable for this classifier instance.
 *
 * An instance supports a single mask. If one is already assigned, @mask must
 * equal it (0) or the call fails (-EINVAL). Otherwise @mask becomes the
 * instance mask: the hashtable is initialized for its range, the mask is
 * copied into the head and the dissector is configured.
 *
 * Returns 0 on success, -EINVAL on a mask mismatch, or the error from
 * fl_init_hashtable().
 */
static int fl_check_assign_mask(struct cls_fl_head *head,
				struct fl_flow_mask *mask)
{
	int err;

	if (head->mask_assigned)
		return fl_mask_eq(&head->mask, mask) ? 0 : -EINVAL;

	/* First filter on this instance: adopt its mask */
	err = fl_init_hashtable(head, mask);
	if (err)
		return err;

	memcpy(&head->mask, mask, sizeof(head->mask));
	head->mask_assigned = true;
	fl_init_dissector(head, mask);

	return 0;
}
/*
 * Fill filter @f and @mask from the netlink attributes.
 *
 * Validates the extensions, binds the class if TCA_FLOWER_CLASSID is given,
 * parses the key/mask attributes, computes the mask range and the masked key,
 * and finally installs the validated extensions on @f.
 *
 * Returns 0 on success or a negative error from tcf_exts_validate() or
 * fl_set_key(); in the latter case the temporary extensions are destroyed.
 */
static int fl_set_parms(struct net *net, struct tcf_proto *tp,
			struct cls_fl_filter *f, struct fl_flow_mask *mask,
			unsigned long base, struct nlattr **tb,
			struct nlattr *est, bool ovr)
{
	struct tcf_exts new_exts;
	int err;

	tcf_exts_init(&new_exts, TCA_FLOWER_ACT, 0);
	err = tcf_exts_validate(net, tp, tb, est, &new_exts, ovr);
	if (err < 0)
		return err;

	if (tb[TCA_FLOWER_CLASSID]) {
		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
		tcf_bind_filter(tp, &f->res, base);
	}

	err = fl_set_key(net, tb, &f->key, &mask->key);
	if (err) {
		tcf_exts_destroy(&new_exts);
		return err;
	}

	/* The range must be known before the masked key can be derived */
	fl_mask_update_range(mask);
	fl_set_masked_key(&f->mkey, &f->key, mask);

	tcf_exts_change(tp, &f->exts, &new_exts);
	return 0;
}
/*
 * Pick an unused filter handle by advancing head->hgen (wrapping to 1 when it
 * reaches 0x7FFFFFFF) until fl_get() finds no filter with that handle.
 * Gives up after 0x80000000 attempts, logs an error and returns 0.
 */
static u32 fl_grab_new_handle(struct tcf_proto *tp,
			      struct cls_fl_head *head)
{
	unsigned int tries_left = 0x80000000;

	for (;;) {
		head->hgen++;
		if (head->hgen == 0x7FFFFFFF)
			head->hgen = 1;

		/* The attempt budget is checked before the lookup */
		if (--tries_left == 0)
			break;
		if (!fl_get(tp, head->hgen))
			break;
	}

	if (unlikely(tries_left == 0)) {
		pr_err("Insufficient number of handles\n");
		return 0;
	}

	return head->hgen;
}
/*
 * fl_change - create a new filter or replace an existing one
 * @net: network namespace, passed on to attribute parsing
 * @in_skb: originating netlink skb (unused here)
 * @tp: classifier instance
 * @base: base class used when binding the filter to a class
 * @handle: requested handle, or 0 to have one generated
 * @tca: top-level attributes; TCA_OPTIONS carries the flower attributes
 * @arg: in: existing filter to replace (or 0); out: the new filter
 * @ovr: override flag passed on to extension validation
 *
 * A new filter is always allocated; when replacing, the old filter is removed
 * from the hashtable, swapped out of the list and freed after a grace period.
 *
 * Returns 0 on success. Returns -EINVAL when TCA_OPTIONS is missing, when
 * @handle does not match the filter being replaced, or when no handle could
 * be generated; -ENOBUFS on allocation failure; otherwise the error from
 * nla_parse_nested(), fl_set_parms(), fl_check_assign_mask() or
 * rhashtable_insert_fast().
 */
static int fl_change(struct net *net, struct sk_buff *in_skb,
		     struct tcf_proto *tp, unsigned long base,
		     u32 handle, struct nlattr **tca,
		     unsigned long *arg, bool ovr)
{
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
	struct cls_fl_filter *fnew;
	struct nlattr *tb[TCA_FLOWER_MAX + 1];
	struct fl_flow_mask mask = {};
	int err;

	if (!tca[TCA_OPTIONS])
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
	if (err < 0)
		return err;

	if (fold && handle && fold->handle != handle)
		return -EINVAL;

	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
	if (!fnew)
		return -ENOBUFS;

	tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);

	if (!handle) {
		handle = fl_grab_new_handle(tp, head);
		if (!handle) {
			err = -EINVAL;
			goto errout;
		}
	}
	fnew->handle = handle;

	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
	if (err)
		goto errout;

	err = fl_check_assign_mask(head, &mask);
	if (err)
		goto errout;

	err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
				     head->ht_params);
	if (err)
		goto errout;

	if (fold)
		rhashtable_remove_fast(&head->ht, &fold->ht_node,
				       head->ht_params);

	*arg = (unsigned long) fnew;

	if (fold) {
		/* list_replace_rcu() takes (old, new): the entry already on
		 * the list comes first, its replacement second.
		 */
		list_replace_rcu(&fold->list, &fnew->list);
		tcf_unbind_filter(tp, &fold->res);
		call_rcu(&fold->rcu, fl_destroy_filter);
	} else {
		list_add_tail_rcu(&fnew->list, &head->filters);
	}

	return 0;

errout:
	/* After fl_set_parms() succeeded fnew->exts owns the validated
	 * extensions, so release them before freeing the filter (mirrors
	 * fl_destroy_filter()).
	 */
	tcf_exts_destroy(&fnew->exts);
	kfree(fnew);
	return err;
}
/*
 * Remove the filter referenced by @arg: drop it from the hashtable and the
 * filter list, unbind it from its class and free it after an RCU grace
 * period. Always returns 0.
 */
static int fl_delete(struct tcf_proto *tp, unsigned long arg)
{
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *victim = (struct cls_fl_filter *) arg;

	rhashtable_remove_fast(&head->ht, &victim->ht_node, head->ht_params);
	list_del_rcu(&victim->list);

	tcf_unbind_filter(tp, &victim->res);
	call_rcu(&victim->rcu, fl_destroy_filter);

	return 0;
}
/*
 * Iterate over all filters on behalf of a walker. The first arg->skip entries
 * are only counted; for the rest arg->fn() is invoked. If the callback
 * returns a negative value, arg->stop is set and the walk ends without
 * counting that entry.
 */
static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *cur;

	list_for_each_entry_rcu(cur, &head->filters, list) {
		if (arg->count >= arg->skip &&
		    arg->fn(tp, (unsigned long) cur, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}
/*
 * Emit one key field to a netlink message.
 *
 * Nothing is written when the @len bytes of @mask are all zero. Otherwise
 * the value is put as attribute @val_type, followed by the mask as attribute
 * @mask_type unless that is TCA_FLOWER_UNSPEC.
 *
 * Returns 0 on success or the error from nla_put().
 */
static int fl_dump_key_val(struct sk_buff *skb,
			   void *val, int val_type,
			   void *mask, int mask_type, int len)
{
	int err;

	/* memchr_inv() returns NULL when every mask byte is 0 */
	if (!memchr_inv(mask, 0, len))
		return 0;

	err = nla_put(skb, val_type, len, val);
	if (err)
		return err;

	if (mask_type == TCA_FLOWER_UNSPEC)
		return 0;

	return nla_put(skb, mask_type, len, mask);
}
static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
struct nlattr *nest;
struct fl_flow_key *key, *mask;
if (!f)
return skb->len;
t->tcm_handle = f->handle;
nest = nla_nest_start(skb, TCA_OPTIONS);
if (!nest)
goto nla_put_failure;
if (f->res.classid &&
nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
goto nla_put_failure;
key = &f->key;
mask = &head->mask.key;
if (mask->indev_ifindex) {
struct net_device *dev;
dev = __dev_get_by_index(net, key->indev_ifindex);
if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
goto nla_put_failure;
}
if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
sizeof(key->eth.dst)) ||
fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
sizeof(key->eth.src)) ||
fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
&mask->basic.n_proto, TCA_FLOWER_UNSPEC,
sizeof(key->basic.n_proto)))
goto nla_put_failure;
if ((key->basic.n_proto == htons(ETH_P_IP) ||
key->basic.n_proto == htons(ETH_P_IPV6)) &&
fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
&mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
sizeof(key->basic.ip_proto)))
goto nla_put_failure;
if (key->basic.n_proto == htons(ETH_P_IP) &&
(fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
&mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
sizeof(key->ipv4.src)) ||
fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
&mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
sizeof(key->ipv4.dst))))
goto nla_put_failure;
else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
(fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
&mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
sizeof(key->ipv6.src)) ||
fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
&mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
sizeof(key->ipv6.dst))))
goto nla_put_failure;
if (key->basic.ip_proto == IPPROTO_TCP &&
(fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
&mask->tp.src, TCA_FLOWER_UNSPEC,
sizeof(key->tp.src)) ||
fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
&mask->tp.dst, TCA_FLOWER_UNSPEC,
sizeof(key->tp.dst))))
goto nla_put_failure;
else if (key->basic.ip_proto == IPPROTO_UDP &&
(fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
&mask->tp.src, TCA_FLOWER_UNSPEC,
sizeof(key->tp.src)) ||
fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
&mask->tp.dst, TCA_FLOWER_UNSPEC,
sizeof(key->tp.dst))))
goto nla_put_failure;
if (tcf_exts_dump(skb, &f->exts))
goto nla_put_failure;
nla_nest_end(skb, nest);
if (tcf_exts_dump_stats(skb, &f->exts) < 0)
goto nla_put_failure;
return skb->len;
nla_put_failure:
nla_nest_cancel(skb, nest);
return -1;
}
/*
 * Operations table for the "flower" classifier kind.  It is passed to
 * register_tcf_proto_ops() / unregister_tcf_proto_ops() by the module
 * init and exit functions.
 */
static struct tcf_proto_ops cls_fl_ops __read_mostly = {
	.kind		= "flower",
	.owner		= THIS_MODULE,

	.init		= fl_init,
	.destroy	= fl_destroy,

	.classify	= fl_classify,

	.get		= fl_get,
	.change		= fl_change,
	.delete		= fl_delete,
	.walk		= fl_walk,
	.dump		= fl_dump,
};
/*
 * Module init: hand cls_fl_ops to register_tcf_proto_ops() and return
 * whatever that call reports.
 */
static int __init cls_fl_init(void)
{
	int err = register_tcf_proto_ops(&cls_fl_ops);

	return err;
}
/* Module exit: withdraw cls_fl_ops via unregister_tcf_proto_ops(). */
static void __exit cls_fl_exit(void)
{
	unregister_tcf_proto_ops(&cls_fl_ops);
}
/* Module entry/exit hooks and module metadata. */
module_init(cls_fl_init);
module_exit(cls_fl_exit);
MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
MODULE_DESCRIPTION("Flower classifier");
MODULE_LICENSE("GPL v2");
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#include <net/pkt_sched.h> #include <net/pkt_sched.h>
#include <net/inet_ecn.h> #include <net/inet_ecn.h>
#include <net/red.h> #include <net/red.h>
#include <net/flow_keys.h> #include <net/flow_dissector.h>
/* /*
CHOKe stateless AQM for fair bandwidth allocation CHOKe stateless AQM for fair bandwidth allocation
...@@ -170,13 +170,13 @@ static bool choke_match_flow(struct sk_buff *skb1, ...@@ -170,13 +170,13 @@ static bool choke_match_flow(struct sk_buff *skb1,
if (!choke_skb_cb(skb1)->keys_valid) { if (!choke_skb_cb(skb1)->keys_valid) {
choke_skb_cb(skb1)->keys_valid = 1; choke_skb_cb(skb1)->keys_valid = 1;
skb_flow_dissect(skb1, &temp); skb_flow_dissect_flow_keys(skb1, &temp);
make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp); make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
} }
if (!choke_skb_cb(skb2)->keys_valid) { if (!choke_skb_cb(skb2)->keys_valid) {
choke_skb_cb(skb2)->keys_valid = 1; choke_skb_cb(skb2)->keys_valid = 1;
skb_flow_dissect(skb2, &temp); skb_flow_dissect_flow_keys(skb2, &temp);
make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp); make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment