Commit e54740e6 authored by David S. Miller

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch

Jesse Gross says:

====================
A set of OVS changes for net-next/3.16.

The major change here is a switch from per-CPU to per-NUMA flow
statistics. This improves scalability by reducing kernel overhead
in flow setup and maintenance.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ad2ebb3d 944df8ae
...@@ -134,8 +134,8 @@ static int set_eth_addr(struct sk_buff *skb, ...@@ -134,8 +134,8 @@ static int set_eth_addr(struct sk_buff *skb,
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN); ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN); ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);
ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
......
...@@ -524,7 +524,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) ...@@ -524,7 +524,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
packet->protocol = htons(ETH_P_802_2); packet->protocol = htons(ETH_P_802_2);
/* Build an sw_flow for sending this packet. */ /* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc(false); flow = ovs_flow_alloc();
err = PTR_ERR(flow); err = PTR_ERR(flow);
if (IS_ERR(flow)) if (IS_ERR(flow))
goto err_kfree_skb; goto err_kfree_skb;
...@@ -782,7 +782,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ...@@ -782,7 +782,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp; struct datapath *dp;
struct sw_flow_actions *acts = NULL; struct sw_flow_actions *acts = NULL;
struct sw_flow_match match; struct sw_flow_match match;
bool exact_5tuple;
int error; int error;
/* Extract key. */ /* Extract key. */
...@@ -791,7 +790,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ...@@ -791,7 +790,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto error; goto error;
ovs_match_init(&match, &key, &mask); ovs_match_init(&match, &key, &mask);
error = ovs_nla_get_match(&match, &exact_5tuple, error = ovs_nla_get_match(&match,
a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
if (error) if (error)
goto error; goto error;
...@@ -830,7 +829,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ...@@ -830,7 +829,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_ovs; goto err_unlock_ovs;
/* Allocate flow. */ /* Allocate flow. */
flow = ovs_flow_alloc(!exact_5tuple); flow = ovs_flow_alloc();
if (IS_ERR(flow)) { if (IS_ERR(flow)) {
error = PTR_ERR(flow); error = PTR_ERR(flow);
goto err_unlock_ovs; goto err_unlock_ovs;
...@@ -914,7 +913,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) ...@@ -914,7 +913,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
} }
ovs_match_init(&match, &key, NULL); ovs_match_init(&match, &key, NULL);
err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
if (err) if (err)
return err; return err;
...@@ -968,7 +967,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ...@@ -968,7 +967,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
} }
ovs_match_init(&match, &key, NULL); ovs_match_init(&match, &key, NULL);
err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
if (err) if (err)
goto unlock; goto unlock;
......
...@@ -194,7 +194,9 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, ...@@ -194,7 +194,9 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
void ovs_dp_notify_wq(struct work_struct *work); void ovs_dp_notify_wq(struct work_struct *work);
#define OVS_NLERR(fmt, ...) \ #define OVS_NLERR(fmt, ...) \
pr_info_once("netlink: " fmt, ##__VA_ARGS__) do { \
if (net_ratelimit()) \
pr_info("netlink: " fmt, ##__VA_ARGS__); \
} while (0)
#endif /* datapath.h */ #endif /* datapath.h */
...@@ -65,87 +65,112 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) ...@@ -65,87 +65,112 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
{ {
struct flow_stats *stats; struct flow_stats *stats;
__be16 tcp_flags = 0; __be16 tcp_flags = 0;
int node = numa_node_id();
if (!flow->stats.is_percpu) stats = rcu_dereference(flow->stats[node]);
stats = flow->stats.stat;
else if (likely(flow->key.ip.proto == IPPROTO_TCP)) {
stats = this_cpu_ptr(flow->stats.cpu_stats); if (likely(flow->key.eth.type == htons(ETH_P_IP)))
tcp_flags = flow->key.ipv4.tp.flags;
if ((flow->key.eth.type == htons(ETH_P_IP) || else if (likely(flow->key.eth.type == htons(ETH_P_IPV6)))
flow->key.eth.type == htons(ETH_P_IPV6)) && tcp_flags = flow->key.ipv6.tp.flags;
flow->key.ip.frag != OVS_FRAG_TYPE_LATER && }
flow->key.ip.proto == IPPROTO_TCP && /* Check if already have node-specific stats. */
likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { if (likely(stats)) {
tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); spin_lock(&stats->lock);
/* Mark if we write on the pre-allocated stats. */
if (node == 0 && unlikely(flow->stats_last_writer != node))
flow->stats_last_writer = node;
} else {
stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
spin_lock(&stats->lock);
/* If the current NUMA-node is the only writer on the
* pre-allocated stats keep using them.
*/
if (unlikely(flow->stats_last_writer != node)) {
/* A previous locker may have already allocated the
* stats, so we need to check again. If node-specific
* stats were already allocated, we update the pre-
* allocated stats as we have already locked them.
*/
if (likely(flow->stats_last_writer != NUMA_NO_NODE)
&& likely(!rcu_dereference(flow->stats[node]))) {
/* Try to allocate node-specific stats. */
struct flow_stats *new_stats;
new_stats =
kmem_cache_alloc_node(flow_stats_cache,
GFP_THISNODE |
__GFP_NOMEMALLOC,
node);
if (likely(new_stats)) {
new_stats->used = jiffies;
new_stats->packet_count = 1;
new_stats->byte_count = skb->len;
new_stats->tcp_flags = tcp_flags;
spin_lock_init(&new_stats->lock);
rcu_assign_pointer(flow->stats[node],
new_stats);
goto unlock;
}
}
flow->stats_last_writer = node;
}
} }
spin_lock(&stats->lock);
stats->used = jiffies; stats->used = jiffies;
stats->packet_count++; stats->packet_count++;
stats->byte_count += skb->len; stats->byte_count += skb->len;
stats->tcp_flags |= tcp_flags; stats->tcp_flags |= tcp_flags;
spin_unlock(&stats->lock); unlock:
}
static void stats_read(struct flow_stats *stats,
struct ovs_flow_stats *ovs_stats,
unsigned long *used, __be16 *tcp_flags)
{
spin_lock(&stats->lock);
if (!*used || time_after(stats->used, *used))
*used = stats->used;
*tcp_flags |= stats->tcp_flags;
ovs_stats->n_packets += stats->packet_count;
ovs_stats->n_bytes += stats->byte_count;
spin_unlock(&stats->lock); spin_unlock(&stats->lock);
} }
void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
unsigned long *used, __be16 *tcp_flags) unsigned long *used, __be16 *tcp_flags)
{ {
int cpu; int node;
*used = 0; *used = 0;
*tcp_flags = 0; *tcp_flags = 0;
memset(ovs_stats, 0, sizeof(*ovs_stats)); memset(ovs_stats, 0, sizeof(*ovs_stats));
local_bh_disable(); for_each_node(node) {
if (!flow->stats.is_percpu) { struct flow_stats *stats = rcu_dereference(flow->stats[node]);
stats_read(flow->stats.stat, ovs_stats, used, tcp_flags);
} else {
for_each_possible_cpu(cpu) {
struct flow_stats *stats;
stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); if (stats) {
stats_read(stats, ovs_stats, used, tcp_flags); /* Local CPU may write on non-local stats, so we must
* block bottom-halves here.
*/
spin_lock_bh(&stats->lock);
if (!*used || time_after(stats->used, *used))
*used = stats->used;
*tcp_flags |= stats->tcp_flags;
ovs_stats->n_packets += stats->packet_count;
ovs_stats->n_bytes += stats->byte_count;
spin_unlock_bh(&stats->lock);
} }
} }
local_bh_enable();
}
static void stats_reset(struct flow_stats *stats)
{
spin_lock(&stats->lock);
stats->used = 0;
stats->packet_count = 0;
stats->byte_count = 0;
stats->tcp_flags = 0;
spin_unlock(&stats->lock);
} }
void ovs_flow_stats_clear(struct sw_flow *flow) void ovs_flow_stats_clear(struct sw_flow *flow)
{ {
int cpu; int node;
local_bh_disable(); for_each_node(node) {
if (!flow->stats.is_percpu) { struct flow_stats *stats = rcu_dereference(flow->stats[node]);
stats_reset(flow->stats.stat);
} else { if (stats) {
for_each_possible_cpu(cpu) { spin_lock_bh(&stats->lock);
stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); stats->used = 0;
stats->packet_count = 0;
stats->byte_count = 0;
stats->tcp_flags = 0;
spin_unlock_bh(&stats->lock);
} }
} }
local_bh_enable();
} }
static int check_header(struct sk_buff *skb, int len) static int check_header(struct sk_buff *skb, int len)
...@@ -372,14 +397,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, ...@@ -372,14 +397,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
&& opt_len == 8) { && opt_len == 8) {
if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll))) if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
goto invalid; goto invalid;
memcpy(key->ipv6.nd.sll, ether_addr_copy(key->ipv6.nd.sll,
&nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); &nd->opt[offset+sizeof(*nd_opt)]);
} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
&& opt_len == 8) { && opt_len == 8) {
if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll))) if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
goto invalid; goto invalid;
memcpy(key->ipv6.nd.tll, ether_addr_copy(key->ipv6.nd.tll,
&nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); &nd->opt[offset+sizeof(*nd_opt)]);
} }
icmp_len -= opt_len; icmp_len -= opt_len;
...@@ -439,8 +464,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) ...@@ -439,8 +464,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
* header in the linear data area. * header in the linear data area.
*/ */
eth = eth_hdr(skb); eth = eth_hdr(skb);
memcpy(key->eth.src, eth->h_source, ETH_ALEN); ether_addr_copy(key->eth.src, eth->h_source);
memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); ether_addr_copy(key->eth.dst, eth->h_dest);
__skb_pull(skb, 2 * ETH_ALEN); __skb_pull(skb, 2 * ETH_ALEN);
/* We are going to push all headers that we pull, so no need to /* We are going to push all headers that we pull, so no need to
...@@ -538,8 +563,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) ...@@ -538,8 +563,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
key->ip.proto = ntohs(arp->ar_op); key->ip.proto = ntohs(arp->ar_op);
memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src)); memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
} }
} else if (key->eth.type == htons(ETH_P_IPV6)) { } else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */ int nh_len; /* IPv6 Header + Extensions */
......
...@@ -155,24 +155,22 @@ struct flow_stats { ...@@ -155,24 +155,22 @@ struct flow_stats {
__be16 tcp_flags; /* Union of seen TCP flags. */ __be16 tcp_flags; /* Union of seen TCP flags. */
}; };
struct sw_flow_stats {
bool is_percpu;
union {
struct flow_stats *stat;
struct flow_stats __percpu *cpu_stats;
};
};
struct sw_flow { struct sw_flow {
struct rcu_head rcu; struct rcu_head rcu;
struct hlist_node hash_node[2]; struct hlist_node hash_node[2];
u32 hash; u32 hash;
int stats_last_writer; /* NUMA-node id of the last writer on
* 'stats[0]'.
*/
struct sw_flow_key key; struct sw_flow_key key;
struct sw_flow_key unmasked_key; struct sw_flow_key unmasked_key;
struct sw_flow_mask *mask; struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts; struct sw_flow_actions __rcu *sf_acts;
struct sw_flow_stats stats; struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one
* is allocated at flow creation time,
* the rest are allocated on demand
* while holding the 'stats[0].lock'.
*/
}; };
struct arp_eth_header { struct arp_eth_header {
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
* 02110-1301, USA * 02110-1301, USA
*/ */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include "flow.h" #include "flow.h"
#include "datapath.h" #include "datapath.h"
#include <linux/uaccess.h> #include <linux/uaccess.h>
...@@ -216,14 +218,14 @@ static bool match_validate(const struct sw_flow_match *match, ...@@ -216,14 +218,14 @@ static bool match_validate(const struct sw_flow_match *match,
if ((key_attrs & key_expected) != key_expected) { if ((key_attrs & key_expected) != key_expected) {
/* Key attributes check failed. */ /* Key attributes check failed. */
OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
key_attrs, key_expected); (unsigned long long)key_attrs, (unsigned long long)key_expected);
return false; return false;
} }
if ((mask_attrs & mask_allowed) != mask_attrs) { if ((mask_attrs & mask_allowed) != mask_attrs) {
/* Mask attributes check failed. */ /* Mask attributes check failed. */
OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
mask_attrs, mask_allowed); (unsigned long long)mask_attrs, (unsigned long long)mask_allowed);
return false; return false;
} }
...@@ -266,20 +268,6 @@ static bool is_all_zero(const u8 *fp, size_t size) ...@@ -266,20 +268,6 @@ static bool is_all_zero(const u8 *fp, size_t size)
return true; return true;
} }
static bool is_all_set(const u8 *fp, size_t size)
{
int i;
if (!fp)
return false;
for (i = 0; i < size; i++)
if (fp[i] != 0xff)
return false;
return true;
}
static int __parse_flow_nlattrs(const struct nlattr *attr, static int __parse_flow_nlattrs(const struct nlattr *attr,
const struct nlattr *a[], const struct nlattr *a[],
u64 *attrsp, bool nz) u64 *attrsp, bool nz)
...@@ -501,9 +489,8 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, ...@@ -501,9 +489,8 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
return 0; return 0;
} }
static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple, static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
u64 attrs, const struct nlattr **a, const struct nlattr **a, bool is_mask)
bool is_mask)
{ {
int err; int err;
u64 orig_attrs = attrs; u64 orig_attrs = attrs;
...@@ -560,11 +547,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple ...@@ -560,11 +547,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
} }
if (is_mask && exact_5tuple) {
if (match->mask->key.eth.type != htons(0xffff))
*exact_5tuple = false;
}
if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
const struct ovs_key_ipv4 *ipv4_key; const struct ovs_key_ipv4 *ipv4_key;
...@@ -587,13 +569,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple ...@@ -587,13 +569,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
SW_FLOW_KEY_PUT(match, ipv4.addr.dst, SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
ipv4_key->ipv4_dst, is_mask); ipv4_key->ipv4_dst, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_IPV4); attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
if (is_mask && exact_5tuple && *exact_5tuple) {
if (ipv4_key->ipv4_proto != 0xff ||
ipv4_key->ipv4_src != htonl(0xffffffff) ||
ipv4_key->ipv4_dst != htonl(0xffffffff))
*exact_5tuple = false;
}
} }
if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
...@@ -625,13 +600,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple ...@@ -625,13 +600,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
is_mask); is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_IPV6); attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
if (is_mask && exact_5tuple && *exact_5tuple) {
if (ipv6_key->ipv6_proto != 0xff ||
!is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) ||
!is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst)))
*exact_5tuple = false;
}
} }
if (attrs & (1 << OVS_KEY_ATTR_ARP)) { if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
...@@ -674,11 +642,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple ...@@ -674,11 +642,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
tcp_key->tcp_dst, is_mask); tcp_key->tcp_dst, is_mask);
} }
attrs &= ~(1 << OVS_KEY_ATTR_TCP); attrs &= ~(1 << OVS_KEY_ATTR_TCP);
if (is_mask && exact_5tuple && *exact_5tuple &&
(tcp_key->tcp_src != htons(0xffff) ||
tcp_key->tcp_dst != htons(0xffff)))
*exact_5tuple = false;
} }
if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
...@@ -710,11 +673,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple ...@@ -710,11 +673,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
udp_key->udp_dst, is_mask); udp_key->udp_dst, is_mask);
} }
attrs &= ~(1 << OVS_KEY_ATTR_UDP); attrs &= ~(1 << OVS_KEY_ATTR_UDP);
if (is_mask && exact_5tuple && *exact_5tuple &&
(udp_key->udp_src != htons(0xffff) ||
udp_key->udp_dst != htons(0xffff)))
*exact_5tuple = false;
} }
if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
...@@ -800,7 +758,6 @@ static void sw_flow_mask_set(struct sw_flow_mask *mask, ...@@ -800,7 +758,6 @@ static void sw_flow_mask_set(struct sw_flow_mask *mask,
* attribute specifies the mask field of the wildcarded flow. * attribute specifies the mask field of the wildcarded flow.
*/ */
int ovs_nla_get_match(struct sw_flow_match *match, int ovs_nla_get_match(struct sw_flow_match *match,
bool *exact_5tuple,
const struct nlattr *key, const struct nlattr *key,
const struct nlattr *mask) const struct nlattr *mask)
{ {
...@@ -848,13 +805,10 @@ int ovs_nla_get_match(struct sw_flow_match *match, ...@@ -848,13 +805,10 @@ int ovs_nla_get_match(struct sw_flow_match *match,
} }
} }
err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false); err = ovs_key_from_nlattrs(match, key_attrs, a, false);
if (err) if (err)
return err; return err;
if (exact_5tuple)
*exact_5tuple = true;
if (mask) { if (mask) {
err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
if (err) if (err)
...@@ -892,7 +846,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, ...@@ -892,7 +846,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
} }
} }
err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true); err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
if (err) if (err)
return err; return err;
} else { } else {
...@@ -982,8 +936,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, ...@@ -982,8 +936,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
goto nla_put_failure; goto nla_put_failure;
eth_key = nla_data(nla); eth_key = nla_data(nla);
memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); ether_addr_copy(eth_key->eth_src, output->eth.src);
memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); ether_addr_copy(eth_key->eth_dst, output->eth.dst);
if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
__be16 eth_type; __be16 eth_type;
...@@ -1055,8 +1009,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, ...@@ -1055,8 +1009,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
arp_key->arp_sip = output->ipv4.addr.src; arp_key->arp_sip = output->ipv4.addr.src;
arp_key->arp_tip = output->ipv4.addr.dst; arp_key->arp_tip = output->ipv4.addr.dst;
arp_key->arp_op = htons(output->ip.proto); arp_key->arp_op = htons(output->ip.proto);
memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
} }
if ((swkey->eth.type == htons(ETH_P_IP) || if ((swkey->eth.type == htons(ETH_P_IP) ||
...@@ -1105,11 +1059,11 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, ...@@ -1105,11 +1059,11 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
goto nla_put_failure; goto nla_put_failure;
sctp_key = nla_data(nla); sctp_key = nla_data(nla);
if (swkey->eth.type == htons(ETH_P_IP)) { if (swkey->eth.type == htons(ETH_P_IP)) {
sctp_key->sctp_src = swkey->ipv4.tp.src; sctp_key->sctp_src = output->ipv4.tp.src;
sctp_key->sctp_dst = swkey->ipv4.tp.dst; sctp_key->sctp_dst = output->ipv4.tp.dst;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) { } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
sctp_key->sctp_src = swkey->ipv6.tp.src; sctp_key->sctp_src = output->ipv6.tp.src;
sctp_key->sctp_dst = swkey->ipv6.tp.dst; sctp_key->sctp_dst = output->ipv6.tp.dst;
} }
} else if (swkey->eth.type == htons(ETH_P_IP) && } else if (swkey->eth.type == htons(ETH_P_IP) &&
swkey->ip.proto == IPPROTO_ICMP) { swkey->ip.proto == IPPROTO_ICMP) {
...@@ -1143,8 +1097,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, ...@@ -1143,8 +1097,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
nd_key = nla_data(nla); nd_key = nla_data(nla);
memcpy(nd_key->nd_target, &output->ipv6.nd.target, memcpy(nd_key->nd_target, &output->ipv6.nd.target,
sizeof(nd_key->nd_target)); sizeof(nd_key->nd_target));
memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
} }
} }
} }
......
...@@ -45,7 +45,6 @@ int ovs_nla_put_flow(const struct sw_flow_key *, ...@@ -45,7 +45,6 @@ int ovs_nla_put_flow(const struct sw_flow_key *,
int ovs_nla_get_flow_metadata(struct sw_flow *flow, int ovs_nla_get_flow_metadata(struct sw_flow *flow,
const struct nlattr *attr); const struct nlattr *attr);
int ovs_nla_get_match(struct sw_flow_match *match, int ovs_nla_get_match(struct sw_flow_match *match,
bool *exact_5tuple,
const struct nlattr *, const struct nlattr *,
const struct nlattr *); const struct nlattr *);
......
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
#define REHASH_INTERVAL (10 * 60 * HZ) #define REHASH_INTERVAL (10 * 60 * HZ)
static struct kmem_cache *flow_cache; static struct kmem_cache *flow_cache;
struct kmem_cache *flow_stats_cache __read_mostly;
static u16 range_n_bytes(const struct sw_flow_key_range *range) static u16 range_n_bytes(const struct sw_flow_key_range *range)
{ {
...@@ -57,8 +58,10 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range) ...@@ -57,8 +58,10 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range)
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
const struct sw_flow_mask *mask) const struct sw_flow_mask *mask)
{ {
const long *m = (long *)((u8 *)&mask->key + mask->range.start); const long *m = (const long *)((const u8 *)&mask->key +
const long *s = (long *)((u8 *)src + mask->range.start); mask->range.start);
const long *s = (const long *)((const u8 *)src +
mask->range.start);
long *d = (long *)((u8 *)dst + mask->range.start); long *d = (long *)((u8 *)dst + mask->range.start);
int i; int i;
...@@ -70,10 +73,11 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, ...@@ -70,10 +73,11 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
*d++ = *s++ & *m++; *d++ = *s++ & *m++;
} }
struct sw_flow *ovs_flow_alloc(bool percpu_stats) struct sw_flow *ovs_flow_alloc(void)
{ {
struct sw_flow *flow; struct sw_flow *flow;
int cpu; struct flow_stats *stats;
int node;
flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
if (!flow) if (!flow)
...@@ -81,27 +85,22 @@ struct sw_flow *ovs_flow_alloc(bool percpu_stats) ...@@ -81,27 +85,22 @@ struct sw_flow *ovs_flow_alloc(bool percpu_stats)
flow->sf_acts = NULL; flow->sf_acts = NULL;
flow->mask = NULL; flow->mask = NULL;
flow->stats_last_writer = NUMA_NO_NODE;
flow->stats.is_percpu = percpu_stats; /* Initialize the default stat node. */
stats = kmem_cache_alloc_node(flow_stats_cache,
GFP_KERNEL | __GFP_ZERO, 0);
if (!stats)
goto err;
if (!percpu_stats) { spin_lock_init(&stats->lock);
flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL);
if (!flow->stats.stat)
goto err;
spin_lock_init(&flow->stats.stat->lock); RCU_INIT_POINTER(flow->stats[0], stats);
} else {
flow->stats.cpu_stats = alloc_percpu(struct flow_stats);
if (!flow->stats.cpu_stats)
goto err;
for_each_possible_cpu(cpu) { for_each_node(node)
struct flow_stats *cpu_stats; if (node != 0)
RCU_INIT_POINTER(flow->stats[node], NULL);
cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
spin_lock_init(&cpu_stats->lock);
}
}
return flow; return flow;
err: err:
kmem_cache_free(flow_cache, flow); kmem_cache_free(flow_cache, flow);
...@@ -138,11 +137,13 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) ...@@ -138,11 +137,13 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets)
static void flow_free(struct sw_flow *flow) static void flow_free(struct sw_flow *flow)
{ {
int node;
kfree((struct sf_flow_acts __force *)flow->sf_acts); kfree((struct sf_flow_acts __force *)flow->sf_acts);
if (flow->stats.is_percpu) for_each_node(node)
free_percpu(flow->stats.cpu_stats); if (flow->stats[node])
else kmem_cache_free(flow_stats_cache,
kfree(flow->stats.stat); (struct flow_stats __force *)flow->stats[node]);
kmem_cache_free(flow_cache, flow); kmem_cache_free(flow_cache, flow);
} }
...@@ -375,7 +376,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) ...@@ -375,7 +376,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
static u32 flow_hash(const struct sw_flow_key *key, int key_start, static u32 flow_hash(const struct sw_flow_key *key, int key_start,
int key_end) int key_end)
{ {
u32 *hash_key = (u32 *)((u8 *)key + key_start); const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
int hash_u32s = (key_end - key_start) >> 2; int hash_u32s = (key_end - key_start) >> 2;
/* Make sure number of hash bytes are multiple of u32. */ /* Make sure number of hash bytes are multiple of u32. */
...@@ -397,8 +398,8 @@ static bool cmp_key(const struct sw_flow_key *key1, ...@@ -397,8 +398,8 @@ static bool cmp_key(const struct sw_flow_key *key1,
const struct sw_flow_key *key2, const struct sw_flow_key *key2,
int key_start, int key_end) int key_start, int key_end)
{ {
const long *cp1 = (long *)((u8 *)key1 + key_start); const long *cp1 = (const long *)((const u8 *)key1 + key_start);
const long *cp2 = (long *)((u8 *)key2 + key_start); const long *cp2 = (const long *)((const u8 *)key2 + key_start);
long diffs = 0; long diffs = 0;
int i; int i;
...@@ -513,8 +514,8 @@ static struct sw_flow_mask *mask_alloc(void) ...@@ -513,8 +514,8 @@ static struct sw_flow_mask *mask_alloc(void)
static bool mask_equal(const struct sw_flow_mask *a, static bool mask_equal(const struct sw_flow_mask *a,
const struct sw_flow_mask *b) const struct sw_flow_mask *b)
{ {
u8 *a_ = (u8 *)&a->key + a->range.start; const u8 *a_ = (const u8 *)&a->key + a->range.start;
u8 *b_ = (u8 *)&b->key + b->range.start; const u8 *b_ = (const u8 *)&b->key + b->range.start;
return (a->range.end == b->range.end) return (a->range.end == b->range.end)
&& (a->range.start == b->range.start) && (a->range.start == b->range.start)
...@@ -597,16 +598,28 @@ int ovs_flow_init(void) ...@@ -597,16 +598,28 @@ int ovs_flow_init(void)
BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
0, NULL); + (num_possible_nodes()
* sizeof(struct flow_stats *)),
0, 0, NULL);
if (flow_cache == NULL) if (flow_cache == NULL)
return -ENOMEM; return -ENOMEM;
flow_stats_cache
= kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
0, SLAB_HWCACHE_ALIGN, NULL);
if (flow_stats_cache == NULL) {
kmem_cache_destroy(flow_cache);
flow_cache = NULL;
return -ENOMEM;
}
return 0; return 0;
} }
/* Uninitializes the flow module. */ /* Uninitializes the flow module. */
void ovs_flow_exit(void) void ovs_flow_exit(void)
{ {
kmem_cache_destroy(flow_stats_cache);
kmem_cache_destroy(flow_cache); kmem_cache_destroy(flow_cache);
} }
...@@ -52,10 +52,12 @@ struct flow_table { ...@@ -52,10 +52,12 @@ struct flow_table {
unsigned int count; unsigned int count;
}; };
extern struct kmem_cache *flow_stats_cache;
int ovs_flow_init(void); int ovs_flow_init(void);
void ovs_flow_exit(void); void ovs_flow_exit(void);
struct sw_flow *ovs_flow_alloc(bool percpu_stats); struct sw_flow *ovs_flow_alloc(void);
void ovs_flow_free(struct sw_flow *, bool deferred); void ovs_flow_free(struct sw_flow *, bool deferred);
int ovs_flow_tbl_init(struct flow_table *); int ovs_flow_tbl_init(struct flow_table *);
......
...@@ -256,7 +256,7 @@ static void gre_tnl_destroy(struct vport *vport) ...@@ -256,7 +256,7 @@ static void gre_tnl_destroy(struct vport *vport)
ovs_net = net_generic(net, ovs_net_id); ovs_net = net_generic(net, ovs_net_id);
rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL); RCU_INIT_POINTER(ovs_net->vport_net.gre_vport, NULL);
ovs_vport_deferred_free(vport); ovs_vport_deferred_free(vport);
gre_exit(); gre_exit();
} }
......
...@@ -172,7 +172,7 @@ void ovs_vport_deferred_free(struct vport *vport); ...@@ -172,7 +172,7 @@ void ovs_vport_deferred_free(struct vport *vport);
*/ */
static inline void *vport_priv(const struct vport *vport) static inline void *vport_priv(const struct vport *vport)
{ {
return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN); return (u8 *)(uintptr_t)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
} }
/** /**
...@@ -185,9 +185,9 @@ static inline void *vport_priv(const struct vport *vport) ...@@ -185,9 +185,9 @@ static inline void *vport_priv(const struct vport *vport)
* the result of a hash table lookup. @priv must point to the start of the * the result of a hash table lookup. @priv must point to the start of the
* private data area. * private data area.
*/ */
static inline struct vport *vport_from_priv(const void *priv) static inline struct vport *vport_from_priv(void *priv)
{ {
return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); return (struct vport *)((u8 *)priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
} }
void ovs_vport_receive(struct vport *, struct sk_buff *, void ovs_vport_receive(struct vport *, struct sk_buff *,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment