Commit c219a166 authored by David S. Miller

Merge branch 'optimize-openvswitch-flow-looking-up'

Tonghao Zhang says:

====================
optimize openvswitch flow looking up

This patch series optimizes openvswitch performance and simplifies
the code.

Patches 1, 2, 4: port Pravin B Shelar's patches to
upstream Linux with minor changes.

Patches 5, 6, 7: optimize the flow lookup and
simplify the flow hash.

Patches 8, 9: bug fixes.

The performance test was run on an Intel Xeon E5-2630 v4.
The test topology is shown below:

+-----------------------------------+
|   +---------------------------+   |
|   | eth0   ovs-switch    eth1 |   | Host0
|   +---------------------------+   |
+-----------------------------------+
      ^                       |
      |                       |
      |                       |
      |                       |
      |                       v
+-----+----+             +----+-----+
| netperf  | Host1       | netserver| Host2
+----------+             +----------+

We use netperf to send 64B packets, and insert 255+ flow masks:
$ ovs-dpctl add-flow ovs-switch "in_port(1),eth(dst=00:01:00:00:00:00/ff:ff:ff:ff:ff:01),eth_type(0x0800),ipv4(frag=no)" 2
...
$ ovs-dpctl add-flow ovs-switch "in_port(1),eth(dst=00:ff:00:00:00:00/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)" 2
$
$ netperf -t UDP_STREAM -H 2.2.2.200 -l 40 -- -m 18

* Without the patch series, throughput is 8.28Mbps
* With the patch series, throughput is 46.05Mbps

v6:
some coding style fixes

v5:
rewrite patch 8, release flow-mask when freeing flow

v4:
access ma->count with the READ_ONCE/WRITE_ONCE API. For more information,
see the patch 5 comments.

v3:
update the ma pointer when reallocating mask_array in patch 5

v2:
simplify the code, e.g. use kfree_rcu instead of call_rcu
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ae8a76fb eec62ead
......@@ -227,7 +227,8 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
stats = this_cpu_ptr(dp->stats_percpu);
/* Look up flow. */
flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
&n_mask_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
......@@ -1575,6 +1576,31 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
return 0;
}
/*
 * ovs_dp_stats_init - set up the per-CPU statistics of a datapath.
 * @dp: datapath whose stats_percpu member is populated.
 *
 * Stores the result of netdev_alloc_pcpu_stats() for
 * struct dp_stats_percpu in dp->stats_percpu.
 *
 * Returns 0 on success, or -ENOMEM when the allocation returned NULL.
 */
static int ovs_dp_stats_init(struct datapath *dp)
{
	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);

	return dp->stats_percpu ? 0 : -ENOMEM;
}
static int ovs_dp_vport_init(struct datapath *dp)
{
int i;
dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
sizeof(struct hlist_head),
GFP_KERNEL);
if (!dp->ports)
return -ENOMEM;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&dp->ports[i]);
return 0;
}
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
......@@ -1583,7 +1609,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct vport *vport;
struct ovs_net *ovs_net;
int err, i;
int err;
err = -EINVAL;
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
......@@ -1596,35 +1622,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
goto err_free_reply;
goto err_destroy_reply;
ovs_dp_set_net(dp, sock_net(skb->sk));
/* Allocate table. */
err = ovs_flow_tbl_init(&dp->table);
if (err)
goto err_free_dp;
goto err_destroy_dp;
dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
if (!dp->stats_percpu) {
err = -ENOMEM;
err = ovs_dp_stats_init(dp);
if (err)
goto err_destroy_table;
}
dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
sizeof(struct hlist_head),
GFP_KERNEL);
if (!dp->ports) {
err = -ENOMEM;
goto err_destroy_percpu;
}
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&dp->ports[i]);
err = ovs_dp_vport_init(dp);
if (err)
goto err_destroy_stats;
err = ovs_meters_init(dp);
if (err)
goto err_destroy_ports_array;
goto err_destroy_ports;
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
......@@ -1656,6 +1673,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_reset_user_features(skb, info);
}
ovs_unlock();
goto err_destroy_meters;
}
......@@ -1672,17 +1690,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
return 0;
err_destroy_meters:
ovs_unlock();
ovs_meters_exit(dp);
err_destroy_ports_array:
err_destroy_ports:
kfree(dp->ports);
err_destroy_percpu:
err_destroy_stats:
free_percpu(dp->stats_percpu);
err_destroy_table:
ovs_flow_tbl_destroy(&dp->table);
err_free_dp:
err_destroy_dp:
kfree(dp);
err_free_reply:
err_destroy_reply:
kfree_skb(reply);
err:
return err;
......
......@@ -166,7 +166,6 @@ struct sw_flow_key_range {
/*
 * struct sw_flow_mask - a flow mask: a key plus the key range it applies to.
 *
 * NOTE(review): this listing comes from a diff hunk that shrinks the struct
 * from 7 to 6 lines; the 'list' member appears to be the one being dropped
 * in favour of an array of masks -- confirm against the final source.
 */
struct sw_flow_mask {
/* Reference counter; presumably the number of flows sharing this mask -- TODO confirm. */
int ref_count;
/* RCU callback head; presumably used for deferred freeing -- TODO confirm. */
struct rcu_head rcu;
/* List linkage for this mask (see the review note above). */
struct list_head list;
/* Key range associated with this mask. */
struct sw_flow_key_range range;
/* The key data of the mask. */
struct sw_flow_key key;
};
......
This diff is collapsed.
......@@ -22,6 +22,17 @@
#include "flow.h"
/*
 * struct mask_cache_entry - one entry of the flow-mask cache.
 *
 * Pairs a 32-bit packet hash with a 32-bit mask index.
 */
struct mask_cache_entry {
/* Packet hash used as the cache tag; presumably the skb_get_hash() value -- TODO confirm. */
u32 skb_hash;
/* Presumably the index of the matching mask in the table's mask array -- TODO confirm. */
u32 mask_index;
};
/*
 * struct mask_array - variable-length array of RCU-annotated flow-mask
 * pointers.
 *
 * The pointer storage is a flexible array member, so it must stay the last
 * member of the struct.
 */
struct mask_array {
	/* RCU callback head; presumably used for deferred freeing -- TODO confirm. */
	struct rcu_head rcu;
	/* Presumably the number of slots currently in use -- TODO confirm. */
	int count;
	/* Presumably the number of slots allocated in masks[] -- TODO confirm. */
	int max;
	/* RCU-annotated pointers to the flow masks. */
	struct sw_flow_mask __rcu *masks[];
};
struct table_instance {
struct hlist_head *buckets;
unsigned int n_buckets;
......@@ -34,7 +45,8 @@ struct table_instance {
struct flow_table {
struct table_instance __rcu *ti;
struct table_instance __rcu *ufid_ti;
struct list_head mask_list;
struct mask_cache_entry __percpu *mask_cache;
struct mask_array __rcu *mask_array;
unsigned long last_rehash;
unsigned int count;
unsigned int ufid_count;
......@@ -60,8 +72,9 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table);
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
u32 *bucket, u32 *idx);
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
const struct sw_flow_key *,
u32 *n_mask_hit);
const struct sw_flow_key *,
u32 skb_hash,
u32 *n_mask_hit);
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *);
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment