Commit c219a166 authored by David S. Miller

Merge branch 'optimize-openvswitch-flow-looking-up'

Tonghao Zhang says:

====================
optimize openvswitch flow looking up

This patch series optimizes openvswitch for performance and simplifies
the code.

Patches 1, 2, 4: port Pravin B Shelar's patches to
upstream Linux with minor changes.

Patches 5, 6, 7: optimize the flow lookup and
simplify the flow hash.

Patches 8, 9: bug fixes.

The performance test is on Intel Xeon E5-2630 v4.
The test topology is shown below:

+-----------------------------------+
|   +---------------------------+   |
|   | eth0   ovs-switch    eth1 |   | Host0
|   +---------------------------+   |
+-----------------------------------+
      ^                       |
      |                       |
      |                       |
      |                       |
      |                       v
+-----+----+             +----+-----+
| netperf  | Host1       | netserver| Host2
+----------+             +----------+

We use netperf to send 64B packets, and insert 255+ flow masks:
$ ovs-dpctl add-flow ovs-switch "in_port(1),eth(dst=00:01:00:00:00:00/ff:ff:ff:ff:ff:01),eth_type(0x0800),ipv4(frag=no)" 2
...
$ ovs-dpctl add-flow ovs-switch "in_port(1),eth(dst=00:ff:00:00:00:00/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)" 2
$
$ netperf -t UDP_STREAM -H 2.2.2.200 -l 40 -- -m 18

* Without the patch series, throughput is 8.28Mbps
* With the patch series, throughput is 46.05Mbps

v6:
some coding style fixes

v5:
rewrite patch 8, release flow-mask when freeing flow

v4:
access ma->count with READ_ONCE/WRITE_ONCE API. More information,
see patch 5 comments.

v3:
update the ma pointer when reallocating mask_array in patch 5

v2:
simplify the code, e.g. use kfree_rcu instead of call_rcu
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ae8a76fb eec62ead
...@@ -227,7 +227,8 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) ...@@ -227,7 +227,8 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
stats = this_cpu_ptr(dp->stats_percpu); stats = this_cpu_ptr(dp->stats_percpu);
/* Look up flow. */ /* Look up flow. */
flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
&n_mask_hit);
if (unlikely(!flow)) { if (unlikely(!flow)) {
struct dp_upcall_info upcall; struct dp_upcall_info upcall;
...@@ -1575,6 +1576,31 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) ...@@ -1575,6 +1576,31 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
return 0; return 0;
} }
/* Allocate the per-CPU statistics block of @dp.
 *
 * Stores the allocation in dp->stats_percpu.
 *
 * Returns 0 on success, or -ENOMEM if the allocation failed.
 */
static int ovs_dp_stats_init(struct datapath *dp)
{
	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);

	return dp->stats_percpu ? 0 : -ENOMEM;
}
static int ovs_dp_vport_init(struct datapath *dp)
{
int i;
dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
sizeof(struct hlist_head),
GFP_KERNEL);
if (!dp->ports)
return -ENOMEM;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&dp->ports[i]);
return 0;
}
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{ {
struct nlattr **a = info->attrs; struct nlattr **a = info->attrs;
...@@ -1583,7 +1609,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ...@@ -1583,7 +1609,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp; struct datapath *dp;
struct vport *vport; struct vport *vport;
struct ovs_net *ovs_net; struct ovs_net *ovs_net;
int err, i; int err;
err = -EINVAL; err = -EINVAL;
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
...@@ -1596,35 +1622,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ...@@ -1596,35 +1622,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
err = -ENOMEM; err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL); dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL) if (dp == NULL)
goto err_free_reply; goto err_destroy_reply;
ovs_dp_set_net(dp, sock_net(skb->sk)); ovs_dp_set_net(dp, sock_net(skb->sk));
/* Allocate table. */ /* Allocate table. */
err = ovs_flow_tbl_init(&dp->table); err = ovs_flow_tbl_init(&dp->table);
if (err) if (err)
goto err_free_dp; goto err_destroy_dp;
dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); err = ovs_dp_stats_init(dp);
if (!dp->stats_percpu) { if (err)
err = -ENOMEM;
goto err_destroy_table; goto err_destroy_table;
}
dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
sizeof(struct hlist_head),
GFP_KERNEL);
if (!dp->ports) {
err = -ENOMEM;
goto err_destroy_percpu;
}
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) err = ovs_dp_vport_init(dp);
INIT_HLIST_HEAD(&dp->ports[i]); if (err)
goto err_destroy_stats;
err = ovs_meters_init(dp); err = ovs_meters_init(dp);
if (err) if (err)
goto err_destroy_ports_array; goto err_destroy_ports;
/* Set up our datapath device. */ /* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]); parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
...@@ -1656,6 +1673,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ...@@ -1656,6 +1673,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_reset_user_features(skb, info); ovs_dp_reset_user_features(skb, info);
} }
ovs_unlock();
goto err_destroy_meters; goto err_destroy_meters;
} }
...@@ -1672,17 +1690,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ...@@ -1672,17 +1690,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
return 0; return 0;
err_destroy_meters: err_destroy_meters:
ovs_unlock();
ovs_meters_exit(dp); ovs_meters_exit(dp);
err_destroy_ports_array: err_destroy_ports:
kfree(dp->ports); kfree(dp->ports);
err_destroy_percpu: err_destroy_stats:
free_percpu(dp->stats_percpu); free_percpu(dp->stats_percpu);
err_destroy_table: err_destroy_table:
ovs_flow_tbl_destroy(&dp->table); ovs_flow_tbl_destroy(&dp->table);
err_free_dp: err_destroy_dp:
kfree(dp); kfree(dp);
err_free_reply: err_destroy_reply:
kfree_skb(reply); kfree_skb(reply);
err: err:
return err; return err;
......
...@@ -166,7 +166,6 @@ struct sw_flow_key_range { ...@@ -166,7 +166,6 @@ struct sw_flow_key_range {
struct sw_flow_mask { struct sw_flow_mask {
int ref_count; int ref_count;
struct rcu_head rcu; struct rcu_head rcu;
struct list_head list;
struct sw_flow_key_range range; struct sw_flow_key_range range;
struct sw_flow_key key; struct sw_flow_key key;
}; };
......
...@@ -34,8 +34,13 @@ ...@@ -34,8 +34,13 @@
#include <net/ndisc.h> #include <net/ndisc.h>
#define TBL_MIN_BUCKETS 1024 #define TBL_MIN_BUCKETS 1024
#define MASK_ARRAY_SIZE_MIN 16
#define REHASH_INTERVAL (10 * 60 * HZ) #define REHASH_INTERVAL (10 * 60 * HZ)
/* Number of skb-hash bits consumed per mask-cache probe; the lookup shifts
 * the hash right by this amount before trying the next segment.
 */
#define MC_HASH_SHIFT 8
/* Number of entries in each per-CPU mask cache (1 << MC_HASH_SHIFT = 256);
 * the low MC_HASH_SHIFT bits of the hash select the entry.
 */
#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT)
/* Number of MC_HASH_SHIFT-bit segments in a 32-bit hash (32 / 8 = 4), i.e.
 * how many cache slots are probed for one skb hash.
 */
#define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
static struct kmem_cache *flow_cache; static struct kmem_cache *flow_cache;
struct kmem_cache *flow_stats_cache __read_mostly; struct kmem_cache *flow_stats_cache __read_mostly;
...@@ -164,14 +169,133 @@ static struct table_instance *table_instance_alloc(int new_size) ...@@ -164,14 +169,133 @@ static struct table_instance *table_instance_alloc(int new_size)
return ti; return ti;
} }
/* Allocate an empty, zero-filled mask array.
 *
 * @size: requested number of mask slots; raised to MASK_ARRAY_SIZE_MIN
 *        when smaller.
 *
 * Returns the new array with count == 0 and max set to the number of
 * slots, or NULL if the allocation failed.
 */
static struct mask_array *tbl_mask_array_alloc(int size)
{
	struct mask_array *ma;
	int slots = max(MASK_ARRAY_SIZE_MIN, size);

	ma = kzalloc(sizeof(struct mask_array) +
		     sizeof(struct sw_flow_mask *) * slots, GFP_KERNEL);
	if (ma) {
		ma->count = 0;
		ma->max = slots;
	}

	return ma;
}
/* Replace the table's mask array with a freshly allocated one of @size
 * slots (at least MASK_ARRAY_SIZE_MIN, see tbl_mask_array_alloc()).
 *
 * Every non-NULL mask pointer of the old array is copied into the new
 * array in order, which also compacts any holes. The new array is then
 * published with rcu_assign_pointer() and the old one is released after
 * an RCU grace period.
 *
 * NOTE(review): the copy loop does not bound new->count against
 * new->max; callers are presumably expected to pass a @size that fits
 * all live masks.
 *
 * Returns 0 on success, or -ENOMEM if the new array cannot be allocated,
 * in which case tbl->mask_array is left untouched.
 */
static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
{
	struct mask_array *old;
	struct mask_array *new;

	new = tbl_mask_array_alloc(size);
	if (!new)
		return -ENOMEM;

	old = ovsl_dereference(tbl->mask_array);
	if (old) {
		int i;

		for (i = 0; i < old->max; i++) {
			if (ovsl_dereference(old->masks[i]))
				new->masks[new->count++] = old->masks[i];
		}
	}

	rcu_assign_pointer(tbl->mask_array, new);

	/* The copy loop above tolerates a NULL old array, so the release
	 * must as well: only queue the RCU free when there is something
	 * to free.
	 */
	if (old)
		kfree_rcu(old, rcu);

	return 0;
}
/* Append mask 'new' to the table's mask array, growing the array by
 * MASK_ARRAY_SIZE_MIN slots when it is full.
 *
 * Returns 0 on success, or the error from tbl_mask_array_realloc() when
 * the array had to grow and could not be reallocated.
 */
static int tbl_mask_array_add_mask(struct flow_table *tbl,
struct sw_flow_mask *new)
{
struct mask_array *ma = ovsl_dereference(tbl->mask_array);
int err, ma_count = READ_ONCE(ma->count);
/* Array is full: grow it, then re-read the pointer because the realloc
 * publishes a new array.
 */
if (ma_count >= ma->max) {
err = tbl_mask_array_realloc(tbl, ma->max +
MASK_ARRAY_SIZE_MIN);
if (err)
return err;
ma = ovsl_dereference(tbl->mask_array);
}
/* The slot at index 'count' must be free. */
BUG_ON(ovsl_dereference(ma->masks[ma_count]));
/* Publish the mask pointer before bumping the count. */
rcu_assign_pointer(ma->masks[ma_count], new);
WRITE_ONCE(ma->count, ma_count +1);
return 0;
}
/* Remove 'mask' from the table's mask array and free it after an RCU
 * grace period. The last used slot is moved into the vacated slot so the
 * used entries stay contiguous. BUG()s if 'mask' is not in the array.
 */
static void tbl_mask_array_del_mask(struct flow_table *tbl,
struct sw_flow_mask *mask)
{
struct mask_array *ma = ovsl_dereference(tbl->mask_array);
int i, ma_count = READ_ONCE(ma->count);
/* Locate the slot that holds the mask being deleted. */
for (i = 0; i < ma_count; i++) {
if (mask == ovsl_dereference(ma->masks[i]))
goto found;
}
BUG();
return;
found:
/* Drop the count, move the last entry into slot i, then clear the
 * now-unused last slot.
 */
WRITE_ONCE(ma->count, ma_count -1);
rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
kfree_rcu(mask, rcu);
/* Shrink the mask array if necessary. Note that 'ma_count' here is the
 * count read before the removal, and that a failed realloc is ignored
 * (the array simply keeps its current size).
 */
if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
ma_count <= (ma->max / 3))
tbl_mask_array_realloc(tbl, ma->max / 2);
}
/* Drop one reference on 'mask'; once the last reference is gone the mask
 * is removed from the table's mask array. A NULL 'mask' is ignored.
 *
 * The ovs-lock must be held: it protects both the mask refcount and the
 * mask array.
 */
static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
{
	if (!mask)
		return;

	ASSERT_OVSL();
	BUG_ON(!mask->ref_count);

	if (--mask->ref_count == 0)
		tbl_mask_array_del_mask(tbl, mask);
}
int ovs_flow_tbl_init(struct flow_table *table) int ovs_flow_tbl_init(struct flow_table *table)
{ {
struct table_instance *ti, *ufid_ti; struct table_instance *ti, *ufid_ti;
struct mask_array *ma;
ti = table_instance_alloc(TBL_MIN_BUCKETS); table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
MC_HASH_ENTRIES,
__alignof__(struct mask_cache_entry));
if (!table->mask_cache)
return -ENOMEM;
ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
if (!ma)
goto free_mask_cache;
ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ti) if (!ti)
return -ENOMEM; goto free_mask_array;
ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ufid_ti) if (!ufid_ti)
...@@ -179,7 +303,7 @@ int ovs_flow_tbl_init(struct flow_table *table) ...@@ -179,7 +303,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
rcu_assign_pointer(table->ti, ti); rcu_assign_pointer(table->ti, ti);
rcu_assign_pointer(table->ufid_ti, ufid_ti); rcu_assign_pointer(table->ufid_ti, ufid_ti);
INIT_LIST_HEAD(&table->mask_list); rcu_assign_pointer(table->mask_array, ma);
table->last_rehash = jiffies; table->last_rehash = jiffies;
table->count = 0; table->count = 0;
table->ufid_count = 0; table->ufid_count = 0;
...@@ -187,6 +311,10 @@ int ovs_flow_tbl_init(struct flow_table *table) ...@@ -187,6 +311,10 @@ int ovs_flow_tbl_init(struct flow_table *table)
free_ti: free_ti:
__table_instance_destroy(ti); __table_instance_destroy(ti);
free_mask_array:
kfree(ma);
free_mask_cache:
free_percpu(table->mask_cache);
return -ENOMEM; return -ENOMEM;
} }
...@@ -197,7 +325,28 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) ...@@ -197,7 +325,28 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
__table_instance_destroy(ti); __table_instance_destroy(ti);
} }
static void table_instance_destroy(struct table_instance *ti, static void table_instance_flow_free(struct flow_table *table,
struct table_instance *ti,
struct table_instance *ufid_ti,
struct sw_flow *flow,
bool count)
{
hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
if (count)
table->count--;
if (ovs_identifier_is_ufid(&flow->id)) {
hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
if (count)
table->ufid_count--;
}
flow_mask_remove(table, flow->mask);
}
static void table_instance_destroy(struct flow_table *table,
struct table_instance *ti,
struct table_instance *ufid_ti, struct table_instance *ufid_ti,
bool deferred) bool deferred)
{ {
...@@ -214,13 +363,12 @@ static void table_instance_destroy(struct table_instance *ti, ...@@ -214,13 +363,12 @@ static void table_instance_destroy(struct table_instance *ti,
struct sw_flow *flow; struct sw_flow *flow;
struct hlist_head *head = &ti->buckets[i]; struct hlist_head *head = &ti->buckets[i];
struct hlist_node *n; struct hlist_node *n;
int ver = ti->node_ver;
int ufid_ver = ufid_ti->node_ver;
hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) { hlist_for_each_entry_safe(flow, n, head,
hlist_del_rcu(&flow->flow_table.node[ver]); flow_table.node[ti->node_ver]) {
if (ovs_identifier_is_ufid(&flow->id))
hlist_del_rcu(&flow->ufid_table.node[ufid_ver]); table_instance_flow_free(table, ti, ufid_ti,
flow, false);
ovs_flow_free(flow, deferred); ovs_flow_free(flow, deferred);
} }
} }
...@@ -243,7 +391,9 @@ void ovs_flow_tbl_destroy(struct flow_table *table) ...@@ -243,7 +391,9 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
struct table_instance *ti = rcu_dereference_raw(table->ti); struct table_instance *ti = rcu_dereference_raw(table->ti);
struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
table_instance_destroy(ti, ufid_ti, false); free_percpu(table->mask_cache);
kfree_rcu(rcu_dereference_raw(table->mask_array), rcu);
table_instance_destroy(table, ti, ufid_ti, false);
} }
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
...@@ -359,7 +509,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) ...@@ -359,7 +509,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
flow_table->count = 0; flow_table->count = 0;
flow_table->ufid_count = 0; flow_table->ufid_count = 0;
table_instance_destroy(old_ti, old_ufid_ti, true); table_instance_destroy(flow_table, old_ti, old_ufid_ti, true);
return 0; return 0;
err_free_ti: err_free_ti:
...@@ -370,13 +520,10 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) ...@@ -370,13 +520,10 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
static u32 flow_hash(const struct sw_flow_key *key, static u32 flow_hash(const struct sw_flow_key *key,
const struct sw_flow_key_range *range) const struct sw_flow_key_range *range)
{ {
int key_start = range->start; const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);
int key_end = range->end;
const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
int hash_u32s = (key_end - key_start) >> 2;
/* Make sure number of hash bytes are multiple of u32. */ /* Make sure number of hash bytes are multiple of u32. */
BUILD_BUG_ON(sizeof(long) % sizeof(u32)); int hash_u32s = range_n_bytes(range) >> 2;
return jhash2(hash_key, hash_u32s, 0); return jhash2(hash_key, hash_u32s, 0);
} }
...@@ -425,7 +572,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, ...@@ -425,7 +572,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
static struct sw_flow *masked_flow_lookup(struct table_instance *ti, static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
const struct sw_flow_key *unmasked, const struct sw_flow_key *unmasked,
const struct sw_flow_mask *mask) const struct sw_flow_mask *mask,
u32 *n_mask_hit)
{ {
struct sw_flow *flow; struct sw_flow *flow;
struct hlist_head *head; struct hlist_head *head;
...@@ -435,6 +583,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, ...@@ -435,6 +583,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
ovs_flow_mask_key(&masked_key, unmasked, false, mask); ovs_flow_mask_key(&masked_key, unmasked, false, mask);
hash = flow_hash(&masked_key, &mask->range); hash = flow_hash(&masked_key, &mask->range);
head = find_bucket(ti, hash); head = find_bucket(ti, hash);
(*n_mask_hit)++;
hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
if (flow->mask == mask && flow->flow_table.hash == hash && if (flow->mask == mask && flow->flow_table.hash == hash &&
flow_cmp_masked_key(flow, &masked_key, &mask->range)) flow_cmp_masked_key(flow, &masked_key, &mask->range))
...@@ -443,46 +593,147 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, ...@@ -443,46 +593,147 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
return NULL; return NULL;
} }
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, /* Flow lookup does full lookup on flow table. It starts with
const struct sw_flow_key *key, * mask from index passed in *index.
u32 *n_mask_hit) */
static struct sw_flow *flow_lookup(struct flow_table *tbl,
struct table_instance *ti,
struct mask_array *ma,
const struct sw_flow_key *key,
u32 *n_mask_hit,
u32 *index)
{ {
struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); struct sw_flow *flow;
struct sw_flow_mask *mask; struct sw_flow_mask *mask;
int i;
if (likely(*index < ma->max)) {
mask = rcu_dereference_ovsl(ma->masks[*index]);
if (mask) {
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
if (flow)
return flow;
}
}
for (i = 0; i < ma->max; i++) {
if (i == *index)
continue;
mask = rcu_dereference_ovsl(ma->masks[i]);
if (unlikely(!mask))
break;
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
if (flow) { /* Found */
*index = i;
return flow;
}
}
return NULL;
}
/*
* mask_cache maps a flow to its probable mask. The cache is not tightly
* coupled to the mask array: updates to the mask array can leave an
* inconsistent (stale) entry in the mask cache.
* This is a per-CPU cache and is divided into MC_HASH_SEGS segments.
* In case of a hash collision the entry is hashed into the next segment.
*/
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
const struct sw_flow_key *key,
u32 skb_hash,
u32 *n_mask_hit)
{
struct mask_array *ma = rcu_dereference(tbl->mask_array);
struct table_instance *ti = rcu_dereference(tbl->ti);
struct mask_cache_entry *entries, *ce;
struct sw_flow *flow; struct sw_flow *flow;
u32 hash;
int seg;
*n_mask_hit = 0; *n_mask_hit = 0;
list_for_each_entry_rcu(mask, &tbl->mask_list, list) { if (unlikely(!skb_hash)) {
(*n_mask_hit)++; u32 mask_index = 0;
flow = masked_flow_lookup(ti, key, mask);
if (flow) /* Found */ return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
}
/* Pre and post recirulation flows usually have the same skb_hash
* value. To avoid hash collisions, rehash the 'skb_hash' with
* 'recirc_id'. */
if (key->recirc_id)
skb_hash = jhash_1word(skb_hash, key->recirc_id);
ce = NULL;
hash = skb_hash;
entries = this_cpu_ptr(tbl->mask_cache);
/* Find the cache entry 'ce' to operate on. */
for (seg = 0; seg < MC_HASH_SEGS; seg++) {
int index = hash & (MC_HASH_ENTRIES - 1);
struct mask_cache_entry *e;
e = &entries[index];
if (e->skb_hash == skb_hash) {
flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
&e->mask_index);
if (!flow)
e->skb_hash = 0;
return flow; return flow;
}
if (!ce || e->skb_hash < ce->skb_hash)
ce = e; /* A better replacement cache candidate. */
hash >>= MC_HASH_SHIFT;
} }
return NULL;
/* Cache miss, do full lookup. */
flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
if (flow)
ce->skb_hash = skb_hash;
return flow;
} }
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
const struct sw_flow_key *key) const struct sw_flow_key *key)
{ {
struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
u32 __always_unused n_mask_hit; u32 __always_unused n_mask_hit;
u32 index = 0;
return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit); return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
} }
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
const struct sw_flow_match *match) const struct sw_flow_match *match)
{ {
struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); struct mask_array *ma = ovsl_dereference(tbl->mask_array);
struct sw_flow_mask *mask; int i;
struct sw_flow *flow;
/* Always called under ovs-mutex. */ /* Always called under ovs-mutex. */
list_for_each_entry(mask, &tbl->mask_list, list) { for (i = 0; i < ma->max; i++) {
flow = masked_flow_lookup(ti, match->key, mask); struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
u32 __always_unused n_mask_hit;
struct sw_flow_mask *mask;
struct sw_flow *flow;
mask = ovsl_dereference(ma->masks[i]);
if (!mask)
continue;
flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
if (flow && ovs_identifier_is_key(&flow->id) && if (flow && ovs_identifier_is_key(&flow->id) &&
ovs_flow_cmp_unmasked_key(flow, match)) ovs_flow_cmp_unmasked_key(flow, match)) {
return flow; return flow;
}
} }
return NULL; return NULL;
} }
...@@ -528,13 +779,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, ...@@ -528,13 +779,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
int ovs_flow_tbl_num_masks(const struct flow_table *table) int ovs_flow_tbl_num_masks(const struct flow_table *table)
{ {
struct sw_flow_mask *mask; struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
int num = 0; return READ_ONCE(ma->count);
list_for_each_entry(mask, &table->mask_list, list)
num++;
return num;
} }
static struct table_instance *table_instance_expand(struct table_instance *ti, static struct table_instance *table_instance_expand(struct table_instance *ti,
...@@ -543,24 +789,6 @@ static struct table_instance *table_instance_expand(struct table_instance *ti, ...@@ -543,24 +789,6 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
return table_instance_rehash(ti, ti->n_buckets * 2, ufid); return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
} }
/* Remove 'mask' from the mask list, if it is not needed any more.
 *
 * Drops one reference on 'mask'; when the count reaches zero the mask is
 * unlinked from the RCU-protected list and freed after a grace period.
 * A NULL 'mask' is ignored.
 */
static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
{
if (mask) {
/* ovs-lock is required to protect mask-refcount and
* mask list.
*/
ASSERT_OVSL();
BUG_ON(!mask->ref_count);
mask->ref_count--;
if (!mask->ref_count) {
list_del_rcu(&mask->list);
kfree_rcu(mask, rcu);
}
}
}
/* Must be called with OVS mutex held. */ /* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{ {
...@@ -568,17 +796,7 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) ...@@ -568,17 +796,7 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
BUG_ON(table->count == 0); BUG_ON(table->count == 0);
hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); table_instance_flow_free(table, ti, ufid_ti, flow, true);
table->count--;
if (ovs_identifier_is_ufid(&flow->id)) {
hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
table->ufid_count--;
}
/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
* accessible as long as the RCU read lock is held.
*/
flow_mask_remove(table, flow->mask);
} }
static struct sw_flow_mask *mask_alloc(void) static struct sw_flow_mask *mask_alloc(void)
...@@ -606,13 +824,16 @@ static bool mask_equal(const struct sw_flow_mask *a, ...@@ -606,13 +824,16 @@ static bool mask_equal(const struct sw_flow_mask *a,
static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
const struct sw_flow_mask *mask) const struct sw_flow_mask *mask)
{ {
struct list_head *ml; struct mask_array *ma;
int i;
ma = ovsl_dereference(tbl->mask_array);
for (i = 0; i < ma->max; i++) {
struct sw_flow_mask *t;
t = ovsl_dereference(ma->masks[i]);
list_for_each(ml, &tbl->mask_list) { if (t && mask_equal(mask, t))
struct sw_flow_mask *m; return t;
m = container_of(ml, struct sw_flow_mask, list);
if (mask_equal(mask, m))
return m;
} }
return NULL; return NULL;
...@@ -623,6 +844,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, ...@@ -623,6 +844,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
const struct sw_flow_mask *new) const struct sw_flow_mask *new)
{ {
struct sw_flow_mask *mask; struct sw_flow_mask *mask;
mask = flow_mask_find(tbl, new); mask = flow_mask_find(tbl, new);
if (!mask) { if (!mask) {
/* Allocate a new mask if none exsits. */ /* Allocate a new mask if none exsits. */
...@@ -631,7 +853,12 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, ...@@ -631,7 +853,12 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
return -ENOMEM; return -ENOMEM;
mask->key = new->key; mask->key = new->key;
mask->range = new->range; mask->range = new->range;
list_add_rcu(&mask->list, &tbl->mask_list);
/* Add mask to mask-list. */
if (tbl_mask_array_add_mask(tbl, mask)) {
kfree(mask);
return -ENOMEM;
}
} else { } else {
BUG_ON(!mask->ref_count); BUG_ON(!mask->ref_count);
mask->ref_count++; mask->ref_count++;
......
...@@ -22,6 +22,17 @@ ...@@ -22,6 +22,17 @@
#include "flow.h" #include "flow.h"
/* One slot of the per-CPU mask cache consulted by
 * ovs_flow_tbl_lookup_stats().
 */
struct mask_cache_entry {
/* Packet hash this entry was cached for; the lookup resets it to 0 when
 * the cached hint no longer yields a flow.
 */
u32 skb_hash;
/* Index into the mask array to try first for this hash. */
u32 mask_index;
};
/* Resizable array of flow masks, replaced as a whole under RCU. */
struct mask_array {
/* Used by kfree_rcu() to free the array after a grace period. */
struct rcu_head rcu;
/* count: number of masks stored (accessed with READ_ONCE/WRITE_ONCE);
 * max: number of slots allocated in masks[].
 */
int count, max;
/* RCU-protected mask pointers; unused slots are NULL. */
struct sw_flow_mask __rcu *masks[];
};
struct table_instance { struct table_instance {
struct hlist_head *buckets; struct hlist_head *buckets;
unsigned int n_buckets; unsigned int n_buckets;
...@@ -34,7 +45,8 @@ struct table_instance { ...@@ -34,7 +45,8 @@ struct table_instance {
struct flow_table { struct flow_table {
struct table_instance __rcu *ti; struct table_instance __rcu *ti;
struct table_instance __rcu *ufid_ti; struct table_instance __rcu *ufid_ti;
struct list_head mask_list; struct mask_cache_entry __percpu *mask_cache;
struct mask_array __rcu *mask_array;
unsigned long last_rehash; unsigned long last_rehash;
unsigned int count; unsigned int count;
unsigned int ufid_count; unsigned int ufid_count;
...@@ -60,8 +72,9 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table); ...@@ -60,8 +72,9 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table);
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
u32 *bucket, u32 *idx); u32 *bucket, u32 *idx);
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
const struct sw_flow_key *, const struct sw_flow_key *,
u32 *n_mask_hit); u32 skb_hash,
u32 *n_mask_hit);
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *); const struct sw_flow_key *);
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment