Commit 152bff37 authored by David S. Miller's avatar David S. Miller

Merge branch 'bridge-improve-cache-utilization'

Nikolay Aleksandrov says:

====================
bridge: improve cache utilization

This is the first set which begins to deal with the bad bridge cache
access patterns. The first patch rearranges the bridge and port structs
a little so the frequently (and closely) accessed members are in the same
cache line. The second patch then moves the garbage collection to a
workqueue trying to improve system responsiveness under load (many fdbs)
and more importantly removes the need to check if the matched entry is
expired in __br_fdb_get which was a major source of false-sharing.
The third patch is a preparation for the final one which
If properly configured, i.e. ports bound to CPUs (thus updating "updated"
locally) then the bridge's HitM goes from 100% to 0%, but even without
binding we get a win because previously every lookup that iterated over
the hash chain caused false-sharing due to the first cache line being
used for both mac/vid and used/updated fields.

Some results from tests I've run:
(note that these were run in good conditions for the baseline, everything
 ran on a single NUMA node and there were only 3 fdbs)

1. baseline
100% Load HitM on the fdbs (between everyone who has done lookups and hit
                            one of the 3 hash chains of the communicating
                            src/dst fdbs)
Overall 5.06% Load HitM for the bridge, first place in the list

2. patched & ports bound to CPUs
0% Local load HitM, bridge is not even in the c2c report list
Also there's 3% consistent improvement in netperf tests.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 63dfef75 83a718d6
...@@ -411,4 +411,5 @@ void br_dev_setup(struct net_device *dev) ...@@ -411,4 +411,5 @@ void br_dev_setup(struct net_device *dev)
br_netfilter_rtable_init(br); br_netfilter_rtable_init(br);
br_stp_timer_init(br); br_stp_timer_init(br);
br_multicast_init(br); br_multicast_init(br);
INIT_DELAYED_WORK(&br->gc_work, br_fdb_cleanup);
} }
...@@ -154,7 +154,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) ...@@ -154,7 +154,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
if (f->added_by_external_learn) if (f->added_by_external_learn)
fdb_del_external_learn(f); fdb_del_external_learn(f);
hlist_del_rcu(&f->hlist); hlist_del_init_rcu(&f->hlist);
fdb_notify(br, f, RTM_DELNEIGH); fdb_notify(br, f, RTM_DELNEIGH);
call_rcu(&f->rcu, fdb_rcu_free); call_rcu(&f->rcu, fdb_rcu_free);
} }
...@@ -290,34 +290,43 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) ...@@ -290,34 +290,43 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
spin_unlock_bh(&br->hash_lock); spin_unlock_bh(&br->hash_lock);
} }
void br_fdb_cleanup(unsigned long _data) void br_fdb_cleanup(struct work_struct *work)
{ {
struct net_bridge *br = (struct net_bridge *)_data; struct net_bridge *br = container_of(work, struct net_bridge,
gc_work.work);
unsigned long delay = hold_time(br); unsigned long delay = hold_time(br);
unsigned long next_timer = jiffies + br->ageing_time; unsigned long work_delay = delay;
unsigned long now = jiffies;
int i; int i;
spin_lock(&br->hash_lock);
for (i = 0; i < BR_HASH_SIZE; i++) { for (i = 0; i < BR_HASH_SIZE; i++) {
struct net_bridge_fdb_entry *f; struct net_bridge_fdb_entry *f;
struct hlist_node *n; struct hlist_node *n;
if (!br->hash[i].first)
continue;
spin_lock_bh(&br->hash_lock);
hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) {
unsigned long this_timer; unsigned long this_timer;
if (f->is_static) if (f->is_static)
continue; continue;
if (f->added_by_external_learn) if (f->added_by_external_learn)
continue; continue;
this_timer = f->updated + delay; this_timer = f->updated + delay;
if (time_before_eq(this_timer, jiffies)) if (time_after(this_timer, now))
work_delay = min(work_delay, this_timer - now);
else
fdb_delete(br, f); fdb_delete(br, f);
else if (time_before(this_timer, next_timer))
next_timer = this_timer;
} }
spin_unlock_bh(&br->hash_lock);
cond_resched();
} }
spin_unlock(&br->hash_lock);
mod_timer(&br->gc_timer, round_jiffies_up(next_timer)); /* Cleanup minimum 10 milliseconds apart */
work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10));
mod_delayed_work(system_long_wq, &br->gc_work, work_delay);
} }
/* Completely flush all dynamic entries in forwarding database.*/ /* Completely flush all dynamic entries in forwarding database.*/
...@@ -382,8 +391,6 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, ...@@ -382,8 +391,6 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
&br->hash[br_mac_hash(addr, vid)], hlist) { &br->hash[br_mac_hash(addr, vid)], hlist) {
if (ether_addr_equal(fdb->addr.addr, addr) && if (ether_addr_equal(fdb->addr.addr, addr) &&
fdb->vlan_id == vid) { fdb->vlan_id == vid) {
if (unlikely(has_expired(br, fdb)))
break;
return fdb; return fdb;
} }
} }
...@@ -590,6 +597,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, ...@@ -590,6 +597,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
fdb->dst = source; fdb->dst = source;
fdb_modified = true; fdb_modified = true;
} }
if (jiffies != fdb->updated)
fdb->updated = jiffies; fdb->updated = jiffies;
if (unlikely(added_by_user)) if (unlikely(added_by_user))
fdb->added_by_user = 1; fdb->added_by_user = 1;
......
...@@ -313,7 +313,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) ...@@ -313,7 +313,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
br_vlan_flush(br); br_vlan_flush(br);
br_multicast_dev_del(br); br_multicast_dev_del(br);
del_timer_sync(&br->gc_timer); cancel_delayed_work_sync(&br->gc_work);
br_sysfs_delbr(br->dev); br_sysfs_delbr(br->dev);
unregister_netdevice_queue(br->dev, head); unregister_netdevice_queue(br->dev, head);
......
...@@ -198,6 +198,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb ...@@ -198,6 +198,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (dst->is_local) if (dst->is_local)
return br_pass_frame_up(skb); return br_pass_frame_up(skb);
if (jiffies != dst->used)
dst->used = jiffies; dst->used = jiffies;
br_forward(dst->dst, skb, local_rcv, false); br_forward(dst->dst, skb, local_rcv, false);
} else { } else {
......
...@@ -149,7 +149,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) ...@@ -149,7 +149,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
b.hello_timer_value = br_timer_value(&br->hello_timer); b.hello_timer_value = br_timer_value(&br->hello_timer);
b.tcn_timer_value = br_timer_value(&br->tcn_timer); b.tcn_timer_value = br_timer_value(&br->tcn_timer);
b.topology_change_timer_value = br_timer_value(&br->topology_change_timer); b.topology_change_timer_value = br_timer_value(&br->topology_change_timer);
b.gc_timer_value = br_timer_value(&br->gc_timer); b.gc_timer_value = br_timer_value(&br->gc_work.timer);
rcu_read_unlock(); rcu_read_unlock();
if (copy_to_user((void __user *)args[1], &b, sizeof(b))) if (copy_to_user((void __user *)args[1], &b, sizeof(b)))
......
...@@ -1250,7 +1250,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) ...@@ -1250,7 +1250,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
if (nla_put_u64_64bit(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval, if (nla_put_u64_64bit(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval,
IFLA_BR_PAD)) IFLA_BR_PAD))
return -EMSGSIZE; return -EMSGSIZE;
clockval = br_timer_value(&br->gc_timer); clockval = br_timer_value(&br->gc_work.timer);
if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD)) if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD))
return -EMSGSIZE; return -EMSGSIZE;
......
...@@ -160,19 +160,21 @@ struct net_bridge_vlan_group { ...@@ -160,19 +160,21 @@ struct net_bridge_vlan_group {
u16 pvid; u16 pvid;
}; };
struct net_bridge_fdb_entry struct net_bridge_fdb_entry {
{
struct hlist_node hlist; struct hlist_node hlist;
struct net_bridge_port *dst; struct net_bridge_port *dst;
unsigned long updated;
unsigned long used;
mac_addr addr; mac_addr addr;
__u16 vlan_id; __u16 vlan_id;
unsigned char is_local:1, unsigned char is_local:1,
is_static:1, is_static:1,
added_by_user:1, added_by_user:1,
added_by_external_learn:1; added_by_external_learn:1;
/* write-heavy members should not affect lookups */
unsigned long updated ____cacheline_aligned_in_smp;
unsigned long used;
struct rcu_head rcu; struct rcu_head rcu;
}; };
...@@ -212,12 +214,16 @@ struct net_bridge_mdb_htable ...@@ -212,12 +214,16 @@ struct net_bridge_mdb_htable
u32 ver; u32 ver;
}; };
struct net_bridge_port struct net_bridge_port {
{
struct net_bridge *br; struct net_bridge *br;
struct net_device *dev; struct net_device *dev;
struct list_head list; struct list_head list;
unsigned long flags;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
struct net_bridge_vlan_group __rcu *vlgrp;
#endif
/* STP */ /* STP */
u8 priority; u8 priority;
u8 state; u8 state;
...@@ -238,8 +244,6 @@ struct net_bridge_port ...@@ -238,8 +244,6 @@ struct net_bridge_port
struct kobject kobj; struct kobject kobj;
struct rcu_head rcu; struct rcu_head rcu;
unsigned long flags;
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
struct bridge_mcast_own_query ip4_own_query; struct bridge_mcast_own_query ip4_own_query;
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
...@@ -259,9 +263,6 @@ struct net_bridge_port ...@@ -259,9 +263,6 @@ struct net_bridge_port
#ifdef CONFIG_NET_POLL_CONTROLLER #ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll *np; struct netpoll *np;
#endif #endif
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
struct net_bridge_vlan_group __rcu *vlgrp;
#endif
#ifdef CONFIG_NET_SWITCHDEV #ifdef CONFIG_NET_SWITCHDEV
int offload_fwd_mark; int offload_fwd_mark;
#endif #endif
...@@ -283,14 +284,21 @@ static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device * ...@@ -283,14 +284,21 @@ static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *
rtnl_dereference(dev->rx_handler_data) : NULL; rtnl_dereference(dev->rx_handler_data) : NULL;
} }
struct net_bridge struct net_bridge {
{
spinlock_t lock; spinlock_t lock;
spinlock_t hash_lock;
struct list_head port_list; struct list_head port_list;
struct net_device *dev; struct net_device *dev;
struct pcpu_sw_netstats __percpu *stats; struct pcpu_sw_netstats __percpu *stats;
spinlock_t hash_lock; /* These fields are accessed on each packet */
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
u8 vlan_enabled;
u8 vlan_stats_enabled;
__be16 vlan_proto;
u16 default_pvid;
struct net_bridge_vlan_group __rcu *vlgrp;
#endif
struct hlist_head hash[BR_HASH_SIZE]; struct hlist_head hash[BR_HASH_SIZE];
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
union { union {
...@@ -308,6 +316,9 @@ struct net_bridge ...@@ -308,6 +316,9 @@ struct net_bridge
bridge_id designated_root; bridge_id designated_root;
bridge_id bridge_id; bridge_id bridge_id;
u32 root_path_cost; u32 root_path_cost;
unsigned char topology_change;
unsigned char topology_change_detected;
u16 root_port;
unsigned long max_age; unsigned long max_age;
unsigned long hello_time; unsigned long hello_time;
unsigned long forward_delay; unsigned long forward_delay;
...@@ -319,7 +330,6 @@ struct net_bridge ...@@ -319,7 +330,6 @@ struct net_bridge
u8 group_addr[ETH_ALEN]; u8 group_addr[ETH_ALEN];
bool group_addr_set; bool group_addr_set;
u16 root_port;
enum { enum {
BR_NO_STP, /* no spanning tree */ BR_NO_STP, /* no spanning tree */
...@@ -327,9 +337,6 @@ struct net_bridge ...@@ -327,9 +337,6 @@ struct net_bridge
BR_USER_STP, /* new RSTP in userspace */ BR_USER_STP, /* new RSTP in userspace */
} stp_enabled; } stp_enabled;
unsigned char topology_change;
unsigned char topology_change_detected;
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
unsigned char multicast_router; unsigned char multicast_router;
...@@ -374,21 +381,13 @@ struct net_bridge ...@@ -374,21 +381,13 @@ struct net_bridge
struct timer_list hello_timer; struct timer_list hello_timer;
struct timer_list tcn_timer; struct timer_list tcn_timer;
struct timer_list topology_change_timer; struct timer_list topology_change_timer;
struct timer_list gc_timer; struct delayed_work gc_work;
struct kobject *ifobj; struct kobject *ifobj;
u32 auto_cnt; u32 auto_cnt;
#ifdef CONFIG_NET_SWITCHDEV #ifdef CONFIG_NET_SWITCHDEV
int offload_fwd_mark; int offload_fwd_mark;
#endif #endif
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
struct net_bridge_vlan_group __rcu *vlgrp;
u8 vlan_enabled;
u8 vlan_stats_enabled;
__be16 vlan_proto;
u16 default_pvid;
#endif
}; };
struct br_input_skb_cb { struct br_input_skb_cb {
...@@ -505,7 +504,7 @@ void br_fdb_find_delete_local(struct net_bridge *br, ...@@ -505,7 +504,7 @@ void br_fdb_find_delete_local(struct net_bridge *br,
const unsigned char *addr, u16 vid); const unsigned char *addr, u16 vid);
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr); void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr);
void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
void br_fdb_cleanup(unsigned long arg); void br_fdb_cleanup(struct work_struct *work);
void br_fdb_delete_by_port(struct net_bridge *br, void br_fdb_delete_by_port(struct net_bridge *br,
const struct net_bridge_port *p, u16 vid, int do_all); const struct net_bridge_port *p, u16 vid, int do_all);
struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
......
...@@ -602,7 +602,7 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time) ...@@ -602,7 +602,7 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time)
br->ageing_time = t; br->ageing_time = t;
spin_unlock_bh(&br->lock); spin_unlock_bh(&br->lock);
mod_timer(&br->gc_timer, jiffies); mod_delayed_work(system_long_wq, &br->gc_work, 0);
return 0; return 0;
} }
......
...@@ -57,7 +57,7 @@ void br_stp_enable_bridge(struct net_bridge *br) ...@@ -57,7 +57,7 @@ void br_stp_enable_bridge(struct net_bridge *br)
spin_lock_bh(&br->lock); spin_lock_bh(&br->lock);
if (br->stp_enabled == BR_KERNEL_STP) if (br->stp_enabled == BR_KERNEL_STP)
mod_timer(&br->hello_timer, jiffies + br->hello_time); mod_timer(&br->hello_timer, jiffies + br->hello_time);
mod_timer(&br->gc_timer, jiffies + HZ/10); mod_delayed_work(system_long_wq, &br->gc_work, HZ / 10);
br_config_bpdu_generation(br); br_config_bpdu_generation(br);
...@@ -88,7 +88,7 @@ void br_stp_disable_bridge(struct net_bridge *br) ...@@ -88,7 +88,7 @@ void br_stp_disable_bridge(struct net_bridge *br)
del_timer_sync(&br->hello_timer); del_timer_sync(&br->hello_timer);
del_timer_sync(&br->topology_change_timer); del_timer_sync(&br->topology_change_timer);
del_timer_sync(&br->tcn_timer); del_timer_sync(&br->tcn_timer);
del_timer_sync(&br->gc_timer); cancel_delayed_work_sync(&br->gc_work);
} }
/* called under bridge lock */ /* called under bridge lock */
......
...@@ -153,8 +153,6 @@ void br_stp_timer_init(struct net_bridge *br) ...@@ -153,8 +153,6 @@ void br_stp_timer_init(struct net_bridge *br)
setup_timer(&br->topology_change_timer, setup_timer(&br->topology_change_timer,
br_topology_change_timer_expired, br_topology_change_timer_expired,
(unsigned long) br); (unsigned long) br);
setup_timer(&br->gc_timer, br_fdb_cleanup, (unsigned long) br);
} }
void br_stp_port_timer_init(struct net_bridge_port *p) void br_stp_port_timer_init(struct net_bridge_port *p)
......
...@@ -263,7 +263,7 @@ static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr, ...@@ -263,7 +263,7 @@ static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr,
char *buf) char *buf)
{ {
struct net_bridge *br = to_bridge(d); struct net_bridge *br = to_bridge(d);
return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); return sprintf(buf, "%ld\n", br_timer_value(&br->gc_work.timer));
} }
static DEVICE_ATTR_RO(gc_timer); static DEVICE_ATTR_RO(gc_timer);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment