Commit 278b2083 authored by nikolay@redhat.com's avatar nikolay@redhat.com Committed by David S. Miller

bonding: initial RCU conversion

This patch does the initial bonding conversion to RCU. After it the
following modes are protected by RCU alone: roundrobin, active-backup,
broadcast and xor. Modes ALB/TLB and 3ad still acquire bond->lock for
reading, and will be dealt with later. curr_active_slave needs to be
dereferenced via rcu in the converted modes because the only thing
protecting the slave after this patch is rcu_read_lock, so we need the
proper barrier for weakly ordered archs and to make sure we don't have
stale pointer. It's not tagged with __rcu yet because there's still work
to be done to remove the curr_slave_lock, so sparse will complain when
rcu_assign_pointer and rcu_dereference are used, but the alternative to use
rcu_dereference_protected would've created much bigger code churn which is
more difficult to test and review. That will be converted in time.

1. Active-backup mode
 1.1 Perf recording while doing iperf -P 4
  - old bonding: iperf spent 0.55% in bonding, system spent 0.29% CPU
                 in bonding
  - new bonding: iperf spent 0.29% in bonding, system spent 0.15% CPU
                 in bonding
 1.2. Bandwidth measurements
  - old bonding: 16.1 gbps consistently
  - new bonding: 17.5 gbps consistently

2. Round-robin mode
 2.1 Perf recording while doing iperf -P 4
  - old bonding: iperf spent 0.51% in bonding, system spent 0.24% CPU
                 in bonding
  - new bonding: iperf spent 0.16% in bonding, system spent 0.11% CPU
                 in bonding
 2.2 Bandwidth measurements
  - old bonding: 8 gbps (variable due to packet reorderings)
  - new bonding: 10 gbps (variable due to packet reorderings)

Of course the latency has improved in all converted modes, and moreover
while
doing enslave/release (since it doesn't affect tx anymore).

Also I've stress tested all modes doing enslave/release in a loop while
transmitting traffic.
Signed-off-by: default avatarNikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 15077228
...@@ -2426,6 +2426,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) ...@@ -2426,6 +2426,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
struct ad_info ad_info; struct ad_info ad_info;
int res = 1; int res = 1;
read_lock(&bond->lock);
if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n", pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n",
dev->name); dev->name);
...@@ -2475,6 +2476,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) ...@@ -2475,6 +2476,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
} }
out: out:
read_unlock(&bond->lock);
if (res) { if (res) {
/* no suitable interface, frame not sent */ /* no suitable interface, frame not sent */
kfree_skb(skb); kfree_skb(skb);
......
...@@ -1337,6 +1337,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) ...@@ -1337,6 +1337,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
/* make sure that the curr_active_slave do not change during tx /* make sure that the curr_active_slave do not change during tx
*/ */
read_lock(&bond->lock);
read_lock(&bond->curr_slave_lock); read_lock(&bond->curr_slave_lock);
switch (ntohs(skb->protocol)) { switch (ntohs(skb->protocol)) {
...@@ -1441,11 +1442,12 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) ...@@ -1441,11 +1442,12 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
} }
read_unlock(&bond->curr_slave_lock); read_unlock(&bond->curr_slave_lock);
read_unlock(&bond->lock);
if (res) { if (res) {
/* no suitable interface, frame not sent */ /* no suitable interface, frame not sent */
kfree_skb(skb); kfree_skb(skb);
} }
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
...@@ -1663,7 +1665,7 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave ...@@ -1663,7 +1665,7 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
} }
swap_slave = bond->curr_active_slave; swap_slave = bond->curr_active_slave;
bond->curr_active_slave = new_slave; rcu_assign_pointer(bond->curr_active_slave, new_slave);
if (!new_slave || list_empty(&bond->slave_list)) if (!new_slave || list_empty(&bond->slave_list))
return; return;
......
...@@ -77,6 +77,7 @@ ...@@ -77,6 +77,7 @@
#include <net/net_namespace.h> #include <net/net_namespace.h>
#include <net/netns/generic.h> #include <net/netns/generic.h>
#include <net/pkt_sched.h> #include <net/pkt_sched.h>
#include <linux/rculist.h>
#include "bonding.h" #include "bonding.h"
#include "bond_3ad.h" #include "bond_3ad.h"
#include "bond_alb.h" #include "bond_alb.h"
...@@ -1037,7 +1038,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) ...@@ -1037,7 +1038,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
if (new_active) if (new_active)
bond_set_slave_active_flags(new_active); bond_set_slave_active_flags(new_active);
} else { } else {
bond->curr_active_slave = new_active; rcu_assign_pointer(bond->curr_active_slave, new_active);
} }
if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
...@@ -1127,7 +1128,7 @@ void bond_select_active_slave(struct bonding *bond) ...@@ -1127,7 +1128,7 @@ void bond_select_active_slave(struct bonding *bond)
*/ */
static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
{ {
list_add_tail(&new_slave->list, &bond->slave_list); list_add_tail_rcu(&new_slave->list, &bond->slave_list);
bond->slave_cnt++; bond->slave_cnt++;
} }
...@@ -1143,7 +1144,7 @@ static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) ...@@ -1143,7 +1144,7 @@ static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
*/ */
static void bond_detach_slave(struct bonding *bond, struct slave *slave) static void bond_detach_slave(struct bonding *bond, struct slave *slave)
{ {
list_del(&slave->list); list_del_rcu(&slave->list);
bond->slave_cnt--; bond->slave_cnt--;
} }
...@@ -1751,7 +1752,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) ...@@ -1751,7 +1752,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
* so we can change it without calling change_active_interface() * so we can change it without calling change_active_interface()
*/ */
if (!bond->curr_active_slave && new_slave->link == BOND_LINK_UP) if (!bond->curr_active_slave && new_slave->link == BOND_LINK_UP)
bond->curr_active_slave = new_slave; rcu_assign_pointer(bond->curr_active_slave, new_slave);
break; break;
} /* switch(bond_mode) */ } /* switch(bond_mode) */
...@@ -1951,7 +1952,7 @@ static int __bond_release_one(struct net_device *bond_dev, ...@@ -1951,7 +1952,7 @@ static int __bond_release_one(struct net_device *bond_dev,
} }
if (all) { if (all) {
bond->curr_active_slave = NULL; rcu_assign_pointer(bond->curr_active_slave, NULL);
} else if (oldcurrent == slave) { } else if (oldcurrent == slave) {
/* /*
* Note that we hold RTNL over this sequence, so there * Note that we hold RTNL over this sequence, so there
...@@ -1983,6 +1984,7 @@ static int __bond_release_one(struct net_device *bond_dev, ...@@ -1983,6 +1984,7 @@ static int __bond_release_one(struct net_device *bond_dev,
write_unlock_bh(&bond->lock); write_unlock_bh(&bond->lock);
unblock_netpoll_tx(); unblock_netpoll_tx();
synchronize_rcu();
if (list_empty(&bond->slave_list)) { if (list_empty(&bond->slave_list)) {
call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev); call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev);
...@@ -3811,7 +3813,7 @@ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) ...@@ -3811,7 +3813,7 @@ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id)
int i = slave_id; int i = slave_id;
/* Here we start from the slave with slave_id */ /* Here we start from the slave with slave_id */
bond_for_each_slave(bond, slave) { bond_for_each_slave_rcu(bond, slave) {
if (--i < 0) { if (--i < 0) {
if (slave_can_tx(slave)) { if (slave_can_tx(slave)) {
bond_dev_queue_xmit(bond, skb, slave->dev); bond_dev_queue_xmit(bond, skb, slave->dev);
...@@ -3822,7 +3824,7 @@ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) ...@@ -3822,7 +3824,7 @@ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id)
/* Here we start from the first slave up to slave_id */ /* Here we start from the first slave up to slave_id */
i = slave_id; i = slave_id;
bond_for_each_slave(bond, slave) { bond_for_each_slave_rcu(bond, slave) {
if (--i < 0) if (--i < 0)
break; break;
if (slave_can_tx(slave)) { if (slave_can_tx(slave)) {
...@@ -3848,7 +3850,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev ...@@ -3848,7 +3850,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev
* will send all of this type of traffic. * will send all of this type of traffic.
*/ */
if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) { if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) {
slave = bond->curr_active_slave; slave = rcu_dereference(bond->curr_active_slave);
if (slave && slave_can_tx(slave)) if (slave && slave_can_tx(slave))
bond_dev_queue_xmit(bond, skb, slave->dev); bond_dev_queue_xmit(bond, skb, slave->dev);
else else
...@@ -3870,7 +3872,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d ...@@ -3870,7 +3872,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
struct bonding *bond = netdev_priv(bond_dev); struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave; struct slave *slave;
slave = bond->curr_active_slave; slave = rcu_dereference(bond->curr_active_slave);
if (slave) if (slave)
bond_dev_queue_xmit(bond, skb, slave->dev); bond_dev_queue_xmit(bond, skb, slave->dev);
else else
...@@ -3900,7 +3902,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) ...@@ -3900,7 +3902,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
struct bonding *bond = netdev_priv(bond_dev); struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave = NULL; struct slave *slave = NULL;
bond_for_each_slave(bond, slave) { bond_for_each_slave_rcu(bond, slave) {
if (bond_is_last_slave(bond, slave)) if (bond_is_last_slave(bond, slave))
break; break;
if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP) { if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP) {
...@@ -3955,7 +3957,7 @@ static inline int bond_slave_override(struct bonding *bond, ...@@ -3955,7 +3957,7 @@ static inline int bond_slave_override(struct bonding *bond,
return 1; return 1;
/* Find out if any slaves have the same mapping as this skb. */ /* Find out if any slaves have the same mapping as this skb. */
bond_for_each_slave(bond, check_slave) { bond_for_each_slave_rcu(bond, check_slave) {
if (check_slave->queue_id == skb->queue_mapping) { if (check_slave->queue_id == skb->queue_mapping) {
slave = check_slave; slave = check_slave;
break; break;
...@@ -4040,14 +4042,12 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -4040,14 +4042,12 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (is_netpoll_tx_blocked(dev)) if (is_netpoll_tx_blocked(dev))
return NETDEV_TX_BUSY; return NETDEV_TX_BUSY;
read_lock(&bond->lock); rcu_read_lock();
if (!list_empty(&bond->slave_list)) if (!list_empty(&bond->slave_list))
ret = __bond_start_xmit(skb, dev); ret = __bond_start_xmit(skb, dev);
else else
kfree_skb(skb); kfree_skb(skb);
rcu_read_unlock();
read_unlock(&bond->lock);
return ret; return ret;
} }
......
...@@ -1243,16 +1243,16 @@ static ssize_t bonding_show_active_slave(struct device *d, ...@@ -1243,16 +1243,16 @@ static ssize_t bonding_show_active_slave(struct device *d,
struct device_attribute *attr, struct device_attribute *attr,
char *buf) char *buf)
{ {
struct slave *curr;
struct bonding *bond = to_bond(d); struct bonding *bond = to_bond(d);
struct slave *curr;
int count = 0; int count = 0;
read_lock(&bond->curr_slave_lock); rcu_read_lock();
curr = bond->curr_active_slave; curr = rcu_dereference(bond->curr_active_slave);
read_unlock(&bond->curr_slave_lock);
if (USES_PRIMARY(bond->params.mode) && curr) if (USES_PRIMARY(bond->params.mode) && curr)
count = sprintf(buf, "%s\n", curr->dev->name); count = sprintf(buf, "%s\n", curr->dev->name);
rcu_read_unlock();
return count; return count;
} }
...@@ -1284,7 +1284,7 @@ static ssize_t bonding_store_active_slave(struct device *d, ...@@ -1284,7 +1284,7 @@ static ssize_t bonding_store_active_slave(struct device *d,
if (!strlen(ifname) || buf[0] == '\n') { if (!strlen(ifname) || buf[0] == '\n') {
pr_info("%s: Clearing current active slave.\n", pr_info("%s: Clearing current active slave.\n",
bond->dev->name); bond->dev->name);
bond->curr_active_slave = NULL; rcu_assign_pointer(bond->curr_active_slave, NULL);
bond_select_active_slave(bond); bond_select_active_slave(bond);
goto out; goto out;
} }
...@@ -1347,14 +1347,9 @@ static ssize_t bonding_show_mii_status(struct device *d, ...@@ -1347,14 +1347,9 @@ static ssize_t bonding_show_mii_status(struct device *d,
struct device_attribute *attr, struct device_attribute *attr,
char *buf) char *buf)
{ {
struct slave *curr;
struct bonding *bond = to_bond(d); struct bonding *bond = to_bond(d);
read_lock(&bond->curr_slave_lock); return sprintf(buf, "%s\n", bond->curr_active_slave ? "up" : "down");
curr = bond->curr_active_slave;
read_unlock(&bond->curr_slave_lock);
return sprintf(buf, "%s\n", curr ? "up" : "down");
} }
static DEVICE_ATTR(mii_status, S_IRUGO, bonding_show_mii_status, NULL); static DEVICE_ATTR(mii_status, S_IRUGO, bonding_show_mii_status, NULL);
......
...@@ -116,6 +116,10 @@ ...@@ -116,6 +116,10 @@
#define bond_for_each_slave(bond, pos) \ #define bond_for_each_slave(bond, pos) \
list_for_each_entry(pos, &(bond)->slave_list, list) list_for_each_entry(pos, &(bond)->slave_list, list)
/* Caller must have rcu_read_lock */
#define bond_for_each_slave_rcu(bond, pos) \
list_for_each_entry_rcu(pos, &(bond)->slave_list, list)
/** /**
* bond_for_each_slave_reverse - iterate in reverse from a given position * bond_for_each_slave_reverse - iterate in reverse from a given position
* @bond: the bond holding this list * @bond: the bond holding this list
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment