Commit d7ef9b04 authored by David S. Miller

Merge branch 'bond_vlan'

Veaceslav Falico says:

====================
bonding: remove vlan special handling

v1: Per Jiri's advice, remove the exported netdev_upper struct to keep it
    inside dev.c only, and instead implement a macro to iterate over the
    list and return only net_device *.
v2: Jiri noted that we need to see every upper device, not only the
    first-level ones. Modify the netdev_upper logic to include a list of
    lower devices, so that in both the upper and lower lists every device
    sees both its first-level devices and every other device that is
    lower/upper of it. Also, convert some annoyingly spammy warnings to
    pr_debug in bond_arp_send_all.
v3: move the renaming part completely to patch 1 (did I forget to git add
    before committing?) and address Jiri's input about the comments/style
    of patch 2.
v4: as Vlad spotted, bond_arp_send_all() won't work in a config where we
    have a device with an IP on top of our upper vlan. It fails to send
    packets because we don't tag the packet, while the device on top of
    the vlan will emit tagged packets through this vlan. Fix this by first
    searching through all upper vlans, and for each vlan - through the
    devices on top of it. If we find the device - tag the packet with the
    underlying vlan's vlan_id, otherwise just search the old way - through
    all devices on top of the bonding (a condensed sketch of this lookup
    follows right after this list). Also, move the version changes under
    "---" so they won't get into the commit message, if/when applied.
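
For clarity, a condensed sketch of that v4 lookup as it ends up in
bond_arp_send_all() (locking, ip_rt_put() and the debug output are
trimmed here; rt is the route to the current arp_ip_target):

	vlan_id = 0;
	/* pass 1: look for a vlan on top of the bond that has
	 * rt->dst.dev somewhere above it, and take that vlan's tag
	 */
	netdev_for_each_upper_dev_rcu(bond->dev, vlan_upper, vlan_iter) {
		if (!is_vlan_dev(vlan_upper))
			continue;
		netdev_for_each_upper_dev_rcu(vlan_upper, upper, iter) {
			if (upper == rt->dst.dev) {
				vlan_id = vlan_dev_vlan_id(vlan_upper);
				goto found;
			}
		}
	}
	/* pass 2: fall back to any device directly on top of the bond;
	 * if it happens to be a vlan, still pick up its id
	 */
	netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
		if (upper == rt->dst.dev) {
			if (is_vlan_dev(upper))
				vlan_id = vlan_dev_vlan_id(upper);
			goto found;
		}
	}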

The aim of this patchset is to remove bonding's own vlan handling as much
as possible and replace it with the netdev upper device functionality.

The upper device functionality is extended to also include lower devices
and to give each device a full view of every lower and upper device, not
only the first-level ones. In the future this might allow any
grouping/teaming/upper/lower device to avoid maintaining its own lists of
slaves/vlans/etc. An illustration of the resulting view follows below.
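
As an illustration of the resulting view (hypothetical helper and device
names, assuming the patchset is applied; eth0 is enslaved to bond0 and
vlan10 sits on top of bond0):

/* Illustration only: hypothetical net_device pointers for an
 * eth0 -> bond0 -> vlan10 stack, built with the calls the drivers
 * already make (roughly netdev_master_upper_dev_link() from bonding,
 * netdev_upper_dev_link() from 8021q).
 */
static void show_full_view(struct net_device *eth0,
			   struct net_device *bond0,
			   struct net_device *vlan10)
{
	ASSERT_RTNL();

	/* first-level links, same as before this patchset */
	WARN_ON(!netdev_has_upper_dev(eth0, bond0));
	WARN_ON(!netdev_has_upper_dev(bond0, vlan10));

	/* new: eth0 now also finds vlan10, two levels up, directly in
	 * its own upper_dev_list (and vlan10 finds eth0 in its
	 * lower_dev_list)
	 */
	WARN_ON(!netdev_has_upper_dev(eth0, vlan10));
}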

This is achieved by adding a helper function to upper dev list handling -
netdev_upper_get_next_dev_rcu(dev, iter), which returns the next device
after the list_head **iter, and sets *iter to the next list_head *. This
patchset also adds netdev_for_each_upper_dev_rcu(dev, upper, iter), which
iterates through the whole dev->upper_dev_list, setting upper to the
net_device. The only special treatment of vlans remains in the rlb code.
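
For illustration, this is roughly what bond_vlan_used() turns into with
this series - the macro only walks dev->upper_dev_list, the vlan check is
the caller's:

static inline bool bond_vlan_used(struct bonding *bond)
{
	struct net_device *upper;
	struct list_head *iter;

	rcu_read_lock();
	netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
		if (upper->priv_flags & IFF_802_1Q_VLAN) {
			rcu_read_unlock();
			return true;
		}
	}
	rcu_read_unlock();

	return false;
}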

This patchset solves several issues with bonding, simplifies it overall,
RCUifies it further and exports the upper list functions for any other
users that might also want to get rid of their own vlan or slave lists.

I'm continuously testing it; no issues found so far, I will follow up if
anything turns up.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 6d508cce 3e32582f
@@ -971,26 +971,22 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
 
 /*********************** tlb/rlb shared functions *********************/
 
-static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
+static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[],
+			    u16 vid)
 {
-	struct bonding *bond = bond_get_bond_by_slave(slave);
 	struct learning_pkt pkt;
+	struct sk_buff *skb;
 	int size = sizeof(struct learning_pkt);
-	int i;
+	char *data;
 
 	memset(&pkt, 0, size);
 	memcpy(pkt.mac_dst, mac_addr, ETH_ALEN);
 	memcpy(pkt.mac_src, mac_addr, ETH_ALEN);
 	pkt.type = cpu_to_be16(ETH_P_LOOP);
 
-	for (i = 0; i < MAX_LP_BURST; i++) {
-		struct sk_buff *skb;
-		char *data;
-
-		skb = dev_alloc_skb(size);
-		if (!skb) {
-			return;
-		}
+	skb = dev_alloc_skb(size);
+	if (!skb)
+		return;
 
-		data = skb_put(skb, size);
-		memcpy(data, &pkt, size);
+	data = skb_put(skb, size);
+	memcpy(data, &pkt, size);
 
@@ -1001,28 +997,36 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
-		skb->priority = TC_PRIO_CONTROL;
-		skb->dev = slave->dev;
+	skb->priority = TC_PRIO_CONTROL;
+	skb->dev = slave->dev;
 
-		if (bond_vlan_used(bond)) {
-			struct vlan_entry *vlan;
-
-			vlan = bond_next_vlan(bond,
-					      bond->alb_info.current_alb_vlan);
-
-			bond->alb_info.current_alb_vlan = vlan;
-			if (!vlan) {
-				kfree_skb(skb);
-				continue;
-			}
-
-			skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vlan->vlan_id);
-			if (!skb) {
-				pr_err("%s: Error: failed to insert VLAN tag\n",
-				       bond->dev->name);
-				continue;
-			}
-		}
+	if (vid) {
+		skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vid);
+		if (!skb) {
+			pr_err("%s: Error: failed to insert VLAN tag\n",
+			       slave->bond->dev->name);
+			return;
+		}
+	}
 
-		dev_queue_xmit(skb);
-	}
+	dev_queue_xmit(skb);
+}
+
+static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
+{
+	struct bonding *bond = bond_get_bond_by_slave(slave);
+	struct net_device *upper;
+	struct list_head *iter;
+
+	/* send untagged */
+	alb_send_lp_vid(slave, mac_addr, 0);
+
+	/* loop through vlans and send one packet for each */
+	rcu_read_lock();
+	netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+		if (upper->priv_flags & IFF_802_1Q_VLAN)
+			alb_send_lp_vid(slave, mac_addr,
+					vlan_dev_vlan_id(upper));
+	}
+	rcu_read_unlock();
 }
 
 static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[])
@@ -1759,11 +1763,6 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
 
 void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
 {
-	if (bond->alb_info.current_alb_vlan &&
-	    (bond->alb_info.current_alb_vlan->vlan_id == vlan_id)) {
-		bond->alb_info.current_alb_vlan = NULL;
-	}
-
 	if (bond->alb_info.rlb_enabled) {
 		rlb_clear_vlan(bond, vlan_id);
 	}
...
@@ -53,7 +53,6 @@ struct slave;
 
 #define TLB_NULL_INDEX		0xffffffff
-#define MAX_LP_BURST 3
 
 /* rlb defs */
 #define RLB_HASH_TABLE_SIZE	256
@@ -170,7 +169,6 @@ struct alb_bond_info {
 					 * rx traffic should be
 					 * rebalanced
 					 */
-	struct vlan_entry	*current_alb_vlan;
 };
 
 int bond_alb_initialize(struct bonding *bond, int rlb_enabled);
...
@@ -282,116 +282,6 @@ const char *bond_mode_name(int mode)
 
 /*---------------------------------- VLAN -----------------------------------*/
 
-/**
- * bond_add_vlan - add a new vlan id on bond
- * @bond: bond that got the notification
- * @vlan_id: the vlan id to add
- *
- * Returns -ENOMEM if allocation failed.
- */
-static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id)
-{
-	struct vlan_entry *vlan;
-
-	pr_debug("bond: %s, vlan id %d\n",
-		 (bond ? bond->dev->name : "None"), vlan_id);
-
-	vlan = kzalloc(sizeof(struct vlan_entry), GFP_KERNEL);
-	if (!vlan)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&vlan->vlan_list);
-	vlan->vlan_id = vlan_id;
-
-	write_lock_bh(&bond->lock);
-
-	list_add_tail(&vlan->vlan_list, &bond->vlan_list);
-
-	write_unlock_bh(&bond->lock);
-
-	pr_debug("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name);
-
-	return 0;
-}
-
-/**
- * bond_del_vlan - delete a vlan id from bond
- * @bond: bond that got the notification
- * @vlan_id: the vlan id to delete
- *
- * returns -ENODEV if @vlan_id was not found in @bond.
- */
-static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
-{
-	struct vlan_entry *vlan;
-	int res = -ENODEV;
-
-	pr_debug("bond: %s, vlan id %d\n", bond->dev->name, vlan_id);
-
-	block_netpoll_tx();
-	write_lock_bh(&bond->lock);
-
-	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
-		if (vlan->vlan_id == vlan_id) {
-			list_del(&vlan->vlan_list);
-			if (bond_is_lb(bond))
-				bond_alb_clear_vlan(bond, vlan_id);
-			pr_debug("removed VLAN ID %d from bond %s\n",
-				 vlan_id, bond->dev->name);
-			kfree(vlan);
-			res = 0;
-			goto out;
-		}
-	}
-
-	pr_debug("couldn't find VLAN ID %d in bond %s\n",
-		 vlan_id, bond->dev->name);
-
-out:
-	write_unlock_bh(&bond->lock);
-	unblock_netpoll_tx();
-	return res;
-}
-
-/**
- * bond_next_vlan - safely skip to the next item in the vlans list.
- * @bond: the bond we're working on
- * @curr: item we're advancing from
- *
- * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL,
- * or @curr->next otherwise (even if it is @curr itself again).
- *
- * Caller must hold bond->lock
- */
-struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
-{
-	struct vlan_entry *next, *last;
-
-	if (list_empty(&bond->vlan_list))
-		return NULL;
-
-	if (!curr) {
-		next = list_entry(bond->vlan_list.next,
-				  struct vlan_entry, vlan_list);
-	} else {
-		last = list_entry(bond->vlan_list.prev,
-				  struct vlan_entry, vlan_list);
-		if (last == curr) {
-			next = list_entry(bond->vlan_list.next,
-					  struct vlan_entry, vlan_list);
-		} else {
-			next = list_entry(curr->vlan_list.next,
-					  struct vlan_entry, vlan_list);
-		}
-	}
-
-	return next;
-}
-
 /**
  * bond_dev_queue_xmit - Prepare skb for xmit.
  *
@@ -451,13 +341,6 @@ static int bond_vlan_rx_add_vid(struct net_device *bond_dev,
 			goto unwind;
 	}
 
-	res = bond_add_vlan(bond, vid);
-	if (res) {
-		pr_err("%s: Error: Failed to add vlan id %d\n",
-		       bond_dev->name, vid);
-		goto unwind;
-	}
-
 	return 0;
 
 unwind:
@@ -478,17 +361,12 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct slave *slave;
-	int res;
 
 	bond_for_each_slave(bond, slave)
 		vlan_vid_del(slave->dev, proto, vid);
 
-	res = bond_del_vlan(bond, vid);
-	if (res) {
-		pr_err("%s: Error: Failed to remove vlan id %d\n",
-		       bond_dev->name, vid);
-		return res;
-	}
+	if (bond_is_lb(bond))
+		bond_alb_clear_vlan(bond, vid);
 
 	return 0;
 }
@@ -1954,7 +1832,7 @@ static int __bond_release_one(struct net_device *bond_dev,
 		bond_set_carrier(bond);
 		eth_hw_addr_random(bond_dev);
 
-		if (bond_vlan_used(bond)) {
+		if (vlan_uses_dev(bond_dev)) {
 			pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n",
 				   bond_dev->name, bond_dev->name);
 			pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n",
@@ -2392,24 +2270,25 @@ void bond_mii_monitor(struct work_struct *work)
 	}
 }
 
-static int bond_has_this_ip(struct bonding *bond, __be32 ip)
+static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
 {
-	struct vlan_entry *vlan;
-	struct net_device *vlan_dev;
+	struct net_device *upper;
+	struct list_head *iter;
+	bool ret = false;
 
 	if (ip == bond_confirm_addr(bond->dev, 0, ip))
-		return 1;
+		return true;
 
-	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
-		rcu_read_lock();
-		vlan_dev = __vlan_find_dev_deep(bond->dev, htons(ETH_P_8021Q),
-						vlan->vlan_id);
-		rcu_read_unlock();
-		if (vlan_dev && ip == bond_confirm_addr(vlan_dev, 0, ip))
-			return 1;
+	rcu_read_lock();
+	netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+		if (ip == bond_confirm_addr(upper, 0, ip)) {
+			ret = true;
+			break;
+		}
 	}
+	rcu_read_unlock();
 
-	return 0;
+	return ret;
 }
/* /*
...@@ -2444,81 +2323,79 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ ...@@ -2444,81 +2323,79 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_
static void bond_arp_send_all(struct bonding *bond, struct slave *slave) static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
{ {
int i, vlan_id; struct net_device *upper, *vlan_upper;
__be32 *targets = bond->params.arp_targets; struct list_head *iter, *vlan_iter;
struct vlan_entry *vlan;
struct net_device *vlan_dev = NULL;
struct rtable *rt; struct rtable *rt;
__be32 *targets = bond->params.arp_targets, addr;
int i, vlan_id;
for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) {
__be32 addr;
if (!targets[i])
break;
pr_debug("basa: target %pI4\n", &targets[i]); pr_debug("basa: target %pI4\n", &targets[i]);
if (!bond_vlan_used(bond)) {
pr_debug("basa: empty vlan: arp_send\n");
addr = bond_confirm_addr(bond->dev, targets[i], 0);
bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
addr, 0);
continue;
}
/* /* Find out through which dev should the packet go */
* If VLANs are configured, we do a route lookup to
* determine which VLAN interface would be used, so we
* can tag the ARP with the proper VLAN tag.
*/
rt = ip_route_output(dev_net(bond->dev), targets[i], 0, rt = ip_route_output(dev_net(bond->dev), targets[i], 0,
RTO_ONLINK, 0); RTO_ONLINK, 0);
if (IS_ERR(rt)) { if (IS_ERR(rt)) {
if (net_ratelimit()) { pr_debug("%s: no route to arp_ip_target %pI4\n",
pr_warning("%s: no route to arp_ip_target %pI4\n",
bond->dev->name, &targets[i]); bond->dev->name, &targets[i]);
}
continue; continue;
} }
/* vlan_id = 0;
* This target is not on a VLAN
/* bond device itself */
if (rt->dst.dev == bond->dev)
goto found;
rcu_read_lock();
/* first we search only for vlan devices. for every vlan
* found we verify its upper dev list, searching for the
* rt->dst.dev. If found we save the tag of the vlan and
* proceed to send the packet.
*
* TODO: QinQ?
*/ */
if (rt->dst.dev == bond->dev) { netdev_for_each_upper_dev_rcu(bond->dev, vlan_upper, vlan_iter) {
ip_rt_put(rt); if (!is_vlan_dev(vlan_upper))
pr_debug("basa: rtdev == bond->dev: arp_send\n");
addr = bond_confirm_addr(bond->dev, targets[i], 0);
bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
addr, 0);
continue; continue;
netdev_for_each_upper_dev_rcu(vlan_upper, upper, iter) {
if (upper == rt->dst.dev) {
vlan_id = vlan_dev_vlan_id(vlan_upper);
rcu_read_unlock();
goto found;
}
}
} }
vlan_id = 0; /* if the device we're looking for is not on top of any of
list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { * our upper vlans, then just search for any dev that
rcu_read_lock(); * matches, and in case it's a vlan - save the id
vlan_dev = __vlan_find_dev_deep(bond->dev, */
htons(ETH_P_8021Q), netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
vlan->vlan_id); if (upper == rt->dst.dev) {
/* if it's a vlan - get its VID */
if (is_vlan_dev(upper))
vlan_id = vlan_dev_vlan_id(upper);
rcu_read_unlock(); rcu_read_unlock();
if (vlan_dev == rt->dst.dev) { goto found;
vlan_id = vlan->vlan_id;
pr_debug("basa: vlan match on %s %d\n",
vlan_dev->name, vlan_id);
break;
} }
} }
rcu_read_unlock();
/* Not our device - skip */
pr_debug("%s: no path to arp_ip_target %pI4 via rt.dev %s\n",
bond->dev->name, &targets[i],
rt->dst.dev ? rt->dst.dev->name : "NULL");
if (vlan_id && vlan_dev) {
ip_rt_put(rt); ip_rt_put(rt);
addr = bond_confirm_addr(vlan_dev, targets[i], 0);
bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
addr, vlan_id);
continue; continue;
}
if (net_ratelimit()) { found:
pr_warning("%s: no path to arp_ip_target %pI4 via rt.dev %s\n", addr = bond_confirm_addr(rt->dst.dev, targets[i], 0);
bond->dev->name, &targets[i],
rt->dst.dev ? rt->dst.dev->name : "NULL");
}
ip_rt_put(rt); ip_rt_put(rt);
bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
addr, vlan_id);
} }
} }
@@ -4142,7 +4019,6 @@ static void bond_setup(struct net_device *bond_dev)
 
 	/* Initialize pointers */
 	bond->dev = bond_dev;
-	INIT_LIST_HEAD(&bond->vlan_list);
 
 	/* Initialize the device entry points */
 	ether_setup(bond_dev);
@@ -4195,7 +4071,6 @@ static void bond_uninit(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct slave *slave, *tmp_slave;
-	struct vlan_entry *vlan, *tmp;
 
 	bond_netpoll_cleanup(bond_dev);
 
@@ -4207,11 +4082,6 @@ static void bond_uninit(struct net_device *bond_dev)
 	list_del(&bond->bond_list);
 
 	bond_debug_unregister(bond);
-
-	list_for_each_entry_safe(vlan, tmp, &bond->vlan_list, vlan_list) {
-		list_del(&vlan->vlan_list);
-		kfree(vlan);
-	}
 }
 
 /*------------------------- Module initialization ---------------------------*/
...
@@ -185,11 +185,6 @@ struct bond_parm_tbl {
 
 #define BOND_MAX_MODENAME_LEN 20
 
-struct vlan_entry {
-	struct list_head vlan_list;
-	unsigned short vlan_id;
-};
-
 struct slave {
 	struct net_device *dev; /* first - useful for panic debug */
 	struct list_head list;
@@ -254,7 +249,6 @@ struct bonding {
 	struct   ad_bond_info ad_info;
 	struct   alb_bond_info alb_info;
 	struct   bond_params params;
-	struct   list_head vlan_list;
 	struct   workqueue_struct *wq;
 	struct   delayed_work mii_work;
 	struct   delayed_work arp_work;
@@ -267,9 +261,22 @@ struct bonding {
 #endif /* CONFIG_DEBUG_FS */
 };
 
+/* if we hold rtnl_lock() - call vlan_uses_dev() */
 static inline bool bond_vlan_used(struct bonding *bond)
 {
-	return !list_empty(&bond->vlan_list);
+	struct net_device *upper;
+	struct list_head *iter;
+
+	rcu_read_lock();
+	netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+		if (upper->priv_flags & IFF_802_1Q_VLAN) {
+			rcu_read_unlock();
+			return true;
+		}
+	}
+	rcu_read_unlock();
+
+	return false;
 }
 
 #define bond_slave_get_rcu(dev) \
...
@@ -1125,6 +1125,7 @@ struct net_device {
 	struct list_head	napi_list;
 	struct list_head	unreg_list;
 	struct list_head	upper_dev_list; /* List of upper devices */
+	struct list_head	lower_dev_list;
 
 	/* currently active device features */
@@ -2767,6 +2768,16 @@ extern int bpf_jit_enable;
 extern bool netdev_has_upper_dev(struct net_device *dev,
 				 struct net_device *upper_dev);
 extern bool netdev_has_any_upper_dev(struct net_device *dev);
+extern struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+							 struct list_head **iter);
+
+/* iterate through upper list, must be called under RCU read lock */
+#define netdev_for_each_upper_dev_rcu(dev, upper, iter) \
+	for (iter = &(dev)->upper_dev_list, \
+	     upper = netdev_upper_get_next_dev_rcu(dev, &(iter)); \
+	     upper; \
+	     upper = netdev_upper_get_next_dev_rcu(dev, &(iter)))
+
 extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
 extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
 extern int netdev_upper_dev_link(struct net_device *dev,
...
@@ -4367,57 +4367,48 @@ static void net_rx_action(struct softirq_action *h)
 	goto out;
 }
 
-struct netdev_upper {
+struct netdev_adjacent {
 	struct net_device *dev;
+
+	/* upper master flag, there can only be one master device per list */
 	bool master;
+
+	/* indicates that this dev is our first-level lower/upper device */
+	bool neighbour;
+
+	/* counter for the number of times this device was added to us */
+	u16 ref_nr;
+
 	struct list_head list;
 	struct rcu_head rcu;
-	struct list_head search_list;
 };
 
-static void __append_search_uppers(struct list_head *search_list,
-				   struct net_device *dev)
+static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
+						 struct net_device *adj_dev,
+						 bool upper)
 {
-	struct netdev_upper *upper;
+	struct netdev_adjacent *adj;
+	struct list_head *dev_list;
 
-	list_for_each_entry(upper, &dev->upper_dev_list, list) {
-		/* check if this upper is not already in search list */
-		if (list_empty(&upper->search_list))
-			list_add_tail(&upper->search_list, search_list);
+	dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list;
+
+	list_for_each_entry(adj, dev_list, list) {
+		if (adj->dev == adj_dev)
+			return adj;
 	}
+
+	return NULL;
 }
 
-static bool __netdev_search_upper_dev(struct net_device *dev,
-				      struct net_device *upper_dev)
+static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev,
+							   struct net_device *udev)
 {
-	LIST_HEAD(search_list);
-	struct netdev_upper *upper;
-	struct netdev_upper *tmp;
-	bool ret = false;
-
-	__append_search_uppers(&search_list, dev);
-	list_for_each_entry(upper, &search_list, search_list) {
-		if (upper->dev == upper_dev) {
-			ret = true;
-			break;
-		}
-		__append_search_uppers(&search_list, upper->dev);
-	}
-	list_for_each_entry_safe(upper, tmp, &search_list, search_list)
-		INIT_LIST_HEAD(&upper->search_list);
-	return ret;
+	return __netdev_find_adj(dev, udev, true);
 }
 
-static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
-						struct net_device *upper_dev)
+static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev,
+							   struct net_device *ldev)
 {
-	struct netdev_upper *upper;
-
-	list_for_each_entry(upper, &dev->upper_dev_list, list) {
-		if (upper->dev == upper_dev)
-			return upper;
-	}
-	return NULL;
+	return __netdev_find_adj(dev, ldev, false);
 }
 
 /**
@@ -4462,7 +4453,7 @@ EXPORT_SYMBOL(netdev_has_any_upper_dev);
  */
 struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 {
-	struct netdev_upper *upper;
+	struct netdev_adjacent *upper;
 
 	ASSERT_RTNL();
 
@@ -4470,13 +4461,38 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 		return NULL;
 
 	upper = list_first_entry(&dev->upper_dev_list,
-				 struct netdev_upper, list);
+				 struct netdev_adjacent, list);
 	if (likely(upper->master))
 		return upper->dev;
 	return NULL;
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
+/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next device from the dev's upper list, starting from iter
+ * position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+						 struct list_head **iter)
+{
+	struct netdev_adjacent *upper;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+	if (&upper->list == &dev->upper_dev_list)
+		return NULL;
+
+	*iter = &upper->list;
+
+	return upper->dev;
+}
+EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+
 /**
  * netdev_master_upper_dev_get_rcu - Get master upper device
  * @dev: device
@@ -4486,20 +4502,158 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get);
  */
 struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
 {
-	struct netdev_upper *upper;
+	struct netdev_adjacent *upper;
 
 	upper = list_first_or_null_rcu(&dev->upper_dev_list,
-				       struct netdev_upper, list);
+				       struct netdev_adjacent, list);
 	if (upper && likely(upper->master))
 		return upper->dev;
 	return NULL;
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
 
static int __netdev_adjacent_dev_insert(struct net_device *dev,
struct net_device *adj_dev,
bool neighbour, bool master,
bool upper)
{
struct netdev_adjacent *adj;
adj = __netdev_find_adj(dev, adj_dev, upper);
if (adj) {
BUG_ON(neighbour);
adj->ref_nr++;
return 0;
}
adj = kmalloc(sizeof(*adj), GFP_KERNEL);
if (!adj)
return -ENOMEM;
adj->dev = adj_dev;
adj->master = master;
adj->neighbour = neighbour;
adj->ref_nr = 1;
dev_hold(adj_dev);
pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
adj_dev->name, upper ? "upper" : "lower", dev->name,
adj_dev->name);
if (!upper) {
list_add_tail_rcu(&adj->list, &dev->lower_dev_list);
return 0;
}
/* Ensure that master upper link is always the first item in list. */
if (master)
list_add_rcu(&adj->list, &dev->upper_dev_list);
else
list_add_tail_rcu(&adj->list, &dev->upper_dev_list);
return 0;
}
static inline int __netdev_upper_dev_insert(struct net_device *dev,
struct net_device *udev,
bool master, bool neighbour)
{
return __netdev_adjacent_dev_insert(dev, udev, neighbour, master,
true);
}
static inline int __netdev_lower_dev_insert(struct net_device *dev,
struct net_device *ldev,
bool neighbour)
{
return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false,
false);
}
void __netdev_adjacent_dev_remove(struct net_device *dev,
struct net_device *adj_dev, bool upper)
{
struct netdev_adjacent *adj;
if (upper)
adj = __netdev_find_upper(dev, adj_dev);
else
adj = __netdev_find_lower(dev, adj_dev);
if (!adj)
BUG();
if (adj->ref_nr > 1) {
adj->ref_nr--;
return;
}
list_del_rcu(&adj->list);
pr_debug("dev_put for %s, because of %s link removed from %s to %s\n",
adj_dev->name, upper ? "upper" : "lower", dev->name,
adj_dev->name);
dev_put(adj_dev);
kfree_rcu(adj, rcu);
}
static inline void __netdev_upper_dev_remove(struct net_device *dev,
struct net_device *udev)
{
return __netdev_adjacent_dev_remove(dev, udev, true);
}
static inline void __netdev_lower_dev_remove(struct net_device *dev,
struct net_device *ldev)
{
return __netdev_adjacent_dev_remove(dev, ldev, false);
}
int __netdev_adjacent_dev_insert_link(struct net_device *dev,
struct net_device *upper_dev,
bool master, bool neighbour)
{
int ret;
ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour);
if (ret)
return ret;
ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour);
if (ret) {
__netdev_upper_dev_remove(dev, upper_dev);
return ret;
}
return 0;
}
static inline int __netdev_adjacent_dev_link(struct net_device *dev,
struct net_device *udev)
{
return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
}
static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
struct net_device *udev,
bool master)
{
return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
}
void __netdev_adjacent_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
__netdev_upper_dev_remove(dev, upper_dev);
__netdev_lower_dev_remove(upper_dev, dev);
}
 static int __netdev_upper_dev_link(struct net_device *dev,
 				   struct net_device *upper_dev, bool master)
 {
-	struct netdev_upper *upper;
+	struct netdev_adjacent *i, *j, *to_i, *to_j;
+	int ret = 0;
 
 	ASSERT_RTNL();
 
@@ -4507,7 +4661,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 		return -EBUSY;
 
 	/* To prevent loops, check if dev is not upper device to upper_dev. */
-	if (__netdev_search_upper_dev(upper_dev, dev))
+	if (__netdev_find_upper(upper_dev, dev))
 		return -EBUSY;
 
 	if (__netdev_find_upper(dev, upper_dev))
@@ -4516,22 +4670,76 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	if (master && netdev_master_upper_dev_get(dev))
 		return -EBUSY;
 
-	upper = kmalloc(sizeof(*upper), GFP_KERNEL);
-	if (!upper)
-		return -ENOMEM;
+	ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
+	if (ret)
+		return ret;
 
-	upper->dev = upper_dev;
-	upper->master = master;
-	INIT_LIST_HEAD(&upper->search_list);
-
-	/* Ensure that master upper link is always the first item in list. */
-	if (master)
-		list_add_rcu(&upper->list, &dev->upper_dev_list);
-	else
-		list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
-	dev_hold(upper_dev);
+	/* Now that we linked these devs, make all the upper_dev's
+	 * upper_dev_list visible to every dev's lower_dev_list and vice
+	 * versa, and don't forget the devices itself. All of these
+	 * links are non-neighbours.
+	 */
+	list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+		list_for_each_entry(j, &dev->lower_dev_list, list) {
+			ret = __netdev_adjacent_dev_link(i->dev, j->dev);
+			if (ret)
+				goto rollback_mesh;
+		}
+	}
+
+	/* add dev to every upper_dev's upper device */
+	list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+		ret = __netdev_adjacent_dev_link(dev, i->dev);
+		if (ret)
+			goto rollback_upper_mesh;
+	}
+
+	/* add upper_dev to every dev's lower device */
+	list_for_each_entry(i, &dev->lower_dev_list, list) {
+		ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
+		if (ret)
+			goto rollback_lower_mesh;
+	}
+
 	call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
 	return 0;
rollback_lower_mesh:
to_i = i;
list_for_each_entry(i, &dev->lower_dev_list, list) {
if (i == to_i)
break;
__netdev_adjacent_dev_unlink(i->dev, upper_dev);
}
i = NULL;
rollback_upper_mesh:
to_i = i;
list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
if (i == to_i)
break;
__netdev_adjacent_dev_unlink(dev, i->dev);
}
i = j = NULL;
rollback_mesh:
to_i = i;
to_j = j;
list_for_each_entry(i, &dev->lower_dev_list, list) {
list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
if (i == to_i && j == to_j)
break;
__netdev_adjacent_dev_unlink(i->dev, j->dev);
}
if (i == to_i)
break;
}
__netdev_adjacent_dev_unlink(dev, upper_dev);
return ret;
 }
 
 /**
@@ -4580,16 +4788,28 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
 void netdev_upper_dev_unlink(struct net_device *dev,
 			     struct net_device *upper_dev)
 {
-	struct netdev_upper *upper;
+	struct netdev_adjacent *i, *j;
 	ASSERT_RTNL();
 
-	upper = __netdev_find_upper(dev, upper_dev);
-	if (!upper)
-		return;
-	list_del_rcu(&upper->list);
-	dev_put(upper_dev);
-	kfree_rcu(upper, rcu);
+	__netdev_adjacent_dev_unlink(dev, upper_dev);
+
+	/* Here is the tricky part. We must remove all dev's lower
+	 * devices from all upper_dev's upper devices and vice
+	 * versa, to maintain the graph relationship.
+	 */
+	list_for_each_entry(i, &dev->lower_dev_list, list)
list_for_each_entry(i, &dev->lower_dev_list, list)
list_for_each_entry(j, &upper_dev->upper_dev_list, list)
__netdev_adjacent_dev_unlink(i->dev, j->dev);
/* remove also the devices itself from lower/upper device
* list
*/
list_for_each_entry(i, &dev->lower_dev_list, list)
__netdev_adjacent_dev_unlink(i->dev, upper_dev);
list_for_each_entry(i, &upper_dev->upper_dev_list, list)
__netdev_adjacent_dev_unlink(dev, i->dev);
 	call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -5850,6 +6070,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	INIT_LIST_HEAD(&dev->unreg_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
 	INIT_LIST_HEAD(&dev->upper_dev_list);
+	INIT_LIST_HEAD(&dev->lower_dev_list);
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
...