Commit 4361eb66 authored by David S. Miller

Merge branch 'ipv6-cleanups'

Cong Wang says:

====================
ipv6: clean up locking code in anycast and mcast

This patchset cleans up the locking code in anycast.c and mcast.c
and makes the refcount code more readable.
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>

v1 -> v2:
* refactor some code and move it into a separate patch
* update comments
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 8801d48c 1691c63e
......@@ -2083,8 +2083,8 @@ void __dev_remove_pack(struct packet_type *pt);
void dev_add_offload(struct packet_offload *po);
void dev_remove_offload(struct packet_offload *po);
struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags,
unsigned short mask);
struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
unsigned short mask);
struct net_device *dev_get_by_name(struct net *net, const char *name);
struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
struct net_device *__dev_get_by_name(struct net *net, const char *name);
......
......@@ -202,7 +202,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex,
const struct in6_addr *addr);
void ipv6_sock_ac_close(struct sock *sk);
int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr);
int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr);
int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr);
bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
const struct in6_addr *addr);
......
......@@ -897,23 +897,25 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
EXPORT_SYMBOL(dev_getfirstbyhwtype);
/**
* dev_get_by_flags_rcu - find any device with given flags
* __dev_get_by_flags - find any device with given flags
* @net: the applicable net namespace
* @if_flags: IFF_* values
* @mask: bitmask of bits in if_flags to check
*
* Search for any interface with the given flags. Returns NULL if a device
* is not found or a pointer to the device. Must be called inside
* rcu_read_lock(), and result refcount is unchanged.
* rtnl_lock(), and result refcount is unchanged.
*/
struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
unsigned short mask)
struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
unsigned short mask)
{
struct net_device *dev, *ret;
ASSERT_RTNL();
ret = NULL;
for_each_netdev_rcu(net, dev) {
for_each_netdev(net, dev) {
if (((dev->flags ^ if_flags) & mask) == 0) {
ret = dev;
break;
......@@ -921,7 +923,7 @@ struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags
}
return ret;
}
EXPORT_SYMBOL(dev_get_by_flags_rcu);
EXPORT_SYMBOL(__dev_get_by_flags);
/**
* dev_valid_name - check if name is okay for network device
......
......@@ -1725,7 +1725,7 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
if (ipv6_addr_any(&addr))
return;
ipv6_dev_ac_inc(ifp->idev->dev, &addr);
__ipv6_dev_ac_inc(ifp->idev, &addr);
}
/* caller must hold RTNL */
......
......@@ -46,10 +46,6 @@
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
/* Big ac list lock for all the sockets */
static DEFINE_SPINLOCK(ipv6_sk_ac_lock);
/*
* socket join an anycast group
*/
......@@ -78,7 +74,6 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
pac->acl_addr = *addr;
rtnl_lock();
rcu_read_lock();
if (ifindex == 0) {
struct rt6_info *rt;
......@@ -91,11 +86,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
goto error;
} else {
/* router, no matching interface: just pick one */
dev = dev_get_by_flags_rcu(net, IFF_UP,
IFF_UP | IFF_LOOPBACK);
dev = __dev_get_by_flags(net, IFF_UP,
IFF_UP | IFF_LOOPBACK);
}
} else
dev = dev_get_by_index_rcu(net, ifindex);
dev = __dev_get_by_index(net, ifindex);
if (dev == NULL) {
err = -ENODEV;
......@@ -127,17 +122,14 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
goto error;
}
err = ipv6_dev_ac_inc(dev, addr);
err = __ipv6_dev_ac_inc(idev, addr);
if (!err) {
spin_lock_bh(&ipv6_sk_ac_lock);
pac->acl_next = np->ipv6_ac_list;
np->ipv6_ac_list = pac;
spin_unlock_bh(&ipv6_sk_ac_lock);
pac = NULL;
}
error:
rcu_read_unlock();
rtnl_unlock();
if (pac)
sock_kfree_s(sk, pac, sizeof(*pac));
......@@ -154,7 +146,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
struct ipv6_ac_socklist *pac, *prev_pac;
struct net *net = sock_net(sk);
spin_lock_bh(&ipv6_sk_ac_lock);
rtnl_lock();
prev_pac = NULL;
for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
......@@ -163,7 +155,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
prev_pac = pac;
}
if (!pac) {
spin_unlock_bh(&ipv6_sk_ac_lock);
rtnl_unlock();
return -ENOENT;
}
if (prev_pac)
......@@ -171,14 +163,9 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
else
np->ipv6_ac_list = pac->acl_next;
spin_unlock_bh(&ipv6_sk_ac_lock);
rtnl_lock();
rcu_read_lock();
dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
dev = __dev_get_by_index(net, pac->acl_ifindex);
if (dev)
ipv6_dev_ac_dec(dev, &pac->acl_addr);
rcu_read_unlock();
rtnl_unlock();
sock_kfree_s(sk, pac, sizeof(*pac));
......@@ -196,19 +183,16 @@ void ipv6_sock_ac_close(struct sock *sk)
if (!np->ipv6_ac_list)
return;
spin_lock_bh(&ipv6_sk_ac_lock);
rtnl_lock();
pac = np->ipv6_ac_list;
np->ipv6_ac_list = NULL;
spin_unlock_bh(&ipv6_sk_ac_lock);
prev_index = 0;
rtnl_lock();
rcu_read_lock();
while (pac) {
struct ipv6_ac_socklist *next = pac->acl_next;
if (pac->acl_ifindex != prev_index) {
dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
dev = __dev_get_by_index(net, pac->acl_ifindex);
prev_index = pac->acl_ifindex;
}
if (dev)
......@@ -216,10 +200,14 @@ void ipv6_sock_ac_close(struct sock *sk)
sock_kfree_s(sk, pac, sizeof(*pac));
pac = next;
}
rcu_read_unlock();
rtnl_unlock();
}
/* Take an additional reference on @aca by incrementing aca_refcnt. */
static void aca_get(struct ifacaddr6 *aca)
{
atomic_inc(&aca->aca_refcnt);
}
static void aca_put(struct ifacaddr6 *ac)
{
if (atomic_dec_and_test(&ac->aca_refcnt)) {
......@@ -229,23 +217,40 @@ static void aca_put(struct ifacaddr6 *ac)
}
}
/*
 * Allocate (GFP_ATOMIC) and initialise an anycast address entry for @addr.
 *
 * The owning inet6_dev is taken from @rt (rt->rt6i_idev) and a reference
 * is held on it for the entry.  The new entry records @rt, starts with
 * one user and a reference count of one.
 *
 * Returns the new entry, or NULL if the allocation failed.
 */
static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
				   const struct in6_addr *addr)
{
	struct inet6_dev *idev = rt->rt6i_idev;
	struct ifacaddr6 *aca = kzalloc(sizeof(*aca), GFP_ATOMIC);

	if (!aca)
		return NULL;

	/* the entry holds its own reference on the device */
	in6_dev_hold(idev);
	aca->aca_idev = idev;
	aca->aca_rt = rt;
	aca->aca_addr = *addr;

	aca->aca_users = 1;
	atomic_set(&aca->aca_refcnt, 1);
	spin_lock_init(&aca->aca_lock);

	/* aca_tstamp should be updated upon changes */
	aca->aca_tstamp = jiffies;
	aca->aca_cstamp = aca->aca_tstamp;

	return aca;
}
/*
* device anycast group inc (add if not found)
*/
int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifacaddr6 *aca;
struct inet6_dev *idev;
struct rt6_info *rt;
int err;
ASSERT_RTNL();
idev = in6_dev_get(dev);
if (idev == NULL)
return -EINVAL;
write_lock_bh(&idev->lock);
if (idev->dead) {
err = -ENODEV;
......@@ -260,46 +265,35 @@ int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
}
}
/*
* not found: create a new one.
*/
aca = kzalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
if (aca == NULL) {
err = -ENOMEM;
goto out;
}
rt = addrconf_dst_alloc(idev, addr, true);
if (IS_ERR(rt)) {
kfree(aca);
err = PTR_ERR(rt);
goto out;
}
aca->aca_addr = *addr;
aca->aca_idev = idev;
aca->aca_rt = rt;
aca->aca_users = 1;
/* aca_tstamp should be updated upon changes */
aca->aca_cstamp = aca->aca_tstamp = jiffies;
atomic_set(&aca->aca_refcnt, 2);
spin_lock_init(&aca->aca_lock);
aca = aca_alloc(rt, addr);
if (aca == NULL) {
ip6_rt_put(rt);
err = -ENOMEM;
goto out;
}
aca->aca_next = idev->ac_list;
idev->ac_list = aca;
/* Hold this for addrconf_join_solict() below before we unlock,
* it is already exposed via idev->ac_list.
*/
aca_get(aca);
write_unlock_bh(&idev->lock);
ip6_ins_rt(rt);
addrconf_join_solict(dev, &aca->aca_addr);
addrconf_join_solict(idev->dev, &aca->aca_addr);
aca_put(aca);
return 0;
out:
write_unlock_bh(&idev->lock);
in6_dev_put(idev);
return err;
}
......@@ -341,7 +335,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
return 0;
}
/* called with rcu_read_lock() */
/* called with rtnl_lock() */
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
{
struct inet6_dev *idev = __in6_dev_get(dev);
......
......@@ -73,9 +73,6 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
/* Big mc list lock for all the sockets */
static DEFINE_SPINLOCK(ipv6_sk_mc_lock);
static void igmp6_join_group(struct ifmcaddr6 *ma);
static void igmp6_leave_group(struct ifmcaddr6 *ma);
static void igmp6_timer_handler(unsigned long data);
......@@ -165,7 +162,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
mc_lst->addr = *addr;
rtnl_lock();
rcu_read_lock();
if (ifindex == 0) {
struct rt6_info *rt;
rt = rt6_lookup(net, addr, NULL, 0, 0);
......@@ -174,10 +170,9 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
ip6_rt_put(rt);
}
} else
dev = dev_get_by_index_rcu(net, ifindex);
dev = __dev_get_by_index(net, ifindex);
if (dev == NULL) {
rcu_read_unlock();
rtnl_unlock();
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return -ENODEV;
......@@ -195,18 +190,14 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
err = ipv6_dev_mc_inc(dev, addr);
if (err) {
rcu_read_unlock();
rtnl_unlock();
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return err;
}
spin_lock(&ipv6_sk_mc_lock);
mc_lst->next = np->ipv6_mc_list;
rcu_assign_pointer(np->ipv6_mc_list, mc_lst);
spin_unlock(&ipv6_sk_mc_lock);
rcu_read_unlock();
rtnl_unlock();
return 0;
......@@ -226,20 +217,16 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
return -EINVAL;
rtnl_lock();
spin_lock(&ipv6_sk_mc_lock);
for (lnk = &np->ipv6_mc_list;
(mc_lst = rcu_dereference_protected(*lnk,
lockdep_is_held(&ipv6_sk_mc_lock))) != NULL;
(mc_lst = rtnl_dereference(*lnk)) != NULL;
lnk = &mc_lst->next) {
if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
ipv6_addr_equal(&mc_lst->addr, addr)) {
struct net_device *dev;
*lnk = mc_lst->next;
spin_unlock(&ipv6_sk_mc_lock);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
dev = __dev_get_by_index(net, mc_lst->ifindex);
if (dev != NULL) {
struct inet6_dev *idev = __in6_dev_get(dev);
......@@ -248,7 +235,6 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
__ipv6_dev_mc_dec(idev, &mc_lst->addr);
} else
(void) ip6_mc_leave_src(sk, mc_lst, NULL);
rcu_read_unlock();
rtnl_unlock();
atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
......@@ -256,7 +242,6 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
return 0;
}
}
spin_unlock(&ipv6_sk_mc_lock);
rtnl_unlock();
return -EADDRNOTAVAIL;
......@@ -303,16 +288,12 @@ void ipv6_sock_mc_close(struct sock *sk)
return;
rtnl_lock();
spin_lock(&ipv6_sk_mc_lock);
while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
while ((mc_lst = rtnl_dereference(np->ipv6_mc_list)) != NULL) {
struct net_device *dev;
np->ipv6_mc_list = mc_lst->next;
spin_unlock(&ipv6_sk_mc_lock);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
dev = __dev_get_by_index(net, mc_lst->ifindex);
if (dev) {
struct inet6_dev *idev = __in6_dev_get(dev);
......@@ -321,14 +302,11 @@ void ipv6_sock_mc_close(struct sock *sk)
__ipv6_dev_mc_dec(idev, &mc_lst->addr);
} else
(void) ip6_mc_leave_src(sk, mc_lst, NULL);
rcu_read_unlock();
atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
kfree_rcu(mc_lst, rcu);
spin_lock(&ipv6_sk_mc_lock);
}
spin_unlock(&ipv6_sk_mc_lock);
rtnl_unlock();
}
......@@ -578,9 +556,8 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
}
err = -EADDRNOTAVAIL;
/*
* changes to the ipv6_mc_list require the socket lock and
* a read lock on ip6_sk_mc_lock. We have the socket lock,
/* changes to the ipv6_mc_list require the socket lock and
* rtnl lock. We have the socket lock and rcu read lock,
* so reading the list is safe.
*/
......@@ -604,9 +581,8 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
return -EFAULT;
}
/* changes to psl require the socket lock, a read lock on
* on ipv6_sk_mc_lock and a write lock on pmc->sflock. We
* have the socket lock, so reading here is safe.
/* changes to psl require the socket lock, and a write lock
* on pmc->sflock. We have the socket lock so reading here is safe.
*/
for (i = 0; i < copycount; i++) {
struct sockaddr_in6 *psin6;
......@@ -665,14 +641,6 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
return rv;
}
/*
 * Drop one reference on @mc.  Once atomic_dec_and_test() reports that the
 * count has reached zero, release the entry's inet6_dev reference and
 * free the entry.
 */
static void ma_put(struct ifmcaddr6 *mc)
{
	if (!atomic_dec_and_test(&mc->mca_refcnt))
		return;

	in6_dev_put(mc->idev);
	kfree(mc);
}
static void igmp6_group_added(struct ifmcaddr6 *mc)
{
struct net_device *dev = mc->idev->dev;
......@@ -838,6 +806,48 @@ static void mld_clear_delrec(struct inet6_dev *idev)
read_unlock_bh(&idev->lock);
}
/* Take an additional reference on @mc by incrementing mca_refcnt. */
static void mca_get(struct ifmcaddr6 *mc)
{
atomic_inc(&mc->mca_refcnt);
}
/*
 * Drop one reference on @mc.  Once atomic_dec_and_test() reports that the
 * count has reached zero, release the entry's inet6_dev reference and
 * free the entry.
 */
static void ma_put(struct ifmcaddr6 *mc)
{
	if (!atomic_dec_and_test(&mc->mca_refcnt))
		return;

	in6_dev_put(mc->idev);
	kfree(mc);
}
/*
 * Allocate (GFP_ATOMIC) and initialise a multicast address entry for
 * @addr on @idev.
 *
 * No reference is taken on @idev here; the caller is expected to hold
 * one on behalf of the new entry.  The entry starts with one user, a
 * reference count of one and an (EXCLUDE, empty) source filter.
 *
 * Returns the new entry, or NULL if the allocation failed.
 */
static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
				   const struct in6_addr *addr)
{
	struct ifmcaddr6 *mca = kzalloc(sizeof(*mca), GFP_ATOMIC);

	if (!mca)
		return NULL;

	mca->idev = idev; /* reference taken by caller */
	mca->mca_addr = *addr;
	mca->mca_users = 1;
	atomic_set(&mca->mca_refcnt, 1);
	spin_lock_init(&mca->mca_lock);
	setup_timer(&mca->mca_timer, igmp6_timer_handler, (unsigned long)mca);

	/* mca_tstamp should be updated upon changes */
	mca->mca_tstamp = jiffies;
	mca->mca_cstamp = mca->mca_tstamp;

	/* initial mode is (EX, empty) */
	mca->mca_sfmode = MCAST_EXCLUDE;
	mca->mca_sfcount[MCAST_EXCLUDE] = 1;

	/*
	 * Mark the link-local all-nodes group, and any group whose scope is
	 * below link-local, with MAF_NOREPORT.
	 */
	if (IPV6_ADDR_MC_SCOPE(&mca->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL ||
	    ipv6_addr_is_ll_all_nodes(&mca->mca_addr))
		mca->mca_flags |= MAF_NOREPORT;

	return mca;
}
/*
* device multicast group inc (add if not found)
......@@ -873,38 +883,20 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
}
}
/*
* not found: create a new one.
*/
mc = kzalloc(sizeof(struct ifmcaddr6), GFP_ATOMIC);
if (mc == NULL) {
mc = mca_alloc(idev, addr);
if (!mc) {
write_unlock_bh(&idev->lock);
in6_dev_put(idev);
return -ENOMEM;
}
setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
mc->mca_addr = *addr;
mc->idev = idev; /* (reference taken) */
mc->mca_users = 1;
/* mca_stamp should be updated upon changes */
mc->mca_cstamp = mc->mca_tstamp = jiffies;
atomic_set(&mc->mca_refcnt, 2);
spin_lock_init(&mc->mca_lock);
/* initial mode is (EX, empty) */
mc->mca_sfmode = MCAST_EXCLUDE;
mc->mca_sfcount[MCAST_EXCLUDE] = 1;
if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) ||
IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
mc->mca_flags |= MAF_NOREPORT;
mc->next = idev->mc_list;
idev->mc_list = mc;
/* Hold this for the code below before we unlock,
* it is already exposed via idev->mc_list.
*/
mca_get(mc);
write_unlock_bh(&idev->lock);
mld_del_delrec(idev, &mc->mca_addr);
......@@ -948,7 +940,7 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
struct inet6_dev *idev;
int err;
rcu_read_lock();
ASSERT_RTNL();
idev = __in6_dev_get(dev);
if (!idev)
......@@ -956,7 +948,6 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
else
err = __ipv6_dev_mc_dec(idev, addr);
rcu_read_unlock();
return err;
}
......@@ -2373,7 +2364,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
{
int err;
/* callers have the socket lock and a write lock on ipv6_sk_mc_lock,
/* callers have the socket lock and rtnl lock
* so no other readers or writers of iml or its sflist
*/
if (!iml->sflist) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment