Commit 19a2afbe authored by David S. Miller

Merge branch 'vrf-Support-for-local-traffic-with-sockets-bound-to-enslaved-devices'

David Ahern says:

====================
net: vrf: Support for local traffic with sockets bound to enslaved devices

This set gets local traffic working for sockets bound to enslaved
devices. The local rtable and rt6_info added in June 2016 to get
local traffic in VRFs working are no longer needed and actually
keep local traffic for sockets bound to an enslaved device from
working. Patch 1 removes them.

Patch 2 adds a fixup for IPv4 IP_PKTINFO to return rt_iif for
packets sent over the VRF device. This is similar to the handling
of loopback.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 9438c871 1dfa7639
...@@ -47,9 +47,7 @@ static unsigned int vrf_net_id; ...@@ -47,9 +47,7 @@ static unsigned int vrf_net_id;
struct net_vrf { struct net_vrf {
struct rtable __rcu *rth; struct rtable __rcu *rth;
struct rtable __rcu *rth_local;
struct rt6_info __rcu *rt6; struct rt6_info __rcu *rt6;
struct rt6_info __rcu *rt6_local;
u32 tb_id; u32 tb_id;
}; };
...@@ -194,42 +192,10 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, ...@@ -194,42 +192,10 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
/* if dst.dev is loopback or the VRF device again this is locally /* if dst.dev is loopback or the VRF device again this is locally
* originated traffic destined to a local address. Short circuit * originated traffic destined to a local address. Short circuit
* to Rx path using our local dst * to Rx path
*/ */
if (dst->dev == net->loopback_dev || dst->dev == dev) { if (dst->dev == dev)
struct net_vrf *vrf = netdev_priv(dev); return vrf_local_xmit(skb, dev, dst);
struct rt6_info *rt6_local;
/* release looked up dst and use cached local dst */
dst_release(dst);
rcu_read_lock();
rt6_local = rcu_dereference(vrf->rt6_local);
if (unlikely(!rt6_local)) {
rcu_read_unlock();
goto err;
}
/* Ordering issue: cached local dst is created on newlink
* before the IPv6 initialization. Using the local dst
* requires rt6i_idev to be set so make sure it is.
*/
if (unlikely(!rt6_local->rt6i_idev)) {
rt6_local->rt6i_idev = in6_dev_get(dev);
if (!rt6_local->rt6i_idev) {
rcu_read_unlock();
goto err;
}
}
dst = &rt6_local->dst;
dst_hold(dst);
rcu_read_unlock();
return vrf_local_xmit(skb, dev, &rt6_local->dst);
}
skb_dst_set(skb, dst); skb_dst_set(skb, dst);
...@@ -296,30 +262,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, ...@@ -296,30 +262,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
/* if dst.dev is loopback or the VRF device again this is locally /* if dst.dev is loopback or the VRF device again this is locally
* originated traffic destined to a local address. Short circuit * originated traffic destined to a local address. Short circuit
* to Rx path using our local dst * to Rx path
*/ */
if (rt->dst.dev == net->loopback_dev || rt->dst.dev == vrf_dev) { if (rt->dst.dev == vrf_dev)
struct net_vrf *vrf = netdev_priv(vrf_dev); return vrf_local_xmit(skb, vrf_dev, &rt->dst);
struct rtable *rth_local;
struct dst_entry *dst = NULL;
ip_rt_put(rt);
rcu_read_lock();
rth_local = rcu_dereference(vrf->rth_local);
if (likely(rth_local)) {
dst = &rth_local->dst;
dst_hold(dst);
}
rcu_read_unlock();
if (unlikely(!dst))
goto err;
return vrf_local_xmit(skb, vrf_dev, dst);
}
skb_dst_set(skb, &rt->dst); skb_dst_set(skb, &rt->dst);
...@@ -528,12 +474,10 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, ...@@ -528,12 +474,10 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{ {
struct rt6_info *rt6 = rtnl_dereference(vrf->rt6); struct rt6_info *rt6 = rtnl_dereference(vrf->rt6);
struct rt6_info *rt6_local = rtnl_dereference(vrf->rt6_local);
struct net *net = dev_net(dev); struct net *net = dev_net(dev);
struct dst_entry *dst; struct dst_entry *dst;
RCU_INIT_POINTER(vrf->rt6, NULL); RCU_INIT_POINTER(vrf->rt6, NULL);
RCU_INIT_POINTER(vrf->rt6_local, NULL);
synchronize_rcu(); synchronize_rcu();
/* move dev in dst's to loopback so this VRF device can be deleted /* move dev in dst's to loopback so this VRF device can be deleted
...@@ -546,19 +490,6 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) ...@@ -546,19 +490,6 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
dev_hold(dst->dev); dev_hold(dst->dev);
dst_release(dst); dst_release(dst);
} }
if (rt6_local) {
if (rt6_local->rt6i_idev) {
in6_dev_put(rt6_local->rt6i_idev);
rt6_local->rt6i_idev = NULL;
}
dst = &rt6_local->dst;
dev_put(dst->dev);
dst->dev = net->loopback_dev;
dev_hold(dst->dev);
dst_release(dst);
}
} }
static int vrf_rt6_create(struct net_device *dev) static int vrf_rt6_create(struct net_device *dev)
...@@ -567,7 +498,7 @@ static int vrf_rt6_create(struct net_device *dev) ...@@ -567,7 +498,7 @@ static int vrf_rt6_create(struct net_device *dev)
struct net_vrf *vrf = netdev_priv(dev); struct net_vrf *vrf = netdev_priv(dev);
struct net *net = dev_net(dev); struct net *net = dev_net(dev);
struct fib6_table *rt6i_table; struct fib6_table *rt6i_table;
struct rt6_info *rt6, *rt6_local; struct rt6_info *rt6;
int rc = -ENOMEM; int rc = -ENOMEM;
/* IPv6 can be CONFIG enabled and then disabled runtime */ /* IPv6 can be CONFIG enabled and then disabled runtime */
...@@ -586,22 +517,7 @@ static int vrf_rt6_create(struct net_device *dev) ...@@ -586,22 +517,7 @@ static int vrf_rt6_create(struct net_device *dev)
rt6->rt6i_table = rt6i_table; rt6->rt6i_table = rt6i_table;
rt6->dst.output = vrf_output6; rt6->dst.output = vrf_output6;
/* create a dst for local routing - packets sent locally
* to local address via the VRF device as a loopback
*/
rt6_local = ip6_dst_alloc(net, dev, flags);
if (!rt6_local) {
dst_release(&rt6->dst);
goto out;
}
rt6_local->rt6i_idev = in6_dev_get(dev);
rt6_local->rt6i_flags = RTF_UP | RTF_NONEXTHOP | RTF_LOCAL;
rt6_local->rt6i_table = rt6i_table;
rt6_local->dst.input = ip6_input;
rcu_assign_pointer(vrf->rt6, rt6); rcu_assign_pointer(vrf->rt6, rt6);
rcu_assign_pointer(vrf->rt6_local, rt6_local);
rc = 0; rc = 0;
out: out:
...@@ -788,12 +704,10 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, ...@@ -788,12 +704,10 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
{ {
struct rtable *rth = rtnl_dereference(vrf->rth); struct rtable *rth = rtnl_dereference(vrf->rth);
struct rtable *rth_local = rtnl_dereference(vrf->rth_local);
struct net *net = dev_net(dev); struct net *net = dev_net(dev);
struct dst_entry *dst; struct dst_entry *dst;
RCU_INIT_POINTER(vrf->rth, NULL); RCU_INIT_POINTER(vrf->rth, NULL);
RCU_INIT_POINTER(vrf->rth_local, NULL);
synchronize_rcu(); synchronize_rcu();
/* move dev in dst's to loopback so this VRF device can be deleted /* move dev in dst's to loopback so this VRF device can be deleted
...@@ -806,20 +720,12 @@ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) ...@@ -806,20 +720,12 @@ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
dev_hold(dst->dev); dev_hold(dst->dev);
dst_release(dst); dst_release(dst);
} }
if (rth_local) {
dst = &rth_local->dst;
dev_put(dst->dev);
dst->dev = net->loopback_dev;
dev_hold(dst->dev);
dst_release(dst);
}
} }
static int vrf_rtable_create(struct net_device *dev) static int vrf_rtable_create(struct net_device *dev)
{ {
struct net_vrf *vrf = netdev_priv(dev); struct net_vrf *vrf = netdev_priv(dev);
struct rtable *rth, *rth_local; struct rtable *rth;
if (!fib_new_table(dev_net(dev), vrf->tb_id)) if (!fib_new_table(dev_net(dev), vrf->tb_id))
return -ENOMEM; return -ENOMEM;
...@@ -829,22 +735,10 @@ static int vrf_rtable_create(struct net_device *dev) ...@@ -829,22 +735,10 @@ static int vrf_rtable_create(struct net_device *dev)
if (!rth) if (!rth)
return -ENOMEM; return -ENOMEM;
/* create a dst for local ingress routing - packets sent locally
* to local address via the VRF device as a loopback
*/
rth_local = rt_dst_alloc(dev, RTCF_LOCAL, RTN_LOCAL, 1, 1, 0);
if (!rth_local) {
dst_release(&rth->dst);
return -ENOMEM;
}
rth->dst.output = vrf_output; rth->dst.output = vrf_output;
rth->rt_table_id = vrf->tb_id; rth->rt_table_id = vrf->tb_id;
rth_local->rt_table_id = vrf->tb_id;
rcu_assign_pointer(vrf->rth, rth); rcu_assign_pointer(vrf->rth, rth);
rcu_assign_pointer(vrf->rth_local, rth_local);
return 0; return 0;
} }
......
...@@ -1207,6 +1207,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, ...@@ -1207,6 +1207,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
{ {
struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) || bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
ipv6_sk_rxinfo(sk); ipv6_sk_rxinfo(sk);
...@@ -1220,7 +1221,7 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) ...@@ -1220,7 +1221,7 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
* (e.g., process binds socket to eth0 for Tx which is * (e.g., process binds socket to eth0 for Tx which is
* redirected to loopback in the rtable/dst). * redirected to loopback in the rtable/dst).
*/ */
if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX || l3slave)
pktinfo->ipi_ifindex = inet_iif(skb); pktinfo->ipi_ifindex = inet_iif(skb);
pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment