Commit 3a70417c authored by David S. Miller

Merge branch 'rt6i_gateway'

Julian Anastasov says:

====================
ipv6: use rt6i_gateway as nexthop

	The following patchset makes sure that rt6i_gateway
contains valid nexthop information in all cases, so that
we can use a different nexthop for sending.

	The first patch is a simple fix that makes IPVS, TEE,
RAW(hdrincl) and RTF_DYNAMIC(without RTF_GATEWAY) work as
they did before 3.9. There is a single corner case not solved by
this patch: RAW(hdrincl) or TEE using a local address for
the nexthop, a silly feature, I guess. In this case we
see zeroes in rt6i_gateway because we get a route that is not
cloned. This is solved only with patch 2.

	The second patch is an optimization that makes sure
all resulting routes have rt6i_gateway filled, so that we
can avoid the complex ipv6_addr_any() call added to rt6_nexthop()
by patch 1. And it sets rt6i_gateway for local routes, a case
not handled by patch 1.

	The third patch uses the new rt6_nexthop() function to fix
the matching of gateways in the same way as commit bbb5823c
("netfilter: nf_conntrack: fix rt_gateway checks for H.323 helper")
fixes nf_conntrack_h323_main.c for IPv4. Currently, it depends on
the new definition of rt6_nexthop() in patch 2. Actually, if
patch 2 is applied, patch 3 becomes a cosmetic change.

	I see the following two alternatives for applying these
patches:

1. Let patch 2 linger in net-next to avoid surprises in the upcoming
release. In this case patch 3 can be reworked not to depend on
the new rt6_nexthop() definition in patch 2. I guess this is the
better option, so that patch 2 can be reviewed and tested for
a longer time.

2. Include all 3 patches in the net tree - more risky because this
is my first attempt to change IPv6.

	Here is the situation as handled by patch 2:

	In IPv6 the resolved routes are always host routes (/128
with DST_HOST), mostly cloned ones. We allow routes in the FIB
to contain rt6i_gateway with zeroes (e.g. for local subnets) but
on cloning we can fill the rt6i_gateway field in the result.
This works even without this patchset.

	There is a single special case where dst is provided as
skb_dst directly without a routing call: icmp6_dst_alloc(). It is a
private dst allocated just for the particular ICMP packet. Patch 2
fills rt6i_gateway in this case, needed for the new rt6_nexthop()
simplification.

	The last case is addrconf_dst_alloc(), which can put
local/anycast routes in the FIB when addresses are added. Patch 2
needs to fill rt6i_gateway in this case because such routes
are returned without cloning.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 4440c6f7 56e42441
...@@ -194,11 +194,9 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) ...@@ -194,11 +194,9 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
} }
static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, struct in6_addr *dest) static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt)
{ {
if (rt->rt6i_flags & RTF_GATEWAY) return &rt->rt6i_gateway;
return &rt->rt6i_gateway;
return dest;
} }
#endif #endif
...@@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb) ...@@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
} }
rcu_read_lock_bh(); rcu_read_lock_bh();
nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); nexthop = rt6_nexthop((struct rt6_info *)dst);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
if (unlikely(!neigh)) if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
...@@ -874,7 +874,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, ...@@ -874,7 +874,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
*/ */
rt = (struct rt6_info *) *dst; rt = (struct rt6_info *) *dst;
rcu_read_lock_bh(); rcu_read_lock_bh();
n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
rcu_read_unlock_bh(); rcu_read_unlock_bh();
......
...@@ -851,7 +851,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, ...@@ -851,7 +851,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
if (ort->rt6i_dst.plen != 128 && if (ort->rt6i_dst.plen != 128 &&
ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
rt->rt6i_flags |= RTF_ANYCAST; rt->rt6i_flags |= RTF_ANYCAST;
rt->rt6i_gateway = *daddr;
} }
rt->rt6i_flags |= RTF_CACHE; rt->rt6i_flags |= RTF_CACHE;
...@@ -1338,6 +1337,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, ...@@ -1338,6 +1337,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->dst.flags |= DST_HOST; rt->dst.flags |= DST_HOST;
rt->dst.output = ip6_output; rt->dst.output = ip6_output;
atomic_set(&rt->dst.__refcnt, 1); atomic_set(&rt->dst.__refcnt, 1);
rt->rt6i_gateway = fl6->daddr;
rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128; rt->rt6i_dst.plen = 128;
rt->rt6i_idev = idev; rt->rt6i_idev = idev;
...@@ -1873,7 +1873,10 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, ...@@ -1873,7 +1873,10 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
in6_dev_hold(rt->rt6i_idev); in6_dev_hold(rt->rt6i_idev);
rt->dst.lastuse = jiffies; rt->dst.lastuse = jiffies;
rt->rt6i_gateway = ort->rt6i_gateway; if (ort->rt6i_flags & RTF_GATEWAY)
rt->rt6i_gateway = ort->rt6i_gateway;
else
rt->rt6i_gateway = *dest;
rt->rt6i_flags = ort->rt6i_flags; rt->rt6i_flags = ort->rt6i_flags;
if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
(RTF_DEFAULT | RTF_ADDRCONF)) (RTF_DEFAULT | RTF_ADDRCONF))
...@@ -2160,6 +2163,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, ...@@ -2160,6 +2163,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
else else
rt->rt6i_flags |= RTF_LOCAL; rt->rt6i_flags |= RTF_LOCAL;
rt->rt6i_gateway = *addr;
rt->rt6i_dst.addr = *addr; rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128; rt->rt6i_dst.plen = 128;
rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
......
...@@ -778,8 +778,8 @@ static int callforward_do_filter(const union nf_inet_addr *src, ...@@ -778,8 +778,8 @@ static int callforward_do_filter(const union nf_inet_addr *src,
flowi6_to_flowi(&fl1), false)) { flowi6_to_flowi(&fl1), false)) {
if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
flowi6_to_flowi(&fl2), false)) { flowi6_to_flowi(&fl2), false)) {
if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, if (ipv6_addr_equal(rt6_nexthop(rt1),
sizeof(rt1->rt6i_gateway)) && rt6_nexthop(rt2)) &&
rt1->dst.dev == rt2->dst.dev) rt1->dst.dev == rt2->dst.dev)
ret = 1; ret = 1;
dst_release(&rt2->dst); dst_release(&rt2->dst);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment