Commit 9cbc590d authored by Wei Wang, committed by Greg Kroah-Hartman

ipv6: fix src addr routing with the exception table

[ Upstream commit 510e2ced ]

When inserting route cache into the exception table, the key is
generated with both src_addr and dest_addr with src addr routing.
However, current logic always assumes the src_addr used to generate the
key is a /128 host address. This is not true in the following scenarios:
1. When the route is a gateway route or does not have next hop.
   (rt6_is_gw_or_nonexthop() == true)
2. When calling ip6_rt_cache_alloc(), saddr is passed in as NULL.
This means, when looking for a route cache in the exception table, we
have to do the lookup twice: first time with the passed in /128 host
address, second time with the src_addr stored in fib6_info.

This solves the pmtu discovery issue reported by Mikael Magnusson where
a route cache with a lower mtu info is created for a gateway route with
src addr. However, the lookup code is not able to find this route cache.

Fixes: 2b760fcf ("ipv6: hook up exception table to store dst cache")
Reported-by: Mikael Magnusson <mikael.kernel@lists.m7n.se>
Bisected-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Wei Wang <weiwan@google.com>
Cc: Martin Lau <kafai@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent e0e8106a
...@@ -110,8 +110,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, ...@@ -110,8 +110,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
int iif, int type, u32 portid, u32 seq, int iif, int type, u32 portid, u32 seq,
unsigned int flags); unsigned int flags);
static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
struct in6_addr *daddr, const struct in6_addr *daddr,
struct in6_addr *saddr); const struct in6_addr *saddr);
#ifdef CONFIG_IPV6_ROUTE_INFO #ifdef CONFIG_IPV6_ROUTE_INFO
static struct fib6_info *rt6_add_route_info(struct net *net, static struct fib6_info *rt6_add_route_info(struct net *net,
...@@ -1529,31 +1529,44 @@ void rt6_flush_exceptions(struct fib6_info *rt) ...@@ -1529,31 +1529,44 @@ void rt6_flush_exceptions(struct fib6_info *rt)
* Caller has to hold rcu_read_lock() * Caller has to hold rcu_read_lock()
*/ */
static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
struct in6_addr *daddr, const struct in6_addr *daddr,
struct in6_addr *saddr) const struct in6_addr *saddr)
{ {
const struct in6_addr *src_key = NULL;
struct rt6_exception_bucket *bucket; struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex; struct rt6_exception *rt6_ex;
struct rt6_info *res = NULL; struct rt6_info *res = NULL;
bucket = rcu_dereference(rt->rt6i_exception_bucket);
#ifdef CONFIG_IPV6_SUBTREES #ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates rt is in subtree /* rt6i_src.plen != 0 indicates rt is in subtree
* and exception table is indexed by a hash of * and exception table is indexed by a hash of
* both rt6i_dst and rt6i_src. * both rt6i_dst and rt6i_src.
* Otherwise, the exception table is indexed by * However, the src addr used to create the hash
* a hash of only rt6i_dst. * might not be exactly the passed in saddr which
* is a /128 addr from the flow.
* So we need to use f6i->fib6_src to redo lookup
* if the passed in saddr does not find anything.
* (See the logic in ip6_rt_cache_alloc() on how
* rt->rt6i_src is updated.)
*/ */
if (rt->fib6_src.plen) if (rt->fib6_src.plen)
src_key = saddr; src_key = saddr;
find_ex:
#endif #endif
bucket = rcu_dereference(rt->rt6i_exception_bucket);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
res = rt6_ex->rt6i; res = rt6_ex->rt6i;
#ifdef CONFIG_IPV6_SUBTREES
/* Use fib6_src as src_key and redo lookup */
if (!res && src_key && src_key != &rt->fib6_src.addr) {
src_key = &rt->fib6_src.addr;
goto find_ex;
}
#endif
return res; return res;
} }
...@@ -2608,10 +2621,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) ...@@ -2608,10 +2621,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr) struct in6_addr *saddr)
{ {
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct in6_addr *src_key;
struct inet6_dev *idev; struct inet6_dev *idev;
struct rt6_info *rt;
u32 mtu = 0; u32 mtu = 0;
if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) { if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
...@@ -2620,18 +2631,10 @@ u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, ...@@ -2620,18 +2631,10 @@ u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
goto out; goto out;
} }
src_key = NULL; rt = rt6_find_cached_rt(f6i, daddr, saddr);
#ifdef CONFIG_IPV6_SUBTREES if (unlikely(rt)) {
if (f6i->fib6_src.plen) mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
src_key = saddr; } else {
#endif
bucket = rcu_dereference(f6i->rt6i_exception_bucket);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
if (likely(!mtu)) {
struct net_device *dev = fib6_info_nh_dev(f6i); struct net_device *dev = fib6_info_nh_dev(f6i);
mtu = IPV6_MIN_MTU; mtu = IPV6_MIN_MTU;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment