Commit dcdfa50e authored by David S. Miller's avatar David S. Miller

Merge branch 'cached-route-listings'

Stefano Brivio says:

====================
Fix listing (IPv4, IPv6) and flushing (IPv6) of cached route exceptions

For IPv6 cached routes, the commands 'ip -6 route list cache' and
'ip -6 route flush cache' don't work at all after route exceptions have
been moved to a separate hash table in commit 2b760fcf ("ipv6: hook
up exception table to store dst cache").

For IPv4 cached routes, the command 'ip route list cache' has also
stopped working in kernel 3.5 after commit 4895c771 ("ipv4: Add FIB
nexthop exceptions.") introduced storage for route exceptions as a
separate entity.

Fix this by allowing userspace to clearly request cached routes with
the RTM_F_CLONED flag used as a filter (in conjunction with strict
checking) and by retrieving and dumping cached routes if requested.

If strict checking is not requested (iproute2 < 5.0.0), we don't have a
way to consistently filter results on other selectors (e.g. on tables),
so skip filtering entirely and dump both regular routes and exceptions.

For IPv4, cache flushing uses a completely different mechanism, so it
wasn't affected. Listing of exception routes (modified routes pre-3.5) was
tested against these versions of kernel and iproute2:

                    iproute2
kernel         4.14.0   4.15.0   4.19.0   5.0.0   5.1.0
 3.5-rc4         +        +        +        +       +
 4.4
 4.9
 4.14
 4.15
 4.19
 5.0
 5.1
 fixed           +        +        +        +       +

For IPv6, a separate iproute2 patch is required. Versions of iproute2
and kernel tested:

                    iproute2
kernel             4.14.0   4.15.0   4.19.0   5.0.0   5.1.0    5.1.0, patched
 3.18    list        +        +        +        +       +            +
         flush       +        +        +        +       +            +
 4.4     list        +        +        +        +       +            +
         flush       +        +        +        +       +            +
 4.9     list        +        +        +        +       +            +
         flush       +        +        +        +       +            +
 4.14    list        +        +        +        +       +            +
         flush       +        +        +        +       +            +
 4.15    list
         flush
 4.19    list
         flush
 5.0     list
         flush
 5.1     list
         flush
 with    list        +        +        +        +       +            +
 fix     flush       +        +        +                             +

v7: Make sure r->rtm_tos is initialised in 3/11, move loop over nexthop
    objects in 4/11, add comments about usage of "skip" counters in commit
    messages of 4/11 and 8/11

v6: Target for net-next, rebase and adapt to nexthop objects for IPv6 paths.
    Merge selftests into this series (as they were addressed for net-next).
    A number of minor changes detailed in logs of single patches.

v5: Skip filtering altogether if no strict checking is requested: selecting
    routes or exceptions only would be inconsistent with the fact we can't
    filter on tables. Drop 1/8 (non-strict dump filter function no longer
    needed), replace 2/8 (don't use NLM_F_MATCH, decide to skip routes or
    exceptions in filter function), drop 6/8 (2/8 is enough for IPv6 too).
    Introduce dump_routes and dump_exceptions flags in filter, adapt other
    patches to that.

v4: Fix the listing issue also for IPv4, making the behaviour consistent
    with IPv6. Honour NLM_F_MATCH as per RFC 3549 and allow usage of
    RTM_F_CLONED filter. Split patches into smaller logical changes.

v3: Drop check on RTM_F_CLONED and rework logic of return values of
    rt6_dump_route()

v2: Add count of routes handled in partial dumps, and skip them, in patch 1/2.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 97236cda b964641e
...@@ -316,6 +316,7 @@ struct fib6_walker { ...@@ -316,6 +316,7 @@ struct fib6_walker {
enum fib6_walk_state state; enum fib6_walk_state state;
unsigned int skip; unsigned int skip;
unsigned int count; unsigned int count;
unsigned int skip_in_node;
int (*func)(struct fib6_walker *); int (*func)(struct fib6_walker *);
void *args; void *args;
}; };
......
...@@ -197,7 +197,7 @@ struct rt6_rtnl_dump_arg { ...@@ -197,7 +197,7 @@ struct rt6_rtnl_dump_arg {
struct fib_dump_filter filter; struct fib_dump_filter filter;
}; };
int rt6_dump_route(struct fib6_info *f6i, void *p_arg); int rt6_dump_route(struct fib6_info *f6i, void *p_arg, unsigned int skip);
void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
......
...@@ -245,6 +245,8 @@ struct fib_dump_filter { ...@@ -245,6 +245,8 @@ struct fib_dump_filter {
/* filter_set is an optimization that an entry is set */ /* filter_set is an optimization that an entry is set */
bool filter_set; bool filter_set;
bool dump_all_families; bool dump_all_families;
bool dump_routes;
bool dump_exceptions;
unsigned char protocol; unsigned char protocol;
unsigned char rt_type; unsigned char rt_type;
unsigned int flags; unsigned int flags;
......
...@@ -230,6 +230,10 @@ void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric); ...@@ -230,6 +230,10 @@ void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric);
void rt_add_uncached_list(struct rtable *rt); void rt_add_uncached_list(struct rtable *rt);
void rt_del_uncached_list(struct rtable *rt); void rt_del_uncached_list(struct rtable *rt);
int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
u32 table_id, struct fib_info *fi,
int *fa_index, int fa_start);
static inline void ip_rt_put(struct rtable *rt) static inline void ip_rt_put(struct rtable *rt)
{ {
/* dst_release() accepts a NULL parameter. /* dst_release() accepts a NULL parameter.
......
...@@ -912,10 +912,15 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, ...@@ -912,10 +912,15 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request"); NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request");
return -EINVAL; return -EINVAL;
} }
if (rtm->rtm_flags & ~(RTM_F_CLONED | RTM_F_PREFIX)) { if (rtm->rtm_flags & ~(RTM_F_CLONED | RTM_F_PREFIX)) {
NL_SET_ERR_MSG(extack, "Invalid flags for FIB dump request"); NL_SET_ERR_MSG(extack, "Invalid flags for FIB dump request");
return -EINVAL; return -EINVAL;
} }
if (rtm->rtm_flags & RTM_F_CLONED)
filter->dump_routes = false;
else
filter->dump_exceptions = false;
filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC); filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC);
filter->flags = rtm->rtm_flags; filter->flags = rtm->rtm_flags;
...@@ -962,9 +967,10 @@ EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req); ...@@ -962,9 +967,10 @@ EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req);
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{ {
struct fib_dump_filter filter = { .dump_routes = true,
.dump_exceptions = true };
const struct nlmsghdr *nlh = cb->nlh; const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk); struct net *net = sock_net(skb->sk);
struct fib_dump_filter filter = {};
unsigned int h, s_h; unsigned int h, s_h;
unsigned int e = 0, s_e; unsigned int e = 0, s_e;
struct fib_table *tb; struct fib_table *tb;
...@@ -981,8 +987,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -981,8 +987,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED); filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED);
} }
/* fib entries are never clones and ipv4 does not use prefix flag */ /* ipv4 does not use prefix flag */
if (filter.flags & (RTM_F_PREFIX | RTM_F_CLONED)) if (filter.flags & RTM_F_PREFIX)
return skb->len; return skb->len;
if (filter.table_id) { if (filter.table_id) {
......
...@@ -2090,22 +2090,26 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, ...@@ -2090,22 +2090,26 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
{ {
unsigned int flags = NLM_F_MULTI; unsigned int flags = NLM_F_MULTI;
__be32 xkey = htonl(l->key); __be32 xkey = htonl(l->key);
int i, s_i, i_fa, s_fa, err;
struct fib_alias *fa; struct fib_alias *fa;
int i, s_i;
if (filter->filter_set) if (filter->filter_set ||
!filter->dump_exceptions || !filter->dump_routes)
flags |= NLM_F_DUMP_FILTERED; flags |= NLM_F_DUMP_FILTERED;
s_i = cb->args[4]; s_i = cb->args[4];
s_fa = cb->args[5];
i = 0; i = 0;
/* rcu_read_lock is hold by caller */ /* rcu_read_lock is hold by caller */
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
int err; struct fib_info *fi = fa->fa_info;
if (i < s_i) if (i < s_i)
goto next; goto next;
i_fa = 0;
if (tb->tb_id != fa->tb_id) if (tb->tb_id != fa->tb_id)
goto next; goto next;
...@@ -2114,29 +2118,43 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, ...@@ -2114,29 +2118,43 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
goto next; goto next;
if ((filter->protocol && if ((filter->protocol &&
fa->fa_info->fib_protocol != filter->protocol)) fi->fib_protocol != filter->protocol))
goto next; goto next;
if (filter->dev && if (filter->dev &&
!fib_info_nh_uses_dev(fa->fa_info, filter->dev)) !fib_info_nh_uses_dev(fi, filter->dev))
goto next; goto next;
} }
err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, if (filter->dump_routes && !s_fa) {
cb->nlh->nlmsg_seq, RTM_NEWROUTE, err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
tb->tb_id, fa->fa_type, cb->nlh->nlmsg_seq, RTM_NEWROUTE,
xkey, KEYLENGTH - fa->fa_slen, tb->tb_id, fa->fa_type,
fa->fa_tos, fa->fa_info, flags); xkey, KEYLENGTH - fa->fa_slen,
if (err < 0) { fa->fa_tos, fi, flags);
cb->args[4] = i; if (err < 0)
return err; goto stop;
i_fa++;
} }
if (filter->dump_exceptions) {
err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
&i_fa, s_fa);
if (err < 0)
goto stop;
}
next: next:
i++; i++;
} }
cb->args[4] = i; cb->args[4] = i;
return skb->len; return skb->len;
stop:
cb->args[4] = i;
cb->args[5] = i_fa;
return err;
} }
/* rcu_read_lock needs to be hold by caller from readside */ /* rcu_read_lock needs to be hold by caller from readside */
......
...@@ -2699,7 +2699,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, ...@@ -2699,7 +2699,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
r->rtm_family = AF_INET; r->rtm_family = AF_INET;
r->rtm_dst_len = 32; r->rtm_dst_len = 32;
r->rtm_src_len = 0; r->rtm_src_len = 0;
r->rtm_tos = fl4->flowi4_tos; r->rtm_tos = fl4 ? fl4->flowi4_tos : 0;
r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT; r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
if (nla_put_u32(skb, RTA_TABLE, table_id)) if (nla_put_u32(skb, RTA_TABLE, table_id))
goto nla_put_failure; goto nla_put_failure;
...@@ -2727,7 +2727,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, ...@@ -2727,7 +2727,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
goto nla_put_failure; goto nla_put_failure;
#endif #endif
if (!rt_is_input_route(rt) && if (fl4 && !rt_is_input_route(rt) &&
fl4->saddr != src) { fl4->saddr != src) {
if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr)) if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
goto nla_put_failure; goto nla_put_failure;
...@@ -2767,36 +2767,40 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, ...@@ -2767,36 +2767,40 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
if (rtnetlink_put_metrics(skb, metrics) < 0) if (rtnetlink_put_metrics(skb, metrics) < 0)
goto nla_put_failure; goto nla_put_failure;
if (fl4->flowi4_mark && if (fl4) {
nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) if (fl4->flowi4_mark &&
goto nla_put_failure; nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
goto nla_put_failure;
if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
nla_put_u32(skb, RTA_UID,
from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
goto nla_put_failure;
error = rt->dst.error; if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
nla_put_u32(skb, RTA_UID,
from_kuid_munged(current_user_ns(),
fl4->flowi4_uid)))
goto nla_put_failure;
if (rt_is_input_route(rt)) { if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE #ifdef CONFIG_IP_MROUTE
if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && if (ipv4_is_multicast(dst) &&
IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { !ipv4_is_local_multicast(dst) &&
int err = ipmr_get_route(net, skb, IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
fl4->saddr, fl4->daddr, int err = ipmr_get_route(net, skb,
r, portid); fl4->saddr, fl4->daddr,
r, portid);
if (err <= 0) {
if (err == 0) if (err <= 0) {
return 0; if (err == 0)
goto nla_put_failure; return 0;
} goto nla_put_failure;
} else }
} else
#endif #endif
if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif)) if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
goto nla_put_failure; goto nla_put_failure;
}
} }
error = rt->dst.error;
if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
goto nla_put_failure; goto nla_put_failure;
...@@ -2808,6 +2812,79 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, ...@@ -2808,6 +2812,79 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
return -EMSGSIZE; return -EMSGSIZE;
} }
static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, u32 table_id,
struct fnhe_hash_bucket *bucket, int genid,
int *fa_index, int fa_start)
{
int i;
for (i = 0; i < FNHE_HASH_SIZE; i++) {
struct fib_nh_exception *fnhe;
for (fnhe = rcu_dereference(bucket[i].chain); fnhe;
fnhe = rcu_dereference(fnhe->fnhe_next)) {
struct rtable *rt;
int err;
if (*fa_index < fa_start)
goto next;
if (fnhe->fnhe_genid != genid)
goto next;
if (fnhe->fnhe_expires &&
time_after(jiffies, fnhe->fnhe_expires))
goto next;
rt = rcu_dereference(fnhe->fnhe_rth_input);
if (!rt)
rt = rcu_dereference(fnhe->fnhe_rth_output);
if (!rt)
goto next;
err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt,
table_id, NULL, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq);
if (err)
return err;
next:
(*fa_index)++;
}
}
return 0;
}
int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
u32 table_id, struct fib_info *fi,
int *fa_index, int fa_start)
{
struct net *net = sock_net(cb->skb->sk);
int nhsel, genid = fnhe_genid(net);
for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
struct fnhe_hash_bucket *bucket;
int err;
if (nhc->nhc_flags & RTNH_F_DEAD)
continue;
bucket = rcu_dereference(nhc->nhc_exceptions);
if (!bucket)
continue;
err = fnhe_dump_bucket(net, skb, cb, table_id, bucket, genid,
fa_index, fa_start);
if (err)
return err;
}
return 0;
}
static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
u8 ip_proto, __be16 sport, u8 ip_proto, __be16 sport,
__be16 dport) __be16 dport)
......
...@@ -464,12 +464,19 @@ static int fib6_dump_node(struct fib6_walker *w) ...@@ -464,12 +464,19 @@ static int fib6_dump_node(struct fib6_walker *w)
struct fib6_info *rt; struct fib6_info *rt;
for_each_fib6_walker_rt(w) { for_each_fib6_walker_rt(w) {
res = rt6_dump_route(rt, w->args); res = rt6_dump_route(rt, w->args, w->skip_in_node);
if (res < 0) { if (res >= 0) {
/* Frame is full, suspend walking */ /* Frame is full, suspend walking */
w->leaf = rt; w->leaf = rt;
/* We'll restart from this node, so if some routes were
* already dumped, skip them next time.
*/
w->skip_in_node += res;
return 1; return 1;
} }
w->skip_in_node = 0;
/* Multipath routes are dumped in one route with the /* Multipath routes are dumped in one route with the
* RTA_MULTIPATH attribute. Jump 'rt' to point to the * RTA_MULTIPATH attribute. Jump 'rt' to point to the
...@@ -521,6 +528,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, ...@@ -521,6 +528,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
if (cb->args[4] == 0) { if (cb->args[4] == 0) {
w->count = 0; w->count = 0;
w->skip = 0; w->skip = 0;
w->skip_in_node = 0;
spin_lock_bh(&table->tb6_lock); spin_lock_bh(&table->tb6_lock);
res = fib6_walk(net, w); res = fib6_walk(net, w);
...@@ -536,6 +544,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, ...@@ -536,6 +544,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
w->state = FWS_INIT; w->state = FWS_INIT;
w->node = w->root; w->node = w->root;
w->skip = w->count; w->skip = w->count;
w->skip_in_node = 0;
} else } else
w->skip = 0; w->skip = 0;
...@@ -553,9 +562,10 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, ...@@ -553,9 +562,10 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{ {
struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
.filter.dump_routes = true };
const struct nlmsghdr *nlh = cb->nlh; const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk); struct net *net = sock_net(skb->sk);
struct rt6_rtnl_dump_arg arg = {};
unsigned int h, s_h; unsigned int h, s_h;
unsigned int e = 0, s_e; unsigned int e = 0, s_e;
struct fib6_walker *w; struct fib6_walker *w;
...@@ -572,13 +582,10 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -572,13 +582,10 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(nlh); struct rtmsg *rtm = nlmsg_data(nlh);
arg.filter.flags = rtm->rtm_flags & (RTM_F_PREFIX|RTM_F_CLONED); if (rtm->rtm_flags & RTM_F_PREFIX)
arg.filter.flags = RTM_F_PREFIX;
} }
/* fib entries are never clones */
if (arg.filter.flags & RTM_F_CLONED)
goto out;
w = (void *)cb->args[2]; w = (void *)cb->args[2];
if (!w) { if (!w) {
/* New dump: /* New dump:
...@@ -1589,7 +1596,8 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root, ...@@ -1589,7 +1596,8 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root,
if (plen == fn->fn_bit) if (plen == fn->fn_bit)
return fn; return fn;
prev = fn; if (fn->fn_flags & RTN_RTINFO)
prev = fn;
next: next:
/* /*
...@@ -2096,6 +2104,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, ...@@ -2096,6 +2104,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
c.w.func = fib6_clean_node; c.w.func = fib6_clean_node;
c.w.count = 0; c.w.count = 0;
c.w.skip = 0; c.w.skip = 0;
c.w.skip_in_node = 0;
c.func = func; c.func = func;
c.sernum = sernum; c.sernum = sernum;
c.arg = arg; c.arg = arg;
......
...@@ -3840,7 +3840,8 @@ static int ip6_route_del(struct fib6_config *cfg, ...@@ -3840,7 +3840,8 @@ static int ip6_route_del(struct fib6_config *cfg,
for_each_fib6_node_rt_rcu(fn) { for_each_fib6_node_rt_rcu(fn) {
struct fib6_nh *nh; struct fib6_nh *nh;
if (rt->nh && rt->nh->id != cfg->fc_nh_id) if (rt->nh && cfg->fc_nh_id &&
rt->nh->id != cfg->fc_nh_id)
continue; continue;
if (cfg->fc_flags & RTF_CACHE) { if (cfg->fc_flags & RTF_CACHE) {
...@@ -5521,33 +5522,129 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i, ...@@ -5521,33 +5522,129 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i,
return false; return false;
} }
int rt6_dump_route(struct fib6_info *rt, void *p_arg) struct fib6_nh_exception_dump_walker {
struct rt6_rtnl_dump_arg *dump;
struct fib6_info *rt;
unsigned int flags;
unsigned int skip;
unsigned int count;
};
static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg)
{
struct fib6_nh_exception_dump_walker *w = arg;
struct rt6_rtnl_dump_arg *dump = w->dump;
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
int i, err;
bucket = fib6_nh_get_excptn_bucket(nh, NULL);
if (!bucket)
return 0;
for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
if (w->skip) {
w->skip--;
continue;
}
/* Expiration of entries doesn't bump sernum, insertion
* does. Removal is triggered by insertion, so we can
* rely on the fact that if entries change between two
* partial dumps, this node is scanned again completely,
* see rt6_insert_exception() and fib6_dump_table().
*
* Count expired entries we go through as handled
* entries that we'll skip next time, in case of partial
* node dump. Otherwise, if entries expire meanwhile,
* we'll skip the wrong amount.
*/
if (rt6_check_expired(rt6_ex->rt6i)) {
w->count++;
continue;
}
err = rt6_fill_node(dump->net, dump->skb, w->rt,
&rt6_ex->rt6i->dst, NULL, NULL, 0,
RTM_NEWROUTE,
NETLINK_CB(dump->cb->skb).portid,
dump->cb->nlh->nlmsg_seq, w->flags);
if (err)
return err;
w->count++;
}
bucket++;
}
return 0;
}
/* Return -1 if done with node, number of handled routes on partial dump */
int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
{ {
struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
struct fib_dump_filter *filter = &arg->filter; struct fib_dump_filter *filter = &arg->filter;
unsigned int flags = NLM_F_MULTI; unsigned int flags = NLM_F_MULTI;
struct net *net = arg->net; struct net *net = arg->net;
int count = 0;
if (rt == net->ipv6.fib6_null_entry) if (rt == net->ipv6.fib6_null_entry)
return 0; return -1;
if ((filter->flags & RTM_F_PREFIX) && if ((filter->flags & RTM_F_PREFIX) &&
!(rt->fib6_flags & RTF_PREFIX_RT)) { !(rt->fib6_flags & RTF_PREFIX_RT)) {
/* success since this is not a prefix route */ /* success since this is not a prefix route */
return 1; return -1;
} }
if (filter->filter_set) { if (filter->filter_set &&
if ((filter->rt_type && rt->fib6_type != filter->rt_type) || ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
(filter->dev && !fib6_info_uses_dev(rt, filter->dev)) || (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
(filter->protocol && rt->fib6_protocol != filter->protocol)) { (filter->protocol && rt->fib6_protocol != filter->protocol))) {
return 1; return -1;
} }
if (filter->filter_set ||
!filter->dump_routes || !filter->dump_exceptions) {
flags |= NLM_F_DUMP_FILTERED; flags |= NLM_F_DUMP_FILTERED;
} }
return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0, if (filter->dump_routes) {
RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid, if (skip) {
arg->cb->nlh->nlmsg_seq, flags); skip--;
} else {
if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL,
0, RTM_NEWROUTE,
NETLINK_CB(arg->cb->skb).portid,
arg->cb->nlh->nlmsg_seq, flags)) {
return 0;
}
count++;
}
}
if (filter->dump_exceptions) {
struct fib6_nh_exception_dump_walker w = { .dump = arg,
.rt = rt,
.flags = flags,
.skip = skip,
.count = 0 };
int err;
if (rt->nh) {
err = nexthop_for_each_fib6_nh(rt->nh,
rt6_nh_dump_exceptions,
&w);
} else {
err = rt6_nh_dump_exceptions(rt->fib6_nh, &w);
}
if (err)
return count += w.count;
}
return -1;
} }
static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
......
...@@ -112,6 +112,10 @@ ...@@ -112,6 +112,10 @@
# - cleanup_ipv6_exception # - cleanup_ipv6_exception
# Same as above, but use IPv6 transport from A to B # Same as above, but use IPv6 transport from A to B
# #
# - list_flush_ipv4_exception
# Using the same topology as in pmtu_ipv4, create exceptions, and check
# they are shown when listing exception caches, gone after flushing them
#
# - list_flush_ipv6_exception # - list_flush_ipv6_exception
# Using the same topology as in pmtu_ipv6, create exceptions, and check # Using the same topology as in pmtu_ipv6, create exceptions, and check
# they are shown when listing exception caches, gone after flushing them # they are shown when listing exception caches, gone after flushing them
...@@ -156,6 +160,7 @@ tests=" ...@@ -156,6 +160,7 @@ tests="
pmtu_vti6_link_change_mtu vti6: MTU changes on link changes 0 pmtu_vti6_link_change_mtu vti6: MTU changes on link changes 0
cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1 cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1
cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1 cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1
list_flush_ipv4_exception ipv4: list and flush cached exceptions 1
list_flush_ipv6_exception ipv6: list and flush cached exceptions 1" list_flush_ipv6_exception ipv6: list and flush cached exceptions 1"
NS_A="ns-A" NS_A="ns-A"
...@@ -1207,6 +1212,61 @@ run_test_nh() { ...@@ -1207,6 +1212,61 @@ run_test_nh() {
USE_NH=no USE_NH=no
} }
test_list_flush_ipv4_exception() {
setup namespaces routing || return 2
trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
"${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
"${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
"${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
dst_prefix1="${prefix4}.${b_r1}."
dst2="${prefix4}.${b_r2}.1"
# Set up initial MTU values
mtu "${ns_a}" veth_A-R1 2000
mtu "${ns_r1}" veth_R1-A 2000
mtu "${ns_r1}" veth_R1-B 1500
mtu "${ns_b}" veth_B-R1 1500
mtu "${ns_a}" veth_A-R2 2000
mtu "${ns_r2}" veth_R2-A 2000
mtu "${ns_r2}" veth_R2-B 1500
mtu "${ns_b}" veth_B-R2 1500
fail=0
# Add 100 addresses for veth endpoint on B reached by default A route
for i in $(seq 100 199); do
run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
done
# Create 100 cached route exceptions for path via R1, one via R2. Note
# that with IPv4 we need to actually cause a route lookup that matches
# the exception caused by ICMP, in order to actually have a cached
# route, so we need to ping each destination twice
for i in $(seq 100 199); do
run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst_prefix1}${i}"
done
run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst2}"
# Each exception is printed as two lines
if [ "$(${ns_a} ip route list cache | wc -l)" -ne 202 ]; then
err " can't list cached exceptions"
fail=1
fi
run_cmd ${ns_a} ip route flush cache
pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst_prefix}1)"
pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst_prefix}2)"
if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
[ -n "$(${ns_a} ip route list cache)" ]; then
err " can't flush cached exceptions"
fail=1
fi
return ${fail}
}
test_list_flush_ipv6_exception() { test_list_flush_ipv6_exception() {
setup namespaces routing || return 2 setup namespaces routing || return 2
trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
...@@ -1214,7 +1274,7 @@ test_list_flush_ipv6_exception() { ...@@ -1214,7 +1274,7 @@ test_list_flush_ipv6_exception() {
"${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
"${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
dst1="${prefix6}:${b_r1}::1" dst_prefix1="${prefix6}:${b_r1}::"
dst2="${prefix6}:${b_r2}::1" dst2="${prefix6}:${b_r2}::1"
# Set up initial MTU values # Set up initial MTU values
...@@ -1230,20 +1290,26 @@ test_list_flush_ipv6_exception() { ...@@ -1230,20 +1290,26 @@ test_list_flush_ipv6_exception() {
fail=0 fail=0
# Create route exceptions # Add 100 addresses for veth endpoint on B reached by default A route
run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} for i in $(seq 100 199); do
run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
done
if [ "$(${ns_a} ip -6 route list cache | wc -l)" -ne 2 ]; then # Create 100 cached route exceptions for path via R1, one via R2
for i in $(seq 100 199); do
run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst_prefix1}${i}"
done
run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst2}"
if [ "$(${ns_a} ip -6 route list cache | wc -l)" -ne 101 ]; then
err " can't list cached exceptions" err " can't list cached exceptions"
fail=1 fail=1
fi fi
run_cmd ${ns_a} ip -6 route flush cache run_cmd ${ns_a} ip -6 route flush cache
sleep 1 pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ]; then if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
[ -n "$(${ns_a} ip -6 route list cache)" ]; then
err " can't flush cached exceptions" err " can't flush cached exceptions"
fail=1 fail=1
fi fi
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment