Commit 0565de29 authored by David S. Miller's avatar David S. Miller

Merge branch 'ipv6-Separate-data-structures-for-FIB-and-data-path'

David Ahern says:

====================
net/ipv6: Separate data structures for FIB and data path

IPv6 uses the same data struct for both control plane (FIB entries) and
data path (dst entries). This struct has elements needed for both paths
adding memory overhead and complexity (taking a dst hold in most places
but an additional reference on rt6i_ref in a few). Furthermore, because
of the dst_alloc tie, all FIB entries are allocated with GFP_ATOMIC.

This patch set separates FIB entries from dst entries, better aligning
IPv6 code with IPv4, simplifying the reference counting and allowing
FIB entries added by userspace (not autoconf) to use GFP_KERNEL. It is
first step to a number of performance and scalability changes.

The end result of this patch set:
  - FIB entries (fib6_info):
        /* size: 208, cachelines: 4, members: 25 */
        /* sum members: 207, holes: 1, sum holes: 1 */

  - dst entries (rt6_info)
       /* size: 240, cachelines: 4, members: 11 */

Versus the the single rt6_info struct today for both paths:
      /* size: 320, cachelines: 5, members: 28 */

This amounts to a 35% reduction in memory use for FIB entries and a
25% reduction for dst entries.

With respect to locking FIB entries use RCU and a single atomic
counter with fib6_info_hold and fib6_info_release helpers to manage
the reference counting. dst entries use only the traditional dst
refcounts with dst_hold and dst_release.

FIB entries for host routes are referenced by inet6_ifaddr and
ifacaddr6. In both cases, additional holds are taken -- similar to
what is done for devices.

This set is the first of many changes to improve the scalability of the
IPv6 code. Follow on changes include:
- consolidating duplicate fib6_info references like IPv4 does with
  duplicate fib_info

- moving fib6_info into a slab cache to avoid allocation roundups to
  power of 2 (the 208 size becomes a 256 actual allocation)

- Allow FIB lookups without generating a dst (e.g., most rt6_lookup
  users just want to verify the egress device). Means moving dst
  allocation to the other side of fib6_rule_lookup which again aligns
  with IPv4 behavior

- using separate standalone nexthop objects which have performance
  benefits beyond fib_info consolidation

At this point I am not seeing any refcount leaks or underflows, no
oops or bug_ons, or warnings from kasan, so I think it is ready for
others to beat up on it finding errors in code paths I have missed.

v2 changes
- rebased to top of tree
- improved commit message on patch 7

v1 changes
- rebased to top of tree
- fix memory leak of metrics as noted by Ido
- MTU fixes based on pmtu tests (thanks Stefano Brivio for writing)

RFC v2 changes
- improved commit messages
- move common metrics code from dst.c to net/ipv4/metrics.c (comment
  from DaveM)
- address comments from Wei Wang and Martin KaFai Lau (let me know if
  I missed something)
- fixes detected by kernel test robots
  + added fib6_metric_set to change metric on a FIB entry which could
    be pointing to read-only dst_default_metrics
  + 0day testing found a problem with an intermediate patch; added
    dst_hold_safe on rt->from. Code is removed 3 patches later
- allow cacheinfo to handle NULL dst; means only expires is pushed to
  userspace
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents a2d481b3 77634cc6
......@@ -48,6 +48,9 @@ static unsigned int vrf_net_id;
struct net_vrf {
struct rtable __rcu *rth;
struct rt6_info __rcu *rt6;
#if IS_ENABLED(CONFIG_IPV6)
struct fib6_table *fib6_table;
#endif
u32 tb_id;
};
......@@ -496,7 +499,6 @@ static int vrf_rt6_create(struct net_device *dev)
int flags = DST_HOST | DST_NOPOLICY | DST_NOXFRM;
struct net_vrf *vrf = netdev_priv(dev);
struct net *net = dev_net(dev);
struct fib6_table *rt6i_table;
struct rt6_info *rt6;
int rc = -ENOMEM;
......@@ -504,8 +506,8 @@ static int vrf_rt6_create(struct net_device *dev)
if (!ipv6_mod_enabled())
return 0;
rt6i_table = fib6_new_table(net, vrf->tb_id);
if (!rt6i_table)
vrf->fib6_table = fib6_new_table(net, vrf->tb_id);
if (!vrf->fib6_table)
goto out;
/* create a dst for routing packets out a VRF device */
......@@ -513,7 +515,6 @@ static int vrf_rt6_create(struct net_device *dev)
if (!rt6)
goto out;
rt6->rt6i_table = rt6i_table;
rt6->dst.output = vrf_output6;
rcu_assign_pointer(vrf->rt6, rt6);
......@@ -946,22 +947,8 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
int flags)
{
struct net_vrf *vrf = netdev_priv(dev);
struct fib6_table *table = NULL;
struct rt6_info *rt6;
rcu_read_lock();
/* fib6_table does not have a refcnt and can not be freed */
rt6 = rcu_dereference(vrf->rt6);
if (likely(rt6))
table = rt6->rt6i_table;
rcu_read_unlock();
if (!table)
return NULL;
return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
return ip6_pol_route(net, vrf->fib6_table, ifindex, fl6, skb, flags);
}
static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
......
......@@ -64,7 +64,7 @@ struct inet6_ifaddr {
struct delayed_work dad_work;
struct inet6_dev *idev;
struct rt6_info *rt;
struct fib6_info *rt;
struct hlist_node addr_lst;
struct list_head if_list;
......@@ -144,7 +144,7 @@ struct ipv6_ac_socklist {
struct ifacaddr6 {
struct in6_addr aca_addr;
struct inet6_dev *aca_idev;
struct rt6_info *aca_rt;
struct fib6_info *aca_rt;
struct ifacaddr6 *aca_next;
int aca_users;
refcount_t aca_refcnt;
......
......@@ -396,6 +396,9 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
}
int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
u32 *metrics);
u32 ip_idents_reserve(u32 hash, int segs);
void __ip_select_ident(struct net *net, struct iphdr *iph, int segs);
......
......@@ -38,6 +38,7 @@
#endif
struct rt6_info;
struct fib6_info;
struct fib6_config {
u32 fc_table;
......@@ -74,12 +75,12 @@ struct fib6_node {
#ifdef CONFIG_IPV6_SUBTREES
struct fib6_node __rcu *subtree;
#endif
struct rt6_info __rcu *leaf;
struct fib6_info __rcu *leaf;
__u16 fn_bit; /* bit key */
__u16 fn_flags;
int fn_sernum;
struct rt6_info __rcu *rr_ptr;
struct fib6_info __rcu *rr_ptr;
struct rcu_head rcu;
};
......@@ -94,11 +95,6 @@ struct fib6_gc_args {
#define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1))
#endif
struct mx6_config {
const u32 *mx;
DECLARE_BITMAP(mx_valid, RTAX_MAX);
};
/*
* routing information
*
......@@ -127,56 +123,72 @@ struct rt6_exception {
#define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT)
#define FIB6_MAX_DEPTH 5
struct rt6_info {
struct dst_entry dst;
struct rt6_info __rcu *rt6_next;
struct rt6_info *from;
struct fib6_nh {
struct in6_addr nh_gw;
struct net_device *nh_dev;
struct lwtunnel_state *nh_lwtstate;
/*
* Tail elements of dst_entry (__refcnt etc.)
* and these elements (rarely used in hot path) are in
* the same cache line.
*/
unsigned int nh_flags;
atomic_t nh_upper_bound;
int nh_weight;
};
struct fib6_info {
struct fib6_table *rt6i_table;
struct fib6_info __rcu *rt6_next;
struct fib6_node __rcu *rt6i_node;
struct in6_addr rt6i_gateway;
/* Multipath routes:
* siblings is a list of rt6_info that have the the same metric/weight,
* siblings is a list of fib6_info that have the the same metric/weight,
* destination, but not the same gateway. nsiblings is just a cache
* to speed up lookup.
*/
struct list_head rt6i_siblings;
unsigned int rt6i_nsiblings;
atomic_t rt6i_nh_upper_bound;
atomic_t rt6i_ref;
struct inet6_dev *rt6i_idev;
unsigned long expires;
struct dst_metrics *fib6_metrics;
#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1]
unsigned int rt6i_nh_flags;
/* These are in a separate cache line. */
struct rt6key rt6i_dst ____cacheline_aligned_in_smp;
struct rt6key rt6i_dst;
u32 rt6i_flags;
struct rt6key rt6i_src;
struct rt6key rt6i_prefsrc;
struct list_head rt6i_uncached;
struct uncached_list *rt6i_uncached_list;
struct inet6_dev *rt6i_idev;
struct rt6_info * __percpu *rt6i_pcpu;
struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
u32 rt6i_metric;
u32 rt6i_pmtu;
/* more non-fragment space at head required */
int rt6i_nh_weight;
unsigned short rt6i_nfheader_len;
u8 rt6i_protocol;
u8 fib6_type;
u8 exception_bucket_flushed:1,
should_flush:1,
unused:6;
dst_nocount:1,
dst_nopolicy:1,
dst_host:1,
unused:3;
struct fib6_nh fib6_nh;
};
struct rt6_info {
struct dst_entry dst;
struct fib6_info *from;
struct rt6key rt6i_dst;
struct rt6key rt6i_src;
struct in6_addr rt6i_gateway;
struct inet6_dev *rt6i_idev;
u32 rt6i_flags;
struct rt6key rt6i_prefsrc;
struct list_head rt6i_uncached;
struct uncached_list *rt6i_uncached_list;
/* more non-fragment space at head required */
unsigned short rt6i_nfheader_len;
};
#define for_each_fib6_node_rt_rcu(fn) \
......@@ -192,6 +204,26 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
return ((struct rt6_info *)dst)->rt6i_idev;
}
static inline void fib6_clean_expires(struct fib6_info *f6i)
{
f6i->rt6i_flags &= ~RTF_EXPIRES;
f6i->expires = 0;
}
static inline void fib6_set_expires(struct fib6_info *f6i,
unsigned long expires)
{
f6i->expires = expires;
f6i->rt6i_flags |= RTF_EXPIRES;
}
static inline bool fib6_check_expired(const struct fib6_info *f6i)
{
if (f6i->rt6i_flags & RTF_EXPIRES)
return time_after(jiffies, f6i->expires);
return false;
}
static inline void rt6_clean_expires(struct rt6_info *rt)
{
rt->rt6i_flags &= ~RTF_EXPIRES;
......@@ -206,11 +238,9 @@ static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires)
static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
{
struct rt6_info *rt;
if (!(rt0->rt6i_flags & RTF_EXPIRES) && rt0->from)
rt0->dst.expires = rt0->from->expires;
for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from);
if (rt && rt != rt0)
rt0->dst.expires = rt->dst.expires;
dst_set_expires(&rt0->dst, timeout);
rt0->rt6i_flags |= RTF_EXPIRES;
}
......@@ -220,7 +250,7 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
* Return true if we can get cookie safely
* Return false if not
*/
static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
static inline bool rt6_get_cookie_safe(const struct fib6_info *rt,
u32 *cookie)
{
struct fib6_node *fn;
......@@ -246,9 +276,7 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
if (rt->rt6i_flags & RTF_PCPU ||
(unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
rt = rt->from;
rt6_get_cookie_safe(rt, &cookie);
rt6_get_cookie_safe(rt->from, &cookie);
return cookie;
}
......@@ -262,20 +290,18 @@ static inline void ip6_rt_put(struct rt6_info *rt)
dst_release(&rt->dst);
}
void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
struct fib6_info *fib6_info_alloc(gfp_t gfp_flags);
void fib6_info_destroy(struct fib6_info *f6i);
static inline void rt6_hold(struct rt6_info *rt)
static inline void fib6_info_hold(struct fib6_info *f6i)
{
atomic_inc(&rt->rt6i_ref);
atomic_inc(&f6i->rt6i_ref);
}
static inline void rt6_release(struct rt6_info *rt)
static inline void fib6_info_release(struct fib6_info *f6i)
{
if (atomic_dec_and_test(&rt->rt6i_ref)) {
rt6_free_pcpu(rt);
dst_dev_put(&rt->dst);
dst_release(&rt->dst);
}
if (f6i && atomic_dec_and_test(&f6i->rt6i_ref))
fib6_info_destroy(f6i);
}
enum fib6_walk_state {
......@@ -291,7 +317,7 @@ enum fib6_walk_state {
struct fib6_walker {
struct list_head lh;
struct fib6_node *root, *node;
struct rt6_info *leaf;
struct fib6_info *leaf;
enum fib6_walk_state state;
unsigned int skip;
unsigned int count;
......@@ -355,7 +381,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
struct rt6_info *rt;
struct fib6_info *rt;
};
/*
......@@ -377,15 +403,14 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
const struct in6_addr *saddr, int src_len,
bool exact_match);
void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg),
void *arg);
int fib6_add(struct fib6_node *root, struct rt6_info *rt,
struct nl_info *info, struct mx6_config *mxc,
struct netlink_ext_ack *extack);
int fib6_del(struct rt6_info *rt, struct nl_info *info);
int fib6_add(struct fib6_node *root, struct fib6_info *rt,
struct nl_info *info, struct netlink_ext_ack *extack);
int fib6_del(struct fib6_info *rt, struct nl_info *info);
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int flags);
void fib6_run_gc(unsigned long expires, struct net *net, bool force);
......@@ -408,8 +433,14 @@ void __net_exit fib6_notifier_exit(struct net *net);
unsigned int fib6_tables_seq_read(struct net *net);
int fib6_tables_dump(struct net *net, struct notifier_block *nb);
void fib6_update_sernum(struct rt6_info *rt);
void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt);
void fib6_update_sernum(struct net *net, struct fib6_info *rt);
void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt);
void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val);
static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
{
return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
}
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
int fib6_rules_init(void);
......
......@@ -66,7 +66,7 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}
static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
static inline bool rt6_qualify_for_ecmp(const struct fib6_info *rt)
{
return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
RTF_GATEWAY;
......@@ -100,22 +100,21 @@ void ip6_route_cleanup(void);
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg);
int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack);
int ip6_ins_rt(struct rt6_info *);
int ip6_del_rt(struct rt6_info *);
int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
int ip6_ins_rt(struct net *net, struct fib6_info *rt);
int ip6_del_rt(struct net *net, struct fib6_info *rt);
void rt6_flush_exceptions(struct rt6_info *rt);
int rt6_remove_exception_rt(struct rt6_info *rt);
void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args,
void rt6_flush_exceptions(struct fib6_info *rt);
void rt6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args,
unsigned long now);
static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *rt,
const struct in6_addr *daddr,
unsigned int prefs,
struct in6_addr *saddr)
{
struct inet6_dev *idev =
rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
struct inet6_dev *idev = rt ? rt->rt6i_idev : NULL;
int err = 0;
if (rt && rt->rt6i_prefsrc.plen)
......@@ -137,8 +136,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
void fib6_force_start_gc(struct net *net);
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
const struct in6_addr *addr, bool anycast);
struct fib6_info *addrconf_dst_alloc(struct net *net, struct inet6_dev *idev,
const struct in6_addr *addr, bool anycast,
gfp_t gfp_flags);
struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
int flags);
......@@ -147,9 +147,11 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
* support functions for ND
*
*/
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr,
struct fib6_info *rt6_get_dflt_router(struct net *net,
const struct in6_addr *addr,
struct net_device *dev);
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
struct fib6_info *rt6_add_dflt_router(struct net *net,
const struct in6_addr *gwaddr,
struct net_device *dev, unsigned int pref);
void rt6_purge_dflt_routers(struct net *net);
......@@ -174,14 +176,14 @@ struct rt6_rtnl_dump_arg {
struct net *net;
};
int rt6_dump_route(struct rt6_info *rt, void *p_arg);
int rt6_dump_route(struct fib6_info *rt, void *p_arg);
void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
void rt6_disable_ip(struct net_device *dev, unsigned long event);
void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
void rt6_multipath_rebalance(struct rt6_info *rt);
void rt6_multipath_rebalance(struct fib6_info *rt);
void rt6_uncached_list_add(struct rt6_info *rt);
void rt6_uncached_list_del(struct rt6_info *rt);
......@@ -269,12 +271,15 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
return daddr;
}
static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
{
return a->dst.dev == b->dst.dev &&
return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev &&
a->rt6i_idev == b->rt6i_idev &&
ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
!lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) &&
!lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate);
}
struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
struct net_device *dev, struct sk_buff *skb,
const void *daddr);
#endif
......@@ -60,6 +60,7 @@ struct netns_ipv6 {
#endif
struct xt_table *ip6table_nat;
#endif
struct fib6_info *fib6_null_entry;
struct rt6_info *ip6_null_entry;
struct rt6_statistics *rt6_stats;
struct timer_list ip6_fib_timer;
......
......@@ -58,6 +58,7 @@ const struct dst_metrics dst_default_metrics = {
*/
.refcnt = REFCOUNT_INIT(1),
};
EXPORT_SYMBOL(dst_default_metrics);
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
struct net_device *dev, int initial_ref, int initial_obsolete,
......
......@@ -785,13 +785,15 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
long expires, u32 error)
{
struct rta_cacheinfo ci = {
.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse),
.rta_used = dst->__use,
.rta_clntref = atomic_read(&(dst->__refcnt)),
.rta_error = error,
.rta_id = id,
};
if (dst) {
ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse);
ci.rta_used = dst->__use;
ci.rta_clntref = atomic_read(&dst->__refcnt);
}
if (expires) {
unsigned long clock;
......
......@@ -13,7 +13,8 @@ obj-y := route.o inetpeer.o protocol.o \
tcp_offload.o datagram.o raw.o udp.o udplite.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
metrics.o
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
......
......@@ -1019,47 +1019,8 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
static int
fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
{
bool ecn_ca = false;
struct nlattr *nla;
int remaining;
if (!cfg->fc_mx)
return 0;
nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
int type = nla_type(nla);
u32 val;
if (!type)
continue;
if (type > RTAX_MAX)
return -EINVAL;
if (type == RTAX_CC_ALGO) {
char tmp[TCP_CA_NAME_MAX];
nla_strlcpy(tmp, nla, sizeof(tmp));
val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
if (val == TCP_CA_UNSPEC)
return -EINVAL;
} else {
val = nla_get_u32(nla);
}
if (type == RTAX_ADVMSS && val > 65535 - 40)
val = 65535 - 40;
if (type == RTAX_MTU && val > 65535 - 15)
val = 65535 - 15;
if (type == RTAX_HOPLIMIT && val > 255)
val = 255;
if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
return -EINVAL;
fi->fib_metrics->metrics[type - 1] = val;
}
if (ecn_ca)
fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
return 0;
return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len,
fi->fib_metrics->metrics);
}
struct fib_info *fib_create_info(struct fib_config *cfg,
......
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/types.h>
#include <net/ip.h>
#include <net/net_namespace.h>
#include <net/tcp.h>
int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
u32 *metrics)
{
bool ecn_ca = false;
struct nlattr *nla;
int remaining;
if (!fc_mx)
return 0;
nla_for_each_attr(nla, fc_mx, fc_mx_len, remaining) {
int type = nla_type(nla);
u32 val;
if (!type)
continue;
if (type > RTAX_MAX)
return -EINVAL;
if (type == RTAX_CC_ALGO) {
char tmp[TCP_CA_NAME_MAX];
nla_strlcpy(tmp, nla, sizeof(tmp));
val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
if (val == TCP_CA_UNSPEC)
return -EINVAL;
} else {
val = nla_get_u32(nla);
}
if (type == RTAX_ADVMSS && val > 65535 - 40)
val = 65535 - 40;
if (type == RTAX_MTU && val > 65535 - 15)
val = 65535 - 15;
if (type == RTAX_HOPLIMIT && val > 255)
val = 255;
if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
return -EINVAL;
metrics[type - 1] = val;
}
if (ecn_ca)
metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
return 0;
}
EXPORT_SYMBOL_GPL(ip_metrics_convert);
This diff is collapsed.
......@@ -213,12 +213,12 @@ static void aca_put(struct ifacaddr6 *ac)
{
if (refcount_dec_and_test(&ac->aca_refcnt)) {
in6_dev_put(ac->aca_idev);
dst_release(&ac->aca_rt->dst);
fib6_info_release(ac->aca_rt);
kfree(ac);
}
}
static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
static struct ifacaddr6 *aca_alloc(struct fib6_info *rt,
const struct in6_addr *addr)
{
struct inet6_dev *idev = rt->rt6i_idev;
......@@ -231,6 +231,7 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
aca->aca_addr = *addr;
in6_dev_hold(idev);
aca->aca_idev = idev;
fib6_info_hold(rt);
aca->aca_rt = rt;
aca->aca_users = 1;
/* aca_tstamp should be updated upon changes */
......@@ -246,7 +247,8 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifacaddr6 *aca;
struct rt6_info *rt;
struct fib6_info *rt;
struct net *net;
int err;
ASSERT_RTNL();
......@@ -265,14 +267,15 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
}
}
rt = addrconf_dst_alloc(idev, addr, true);
net = dev_net(idev->dev);
rt = addrconf_dst_alloc(net, idev, addr, true, GFP_ATOMIC);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto out;
}
aca = aca_alloc(rt, addr);
if (!aca) {
ip6_rt_put(rt);
fib6_info_release(rt);
err = -ENOMEM;
goto out;
}
......@@ -286,7 +289,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
aca_get(aca);
write_unlock_bh(&idev->lock);
ip6_ins_rt(rt);
ip6_ins_rt(net, rt);
addrconf_join_solict(idev->dev, &aca->aca_addr);
......@@ -328,8 +331,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
write_unlock_bh(&idev->lock);
addrconf_leave_solict(idev, &aca->aca_addr);
dst_hold(&aca->aca_rt->dst);
ip6_del_rt(aca->aca_rt);
ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
aca_put(aca);
return 0;
......@@ -356,8 +358,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
addrconf_leave_solict(idev, &aca->aca_addr);
dst_hold(&aca->aca_rt->dst);
ip6_del_rt(aca->aca_rt);
ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
aca_put(aca);
......
This diff is collapsed.
......@@ -968,7 +968,8 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
if (!had_dst)
*dst = ip6_route_output(net, sk, fl6);
rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
err = ip6_route_get_saddr(net, rt, &fl6->daddr,
err = ip6_route_get_saddr(net, rt ? rt->from : NULL,
&fl6->daddr,
sk ? inet6_sk(sk)->srcprefs : 0,
&fl6->saddr);
if (err)
......
......@@ -1155,7 +1155,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
struct neighbour *neigh = NULL;
struct inet6_dev *in6_dev;
struct rt6_info *rt = NULL;
struct fib6_info *rt = NULL;
struct net *net;
int lifetime;
struct ndisc_options ndopts;
int optlen;
......@@ -1253,9 +1254,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
/* Do not accept RA with source-addr found on local machine unless
* accept_ra_from_local is set to true.
*/
net = dev_net(in6_dev->dev);
if (!in6_dev->cnf.accept_ra_from_local &&
ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
in6_dev->dev, 0)) {
ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) {
ND_PRINTK(2, info,
"RA from local address detected on dev: %s: default router ignored\n",
skb->dev->name);
......@@ -1272,20 +1273,22 @@ static void ndisc_router_discovery(struct sk_buff *skb)
pref = ICMPV6_ROUTER_PREF_MEDIUM;
#endif
rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
if (rt) {
neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
rt->fib6_nh.nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
"RA: %s got default router without neighbour\n",
__func__);
ip6_rt_put(rt);
fib6_info_release(rt);
return;
}
}
if (rt && lifetime == 0) {
ip6_del_rt(rt);
ip6_del_rt(net, rt);
rt = NULL;
}
......@@ -1294,7 +1297,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (!rt && lifetime) {
ND_PRINTK(3, info, "RA: adding default router\n");
rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
skb->dev, pref);
if (!rt) {
ND_PRINTK(0, err,
"RA: %s failed to add default route\n",
......@@ -1302,12 +1306,14 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
rt->fib6_nh.nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
"RA: %s got default router without neighbour\n",
__func__);
ip6_rt_put(rt);
fib6_info_release(rt);
return;
}
neigh->flags |= NTF_ROUTER;
......@@ -1316,13 +1322,12 @@ static void ndisc_router_discovery(struct sk_buff *skb)
}
if (rt)
rt6_set_expires(rt, jiffies + (HZ * lifetime));
fib6_set_expires(rt, jiffies + (HZ * lifetime));
if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
ra_msg->icmph.icmp6_hop_limit) {
if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
if (rt)
dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
fib6_metric_set(rt, RTAX_HOPLIMIT,
ra_msg->icmph.icmp6_hop_limit);
} else {
ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n");
......@@ -1475,10 +1480,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
} else if (in6_dev->cnf.mtu6 != mtu) {
in6_dev->cnf.mtu6 = mtu;
if (rt)
dst_metric_set(&rt->dst, RTAX_MTU, mtu);
fib6_metric_set(rt, RTAX_MTU, mtu);
rt6_mtu_change(skb->dev, mtu);
}
}
......@@ -1497,7 +1499,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
ND_PRINTK(2, warn, "RA: invalid RA options\n");
}
out:
ip6_rt_put(rt);
fib6_info_release(rt);
if (neigh)
neigh_release(neigh);
}
......
This diff is collapsed.
......@@ -107,8 +107,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
* it was magically lost, so this code needs audit */
xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
RTF_LOCAL);
xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
xdst->u.rt6.rt6i_node = rt->rt6i_node;
xdst->route_cookie = rt6_get_cookie(rt);
xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment