Commit 753c104b authored by David S. Miller

Merge branch 'vrf-fib-rule-improve'

David Ahern says:

====================
net: vrf: Improve use of FIB rules

Currently, VRFs require 1 oif and 1 iif rule per address family per
VRF. As the number of VRF devices increases it brings scalability
issues with the increasing rule list. All of the VRF rules have the
same format with the exception of the specific table id to direct the
lookup. Since the table id is available from the oif or iif in the
lookup, the VRF rules can be consolidated to a single rule that pulls
the table from the VRF device.

This solution still allows a user to insert their own rules for VRFs,
including rules with additional attributes. Accordingly, it is backwards
compatible with existing setups and allows other policy routing as
desired.

Hopefully v5 is the charm; my e-waste can is getting full.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 6278e03d 1aa6c4f6
......@@ -35,6 +35,7 @@
#include <net/route.h>
#include <net/addrconf.h>
#include <net/l3mdev.h>
#include <net/fib_rules.h>
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
......@@ -42,6 +43,9 @@
#define DRV_NAME "vrf"
#define DRV_VERSION "1.0"
#define FIB_RULE_PREF 1000 /* default preference for FIB rules */
/* true until vrf_newlink() has successfully installed the FIB rules once */
static bool add_fib_rules = true;
struct net_vrf {
struct rtable __rcu *rth;
struct rtable __rcu *rth_local;
......@@ -897,6 +901,91 @@ static const struct ethtool_ops vrf_ethtool_ops = {
.get_drvinfo = vrf_get_drvinfo,
};
/* Buffer size for the rule message assembled by vrf_fib_rule(): the
 * fib_rule_hdr payload followed by the FRA_L3MDEV and FRA_PRIORITY
 * attributes.
 */
static inline size_t vrf_fib_rule_nl_size(void)
{
	return NLMSG_ALIGN(sizeof(struct fib_rule_hdr)) +
	       nla_total_size(sizeof(u8)) +	/* FRA_L3MDEV */
	       nla_total_size(sizeof(u32));	/* FRA_PRIORITY */
}
/* Add or delete the l3mdev FIB rule for one address family.
 *
 * Builds an in-kernel netlink rule message (FR_ACT_TO_TBL, FRA_L3MDEV = 1,
 * FRA_PRIORITY = FIB_RULE_PREF) and passes it to fib_nl_newrule() when
 * @add_it is true or to fib_nl_delrule() when it is false.
 *
 * @dev:    device whose network namespace the rule is installed in
 * @family: address family placed in the rule header (e.g. AF_INET)
 * @add_it: true to add the rule, false to delete it
 *
 * Returns 0 on success, -ENOMEM if the skb cannot be allocated, -EMSGSIZE
 * if the message cannot be built, or the error returned by the rule
 * handler.  -EEXIST on add and -ENOENT on delete are treated as success.
 */
static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
{
	struct fib_rule_hdr *frh;
	struct nlmsghdr *nlh;
	struct sk_buff *skb;
	int err;

	skb = nlmsg_new(vrf_fib_rule_nl_size(), GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	nlh = nlmsg_put(skb, 0, 0, 0, sizeof(*frh), 0);
	if (!nlh)
		goto nla_put_failure;

	/* rule only needs to appear once; the flag has to be OR-ed in,
	 * and-ing it into the zero flags from nlmsg_put() leaves it clear
	 */
	nlh->nlmsg_flags |= NLM_F_EXCL;

	frh = nlmsg_data(nlh);
	memset(frh, 0, sizeof(*frh));
	frh->family = family;
	frh->action = FR_ACT_TO_TBL;

	/* FRA_L3MDEV is NLA_U8 in the rule policy and is parsed with
	 * nla_get_u8(), so it must be emitted as a u8 as well
	 */
	if (nla_put_u8(skb, FRA_L3MDEV, 1))
		goto nla_put_failure;

	if (nla_put_u32(skb, FRA_PRIORITY, FIB_RULE_PREF))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	/* fib_nl_{new,del}rule handling looks for net from skb->sk */
	skb->sk = dev_net(dev)->rtnl;
	if (add_it) {
		err = fib_nl_newrule(skb, nlh);
		if (err == -EEXIST)
			err = 0;
	} else {
		err = fib_nl_delrule(skb, nlh);
		if (err == -ENOENT)
			err = 0;
	}
	nlmsg_free(skb);

	return err;

nla_put_failure:
	nlmsg_free(skb);

	return -EMSGSIZE;
}
/* Install the l3mdev FIB rule for IPv4 and then IPv6.
 *
 * If the IPv6 rule cannot be added the IPv4 rule is removed again so that
 * no partial setup is left behind.  Any failure is logged against @dev.
 *
 * Returns 0 on success or the negative error from vrf_fib_rule().
 */
static int vrf_add_fib_rules(const struct net_device *dev)
{
	int rc = vrf_fib_rule(dev, AF_INET, true);

	if (rc >= 0) {
		rc = vrf_fib_rule(dev, AF_INET6, true);
		if (rc >= 0)
			return 0;

		/* undo the IPv4 rule added above */
		vrf_fib_rule(dev, AF_INET, false);
	}

	netdev_err(dev, "Failed to add FIB rules.\n");
	return rc;
}
static void vrf_setup(struct net_device *dev)
{
ether_setup(dev);
......@@ -937,6 +1026,7 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct net_vrf *vrf = netdev_priv(dev);
int err;
if (!data || !data[IFLA_VRF_TABLE])
return -EINVAL;
......@@ -945,7 +1035,21 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
dev->priv_flags |= IFF_L3MDEV_MASTER;
return register_netdevice(dev);
err = register_netdevice(dev);
if (err)
goto out;
if (add_fib_rules) {
err = vrf_add_fib_rules(dev);
if (err) {
unregister_netdevice(dev);
goto out;
}
add_fib_rules = false;
}
out:
return err;
}
static size_t vrf_nl_getsize(const struct net_device *dev)
......
......@@ -17,7 +17,8 @@ struct fib_rule {
u32 flags;
u32 table;
u8 action;
/* 3 bytes hole, try to use */
u8 l3mdev;
/* 2 bytes hole, try to use */
u32 target;
__be64 tun_id;
struct fib_rule __rcu *ctarget;
......@@ -36,6 +37,7 @@ struct fib_lookup_arg {
void *lookup_ptr;
void *result;
struct fib_rule *rule;
u32 table;
int flags;
#define FIB_LOOKUP_NOREF 1
#define FIB_LOOKUP_IGNORE_LINKSTATE 2
......@@ -89,7 +91,8 @@ struct fib_rules_ops {
[FRA_TABLE] = { .type = NLA_U32 }, \
[FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \
[FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \
[FRA_GOTO] = { .type = NLA_U32 }
[FRA_GOTO] = { .type = NLA_U32 }, \
[FRA_L3MDEV] = { .type = NLA_U8 }
static inline void fib_rule_get(struct fib_rule *rule)
{
......@@ -102,6 +105,20 @@ static inline void fib_rule_put(struct fib_rule *rule)
kfree_rcu(rule, rcu);
}
/* Pick the table id a rule should look up in.
 *
 * With CONFIG_NET_L3_MASTER_DEV, a rule that has l3mdev set takes the
 * table from the lookup argument; in every other case the rule's own
 * table is used.
 */
static inline u32 fib_rule_get_table(struct fib_rule *rule,
				     struct fib_lookup_arg *arg)
{
#ifdef CONFIG_NET_L3_MASTER_DEV
	if (rule->l3mdev)
		return arg->table;
#endif
	return rule->table;
}
static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
{
if (nla[FRA_TABLE])
......@@ -117,4 +134,7 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
struct fib_lookup_arg *);
int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
u32 flags);
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh);
int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh);
#endif
......@@ -11,6 +11,8 @@
#ifndef _NET_L3MDEV_H_
#define _NET_L3MDEV_H_
#include <net/fib_rules.h>
/**
* struct l3mdev_ops - l3mdev operations
*
......@@ -41,6 +43,9 @@ struct l3mdev_ops {
#ifdef CONFIG_NET_L3_MASTER_DEV
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
struct fib_lookup_arg *arg);
int l3mdev_master_ifindex_rcu(const struct net_device *dev);
static inline int l3mdev_master_ifindex(struct net_device *dev)
{
......@@ -236,6 +241,13 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
{
return skb;
}
/* Stub variant of l3mdev_fib_rule_match(): always reports a match (1) and
 * leaves @arg untouched.
 * NOTE(review): presumably the !CONFIG_NET_L3_MASTER_DEV branch - confirm
 * against the enclosing #ifdef.
 */
static inline int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
					struct fib_lookup_arg *arg)
{
	return 1;
}
#endif
#endif /* _NET_L3MDEV_H_ */
......@@ -50,6 +50,7 @@ enum {
FRA_FWMASK, /* mask for netfilter mark */
FRA_OIFNAME,
FRA_PAD,
FRA_L3MDEV, /* iif or oif is l3mdev goto its table */
__FRA_MAX
};
......
......@@ -173,7 +173,8 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
EXPORT_SYMBOL_GPL(fib_rules_unregister);
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags)
struct flowi *fl, int flags,
struct fib_lookup_arg *arg)
{
int ret = 0;
......@@ -189,6 +190,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id))
goto out;
if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
goto out;
ret = ops->match(rule, fl, flags);
out:
return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
......@@ -204,7 +208,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
if (!fib_rule_match(rule, ops, fl, flags))
if (!fib_rule_match(rule, ops, fl, flags, arg))
continue;
if (rule->action == FR_ACT_GOTO) {
......@@ -265,7 +269,7 @@ static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
return err;
}
static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
......@@ -336,6 +340,14 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
if (tb[FRA_TUN_ID])
rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
if (tb[FRA_L3MDEV]) {
#ifdef CONFIG_NET_L3_MASTER_DEV
rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
if (rule->l3mdev != 1)
#endif
goto errout_free;
}
rule->action = frh->action;
rule->flags = frh->flags;
rule->table = frh_get_table(frh, tb);
......@@ -371,6 +383,9 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
} else if (rule->action == FR_ACT_GOTO)
goto errout_free;
if (rule->l3mdev && rule->table)
goto errout_free;
err = ops->configure(rule, skb, frh, tb);
if (err < 0)
goto errout_free;
......@@ -424,8 +439,9 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
rules_ops_put(ops);
return err;
}
EXPORT_SYMBOL_GPL(fib_nl_newrule);
static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
......@@ -483,6 +499,10 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
(rule->tun_id != nla_get_be64(tb[FRA_TUN_ID])))
continue;
if (tb[FRA_L3MDEV] &&
(rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV])))
continue;
if (!ops->compare(rule, frh, tb))
continue;
......@@ -536,6 +556,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
rules_ops_put(ops);
return err;
}
EXPORT_SYMBOL_GPL(fib_nl_delrule);
static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
struct fib_rule *rule)
......@@ -607,7 +628,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->target &&
nla_put_u32(skb, FRA_GOTO, rule->target)) ||
(rule->tun_id &&
nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)))
nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) ||
(rule->l3mdev &&
nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
......
......@@ -76,6 +76,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
{
int err = -EAGAIN;
struct fib_table *tbl;
u32 tb_id;
switch (rule->action) {
case FR_ACT_TO_TBL:
......@@ -94,7 +95,8 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
rcu_read_lock();
tbl = fib_get_table(rule->fr_net, rule->table);
tb_id = fib_rule_get_table(rule, arg);
tbl = fib_get_table(rule->fr_net, tb_id);
if (tbl)
err = fib_table_lookup(tbl, &flp->u.ip4,
(struct fib_result *)arg->result,
......@@ -180,7 +182,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
if (err)
goto errout;
if (rule->table == RT_TABLE_UNSPEC) {
if (rule->table == RT_TABLE_UNSPEC && !rule->l3mdev) {
if (rule->action == FR_ACT_TO_TBL) {
struct fib_table *table;
......
......@@ -67,6 +67,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
struct net *net = rule->fr_net;
pol_lookup_t lookup = arg->lookup_ptr;
int err = 0;
u32 tb_id;
switch (rule->action) {
case FR_ACT_TO_TBL:
......@@ -86,7 +87,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto discard_pkt;
}
table = fib6_get_table(net, rule->table);
tb_id = fib_rule_get_table(rule, arg);
table = fib6_get_table(net, tb_id);
if (!table) {
err = -EAGAIN;
goto out;
......@@ -199,7 +201,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
if (rule->action == FR_ACT_TO_TBL) {
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC)
goto errout;
......
......@@ -10,6 +10,7 @@
*/
#include <linux/netdevice.h>
#include <net/fib_rules.h>
#include <net/l3mdev.h>
/**
......@@ -160,3 +161,40 @@ int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4)
return rc;
}
EXPORT_SYMBOL_GPL(l3mdev_get_saddr);
/**
* l3mdev_fib_rule_match - Determine if flowi references an
* L3 master device
* @net: network namespace for device index lookup
* @fl: flow struct
*/
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
struct fib_lookup_arg *arg)
{
struct net_device *dev;
int rc = 0;
rcu_read_lock();
dev = dev_get_by_index_rcu(net, fl->flowi_oif);
if (dev && netif_is_l3_master(dev) &&
dev->l3mdev_ops->l3mdev_fib_table) {
arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
rc = 1;
goto out;
}
dev = dev_get_by_index_rcu(net, fl->flowi_iif);
if (dev && netif_is_l3_master(dev) &&
dev->l3mdev_ops->l3mdev_fib_table) {
arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
rc = 1;
goto out;
}
out:
rcu_read_unlock();
return rc;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment