Commit 3b4202a4 authored by Jakub Kicinski

Merge branch 'nexthop-add-support-for-nexthop-objects-offload'

Ido Schimmel says:

====================
nexthop: Add support for nexthop objects offload

This patch set adds support for nexthop objects offload with a dummy
implementation over netdevsim. mlxsw support will be added later.

The general idea is very similar to route offload in that notifications
are sent whenever nexthop objects are changed. A listener can veto the
change and the error will be communicated to user space with extack.

To keep listeners as simple as possible, they not only receive
notifications for the nexthop object that is changed, but also for all
the other objects affected by this change. For example, when a single
nexthop is replaced, a replace notification is sent for the single
nexthop, but also for all the nexthop groups this nexthop is a member of.
This relieves listeners from the need to track such dependencies.

To simplify things further for listeners, the notification info does not
contain the raw nexthop data structures (e.g., 'struct nexthop'), but
less complex data structures into which the raw data structures are
parsed.

Tested with a new selftest over netdevsim and with fib_nexthops.sh:

Tests passed: 164
Tests failed:   0

Patch set overview:

Patches #1-#4 introduce the aforementioned data structures and convert
existing listeners (i.e., the VXLAN driver) to use them.

Patches #5-#6 add a new RTNH_F_TRAP flag and the ability to set it and
RTNH_F_OFFLOAD on nexthops. This flag is used by netdevsim for testing
purposes and will also be used by mlxsw. These flags are consistent with
the existing RTM_F_OFFLOAD and RTM_F_TRAP flags.

Patches #7-#14 gradually add the new nexthop notifications.

Patches #15-#18 add a dummy implementation for nexthop offload over
netdevsim and a selftest to exercise both good and bad flows.

Changes since RFC [1]:

Patch #1: s/is_encap/has_encap/
Patch #3: Add a blank line in __nh_notifier_single_info_init()
Patch #5: Reword commit message
Patch #6: s/nexthop_hw_flags_set/nexthop_set_hw_flags/
Patch #7: Reword commit message
Patch #11: Allocate extack on the stack

Follow-up patch sets:

selftests: forwarding: Add nexthop objects tests
mlxsw: Preparations for nexthop objects support - part 1/2
mlxsw: Preparations for nexthop objects support - part 2/2
mlxsw: Add support for nexthop objects
mlxsw: Add support for blackhole nexthops
mlxsw: Update adjacency index more efficiently

[1] https://lore.kernel.org/netdev/20200908091037.2709823-1-idosch@idosch.org/
====================

Link: https://lore.kernel.org/r/20201104133040.1125369-1-idosch@idosch.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents c9448e82 21584e6a
......@@ -46,7 +46,7 @@ Resources
=========
The ``netdevsim`` driver exposes resources to control the number of FIB
entries and FIB rule entries that the driver will allow.
entries, FIB rule entries and nexthops that the driver will allow.
.. code:: shell
......@@ -54,6 +54,7 @@ entries and FIB rule entries that the driver will allow.
$ devlink resource set netdevsim/netdevsim0 path /IPv4/fib-rules size 16
$ devlink resource set netdevsim/netdevsim0 path /IPv6/fib size 64
$ devlink resource set netdevsim/netdevsim0 path /IPv6/fib-rules size 16
$ devlink resource set netdevsim/netdevsim0 path /nexthops size 16
$ devlink dev reload netdevsim/netdevsim0
Driver-specific Traps
......
......@@ -324,6 +324,12 @@ static int nsim_dev_resources_register(struct devlink *devlink)
return err;
}
/* Resources for nexthops */
err = devlink_resource_register(devlink, "nexthops", (u64)-1,
NSIM_RESOURCE_NEXTHOPS,
DEVLINK_RESOURCE_ID_PARENT_TOP,
&params);
out:
return err;
}
......
This diff is collapsed.
......@@ -158,6 +158,7 @@ enum nsim_resource_id {
NSIM_RESOURCE_IPV6,
NSIM_RESOURCE_IPV6_FIB,
NSIM_RESOURCE_IPV6_FIB_RULES,
NSIM_RESOURCE_NEXTHOPS,
};
struct nsim_dev_health {
......
......@@ -4684,9 +4684,14 @@ static void vxlan_fdb_nh_flush(struct nexthop *nh)
static int vxlan_nexthop_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct nexthop *nh = ptr;
struct nh_notifier_info *info = ptr;
struct nexthop *nh;
if (event != NEXTHOP_EVENT_DEL)
return NOTIFY_DONE;
if (!nh || event != NEXTHOP_EVENT_DEL)
nh = nexthop_find_by_id(info->net, info->id);
if (!nh)
return NOTIFY_DONE;
vxlan_fdb_nh_flush(nh);
......@@ -4706,7 +4711,8 @@ static __net_init int vxlan_init_net(struct net *net)
for (h = 0; h < PORT_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vn->sock_list[h]);
return register_nexthop_notifier(net, &vn->nexthop_notifier_block);
return register_nexthop_notifier(net, &vn->nexthop_notifier_block,
NULL);
}
static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
......
......@@ -105,11 +105,49 @@ struct nexthop {
};
enum nexthop_event_type {
NEXTHOP_EVENT_DEL
NEXTHOP_EVENT_DEL,
NEXTHOP_EVENT_REPLACE,
};
int register_nexthop_notifier(struct net *net, struct notifier_block *nb);
/* Notification view of a single (non-group) nexthop. It is filled in by
 * __nh_notifier_single_info_init() from the nexthop's nh_info so that
 * listeners do not have to look at the raw nexthop structures.
 */
struct nh_notifier_single_info {
struct net_device *dev; /* copied from fib_nhc.nhc_dev */
u8 gw_family; /* copied from fib_nhc.nhc_gw_family */
/* Gateway address: ipv4 is written when gw_family is AF_INET, ipv6 when
 * it is AF_INET6; for any other family neither member is written.
 */
union {
__be32 ipv4;
struct in6_addr ipv6;
};
u8 is_reject:1, /* mirrors nh_info->reject_nh */
is_fdb:1, /* mirrors nh_info->fdb_nh */
has_encap:1; /* set when fib_nhc.nhc_lwtstate is non-NULL */
};
/* Notification view of one member of a nexthop group. */
struct nh_notifier_grp_entry_info {
u8 weight; /* copied from the group entry's weight */
u32 id; /* id of the member nexthop */
struct nh_notifier_single_info nh; /* parsed view of the member nexthop */
};
/* Notification view of a nexthop group. Allocated with room for num_nh
 * trailing entries (see nh_notifier_grp_info_init()).
 */
struct nh_notifier_grp_info {
u16 num_nh; /* number of valid elements in nh_entries[] */
bool is_fdb; /* mirrors nh_group->fdb_nh */
struct nh_notifier_grp_entry_info nh_entries[]; /* flexible array, num_nh long */
};
/* Payload handed to nexthop notifier callbacks as the 'ptr' argument. */
struct nh_notifier_info {
struct net *net; /* namespace the nexthop belongs to */
struct netlink_ext_ack *extack; /* may be NULL; some callers pass NULL */
u32 id; /* id of the nexthop the event is about */
bool is_grp; /* selects which union member below is valid */
/* nh is allocated when !is_grp, nh_grp when is_grp; both are freed by
 * nh_notifier_info_fini() after the notifier call returns.
 */
union {
struct nh_notifier_single_info *nh;
struct nh_notifier_grp_info *nh_grp;
};
};
int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
/* caller is holding rcu or rtnl; no reference taken to nexthop */
struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
......
......@@ -396,11 +396,13 @@ struct rtnexthop {
#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
#define RTNH_F_OFFLOAD 8 /* offloaded route */
#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */
#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */
#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */
#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */
#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | RTNH_F_OFFLOAD)
#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \
RTNH_F_OFFLOAD | RTNH_F_TRAP)
/* Macros to handle nexthops */
......
......@@ -1644,6 +1644,8 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
*flags |= (nhc->nhc_flags & RTNH_F_ONLINK);
if (nhc->nhc_flags & RTNH_F_OFFLOAD)
*flags |= RTNH_F_OFFLOAD;
if (nhc->nhc_flags & RTNH_F_TRAP)
*flags |= RTNH_F_TRAP;
if (!skip_oif && nhc->nhc_dev &&
nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex))
......
......@@ -2100,15 +2100,6 @@ static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa,
KEYLENGTH - fa->fa_slen, tb->tb_id,
info, NLM_F_REPLACE);
/* call_fib_entry_notifiers will be removed when
* in-kernel notifier is implemented and supported
* for nexthop objects
*/
call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
n->key,
KEYLENGTH - fa->fa_slen, fa,
NULL);
}
}
}
......
......@@ -36,14 +36,145 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
[NHA_FDB] = { .type = NLA_FLAG },
};
static bool nexthop_notifiers_is_empty(struct net *net)
{
return !net->nexthop.notifier_chain.head;
}
/* Translate the raw nh_info of a single nexthop into the simplified
 * notification structure. The caller provides the storage in @nh_info.
 */
static void
__nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
			       const struct nexthop *nh)
{
	struct nh_info *nhi = rtnl_dereference(nh->nh_info);

	nh_info->dev = nhi->fib_nhc.nhc_dev;
	nh_info->gw_family = nhi->fib_nhc.nhc_gw_family;

	/* Only one gateway union member is meaningful, chosen by family. */
	switch (nh_info->gw_family) {
	case AF_INET:
		nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4;
		break;
	case AF_INET6:
		nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6;
		break;
	default:
		break;
	}

	nh_info->is_reject = nhi->reject_nh;
	nh_info->is_fdb = nhi->fdb_nh;
	nh_info->has_encap = nhi->fib_nhc.nhc_lwtstate ? 1 : 0;
}
static int nh_notifier_single_info_init(struct nh_notifier_info *info,
const struct nexthop *nh)
{
info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL);
if (!info->nh)
return -ENOMEM;
__nh_notifier_single_info_init(info->nh, nh);
return 0;
}
/* Free the single-nexthop notification buffer attached to @info. */
static void nh_notifier_single_info_fini(struct nh_notifier_info *info)
{
	struct nh_notifier_single_info *single = info->nh;

	kfree(single);
}
static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
const struct nexthop *nh)
{
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
u16 num_nh = nhg->num_nh;
int i;
info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh),
GFP_KERNEL);
if (!info->nh_grp)
return -ENOMEM;
info->nh_grp->num_nh = num_nh;
info->nh_grp->is_fdb = nhg->fdb_nh;
for (i = 0; i < num_nh; i++) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
info->nh_grp->nh_entries[i].id = nhge->nh->id;
info->nh_grp->nh_entries[i].weight = nhge->weight;
__nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
nhge->nh);
}
return 0;
}
/* Free the group notification buffer attached to @info. */
static void nh_notifier_grp_info_fini(struct nh_notifier_info *info)
{
	struct nh_notifier_grp_info *grp = info->nh_grp;

	kfree(grp);
}
/* Record the nexthop's id and kind in @info, then build the matching
 * payload (group or single). Returns 0 or the helper's error code.
 */
static int nh_notifier_info_init(struct nh_notifier_info *info,
				 const struct nexthop *nh)
{
	info->id = nh->id;
	info->is_grp = nh->is_group;

	if (!info->is_grp)
		return nh_notifier_single_info_init(info, nh);

	return nh_notifier_grp_info_init(info, nh);
}
/* Release whichever payload nh_notifier_info_init() attached to @info. */
static void nh_notifier_info_fini(struct nh_notifier_info *info)
{
	if (!info->is_grp) {
		nh_notifier_single_info_fini(info);
		return;
	}

	nh_notifier_grp_info_fini(info);
}
static int call_nexthop_notifiers(struct net *net,
enum nexthop_event_type event_type,
struct nexthop *nh)
struct nexthop *nh,
struct netlink_ext_ack *extack)
{
struct nh_notifier_info info = {
.net = net,
.extack = extack,
};
int err;
ASSERT_RTNL();
if (nexthop_notifiers_is_empty(net))
return 0;
err = nh_notifier_info_init(&info, nh);
if (err) {
NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
return err;
}
err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
event_type, nh);
event_type, &info);
nh_notifier_info_fini(&info);
return notifier_to_errno(err);
}
/* Deliver one nexthop event to a single notifier block @nb rather than to
 * the whole chain.
 *
 * Returns the error from building the notification info, or the
 * callback's verdict converted with notifier_to_errno().
 */
static int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
				 enum nexthop_event_type event_type,
				 struct nexthop *nh,
				 struct netlink_ext_ack *extack)
{
	struct nh_notifier_info info = {
		.net = net,
		.extack = extack,
	};
	int rc;

	rc = nh_notifier_info_init(&info, nh);
	if (!rc) {
		rc = nb->notifier_call(nb, event_type, &info);
		/* The payload is only needed for the duration of the call. */
		nh_notifier_info_fini(&info);
		rc = notifier_to_errno(rc);
	}

	return rc;
}
......@@ -782,9 +913,10 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
{
struct nh_grp_entry *nhges, *new_nhges;
struct nexthop *nhp = nhge->nh_parent;
struct netlink_ext_ack extack;
struct nexthop *nh = nhge->nh;
struct nh_group *nhg, *newg;
int i, j;
int i, j, err;
WARN_ON(!nh);
......@@ -832,6 +964,10 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
list_del(&nhge->nh_list);
nexthop_put(nhge->nh);
err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack);
if (err)
pr_err("%s\n", extack._msg);
if (nlinfo)
nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
}
......@@ -907,7 +1043,7 @@ static void __remove_nexthop(struct net *net, struct nexthop *nh,
static void remove_nexthop(struct net *net, struct nexthop *nh,
struct nl_info *nlinfo)
{
call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh);
call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL);
/* remove from the tree */
rb_erase(&nh->rb_node, &net->nexthop.rb_root);
......@@ -940,13 +1076,17 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
struct netlink_ext_ack *extack)
{
struct nh_group *oldg, *newg;
int i;
int i, err;
if (!new->is_group) {
NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop.");
return -EINVAL;
}
err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
if (err)
return err;
oldg = rtnl_dereference(old->nh_grp);
newg = rtnl_dereference(new->nh_grp);
......@@ -985,31 +1125,54 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old,
struct nexthop *new,
struct netlink_ext_ack *extack)
{
u8 old_protocol, old_nh_flags;
struct nh_info *oldi, *newi;
struct nh_grp_entry *nhge;
int err;
if (new->is_group) {
NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group.");
return -EINVAL;
}
err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
if (err)
return err;
/* Hardware flags were set on 'old' as 'new' is not in the red-black
* tree. Therefore, inherit the flags from 'old' to 'new'.
*/
new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP);
oldi = rtnl_dereference(old->nh_info);
newi = rtnl_dereference(new->nh_info);
newi->nh_parent = old;
oldi->nh_parent = new;
old_protocol = old->protocol;
old_nh_flags = old->nh_flags;
old->protocol = new->protocol;
old->nh_flags = new->nh_flags;
rcu_assign_pointer(old->nh_info, newi);
rcu_assign_pointer(new->nh_info, oldi);
/* Send a replace notification for all the groups using the nexthop. */
list_for_each_entry(nhge, &old->grp_list, nh_list) {
struct nexthop *nhp = nhge->nh_parent;
err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
extack);
if (err)
goto err_notify;
}
/* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially
* update IPv4 indication in all the groups using the nexthop.
*/
if (oldi->family == AF_INET && newi->family == AF_INET6) {
struct nh_grp_entry *nhge;
list_for_each_entry(nhge, &old->grp_list, nh_list) {
struct nexthop *nhp = nhge->nh_parent;
struct nh_group *nhg;
......@@ -1020,6 +1183,21 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old,
}
return 0;
err_notify:
rcu_assign_pointer(new->nh_info, newi);
rcu_assign_pointer(old->nh_info, oldi);
old->nh_flags = old_nh_flags;
old->protocol = old_protocol;
oldi->nh_parent = old;
newi->nh_parent = new;
list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) {
struct nexthop *nhp = nhge->nh_parent;
call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, extack);
}
call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack);
return err;
}
static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
......@@ -1168,7 +1346,11 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
rb_link_node_rcu(&new_nh->rb_node, parent, pp);
rb_insert_color(&new_nh->rb_node, root);
rc = 0;
rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
if (rc)
rb_erase(&new_nh->rb_node, &net->nexthop.rb_root);
out:
if (!rc) {
nh_base_seq_inc(net);
......@@ -1957,10 +2139,40 @@ static struct notifier_block nh_netdev_notifier = {
.notifier_call = nh_netdev_event,
};
int register_nexthop_notifier(struct net *net, struct notifier_block *nb)
/* Replay every nexthop in the namespace's red-black tree to @nb as a
 * NEXTHOP_EVENT_REPLACE event. Stops at, and returns, the first error
 * reported; returns 0 when all nexthops were delivered.
 */
static int nexthops_dump(struct net *net, struct notifier_block *nb,
			 struct netlink_ext_ack *extack)
{
	struct rb_node *cur = rb_first(&net->nexthop.rb_root);

	while (cur) {
		struct nexthop *nh = rb_entry(cur, struct nexthop, rb_node);
		int rc;

		rc = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh,
					   extack);
		if (rc)
			return rc;

		cur = rb_next(cur);
	}

	return 0;
}
int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack)
{
return blocking_notifier_chain_register(&net->nexthop.notifier_chain,
nb);
int err;
rtnl_lock();
err = nexthops_dump(net, nb, extack);
if (err)
goto unlock;
err = blocking_notifier_chain_register(&net->nexthop.notifier_chain,
nb);
unlock:
rtnl_unlock();
return err;
}
EXPORT_SYMBOL(register_nexthop_notifier);
......@@ -1971,6 +2183,27 @@ int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
}
EXPORT_SYMBOL(unregister_nexthop_notifier);
/* Set the hardware flags of the nexthop with the given @id.
 *
 * RTNH_F_OFFLOAD and RTNH_F_TRAP are first cleared and then set again
 * according to @offload and @trap. Nothing happens when no nexthop with
 * that id exists. The lookup and update run under rcu_read_lock().
 */
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap)
{
	struct nexthop *nh;

	rcu_read_lock();

	nh = nexthop_find_by_id(net, id);
	if (nh) {
		nh->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
		if (offload)
			nh->nh_flags |= RTNH_F_OFFLOAD;
		if (trap)
			nh->nh_flags |= RTNH_F_TRAP;
	}

	rcu_read_unlock();
}
EXPORT_SYMBOL(nexthop_set_hw_flags);
static void __net_exit nexthop_net_exit(struct net *net)
{
rtnl_lock();
......
......@@ -6039,11 +6039,6 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt,
struct sk_buff *skb;
int err = -ENOBUFS;
/* call_fib6_entry_notifiers will be removed when in-kernel notifier
* is implemented and supported for nexthop objects
*/
call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
if (!skb)
goto errout;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment