Commit 2ae6b594 authored by David S. Miller

Merge branch 'mlxsw-Improve-IPv6-route-insertion-rate'

Ido Schimmel says:

====================
mlxsw: Improve IPv6 route insertion rate

Unlike IPv4, an IPv6 multipath route in the kernel is composed from
multiple sibling routes, each representing a single nexthop.

Therefore, an addition of a multipath route with N nexthops translates
to N in-kernel notifications. This is inefficient for device drivers
that need to program the route to the underlying device. Each time a new
nexthop is appended, a new nexthop group needs to be constructed and the
old one deleted.

This patchset improves the situation by sending a single notification
for a multipath route addition / deletion instead of one per-nexthop.
When adding thousands of multipath routes with 16 nexthops, I measured
an improvement of about x10 in the insertion rate.

Patches #1-#3 add a flag that indicates that in-kernel notifications
need to be suppressed and extend the IPv6 FIB notification info with
information about the number of sibling routes that are being notified.

Patches #4-#5 adjust the two current listeners to these notifications to
ignore notifications about IPv6 multipath routes.

Patches #6-#7 add add / delete notifications for IPv6 multipath routes.

Patches #8-#14 do the same for mlxsw.

Patch #15 finally removes the limitations added in patches #4-#5 and
stops the kernel from sending a notification for each added / deleted
nexthop.

Patch #16 adds test cases.

v2 (David Ahern):
* Remove patch adjusting netdevsim to consume resources for each
  fib6_info. Instead, consume one resource for the entire multipath
  route
* Remove 'multipath_rt' usage in patch #10
* Remove 'multipath_rt' from 'struct fib6_entry_notifier_info' in patch
  #15. The member is only removed in this patch to prevent drivers from
  processing multipath routes twice during the series
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 714a485a 12ee8220
...@@ -5278,17 +5278,21 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, ...@@ -5278,17 +5278,21 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
static int static int
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib6_entry *fib6_entry, struct mlxsw_sp_fib6_entry *fib6_entry,
struct fib6_info *rt) struct fib6_info **rt_arr, unsigned int nrt6)
{ {
struct mlxsw_sp_rt6 *mlxsw_sp_rt6; struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
int err; int err, i;
mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); for (i = 0; i < nrt6; i++) {
if (IS_ERR(mlxsw_sp_rt6)) mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
return PTR_ERR(mlxsw_sp_rt6); if (IS_ERR(mlxsw_sp_rt6)) {
err = PTR_ERR(mlxsw_sp_rt6);
goto err_rt6_create;
}
list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
fib6_entry->nrt6++; fib6_entry->nrt6++;
}
err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
if (err) if (err)
...@@ -5297,27 +5301,38 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, ...@@ -5297,27 +5301,38 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
return 0; return 0;
err_nexthop6_group_update: err_nexthop6_group_update:
i = nrt6;
err_rt6_create:
for (i--; i >= 0; i--) {
fib6_entry->nrt6--; fib6_entry->nrt6--;
mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
struct mlxsw_sp_rt6, list);
list_del(&mlxsw_sp_rt6->list); list_del(&mlxsw_sp_rt6->list);
mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
}
return err; return err;
} }
static void static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib6_entry *fib6_entry, struct mlxsw_sp_fib6_entry *fib6_entry,
struct fib6_info *rt) struct fib6_info **rt_arr, unsigned int nrt6)
{ {
struct mlxsw_sp_rt6 *mlxsw_sp_rt6; struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
int i;
mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt); for (i = 0; i < nrt6; i++) {
if (WARN_ON(!mlxsw_sp_rt6)) mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
return; rt_arr[i]);
if (WARN_ON_ONCE(!mlxsw_sp_rt6))
continue;
fib6_entry->nrt6--; fib6_entry->nrt6--;
list_del(&mlxsw_sp_rt6->list); list_del(&mlxsw_sp_rt6->list);
mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
}
mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
} }
static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
...@@ -5358,29 +5373,32 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) ...@@ -5358,29 +5373,32 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
static struct mlxsw_sp_fib6_entry * static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node, struct mlxsw_sp_fib_node *fib_node,
struct fib6_info *rt) struct fib6_info **rt_arr, unsigned int nrt6)
{ {
struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_fib_entry *fib_entry;
struct mlxsw_sp_rt6 *mlxsw_sp_rt6; struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
int err; int err, i;
fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL); fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
if (!fib6_entry) if (!fib6_entry)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
fib_entry = &fib6_entry->common; fib_entry = &fib6_entry->common;
mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); INIT_LIST_HEAD(&fib6_entry->rt6_list);
for (i = 0; i < nrt6; i++) {
mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
if (IS_ERR(mlxsw_sp_rt6)) { if (IS_ERR(mlxsw_sp_rt6)) {
err = PTR_ERR(mlxsw_sp_rt6); err = PTR_ERR(mlxsw_sp_rt6);
goto err_rt6_create; goto err_rt6_create;
} }
list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
fib6_entry->nrt6++;
}
mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt); mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
INIT_LIST_HEAD(&fib6_entry->rt6_list);
list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
fib6_entry->nrt6 = 1;
err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
if (err) if (err)
goto err_nexthop6_group_get; goto err_nexthop6_group_get;
...@@ -5390,9 +5408,15 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, ...@@ -5390,9 +5408,15 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
return fib6_entry; return fib6_entry;
err_nexthop6_group_get: err_nexthop6_group_get:
i = nrt6;
err_rt6_create:
for (i--; i >= 0; i--) {
fib6_entry->nrt6--;
mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
struct mlxsw_sp_rt6, list);
list_del(&mlxsw_sp_rt6->list); list_del(&mlxsw_sp_rt6->list);
mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
err_rt6_create: }
kfree(fib6_entry); kfree(fib6_entry);
return ERR_PTR(err); return ERR_PTR(err);
} }
...@@ -5435,16 +5459,16 @@ mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, ...@@ -5435,16 +5459,16 @@ mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
static int static int
mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
bool replace) bool *p_replace)
{ {
struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib6_entry *fib6_entry;
fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace); fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace);
if (replace && WARN_ON(!fib6_entry)) if (*p_replace && !fib6_entry)
return -EINVAL; *p_replace = false;
if (fib6_entry) { if (fib6_entry) {
list_add_tail(&new6_entry->common.list, list_add_tail(&new6_entry->common.list,
...@@ -5479,11 +5503,11 @@ mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) ...@@ -5479,11 +5503,11 @@ mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib6_entry *fib6_entry, struct mlxsw_sp_fib6_entry *fib6_entry,
bool replace) bool *p_replace)
{ {
int err; int err;
err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace); err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace);
if (err) if (err)
return err; return err;
...@@ -5556,10 +5580,12 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, ...@@ -5556,10 +5580,12 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
} }
static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
struct fib6_info *rt, bool replace) struct fib6_info **rt_arr,
unsigned int nrt6, bool replace)
{ {
struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib_node *fib_node;
struct fib6_info *rt = rt_arr[0];
int err; int err;
if (mlxsw_sp->router->aborted) if (mlxsw_sp->router->aborted)
...@@ -5584,19 +5610,21 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, ...@@ -5584,19 +5610,21 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
*/ */
fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace); fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
if (fib6_entry) { if (fib6_entry) {
err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt); err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry,
rt_arr, nrt6);
if (err) if (err)
goto err_fib6_entry_nexthop_add; goto err_fib6_entry_nexthop_add;
return 0; return 0;
} }
fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt); fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
nrt6);
if (IS_ERR(fib6_entry)) { if (IS_ERR(fib6_entry)) {
err = PTR_ERR(fib6_entry); err = PTR_ERR(fib6_entry);
goto err_fib6_entry_create; goto err_fib6_entry_create;
} }
err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace); err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace);
if (err) if (err)
goto err_fib6_node_entry_link; goto err_fib6_node_entry_link;
...@@ -5613,10 +5641,12 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, ...@@ -5613,10 +5641,12 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
} }
static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
struct fib6_info *rt) struct fib6_info **rt_arr,
unsigned int nrt6)
{ {
struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib_node *fib_node;
struct fib6_info *rt = rt_arr[0];
if (mlxsw_sp->router->aborted) if (mlxsw_sp->router->aborted)
return; return;
...@@ -5628,11 +5658,12 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, ...@@ -5628,11 +5658,12 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
if (WARN_ON(!fib6_entry)) if (WARN_ON(!fib6_entry))
return; return;
/* If route is part of a multipath entry, but not the last one /* If not all the nexthops are deleted, then only reduce the nexthop
* removed, then only reduce its nexthop group. * group.
*/ */
if (!list_is_singular(&fib6_entry->rt6_list)) { if (nrt6 != fib6_entry->nrt6) {
mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt); mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
nrt6);
return; return;
} }
...@@ -5893,10 +5924,15 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) ...@@ -5893,10 +5924,15 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
} }
/* Per-event payload for deferred IPv6 FIB work: the set of routes carried by
 * a single notification.
 */
struct mlxsw_sp_fib6_event_work {
/* Array of route pointers; allocated and filled by
 * mlxsw_sp_router_fib6_work_init().
 */
struct fib6_info **rt_arr;
/* Number of slots in rt_arr (notified route plus its siblings). */
unsigned int nrt6;
};
struct mlxsw_sp_fib_event_work { struct mlxsw_sp_fib_event_work {
struct work_struct work; struct work_struct work;
union { union {
struct fib6_entry_notifier_info fen6_info; struct mlxsw_sp_fib6_event_work fib6_work;
struct fib_entry_notifier_info fen_info; struct fib_entry_notifier_info fen_info;
struct fib_rule_notifier_info fr_info; struct fib_rule_notifier_info fr_info;
struct fib_nh_notifier_info fnh_info; struct fib_nh_notifier_info fnh_info;
...@@ -5907,6 +5943,54 @@ struct mlxsw_sp_fib_event_work { ...@@ -5907,6 +5943,54 @@ struct mlxsw_sp_fib_event_work {
unsigned long event; unsigned long event;
}; };
/* Fill @fib6_work from a FIB notification: store @fen6_info->rt in slot 0,
 * followed by up to @fen6_info->nsiblings entries taken from its
 * fib6_siblings list, calling fib6_info_hold() on every route stored.
 *
 * Returns 0 on success or -ENOMEM if the route array cannot be allocated.
 */
static int
mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
struct fib6_entry_notifier_info *fen6_info)
{
struct fib6_info *rt = fen6_info->rt;
struct fib6_info **rt_arr;
struct fib6_info *iter;
unsigned int nrt6;
int i = 0;
/* One slot for the notified route itself plus one per sibling. */
nrt6 = fen6_info->nsiblings + 1;
/* GFP_ATOMIC: presumably because the notifier may run in atomic
 * context - TODO confirm against the caller.
 */
rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
if (!rt_arr)
return -ENOMEM;
fib6_work->rt_arr = rt_arr;
fib6_work->nrt6 = nrt6;
rt_arr[0] = rt;
fib6_info_hold(rt);
if (!fen6_info->nsiblings)
return 0;
/* Copy at most nsiblings entries; the sibling list may hold more routes
 * than this notification covers.
 */
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
if (i == fen6_info->nsiblings)
break;
rt_arr[i + 1] = iter;
fib6_info_hold(iter);
i++;
}
/* NOTE(review): if the list yields fewer than nsiblings entries, the
 * remaining slots stay NULL (zeroed by kcalloc) while nrt6 still counts
 * them - confirm that consumers tolerate NULL entries.
 */
WARN_ON_ONCE(i != fen6_info->nsiblings);
return 0;
}
/* Tear down @fib6_work: call mlxsw_sp_rt6_release() on each of the nrt6
 * entries of rt_arr, then free the array itself.
 */
static void
mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
{
int i;
/* presumably drops the reference taken by fib6_info_hold() in
 * mlxsw_sp_router_fib6_work_init() - verify against
 * mlxsw_sp_rt6_release().
 */
for (i = 0; i < fib6_work->nrt6; i++)
mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
kfree(fib6_work->rt_arr);
}
static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{ {
struct mlxsw_sp_fib_event_work *fib_work = struct mlxsw_sp_fib_event_work *fib_work =
...@@ -5965,18 +6049,21 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) ...@@ -5965,18 +6049,21 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
switch (fib_work->event) { switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_APPEND: /* fall through */
case FIB_EVENT_ENTRY_ADD: case FIB_EVENT_ENTRY_ADD:
replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
err = mlxsw_sp_router_fib6_add(mlxsw_sp, err = mlxsw_sp_router_fib6_add(mlxsw_sp,
fib_work->fen6_info.rt, replace); fib_work->fib6_work.rt_arr,
fib_work->fib6_work.nrt6,
replace);
if (err) if (err)
mlxsw_sp_router_fib_abort(mlxsw_sp); mlxsw_sp_router_fib_abort(mlxsw_sp);
mlxsw_sp_rt6_release(fib_work->fen6_info.rt); mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
break; break;
case FIB_EVENT_ENTRY_DEL: case FIB_EVENT_ENTRY_DEL:
mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt); mlxsw_sp_router_fib6_del(mlxsw_sp,
mlxsw_sp_rt6_release(fib_work->fen6_info.rt); fib_work->fib6_work.rt_arr,
fib_work->fib6_work.nrt6);
mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
break; break;
case FIB_EVENT_RULE_ADD: case FIB_EVENT_RULE_ADD:
/* if we get here, a rule was added that we do not support. /* if we get here, a rule was added that we do not support.
...@@ -6065,22 +6152,26 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, ...@@ -6065,22 +6152,26 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
} }
} }
static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
struct fib_notifier_info *info) struct fib_notifier_info *info)
{ {
struct fib6_entry_notifier_info *fen6_info; struct fib6_entry_notifier_info *fen6_info;
int err;
switch (fib_work->event) { switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_APPEND: /* fall through */
case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_DEL: case FIB_EVENT_ENTRY_DEL:
fen6_info = container_of(info, struct fib6_entry_notifier_info, fen6_info = container_of(info, struct fib6_entry_notifier_info,
info); info);
fib_work->fen6_info = *fen6_info; err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
fib6_info_hold(fib_work->fen6_info.rt); fen6_info);
if (err)
return err;
break; break;
} }
return 0;
} }
static void static void
...@@ -6221,7 +6312,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, ...@@ -6221,7 +6312,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
break; break;
case AF_INET6: case AF_INET6:
INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
mlxsw_sp_router_fib6_event(fib_work, info); err = mlxsw_sp_router_fib6_event(fib_work, info);
if (err)
goto err_fib_event;
break; break;
case RTNL_FAMILY_IP6MR: case RTNL_FAMILY_IP6MR:
case RTNL_FAMILY_IPMR: case RTNL_FAMILY_IPMR:
...@@ -6233,6 +6326,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, ...@@ -6233,6 +6326,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
mlxsw_core_schedule_work(&fib_work->work); mlxsw_core_schedule_work(&fib_work->work);
return NOTIFY_DONE; return NOTIFY_DONE;
err_fib_event:
kfree(fib_work);
return NOTIFY_BAD;
} }
struct mlxsw_sp_rif * struct mlxsw_sp_rif *
......
...@@ -377,6 +377,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *, ...@@ -377,6 +377,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_entry_notifier_info { struct fib6_entry_notifier_info {
struct fib_notifier_info info; /* must be first */ struct fib_notifier_info info; /* must be first */
struct fib6_info *rt; struct fib6_info *rt;
unsigned int nsiblings;
}; };
/* /*
...@@ -450,6 +451,11 @@ int call_fib6_entry_notifiers(struct net *net, ...@@ -450,6 +451,11 @@ int call_fib6_entry_notifiers(struct net *net,
enum fib_event_type event_type, enum fib_event_type event_type,
struct fib6_info *rt, struct fib6_info *rt,
struct netlink_ext_ack *extack); struct netlink_ext_ack *extack);
int call_fib6_multipath_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct fib6_info *rt,
unsigned int nsiblings,
struct netlink_ext_ack *extack);
void fib6_rt_update(struct net *net, struct fib6_info *rt, void fib6_rt_update(struct net *net, struct fib6_info *rt,
struct nl_info *info); struct nl_info *info);
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
......
...@@ -378,13 +378,17 @@ struct nla_policy { ...@@ -378,13 +378,17 @@ struct nla_policy {
/** /**
* struct nl_info - netlink source information * struct nl_info - netlink source information
* @nlh: Netlink message header of original request * @nlh: Netlink message header of original request
* @nl_net: Network namespace
* @portid: Netlink PORTID of requesting application * @portid: Netlink PORTID of requesting application
* @skip_notify: Skip netlink notifications to user space
* @skip_notify_kernel: Skip selected in-kernel notifications
*/ */
struct nl_info { struct nl_info {
struct nlmsghdr *nlh; struct nlmsghdr *nlh;
struct net *nl_net; struct net *nl_net;
u32 portid; u32 portid;
bool skip_notify; u8 skip_notify:1,
skip_notify_kernel:1;
}; };
/** /**
......
...@@ -381,6 +381,22 @@ int call_fib6_entry_notifiers(struct net *net, ...@@ -381,6 +381,22 @@ int call_fib6_entry_notifiers(struct net *net,
return call_fib6_notifiers(net, event_type, &info.info); return call_fib6_notifiers(net, event_type, &info.info);
} }
/* Send one FIB notification of type @event_type describing @rt together with
 * @nsiblings of its sibling routes.
 *
 * Increments the fib_seq counter of @rt's table before notifying and returns
 * the result of call_fib6_notifiers().
 */
int call_fib6_multipath_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct fib6_info *rt,
unsigned int nsiblings,
struct netlink_ext_ack *extack)
{
struct fib6_entry_notifier_info info = {
.info.extack = extack,
.rt = rt,
.nsiblings = nsiblings,
};
rt->fib6_table->fib_seq++;
return call_fib6_notifiers(net, event_type, &info.info);
}
struct fib6_dump_arg { struct fib6_dump_arg {
struct net *net; struct net *net;
struct notifier_block *nb; struct notifier_block *nb;
...@@ -1123,11 +1139,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, ...@@ -1123,11 +1139,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
add: add:
nlflags |= NLM_F_CREATE; nlflags |= NLM_F_CREATE;
if (!info->skip_notify_kernel) {
err = call_fib6_entry_notifiers(info->nl_net, err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_ADD, FIB_EVENT_ENTRY_ADD,
rt, extack); rt, extack);
if (err) if (err)
return err; return err;
}
rcu_assign_pointer(rt->fib6_next, iter); rcu_assign_pointer(rt->fib6_next, iter);
fib6_info_hold(rt); fib6_info_hold(rt);
...@@ -1152,11 +1170,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, ...@@ -1152,11 +1170,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
return -ENOENT; return -ENOENT;
} }
if (!info->skip_notify_kernel) {
err = call_fib6_entry_notifiers(info->nl_net, err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_REPLACE, FIB_EVENT_ENTRY_REPLACE,
rt, extack); rt, extack);
if (err) if (err)
return err; return err;
}
fib6_info_hold(rt); fib6_info_hold(rt);
rcu_assign_pointer(rt->fib6_node, fn); rcu_assign_pointer(rt->fib6_node, fn);
...@@ -1839,9 +1859,11 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, ...@@ -1839,9 +1859,11 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
fib6_purge_rt(rt, fn, net); fib6_purge_rt(rt, fn, net);
if (!info->skip_notify_kernel)
call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL); call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
if (!info->skip_notify) if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0); inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
fib6_info_release(rt); fib6_info_release(rt);
} }
......
...@@ -3718,6 +3718,12 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) ...@@ -3718,6 +3718,12 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
info->skip_notify = 1; info->skip_notify = 1;
} }
info->skip_notify_kernel = 1;
call_fib6_multipath_entry_notifiers(net,
FIB_EVENT_ENTRY_DEL,
rt,
rt->fib6_nsiblings,
NULL);
list_for_each_entry_safe(sibling, next_sibling, list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, &rt->fib6_siblings,
fib6_siblings) { fib6_siblings) {
...@@ -4965,6 +4971,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, ...@@ -4965,6 +4971,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
{ {
struct fib6_info *rt_notif = NULL, *rt_last = NULL; struct fib6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo; struct nl_info *info = &cfg->fc_nlinfo;
enum fib_event_type event_type;
struct fib6_config r_cfg; struct fib6_config r_cfg;
struct rtnexthop *rtnh; struct rtnexthop *rtnh;
struct fib6_info *rt; struct fib6_info *rt;
...@@ -5042,6 +5049,11 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, ...@@ -5042,6 +5049,11 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
*/ */
info->skip_notify = 1; info->skip_notify = 1;
/* For add and replace, send one notification with all nexthops. For
* append, send one notification with all appended nexthops.
*/
info->skip_notify_kernel = 1;
err_nh = NULL; err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) { list_for_each_entry(nh, &rt6_nh_list, next) {
err = __ip6_ins_rt(nh->fib6_info, info, extack); err = __ip6_ins_rt(nh->fib6_info, info, extack);
...@@ -5078,6 +5090,15 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, ...@@ -5078,6 +5090,15 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
nhn++; nhn++;
} }
event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD;
err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type,
rt_notif, nhn - 1, extack);
if (err) {
/* Delete all the siblings that were just added */
err_nh = NULL;
goto add_errout;
}
/* success ... tell user about new route */ /* success ... tell user about new route */
ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
goto cleanup; goto cleanup;
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Test unicast FIB offload indication.
# Location of the shared forwarding selftest helpers, relative to the
# directory containing this script.
lib_dir=$(dirname $0)/../../../net/forwarding
# Names of the test functions defined below.
# NOTE(review): presumably consumed by tests_run from lib.sh - confirm.
ALL_TESTS="
ipv6_route_add
ipv6_route_replace
ipv6_route_nexthop_group_share
ipv6_route_rate
"
# setup_prepare reads four interfaces from NETIFS (p1..p4).
NUM_NETIFS=4
# Helper libraries; they are expected to provide the functions used below that
# this file does not define (simple_if_init, check_err, log_test, ...).
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
tor1_create()
{
	# Addresses configured on the first ToR port; they serve as nexthop
	# gateways in the tests below.
	local -a tor1_addrs=(2001:db8:1::2/128 2001:db8:1::3/128)

	simple_if_init $tor1_p1 "${tor1_addrs[@]}"
}
tor1_destroy()
{
	# Undo tor1_create: same port, same addresses.
	local -a tor1_addrs=(2001:db8:1::2/128 2001:db8:1::3/128)

	simple_if_fini $tor1_p1 "${tor1_addrs[@]}"
}
tor2_create()
{
	# Addresses configured on the second ToR port; they serve as nexthop
	# gateways in the tests below.
	local -a tor2_addrs=(2001:db8:2::2/128 2001:db8:2::3/128)

	simple_if_init $tor2_p1 "${tor2_addrs[@]}"
}
tor2_destroy()
{
	# Undo tor2_create: same port, same addresses.
	local -a tor2_addrs=(2001:db8:2::2/128 2001:db8:2::3/128)

	simple_if_fini $tor2_p1 "${tor2_addrs[@]}"
}
spine_create()
{
	local spine_if

	# Bring both spine ports up, first port first.
	for spine_if in "$spine_p1" "$spine_p2"; do
		ip link set dev $spine_if up
	done

	# Give each spine port the gateway address of its subnet.
	__addr_add_del $spine_p1 add 2001:db8:1::1/64
	__addr_add_del $spine_p2 add 2001:db8:2::1/64
}
spine_destroy()
{
	local spine_if

	# Mirror image of spine_create: remove the addresses, then take the
	# ports down, second port first.
	__addr_add_del $spine_p2 del 2001:db8:2::1/64
	__addr_add_del $spine_p1 del 2001:db8:1::1/64

	for spine_if in "$spine_p2" "$spine_p1"; do
		ip link set dev $spine_if down
	done
}
# Succeed (return 0) when the number of lines containing "offload" in
# "ip -6 route show match <pfx>" equals the expected number, otherwise
# return 1.
#   $1 - route selector passed to "ip -6 route show match"
#   $2 - expected number of matching lines
ipv6_offload_check()
{
	local pfx="$1"
	local expected_num=$2
	local num

	# Try to avoid races with route offload
	sleep .1

	num=$(ip -6 route show match ${pfx} | grep -c "offload")
	[ $num -eq $expected_num ] && return 0

	return 1
}
# Check the offload indication of prefix routes as routes with different
# metrics are added for the same prefix, and after the route is moved to a
# different nexthop device.
ipv6_route_add_prefix()
{
	RET=0

	# Add a prefix route and check that it is offloaded.
	ip -6 route add 2001:db8:3::/64 dev $spine_p1 metric 100
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1
	check_err $? "prefix route not offloaded"

	# Append an identical prefix route with a higher metric and check that
	# offload indication did not change.
	ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 200
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1
	check_err $? "lowest metric not offloaded after append"
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0
	check_err $? "highest metric offloaded when should not"

	# Prepend an identical prefix route with lower metric and check that
	# it is offloaded and the others are not. The command is still
	# "append"; it is the lower metric that is expected to make this route
	# the one that gets offloaded.
	ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 10
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 10" 1
	check_err $? "lowest metric not offloaded after prepend"
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 0
	check_err $? "mid metric offloaded when should not"
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0
	check_err $? "highest metric offloaded when should not"

	# Delete the routes and add the same route with a different nexthop
	# device. Check that it is offloaded.
	ip -6 route flush 2001:db8:3::/64 dev $spine_p1
	ip -6 route add 2001:db8:3::/64 dev $spine_p2
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p2" 1
	# Record the result; without this the check above has no effect on
	# the test outcome.
	check_err $? "prefix route not offloaded after changing nexthop device"

	log_test "IPv6 prefix route add"

	ip -6 route flush 2001:db8:3::/64
}
# Check the offload indication of a multipath route as nexthops are appended
# to it and as routes with other metrics are added for the same prefix.
ipv6_route_add_mpath()
{
	local rt_pfx=2001:db8:3::/64
	local nh_p1_a="nexthop via 2001:db8:1::2 dev $spine_p1"
	local nh_p1_b="nexthop via 2001:db8:1::3 dev $spine_p1"
	local nh_p2_a="nexthop via 2001:db8:2::2 dev $spine_p2"

	RET=0

	# A freshly added two-nexthop route must be offloaded.
	ip -6 route add $rt_pfx metric 100 $nh_p1_a $nh_p2_a
	ipv6_offload_check "$rt_pfx metric 100" 2
	check_err $? "multipath route not offloaded when should"

	# A third nexthop appended to the same route must be offloaded too.
	ip -6 route append $rt_pfx metric 100 $nh_p1_b
	ipv6_offload_check "$rt_pfx metric 100" 3
	check_err $? "appended nexthop not offloaded when should"

	# Emulate a replace: delete the route and re-add it with only the
	# original two nexthops.
	ip -6 route del $rt_pfx
	ip -6 route add $rt_pfx metric 100 $nh_p1_a $nh_p2_a
	ipv6_offload_check "$rt_pfx metric 100" 2
	check_err $? "multipath route not offloaded after delete & add"

	# A route with a higher metric must not change what is offloaded.
	ip -6 route append $rt_pfx metric 200 $nh_p1_b
	ipv6_offload_check "$rt_pfx metric 100" 2
	check_err $? "lowest metric not offloaded after append"
	ipv6_offload_check "$rt_pfx metric 200" 0
	check_err $? "highest metric offloaded when should not"

	# A route with a lower metric takes over the offload; the routes with
	# higher metrics lose it.
	ip -6 route append $rt_pfx metric 10 $nh_p1_b
	ipv6_offload_check "$rt_pfx metric 10" 1
	check_err $? "lowest metric not offloaded after prepend"
	ipv6_offload_check "$rt_pfx metric 100" 0
	check_err $? "mid metric offloaded when should not"
	ipv6_offload_check "$rt_pfx metric 200" 0
	check_err $? "highest metric offloaded when should not"

	log_test "IPv6 multipath route add"

	ip -6 route flush $rt_pfx
}
# Run the route-add sub-tests: prefix routes first, then multipath routes.
ipv6_route_add()
{
	local add_subtest

	for add_subtest in ipv6_route_add_prefix ipv6_route_add_mpath; do
		$add_subtest
	done
}
# Check the offload indication across the combinations of replacing prefix
# and multipath routes with one another.
ipv6_route_replace()
{
	local rt_pfx=2001:db8:3::/64
	local nhs_first="nexthop via 2001:db8:1::2 dev $spine_p1"
	local nhs_second="nexthop via 2001:db8:1::3 dev $spine_p1"

	nhs_first+=" nexthop via 2001:db8:2::2 dev $spine_p2"
	nhs_second+=" nexthop via 2001:db8:2::3 dev $spine_p2"

	RET=0

	# Prefix route replaced by a prefix route.
	ip -6 route add $rt_pfx metric 100 dev $spine_p1
	ipv6_offload_check "$rt_pfx metric 100" 1
	check_err $? "prefix route not offloaded when should"
	ip -6 route replace $rt_pfx metric 100 dev $spine_p2
	ipv6_offload_check "$rt_pfx metric 100" 1
	check_err $? "prefix route not offloaded after replace"

	# Prefix route replaced by a multipath route.
	ip -6 route replace $rt_pfx metric 100 $nhs_first
	ipv6_offload_check "$rt_pfx metric 100" 2
	check_err $? "multipath route not offloaded after replace"

	# Multipath route "replaced" by a prefix route. A prefix route cannot
	# replace a multipath route, so it is appended instead.
	ip -6 route replace $rt_pfx metric 100 dev $spine_p1
	ipv6_offload_check "$rt_pfx metric 100 dev $spine_p1" 0
	check_err $? "prefix route offloaded after 'replacing' multipath route"
	ipv6_offload_check "$rt_pfx metric 100" 2
	check_err $? "multipath route not offloaded after being 'replaced' by prefix route"

	# Multipath route replaced by a multipath route.
	ip -6 route replace $rt_pfx metric 100 $nhs_second
	ipv6_offload_check "$rt_pfx metric 100" 2
	check_err $? "multipath route not offloaded after replacing multipath route"

	# "Replace" of a multipath route that does not exist: the new route is
	# appended and must not be offloaded.
	ip -6 route replace $rt_pfx metric 200 $nhs_second
	ipv6_offload_check "$rt_pfx metric 100" 2
	check_err $? "multipath route not offloaded after non-existing route was 'replaced'"
	ipv6_offload_check "$rt_pfx metric 200" 0
	check_err $? "multipath route offloaded after 'replacing' non-existing route"

	log_test "IPv6 route replace"

	ip -6 route flush $rt_pfx
}
# Check that two multipath routes with identical nexthops keep their offload
# indication when one of them is deleted or shrunk.
ipv6_route_nexthop_group_share()
{
	local pfx_a=2001:db8:3::/64
	local pfx_b=2001:db8:4::/64
	local nh_one="nexthop via 2001:db8:1::2 dev $spine_p1"
	local nh_two="nexthop via 2001:db8:2::2 dev $spine_p2"

	RET=0

	# The driver consolidates identical nexthop groups in order to reduce
	# the resource usage in its adjacency table. Deleting one multipath
	# route that uses the group must not affect the other.
	ip -6 route add $pfx_a $nh_one $nh_two
	ip -6 route add $pfx_b $nh_one $nh_two
	ipv6_offload_check "$pfx_a" 2
	check_err $? "multipath route not offloaded when should"
	ipv6_offload_check "$pfx_b" 2
	check_err $? "multipath route not offloaded when should"
	ip -6 route del $pfx_a
	ipv6_offload_check "$pfx_b" 2
	check_err $? "multipath route not offloaded after deletion of route sharing the nexthop group"

	# Once the two routes stop sharing a nexthop group, both must still be
	# marked as offloaded.
	ip -6 route add $pfx_a $nh_one $nh_two
	ip -6 route del $pfx_b $nh_one
	ipv6_offload_check "$pfx_b" 1
	check_err $? "singlepath route not offloaded after unsharing the nexthop group"
	ipv6_offload_check "$pfx_a" 2
	check_err $? "multipath route not offloaded after unsharing the nexthop group"

	log_test "IPv6 nexthop group sharing"

	ip -6 route flush $pfx_a
	ip -6 route flush $pfx_b
}
# Measure how long it takes to insert a large number of multipath routes and
# have all of their nexthops marked as offloaded. The test fails when this
# takes longer than the timeout.
ipv6_route_rate()
{
	local batch_dir=$(mktemp -d)
	local num_rts=$((40 * 1024))
	local num_nhs=16
	local timeout=60
	local nexthops=""
	local deadline
	local total
	local count
	local start
	local diff
	local end
	local i

	RET=0

	# Prepare 40K /64 multipath routes with 16 nexthops each and check how
	# long it takes to add them. A limit of 60 seconds is set. It is much
	# higher than insertion should take and meant to flag a serious
	# regression.
	# Each nexthop is counted separately, so this is the number of
	# "offload" lines to wait for.
	total=$((num_nhs * num_rts))

	for i in $(seq 1 $num_nhs); do
		ip -6 address add 2001:db8:1::10:$i/128 dev $tor1_p1
		nexthops+=" nexthop via 2001:db8:1::10:$i dev $spine_p1"
	done

	for i in $(seq 1 $num_rts); do
		echo "route add 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \
			>> $batch_dir/add.batch
		echo "route del 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \
			>> $batch_dir/del.batch
	done

	start=$(date +%s.%N)
	# Bound the wait below so that a device that never offloads the routes
	# fails the test instead of hanging it. Leaving the loop through the
	# deadline implies more than $timeout seconds have elapsed, so the
	# duration check below reports the failure.
	deadline=$((SECONDS + timeout))

	ip -batch $batch_dir/add.batch
	count=$(ip -6 route show | grep offload | wc -l)
	while [ $count -lt $total ] && [ $SECONDS -le $deadline ]; do
		sleep .01
		count=$(ip -6 route show | grep offload | wc -l)
	done

	end=$(date +%s.%N)

	diff=$(echo "$end - $start" | bc -l)
	test "$(echo "$diff > $timeout" | bc -l)" -eq 0
	check_err $? "route insertion took too long"

	log_info "inserted $num_rts routes in $diff seconds"

	log_test "IPv6 routes insertion rate"

	ip -batch $batch_dir/del.batch
	for i in $(seq 1 $num_nhs); do
		ip -6 address del 2001:db8:1::10:$i/128 dev $tor1_p1
	done
	rm -rf $batch_dir
}
# Map the NETIFS entries to their roles in the topology and build it.
setup_prepare()
{
	local setup_step

	# p1/p3 are the two spine ports; p2/p4 are the ToR ports.
	spine_p1=${NETIFS[p1]}
	tor1_p1=${NETIFS[p2]}

	spine_p2=${NETIFS[p3]}
	tor2_p1=${NETIFS[p4]}

	for setup_step in vrf_prepare forwarding_enable \
			  tor1_create tor2_create spine_create; do
		$setup_step
	done
}
# Tear the topology down; the create steps of setup_prepare are undone in
# reverse order.
cleanup()
{
	local teardown_step

	for teardown_step in pre_cleanup \
			     spine_destroy tor2_destroy tor1_destroy \
			     forwarding_restore vrf_cleanup; do
		$teardown_step
	done
}
# Run cleanup whenever the script exits, whatever the reason.
trap cleanup EXIT
# Build the topology, then run the tests.
# NOTE(review): setup_wait and tests_run are not defined in this file;
# presumably they come from lib.sh and tests_run iterates ALL_TESTS - confirm.
setup_prepare
setup_wait
tests_run
# NOTE(review): EXIT_STATUS is not assigned in this file; presumably
# maintained by the lib.sh helpers - confirm.
exit $EXIT_STATUS
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment