Commit d9e1661d authored by David S. Miller

Merge branch 'mlxsw-Reflect-nexthop-status-changes'

Jiri Pirko says:

====================
mlxsw: Reflect nexthop status changes

Ido says:

When the kernel forwards IPv4 packets via multipath routes it doesn't
consider nexthops that are dead or linkdown, for example when the
nexthop netdev is administratively down or doesn't have a carrier.

Devices capable of offloading such multipath routes need to be made
aware of changes in the reflected nexthops' status. Otherwise, the
device might forward packets via non-functional nexthops, resulting in
packet loss. This patchset aims to fix that.

The first 11 patches deal with the necessary restructuring in the
mlxsw driver, so that it's able to correctly add and remove nexthops
from the device's adjacency table.

The 12th patch adds the NH_{ADD,DEL} events to the FIB notification
chain. These notifications are sent whenever the kernel decides to add
or remove a nexthop from the forwarding plane.

Finally, the last three patches add support for these events in the
mlxsw driver, which is currently the only driver capable of offloading
multipath routes.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents b05d0cfa 9665b745
...@@ -3473,6 +3473,8 @@ mlxsw_sp_rif_alloc(u16 rif, struct net_device *l3_dev, struct mlxsw_sp_fid *f) ...@@ -3473,6 +3473,8 @@ mlxsw_sp_rif_alloc(u16 rif, struct net_device *l3_dev, struct mlxsw_sp_fid *f)
if (!r) if (!r)
return NULL; return NULL;
INIT_LIST_HEAD(&r->nexthop_list);
INIT_LIST_HEAD(&r->neigh_list);
ether_addr_copy(r->addr, l3_dev->dev_addr); ether_addr_copy(r->addr, l3_dev->dev_addr);
r->mtu = l3_dev->mtu; r->mtu = l3_dev->mtu;
r->ref_count = 1; r->ref_count = 1;
...@@ -3541,6 +3543,8 @@ static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport, ...@@ -3541,6 +3543,8 @@ static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport,
u16 fid = f->fid; u16 fid = f->fid;
u16 rif = r->rif; u16 rif = r->rif;
mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r);
mlxsw_sp->rifs[rif] = NULL; mlxsw_sp->rifs[rif] = NULL;
f->r = NULL; f->r = NULL;
...@@ -3770,6 +3774,8 @@ void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, ...@@ -3770,6 +3774,8 @@ void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fid *f = r->f; struct mlxsw_sp_fid *f = r->f;
u16 rif = r->rif; u16 rif = r->rif;
mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r);
mlxsw_sp->rifs[rif] = NULL; mlxsw_sp->rifs[rif] = NULL;
f->r = NULL; f->r = NULL;
......
...@@ -108,6 +108,8 @@ struct mlxsw_sp_fid { ...@@ -108,6 +108,8 @@ struct mlxsw_sp_fid {
}; };
struct mlxsw_sp_rif { struct mlxsw_sp_rif {
struct list_head nexthop_list;
struct list_head neigh_list;
struct net_device *dev; struct net_device *dev;
unsigned int ref_count; unsigned int ref_count;
struct mlxsw_sp_fid *f; struct mlxsw_sp_fid *f;
...@@ -254,13 +256,14 @@ struct mlxsw_sp_router { ...@@ -254,13 +256,14 @@ struct mlxsw_sp_router {
struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT]; struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT];
struct mlxsw_sp_vr *vrs; struct mlxsw_sp_vr *vrs;
struct rhashtable neigh_ht; struct rhashtable neigh_ht;
struct rhashtable nexthop_group_ht;
struct rhashtable nexthop_ht;
struct { struct {
struct delayed_work dw; struct delayed_work dw;
unsigned long interval; /* ms */ unsigned long interval; /* ms */
} neighs_update; } neighs_update;
struct delayed_work nexthop_probe_dw; struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
struct list_head nexthop_group_list;
struct list_head nexthop_neighs_list; struct list_head nexthop_neighs_list;
bool aborted; bool aborted;
}; };
...@@ -601,6 +604,8 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); ...@@ -601,6 +604,8 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
int mlxsw_sp_router_netevent_event(struct notifier_block *unused, int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
unsigned long event, void *ptr); unsigned long event, void *ptr);
void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *r);
int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count); int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count);
void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
......
...@@ -214,11 +214,18 @@ struct fib_entry_notifier_info { ...@@ -214,11 +214,18 @@ struct fib_entry_notifier_info {
u32 nlflags; u32 nlflags;
}; };
/* Notifier payload describing a single nexthop. call_fib_nh_notifiers()
 * fills it in and hands &info to call_fib_notifiers() for the
 * FIB_EVENT_NH_ADD and FIB_EVENT_NH_DEL events.
 */
struct fib_nh_notifier_info {
struct fib_notifier_info info; /* must be first */
/* Nexthop the event refers to. */
struct fib_nh *fib_nh;
};
enum fib_event_type { enum fib_event_type {
FIB_EVENT_ENTRY_ADD, FIB_EVENT_ENTRY_ADD,
FIB_EVENT_ENTRY_DEL, FIB_EVENT_ENTRY_DEL,
FIB_EVENT_RULE_ADD, FIB_EVENT_RULE_ADD,
FIB_EVENT_RULE_DEL, FIB_EVENT_RULE_DEL,
FIB_EVENT_NH_ADD,
FIB_EVENT_NH_DEL,
}; };
int register_fib_notifier(struct notifier_block *nb, int register_fib_notifier(struct notifier_block *nb,
......
...@@ -1355,6 +1355,36 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) ...@@ -1355,6 +1355,36 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local)
return ret; return ret;
} }
/* Tell FIB notifier listeners that a nexthop entered or left the
 * forwarding plane.
 *
 * @fib_nh:     nexthop whose status changed; its nh_flags must already
 *              reflect the new state.
 * @event_type: FIB_EVENT_NH_ADD or FIB_EVENT_NH_DEL; any other value is
 *              ignored.
 *
 * FIB_EVENT_NH_ADD is only emitted when the nexthop is usable, i.e. it is
 * not RTNH_F_DEAD and not a RTNH_F_LINKDOWN nexthop on a device configured
 * to ignore routes with linkdown. FIB_EVENT_NH_DEL is only emitted when the
 * nexthop is unusable by the same criteria.
 *
 * Returns the result of call_fib_notifiers() when a notification is sent,
 * NOTIFY_DONE otherwise.
 *
 * The in_device is looked up with __in_dev_get_rtnl(), so callers are
 * expected to hold RTNL.
 */
static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
				 enum fib_event_type event_type)
{
	struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev);
	struct fib_nh_notifier_info info = {
		.fib_nh = fib_nh,
	};

	switch (event_type) {
	case FIB_EVENT_NH_ADD:
		if (fib_nh->nh_flags & RTNH_F_DEAD)
			break;
		if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
		    fib_nh->nh_flags & RTNH_F_LINKDOWN)
			break;
		return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
					  &info.info);
	case FIB_EVENT_NH_DEL:
		/* in_dev can be NULL here: nexthops may be synced down after
		 * the device's IPv4 state has already been destroyed. Only
		 * consult the linkdown setting when in_dev exists; a dead
		 * nexthop is reported regardless.
		 */
		if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
		     fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
		    (fib_nh->nh_flags & RTNH_F_DEAD))
			return call_fib_notifiers(dev_net(fib_nh->nh_dev),
						  event_type, &info.info);
		break;
	default:
		break;
	}

	return NOTIFY_DONE;
}
/* Event force Flags Description /* Event force Flags Description
* NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host
* NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host
...@@ -1396,6 +1426,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) ...@@ -1396,6 +1426,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
break; break;
} }
call_fib_nh_notifiers(nexthop_nh,
FIB_EVENT_NH_DEL);
dead++; dead++;
} }
#ifdef CONFIG_IP_ROUTE_MULTIPATH #ifdef CONFIG_IP_ROUTE_MULTIPATH
...@@ -1550,6 +1582,7 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) ...@@ -1550,6 +1582,7 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
continue; continue;
alive++; alive++;
nexthop_nh->nh_flags &= ~nh_flags; nexthop_nh->nh_flags &= ~nh_flags;
call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);
} endfor_nexthops(fi) } endfor_nexthops(fi)
if (alive > 0) { if (alive > 0) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment