Commit 53e50a6e authored by David S. Miller

Merge branch 'mlxsw-Add-VxLAN-support'

Ido Schimmel says:

====================
mlxsw: Add VxLAN support

This patchset adds support for VxLAN offload in the mlxsw driver.

With regard to the forwarding plane, VxLAN support is composed of two
main parts: encapsulation and decapsulation.

In the device, NVE encapsulation (and VxLAN in particular) takes place
in the bridge. A packet can be encapsulated using VxLAN either because
it hits an FDB entry that forwards it to the router with the IP of the
remote VTEP, or because it is flooded, in which case it is sent to a
list of remote VTEPs (in addition to local ports). In either case, the
VNI is derived from the filtering identifier (FID) the packet was
classified to at ingress, and the underlay source IP is taken from a
device-global configuration.
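
As a hedged illustration only: the FID/VNI binding surfaces through the
NVE interfaces added to spectrum.h later in this patchset. The function
name below is ours and the snippet is a sketch of a possible caller,
not the actual call site (which lives in the collapsed spectrum_nve.c /
switchdev code):

    static int example_fid_vni_join(struct mlxsw_sp *mlxsw_sp,
                                    struct mlxsw_sp_fid *fid,
                                    const struct net_device *vxlan_dev,
                                    __be32 vni,
                                    struct netlink_ext_ack *extack)
    {
            struct mlxsw_sp_nve_params params = {
                    .type = MLXSW_SP_NVE_TYPE_VXLAN,
                    .vni = vni,
                    .dev = vxlan_dev,
            };

            /* Packets classified to 'fid' at ingress are encapsulated
             * with 'vni'; decapsulated packets carrying 'vni' are
             * classified back to 'fid'.
             */
            return mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, &params, extack);
    }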

VxLAN decapsulation takes place in the underlay router, where packets
that hit a local route corresponding to the source IP of the local
VTEP are decapsulated and injected into the bridge. The packets are
classified to a FID based on the VNI they carry.
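
Similarly hedged, the decap side is expressed through router helpers
added to spectrum.h; the snippet below only illustrates how they might
be called (local variable names are ours), not the actual call site:

    /* local_vtep_ip4: the VxLAN device's configured source address;
     * tunnel_index: the NVE decap entry programmed by the NVE core.
     */
    union mlxsw_sp_l3addr ul_sip = { .addr4 = local_vtep_ip4 };
    int err;

    /* Promote the local route matching the VTEP source IP to an NVE
     * decap route.
     */
    err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, RT_TABLE_MAIN,
                                            MLXSW_SP_L3_PROTO_IPV4,
                                            &ul_sip, tunnel_index);
    if (err)
            return err;

    /* ... and on teardown: */
    mlxsw_sp_router_nve_demote_decap(mlxsw_sp, RT_TABLE_MAIN,
                                     MLXSW_SP_L3_PROTO_IPV4, &ul_sip);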

The first six patches export the required APIs in the VxLAN and mlxsw
drivers to allow the introduction of the NVE core in the next two
patches. The NVE core is designed to support a variety of NVE
encapsulations (e.g., VxLAN, NVGRE) and different ASICs, but currently
only VxLAN and Spectrum are supported. Spectrum-2 support will be added
in the future.
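
As a hedged sketch of the design (the actual dispatch is in
spectrum_nve.c, whose diff is collapsed below), the core presumably
selects the ASIC- and type-specific ops and drives them roughly like
this, assuming the ops array is indexed by enum mlxsw_sp_nve_type:

    const struct mlxsw_sp_nve_ops *ops;
    struct mlxsw_sp_nve_config config;
    int err;

    ops = mlxsw_sp->nve->nve_ops_arr[params->type];
    if (!ops->can_offload(mlxsw_sp->nve, params->dev, extack))
            return -EOPNOTSUPP;

    ops->nve_config(mlxsw_sp->nve, params->dev, &config);
    err = ops->init(mlxsw_sp->nve, &config);
    if (err)
            return err;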

The last 10 patches add support for VxLAN decapsulation and
encapsulation, and include the addition of the required switchdev APIs
in the VxLAN driver. These APIs allow capable drivers to be notified
about the addition and deletion of FDB entries to and from the VxLAN
device's FDB.
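
A minimal, hedged sketch of a driver-side consumer of these
notifications (the handler name is ours; the notifier types and the
FDB info structure are the ones added to switchdev.h and vxlan.h
further down, and mlxsw's real handler lives in its switchdev code):

    static int example_switchdev_event(struct notifier_block *unused,
                                       unsigned long event, void *ptr)
    {
            struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
            struct switchdev_notifier_vxlan_fdb_info *fdb_info;

            if (!netif_is_vxlan(dev))
                    return NOTIFY_DONE;

            switch (event) {
            case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE:
                    fdb_info = ptr;
                    /* Program {fdb_info->eth_addr, fdb_info->vni} ->
                     * fdb_info->remote_ip in hardware, then emit
                     * SWITCHDEV_VXLAN_FDB_OFFLOADED so the VxLAN driver
                     * marks the entry as offloaded (NTF_OFFLOADED).
                     */
                    break;
            case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE:
                    fdb_info = ptr;
                    /* Remove the corresponding hardware entry. */
                    break;
            }

            return NOTIFY_DONE;
    }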

Subsequent patchsets will add selftests (generic and mlxsw-specific),
data plane learning, FDB extack and vetoing, and support for VLAN-aware
bridges (one VNI per VxLAN device model).

v2:
* Implement netif_is_vxlan() using rtnl_link_ops->kind (Jakub & Stephen)
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents aadd4355 1231e04f
......@@ -27,7 +27,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_acl_flex_keys.o \
spectrum1_mr_tcam.o spectrum2_mr_tcam.o \
spectrum_mr_tcam.o spectrum_mr.o \
spectrum_qdisc.o spectrum_span.o
spectrum_qdisc.o spectrum_span.o \
spectrum_nve.o spectrum_nve_vxlan.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
......
......@@ -2994,6 +2994,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
goto err_port_qdiscs_init;
}
err = mlxsw_sp_port_nve_init(mlxsw_sp_port);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize NVE\n",
mlxsw_sp_port->local_port);
goto err_port_nve_init;
}
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1);
if (IS_ERR(mlxsw_sp_port_vlan)) {
dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n",
......@@ -3022,6 +3029,8 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
err_port_vlan_get:
mlxsw_sp_port_nve_fini(mlxsw_sp_port);
err_port_nve_init:
mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
err_port_qdiscs_init:
mlxsw_sp_port_fids_fini(mlxsw_sp_port);
......@@ -3061,6 +3070,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
mlxsw_sp->ports[local_port] = NULL;
mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
mlxsw_sp_port_vlan_flush(mlxsw_sp_port);
mlxsw_sp_port_nve_fini(mlxsw_sp_port);
mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
mlxsw_sp_port_fids_fini(mlxsw_sp_port);
mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
......@@ -3795,6 +3805,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_afa_init;
}
err = mlxsw_sp_nve_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize NVE\n");
goto err_nve_init;
}
err = mlxsw_sp_router_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
......@@ -3841,6 +3857,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
err_netdev_notifier:
mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
mlxsw_sp_nve_fini(mlxsw_sp);
err_nve_init:
mlxsw_sp_afa_fini(mlxsw_sp);
err_afa_init:
mlxsw_sp_counter_pool_fini(mlxsw_sp);
......@@ -3873,6 +3891,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops;
mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops;
mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops;
mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
}
......@@ -3887,6 +3906,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops;
mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops;
mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
}
......@@ -3900,6 +3920,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_acl_fini(mlxsw_sp);
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
mlxsw_sp_router_fini(mlxsw_sp);
mlxsw_sp_nve_fini(mlxsw_sp);
mlxsw_sp_afa_fini(mlxsw_sp);
mlxsw_sp_counter_pool_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
......@@ -4566,6 +4587,41 @@ static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port)
mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false);
}
static bool mlxsw_sp_bridge_has_multiple_vxlans(struct net_device *br_dev)
{
unsigned int num_vxlans = 0;
struct net_device *dev;
struct list_head *iter;
netdev_for_each_lower_dev(br_dev, dev, iter) {
if (netif_is_vxlan(dev))
num_vxlans++;
}
return num_vxlans > 1;
}
static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev,
struct netlink_ext_ack *extack)
{
if (br_multicast_enabled(br_dev)) {
NL_SET_ERR_MSG_MOD(extack, "Multicast can not be enabled on a bridge with a VxLAN device");
return false;
}
if (br_vlan_enabled(br_dev)) {
NL_SET_ERR_MSG_MOD(extack, "VLAN filtering can not be enabled on a bridge with a VxLAN device");
return false;
}
if (mlxsw_sp_bridge_has_multiple_vxlans(br_dev)) {
NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices are not supported in a VLAN-unaware bridge");
return false;
}
return true;
}
static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
struct net_device *dev,
unsigned long event, void *ptr)
......@@ -4595,6 +4651,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
}
if (!info->linking)
break;
if (netif_is_bridge_master(upper_dev) &&
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) &&
mlxsw_sp_bridge_has_vxlan(upper_dev) &&
!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
return -EOPNOTSUPP;
if (netdev_has_any_upper_dev(upper_dev) &&
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
......@@ -4752,6 +4813,11 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
}
if (!info->linking)
break;
if (netif_is_bridge_master(upper_dev) &&
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) &&
mlxsw_sp_bridge_has_vxlan(upper_dev) &&
!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
return -EOPNOTSUPP;
if (netdev_has_any_upper_dev(upper_dev) &&
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
......@@ -4898,6 +4964,63 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
return netif_is_l3_master(info->upper_dev);
}
static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *dev,
unsigned long event, void *ptr)
{
struct netdev_notifier_changeupper_info *cu_info;
struct netdev_notifier_info *info = ptr;
struct netlink_ext_ack *extack;
struct net_device *upper_dev;
extack = netdev_notifier_info_to_extack(info);
switch (event) {
case NETDEV_CHANGEUPPER:
cu_info = container_of(info,
struct netdev_notifier_changeupper_info,
info);
upper_dev = cu_info->upper_dev;
if (!netif_is_bridge_master(upper_dev))
return 0;
if (!mlxsw_sp_lower_get(upper_dev))
return 0;
if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
return -EOPNOTSUPP;
if (cu_info->linking) {
if (!netif_running(dev))
return 0;
return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev,
dev, extack);
} else {
mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev);
}
break;
case NETDEV_PRE_UP:
upper_dev = netdev_master_upper_dev_get(dev);
if (!upper_dev)
return 0;
if (!netif_is_bridge_master(upper_dev))
return 0;
if (!mlxsw_sp_lower_get(upper_dev))
return 0;
return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev,
extack);
case NETDEV_DOWN:
upper_dev = netdev_master_upper_dev_get(dev);
if (!upper_dev)
return 0;
if (!netif_is_bridge_master(upper_dev))
return 0;
if (!mlxsw_sp_lower_get(upper_dev))
return 0;
mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev);
break;
}
return 0;
}
static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
......@@ -4914,6 +5037,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
}
mlxsw_sp_span_respin(mlxsw_sp);
if (netif_is_vxlan(dev))
err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr);
if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
event, ptr);
......
......@@ -16,6 +16,7 @@
#include <net/psample.h>
#include <net/pkt_cls.h>
#include <net/red.h>
#include <net/vxlan.h>
#include "port.h"
#include "core.h"
......@@ -55,6 +56,8 @@ enum mlxsw_sp_resource_id {
struct mlxsw_sp_port;
struct mlxsw_sp_rif;
struct mlxsw_sp_span_entry;
enum mlxsw_sp_l3proto;
union mlxsw_sp_l3addr;
struct mlxsw_sp_upper {
struct net_device *dev;
......@@ -113,9 +116,11 @@ struct mlxsw_sp_acl;
struct mlxsw_sp_counter_pool;
struct mlxsw_sp_fid_core;
struct mlxsw_sp_kvdl;
struct mlxsw_sp_nve;
struct mlxsw_sp_kvdl_ops;
struct mlxsw_sp_mr_tcam_ops;
struct mlxsw_sp_acl_tcam_ops;
struct mlxsw_sp_nve_ops;
struct mlxsw_sp {
struct mlxsw_sp_port **ports;
......@@ -132,6 +137,7 @@ struct mlxsw_sp {
struct mlxsw_sp_acl *acl;
struct mlxsw_sp_fid_core *fid_core;
struct mlxsw_sp_kvdl *kvdl;
struct mlxsw_sp_nve *nve;
struct notifier_block netdevice_nb;
struct mlxsw_sp_counter_pool *counter_pool;
......@@ -146,6 +152,7 @@ struct mlxsw_sp {
const struct mlxsw_afk_ops *afk_ops;
const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops;
const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
const struct mlxsw_sp_nve_ops **nve_ops_arr;
};
static inline struct mlxsw_sp_upper *
......@@ -235,6 +242,25 @@ struct mlxsw_sp_port {
struct mlxsw_sp_acl_block *eg_acl_block;
};
static inline struct net_device *
mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev)
{
struct net_device *dev;
struct list_head *iter;
netdev_for_each_lower_dev(br_dev, dev, iter) {
if (netif_is_vxlan(dev))
return dev;
}
return NULL;
}
static inline bool mlxsw_sp_bridge_has_vxlan(struct net_device *br_dev)
{
return !!mlxsw_sp_bridge_vxlan_dev_find(br_dev);
}
static inline bool
mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port)
{
......@@ -330,6 +356,13 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
struct net_device *br_dev);
bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *br_dev);
int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
const struct net_device *br_dev,
const struct net_device *vxlan_dev,
struct netlink_ext_ack *extack);
void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
const struct net_device *br_dev,
const struct net_device *vxlan_dev);
/* spectrum.c */
int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
......@@ -431,6 +464,15 @@ struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev);
u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif);
int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
enum mlxsw_sp_l3proto ul_proto,
const union mlxsw_sp_l3addr *ul_sip,
u32 tunnel_index);
void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
enum mlxsw_sp_l3proto ul_proto,
const union mlxsw_sp_l3addr *ul_sip);
int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
u16 *vr_id);
/* spectrum_kvdl.c */
enum mlxsw_sp_kvdl_entry_type {
......@@ -679,6 +721,16 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p);
/* spectrum_fid.c */
struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp,
__be32 vni);
int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni);
int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid,
u32 nve_flood_index);
void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid);
bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid);
int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni);
void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid);
bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid);
int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid,
enum mlxsw_sp_flood_type packet_type, u8 local_port,
bool member);
......@@ -697,6 +749,8 @@ u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid);
struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid);
struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp,
int br_ifindex);
struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp,
int br_ifindex);
struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp,
u16 rif_index);
struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp);
......@@ -742,4 +796,39 @@ extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops;
/* spectrum2_mr_tcam.c */
extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops;
/* spectrum_nve.c */
enum mlxsw_sp_nve_type {
MLXSW_SP_NVE_TYPE_VXLAN,
};
struct mlxsw_sp_nve_params {
enum mlxsw_sp_nve_type type;
__be32 vni;
const struct net_device *dev;
};
extern const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[];
extern const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[];
int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fid *fid,
enum mlxsw_sp_l3proto proto,
union mlxsw_sp_l3addr *addr);
void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fid *fid,
enum mlxsw_sp_l3proto proto,
union mlxsw_sp_l3addr *addr);
u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp);
bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
u32 tb_id, __be32 addr);
int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
struct mlxsw_sp_nve_params *params,
struct netlink_ext_ack *extack);
void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fid *fid);
int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port);
void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port);
int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp);
#endif
This diff is collapsed.
/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
#ifndef _MLXSW_SPECTRUM_NVE_H
#define _MLXSW_SPECTRUM_NVE_H
#include <linux/netlink.h>
#include <linux/rhashtable.h>
#include "spectrum.h"
struct mlxsw_sp_nve_config {
enum mlxsw_sp_nve_type type;
u8 ttl;
u8 learning_en:1;
__be16 udp_dport;
__be32 flowlabel;
u32 ul_tb_id;
enum mlxsw_sp_l3proto ul_proto;
union mlxsw_sp_l3addr ul_sip;
};
struct mlxsw_sp_nve {
struct mlxsw_sp_nve_config config;
struct rhashtable mc_list_ht;
struct mlxsw_sp *mlxsw_sp;
const struct mlxsw_sp_nve_ops **nve_ops_arr;
unsigned int num_nve_tunnels; /* Protected by RTNL */
unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX];
u32 tunnel_index;
};
struct mlxsw_sp_nve_ops {
enum mlxsw_sp_nve_type type;
bool (*can_offload)(const struct mlxsw_sp_nve *nve,
const struct net_device *dev,
struct netlink_ext_ack *extack);
void (*nve_config)(const struct mlxsw_sp_nve *nve,
const struct net_device *dev,
struct mlxsw_sp_nve_config *config);
int (*init)(struct mlxsw_sp_nve *nve,
const struct mlxsw_sp_nve_config *config);
void (*fini)(struct mlxsw_sp_nve *nve);
};
extern const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops;
extern const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops;
#endif
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/random.h>
#include <net/vxlan.h>
#include "reg.h"
#include "spectrum_nve.h"
/* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B)
*
* In the worst case - where we have a VLAN tag on the outer Ethernet
* header and IPv6 in overlay and underlay - we need to parse 128 bytes
*/
#define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128
#define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96
#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS VXLAN_F_UDP_ZERO_CSUM_TX
static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
const struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_config *cfg = &vxlan->cfg;
if (cfg->saddr.sa.sa_family != AF_INET) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only IPv4 underlay is supported");
return false;
}
if (vxlan_addr_multicast(&cfg->remote_ip)) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: Multicast destination IP is not supported");
return false;
}
if (vxlan_addr_any(&cfg->saddr)) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: Source address must be specified");
return false;
}
if (cfg->remote_ifindex) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: Local interface is not supported");
return false;
}
if (cfg->port_min || cfg->port_max) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only default UDP source port range is supported");
return false;
}
if (cfg->tos != 1) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: TOS must be configured to inherit");
return false;
}
if (cfg->flags & VXLAN_F_TTL_INHERIT) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to inherit");
return false;
}
if (cfg->flags & VXLAN_F_LEARN) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: Learning is not supported");
return false;
}
if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: UDP checksum is not supported");
return false;
}
if (cfg->flags & ~MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: Unsupported flag");
return false;
}
if (cfg->ttl == 0) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to 0");
return false;
}
if (cfg->label != 0) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: Flow label must be configured to 0");
return false;
}
return true;
}
static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve,
const struct net_device *dev,
struct mlxsw_sp_nve_config *config)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_config *cfg = &vxlan->cfg;
config->type = MLXSW_SP_NVE_TYPE_VXLAN;
config->ttl = cfg->ttl;
config->flowlabel = cfg->label;
config->learning_en = cfg->flags & VXLAN_F_LEARN ? 1 : 0;
config->ul_tb_id = RT_TABLE_MAIN;
config->ul_proto = MLXSW_SP_L3_PROTO_IPV4;
config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr;
config->udp_dport = cfg->dst_port;
}
static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
unsigned int parsing_depth,
__be16 udp_dport)
{
char mprs_pl[MLXSW_REG_MPRS_LEN];
mlxsw_reg_mprs_pack(mprs_pl, parsing_depth, be16_to_cpu(udp_dport));
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
}
static int
mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_nve_config *config)
{
char tngcr_pl[MLXSW_REG_TNGCR_LEN];
u16 ul_vr_id;
u8 udp_sport;
int err;
err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id,
&ul_vr_id);
if (err)
return err;
mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true,
config->ttl);
/* VxLAN driver's default UDP source port range is 32768 (0x8000)
* to 60999 (0xee47). Set the upper 8 bits of the UDP source port
* to a random number between 0x80 and 0xee
*/
get_random_bytes(&udp_sport, sizeof(udp_sport));
udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80;
mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport);
mlxsw_reg_tngcr_learn_enable_set(tngcr_pl, config->learning_en);
mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id);
mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4));
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
}
static void mlxsw_sp1_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp)
{
char tngcr_pl[MLXSW_REG_TNGCR_LEN];
mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0);
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
}
static int mlxsw_sp1_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp,
unsigned int tunnel_index)
{
char rtdp_pl[MLXSW_REG_RTDP_LEN];
mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
}
static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve,
const struct mlxsw_sp_nve_config *config)
{
struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
int err;
err = mlxsw_sp_nve_parsing_set(mlxsw_sp,
MLXSW_SP_NVE_VXLAN_PARSING_DEPTH,
config->udp_dport);
if (err)
return err;
err = mlxsw_sp1_nve_vxlan_config_set(mlxsw_sp, config);
if (err)
goto err_config_set;
err = mlxsw_sp1_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index);
if (err)
goto err_rtdp_set;
err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id,
config->ul_proto,
&config->ul_sip,
nve->tunnel_index);
if (err)
goto err_promote_decap;
return 0;
err_promote_decap:
err_rtdp_set:
mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
err_config_set:
mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH,
config->udp_dport);
return err;
}
static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
{
struct mlxsw_sp_nve_config *config = &nve->config;
struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id,
config->ul_proto, &config->ul_sip);
mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH,
config->udp_dport);
}
const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = {
.type = MLXSW_SP_NVE_TYPE_VXLAN,
.can_offload = mlxsw_sp1_nve_vxlan_can_offload,
.nve_config = mlxsw_sp_nve_vxlan_config,
.init = mlxsw_sp1_nve_vxlan_init,
.fini = mlxsw_sp1_nve_vxlan_fini,
};
static bool mlxsw_sp2_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
const struct net_device *dev,
struct netlink_ext_ack *extack)
{
return false;
}
static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve,
const struct mlxsw_sp_nve_config *config)
{
return -EOPNOTSUPP;
}
static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
{
}
const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = {
.type = MLXSW_SP_NVE_TYPE_VXLAN,
.can_offload = mlxsw_sp2_nve_vxlan_can_offload,
.nve_config = mlxsw_sp_nve_vxlan_config,
.init = mlxsw_sp2_nve_vxlan_init,
.fini = mlxsw_sp2_nve_vxlan_fini,
};
......@@ -366,6 +366,7 @@ enum mlxsw_sp_fib_entry_type {
* encapsulating entries.)
*/
MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};
struct mlxsw_sp_nexthop_group;
......@@ -741,6 +742,19 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
return NULL;
}
int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
u16 *vr_id)
{
struct mlxsw_sp_vr *vr;
vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
if (!vr)
return -ESRCH;
*vr_id = vr->id;
return 0;
}
static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
enum mlxsw_sp_l3proto proto)
{
......@@ -1128,6 +1142,52 @@ mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}
static struct mlxsw_sp_fib_entry *
mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
enum mlxsw_sp_l3proto proto,
const union mlxsw_sp_l3addr *addr,
enum mlxsw_sp_fib_entry_type type)
{
struct mlxsw_sp_fib_entry *fib_entry;
struct mlxsw_sp_fib_node *fib_node;
unsigned char addr_prefix_len;
struct mlxsw_sp_fib *fib;
struct mlxsw_sp_vr *vr;
const void *addrp;
size_t addr_len;
u32 addr4;
vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
if (!vr)
return NULL;
fib = mlxsw_sp_vr_fib(vr, proto);
switch (proto) {
case MLXSW_SP_L3_PROTO_IPV4:
addr4 = be32_to_cpu(addr->addr4);
addrp = &addr4;
addr_len = 4;
addr_prefix_len = 32;
break;
case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
default:
WARN_ON(1);
return NULL;
}
fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
addr_prefix_len);
if (!fib_node || list_empty(&fib_node->entry_list))
return NULL;
fib_entry = list_first_entry(&fib_node->entry_list,
struct mlxsw_sp_fib_entry, list);
if (fib_entry->type != type)
return NULL;
return fib_entry;
}
/* Given an IPIP entry, find the corresponding decap route. */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
......@@ -1765,6 +1825,56 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
return 0;
}
int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
enum mlxsw_sp_l3proto ul_proto,
const union mlxsw_sp_l3addr *ul_sip,
u32 tunnel_index)
{
enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
struct mlxsw_sp_fib_entry *fib_entry;
int err;
/* It is valid to create a tunnel with a local IP and only later
* assign this IP address to a local interface
*/
fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
ul_proto, ul_sip,
type);
if (!fib_entry)
return 0;
fib_entry->decap.tunnel_index = tunnel_index;
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
if (err)
goto err_fib_entry_update;
return 0;
err_fib_entry_update:
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
return err;
}
void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
enum mlxsw_sp_l3proto ul_proto,
const union mlxsw_sp_l3addr *ul_sip)
{
enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
struct mlxsw_sp_fib_entry *fib_entry;
fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
ul_proto, ul_sip,
type);
if (!fib_entry)
return;
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}
struct mlxsw_sp_neigh_key {
struct neighbour *n;
};
......@@ -3815,6 +3925,7 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
return !!nh_group->nh_rif;
case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
return true;
default:
return false;
......@@ -3848,7 +3959,8 @@ mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
int i;
if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
return;
}
......@@ -4072,6 +4184,18 @@ mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
fib_entry->decap.tunnel_index);
}
static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry,
enum mlxsw_reg_ralue_op op)
{
char ralue_pl[MLXSW_REG_RALUE_LEN];
mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
fib_entry->decap.tunnel_index);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry,
enum mlxsw_reg_ralue_op op)
......@@ -4086,6 +4210,8 @@ static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
fib_entry, op);
case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
}
return -EINVAL;
}
......@@ -4121,6 +4247,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
struct net_device *dev = fen_info->fi->fib_dev;
struct mlxsw_sp_ipip_entry *ipip_entry;
struct fib_info *fi = fen_info->fi;
......@@ -4135,6 +4262,15 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
fib_entry,
ipip_entry);
}
if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
dip.addr4)) {
u32 t_index;
t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
fib_entry->decap.tunnel_index = t_index;
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
return 0;
}
/* fall through */
case RTN_BROADCAST:
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
......
......@@ -4,6 +4,7 @@
#include <linux/etherdevice.h>
#include <linux/inetdevice.h>
#include <net/netevent.h>
#include <net/vxlan.h>
#include <linux/idr.h>
#include <net/dst_metadata.h>
#include <net/arp.h>
......@@ -187,7 +188,7 @@ static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev)
return false;
if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch"))
return true;
if (!strcmp(netdev->rtnl_link_ops->kind, "vxlan"))
if (netif_is_vxlan(netdev))
return true;
return false;
......
......@@ -2728,6 +2728,7 @@ rocker_fdb_offload_notify(struct rocker_port *rocker_port,
info.addr = recv_info->addr;
info.vid = recv_info->vid;
info.offloaded = true;
call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED,
rocker_port->dev, &info.info);
}
......
......@@ -103,22 +103,6 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
}
static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
{
if (ipa->sa.sa_family == AF_INET6)
return ipv6_addr_any(&ipa->sin6.sin6_addr);
else
return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
}
static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
{
if (ipa->sa.sa_family == AF_INET6)
return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
else
return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
}
static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
{
if (nla_len(nla) >= sizeof(struct in6_addr)) {
......@@ -151,16 +135,6 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
}
static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
{
return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
}
static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
{
return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
}
static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
{
if (nla_len(nla) >= sizeof(struct in6_addr)) {
......@@ -298,6 +272,8 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
ndm->ndm_state = fdb->state;
ndm->ndm_ifindex = vxlan->dev->ifindex;
ndm->ndm_flags = fdb->flags;
if (rdst->offloaded)
ndm->ndm_flags |= NTF_OFFLOADED;
ndm->ndm_type = RTN_UNICAST;
if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
......@@ -353,8 +329,8 @@ static inline size_t vxlan_nlmsg_size(void)
+ nla_total_size(sizeof(struct nda_cacheinfo));
}
static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
struct vxlan_rdst *rd, int type)
static void __vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
struct vxlan_rdst *rd, int type)
{
struct net *net = dev_net(vxlan->dev);
struct sk_buff *skb;
......@@ -379,6 +355,49 @@ static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
static void vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
struct vxlan_fdb *fdb,
struct vxlan_rdst *rd,
bool adding)
{
struct switchdev_notifier_vxlan_fdb_info info;
enum switchdev_notifier_type notifier_type;
if (WARN_ON(!rd))
return;
notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE
: SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE;
info = (struct switchdev_notifier_vxlan_fdb_info){
.remote_ip = rd->remote_ip,
.remote_port = rd->remote_port,
.remote_vni = rd->remote_vni,
.remote_ifindex = rd->remote_ifindex,
.vni = fdb->vni,
.offloaded = rd->offloaded,
};
memcpy(info.eth_addr, fdb->eth_addr, ETH_ALEN);
call_switchdev_notifiers(notifier_type, vxlan->dev,
&info.info);
}
static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
struct vxlan_rdst *rd, int type)
{
switch (type) {
case RTM_NEWNEIGH:
vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, true);
break;
case RTM_DELNEIGH:
vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, false);
break;
}
__vxlan_fdb_notify(vxlan, fdb, rd, type);
}
static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
......@@ -488,6 +507,47 @@ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
return NULL;
}
int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
struct switchdev_notifier_vxlan_fdb_info *fdb_info)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
u8 eth_addr[ETH_ALEN + 2] = { 0 };
struct vxlan_rdst *rdst;
struct vxlan_fdb *f;
int rc = 0;
if (is_multicast_ether_addr(mac) ||
is_zero_ether_addr(mac))
return -EINVAL;
ether_addr_copy(eth_addr, mac);
rcu_read_lock();
f = __vxlan_find_mac(vxlan, eth_addr, vni);
if (!f) {
rc = -ENOENT;
goto out;
}
rdst = first_remote_rcu(f);
memset(fdb_info, 0, sizeof(*fdb_info));
fdb_info->info.dev = dev;
fdb_info->remote_ip = rdst->remote_ip;
fdb_info->remote_port = rdst->remote_port;
fdb_info->remote_vni = rdst->remote_vni;
fdb_info->remote_ifindex = rdst->remote_ifindex;
fdb_info->vni = vni;
fdb_info->offloaded = rdst->offloaded;
ether_addr_copy(fdb_info->eth_addr, mac);
out:
rcu_read_unlock();
return rc;
}
EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc);
/* Replace destination of unicast mac */
static int vxlan_fdb_replace(struct vxlan_fdb *f,
union vxlan_addr *ip, __be16 port, __be32 vni,
......@@ -533,6 +593,7 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
rd->remote_ip = *ip;
rd->remote_port = port;
rd->offloaded = false;
rd->remote_vni = vni;
rd->remote_ifindex = ifindex;
......@@ -782,12 +843,15 @@ static void vxlan_fdb_free(struct rcu_head *head)
static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
bool do_notify)
{
struct vxlan_rdst *rd;
netdev_dbg(vxlan->dev,
"delete %pM\n", f->eth_addr);
--vxlan->addrcnt;
if (do_notify)
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
list_for_each_entry(rd, &f->remotes, list)
vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
hlist_del_rcu(&f->hlist);
call_rcu(&f->rcu, vxlan_fdb_free);
......@@ -3761,6 +3825,51 @@ static struct notifier_block vxlan_notifier_block __read_mostly = {
.notifier_call = vxlan_netdevice_event,
};
static void
vxlan_fdb_offloaded_set(struct net_device *dev,
struct switchdev_notifier_vxlan_fdb_info *fdb_info)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_rdst *rdst;
struct vxlan_fdb *f;
spin_lock_bh(&vxlan->hash_lock);
f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
if (!f)
goto out;
rdst = vxlan_fdb_find_rdst(f, &fdb_info->remote_ip,
fdb_info->remote_port,
fdb_info->remote_vni,
fdb_info->remote_ifindex);
if (!rdst)
goto out;
rdst->offloaded = fdb_info->offloaded;
out:
spin_unlock_bh(&vxlan->hash_lock);
}
static int vxlan_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
switch (event) {
case SWITCHDEV_VXLAN_FDB_OFFLOADED:
vxlan_fdb_offloaded_set(dev, ptr);
break;
}
return 0;
}
static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = {
.notifier_call = vxlan_switchdev_event,
};
static __net_init int vxlan_init_net(struct net *net)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
......@@ -3834,11 +3943,17 @@ static int __init vxlan_init_module(void)
if (rc)
goto out2;
rc = rtnl_link_register(&vxlan_link_ops);
rc = register_switchdev_notifier(&vxlan_switchdev_notifier_block);
if (rc)
goto out3;
rc = rtnl_link_register(&vxlan_link_ops);
if (rc)
goto out4;
return 0;
out4:
unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
out3:
unregister_netdevice_notifier(&vxlan_notifier_block);
out2:
......@@ -3851,6 +3966,7 @@ late_initcall(vxlan_init_module);
static void __exit vxlan_cleanup_module(void)
{
rtnl_link_unregister(&vxlan_link_ops);
unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
unregister_netdevice_notifier(&vxlan_notifier_block);
unregister_pernet_subsys(&vxlan_net_ops);
/* rcu_barrier() is called by netns */
......
......@@ -183,8 +183,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
* 1 if something is broken and should be logged (!!! above)
* 2 if packet should be dropped
*/
static inline int INET_ECN_decapsulate(struct sk_buff *skb,
__u8 outer, __u8 inner)
static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce)
{
if (INET_ECN_is_not_ect(inner)) {
switch (outer & INET_ECN_MASK) {
......@@ -198,10 +197,21 @@ static inline int INET_ECN_decapsulate(struct sk_buff *skb,
}
}
if (INET_ECN_is_ce(outer))
*set_ce = INET_ECN_is_ce(outer);
return 0;
}
static inline int INET_ECN_decapsulate(struct sk_buff *skb,
__u8 outer, __u8 inner)
{
bool set_ce = false;
int rc;
rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
if (!rc && set_ce)
INET_ECN_set_ce(skb);
return 0;
return rc;
}
static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
......
......@@ -145,6 +145,10 @@ enum switchdev_notifier_type {
SWITCHDEV_FDB_ADD_TO_DEVICE,
SWITCHDEV_FDB_DEL_TO_DEVICE,
SWITCHDEV_FDB_OFFLOADED,
SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_OFFLOADED,
};
struct switchdev_notifier_info {
......@@ -155,7 +159,8 @@ struct switchdev_notifier_fdb_info {
struct switchdev_notifier_info info; /* must be first */
const unsigned char *addr;
u16 vid;
bool added_by_user;
u8 added_by_user:1,
offloaded:1;
};
static inline struct net_device *
......
......@@ -5,6 +5,8 @@
#include <linux/if_vlan.h>
#include <net/udp_tunnel.h>
#include <net/dst_metadata.h>
#include <net/rtnetlink.h>
#include <net/switchdev.h>
/* VXLAN protocol (RFC 7348) header:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
......@@ -190,6 +192,7 @@ union vxlan_addr {
struct vxlan_rdst {
union vxlan_addr remote_ip;
__be16 remote_port;
u8 offloaded:1;
__be32 remote_vni;
u32 remote_ifindex;
struct list_head list;
......@@ -370,4 +373,65 @@ static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs)
return vs->sock->sk->sk_family;
}
#if IS_ENABLED(CONFIG_IPV6)
static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
{
if (ipa->sa.sa_family == AF_INET6)
return ipv6_addr_any(&ipa->sin6.sin6_addr);
else
return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
}
static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
{
if (ipa->sa.sa_family == AF_INET6)
return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
else
return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
}
#else /* !IS_ENABLED(CONFIG_IPV6) */
static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
{
return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
}
static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
{
return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
}
#endif /* IS_ENABLED(CONFIG_IPV6) */
static inline bool netif_is_vxlan(const struct net_device *dev)
{
return dev->rtnl_link_ops &&
!strcmp(dev->rtnl_link_ops->kind, "vxlan");
}
struct switchdev_notifier_vxlan_fdb_info {
struct switchdev_notifier_info info; /* must be first */
union vxlan_addr remote_ip;
__be16 remote_port;
__be32 remote_vni;
u32 remote_ifindex;
u8 eth_addr[ETH_ALEN];
__be32 vni;
bool offloaded;
};
#if IS_ENABLED(CONFIG_VXLAN)
int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
struct switchdev_notifier_vxlan_fdb_info *fdb_info);
#else
static inline int
vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
struct switchdev_notifier_vxlan_fdb_info *fdb_info)
{
return -ENOENT;
}
#endif
#endif
......@@ -151,7 +151,7 @@ static int br_switchdev_event(struct notifier_block *unused,
break;
}
br_fdb_offloaded_set(br, p, fdb_info->addr,
fdb_info->vid);
fdb_info->vid, true);
break;
case SWITCHDEV_FDB_DEL_TO_BRIDGE:
fdb_info = ptr;
......@@ -163,7 +163,7 @@ static int br_switchdev_event(struct notifier_block *unused,
case SWITCHDEV_FDB_OFFLOADED:
fdb_info = ptr;
br_fdb_offloaded_set(br, p, fdb_info->addr,
fdb_info->vid);
fdb_info->vid, fdb_info->offloaded);
break;
}
......
......@@ -1152,7 +1152,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
}
void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid)
const unsigned char *addr, u16 vid, bool offloaded)
{
struct net_bridge_fdb_entry *fdb;
......@@ -1160,7 +1160,7 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
fdb = br_fdb_find(br, addr, vid);
if (fdb)
fdb->offloaded = 1;
fdb->offloaded = offloaded;
spin_unlock_bh(&br->hash_lock);
}
......@@ -574,7 +574,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid,
bool swdev_notify);
void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid);
const unsigned char *addr, u16 vid, bool offloaded);
/* br_forward.c */
enum br_pkt_type {
......
......@@ -103,7 +103,7 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
static void
br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
u16 vid, struct net_device *dev,
bool added_by_user)
bool added_by_user, bool offloaded)
{
struct switchdev_notifier_fdb_info info;
unsigned long notifier_type;
......@@ -111,6 +111,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
info.addr = mac;
info.vid = vid;
info.added_by_user = added_by_user;
info.offloaded = offloaded;
notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE;
call_switchdev_notifiers(notifier_type, dev, &info.info);
}
......@@ -126,13 +127,15 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr,
fdb->key.vlan_id,
fdb->dst->dev,
fdb->added_by_user);
fdb->added_by_user,
fdb->offloaded);
break;
case RTM_NEWNEIGH:
br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr,
fdb->key.vlan_id,
fdb->dst->dev,
fdb->added_by_user);
fdb->added_by_user,
fdb->offloaded);
break;
}
}
......
......@@ -1478,6 +1478,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
netdev_dbg(dev, "fdb add failed err=%d\n", err);
break;
}
fdb_info->offloaded = true;
call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
&fdb_info->info);
break;
......