Commit 897ca373 authored by David S. Miller's avatar David S. Miller

Merge branch 'geneve-udp-port-offload'

Anjali Singhai Jain says:

====================
Add support for Geneve udp port offload

This patch series adds new ndo ops for Geneve add/del port, so as
to help offload Geneve tunnel functionalities such as RX checksum,
RSS, filters etc.

i40e driver has been tested with the changes to make sure the offloads
happen.

We do understand that this is not the ideal solution and most likely
will be redone with a more generic offload framework.
But this certainly will enable us to start seeing benefits of the
accelerations for Geneve tunnels.

As a side note, we did find an existing issue in i40e driver where a
service task can modify tunnel data structures with no locks held to
help linearize access. A separate patch will be taking care of that issue.

A question for the community regarding the driver Kconfig parameters
for VXLAN and Geneve: it would be ideal to drop those if there is a way
to resolve the vxlan/geneve_get_rx_port symbols while the tunnel modules
are not loaded.

Performance numbers:
With the offloads enabled on X722 devices with remote checksum enabled
and no other tuning in terms of cpu governor etc. on my test machine:

With offload
Throughput: 5527Mbits/sec with a single thread
%cpu: ~43% per core with 4 threads

Without offload
Throughput: 2364Mbits/sec with a single thread
%cpu: ~99% per core with 4 threads

These numbers will improve for X722 as work on it continues. But
this does show the difference between the stack being notified
with csum_level 1 and CHECKSUM_UNNECESSARY and not being notified, i.e. without the RX offload.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 566178f8 cd866606
...@@ -280,6 +280,16 @@ config I40E_VXLAN ...@@ -280,6 +280,16 @@ config I40E_VXLAN
Say Y here if you want to use Virtual eXtensible Local Area Network Say Y here if you want to use Virtual eXtensible Local Area Network
(VXLAN) in the driver. (VXLAN) in the driver.
config I40E_GENEVE
bool "Generic Network Virtualization Encapsulation (GENEVE) Support"
depends on I40E && GENEVE && !(I40E=y && GENEVE=m)
default n
---help---
This allows one to create GENEVE virtual interfaces that provide
Layer 2 Networks over Layer 3 Networks. GENEVE is often used
to tunnel virtual network infrastructure in virtualized environments.
Say Y here if you want to use GENEVE in the driver.
config I40E_DCB config I40E_DCB
bool "Data Center Bridging (DCB) Support" bool "Data Center Bridging (DCB) Support"
default n default n
......
...@@ -245,6 +245,11 @@ struct i40e_tc_configuration { ...@@ -245,6 +245,11 @@ struct i40e_tc_configuration {
struct i40e_tc_info tc_info[I40E_MAX_TRAFFIC_CLASS]; struct i40e_tc_info tc_info[I40E_MAX_TRAFFIC_CLASS];
}; };
/* One UDP tunnel port offload entry; struct i40e_pf keeps an array of these
 * (udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS]).
 */
struct i40e_udp_port_config {
/* NOTE(review): despite the name, this looks like the UDP port number in
 * network byte order (the array replaces the old __be16 vxlan_ports[]) -
 * confirm against the users of udp_ports[].
 */
__be16 index;
/* presumably the tunnel type (e.g. VXLAN vs GENEVE) of this port - confirm */
u8 type;
};
/* struct that defines the Ethernet device */ /* struct that defines the Ethernet device */
struct i40e_pf { struct i40e_pf {
struct pci_dev *pdev; struct pci_dev *pdev;
...@@ -281,11 +286,9 @@ struct i40e_pf { ...@@ -281,11 +286,9 @@ struct i40e_pf {
u32 fd_atr_cnt; u32 fd_atr_cnt;
u32 fd_tcp_rule; u32 fd_tcp_rule;
#ifdef CONFIG_I40E_VXLAN struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
__be16 vxlan_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS]; u16 pending_udp_bitmap;
u16 pending_vxlan_bitmap;
#endif
enum i40e_interrupt_policy int_policy; enum i40e_interrupt_policy int_policy;
u16 rx_itr_default; u16 rx_itr_default;
u16 tx_itr_default; u16 tx_itr_default;
...@@ -322,9 +325,7 @@ struct i40e_pf { ...@@ -322,9 +325,7 @@ struct i40e_pf {
#define I40E_FLAG_FD_ATR_ENABLED BIT_ULL(22) #define I40E_FLAG_FD_ATR_ENABLED BIT_ULL(22)
#define I40E_FLAG_PTP BIT_ULL(25) #define I40E_FLAG_PTP BIT_ULL(25)
#define I40E_FLAG_MFP_ENABLED BIT_ULL(26) #define I40E_FLAG_MFP_ENABLED BIT_ULL(26)
#ifdef CONFIG_I40E_VXLAN #define I40E_FLAG_UDP_FILTER_SYNC BIT_ULL(27)
#define I40E_FLAG_VXLAN_FILTER_SYNC BIT_ULL(27)
#endif
#define I40E_FLAG_PORT_ID_VALID BIT_ULL(28) #define I40E_FLAG_PORT_ID_VALID BIT_ULL(28)
#define I40E_FLAG_DCB_CAPABLE BIT_ULL(29) #define I40E_FLAG_DCB_CAPABLE BIT_ULL(29)
#define I40E_FLAG_RSS_AQ_CAPABLE BIT_ULL(31) #define I40E_FLAG_RSS_AQ_CAPABLE BIT_ULL(31)
...@@ -336,6 +337,7 @@ struct i40e_pf { ...@@ -336,6 +337,7 @@ struct i40e_pf {
#define I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT_ULL(38) #define I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT_ULL(38)
#define I40E_FLAG_LINK_POLLING_ENABLED BIT_ULL(39) #define I40E_FLAG_LINK_POLLING_ENABLED BIT_ULL(39)
#define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(40) #define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(40)
#define I40E_FLAG_GENEVE_OFFLOAD_CAPABLE BIT_ULL(41)
#define I40E_FLAG_NO_PCI_LINK_CHECK BIT_ULL(42) #define I40E_FLAG_NO_PCI_LINK_CHECK BIT_ULL(42)
/* tracks features that get auto disabled by errors */ /* tracks features that get auto disabled by errors */
......
This diff is collapsed.
...@@ -1380,7 +1380,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, ...@@ -1380,7 +1380,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT)) if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
return; return;
/* If VXLAN traffic has an outer UDPv4 checksum we need to check /* If VXLAN/GENEVE traffic has an outer UDPv4 checksum we need to check
* it in the driver, hardware does not do it for us. * it in the driver, hardware does not do it for us.
* Since L3L4P bit was set we assume a valid IHL value (>=5) * Since L3L4P bit was set we assume a valid IHL value (>=5)
* so the total length of IPv4 header is IHL*4 bytes * so the total length of IPv4 header is IHL*4 bytes
...@@ -2001,7 +2001,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, ...@@ -2001,7 +2001,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6))) if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
return; return;
if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) { if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL)) {
/* snag network header to get L4 type and address */ /* snag network header to get L4 type and address */
hdr.network = skb_network_header(skb); hdr.network = skb_network_header(skb);
...@@ -2086,7 +2086,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, ...@@ -2086,7 +2086,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT; I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL))
dtype_cmd |= dtype_cmd |=
((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) << ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
...@@ -2319,7 +2319,7 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, ...@@ -2319,7 +2319,7 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
oudph = udp_hdr(skb); oudph = udp_hdr(skb);
oiph = ip_hdr(skb); oiph = ip_hdr(skb);
l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING; l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
*tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
break; break;
case IPPROTO_GRE: case IPPROTO_GRE:
l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING; l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING;
......
...@@ -163,7 +163,7 @@ enum i40e_dyn_idx_t { ...@@ -163,7 +163,7 @@ enum i40e_dyn_idx_t {
#define I40E_TX_FLAGS_FSO BIT(7) #define I40E_TX_FLAGS_FSO BIT(7)
#define I40E_TX_FLAGS_TSYN BIT(8) #define I40E_TX_FLAGS_TSYN BIT(8)
#define I40E_TX_FLAGS_FD_SB BIT(9) #define I40E_TX_FLAGS_FD_SB BIT(9)
#define I40E_TX_FLAGS_VXLAN_TUNNEL BIT(10) #define I40E_TX_FLAGS_UDP_TUNNEL BIT(10)
#define I40E_TX_FLAGS_VLAN_MASK 0xffff0000 #define I40E_TX_FLAGS_VLAN_MASK 0xffff0000
#define I40E_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 #define I40E_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000
#define I40E_TX_FLAGS_VLAN_PRIO_SHIFT 29 #define I40E_TX_FLAGS_VLAN_PRIO_SHIFT 29
......
...@@ -380,8 +380,11 @@ static struct socket *geneve_create_sock(struct net *net, bool ipv6, ...@@ -380,8 +380,11 @@ static struct socket *geneve_create_sock(struct net *net, bool ipv6,
static void geneve_notify_add_rx_port(struct geneve_sock *gs) static void geneve_notify_add_rx_port(struct geneve_sock *gs)
{ {
struct net_device *dev;
struct sock *sk = gs->sock->sk; struct sock *sk = gs->sock->sk;
struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family; sa_family_t sa_family = sk->sk_family;
__be16 port = inet_sk(sk)->inet_sport;
int err; int err;
if (sa_family == AF_INET) { if (sa_family == AF_INET) {
...@@ -390,6 +393,14 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs) ...@@ -390,6 +393,14 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs)
pr_warn("geneve: udp_add_offload failed with status %d\n", pr_warn("geneve: udp_add_offload failed with status %d\n",
err); err);
} }
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
if (dev->netdev_ops->ndo_add_geneve_port)
dev->netdev_ops->ndo_add_geneve_port(dev, sa_family,
port);
}
rcu_read_unlock();
} }
static int geneve_hlen(struct genevehdr *gh) static int geneve_hlen(struct genevehdr *gh)
...@@ -530,8 +541,20 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, ...@@ -530,8 +541,20 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
static void geneve_notify_del_rx_port(struct geneve_sock *gs) static void geneve_notify_del_rx_port(struct geneve_sock *gs)
{ {
struct net_device *dev;
struct sock *sk = gs->sock->sk; struct sock *sk = gs->sock->sk;
struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family; sa_family_t sa_family = sk->sk_family;
__be16 port = inet_sk(sk)->inet_sport;
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
if (dev->netdev_ops->ndo_del_geneve_port)
dev->netdev_ops->ndo_del_geneve_port(dev, sa_family,
port);
}
rcu_read_unlock();
if (sa_family == AF_INET) if (sa_family == AF_INET)
udp_del_offload(&gs->udp_offloads); udp_del_offload(&gs->udp_offloads);
...@@ -1086,6 +1109,30 @@ static struct device_type geneve_type = { ...@@ -1086,6 +1109,30 @@ static struct device_type geneve_type = {
.name = "geneve", .name = "geneve",
}; };
/* Calls the ndo_add_geneve_port of the caller in order to
* supply the listening GENEVE udp ports. Callers are expected
* to implement the ndo_add_geneve_port.
*/
void geneve_get_rx_port(struct net_device *dev)
{
struct net *net = dev_net(dev);
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_sock *gs;
sa_family_t sa_family;
struct sock *sk;
__be16 port;
rcu_read_lock();
list_for_each_entry_rcu(gs, &gn->sock_list, list) {
sk = gs->sock->sk;
sa_family = sk->sk_family;
port = inet_sk(sk)->inet_sport;
dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, port);
}
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(geneve_get_rx_port);
/* Initialize the device structure. */ /* Initialize the device structure. */
static void geneve_setup(struct net_device *dev) static void geneve_setup(struct net_device *dev)
{ {
......
...@@ -1013,6 +1013,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, ...@@ -1013,6 +1013,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
* a new port starts listening. The operation is protected by the * a new port starts listening. The operation is protected by the
* vxlan_net->sock_lock. * vxlan_net->sock_lock.
* *
* void (*ndo_add_geneve_port)(struct net_device *dev,
* sa_family_t sa_family, __be16 port);
* Called by geneve to notify a driver about the UDP port and socket
* address family that geneve is listening to. It is called only when
* a new port starts listening. The operation is protected by the
* geneve_net->sock_lock.
*
* void (*ndo_del_geneve_port)(struct net_device *dev,
* sa_family_t sa_family, __be16 port);
* Called by geneve to notify the driver about a UDP port and socket
* address family that geneve is not listening to anymore. The operation
* is protected by the geneve_net->sock_lock.
*
* void (*ndo_del_vxlan_port)(struct net_device *dev, * void (*ndo_del_vxlan_port)(struct net_device *dev,
* sa_family_t sa_family, __be16 port); * sa_family_t sa_family, __be16 port);
* Called by vxlan to notify the driver about a UDP port and socket * Called by vxlan to notify the driver about a UDP port and socket
...@@ -1217,7 +1230,12 @@ struct net_device_ops { ...@@ -1217,7 +1230,12 @@ struct net_device_ops {
void (*ndo_del_vxlan_port)(struct net_device *dev, void (*ndo_del_vxlan_port)(struct net_device *dev,
sa_family_t sa_family, sa_family_t sa_family,
__be16 port); __be16 port);
void (*ndo_add_geneve_port)(struct net_device *dev,
sa_family_t sa_family,
__be16 port);
void (*ndo_del_geneve_port)(struct net_device *dev,
sa_family_t sa_family,
__be16 port);
void* (*ndo_dfwd_add_station)(struct net_device *pdev, void* (*ndo_dfwd_add_station)(struct net_device *pdev,
struct net_device *dev); struct net_device *dev);
void (*ndo_dfwd_del_station)(struct net_device *pdev, void (*ndo_dfwd_del_station)(struct net_device *pdev,
......
...@@ -62,6 +62,14 @@ struct genevehdr { ...@@ -62,6 +62,14 @@ struct genevehdr {
struct geneve_opt options[]; struct geneve_opt options[];
}; };
/* geneve_get_rx_port() is only declared when IS_ENABLED(CONFIG_GENEVE) holds;
 * otherwise an empty inline stub is provided so callers still build and the
 * call does nothing.
 */
#if IS_ENABLED(CONFIG_GENEVE)
void geneve_get_rx_port(struct net_device *netdev);
#else
static inline void geneve_get_rx_port(struct net_device *netdev)
{
}
#endif
#ifdef CONFIG_INET #ifdef CONFIG_INET
struct net_device *geneve_dev_create_fb(struct net *net, const char *name, struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
u8 name_assign_type, u16 dst_port); u8 name_assign_type, u16 dst_port);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment