Commit c8e4eff4 authored by Haiyang Zhang, committed by David S. Miller

hv_netvsc: Add support for LRO/RSC in the vSwitch

LRO/RSC in the vSwitch is a feature available in Windows Server 2019
hosts and later. It reduces the per packet processing overhead by
coalescing multiple TCP segments when possible. This patch adds netvsc
driver support for this feature.
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent bd4d08da
...@@ -186,6 +186,7 @@ struct rndis_device { ...@@ -186,6 +186,7 @@ struct rndis_device {
/* Interface */ /* Interface */
struct rndis_message; struct rndis_message;
struct netvsc_device; struct netvsc_device;
struct netvsc_channel;
struct net_device_context; struct net_device_context;
extern u32 netvsc_ring_bytes; extern u32 netvsc_ring_bytes;
...@@ -203,10 +204,7 @@ void netvsc_linkstatus_callback(struct net_device *net, ...@@ -203,10 +204,7 @@ void netvsc_linkstatus_callback(struct net_device *net,
struct rndis_message *resp); struct rndis_message *resp);
int netvsc_recv_callback(struct net_device *net, int netvsc_recv_callback(struct net_device *net,
struct netvsc_device *nvdev, struct netvsc_device *nvdev,
struct vmbus_channel *channel, struct netvsc_channel *nvchan);
void *data, u32 len,
const struct ndis_tcp_ip_checksum_info *csum_info,
const struct ndis_pkt_8021q_info *vlan);
void netvsc_channel_cb(void *context); void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget); int netvsc_poll(struct napi_struct *napi, int budget);
...@@ -222,7 +220,7 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev, ...@@ -222,7 +220,7 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev,
const u8 *key); const u8 *key);
int rndis_filter_receive(struct net_device *ndev, int rndis_filter_receive(struct net_device *ndev,
struct netvsc_device *net_dev, struct netvsc_device *net_dev,
struct vmbus_channel *channel, struct netvsc_channel *nvchan,
void *data, u32 buflen); void *data, u32 buflen);
int rndis_filter_set_device_mac(struct netvsc_device *ndev, int rndis_filter_set_device_mac(struct netvsc_device *ndev,
...@@ -524,6 +522,8 @@ struct nvsp_2_vsc_capability { ...@@ -524,6 +522,8 @@ struct nvsp_2_vsc_capability {
u64 ieee8021q:1; u64 ieee8021q:1;
u64 correlation_id:1; u64 correlation_id:1;
u64 teaming:1; u64 teaming:1;
u64 vsubnetid:1;
u64 rsc:1;
}; };
}; };
} __packed; } __packed;
...@@ -826,7 +826,7 @@ struct nvsp_message { ...@@ -826,7 +826,7 @@ struct nvsp_message {
#define NETVSC_SUPPORTED_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | \ #define NETVSC_SUPPORTED_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | \
NETIF_F_TSO | NETIF_F_IPV6_CSUM | \ NETIF_F_TSO | NETIF_F_IPV6_CSUM | \
NETIF_F_TSO6) NETIF_F_TSO6 | NETIF_F_LRO)
#define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */ #define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */
#define VRSS_CHANNEL_MAX 64 #define VRSS_CHANNEL_MAX 64
...@@ -852,6 +852,18 @@ struct multi_recv_comp { ...@@ -852,6 +852,18 @@ struct multi_recv_comp {
u32 next; /* next entry for writing */ u32 next; /* next entry for writing */
}; };
#define NVSP_RSC_MAX 562 /* Max #RSC frags in a vmbus xfer page pkt */
/* Per-channel accumulator for the fragments of one received packet.
 * Fragments are appended by rsc_add_data(); once the packet is complete,
 * netvsc_alloc_recv_skb() copies data[0..cnt-1] into a single skb of
 * pktlen bytes.
 */
struct nvsc_rsc {
const struct ndis_pkt_8021q_info *vlan; /* recorded from the first fragment */
const struct ndis_tcp_ip_checksum_info *csum_info; /* recorded from the first fragment */
u8 is_last; /* last RNDIS msg in a vmtransfer_page */
u32 cnt; /* #fragments in an RSC packet */
u32 pktlen; /* Full packet length */
void *data[NVSP_RSC_MAX]; /* start of each fragment's payload */
u32 len[NVSP_RSC_MAX]; /* length of each fragment, parallel to data[] */
};
struct netvsc_stats { struct netvsc_stats {
u64 packets; u64 packets;
u64 bytes; u64 bytes;
...@@ -955,6 +967,7 @@ struct netvsc_channel { ...@@ -955,6 +967,7 @@ struct netvsc_channel {
struct multi_send_data msd; struct multi_send_data msd;
struct multi_recv_comp mrc; struct multi_recv_comp mrc;
atomic_t queue_sends; atomic_t queue_sends;
struct nvsc_rsc rsc;
struct netvsc_stats tx_stats; struct netvsc_stats tx_stats;
struct netvsc_stats rx_stats; struct netvsc_stats rx_stats;
...@@ -1136,7 +1149,8 @@ struct rndis_oobd { ...@@ -1136,7 +1149,8 @@ struct rndis_oobd {
/* Packet extension field contents associated with a Data message. */ /* Packet extension field contents associated with a Data message. */
struct rndis_per_packet_info { struct rndis_per_packet_info {
u32 size; u32 size;
u32 type; u32 type:31;
u32 internal:1;
u32 ppi_offset; u32 ppi_offset;
}; };
...@@ -1157,6 +1171,25 @@ enum ndis_per_pkt_info_type { ...@@ -1157,6 +1171,25 @@ enum ndis_per_pkt_info_type {
MAX_PER_PKT_INFO MAX_PER_PKT_INFO
}; };
/* Per-packet-info types that are looked up with the "internal" bit set
 * (see the rndis_get_ppi() call that passes RNDIS_PKTINFO_ID with
 * internal == 1).
 */
enum rndis_per_pkt_info_interal_type {
RNDIS_PKTINFO_ID = 1,
/* Add more members here */
RNDIS_PKTINFO_MAX
};
#define RNDIS_PKTINFO_SUBALLOC BIT(0)
#define RNDIS_PKTINFO_1ST_FRAG BIT(1)
#define RNDIS_PKTINFO_LAST_FRAG BIT(2)
#define RNDIS_PKTINFO_ID_V1 1
/* Payload of the RNDIS_PKTINFO_ID per-packet-info entry. */
struct rndis_pktinfo_id {
u8 ver; /* presumably RNDIS_PKTINFO_ID_V1 - TODO confirm, not checked in the receive path */
u8 flag; /* bitmask of RNDIS_PKTINFO_SUBALLOC / _1ST_FRAG / _LAST_FRAG */
u16 pkt_id; /* NOTE(review): looks like a packet identifier; unused in the visible code */
};
struct ndis_pkt_8021q_info { struct ndis_pkt_8021q_info {
union { union {
struct { struct {
......
...@@ -542,6 +542,9 @@ static int negotiate_nvsp_ver(struct hv_device *device, ...@@ -542,6 +542,9 @@ static int negotiate_nvsp_ver(struct hv_device *device,
init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1; init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
} }
if (nvsp_ver >= NVSP_PROTOCOL_VERSION_61)
init_packet->msg.v2_msg.send_ndis_config.capability.rsc = 1;
trace_nvsp_send(ndev, init_packet); trace_nvsp_send(ndev, init_packet);
ret = vmbus_sendpacket(device->channel, init_packet, ret = vmbus_sendpacket(device->channel, init_packet,
...@@ -1111,11 +1114,12 @@ static void enq_receive_complete(struct net_device *ndev, ...@@ -1111,11 +1114,12 @@ static void enq_receive_complete(struct net_device *ndev,
static int netvsc_receive(struct net_device *ndev, static int netvsc_receive(struct net_device *ndev,
struct netvsc_device *net_device, struct netvsc_device *net_device,
struct vmbus_channel *channel, struct netvsc_channel *nvchan,
const struct vmpacket_descriptor *desc, const struct vmpacket_descriptor *desc,
const struct nvsp_message *nvsp) const struct nvsp_message *nvsp)
{ {
struct net_device_context *net_device_ctx = netdev_priv(ndev); struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct vmbus_channel *channel = nvchan->channel;
const struct vmtransfer_page_packet_header *vmxferpage_packet const struct vmtransfer_page_packet_header *vmxferpage_packet
= container_of(desc, const struct vmtransfer_page_packet_header, d); = container_of(desc, const struct vmtransfer_page_packet_header, d);
u16 q_idx = channel->offermsg.offer.sub_channel_index; u16 q_idx = channel->offermsg.offer.sub_channel_index;
...@@ -1150,6 +1154,7 @@ static int netvsc_receive(struct net_device *ndev, ...@@ -1150,6 +1154,7 @@ static int netvsc_receive(struct net_device *ndev,
int ret; int ret;
if (unlikely(offset + buflen > net_device->recv_buf_size)) { if (unlikely(offset + buflen > net_device->recv_buf_size)) {
nvchan->rsc.cnt = 0;
status = NVSP_STAT_FAIL; status = NVSP_STAT_FAIL;
netif_err(net_device_ctx, rx_err, ndev, netif_err(net_device_ctx, rx_err, ndev,
"Packet offset:%u + len:%u too big\n", "Packet offset:%u + len:%u too big\n",
...@@ -1160,11 +1165,13 @@ static int netvsc_receive(struct net_device *ndev, ...@@ -1160,11 +1165,13 @@ static int netvsc_receive(struct net_device *ndev,
data = recv_buf + offset; data = recv_buf + offset;
nvchan->rsc.is_last = (i == count - 1);
trace_rndis_recv(ndev, q_idx, data); trace_rndis_recv(ndev, q_idx, data);
/* Pass it to the upper layer */ /* Pass it to the upper layer */
ret = rndis_filter_receive(ndev, net_device, ret = rndis_filter_receive(ndev, net_device,
channel, data, buflen); nvchan, data, buflen);
if (unlikely(ret != NVSP_STAT_SUCCESS)) if (unlikely(ret != NVSP_STAT_SUCCESS))
status = NVSP_STAT_FAIL; status = NVSP_STAT_FAIL;
...@@ -1223,12 +1230,13 @@ static void netvsc_receive_inband(struct net_device *ndev, ...@@ -1223,12 +1230,13 @@ static void netvsc_receive_inband(struct net_device *ndev,
} }
static int netvsc_process_raw_pkt(struct hv_device *device, static int netvsc_process_raw_pkt(struct hv_device *device,
struct vmbus_channel *channel, struct netvsc_channel *nvchan,
struct netvsc_device *net_device, struct netvsc_device *net_device,
struct net_device *ndev, struct net_device *ndev,
const struct vmpacket_descriptor *desc, const struct vmpacket_descriptor *desc,
int budget) int budget)
{ {
struct vmbus_channel *channel = nvchan->channel;
const struct nvsp_message *nvmsg = hv_pkt_data(desc); const struct nvsp_message *nvmsg = hv_pkt_data(desc);
trace_nvsp_recv(ndev, channel, nvmsg); trace_nvsp_recv(ndev, channel, nvmsg);
...@@ -1240,7 +1248,7 @@ static int netvsc_process_raw_pkt(struct hv_device *device, ...@@ -1240,7 +1248,7 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
break; break;
case VM_PKT_DATA_USING_XFER_PAGES: case VM_PKT_DATA_USING_XFER_PAGES:
return netvsc_receive(ndev, net_device, channel, return netvsc_receive(ndev, net_device, nvchan,
desc, nvmsg); desc, nvmsg);
break; break;
...@@ -1284,7 +1292,7 @@ int netvsc_poll(struct napi_struct *napi, int budget) ...@@ -1284,7 +1292,7 @@ int netvsc_poll(struct napi_struct *napi, int budget)
nvchan->desc = hv_pkt_iter_first(channel); nvchan->desc = hv_pkt_iter_first(channel);
while (nvchan->desc && work_done < budget) { while (nvchan->desc && work_done < budget) {
work_done += netvsc_process_raw_pkt(device, channel, net_device, work_done += netvsc_process_raw_pkt(device, nvchan, net_device,
ndev, nvchan->desc, budget); ndev, nvchan->desc, budget);
nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc); nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
} }
......
...@@ -744,14 +744,16 @@ void netvsc_linkstatus_callback(struct net_device *net, ...@@ -744,14 +744,16 @@ void netvsc_linkstatus_callback(struct net_device *net,
} }
static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
struct napi_struct *napi, struct netvsc_channel *nvchan)
const struct ndis_tcp_ip_checksum_info *csum_info,
const struct ndis_pkt_8021q_info *vlan,
void *data, u32 buflen)
{ {
struct napi_struct *napi = &nvchan->napi;
const struct ndis_pkt_8021q_info *vlan = nvchan->rsc.vlan;
const struct ndis_tcp_ip_checksum_info *csum_info =
nvchan->rsc.csum_info;
struct sk_buff *skb; struct sk_buff *skb;
int i;
skb = napi_alloc_skb(napi, buflen); skb = napi_alloc_skb(napi, nvchan->rsc.pktlen);
if (!skb) if (!skb)
return skb; return skb;
...@@ -759,7 +761,8 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, ...@@ -759,7 +761,8 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
* Copy to skb. This copy is needed here since the memory pointed by * Copy to skb. This copy is needed here since the memory pointed by
* hv_netvsc_packet cannot be deallocated * hv_netvsc_packet cannot be deallocated
*/ */
skb_put_data(skb, data, buflen); for (i = 0; i < nvchan->rsc.cnt; i++)
skb_put_data(skb, nvchan->rsc.data[i], nvchan->rsc.len[i]);
skb->protocol = eth_type_trans(skb, net); skb->protocol = eth_type_trans(skb, net);
...@@ -792,14 +795,11 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, ...@@ -792,14 +795,11 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
*/ */
int netvsc_recv_callback(struct net_device *net, int netvsc_recv_callback(struct net_device *net,
struct netvsc_device *net_device, struct netvsc_device *net_device,
struct vmbus_channel *channel, struct netvsc_channel *nvchan)
void *data, u32 len,
const struct ndis_tcp_ip_checksum_info *csum_info,
const struct ndis_pkt_8021q_info *vlan)
{ {
struct net_device_context *net_device_ctx = netdev_priv(net); struct net_device_context *net_device_ctx = netdev_priv(net);
struct vmbus_channel *channel = nvchan->channel;
u16 q_idx = channel->offermsg.offer.sub_channel_index; u16 q_idx = channel->offermsg.offer.sub_channel_index;
struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
struct sk_buff *skb; struct sk_buff *skb;
struct netvsc_stats *rx_stats; struct netvsc_stats *rx_stats;
...@@ -807,8 +807,8 @@ int netvsc_recv_callback(struct net_device *net, ...@@ -807,8 +807,8 @@ int netvsc_recv_callback(struct net_device *net,
return NVSP_STAT_FAIL; return NVSP_STAT_FAIL;
/* Allocate a skb - TODO direct I/O to pages? */ /* Allocate a skb - TODO direct I/O to pages? */
skb = netvsc_alloc_recv_skb(net, &nvchan->napi, skb = netvsc_alloc_recv_skb(net, nvchan);
csum_info, vlan, data, len);
if (unlikely(!skb)) { if (unlikely(!skb)) {
++net_device_ctx->eth_stats.rx_no_memory; ++net_device_ctx->eth_stats.rx_no_memory;
rcu_read_unlock(); rcu_read_unlock();
...@@ -825,7 +825,7 @@ int netvsc_recv_callback(struct net_device *net, ...@@ -825,7 +825,7 @@ int netvsc_recv_callback(struct net_device *net,
rx_stats = &nvchan->rx_stats; rx_stats = &nvchan->rx_stats;
u64_stats_update_begin(&rx_stats->syncp); u64_stats_update_begin(&rx_stats->syncp);
rx_stats->packets++; rx_stats->packets++;
rx_stats->bytes += len; rx_stats->bytes += nvchan->rsc.pktlen;
if (skb->pkt_type == PACKET_BROADCAST) if (skb->pkt_type == PACKET_BROADCAST)
++rx_stats->broadcast; ++rx_stats->broadcast;
......
...@@ -342,7 +342,8 @@ static void rndis_filter_receive_response(struct net_device *ndev, ...@@ -342,7 +342,8 @@ static void rndis_filter_receive_response(struct net_device *ndev,
* Get the Per-Packet-Info with the specified type * Get the Per-Packet-Info with the specified type
* return NULL if not found. * return NULL if not found.
*/ */
static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type) static inline void *rndis_get_ppi(struct rndis_packet *rpkt,
u32 type, u8 internal)
{ {
struct rndis_per_packet_info *ppi; struct rndis_per_packet_info *ppi;
int len; int len;
...@@ -355,7 +356,7 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type) ...@@ -355,7 +356,7 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type)
len = rpkt->per_pkt_info_len; len = rpkt->per_pkt_info_len;
while (len > 0) { while (len > 0) {
if (ppi->type == type) if (ppi->type == type && ppi->internal == internal)
return (void *)((ulong)ppi + ppi->ppi_offset); return (void *)((ulong)ppi + ppi->ppi_offset);
len -= ppi->size; len -= ppi->size;
ppi = (struct rndis_per_packet_info *)((ulong)ppi + ppi->size); ppi = (struct rndis_per_packet_info *)((ulong)ppi + ppi->size);
...@@ -364,17 +365,41 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type) ...@@ -364,17 +365,41 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type)
return NULL; return NULL;
} }
/* Append one data fragment to the channel's pending RSC packet.
 *
 * When no fragment is pending (rsc.cnt == 0) the vlan and checksum info
 * are recorded and the packet length starts from this fragment; later
 * fragments only add to the packet length. The fragment pointer and
 * length are stored at slot rsc.cnt, which is then advanced.
 *
 * No bounds check is done here: the caller must ensure that
 * rsc.cnt < NVSP_RSC_MAX before calling.
 */
static inline
void rsc_add_data(struct netvsc_channel *nvchan,
		  const struct ndis_pkt_8021q_info *vlan,
		  const struct ndis_tcp_ip_checksum_info *csum_info,
		  void *data, u32 len)
{
	struct nvsc_rsc *rsc = &nvchan->rsc;
	u32 slot = rsc->cnt;

	if (slot == 0) {
		/* First fragment: capture the per-packet metadata */
		rsc->vlan = vlan;
		rsc->csum_info = csum_info;
		rsc->pktlen = len;
	} else {
		rsc->pktlen += len;
	}

	rsc->data[slot] = data;
	rsc->len[slot] = len;
	rsc->cnt = slot + 1;
}
static int rndis_filter_receive_data(struct net_device *ndev, static int rndis_filter_receive_data(struct net_device *ndev,
struct netvsc_device *nvdev, struct netvsc_device *nvdev,
struct vmbus_channel *channel, struct netvsc_channel *nvchan,
struct rndis_message *msg, struct rndis_message *msg,
u32 data_buflen) u32 data_buflen)
{ {
struct rndis_packet *rndis_pkt = &msg->msg.pkt; struct rndis_packet *rndis_pkt = &msg->msg.pkt;
const struct ndis_tcp_ip_checksum_info *csum_info; const struct ndis_tcp_ip_checksum_info *csum_info;
const struct ndis_pkt_8021q_info *vlan; const struct ndis_pkt_8021q_info *vlan;
const struct rndis_pktinfo_id *pktinfo_id;
u32 data_offset; u32 data_offset;
void *data; void *data;
bool rsc_more = false;
int ret;
/* Remove the rndis header and pass it back up the stack */ /* Remove the rndis header and pass it back up the stack */
data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
...@@ -393,25 +418,59 @@ static int rndis_filter_receive_data(struct net_device *ndev, ...@@ -393,25 +418,59 @@ static int rndis_filter_receive_data(struct net_device *ndev,
return NVSP_STAT_FAIL; return NVSP_STAT_FAIL;
} }
vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO); vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO, 0);
csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO, 0);
csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO); pktinfo_id = rndis_get_ppi(rndis_pkt, RNDIS_PKTINFO_ID, 1);
data = (void *)msg + data_offset; data = (void *)msg + data_offset;
/* /* Identify RSC frags, drop erroneous packets */
* Remove the rndis trailer padding from rndis packet message if (pktinfo_id && (pktinfo_id->flag & RNDIS_PKTINFO_SUBALLOC)) {
if (pktinfo_id->flag & RNDIS_PKTINFO_1ST_FRAG)
nvchan->rsc.cnt = 0;
else if (nvchan->rsc.cnt == 0)
goto drop;
rsc_more = true;
if (pktinfo_id->flag & RNDIS_PKTINFO_LAST_FRAG)
rsc_more = false;
if (rsc_more && nvchan->rsc.is_last)
goto drop;
} else {
nvchan->rsc.cnt = 0;
}
if (unlikely(nvchan->rsc.cnt >= NVSP_RSC_MAX))
goto drop;
/* Put data into per channel structure.
* Also, remove the rndis trailer padding from rndis packet message
* rndis_pkt->data_len tell us the real data length, we only copy * rndis_pkt->data_len tell us the real data length, we only copy
* the data packet to the stack, without the rndis trailer padding * the data packet to the stack, without the rndis trailer padding
*/ */
return netvsc_recv_callback(ndev, nvdev, channel, rsc_add_data(nvchan, vlan, csum_info, data, rndis_pkt->data_len);
data, rndis_pkt->data_len,
csum_info, vlan); if (rsc_more)
return NVSP_STAT_SUCCESS;
ret = netvsc_recv_callback(ndev, nvdev, nvchan);
nvchan->rsc.cnt = 0;
return ret;
drop:
/* Drop incomplete packet */
nvchan->rsc.cnt = 0;
return NVSP_STAT_FAIL;
} }
int rndis_filter_receive(struct net_device *ndev, int rndis_filter_receive(struct net_device *ndev,
struct netvsc_device *net_dev, struct netvsc_device *net_dev,
struct vmbus_channel *channel, struct netvsc_channel *nvchan,
void *data, u32 buflen) void *data, u32 buflen)
{ {
struct net_device_context *net_device_ctx = netdev_priv(ndev); struct net_device_context *net_device_ctx = netdev_priv(ndev);
...@@ -422,7 +481,7 @@ int rndis_filter_receive(struct net_device *ndev, ...@@ -422,7 +481,7 @@ int rndis_filter_receive(struct net_device *ndev,
switch (rndis_msg->ndis_msg_type) { switch (rndis_msg->ndis_msg_type) {
case RNDIS_MSG_PACKET: case RNDIS_MSG_PACKET:
return rndis_filter_receive_data(ndev, net_dev, channel, return rndis_filter_receive_data(ndev, net_dev, nvchan,
rndis_msg, buflen); rndis_msg, buflen);
case RNDIS_MSG_INIT_C: case RNDIS_MSG_INIT_C:
case RNDIS_MSG_QUERY_C: case RNDIS_MSG_QUERY_C:
...@@ -1184,6 +1243,13 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, ...@@ -1184,6 +1243,13 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
} }
} }
if (hwcaps.rsc.ip4 && hwcaps.rsc.ip6) {
net->hw_features |= NETIF_F_LRO;
offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
}
/* In case some hw_features disappeared we need to remove them from /* In case some hw_features disappeared we need to remove them from
* net->features list as they're no longer supported. * net->features list as they're no longer supported.
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment