Commit 5b54dac8, authored by Haiyang Zhang, committed by David S. Miller

hyperv: Add support for virtual Receive Side Scaling (vRSS)

This feature allows multiple channels to be used by each virtual NIC.
It is available on Hyper-V host 2012 R2.
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 86fd14ad
...@@ -28,6 +28,96 @@ ...@@ -28,6 +28,96 @@
#include <linux/hyperv.h> #include <linux/hyperv.h>
#include <linux/rndis.h> #include <linux/rndis.h>
/* RSS related */
#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */
#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204 /* query and set */
#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88
#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89
#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2
#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2
/* Header that begins each NDIS RSS object declared in this file
 * (struct ndis_recv_scale_cap and struct ndis_recv_scale_param).
 * Packed, so the three fields occupy exactly four bytes.
 */
struct ndis_obj_header {
/* Object type, e.g. NDIS_OBJECT_TYPE_RSS_CAPABILITIES or
 * NDIS_OBJECT_TYPE_RSS_PARAMETERS
 */
u8 type;
/* Object revision, e.g. NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 */
u8 rev;
/* Size in bytes of the object this header starts; the senders in this
 * driver store sizeof() of the enclosing structure here
 */
u16 size;
} __packed;
/* ndis_recv_scale_cap/cap_flag */
#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000
#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000
#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000
#define NDIS_RSS_CAPS_USING_MSI_X 0x08000000
#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000
#define NDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000
#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100
#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200
#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400
/* RSS capabilities object exchanged with the host through the
 * OID_GEN_RECEIVE_SCALE_CAPABILITIES query. The driver fills in hdr
 * before sending the query and reads the remaining fields from the reply.
 */
struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */
struct ndis_obj_header hdr;
/* Bitmask of NDIS_RSS_CAPS_* values */
u32 cap_flag;
/* NOTE(review): presumably the number of interrupt messages; not read by
 * the code visible here
 */
u32 num_int_msg;
/* Number of receive queues offered by the host; used together with
 * num_online_cpus() to choose how many channels to open
 */
u32 num_recv_que;
/* NOTE(review): presumably the number of indirection table entries; not
 * read by the code visible here
 */
u16 num_indirect_tabent;
} __packed;
/* ndis_recv_scale_param flags */
#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001
#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002
#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004
#define NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008
#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010
/* Hash info bits */
#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001
#define NDIS_HASH_IPV4 0x00000100
#define NDIS_HASH_TCP_IPV4 0x00000200
#define NDIS_HASH_IPV6 0x00000400
#define NDIS_HASH_IPV6_EX 0x00000800
#define NDIS_HASH_TCP_IPV6 0x00001000
#define NDIS_HASH_TCP_IPV6_EX 0x00002000
#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40
#define ITAB_NUM 128
#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
extern u8 netvsc_hash_key[];
/* RSS parameters object sent to the host with
 * OID_GEN_RECEIVE_SCALE_PARAMETERS. The indirection table and the hash key
 * are not members: they follow this structure in the same buffer and are
 * located through the *_offset fields, measured from the start of this
 * structure.
 *
 * Unlike the other NDIS objects in this file, this structure is not
 * declared __packed, so the compiler inserts padding after the u16 members
 * that precede a u32.
 */
struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
struct ndis_obj_header hdr;
/* Qualifies the rest of the information (NDIS_RSS_PARAM_FLAG_* bits) */
u16 flag;
/* The base CPU number to do receive processing. not used */
u16 base_cpu_number;
/* This describes the hash function and type being enabled
 * (NDIS_HASH_* bits)
 */
u32 hashinfo;
/* The size of indirection table array, in bytes */
u16 indirect_tabsize;
/* The offset of the indirection table from the beginning of this
 * structure
 */
u32 indirect_taboffset;
/* The size of the hash secret key, in bytes */
u16 hashkey_size;
/* The offset of the secret key from the beginning of this structure.
 * The name is a misspelling of "hashkey_offset"; it is kept because other
 * code refers to the field by this name.
 */
u32 kashkey_offset;
/* The three processor-mask fields below are not written explicitly by
 * rndis_filter_set_rss_param()
 */
u32 processor_masks_offset;
u32 num_processor_masks;
u32 processor_masks_entry_size;
};
/* Fwd declaration */ /* Fwd declaration */
struct hv_netvsc_packet; struct hv_netvsc_packet;
struct ndis_tcp_ip_checksum_info; struct ndis_tcp_ip_checksum_info;
...@@ -39,6 +129,8 @@ struct xferpage_packet { ...@@ -39,6 +129,8 @@ struct xferpage_packet {
/* # of netvsc packets this xfer packet contains */ /* # of netvsc packets this xfer packet contains */
u32 count; u32 count;
struct vmbus_channel *channel;
}; };
/* /*
...@@ -54,6 +146,9 @@ struct hv_netvsc_packet { ...@@ -54,6 +146,9 @@ struct hv_netvsc_packet {
bool is_data_pkt; bool is_data_pkt;
u16 vlan_tci; u16 vlan_tci;
u16 q_idx;
struct vmbus_channel *channel;
/* /*
* Valid only for receives when we break a xfer page packet * Valid only for receives when we break a xfer page packet
* into multiple netvsc packets * into multiple netvsc packets
...@@ -120,6 +215,7 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, ...@@ -120,6 +215,7 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
int netvsc_recv_callback(struct hv_device *device_obj, int netvsc_recv_callback(struct hv_device *device_obj,
struct hv_netvsc_packet *packet, struct hv_netvsc_packet *packet,
struct ndis_tcp_ip_checksum_info *csum_info); struct ndis_tcp_ip_checksum_info *csum_info);
void netvsc_channel_cb(void *context);
int rndis_filter_open(struct hv_device *dev); int rndis_filter_open(struct hv_device *dev);
int rndis_filter_close(struct hv_device *dev); int rndis_filter_close(struct hv_device *dev);
int rndis_filter_device_add(struct hv_device *dev, int rndis_filter_device_add(struct hv_device *dev,
...@@ -522,6 +618,8 @@ struct nvsp_message { ...@@ -522,6 +618,8 @@ struct nvsp_message {
#define NETVSC_PACKET_SIZE 2048 #define NETVSC_PACKET_SIZE 2048
#define VRSS_SEND_TAB_SIZE 16
/* Per netvsc channel-specific */ /* Per netvsc channel-specific */
struct netvsc_device { struct netvsc_device {
struct hv_device *dev; struct hv_device *dev;
...@@ -555,10 +653,20 @@ struct netvsc_device { ...@@ -555,10 +653,20 @@ struct netvsc_device {
struct net_device *ndev; struct net_device *ndev;
struct vmbus_channel *chn_table[NR_CPUS];
u32 send_table[VRSS_SEND_TAB_SIZE];
u32 num_chn;
atomic_t queue_sends[NR_CPUS];
/* Holds rndis device info */ /* Holds rndis device info */
void *extension; void *extension;
/* Ring size, in pages, used when opening the primary and sub channels */
int ring_size;
/* The primary channel callback buffer */
unsigned char cb_buffer[NETVSC_PACKET_SIZE]; unsigned char cb_buffer[NETVSC_PACKET_SIZE];
/* The sub channel callback buffer */
unsigned char *sub_cb_buf;
}; };
/* NdisInitialize message */ /* NdisInitialize message */
......
...@@ -422,6 +422,9 @@ int netvsc_device_remove(struct hv_device *device) ...@@ -422,6 +422,9 @@ int netvsc_device_remove(struct hv_device *device)
kfree(netvsc_packet); kfree(netvsc_packet);
} }
if (net_device->sub_cb_buf)
vfree(net_device->sub_cb_buf);
kfree(net_device); kfree(net_device);
return 0; return 0;
} }
...@@ -461,7 +464,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device, ...@@ -461,7 +464,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
(nvsp_packet->hdr.msg_type == (nvsp_packet->hdr.msg_type ==
NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) || NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
(nvsp_packet->hdr.msg_type == (nvsp_packet->hdr.msg_type ==
NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) { NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
(nvsp_packet->hdr.msg_type ==
NVSP_MSG5_TYPE_SUBCHANNEL)) {
/* Copy the response back */ /* Copy the response back */
memcpy(&net_device->channel_init_pkt, nvsp_packet, memcpy(&net_device->channel_init_pkt, nvsp_packet,
sizeof(struct nvsp_message)); sizeof(struct nvsp_message));
...@@ -469,28 +474,37 @@ static void netvsc_send_completion(struct netvsc_device *net_device, ...@@ -469,28 +474,37 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
} else if (nvsp_packet->hdr.msg_type == } else if (nvsp_packet->hdr.msg_type ==
NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) { NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
int num_outstanding_sends; int num_outstanding_sends;
u16 q_idx = 0;
struct vmbus_channel *channel = device->channel;
int queue_sends;
/* Get the send context */ /* Get the send context */
nvsc_packet = (struct hv_netvsc_packet *)(unsigned long) nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
packet->trans_id; packet->trans_id;
/* Notify the layer above us */ /* Notify the layer above us */
if (nvsc_packet) if (nvsc_packet) {
q_idx = nvsc_packet->q_idx;
channel = nvsc_packet->channel;
nvsc_packet->completion.send.send_completion( nvsc_packet->completion.send.send_completion(
nvsc_packet->completion.send. nvsc_packet->completion.send.
send_completion_ctx); send_completion_ctx);
}
num_outstanding_sends = num_outstanding_sends =
atomic_dec_return(&net_device->num_outstanding_sends); atomic_dec_return(&net_device->num_outstanding_sends);
queue_sends = atomic_dec_return(&net_device->
queue_sends[q_idx]);
if (net_device->destroy && num_outstanding_sends == 0) if (net_device->destroy && num_outstanding_sends == 0)
wake_up(&net_device->wait_drain); wake_up(&net_device->wait_drain);
if (netif_queue_stopped(ndev) && !net_device->start_remove && if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
(hv_ringbuf_avail_percent(&device->channel->outbound) !net_device->start_remove &&
> RING_AVAIL_PERCENT_HIWATER || (hv_ringbuf_avail_percent(&channel->outbound) >
num_outstanding_sends < 1)) RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
netif_wake_queue(ndev); netif_tx_wake_queue(netdev_get_tx_queue(
ndev, q_idx));
} else { } else {
netdev_err(ndev, "Unknown send completion packet type- " netdev_err(ndev, "Unknown send completion packet type- "
"%d received!!\n", nvsp_packet->hdr.msg_type); "%d received!!\n", nvsp_packet->hdr.msg_type);
...@@ -505,6 +519,7 @@ int netvsc_send(struct hv_device *device, ...@@ -505,6 +519,7 @@ int netvsc_send(struct hv_device *device,
int ret = 0; int ret = 0;
struct nvsp_message sendMessage; struct nvsp_message sendMessage;
struct net_device *ndev; struct net_device *ndev;
struct vmbus_channel *out_channel = NULL;
u64 req_id; u64 req_id;
net_device = get_outbound_net_device(device); net_device = get_outbound_net_device(device);
...@@ -531,15 +546,20 @@ int netvsc_send(struct hv_device *device, ...@@ -531,15 +546,20 @@ int netvsc_send(struct hv_device *device,
else else
req_id = 0; req_id = 0;
out_channel = net_device->chn_table[packet->q_idx];
if (out_channel == NULL)
out_channel = device->channel;
packet->channel = out_channel;
if (packet->page_buf_cnt) { if (packet->page_buf_cnt) {
ret = vmbus_sendpacket_pagebuffer(device->channel, ret = vmbus_sendpacket_pagebuffer(out_channel,
packet->page_buf, packet->page_buf,
packet->page_buf_cnt, packet->page_buf_cnt,
&sendMessage, &sendMessage,
sizeof(struct nvsp_message), sizeof(struct nvsp_message),
req_id); req_id);
} else { } else {
ret = vmbus_sendpacket(device->channel, &sendMessage, ret = vmbus_sendpacket(out_channel, &sendMessage,
sizeof(struct nvsp_message), sizeof(struct nvsp_message),
req_id, req_id,
VM_PKT_DATA_INBAND, VM_PKT_DATA_INBAND,
...@@ -548,17 +568,24 @@ int netvsc_send(struct hv_device *device, ...@@ -548,17 +568,24 @@ int netvsc_send(struct hv_device *device,
if (ret == 0) { if (ret == 0) {
atomic_inc(&net_device->num_outstanding_sends); atomic_inc(&net_device->num_outstanding_sends);
if (hv_ringbuf_avail_percent(&device->channel->outbound) < atomic_inc(&net_device->queue_sends[packet->q_idx]);
if (hv_ringbuf_avail_percent(&out_channel->outbound) <
RING_AVAIL_PERCENT_LOWATER) { RING_AVAIL_PERCENT_LOWATER) {
netif_stop_queue(ndev); netif_tx_stop_queue(netdev_get_tx_queue(
ndev, packet->q_idx));
if (atomic_read(&net_device-> if (atomic_read(&net_device->
num_outstanding_sends) < 1) queue_sends[packet->q_idx]) < 1)
netif_wake_queue(ndev); netif_tx_wake_queue(netdev_get_tx_queue(
ndev, packet->q_idx));
} }
} else if (ret == -EAGAIN) { } else if (ret == -EAGAIN) {
netif_stop_queue(ndev); netif_tx_stop_queue(netdev_get_tx_queue(
if (atomic_read(&net_device->num_outstanding_sends) < 1) { ndev, packet->q_idx));
netif_wake_queue(ndev); if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) {
netif_tx_wake_queue(netdev_get_tx_queue(
ndev, packet->q_idx));
ret = -ENOSPC; ret = -ENOSPC;
} }
} else { } else {
...@@ -570,6 +597,7 @@ int netvsc_send(struct hv_device *device, ...@@ -570,6 +597,7 @@ int netvsc_send(struct hv_device *device,
} }
static void netvsc_send_recv_completion(struct hv_device *device, static void netvsc_send_recv_completion(struct hv_device *device,
struct vmbus_channel *channel,
struct netvsc_device *net_device, struct netvsc_device *net_device,
u64 transaction_id, u32 status) u64 transaction_id, u32 status)
{ {
...@@ -587,7 +615,7 @@ static void netvsc_send_recv_completion(struct hv_device *device, ...@@ -587,7 +615,7 @@ static void netvsc_send_recv_completion(struct hv_device *device,
retry_send_cmplt: retry_send_cmplt:
/* Send the completion */ /* Send the completion */
ret = vmbus_sendpacket(device->channel, &recvcompMessage, ret = vmbus_sendpacket(channel, &recvcompMessage,
sizeof(struct nvsp_message), transaction_id, sizeof(struct nvsp_message), transaction_id,
VM_PKT_COMP, 0); VM_PKT_COMP, 0);
if (ret == 0) { if (ret == 0) {
...@@ -618,6 +646,7 @@ static void netvsc_receive_completion(void *context) ...@@ -618,6 +646,7 @@ static void netvsc_receive_completion(void *context)
{ {
struct hv_netvsc_packet *packet = context; struct hv_netvsc_packet *packet = context;
struct hv_device *device = packet->device; struct hv_device *device = packet->device;
struct vmbus_channel *channel;
struct netvsc_device *net_device; struct netvsc_device *net_device;
u64 transaction_id = 0; u64 transaction_id = 0;
bool fsend_receive_comp = false; bool fsend_receive_comp = false;
...@@ -649,6 +678,7 @@ static void netvsc_receive_completion(void *context) ...@@ -649,6 +678,7 @@ static void netvsc_receive_completion(void *context)
*/ */
if (packet->xfer_page_pkt->count == 0) { if (packet->xfer_page_pkt->count == 0) {
fsend_receive_comp = true; fsend_receive_comp = true;
channel = packet->xfer_page_pkt->channel;
transaction_id = packet->completion.recv.recv_completion_tid; transaction_id = packet->completion.recv.recv_completion_tid;
status = packet->xfer_page_pkt->status; status = packet->xfer_page_pkt->status;
list_add_tail(&packet->xfer_page_pkt->list_ent, list_add_tail(&packet->xfer_page_pkt->list_ent,
...@@ -662,12 +692,13 @@ static void netvsc_receive_completion(void *context) ...@@ -662,12 +692,13 @@ static void netvsc_receive_completion(void *context)
/* Send a receive completion for the xfer page packet */ /* Send a receive completion for the xfer page packet */
if (fsend_receive_comp) if (fsend_receive_comp)
netvsc_send_recv_completion(device, net_device, transaction_id, netvsc_send_recv_completion(device, channel, net_device,
status); transaction_id, status);
} }
static void netvsc_receive(struct netvsc_device *net_device, static void netvsc_receive(struct netvsc_device *net_device,
struct vmbus_channel *channel,
struct hv_device *device, struct hv_device *device,
struct vmpacket_descriptor *packet) struct vmpacket_descriptor *packet)
{ {
...@@ -748,7 +779,7 @@ static void netvsc_receive(struct netvsc_device *net_device, ...@@ -748,7 +779,7 @@ static void netvsc_receive(struct netvsc_device *net_device,
spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
flags); flags);
netvsc_send_recv_completion(device, net_device, netvsc_send_recv_completion(device, channel, net_device,
vmxferpage_packet->d.trans_id, vmxferpage_packet->d.trans_id,
NVSP_STAT_FAIL); NVSP_STAT_FAIL);
...@@ -759,6 +790,7 @@ static void netvsc_receive(struct netvsc_device *net_device, ...@@ -759,6 +790,7 @@ static void netvsc_receive(struct netvsc_device *net_device,
xferpage_packet = (struct xferpage_packet *)listHead.next; xferpage_packet = (struct xferpage_packet *)listHead.next;
list_del(&xferpage_packet->list_ent); list_del(&xferpage_packet->list_ent);
xferpage_packet->status = NVSP_STAT_SUCCESS; xferpage_packet->status = NVSP_STAT_SUCCESS;
xferpage_packet->channel = channel;
/* This is how much we can satisfy */ /* This is how much we can satisfy */
xferpage_packet->count = count - 1; xferpage_packet->count = count - 1;
...@@ -800,10 +832,45 @@ static void netvsc_receive(struct netvsc_device *net_device, ...@@ -800,10 +832,45 @@ static void netvsc_receive(struct netvsc_device *net_device,
} }
static void netvsc_channel_cb(void *context)
/* Copy the send indirection table carried by an inband NVSP message into
 * the netvsc device.
 *
 * hdev:  device the message arrived for
 * vmpkt: descriptor of the received packet; the NVSP message starts
 *        offset8 * 8 bytes after the descriptor
 *
 * Messages of any other type are ignored. A table whose entry count
 * differs from VRSS_SEND_TAB_SIZE is logged and dropped.
 */
static void netvsc_send_table(struct hv_device *hdev,
			      struct vmpacket_descriptor *vmpkt)
{
	struct netvsc_device *nvdev = get_outbound_net_device(hdev);
	struct nvsp_message *msg;
	u32 *src;
	u32 entries;
	u32 idx;

	if (!nvdev)
		return;

	/* offset8 is expressed in 8-byte units, hence the shift by 3 */
	msg = (struct nvsp_message *)((unsigned long)vmpkt +
				      (vmpkt->offset8 << 3));
	if (msg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE)
		return;

	entries = msg->msg.v5_msg.send_table.count;
	if (entries != VRSS_SEND_TAB_SIZE) {
		netdev_err(nvdev->ndev,
			   "Received wrong send-table size:%u\n", entries);
		return;
	}

	/* The table sits at a message-supplied offset from send_table */
	src = (u32 *)((unsigned long)&msg->msg.v5_msg.send_table +
		      msg->msg.v5_msg.send_table.offset);

	for (idx = 0; idx < entries; idx++)
		nvdev->send_table[idx] = src[idx];
}
void netvsc_channel_cb(void *context)
{ {
int ret; int ret;
struct hv_device *device = context; struct vmbus_channel *channel = (struct vmbus_channel *)context;
struct hv_device *device;
struct netvsc_device *net_device; struct netvsc_device *net_device;
u32 bytes_recvd; u32 bytes_recvd;
u64 request_id; u64 request_id;
...@@ -812,14 +879,19 @@ static void netvsc_channel_cb(void *context) ...@@ -812,14 +879,19 @@ static void netvsc_channel_cb(void *context)
int bufferlen = NETVSC_PACKET_SIZE; int bufferlen = NETVSC_PACKET_SIZE;
struct net_device *ndev; struct net_device *ndev;
if (channel->primary_channel != NULL)
device = channel->primary_channel->device_obj;
else
device = channel->device_obj;
net_device = get_inbound_net_device(device); net_device = get_inbound_net_device(device);
if (!net_device) if (!net_device)
return; return;
ndev = net_device->ndev; ndev = net_device->ndev;
buffer = net_device->cb_buffer; buffer = get_per_channel_state(channel);
do { do {
ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen, ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
&bytes_recvd, &request_id); &bytes_recvd, &request_id);
if (ret == 0) { if (ret == 0) {
if (bytes_recvd > 0) { if (bytes_recvd > 0) {
...@@ -831,10 +903,14 @@ static void netvsc_channel_cb(void *context) ...@@ -831,10 +903,14 @@ static void netvsc_channel_cb(void *context)
break; break;
case VM_PKT_DATA_USING_XFER_PAGES: case VM_PKT_DATA_USING_XFER_PAGES:
netvsc_receive(net_device, netvsc_receive(net_device, channel,
device, desc); device, desc);
break; break;
case VM_PKT_DATA_INBAND:
netvsc_send_table(device, desc);
break;
default: default:
netdev_err(ndev, netdev_err(ndev,
"unhandled packet type %d, " "unhandled packet type %d, "
...@@ -893,6 +969,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) ...@@ -893,6 +969,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
goto cleanup; goto cleanup;
} }
net_device->ring_size = ring_size;
/* /*
* Coming into this function, struct net_device * is * Coming into this function, struct net_device * is
* registered as the driver private data. * registered as the driver private data.
...@@ -917,10 +995,12 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) ...@@ -917,10 +995,12 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
} }
init_completion(&net_device->channel_init_wait); init_completion(&net_device->channel_init_wait);
set_per_channel_state(device->channel, net_device->cb_buffer);
/* Open the channel */ /* Open the channel */
ret = vmbus_open(device->channel, ring_size * PAGE_SIZE, ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
ring_size * PAGE_SIZE, NULL, 0, ring_size * PAGE_SIZE, NULL, 0,
netvsc_channel_cb, device); netvsc_channel_cb, device->channel);
if (ret != 0) { if (ret != 0) {
netdev_err(ndev, "unable to open channel: %d\n", ret); netdev_err(ndev, "unable to open channel: %d\n", ret);
...@@ -930,6 +1010,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) ...@@ -930,6 +1010,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
/* Channel is opened */ /* Channel is opened */
pr_info("hv_netvsc channel opened successfully\n"); pr_info("hv_netvsc channel opened successfully\n");
net_device->chn_table[0] = device->channel;
/* Connect with the NetVsp */ /* Connect with the NetVsp */
ret = netvsc_connect_vsp(device); ret = netvsc_connect_vsp(device);
if (ret != 0) { if (ret != 0) {
......
...@@ -101,7 +101,7 @@ static int netvsc_open(struct net_device *net) ...@@ -101,7 +101,7 @@ static int netvsc_open(struct net_device *net)
return ret; return ret;
} }
netif_start_queue(net); netif_tx_start_all_queues(net);
nvdev = hv_get_drvdata(device_obj); nvdev = hv_get_drvdata(device_obj);
rdev = nvdev->extension; rdev = nvdev->extension;
...@@ -149,6 +149,88 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, ...@@ -149,6 +149,88 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
return ppi; return ppi;
} }
/* Overlay of one 64-bit value with a byte-level view of the same storage.
 *
 * Which bits of k each member aliases depends on the CPU byte order. On a
 * little-endian CPU, ka is the upper 32 bits of k and kb is the byte
 * directly below it (bits 31..24), so shifting k left moves the bits of kb
 * into ka. On a big-endian CPU the members alias different bits of k.
 */
union sub_key {
/* The whole 64-bit value */
u64 k;
struct {
/* Three filler bytes at the lowest addresses */
u8 pad[3];
/* Single byte at byte offset 3 */
u8 kb;
/* 32-bit word at byte offset 4 */
u32 ka;
};
};
/* Toeplitz hash function
 *
 * key:  hash key bytes, used cyclically
 * klen: number of bytes in key; must be at least 4 to seed the window
 * data: bytes to hash, in network byte order
 * dlen: number of bytes in data
 *
 * return: 32-bit hash in host byte order, or 0 when klen < 4
 *
 * For every set bit of data, taken most significant bit first, the
 * current leftmost 32 bits of the key are XORed into the result; the key
 * is shifted left by one bit after every data bit.
 *
 * The key window is assembled with explicit shifts rather than through a
 * union or a pointer cast, so the result does not depend on the CPU byte
 * order or on the alignment of key. On little-endian CPUs with klen > 4
 * the value equals that of the earlier union-based implementation.
 */
static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen)
{
	u32 window;	/* leftmost 32 bits of the shifted key */
	u32 ret = 0;
	u8 feed;	/* key byte currently being shifted into window */
	u8 dt;
	int k_next;
	int i, j;

	/* Fewer than four key bytes cannot fill the 32-bit window */
	if (klen < 4)
		return 0;

	window = ((u32)key[0] << 24) | ((u32)key[1] << 16) |
		 ((u32)key[2] << 8) | (u32)key[3];

	/* Wrap right away when the key is exactly four bytes long */
	k_next = 4 % klen;

	for (i = 0; i < dlen; i++) {
		feed = key[k_next];
		k_next = (k_next + 1) % klen;
		dt = data[i];

		for (j = 0; j < 8; j++) {
			if (dt & 0x80)
				ret ^= window;
			dt <<= 1;
			/* Shift the key left, pulling in the next key bit */
			window = (window << 1) | (u32)(feed >> 7);
			feed <<= 1;
		}
	}

	return ret;
}
/* Compute the transmit hash for an IPv4 frame.
 *
 * hash: receives the Toeplitz hash; written only when true is returned
 * skb:  frame to hash
 *
 * Returns true when a hash was stored, false when the frame's Ethernet
 * protocol is not ETH_P_IP or the IP version field is not 4.
 *
 * The hash covers the bytes starting at the IP source address: 12 bytes
 * for TCP, 8 bytes for every other protocol.
 *
 * NOTE(review): the 12-byte TCP case presumably intends to cover the TCP
 * ports right after the addresses; that only holds when the IP header
 * carries no options - confirm.
 */
static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
{
	struct iphdr *ip4;
	int hash_len;

	if (eth_hdr(skb)->h_proto != htons(ETH_P_IP))
		return false;

	ip4 = ip_hdr(skb);
	if (ip4->version != 4)
		return false;

	hash_len = (ip4->protocol == IPPROTO_TCP) ? 12 : 8;
	*hash = comp_hash(netvsc_hash_key, HASH_KEYLEN,
			  (u8 *)&ip4->saddr, hash_len);

	return true;
}
/* Pick the transmit queue for a frame (ndo_select_queue callback).
 *
 * Returns 0 when the netvsc device is not set, when at most one transmit
 * queue is in use, or when no hash can be computed for the frame.
 * Otherwise the hash selects a send_table entry, which is then reduced
 * modulo the number of transmit queues in use.
 *
 * accel_priv and fallback are not used.
 */
static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
			void *accel_priv, select_queue_fallback_t fallback)
{
	struct net_device_context *ctx = netdev_priv(ndev);
	struct netvsc_device *nvsc_dev = hv_get_drvdata(ctx->device_ctx);
	u32 hash;

	if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
		return 0;

	if (!netvsc_set_hash(&hash, skb))
		return 0;

	return nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
		ndev->real_num_tx_queues;
}
static void netvsc_xmit_completion(void *context) static void netvsc_xmit_completion(void *context)
{ {
struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
...@@ -333,6 +415,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) ...@@ -333,6 +415,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
packet->vlan_tci = skb->vlan_tci; packet->vlan_tci = skb->vlan_tci;
packet->q_idx = skb_get_queue_mapping(skb);
packet->is_data_pkt = true; packet->is_data_pkt = true;
packet->total_data_buflen = skb->len; packet->total_data_buflen = skb->len;
...@@ -554,6 +638,10 @@ int netvsc_recv_callback(struct hv_device *device_obj, ...@@ -554,6 +638,10 @@ int netvsc_recv_callback(struct hv_device *device_obj,
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
packet->vlan_tci); packet->vlan_tci);
skb_record_rx_queue(skb, packet->xfer_page_pkt->channel->
offermsg.offer.sub_channel_index %
net->real_num_rx_queues);
net->stats.rx_packets++; net->stats.rx_packets++;
net->stats.rx_bytes += packet->total_data_buflen; net->stats.rx_bytes += packet->total_data_buflen;
...@@ -602,7 +690,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) ...@@ -602,7 +690,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
hv_set_drvdata(hdev, ndev); hv_set_drvdata(hdev, ndev);
device_info.ring_size = ring_size; device_info.ring_size = ring_size;
rndis_filter_device_add(hdev, &device_info); rndis_filter_device_add(hdev, &device_info);
netif_wake_queue(ndev); netif_tx_wake_all_queues(ndev);
return 0; return 0;
} }
...@@ -648,6 +736,7 @@ static const struct net_device_ops device_ops = { ...@@ -648,6 +736,7 @@ static const struct net_device_ops device_ops = {
.ndo_change_mtu = netvsc_change_mtu, .ndo_change_mtu = netvsc_change_mtu,
.ndo_validate_addr = eth_validate_addr, .ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = netvsc_set_mac_addr, .ndo_set_mac_address = netvsc_set_mac_addr,
.ndo_select_queue = netvsc_select_queue,
}; };
/* /*
...@@ -694,9 +783,11 @@ static int netvsc_probe(struct hv_device *dev, ...@@ -694,9 +783,11 @@ static int netvsc_probe(struct hv_device *dev,
struct net_device *net = NULL; struct net_device *net = NULL;
struct net_device_context *net_device_ctx; struct net_device_context *net_device_ctx;
struct netvsc_device_info device_info; struct netvsc_device_info device_info;
struct netvsc_device *nvdev;
int ret; int ret;
net = alloc_etherdev(sizeof(struct net_device_context)); net = alloc_etherdev_mq(sizeof(struct net_device_context),
num_online_cpus());
if (!net) if (!net)
return -ENOMEM; return -ENOMEM;
...@@ -729,6 +820,12 @@ static int netvsc_probe(struct hv_device *dev, ...@@ -729,6 +820,12 @@ static int netvsc_probe(struct hv_device *dev,
} }
memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
nvdev = hv_get_drvdata(dev);
netif_set_real_num_tx_queues(net, nvdev->num_chn);
netif_set_real_num_rx_queues(net, nvdev->num_chn);
dev_info(&dev->device, "real num tx,rx queues:%u, %u\n",
net->real_num_tx_queues, net->real_num_rx_queues);
ret = register_netdev(net); ret = register_netdev(net);
if (ret != 0) { if (ret != 0) {
pr_err("Unable to register netdev.\n"); pr_err("Unable to register netdev.\n");
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#include "hyperv_net.h" #include "hyperv_net.h"
#define RNDIS_EXT_LEN 100 #define RNDIS_EXT_LEN PAGE_SIZE
struct rndis_request { struct rndis_request {
struct list_head list_ent; struct list_head list_ent;
struct completion wait_event; struct completion wait_event;
...@@ -94,6 +94,8 @@ static struct rndis_request *get_rndis_request(struct rndis_device *dev, ...@@ -94,6 +94,8 @@ static struct rndis_request *get_rndis_request(struct rndis_device *dev,
rndis_msg->ndis_msg_type = msg_type; rndis_msg->ndis_msg_type = msg_type;
rndis_msg->msg_len = msg_len; rndis_msg->msg_len = msg_len;
request->pkt.q_idx = 0;
/* /*
* Set the request id. This field is always after the rndis header for * Set the request id. This field is always after the rndis header for
* request/response packet types so we just used the SetRequest as a * request/response packet types so we just used the SetRequest as a
...@@ -509,6 +511,19 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid, ...@@ -509,6 +511,19 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
query->info_buflen = 0; query->info_buflen = 0;
query->dev_vc_handle = 0; query->dev_vc_handle = 0;
if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) {
struct ndis_recv_scale_cap *cap;
request->request_msg.msg_len +=
sizeof(struct ndis_recv_scale_cap);
query->info_buflen = sizeof(struct ndis_recv_scale_cap);
cap = (struct ndis_recv_scale_cap *)((unsigned long)query +
query->info_buf_offset);
cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES;
cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2;
cap->hdr.size = sizeof(struct ndis_recv_scale_cap);
}
ret = rndis_filter_send_request(dev, request); ret = rndis_filter_send_request(dev, request);
if (ret != 0) if (ret != 0)
goto cleanup; goto cleanup;
...@@ -695,6 +710,89 @@ int rndis_filter_set_offload_params(struct hv_device *hdev, ...@@ -695,6 +710,89 @@ int rndis_filter_set_offload_params(struct hv_device *hdev,
return ret; return ret;
} }
/* Toeplitz hash key, HASH_KEYLEN (40) bytes long.
 *
 * The same key is used in two places so that both sides hash alike: it is
 * copied into the RSS parameters sent to the host by
 * rndis_filter_set_rss_param(), and it is passed to comp_hash() when the
 * driver hashes outgoing frames.
 */
u8 netvsc_hash_key[HASH_KEYLEN] = {
0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
/* Send the RSS parameters (hash type, indirection table, hash key) to the
 * host with an OID_GEN_RECEIVE_SCALE_PARAMETERS set request.
 *
 * rdev:      rndis device to configure
 * num_queue: number of queues the indirection table is spread over; used
 *            as a divisor, so it must not be 0
 *
 * Returns 0 on success, -ENOMEM when no request can be allocated, the
 * error from rndis_filter_send_request() when sending fails, -ETIMEDOUT
 * when no response arrives within 5 seconds, or -EINVAL when the host
 * reports a status other than RNDIS_STATUS_SUCCESS.
 *
 * Buffer layout after the set request:
 *   struct ndis_recv_scale_param | ITAB_NUM u32 entries | HASH_KEYLEN bytes
 */
int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
{
	struct net_device *ndev = rdev->net_dev->ndev;
	u32 extlen = sizeof(struct ndis_recv_scale_param) +
		     4*ITAB_NUM + HASH_KEYLEN;
	struct rndis_request *req;
	struct rndis_set_request *set_req;
	struct rndis_set_complete *set_resp;
	struct ndis_recv_scale_param *rssp;
	u32 *itab;
	u8 *keyp;
	int idx, left, ret;

	req = get_rndis_request(rdev, RNDIS_MSG_SET,
				RNDIS_MESSAGE_SIZE(struct rndis_set_request) +
				extlen);
	if (!req)
		return -ENOMEM;

	set_req = &req->request_msg.msg.set_req;
	set_req->oid = OID_GEN_RECEIVE_SCALE_PARAMETERS;
	set_req->info_buflen = extlen;
	set_req->info_buf_offset = sizeof(struct rndis_set_request);
	set_req->dev_vc_handle = 0;

	/* The RSS parameter object starts right after the set request */
	rssp = (struct ndis_recv_scale_param *)(set_req + 1);
	rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS;
	rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2;
	rssp->hdr.size = sizeof(struct ndis_recv_scale_param);
	rssp->flag = 0;
	rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 |
			 NDIS_HASH_TCP_IPV4;
	rssp->indirect_tabsize = 4*ITAB_NUM;
	rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
	rssp->hashkey_size = HASH_KEYLEN;
	rssp->kashkey_offset = rssp->indirect_taboffset +
			       rssp->indirect_tabsize;

	/* Indirection table: assign the queues round-robin */
	itab = (u32 *)(rssp + 1);
	for (idx = 0; idx < ITAB_NUM; idx++)
		itab[idx] = idx % num_queue;

	/* Hash key: placed directly behind the indirection table */
	keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
	for (idx = 0; idx < HASH_KEYLEN; idx++)
		keyp[idx] = netvsc_hash_key[idx];

	ret = rndis_filter_send_request(rdev, req);
	if (ret == 0) {
		left = wait_for_completion_timeout(&req->wait_event, 5*HZ);
		if (left == 0) {
			netdev_err(ndev, "timeout before we got a set response...\n");
			/* The request is deliberately not released here: a
			 * send-completion for it may still arrive.
			 */
			return -ETIMEDOUT;
		}

		set_resp = &req->response_msg.msg.set_complete;
		if (set_resp->status != RNDIS_STATUS_SUCCESS) {
			netdev_err(ndev, "Fail to set RSS parameters:0x%x\n",
				   set_resp->status);
			ret = -EINVAL;
		}
	}

	put_rndis_request(rdev, req);
	return ret;
}
static int rndis_filter_query_device_link_status(struct rndis_device *dev) static int rndis_filter_query_device_link_status(struct rndis_device *dev)
{ {
u32 size = sizeof(u32); u32 size = sizeof(u32);
...@@ -886,6 +984,28 @@ static int rndis_filter_close_device(struct rndis_device *dev) ...@@ -886,6 +984,28 @@ static int rndis_filter_close_device(struct rndis_device *dev)
return ret; return ret;
} }
/* Sub-channel creation callback: open a newly offered sub-channel and
 * record it in the channel table of the netvsc device.
 *
 * new_sc: the sub-channel being offered
 *
 * Sub-channels whose index is not below num_chn are ignored. Each accepted
 * sub-channel gets its own NETVSC_PACKET_SIZE slice of sub_cb_buf as its
 * per-channel state; index 1 maps to the first slice. The channel is
 * entered into chn_table only when vmbus_open() succeeds.
 */
static void netvsc_sc_open(struct vmbus_channel *new_sc)
{
	struct netvsc_device *nvdev =
		hv_get_drvdata(new_sc->primary_channel->device_obj);
	u16 sc_idx = new_sc->offermsg.offer.sub_channel_index;

	if (sc_idx >= nvdev->num_chn)
		return;

	set_per_channel_state(new_sc, nvdev->sub_cb_buf + (sc_idx - 1) *
			      NETVSC_PACKET_SIZE);

	if (vmbus_open(new_sc, nvdev->ring_size * PAGE_SIZE,
		       nvdev->ring_size * PAGE_SIZE, NULL, 0,
		       netvsc_channel_cb, new_sc) != 0)
		return;

	nvdev->chn_table[sc_idx] = new_sc;
}
int rndis_filter_device_add(struct hv_device *dev, int rndis_filter_device_add(struct hv_device *dev,
void *additional_info) void *additional_info)
{ {
...@@ -894,6 +1014,10 @@ int rndis_filter_device_add(struct hv_device *dev, ...@@ -894,6 +1014,10 @@ int rndis_filter_device_add(struct hv_device *dev,
struct rndis_device *rndis_device; struct rndis_device *rndis_device;
struct netvsc_device_info *device_info = additional_info; struct netvsc_device_info *device_info = additional_info;
struct ndis_offload_params offloads; struct ndis_offload_params offloads;
struct nvsp_message *init_packet;
int t;
struct ndis_recv_scale_cap rsscap;
u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
rndis_device = get_rndis_device(); rndis_device = get_rndis_device();
if (!rndis_device) if (!rndis_device)
...@@ -913,6 +1037,7 @@ int rndis_filter_device_add(struct hv_device *dev, ...@@ -913,6 +1037,7 @@ int rndis_filter_device_add(struct hv_device *dev,
/* Initialize the rndis device */ /* Initialize the rndis device */
net_device = hv_get_drvdata(dev); net_device = hv_get_drvdata(dev);
net_device->num_chn = 1;
net_device->extension = rndis_device; net_device->extension = rndis_device;
rndis_device->net_dev = net_device; rndis_device->net_dev = net_device;
...@@ -952,7 +1077,6 @@ int rndis_filter_device_add(struct hv_device *dev, ...@@ -952,7 +1077,6 @@ int rndis_filter_device_add(struct hv_device *dev,
if (ret) if (ret)
goto err_dev_remv; goto err_dev_remv;
rndis_filter_query_device_link_status(rndis_device); rndis_filter_query_device_link_status(rndis_device);
device_info->link_state = rndis_device->link_state; device_info->link_state = rndis_device->link_state;
...@@ -961,7 +1085,66 @@ int rndis_filter_device_add(struct hv_device *dev, ...@@ -961,7 +1085,66 @@ int rndis_filter_device_add(struct hv_device *dev,
rndis_device->hw_mac_adr, rndis_device->hw_mac_adr,
device_info->link_state ? "down" : "up"); device_info->link_state ? "down" : "up");
return ret; if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
return 0;
/* vRSS setup */
memset(&rsscap, 0, rsscap_size);
ret = rndis_filter_query_device(rndis_device,
OID_GEN_RECEIVE_SCALE_CAPABILITIES,
&rsscap, &rsscap_size);
if (ret || rsscap.num_recv_que < 2)
goto out;
net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ?
num_online_cpus() : rsscap.num_recv_que;
if (net_device->num_chn == 1)
goto out;
net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) *
NETVSC_PACKET_SIZE);
if (!net_device->sub_cb_buf) {
net_device->num_chn = 1;
dev_info(&dev->device, "No memory for subchannels.\n");
goto out;
}
vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
init_packet->msg.v5_msg.subchn_req.num_subchannels =
net_device->num_chn - 1;
ret = vmbus_sendpacket(dev->channel, init_packet,
sizeof(struct nvsp_message),
(unsigned long)init_packet,
VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (ret)
goto out;
t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
if (t == 0) {
ret = -ETIMEDOUT;
goto out;
}
if (init_packet->msg.v5_msg.subchn_comp.status !=
NVSP_STAT_SUCCESS) {
ret = -ENODEV;
goto out;
}
net_device->num_chn = 1 +
init_packet->msg.v5_msg.subchn_comp.num_subchannels;
vmbus_are_subchannels_present(dev->channel);
ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn);
out:
if (ret)
net_device->num_chn = 1;
return 0; /* return 0 because primary channel can be used alone */
err_dev_remv: err_dev_remv:
rndis_filter_device_remove(dev); rndis_filter_device_remove(dev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment