Commit f5cedc84 authored by Catherine Sullivan, committed by David S. Miller

gve: Add transmit and receive support

Add support for passing traffic.
Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: Sagi Shahar <sagis@google.com>
Signed-off-by: Jon Olson <jonolson@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Luigi Rizzo <lrizzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 893ce44d
......@@ -42,6 +42,8 @@ The driver interacts with the device in the following ways:
- See description below
- Interrupts
- See supported interrupts below
- Transmit and Receive Queues
- See description below
Registers
---------
......@@ -80,3 +82,31 @@ Notification Block Interrupts
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The notification block interrupts are used to tell the driver to poll
the queues associated with that interrupt.
The handler for these IRQs schedules the NAPI instance for that block to run
and poll the queues.
Traffic Queues
--------------
gVNIC's queues are composed of a descriptor ring and a buffer and are
assigned to a notification block.
The descriptor rings are power-of-two-sized ring buffers consisting of
fixed-size descriptors. They advance their head pointer using a __be32
doorbell located in Bar2. The tail pointers are advanced by consuming
descriptors in-order and updating a __be32 counter. Both the doorbell
and the counter overflow to zero.
Each queue's buffers must be registered in advance with the device as a
queue page list, and packet data can only be put in those pages.
Transmit
~~~~~~~~
gve maps the buffers for transmit rings into a FIFO and copies the packets
into the FIFO before sending them to the NIC.
Receive
~~~~~~~
The buffers for receive rings are put into a data ring that is the same
length as the descriptor ring, and the head and tail pointers advance over
the two rings together.
# Makefile for the Google virtual Ethernet (gve) driver
obj-$(CONFIG_GVE) += gve.o
gve-objs := gve_main.o gve_adminq.o
gve-objs := gve_main.o gve_tx.o gve_rx.o gve_adminq.o
This diff is collapsed.
......@@ -190,6 +190,72 @@ int gve_adminq_deconfigure_device_resources(struct gve_priv *priv)
return gve_adminq_execute_cmd(priv, &cmd);
}
/* Ask the device to create transmit queue @queue_index.
 *
 * The command carries the bus addresses of the ring's queue resources and of
 * the ring itself, plus the ids of the queue page list behind its FIFO and of
 * its notification block, all converted to big-endian.
 *
 * Returns the result of gve_adminq_execute_cmd().
 */
int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
{
	struct gve_tx_ring *ring = &priv->tx[queue_index];
	struct gve_adminq_create_tx_queue *req;
	union gve_adminq_command cmd;

	/* Zero the whole command first so .reserved is sent as 0. */
	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE);

	req = &cmd.create_tx_queue;
	req->queue_id = cpu_to_be32(queue_index);
	req->queue_resources_addr = cpu_to_be64(ring->q_resources_bus);
	req->tx_ring_addr = cpu_to_be64(ring->bus);
	req->queue_page_list_id = cpu_to_be32(ring->tx_fifo.qpl->id);
	req->ntfy_id = cpu_to_be32(ring->ntfy_id);

	return gve_adminq_execute_cmd(priv, &cmd);
}
/* Ask the device to create receive queue @queue_index.
 *
 * @queue_index is sent as both the queue id and the index field. The command
 * also carries the bus addresses of the queue resources, the descriptor ring
 * and the data ring, plus the ids of the notification block and of the queue
 * page list backing the data ring, all converted to big-endian.
 *
 * Returns the result of gve_adminq_execute_cmd().
 */
int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
{
	struct gve_rx_ring *ring = &priv->rx[queue_index];
	struct gve_adminq_create_rx_queue *req;
	union gve_adminq_command cmd;

	/* Zero the whole command first so .reserved and .padding are 0. */
	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);

	req = &cmd.create_rx_queue;
	req->queue_id = cpu_to_be32(queue_index);
	req->index = cpu_to_be32(queue_index);
	req->ntfy_id = cpu_to_be32(ring->ntfy_id);
	req->queue_resources_addr = cpu_to_be64(ring->q_resources_bus);
	req->rx_desc_ring_addr = cpu_to_be64(ring->desc.bus);
	req->rx_data_ring_addr = cpu_to_be64(ring->data.data_bus);
	req->queue_page_list_id = cpu_to_be32(ring->data.qpl->id);

	return gve_adminq_execute_cmd(priv, &cmd);
}
/* Ask the device to destroy transmit queue @queue_index.
 *
 * Returns the result of gve_adminq_execute_cmd().
 */
int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index)
{
	union gve_adminq_command cmd;

	/* Every byte not set below is sent as zero. */
	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE);
	cmd.destroy_tx_queue.queue_id = cpu_to_be32(queue_index);

	return gve_adminq_execute_cmd(priv, &cmd);
}
/* Ask the device to destroy receive queue @queue_index.
 *
 * Returns the result of gve_adminq_execute_cmd().
 */
int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index)
{
	union gve_adminq_command cmd;

	/* Every byte not set below is sent as zero. */
	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
	cmd.destroy_rx_queue.queue_id = cpu_to_be32(queue_index);

	return gve_adminq_execute_cmd(priv, &cmd);
}
int gve_adminq_describe_device(struct gve_priv *priv)
{
struct gve_device_descriptor *descriptor;
......@@ -215,6 +281,25 @@ int gve_adminq_describe_device(struct gve_priv *priv)
if (err)
goto free_device_descriptor;
priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) {
netif_err(priv, drv, priv->dev, "Tx desc count %d too low\n",
priv->tx_desc_cnt);
err = -EINVAL;
goto free_device_descriptor;
}
priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
if (priv->rx_desc_cnt * sizeof(priv->rx->desc.desc_ring[0])
< PAGE_SIZE ||
priv->rx_desc_cnt * sizeof(priv->rx->data.data_ring[0])
< PAGE_SIZE) {
netif_err(priv, drv, priv->dev, "Rx desc count %d too low\n",
priv->rx_desc_cnt);
err = -EINVAL;
goto free_device_descriptor;
}
priv->max_registered_pages =
be64_to_cpu(descriptor->max_registered_pages);
mtu = be16_to_cpu(descriptor->mtu);
if (mtu < ETH_MIN_MTU) {
netif_err(priv, drv, priv->dev, "MTU %d below minimum MTU\n",
......@@ -227,6 +312,14 @@ int gve_adminq_describe_device(struct gve_priv *priv)
ether_addr_copy(priv->dev->dev_addr, descriptor->mac);
mac = descriptor->mac;
netif_info(priv, drv, priv->dev, "MAC addr: %pM\n", mac);
priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
priv->rx_pages_per_qpl = be16_to_cpu(descriptor->rx_pages_per_qpl);
if (priv->rx_pages_per_qpl < priv->rx_desc_cnt) {
netif_err(priv, drv, priv->dev, "rx_pages_per_qpl cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n",
priv->rx_pages_per_qpl);
priv->rx_desc_cnt = priv->rx_pages_per_qpl;
}
priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
free_device_descriptor:
dma_free_coherent(&priv->pdev->dev, sizeof(*descriptor), descriptor,
......@@ -234,6 +327,51 @@ int gve_adminq_describe_device(struct gve_priv *priv)
return err;
}
int gve_adminq_register_page_list(struct gve_priv *priv,
struct gve_queue_page_list *qpl)
{
struct device *hdev = &priv->pdev->dev;
u32 num_entries = qpl->num_entries;
u32 size = num_entries * sizeof(qpl->page_buses[0]);
union gve_adminq_command cmd;
dma_addr_t page_list_bus;
__be64 *page_list;
int err;
int i;
memset(&cmd, 0, sizeof(cmd));
page_list = dma_alloc_coherent(hdev, size, &page_list_bus, GFP_KERNEL);
if (!page_list)
return -ENOMEM;
for (i = 0; i < num_entries; i++)
page_list[i] = cpu_to_be64(qpl->page_buses[i]);
cmd.opcode = cpu_to_be32(GVE_ADMINQ_REGISTER_PAGE_LIST);
cmd.reg_page_list = (struct gve_adminq_register_page_list) {
.page_list_id = cpu_to_be32(qpl->id),
.num_pages = cpu_to_be32(num_entries),
.page_address_list_addr = cpu_to_be64(page_list_bus),
};
err = gve_adminq_execute_cmd(priv, &cmd);
dma_free_coherent(hdev, size, page_list, page_list_bus);
return err;
}
/* Ask the device to unregister the queue page list with id @page_list_id.
 *
 * Returns the result of gve_adminq_execute_cmd().
 */
int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id)
{
	union gve_adminq_command cmd;

	/* Every byte not set below is sent as zero. */
	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = cpu_to_be32(GVE_ADMINQ_UNREGISTER_PAGE_LIST);
	cmd.unreg_page_list.page_list_id = cpu_to_be32(page_list_id);

	return gve_adminq_execute_cmd(priv, &cmd);
}
int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu)
{
union gve_adminq_command cmd;
......
......@@ -13,6 +13,12 @@
/* Admin queue command opcodes. Callers convert the value with cpu_to_be32()
 * before storing it in the command's opcode field.
 */
enum gve_adminq_opcodes {
GVE_ADMINQ_DESCRIBE_DEVICE = 0x1,
GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES = 0x2,
GVE_ADMINQ_REGISTER_PAGE_LIST = 0x3,
GVE_ADMINQ_UNREGISTER_PAGE_LIST = 0x4,
GVE_ADMINQ_CREATE_TX_QUEUE = 0x5,
GVE_ADMINQ_CREATE_RX_QUEUE = 0x6,
GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7,
GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8,
GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9,
GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB,
};
......@@ -89,6 +95,70 @@ struct gve_adminq_configure_device_resources {
static_assert(sizeof(struct gve_adminq_configure_device_resources) == 32);
/* Payload of GVE_ADMINQ_REGISTER_PAGE_LIST; all fields are big-endian. */
struct gve_adminq_register_page_list {
__be32 page_list_id; /* id of the queue page list being registered */
__be32 num_pages; /* number of entries in the page address array */
__be64 page_address_list_addr; /* bus address of the __be64 page address array */
};
/* The layout is fixed at 16 bytes. */
static_assert(sizeof(struct gve_adminq_register_page_list) == 16);
/* Payload of GVE_ADMINQ_UNREGISTER_PAGE_LIST. */
struct gve_adminq_unregister_page_list {
__be32 page_list_id; /* big-endian id of the queue page list to unregister */
};
/* The layout is fixed at 4 bytes. */
static_assert(sizeof(struct gve_adminq_unregister_page_list) == 4);
/* Payload of GVE_ADMINQ_CREATE_TX_QUEUE; all fields are big-endian. */
struct gve_adminq_create_tx_queue {
__be32 queue_id; /* index of the tx queue being created */
__be32 reserved; /* sent as 0 */
__be64 queue_resources_addr; /* bus address of the queue resources */
__be64 tx_ring_addr; /* bus address of the tx ring */
__be32 queue_page_list_id; /* id of the queue page list used by the queue */
__be32 ntfy_id; /* id of the queue's notification block */
};
/* The layout is fixed at 32 bytes. */
static_assert(sizeof(struct gve_adminq_create_tx_queue) == 32);
/* Payload of GVE_ADMINQ_CREATE_RX_QUEUE; all fields are big-endian. */
struct gve_adminq_create_rx_queue {
__be32 queue_id; /* index of the rx queue being created */
__be32 index; /* the driver sends the same value as queue_id */
__be32 reserved; /* sent as 0 */
__be32 ntfy_id; /* id of the queue's notification block */
__be64 queue_resources_addr; /* bus address of the queue resources */
__be64 rx_desc_ring_addr; /* bus address of the rx descriptor ring */
__be64 rx_data_ring_addr; /* bus address of the rx data ring */
__be32 queue_page_list_id; /* id of the queue page list used by the queue */
u8 padding[4]; /* explicit tail padding up to 48 bytes */
};
/* The layout is fixed at 48 bytes. */
static_assert(sizeof(struct gve_adminq_create_rx_queue) == 48);
/* Queue resources that are shared with the device.
 *
 * The two index fields are big-endian and, per the field comments, written by
 * the device for the guest to read. The reserved member of the union pads the
 * structure to 64 bytes.
 * NOTE(review): presumably db_index/counter_index select this queue's
 * doorbell and event counter -- confirm against the users of this struct.
 */
struct gve_queue_resources {
union {
struct {
__be32 db_index; /* Device -> Guest */
__be32 counter_index; /* Device -> Guest */
};
u8 reserved[64];
};
};
/* The layout is fixed at 64 bytes. */
static_assert(sizeof(struct gve_queue_resources) == 64);
/* Payload of GVE_ADMINQ_DESTROY_TX_QUEUE. */
struct gve_adminq_destroy_tx_queue {
__be32 queue_id; /* big-endian index of the tx queue to destroy */
};
/* The layout is fixed at 4 bytes. */
static_assert(sizeof(struct gve_adminq_destroy_tx_queue) == 4);
/* Payload of GVE_ADMINQ_DESTROY_RX_QUEUE. */
struct gve_adminq_destroy_rx_queue {
__be32 queue_id; /* big-endian index of the rx queue to destroy */
};
/* The layout is fixed at 4 bytes. */
static_assert(sizeof(struct gve_adminq_destroy_rx_queue) == 4);
/* GVE Set Driver Parameter Types */
enum gve_set_driver_param_types {
GVE_SET_PARAM_MTU = 0x1,
......@@ -109,7 +179,13 @@ union gve_adminq_command {
union {
struct gve_adminq_configure_device_resources
configure_device_resources;
struct gve_adminq_create_tx_queue create_tx_queue;
struct gve_adminq_create_rx_queue create_rx_queue;
struct gve_adminq_destroy_tx_queue destroy_tx_queue;
struct gve_adminq_destroy_rx_queue destroy_rx_queue;
struct gve_adminq_describe_device describe_device;
struct gve_adminq_register_page_list reg_page_list;
struct gve_adminq_unregister_page_list unreg_page_list;
struct gve_adminq_set_driver_parameter set_driver_param;
};
};
......@@ -130,5 +206,12 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv,
dma_addr_t db_array_bus_addr,
u32 num_ntfy_blks);
int gve_adminq_deconfigure_device_resources(struct gve_priv *priv);
/* Queue create/destroy commands. queue_id selects the queue; the create
 * variants also use it to index priv->tx / priv->rx.
 */
int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_id);
int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_id);
int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_id);
int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_id);
/* Register / unregister a queue page list with the device. */
int gve_adminq_register_page_list(struct gve_priv *priv,
struct gve_queue_page_list *qpl);
int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id);
int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu);
#endif /* _GVE_ADMINQ_H */
/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
* Google virtual Ethernet (gve) driver
*
* Copyright (C) 2015-2019 Google, Inc.
*/
/* GVE Transmit Descriptor formats */
#ifndef _GVE_DESC_H_
#define _GVE_DESC_H_
#include <linux/build_bug.h>
/* A note on seg_addrs
*
* Base addresses encoded in seg_addr are not assumed to be physical
* addresses. The ring format assumes these come from some linear address
* space. This could be physical memory, kernel virtual memory, user virtual
* memory. gVNIC uses lists of registered pages. Each queue is assumed
* to be associated with a single such linear address space to ensure a
* consistent meaning for seg_addrs posted to its rings.
*/
/* Transmit packet descriptor. Packed; multi-byte fields are big-endian. */
struct gve_tx_pkt_desc {
/* NOTE(review): the GVE_TXD_* type values are shifted left by 4 and the
 * GVE_TXF_* flags use low bits, so the type appears to occupy the UPPER
 * 4 bits and the flags the lower 4 -- confirm against the device spec.
 */
u8 type_flags; /* desc type and flags packed into one byte */
u8 l4_csum_offset; /* relative offset of L4 csum word */
u8 l4_hdr_offset; /* Offset of start of L4 headers in packet */
u8 desc_cnt; /* Total descriptors for this packet */
__be16 len; /* Total length of this packet (in bytes) */
__be16 seg_len; /* Length of this descriptor's segment */
__be64 seg_addr; /* Base address (see note) of this segment */
} __packed;
/* Transmit segment descriptor. Packed; multi-byte fields are big-endian. */
struct gve_tx_seg_desc {
/* NOTE(review): GVE_TXD_SEG is (0x2 << 4) and GVE_TXSF_IPV6 is BIT(1), so
 * the type appears to occupy the UPPER 4 bits and the flags the lower 4
 * -- confirm against the device spec.
 */
u8 type_flags; /* desc type and flags packed into one byte */
u8 l3_offset; /* TSO: 2 byte units to start of IPH */
__be16 reserved;
__be16 mss; /* TSO MSS */
__be16 seg_len; /* Length of this descriptor's segment */
__be64 seg_addr; /* Base address of this segment (see the note on seg_addrs) */
} __packed;
/* GVE Transmit Descriptor Types
 *
 * The values are pre-shifted by 4, i.e. they occupy bits 7:4 of a byte.
 */
#define GVE_TXD_STD (0x0 << 4) /* Std with Host Address */
#define GVE_TXD_TSO (0x1 << 4) /* TSO with Host Address */
#define GVE_TXD_SEG (0x2 << 4) /* Seg with Host Address */
/* GVE Transmit Descriptor Flags for Std Pkts (low-order bits) */
#define GVE_TXF_L4CSUM BIT(0) /* Need csum offload */
#define GVE_TXF_TSTAMP BIT(2) /* Timestamp required */
/* GVE Transmit Descriptor Flags for TSO Segs (low-order bits) */
#define GVE_TXSF_IPV6 BIT(1) /* IPv6 TSO */
/* GVE Receive Packet Descriptor */
/* The start of an ethernet packet comes 2 bytes into the rx buffer.
* gVNIC adds this padding so that both the DMA and the L3/4 protocol header
* access is aligned.
*/
#define GVE_RX_PAD 2
/* Receive descriptor: 48 bytes of padding followed by 16 bytes of packet
 * metadata. Packed; multi-byte fields other than csum are big-endian.
 */
struct gve_rx_desc {
u8 padding[48];
__be32 rss_hash; /* Receive-side scaling hash (Toeplitz for gVNIC) */
__be16 mss;
__be16 reserved; /* Reserved to zero */
u8 hdr_len; /* Header length (L2-L4) including padding */
u8 hdr_off; /* 64-byte-scaled offset into RX_DATA entry */
__sum16 csum; /* 1's-complement partial checksum of L3+ bytes */
__be16 len; /* Length of the received packet */
__be16 flags_seq; /* Flags [15:3] and sequence number [2:0] (1-7) */
} __packed;
/* The layout is fixed at 64 bytes. */
static_assert(sizeof(struct gve_rx_desc) == 64);
/* As with the Tx ring format, the qpl_offset entries below are offsets into an
 * ordered list of registered pages.
 * NOTE(review): presumably one slot per entry of the rx data ring -- confirm
 * against the ring definition.
 */
struct gve_rx_data_slot {
/* byte offset into the rx registered segment of this slot (big-endian) */
__be64 qpl_offset;
};
/* GVE Receive Packet Descriptor Seq No: low 3 bits of the big-endian value */
#define GVE_SEQNO(x) (be16_to_cpu(x) & 0x7)
/* GVE Receive Packet Descriptor Flags
 *
 * GVE_RXFLG(x) yields flag number x as a big-endian mask; flag numbering
 * starts at bit 3, above the 3 sequence-number bits.
 */
#define GVE_RXFLG(x) cpu_to_be16(1 << (3 + (x)))
#define GVE_RXF_FRAG GVE_RXFLG(3) /* IP Fragment */
#define GVE_RXF_IPV4 GVE_RXFLG(4) /* IPv4 */
#define GVE_RXF_IPV6 GVE_RXFLG(5) /* IPv6 */
#define GVE_RXF_TCP GVE_RXFLG(6) /* TCP Packet */
#define GVE_RXF_UDP GVE_RXFLG(7) /* UDP Packet */
#define GVE_RXF_ERR GVE_RXFLG(8) /* Packet Error Detected */
/* GVE IRQ */
#define GVE_IRQ_ACK BIT(31)
#define GVE_IRQ_MASK BIT(30)
#define GVE_IRQ_EVENT BIT(29)
/* Decide from the big-endian rx descriptor flags whether RSS applies:
 * never for IP fragments, otherwise only when the IPv4 or IPv6 flag is set.
 */
static inline bool gve_needs_rss(__be16 flag)
{
	return !(flag & GVE_RXF_FRAG) &&
	       (flag & (GVE_RXF_IPV4 | GVE_RXF_IPV6));
}
/* Return the sequence number that follows @seq; 7 is followed by 1. */
static inline u8 gve_next_seqno(u8 seq)
{
	u8 next = seq + 1;

	if (next == 8)
		return 1;
	return next;
}
#endif /* _GVE_DESC_H_ */
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment