Commit 75eaae15 authored by Praveen Kaligineedi's avatar Praveen Kaligineedi Committed by David S. Miller

gve: Add XDP DROP and TX support for GQI-QPL format

Add support for XDP PASS, DROP and TX actions.

This patch contains the following changes:
1) Support installing/uninstalling XDP program
2) Add dedicated XDP TX queues
3) Add support for XDP DROP action
4) Add support for XDP TX action
Signed-off-by: default avatarPraveen Kaligineedi <pkaligineedi@google.com>
Reviewed-by: default avatarJeroen de Borst <jeroendb@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 7fc2bf78
......@@ -47,6 +47,10 @@
#define GVE_RX_BUFFER_SIZE_DQO 2048
#define GVE_XDP_ACTIONS 5
#define GVE_TX_MAX_HEADER_SIZE 182
/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
struct gve_rx_desc_queue {
struct gve_rx_desc *desc_ring; /* the descriptor ring */
......@@ -230,7 +234,9 @@ struct gve_rx_ring {
u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */
u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */
u64 rx_frag_alloc_cnt; /* free-running count of rx page allocations */
u64 xdp_tx_errors;
u64 xdp_redirect_errors;
u64 xdp_actions[GVE_XDP_ACTIONS];
u32 q_num; /* queue index */
u32 ntfy_id; /* notification block index */
struct gve_queue_resources *q_resources; /* head and tail pointer idx */
......@@ -238,6 +244,9 @@ struct gve_rx_ring {
struct u64_stats_sync statss; /* sync stats for 32bit archs */
struct gve_rx_ctx ctx; /* Info for packet currently being processed in this ring. */
/* XDP stuff */
struct xdp_rxq_info xdp_rxq;
};
/* A TX desc ring entry */
......@@ -259,6 +268,9 @@ struct gve_tx_iovec {
*/
struct gve_tx_buffer_state {
struct sk_buff *skb; /* skb for this pkt */
struct {
u16 size; /* size of xmitted xdp pkt */
} xdp;
union {
struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
struct {
......@@ -526,9 +538,11 @@ struct gve_priv {
u16 rx_data_slot_cnt; /* rx buffer length */
u64 max_registered_pages;
u64 num_registered_pages; /* num pages registered with NIC */
struct bpf_prog *xdp_prog; /* XDP BPF program */
u32 rx_copybreak; /* copy packets smaller than this */
u16 default_num_queues; /* default num queues to set up */
u16 num_xdp_queues;
struct gve_queue_config tx_cfg;
struct gve_queue_config rx_cfg;
struct gve_qpl_config qpl_cfg; /* map used QPL ids */
......@@ -785,7 +799,17 @@ static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
if (priv->queue_format != GVE_GQI_QPL_FORMAT)
return 0;
return priv->tx_cfg.num_queues;
return priv->tx_cfg.num_queues + priv->num_xdp_queues;
}
/* Returns the number of XDP tx queue page lists
*/
static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
{
if (priv->queue_format != GVE_GQI_QPL_FORMAT)
return 0;
return priv->num_xdp_queues;
}
/* Returns the number of rx queue page lists
......@@ -874,7 +898,17 @@ static inline bool gve_is_gqi(struct gve_priv *priv)
static inline u32 gve_num_tx_queues(struct gve_priv *priv)
{
return priv->tx_cfg.num_queues;
return priv->tx_cfg.num_queues + priv->num_xdp_queues;
}
static inline u32 gve_xdp_tx_queue_id(struct gve_priv *priv, u32 queue_id)
{
return priv->tx_cfg.num_queues + queue_id;
}
static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv)
{
return gve_xdp_tx_queue_id(priv, 0);
}
/* buffers */
......@@ -885,7 +919,11 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
enum dma_data_direction);
/* tx handling */
netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
void *data, int len);
void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid);
bool gve_tx_poll(struct gve_notify_block *block, int budget);
bool gve_xdp_poll(struct gve_notify_block *block, int budget);
int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings);
void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings);
u32 gve_tx_load_event_counter(struct gve_priv *priv,
......
......@@ -34,6 +34,11 @@ static u32 gve_get_msglevel(struct net_device *netdev)
return priv->msg_enable;
}
/* For the following stats column string names, make sure the order
* matches how it is filled in the code. For xdp_aborted, xdp_drop,
* xdp_pass, xdp_tx, xdp_redirect, make sure it also matches the order
* as declared in enum xdp_action inside file uapi/linux/bpf.h .
*/
static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
"rx_packets", "tx_packets", "rx_bytes", "tx_bytes",
"rx_dropped", "tx_dropped", "tx_timeouts",
......@@ -49,6 +54,9 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
"rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]",
"rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]",
"rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]",
"rx_xdp_aborted[%u]", "rx_xdp_drop[%u]", "rx_xdp_pass[%u]",
"rx_xdp_tx[%u]", "rx_xdp_redirect[%u]",
"rx_xdp_tx_errors[%u]", "rx_xdp_redirect_errors[%u]",
};
static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
......@@ -289,14 +297,25 @@ gve_get_ethtool_stats(struct net_device *netdev,
if (skip_nic_stats) {
/* skip NIC rx stats */
i += NIC_RX_STATS_REPORT_NUM;
continue;
}
for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) {
u64 value =
be64_to_cpu(report_stats[rx_qid_to_stats_idx[ring] + j].value);
} else {
stats_idx = rx_qid_to_stats_idx[ring];
for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) {
u64 value =
be64_to_cpu(report_stats[stats_idx + j].value);
data[i++] = value;
data[i++] = value;
}
}
/* XDP rx counters */
do {
start = u64_stats_fetch_begin(&priv->rx[ring].statss);
for (j = 0; j < GVE_XDP_ACTIONS; j++)
data[i + j] = rx->xdp_actions[j];
data[i + j++] = rx->xdp_tx_errors;
data[i + j++] = rx->xdp_redirect_errors;
} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
start));
i += GVE_XDP_ACTIONS + 2; /* XDP rx counters */
}
} else {
i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS;
......@@ -418,6 +437,12 @@ static int gve_set_channels(struct net_device *netdev,
if (!new_rx || !new_tx)
return -EINVAL;
if (priv->num_xdp_queues &&
(new_tx != new_rx || (2 * new_tx > priv->tx_cfg.max_queues))) {
dev_err(&priv->pdev->dev, "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues");
return -EINVAL;
}
if (!netif_carrier_ok(netdev)) {
priv->tx_cfg.num_queues = new_tx;
priv->rx_cfg.num_queues = new_rx;
......
This diff is collapsed.
......@@ -8,6 +8,8 @@
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <net/xdp.h>
static void gve_rx_free_buffer(struct device *dev,
struct gve_rx_slot_page_info *page_info,
......@@ -591,6 +593,43 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
return skb;
}
static void gve_xdp_done(struct gve_priv *priv, struct gve_rx_ring *rx,
struct xdp_buff *xdp, struct bpf_prog *xprog,
int xdp_act)
{
struct gve_tx_ring *tx;
int tx_qid;
int err;
switch (xdp_act) {
case XDP_ABORTED:
case XDP_DROP:
default:
break;
case XDP_TX:
tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num);
tx = &priv->tx[tx_qid];
err = gve_xdp_xmit_one(priv, tx, xdp->data,
xdp->data_end - xdp->data);
if (unlikely(err)) {
u64_stats_update_begin(&rx->statss);
rx->xdp_tx_errors++;
u64_stats_update_end(&rx->statss);
}
break;
case XDP_REDIRECT:
u64_stats_update_begin(&rx->statss);
rx->xdp_redirect_errors++;
u64_stats_update_end(&rx->statss);
break;
}
u64_stats_update_begin(&rx->statss);
if ((u32)xdp_act < GVE_XDP_ACTIONS)
rx->xdp_actions[xdp_act]++;
u64_stats_update_end(&rx->statss);
}
#define GVE_PKTCONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x))
static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
struct gve_rx_desc *desc, u32 idx,
......@@ -603,9 +642,12 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
union gve_rx_data_slot *data_slot;
struct gve_priv *priv = rx->gve;
struct sk_buff *skb = NULL;
struct bpf_prog *xprog;
struct xdp_buff xdp;
dma_addr_t page_bus;
void *va;
u16 len = frag_size;
struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
bool is_first_frag = ctx->frag_cnt == 0;
......@@ -645,9 +687,35 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
PAGE_SIZE, DMA_FROM_DEVICE);
page_info->pad = is_first_frag ? GVE_RX_PAD : 0;
len -= page_info->pad;
frag_size -= page_info->pad;
skb = gve_rx_skb(priv, rx, page_info, napi, frag_size,
xprog = READ_ONCE(priv->xdp_prog);
if (xprog && is_only_frag) {
void *old_data;
int xdp_act;
xdp_init_buff(&xdp, rx->packet_buffer_size, &rx->xdp_rxq);
xdp_prepare_buff(&xdp, page_info->page_address +
page_info->page_offset, GVE_RX_PAD,
len, false);
old_data = xdp.data;
xdp_act = bpf_prog_run_xdp(xprog, &xdp);
if (xdp_act != XDP_PASS) {
gve_xdp_done(priv, rx, &xdp, xprog, xdp_act);
ctx->total_size += frag_size;
goto finish_ok_pkt;
}
page_info->pad += xdp.data - old_data;
len = xdp.data_end - xdp.data;
u64_stats_update_begin(&rx->statss);
rx->xdp_actions[XDP_PASS]++;
u64_stats_update_end(&rx->statss);
}
skb = gve_rx_skb(priv, rx, page_info, napi, len,
data_slot, is_only_frag);
if (!skb) {
u64_stats_update_begin(&rx->statss);
......@@ -773,6 +841,7 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
netdev_features_t feat)
{
u64 xdp_txs = rx->xdp_actions[XDP_TX];
struct gve_rx_ctx *ctx = &rx->ctx;
struct gve_priv *priv = rx->gve;
struct gve_rx_cnts cnts = {0};
......@@ -820,6 +889,9 @@ static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
u64_stats_update_end(&rx->statss);
}
if (xdp_txs != rx->xdp_actions[XDP_TX])
gve_xdp_tx_flush(priv, rx->q_num);
/* restock ring slots */
if (!rx->data.raw_addressing) {
/* In QPL mode buffs are refilled as the desc are processed */
......
......@@ -19,6 +19,14 @@ static inline void gve_tx_put_doorbell(struct gve_priv *priv,
iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]);
}
void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid)
{
u32 tx_qid = gve_xdp_tx_queue_id(priv, xdp_qid);
struct gve_tx_ring *tx = &priv->tx[tx_qid];
gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
}
/* gvnic can only transmit from a Registered Segment.
* We copy skb payloads into the registered segment before writing Tx
* descriptors and ringing the Tx doorbell.
......@@ -132,6 +140,50 @@ static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
atomic_add(bytes, &fifo->available);
}
static size_t gve_tx_clear_buffer_state(struct gve_tx_buffer_state *info)
{
size_t space_freed = 0;
int i;
for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
space_freed += info->iov[i].iov_len + info->iov[i].iov_padding;
info->iov[i].iov_len = 0;
info->iov[i].iov_padding = 0;
}
return space_freed;
}
static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx,
u32 to_do)
{
struct gve_tx_buffer_state *info;
u32 clean_end = tx->done + to_do;
u64 pkts = 0, bytes = 0;
size_t space_freed = 0;
u32 idx;
for (; tx->done < clean_end; tx->done++) {
idx = tx->done & tx->mask;
info = &tx->info[idx];
if (unlikely(!info->xdp.size))
continue;
bytes += info->xdp.size;
pkts++;
info->xdp.size = 0;
space_freed += gve_tx_clear_buffer_state(info);
}
gve_tx_free_fifo(&tx->tx_fifo, space_freed);
u64_stats_update_begin(&tx->statss);
tx->bytes_done += bytes;
tx->pkt_done += pkts;
u64_stats_update_end(&tx->statss);
return pkts;
}
static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
u32 to_do, bool try_to_wake);
......@@ -144,8 +196,12 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx)
gve_tx_remove_from_block(priv, idx);
slots = tx->mask + 1;
gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
netdev_tx_reset_queue(tx->netdev_txq);
if (tx->q_num < priv->tx_cfg.num_queues) {
gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
netdev_tx_reset_queue(tx->netdev_txq);
} else {
gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt);
}
dma_free_coherent(hdev, sizeof(*tx->q_resources),
tx->q_resources, tx->q_resources_bus);
......@@ -213,7 +269,8 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
(unsigned long)tx->bus);
tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
if (idx < priv->tx_cfg.num_queues)
tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
gve_tx_add_to_block(priv, idx);
return 0;
......@@ -657,6 +714,65 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
static int gve_tx_fill_xdp(struct gve_priv *priv, struct gve_tx_ring *tx,
void *data, int len)
{
int pad, nfrags, ndescs, iovi, offset;
struct gve_tx_buffer_state *info;
u32 reqi = tx->req;
pad = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, len);
if (pad >= GVE_TX_MAX_HEADER_SIZE)
pad = 0;
info = &tx->info[reqi & tx->mask];
info->xdp.size = len;
nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, pad + len,
&info->iov[0]);
iovi = pad > 0;
ndescs = nfrags - iovi;
offset = 0;
while (iovi < nfrags) {
if (!offset)
gve_tx_fill_pkt_desc(&tx->desc[reqi & tx->mask], 0,
CHECKSUM_NONE, false, 0, ndescs,
info->iov[iovi].iov_len,
info->iov[iovi].iov_offset, len);
else
gve_tx_fill_seg_desc(&tx->desc[reqi & tx->mask],
0, 0, false, false,
info->iov[iovi].iov_len,
info->iov[iovi].iov_offset);
memcpy(tx->tx_fifo.base + info->iov[iovi].iov_offset,
data + offset, info->iov[iovi].iov_len);
gve_dma_sync_for_device(&priv->pdev->dev,
tx->tx_fifo.qpl->page_buses,
info->iov[iovi].iov_offset,
info->iov[iovi].iov_len);
offset += info->iov[iovi].iov_len;
iovi++;
reqi++;
}
return ndescs;
}
int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
void *data, int len)
{
int nsegs;
if (!gve_can_tx(tx, len + GVE_TX_MAX_HEADER_SIZE - 1))
return -EBUSY;
nsegs = gve_tx_fill_xdp(priv, tx, data, len);
tx->req += nsegs;
return 0;
}
#define GVE_TX_START_THRESH PAGE_SIZE
static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
......@@ -666,8 +782,8 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
u64 pkts = 0, bytes = 0;
size_t space_freed = 0;
struct sk_buff *skb;
int i, j;
u32 idx;
int j;
for (j = 0; j < to_do; j++) {
idx = tx->done & tx->mask;
......@@ -689,12 +805,7 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
dev_consume_skb_any(skb);
if (tx->raw_addressing)
continue;
/* FIFO free */
for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
space_freed += info->iov[i].iov_len + info->iov[i].iov_padding;
info->iov[i].iov_len = 0;
info->iov[i].iov_padding = 0;
}
space_freed += gve_tx_clear_buffer_state(info);
}
}
......@@ -729,6 +840,24 @@ u32 gve_tx_load_event_counter(struct gve_priv *priv,
return be32_to_cpu(counter);
}
bool gve_xdp_poll(struct gve_notify_block *block, int budget)
{
struct gve_priv *priv = block->priv;
struct gve_tx_ring *tx = block->tx;
u32 nic_done;
u32 to_do;
/* If budget is 0, do all the work */
if (budget == 0)
budget = INT_MAX;
/* Find out how much work there is to be done */
nic_done = gve_tx_load_event_counter(priv, tx);
to_do = min_t(u32, (nic_done - tx->done), budget);
gve_clean_xdp_done(priv, tx, to_do);
return nic_done != tx->done;
}
bool gve_tx_poll(struct gve_notify_block *block, int budget)
{
struct gve_priv *priv = block->priv;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment