Commit 57939fdc authored by David S. Miller

Merge branch 'nfp3800'

Simon Horman says:

====================
nfp: support for NFP-3800

Yinjun Zhan says:

This is the second of a two part series to support the NFP-3800 device.

To utilize the new hardware features of the NFP-3800, the driver adds support
for a new data path, NFDK. This series mainly refactors the data path
related implementations. The data path specific implementations
are now separated into nfd3 and nfdk directories respectively, and the
common part is also moved into a new file.

* The series starts with a small refinement in Patch 1/10. Patches 2/10 and
  3/10 are the main refactoring of the data path implementation, which
  prepares for adding the NFDK data path.
* Before the introduction of NFDK, there's some more preparation work
  for NFP-3800 features, such as multi-descriptor per-packet and write-back
  mechanism of TX pointer, which is done in patches 4/10, 5/10, 6/10, 7/10.
* Patch 8/10 allows the driver to select data path according
  to firmware version. Finally, patches 9/10 and 10/10 introduce the new
  NFDK data path.

Changes between v1 and v2
* Correct kdoc for nfp_nfdk_tx()
* Correct build warnings on 32-bit

Thanks to everyone who contributed to this work.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 4b45e079 d9d95049
...@@ -20,12 +20,18 @@ nfp-objs := \ ...@@ -20,12 +20,18 @@ nfp-objs := \
ccm_mbox.o \ ccm_mbox.o \
devlink_param.o \ devlink_param.o \
nfp_asm.o \ nfp_asm.o \
nfd3/dp.o \
nfd3/rings.o \
nfd3/xsk.o \
nfdk/dp.o \
nfdk/rings.o \
nfp_app.o \ nfp_app.o \
nfp_app_nic.o \ nfp_app_nic.o \
nfp_devlink.o \ nfp_devlink.o \
nfp_hwmon.o \ nfp_hwmon.o \
nfp_main.o \ nfp_main.o \
nfp_net_common.o \ nfp_net_common.o \
nfp_net_dp.o \
nfp_net_ctrl.o \ nfp_net_ctrl.o \
nfp_net_debugdump.o \ nfp_net_debugdump.o \
nfp_net_ethtool.o \ nfp_net_ethtool.o \
......
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2015-2019 Netronome Systems, Inc. */
#include <linux/bpf_trace.h>
#include <linux/netdevice.h>
#include "../nfp_app.h"
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "../nfp_net_xsk.h"
#include "../crypto/crypto.h"
#include "../crypto/fw.h"
#include "nfd3.h"
/* Transmit processing
*
* One queue controller peripheral queue is used for transmit. The
* driver en-queues packets for transmit by advancing the write
* pointer. The device indicates that packets have transmitted by
* advancing the read pointer. The driver maintains a local copy of
* the read and write pointer in @struct nfp_net_tx_ring. The driver
* keeps @wr_p in sync with the queue controller write pointer and can
* determine how many packets have been transmitted by comparing its
* copy of the read pointer @rd_p with the read pointer maintained by
* the queue controller peripheral.
*/
/* Wrappers for deciding when to stop and restart TX queues */
/* Returns 1 when nfp_net_tx_full() does not report the ring as full for a
 * request of MAX_SKB_FRAGS * 4 descriptors, 0 otherwise.
 */
static int nfp_nfd3_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
{
	if (nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4))
		return 0;

	return 1;
}
/* Returns the nfp_net_tx_full() verdict for a request of MAX_SKB_FRAGS + 1
 * descriptors.
 */
static int nfp_nfd3_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
{
	int ring_full;

	ring_full = nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1);

	return ring_full;
}
/**
 * nfp_nfd3_tx_ring_stop() - stop tx ring
 * @nd_q:    netdev queue
 * @tx_ring: driver tx queue structure
 *
 * Stops the queue, then re-checks the ring after a full memory barrier and
 * restarts the queue straight away if the ring already qualifies for a wake
 * up. TX completions may be cleaning the ring concurrently with
 * .start_xmit(), hence the re-check.
 */
static void
nfp_nfd3_tx_ring_stop(struct netdev_queue *nd_q,
		      struct nfp_net_tx_ring *tx_ring)
{
	netif_tx_stop_queue(nd_q);

	/* The completion path out of NAPI may have freed descriptors in the
	 * meantime; order the stop before looking at the ring again.
	 */
	smp_mb();

	if (likely(!nfp_nfd3_tx_ring_should_wake(tx_ring)))
		return;

	netif_tx_start_queue(nd_q);
}
/**
 * nfp_nfd3_tx_tso() - Set up Tx descriptor for LSO
 * @r_vec:    per-ring structure
 * @txbuf:    Pointer to driver soft TX descriptor
 * @txd:      Pointer to HW TX descriptor
 * @skb:      Pointer to SKB
 * @md_bytes: Prepend length
 *
 * Does nothing for non-GSO skbs. For GSO skbs the L3/L4 offsets, LSO header
 * length (each with @md_bytes subtracted), MSS and the LSO flag are written
 * to @txd. @txbuf's packet count and real length are updated to account for
 * the header being repeated in every segment. Inner headers are used when
 * the skb is marked as encapsulated.
 */
static void
nfp_nfd3_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfd3_tx_buf *txbuf,
		struct nfp_nfd3_tx_desc *txd, struct sk_buff *skb, u32 md_bytes)
{
	u32 net_off, trans_off, hdr_bytes;
	u16 seg_size;

	if (!skb_is_gso(skb))
		return;

	if (skb->encapsulation) {
		net_off = skb_inner_network_offset(skb);
		trans_off = skb_inner_transport_offset(skb);
		hdr_bytes = skb_inner_transport_header(skb) - skb->data +
			    inner_tcp_hdrlen(skb);
	} else {
		net_off = skb_network_offset(skb);
		trans_off = skb_transport_offset(skb);
		hdr_bytes = skb_transport_offset(skb) + tcp_hdrlen(skb);
	}

	/* Every segment after the first carries its own copy of the headers */
	txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs;
	txbuf->real_len += hdr_bytes * (txbuf->pkt_cnt - 1);

	seg_size = skb_shinfo(skb)->gso_size & NFD3_DESC_TX_MSS_MASK;

	txd->l3_offset = net_off - md_bytes;
	txd->l4_offset = trans_off - md_bytes;
	txd->lso_hdrlen = hdr_bytes - md_bytes;
	txd->mss = cpu_to_le16(seg_size);
	txd->flags |= NFD3_DESC_TX_LSO;

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_lso++;
	u64_stats_update_end(&r_vec->tx_sync);
}
/**
 * nfp_nfd3_tx_csum() - Set TX CSUM offload flags in TX descriptor
 * @dp:    NFP Net data path struct
 * @r_vec: per-ring structure
 * @txbuf: Pointer to driver soft TX descriptor
 * @txd:   Pointer to TX descriptor
 * @skb:   Pointer to SKB
 *
 * Only acts when TX checksum offload is enabled in @dp->ctrl and the skb
 * asks for CHECKSUM_PARTIAL. The IP version and L4 protocol (inner headers
 * for encapsulated skbs) select which descriptor flags get set. Unsupported
 * IP versions or L4 protocols are warned about and leave the statistics
 * untouched.
 */
static void
nfp_nfd3_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_nfd3_tx_buf *txbuf, struct nfp_nfd3_tx_desc *txd,
		 struct sk_buff *skb)
{
	struct ipv6hdr *ip6;
	struct iphdr *ip4;
	u8 l4_proto;

	if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM) ||
	    skb->ip_summed != CHECKSUM_PARTIAL)
		return;

	txd->flags |= NFD3_DESC_TX_CSUM;

	if (skb->encapsulation) {
		txd->flags |= NFD3_DESC_TX_ENCAP;
		ip4 = inner_ip_hdr(skb);
		ip6 = inner_ipv6_hdr(skb);
	} else {
		ip4 = ip_hdr(skb);
		ip6 = ipv6_hdr(skb);
	}

	if (ip4->version == 4) {
		txd->flags |= NFD3_DESC_TX_IP4_CSUM;
		l4_proto = ip4->protocol;
	} else if (ip6->version == 6) {
		l4_proto = ip6->nexthdr;
	} else {
		nn_dp_warn(dp, "partial checksum but ipv=%x!\n", ip4->version);
		return;
	}

	if (l4_proto == IPPROTO_TCP) {
		txd->flags |= NFD3_DESC_TX_TCP_CSUM;
	} else if (l4_proto == IPPROTO_UDP) {
		txd->flags |= NFD3_DESC_TX_UDP_CSUM;
	} else {
		nn_dp_warn(dp, "partial checksum but l4 proto=%x!\n", l4_proto);
		return;
	}

	/* Count one offloaded checksum per packet the descriptor stands for */
	u64_stats_update_begin(&r_vec->tx_sync);
	if (skb->encapsulation)
		r_vec->hw_csum_tx_inner += txbuf->pkt_cnt;
	else
		r_vec->hw_csum_tx += txbuf->pkt_cnt;
	u64_stats_update_end(&r_vec->tx_sync);
}
/* Prepend TX metadata to @skb.
 *
 * The prepend is built back to front: an optional 4-byte port id (only for
 * a METADATA_HW_PORT_MUX dst), an optional 8-byte TLS connection handle, and
 * in front of those a 4-byte big-endian word holding the field type codes.
 *
 * Returns the number of bytes pushed (0 when there is nothing to prepend),
 * or -ENOMEM if the headroom could not be made writable.
 */
static int nfp_nfd3_prep_tx_meta(struct sk_buff *skb, u64 tls_handle)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);
	unsigned char *cursor;
	u32 type_word = 0;
	int prepend_len;

	if (likely(!md_dst && !tls_handle))
		return 0;

	/* A dst of any other type carries nothing for us */
	if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) {
		if (!tls_handle)
			return 0;
		md_dst = NULL;
	}

	prepend_len = 4;
	if (md_dst)
		prepend_len += 4;
	if (tls_handle)
		prepend_len += 8;

	if (unlikely(skb_cow_head(skb, prepend_len)))
		return -ENOMEM;

	/* Start at the end of the new area and fill it walking backwards */
	cursor = skb_push(skb, prepend_len);
	cursor += prepend_len;

	if (md_dst) {
		cursor -= 4;
		put_unaligned_be32(md_dst->u.port_info.port_id, cursor);
		type_word = NFP_NET_META_PORTID;
	}

	if (tls_handle) {
		/* The connection handle is opaque; it is kept as a u64 only
		 * so it can be compared against zero cheaply.
		 */
		cursor -= 8;
		memcpy(cursor, &tls_handle, sizeof(tls_handle));
		type_word <<= NFP_NET_META_FIELD_SIZE;
		type_word |= NFP_NET_META_CONN_HANDLE;
	}

	cursor -= 4;
	put_unaligned_be32(type_word, cursor);

	return prepend_len;
}
/**
* nfp_nfd3_tx() - Main transmit entry point
* @skb: SKB to transmit
* @netdev: netdev structure
*
* Maps the skb head and every page fragment for DMA, fills one TX descriptor
* per mapping and advances the ring write pointer.
*
* Return: NETDEV_TX_BUSY when the ring cannot take the descriptors the skb
* needs (the queue is stopped first); NETDEV_TX_OK otherwise, including the
* paths where the skb is freed after a metadata or DMA mapping failure.
*/
netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev)
{
struct nfp_net *nn = netdev_priv(netdev);
int f, nr_frags, wr_idx, md_bytes;
struct nfp_net_tx_ring *tx_ring;
struct nfp_net_r_vector *r_vec;
struct nfp_nfd3_tx_buf *txbuf;
struct nfp_nfd3_tx_desc *txd;
struct netdev_queue *nd_q;
const skb_frag_t *frag;
struct nfp_net_dp *dp;
dma_addr_t dma_addr;
unsigned int fsize;
u64 tls_handle = 0;
u16 qidx;
dp = &nn->dp;
qidx = skb_get_queue_mapping(skb);
tx_ring = &dp->tx_rings[qidx];
r_vec = tx_ring->r_vec;
nr_frags = skb_shinfo(skb)->nr_frags;
/* One descriptor is needed for the head plus one per page fragment */
if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) {
nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
qidx, tx_ring->wr_p, tx_ring->rd_p);
nd_q = netdev_get_tx_queue(dp->netdev, qidx);
netif_tx_stop_queue(nd_q);
nfp_net_tx_xmit_more_flush(tx_ring);
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_busy++;
u64_stats_update_end(&r_vec->tx_sync);
return NETDEV_TX_BUSY;
}
/* May replace the skb and update nr_frags; a NULL result means there is
* nothing left to queue (presumably the helper disposed of the skb -
* TODO confirm against nfp_net_tls_tx()).
*/
skb = nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags);
if (unlikely(!skb)) {
nfp_net_tx_xmit_more_flush(tx_ring);
return NETDEV_TX_OK;
}
md_bytes = nfp_nfd3_prep_tx_meta(skb, tls_handle);
if (unlikely(md_bytes < 0))
goto err_flush;
/* Start with the head skbuf */
dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
DMA_TO_DEVICE);
if (dma_mapping_error(dp->dev, dma_addr))
goto err_dma_err;
wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
/* Stash the soft descriptor of the head then initialize it */
txbuf = &tx_ring->txbufs[wr_idx];
txbuf->skb = skb;
txbuf->dma_addr = dma_addr;
/* fidx -1 marks the head buffer; fragments store their frag index */
txbuf->fidx = -1;
txbuf->pkt_cnt = 1;
txbuf->real_len = skb->len;
/* Build TX descriptor */
txd = &tx_ring->txds[wr_idx];
txd->offset_eop = (nr_frags ? 0 : NFD3_DESC_TX_EOP) | md_bytes;
txd->dma_len = cpu_to_le16(skb_headlen(skb));
nfp_desc_set_dma_addr(txd, dma_addr);
txd->data_len = cpu_to_le16(skb->len);
txd->flags = 0;
txd->mss = 0;
txd->lso_hdrlen = 0;
/* Do not reorder - tso may adjust pkt cnt, vlan may override fields */
nfp_nfd3_tx_tso(r_vec, txbuf, txd, skb, md_bytes);
nfp_nfd3_tx_csum(dp, r_vec, txbuf, txd, skb);
if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
txd->flags |= NFD3_DESC_TX_VLAN;
txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
}
/* Gather DMA */
if (nr_frags > 0) {
__le64 second_half;
/* all descs must match except for in addr, length and eop */
second_half = txd->vals8[1];
for (f = 0; f < nr_frags; f++) {
frag = &skb_shinfo(skb)->frags[f];
fsize = skb_frag_size(frag);
dma_addr = skb_frag_dma_map(dp->dev, frag, 0,
fsize, DMA_TO_DEVICE);
if (dma_mapping_error(dp->dev, dma_addr))
goto err_unmap;
wr_idx = D_IDX(tx_ring, wr_idx + 1);
tx_ring->txbufs[wr_idx].skb = skb;
tx_ring->txbufs[wr_idx].dma_addr = dma_addr;
tx_ring->txbufs[wr_idx].fidx = f;
txd = &tx_ring->txds[wr_idx];
txd->dma_len = cpu_to_le16(fsize);
nfp_desc_set_dma_addr(txd, dma_addr);
/* Only the descriptor of the last fragment carries EOP */
txd->offset_eop = md_bytes |
((f == nr_frags - 1) ? NFD3_DESC_TX_EOP : 0);
txd->vals8[1] = second_half;
}
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_gather++;
u64_stats_update_end(&r_vec->tx_sync);
}
skb_tx_timestamp(skb);
nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
tx_ring->wr_p += nr_frags + 1;
if (nfp_nfd3_tx_ring_should_stop(tx_ring))
nfp_nfd3_tx_ring_stop(nd_q, tx_ring);
/* wr_ptr_add accumulates descriptors not yet flushed by
* nfp_net_tx_xmit_more_flush()
*/
tx_ring->wr_ptr_add += nr_frags + 1;
if (__netdev_tx_sent_queue(nd_q, txbuf->real_len, netdev_xmit_more()))
nfp_net_tx_xmit_more_flush(tx_ring);
return NETDEV_TX_OK;
err_unmap:
/* Undo the fragment mappings made so far, newest first; wr_idx walks
* backwards through the ring (with wrap-around) as each one is released.
*/
while (--f >= 0) {
frag = &skb_shinfo(skb)->frags[f];
dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
skb_frag_size(frag), DMA_TO_DEVICE);
tx_ring->txbufs[wr_idx].skb = NULL;
tx_ring->txbufs[wr_idx].dma_addr = 0;
tx_ring->txbufs[wr_idx].fidx = -2;
wr_idx = wr_idx - 1;
if (wr_idx < 0)
wr_idx += tx_ring->cnt;
}
/* wr_idx is back at the head descriptor; release its mapping too */
dma_unmap_single(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
skb_headlen(skb), DMA_TO_DEVICE);
tx_ring->txbufs[wr_idx].skb = NULL;
tx_ring->txbufs[wr_idx].dma_addr = 0;
tx_ring->txbufs[wr_idx].fidx = -2;
err_dma_err:
nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_flush:
/* The skb is dropped: flush pending descriptors, count the error and
* report NETDEV_TX_OK since the skb has been consumed here.
*/
nfp_net_tx_xmit_more_flush(tx_ring);
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_errors++;
u64_stats_update_end(&r_vec->tx_sync);
nfp_net_tls_tx_undo(skb, tls_handle);
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
/**
* nfp_nfd3_tx_complete() - Handle completed TX packets
* @tx_ring: TX ring structure
* @budget: NAPI budget (only used as bool to determine if in NAPI context)
*
* Unmaps and releases the buffers of every descriptor completed since the
* last call, updates the TX statistics and wakes the netdev queue if it was
* stopped and the ring qualifies for a wake up again.
*/
void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
u32 done_pkts = 0, done_bytes = 0;
struct netdev_queue *nd_q;
u32 qcp_rd_p;
int todo;
/* Nothing outstanding on the ring */
if (tx_ring->wr_p == tx_ring->rd_p)
return;
/* Work out how many descriptors have been transmitted */
qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
if (qcp_rd_p == tx_ring->qcp_rd_p)
return;
todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
while (todo--) {
const skb_frag_t *frag;
struct nfp_nfd3_tx_buf *tx_buf;
struct sk_buff *skb;
int fidx, nr_frags;
int idx;
idx = D_IDX(tx_ring, tx_ring->rd_p++);
tx_buf = &tx_ring->txbufs[idx];
skb = tx_buf->skb;
/* Slot has no skb attached; nothing to release */
if (!skb)
continue;
nr_frags = skb_shinfo(skb)->nr_frags;
fidx = tx_buf->fidx;
if (fidx == -1) {
/* unmap head */
dma_unmap_single(dp->dev, tx_buf->dma_addr,
skb_headlen(skb), DMA_TO_DEVICE);
/* Packet and byte counts are kept on the head buffer only */
done_pkts += tx_buf->pkt_cnt;
done_bytes += tx_buf->real_len;
} else {
/* unmap fragment */
frag = &skb_shinfo(skb)->frags[fidx];
dma_unmap_page(dp->dev, tx_buf->dma_addr,
skb_frag_size(frag), DMA_TO_DEVICE);
}
/* check for last gather fragment; for an skb without fragments
* nr_frags - 1 equals the head's fidx of -1, so the head frees it
*/
if (fidx == nr_frags - 1)
napi_consume_skb(skb, budget);
tx_buf->dma_addr = 0;
tx_buf->skb = NULL;
tx_buf->fidx = -2;
}
tx_ring->qcp_rd_p = qcp_rd_p;
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_bytes += done_bytes;
r_vec->tx_pkts += done_pkts;
u64_stats_update_end(&r_vec->tx_sync);
/* No netdev is attached to this data path; skip the queue accounting */
if (!dp->netdev)
return;
nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
if (nfp_nfd3_tx_ring_should_wake(tx_ring)) {
/* Make sure TX thread will see updated tx_ring->rd_p */
smp_mb();
if (unlikely(netif_tx_queue_stopped(nd_q)))
netif_tx_wake_queue(nd_q);
}
/* More descriptors outstanding than the ring holds means the pointers
* are corrupt
*/
WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
"TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
}
/* Reap completed descriptors on an XDP TX ring.
 *
 * At most NFP_NET_XDP_MAX_COMPLETE descriptors are processed per call. Each
 * one counts as a packet; bytes come from the soft descriptor's real_len.
 *
 * Returns true when every completion reported by the device was handled
 * (or there was none), false when the cap cut the work short.
 */
static bool nfp_nfd3_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	u32 bytes = 0, pkts = 0;
	int pending, slot, i;
	bool drained;
	u32 hw_rd_p;

	/* Work out how many descriptors the device has finished with */
	hw_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
	if (hw_rd_p == tx_ring->qcp_rd_p)
		return true;

	pending = D_IDX(tx_ring, hw_rd_p - tx_ring->qcp_rd_p);

	drained = pending <= NFP_NET_XDP_MAX_COMPLETE;
	if (!drained)
		pending = NFP_NET_XDP_MAX_COMPLETE;

	tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + pending);

	pkts = pending;
	for (i = 0; i < pending; i++) {
		slot = D_IDX(tx_ring, tx_ring->rd_p);
		tx_ring->rd_p++;

		bytes += tx_ring->txbufs[slot].real_len;
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_bytes += bytes;
	r_vec->tx_pkts += pkts;
	u64_stats_update_end(&r_vec->tx_sync);

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);

	return drained;
}
/* Receive processing
*/
/* Allocate and DMA-map one RX buffer.
 *
 * With an XDP program attached a whole page is allocated, otherwise a
 * page fragment of dp->fl_bufsz bytes. On success the mapping is stored in
 * @dma_addr and the buffer is returned; on allocation or mapping failure
 * NULL is returned and nothing stays allocated.
 */
static void *
nfp_nfd3_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
{
	void *buf;

	if (dp->xdp_prog) {
		struct page *pg = dev_alloc_page();

		if (unlikely(!pg))
			return NULL;
		buf = page_address(pg);
	} else {
		buf = napi_alloc_frag(dp->fl_bufsz);
		if (unlikely(!buf))
			return NULL;
	}

	*dma_addr = nfp_net_dma_map_rx(dp, buf);
	if (!dma_mapping_error(dp->dev, *dma_addr))
		return buf;

	nfp_net_free_frag(buf, dp->xdp_prog);
	nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
	return NULL;
}
/**
 * nfp_nfd3_rx_give_one() - Put mapped skb on the software and hardware rings
 * @dp:       NFP Net data path struct
 * @rx_ring:  RX ring structure
 * @frag:     page fragment buffer
 * @dma_addr: DMA address of skb mapping
 *
 * Records the buffer in the slot at the ring's write pointer and fills the
 * matching freelist descriptor. The queue controller write pointer is only
 * bumped once per NFP_NET_FL_BATCH buffers.
 */
static void
nfp_nfd3_rx_give_one(const struct nfp_net_dp *dp,
		     struct nfp_net_rx_ring *rx_ring,
		     void *frag, dma_addr_t dma_addr)
{
	struct nfp_net_rx_desc *desc;
	struct nfp_net_rx_buf *slot_buf;
	unsigned int slot;

	slot = D_IDX(rx_ring, rx_ring->wr_p);
	slot_buf = &rx_ring->rxbufs[slot];
	desc = &rx_ring->rxds[slot];

	nfp_net_dma_sync_dev_rx(dp, dma_addr);

	/* Remember the buffer and its mapping in the software ring */
	slot_buf->frag = frag;
	slot_buf->dma_addr = dma_addr;

	/* Fill the freelist descriptor */
	desc->fld.reserved = 0;
	desc->fld.meta_len_dd = 0;
	nfp_desc_set_dma_addr(&desc->fld, dma_addr + dp->rx_dma_off);

	rx_ring->wr_p++;
	if (rx_ring->wr_p % NFP_NET_FL_BATCH)
		return;

	/* A full batch is ready: make sure the descriptor writes are
	 * visible before telling the hardware about them.
	 */
	wmb();
	nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
}
/**
* nfp_nfd3_rx_ring_fill_freelist() - Give buffers from the ring to FW
* @dp: NFP Net data path struct
* @rx_ring: RX ring to fill
*/
void nfp_nfd3_rx_ring_fill_freelist(struct nfp_net_dp *dp,
struct nfp_net_rx_ring *rx_ring)
{
unsigned int i;
if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx))
return nfp_net_xsk_rx_ring_fill_freelist(rx_ring);
for (i = 0; i < rx_ring->cnt - 1; i++)
nfp_nfd3_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
rx_ring->rxbufs[i].dma_addr);
}
/**
 * nfp_nfd3_rx_csum_has_errors() - group check if rxd has any csum errors
 * @flags: RX descriptor flags field in CPU byte order
 *
 * Return: 0 when the set of "checked" bits equals the set of "OK" bits
 * shifted up by PCIE_DESC_RX_CSUM_OK_SHIFT, 1 otherwise.
 */
static int nfp_nfd3_rx_csum_has_errors(u16 flags)
{
	u16 ok_bits = flags & __PCIE_DESC_RX_CSUM_ALL_OK;
	u16 checked_bits = flags & __PCIE_DESC_RX_CSUM_ALL;

	if (checked_bits == (ok_bits << PCIE_DESC_RX_CSUM_OK_SHIFT))
		return 0;

	return 1;
}
/**
* nfp_nfd3_rx_csum() - set SKB checksum field based on RX descriptor flags
* @dp: NFP Net data path struct
* @r_vec: per-ring structure
* @rxd: Pointer to RX descriptor
* @meta: Parsed metadata prepend
* @skb: Pointer to SKB
*/
void
nfp_nfd3_rx_csum(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
const struct nfp_net_rx_desc *rxd,
const struct nfp_meta_parsed *meta, struct sk_buff *skb)
{
skb_checksum_none_assert(skb);
if (!(dp->netdev->features & NETIF_F_RXCSUM))
return;
if (meta->csum_type) {
skb->ip_summed = meta->csum_type;
skb->csum = meta->csum;
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->hw_csum_rx_complete++;
u64_stats_update_end(&r_vec->rx_sync);
return;
}
if (nfp_nfd3_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->hw_csum_rx_error++;
u64_stats_update_end(&r_vec->rx_sync);
return;
}
/* Assume that the firmware will never report inner CSUM_OK unless outer
* L4 headers were successfully parsed. FW will always report zero UDP
* checksum as CSUM_OK.
*/
if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
__skb_incr_checksum_unnecessary(skb);
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->hw_csum_rx_ok++;
u64_stats_update_end(&r_vec->rx_sync);
}
if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
__skb_incr_checksum_unnecessary(skb);
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->hw_csum_rx_inner_ok++;
u64_stats_update_end(&r_vec->rx_sync);
}
}
/* Store the RX hash in @meta when RXHASH is enabled on @netdev.
 *
 * The three plain IP RSS types are reported as an L3 hash, every other
 * type as an L4 hash. @hash points at the big-endian hash value, which may
 * be unaligned.
 */
static void
nfp_nfd3_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
		  unsigned int type, __be32 *hash)
{
	if (!(netdev->features & NETIF_F_RXHASH))
		return;

	if (type == NFP_NET_RSS_IPV4 ||
	    type == NFP_NET_RSS_IPV6 ||
	    type == NFP_NET_RSS_IPV6_EX)
		meta->hash_type = PKT_HASH_TYPE_L3;
	else
		meta->hash_type = PKT_HASH_TYPE_L4;

	meta->hash = get_unaligned_be32(hash);
}
/* Pick up the RX hash from a struct nfp_net_rx_hash located at @data.
 * Does nothing unless the descriptor has the RSS flag set.
 */
static void
nfp_nfd3_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta,
		       void *data, struct nfp_net_rx_desc *rxd)
{
	struct nfp_net_rx_hash *hash_hdr = data;
	unsigned int rss_type;

	if (rxd->rxd.flags & PCIE_DESC_RX_RSS) {
		rss_type = get_unaligned_be32(&hash_hdr->hash_type);
		nfp_nfd3_set_hash(netdev, meta, rss_type, &hash_hdr->hash);
	}
}
/* Walk the chained metadata prepend that precedes a received packet.
*
* The first big-endian 32-bit word at @data packs one field type code per
* NFP_NET_META_FIELD_SIZE bits; the value of each field follows in the same
* order. Parsed values are stored in @meta.
*
* Returns true when an unknown field type is met or when parsing does not
* end exactly at @pkt (the RX path in this file treats true as invalid
* metadata), false otherwise. @meta_len is not referenced in this body.
*/
bool
nfp_nfd3_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
void *data, void *pkt, unsigned int pkt_len, int meta_len)
{
u32 meta_info;
meta_info = get_unaligned_be32(data);
data += 4;
while (meta_info) {
switch (meta_info & NFP_NET_META_FIELD_MASK) {
case NFP_NET_META_HASH:
/* The field code after HASH holds the hash type, so consume it here */
meta_info >>= NFP_NET_META_FIELD_SIZE;
nfp_nfd3_set_hash(netdev, meta,
meta_info & NFP_NET_META_FIELD_MASK,
(__be32 *)data);
data += 4;
break;
case NFP_NET_META_MARK:
meta->mark = get_unaligned_be32(data);
data += 4;
break;
case NFP_NET_META_PORTID:
meta->portid = get_unaligned_be32(data);
data += 4;
break;
case NFP_NET_META_CSUM:
meta->csum_type = CHECKSUM_COMPLETE;
/* Unlike the other fields the checksum is read in CPU byte order */
meta->csum =
(__force __wsum)__get_unaligned_cpu32(data);
data += 4;
break;
case NFP_NET_META_RESYNC_INFO:
/* NOTE(review): a non-zero result from the resync helper returns false
* here, the same value as a successful parse, and skips any remaining
* fields - confirm this is intended.
*/
if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
pkt_len))
return false;
data += sizeof(struct nfp_net_tls_resync_req);
break;
default:
/* Unknown field type */
return true;
}
/* Move on to the next field type code */
meta_info >>= NFP_NET_META_FIELD_SIZE;
}
/* All fields consumed; the cursor must now sit exactly at the packet */
return data != pkt;
}
static void
nfp_nfd3_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
struct sk_buff *skb)
{
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_drops++;
/* If we have both skb and rxbuf the replacement buffer allocation
* must have failed, count this as an alloc failure.
*/
if (skb && rxbuf)
r_vec->rx_replace_buf_alloc_fail++;
u64_stats_update_end(&r_vec->rx_sync);
/* skb is build based on the frag, free_skb() would free the frag
* so to be able to reuse it we need an extra ref.
*/
if (skb && rxbuf && skb->head == rxbuf->frag)
page_ref_inc(virt_to_head_page(rxbuf->frag));
if (rxbuf)
nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
if (skb)
dev_kfree_skb_any(skb);
}
/**
 * nfp_nfd3_tx_xdp_buf() - Queue a received buffer on the XDP TX ring
 * @dp:        NFP Net data path struct
 * @rx_ring:   RX ring the buffer was received on
 * @tx_ring:   XDP TX ring to queue on
 * @rxbuf:     RX buffer holding the packet
 * @dma_off:   offset of the packet data from the buffer's DMA address
 * @pkt_len:   packet length
 * @completed: set to true once TX completions were reaped during this poll,
 *             so that reaping is attempted at most once
 *
 * On success the RX buffer moves to the TX ring and the buffer previously
 * held in that TX slot is posted to the RX freelist in its place.
 *
 * On every failure path @rxbuf is returned to the RX freelist via
 * nfp_nfd3_rx_drop(), so the caller must not recycle it again.
 *
 * Return: true if the packet was queued, false if it was dropped.
 */
static bool
nfp_nfd3_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
		    struct nfp_net_tx_ring *tx_ring,
		    struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
		    unsigned int pkt_len, bool *completed)
{
	unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;
	struct nfp_nfd3_tx_buf *txbuf;
	struct nfp_nfd3_tx_desc *txd;
	int wr_idx;

	/* Reject if xdp_adjust_tail grew the packet beyond the DMA area.
	 * The XDP_TX caller does not recycle the buffer when we fail, so
	 * hand it back to the freelist here, exactly like the ring-full
	 * path below; otherwise the buffer and its mapping would be lost.
	 */
	if (pkt_len + dma_off > dma_map_sz) {
		nfp_nfd3_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, NULL);
		return false;
	}

	if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
		/* Try to make room by reaping completions, once per poll */
		if (!*completed) {
			nfp_nfd3_xdp_complete(tx_ring);
			*completed = true;
		}

		if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
			nfp_nfd3_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
					 NULL);
			return false;
		}
	}

	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

	/* Swap buffers: the one sitting in the TX slot goes to the RX
	 * freelist, the received one takes its place in the TX slot.
	 */
	txbuf = &tx_ring->txbufs[wr_idx];

	nfp_nfd3_rx_give_one(dp, rx_ring, txbuf->frag, txbuf->dma_addr);

	txbuf->frag = rxbuf->frag;
	txbuf->dma_addr = rxbuf->dma_addr;
	txbuf->fidx = -1;
	txbuf->pkt_cnt = 1;
	txbuf->real_len = pkt_len;

	dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
				   pkt_len, DMA_BIDIRECTIONAL);

	/* Build TX descriptor */
	txd = &tx_ring->txds[wr_idx];
	txd->offset_eop = NFD3_DESC_TX_EOP;
	txd->dma_len = cpu_to_le16(pkt_len);
	nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + dma_off);
	txd->data_len = cpu_to_le16(pkt_len);

	txd->flags = 0;
	txd->mss = 0;
	txd->lso_hdrlen = 0;

	tx_ring->wr_p++;
	tx_ring->wr_ptr_add++;
	return true;
}
/**
* nfp_nfd3_rx() - receive up to @budget packets on @rx_ring
* @rx_ring: RX ring to receive from
* @budget: NAPI budget
*
* Note, this function is separated out from the napi poll function to
* more cleanly separate packet receive code from other bookkeeping
* functions performed in the napi poll function.
*
* Return: Number of packets received. When XDP TX completions could not all
* be reaped, @budget is returned instead.
*/
static int nfp_nfd3_rx(struct nfp_net_rx_ring *rx_ring, int budget)
{
struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
struct nfp_net_tx_ring *tx_ring;
struct bpf_prog *xdp_prog;
bool xdp_tx_cmpl = false;
unsigned int true_bufsz;
struct sk_buff *skb;
int pkts_polled = 0;
struct xdp_buff xdp;
int idx;
/* Sample the program pointer once and use that value for the whole poll */
xdp_prog = READ_ONCE(dp->xdp_prog);
/* Buffers are whole pages when an XDP program is attached */
true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
&rx_ring->xdp_rxq);
tx_ring = r_vec->xdp_ring;
while (pkts_polled < budget) {
unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
struct nfp_net_rx_buf *rxbuf;
struct nfp_net_rx_desc *rxd;
struct nfp_meta_parsed meta;
bool redir_egress = false;
struct net_device *netdev;
dma_addr_t new_dma_addr;
u32 meta_len_xdp = 0;
void *new_frag;
idx = D_IDX(rx_ring, rx_ring->rd_p);
rxd = &rx_ring->rxds[idx];
/* Stop at the first descriptor without the DD bit set */
if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
break;
/* Memory barrier to ensure that we won't do other reads
* before the DD bit.
*/
dma_rmb();
memset(&meta, 0, sizeof(meta));
rx_ring->rd_p++;
pkts_polled++;
rxbuf = &rx_ring->rxbufs[idx];
/* < meta_len >
* <-- [rx_offset] -->
* ---------------------------------------------------------
* | [XX] | metadata | packet | XXXX |
* ---------------------------------------------------------
* <---------------- data_len --------------->
*
* The rx_offset is fixed for all packets, the meta_len can vary
* on a packet by packet basis. If rx_offset is set to zero
* (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
* buffer and is immediately followed by the packet (no [XX]).
*/
meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
data_len = le16_to_cpu(rxd->rxd.data_len);
pkt_len = data_len - meta_len;
pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
pkt_off += meta_len;
else
pkt_off += dp->rx_offset;
meta_off = pkt_off - meta_len;
/* Stats update */
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_pkts++;
r_vec->rx_bytes += pkt_len;
u64_stats_update_end(&r_vec->rx_sync);
/* With a fixed rx_offset the metadata has to fit in front of the packet */
if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
(dp->rx_offset && meta_len > dp->rx_offset))) {
nn_dp_warn(dp, "oversized RX packet metadata %u\n",
meta_len);
nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
continue;
}
nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
data_len);
/* Without the chained format only the RSS hash is taken from the prepend */
if (!dp->chained_metadata_format) {
nfp_nfd3_set_hash_desc(dp->netdev, &meta,
rxbuf->frag + meta_off, rxd);
} else if (meta_len) {
if (unlikely(nfp_nfd3_parse_meta(dp->netdev, &meta,
rxbuf->frag + meta_off,
rxbuf->frag + pkt_off,
pkt_len, meta_len))) {
nn_dp_warn(dp, "invalid RX packet metadata\n");
nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf,
NULL);
continue;
}
}
/* The XDP program is only run on packets that carry no port id */
if (xdp_prog && !meta.portid) {
void *orig_data = rxbuf->frag + pkt_off;
unsigned int dma_off;
int act;
xdp_prepare_buff(&xdp,
rxbuf->frag + NFP_NET_RX_BUF_HEADROOM,
pkt_off - NFP_NET_RX_BUF_HEADROOM,
pkt_len, true);
act = bpf_prog_run_xdp(xdp_prog, &xdp);
/* The program may have moved the start or end of the packet */
pkt_len = xdp.data_end - xdp.data;
pkt_off += xdp.data - orig_data;
switch (act) {
case XDP_PASS:
meta_len_xdp = xdp.data - xdp.data_meta;
break;
case XDP_TX:
dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
if (unlikely(!nfp_nfd3_tx_xdp_buf(dp, rx_ring,
tx_ring,
rxbuf,
dma_off,
pkt_len,
&xdp_tx_cmpl)))
trace_xdp_exception(dp->netdev,
xdp_prog, act);
/* rxbuf is not touched again here, whether or not queuing succeeded */
continue;
default:
bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(dp->netdev, xdp_prog, act);
fallthrough;
case XDP_DROP:
/* Recycle the buffer straight back onto the freelist */
nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag,
rxbuf->dma_addr);
continue;
}
}
/* Pick the destination based on the port id from the metadata */
if (likely(!meta.portid)) {
netdev = dp->netdev;
} else if (meta.portid == NFP_META_PORT_ID_CTRL) {
struct nfp_net *nn = netdev_priv(dp->netdev);
/* Control message: hand the raw data to the app and recycle the buffer */
nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
pkt_len);
nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag,
rxbuf->dma_addr);
continue;
} else {
struct nfp_net *nn;
nn = netdev_priv(dp->netdev);
netdev = nfp_app_dev_get(nn->app, meta.portid,
&redir_egress);
if (unlikely(!netdev)) {
nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf,
NULL);
continue;
}
if (nfp_netdev_is_nfp_repr(netdev))
nfp_repr_inc_rx_stats(netdev, pkt_len);
}
/* Wrap the RX buffer in an skb, then post a freshly allocated buffer
* on the freelist in its place.
*/
skb = build_skb(rxbuf->frag, true_bufsz);
if (unlikely(!skb)) {
nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
continue;
}
new_frag = nfp_nfd3_napi_alloc_one(dp, &new_dma_addr);
if (unlikely(!new_frag)) {
/* Passing both skb and rxbuf lets the drop helper reuse the old buffer */
nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
continue;
}
nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
nfp_nfd3_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
skb_reserve(skb, pkt_off);
skb_put(skb, pkt_len);
skb->mark = meta.mark;
skb_set_hash(skb, meta.hash, meta.hash_type);
skb_record_rx_queue(skb, rx_ring->idx);
skb->protocol = eth_type_trans(skb, netdev);
nfp_nfd3_rx_csum(dp, r_vec, rxd, &meta, skb);
#ifdef CONFIG_TLS_DEVICE
if (rxd->rxd.flags & PCIE_DESC_RX_DECRYPTED) {
skb->decrypted = true;
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->hw_tls_rx++;
u64_stats_update_end(&r_vec->rx_sync);
}
#endif
if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
le16_to_cpu(rxd->rxd.vlan));
if (meta_len_xdp)
skb_metadata_set(skb, meta_len_xdp);
if (likely(!redir_egress)) {
napi_gro_receive(&rx_ring->r_vec->napi, skb);
} else {
/* Send the packet out of netdev instead of up the stack: push the
* Ethernet header back in front of the data before queueing it.
*/
skb->dev = netdev;
skb_reset_network_header(skb);
__skb_push(skb, ETH_HLEN);
dev_queue_xmit(skb);
}
}
if (xdp_prog) {
/* Flush descriptors queued by XDP_TX during this poll; otherwise, if
* descriptors are outstanding and completions were not reaped yet, reap
* them now and report a full budget when some are left over.
*/
if (tx_ring->wr_ptr_add)
nfp_net_tx_xmit_more_flush(tx_ring);
else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
!xdp_tx_cmpl)
if (!nfp_nfd3_xdp_complete(tx_ring))
pkts_polled = budget;
}
return pkts_polled;
}
/**
* nfp_nfd3_poll() - napi poll function
* @napi: NAPI structure
* @budget: NAPI budget
*
* Return: number of packets polled.
*/
int nfp_nfd3_poll(struct napi_struct *napi, int budget)
{
struct nfp_net_r_vector *r_vec =
container_of(napi, struct nfp_net_r_vector, napi);
unsigned int pkts_polled = 0;
if (r_vec->tx_ring)
nfp_nfd3_tx_complete(r_vec->tx_ring, budget);
if (r_vec->rx_ring)
pkts_polled = nfp_nfd3_rx(r_vec->rx_ring, budget);
if (pkts_polled < budget)
if (napi_complete_done(napi, pkts_polled))
nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) {
struct dim_sample dim_sample = {};
unsigned int start;
u64 pkts, bytes;
do {
start = u64_stats_fetch_begin(&r_vec->rx_sync);
pkts = r_vec->rx_pkts;
bytes = r_vec->rx_bytes;
} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
net_dim(&r_vec->rx_dim, dim_sample);
}
if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
struct dim_sample dim_sample = {};
unsigned int start;
u64 pkts, bytes;
do {
start = u64_stats_fetch_begin(&r_vec->tx_sync);
pkts = r_vec->tx_pkts;
bytes = r_vec->tx_bytes;
} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
net_dim(&r_vec->tx_dim, dim_sample);
}
return pkts_polled;
}
/* Control device data path
*/
/**
 * nfp_nfd3_ctrl_tx_one() - Transmit one control message skb
 * @nn:    NFP Net device; its app decides whether metadata is prepended
 * @r_vec: ring vector whose TX ring and backlog queue are used
 * @skb:   control message to send, must not have page fragments
 * @old:   true if @skb is being re-submitted from the backlog; on a full
 *         ring it is then put back at the head of the queue instead of
 *         being appended at the tail
 *
 * Return: true if the ring was full and @skb was parked on @r_vec->queue,
 * false if @skb was placed on the ring or freed because of an error.
 */
bool
nfp_nfd3_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
		     struct sk_buff *skb, bool old)
{
	/* Length before any metadata is pushed in front of the message */
	unsigned int real_len = skb->len, meta_len = 0;
	struct nfp_net_tx_ring *tx_ring = r_vec->tx_ring;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_nfd3_tx_desc *desc;
	struct nfp_nfd3_tx_buf *sw_buf;
	dma_addr_t mapping;
	int slot;

	if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) {
		nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
		goto drop_skb;
	}

	if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);

		if (old)
			__skb_queue_head(&r_vec->queue, skb);
		else
			__skb_queue_tail(&r_vec->queue, skb);
		return true;
	}

	if (nfp_app_ctrl_has_meta(nn->app)) {
		if (unlikely(skb_headroom(skb) < 8)) {
			nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
			goto drop_skb;
		}
		/* Pushed back to front: the port id value first, then the
		 * field type word that ends up in front of it.
		 */
		meta_len = 8;
		put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
		put_unaligned_be32(NFP_NET_META_PORTID, skb_push(skb, 4));
	}

	/* Map the linear data; gather is not supported on this path */
	mapping = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
				 DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, mapping))
		goto warn_map_failed;

	slot = D_IDX(tx_ring, tx_ring->wr_p);

	/* Fill in the soft descriptor */
	sw_buf = &tx_ring->txbufs[slot];
	sw_buf->skb = skb;
	sw_buf->dma_addr = mapping;
	sw_buf->fidx = -1;
	sw_buf->pkt_cnt = 1;
	sw_buf->real_len = real_len;

	/* Fill in the hardware descriptor */
	desc = &tx_ring->txds[slot];
	desc->offset_eop = meta_len | NFD3_DESC_TX_EOP;
	desc->dma_len = cpu_to_le16(skb_headlen(skb));
	nfp_desc_set_dma_addr(desc, mapping);
	desc->data_len = cpu_to_le16(skb->len);

	desc->flags = 0;
	desc->mss = 0;
	desc->lso_hdrlen = 0;

	tx_ring->wr_p++;
	tx_ring->wr_ptr_add++;
	nfp_net_tx_xmit_more_flush(tx_ring);

	return false;

warn_map_failed:
	nn_dp_warn(dp, "Failed to DMA map TX CTRL buffer\n");
drop_skb:
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return false;
}
/* Push control messages parked on the r_vec software queue to the TX ring.
 *
 * Stops as soon as the queue is empty or nfp_nfd3_ctrl_tx_one() reports that
 * the ring is full (in which case it has put the skb back at the head of the
 * queue, because @old is passed as true).
 */
static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec)
{
	struct sk_buff *pending;

	for (;;) {
		pending = __skb_dequeue(&r_vec->queue);
		if (!pending)
			break;

		if (nfp_nfd3_ctrl_tx_one(r_vec->nfp_net, r_vec, pending, true))
			break;
	}
}
/* Validate the metadata prepended to a received control message.
 *
 * When the app does not use control metadata the message must carry none.
 * Otherwise exactly 8 bytes are expected: a big-endian NFP_NET_META_PORTID
 * type word followed by a big-endian NFP_META_PORT_ID_CTRL tag.
 */
static bool
nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len)
{
	u32 meta_type, meta_tag;

	if (nfp_app_ctrl_has_meta(nn->app)) {
		if (meta_len != 8)
			return false;

		meta_type = get_unaligned_be32(data);
		meta_tag = get_unaligned_be32(data + 4);

		if (meta_type != NFP_NET_META_PORTID)
			return false;

		return meta_tag == NFP_META_PORT_ID_CTRL;
	}

	return !meta_len;
}
/* Receive a single control message from @rx_ring.
 *
 * Returns false when the descriptor at the read pointer has not been
 * completed yet (DD bit clear), true once a descriptor has been consumed,
 * whether the message was handed to the app or dropped.
 */
static bool
nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp,
struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring)
{
unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
struct nfp_net_rx_buf *rxbuf;
struct nfp_net_rx_desc *rxd;
dma_addr_t new_dma_addr;
struct sk_buff *skb;
void *new_frag;
int idx;
idx = D_IDX(rx_ring, rx_ring->rd_p);
rxd = &rx_ring->rxds[idx];
if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
return false;
/* Memory barrier to ensure that we won't do other reads
 * before the DD bit.
 */
dma_rmb();
rx_ring->rd_p++;
rxbuf = &rx_ring->rxbufs[idx];
/* data_len from the descriptor covers metadata plus packet */
meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
data_len = le16_to_cpu(rxd->rxd.data_len);
pkt_len = data_len - meta_len;
/* With a dynamic RX offset the packet directly follows the metadata,
 * otherwise it starts at the configured fixed offset. Either way the
 * metadata immediately precedes the packet.
 */
pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
pkt_off += meta_len;
else
pkt_off += dp->rx_offset;
meta_off = pkt_off - meta_len;
/* Stats update */
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_pkts++;
r_vec->rx_bytes += pkt_len;
u64_stats_update_end(&r_vec->rx_sync);
nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len);
if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
meta_len);
nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
return true;
}
/* Wrap the received buffer in an skb without copying */
skb = build_skb(rxbuf->frag, dp->fl_bufsz);
if (unlikely(!skb)) {
nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
return true;
}
/* Allocate a replacement buffer before giving the old one away; on
 * failure the message is dropped (the skb is passed to the drop helper).
 */
new_frag = nfp_nfd3_napi_alloc_one(dp, &new_dma_addr);
if (unlikely(!new_frag)) {
nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
return true;
}
nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
/* NOTE(review): presumably posts new_frag back to the RX ring - confirm */
nfp_nfd3_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
skb_reserve(skb, pkt_off);
skb_put(skb, pkt_len);
nfp_app_ctrl_rx(nn->app, skb);
return true;
}
/* Receive control messages until the ring is empty or the budget runs out.
 *
 * Returns true when there is budget left (the ring was drained), false when
 * the whole budget of 512 messages has been used up.
 *
 * The budget is tested before it is decremented: the counter is unsigned, so
 * a post-decrement at zero would wrap to UINT_MAX and make an exhausted
 * budget look like success to the caller.
 */
static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
{
	struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
	struct nfp_net *nn = r_vec->nfp_net;
	struct nfp_net_dp *dp = &nn->dp;
	unsigned int budget = 512;

	while (budget && nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring))
		budget--;

	return budget;
}
void nfp_nfd3_ctrl_poll(struct tasklet_struct *t)
{
struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet);
spin_lock(&r_vec->lock);
nfp_nfd3_tx_complete(r_vec->tx_ring, 0);
__nfp_ctrl_tx_queued(r_vec);
spin_unlock(&r_vec->lock);
if (nfp_ctrl_rx(r_vec)) {
nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
} else {
tasklet_schedule(&r_vec->tasklet);
nn_dp_warn(&r_vec->nfp_net->dp,
"control message budget exceeded!\n");
}
}
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (C) 2015-2019 Netronome Systems, Inc. */
#ifndef _NFP_DP_NFD3_H_
#define _NFP_DP_NFD3_H_
/* Forward declarations: this header only uses pointers to these types */
struct sk_buff;
struct net_device;
/* TX descriptor format */
/* Layout of the offset_eop byte: bit 7 is the end-of-packet flag, the low
 * seven bits hold the offset at which the packet starts in the buffer.
 */
#define NFD3_DESC_TX_EOP BIT(7)
#define NFD3_DESC_TX_OFFSET_MASK GENMASK(6, 0)
/* NOTE(review): presumably the valid bits of the 16-bit mss field - confirm */
#define NFD3_DESC_TX_MSS_MASK GENMASK(13, 0)
/* Flags in the host TX descriptor */
#define NFD3_DESC_TX_CSUM BIT(7)
#define NFD3_DESC_TX_IP4_CSUM BIT(6)
#define NFD3_DESC_TX_TCP_CSUM BIT(5)
#define NFD3_DESC_TX_UDP_CSUM BIT(4)
#define NFD3_DESC_TX_VLAN BIT(3)
#define NFD3_DESC_TX_LSO BIT(2)
#define NFD3_DESC_TX_ENCAP BIT(1)
#define NFD3_DESC_TX_O_IP4_CSUM BIT(0)
/* NFD3 TX descriptor.
 *
 * The packed field view adds up to 16 bytes; @vals and @vals8 alias the same
 * 16 bytes as four 32-bit or two 64-bit little-endian words.
 */
struct nfp_nfd3_tx_desc {
union {
struct {
u8 dma_addr_hi; /* High bits of host buf address */
__le16 dma_len; /* Length to DMA for this desc */
u8 offset_eop; /* Offset in buf where pkt starts +
 * highest bit is eop flag.
 */
__le32 dma_addr_lo; /* Low 32bit of host buf addr */
__le16 mss; /* MSS to be used for LSO */
u8 lso_hdrlen; /* LSO, TCP payload offset */
u8 flags; /* TX Flags, see @NFD3_DESC_TX_* */
/* The L3/L4 offsets share storage with the VLAN tag */
union {
struct {
u8 l3_offset; /* L3 header offset */
u8 l4_offset; /* L4 header offset */
};
__le16 vlan; /* VLAN tag to add if indicated */
};
__le16 data_len; /* Length of frame + meta data */
} __packed;
__le32 vals[4];
__le64 vals8[2];
};
};
/**
 * struct nfp_nfd3_tx_buf - software TX buffer descriptor
 * @skb: normal ring, sk_buff associated with this buffer
 * @frag: XDP ring, page frag associated with this buffer
 * @xdp: AF_XDP ring, xdp_buff associated with this buffer
 * @dma_addr: DMA mapping address of the buffer
 * @fidx: Fragment index (-1 for the head and [0..nr_frags-1] for frags)
 * @pkt_cnt: Number of packets to be produced out of the skb associated
 * with this buffer (valid only on the head's buffer).
 * Will be 1 for all non-TSO packets.
 * @is_xsk_tx: Flag if buffer is a RX buffer after a XDP_TX action and not a
 * buffer from the TX queue (for AF_XDP).
 * @real_len: Number of bytes to be produced out of the skb (valid only
 * on the head's buffer). Equal to skb->len for non-TSO packets.
 *
 * @skb, @frag and @xdp share storage, as do the (@fidx, @pkt_cnt) pair and
 * @is_xsk_tx; which member is valid depends on the kind of ring.
 */
struct nfp_nfd3_tx_buf {
union {
struct sk_buff *skb;
void *frag;
struct xdp_buff *xdp;
};
dma_addr_t dma_addr;
union {
struct {
short int fidx;
u16 pkt_cnt;
};
struct {
bool is_xsk_tx;
};
};
u32 real_len;
};
/* NFD3 data path entry points shared between the nfd3/ source files */
void
nfp_nfd3_rx_csum(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
const struct nfp_net_rx_desc *rxd,
const struct nfp_meta_parsed *meta, struct sk_buff *skb);
bool
nfp_nfd3_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
void *data, void *pkt, unsigned int pkt_len, int meta_len);
void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget);
int nfp_nfd3_poll(struct napi_struct *napi, int budget);
netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev);
/* Control vNIC: transmit one message / tasklet poll routine */
bool
nfp_nfd3_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
struct sk_buff *skb, bool old);
void nfp_nfd3_ctrl_poll(struct tasklet_struct *t);
void nfp_nfd3_rx_ring_fill_freelist(struct nfp_net_dp *dp,
struct nfp_net_rx_ring *rx_ring);
/* AF_XDP (XSK) helpers */
void nfp_nfd3_xsk_tx_free(struct nfp_nfd3_tx_buf *txbuf);
int nfp_nfd3_xsk_poll(struct napi_struct *napi, int budget);
#endif
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2015-2019 Netronome Systems, Inc. */
#include <linux/seq_file.h>
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "../nfp_net_xsk.h"
#include "nfd3.h"
/* Release every buffer still outstanding on an XDP TX ring.
 *
 * Walks from the read to the write pointer, clearing the software
 * descriptors. When the ring vector has an XSK pool, buffers that came from
 * an XDP_TX action are freed and one completion per slot is reported to the
 * pool.
 */
static void nfp_nfd3_xsk_tx_bufs_free(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_nfd3_tx_buf *txbuf;

	while (tx_ring->rd_p != tx_ring->wr_p) {
		txbuf = &tx_ring->txbufs[D_IDX(tx_ring, tx_ring->rd_p)];
		txbuf->real_len = 0;

		tx_ring->rd_p++;
		tx_ring->qcp_rd_p++;

		if (!tx_ring->r_vec->xsk_pool)
			continue;

		if (txbuf->is_xsk_tx)
			nfp_nfd3_xsk_tx_free(txbuf);

		xsk_tx_completed(tx_ring->r_vec->xsk_pool, 1);
	}
}
/**
 * nfp_nfd3_tx_ring_reset() - Free any untransmitted buffers and reset pointers
 * @dp:		NFP Net data path struct
 * @tx_ring:	TX ring structure
 *
 * Unmaps and frees whatever is still queued between the read and write
 * pointers, zeroes the descriptor memory and rewinds all ring pointers.
 * For a regular ring backed by a netdev the matching TX queue is reset too.
 *
 * Assumes that the device is stopped, must be idempotent.
 */
static void
nfp_nfd3_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
{
	if (tx_ring->is_xdp) {
		nfp_nfd3_xsk_tx_bufs_free(tx_ring);
	} else {
		while (tx_ring->rd_p != tx_ring->wr_p) {
			struct nfp_nfd3_tx_buf *tx_buf;
			const skb_frag_t *frag;
			struct sk_buff *skb;
			int last_fidx;

			tx_buf = &tx_ring->txbufs[D_IDX(tx_ring,
							tx_ring->rd_p)];
			skb = tx_buf->skb;
			last_fidx = skb_shinfo(skb)->nr_frags - 1;

			if (tx_buf->fidx != -1) {
				/* slot carries a page fragment */
				frag = &skb_shinfo(skb)->frags[tx_buf->fidx];
				dma_unmap_page(dp->dev, tx_buf->dma_addr,
					       skb_frag_size(frag),
					       DMA_TO_DEVICE);
			} else {
				/* slot carries the linear head */
				dma_unmap_single(dp->dev, tx_buf->dma_addr,
						 skb_headlen(skb),
						 DMA_TO_DEVICE);
			}

			/* the skb is released along with its last slot */
			if (tx_buf->fidx == last_fidx)
				dev_kfree_skb_any(skb);

			tx_buf->fidx = -2;
			tx_buf->skb = NULL;
			tx_buf->dma_addr = 0;

			tx_ring->rd_p++;
			tx_ring->qcp_rd_p++;
		}
	}

	memset(tx_ring->txds, 0, tx_ring->size);
	tx_ring->wr_ptr_add = 0;
	tx_ring->qcp_rd_p = 0;
	tx_ring->rd_p = 0;
	tx_ring->wr_p = 0;

	if (!tx_ring->is_xdp && dp->netdev)
		netdev_tx_reset_queue(netdev_get_tx_queue(dp->netdev,
							  tx_ring->idx));
}
/**
 * nfp_nfd3_tx_ring_free() - Free resources allocated to a TX ring
 * @tx_ring:	TX ring to free
 *
 * Frees the software buffer array and, if present, the coherent descriptor
 * memory, then clears the bookkeeping fields of the ring.
 */
static void nfp_nfd3_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_dp *dp = &tx_ring->r_vec->nfp_net->dp;

	kvfree(tx_ring->txbufs);
	tx_ring->txbufs = NULL;

	if (tx_ring->txds)
		dma_free_coherent(dp->dev, tx_ring->size,
				  tx_ring->txds, tx_ring->dma);
	tx_ring->txds = NULL;
	tx_ring->dma = 0;

	tx_ring->size = 0;
	tx_ring->cnt = 0;
}
/**
 * nfp_nfd3_tx_ring_alloc() - Allocate resource for a TX ring
 * @dp:		NFP Net data path struct
 * @tx_ring:	TX Ring structure to allocate
 *
 * Allocates the coherent descriptor memory and the software buffer array.
 * For a regular (non-XDP) ring with a netdev the XPS mapping of the queue is
 * also set up. Anything allocated so far is released again on failure.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int
nfp_nfd3_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;

	tx_ring->cnt = dp->txd_cnt;
	tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->txds));

	tx_ring->txds = dma_alloc_coherent(dp->dev, tx_ring->size,
					   &tx_ring->dma,
					   GFP_KERNEL | __GFP_NOWARN);
	if (!tx_ring->txds) {
		netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n",
			    tx_ring->cnt);
		nfp_nfd3_tx_ring_free(tx_ring);
		return -ENOMEM;
	}

	tx_ring->txbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->txbufs),
				   GFP_KERNEL);
	if (!tx_ring->txbufs) {
		nfp_nfd3_tx_ring_free(tx_ring);
		return -ENOMEM;
	}

	if (dp->netdev && !tx_ring->is_xdp)
		netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask,
				    tx_ring->idx);

	return 0;
}
/* Free the pre-allocated page frags of an XDP TX ring.
 *
 * Does nothing for a non-XDP ring. Stops at the first slot without a frag,
 * which lets it clean up after a partially completed allocation.
 */
static void
nfp_nfd3_tx_ring_bufs_free(struct nfp_net_dp *dp,
			   struct nfp_net_tx_ring *tx_ring)
{
	unsigned int i = 0;

	if (!tx_ring->is_xdp)
		return;

	while (i < tx_ring->cnt && tx_ring->txbufs[i].frag) {
		nfp_net_dma_unmap_rx(dp, tx_ring->txbufs[i].dma_addr);
		__free_page(virt_to_page(tx_ring->txbufs[i].frag));
		i++;
	}
}
/* Pre-allocate one buffer per slot for an XDP TX ring.
 *
 * Non-XDP rings need no buffers and succeed immediately. If any allocation
 * fails, the buffers obtained so far are released and -ENOMEM is returned.
 *
 * Return: 0 on success, -ENOMEM on allocation failure.
 */
static int
nfp_nfd3_tx_ring_bufs_alloc(struct nfp_net_dp *dp,
			    struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_nfd3_tx_buf *bufs;
	unsigned int i = 0;

	if (!tx_ring->is_xdp)
		return 0;

	bufs = tx_ring->txbufs;
	while (i < tx_ring->cnt) {
		bufs[i].frag = nfp_net_rx_alloc_one(dp, &bufs[i].dma_addr);
		if (!bufs[i].frag) {
			nfp_nfd3_tx_ring_bufs_free(dp, tx_ring);
			return -ENOMEM;
		}
		i++;
	}

	return 0;
}
/* Dump all descriptors of a TX ring into a seq_file.
 *
 * Each line shows the four raw descriptor words, the buffer attached to the
 * slot (skb or xdp_buff, depending on the ring type), its DMA address and
 * markers for the host (H_RD/H_WR) and device (D_RD/D_WR) pointers that
 * currently point at the slot.
 */
static void
nfp_nfd3_print_tx_descs(struct seq_file *file,
			struct nfp_net_r_vector *r_vec,
			struct nfp_net_tx_ring *tx_ring,
			u32 d_rd_p, u32 d_wr_p)
{
	u32 txd_cnt = tx_ring->cnt;
	int i;

	for (i = 0; i < txd_cnt; i++) {
		struct nfp_nfd3_tx_desc *txd = &tx_ring->txds[i];

		seq_printf(file, "%04d: 0x%08x 0x%08x 0x%08x 0x%08x", i,
			   txd->vals[0], txd->vals[1],
			   txd->vals[2], txd->vals[3]);

		if (tx_ring->is_xdp) {
			struct xdp_buff *xdp;

			xdp = READ_ONCE(tx_ring->txbufs[i].xdp);
			if (xdp)
				seq_printf(file, " xdp->data=%p", xdp->data);
		} else {
			struct sk_buff *skb;

			skb = READ_ONCE(tx_ring->txbufs[i].skb);
			if (skb)
				seq_printf(file, " skb->head=%p skb->data=%p",
					   skb->head, skb->data);
		}

		if (tx_ring->txbufs[i].dma_addr)
			seq_printf(file, " dma_addr=%pad",
				   &tx_ring->txbufs[i].dma_addr);

		/* host-side pointers first, then the device-side ones */
		if (i == tx_ring->rd_p % txd_cnt)
			seq_puts(file, " H_RD");
		if (i == tx_ring->wr_p % txd_cnt)
			seq_puts(file, " H_WR");
		if (i == d_rd_p % txd_cnt)
			seq_puts(file, " D_RD");
		if (i == d_wr_p % txd_cnt)
			seq_puts(file, " D_WR");

		seq_putc(file, '\n');
	}
}
/* NFP_NET_CFG_CTRL_* bits advertised by the NFD3 data path; used as the
 * cap_mask of nfp_nfd3_ops.
 */
#define NFP_NFD3_CFG_CTRL_SUPPORTED \
(NFP_NET_CFG_CTRL_ENABLE | NFP_NET_CFG_CTRL_PROMISC | \
NFP_NET_CFG_CTRL_L2BC | NFP_NET_CFG_CTRL_L2MC | \
NFP_NET_CFG_CTRL_RXCSUM | NFP_NET_CFG_CTRL_TXCSUM | \
NFP_NET_CFG_CTRL_RXVLAN | NFP_NET_CFG_CTRL_TXVLAN | \
NFP_NET_CFG_CTRL_GATHER | NFP_NET_CFG_CTRL_LSO | \
NFP_NET_CFG_CTRL_CTAG_FILTER | NFP_NET_CFG_CTRL_CMSG_DATA | \
NFP_NET_CFG_CTRL_RINGCFG | NFP_NET_CFG_CTRL_RSS | \
NFP_NET_CFG_CTRL_IRQMOD | NFP_NET_CFG_CTRL_TXRWB | \
NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE | \
NFP_NET_CFG_CTRL_BPF | NFP_NET_CFG_CTRL_LSO2 | \
NFP_NET_CFG_CTRL_RSS2 | NFP_NET_CFG_CTRL_CSUM_COMPLETE | \
NFP_NET_CFG_CTRL_LIVE_ADDR)
/* Data path operations table for NFD3: one descriptor minimum per packet,
 * the capability mask above, and the NFD3 implementations of the poll,
 * transmit, ring management and debug-dump callbacks.
 */
const struct nfp_dp_ops nfp_nfd3_ops = {
.version = NFP_NFD_VER_NFD3,
.tx_min_desc_per_pkt = 1,
.cap_mask = NFP_NFD3_CFG_CTRL_SUPPORTED,
.poll = nfp_nfd3_poll,
.xsk_poll = nfp_nfd3_xsk_poll,
.ctrl_poll = nfp_nfd3_ctrl_poll,
.xmit = nfp_nfd3_tx,
.ctrl_tx_one = nfp_nfd3_ctrl_tx_one,
.rx_ring_fill_freelist = nfp_nfd3_rx_ring_fill_freelist,
.tx_ring_alloc = nfp_nfd3_tx_ring_alloc,
.tx_ring_reset = nfp_nfd3_tx_ring_reset,
.tx_ring_free = nfp_nfd3_tx_ring_free,
.tx_ring_bufs_alloc = nfp_nfd3_tx_ring_bufs_alloc,
.tx_ring_bufs_free = nfp_nfd3_tx_ring_bufs_free,
.print_tx_descs = nfp_nfd3_print_tx_descs
};
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2018 Netronome Systems, Inc */
/* Copyright (C) 2021 Corigine, Inc */
#include <linux/bpf_trace.h>
#include <linux/netdevice.h>
#include "../nfp_app.h"
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "../nfp_net_xsk.h"
#include "nfd3.h"
/* Queue a received AF_XDP buffer for transmission (XDP_TX action).
 *
 * Fills the next free slot of @tx_ring with a single end-of-packet
 * descriptor pointing at the packet inside @xrxbuf. The hardware write
 * pointer is not updated here; only wr_ptr_add is accumulated.
 *
 * Return: false if the ring has no free descriptor, true otherwise.
 */
static bool
nfp_nfd3_xsk_tx_xdp(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		    struct nfp_net_rx_ring *rx_ring,
		    struct nfp_net_tx_ring *tx_ring,
		    struct nfp_net_xsk_rx_buf *xrxbuf, unsigned int pkt_len,
		    int pkt_off)
{
	struct nfp_nfd3_tx_desc *txd;
	struct nfp_nfd3_tx_buf *txbuf;
	unsigned int slot;

	if (nfp_net_tx_space(tx_ring) < 1)
		return false;

	xsk_buff_raw_dma_sync_for_device(r_vec->xsk_pool,
					 xrxbuf->dma_addr + pkt_off,
					 pkt_len);

	slot = D_IDX(tx_ring, tx_ring->wr_p);

	/* Software descriptor: remember the buffer so completion can free it */
	txbuf = &tx_ring->txbufs[slot];
	txbuf->is_xsk_tx = true;
	txbuf->real_len = pkt_len;
	txbuf->xdp = xrxbuf->xdp;

	/* Build TX descriptor */
	txd = &tx_ring->txds[slot];
	txd->offset_eop = NFD3_DESC_TX_EOP;
	txd->dma_len = cpu_to_le16(pkt_len);
	nfp_desc_set_dma_addr(txd, xrxbuf->dma_addr + pkt_off);
	txd->data_len = cpu_to_le16(pkt_len);
	txd->lso_hdrlen = 0;
	txd->mss = 0;
	txd->flags = 0;

	tx_ring->wr_p++;
	tx_ring->wr_ptr_add++;

	return true;
}
/* Pass a packet received into an AF_XDP buffer up the regular stack.
 *
 * The payload is copied into a freshly allocated skb, so the XSK buffer is
 * released on success. When the parsed metadata names a port, the skb is
 * delivered to the netdev returned by nfp_app_dev_get(). The packet is
 * dropped if that lookup or the skb allocation fails. *@skbs_polled is
 * incremented for every skb handed to GRO.
 */
static void nfp_nfd3_xsk_rx_skb(struct nfp_net_rx_ring *rx_ring,
				const struct nfp_net_rx_desc *rxd,
				struct nfp_net_xsk_rx_buf *xrxbuf,
				const struct nfp_meta_parsed *meta,
				unsigned int pkt_len,
				bool meta_xdp,
				unsigned int *skbs_polled)
{
	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct net_device *netdev = dp->netdev;
	struct sk_buff *skb;

	if (unlikely(meta->portid)) {
		struct nfp_net *nn = netdev_priv(dp->netdev);

		netdev = nfp_app_dev_get(nn->app, meta->portid, NULL);
		if (unlikely(!netdev)) {
			nfp_net_xsk_rx_drop(r_vec, xrxbuf);
			return;
		}
		nfp_repr_inc_rx_stats(netdev, pkt_len);
	}

	skb = napi_alloc_skb(&r_vec->napi, pkt_len);
	if (!skb) {
		nfp_net_xsk_rx_drop(r_vec, xrxbuf);
		return;
	}
	memcpy(skb_put(skb, pkt_len), xrxbuf->xdp->data, pkt_len);

	skb->mark = meta->mark;
	skb_set_hash(skb, meta->hash, meta->hash_type);

	skb_record_rx_queue(skb, rx_ring->idx);
	skb->protocol = eth_type_trans(skb, netdev);

	nfp_nfd3_rx_csum(dp, r_vec, rxd, meta, skb);

	if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       le16_to_cpu(rxd->rxd.vlan));

	/* Only propagate the metadata length when the XDP program ran */
	if (meta_xdp)
		skb_metadata_set(skb,
				 xrxbuf->xdp->data - xrxbuf->xdp->data_meta);

	napi_gro_receive(&rx_ring->r_vec->napi, skb);

	nfp_net_xsk_rx_free(xrxbuf);

	(*skbs_polled)++;
}
/* AF_XDP receive loop for one RX ring.
 *
 * Consumes up to @budget completed descriptors. Each packet is either
 * dropped, passed to the control-message handler, copied into an skb for the
 * stack, or run through the XDP program and handled per its verdict.
 * Afterwards the freelist is refilled, pending redirects are flushed and
 * queued XDP_TX descriptors are kicked.
 *
 * Returns the number of descriptors consumed. *@skbs_polled is advanced by
 * nfp_nfd3_xsk_rx_skb() for each skb passed to the stack.
 */
static unsigned int
nfp_nfd3_xsk_rx(struct nfp_net_rx_ring *rx_ring, int budget,
unsigned int *skbs_polled)
{
struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
struct nfp_net_tx_ring *tx_ring;
struct bpf_prog *xdp_prog;
bool xdp_redir = false;
int pkts_polled = 0;
xdp_prog = READ_ONCE(dp->xdp_prog);
tx_ring = r_vec->xdp_ring;
while (pkts_polled < budget) {
unsigned int meta_len, data_len, pkt_len, pkt_off;
struct nfp_net_xsk_rx_buf *xrxbuf;
struct nfp_net_rx_desc *rxd;
struct nfp_meta_parsed meta;
int idx, act;
idx = D_IDX(rx_ring, rx_ring->rd_p);
rxd = &rx_ring->rxds[idx];
/* Stop at the first descriptor the device has not completed */
if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
break;
rx_ring->rd_p++;
pkts_polled++;
xrxbuf = &rx_ring->xsk_rxbufs[idx];
/* If starved of buffers "drop" it and scream. */
if (rx_ring->rd_p >= rx_ring->wr_p) {
nn_dp_warn(dp, "Starved of RX buffers\n");
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
break;
}
/* Memory barrier to ensure that we won't do other reads
 * before the DD bit.
 */
dma_rmb();
memset(&meta, 0, sizeof(meta));
/* Only supporting AF_XDP with dynamic metadata so buffer layout
 * is always:
 *
 * ---------------------------------------------------------
 * | off | metadata | packet | XXXX |
 * ---------------------------------------------------------
 */
meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
data_len = le16_to_cpu(rxd->rxd.data_len);
pkt_len = data_len - meta_len;
if (unlikely(meta_len > NFP_NET_MAX_PREPEND)) {
nn_dp_warn(dp, "Oversized RX packet metadata %u\n",
meta_len);
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
continue;
}
/* Stats update. */
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_pkts++;
r_vec->rx_bytes += pkt_len;
u64_stats_update_end(&r_vec->rx_sync);
/* Point the xdp_buff at the packet, skipping the prepended metadata */
xrxbuf->xdp->data += meta_len;
xrxbuf->xdp->data_end = xrxbuf->xdp->data + pkt_len;
xdp_set_data_meta_invalid(xrxbuf->xdp);
xsk_buff_dma_sync_for_cpu(xrxbuf->xdp, r_vec->xsk_pool);
net_prefetch(xrxbuf->xdp->data);
if (meta_len) {
if (unlikely(nfp_nfd3_parse_meta(dp->netdev, &meta,
xrxbuf->xdp->data -
meta_len,
xrxbuf->xdp->data,
pkt_len, meta_len))) {
nn_dp_warn(dp, "Invalid RX packet metadata\n");
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
continue;
}
/* Packets tagged with a port ID bypass the XDP program: control
 * messages go to the app, everything else goes to the stack.
 */
if (unlikely(meta.portid)) {
struct nfp_net *nn = netdev_priv(dp->netdev);
if (meta.portid != NFP_META_PORT_ID_CTRL) {
nfp_nfd3_xsk_rx_skb(rx_ring, rxd,
xrxbuf, &meta,
pkt_len, false,
skbs_polled);
continue;
}
nfp_app_ctrl_rx_raw(nn->app, xrxbuf->xdp->data,
pkt_len);
nfp_net_xsk_rx_free(xrxbuf);
continue;
}
}
act = bpf_prog_run_xdp(xdp_prog, xrxbuf->xdp);
/* The program may have moved data/data_end; recompute length and offset */
pkt_len = xrxbuf->xdp->data_end - xrxbuf->xdp->data;
pkt_off = xrxbuf->xdp->data - xrxbuf->xdp->data_hard_start;
switch (act) {
case XDP_PASS:
nfp_nfd3_xsk_rx_skb(rx_ring, rxd, xrxbuf, &meta, pkt_len,
true, skbs_polled);
break;
case XDP_TX:
if (!nfp_nfd3_xsk_tx_xdp(dp, r_vec, rx_ring, tx_ring,
xrxbuf, pkt_len, pkt_off))
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
else
nfp_net_xsk_rx_unstash(xrxbuf);
break;
case XDP_REDIRECT:
if (xdp_do_redirect(dp->netdev, xrxbuf->xdp, xdp_prog)) {
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
} else {
nfp_net_xsk_rx_unstash(xrxbuf);
xdp_redir = true;
}
break;
default:
bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(dp->netdev, xdp_prog, act);
fallthrough;
case XDP_DROP:
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
break;
}
}
nfp_net_xsk_rx_ring_fill_freelist(r_vec->rx_ring);
if (xdp_redir)
xdp_do_flush_map();
/* Kick the XDP TX ring if XDP_TX queued any descriptors */
if (tx_ring->wr_ptr_add)
nfp_net_tx_xmit_more_flush(tx_ring);
return pkts_polled;
}
/* Free the xdp_buff attached to a TX software descriptor and clear the
 * descriptor's buffer pointer and DMA address.
 */
void nfp_nfd3_xsk_tx_free(struct nfp_nfd3_tx_buf *txbuf)
{
	xsk_buff_free(txbuf->xdp);

	txbuf->xdp = NULL;
	txbuf->dma_addr = 0;
}
/* Reap completed descriptors from an AF_XDP TX ring.
 *
 * At most NFP_NET_XDP_MAX_COMPLETE descriptors are handled per call.
 * Buffers that originated from an XDP_TX action are freed here; the
 * remaining completions are reported to the XSK pool.
 *
 * Return: true when everything the device has completed was processed (or
 * there was nothing to do), false when the per-call limit cut the work short.
 */
static bool nfp_nfd3_xsk_complete(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	u32 done_pkts, done_bytes = 0, reused = 0;
	u32 qcp_rd_p, i;
	bool done_all;
	int todo;

	if (tx_ring->wr_p == tx_ring->rd_p)
		return true;

	/* Work out how many descriptors have been transmitted. */
	qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return true;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);

	/* Clamp the amount of work done in one go */
	done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
	if (!done_all)
		todo = NFP_NET_XDP_MAX_COMPLETE;

	tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo);
	done_pkts = todo;

	for (i = 0; i < done_pkts; i++) {
		struct nfp_nfd3_tx_buf *txbuf;

		txbuf = &tx_ring->txbufs[D_IDX(tx_ring, tx_ring->rd_p)];
		tx_ring->rd_p++;

		if (unlikely(!txbuf->real_len))
			continue;

		done_bytes += txbuf->real_len;
		txbuf->real_len = 0;

		if (!txbuf->is_xsk_tx)
			continue;

		nfp_nfd3_xsk_tx_free(txbuf);
		reused++;
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_pkts += done_pkts;
	r_vec->tx_bytes += done_bytes;
	u64_stats_update_end(&r_vec->tx_sync);

	xsk_tx_completed(r_vec->xsk_pool, done_pkts - reused);

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);

	return done_all;
}
/* Transmit frames queued by user space on the AF_XDP socket.
 *
 * While the ring has room for a full batch, up to NFP_NET_XSK_TX_BATCH
 * descriptors are pulled from the XSK pool, synced for the device and
 * written to the ring. The device write pointer is advanced once at the end
 * for all packets queued by this call.
 */
static void nfp_nfd3_xsk_tx(struct nfp_net_tx_ring *tx_ring)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
struct xdp_desc desc[NFP_NET_XSK_TX_BATCH];
struct xsk_buff_pool *xsk_pool;
struct nfp_nfd3_tx_desc *txd;
u32 pkts = 0, wr_idx;
u32 i, got;
xsk_pool = r_vec->xsk_pool;
while (nfp_net_tx_space(tx_ring) >= NFP_NET_XSK_TX_BATCH) {
/* Collect as many descriptors as the pool has, up to one batch */
for (i = 0; i < NFP_NET_XSK_TX_BATCH; i++)
if (!xsk_tx_peek_desc(xsk_pool, &desc[i]))
break;
got = i;
if (!got)
break;
/* i == got here, so this prefetches the slot just past this batch */
wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i);
prefetchw(&tx_ring->txds[wr_idx]);
for (i = 0; i < got; i++)
xsk_buff_raw_dma_sync_for_device(xsk_pool, desc[i].addr,
desc[i].len);
for (i = 0; i < got; i++) {
wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i);
tx_ring->txbufs[wr_idx].real_len = desc[i].len;
tx_ring->txbufs[wr_idx].is_xsk_tx = false;
/* Build TX descriptor. */
txd = &tx_ring->txds[wr_idx];
nfp_desc_set_dma_addr(txd,
xsk_buff_raw_get_dma(xsk_pool,
desc[i].addr
));
txd->offset_eop = NFD3_DESC_TX_EOP;
txd->dma_len = cpu_to_le16(desc[i].len);
txd->data_len = cpu_to_le16(desc[i].len);
}
tx_ring->wr_p += got;
pkts += got;
}
if (!pkts)
return;
xsk_tx_release(xsk_pool);
/* Ensure all records are visible before incrementing write counter. */
wmb();
nfp_qcp_wr_ptr_add(tx_ring->qcp_q, pkts);
}
/* NAPI poll routine for ring vectors running in AF_XDP zero-copy mode.
 *
 * RX is processed first. TX completion and XSK transmission only run when
 * RX left budget over. If XDP TX completion could not finish, the full
 * budget is reported so that NAPI polls again. Otherwise NAPI is completed
 * and the interrupt unmasked.
 *
 * Return: number of packets accounted against @budget.
 */
int nfp_nfd3_xsk_poll(struct napi_struct *napi, int budget)
{
	struct nfp_net_r_vector *r_vec =
		container_of(napi, struct nfp_net_r_vector, napi);
	unsigned int pkts_polled, skbs = 0;

	pkts_polled = nfp_nfd3_xsk_rx(r_vec->rx_ring, budget, &skbs);
	if (pkts_polled >= budget)
		return pkts_polled;

	if (r_vec->tx_ring)
		nfp_nfd3_tx_complete(r_vec->tx_ring, budget);

	if (!nfp_nfd3_xsk_complete(r_vec->xdp_ring))
		pkts_polled = budget;

	nfp_nfd3_xsk_tx(r_vec->xdp_ring);

	if (pkts_polled < budget && napi_complete_done(napi, skbs))
		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);

	return pkts_polled;
}
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2015-2019 Netronome Systems, Inc. */
#include <linux/bpf_trace.h>
#include <linux/netdevice.h>
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/bitfield.h>
#include "../nfp_app.h"
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "../crypto/crypto.h"
#include "../crypto/fw.h"
#include "nfdk.h"
/* A stopped queue may be woken once the ring is no longer considered full
 * at twice the stop threshold.
 */
static int nfp_nfdk_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
{
	const unsigned int wake_thresh = NFDK_TX_DESC_STOP_CNT * 2;

	return !nfp_net_tx_full(tx_ring, wake_thresh);
}
/* The queue has to be stopped when the ring is considered full at the
 * NFDK_TX_DESC_STOP_CNT threshold.
 */
static int nfp_nfdk_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
{
	const unsigned int stop_thresh = NFDK_TX_DESC_STOP_CNT;

	return nfp_net_tx_full(tx_ring, stop_thresh);
}
/* Stop the TX queue, then re-check the ring and restart the queue right
 * away if enough room has appeared in the meantime.
 */
static void nfp_nfdk_tx_ring_stop(struct netdev_queue *nd_q,
struct nfp_net_tx_ring *tx_ring)
{
netif_tx_stop_queue(nd_q);
/* We can race with the TX completion out of NAPI so recheck */
smp_mb();
if (unlikely(nfp_nfdk_tx_ring_should_wake(tx_ring)))
netif_tx_start_queue(nd_q);
}
/* Build the LSO descriptor for a GSO skb.
 *
 * Fills in the header offsets (inner headers for encapsulated packets), MSS,
 * LSO header length and segment count. Records the resulting packet count
 * and on-wire byte count in @txbuf and bumps the tx_lso counter.
 *
 * Return: the raw little-endian descriptor word.
 */
static __le64
nfp_nfdk_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfdk_tx_buf *txbuf,
		struct sk_buff *skb)
{
	u32 segs, hdrlen, l3_offset, l4_offset;
	struct nfp_nfdk_tx_desc txd;
	u16 mss;

	if (skb->encapsulation) {
		l3_offset = skb_inner_network_offset(skb);
		l4_offset = skb_inner_transport_offset(skb);
		hdrlen = skb_inner_transport_header(skb) - skb->data +
			 inner_tcp_hdrlen(skb);
	} else {
		l3_offset = skb_network_offset(skb);
		l4_offset = skb_transport_offset(skb);
		hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
	}

	mss = skb_shinfo(skb)->gso_size & NFDK_DESC_TX_MSS_MASK;
	segs = skb_shinfo(skb)->gso_segs;

	/* Note: TSO of the packet with metadata prepended to skb is not
	 * supported yet, in which case l3/l4_offset and lso_hdrlen need
	 * be correctly handled here.
	 * Concern:
	 * The driver doesn't have md_bytes easily available at this point.
	 * The PCI.IN PD ME won't have md_bytes bytes to add to lso_hdrlen,
	 * so it needs the full length there.  The app MEs might prefer
	 * l3_offset and l4_offset relative to the start of packet data,
	 * but could probably cope with it being relative to the CTM buf
	 * data offset.
	 */
	txd.lso_totsegs = segs;
	txd.lso_hdrlen = hdrlen;
	txd.mss = cpu_to_le16(mss);
	txd.lso_meta_res = 0;
	txd.l4_offset = l4_offset;
	txd.l3_offset = l3_offset;

	/* Every segment after the first repeats the headers on the wire */
	txbuf->pkt_cnt = segs;
	txbuf->real_len = skb->len + hdrlen * (txbuf->pkt_cnt - 1);

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_lso++;
	u64_stats_update_end(&r_vec->tx_sync);

	return txd.raw;
}
/* Add checksum offload flags for @skb to the metadata descriptor flags.
 *
 * @flags is returned unchanged unless TX checksum offload is enabled and the
 * skb asks for CHECKSUM_PARTIAL. L4 offload is always requested in that
 * case, L3 offload only for IPv4. Encapsulated packets additionally get the
 * ENCAP flag. The hw_csum_tx / hw_csum_tx_inner counters grow by @pkt_cnt.
 *
 * Return: the updated flags.
 */
static u8
nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 unsigned int pkt_cnt, struct sk_buff *skb, u64 flags)
{
	struct ipv6hdr *ipv6h;
	struct iphdr *iph;

	if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM) ||
	    skb->ip_summed != CHECKSUM_PARTIAL)
		return flags;

	flags |= NFDK_DESC_TX_L4_CSUM;

	if (skb->encapsulation) {
		iph = inner_ip_hdr(skb);
		ipv6h = inner_ipv6_hdr(skb);
	} else {
		iph = ip_hdr(skb);
		ipv6h = ipv6_hdr(skb);
	}

	/* L3 checksum offloading flag is not required for ipv6 */
	if (iph->version == 4) {
		flags |= NFDK_DESC_TX_L3_CSUM;
	} else if (ipv6h->version != 6) {
		nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
		return flags;
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	if (skb->encapsulation) {
		flags |= NFDK_DESC_TX_ENCAP;
		r_vec->hw_csum_tx_inner += pkt_cnt;
	} else {
		r_vec->hw_csum_tx += pkt_cnt;
	}
	u64_stats_update_end(&r_vec->tx_sync);

	return flags;
}
/* Make sure the next packet fits into the current descriptor block.
 *
 * Counts the descriptors @skb needs. If that exceeds
 * NFDK_TX_DESC_GATHER_MAX the skb is linearized and recounted. When the
 * packet would straddle a block boundary, or push the data pending in the
 * block past NFDK_TX_MAX_DATA_PER_BLOCK, the rest of the current block is
 * filled with zeroed (no-op) descriptors so the packet starts a new block.
 *
 * Note that skb_linearize() may be called here, so the caller must re-read
 * skb_shinfo(skb)->nr_frags afterwards.
 *
 * Return: 0 on success, negative errno if the skb cannot be made to fit.
 */
static int
nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring,
			      unsigned int nr_frags, struct sk_buff *skb)
{
	unsigned int n_descs, wr_p, nop_slots;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int wr_idx;
	int err;

recount_descs:
	n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb));

	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;
	for (; frag < fend; frag++)
		n_descs += DIV_ROUND_UP(skb_frag_size(frag),
					NFDK_TX_MAX_DATA_PER_DESC);

	if (unlikely(n_descs > NFDK_TX_DESC_GATHER_MAX)) {
		if (skb_is_nonlinear(skb)) {
			err = skb_linearize(skb);
			if (err)
				return err;
			/* The frags have been pulled into the head, so the
			 * count passed in by the caller is stale now.
			 */
			nr_frags = skb_shinfo(skb)->nr_frags;
			goto recount_descs;
		}
		return -EINVAL;
	}

	/* Under count by 1 (don't count meta) for the round down to work out */
	n_descs += !!skb_is_gso(skb);

	if (round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	    round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT))
		goto close_block;

	if ((u32)tx_ring->data_pending + skb->len > NFDK_TX_MAX_DATA_PER_BLOCK)
		goto close_block;

	return 0;

close_block:
	wr_p = tx_ring->wr_p;
	nop_slots = D_BLOCK_CPL(wr_p);

	wr_idx = D_IDX(tx_ring, wr_p);
	/* A NULL skb marks the slot as a closed block for TX completion */
	tx_ring->ktxbufs[wr_idx].skb = NULL;
	txd = &tx_ring->ktxds[wr_idx];

	memset(txd, 0, array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

	tx_ring->data_pending = 0;
	tx_ring->wr_p += nop_slots;
	tx_ring->wr_ptr_add += nop_slots;

	return 0;
}

/* Prepend the destination port ID to @skb if it carries a
 * METADATA_HW_PORT_MUX metadata dst.
 *
 * Return: number of bytes prepended (0 if there is nothing to prepend),
 * -EOPNOTSUPP for GSO skbs, -ENOMEM if headroom could not be obtained.
 */
static int nfp_nfdk_prep_port_id(struct sk_buff *skb)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);
	unsigned char *data;

	if (likely(!md_dst))
		return 0;
	if (unlikely(md_dst->type != METADATA_HW_PORT_MUX))
		return 0;

	/* Note: Unsupported case when TSO a skb with metadata prepended.
	 * See the comments in `nfp_nfdk_tx_tso` for details.
	 */
	if (unlikely(md_dst && skb_is_gso(skb)))
		return -EOPNOTSUPP;

	if (unlikely(skb_cow_head(skb, sizeof(md_dst->u.port_info.port_id))))
		return -ENOMEM;

	data = skb_push(skb, sizeof(md_dst->u.port_info.port_id));
	put_unaligned_be32(md_dst->u.port_info.port_id, data);

	return sizeof(md_dst->u.port_info.port_id);
}

/* Prepend TX metadata (currently only the port ID) to @skb.
 *
 * When a port ID was prepended, a 32-bit big-endian header word holding the
 * total metadata length and the field type is pushed in front of it.
 *
 * Return: NFDK_DESC_TX_CHAIN_META if metadata was prepended, 0 if there is
 * none, negative errno on failure.
 */
static int
nfp_nfdk_prep_tx_meta(struct nfp_app *app, struct sk_buff *skb,
		      struct nfp_net_r_vector *r_vec)
{
	unsigned char *data;
	int res, md_bytes;
	u32 meta_id = 0;

	res = nfp_nfdk_prep_port_id(skb);
	if (unlikely(res <= 0))
		return res;

	md_bytes = res;
	meta_id = NFP_NET_META_PORTID;

	if (unlikely(skb_cow_head(skb, sizeof(meta_id))))
		return -ENOMEM;

	md_bytes += sizeof(meta_id);

	meta_id = FIELD_PREP(NFDK_META_LEN, md_bytes) |
		  FIELD_PREP(NFDK_META_FIELDS, meta_id);

	data = skb_push(skb, sizeof(meta_id));
	put_unaligned_be32(meta_id, data);

	return NFDK_DESC_TX_CHAIN_META;
}

/**
 * nfp_nfdk_tx() - Main transmit entry point
 * @skb:    SKB to transmit
 * @netdev: netdev structure
 *
 * Maps the head and all fragments of @skb, writes the data descriptors
 * followed by the metadata (and, for GSO, the LSO) descriptor, and advances
 * the ring. On any error after the ring-full check the skb is dropped and
 * counted in tx_errors.
 *
 * Return: NETDEV_TX_OK when the skb was queued or dropped, NETDEV_TX_BUSY
 * when the ring is full.
 */
netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	struct nfp_nfdk_tx_buf *txbuf, *etxbuf;
	u32 cnt, tmp_dlen, dlen_type = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_net_r_vector *r_vec;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int real_len, qidx;
	unsigned int dma_len, type;
	struct netdev_queue *nd_q;
	struct nfp_net_dp *dp;
	int nr_frags, wr_idx;
	dma_addr_t dma_addr;
	u64 metadata;

	dp = &nn->dp;
	qidx = skb_get_queue_mapping(skb);
	tx_ring = &dp->tx_rings[qidx];
	r_vec = tx_ring->r_vec;
	nd_q = netdev_get_tx_queue(dp->netdev, qidx);

	/* Don't bother counting frags, assume the worst */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
			   qidx, tx_ring->wr_p, tx_ring->rd_p);
		netif_tx_stop_queue(nd_q);
		nfp_net_tx_xmit_more_flush(tx_ring);
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		return NETDEV_TX_BUSY;
	}

	metadata = nfp_nfdk_prep_tx_meta(nn->app, skb, r_vec);
	if (unlikely((int)metadata < 0))
		goto err_flush;

	if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb_shinfo(skb)->nr_frags,
					  skb))
		goto err_flush;

	/* nfp_nfdk_tx_maybe_close_block() may have linearized the skb, so
	 * the fragment count must only be sampled after it has returned.
	 */
	nr_frags = skb_shinfo(skb)->nr_frags;

	/* DMA map all */
	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
	txd = &tx_ring->ktxds[wr_idx];
	txbuf = &tx_ring->ktxbufs[wr_idx];

	dma_len = skb_headlen(skb);
	if (skb_is_gso(skb))
		type = NFDK_DESC_TX_TYPE_TSO;
	else if (!nr_frags && dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_warn_dma;

	/* First software slot holds the skb, the second the head mapping */
	txbuf->skb = skb;
	txbuf++;

	txbuf->dma_addr = dma_addr;
	txbuf++;

	/* FIELD_PREP() implicitly truncates to chunk */
	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr(txd, dma_addr);

	/* starts at bit 0 */
	BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1));

	/* Preserve the original dlen_type, this way below the EOP logic
	 * can use dlen_type.
	 */
	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	/* The rest of the data (if any) will be in larger dma descriptors
	 * and is handled with the fragment loop.
	 */
	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;

	while (true) {
		while (dma_len > 0) {
			dma_len -= 1;
			dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);

			txd->dma_len_type = cpu_to_le16(dlen_type);
			nfp_desc_set_dma_addr(txd, dma_addr);

			dma_len -= dlen_type;
			dma_addr += dlen_type + 1;
			txd++;
		}

		if (frag >= fend)
			break;

		dma_len = skb_frag_size(frag);
		dma_addr = skb_frag_dma_map(dp->dev, frag, 0, dma_len,
					    DMA_TO_DEVICE);
		if (dma_mapping_error(dp->dev, dma_addr))
			goto err_unmap;

		txbuf->dma_addr = dma_addr;
		txbuf++;

		frag++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	if (!skb_is_gso(skb)) {
		real_len = skb->len;
		/* Metadata desc */
		metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd++;
	} else {
		/* lso desc should be placed after metadata desc */
		(txd + 1)->raw = nfp_nfdk_tx_tso(r_vec, txbuf, skb);
		real_len = txbuf->real_len;
		/* Metadata desc */
		metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd += 2;
		txbuf++;
	}

	cnt = txd - tx_ring->ktxds - wr_idx;
	if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
		     round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
		goto err_warn_overflow;

	skb_tx_timestamp(skb);

	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += skb->len;
	else
		tx_ring->data_pending = 0;

	if (nfp_nfdk_tx_ring_should_stop(tx_ring))
		nfp_nfdk_tx_ring_stop(nd_q, tx_ring);

	tx_ring->wr_ptr_add += cnt;
	if (__netdev_tx_sent_queue(nd_q, real_len, netdev_xmit_more()))
		nfp_net_tx_xmit_more_flush(tx_ring);

	return NETDEV_TX_OK;

err_warn_overflow:
	WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
		  wr_idx, skb_headlen(skb), nr_frags, cnt);
	if (skb_is_gso(skb))
		txbuf--;
err_unmap:
	/* txbuf pointed to the next-to-use */
	etxbuf = txbuf;
	/* first txbuf holds the skb */
	txbuf = &tx_ring->ktxbufs[wr_idx + 1];
	if (txbuf < etxbuf) {
		dma_unmap_single(dp->dev, txbuf->dma_addr,
				 skb_headlen(skb), DMA_TO_DEVICE);
		txbuf->raw = 0;
		txbuf++;
	}
	/* Unmap every fragment mapped so far, up to (excluding) etxbuf */
	frag = skb_shinfo(skb)->frags;
	while (txbuf < etxbuf) {
		dma_unmap_page(dp->dev, txbuf->dma_addr,
			       skb_frag_size(frag), DMA_TO_DEVICE);
		txbuf->raw = 0;
		frag++;
		txbuf++;
	}
err_warn_dma:
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_flush:
	nfp_net_tx_xmit_more_flush(tx_ring);
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}
/**
 * nfp_nfdk_tx_complete() - Handle completed TX packets
* @tx_ring: TX ring structure
* @budget: NAPI budget (only used as bool to determine if in NAPI context)
*/
static void nfp_nfdk_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
u32 done_pkts = 0, done_bytes = 0;
struct nfp_nfdk_tx_buf *ktxbufs;
struct device *dev = dp->dev;
struct netdev_queue *nd_q;
u32 rd_p, qcp_rd_p;
int todo;
rd_p = tx_ring->rd_p;
if (tx_ring->wr_p == rd_p)
return;
/* Work out how many descriptors have been transmitted */
qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
if (qcp_rd_p == tx_ring->qcp_rd_p)
return;
todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
ktxbufs = tx_ring->ktxbufs;
while (todo > 0) {
const skb_frag_t *frag, *fend;
unsigned int size, n_descs = 1;
struct nfp_nfdk_tx_buf *txbuf;
struct sk_buff *skb;
txbuf = &ktxbufs[D_IDX(tx_ring, rd_p)];
skb = txbuf->skb;
txbuf++;
/* Closed block */
if (!skb) {
n_descs = D_BLOCK_CPL(rd_p);
goto next;
}
/* Unmap head */
size = skb_headlen(skb);
n_descs += nfp_nfdk_headlen_to_segs(size);
dma_unmap_single(dev, txbuf->dma_addr, size, DMA_TO_DEVICE);
txbuf++;
/* Unmap frags */
frag = skb_shinfo(skb)->frags;
fend = frag + skb_shinfo(skb)->nr_frags;
for (; frag < fend; frag++) {
size = skb_frag_size(frag);
n_descs += DIV_ROUND_UP(size,
NFDK_TX_MAX_DATA_PER_DESC);
dma_unmap_page(dev, txbuf->dma_addr,
skb_frag_size(frag), DMA_TO_DEVICE);
txbuf++;
}
if (!skb_is_gso(skb)) {
done_bytes += skb->len;
done_pkts++;
} else {
done_bytes += txbuf->real_len;
done_pkts += txbuf->pkt_cnt;
n_descs++;
}
napi_consume_skb(skb, budget);
next:
rd_p += n_descs;
todo -= n_descs;
}
tx_ring->rd_p = rd_p;
tx_ring->qcp_rd_p = qcp_rd_p;
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_bytes += done_bytes;
r_vec->tx_pkts += done_pkts;
u64_stats_update_end(&r_vec->tx_sync);
if (!dp->netdev)
return;
nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
if (nfp_nfdk_tx_ring_should_wake(tx_ring)) {
/* Make sure TX thread will see updated tx_ring->rd_p */
smp_mb();
if (unlikely(netif_tx_queue_stopped(nd_q)))
netif_tx_wake_queue(nd_q);
}
WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
"TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
}
/* Receive processing */
/* Allocate and DMA-map one RX buffer.
 *
 * A whole page is used when an XDP program is attached, a NAPI page
 * fragment of dp->fl_bufsz bytes otherwise.  The mapping is stored in
 * @dma_addr.  Returns the buffer, or NULL if allocation or mapping failed
 * (the buffer is freed again in the latter case).
 */
static void *
nfp_nfdk_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
{
	void *buf;

	if (dp->xdp_prog) {
		struct page *pg = dev_alloc_page();

		if (unlikely(!pg))
			return NULL;
		buf = page_address(pg);
	} else {
		buf = napi_alloc_frag(dp->fl_bufsz);
		if (unlikely(!buf))
			return NULL;
	}

	*dma_addr = nfp_net_dma_map_rx(dp, buf);
	if (!dma_mapping_error(dp->dev, *dma_addr))
		return buf;

	nfp_net_free_frag(buf, dp->xdp_prog);
	nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
	return NULL;
}
/**
 * nfp_nfdk_rx_give_one() - Post a mapped buffer on the software and hardware rings
 * @dp:		NFP Net data path struct
 * @rx_ring:	RX ring structure
 * @frag:	page fragment buffer
 * @dma_addr:	DMA address of the buffer mapping
 *
 * The freelist write pointer is only pushed to the device once every
 * NFP_NET_FL_BATCH buffers.
 */
static void
nfp_nfdk_rx_give_one(const struct nfp_net_dp *dp,
		     struct nfp_net_rx_ring *rx_ring,
		     void *frag, dma_addr_t dma_addr)
{
	unsigned int slot = D_IDX(rx_ring, rx_ring->wr_p);
	struct nfp_net_rx_buf *buf = &rx_ring->rxbufs[slot];
	struct nfp_net_rx_desc *desc = &rx_ring->rxds[slot];

	nfp_net_dma_sync_dev_rx(dp, dma_addr);

	/* Remember the buffer and its mapping in the software ring */
	buf->frag = frag;
	buf->dma_addr = dma_addr;

	/* Build the freelist descriptor */
	desc->fld.reserved = 0;
	desc->fld.meta_len_dd = 0;
	nfp_desc_set_dma_addr(&desc->fld, dma_addr + dp->rx_dma_off);

	rx_ring->wr_p++;
	if (rx_ring->wr_p % NFP_NET_FL_BATCH)
		return;

	/* A full batch is ready: make sure all descriptor writes are
	 * flushed before telling the hardware about them.
	 */
	wmb();
	nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
}
/**
* nfp_nfdk_rx_ring_fill_freelist() - Give buffers from the ring to FW
* @dp: NFP Net data path struct
* @rx_ring: RX ring to fill
*/
void nfp_nfdk_rx_ring_fill_freelist(struct nfp_net_dp *dp,
struct nfp_net_rx_ring *rx_ring)
{
unsigned int i;
for (i = 0; i < rx_ring->cnt - 1; i++)
nfp_nfdk_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
rx_ring->rxbufs[i].dma_addr);
}
/**
 * nfp_nfdk_rx_csum_has_errors() - group check if rxd has any csum errors
 * @flags: RX descriptor flags field in CPU byte order
 *
 * Return: non-zero when the set of "checked" bits does not line up with the
 * set of "OK" bits (shifted into the same position), zero otherwise.
 */
static int nfp_nfdk_rx_csum_has_errors(u16 flags)
{
	const u16 checked = flags & __PCIE_DESC_RX_CSUM_ALL;
	const u16 passed = flags & __PCIE_DESC_RX_CSUM_ALL_OK;

	if (checked == (passed << PCIE_DESC_RX_CSUM_OK_SHIFT))
		return 0;

	return 1;
}
/**
 * nfp_nfdk_rx_csum() - set SKB checksum field based on RX descriptor flags
 * @dp:		NFP Net data path struct
 * @r_vec:	per-ring structure
 * @rxd:	Pointer to RX descriptor
 * @meta:	Parsed metadata prepend
 * @skb:	Pointer to SKB
 *
 * A checksum delivered through the metadata prepend takes precedence over
 * the per-protocol OK bits in the descriptor.
 */
static void
nfp_nfdk_rx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_net_rx_desc *rxd, struct nfp_meta_parsed *meta,
		 struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	/* RX checksum offload disabled: leave the skb untouched */
	if (!(dp->netdev->features & NETIF_F_RXCSUM))
		return;

	/* Checksum supplied via metadata */
	if (meta->csum_type) {
		skb->ip_summed = meta->csum_type;
		skb->csum = meta->csum;

		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_complete++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	if (nfp_nfdk_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_error++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	/* Assume that the firmware will never report inner CSUM_OK unless outer
	 * L4 headers were successfully parsed. FW will always report zero UDP
	 * checksum as CSUM_OK.
	 */
	if (rxd->rxd.flags &
	    (PCIE_DESC_RX_TCP_CSUM_OK | PCIE_DESC_RX_UDP_CSUM_OK)) {
		__skb_incr_checksum_unnecessary(skb);

		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}

	if (rxd->rxd.flags &
	    (PCIE_DESC_RX_I_TCP_CSUM_OK | PCIE_DESC_RX_I_UDP_CSUM_OK)) {
		__skb_incr_checksum_unnecessary(skb);

		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_inner_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}
}
/* Record the RSS hash from the metadata prepend in @meta.
 *
 * Does nothing unless NETIF_F_RXHASH is enabled on @netdev.  The plain
 * IPv4/IPv6 hash types are reported as L3 hashes, everything else as L4.
 * @hash points at a big-endian value that need not be aligned.
 */
static void
nfp_nfdk_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
		  unsigned int type, __be32 *hash)
{
	if (!(netdev->features & NETIF_F_RXHASH))
		return;

	if (type == NFP_NET_RSS_IPV4 ||
	    type == NFP_NET_RSS_IPV6 ||
	    type == NFP_NET_RSS_IPV6_EX)
		meta->hash_type = PKT_HASH_TYPE_L3;
	else
		meta->hash_type = PKT_HASH_TYPE_L4;

	meta->hash = get_unaligned_be32(hash);
}
/* Parse the metadata prepend located at @data, in front of the packet
 * at @pkt.
 *
 * The first 32-bit big-endian word is a chain of field type identifiers,
 * NFP_NET_META_FIELD_SIZE bits each; the values of the fields follow in
 * the same order.  Parsed values are stored in @meta.
 *
 * Returns true when the metadata is invalid (unknown field type, or the
 * parsed fields do not end exactly at @pkt), false when it is consistent
 * or when a TLS resync request was consumed.
 */
static bool
nfp_nfdk_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
		    void *data, void *pkt, unsigned int pkt_len, int meta_len)
{
	u32 fields = get_unaligned_be32(data);

	data += 4;

	for (; fields; fields >>= NFP_NET_META_FIELD_SIZE) {
		u32 kind = fields & NFP_NET_META_FIELD_MASK;

		if (kind == NFP_NET_META_HASH) {
			/* The hash type sits in the next field slot */
			fields >>= NFP_NET_META_FIELD_SIZE;
			nfp_nfdk_set_hash(netdev, meta,
					  fields & NFP_NET_META_FIELD_MASK,
					  (__be32 *)data);
			data += 4;
		} else if (kind == NFP_NET_META_MARK) {
			meta->mark = get_unaligned_be32(data);
			data += 4;
		} else if (kind == NFP_NET_META_PORTID) {
			meta->portid = get_unaligned_be32(data);
			data += 4;
		} else if (kind == NFP_NET_META_CSUM) {
			meta->csum_type = CHECKSUM_COMPLETE;
			meta->csum =
				(__force __wsum)__get_unaligned_cpu32(data);
			data += 4;
		} else if (kind == NFP_NET_META_RESYNC_INFO) {
			if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
						      pkt_len))
				return false;
			data += sizeof(struct nfp_net_tls_resync_req);
		} else {
			return true;
		}
	}

	return data != pkt;
}
static void
nfp_nfdk_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
struct sk_buff *skb)
{
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_drops++;
/* If we have both skb and rxbuf the replacement buffer allocation
* must have failed, count this as an alloc failure.
*/
if (skb && rxbuf)
r_vec->rx_replace_buf_alloc_fail++;
u64_stats_update_end(&r_vec->rx_sync);
/* skb is build based on the frag, free_skb() would free the frag
* so to be able to reuse it we need an extra ref.
*/
if (skb && rxbuf && skb->head == rxbuf->frag)
page_ref_inc(virt_to_head_page(rxbuf->frag));
if (rxbuf)
nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
if (skb)
dev_kfree_skb_any(skb);
}
/* Reclaim completed descriptors of the XDP TX ring.
 *
 * Buffers of transmitted frames are handed straight back to the RX
 * freelist of the same ring vector.  At most NFP_NET_XDP_MAX_COMPLETE
 * descriptors are processed per call.
 *
 * Returns true if everything the device reported as complete was within
 * that limit (or nothing was pending), false if more work remains.
 */
static bool nfp_nfdk_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_rx_ring *rx_ring;
	u32 hw_rd_p, reaped = 0;
	bool drained;
	int pending;

	/* Work out how many descriptors have been transmitted */
	hw_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
	if (hw_rd_p == tx_ring->qcp_rd_p)
		return true;

	pending = D_IDX(tx_ring, hw_rd_p - tx_ring->qcp_rd_p);
	drained = pending <= NFP_NET_XDP_MAX_COMPLETE;
	pending = min(pending, NFP_NET_XDP_MAX_COMPLETE);

	rx_ring = r_vec->rx_ring;
	while (pending > 0) {
		int idx = D_IDX(tx_ring, tx_ring->rd_p + reaped);
		struct nfp_nfdk_tx_buf *slot = &tx_ring->ktxbufs[idx];
		unsigned int step = 1;

		if (slot->raw) {
			if (NFDK_TX_BUF_INFO(slot->val) ==
			    NFDK_TX_BUF_INFO_SOP) {
				/* Two successive slots stash the virtual and
				 * the DMA address respectively; recycle the
				 * buffer and clean both slots.
				 */
				nfp_nfdk_rx_give_one(dp, rx_ring,
						     (void *)NFDK_TX_BUF_PTR(slot[0].val),
						     slot[1].dma_addr);
				slot[0].raw = 0;
				slot[1].raw = 0;
				step = 2;

				u64_stats_update_begin(&r_vec->tx_sync);
				/* Note: tx_bytes not accumulated. */
				r_vec->tx_pkts++;
				u64_stats_update_end(&r_vec->tx_sync);
			} else {
				WARN_ONCE(1, "Unexpected TX buffer in XDP TX ring\n");
			}
		}

		pending -= step;
		reaped += step;
	}

	tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + reaped);
	tx_ring->rd_p += reaped;

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);

	return drained;
}
/**
 * nfp_nfdk_tx_xdp_buf() - Queue a received buffer on the XDP TX ring
 * @dp:		NFP Net data path struct
 * @rx_ring:	RX ring the buffer was received on
 * @tx_ring:	XDP TX ring to transmit on
 * @rxbuf:	RX buffer holding the frame
 * @dma_off:	Offset of the frame data from the start of the DMA mapping
 * @pkt_len:	Length of the frame data
 * @completed:	Set to true once TX completion has been run for this poll,
 *		so that it is attempted at most once
 *
 * The buffer is not copied; its virtual and DMA addresses are stashed in
 * two successive TX buffer slots so that nfp_nfdk_xdp_complete() can give
 * it back to the RX freelist once the frame has been sent.
 *
 * On every failure path the buffer is recycled onto the RX freelist through
 * nfp_nfdk_rx_drop(), so the caller must not touch @rxbuf again after a
 * false return.
 *
 * Return: true if the frame was queued, false if it was dropped.
 */
static bool
nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
		    struct nfp_net_tx_ring *tx_ring,
		    struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
		    unsigned int pkt_len, bool *completed)
{
	unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;
	unsigned int dma_len, type, cnt, dlen_type, tmp_dlen;
	struct nfp_nfdk_tx_buf *txbuf;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int n_descs;
	dma_addr_t dma_addr;
	int wr_idx;

	/* Reject if xdp_adjust_tail grow packet beyond DMA area.  The buffer
	 * has already been taken off the RX ring by the caller, so it must
	 * be recycled here or it would be lost for good.
	 */
	if (pkt_len + dma_off > dma_map_sz) {
		nfp_nfdk_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, NULL);
		return false;
	}

	/* Make sure there's still at least one block available after
	 * aligning to block boundary, so that the txds used below
	 * won't wrap around the tx_ring.
	 */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		if (!*completed) {
			nfp_nfdk_xdp_complete(tx_ring);
			*completed = true;
		}

		if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
			nfp_nfdk_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
					 NULL);
			return false;
		}
	}

	/* Check if cross block boundary; if so, or if the block's data
	 * budget would be exceeded, pad the rest of the current block with
	 * zeroed (NOP) descriptors and start at the next block.
	 */
	n_descs = nfp_nfdk_headlen_to_segs(pkt_len);
	if ((round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	     round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) ||
	    ((u32)tx_ring->data_pending + pkt_len >
	     NFDK_TX_MAX_DATA_PER_BLOCK)) {
		unsigned int nop_slots = D_BLOCK_CPL(tx_ring->wr_p);

		wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
		txd = &tx_ring->ktxds[wr_idx];
		memset(txd, 0,
		       array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

		tx_ring->data_pending = 0;
		tx_ring->wr_p += nop_slots;
		tx_ring->wr_ptr_add += nop_slots;
	}

	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

	/* Stash the buffer: virtual address (tagged as start of packet) in
	 * the first slot, DMA address in the second.
	 */
	txbuf = &tx_ring->ktxbufs[wr_idx];
	txbuf[0].val = (unsigned long)rxbuf->frag | NFDK_TX_BUF_INFO_SOP;
	txbuf[1].dma_addr = rxbuf->dma_addr;
	/* Note: pkt len not stored */

	dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
				   pkt_len, DMA_BIDIRECTIONAL);

	/* Build TX descriptor */
	txd = &tx_ring->ktxds[wr_idx];
	dma_len = pkt_len;
	dma_addr = rxbuf->dma_addr + dma_off;

	if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	/* The length field holds "length - 1".
	 * FIELD_PREP() implicitly truncates to chunk
	 */
	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr(txd, dma_addr);

	/* Advance past what the head descriptor covers */
	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	/* Remaining data goes into follow-up descriptors */
	while (dma_len > 0) {
		dma_len -= 1;
		dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
		txd->dma_len_type = cpu_to_le16(dlen_type);
		nfp_desc_set_dma_addr(txd, dma_addr);

		dlen_type &= NFDK_DESC_TX_DMA_LEN;
		dma_len -= dlen_type;
		dma_addr += dlen_type + 1;
		txd++;
	}

	/* Mark the last data descriptor as end of packet */
	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	/* Metadata desc */
	txd->raw = 0;
	txd++;

	cnt = txd - tx_ring->ktxds - wr_idx;
	tx_ring->wr_p += cnt;
	/* Data accounting restarts whenever a block is exactly filled */
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += pkt_len;
	else
		tx_ring->data_pending = 0;

	tx_ring->wr_ptr_add += cnt;
	return true;
}
/**
 * nfp_nfdk_rx() - receive up to @budget packets on @rx_ring
 * @rx_ring: RX ring to receive from
 * @budget: NAPI budget
 *
 * Note, this function is separated out from the napi poll function to
 * more cleanly separate packet receive code from other bookkeeping
 * functions performed in the napi poll function.
 *
 * For every completed descriptor the metadata prepend is parsed, an
 * attached XDP program is run (only for packets without a port id in the
 * metadata), and the packet is then either recycled, transmitted on the
 * XDP TX ring, handed to the app's control message handler, or wrapped in
 * an skb and passed up the stack.  A buffer passed up the stack is replaced
 * on the freelist by a freshly allocated one.
 *
 * Return: Number of packets received.  When XDP TX completions could not all
 * be processed, @budget is returned instead.
 */
static int nfp_nfdk_rx(struct nfp_net_rx_ring *rx_ring, int budget)
{
struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
struct nfp_net_tx_ring *tx_ring;
struct bpf_prog *xdp_prog;
bool xdp_tx_cmpl = false;
unsigned int true_bufsz;
struct sk_buff *skb;
int pkts_polled = 0;
struct xdp_buff xdp;
int idx;
xdp_prog = READ_ONCE(dp->xdp_prog);
/* With XDP each buffer is a whole page, otherwise a fl_bufsz fragment */
true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
&rx_ring->xdp_rxq);
tx_ring = r_vec->xdp_ring;
while (pkts_polled < budget) {
unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
struct nfp_net_rx_buf *rxbuf;
struct nfp_net_rx_desc *rxd;
struct nfp_meta_parsed meta;
bool redir_egress = false;
struct net_device *netdev;
dma_addr_t new_dma_addr;
u32 meta_len_xdp = 0;
void *new_frag;
idx = D_IDX(rx_ring, rx_ring->rd_p);
rxd = &rx_ring->rxds[idx];
/* Stop at the first descriptor without the DD bit set */
if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
break;
/* Memory barrier to ensure that we won't do other reads
 * before the DD bit.
 */
dma_rmb();
memset(&meta, 0, sizeof(meta));
rx_ring->rd_p++;
pkts_polled++;
rxbuf = &rx_ring->rxbufs[idx];
/*         < meta_len >
 *  <-- [rx_offset] -->
 *  ---------------------------------------------------------
 * | [XX] |  metadata  |             packet           | XXXX |
 *  ---------------------------------------------------------
 *         <---------------- data_len --------------->
 *
 * The rx_offset is fixed for all packets, the meta_len can vary
 * on a packet by packet basis. If rx_offset is set to zero
 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
 * buffer and is immediately followed by the packet (no [XX]).
 */
meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
data_len = le16_to_cpu(rxd->rxd.data_len);
pkt_len = data_len - meta_len;
pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
pkt_off += meta_len;
else
pkt_off += dp->rx_offset;
meta_off = pkt_off - meta_len;
/* Stats update */
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_pkts++;
r_vec->rx_bytes += pkt_len;
u64_stats_update_end(&r_vec->rx_sync);
/* Metadata must fit the maximum prepend and, with a fixed offset,
 * the space in front of the packet.
 */
if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
(dp->rx_offset && meta_len > dp->rx_offset))) {
nn_dp_warn(dp, "oversized RX packet metadata %u\n",
meta_len);
nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
continue;
}
nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
data_len);
if (meta_len) {
/* nfp_nfdk_parse_meta() returns true on malformed metadata */
if (unlikely(nfp_nfdk_parse_meta(dp->netdev, &meta,
rxbuf->frag + meta_off,
rxbuf->frag + pkt_off,
pkt_len, meta_len))) {
nn_dp_warn(dp, "invalid RX packet metadata\n");
nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
NULL);
continue;
}
}
/* XDP only runs on packets that carry no port id in their metadata */
if (xdp_prog && !meta.portid) {
void *orig_data = rxbuf->frag + pkt_off;
unsigned int dma_off;
int act;
xdp_prepare_buff(&xdp,
rxbuf->frag + NFP_NET_RX_BUF_HEADROOM,
pkt_off - NFP_NET_RX_BUF_HEADROOM,
pkt_len, true);
act = bpf_prog_run_xdp(xdp_prog, &xdp);
/* The program may have moved the start or end of the packet */
pkt_len = xdp.data_end - xdp.data;
pkt_off += xdp.data - orig_data;
switch (act) {
case XDP_PASS:
meta_len_xdp = xdp.data - xdp.data_meta;
break;
case XDP_TX:
dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
if (unlikely(!nfp_nfdk_tx_xdp_buf(dp, rx_ring,
tx_ring,
rxbuf,
dma_off,
pkt_len,
&xdp_tx_cmpl)))
trace_xdp_exception(dp->netdev,
xdp_prog, act);
continue;
default:
bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(dp->netdev, xdp_prog, act);
fallthrough;
case XDP_DROP:
/* Recycle the buffer straight back onto the freelist */
nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
rxbuf->dma_addr);
continue;
}
}
/* Pick the destination: this netdev, the app's control message
 * handler, or the netdev the app maps the port id to.
 */
if (likely(!meta.portid)) {
netdev = dp->netdev;
} else if (meta.portid == NFP_META_PORT_ID_CTRL) {
struct nfp_net *nn = netdev_priv(dp->netdev);
nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
pkt_len);
nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
rxbuf->dma_addr);
continue;
} else {
struct nfp_net *nn;
nn = netdev_priv(dp->netdev);
netdev = nfp_app_dev_get(nn->app, meta.portid,
&redir_egress);
if (unlikely(!netdev)) {
nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
NULL);
continue;
}
if (nfp_netdev_is_nfp_repr(netdev))
nfp_repr_inc_rx_stats(netdev, pkt_len);
}
skb = build_skb(rxbuf->frag, true_bufsz);
if (unlikely(!skb)) {
nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
continue;
}
/* The skb now owns the old buffer; a replacement is needed for the
 * freelist.  If that fails the packet is dropped and the old buffer
 * is reused.
 */
new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr);
if (unlikely(!new_frag)) {
nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
continue;
}
nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
skb_reserve(skb, pkt_off);
skb_put(skb, pkt_len);
skb->mark = meta.mark;
skb_set_hash(skb, meta.hash, meta.hash_type);
skb_record_rx_queue(skb, rx_ring->idx);
skb->protocol = eth_type_trans(skb, netdev);
nfp_nfdk_rx_csum(dp, r_vec, rxd, &meta, skb);
if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
le16_to_cpu(rxd->rxd.vlan));
if (meta_len_xdp)
skb_metadata_set(skb, meta_len_xdp);
if (likely(!redir_egress)) {
napi_gro_receive(&rx_ring->r_vec->napi, skb);
} else {
/* The app asked for the packet to be sent out of @netdev instead:
 * restore the Ethernet header and queue it for transmission.
 */
skb->dev = netdev;
skb_reset_network_header(skb);
__skb_push(skb, ETH_HLEN);
dev_queue_xmit(skb);
}
}
if (xdp_prog) {
/* Kick the XDP TX ring if frames were queued during this poll;
 * otherwise reap outstanding completions unless already done.
 */
if (tx_ring->wr_ptr_add)
nfp_net_tx_xmit_more_flush(tx_ring);
else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
!xdp_tx_cmpl)
if (!nfp_nfdk_xdp_complete(tx_ring))
pkts_polled = budget;
}
return pkts_polled;
}
/**
* nfp_nfdk_poll() - napi poll function
* @napi: NAPI structure
* @budget: NAPI budget
*
* Return: number of packets polled.
*/
int nfp_nfdk_poll(struct napi_struct *napi, int budget)
{
struct nfp_net_r_vector *r_vec =
container_of(napi, struct nfp_net_r_vector, napi);
unsigned int pkts_polled = 0;
if (r_vec->tx_ring)
nfp_nfdk_tx_complete(r_vec->tx_ring, budget);
if (r_vec->rx_ring)
pkts_polled = nfp_nfdk_rx(r_vec->rx_ring, budget);
if (pkts_polled < budget)
if (napi_complete_done(napi, pkts_polled))
nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) {
struct dim_sample dim_sample = {};
unsigned int start;
u64 pkts, bytes;
do {
start = u64_stats_fetch_begin(&r_vec->rx_sync);
pkts = r_vec->rx_pkts;
bytes = r_vec->rx_bytes;
} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
net_dim(&r_vec->rx_dim, dim_sample);
}
if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
struct dim_sample dim_sample = {};
unsigned int start;
u64 pkts, bytes;
do {
start = u64_stats_fetch_begin(&r_vec->tx_sync);
pkts = r_vec->tx_pkts;
bytes = r_vec->tx_bytes;
} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
net_dim(&r_vec->tx_dim, dim_sample);
}
return pkts_polled;
}
/* Control device data path
*/
/* Transmit one control message skb on the control vNIC's TX ring.
 *
 * @nn:    device the message is sent on
 * @r_vec: ring vector whose TX ring and backlog queue are used
 * @skb:   message to send; must not carry page fragments
 * @old:   when the ring is full the skb is put on r_vec->queue - at the
 *         head if @old is true, at the tail otherwise
 *
 * Returns NETDEV_TX_BUSY if the ring was full and the skb was queued for
 * later, NETDEV_TX_OK otherwise - both when the skb was handed to the
 * device and when it was dropped (and freed) because of an error.
 */
bool
nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
struct sk_buff *skb, bool old)
{
u32 cnt, tmp_dlen, dlen_type = 0;
struct nfp_net_tx_ring *tx_ring;
struct nfp_nfdk_tx_buf *txbuf;
struct nfp_nfdk_tx_desc *txd;
unsigned int dma_len, type;
struct nfp_net_dp *dp;
dma_addr_t dma_addr;
u64 metadata = 0;
int wr_idx;
dp = &r_vec->nfp_net->dp;
tx_ring = r_vec->tx_ring;
if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) {
nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
goto err_free;
}
/* Don't bother counting frags, assume the worst */
if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_busy++;
u64_stats_update_end(&r_vec->tx_sync);
if (!old)
__skb_queue_tail(&r_vec->queue, skb);
else
__skb_queue_head(&r_vec->queue, skb);
return NETDEV_TX_BUSY;
}
/* If the app uses metadata on control messages, prepend 8 bytes: a
 * word with the metadata length and field type, then the control
 * port id.
 */
if (nfp_app_ctrl_has_meta(nn->app)) {
if (unlikely(skb_headroom(skb) < 8)) {
nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
goto err_free;
}
metadata = NFDK_DESC_TX_CHAIN_META;
put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
put_unaligned_be32(FIELD_PREP(NFDK_META_LEN, 8) |
FIELD_PREP(NFDK_META_FIELDS,
NFP_NET_META_PORTID),
skb_push(skb, 4));
}
if (nfp_nfdk_tx_maybe_close_block(tx_ring, 0, skb))
goto err_free;
/* DMA map all */
wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
txd = &tx_ring->ktxds[wr_idx];
txbuf = &tx_ring->ktxbufs[wr_idx];
dma_len = skb_headlen(skb);
if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
type = NFDK_DESC_TX_TYPE_SIMPLE;
else
type = NFDK_DESC_TX_TYPE_GATHER;
dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
if (dma_mapping_error(dp->dev, dma_addr))
goto err_warn_dma;
/* First buffer slot holds the skb, the second the head mapping */
txbuf->skb = skb;
txbuf++;
txbuf->dma_addr = dma_addr;
txbuf++;
/* The length field holds "length - 1"; FIELD_PREP() truncates it to
 * what fits into the head descriptor.
 */
dma_len -= 1;
dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);
txd->dma_len_type = cpu_to_le16(dlen_type);
nfp_desc_set_dma_addr(txd, dma_addr);
/* Advance past what the head descriptor covers */
tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
dma_len -= tmp_dlen;
dma_addr += tmp_dlen + 1;
txd++;
/* Remaining data goes into follow-up descriptors */
while (dma_len > 0) {
dma_len -= 1;
dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
txd->dma_len_type = cpu_to_le16(dlen_type);
nfp_desc_set_dma_addr(txd, dma_addr);
dlen_type &= NFDK_DESC_TX_DMA_LEN;
dma_len -= dlen_type;
dma_addr += dlen_type + 1;
txd++;
}
/* Mark the last data descriptor as end of packet */
(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);
/* Metadata desc */
txd->raw = cpu_to_le64(metadata);
txd++;
/* All descriptors of one packet must sit in the same block */
cnt = txd - tx_ring->ktxds - wr_idx;
if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
goto err_warn_overflow;
tx_ring->wr_p += cnt;
/* Data accounting restarts whenever a block is exactly filled */
if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
tx_ring->data_pending += skb->len;
else
tx_ring->data_pending = 0;
tx_ring->wr_ptr_add += cnt;
nfp_net_tx_xmit_more_flush(tx_ring);
return NETDEV_TX_OK;
err_warn_overflow:
WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
wr_idx, skb_headlen(skb), 0, cnt);
/* Step back to the slot holding the head mapping and undo it */
txbuf--;
dma_unmap_single(dp->dev, txbuf->dma_addr,
skb_headlen(skb), DMA_TO_DEVICE);
txbuf->raw = 0;
/* NOTE(review): falls through into the DMA-mapping warning below, so an
 * overflow also logs "Failed to map DMA TX buffer" - confirm intended.
 */
err_warn_dma:
nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_free:
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_errors++;
u64_stats_update_end(&r_vec->tx_sync);
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
/* Send the control messages backlogged on r_vec->queue, oldest first,
 * stopping as soon as nfp_nfdk_ctrl_tx_one() reports a non-zero result.
 */
static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec)
{
	struct sk_buff *skb;

	for (;;) {
		skb = __skb_dequeue(&r_vec->queue);
		if (!skb)
			break;

		if (nfp_nfdk_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true))
			return;
	}
}
/* Validate the metadata prepend of a received control message.
 *
 * Without app metadata support the prepend must be empty.  With it, the
 * prepend must be exactly 8 bytes: a port id field type word followed by
 * the control port id.
 */
static bool
nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len)
{
	if (!nfp_app_ctrl_has_meta(nn->app))
		return !meta_len;

	if (meta_len != 8)
		return false;

	if (get_unaligned_be32(data) != NFP_NET_META_PORTID)
		return false;

	return get_unaligned_be32(data + 4) == NFP_META_PORT_ID_CTRL;
}
/* Receive a single control message from @rx_ring.
 *
 * Returns false if the next descriptor has not been completed by the
 * device yet, true if a descriptor was consumed (whether the message was
 * delivered to the app or dropped).
 */
static bool
nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp,
		struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring)
{
	unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
	int idx = D_IDX(rx_ring, rx_ring->rd_p);
	struct nfp_net_rx_desc *rxd = &rx_ring->rxds[idx];
	struct nfp_net_rx_buf *rxbuf;
	dma_addr_t fresh_dma;
	struct sk_buff *skb;
	void *fresh_frag;

	if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
		return false;

	/* Memory barrier to ensure that we won't do other reads
	 * before the DD bit.
	 */
	dma_rmb();

	rx_ring->rd_p++;
	rxbuf = &rx_ring->rxbufs[idx];

	meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
	data_len = le16_to_cpu(rxd->rxd.data_len);
	pkt_len = data_len - meta_len;

	/* With a dynamic RX offset the packet directly follows the
	 * metadata, otherwise it starts at the fixed offset.
	 */
	pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
	pkt_off += dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC ?
		   meta_len : dp->rx_offset;
	meta_off = pkt_off - meta_len;

	/* Stats update */
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_pkts++;
	r_vec->rx_bytes += pkt_len;
	u64_stats_update_end(&r_vec->rx_sync);

	nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len);

	if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
		nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
			   meta_len);
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}

	skb = build_skb(rxbuf->frag, dp->fl_bufsz);
	if (unlikely(!skb)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}

	/* The skb takes over the old buffer, post a replacement */
	fresh_frag = nfp_nfdk_napi_alloc_one(dp, &fresh_dma);
	if (unlikely(!fresh_frag)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
		return true;
	}

	nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
	nfp_nfdk_rx_give_one(dp, rx_ring, fresh_frag, fresh_dma);

	skb_reserve(skb, pkt_off);
	skb_put(skb, pkt_len);

	nfp_app_ctrl_rx(nn->app, skb);

	return true;
}
/* Receive control messages until the ring is empty or a fixed budget of
 * 512 messages has been used up.
 *
 * Returns true if the ring was found empty while budget was still left,
 * false if the whole budget was consumed (more messages may be pending).
 */
static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
{
	struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
	struct nfp_net *nn = r_vec->nfp_net;
	struct nfp_net_dp *dp = &nn->dp;
	unsigned int budget = 512;

	/* Check the budget before receiving and only decrement it after a
	 * message was actually consumed: a post-decrement inside the loop
	 * condition would wrap the unsigned counter from 0 to UINT_MAX on
	 * exhaustion and report the ring as drained.
	 */
	while (budget && nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring))
		budget--;

	return budget != 0;
}
/* Tasklet handler of the control vNIC: reap TX completions, push out
 * backlogged messages, then receive.  The interrupt is unmasked when
 * nfp_ctrl_rx() returns true; otherwise the tasklet is scheduled again.
 */
void nfp_nfdk_ctrl_poll(struct tasklet_struct *t)
{
	struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet);

	spin_lock(&r_vec->lock);
	nfp_nfdk_tx_complete(r_vec->tx_ring, 0);
	__nfp_ctrl_tx_queued(r_vec);
	spin_unlock(&r_vec->lock);

	if (!nfp_ctrl_rx(r_vec)) {
		tasklet_schedule(&r_vec->tasklet);
		nn_dp_warn(&r_vec->nfp_net->dp,
			   "control message budget exceeded!\n");
		return;
	}

	nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
}
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (C) 2019 Netronome Systems, Inc. */
#ifndef _NFP_DP_NFDK_H_
#define _NFP_DP_NFDK_H_
#include <linux/bitops.h>
#include <linux/types.h>
#define NFDK_TX_DESC_PER_SIMPLE_PKT 2
#define NFDK_TX_MAX_DATA_PER_HEAD SZ_4K
#define NFDK_TX_MAX_DATA_PER_DESC SZ_16K
#define NFDK_TX_DESC_BLOCK_SZ 256
#define NFDK_TX_DESC_BLOCK_CNT (NFDK_TX_DESC_BLOCK_SZ / \
sizeof(struct nfp_nfdk_tx_desc))
#define NFDK_TX_DESC_STOP_CNT (NFDK_TX_DESC_BLOCK_CNT * \
NFDK_TX_DESC_PER_SIMPLE_PKT)
#define NFDK_TX_MAX_DATA_PER_BLOCK SZ_64K
#define NFDK_TX_DESC_GATHER_MAX 17
/* TX descriptor format */
#define NFDK_DESC_TX_MSS_MASK GENMASK(13, 0)
#define NFDK_DESC_TX_CHAIN_META BIT(3)
#define NFDK_DESC_TX_ENCAP BIT(2)
#define NFDK_DESC_TX_L4_CSUM BIT(1)
#define NFDK_DESC_TX_L3_CSUM BIT(0)
#define NFDK_DESC_TX_DMA_LEN_HEAD GENMASK(11, 0)
#define NFDK_DESC_TX_TYPE_HEAD GENMASK(15, 12)
#define NFDK_DESC_TX_DMA_LEN GENMASK(13, 0)
#define NFDK_DESC_TX_TYPE_NOP 0
#define NFDK_DESC_TX_TYPE_GATHER 1
#define NFDK_DESC_TX_TYPE_TSO 2
#define NFDK_DESC_TX_TYPE_SIMPLE 8
#define NFDK_DESC_TX_EOP BIT(14)
#define NFDK_META_LEN GENMASK(7, 0)
#define NFDK_META_FIELDS GENMASK(31, 8)
#define D_BLOCK_CPL(idx) (NFDK_TX_DESC_BLOCK_CNT - \
(idx) % NFDK_TX_DESC_BLOCK_CNT)
/* One 64-bit NFDK TX descriptor.  The same 8 bytes are interpreted in one
 * of several ways: as a data descriptor (buffer address plus length/type),
 * as an LSO descriptor, or as a metadata descriptor carrying TX flags.
 * @vals and @raw give untyped access to the whole descriptor.
 */
struct nfp_nfdk_tx_desc {
union {
/* Data descriptor */
struct {
u8 dma_addr_hi; /* High bits of host buf address */
u8 padding; /* Must be zero */
__le16 dma_len_type; /* Length to DMA for this desc */
__le32 dma_addr_lo; /* Low 32bit of host buf addr */
};
/* LSO descriptor */
struct {
__le16 mss; /* MSS to be used for LSO */
u8 lso_hdrlen; /* LSO, TCP payload offset */
u8 lso_totsegs; /* LSO, total segments */
u8 l3_offset; /* L3 header offset */
u8 l4_offset; /* L4 header offset */
__le16 lso_meta_res; /* Rsvd bits in TSO metadata */
};
/* Metadata descriptor */
struct {
u8 flags; /* TX Flags, see @NFDK_DESC_TX_* */
u8 reserved[7]; /* meta byte placeholder */
};
__le32 vals[2];
__le64 raw;
};
};
/* The device doesn't make use of the 2 or 3 least significant bits of the
 * address due to alignment constraints. The driver can make use of those
 * bits to carry information about the buffer before giving it to the device.
 *
 * NOTE: The driver must clear the lower bits before handing the buffer to the
 * device.
 *
 * - NFDK_TX_BUF_INFO_SOP - Start of a packet
 * Mark the buffer as a start of a packet. This is used in the XDP TX process
 * to stash virtual and DMA address so that they can be recycled when the TX
 * operation is completed.
 *
 * NFDK_TX_BUF_PTR() strips the information bits and yields the pointer
 * value; NFDK_TX_BUF_INFO() extracts the information bits only. The mask
 * covers the bits below the size of a pointer.
 */
#define NFDK_TX_BUF_PTR(val) ((val) & ~(sizeof(void *) - 1))
#define NFDK_TX_BUF_INFO(val) ((val) & (sizeof(void *) - 1))
#define NFDK_TX_BUF_INFO_SOP BIT(0)
/* Software bookkeeping slot kept alongside each TX descriptor.  A packet
 * uses several successive slots, and what a slot holds depends on its
 * position within the packet, as noted on the members below.
 */
struct nfp_nfdk_tx_buf {
union {
/* First slot */
union {
struct sk_buff *skb; /* skb being transmitted */
void *frag; /* RX buffer sent via XDP TX */
unsigned long val; /* pointer value plus NFDK_TX_BUF_INFO_* bits */
};
/* 1 + nr_frags next slots */
dma_addr_t dma_addr;
/* TSO (optional) */
struct {
u32 pkt_cnt; /* packets accounted on completion */
u32 real_len; /* bytes accounted on completion */
};
/* Whole slot; zero means the slot is unused */
u64 raw;
};
};
/* Number of data descriptors needed for @headlen bytes of linear data.
 *
 * The first descriptor holds at most NFDK_TX_MAX_DATA_PER_HEAD bytes and
 * every following one at most NFDK_TX_MAX_DATA_PER_DESC, so the length is
 * padded by the difference before dividing by the larger size.
 */
static inline int nfp_nfdk_headlen_to_segs(unsigned int headlen)
{
	unsigned int padded = headlen +
			      NFDK_TX_MAX_DATA_PER_DESC -
			      NFDK_TX_MAX_DATA_PER_HEAD;

	return DIV_ROUND_UP(padded, NFDK_TX_MAX_DATA_PER_DESC);
}
/* NAPI poll handler of the NFDK data path */
int nfp_nfdk_poll(struct napi_struct *napi, int budget);
/* Transmit handler of the NFDK data path */
netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev);
/* Send a single control message; @old selects head or tail queueing of the
 * skb when the ring is full
 */
bool
nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
struct sk_buff *skb, bool old);
/* Tasklet handler of the control vNIC */
void nfp_nfdk_ctrl_poll(struct tasklet_struct *t);
/* Post the RX ring's buffers on the freelist */
void nfp_nfdk_rx_ring_fill_freelist(struct nfp_net_dp *dp,
struct nfp_net_rx_ring *rx_ring);
#endif
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2019 Netronome Systems, Inc. */
#include <linux/seq_file.h>
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "nfdk.h"
/* Return a TX ring to its pristine state.
 *
 * For a regular (non-XDP) ring every skb still outstanding is unmapped and
 * freed first.  The descriptor memory and all ring pointers are then
 * zeroed and, if the ring belongs to a netdev queue, that queue's
 * accounting is reset as well.
 */
static void
nfp_nfdk_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
{
	struct device *dev = dp->dev;
	struct netdev_queue *nd_q;

	while (!tx_ring->is_xdp && tx_ring->rd_p != tx_ring->wr_p) {
		struct nfp_nfdk_tx_buf *slot;
		unsigned int n_descs, len;
		struct sk_buff *skb;
		int i, nr_frags;

		slot = &tx_ring->ktxbufs[D_IDX(tx_ring, tx_ring->rd_p)];
		skb = slot->skb;
		if (!skb) {
			/* No skb in the first slot: skip to the next block */
			n_descs = D_BLOCK_CPL(tx_ring->rd_p);
			tx_ring->rd_p += n_descs;
			continue;
		}

		nr_frags = skb_shinfo(skb)->nr_frags;
		slot++;

		/* Unmap head */
		len = skb_headlen(skb);
		dma_unmap_single(dev, slot->dma_addr, len, DMA_TO_DEVICE);
		n_descs = 1 + nfp_nfdk_headlen_to_segs(len);
		slot++;

		/* Unmap frags, one buffer slot each */
		for (i = 0; i < nr_frags; i++) {
			const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			len = skb_frag_size(frag);
			dma_unmap_page(dev, slot->dma_addr, len,
				       DMA_TO_DEVICE);
			n_descs += DIV_ROUND_UP(len,
						NFDK_TX_MAX_DATA_PER_DESC);
			slot++;
		}

		/* GSO packets used one additional descriptor */
		if (skb_is_gso(skb))
			n_descs++;

		dev_kfree_skb_any(skb);
		tx_ring->rd_p += n_descs;
	}

	memset(tx_ring->txds, 0, tx_ring->size);
	tx_ring->data_pending = 0;
	tx_ring->wr_p = 0;
	tx_ring->rd_p = 0;
	tx_ring->qcp_rd_p = 0;
	tx_ring->wr_ptr_add = 0;

	if (tx_ring->is_xdp || !dp->netdev)
		return;

	nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
	netdev_tx_reset_queue(nd_q);
}
/* Release the buffer array and the descriptor memory of a TX ring and
 * clear the ring's size bookkeeping.  Safe to call on a partially
 * allocated ring.
 */
static void nfp_nfdk_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_dp *dp = &tx_ring->r_vec->nfp_net->dp;

	kvfree(tx_ring->ktxbufs);

	/* Descriptor memory may not have been allocated at all */
	if (tx_ring->ktxds)
		dma_free_coherent(dp->dev, tx_ring->size,
				  tx_ring->ktxds, tx_ring->dma);

	tx_ring->txbufs = NULL;
	tx_ring->txds = NULL;
	tx_ring->dma = 0;
	tx_ring->size = 0;
	tx_ring->cnt = 0;
}
/* Allocate the descriptor memory and the software buffer array of a TX
 * ring.  The ring holds NFDK_TX_DESC_PER_SIMPLE_PKT descriptors for each
 * of the dp->txd_cnt configured entries.
 *
 * Returns 0 on success, -ENOMEM if either allocation fails (anything
 * already allocated is released again).
 */
static int
nfp_nfdk_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
{
	tx_ring->cnt = dp->txd_cnt * NFDK_TX_DESC_PER_SIMPLE_PKT;
	tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->ktxds));

	tx_ring->ktxds = dma_alloc_coherent(dp->dev, tx_ring->size,
					    &tx_ring->dma,
					    GFP_KERNEL | __GFP_NOWARN);
	if (!tx_ring->ktxds) {
		netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n",
			    tx_ring->cnt);
		goto err_release;
	}

	tx_ring->ktxbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->ktxbufs),
				    GFP_KERNEL);
	if (!tx_ring->ktxbufs)
		goto err_release;

	/* Only stack-visible queues get an XPS mapping */
	if (dp->netdev && !tx_ring->is_xdp)
		netif_set_xps_queue(dp->netdev,
				    &tx_ring->r_vec->affinity_mask,
				    tx_ring->idx);

	return 0;

err_release:
	nfp_nfdk_tx_ring_free(tx_ring);
	return -ENOMEM;
}
/*
 * Intentionally empty: this implementation of the tx_ring_bufs_free
 * callback does nothing.
 * NOTE(review): presumably NFDK TX rings own no separately allocated
 * buffers - confirm against the NFD3 counterpart.
 */
static void
nfp_nfdk_tx_ring_bufs_free(struct nfp_net_dp *dp,
struct nfp_net_tx_ring *tx_ring)
{
}
/*
 * Intentionally a no-op: this implementation of the tx_ring_bufs_alloc
 * callback allocates nothing and always returns 0.
 * NOTE(review): presumably NFDK TX rings need no separately allocated
 * buffers - confirm against the NFD3 counterpart.
 */
static int
nfp_nfdk_tx_ring_bufs_alloc(struct nfp_net_dp *dp,
struct nfp_net_tx_ring *tx_ring)
{
return 0;
}
/**
 * nfp_nfdk_print_tx_descs() - Dump every descriptor of an NFDK TX ring
 * @file:	seq_file to print into
 * @r_vec:	ring vector (not used by this implementation)
 * @tx_ring:	TX ring whose descriptors are dumped
 * @d_rd_p:	read pointer to tag with D_RD
 * @d_wr_p:	write pointer to tag with D_WR
 *
 * Prints one line per descriptor with its two 32-bit words and the raw
 * value of the matching software buffer entry.  Lines whose index equals
 * one of the four pointers (modulo the ring size) get the matching tag:
 * H_RD / H_WR for the ring's own rd_p / wr_p, D_RD / D_WR for the
 * pointers passed in by the caller.
 */
static void
nfp_nfdk_print_tx_descs(struct seq_file *file,
			struct nfp_net_r_vector *r_vec,
			struct nfp_net_tx_ring *tx_ring,
			u32 d_rd_p, u32 d_wr_p)
{
	u32 ring_sz = tx_ring->cnt;
	int slot = 0;

	while (slot < ring_sz) {
		struct nfp_nfdk_tx_desc *desc = &tx_ring->ktxds[slot];

		seq_printf(file, "%04d: 0x%08x 0x%08x 0x%016llx", slot,
			   desc->vals[0], desc->vals[1],
			   tx_ring->ktxbufs[slot].raw);

		if (tx_ring->rd_p % ring_sz == slot)
			seq_puts(file, " H_RD");
		if (tx_ring->wr_p % ring_sz == slot)
			seq_puts(file, " H_WR");
		if (d_rd_p % ring_sz == slot)
			seq_puts(file, " D_RD");
		if (d_wr_p % ring_sz == slot)
			seq_puts(file, " D_WR");

		seq_putc(file, '\n');
		slot++;
	}
}
/* Set of NFP_NET_CFG_CTRL_* bits used as the cap_mask of nfp_nfdk_ops. */
#define NFP_NFDK_CFG_CTRL_SUPPORTED \
(NFP_NET_CFG_CTRL_ENABLE | NFP_NET_CFG_CTRL_PROMISC | \
NFP_NET_CFG_CTRL_L2BC | NFP_NET_CFG_CTRL_L2MC | \
NFP_NET_CFG_CTRL_RXCSUM | NFP_NET_CFG_CTRL_TXCSUM | \
NFP_NET_CFG_CTRL_RXVLAN | \
NFP_NET_CFG_CTRL_GATHER | NFP_NET_CFG_CTRL_LSO | \
NFP_NET_CFG_CTRL_CTAG_FILTER | NFP_NET_CFG_CTRL_CMSG_DATA | \
NFP_NET_CFG_CTRL_RINGCFG | NFP_NET_CFG_CTRL_IRQMOD | \
NFP_NET_CFG_CTRL_TXRWB | \
NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE | \
NFP_NET_CFG_CTRL_BPF | NFP_NET_CFG_CTRL_LSO2 | \
NFP_NET_CFG_CTRL_RSS2 | NFP_NET_CFG_CTRL_CSUM_COMPLETE | \
NFP_NET_CFG_CTRL_LIVE_ADDR)
/*
 * Data path operations table for NFD version NFP_NFD_VER_NFDK.
 * The TX ring callbacks are the static functions defined in this file;
 * the poll, xmit, control-message and RX free-list callbacks are defined
 * elsewhere.
 */
const struct nfp_dp_ops nfp_nfdk_ops = {
.version = NFP_NFD_VER_NFDK,
.tx_min_desc_per_pkt = NFDK_TX_DESC_PER_SIMPLE_PKT,
.cap_mask = NFP_NFDK_CFG_CTRL_SUPPORTED,
.poll = nfp_nfdk_poll,
.ctrl_poll = nfp_nfdk_ctrl_poll,
.xmit = nfp_nfdk_tx,
.ctrl_tx_one = nfp_nfdk_ctrl_tx_one,
.rx_ring_fill_freelist = nfp_nfdk_rx_ring_fill_freelist,
.tx_ring_alloc = nfp_nfdk_tx_ring_alloc,
.tx_ring_reset = nfp_nfdk_tx_ring_reset,
.tx_ring_free = nfp_nfdk_tx_ring_free,
.tx_ring_bufs_alloc = nfp_nfdk_tx_ring_bufs_alloc,
.tx_ring_bufs_free = nfp_nfdk_tx_ring_bufs_free,
.print_tx_descs = nfp_nfdk_print_tx_descs
};
...@@ -98,12 +98,19 @@ ...@@ -98,12 +98,19 @@
/* Forward declarations */ /* Forward declarations */
struct nfp_cpp; struct nfp_cpp;
struct nfp_dev_info; struct nfp_dev_info;
struct nfp_dp_ops;
struct nfp_eth_table_port; struct nfp_eth_table_port;
struct nfp_net; struct nfp_net;
struct nfp_net_r_vector; struct nfp_net_r_vector;
struct nfp_port; struct nfp_port;
struct xsk_buff_pool; struct xsk_buff_pool;
struct nfp_nfd3_tx_desc;
struct nfp_nfd3_tx_buf;
struct nfp_nfdk_tx_desc;
struct nfp_nfdk_tx_buf;
/* Convenience macro for wrapping descriptor index on ring size */ /* Convenience macro for wrapping descriptor index on ring size */
#define D_IDX(ring, idx) ((idx) & ((ring)->cnt - 1)) #define D_IDX(ring, idx) ((idx) & ((ring)->cnt - 1))
...@@ -117,97 +124,25 @@ struct xsk_buff_pool; ...@@ -117,97 +124,25 @@ struct xsk_buff_pool;
__d->dma_addr_hi = upper_32_bits(__addr) & 0xff; \ __d->dma_addr_hi = upper_32_bits(__addr) & 0xff; \
} while (0) } while (0)
/* TX descriptor format */
#define PCIE_DESC_TX_EOP BIT(7)
#define PCIE_DESC_TX_OFFSET_MASK GENMASK(6, 0)
#define PCIE_DESC_TX_MSS_MASK GENMASK(13, 0)
/* Flags in the host TX descriptor */
#define PCIE_DESC_TX_CSUM BIT(7)
#define PCIE_DESC_TX_IP4_CSUM BIT(6)
#define PCIE_DESC_TX_TCP_CSUM BIT(5)
#define PCIE_DESC_TX_UDP_CSUM BIT(4)
#define PCIE_DESC_TX_VLAN BIT(3)
#define PCIE_DESC_TX_LSO BIT(2)
#define PCIE_DESC_TX_ENCAP BIT(1)
#define PCIE_DESC_TX_O_IP4_CSUM BIT(0)
struct nfp_net_tx_desc {
union {
struct {
u8 dma_addr_hi; /* High bits of host buf address */
__le16 dma_len; /* Length to DMA for this desc */
u8 offset_eop; /* Offset in buf where pkt starts +
* highest bit is eop flag.
*/
__le32 dma_addr_lo; /* Low 32bit of host buf addr */
__le16 mss; /* MSS to be used for LSO */
u8 lso_hdrlen; /* LSO, TCP payload offset */
u8 flags; /* TX Flags, see @PCIE_DESC_TX_* */
union {
struct {
u8 l3_offset; /* L3 header offset */
u8 l4_offset; /* L4 header offset */
};
__le16 vlan; /* VLAN tag to add if indicated */
};
__le16 data_len; /* Length of frame + meta data */
} __packed;
__le32 vals[4];
__le64 vals8[2];
};
};
/**
* struct nfp_net_tx_buf - software TX buffer descriptor
* @skb: normal ring, sk_buff associated with this buffer
* @frag: XDP ring, page frag associated with this buffer
* @xdp: XSK buffer pool handle (for AF_XDP)
* @dma_addr: DMA mapping address of the buffer
* @fidx: Fragment index (-1 for the head and [0..nr_frags-1] for frags)
* @pkt_cnt: Number of packets to be produced out of the skb associated
* with this buffer (valid only on the head's buffer).
* Will be 1 for all non-TSO packets.
* @is_xsk_tx: Flag if buffer is a RX buffer after a XDP_TX action and not a
* buffer from the TX queue (for AF_XDP).
* @real_len: Number of bytes which to be produced out of the skb (valid only
* on the head's buffer). Equal to skb->len for non-TSO packets.
*/
struct nfp_net_tx_buf {
union {
struct sk_buff *skb;
void *frag;
struct xdp_buff *xdp;
};
dma_addr_t dma_addr;
union {
struct {
short int fidx;
u16 pkt_cnt;
};
struct {
bool is_xsk_tx;
};
};
u32 real_len;
};
/** /**
* struct nfp_net_tx_ring - TX ring structure * struct nfp_net_tx_ring - TX ring structure
* @r_vec: Back pointer to ring vector structure * @r_vec: Back pointer to ring vector structure
* @idx: Ring index from Linux's perspective * @idx: Ring index from Linux's perspective
* @qcidx: Queue Controller Peripheral (QCP) queue index for the TX queue * @data_pending: number of bytes added to current block (NFDK only)
* @qcp_q: Pointer to base of the QCP TX queue * @qcp_q: Pointer to base of the QCP TX queue
* @txrwb: TX pointer write back area
* @cnt: Size of the queue in number of descriptors * @cnt: Size of the queue in number of descriptors
* @wr_p: TX ring write pointer (free running) * @wr_p: TX ring write pointer (free running)
* @rd_p: TX ring read pointer (free running) * @rd_p: TX ring read pointer (free running)
* @qcp_rd_p: Local copy of QCP TX queue read pointer * @qcp_rd_p: Local copy of QCP TX queue read pointer
* @wr_ptr_add: Accumulated number of buffers to add to QCP write pointer * @wr_ptr_add: Accumulated number of buffers to add to QCP write pointer
* (used for .xmit_more delayed kick) * (used for .xmit_more delayed kick)
* @txbufs: Array of transmitted TX buffers, to free on transmit * @txbufs: Array of transmitted TX buffers, to free on transmit (NFD3)
* @txds: Virtual address of TX ring in host memory * @ktxbufs: Array of transmitted TX buffers, to free on transmit (NFDK)
* @txds: Virtual address of TX ring in host memory (NFD3)
* @ktxds: Virtual address of TX ring in host memory (NFDK)
*
* @qcidx: Queue Controller Peripheral (QCP) queue index for the TX queue
* @dma: DMA address of the TX ring * @dma: DMA address of the TX ring
* @size: Size, in bytes, of the TX ring (needed to free) * @size: Size, in bytes, of the TX ring (needed to free)
* @is_xdp: Is this a XDP TX ring? * @is_xdp: Is this a XDP TX ring?
...@@ -215,9 +150,10 @@ struct nfp_net_tx_buf { ...@@ -215,9 +150,10 @@ struct nfp_net_tx_buf {
struct nfp_net_tx_ring { struct nfp_net_tx_ring {
struct nfp_net_r_vector *r_vec; struct nfp_net_r_vector *r_vec;
u32 idx; u16 idx;
int qcidx; u16 data_pending;
u8 __iomem *qcp_q; u8 __iomem *qcp_q;
u64 *txrwb;
u32 cnt; u32 cnt;
u32 wr_p; u32 wr_p;
...@@ -226,8 +162,17 @@ struct nfp_net_tx_ring { ...@@ -226,8 +162,17 @@ struct nfp_net_tx_ring {
u32 wr_ptr_add; u32 wr_ptr_add;
struct nfp_net_tx_buf *txbufs; union {
struct nfp_net_tx_desc *txds; struct nfp_nfd3_tx_buf *txbufs;
struct nfp_nfdk_tx_buf *ktxbufs;
};
union {
struct nfp_nfd3_tx_desc *txds;
struct nfp_nfdk_tx_desc *ktxds;
};
/* Cold data follows */
int qcidx;
dma_addr_t dma; dma_addr_t dma;
size_t size; size_t size;
...@@ -479,13 +424,17 @@ struct nfp_net_fw_version { ...@@ -479,13 +424,17 @@ struct nfp_net_fw_version {
u8 minor; u8 minor;
u8 major; u8 major;
u8 class; u8 class;
u8 resv;
/* This byte can be exploited for more use, currently,
* BIT0: dp type, BIT[7:1]: reserved
*/
u8 extend;
} __packed; } __packed;
static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
u8 resv, u8 class, u8 major, u8 minor) u8 extend, u8 class, u8 major, u8 minor)
{ {
return fw_ver->resv == resv && return fw_ver->extend == extend &&
fw_ver->class == class && fw_ver->class == class &&
fw_ver->major == major && fw_ver->major == major &&
fw_ver->minor == minor; fw_ver->minor == minor;
...@@ -513,8 +462,11 @@ struct nfp_stat_pair { ...@@ -513,8 +462,11 @@ struct nfp_stat_pair {
* @rx_rings: Array of pre-allocated RX ring structures * @rx_rings: Array of pre-allocated RX ring structures
* @ctrl_bar: Pointer to mapped control BAR * @ctrl_bar: Pointer to mapped control BAR
* *
* @txd_cnt: Size of the TX ring in number of descriptors * @ops: Callbacks and parameters for this vNIC's NFD version
* @rxd_cnt: Size of the RX ring in number of descriptors * @txrwb: TX pointer write back area (indexed by queue id)
* @txrwb_dma: TX pointer write back area DMA address
* @txd_cnt: Size of the TX ring in number of min size packets
* @rxd_cnt: Size of the RX ring in number of min size packets
* @num_r_vecs: Number of used ring vectors * @num_r_vecs: Number of used ring vectors
* @num_tx_rings: Currently configured number of TX rings * @num_tx_rings: Currently configured number of TX rings
* @num_stack_tx_rings: Number of TX rings used by the stack (not XDP) * @num_stack_tx_rings: Number of TX rings used by the stack (not XDP)
...@@ -547,6 +499,11 @@ struct nfp_net_dp { ...@@ -547,6 +499,11 @@ struct nfp_net_dp {
/* Cold data follows */ /* Cold data follows */
const struct nfp_dp_ops *ops;
u64 *txrwb;
dma_addr_t txrwb_dma;
unsigned int txd_cnt; unsigned int txd_cnt;
unsigned int rxd_cnt; unsigned int rxd_cnt;
...@@ -915,11 +872,13 @@ static inline void nn_ctrl_bar_unlock(struct nfp_net *nn) ...@@ -915,11 +872,13 @@ static inline void nn_ctrl_bar_unlock(struct nfp_net *nn)
/* Globals */ /* Globals */
extern const char nfp_driver_version[]; extern const char nfp_driver_version[];
extern const struct net_device_ops nfp_net_netdev_ops; extern const struct net_device_ops nfp_nfd3_netdev_ops;
extern const struct net_device_ops nfp_nfdk_netdev_ops;
static inline bool nfp_netdev_is_nfp_net(struct net_device *netdev) static inline bool nfp_netdev_is_nfp_net(struct net_device *netdev)
{ {
return netdev->netdev_ops == &nfp_net_netdev_ops; return netdev->netdev_ops == &nfp_nfd3_netdev_ops ||
netdev->netdev_ops == &nfp_nfdk_netdev_ops;
} }
static inline int nfp_net_coalesce_para_check(u32 usecs, u32 pkts) static inline int nfp_net_coalesce_para_check(u32 usecs, u32 pkts)
...@@ -960,7 +919,6 @@ int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd); ...@@ -960,7 +919,6 @@ int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd);
void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 update); void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 update);
int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn); int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn);
void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr);
unsigned int unsigned int
nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries, nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries,
unsigned int min_irqs, unsigned int want_irqs); unsigned int min_irqs, unsigned int want_irqs);
...@@ -968,19 +926,10 @@ void nfp_net_irqs_disable(struct pci_dev *pdev); ...@@ -968,19 +926,10 @@ void nfp_net_irqs_disable(struct pci_dev *pdev);
void void
nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries, nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries,
unsigned int n); unsigned int n);
struct sk_buff *
void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring); nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget); struct sk_buff *skb, u64 *tls_handle, int *nr_frags);
void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle);
bool
nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
void *data, void *pkt, unsigned int pkt_len, int meta_len);
void nfp_net_rx_csum(const struct nfp_net_dp *dp,
struct nfp_net_r_vector *r_vec,
const struct nfp_net_rx_desc *rxd,
const struct nfp_meta_parsed *meta,
struct sk_buff *skb);
struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn); struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn);
int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new, int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new,
......
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ /* Copyright (C) 2015-2019 Netronome Systems, Inc. */
/* /*
* nfp_net_common.c * nfp_net_common.c
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
#include <linux/bitfield.h> #include <linux/bitfield.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/init.h> #include <linux/init.h>
...@@ -46,6 +45,7 @@ ...@@ -46,6 +45,7 @@
#include "nfp_app.h" #include "nfp_app.h"
#include "nfp_net_ctrl.h" #include "nfp_net_ctrl.h"
#include "nfp_net.h" #include "nfp_net.h"
#include "nfp_net_dp.h"
#include "nfp_net_sriov.h" #include "nfp_net_sriov.h"
#include "nfp_net_xsk.h" #include "nfp_net_xsk.h"
#include "nfp_port.h" #include "nfp_port.h"
...@@ -72,35 +72,6 @@ u32 nfp_qcp_queue_offset(const struct nfp_dev_info *dev_info, u16 queue) ...@@ -72,35 +72,6 @@ u32 nfp_qcp_queue_offset(const struct nfp_dev_info *dev_info, u16 queue)
return dev_info->qc_addr_offset + NFP_QCP_QUEUE_ADDR_SZ * queue; return dev_info->qc_addr_offset + NFP_QCP_QUEUE_ADDR_SZ * queue;
} }
/*
 * DMA-map an RX buffer, starting NFP_NET_RX_BUF_HEADROOM bytes into @frag
 * and covering the free-list buffer size minus NFP_NET_RX_BUF_NON_DATA.
 * The mapping is created with DMA_ATTR_SKIP_CPU_SYNC.
 */
static dma_addr_t nfp_net_dma_map_rx(struct nfp_net_dp *dp, void *frag)
{
	void *data_start = frag + NFP_NET_RX_BUF_HEADROOM;

	return dma_map_single_attrs(dp->dev, data_start,
				    dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
				    dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
}
/*
 * Sync the mapped part of an RX buffer (free-list buffer size minus
 * NFP_NET_RX_BUF_NON_DATA) for device access.
 */
static void
nfp_net_dma_sync_dev_rx(const struct nfp_net_dp *dp, dma_addr_t dma_addr)
{
	struct device *dev = dp->dev;

	dma_sync_single_for_device(dev, dma_addr,
				   dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
				   dp->rx_dma_dir);
}
/*
 * Undo an RX buffer mapping of free-list buffer size minus
 * NFP_NET_RX_BUF_NON_DATA bytes, using DMA_ATTR_SKIP_CPU_SYNC.
 */
static void nfp_net_dma_unmap_rx(struct nfp_net_dp *dp, dma_addr_t dma_addr)
{
	struct device *dev = dp->dev;

	dma_unmap_single_attrs(dev, dma_addr,
			       dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
			       dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
}
/*
 * Sync @len bytes of an RX buffer for CPU access.  The sync starts
 * NFP_NET_RX_BUF_HEADROOM bytes below @dma_addr.
 */
static void nfp_net_dma_sync_cpu_rx(struct nfp_net_dp *dp, dma_addr_t dma_addr,
				    unsigned int len)
{
	dma_addr_t sync_start = dma_addr - NFP_NET_RX_BUF_HEADROOM;

	dma_sync_single_for_cpu(dp->dev, sync_start, len, dp->rx_dma_dir);
}
/* Firmware reconfig /* Firmware reconfig
* *
* Firmware reconfig may take a while so we have two versions of it - * Firmware reconfig may take a while so we have two versions of it -
...@@ -383,19 +354,6 @@ int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd) ...@@ -383,19 +354,6 @@ int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd)
/* Interrupt configuration and handling /* Interrupt configuration and handling
*/ */
/**
 * nfp_net_irq_unmask() - Re-enable an interrupt that was automasked
 * @nn:       NFP Network structure
 * @entry_nr: MSI-X table entry
 *
 * Writes NFP_NET_CFG_ICR_UNMASKED to the ICR of @entry_nr and then
 * flushes the write.
 */
void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr)
{
	unsigned int icr_off = NFP_NET_CFG_ICR(entry_nr);

	nn_writeb(nn, icr_off, NFP_NET_CFG_ICR_UNMASKED);
	nn_pci_flush(nn);
}
/** /**
* nfp_net_irqs_alloc() - allocates MSI-X irqs * nfp_net_irqs_alloc() - allocates MSI-X irqs
* @pdev: PCI device structure * @pdev: PCI device structure
...@@ -577,49 +535,6 @@ static irqreturn_t nfp_net_irq_exn(int irq, void *data) ...@@ -577,49 +535,6 @@ static irqreturn_t nfp_net_irq_exn(int irq, void *data)
return IRQ_HANDLED; return IRQ_HANDLED;
} }
/**
 * nfp_net_tx_ring_init() - Set up the static fields of a TX ring
 * @tx_ring:  TX ring structure
 * @r_vec:    IRQ vector servicing this ring
 * @idx:      Ring index
 * @is_xdp:   Is this an XDP TX ring?
 *
 * Records the ring's index, vector and XDP flag, initialises the vector's
 * TX stats sync object, and derives the queue controller index and queue
 * address from the ring index and the vNIC's TX stride.
 */
static void
nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring,
		     struct nfp_net_r_vector *r_vec, unsigned int idx,
		     bool is_xdp)
{
	struct nfp_net *nn = r_vec->nfp_net;

	tx_ring->r_vec = r_vec;
	tx_ring->idx = idx;
	tx_ring->is_xdp = is_xdp;
	u64_stats_init(&r_vec->tx_sync);

	tx_ring->qcidx = tx_ring->idx * nn->stride_tx;
	tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx);
}
/**
 * nfp_net_rx_ring_init() - Set up the static fields of an RX ring
 * @rx_ring:  RX ring structure
 * @r_vec:    IRQ vector servicing this ring
 * @idx:      Ring index
 *
 * Records the ring's index and vector, initialises the vector's RX stats
 * sync object, and derives the free-list queue controller index and queue
 * address from the ring index and the vNIC's RX stride.
 */
static void
nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring,
		     struct nfp_net_r_vector *r_vec, unsigned int idx)
{
	struct nfp_net *nn = r_vec->nfp_net;

	rx_ring->r_vec = r_vec;
	rx_ring->idx = idx;
	u64_stats_init(&r_vec->rx_sync);

	rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx;
	rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx);
}
/** /**
* nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN) * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN)
* @nn: NFP Network structure * @nn: NFP Network structure
...@@ -667,178 +582,7 @@ static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset, ...@@ -667,178 +582,7 @@ static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
free_irq(nn->irq_entries[vector_idx].vector, nn); free_irq(nn->irq_entries[vector_idx].vector, nn);
} }
/* Transmit struct sk_buff *
*
* One queue controller peripheral queue is used for transmit. The
* driver en-queues packets for transmit by advancing the write
* pointer. The device indicates that packets have transmitted by
* advancing the read pointer. The driver maintains a local copy of
* the read and write pointer in @struct nfp_net_tx_ring. The driver
* keeps @wr_p in sync with the queue controller write pointer and can
* determine how many packets have been transmitted by comparing its
* copy of the read pointer @rd_p with the read pointer maintained by
* the queue controller peripheral.
*/
/**
* nfp_net_tx_full() - Check if the TX ring is full
* @tx_ring: TX ring to check
* @dcnt: Number of descriptors that need to be enqueued (must be >= 1)
*
* This function checks, based on the *host copy* of read/write
* pointer if a given TX ring is full. The real TX queue may have
* some newly made available slots.
*
* Return: True if the ring is full.
*/
static int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt)
{
return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt);
}
/* Helpers deciding when a TX queue should be stopped or restarted */

/* A ring may be woken once it has room for MAX_SKB_FRAGS * 4 descriptors */
static int nfp_net_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
{
	if (nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4))
		return 0;

	return 1;
}
/* A ring must be stopped when it lacks room for MAX_SKB_FRAGS + 1 descriptors */
static int nfp_net_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
{
	int no_room = nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1);

	return no_room;
}
/**
 * nfp_net_tx_ring_stop() - stop tx ring
 * @nd_q: netdev queue
 * @tx_ring: driver tx queue structure
 *
 * Safely stop TX ring. Remember that while we are running .start_xmit()
 * someone else may be cleaning the TX ring completions so we need to be
 * extra careful here.
 *
 * The queue is stopped first; after a full memory barrier the ring is
 * re-checked and the queue restarted if there is room to wake it.
 */
static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q,
struct nfp_net_tx_ring *tx_ring)
{
netif_tx_stop_queue(nd_q);
/* We can race with the TX completion out of NAPI so recheck */
smp_mb();
/* Room appeared between the caller's check and the stop: undo the stop */
if (unlikely(nfp_net_tx_ring_should_wake(tx_ring)))
netif_tx_start_queue(nd_q);
}
/**
 * nfp_net_tx_tso() - Fill in the LSO fields of a TX descriptor
 * @r_vec: per-ring structure
 * @txbuf: Pointer to driver soft TX descriptor
 * @txd: Pointer to HW TX descriptor
 * @skb: Pointer to SKB
 * @md_bytes: Prepend length
 *
 * Does nothing for non-GSO skbs.  For GSO skbs the header offsets (inner
 * headers when the skb is encapsulated), LSO header length and MSS are
 * written to @txd with @md_bytes subtracted from the offsets and length,
 * the packet count and real length in @txbuf are updated, and the
 * vector's tx_lso counter is incremented.
 */
static void nfp_net_tx_tso(struct nfp_net_r_vector *r_vec,
			   struct nfp_net_tx_buf *txbuf,
			   struct nfp_net_tx_desc *txd, struct sk_buff *skb,
			   u32 md_bytes)
{
	u32 l3_offset, l4_offset, hdrlen;
	u16 mss;

	if (!skb_is_gso(skb))
		return;

	if (skb->encapsulation) {
		l3_offset = skb_inner_network_offset(skb);
		l4_offset = skb_inner_transport_offset(skb);
		hdrlen = skb_inner_transport_header(skb) - skb->data +
			inner_tcp_hdrlen(skb);
	} else {
		l3_offset = skb_network_offset(skb);
		l4_offset = skb_transport_offset(skb);
		hdrlen = l4_offset + tcp_hdrlen(skb);
	}

	/* Each segment after the first repeats the headers on the wire */
	txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs;
	txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1);

	mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK;

	txd->flags |= PCIE_DESC_TX_LSO;
	txd->mss = cpu_to_le16(mss);
	txd->lso_hdrlen = hdrlen - md_bytes;
	txd->l3_offset = l3_offset - md_bytes;
	txd->l4_offset = l4_offset - md_bytes;

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_lso++;
	u64_stats_update_end(&r_vec->tx_sync);
}
/**
 * nfp_net_tx_csum() - Request TX checksum offload in a TX descriptor
 * @dp:  NFP Net data path struct
 * @r_vec: per-ring structure
 * @txbuf: Pointer to driver soft TX descriptor
 * @txd: Pointer to TX descriptor
 * @skb: Pointer to SKB
 *
 * Only acts when TX checksum offload is enabled in @dp->ctrl and the skb
 * asks for CHECKSUM_PARTIAL.  The descriptor flags are chosen from the
 * (inner, if encapsulated) IP version and L4 protocol; packets that are
 * neither IPv4/IPv6 nor TCP/UDP trigger a warning and are not counted.
 */
static void nfp_net_tx_csum(struct nfp_net_dp *dp,
			    struct nfp_net_r_vector *r_vec,
			    struct nfp_net_tx_buf *txbuf,
			    struct nfp_net_tx_desc *txd, struct sk_buff *skb)
{
	struct ipv6hdr *ipv6h;
	struct iphdr *iph;
	u8 l4_hdr;

	if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM) ||
	    skb->ip_summed != CHECKSUM_PARTIAL)
		return;

	txd->flags |= PCIE_DESC_TX_CSUM;

	if (skb->encapsulation) {
		txd->flags |= PCIE_DESC_TX_ENCAP;
		iph = inner_ip_hdr(skb);
		ipv6h = inner_ipv6_hdr(skb);
	} else {
		iph = ip_hdr(skb);
		ipv6h = ipv6_hdr(skb);
	}

	if (iph->version == 4) {
		txd->flags |= PCIE_DESC_TX_IP4_CSUM;
		l4_hdr = iph->protocol;
	} else if (ipv6h->version == 6) {
		l4_hdr = ipv6h->nexthdr;
	} else {
		nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
		return;
	}

	if (l4_hdr == IPPROTO_TCP) {
		txd->flags |= PCIE_DESC_TX_TCP_CSUM;
	} else if (l4_hdr == IPPROTO_UDP) {
		txd->flags |= PCIE_DESC_TX_UDP_CSUM;
	} else {
		nn_dp_warn(dp, "partial checksum but l4 proto=%x!\n", l4_hdr);
		return;
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	if (skb->encapsulation)
		r_vec->hw_csum_tx_inner += txbuf->pkt_cnt;
	else
		r_vec->hw_csum_tx += txbuf->pkt_cnt;
	u64_stats_update_end(&r_vec->tx_sync);
}
static struct sk_buff *
nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
struct sk_buff *skb, u64 *tls_handle, int *nr_frags) struct sk_buff *skb, u64 *tls_handle, int *nr_frags)
{ {
...@@ -891,1496 +635,91 @@ nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, ...@@ -891,1496 +635,91 @@ nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
tls_offload_tx_resync_request(nskb->sk, seq, tls_offload_tx_resync_request(nskb->sk, seq,
ntls->next_seq); ntls->next_seq);
*nr_frags = 0; *nr_frags = 0;
return nskb; return nskb;
} }
if (datalen) {
u64_stats_update_begin(&r_vec->tx_sync);
if (!skb_is_gso(skb))
r_vec->hw_tls_tx++;
else
r_vec->hw_tls_tx += skb_shinfo(skb)->gso_segs;
u64_stats_update_end(&r_vec->tx_sync);
}
memcpy(tls_handle, ntls->fw_handle, sizeof(ntls->fw_handle));
ntls->next_seq += datalen;
#endif
return skb;
}
/*
 * Roll back the TLS TX sequence tracking for an skb that will not be sent.
 * Does nothing without a TLS handle or without CONFIG_TLS_DEVICE.  The
 * tracked next_seq is rewound to the skb's TCP sequence number only if it
 * still equals that sequence number plus the skb's payload length;
 * otherwise a one-time warning is raised.
 */
static void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle)
{
#ifdef CONFIG_TLS_DEVICE
	struct nfp_net_tls_offload_ctx *ntls;
	u32 datalen, seq;

	if (!tls_handle)
		return;
	if (WARN_ON_ONCE(!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)))
		return;

	datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
	seq = ntohl(tcp_hdr(skb)->seq);

	ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
	if (WARN_ON_ONCE(ntls->next_seq != seq + datalen))
		return;

	ntls->next_seq = seq;
#endif
}
/*
 * Hand the accumulated descriptors to the device: after a write barrier,
 * add wr_ptr_add to the ring's QCP write pointer and clear the accumulator.
 */
void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring)
{
/* Barrier is issued before the write pointer update below */
wmb();
nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add);
tx_ring->wr_ptr_add = 0;
}
/*
 * Prepend TX metadata to @skb when it carries a HW port mux destination
 * and/or a TLS connection handle.
 *
 * The prepend is laid out back to front: port id (4 bytes, big endian) if
 * present, then the TLS handle (8 bytes) if present, then a 4-byte big
 * endian word describing which fields follow.
 *
 * Return: number of bytes prepended (0 if no metadata is needed), or
 * -ENOMEM if headroom could not be made available.
 */
static int nfp_net_prep_tx_meta(struct sk_buff *skb, u64 tls_handle)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);
	unsigned char *data;
	u32 meta_id = 0;
	int md_bytes;

	if (likely(!md_dst && !tls_handle))
		return 0;

	/* A metadata dst of any other type is ignored */
	if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) {
		if (!tls_handle)
			return 0;
		md_dst = NULL;
	}

	md_bytes = 4;
	if (md_dst)
		md_bytes += 4;
	if (tls_handle)
		md_bytes += 8;

	if (unlikely(skb_cow_head(skb, md_bytes)))
		return -ENOMEM;

	/* Start at the end of the prepend area and fill it backwards */
	data = skb_push(skb, md_bytes) + md_bytes;

	if (md_dst) {
		data -= 4;
		put_unaligned_be32(md_dst->u.port_info.port_id, data);
		meta_id = NFP_NET_META_PORTID;
	}

	if (tls_handle) {
		/* conn handle is opaque, u64 only makes the zero test cheap */
		data -= 8;
		memcpy(data, &tls_handle, sizeof(tls_handle));
		meta_id <<= NFP_NET_META_FIELD_SIZE;
		meta_id |= NFP_NET_META_CONN_HANDLE;
	}

	data -= 4;
	put_unaligned_be32(meta_id, data);

	return md_bytes;
}
/**
 * nfp_net_tx() - Main transmit entry point
 * @skb: SKB to transmit
 * @netdev: netdev structure
 *
 * Maps the skb head and each page fragment for DMA and writes one TX
 * descriptor per mapping, then advances the ring's write pointer.
 *
 * Return: NETDEV_TX_OK on success and also when the skb is dropped on an
 * error path; NETDEV_TX_BUSY when the ring has no room for the skb.
 */
static netdev_tx_t nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
{
struct nfp_net *nn = netdev_priv(netdev);
const skb_frag_t *frag;
int f, nr_frags, wr_idx, md_bytes;
struct nfp_net_tx_ring *tx_ring;
struct nfp_net_r_vector *r_vec;
struct nfp_net_tx_buf *txbuf;
struct nfp_net_tx_desc *txd;
struct netdev_queue *nd_q;
struct nfp_net_dp *dp;
dma_addr_t dma_addr;
unsigned int fsize;
u64 tls_handle = 0;
u16 qidx;
dp = &nn->dp;
qidx = skb_get_queue_mapping(skb);
tx_ring = &dp->tx_rings[qidx];
r_vec = tx_ring->r_vec;
nr_frags = skb_shinfo(skb)->nr_frags;
/* No room for the head plus every fragment: stop the queue, report busy */
if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) {
nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
qidx, tx_ring->wr_p, tx_ring->rd_p);
nd_q = netdev_get_tx_queue(dp->netdev, qidx);
netif_tx_stop_queue(nd_q);
nfp_net_tx_xmit_more_flush(tx_ring);
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_busy++;
u64_stats_update_end(&r_vec->tx_sync);
return NETDEV_TX_BUSY;
}
/* May return a different skb (and update nr_frags) or NULL */
skb = nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags);
if (unlikely(!skb)) {
nfp_net_tx_xmit_more_flush(tx_ring);
return NETDEV_TX_OK;
}
md_bytes = nfp_net_prep_tx_meta(skb, tls_handle);
if (unlikely(md_bytes < 0))
goto err_flush;
/* Start with the head skbuf */
dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
DMA_TO_DEVICE);
if (dma_mapping_error(dp->dev, dma_addr))
goto err_dma_err;
wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
/* Stash the soft descriptor of the head then initialize it */
txbuf = &tx_ring->txbufs[wr_idx];
txbuf->skb = skb;
txbuf->dma_addr = dma_addr;
txbuf->fidx = -1;
txbuf->pkt_cnt = 1;
txbuf->real_len = skb->len;
/* Build TX descriptor */
txd = &tx_ring->txds[wr_idx];
txd->offset_eop = (nr_frags ? 0 : PCIE_DESC_TX_EOP) | md_bytes;
txd->dma_len = cpu_to_le16(skb_headlen(skb));
nfp_desc_set_dma_addr(txd, dma_addr);
txd->data_len = cpu_to_le16(skb->len);
txd->flags = 0;
txd->mss = 0;
txd->lso_hdrlen = 0;
/* Do not reorder - tso may adjust pkt cnt, vlan may override fields */
nfp_net_tx_tso(r_vec, txbuf, txd, skb, md_bytes);
nfp_net_tx_csum(dp, r_vec, txbuf, txd, skb);
if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
txd->flags |= PCIE_DESC_TX_VLAN;
txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
}
/* Gather DMA */
if (nr_frags > 0) {
__le64 second_half;
/* all descs must match except for in addr, length and eop */
second_half = txd->vals8[1];
for (f = 0; f < nr_frags; f++) {
frag = &skb_shinfo(skb)->frags[f];
fsize = skb_frag_size(frag);
dma_addr = skb_frag_dma_map(dp->dev, frag, 0,
fsize, DMA_TO_DEVICE);
if (dma_mapping_error(dp->dev, dma_addr))
goto err_unmap;
/* Advance only after a successful mapping */
wr_idx = D_IDX(tx_ring, wr_idx + 1);
tx_ring->txbufs[wr_idx].skb = skb;
tx_ring->txbufs[wr_idx].dma_addr = dma_addr;
tx_ring->txbufs[wr_idx].fidx = f;
txd = &tx_ring->txds[wr_idx];
txd->dma_len = cpu_to_le16(fsize);
nfp_desc_set_dma_addr(txd, dma_addr);
txd->offset_eop = md_bytes |
((f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0);
txd->vals8[1] = second_half;
}
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_gather++;
u64_stats_update_end(&r_vec->tx_sync);
}
skb_tx_timestamp(skb);
nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
tx_ring->wr_p += nr_frags + 1;
if (nfp_net_tx_ring_should_stop(tx_ring))
nfp_net_tx_ring_stop(nd_q, tx_ring);
tx_ring->wr_ptr_add += nr_frags + 1;
/* Flush to the device when __netdev_tx_sent_queue() returns true */
if (__netdev_tx_sent_queue(nd_q, txbuf->real_len, netdev_xmit_more()))
nfp_net_tx_xmit_more_flush(tx_ring);
return NETDEV_TX_OK;
err_unmap:
/* Walk back over the fragments mapped so far, newest first */
while (--f >= 0) {
frag = &skb_shinfo(skb)->frags[f];
dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
skb_frag_size(frag), DMA_TO_DEVICE);
tx_ring->txbufs[wr_idx].skb = NULL;
tx_ring->txbufs[wr_idx].dma_addr = 0;
tx_ring->txbufs[wr_idx].fidx = -2;
wr_idx = wr_idx - 1;
if (wr_idx < 0)
wr_idx += tx_ring->cnt;
}
/* wr_idx is back at the head's slot: release the head mapping */
dma_unmap_single(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
skb_headlen(skb), DMA_TO_DEVICE);
tx_ring->txbufs[wr_idx].skb = NULL;
tx_ring->txbufs[wr_idx].dma_addr = 0;
tx_ring->txbufs[wr_idx].fidx = -2;
err_dma_err:
nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_flush:
/* Kick out anything queued earlier, count the error and drop the skb */
nfp_net_tx_xmit_more_flush(tx_ring);
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_errors++;
u64_stats_update_end(&r_vec->tx_sync);
nfp_net_tls_tx_undo(skb, tls_handle);
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
/**
 * nfp_net_tx_complete() - Handled completed TX packets
 * @tx_ring:	TX ring structure
 * @budget:	NAPI budget (only used as bool to determine if in NAPI context)
 *
 * Reads the QCP read pointer, unmaps and releases every buffer the device
 * has consumed since the last call, updates the vector's TX statistics
 * and, when a netdev is attached, reports the completion to the queue and
 * wakes it if it was stopped and the ring has room again.
 */
void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
struct netdev_queue *nd_q;
u32 done_pkts = 0, done_bytes = 0;
u32 qcp_rd_p;
int todo;
/* Nothing outstanding on the ring */
if (tx_ring->wr_p == tx_ring->rd_p)
return;
/* Work out how many descriptors have been transmitted */
qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
if (qcp_rd_p == tx_ring->qcp_rd_p)
return;
todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
while (todo--) {
const skb_frag_t *frag;
struct nfp_net_tx_buf *tx_buf;
struct sk_buff *skb;
int fidx, nr_frags;
int idx;
idx = D_IDX(tx_ring, tx_ring->rd_p++);
tx_buf = &tx_ring->txbufs[idx];
skb = tx_buf->skb;
if (!skb)
continue;
nr_frags = skb_shinfo(skb)->nr_frags;
fidx = tx_buf->fidx;
/* fidx == -1 marks the head buffer; packet/byte totals live there */
if (fidx == -1) {
/* unmap head */
dma_unmap_single(dp->dev, tx_buf->dma_addr,
skb_headlen(skb), DMA_TO_DEVICE);
done_pkts += tx_buf->pkt_cnt;
done_bytes += tx_buf->real_len;
} else {
/* unmap fragment */
frag = &skb_shinfo(skb)->frags[fidx];
dma_unmap_page(dp->dev, tx_buf->dma_addr,
skb_frag_size(frag), DMA_TO_DEVICE);
}
/* check for last gather fragment */
if (fidx == nr_frags - 1)
napi_consume_skb(skb, budget);
tx_buf->dma_addr = 0;
tx_buf->skb = NULL;
tx_buf->fidx = -2;
}
tx_ring->qcp_rd_p = qcp_rd_p;
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_bytes += done_bytes;
r_vec->tx_pkts += done_pkts;
u64_stats_update_end(&r_vec->tx_sync);
/* Without a netdev there is no queue to account against or wake */
if (!dp->netdev)
return;
nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
if (nfp_net_tx_ring_should_wake(tx_ring)) {
/* Make sure TX thread will see updated tx_ring->rd_p */
smp_mb();
if (unlikely(netif_tx_queue_stopped(nd_q)))
netif_tx_wake_queue(nd_q);
}
WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
"TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
}
/* Reclaim completed descriptors of an XDP TX ring, at most
 * NFP_NET_XDP_MAX_COMPLETE per call.
 *
 * Return: true if every completed descriptor was reclaimed, false if the
 * per-call limit cut the work short.
 */
static bool nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	u32 bytes = 0, pkts;
	u32 hw_rd_p;
	int pending, i;
	bool drained;

	/* Work out how many descriptors have been transmitted */
	hw_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
	if (hw_rd_p == tx_ring->qcp_rd_p)
		return true;

	pending = D_IDX(tx_ring, hw_rd_p - tx_ring->qcp_rd_p);

	/* Clamp the amount of work done in one go */
	drained = pending <= NFP_NET_XDP_MAX_COMPLETE;
	if (!drained)
		pending = NFP_NET_XDP_MAX_COMPLETE;

	tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + pending);

	/* Every XDP TX descriptor carries exactly one packet */
	pkts = pending;
	for (i = 0; i < pending; i++) {
		int slot = D_IDX(tx_ring, tx_ring->rd_p);

		tx_ring->rd_p++;
		bytes += tx_ring->txbufs[slot].real_len;
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_bytes += bytes;
	r_vec->tx_pkts += pkts;
	u64_stats_update_end(&r_vec->tx_sync);

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);

	return drained;
}
/**
 * nfp_net_tx_ring_reset() - Drop untransmitted buffers and rewind the ring
 * @dp:		NFP Net data path struct
 * @tx_ring:	TX ring structure
 *
 * The device is assumed to be stopped. Calling this more than once in a row
 * must be harmless (idempotent).
 */
static void
nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
{
	if (tx_ring->is_xdp) {
		nfp_net_xsk_tx_bufs_free(tx_ring);
	} else {
		/* Walk every descriptor still owned by software */
		while (tx_ring->rd_p != tx_ring->wr_p) {
			struct nfp_net_tx_buf *txb;
			struct sk_buff *skb;
			int last_fidx;

			txb = &tx_ring->txbufs[D_IDX(tx_ring, tx_ring->rd_p)];
			skb = txb->skb;
			last_fidx = skb_shinfo(skb)->nr_frags - 1;

			if (txb->fidx != -1) {
				const skb_frag_t *frag;

				/* descriptor maps a page fragment */
				frag = &skb_shinfo(skb)->frags[txb->fidx];
				dma_unmap_page(dp->dev, txb->dma_addr,
					       skb_frag_size(frag),
					       DMA_TO_DEVICE);
			} else {
				/* descriptor maps the linear head */
				dma_unmap_single(dp->dev, txb->dma_addr,
						 skb_headlen(skb),
						 DMA_TO_DEVICE);
			}

			/* the skb goes away with its last gather piece */
			if (txb->fidx == last_fidx)
				dev_kfree_skb_any(skb);

			txb->dma_addr = 0;
			txb->skb = NULL;
			txb->fidx = -2;

			tx_ring->qcp_rd_p++;
			tx_ring->rd_p++;
		}
	}

	/* Wipe the descriptors and start over from slot zero */
	memset(tx_ring->txds, 0, tx_ring->size);
	tx_ring->wr_p = 0;
	tx_ring->rd_p = 0;
	tx_ring->qcp_rd_p = 0;
	tx_ring->wr_ptr_add = 0;

	/* Only stack-facing rings of a real netdev have queue state to reset */
	if (!tx_ring->is_xdp && dp->netdev)
		netdev_tx_reset_queue(netdev_get_tx_queue(dp->netdev,
							  tx_ring->idx));
}
/* TX watchdog callback: only logs which queue timed out */
static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
	struct nfp_net *vnic = netdev_priv(netdev);

	nn_warn(vnic, "TX watchdog timeout on ring: %u\n", txqueue);
}
/* Receive processing
*/
/* Size of the data part of a freelist buffer: prepend space plus the largest
 * frame (Ethernet header, two VLAN tags and MTU worth of payload).
 */
static unsigned int
nfp_net_calc_fl_bufsz_data(struct nfp_net_dp *dp)
{
	unsigned int prepend;

	/* With a dynamic offset reserve room for the largest prepend */
	if (dp->rx_offset != NFP_NET_CFG_RX_OFFSET_DYNAMIC)
		prepend = dp->rx_offset;
	else
		prepend = NFP_NET_MAX_PREPEND;

	return prepend + (ETH_HLEN + VLAN_HLEN * 2 + dp->mtu);
}
/* Full freelist buffer size for the skb path: headroom, DMA offset and data,
 * aligned for the skb, followed by an aligned struct skb_shared_info.
 */
static unsigned int nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp)
{
	unsigned int payload_sz;

	payload_sz = NFP_NET_RX_BUF_HEADROOM;
	payload_sz += dp->rx_dma_off;
	payload_sz += nfp_net_calc_fl_bufsz_data(dp);
	payload_sz = SKB_DATA_ALIGN(payload_sz);

	return payload_sz + SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
}
/* Freelist buffer size for the XSK path: XDP headroom plus the data part */
static unsigned int nfp_net_calc_fl_bufsz_xsk(struct nfp_net_dp *dp)
{
	return XDP_PACKET_HEADROOM + nfp_net_calc_fl_bufsz_data(dp);
}
/* Release an RX buffer: a whole page when @xdp is set, a page frag otherwise */
static void
nfp_net_free_frag(void *frag, bool xdp)
{
	if (xdp) {
		__free_page(virt_to_page(frag));
		return;
	}

	skb_free_frag(frag);
}
/**
 * nfp_net_rx_alloc_one() - Allocate and map an RX buffer
 * @dp:		NFP Net data path struct
 * @dma_addr:	Pointer to storage for DMA address (output param)
 *
 * Allocates a full page when an XDP program is attached and a page frag of
 * @dp->fl_bufsz bytes otherwise, then maps the buffer for DMA.
 *
 * Return: allocated buffer or NULL on failure.
 */
static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
{
	void *buf = NULL;

	if (dp->xdp_prog) {
		struct page *pg = alloc_page(GFP_KERNEL);

		if (pg)
			buf = page_address(pg);
	} else {
		buf = netdev_alloc_frag(dp->fl_bufsz);
	}

	if (!buf) {
		nn_dp_warn(dp, "Failed to alloc receive page frag\n");
		return NULL;
	}

	*dma_addr = nfp_net_dma_map_rx(dp, buf);
	if (!dma_mapping_error(dp->dev, *dma_addr))
		return buf;

	/* Mapping failed, give the memory back */
	nfp_net_free_frag(buf, dp->xdp_prog);
	nn_dp_warn(dp, "Failed to map DMA RX buffer\n");

	return NULL;
}
/* Allocate and DMA-map a replacement RX buffer using napi_alloc_frag() or
 * dev_alloc_page(). Allocation failure is silent, a mapping failure is logged.
 *
 * Return: the buffer, or NULL on failure.
 */
static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
{
	void *buf;

	if (dp->xdp_prog) {
		struct page *pg = dev_alloc_page();

		if (unlikely(!pg))
			return NULL;
		buf = page_address(pg);
	} else {
		buf = napi_alloc_frag(dp->fl_bufsz);
		if (unlikely(!buf))
			return NULL;
	}

	*dma_addr = nfp_net_dma_map_rx(dp, buf);
	if (!dma_mapping_error(dp->dev, *dma_addr))
		return buf;

	nfp_net_free_frag(buf, dp->xdp_prog);
	nn_dp_warn(dp, "Failed to map DMA RX buffer\n");

	return NULL;
}
/**
 * nfp_net_rx_give_one() - Put mapped buffer on the software and hardware rings
 * @dp:		NFP Net data path struct
 * @rx_ring:	RX ring structure
 * @frag:	page fragment buffer
 * @dma_addr:	DMA address of the buffer mapping
 *
 * The queue write pointer is only advanced once every NFP_NET_FL_BATCH
 * buffers, so a buffer given here may not be visible to the hardware until
 * the batch is complete.
 */
static void nfp_net_rx_give_one(const struct nfp_net_dp *dp,
				struct nfp_net_rx_ring *rx_ring,
				void *frag, dma_addr_t dma_addr)
{
	unsigned int wr_idx;

	wr_idx = D_IDX(rx_ring, rx_ring->wr_p);

	nfp_net_dma_sync_dev_rx(dp, dma_addr);

	/* Stash the frag and its DMA address away */
	rx_ring->rxbufs[wr_idx].frag = frag;
	rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;

	/* Fill freelist descriptor */
	rx_ring->rxds[wr_idx].fld.reserved = 0;
	rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
	nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
			      dma_addr + dp->rx_dma_off);

	rx_ring->wr_p++;
	if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
		/* Update write pointer of the freelist queue. Make
		 * sure all writes are flushed before telling the hardware.
		 */
		wmb();
		nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
	}
}
/**
 * nfp_net_rx_ring_reset() - Reflect in SW state of freelist after disable
 * @rx_ring:	RX ring structure
 *
 * Assumes that the device is stopped, must be idempotent.
 */
static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
{
	unsigned int hole, tail;

	/* Both pointers at zero means the ring was never fed FL bufs (or was
	 * already reset). RX rings are always kept at cnt - 1 FL bufs.
	 */
	if (!rx_ring->wr_p && !rx_ring->rd_p)
		return;

	/* Move the empty entry to the end of the list */
	hole = D_IDX(rx_ring, rx_ring->wr_p);
	tail = rx_ring->cnt - 1;

	if (!rx_ring->r_vec->xsk_pool) {
		rx_ring->rxbufs[hole] = rx_ring->rxbufs[tail];
		memset(&rx_ring->rxbufs[tail], 0, sizeof(*rx_ring->rxbufs));
	} else {
		rx_ring->xsk_rxbufs[hole] = rx_ring->xsk_rxbufs[tail];
		memset(&rx_ring->xsk_rxbufs[tail], 0,
		       sizeof(*rx_ring->xsk_rxbufs));
	}

	memset(rx_ring->rxds, 0, rx_ring->size);
	rx_ring->rd_p = 0;
	rx_ring->wr_p = 0;
}
/**
* nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring
* @dp: NFP Net data path struct
* @rx_ring: RX ring to remove buffers from
*
* Assumes that the device is stopped and buffers are in [0, ring->cnt - 1)
* entries. After device is disabled nfp_net_rx_ring_reset() must be called
* to restore required ring geometry.
*/
static void
nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp,
struct nfp_net_rx_ring *rx_ring)
{
unsigned int i;
if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx))
return;
for (i = 0; i < rx_ring->cnt - 1; i++) {
/* NULL skb can only happen when initial filling of the ring
* fails to allocate enough buffers and calls here to free
* already allocated ones.
*/
if (!rx_ring->rxbufs[i].frag)
continue;
nfp_net_dma_unmap_rx(dp, rx_ring->rxbufs[i].dma_addr);
nfp_net_free_frag(rx_ring->rxbufs[i].frag, dp->xdp_prog);
rx_ring->rxbufs[i].dma_addr = 0;
rx_ring->rxbufs[i].frag = NULL;
}
}
/**
* nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW)
* @dp: NFP Net data path struct
* @rx_ring: RX ring to remove buffers from
*/
static int
nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp,
struct nfp_net_rx_ring *rx_ring)
{
struct nfp_net_rx_buf *rxbufs;
unsigned int i;
if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx))
return 0;
rxbufs = rx_ring->rxbufs;
for (i = 0; i < rx_ring->cnt - 1; i++) {
rxbufs[i].frag = nfp_net_rx_alloc_one(dp, &rxbufs[i].dma_addr);
if (!rxbufs[i].frag) {
nfp_net_rx_ring_bufs_free(dp, rx_ring);
return -ENOMEM;
}
}
return 0;
}
/**
 * nfp_net_rx_ring_fill_freelist() - Give buffers from the ring to FW
 * @dp:		NFP Net data path struct
 * @rx_ring:	RX ring to fill
 *
 * Rings backed by an XSK pool are filled by the XSK helper instead; all
 * other rings hand their first cnt - 1 software buffers to the freelist.
 */
static void
nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp,
			      struct nfp_net_rx_ring *rx_ring)
{
	unsigned int i;

	if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) {
		/* Call and return separately: a return statement with an
		 * expression is not valid ISO C in a function returning void.
		 */
		nfp_net_xsk_rx_ring_fill_freelist(rx_ring);
		return;
	}

	for (i = 0; i < rx_ring->cnt - 1; i++)
		nfp_net_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
				    rx_ring->rxbufs[i].dma_addr);
}
/**
 * nfp_net_rx_csum_has_errors() - group check if rxd has any csum errors
 * @flags:	RX descriptor flags field in CPU byte order
 *
 * Return: non-zero when the set of "checked" bits differs from the set of
 * "OK" bits shifted into the same position.
 */
static int nfp_net_rx_csum_has_errors(u16 flags)
{
	u16 checked = flags & __PCIE_DESC_RX_CSUM_ALL;
	u16 passed = flags & __PCIE_DESC_RX_CSUM_ALL_OK;

	return checked != (passed << PCIE_DESC_RX_CSUM_OK_SHIFT);
}
/**
* nfp_net_rx_csum() - set SKB checksum field based on RX descriptor flags
* @dp: NFP Net data path struct
* @r_vec: per-ring structure
* @rxd: Pointer to RX descriptor
* @meta: Parsed metadata prepend
* @skb: Pointer to SKB
*/
void nfp_net_rx_csum(const struct nfp_net_dp *dp,
struct nfp_net_r_vector *r_vec,
const struct nfp_net_rx_desc *rxd,
const struct nfp_meta_parsed *meta, struct sk_buff *skb)
{
skb_checksum_none_assert(skb);
if (!(dp->netdev->features & NETIF_F_RXCSUM))
return;
if (meta->csum_type) {
skb->ip_summed = meta->csum_type;
skb->csum = meta->csum;
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->hw_csum_rx_complete++;
u64_stats_update_end(&r_vec->rx_sync);
return;
}
if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->hw_csum_rx_error++;
u64_stats_update_end(&r_vec->rx_sync);
return;
}
/* Assume that the firmware will never report inner CSUM_OK unless outer
* L4 headers were successfully parsed. FW will always report zero UDP
* checksum as CSUM_OK.
*/
if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
__skb_incr_checksum_unnecessary(skb);
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->hw_csum_rx_ok++;
u64_stats_update_end(&r_vec->rx_sync);
}
if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
__skb_incr_checksum_unnecessary(skb);
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->hw_csum_rx_inner_ok++;
u64_stats_update_end(&r_vec->rx_sync);
}
}
/* Record the RSS hash and its type in @meta, provided RXHASH is enabled on
 * @netdev. The IPv4/IPv6/IPv6-EX RSS types are reported as L3 hashes and
 * everything else as L4. @hash is big endian and may be unaligned.
 */
static void
nfp_net_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
		 unsigned int type, __be32 *hash)
{
	if (!(netdev->features & NETIF_F_RXHASH))
		return;

	if (type == NFP_NET_RSS_IPV4 || type == NFP_NET_RSS_IPV6 ||
	    type == NFP_NET_RSS_IPV6_EX)
		meta->hash_type = PKT_HASH_TYPE_L3;
	else
		meta->hash_type = PKT_HASH_TYPE_L4;

	meta->hash = get_unaligned_be32(hash);
}
/* Pick up the RSS hash from a struct nfp_net_rx_hash located at @data, but
 * only when the descriptor flags say an RSS hash is present.
 */
static void
nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta,
		      void *data, struct nfp_net_rx_desc *rxd)
{
	struct nfp_net_rx_hash *hash_info = data;

	if (rxd->rxd.flags & PCIE_DESC_RX_RSS)
		nfp_net_set_hash(netdev, meta,
				 get_unaligned_be32(&hash_info->hash_type),
				 &hash_info->hash);
}
/* Parse the chained metadata prepend located at @data into @meta.
 *
 * The first 32-bit big endian word is a chain of field type codes, each
 * NFP_NET_META_FIELD_SIZE bits wide, consumed from the least significant end.
 * The field values follow in the same order.
 *
 * Return: true when the metadata is INVALID (unknown field type, or the
 * parsed fields do not end exactly at @pkt), false when parsing succeeded.
 * The visible caller drops the packet on true.
 *
 * @meta_len is not used by this function.
 */
bool
nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
		   void *data, void *pkt, unsigned int pkt_len, int meta_len)
{
	u32 meta_info;

	meta_info = get_unaligned_be32(data);
	data += 4;

	while (meta_info) {
		switch (meta_info & NFP_NET_META_FIELD_MASK) {
		case NFP_NET_META_HASH:
			/* The hash type is carried in the next type slot, so
			 * shift once here in addition to the shift at the
			 * bottom of the loop.
			 */
			meta_info >>= NFP_NET_META_FIELD_SIZE;
			nfp_net_set_hash(netdev, meta,
					 meta_info & NFP_NET_META_FIELD_MASK,
					 (__be32 *)data);
			data += 4;
			break;
		case NFP_NET_META_MARK:
			meta->mark = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_PORTID:
			meta->portid = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_CSUM:
			/* Checksum is taken in CPU byte order, not swapped */
			meta->csum_type = CHECKSUM_COMPLETE;
			meta->csum =
				(__force __wsum)__get_unaligned_cpu32(data);
			data += 4;
			break;
		case NFP_NET_META_RESYNC_INFO:
			/* NOTE(review): a non-zero result from the resync
			 * helper stops parsing but reports the metadata as
			 * valid (false); remaining fields are left unparsed.
			 * Presumably intentional - confirm.
			 */
			if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
						      pkt_len))
				return false;
			data += sizeof(struct nfp_net_tls_resync_req);
			break;
		default:
			/* Unknown field type */
			return true;
		}

		meta_info >>= NFP_NET_META_FIELD_SIZE;
	}

	/* All fields consumed; they must end right where the packet starts */
	return data != pkt;
}
/* Account a dropped RX packet, recycle its buffer and free its skb.
 *
 * @rxbuf: buffer to hand back to the freelist, may be NULL
 * @skb:   skb to free, may be NULL
 */
static void
nfp_net_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
		struct sk_buff *skb)
{
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_drops++;
	/* If we have both skb and rxbuf the replacement buffer allocation
	 * must have failed, count this as an alloc failure.
	 */
	if (skb && rxbuf)
		r_vec->rx_replace_buf_alloc_fail++;
	u64_stats_update_end(&r_vec->rx_sync);

	/* skb is built on top of the frag, freeing the skb would free the
	 * frag too, so to be able to reuse it we need an extra ref.
	 */
	if (skb && rxbuf && skb->head == rxbuf->frag)
		page_ref_inc(virt_to_head_page(rxbuf->frag));
	/* Return the buffer to the freelist before the skb is released */
	if (rxbuf)
		nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
	if (skb)
		dev_kfree_skb_any(skb);
}
/**
 * nfp_net_tx_xdp_buf() - Queue a received buffer on the XDP TX ring
 * @dp:		NFP Net data path struct
 * @rx_ring:	RX ring the buffer was received on
 * @tx_ring:	XDP TX ring to transmit on
 * @rxbuf:	RX buffer holding the packet
 * @dma_off:	Offset of the packet from the start of the DMA mapping
 * @pkt_len:	Length of the packet
 * @completed:	Set to true once TX completion has been attempted in order to
 *		make room; completion is attempted at most once while it is set
 *
 * The RX buffer is swapped with the buffer parked in the TX slot: the TX
 * slot's previous buffer goes to the RX freelist and @rxbuf's buffer is sent.
 *
 * Return: true if the packet was queued. On false the packet is dropped and
 * @rxbuf has already been handed back to the RX freelist, so the caller must
 * not recycle it again.
 */
static bool
nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
		   struct nfp_net_tx_ring *tx_ring,
		   struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
		   unsigned int pkt_len, bool *completed)
{
	unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;
	struct nfp_net_tx_buf *txbuf;
	struct nfp_net_tx_desc *txd;
	int wr_idx;

	/* Reject if xdp_adjust_tail grow packet beyond DMA area. The buffer
	 * must still be returned to the freelist, otherwise the RX ring
	 * permanently loses a slot and the page is leaked.
	 */
	if (pkt_len + dma_off > dma_map_sz) {
		nfp_net_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, NULL);
		return false;
	}

	if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
		/* Try to make room, but only once per caller-tracked round */
		if (!*completed) {
			nfp_net_xdp_complete(tx_ring);
			*completed = true;
		}

		if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
			nfp_net_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
					NULL);
			return false;
		}
	}

	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

	/* Stash the soft descriptor of the head then initialize it */
	txbuf = &tx_ring->txbufs[wr_idx];

	/* The buffer parked in this TX slot replaces the one being sent */
	nfp_net_rx_give_one(dp, rx_ring, txbuf->frag, txbuf->dma_addr);

	txbuf->frag = rxbuf->frag;
	txbuf->dma_addr = rxbuf->dma_addr;
	txbuf->fidx = -1;
	txbuf->pkt_cnt = 1;
	txbuf->real_len = pkt_len;

	dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
				   pkt_len, DMA_BIDIRECTIONAL);

	/* Build TX descriptor */
	txd = &tx_ring->txds[wr_idx];
	txd->offset_eop = PCIE_DESC_TX_EOP;
	txd->dma_len = cpu_to_le16(pkt_len);
	nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + dma_off);
	txd->data_len = cpu_to_le16(pkt_len);

	txd->flags = 0;
	txd->mss = 0;
	txd->lso_hdrlen = 0;

	/* The hardware is only told later, when wr_ptr_add is flushed */
	tx_ring->wr_p++;
	tx_ring->wr_ptr_add++;
	return true;
}
/**
 * nfp_net_rx() - receive up to @budget packets on @rx_ring
 * @rx_ring:   RX ring to receive from
 * @budget:    NAPI budget
 *
 * Note, this function is separated out from the napi poll function to
 * more cleanly separate packet receive code from other bookkeeping
 * functions performed in the napi poll function.
 *
 * Return: Number of packets received. When an XDP program is attached and
 * XDP TX completion could not be finished, @budget is returned instead.
 */
static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_tx_ring *tx_ring;
	struct bpf_prog *xdp_prog;
	bool xdp_tx_cmpl = false;
	unsigned int true_bufsz;
	struct sk_buff *skb;
	int pkts_polled = 0;
	struct xdp_buff xdp;
	int idx;

	xdp_prog = READ_ONCE(dp->xdp_prog);
	/* With XDP attached every buffer is a full page */
	true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
	xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
		      &rx_ring->xdp_rxq);
	tx_ring = r_vec->xdp_ring;

	while (pkts_polled < budget) {
		unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
		struct nfp_net_rx_buf *rxbuf;
		struct nfp_net_rx_desc *rxd;
		struct nfp_meta_parsed meta;
		bool redir_egress = false;
		struct net_device *netdev;
		dma_addr_t new_dma_addr;
		u32 meta_len_xdp = 0;
		void *new_frag;

		idx = D_IDX(rx_ring, rx_ring->rd_p);

		/* Stop at the first descriptor without the DD bit set */
		rxd = &rx_ring->rxds[idx];
		if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
			break;

		/* Memory barrier to ensure that we won't do other reads
		 * before the DD bit.
		 */
		dma_rmb();

		memset(&meta, 0, sizeof(meta));

		/* Descriptor is consumed from here on; every path below must
		 * either recycle rxbuf or replace it with a new buffer.
		 */
		rx_ring->rd_p++;
		pkts_polled++;

		rxbuf =	&rx_ring->rxbufs[idx];
		/*         < meta_len >
		 *  <-- [rx_offset] -->
		 *  ---------------------------------------------------------
		 * | [XX] |  metadata  |             packet           | XXXX |
		 *  ---------------------------------------------------------
		 *         <---------------- data_len --------------->
		 *
		 * The rx_offset is fixed for all packets, the meta_len can vary
		 * on a packet by packet basis. If rx_offset is set to zero
		 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
		 * buffer and is immediately followed by the packet (no [XX]).
		 */
		meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
		data_len = le16_to_cpu(rxd->rxd.data_len);
		pkt_len = data_len - meta_len;

		pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
		if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
			pkt_off += meta_len;
		else
			pkt_off += dp->rx_offset;
		meta_off = pkt_off - meta_len;

		/* Stats update; counted before any validation below */
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->rx_pkts++;
		r_vec->rx_bytes += pkt_len;
		u64_stats_update_end(&r_vec->rx_sync);

		if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
			     (dp->rx_offset && meta_len > dp->rx_offset))) {
			nn_dp_warn(dp, "oversized RX packet metadata %u\n",
				   meta_len);
			nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}

		nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
					data_len);

		if (!dp->chained_metadata_format) {
			nfp_net_set_hash_desc(dp->netdev, &meta,
					      rxbuf->frag + meta_off, rxd);
		} else if (meta_len) {
			/* nfp_net_parse_meta() returns true on bad metadata */
			if (unlikely(nfp_net_parse_meta(dp->netdev, &meta,
							rxbuf->frag + meta_off,
							rxbuf->frag + pkt_off,
							pkt_len, meta_len))) {
				nn_dp_warn(dp, "invalid RX packet metadata\n");
				nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
						NULL);
				continue;
			}
		}

		/* XDP is only run on packets without a port id in metadata */
		if (xdp_prog && !meta.portid) {
			void *orig_data = rxbuf->frag + pkt_off;
			unsigned int dma_off;
			int act;

			xdp_prepare_buff(&xdp,
					 rxbuf->frag + NFP_NET_RX_BUF_HEADROOM,
					 pkt_off - NFP_NET_RX_BUF_HEADROOM,
					 pkt_len, true);

			act = bpf_prog_run_xdp(xdp_prog, &xdp);

			/* The program may have moved the data start and end */
			pkt_len = xdp.data_end - xdp.data;
			pkt_off += xdp.data - orig_data;

			switch (act) {
			case XDP_PASS:
				meta_len_xdp = xdp.data - xdp.data_meta;
				break;
			case XDP_TX:
				dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
				if (unlikely(!nfp_net_tx_xdp_buf(dp, rx_ring,
								 tx_ring, rxbuf,
								 dma_off,
								 pkt_len,
								 &xdp_tx_cmpl)))
					trace_xdp_exception(dp->netdev,
							    xdp_prog, act);
				continue;
			default:
				bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_ABORTED:
				trace_xdp_exception(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_DROP:
				/* Recycle the buffer straight to the freelist */
				nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
						    rxbuf->dma_addr);
				continue;
			}
		}

		/* Pick the destination netdev based on the metadata port id */
		if (likely(!meta.portid)) {
			netdev = dp->netdev;
		} else if (meta.portid == NFP_META_PORT_ID_CTRL) {
			struct nfp_net *nn = netdev_priv(dp->netdev);

			/* Control message: handed to the app as raw data, the
			 * buffer is recycled right away.
			 */
			nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
					    pkt_len);
			nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
					    rxbuf->dma_addr);
			continue;
		} else {
			struct nfp_net *nn;

			nn = netdev_priv(dp->netdev);
			netdev = nfp_app_dev_get(nn->app, meta.portid,
						 &redir_egress);
			if (unlikely(!netdev)) {
				nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
						NULL);
				continue;
			}

			if (nfp_netdev_is_nfp_repr(netdev))
				nfp_repr_inc_rx_stats(netdev, pkt_len);
		}

		skb = build_skb(rxbuf->frag, true_bufsz);
		if (unlikely(!skb)) {
			nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}
		/* The skb now owns the frag, so the ring needs a replacement.
		 * If that fails, nfp_net_rx_drop() frees the skb and reuses
		 * the old buffer.
		 */
		new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr);
		if (unlikely(!new_frag)) {
			nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
			continue;
		}

		nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

		nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

		skb_reserve(skb, pkt_off);
		skb_put(skb, pkt_len);

		skb->mark = meta.mark;
		skb_set_hash(skb, meta.hash, meta.hash_type);

		skb_record_rx_queue(skb, rx_ring->idx);
		skb->protocol = eth_type_trans(skb, netdev);

		nfp_net_rx_csum(dp, r_vec, rxd, &meta, skb);

#ifdef CONFIG_TLS_DEVICE
		if (rxd->rxd.flags & PCIE_DESC_RX_DECRYPTED) {
			skb->decrypted = true;
			u64_stats_update_begin(&r_vec->rx_sync);
			r_vec->hw_tls_rx++;
			u64_stats_update_end(&r_vec->rx_sync);
		}
#endif

		if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
					       le16_to_cpu(rxd->rxd.vlan));
		if (meta_len_xdp)
			skb_metadata_set(skb, meta_len_xdp);

		if (likely(!redir_egress)) {
			napi_gro_receive(&rx_ring->r_vec->napi, skb);
		} else {
			/* Send the packet back out through the netdev that
			 * nfp_app_dev_get() returned: restore the Ethernet
			 * header pulled by eth_type_trans() and transmit.
			 */
			skb->dev = netdev;
			skb_reset_network_header(skb);
			__skb_push(skb, ETH_HLEN);
			dev_queue_xmit(skb);
		}
	}

	if (xdp_prog) {
		/* Flush XDP_TX descriptors queued during this poll; if none
		 * were queued but the XDP TX ring is not empty and completion
		 * was not attempted yet, reap completions now.
		 */
		if (tx_ring->wr_ptr_add)
			nfp_net_tx_xmit_more_flush(tx_ring);
		else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
			 !xdp_tx_cmpl)
			if (!nfp_net_xdp_complete(tx_ring))
				pkts_polled = budget;
	}

	return pkts_polled;
}
/**
 * nfp_net_poll() - napi poll function
 * @napi:    NAPI structure
 * @budget:  NAPI budget
 *
 * Reaps TX completions, receives packets and, when less than @budget was
 * used and NAPI completes, unmasks the vector's interrupt. If adaptive
 * coalescing is enabled the RX and/or TX DIM state is fed a fresh sample.
 *
 * Return: number of packets polled.
 */
static int nfp_net_poll(struct napi_struct *napi, int budget)
{
	struct nfp_net_r_vector *r_vec =
		container_of(napi, struct nfp_net_r_vector, napi);
	unsigned int pkts_polled = 0;

	if (r_vec->tx_ring)
		nfp_net_tx_complete(r_vec->tx_ring, budget);

	if (r_vec->rx_ring)
		pkts_polled = nfp_net_rx(r_vec->rx_ring, budget);

	if (pkts_polled < budget && napi_complete_done(napi, pkts_polled))
		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);

	/* RX adaptive coalescing: sample the RX counters consistently */
	if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) {
		struct dim_sample dim_sample = {};
		u64 rx_pkts, rx_bytes;
		unsigned int seq;

		do {
			seq = u64_stats_fetch_begin(&r_vec->rx_sync);
			rx_pkts = r_vec->rx_pkts;
			rx_bytes = r_vec->rx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->rx_sync, seq));

		dim_update_sample(r_vec->event_ctr, rx_pkts, rx_bytes,
				  &dim_sample);
		net_dim(&r_vec->rx_dim, dim_sample);
	}

	/* TX adaptive coalescing: same for the TX counters */
	if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
		struct dim_sample dim_sample = {};
		u64 tx_pkts, tx_bytes;
		unsigned int seq;

		do {
			seq = u64_stats_fetch_begin(&r_vec->tx_sync);
			tx_pkts = r_vec->tx_pkts;
			tx_bytes = r_vec->tx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->tx_sync, seq));

		dim_update_sample(r_vec->event_ctr, tx_pkts, tx_bytes,
				  &dim_sample);
		net_dim(&r_vec->tx_dim, dim_sample);
	}

	return pkts_polled;
}
/* Control device data path
*/
/* Transmit one control message skb on the vector's TX ring.
 *
 * @old: where to park the skb if the ring is full - at the head of
 *       r_vec->queue when true, at the tail when false.
 *
 * Return: true if the ring was full and the skb was put on r_vec->queue,
 * false otherwise (the skb was either handed to the hardware or freed after
 * an error).
 */
static bool
nfp_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
		struct sk_buff *skb, bool old)
{
	/* real_len is sampled before any metadata is pushed onto the skb */
	unsigned int real_len = skb->len, meta_len = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_net_tx_buf *txbuf;
	struct nfp_net_tx_desc *txd;
	struct nfp_net_dp *dp;
	dma_addr_t dma_addr;
	int wr_idx;

	dp = &r_vec->nfp_net->dp;
	tx_ring = r_vec->tx_ring;

	/* Only linear skbs are supported on this path */
	if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) {
		nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
		goto err_free;
	}

	if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		if (!old)
			__skb_queue_tail(&r_vec->queue, skb);
		else
			__skb_queue_head(&r_vec->queue, skb);
		return true;
	}

	if (nfp_app_ctrl_has_meta(nn->app)) {
		if (unlikely(skb_headroom(skb) < 8)) {
			nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
			goto err_free;
		}
		/* Prepend 8 bytes of metadata. The two pushes go in reverse,
		 * so on the wire the PORTID type word comes first, followed
		 * by the CTRL port id value.
		 */
		meta_len = 8;
		put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
		put_unaligned_be32(NFP_NET_META_PORTID, skb_push(skb, 4));
	}

	/* Start with the head skbuf */
	dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
				  DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_dma_warn;

	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

	/* Stash the soft descriptor of the head then initialize it */
	txbuf = &tx_ring->txbufs[wr_idx];
	txbuf->skb = skb;
	txbuf->dma_addr = dma_addr;
	txbuf->fidx = -1;
	txbuf->pkt_cnt = 1;
	txbuf->real_len = real_len;

	/* Build TX descriptor */
	txd = &tx_ring->txds[wr_idx];
	txd->offset_eop = meta_len | PCIE_DESC_TX_EOP;
	txd->dma_len = cpu_to_le16(skb_headlen(skb));
	nfp_desc_set_dma_addr(txd, dma_addr);
	txd->data_len = cpu_to_le16(skb->len);

	txd->flags = 0;
	txd->mss = 0;
	txd->lso_hdrlen = 0;

	tx_ring->wr_p++;
	tx_ring->wr_ptr_add++;
	nfp_net_tx_xmit_more_flush(tx_ring);

	return false;

err_dma_warn:
	nn_dp_warn(dp, "Failed to DMA map TX CTRL buffer\n");
err_free:
	/* Count the error and drop the message */
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return false;
}
bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb) if (datalen) {
{ u64_stats_update_begin(&r_vec->tx_sync);
struct nfp_net_r_vector *r_vec = &nn->r_vecs[0]; if (!skb_is_gso(skb))
r_vec->hw_tls_tx++;
else
r_vec->hw_tls_tx += skb_shinfo(skb)->gso_segs;
u64_stats_update_end(&r_vec->tx_sync);
}
return nfp_ctrl_tx_one(nn, r_vec, skb, false); memcpy(tls_handle, ntls->fw_handle, sizeof(ntls->fw_handle));
ntls->next_seq += datalen;
#endif
return skb;
} }
bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb) void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle)
{ {
struct nfp_net_r_vector *r_vec = &nn->r_vecs[0]; #ifdef CONFIG_TLS_DEVICE
bool ret; struct nfp_net_tls_offload_ctx *ntls;
u32 datalen, seq;
spin_lock_bh(&r_vec->lock);
ret = nfp_ctrl_tx_one(nn, r_vec, skb, false);
spin_unlock_bh(&r_vec->lock);
return ret; if (!tls_handle)
} return;
if (WARN_ON_ONCE(!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)))
return;
static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec) datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
{ seq = ntohl(tcp_hdr(skb)->seq);
struct sk_buff *skb;
while ((skb = __skb_dequeue(&r_vec->queue))) ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
if (nfp_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true)) if (ntls->next_seq == seq + datalen)
return; ntls->next_seq = seq;
else
WARN_ON_ONCE(1);
#endif
} }
static bool static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int txqueue)
nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len)
{ {
u32 meta_type, meta_tag; struct nfp_net *nn = netdev_priv(netdev);
if (!nfp_app_ctrl_has_meta(nn->app))
return !meta_len;
if (meta_len != 8)
return false;
meta_type = get_unaligned_be32(data);
meta_tag = get_unaligned_be32(data + 4);
return (meta_type == NFP_NET_META_PORTID && nn_warn(nn, "TX watchdog timeout on ring: %u\n", txqueue);
meta_tag == NFP_META_PORT_ID_CTRL);
} }
static bool /* Receive processing */
nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp, static unsigned int
struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring) nfp_net_calc_fl_bufsz_data(struct nfp_net_dp *dp)
{ {
unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; unsigned int fl_bufsz = 0;
struct nfp_net_rx_buf *rxbuf;
struct nfp_net_rx_desc *rxd;
dma_addr_t new_dma_addr;
struct sk_buff *skb;
void *new_frag;
int idx;
idx = D_IDX(rx_ring, rx_ring->rd_p);
rxd = &rx_ring->rxds[idx];
if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
return false;
/* Memory barrier to ensure that we won't do other reads
* before the DD bit.
*/
dma_rmb();
rx_ring->rd_p++;
rxbuf = &rx_ring->rxbufs[idx];
meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
data_len = le16_to_cpu(rxd->rxd.data_len);
pkt_len = data_len - meta_len;
pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
pkt_off += meta_len; fl_bufsz += NFP_NET_MAX_PREPEND;
else else
pkt_off += dp->rx_offset; fl_bufsz += dp->rx_offset;
meta_off = pkt_off - meta_len; fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + dp->mtu;
/* Stats update */
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_pkts++;
r_vec->rx_bytes += pkt_len;
u64_stats_update_end(&r_vec->rx_sync);
nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len);
if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
meta_len);
nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
return true;
}
skb = build_skb(rxbuf->frag, dp->fl_bufsz);
if (unlikely(!skb)) {
nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
return true;
}
new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr);
if (unlikely(!new_frag)) {
nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
return true;
}
nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
skb_reserve(skb, pkt_off);
skb_put(skb, pkt_len);
nfp_app_ctrl_rx(nn->app, skb);
return true; return fl_bufsz;
} }
static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) static unsigned int nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp)
{ {
struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; unsigned int fl_bufsz;
struct nfp_net *nn = r_vec->nfp_net;
struct nfp_net_dp *dp = &nn->dp; fl_bufsz = NFP_NET_RX_BUF_HEADROOM;
unsigned int budget = 512; fl_bufsz += dp->rx_dma_off;
fl_bufsz += nfp_net_calc_fl_bufsz_data(dp);
while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--) fl_bufsz = SKB_DATA_ALIGN(fl_bufsz);
continue; fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
return budget; return fl_bufsz;
} }
static void nfp_ctrl_poll(struct tasklet_struct *t) static unsigned int nfp_net_calc_fl_bufsz_xsk(struct nfp_net_dp *dp)
{ {
struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet); unsigned int fl_bufsz;
spin_lock(&r_vec->lock); fl_bufsz = XDP_PACKET_HEADROOM;
nfp_net_tx_complete(r_vec->tx_ring, 0); fl_bufsz += nfp_net_calc_fl_bufsz_data(dp);
__nfp_ctrl_tx_queued(r_vec);
spin_unlock(&r_vec->lock);
if (nfp_ctrl_rx(r_vec)) { return fl_bufsz;
nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
} else {
tasklet_schedule(&r_vec->tasklet);
nn_dp_warn(&r_vec->nfp_net->dp,
"control message budget exceeded!\n");
}
} }
/* Setup and Configuration /* Setup and Configuration
...@@ -2415,7 +754,7 @@ static void nfp_net_vecs_init(struct nfp_net *nn) ...@@ -2415,7 +754,7 @@ static void nfp_net_vecs_init(struct nfp_net *nn)
__skb_queue_head_init(&r_vec->queue); __skb_queue_head_init(&r_vec->queue);
spin_lock_init(&r_vec->lock); spin_lock_init(&r_vec->lock);
tasklet_setup(&r_vec->tasklet, nfp_ctrl_poll); tasklet_setup(&r_vec->tasklet, nn->dp.ops->ctrl_poll);
tasklet_disable(&r_vec->tasklet); tasklet_disable(&r_vec->tasklet);
} }
...@@ -2423,299 +762,13 @@ static void nfp_net_vecs_init(struct nfp_net *nn) ...@@ -2423,299 +762,13 @@ static void nfp_net_vecs_init(struct nfp_net *nn)
} }
} }
/**
 * nfp_net_tx_ring_free() - Free resources allocated to a TX ring
 * @tx_ring:	TX ring to free
 *
 * Safe to call on a partially allocated ring; all ring fields describing the
 * allocations are cleared afterwards.
 */
static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_dp *dp = &tx_ring->r_vec->nfp_net->dp;

	kvfree(tx_ring->txbufs);
	tx_ring->txbufs = NULL;

	if (tx_ring->txds) {
		dma_free_coherent(dp->dev, tx_ring->size,
				  tx_ring->txds, tx_ring->dma);
		tx_ring->txds = NULL;
	}

	tx_ring->dma = 0;
	tx_ring->size = 0;
	tx_ring->cnt = 0;
}
/**
 * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring
 * @dp:        NFP Net data path struct
 * @tx_ring:   TX Ring structure to allocate
 *
 * Allocates the DMA-coherent descriptor ring and the software buffer array.
 * For stack rings of a real netdev the XPS mapping is set up as well.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int
nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;

	tx_ring->cnt = dp->txd_cnt;
	tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->txds));

	tx_ring->txds = dma_alloc_coherent(dp->dev, tx_ring->size,
					   &tx_ring->dma,
					   GFP_KERNEL | __GFP_NOWARN);
	if (!tx_ring->txds) {
		netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n",
			    tx_ring->cnt);
		nfp_net_tx_ring_free(tx_ring);
		return -ENOMEM;
	}

	tx_ring->txbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->txbufs),
				   GFP_KERNEL);
	if (!tx_ring->txbufs) {
		nfp_net_tx_ring_free(tx_ring);
		return -ENOMEM;
	}

	if (dp->netdev && !tx_ring->is_xdp)
		netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask,
				    tx_ring->idx);

	return 0;
}
static void
nfp_net_tx_ring_bufs_free(struct nfp_net_dp *dp,
struct nfp_net_tx_ring *tx_ring)
{
unsigned int i;
if (!tx_ring->is_xdp)
return;
for (i = 0; i < tx_ring->cnt; i++) {
if (!tx_ring->txbufs[i].frag)
return;
nfp_net_dma_unmap_rx(dp, tx_ring->txbufs[i].dma_addr);
__free_page(virt_to_page(tx_ring->txbufs[i].frag));
}
}
/* Attach one freshly allocated, DMA-mapped buffer to every slot of an XDP
 * TX ring.  Non-XDP rings need nothing and succeed immediately.
 *
 * Return: 0 on success, -ENOMEM if a buffer could not be allocated (buffers
 * allocated so far are freed again).
 */
static int
nfp_net_tx_ring_bufs_alloc(struct nfp_net_dp *dp,
			   struct nfp_net_tx_ring *tx_ring)
{
	unsigned int idx;

	if (!tx_ring->is_xdp)
		return 0;

	for (idx = 0; idx < tx_ring->cnt; idx++) {
		struct nfp_net_tx_buf *buf = &tx_ring->txbufs[idx];

		buf->frag = nfp_net_rx_alloc_one(dp, &buf->dma_addr);
		if (buf->frag)
			continue;

		nfp_net_tx_ring_bufs_free(dp, tx_ring);
		return -ENOMEM;
	}

	return 0;
}
/* Allocate the TX ring array of @dp and fully set up every ring in it.
 * Rings at index >= num_stack_tx_rings are initialised with a non-zero
 * bias and reuse the vectors starting from index 0 again.
 *
 * Return: 0 on success, -ENOMEM on failure (everything is unwound).
 */
static int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
{
	struct nfp_net_tx_ring *ring;
	unsigned int r;

	dp->tx_rings = kcalloc(dp->num_tx_rings, sizeof(*dp->tx_rings),
			       GFP_KERNEL);
	if (!dp->tx_rings)
		return -ENOMEM;

	for (r = 0; r < dp->num_tx_rings; r++) {
		int bias = 0;

		if (r >= dp->num_stack_tx_rings)
			bias = dp->num_stack_tx_rings;

		ring = &dp->tx_rings[r];
		nfp_net_tx_ring_init(ring, &nn->r_vecs[r - bias], r, bias);

		if (nfp_net_tx_ring_alloc(dp, ring))
			goto err_free_prev;

		if (nfp_net_tx_ring_bufs_alloc(dp, ring))
			goto err_free_ring;
	}

	return 0;

err_free_ring:
	/* Ring r has descriptor memory but no buffers */
	nfp_net_tx_ring_free(&dp->tx_rings[r]);
err_free_prev:
	/* Rings below r are fully set up */
	while (r--) {
		nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]);
		nfp_net_tx_ring_free(&dp->tx_rings[r]);
	}
	kfree(dp->tx_rings);
	return -ENOMEM;
}
/* Tear down every TX ring of @dp and release the ring array itself. */
static void nfp_net_tx_rings_free(struct nfp_net_dp *dp)
{
	struct nfp_net_tx_ring *ring;
	unsigned int r;

	for (r = 0; r < dp->num_tx_rings; r++) {
		ring = &dp->tx_rings[r];

		nfp_net_tx_ring_bufs_free(dp, ring);
		nfp_net_tx_ring_free(ring);
	}

	kfree(dp->tx_rings);
}
/**
 * nfp_net_rx_ring_free() - Release the memory backing a RX ring
 * @rx_ring:	RX ring whose resources are released
 *
 * Unregisters the XDP RX queue info (netdev vNICs only), frees the software
 * buffer array matching the ring's mode and the coherent descriptor memory,
 * then clears the ring's bookkeeping fields.  The data path consulted here
 * is the one embedded in the ring's owning nfp_net.
 */
static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
{
	struct nfp_net_dp *dp = &rx_ring->r_vec->nfp_net->dp;

	if (dp->netdev)
		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);

	if (!nfp_net_has_xsk_pool_slow(dp, rx_ring->idx))
		kvfree(rx_ring->rxbufs);
	else
		kvfree(rx_ring->xsk_rxbufs);
	rx_ring->rxbufs = NULL;
	rx_ring->xsk_rxbufs = NULL;

	if (rx_ring->rxds) {
		dma_free_coherent(dp->dev, rx_ring->size,
				  rx_ring->rxds, rx_ring->dma);
		rx_ring->rxds = NULL;
	}

	rx_ring->dma = 0;
	rx_ring->size = 0;
	rx_ring->cnt = 0;
}
/**
* nfp_net_rx_ring_alloc() - Allocate resource for a RX ring
* @dp: NFP Net data path struct
* @rx_ring: RX ring to allocate
*
* Return: 0 on success, negative errno otherwise.
*/
static int
nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
{
enum xdp_mem_type mem_type;
size_t rxbuf_sw_desc_sz;
int err;
if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) {
mem_type = MEM_TYPE_XSK_BUFF_POOL;
rxbuf_sw_desc_sz = sizeof(*rx_ring->xsk_rxbufs);
} else {
mem_type = MEM_TYPE_PAGE_ORDER0;
rxbuf_sw_desc_sz = sizeof(*rx_ring->rxbufs);
}
if (dp->netdev) {
err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev,
rx_ring->idx, rx_ring->r_vec->napi.napi_id);
if (err < 0)
return err;
err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq,
mem_type, NULL);
if (err)
goto err_alloc;
}
rx_ring->cnt = dp->rxd_cnt;
rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds));
rx_ring->rxds = dma_alloc_coherent(dp->dev, rx_ring->size,
&rx_ring->dma,
GFP_KERNEL | __GFP_NOWARN);
if (!rx_ring->rxds) {
netdev_warn(dp->netdev, "failed to allocate RX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n",
rx_ring->cnt);
goto err_alloc;
}
if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) {
rx_ring->xsk_rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz,
GFP_KERNEL);
if (!rx_ring->xsk_rxbufs)
goto err_alloc;
} else {
rx_ring->rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz,
GFP_KERNEL);
if (!rx_ring->rxbufs)
goto err_alloc;
}
return 0;
err_alloc:
nfp_net_rx_ring_free(rx_ring);
return -ENOMEM;
}
/* Allocate the RX ring array of @dp and fully set up every ring in it,
 * ring r being serviced by vector r.
 *
 * Return: 0 on success, -ENOMEM on failure (everything is unwound).
 */
static int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
{
	struct nfp_net_rx_ring *ring;
	unsigned int r;

	dp->rx_rings = kcalloc(dp->num_rx_rings, sizeof(*dp->rx_rings),
			       GFP_KERNEL);
	if (!dp->rx_rings)
		return -ENOMEM;

	for (r = 0; r < dp->num_rx_rings; r++) {
		ring = &dp->rx_rings[r];
		nfp_net_rx_ring_init(ring, &nn->r_vecs[r], r);

		if (nfp_net_rx_ring_alloc(dp, ring))
			goto err_free_prev;

		if (nfp_net_rx_ring_bufs_alloc(dp, ring))
			goto err_free_ring;
	}

	return 0;

err_free_ring:
	/* Ring r has descriptor memory but no buffers */
	nfp_net_rx_ring_free(&dp->rx_rings[r]);
err_free_prev:
	/* Rings below r are fully set up */
	while (r--) {
		nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
		nfp_net_rx_ring_free(&dp->rx_rings[r]);
	}
	kfree(dp->rx_rings);
	return -ENOMEM;
}
/* Tear down every RX ring of @dp and release the ring array itself. */
static void nfp_net_rx_rings_free(struct nfp_net_dp *dp)
{
	struct nfp_net_rx_ring *ring;
	unsigned int r;

	for (r = 0; r < dp->num_rx_rings; r++) {
		ring = &dp->rx_rings[r];

		nfp_net_rx_ring_bufs_free(dp, ring);
		nfp_net_rx_ring_free(ring);
	}

	kfree(dp->rx_rings);
}
static void static void
nfp_net_napi_add(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, int idx) nfp_net_napi_add(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, int idx)
{ {
if (dp->netdev) if (dp->netdev)
netif_napi_add(dp->netdev, &r_vec->napi, netif_napi_add(dp->netdev, &r_vec->napi,
nfp_net_has_xsk_pool_slow(dp, idx) ? nfp_net_has_xsk_pool_slow(dp, idx) ?
nfp_net_xsk_poll : nfp_net_poll, dp->ops->xsk_poll : dp->ops->poll,
NAPI_POLL_WEIGHT); NAPI_POLL_WEIGHT);
else else
tasklet_enable(&r_vec->tasklet); tasklet_enable(&r_vec->tasklet);
...@@ -2858,17 +911,6 @@ static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *addr) ...@@ -2858,17 +911,6 @@ static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *addr)
nn_writew(nn, NFP_NET_CFG_MACADDR + 6, get_unaligned_be16(addr + 4)); nn_writew(nn, NFP_NET_CFG_MACADDR + 6, get_unaligned_be16(addr + 4));
} }
/* Zero the address, size and vector configuration entries of both the RX
 * and the TX ring with index @idx in the device's config space.
 */
static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
{
/* RX ring entries */
nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), 0);
nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), 0);
nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0);
/* TX ring entries */
nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0);
nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0);
nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0);
}
/** /**
* nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP
* @nn: NFP Net device to reconfigure * @nn: NFP Net device to reconfigure
...@@ -2911,25 +953,6 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn) ...@@ -2911,25 +953,6 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
nn->dp.ctrl = new_ctrl; nn->dp.ctrl = new_ctrl;
} }
/* Program RX ring @idx of the device from @rx_ring.  The size entry holds
 * log2 of the descriptor count; the vector entry is the irq_entry of the
 * vector servicing the ring.
 */
static void
nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn,
struct nfp_net_rx_ring *rx_ring, unsigned int idx)
{
/* Write the DMA address, size and MSI-X info to the device */
nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma);
nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt));
nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_entry);
}
/* Program TX ring @idx of the device from @tx_ring: descriptor memory DMA
 * address, log2 of the descriptor count, and the irq_entry of the vector
 * servicing the ring.
 */
static void
nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
struct nfp_net_tx_ring *tx_ring, unsigned int idx)
{
nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma);
nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt));
nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry);
}
/** /**
* nfp_net_set_config_and_enable() - Write control BAR and enable NFP * nfp_net_set_config_and_enable() - Write control BAR and enable NFP
* @nn: NFP Net device to reconfigure * @nn: NFP Net device to reconfigure
...@@ -2959,11 +982,11 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn) ...@@ -2959,11 +982,11 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn)
for (r = 0; r < nn->dp.num_rx_rings; r++) for (r = 0; r < nn->dp.num_rx_rings; r++)
nfp_net_rx_ring_hw_cfg_write(nn, &nn->dp.rx_rings[r], r); nfp_net_rx_ring_hw_cfg_write(nn, &nn->dp.rx_rings[r], r);
nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->dp.num_tx_rings == 64 ? nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE,
0xffffffffffffffffULL : ((u64)1 << nn->dp.num_tx_rings) - 1); U64_MAX >> (64 - nn->dp.num_tx_rings));
nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->dp.num_rx_rings == 64 ? nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE,
0xffffffffffffffffULL : ((u64)1 << nn->dp.num_rx_rings) - 1); U64_MAX >> (64 - nn->dp.num_rx_rings));
if (nn->dp.netdev) if (nn->dp.netdev)
nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr); nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr);
...@@ -3404,6 +1427,8 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn) ...@@ -3404,6 +1427,8 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn)
new->rx_rings = NULL; new->rx_rings = NULL;
new->num_r_vecs = 0; new->num_r_vecs = 0;
new->num_stack_tx_rings = 0; new->num_stack_tx_rings = 0;
new->txrwb = NULL;
new->txrwb_dma = 0;
return new; return new;
} }
...@@ -3867,7 +1892,7 @@ static int nfp_net_set_mac_address(struct net_device *netdev, void *addr) ...@@ -3867,7 +1892,7 @@ static int nfp_net_set_mac_address(struct net_device *netdev, void *addr)
return 0; return 0;
} }
const struct net_device_ops nfp_net_netdev_ops = { const struct net_device_ops nfp_nfd3_netdev_ops = {
.ndo_init = nfp_app_ndo_init, .ndo_init = nfp_app_ndo_init,
.ndo_uninit = nfp_app_ndo_uninit, .ndo_uninit = nfp_app_ndo_uninit,
.ndo_open = nfp_net_netdev_open, .ndo_open = nfp_net_netdev_open,
...@@ -3895,6 +1920,33 @@ const struct net_device_ops nfp_net_netdev_ops = { ...@@ -3895,6 +1920,33 @@ const struct net_device_ops nfp_net_netdev_ops = {
.ndo_get_devlink_port = nfp_devlink_get_devlink_port, .ndo_get_devlink_port = nfp_devlink_get_devlink_port,
}; };
/* net_device_ops installed on netdevs whose data path ops report
 * NFP_NFD_VER_NFDK.  Transmit goes through nfp_net_tx(), which dispatches
 * to the xmit hook of the selected data path.
 */
const struct net_device_ops nfp_nfdk_netdev_ops = {
.ndo_init = nfp_app_ndo_init,
.ndo_uninit = nfp_app_ndo_uninit,
.ndo_open = nfp_net_netdev_open,
.ndo_stop = nfp_net_netdev_close,
.ndo_start_xmit = nfp_net_tx,
.ndo_get_stats64 = nfp_net_stat64,
.ndo_vlan_rx_add_vid = nfp_net_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = nfp_net_vlan_rx_kill_vid,
.ndo_set_vf_mac = nfp_app_set_vf_mac,
.ndo_set_vf_vlan = nfp_app_set_vf_vlan,
.ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk,
.ndo_set_vf_trust = nfp_app_set_vf_trust,
.ndo_get_vf_config = nfp_app_get_vf_config,
.ndo_set_vf_link_state = nfp_app_set_vf_link_state,
.ndo_setup_tc = nfp_port_setup_tc,
.ndo_tx_timeout = nfp_net_tx_timeout,
.ndo_set_rx_mode = nfp_net_set_rx_mode,
.ndo_change_mtu = nfp_net_change_mtu,
.ndo_set_mac_address = nfp_net_set_mac_address,
.ndo_set_features = nfp_net_set_features,
.ndo_features_check = nfp_net_features_check,
.ndo_get_phys_port_name = nfp_net_get_phys_port_name,
.ndo_bpf = nfp_net_xdp,
.ndo_get_devlink_port = nfp_devlink_get_devlink_port,
};
static int nfp_udp_tunnel_sync(struct net_device *netdev, unsigned int table) static int nfp_udp_tunnel_sync(struct net_device *netdev, unsigned int table)
{ {
struct nfp_net *nn = netdev_priv(netdev); struct nfp_net *nn = netdev_priv(netdev);
...@@ -3937,10 +1989,10 @@ void nfp_net_info(struct nfp_net *nn) ...@@ -3937,10 +1989,10 @@ void nfp_net_info(struct nfp_net *nn)
nn->dp.num_tx_rings, nn->max_tx_rings, nn->dp.num_tx_rings, nn->max_tx_rings,
nn->dp.num_rx_rings, nn->max_rx_rings); nn->dp.num_rx_rings, nn->max_rx_rings);
nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n", nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n",
nn->fw_ver.resv, nn->fw_ver.class, nn->fw_ver.extend, nn->fw_ver.class,
nn->fw_ver.major, nn->fw_ver.minor, nn->fw_ver.major, nn->fw_ver.minor,
nn->max_mtu); nn->max_mtu);
nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
nn->cap, nn->cap,
nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "",
...@@ -3958,6 +2010,7 @@ void nfp_net_info(struct nfp_net *nn) ...@@ -3958,6 +2010,7 @@ void nfp_net_info(struct nfp_net *nn)
nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER ? "CTAG_FILTER " : "", nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER ? "CTAG_FILTER " : "",
nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "", nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "", nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "",
nn->cap & NFP_NET_CFG_CTRL_TXRWB ? "TXRWB " : "",
nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "", nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "",
nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "", nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "",
nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ? nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ?
...@@ -4010,6 +2063,26 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info, ...@@ -4010,6 +2063,26 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info,
nn->dp.ctrl_bar = ctrl_bar; nn->dp.ctrl_bar = ctrl_bar;
nn->dev_info = dev_info; nn->dev_info = dev_info;
nn->pdev = pdev; nn->pdev = pdev;
nfp_net_get_fw_version(&nn->fw_ver, ctrl_bar);
switch (FIELD_GET(NFP_NET_CFG_VERSION_DP_MASK, nn->fw_ver.extend)) {
case NFP_NET_CFG_VERSION_DP_NFD3:
nn->dp.ops = &nfp_nfd3_ops;
break;
case NFP_NET_CFG_VERSION_DP_NFDK:
if (nn->fw_ver.major < 5) {
dev_err(&pdev->dev,
"NFDK must use ABI 5 or newer, found: %d\n",
nn->fw_ver.major);
err = -EINVAL;
goto err_free_nn;
}
nn->dp.ops = &nfp_nfdk_ops;
break;
default:
err = -EINVAL;
goto err_free_nn;
}
nn->max_tx_rings = max_tx_rings; nn->max_tx_rings = max_tx_rings;
nn->max_rx_rings = max_rx_rings; nn->max_rx_rings = max_rx_rings;
...@@ -4228,7 +2301,15 @@ static void nfp_net_netdev_init(struct nfp_net *nn) ...@@ -4228,7 +2301,15 @@ static void nfp_net_netdev_init(struct nfp_net *nn)
nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_LSO_ANY; nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_LSO_ANY;
/* Finalise the netdev setup */ /* Finalise the netdev setup */
netdev->netdev_ops = &nfp_net_netdev_ops; switch (nn->dp.ops->version) {
case NFP_NFD_VER_NFD3:
netdev->netdev_ops = &nfp_nfd3_netdev_ops;
break;
case NFP_NFD_VER_NFDK:
netdev->netdev_ops = &nfp_nfdk_netdev_ops;
break;
}
netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000); netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
/* MTU range: 68 - hw-specific max */ /* MTU range: 68 - hw-specific max */
...@@ -4276,6 +2357,9 @@ static int nfp_net_read_caps(struct nfp_net *nn) ...@@ -4276,6 +2357,9 @@ static int nfp_net_read_caps(struct nfp_net *nn)
nn->dp.rx_offset = NFP_NET_RX_OFFSET; nn->dp.rx_offset = NFP_NET_RX_OFFSET;
} }
/* Mask out NFD-version-specific features */
nn->cap &= nn->dp.ops->cap_mask;
/* For control vNICs mask out the capabilities app doesn't want. */ /* For control vNICs mask out the capabilities app doesn't want. */
if (!nn->dp.netdev) if (!nn->dp.netdev)
nn->cap &= nn->app->type->ctrl_cap_mask; nn->cap &= nn->app->type->ctrl_cap_mask;
...@@ -4328,6 +2412,10 @@ int nfp_net_init(struct nfp_net *nn) ...@@ -4328,6 +2412,10 @@ int nfp_net_init(struct nfp_net *nn)
nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD; nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
} }
/* Enable TX pointer writeback, if supported */
if (nn->cap & NFP_NET_CFG_CTRL_TXRWB)
nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXRWB;
/* Stash the re-configuration queue away. First odd queue in TX Bar */ /* Stash the re-configuration queue away. First odd queue in TX Bar */
nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ; nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
......
...@@ -149,7 +149,10 @@ ...@@ -149,7 +149,10 @@
* - define more STS bits * - define more STS bits
*/ */
#define NFP_NET_CFG_VERSION 0x0030 #define NFP_NET_CFG_VERSION 0x0030
#define NFP_NET_CFG_VERSION_RESERVED_MASK (0xff << 24) #define NFP_NET_CFG_VERSION_RESERVED_MASK (0xfe << 24)
#define NFP_NET_CFG_VERSION_DP_NFD3 0
#define NFP_NET_CFG_VERSION_DP_NFDK 1
#define NFP_NET_CFG_VERSION_DP_MASK 1
#define NFP_NET_CFG_VERSION_CLASS_MASK (0xff << 16) #define NFP_NET_CFG_VERSION_CLASS_MASK (0xff << 16)
#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16) #define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16)
#define NFP_NET_CFG_VERSION_CLASS_GENERIC 0 #define NFP_NET_CFG_VERSION_CLASS_GENERIC 0
......
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ /* Copyright (C) 2015-2019 Netronome Systems, Inc. */
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/rtnetlink.h> #include <linux/rtnetlink.h>
#include "nfp_net.h" #include "nfp_net.h"
#include "nfp_net_dp.h"
static struct dentry *nfp_dir; static struct dentry *nfp_dir;
...@@ -80,10 +81,8 @@ static int nfp_tx_q_show(struct seq_file *file, void *data) ...@@ -80,10 +81,8 @@ static int nfp_tx_q_show(struct seq_file *file, void *data)
{ {
struct nfp_net_r_vector *r_vec = file->private; struct nfp_net_r_vector *r_vec = file->private;
struct nfp_net_tx_ring *tx_ring; struct nfp_net_tx_ring *tx_ring;
struct nfp_net_tx_desc *txd;
int d_rd_p, d_wr_p, txd_cnt;
struct nfp_net *nn; struct nfp_net *nn;
int i; int d_rd_p, d_wr_p;
rtnl_lock(); rtnl_lock();
...@@ -97,52 +96,20 @@ static int nfp_tx_q_show(struct seq_file *file, void *data) ...@@ -97,52 +96,20 @@ static int nfp_tx_q_show(struct seq_file *file, void *data)
if (!nfp_net_running(nn)) if (!nfp_net_running(nn))
goto out; goto out;
txd_cnt = tx_ring->cnt;
d_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); d_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
d_wr_p = nfp_qcp_wr_ptr_read(tx_ring->qcp_q); d_wr_p = nfp_qcp_wr_ptr_read(tx_ring->qcp_q);
seq_printf(file, "TX[%02d,%02d%s]: cnt=%u dma=%pad host=%p H_RD=%u H_WR=%u D_RD=%u D_WR=%u\n", seq_printf(file, "TX[%02d,%02d%s]: cnt=%u dma=%pad host=%p H_RD=%u H_WR=%u D_RD=%u D_WR=%u",
tx_ring->idx, tx_ring->qcidx, tx_ring->idx, tx_ring->qcidx,
tx_ring == r_vec->tx_ring ? "" : "xdp", tx_ring == r_vec->tx_ring ? "" : "xdp",
tx_ring->cnt, &tx_ring->dma, tx_ring->txds, tx_ring->cnt, &tx_ring->dma, tx_ring->txds,
tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p); tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p);
if (tx_ring->txrwb)
seq_printf(file, " TXRWB=%llu", *tx_ring->txrwb);
seq_putc(file, '\n');
for (i = 0; i < txd_cnt; i++) { nfp_net_debugfs_print_tx_descs(file, &nn->dp, r_vec, tx_ring,
struct xdp_buff *xdp; d_rd_p, d_wr_p);
struct sk_buff *skb;
txd = &tx_ring->txds[i];
seq_printf(file, "%04d: 0x%08x 0x%08x 0x%08x 0x%08x", i,
txd->vals[0], txd->vals[1],
txd->vals[2], txd->vals[3]);
if (!tx_ring->is_xdp) {
skb = READ_ONCE(tx_ring->txbufs[i].skb);
if (skb)
seq_printf(file, " skb->head=%p skb->data=%p",
skb->head, skb->data);
} else {
xdp = READ_ONCE(tx_ring->txbufs[i].xdp);
if (xdp)
seq_printf(file, " xdp->data=%p", xdp->data);
}
if (tx_ring->txbufs[i].dma_addr)
seq_printf(file, " dma_addr=%pad",
&tx_ring->txbufs[i].dma_addr);
if (i == tx_ring->rd_p % txd_cnt)
seq_puts(file, " H_RD");
if (i == tx_ring->wr_p % txd_cnt)
seq_puts(file, " H_WR");
if (i == d_rd_p % txd_cnt)
seq_puts(file, " D_RD");
if (i == d_wr_p % txd_cnt)
seq_puts(file, " D_WR");
seq_putc(file, '\n');
}
out: out:
rtnl_unlock(); rtnl_unlock();
return 0; return 0;
......
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2015-2019 Netronome Systems, Inc. */
#include "nfp_app.h"
#include "nfp_net_dp.h"
#include "nfp_net_xsk.h"
/**
 * nfp_net_rx_alloc_one() - Allocate and map page frag for RX
 * @dp:		NFP Net data path struct
 * @dma_addr:	Pointer to storage for DMA address (output param)
 *
 * Allocates a new buffer and maps it for DMA.  With an XDP program attached
 * a whole page is used, otherwise a page frag of @dp->fl_bufsz bytes.
 *
 * Return: allocated page frag or NULL on failure.
 */
void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
{
	void *frag = NULL;

	if (dp->xdp_prog) {
		struct page *page = alloc_page(GFP_KERNEL);

		if (page)
			frag = page_address(page);
	} else {
		frag = netdev_alloc_frag(dp->fl_bufsz);
	}

	if (!frag) {
		nn_dp_warn(dp, "Failed to alloc receive page frag\n");
		return NULL;
	}

	*dma_addr = nfp_net_dma_map_rx(dp, frag);
	if (!dma_mapping_error(dp->dev, *dma_addr))
		return frag;

	/* Mapping failed: hand the buffer back the way it was obtained */
	nfp_net_free_frag(frag, dp->xdp_prog);
	nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
	return NULL;
}
/**
 * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring
 * @tx_ring:	TX ring structure
 * @dp:		NFP Net data path struct
 * @r_vec:	IRQ vector servicing this ring
 * @idx:	Ring index
 * @is_xdp:	Is this an XDP TX ring?
 *
 * Records the ring's identity, derives its queue controller index and
 * queue pointer from @idx and, when @dp has a TX write-back area, points
 * the ring at its own slot in it.
 */
static void
nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring, struct nfp_net_dp *dp,
		     struct nfp_net_r_vector *r_vec, unsigned int idx,
		     bool is_xdp)
{
	struct nfp_net *nn = r_vec->nfp_net;

	tx_ring->r_vec = r_vec;
	tx_ring->idx = idx;
	tx_ring->is_xdp = is_xdp;
	u64_stats_init(&r_vec->tx_sync);

	tx_ring->qcidx = idx * nn->stride_tx;
	tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx);

	if (dp->txrwb)
		tx_ring->txrwb = &dp->txrwb[idx];
	else
		tx_ring->txrwb = NULL;
}
/**
 * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring
 * @rx_ring:	RX ring structure
 * @r_vec:	IRQ vector servicing this ring
 * @idx:	Ring index
 *
 * Records the ring's identity and derives its freelist queue controller
 * index and queue pointer from @idx.
 */
static void
nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring,
		     struct nfp_net_r_vector *r_vec, unsigned int idx)
{
	struct nfp_net *nn = r_vec->nfp_net;

	rx_ring->r_vec = r_vec;
	rx_ring->idx = idx;
	u64_stats_init(&r_vec->rx_sync);

	rx_ring->fl_qcidx = idx * nn->stride_rx;
	rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx);
}
/**
 * nfp_net_rx_ring_reset() - Reflect in SW state of freelist after disable
 * @rx_ring:	RX ring structure
 *
 * Assumes that the device is stopped, must be idempotent.
 */
void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
{
	unsigned int hole, tail;

	/* Both pointers at zero means the ring was never fed FL bufs, so
	 * there is nothing to rearrange.  RX rings are always kept at
	 * cnt - 1 FL bufs.
	 */
	if (!rx_ring->wr_p && !rx_ring->rd_p)
		return;

	/* Move the empty entry to the end of the list */
	hole = D_IDX(rx_ring, rx_ring->wr_p);
	tail = rx_ring->cnt - 1;

	if (!rx_ring->r_vec->xsk_pool) {
		rx_ring->rxbufs[hole] = rx_ring->rxbufs[tail];
		memset(&rx_ring->rxbufs[tail], 0,
		       sizeof(rx_ring->rxbufs[tail]));
	} else {
		rx_ring->xsk_rxbufs[hole] = rx_ring->xsk_rxbufs[tail];
		memset(&rx_ring->xsk_rxbufs[tail], 0,
		       sizeof(rx_ring->xsk_rxbufs[tail]));
	}

	memset(rx_ring->rxds, 0, rx_ring->size);
	rx_ring->rd_p = 0;
	rx_ring->wr_p = 0;
}
/**
* nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring
* @dp: NFP Net data path struct
* @rx_ring: RX ring to remove buffers from
*
* Assumes that the device is stopped and buffers are in [0, ring->cnt - 1)
* entries. After device is disabled nfp_net_rx_ring_reset() must be called
* to restore required ring geometry.
*/
static void
nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp,
struct nfp_net_rx_ring *rx_ring)
{
unsigned int i;
if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx))
return;
for (i = 0; i < rx_ring->cnt - 1; i++) {
/* NULL skb can only happen when initial filling of the ring
* fails to allocate enough buffers and calls here to free
* already allocated ones.
*/
if (!rx_ring->rxbufs[i].frag)
continue;
nfp_net_dma_unmap_rx(dp, rx_ring->rxbufs[i].dma_addr);
nfp_net_free_frag(rx_ring->rxbufs[i].frag, dp->xdp_prog);
rx_ring->rxbufs[i].dma_addr = 0;
rx_ring->rxbufs[i].frag = NULL;
}
}
/**
 * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW)
 * @dp:		NFP Net data path struct
 * @rx_ring:	RX ring to fill with buffers
 *
 * Populates the first cnt - 1 slots of the ring.  Rings with an XSK pool
 * are skipped and report success.
 *
 * Return: 0 on success, -ENOMEM if a buffer could not be allocated (buffers
 * allocated so far are freed again).
 */
static int
nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp,
			   struct nfp_net_rx_ring *rx_ring)
{
	unsigned int idx;

	if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx))
		return 0;

	for (idx = 0; idx < rx_ring->cnt - 1; idx++) {
		struct nfp_net_rx_buf *buf = &rx_ring->rxbufs[idx];

		buf->frag = nfp_net_rx_alloc_one(dp, &buf->dma_addr);
		if (buf->frag)
			continue;

		nfp_net_rx_ring_bufs_free(dp, rx_ring);
		return -ENOMEM;
	}

	return 0;
}
/* Allocate the TX ring array of @dp, the TX pointer write-back area (one
 * u64 per ring, only when NFP_NET_CFG_CTRL_TXRWB is set in @dp->ctrl) and
 * fully set up every ring.  Rings at index >= num_stack_tx_rings are
 * initialised with a non-zero bias and reuse the vectors from index 0.
 *
 * Return: 0 on success, -ENOMEM on failure (everything is unwound).
 */
int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
{
	struct nfp_net_tx_ring *ring;
	unsigned int r;

	dp->tx_rings = kcalloc(dp->num_tx_rings, sizeof(*dp->tx_rings),
			       GFP_KERNEL);
	if (!dp->tx_rings)
		return -ENOMEM;

	if (dp->ctrl & NFP_NET_CFG_CTRL_TXRWB) {
		dp->txrwb = dma_alloc_coherent(dp->dev,
					       dp->num_tx_rings * sizeof(u64),
					       &dp->txrwb_dma, GFP_KERNEL);
		if (!dp->txrwb)
			goto err_free_rings;
	}

	for (r = 0; r < dp->num_tx_rings; r++) {
		int bias = 0;

		if (r >= dp->num_stack_tx_rings)
			bias = dp->num_stack_tx_rings;

		ring = &dp->tx_rings[r];
		nfp_net_tx_ring_init(ring, dp, &nn->r_vecs[r - bias], r, bias);

		if (nfp_net_tx_ring_alloc(dp, ring))
			goto err_free_prev;

		if (nfp_net_tx_ring_bufs_alloc(dp, ring))
			goto err_free_ring;
	}

	return 0;

err_free_ring:
	/* Ring r has descriptor memory but no buffers */
	nfp_net_tx_ring_free(dp, &dp->tx_rings[r]);
err_free_prev:
	/* Rings below r are fully set up */
	while (r--) {
		nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]);
		nfp_net_tx_ring_free(dp, &dp->tx_rings[r]);
	}
	if (dp->txrwb)
		dma_free_coherent(dp->dev, dp->num_tx_rings * sizeof(u64),
				  dp->txrwb, dp->txrwb_dma);
err_free_rings:
	kfree(dp->tx_rings);
	return -ENOMEM;
}
/* Tear down every TX ring of @dp, release the TX pointer write-back area
 * if one was allocated, and free the ring array itself.
 */
void nfp_net_tx_rings_free(struct nfp_net_dp *dp)
{
	struct nfp_net_tx_ring *ring;
	unsigned int r;

	for (r = 0; r < dp->num_tx_rings; r++) {
		ring = &dp->tx_rings[r];

		nfp_net_tx_ring_bufs_free(dp, ring);
		nfp_net_tx_ring_free(dp, ring);
	}

	if (dp->txrwb)
		dma_free_coherent(dp->dev, dp->num_tx_rings * sizeof(u64),
				  dp->txrwb, dp->txrwb_dma);

	kfree(dp->tx_rings);
}
/**
 * nfp_net_rx_ring_free() - Release the memory backing a RX ring
 * @rx_ring:	RX ring whose resources are released
 *
 * Unregisters the XDP RX queue info (netdev vNICs only), frees the software
 * buffer array matching the ring's mode and the coherent descriptor memory,
 * then clears the ring's bookkeeping fields.  The data path consulted here
 * is the one embedded in the ring's owning nfp_net.
 */
static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
{
	struct nfp_net_dp *dp = &rx_ring->r_vec->nfp_net->dp;

	if (dp->netdev)
		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);

	if (!nfp_net_has_xsk_pool_slow(dp, rx_ring->idx))
		kvfree(rx_ring->rxbufs);
	else
		kvfree(rx_ring->xsk_rxbufs);
	rx_ring->rxbufs = NULL;
	rx_ring->xsk_rxbufs = NULL;

	if (rx_ring->rxds) {
		dma_free_coherent(dp->dev, rx_ring->size,
				  rx_ring->rxds, rx_ring->dma);
		rx_ring->rxds = NULL;
	}

	rx_ring->dma = 0;
	rx_ring->size = 0;
	rx_ring->cnt = 0;
}
/**
* nfp_net_rx_ring_alloc() - Allocate resource for a RX ring
* @dp: NFP Net data path struct
* @rx_ring: RX ring to allocate
*
* Return: 0 on success, negative errno otherwise.
*/
static int
nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
{
enum xdp_mem_type mem_type;
size_t rxbuf_sw_desc_sz;
int err;
if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) {
mem_type = MEM_TYPE_XSK_BUFF_POOL;
rxbuf_sw_desc_sz = sizeof(*rx_ring->xsk_rxbufs);
} else {
mem_type = MEM_TYPE_PAGE_ORDER0;
rxbuf_sw_desc_sz = sizeof(*rx_ring->rxbufs);
}
if (dp->netdev) {
err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev,
rx_ring->idx, rx_ring->r_vec->napi.napi_id);
if (err < 0)
return err;
err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, mem_type, NULL);
if (err)
goto err_alloc;
}
rx_ring->cnt = dp->rxd_cnt;
rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds));
rx_ring->rxds = dma_alloc_coherent(dp->dev, rx_ring->size,
&rx_ring->dma,
GFP_KERNEL | __GFP_NOWARN);
if (!rx_ring->rxds) {
netdev_warn(dp->netdev, "failed to allocate RX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n",
rx_ring->cnt);
goto err_alloc;
}
if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) {
rx_ring->xsk_rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz,
GFP_KERNEL);
if (!rx_ring->xsk_rxbufs)
goto err_alloc;
} else {
rx_ring->rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz,
GFP_KERNEL);
if (!rx_ring->rxbufs)
goto err_alloc;
}
return 0;
err_alloc:
nfp_net_rx_ring_free(rx_ring);
return -ENOMEM;
}
/* Allocate the RX ring array of @dp and fully set up every ring in it,
 * ring r being serviced by vector r.
 *
 * Return: 0 on success, -ENOMEM on failure (everything is unwound).
 */
int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
{
	struct nfp_net_rx_ring *ring;
	unsigned int r;

	dp->rx_rings = kcalloc(dp->num_rx_rings, sizeof(*dp->rx_rings),
			       GFP_KERNEL);
	if (!dp->rx_rings)
		return -ENOMEM;

	for (r = 0; r < dp->num_rx_rings; r++) {
		ring = &dp->rx_rings[r];
		nfp_net_rx_ring_init(ring, &nn->r_vecs[r], r);

		if (nfp_net_rx_ring_alloc(dp, ring))
			goto err_free_prev;

		if (nfp_net_rx_ring_bufs_alloc(dp, ring))
			goto err_free_ring;
	}

	return 0;

err_free_ring:
	/* Ring r has descriptor memory but no buffers */
	nfp_net_rx_ring_free(&dp->rx_rings[r]);
err_free_prev:
	/* Rings below r are fully set up */
	while (r--) {
		nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
		nfp_net_rx_ring_free(&dp->rx_rings[r]);
	}
	kfree(dp->rx_rings);
	return -ENOMEM;
}
/* Tear down every RX ring of @dp and release the ring array itself. */
void nfp_net_rx_rings_free(struct nfp_net_dp *dp)
{
	struct nfp_net_rx_ring *ring;
	unsigned int r;

	for (r = 0; r < dp->num_rx_rings; r++) {
		ring = &dp->rx_rings[r];

		nfp_net_rx_ring_bufs_free(dp, ring);
		nfp_net_rx_ring_free(ring);
	}

	kfree(dp->rx_rings);
}
/* Program RX ring @idx of the device from @rx_ring.  The size entry holds
 * log2 of the descriptor count; the vector entry is the irq_entry of the
 * vector servicing the ring.
 */
void
nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn,
struct nfp_net_rx_ring *rx_ring, unsigned int idx)
{
/* Write the DMA address, size and MSI-X info to the device */
nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma);
nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt));
nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_entry);
}
/* Program TX ring @idx of the device from @tx_ring: descriptor memory DMA
 * address, log2 of the descriptor count, and the irq_entry of the vector
 * servicing the ring.  When the ring has a TX pointer write-back slot, the
 * slot is zeroed and its DMA address (slot @idx of nn->dp.txrwb_dma) is
 * written as well.
 */
void
nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
struct nfp_net_tx_ring *tx_ring, unsigned int idx)
{
nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma);
if (tx_ring->txrwb) {
/* Clear the host-side slot before telling the device about it */
*tx_ring->txrwb = 0;
nn_writeq(nn, NFP_NET_CFG_TXR_WB_ADDR(idx),
nn->dp.txrwb_dma + idx * sizeof(u64));
}
nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt));
nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry);
}
/* Zero the configuration entries of both the RX and the TX ring with index
 * @idx in the device's config space: address, size and vector, plus the TX
 * write-back address.
 */
void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
{
/* RX ring entries */
nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), 0);
nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), 0);
nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0);
/* TX ring entries */
nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0);
nn_writeq(nn, NFP_NET_CFG_TXR_WB_ADDR(idx), 0);
nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0);
nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0);
}
netdev_tx_t nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
{
struct nfp_net *nn = netdev_priv(netdev);
return nn->dp.ops->xmit(skb, netdev);
}
/* Send a control message through vector 0 using the data path's
 * ctrl_tx_one hook.  Takes no lock itself.
 */
bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb)
{
	return nn->dp.ops->ctrl_tx_one(nn, &nn->r_vecs[0], skb, false);
}
/* Send a control message through vector 0 using the data path's
 * ctrl_tx_one hook, holding the vector's lock (BH disabled) around the call.
 */
bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb)
{
	struct nfp_net_r_vector *ctrl_vec = &nn->r_vecs[0];
	bool sent;

	spin_lock_bh(&ctrl_vec->lock);
	sent = nn->dp.ops->ctrl_tx_one(nn, ctrl_vec, skb, false);
	spin_unlock_bh(&ctrl_vec->lock);

	return sent;
}
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (C) 2019 Netronome Systems, Inc. */
#ifndef _NFP_NET_DP_
#define _NFP_NET_DP_
#include "nfp_net.h"
static inline dma_addr_t nfp_net_dma_map_rx(struct nfp_net_dp *dp, void *frag)
{
return dma_map_single_attrs(dp->dev, frag + NFP_NET_RX_BUF_HEADROOM,
dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
}
/* Sync the whole mapped data area of an RX buffer for device access. */
static inline void
nfp_net_dma_sync_dev_rx(const struct nfp_net_dp *dp, dma_addr_t dma_addr)
{
	size_t map_len = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;

	dma_sync_single_for_device(dp->dev, dma_addr, map_len,
				   dp->rx_dma_dir);
}
/* Undo nfp_net_dma_map_rx(): unmap the data area of an RX buffer, without
 * a CPU sync.
 */
static inline void nfp_net_dma_unmap_rx(struct nfp_net_dp *dp,
					dma_addr_t dma_addr)
{
	size_t map_len = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;

	dma_unmap_single_attrs(dp->dev, dma_addr, map_len,
			       dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
}
/* Sync @len bytes for CPU access, starting NFP_NET_RX_BUF_HEADROOM bytes
 * before @dma_addr.
 */
static inline void nfp_net_dma_sync_cpu_rx(struct nfp_net_dp *dp,
					   dma_addr_t dma_addr,
					   unsigned int len)
{
	dma_addr_t sync_start = dma_addr - NFP_NET_RX_BUF_HEADROOM;

	dma_sync_single_for_cpu(dp->dev, sync_start, len, dp->rx_dma_dir);
}
/**
 * nfp_net_tx_full() - check if the TX ring is full
 * @tx_ring: TX ring to check
 * @dcnt:    Number of descriptors that need to be enqueued (must be >= 1)
 *
 * The check uses only the *host copy* of the read/write pointers, so the
 * real TX queue may already have some newly freed slots that are not
 * accounted for here.
 *
 * Return: True if the ring is full.
 */
static inline int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt)
{
	/* Fewer descriptors in flight than (cnt - dcnt): there is room */
	if (tx_ring->wr_p - tx_ring->rd_p < tx_ring->cnt - dcnt)
		return 0;

	return 1;
}
/* Publish the descriptors queued so far: after a write barrier, add the
 * accumulated wr_ptr_add count to the ring's queue write pointer, then
 * reset the accumulator.
 */
static inline void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring)
{
wmb(); /* drain writebuffer */
nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add);
tx_ring->wr_ptr_add = 0;
}
/* Read the TX completion pointer of a ring: from the ring's write-back slot
 * when it has one, otherwise from the queue's read pointer.  @dp is not
 * used.
 */
static inline u32
nfp_net_read_tx_cmpl(struct nfp_net_tx_ring *tx_ring, struct nfp_net_dp *dp)
{
	if (!tx_ring->txrwb)
		return nfp_qcp_rd_ptr_read(tx_ring->qcp_q);

	return *tx_ring->txrwb;
}
/* Free an RX buffer the way it was allocated: a whole page when @xdp is
 * true, a page frag otherwise.
 */
static inline void nfp_net_free_frag(void *frag, bool xdp)
{
	if (xdp) {
		__free_page(virt_to_page(frag));
		return;
	}

	skb_free_frag(frag);
}
/**
 * nfp_net_irq_unmask() - Unmask automasked interrupt
 * @nn: NFP Network structure
 * @entry_nr: MSI-X table entry
 *
 * Clear the ICR for the IRQ entry.
 */
static inline void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr)
{
/* Write the "unmasked" value into this entry's ICR byte */
nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED);
/* NOTE(review): presumably pushes the posted write out to the device
 * before returning - confirm against nn_pci_flush()
 */
nn_pci_flush(nn);
}
/* Only pointers to struct seq_file are used in this header, so a forward
 * declaration is sufficient.
 */
struct seq_file;
/* Common ring helpers: declared here only, the definitions are not part of
 * this header.
 */
void
nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn,
struct nfp_net_rx_ring *rx_ring, unsigned int idx);
void
nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
struct nfp_net_tx_ring *tx_ring, unsigned int idx);
void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx);
void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr);
int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp);
int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp);
void nfp_net_rx_rings_free(struct nfp_net_dp *dp);
void nfp_net_tx_rings_free(struct nfp_net_dp *dp);
void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring);
/* Data path flavours; the value is carried in nfp_dp_ops.version so callers
 * can tell which implementation is in use.
 */
enum nfp_nfd_version {
NFP_NFD_VER_NFD3,
NFP_NFD_VER_NFDK,
};
/**
 * struct nfp_dp_ops - Hooks to wrap the different data path implementations
 * @version: Indicate dp type (one of enum nfp_nfd_version)
 * @tx_min_desc_per_pkt: Minimal TX descs needed for each packet
 * @cap_mask: Mask of supported features
 * @poll: Napi poll for normal rx/tx
 * @xsk_poll: Napi poll when xsk is enabled
 * @ctrl_poll: Tasklet poll for ctrl rx/tx
 * @xmit: Xmit for normal path
 * @ctrl_tx_one: Xmit for ctrl path
 * @rx_ring_fill_freelist: Give buffers from the ring to FW
 * @tx_ring_alloc: Allocate resource for a TX ring
 * @tx_ring_reset: Free any untransmitted buffers and reset pointers
 * @tx_ring_free: Free resources allocated to a TX ring
 * @tx_ring_bufs_alloc: Allocate resource for each TX buffer
 * @tx_ring_bufs_free: Free resources allocated to each TX buffer
 * @print_tx_descs: Show TX ring's info for debug purpose
 *
 * The nfp_net_*() inline wrappers in this header dispatch through the
 * instance referenced by dp->ops.
 */
struct nfp_dp_ops {
enum nfp_nfd_version version;
unsigned int tx_min_desc_per_pkt;
u32 cap_mask;
int (*poll)(struct napi_struct *napi, int budget);
int (*xsk_poll)(struct napi_struct *napi, int budget);
void (*ctrl_poll)(struct tasklet_struct *t);
netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *netdev);
bool (*ctrl_tx_one)(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
struct sk_buff *skb, bool old);
void (*rx_ring_fill_freelist)(struct nfp_net_dp *dp,
struct nfp_net_rx_ring *rx_ring);
int (*tx_ring_alloc)(struct nfp_net_dp *dp,
struct nfp_net_tx_ring *tx_ring);
void (*tx_ring_reset)(struct nfp_net_dp *dp,
struct nfp_net_tx_ring *tx_ring);
void (*tx_ring_free)(struct nfp_net_tx_ring *tx_ring);
int (*tx_ring_bufs_alloc)(struct nfp_net_dp *dp,
struct nfp_net_tx_ring *tx_ring);
void (*tx_ring_bufs_free)(struct nfp_net_dp *dp,
struct nfp_net_tx_ring *tx_ring);
void (*print_tx_descs)(struct seq_file *file,
struct nfp_net_r_vector *r_vec,
struct nfp_net_tx_ring *tx_ring,
u32 d_rd_p, u32 d_wr_p);
};
/**
 * nfp_net_tx_ring_reset() - reset a TX ring via the active data path
 * @dp:      NFP Net data path struct; supplies the ops table
 * @tx_ring: TX ring to reset
 *
 * Dispatches to the tx_ring_reset hook of @dp->ops. The hook returns void,
 * so it is called as a plain statement: a return statement with an
 * expression is not allowed in a void function in ISO C.
 */
static inline void
nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
{
	dp->ops->tx_ring_reset(dp, tx_ring);
}
static inline void
nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp,
struct nfp_net_rx_ring *rx_ring)
{
dp->ops->rx_ring_fill_freelist(dp, rx_ring);
}
static inline int
nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
{
return dp->ops->tx_ring_alloc(dp, tx_ring);
}
static inline void
nfp_net_tx_ring_free(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
{
dp->ops->tx_ring_free(tx_ring);
}
static inline int
nfp_net_tx_ring_bufs_alloc(struct nfp_net_dp *dp,
struct nfp_net_tx_ring *tx_ring)
{
return dp->ops->tx_ring_bufs_alloc(dp, tx_ring);
}
static inline void
nfp_net_tx_ring_bufs_free(struct nfp_net_dp *dp,
struct nfp_net_tx_ring *tx_ring)
{
dp->ops->tx_ring_bufs_free(dp, tx_ring);
}
/**
 * nfp_net_debugfs_print_tx_descs() - dispatch to the print_tx_descs hook
 * @file:    seq_file handed to the hook
 * @dp:      NFP Net data path struct; only used to find the ops table
 * @r_vec:   Ring vector handed to the hook
 * @tx_ring: TX ring handed to the hook
 * @d_rd_p:  Read pointer value handed to the hook
 * @d_wr_p:  Write pointer value handed to the hook
 */
static inline void
nfp_net_debugfs_print_tx_descs(struct seq_file *file, struct nfp_net_dp *dp,
			       struct nfp_net_r_vector *r_vec,
			       struct nfp_net_tx_ring *tx_ring,
			       u32 d_rd_p, u32 d_wr_p)
{
	const struct nfp_dp_ops *ops = dp->ops;

	ops->print_tx_descs(file, r_vec, tx_ring, d_rd_p, d_wr_p);
}
/* Ops tables of the two data path implementations; defined outside this header. */
extern const struct nfp_dp_ops nfp_nfd3_ops;
extern const struct nfp_dp_ops nfp_nfdk_ops;
/* NOTE(review): presumably the common transmit entry point for netdevs - confirm at the definition. */
netdev_tx_t nfp_net_tx(struct sk_buff *skb, struct net_device *netdev);
#endif /* _NFP_NET_DP_ */
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "nfp_app.h" #include "nfp_app.h"
#include "nfp_main.h" #include "nfp_main.h"
#include "nfp_net_ctrl.h" #include "nfp_net_ctrl.h"
#include "nfp_net_dp.h"
#include "nfp_net.h" #include "nfp_net.h"
#include "nfp_port.h" #include "nfp_port.h"
...@@ -218,7 +219,7 @@ nfp_net_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) ...@@ -218,7 +219,7 @@ nfp_net_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
struct nfp_net *nn = netdev_priv(netdev); struct nfp_net *nn = netdev_priv(netdev);
snprintf(vnic_version, sizeof(vnic_version), "%d.%d.%d.%d", snprintf(vnic_version, sizeof(vnic_version), "%d.%d.%d.%d",
nn->fw_ver.resv, nn->fw_ver.class, nn->fw_ver.extend, nn->fw_ver.class,
nn->fw_ver.major, nn->fw_ver.minor); nn->fw_ver.major, nn->fw_ver.minor);
strlcpy(drvinfo->bus_info, pci_name(nn->pdev), strlcpy(drvinfo->bus_info, pci_name(nn->pdev),
sizeof(drvinfo->bus_info)); sizeof(drvinfo->bus_info));
...@@ -390,7 +391,7 @@ static void nfp_net_get_ringparam(struct net_device *netdev, ...@@ -390,7 +391,7 @@ static void nfp_net_get_ringparam(struct net_device *netdev,
u32 qc_max = nn->dev_info->max_qc_size; u32 qc_max = nn->dev_info->max_qc_size;
ring->rx_max_pending = qc_max; ring->rx_max_pending = qc_max;
ring->tx_max_pending = qc_max; ring->tx_max_pending = qc_max / nn->dp.ops->tx_min_desc_per_pkt;
ring->rx_pending = nn->dp.rxd_cnt; ring->rx_pending = nn->dp.rxd_cnt;
ring->tx_pending = nn->dp.txd_cnt; ring->tx_pending = nn->dp.txd_cnt;
} }
...@@ -414,8 +415,8 @@ static int nfp_net_set_ringparam(struct net_device *netdev, ...@@ -414,8 +415,8 @@ static int nfp_net_set_ringparam(struct net_device *netdev,
struct kernel_ethtool_ringparam *kernel_ring, struct kernel_ethtool_ringparam *kernel_ring,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
u32 tx_dpp, qc_min, qc_max, rxd_cnt, txd_cnt;
struct nfp_net *nn = netdev_priv(netdev); struct nfp_net *nn = netdev_priv(netdev);
u32 qc_min, qc_max, rxd_cnt, txd_cnt;
/* We don't have separate queues/rings for small/large frames. */ /* We don't have separate queues/rings for small/large frames. */
if (ring->rx_mini_pending || ring->rx_jumbo_pending) if (ring->rx_mini_pending || ring->rx_jumbo_pending)
...@@ -423,12 +424,13 @@ static int nfp_net_set_ringparam(struct net_device *netdev, ...@@ -423,12 +424,13 @@ static int nfp_net_set_ringparam(struct net_device *netdev,
qc_min = nn->dev_info->min_qc_size; qc_min = nn->dev_info->min_qc_size;
qc_max = nn->dev_info->max_qc_size; qc_max = nn->dev_info->max_qc_size;
tx_dpp = nn->dp.ops->tx_min_desc_per_pkt;
/* Round up to supported values */ /* Round up to supported values */
rxd_cnt = roundup_pow_of_two(ring->rx_pending); rxd_cnt = roundup_pow_of_two(ring->rx_pending);
txd_cnt = roundup_pow_of_two(ring->tx_pending); txd_cnt = roundup_pow_of_two(ring->tx_pending);
if (rxd_cnt < qc_min || rxd_cnt > qc_max || if (rxd_cnt < qc_min || rxd_cnt > qc_max ||
txd_cnt < qc_min || txd_cnt > qc_max) txd_cnt < qc_min / tx_dpp || txd_cnt > qc_max / tx_dpp)
return -EINVAL; return -EINVAL;
if (nn->dp.rxd_cnt == rxd_cnt && nn->dp.txd_cnt == txd_cnt) if (nn->dp.rxd_cnt == rxd_cnt && nn->dp.txd_cnt == txd_cnt)
......
...@@ -123,7 +123,6 @@ nfp_net_pf_alloc_vnic(struct nfp_pf *pf, bool needs_netdev, ...@@ -123,7 +123,6 @@ nfp_net_pf_alloc_vnic(struct nfp_pf *pf, bool needs_netdev,
return nn; return nn;
nn->app = pf->app; nn->app = pf->app;
nfp_net_get_fw_version(&nn->fw_ver, ctrl_bar);
nn->tx_bar = qc_bar + tx_base * NFP_QCP_QUEUE_ADDR_SZ; nn->tx_bar = qc_bar + tx_base * NFP_QCP_QUEUE_ADDR_SZ;
nn->rx_bar = qc_bar + rx_base * NFP_QCP_QUEUE_ADDR_SZ; nn->rx_bar = qc_bar + rx_base * NFP_QCP_QUEUE_ADDR_SZ;
nn->dp.is_vf = 0; nn->dp.is_vf = 0;
...@@ -679,9 +678,11 @@ int nfp_net_pci_probe(struct nfp_pf *pf) ...@@ -679,9 +678,11 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
} }
nfp_net_get_fw_version(&fw_ver, ctrl_bar); nfp_net_get_fw_version(&fw_ver, ctrl_bar);
if (fw_ver.resv || fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) { if (fw_ver.extend & NFP_NET_CFG_VERSION_RESERVED_MASK ||
fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) {
nfp_err(pf->cpp, "Unknown Firmware ABI %d.%d.%d.%d\n", nfp_err(pf->cpp, "Unknown Firmware ABI %d.%d.%d.%d\n",
fw_ver.resv, fw_ver.class, fw_ver.major, fw_ver.minor); fw_ver.extend, fw_ver.class,
fw_ver.major, fw_ver.minor);
err = -EINVAL; err = -EINVAL;
goto err_unmap; goto err_unmap;
} }
...@@ -697,7 +698,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf) ...@@ -697,7 +698,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
break; break;
default: default:
nfp_err(pf->cpp, "Unsupported Firmware ABI %d.%d.%d.%d\n", nfp_err(pf->cpp, "Unsupported Firmware ABI %d.%d.%d.%d\n",
fw_ver.resv, fw_ver.class, fw_ver.extend, fw_ver.class,
fw_ver.major, fw_ver.minor); fw_ver.major, fw_ver.minor);
err = -EINVAL; err = -EINVAL;
goto err_unmap; goto err_unmap;
......
...@@ -10,204 +10,9 @@ ...@@ -10,204 +10,9 @@
#include "nfp_app.h" #include "nfp_app.h"
#include "nfp_net.h" #include "nfp_net.h"
#include "nfp_net_dp.h"
#include "nfp_net_xsk.h" #include "nfp_net_xsk.h"
/* Free space on a TX ring computed from the host pointers: ring size minus
 * the distance between write and read pointer, minus one.
 */
static int nfp_net_tx_space(struct nfp_net_tx_ring *tx_ring)
{
	int space = tx_ring->cnt - tx_ring->wr_p + tx_ring->rd_p - 1;

	return space;
}
/* Give the xdp buffer attached to a TX slot back via xsk_buff_free() and
 * clear the slot's buffer pointer and DMA address.
 */
static void nfp_net_xsk_tx_free(struct nfp_net_tx_buf *txbuf)
{
	xsk_buff_free(txbuf->xdp);

	txbuf->xdp = NULL;
	txbuf->dma_addr = 0;
}
/* Walk every slot between the host read and write pointers, clear its length,
 * advance both read pointers and, when the vector has an XSK pool, free
 * buffers flagged is_xsk_tx and report one completion to the pool.
 */
void nfp_net_xsk_tx_bufs_free(struct nfp_net_tx_ring *tx_ring)
{
	while (tx_ring->rd_p != tx_ring->wr_p) {
		unsigned int slot = D_IDX(tx_ring, tx_ring->rd_p);
		struct nfp_net_tx_buf *buf = &tx_ring->txbufs[slot];

		buf->real_len = 0;

		tx_ring->qcp_rd_p++;
		tx_ring->rd_p++;

		/* Without a pool there is nothing to free or to report. */
		if (!tx_ring->r_vec->xsk_pool)
			continue;

		if (buf->is_xsk_tx)
			nfp_net_xsk_tx_free(buf);

		xsk_tx_completed(tx_ring->r_vec->xsk_pool, 1);
	}
}
/* Reclaim completed descriptors on an XSK TX ring.
 *
 * Reads the read pointer via nfp_qcp_rd_ptr_read(), processes at most
 * NFP_NET_XDP_MAX_COMPLETE completed slots, updates the vector's TX counters
 * and reports completions to the pool with xsk_tx_completed().
 *
 * Return: true when the ring was empty, no progress was seen, or all
 * outstanding completions fit in this call; false when more than
 * NFP_NET_XDP_MAX_COMPLETE were outstanding.
 */
static bool nfp_net_xsk_complete(struct nfp_net_tx_ring *tx_ring)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
u32 done_pkts = 0, done_bytes = 0, reused = 0;
bool done_all;
int idx, todo;
u32 qcp_rd_p;
/* Nothing in flight according to the host pointers. */
if (tx_ring->wr_p == tx_ring->rd_p)
return true;
/* Work out how many descriptors have been transmitted. */
qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
if (qcp_rd_p == tx_ring->qcp_rd_p)
return true;
todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
/* Cap the work per call; done_all records whether the cap was hit. */
done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);
tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo);
done_pkts = todo;
while (todo--) {
struct nfp_net_tx_buf *txbuf;
idx = D_IDX(tx_ring, tx_ring->rd_p);
tx_ring->rd_p++;
txbuf = &tx_ring->txbufs[idx];
/* Slots with a zero length contribute no bytes and hold nothing to free. */
if (unlikely(!txbuf->real_len))
continue;
done_bytes += txbuf->real_len;
txbuf->real_len = 0;
/* Buffers freed here are counted so they can be excluded below. */
if (txbuf->is_xsk_tx) {
nfp_net_xsk_tx_free(txbuf);
reused++;
}
}
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_bytes += done_bytes;
r_vec->tx_pkts += done_pkts;
u64_stats_update_end(&r_vec->tx_sync);
/* Only completions that were not freed above are reported to the pool. */
xsk_tx_completed(r_vec->xsk_pool, done_pkts - reused);
WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
"XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
return done_all;
}
/* Move frames from the vector's XSK pool onto the TX ring.
 *
 * While the ring has room for a full batch, up to NFP_NET_XSK_TX_BATCH
 * descriptors are peeked from the pool, synced for the device and written
 * as TX descriptors. If anything was queued, the pool is released and the
 * total is added to the ring's write pointer via nfp_qcp_wr_ptr_add().
 */
static void nfp_net_xsk_tx(struct nfp_net_tx_ring *tx_ring)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
struct xdp_desc desc[NFP_NET_XSK_TX_BATCH];
struct xsk_buff_pool *xsk_pool;
struct nfp_net_tx_desc *txd;
u32 pkts = 0, wr_idx;
u32 i, got;
xsk_pool = r_vec->xsk_pool;
while (nfp_net_tx_space(tx_ring) >= NFP_NET_XSK_TX_BATCH) {
/* Peek until the batch is full or the pool has nothing more. */
for (i = 0; i < NFP_NET_XSK_TX_BATCH; i++)
if (!xsk_tx_peek_desc(xsk_pool, &desc[i]))
break;
got = i;
if (!got)
break;
/* NOTE(review): i equals got here, so this prefetches the slot just past the batch - confirm that is intended. */
wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i);
prefetchw(&tx_ring->txds[wr_idx]);
for (i = 0; i < got; i++)
xsk_buff_raw_dma_sync_for_device(xsk_pool, desc[i].addr,
desc[i].len);
for (i = 0; i < got; i++) {
wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i);
tx_ring->txbufs[wr_idx].real_len = desc[i].len;
/* Not flagged is_xsk_tx, so the completion path does not call nfp_net_xsk_tx_free() for it. */
tx_ring->txbufs[wr_idx].is_xsk_tx = false;
/* Build TX descriptor. */
txd = &tx_ring->txds[wr_idx];
nfp_desc_set_dma_addr(txd,
xsk_buff_raw_get_dma(xsk_pool,
desc[i].addr
));
txd->offset_eop = PCIE_DESC_TX_EOP;
txd->dma_len = cpu_to_le16(desc[i].len);
txd->data_len = cpu_to_le16(desc[i].len);
}
tx_ring->wr_p += got;
pkts += got;
}
if (!pkts)
return;
xsk_tx_release(xsk_pool);
/* Ensure all records are visible before incrementing write counter. */
wmb();
nfp_qcp_wr_ptr_add(tx_ring->qcp_q, pkts);
}
/* Queue a received XSK buffer on the TX ring.
 *
 * @dp and @rx_ring are not referenced by the body. The buffer is synced for
 * the device, recorded in the TX slot with is_xsk_tx set, and a single
 * descriptor is built for it. The write pointer update is only accumulated
 * in wr_ptr_add; it is not pushed to the device here.
 *
 * Return: false if the ring has no free slot, true once the frame is queued.
 */
static bool
nfp_net_xsk_tx_xdp(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
struct nfp_net_rx_ring *rx_ring,
struct nfp_net_tx_ring *tx_ring,
struct nfp_net_xsk_rx_buf *xrxbuf, unsigned int pkt_len,
int pkt_off)
{
struct xsk_buff_pool *pool = r_vec->xsk_pool;
struct nfp_net_tx_buf *txbuf;
struct nfp_net_tx_desc *txd;
unsigned int wr_idx;
if (nfp_net_tx_space(tx_ring) < 1)
return false;
xsk_buff_raw_dma_sync_for_device(pool, xrxbuf->dma_addr + pkt_off, pkt_len);
wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
txbuf = &tx_ring->txbufs[wr_idx];
txbuf->xdp = xrxbuf->xdp;
txbuf->real_len = pkt_len;
/* Flagged so the completion path frees the buffer via nfp_net_xsk_tx_free(). */
txbuf->is_xsk_tx = true;
/* Build TX descriptor */
txd = &tx_ring->txds[wr_idx];
txd->offset_eop = PCIE_DESC_TX_EOP;
txd->dma_len = cpu_to_le16(pkt_len);
nfp_desc_set_dma_addr(txd, xrxbuf->dma_addr + pkt_off);
txd->data_len = cpu_to_le16(pkt_len);
txd->flags = 0;
txd->mss = 0;
txd->lso_hdrlen = 0;
tx_ring->wr_ptr_add++;
tx_ring->wr_p++;
return true;
}
/* Free space on an RX ring computed from the host pointers: ring size minus
 * the distance between write and read pointer, minus one.
 */
static int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring)
{
	int space = rx_ring->cnt - rx_ring->wr_p + rx_ring->rd_p - 1;

	return space;
}
static void static void
nfp_net_xsk_rx_bufs_stash(struct nfp_net_rx_ring *rx_ring, unsigned int idx, nfp_net_xsk_rx_bufs_stash(struct nfp_net_rx_ring *rx_ring, unsigned int idx,
struct xdp_buff *xdp) struct xdp_buff *xdp)
...@@ -224,13 +29,13 @@ nfp_net_xsk_rx_bufs_stash(struct nfp_net_rx_ring *rx_ring, unsigned int idx, ...@@ -224,13 +29,13 @@ nfp_net_xsk_rx_bufs_stash(struct nfp_net_rx_ring *rx_ring, unsigned int idx,
xsk_buff_xdp_get_frame_dma(xdp) + headroom; xsk_buff_xdp_get_frame_dma(xdp) + headroom;
} }
static void nfp_net_xsk_rx_unstash(struct nfp_net_xsk_rx_buf *rxbuf) void nfp_net_xsk_rx_unstash(struct nfp_net_xsk_rx_buf *rxbuf)
{ {
rxbuf->dma_addr = 0; rxbuf->dma_addr = 0;
rxbuf->xdp = NULL; rxbuf->xdp = NULL;
} }
static void nfp_net_xsk_rx_free(struct nfp_net_xsk_rx_buf *rxbuf) void nfp_net_xsk_rx_free(struct nfp_net_xsk_rx_buf *rxbuf)
{ {
if (rxbuf->xdp) if (rxbuf->xdp)
xsk_buff_free(rxbuf->xdp); xsk_buff_free(rxbuf->xdp);
...@@ -277,8 +82,8 @@ void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring) ...@@ -277,8 +82,8 @@ void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring)
nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, wr_ptr_add); nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, wr_ptr_add);
} }
static void nfp_net_xsk_rx_drop(struct nfp_net_r_vector *r_vec, void nfp_net_xsk_rx_drop(struct nfp_net_r_vector *r_vec,
struct nfp_net_xsk_rx_buf *xrxbuf) struct nfp_net_xsk_rx_buf *xrxbuf)
{ {
u64_stats_update_begin(&r_vec->rx_sync); u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_drops++; r_vec->rx_drops++;
...@@ -287,213 +92,6 @@ static void nfp_net_xsk_rx_drop(struct nfp_net_r_vector *r_vec, ...@@ -287,213 +92,6 @@ static void nfp_net_xsk_rx_drop(struct nfp_net_r_vector *r_vec,
nfp_net_xsk_rx_free(xrxbuf); nfp_net_xsk_rx_free(xrxbuf);
} }
/* Deliver a packet held in an XSK RX buffer to the stack as an skb.
 *
 * The target netdev is dp->netdev unless meta->portid is set, in which case
 * it is looked up with nfp_app_dev_get(). The payload is copied into a newly
 * allocated skb, RX metadata is applied, the skb is passed to
 * napi_gro_receive() and the XSK buffer is freed. On lookup or allocation
 * failure the buffer is dropped via nfp_net_xsk_rx_drop() and *skbs_polled
 * is left untouched.
 */
static void nfp_net_xsk_rx_skb(struct nfp_net_rx_ring *rx_ring,
const struct nfp_net_rx_desc *rxd,
struct nfp_net_xsk_rx_buf *xrxbuf,
const struct nfp_meta_parsed *meta,
unsigned int pkt_len,
bool meta_xdp,
unsigned int *skbs_polled)
{
struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
struct net_device *netdev;
struct sk_buff *skb;
if (likely(!meta->portid)) {
netdev = dp->netdev;
} else {
struct nfp_net *nn = netdev_priv(dp->netdev);
netdev = nfp_app_dev_get(nn->app, meta->portid, NULL);
if (unlikely(!netdev)) {
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
return;
}
nfp_repr_inc_rx_stats(netdev, pkt_len);
}
skb = napi_alloc_skb(&r_vec->napi, pkt_len);
if (!skb) {
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
return;
}
/* The payload is copied, so the XSK buffer can be freed at the end of this function. */
memcpy(skb_put(skb, pkt_len), xrxbuf->xdp->data, pkt_len);
skb->mark = meta->mark;
skb_set_hash(skb, meta->hash, meta->hash_type);
skb_record_rx_queue(skb, rx_ring->idx);
skb->protocol = eth_type_trans(skb, netdev);
nfp_net_rx_csum(dp, r_vec, rxd, meta, skb);
if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
le16_to_cpu(rxd->rxd.vlan));
/* Metadata length is the gap between data_meta and data of the xdp buffer. */
if (meta_xdp)
skb_metadata_set(skb,
xrxbuf->xdp->data - xrxbuf->xdp->data_meta);
napi_gro_receive(&rx_ring->r_vec->napi, skb);
nfp_net_xsk_rx_free(xrxbuf);
(*skbs_polled)++;
}
/* RX processing loop for a ring operating with an XSK pool.
 *
 * Consumes up to @budget descriptors whose DD bit is set. Packets carrying a
 * port id in their metadata are either handed to nfp_net_xsk_rx_skb() or, for
 * NFP_META_PORT_ID_CTRL, to nfp_app_ctrl_rx_raw(); all others are run through
 * the XDP program and handled according to its verdict. Afterwards the
 * freelist is refilled, pending redirects are flushed and any accumulated TX
 * write pointer updates are pushed.
 *
 * Return: number of descriptors consumed; *skbs_polled is incremented by
 * nfp_net_xsk_rx_skb() for every skb passed to the stack.
 */
static unsigned int
nfp_net_xsk_rx(struct nfp_net_rx_ring *rx_ring, int budget,
unsigned int *skbs_polled)
{
struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
struct nfp_net_tx_ring *tx_ring;
struct bpf_prog *xdp_prog;
bool xdp_redir = false;
int pkts_polled = 0;
xdp_prog = READ_ONCE(dp->xdp_prog);
tx_ring = r_vec->xdp_ring;
while (pkts_polled < budget) {
unsigned int meta_len, data_len, pkt_len, pkt_off;
struct nfp_net_xsk_rx_buf *xrxbuf;
struct nfp_net_rx_desc *rxd;
struct nfp_meta_parsed meta;
int idx, act;
idx = D_IDX(rx_ring, rx_ring->rd_p);
rxd = &rx_ring->rxds[idx];
/* Stop at the first descriptor whose DD bit is not set. */
if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
break;
rx_ring->rd_p++;
pkts_polled++;
xrxbuf = &rx_ring->xsk_rxbufs[idx];
/* If starved of buffers "drop" it and scream. */
if (rx_ring->rd_p >= rx_ring->wr_p) {
nn_dp_warn(dp, "Starved of RX buffers\n");
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
break;
}
/* Memory barrier to ensure that we won't do other reads
 * before the DD bit.
 */
dma_rmb();
memset(&meta, 0, sizeof(meta));
/* Only supporting AF_XDP with dynamic metadata so buffer layout
 * is always:
 *
 * ---------------------------------------------------------
 * | off | metadata | packet | XXXX |
 * ---------------------------------------------------------
 */
meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
data_len = le16_to_cpu(rxd->rxd.data_len);
pkt_len = data_len - meta_len;
if (unlikely(meta_len > NFP_NET_MAX_PREPEND)) {
nn_dp_warn(dp, "Oversized RX packet metadata %u\n",
meta_len);
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
continue;
}
/* Stats update. */
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_pkts++;
r_vec->rx_bytes += pkt_len;
u64_stats_update_end(&r_vec->rx_sync);
/* Advance data past the prepended metadata and set the packet bounds. */
xrxbuf->xdp->data += meta_len;
xrxbuf->xdp->data_end = xrxbuf->xdp->data + pkt_len;
xdp_set_data_meta_invalid(xrxbuf->xdp);
xsk_buff_dma_sync_for_cpu(xrxbuf->xdp, r_vec->xsk_pool);
net_prefetch(xrxbuf->xdp->data);
if (meta_len) {
if (unlikely(nfp_net_parse_meta(dp->netdev, &meta,
xrxbuf->xdp->data -
meta_len,
xrxbuf->xdp->data,
pkt_len, meta_len))) {
nn_dp_warn(dp, "Invalid RX packet metadata\n");
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
continue;
}
/* Packets carrying a port id bypass the XDP program. */
if (unlikely(meta.portid)) {
struct nfp_net *nn = netdev_priv(dp->netdev);
if (meta.portid != NFP_META_PORT_ID_CTRL) {
nfp_net_xsk_rx_skb(rx_ring, rxd, xrxbuf,
&meta, pkt_len,
false, skbs_polled);
continue;
}
nfp_app_ctrl_rx_raw(nn->app, xrxbuf->xdp->data,
pkt_len);
nfp_net_xsk_rx_free(xrxbuf);
continue;
}
}
act = bpf_prog_run_xdp(xdp_prog, xrxbuf->xdp);
/* Recompute length and offset from the xdp buffer after the program has run. */
pkt_len = xrxbuf->xdp->data_end - xrxbuf->xdp->data;
pkt_off = xrxbuf->xdp->data - xrxbuf->xdp->data_hard_start;
switch (act) {
case XDP_PASS:
nfp_net_xsk_rx_skb(rx_ring, rxd, xrxbuf, &meta, pkt_len,
true, skbs_polled);
break;
case XDP_TX:
/* On success only the slot's references are cleared; the buffer itself is not freed here. */
if (!nfp_net_xsk_tx_xdp(dp, r_vec, rx_ring, tx_ring,
xrxbuf, pkt_len, pkt_off))
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
else
nfp_net_xsk_rx_unstash(xrxbuf);
break;
case XDP_REDIRECT:
if (xdp_do_redirect(dp->netdev, xrxbuf->xdp, xdp_prog)) {
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
} else {
nfp_net_xsk_rx_unstash(xrxbuf);
xdp_redir = true;
}
break;
default:
bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(dp->netdev, xdp_prog, act);
fallthrough;
case XDP_DROP:
nfp_net_xsk_rx_drop(r_vec, xrxbuf);
break;
}
}
nfp_net_xsk_rx_ring_fill_freelist(r_vec->rx_ring);
if (xdp_redir)
xdp_do_flush_map();
/* Push write pointer updates accumulated by nfp_net_xsk_tx_xdp(). */
if (tx_ring->wr_ptr_add)
nfp_net_tx_xmit_more_flush(tx_ring);
return pkts_polled;
}
static void nfp_net_xsk_pool_unmap(struct device *dev, static void nfp_net_xsk_pool_unmap(struct device *dev,
struct xsk_buff_pool *pool) struct xsk_buff_pool *pool)
{ {
...@@ -514,6 +112,10 @@ int nfp_net_xsk_setup_pool(struct net_device *netdev, ...@@ -514,6 +112,10 @@ int nfp_net_xsk_setup_pool(struct net_device *netdev,
struct nfp_net_dp *dp; struct nfp_net_dp *dp;
int err; int err;
/* NFDK doesn't implement xsk yet. */
if (nn->dp.ops->version == NFP_NFD_VER_NFDK)
return -EOPNOTSUPP;
/* Reject on old FWs so we can drop some checks on datapath. */ /* Reject on old FWs so we can drop some checks on datapath. */
if (nn->dp.rx_offset != NFP_NET_CFG_RX_OFFSET_DYNAMIC) if (nn->dp.rx_offset != NFP_NET_CFG_RX_OFFSET_DYNAMIC)
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -566,27 +168,3 @@ int nfp_net_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags) ...@@ -566,27 +168,3 @@ int nfp_net_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags)
return 0; return 0;
} }
/* NAPI poll handler used when an XSK pool is active on the vector.
 *
 * Runs the XSK RX loop first. Only if RX used less than @budget are TX
 * completions and new XSK transmissions handled; an incomplete XSK TX
 * completion pass makes the function report the full budget. The interrupt
 * is unmasked only when less than @budget is reported and
 * napi_complete_done() returns true.
 *
 * Return: number of packets polled, or @budget when more work is pending.
 */
int nfp_net_xsk_poll(struct napi_struct *napi, int budget)
{
struct nfp_net_r_vector *r_vec =
container_of(napi, struct nfp_net_r_vector, napi);
unsigned int pkts_polled, skbs = 0;
pkts_polled = nfp_net_xsk_rx(r_vec->rx_ring, budget, &skbs);
if (pkts_polled < budget) {
if (r_vec->tx_ring)
nfp_net_tx_complete(r_vec->tx_ring, budget);
/* Not everything was reclaimed: claim the whole budget. */
if (!nfp_net_xsk_complete(r_vec->xdp_ring))
pkts_polled = budget;
nfp_net_xsk_tx(r_vec->xdp_ring);
if (pkts_polled < budget && napi_complete_done(napi, skbs))
nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
}
return pkts_polled;
}
...@@ -15,15 +15,27 @@ static inline bool nfp_net_has_xsk_pool_slow(struct nfp_net_dp *dp, ...@@ -15,15 +15,27 @@ static inline bool nfp_net_has_xsk_pool_slow(struct nfp_net_dp *dp,
return dp->xdp_prog && dp->xsk_pools[qid]; return dp->xdp_prog && dp->xsk_pools[qid];
} }
/* Free space on an RX ring computed from the host pointers: ring size minus
 * the distance between write and read pointer, minus one.
 */
static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring)
{
	int space = rx_ring->cnt - rx_ring->wr_p + rx_ring->rd_p - 1;

	return space;
}
/* Free space on a TX ring computed from the host pointers: ring size minus
 * the distance between write and read pointer, minus one.
 */
static inline int nfp_net_tx_space(struct nfp_net_tx_ring *tx_ring)
{
	int space = tx_ring->cnt - tx_ring->wr_p + tx_ring->rd_p - 1;

	return space;
}
void nfp_net_xsk_rx_unstash(struct nfp_net_xsk_rx_buf *rxbuf);
void nfp_net_xsk_rx_free(struct nfp_net_xsk_rx_buf *rxbuf);
void nfp_net_xsk_rx_drop(struct nfp_net_r_vector *r_vec,
struct nfp_net_xsk_rx_buf *xrxbuf);
int nfp_net_xsk_setup_pool(struct net_device *netdev, struct xsk_buff_pool *pool, int nfp_net_xsk_setup_pool(struct net_device *netdev, struct xsk_buff_pool *pool,
u16 queue_id); u16 queue_id);
void nfp_net_xsk_tx_bufs_free(struct nfp_net_tx_ring *tx_ring);
void nfp_net_xsk_rx_bufs_free(struct nfp_net_rx_ring *rx_ring); void nfp_net_xsk_rx_bufs_free(struct nfp_net_rx_ring *rx_ring);
void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring); void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring);
int nfp_net_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); int nfp_net_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
int nfp_net_xsk_poll(struct napi_struct *napi, int budget);
#endif /* _NFP_XSK_H_ */ #endif /* _NFP_XSK_H_ */
...@@ -122,9 +122,11 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, ...@@ -122,9 +122,11 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
} }
nfp_net_get_fw_version(&fw_ver, ctrl_bar); nfp_net_get_fw_version(&fw_ver, ctrl_bar);
if (fw_ver.resv || fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) { if (fw_ver.extend & NFP_NET_CFG_VERSION_RESERVED_MASK ||
fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) {
dev_err(&pdev->dev, "Unknown Firmware ABI %d.%d.%d.%d\n", dev_err(&pdev->dev, "Unknown Firmware ABI %d.%d.%d.%d\n",
fw_ver.resv, fw_ver.class, fw_ver.major, fw_ver.minor); fw_ver.extend, fw_ver.class,
fw_ver.major, fw_ver.minor);
err = -EINVAL; err = -EINVAL;
goto err_ctrl_unmap; goto err_ctrl_unmap;
} }
...@@ -144,7 +146,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, ...@@ -144,7 +146,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
break; break;
default: default:
dev_err(&pdev->dev, "Unsupported Firmware ABI %d.%d.%d.%d\n", dev_err(&pdev->dev, "Unsupported Firmware ABI %d.%d.%d.%d\n",
fw_ver.resv, fw_ver.class, fw_ver.extend, fw_ver.class,
fw_ver.major, fw_ver.minor); fw_ver.major, fw_ver.minor);
err = -EINVAL; err = -EINVAL;
goto err_ctrl_unmap; goto err_ctrl_unmap;
...@@ -186,7 +188,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, ...@@ -186,7 +188,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
} }
vf->nn = nn; vf->nn = nn;
nn->fw_ver = fw_ver;
nn->dp.is_vf = 1; nn->dp.is_vf = 1;
nn->stride_tx = stride; nn->stride_tx = stride;
nn->stride_rx = stride; nn->stride_rx = stride;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment