Commit 8577dd8a authored by David S. Miller's avatar David S. Miller

Merge branch 'dpaa2-rx-copybreak'

Ioana Ciornei says:

====================
dpaa2-eth: add rx copybreak support

DMA unmapping, allocating a new buffer and DMA mapping it back on the
refill path is really not that efficient. Proper buffer recycling (page
pool, flipping the page and using the other half) cannot be done for
DPAA2 since it's not a ring based controller but it rather deals with
multiple queues which all get their buffers from the same buffer pool on
Rx.

To circumvent these limitations, add support for Rx copybreak in
dpaa2-eth.

Below you can find a summary of the tests that were run to end up
with the default rx copybreak value of 512.
A bit about the setup - a LS2088A SoC, 8 x Cortex A72 @ 1.8GHz, IPfwd
zero loss test @ 20Gbit/s throughput.  I tested multiple frame sizes to
get an idea where is the break even point.

Here are 2 sets of results, (1) is the baseline and (2) is just
allocating a new skb for all frames sizes received (as if the copybreak
was even to the MTU). All numbers are in Mpps.

         64   128    256   512  640   768   896

(1)     3.23  3.23  3.24  3.21  3.1  2.76  2.71
(2)     3.95  3.88  3.79  3.62  3.3  3.02  2.65

It seems that even for 512 bytes frame sizes it's comfortably better when
allocating a new skb. After that, we see diminishing rewards or even worse.

Changes in v2:
 - properly marked dpaa2_eth_copybreak as static
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 3e8db636 8ed3cefc
......@@ -223,31 +223,31 @@ static void dpaa2_eth_free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array,
}
}
static void dpaa2_eth_xdp_release_buf(struct dpaa2_eth_priv *priv,
struct dpaa2_eth_channel *ch,
dma_addr_t addr)
static void dpaa2_eth_recycle_buf(struct dpaa2_eth_priv *priv,
struct dpaa2_eth_channel *ch,
dma_addr_t addr)
{
int retries = 0;
int err;
ch->xdp.drop_bufs[ch->xdp.drop_cnt++] = addr;
if (ch->xdp.drop_cnt < DPAA2_ETH_BUFS_PER_CMD)
ch->recycled_bufs[ch->recycled_bufs_cnt++] = addr;
if (ch->recycled_bufs_cnt < DPAA2_ETH_BUFS_PER_CMD)
return;
while ((err = dpaa2_io_service_release(ch->dpio, priv->bpid,
ch->xdp.drop_bufs,
ch->xdp.drop_cnt)) == -EBUSY) {
ch->recycled_bufs,
ch->recycled_bufs_cnt)) == -EBUSY) {
if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES)
break;
cpu_relax();
}
if (err) {
dpaa2_eth_free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt);
ch->buf_count -= ch->xdp.drop_cnt;
dpaa2_eth_free_bufs(priv, ch->recycled_bufs, ch->recycled_bufs_cnt);
ch->buf_count -= ch->recycled_bufs_cnt;
}
ch->xdp.drop_cnt = 0;
ch->recycled_bufs_cnt = 0;
}
static int dpaa2_eth_xdp_flush(struct dpaa2_eth_priv *priv,
......@@ -300,7 +300,7 @@ static void dpaa2_eth_xdp_tx_flush(struct dpaa2_eth_priv *priv,
ch->stats.xdp_tx++;
}
for (i = enqueued; i < fq->xdp_tx_fds.num; i++) {
dpaa2_eth_xdp_release_buf(priv, ch, dpaa2_fd_get_addr(&fds[i]));
dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(&fds[i]));
percpu_stats->tx_errors++;
ch->stats.xdp_tx_err++;
}
......@@ -382,7 +382,7 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act);
fallthrough;
case XDP_DROP:
dpaa2_eth_xdp_release_buf(priv, ch, addr);
dpaa2_eth_recycle_buf(priv, ch, addr);
ch->stats.xdp_drop++;
break;
case XDP_REDIRECT:
......@@ -403,7 +403,7 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
free_pages((unsigned long)vaddr, 0);
} else {
ch->buf_count++;
dpaa2_eth_xdp_release_buf(priv, ch, addr);
dpaa2_eth_recycle_buf(priv, ch, addr);
}
ch->stats.xdp_drop++;
} else {
......@@ -418,6 +418,35 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
return xdp_act;
}
static struct sk_buff *dpaa2_eth_copybreak(struct dpaa2_eth_channel *ch,
const struct dpaa2_fd *fd,
void *fd_vaddr)
{
u16 fd_offset = dpaa2_fd_get_offset(fd);
struct dpaa2_eth_priv *priv = ch->priv;
u32 fd_length = dpaa2_fd_get_len(fd);
struct sk_buff *skb = NULL;
unsigned int skb_len;
if (fd_length > priv->rx_copybreak)
return NULL;
skb_len = fd_length + dpaa2_eth_needed_headroom(NULL);
skb = napi_alloc_skb(&ch->napi, skb_len);
if (!skb)
return NULL;
skb_reserve(skb, dpaa2_eth_needed_headroom(NULL));
skb_put(skb, fd_length);
memcpy(skb->data, fd_vaddr + fd_offset, fd_length);
dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(fd));
return skb;
}
/* Main Rx frame processing routine */
static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
struct dpaa2_eth_channel *ch,
......@@ -459,9 +488,12 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
return;
}
dma_unmap_page(dev, addr, priv->rx_buf_size,
DMA_BIDIRECTIONAL);
skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr);
skb = dpaa2_eth_copybreak(ch, fd, vaddr);
if (!skb) {
dma_unmap_page(dev, addr, priv->rx_buf_size,
DMA_BIDIRECTIONAL);
skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr);
}
} else if (fd_format == dpaa2_fd_sg) {
WARN_ON(priv->xdp_prog);
......@@ -4302,6 +4334,8 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
skb_queue_head_init(&priv->tx_skbs);
priv->rx_copybreak = DPAA2_ETH_DEFAULT_COPYBREAK;
/* Obtain a MC portal */
err = fsl_mc_portal_allocate(dpni_dev, FSL_MC_IO_ATOMIC_CONTEXT_PORTAL,
&priv->mc_io);
......
......@@ -438,8 +438,6 @@ struct dpaa2_eth_fq {
struct dpaa2_eth_ch_xdp {
struct bpf_prog *prog;
u64 drop_bufs[DPAA2_ETH_BUFS_PER_CMD];
int drop_cnt;
unsigned int res;
};
......@@ -457,6 +455,10 @@ struct dpaa2_eth_channel {
struct dpaa2_eth_ch_xdp xdp;
struct xdp_rxq_info xdp_rxq;
struct list_head *rx_list;
/* Buffers to be recycled back in the buffer pool */
u64 recycled_bufs[DPAA2_ETH_BUFS_PER_CMD];
int recycled_bufs_cnt;
};
struct dpaa2_eth_dist_fields {
......@@ -487,6 +489,8 @@ struct dpaa2_eth_trap_data {
struct dpaa2_eth_priv *priv;
};
#define DPAA2_ETH_DEFAULT_COPYBREAK 512
/* Driver private data */
struct dpaa2_eth_priv {
struct net_device *net_dev;
......@@ -567,6 +571,8 @@ struct dpaa2_eth_priv {
struct devlink *devlink;
struct dpaa2_eth_trap_data *trap_data;
struct devlink_port devlink_port;
u32 rx_copybreak;
};
struct dpaa2_eth_devlink_priv {
......
......@@ -782,6 +782,44 @@ static int dpaa2_eth_get_ts_info(struct net_device *dev,
return 0;
}
static int dpaa2_eth_get_tunable(struct net_device *net_dev,
const struct ethtool_tunable *tuna,
void *data)
{
struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
int err = 0;
switch (tuna->id) {
case ETHTOOL_RX_COPYBREAK:
*(u32 *)data = priv->rx_copybreak;
break;
default:
err = -EOPNOTSUPP;
break;
}
return err;
}
static int dpaa2_eth_set_tunable(struct net_device *net_dev,
const struct ethtool_tunable *tuna,
const void *data)
{
struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
int err = 0;
switch (tuna->id) {
case ETHTOOL_RX_COPYBREAK:
priv->rx_copybreak = *(u32 *)data;
break;
default:
err = -EOPNOTSUPP;
break;
}
return err;
}
const struct ethtool_ops dpaa2_ethtool_ops = {
.get_drvinfo = dpaa2_eth_get_drvinfo,
.nway_reset = dpaa2_eth_nway_reset,
......@@ -796,4 +834,6 @@ const struct ethtool_ops dpaa2_ethtool_ops = {
.get_rxnfc = dpaa2_eth_get_rxnfc,
.set_rxnfc = dpaa2_eth_set_rxnfc,
.get_ts_info = dpaa2_eth_get_ts_info,
.get_tunable = dpaa2_eth_get_tunable,
.set_tunable = dpaa2_eth_set_tunable,
};
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment