Commit 285b2a46 authored by Paolo Abeni

Merge branch 'net-mvneta-reduce-size-of-tso-header-allocation'

Russell King says:

====================
net: mvneta: reduce size of TSO header allocation

With reference to
https://forum.turris.cz/t/random-kernel-exceptions-on-hbl-tos-7-0/18865/
https://github.com/openwrt/openwrt/pull/12375#issuecomment-1528842334

It appears that mvneta attempts an order-6 allocation for the TSO
header memory. While this succeeds early on in the system's lifetime,
trying order-6 allocations later can result in failure due to memory
fragmentation.

Firstly, the reason it's so large is that we allocate a 256-byte TSO
header buffer for every transmit descriptor. The driver uses a simple
mechanism to determine a header's address: it uses the transmit
descriptor index as an index into the TSO header memory.
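
Assuming a ring of 1024 transmit descriptors (consistent with the
32 pages of 32 headers described below), that works out as
1024 * 256 bytes = 256k, i.e. 64 contiguous 4k pages - hence the
order-6 allocation.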

	(The first obvious question is: do there need to be this
	many? Won't each TSO header always have at least one bit
	of data to go with it? In other words, wouldn't the maximum
	number of TSO headers that a ring could accept be the number
	of ring entries divided by 2?)

There is no real need for this memory to be an order-6 allocation,
since nothing in hardware requires this buffer to be contiguous.

Therefore, this series splits the order-6 allocation up into 32
order-1 allocations (8k each on platforms with 4k pages), with
32 TSO headers per page.
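
Looking up a header then becomes a simple divide/modulo on the ring's
put index, along the lines of the following sketch (simplified from
the mvneta_get_tso_hdr() helper added by this series; index, offset,
virt and dma are just local names for illustration):

	/* page that holds this descriptor's TSO header */
	index = txq->txq_put_index / MVNETA_TSO_PER_PAGE;
	/* byte offset of the header within that page */
	offset = (txq->txq_put_index % MVNETA_TSO_PER_PAGE) * TSO_HEADER_SIZE;

	virt = txq->tso_hdrs[index] + offset;
	dma  = txq->tso_hdrs_phys[index] + offset;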

In order to do this, these patches:

1) fix a horrible transmit path error-cleanup bug - the existing
   code unmaps from the first descriptor that was allocated at
   interface bringup, not the first descriptor that the packet
   is using, resulting in the wrong descriptors being unmapped
   (see the cleanup sketch after this list).

2) since xdp support was added, we now have buf->type which indicates
   what this transmit buffer contains. Use this to mark TSO header
   buffers.

3) get rid of IS_TSO_HEADER(), instead using buf->type to determine
   whether this transmit buffer needs to be DMA-unmapped.

4) move tso_build_hdr() into mvneta_tso_put_hdr() to keep all the
   TSO header building code together.

5) split the TSO header allocation into chunks of order-1 pages.
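
As a sketch of the corrected error cleanup (item 1), the new
mvneta_release_descs() helper starts from the packet's first
descriptor and walks the ring, wrapping at the end, rather than
starting from descriptor zero:

	desc_idx = first + num;
	if (desc_idx >= txq->size)
		desc_idx -= txq->size;

	for (i = num; i >= 0; i--) {
		/* DMA-unmap txq->descs[desc_idx] only if the buffer
		 * type says it was mapped (MVNETA_TYPE_SKB)
		 */
		mvneta_txq_desc_put(txq);

		if (desc_idx == 0)
			desc_idx = txq->size;
		desc_idx -= 1;
	}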

This has now been tested by the Turris folk and has been found to fix
the allocation error.
====================

Link: https://lore.kernel.org/r/ZFtuhJOC03qpASt2@shell.armlinux.org.uk
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents ccce324d 33f4cefb
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -344,6 +344,15 @@
 #define MVNETA_MAX_SKB_DESCS	(MVNETA_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
 
+/* The size of a TSO header page */
+#define MVNETA_TSO_PAGE_SIZE	(2 * PAGE_SIZE)
+
+/* Number of TSO headers per page. This should be a power of 2 */
+#define MVNETA_TSO_PER_PAGE	(MVNETA_TSO_PAGE_SIZE / TSO_HEADER_SIZE)
+
+/* Maximum number of TSO header pages */
+#define MVNETA_MAX_TSO_PAGES	(MVNETA_MAX_TXD / MVNETA_TSO_PER_PAGE)
+
 /* descriptor aligned size */
 #define MVNETA_DESC_ALIGNED_SIZE	32
@@ -364,10 +373,6 @@
 			     MVNETA_SKB_HEADROOM))
 
 #define MVNETA_MAX_RX_BUF_SIZE	(PAGE_SIZE - MVNETA_SKB_PAD)
 
-#define IS_TSO_HEADER(txq, addr) \
-	((addr >= txq->tso_hdrs_phys) && \
-	 (addr < txq->tso_hdrs_phys + txq->size * TSO_HEADER_SIZE))
-
 #define MVNETA_RX_GET_BM_POOL_ID(rxd) \
 	(((rxd)->status & MVNETA_RXD_BM_POOL_MASK) >> MVNETA_RXD_BM_POOL_SHIFT)
@@ -638,6 +643,7 @@ struct mvneta_rx_desc {
 #endif
 
 enum mvneta_tx_buf_type {
+	MVNETA_TYPE_TSO,
 	MVNETA_TYPE_SKB,
 	MVNETA_TYPE_XDP_TX,
 	MVNETA_TYPE_XDP_NDO,
@@ -690,10 +696,10 @@ struct mvneta_tx_queue {
 	int next_desc_to_proc;
 
 	/* DMA buffers for TSO headers */
-	char *tso_hdrs;
+	char *tso_hdrs[MVNETA_MAX_TSO_PAGES];
 
 	/* DMA address of TSO headers */
-	dma_addr_t tso_hdrs_phys;
+	dma_addr_t tso_hdrs_phys[MVNETA_MAX_TSO_PAGES];
 
 	/* Affinity mask for CPUs*/
 	cpumask_t affinity_mask;
@@ -1878,12 +1884,13 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
 		mvneta_txq_inc_get(txq);
 
-		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr) &&
-		    buf->type != MVNETA_TYPE_XDP_TX)
+		if (buf->type == MVNETA_TYPE_XDP_NDO ||
+		    buf->type == MVNETA_TYPE_SKB)
 			dma_unmap_single(pp->dev->dev.parent,
 					 tx_desc->buf_phys_addr,
 					 tx_desc->data_size, DMA_TO_DEVICE);
 
-		if (buf->type == MVNETA_TYPE_SKB && buf->skb) {
+		if ((buf->type == MVNETA_TYPE_TSO ||
+		     buf->type == MVNETA_TYPE_SKB) && buf->skb) {
 			bytes_compl += buf->skb->len;
 			pkts_compl++;
 			dev_kfree_skb_any(buf->skb);
@@ -2661,20 +2668,72 @@ static int mvneta_rx_hwbm(struct napi_struct *napi,
 	return rx_done;
 }
 
-static inline void
-mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq)
+static void mvneta_free_tso_hdrs(struct mvneta_port *pp,
+				 struct mvneta_tx_queue *txq)
+{
+	struct device *dev = pp->dev->dev.parent;
+	int i;
+
+	for (i = 0; i < MVNETA_MAX_TSO_PAGES; i++) {
+		if (txq->tso_hdrs[i]) {
+			dma_free_coherent(dev, MVNETA_TSO_PAGE_SIZE,
+					  txq->tso_hdrs[i],
+					  txq->tso_hdrs_phys[i]);
+			txq->tso_hdrs[i] = NULL;
+		}
+	}
+}
+
+static int mvneta_alloc_tso_hdrs(struct mvneta_port *pp,
+				 struct mvneta_tx_queue *txq)
+{
+	struct device *dev = pp->dev->dev.parent;
+	int i, num;
+
+	num = DIV_ROUND_UP(txq->size, MVNETA_TSO_PER_PAGE);
+	for (i = 0; i < num; i++) {
+		txq->tso_hdrs[i] = dma_alloc_coherent(dev, MVNETA_TSO_PAGE_SIZE,
+						      &txq->tso_hdrs_phys[i],
+						      GFP_KERNEL);
+		if (!txq->tso_hdrs[i]) {
+			mvneta_free_tso_hdrs(pp, txq);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static char *mvneta_get_tso_hdr(struct mvneta_tx_queue *txq, dma_addr_t *dma)
+{
+	int index, offset;
+
+	index = txq->txq_put_index / MVNETA_TSO_PER_PAGE;
+	offset = (txq->txq_put_index % MVNETA_TSO_PER_PAGE) * TSO_HEADER_SIZE;
+
+	*dma = txq->tso_hdrs_phys[index] + offset;
+
+	return txq->tso_hdrs[index] + offset;
+}
+
+static void mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq,
+			       struct tso_t *tso, int size, bool is_last)
 {
 	struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
 	int hdr_len = skb_tcp_all_headers(skb);
 	struct mvneta_tx_desc *tx_desc;
+	dma_addr_t hdr_phys;
+	char *hdr;
+
+	hdr = mvneta_get_tso_hdr(txq, &hdr_phys);
+	tso_build_hdr(skb, hdr, tso, size, is_last);
 
 	tx_desc = mvneta_txq_next_desc_get(txq);
 	tx_desc->data_size = hdr_len;
 	tx_desc->command = mvneta_skb_tx_csum(skb);
 	tx_desc->command |= MVNETA_TXD_F_DESC;
-	tx_desc->buf_phys_addr = txq->tso_hdrs_phys +
-				 txq->txq_put_index * TSO_HEADER_SIZE;
-	buf->type = MVNETA_TYPE_SKB;
+	tx_desc->buf_phys_addr = hdr_phys;
+	buf->type = MVNETA_TYPE_TSO;
 	buf->skb = NULL;
 
 	mvneta_txq_inc_put(txq);
@@ -2714,14 +2773,41 @@ mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
 	return 0;
 }
 
+static void mvneta_release_descs(struct mvneta_port *pp,
+				 struct mvneta_tx_queue *txq,
+				 int first, int num)
+{
+	int desc_idx, i;
+
+	desc_idx = first + num;
+	if (desc_idx >= txq->size)
+		desc_idx -= txq->size;
+
+	for (i = num; i >= 0; i--) {
+		struct mvneta_tx_desc *tx_desc = txq->descs + desc_idx;
+		struct mvneta_tx_buf *buf = &txq->buf[desc_idx];
+
+		if (buf->type == MVNETA_TYPE_SKB)
+			dma_unmap_single(pp->dev->dev.parent,
+					 tx_desc->buf_phys_addr,
+					 tx_desc->data_size,
+					 DMA_TO_DEVICE);
+
+		mvneta_txq_desc_put(txq);
+
+		if (desc_idx == 0)
+			desc_idx = txq->size;
+		desc_idx -= 1;
+	}
+}
+
 static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev,
 			 struct mvneta_tx_queue *txq)
 {
 	int hdr_len, total_len, data_left;
-	int desc_count = 0;
+	int first_desc, desc_count = 0;
 	struct mvneta_port *pp = netdev_priv(dev);
 	struct tso_t tso;
-	int i;
 
 	/* Count needed descriptors */
 	if ((txq->count + tso_count_descs(skb)) >= txq->size)
@@ -2732,22 +2818,19 @@ static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev,
 		return 0;
 	}
 
+	first_desc = txq->txq_put_index;
+
 	/* Initialize the TSO handler, and prepare the first payload */
 	hdr_len = tso_start(skb, &tso);
 
 	total_len = skb->len - hdr_len;
 	while (total_len > 0) {
-		char *hdr;
-
 		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
 		total_len -= data_left;
 		desc_count++;
 
-		/* prepare packet headers: MAC + IP + TCP */
-		hdr = txq->tso_hdrs + txq->txq_put_index * TSO_HEADER_SIZE;
-		tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
-
-		mvneta_tso_put_hdr(skb, txq);
+		mvneta_tso_put_hdr(skb, txq, &tso, data_left, total_len == 0);
 
 		while (data_left > 0) {
 			int size;
@@ -2772,15 +2855,7 @@ static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev,
 	/* Release all used data descriptors; header descriptors must not
 	 * be DMA-unmapped.
 	 */
-	for (i = desc_count - 1; i >= 0; i--) {
-		struct mvneta_tx_desc *tx_desc = txq->descs + i;
-
-		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr))
-			dma_unmap_single(pp->dev->dev.parent,
-					 tx_desc->buf_phys_addr,
-					 tx_desc->data_size,
-					 DMA_TO_DEVICE);
-		mvneta_txq_desc_put(txq);
-	}
+	mvneta_release_descs(pp, txq, first_desc, desc_count - 1);
+
 	return 0;
 }
@@ -2790,6 +2865,7 @@ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
 {
 	struct mvneta_tx_desc *tx_desc;
 	int i, nr_frags = skb_shinfo(skb)->nr_frags;
+	int first_desc = txq->txq_put_index;
 
 	for (i = 0; i < nr_frags; i++) {
 		struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
@@ -2828,15 +2904,7 @@ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
 	/* Release all descriptors that were used to map fragments of
 	 * this packet, as well as the corresponding DMA mappings
 	 */
-	for (i = i - 1; i >= 0; i--) {
-		tx_desc = txq->descs + i;
-		dma_unmap_single(pp->dev->dev.parent,
-				 tx_desc->buf_phys_addr,
-				 tx_desc->data_size,
-				 DMA_TO_DEVICE);
-		mvneta_txq_desc_put(txq);
-	}
+	mvneta_release_descs(pp, txq, first_desc, i - 1);
 
 	return -ENOMEM;
 }
@@ -3457,7 +3525,7 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
 static int mvneta_txq_sw_init(struct mvneta_port *pp,
 			      struct mvneta_tx_queue *txq)
 {
-	int cpu;
+	int cpu, err;
 
 	txq->size = pp->tx_ring_size;
@@ -3482,11 +3550,9 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp,
 		return -ENOMEM;
 
 	/* Allocate DMA buffers for TSO MAC/IP/TCP headers */
-	txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent,
-					   txq->size * TSO_HEADER_SIZE,
-					   &txq->tso_hdrs_phys, GFP_KERNEL);
-	if (!txq->tso_hdrs)
-		return -ENOMEM;
+	err = mvneta_alloc_tso_hdrs(pp, txq);
+	if (err)
+		return err;
 
 	/* Setup XPS mapping */
 	if (pp->neta_armada3700)
@@ -3538,10 +3604,7 @@ static void mvneta_txq_sw_deinit(struct mvneta_port *pp,
 	kfree(txq->buf);
 
-	if (txq->tso_hdrs)
-		dma_free_coherent(pp->dev->dev.parent,
-				  txq->size * TSO_HEADER_SIZE,
-				  txq->tso_hdrs, txq->tso_hdrs_phys);
+	mvneta_free_tso_hdrs(pp, txq);
 
 	if (txq->descs)
 		dma_free_coherent(pp->dev->dev.parent,
 				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
@@ -3550,7 +3613,6 @@ static void mvneta_txq_sw_deinit(struct mvneta_port *pp,
 	netdev_tx_reset_queue(nq);
 
 	txq->buf = NULL;
-	txq->tso_hdrs = NULL;
 	txq->descs = NULL;
 	txq->last_desc = 0;
 	txq->next_desc_to_proc = 0;
@@ -5821,6 +5883,8 @@ static int __init mvneta_driver_init(void)
 {
 	int ret;
 
+	BUILD_BUG_ON_NOT_POWER_OF_2(MVNETA_TSO_PER_PAGE);
+
 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/mvneta:online",
 				      mvneta_cpu_online,
 				      mvneta_cpu_down_prepare);