Commit a992005c authored by Jakub Kicinski

Merge branch 'ibmvnic-use-a-set-of-ltbs-per-pool'

Sukadev Bhattiprolu says:

====================
ibmvnic: Use a set of LTBs per pool

ibmvnic uses a single large long term buffer (LTB) per rx or tx
pool (queue). This has two limitations.

First, if we need to free/allocate an LTB (e.g., during a reset) under
low memory conditions, the allocation can fail.

Second, the kernel limits the size of a single LTB (DMA buffer) to 16MB
(based on MAX_ORDER). With jumbo frames (mtu = 9000) we can only have
about 1763 buffers per LTB (16MB / 9588 bytes per frame) even though
the max supported buffers is 4096. (The 9588 instead of 9088 comes from
IBMVNIC_BUFFER_HLEN.)

To overcome these limitations, allow creating a set of LTBs per queue.
====================

Link: https://lore.kernel.org/r/20220413171026.1264294-1-drt@linux.ibm.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 7b05c542 93b1ebb3
...@@ -257,12 +257,14 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, ...@@ -257,12 +257,14 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
struct ibmvnic_long_term_buff *ltb, int size) struct ibmvnic_long_term_buff *ltb, int size)
{ {
struct device *dev = &adapter->vdev->dev; struct device *dev = &adapter->vdev->dev;
u64 prev = 0;
int rc; int rc;
if (!reuse_ltb(ltb, size)) { if (!reuse_ltb(ltb, size)) {
dev_dbg(dev, dev_dbg(dev,
"LTB size changed from 0x%llx to 0x%x, reallocating\n", "LTB size changed from 0x%llx to 0x%x, reallocating\n",
ltb->size, size); ltb->size, size);
prev = ltb->size;
free_long_term_buff(adapter, ltb); free_long_term_buff(adapter, ltb);
} }
...@@ -283,8 +285,8 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, ...@@ -283,8 +285,8 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
bitmap_set(adapter->map_ids, ltb->map_id, 1); bitmap_set(adapter->map_ids, ltb->map_id, 1);
dev_dbg(dev, dev_dbg(dev,
"Allocated new LTB [map %d, size 0x%llx]\n", "Allocated new LTB [map %d, size 0x%llx was 0x%llx]\n",
ltb->map_id, ltb->size); ltb->map_id, ltb->size, prev);
} }
/* Ensure ltb is zeroed - specially when reusing it. */ /* Ensure ltb is zeroed - specially when reusing it. */
...@@ -345,6 +347,208 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, ...@@ -345,6 +347,208 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter,
ltb->map_id = 0; ltb->map_id = 0;
} }
/**
 * free_ltb_set - release every long term buffer (LTB) held in a set
 * @adapter: The ibmvnic adapter that owns this ltb set
 * @ltb_set: The ltb_set whose LTBs and container array are released
 *
 * Pass each LTB in the set to free_long_term_buff(), free the array that
 * held them and leave the set empty (NULL array, zero count).
 */
static void free_ltb_set(struct ibmvnic_adapter *adapter,
			 struct ibmvnic_ltb_set *ltb_set)
{
	int idx = 0;

	/* Release the LTBs first; the container array is still needed
	 * to reach them.
	 */
	while (idx < ltb_set->num_ltbs) {
		free_long_term_buff(adapter, &ltb_set->ltbs[idx]);
		idx++;
	}

	/* Now drop the container and mark the set as empty. */
	kfree(ltb_set->ltbs);
	ltb_set->num_ltbs = 0;
	ltb_set->ltbs = NULL;
}
/**
* alloc_ltb_set() - Allocate a set of long term buffers (LTBs)
*
* @adapter: ibmvnic adapter associated to the LTB
* @ltb_set: container object for the set of LTBs
* @num_buffs: Number of buffers in the LTB
* @buff_size: Size of each buffer in the LTB
*
* Allocate a set of LTBs to accommodate @num_buffs buffers of @buff_size
* each. We currently cap size each LTB to IBMVNIC_ONE_LTB_SIZE. If the
* new set of LTBs have fewer LTBs than the old set, free the excess LTBs.
* If new set needs more than in old set, allocate the remaining ones.
* Try and reuse as many LTBs as possible and avoid reallocation.
*
* Any changes to this allocation strategy must be reflected in
* map_rxpool_buff_to_ltb() and map_txpool_buff_to_ltb().
*/
static int alloc_ltb_set(struct ibmvnic_adapter *adapter,
struct ibmvnic_ltb_set *ltb_set, int num_buffs,
int buff_size)
{
struct device *dev = &adapter->vdev->dev;
struct ibmvnic_ltb_set old_set;
struct ibmvnic_ltb_set new_set;
int rem_size;
int tot_size; /* size of all ltbs */
int ltb_size; /* size of one ltb */
int nltbs;
int rc;
int n;
int i;
dev_dbg(dev, "%s() num_buffs %d, buff_size %d\n", __func__, num_buffs,
buff_size);
ltb_size = rounddown(IBMVNIC_ONE_LTB_SIZE, buff_size);
tot_size = num_buffs * buff_size;
if (ltb_size > tot_size)
ltb_size = tot_size;
nltbs = tot_size / ltb_size;
if (tot_size % ltb_size)
nltbs++;
old_set = *ltb_set;
if (old_set.num_ltbs == nltbs) {
new_set = old_set;
} else {
int tmp = nltbs * sizeof(struct ibmvnic_long_term_buff);
new_set.ltbs = kzalloc(tmp, GFP_KERNEL);
if (!new_set.ltbs)
return -ENOMEM;
new_set.num_ltbs = nltbs;
/* Free any excess ltbs in old set */
for (i = new_set.num_ltbs; i < old_set.num_ltbs; i++)
free_long_term_buff(adapter, &old_set.ltbs[i]);
/* Copy remaining ltbs to new set. All LTBs except the
* last one are of the same size. alloc_long_term_buff()
* will realloc if the size changes.
*/
n = min(old_set.num_ltbs, new_set.num_ltbs);
for (i = 0; i < n; i++)
new_set.ltbs[i] = old_set.ltbs[i];
/* Any additional ltbs in new set will have NULL ltbs for
* now and will be allocated in alloc_long_term_buff().
*/
/* We no longer need the old_set so free it. Note that we
* may have reused some ltbs from old set and freed excess
* ltbs above. So we only need to free the container now
* not the LTBs themselves. (i.e. dont free_ltb_set()!)
*/
kfree(old_set.ltbs);
old_set.ltbs = NULL;
old_set.num_ltbs = 0;
/* Install the new set. If allocations fail below, we will
* retry later and know what size LTBs we need.
*/
*ltb_set = new_set;
}
i = 0;
rem_size = tot_size;
while (rem_size) {
if (ltb_size > rem_size)
ltb_size = rem_size;
rem_size -= ltb_size;
rc = alloc_long_term_buff(adapter, &new_set.ltbs[i], ltb_size);
if (rc)
goto out;
i++;
}
WARN_ON(i != new_set.num_ltbs);
return 0;
out:
/* We may have allocated one/more LTBs before failing and we
* want to try and reuse on next reset. So don't free ltb set.
*/
return rc;
}
/**
* map_rxpool_buf_to_ltb - Map given rxpool buffer to offset in an LTB.
* @rxpool: The receive buffer pool containing buffer
* @bufidx: Index of buffer in rxpool
* @ltbp: (Output) pointer to the long term buffer containing the buffer
* @offset: (Output) offset of buffer in the LTB from @ltbp
*
* Map the given buffer identified by [rxpool, bufidx] to an LTB in the
* pool and its corresponding offset. Assume for now that each LTB is of
* different size but could possibly be optimized based on the allocation
* strategy in alloc_ltb_set().
*/
static void map_rxpool_buf_to_ltb(struct ibmvnic_rx_pool *rxpool,
unsigned int bufidx,
struct ibmvnic_long_term_buff **ltbp,
unsigned int *offset)
{
struct ibmvnic_long_term_buff *ltb;
int nbufs; /* # of buffers in one ltb */
int i;
WARN_ON(bufidx >= rxpool->size);
for (i = 0; i < rxpool->ltb_set.num_ltbs; i++) {
ltb = &rxpool->ltb_set.ltbs[i];
nbufs = ltb->size / rxpool->buff_size;
if (bufidx < nbufs)
break;
bufidx -= nbufs;
}
*ltbp = ltb;
*offset = bufidx * rxpool->buff_size;
}
/**
* map_txpool_buf_to_ltb - Map given txpool buffer to offset in an LTB.
* @txpool: The transmit buffer pool containing buffer
* @bufidx: Index of buffer in txpool
* @ltbp: (Output) pointer to the long term buffer (LTB) containing the buffer
* @offset: (Output) offset of buffer in the LTB from @ltbp
*
* Map the given buffer identified by [txpool, bufidx] to an LTB in the
* pool and its corresponding offset.
*/
static void map_txpool_buf_to_ltb(struct ibmvnic_tx_pool *txpool,
unsigned int bufidx,
struct ibmvnic_long_term_buff **ltbp,
unsigned int *offset)
{
struct ibmvnic_long_term_buff *ltb;
int nbufs; /* # of buffers in one ltb */
int i;
WARN_ON_ONCE(bufidx >= txpool->num_buffers);
for (i = 0; i < txpool->ltb_set.num_ltbs; i++) {
ltb = &txpool->ltb_set.ltbs[i];
nbufs = ltb->size / txpool->buf_size;
if (bufidx < nbufs)
break;
bufidx -= nbufs;
}
*ltbp = ltb;
*offset = bufidx * txpool->buf_size;
}
static void deactivate_rx_pools(struct ibmvnic_adapter *adapter) static void deactivate_rx_pools(struct ibmvnic_adapter *adapter)
{ {
int i; int i;
...@@ -361,6 +565,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, ...@@ -361,6 +565,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
struct device *dev = &adapter->vdev->dev; struct device *dev = &adapter->vdev->dev;
struct ibmvnic_ind_xmit_queue *ind_bufp; struct ibmvnic_ind_xmit_queue *ind_bufp;
struct ibmvnic_sub_crq_queue *rx_scrq; struct ibmvnic_sub_crq_queue *rx_scrq;
struct ibmvnic_long_term_buff *ltb;
union sub_crq *sub_crq; union sub_crq *sub_crq;
int buffers_added = 0; int buffers_added = 0;
unsigned long lpar_rc; unsigned long lpar_rc;
...@@ -369,7 +574,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, ...@@ -369,7 +574,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
dma_addr_t dma_addr; dma_addr_t dma_addr;
unsigned char *dst; unsigned char *dst;
int shift = 0; int shift = 0;
int index; int bufidx;
int i; int i;
if (!pool->active) if (!pool->active)
...@@ -385,14 +590,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, ...@@ -385,14 +590,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
* be 0. * be 0.
*/ */
for (i = ind_bufp->index; i < count; ++i) { for (i = ind_bufp->index; i < count; ++i) {
index = pool->free_map[pool->next_free]; bufidx = pool->free_map[pool->next_free];
/* We maybe reusing the skb from earlier resets. Allocate /* We maybe reusing the skb from earlier resets. Allocate
* only if necessary. But since the LTB may have changed * only if necessary. But since the LTB may have changed
* during reset (see init_rx_pools()), update LTB below * during reset (see init_rx_pools()), update LTB below
* even if reusing skb. * even if reusing skb.
*/ */
skb = pool->rx_buff[index].skb; skb = pool->rx_buff[bufidx].skb;
if (!skb) { if (!skb) {
skb = netdev_alloc_skb(adapter->netdev, skb = netdev_alloc_skb(adapter->netdev,
pool->buff_size); pool->buff_size);
...@@ -407,26 +612,26 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, ...@@ -407,26 +612,26 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
pool->next_free = (pool->next_free + 1) % pool->size; pool->next_free = (pool->next_free + 1) % pool->size;
/* Copy the skb to the long term mapped DMA buffer */ /* Copy the skb to the long term mapped DMA buffer */
offset = index * pool->buff_size; map_rxpool_buf_to_ltb(pool, bufidx, &ltb, &offset);
dst = pool->long_term_buff.buff + offset; dst = ltb->buff + offset;
memset(dst, 0, pool->buff_size); memset(dst, 0, pool->buff_size);
dma_addr = pool->long_term_buff.addr + offset; dma_addr = ltb->addr + offset;
/* add the skb to an rx_buff in the pool */ /* add the skb to an rx_buff in the pool */
pool->rx_buff[index].data = dst; pool->rx_buff[bufidx].data = dst;
pool->rx_buff[index].dma = dma_addr; pool->rx_buff[bufidx].dma = dma_addr;
pool->rx_buff[index].skb = skb; pool->rx_buff[bufidx].skb = skb;
pool->rx_buff[index].pool_index = pool->index; pool->rx_buff[bufidx].pool_index = pool->index;
pool->rx_buff[index].size = pool->buff_size; pool->rx_buff[bufidx].size = pool->buff_size;
/* queue the rx_buff for the next send_subcrq_indirect */ /* queue the rx_buff for the next send_subcrq_indirect */
sub_crq = &ind_bufp->indir_arr[ind_bufp->index++]; sub_crq = &ind_bufp->indir_arr[ind_bufp->index++];
memset(sub_crq, 0, sizeof(*sub_crq)); memset(sub_crq, 0, sizeof(*sub_crq));
sub_crq->rx_add.first = IBMVNIC_CRQ_CMD; sub_crq->rx_add.first = IBMVNIC_CRQ_CMD;
sub_crq->rx_add.correlator = sub_crq->rx_add.correlator =
cpu_to_be64((u64)&pool->rx_buff[index]); cpu_to_be64((u64)&pool->rx_buff[bufidx]);
sub_crq->rx_add.ioba = cpu_to_be32(dma_addr); sub_crq->rx_add.ioba = cpu_to_be32(dma_addr);
sub_crq->rx_add.map_id = pool->long_term_buff.map_id; sub_crq->rx_add.map_id = ltb->map_id;
/* The length field of the sCRQ is defined to be 24 bits so the /* The length field of the sCRQ is defined to be 24 bits so the
* buffer size needs to be left shifted by a byte before it is * buffer size needs to be left shifted by a byte before it is
...@@ -466,10 +671,10 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, ...@@ -466,10 +671,10 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
sub_crq = &ind_bufp->indir_arr[i]; sub_crq = &ind_bufp->indir_arr[i];
rx_buff = (struct ibmvnic_rx_buff *) rx_buff = (struct ibmvnic_rx_buff *)
be64_to_cpu(sub_crq->rx_add.correlator); be64_to_cpu(sub_crq->rx_add.correlator);
index = (int)(rx_buff - pool->rx_buff); bufidx = (int)(rx_buff - pool->rx_buff);
pool->free_map[pool->next_free] = index; pool->free_map[pool->next_free] = bufidx;
dev_kfree_skb_any(pool->rx_buff[index].skb); dev_kfree_skb_any(pool->rx_buff[bufidx].skb);
pool->rx_buff[index].skb = NULL; pool->rx_buff[bufidx].skb = NULL;
} }
adapter->replenish_add_buff_failure += ind_bufp->index; adapter->replenish_add_buff_failure += ind_bufp->index;
atomic_add(buffers_added, &pool->available); atomic_add(buffers_added, &pool->available);
...@@ -579,7 +784,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter) ...@@ -579,7 +784,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
kfree(rx_pool->free_map); kfree(rx_pool->free_map);
free_long_term_buff(adapter, &rx_pool->long_term_buff); free_ltb_set(adapter, &rx_pool->ltb_set);
if (!rx_pool->rx_buff) if (!rx_pool->rx_buff)
continue; continue;
...@@ -724,8 +929,8 @@ static int init_rx_pools(struct net_device *netdev) ...@@ -724,8 +929,8 @@ static int init_rx_pools(struct net_device *netdev)
dev_dbg(dev, "Updating LTB for rx pool %d [%d, %d]\n", dev_dbg(dev, "Updating LTB for rx pool %d [%d, %d]\n",
i, rx_pool->size, rx_pool->buff_size); i, rx_pool->size, rx_pool->buff_size);
rc = alloc_long_term_buff(adapter, &rx_pool->long_term_buff, rc = alloc_ltb_set(adapter, &rx_pool->ltb_set,
rx_pool->size * rx_pool->buff_size); rx_pool->size, rx_pool->buff_size);
if (rc) if (rc)
goto out; goto out;
...@@ -782,7 +987,7 @@ static void release_one_tx_pool(struct ibmvnic_adapter *adapter, ...@@ -782,7 +987,7 @@ static void release_one_tx_pool(struct ibmvnic_adapter *adapter,
{ {
kfree(tx_pool->tx_buff); kfree(tx_pool->tx_buff);
kfree(tx_pool->free_map); kfree(tx_pool->free_map);
free_long_term_buff(adapter, &tx_pool->long_term_buff); free_ltb_set(adapter, &tx_pool->ltb_set);
} }
/** /**
...@@ -972,17 +1177,16 @@ static int init_tx_pools(struct net_device *netdev) ...@@ -972,17 +1177,16 @@ static int init_tx_pools(struct net_device *netdev)
for (i = 0; i < num_pools; i++) { for (i = 0; i < num_pools; i++) {
struct ibmvnic_tx_pool *tso_pool; struct ibmvnic_tx_pool *tso_pool;
struct ibmvnic_tx_pool *tx_pool; struct ibmvnic_tx_pool *tx_pool;
u32 ltb_size;
tx_pool = &adapter->tx_pool[i]; tx_pool = &adapter->tx_pool[i];
ltb_size = tx_pool->num_buffers * tx_pool->buf_size;
if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
ltb_size))
goto out;
dev_dbg(dev, "Updated LTB for tx pool %d [%p, %d, %d]\n", dev_dbg(dev, "Updating LTB for tx pool %d [%d, %d]\n",
i, tx_pool->long_term_buff.buff, i, tx_pool->num_buffers, tx_pool->buf_size);
tx_pool->num_buffers, tx_pool->buf_size);
rc = alloc_ltb_set(adapter, &tx_pool->ltb_set,
tx_pool->num_buffers, tx_pool->buf_size);
if (rc)
goto out;
tx_pool->consumer_index = 0; tx_pool->consumer_index = 0;
tx_pool->producer_index = 0; tx_pool->producer_index = 0;
...@@ -991,14 +1195,14 @@ static int init_tx_pools(struct net_device *netdev) ...@@ -991,14 +1195,14 @@ static int init_tx_pools(struct net_device *netdev)
tx_pool->free_map[j] = j; tx_pool->free_map[j] = j;
tso_pool = &adapter->tso_pool[i]; tso_pool = &adapter->tso_pool[i];
ltb_size = tso_pool->num_buffers * tso_pool->buf_size;
if (alloc_long_term_buff(adapter, &tso_pool->long_term_buff,
ltb_size))
goto out;
dev_dbg(dev, "Updated LTB for tso pool %d [%p, %d, %d]\n", dev_dbg(dev, "Updating LTB for tso pool %d [%d, %d]\n",
i, tso_pool->long_term_buff.buff, i, tso_pool->num_buffers, tso_pool->buf_size);
tso_pool->num_buffers, tso_pool->buf_size);
rc = alloc_ltb_set(adapter, &tso_pool->ltb_set,
tso_pool->num_buffers, tso_pool->buf_size);
if (rc)
goto out;
tso_pool->consumer_index = 0; tso_pool->consumer_index = 0;
tso_pool->producer_index = 0; tso_pool->producer_index = 0;
...@@ -1911,6 +2115,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ...@@ -1911,6 +2115,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
struct ibmvnic_ind_xmit_queue *ind_bufp; struct ibmvnic_ind_xmit_queue *ind_bufp;
struct ibmvnic_tx_buff *tx_buff = NULL; struct ibmvnic_tx_buff *tx_buff = NULL;
struct ibmvnic_sub_crq_queue *tx_scrq; struct ibmvnic_sub_crq_queue *tx_scrq;
struct ibmvnic_long_term_buff *ltb;
struct ibmvnic_tx_pool *tx_pool; struct ibmvnic_tx_pool *tx_pool;
unsigned int tx_send_failed = 0; unsigned int tx_send_failed = 0;
netdev_tx_t ret = NETDEV_TX_OK; netdev_tx_t ret = NETDEV_TX_OK;
...@@ -1926,7 +2131,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ...@@ -1926,7 +2131,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
unsigned int offset; unsigned int offset;
int num_entries = 1; int num_entries = 1;
unsigned char *dst; unsigned char *dst;
int index = 0; int bufidx = 0;
u8 proto = 0; u8 proto = 0;
/* If a reset is in progress, drop the packet since /* If a reset is in progress, drop the packet since
...@@ -1960,9 +2165,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ...@@ -1960,9 +2165,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
else else
tx_pool = &adapter->tx_pool[queue_num]; tx_pool = &adapter->tx_pool[queue_num];
index = tx_pool->free_map[tx_pool->consumer_index]; bufidx = tx_pool->free_map[tx_pool->consumer_index];
if (index == IBMVNIC_INVALID_MAP) { if (bufidx == IBMVNIC_INVALID_MAP) {
dev_kfree_skb_any(skb); dev_kfree_skb_any(skb);
tx_send_failed++; tx_send_failed++;
tx_dropped++; tx_dropped++;
...@@ -1973,10 +2178,11 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ...@@ -1973,10 +2178,11 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP; tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP;
offset = index * tx_pool->buf_size; map_txpool_buf_to_ltb(tx_pool, bufidx, &ltb, &offset);
dst = tx_pool->long_term_buff.buff + offset;
dst = ltb->buff + offset;
memset(dst, 0, tx_pool->buf_size); memset(dst, 0, tx_pool->buf_size);
data_dma_addr = tx_pool->long_term_buff.addr + offset; data_dma_addr = ltb->addr + offset;
if (skb_shinfo(skb)->nr_frags) { if (skb_shinfo(skb)->nr_frags) {
int cur, i; int cur, i;
...@@ -2003,9 +2209,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ...@@ -2003,9 +2209,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_pool->consumer_index = tx_pool->consumer_index =
(tx_pool->consumer_index + 1) % tx_pool->num_buffers; (tx_pool->consumer_index + 1) % tx_pool->num_buffers;
tx_buff = &tx_pool->tx_buff[index]; tx_buff = &tx_pool->tx_buff[bufidx];
tx_buff->skb = skb; tx_buff->skb = skb;
tx_buff->index = index; tx_buff->index = bufidx;
tx_buff->pool_index = queue_num; tx_buff->pool_index = queue_num;
memset(&tx_crq, 0, sizeof(tx_crq)); memset(&tx_crq, 0, sizeof(tx_crq));
...@@ -2017,10 +2223,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ...@@ -2017,10 +2223,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
if (skb_is_gso(skb)) if (skb_is_gso(skb))
tx_crq.v1.correlator = tx_crq.v1.correlator =
cpu_to_be32(index | IBMVNIC_TSO_POOL_MASK); cpu_to_be32(bufidx | IBMVNIC_TSO_POOL_MASK);
else else
tx_crq.v1.correlator = cpu_to_be32(index); tx_crq.v1.correlator = cpu_to_be32(bufidx);
tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id); tx_crq.v1.dma_reg = cpu_to_be16(ltb->map_id);
tx_crq.v1.sge_len = cpu_to_be32(skb->len); tx_crq.v1.sge_len = cpu_to_be32(skb->len);
tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); tx_crq.v1.ioba = cpu_to_be64(data_dma_addr);
...@@ -4031,16 +4237,16 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) ...@@ -4031,16 +4237,16 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
adapter->desired.rx_entries = adapter->desired.rx_entries =
adapter->max_rx_add_entries_per_subcrq; adapter->max_rx_add_entries_per_subcrq;
max_entries = IBMVNIC_MAX_LTB_SIZE / max_entries = IBMVNIC_LTB_SET_SIZE /
(adapter->req_mtu + IBMVNIC_BUFFER_HLEN); (adapter->req_mtu + IBMVNIC_BUFFER_HLEN);
if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) *
adapter->desired.tx_entries > IBMVNIC_MAX_LTB_SIZE) { adapter->desired.tx_entries > IBMVNIC_LTB_SET_SIZE) {
adapter->desired.tx_entries = max_entries; adapter->desired.tx_entries = max_entries;
} }
if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) *
adapter->desired.rx_entries > IBMVNIC_MAX_LTB_SIZE) { adapter->desired.rx_entries > IBMVNIC_LTB_SET_SIZE) {
adapter->desired.rx_entries = max_entries; adapter->desired.rx_entries = max_entries;
} }
......
...@@ -36,9 +36,50 @@ ...@@ -36,9 +36,50 @@
#define IBMVNIC_TSO_BUFS 64 #define IBMVNIC_TSO_BUFS 64
#define IBMVNIC_TSO_POOL_MASK 0x80000000 #define IBMVNIC_TSO_POOL_MASK 0x80000000
#define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE) /* A VNIC adapter has set of Rx and Tx pools (aka queues). Each Rx/Tx pool
#define IBMVNIC_BUFFER_HLEN 500 * has a set of buffers. The size of each buffer is determined by the MTU.
*
* Each Rx/Tx pool is also associated with a DMA region that is shared
* with the "hardware" (VIOS) and used to send/receive packets. The DMA
* region is also referred to as a Long Term Buffer or LTB.
*
* The size of the DMA region required for an Rx/Tx pool depends on the
* number and size (MTU) of the buffers in the pool. At the max levels
* of 4096 jumbo frames (MTU=9000) we will need about 9K*4K = 36MB plus
* some padding.
*
* But the size of a single DMA region is limited by MAX_ORDER in the
* kernel (about 16MB currently). To support say 4K Jumbo frames, we
* use a set of LTBs (struct ltb_set) per pool.
*
* IBMVNIC_ONE_LTB_MAX - max size of each LTB supported by kernel
* IBMVNIC_ONE_LTB_SIZE - current max size of each LTB in an ltb_set
* (must be <= IBMVNIC_ONE_LTB_MAX)
* IBMVNIC_LTB_SET_SIZE - current size of all LTBs in an ltb_set
*
* Each VNIC can have up to 16 Rx, 16 Tx and 16 TSO pools. The TSO pools
* are of fixed length (IBMVNIC_TSO_BUF_SZ * IBMVNIC_TSO_BUFS) of 4MB.
*
* The Rx and Tx pools can have up to 4096 buffers. The max size of these
* buffers is about 9588 (for jumbo frames, including IBMVNIC_BUFFER_HLEN).
* So, setting the IBMVNIC_LTB_SET_SIZE for a pool to 4096 * 9588 ~= 38MB.
*
* There is a trade-off in setting IBMVNIC_ONE_LTB_SIZE. If it is large,
* the allocation of the LTB can fail when the system is low on memory. If
* it is too small, we would need several mappings for each of the Rx/
* Tx/TSO pools but there is a limit of 255 mappings per vnic in the
* VNIC protocol.
*
* So setting IBMVNIC_ONE_LTB_SIZE to 8MB. With IBMVNIC_LTB_SET_SIZE set
* to 38MB, we will need 5 LTBs per Rx and Tx pool and 1 LTB per TSO
* pool for the 4MB. Thus the 16 Rx and Tx queues require 32 * 5 = 160
* plus 16 for the TSO pools for a total of 176 LTB mappings per VNIC.
*/
/* Largest single LTB, in bytes: 2^(MAX_ORDER - 1) pages, as a u32. */
#define IBMVNIC_ONE_LTB_MAX ((u32)((1 << (MAX_ORDER - 1)) * PAGE_SIZE))
/* Size cap for each LTB in a set: 8MB, or IBMVNIC_ONE_LTB_MAX if smaller. */
#define IBMVNIC_ONE_LTB_SIZE min((u32)(8 << 20), IBMVNIC_ONE_LTB_MAX)
/* Combined size of all LTBs in one set: 38MB. */
#define IBMVNIC_LTB_SET_SIZE (38 << 20)
/* Bytes added to the requested MTU when sizing each buffer. */
#define IBMVNIC_BUFFER_HLEN 500
#define IBMVNIC_RESET_DELAY 100 #define IBMVNIC_RESET_DELAY 100
static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = { static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = {
...@@ -798,6 +839,11 @@ struct ibmvnic_long_term_buff { ...@@ -798,6 +839,11 @@ struct ibmvnic_long_term_buff {
u8 map_id; u8 map_id;
}; };
/**
 * struct ibmvnic_ltb_set - a group of long term buffers (LTBs) for one pool
 * @num_ltbs: number of entries in the @ltbs array
 * @ltbs: kmalloc'ed array of LTB descriptors; NULL when the set is empty
 *
 * Populated by alloc_ltb_set() and torn down by free_ltb_set().
 */
struct ibmvnic_ltb_set {
int num_ltbs;
struct ibmvnic_long_term_buff *ltbs;
};
struct ibmvnic_tx_buff { struct ibmvnic_tx_buff {
struct sk_buff *skb; struct sk_buff *skb;
int index; int index;
...@@ -810,7 +856,7 @@ struct ibmvnic_tx_pool { ...@@ -810,7 +856,7 @@ struct ibmvnic_tx_pool {
int *free_map; int *free_map;
int consumer_index; int consumer_index;
int producer_index; int producer_index;
struct ibmvnic_long_term_buff long_term_buff; struct ibmvnic_ltb_set ltb_set;
int num_buffers; int num_buffers;
int buf_size; int buf_size;
} ____cacheline_aligned; } ____cacheline_aligned;
...@@ -833,7 +879,7 @@ struct ibmvnic_rx_pool { ...@@ -833,7 +879,7 @@ struct ibmvnic_rx_pool {
int next_free; int next_free;
int next_alloc; int next_alloc;
int active; int active;
struct ibmvnic_long_term_buff long_term_buff; struct ibmvnic_ltb_set ltb_set;
} ____cacheline_aligned; } ____cacheline_aligned;
struct ibmvnic_vpd { struct ibmvnic_vpd {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment