Commit defe4b87 authored by Paolo Abeni

Merge branch 'net-mana-fix-some-tx-processing-bugs'

Haiyang Zhang says:

====================
net: mana: Fix some TX processing bugs

Fix TX processing bugs on error handling, tso_bytes calculation,
and sge0 size.
====================

Link: https://lore.kernel.org/r/1696020147-14989-1-git-send-email-haiyangz@microsoft.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents d0f95894 a43e8e9f
......@@ -91,63 +91,137 @@ static unsigned int mana_checksum_info(struct sk_buff *skb)
return 0;
}
/* Fill scatter-gather slot sg_i for one DMA-mapped buffer.
 *
 * The address/length pair is written twice: into the SGL of tp->wqe_req
 * (together with the memory key gpa_mkey), and into the per-skb bookkeeping
 * arrays in ash so the same slot can be looked up again later.
 */
static void mana_add_sge(struct mana_tx_package *tp, struct mana_skb_head *ash,
			 int sg_i, dma_addr_t da, int sge_len, u32 gpa_mkey)
{
	/* SGL entry of the work request */
	tp->wqe_req.sgl[sg_i].size = sge_len;
	tp->wqe_req.sgl[sg_i].mem_key = gpa_mkey;
	tp->wqe_req.sgl[sg_i].address = da;

	/* Per-skb record of the mapping */
	ash->size[sg_i] = sge_len;
	ash->dma_handle[sg_i] = da;
}
/* Map the skb's linear data and page fragments for DMA, recording every
 * mapping both in the mana_skb_head located at skb->head and in the SGL of
 * tp->wqe_req (via mana_add_sge()).
 *
 * When gso_hs is non-zero and smaller than the linear length, the linear
 * part is split into two SGEs: SGE0 covers the first gso_hs bytes and SGE1
 * covers the rest of the linear data. Otherwise a single SGE covers the
 * whole linear part.
 *
 * Returns 0 on success, or -ENOMEM when a mapping fails; mappings made
 * before the failure are unmapped again on that path.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers, so
 * pre-change and post-change lines appear together (e.g. both versions of
 * the 'tp' parameter line and both unwind loops).
 */
static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc,
struct mana_tx_package *tp)
struct mana_tx_package *tp, int gso_hs)
{
struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
int hsg = 1; /* num of SGEs of linear part */
struct gdma_dev *gd = apc->ac->gdma_dev;
int skb_hlen = skb_headlen(skb);
int sge0_len, sge1_len = 0;
struct gdma_context *gc;
struct device *dev;
skb_frag_t *frag;
dma_addr_t da;
int sg_i;
int i;
gc = gd->gdma_context;
dev = gc->dev;
da = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
/* Decide how the linear part is split: header-only SGE0 plus SGE1 for the
 * remainder, or one SGE for everything (sge1_len stays 0).
 */
if (gso_hs && gso_hs < skb_hlen) {
sge0_len = gso_hs;
sge1_len = skb_hlen - gso_hs;
} else {
sge0_len = skb_hlen;
}
da = dma_map_single(dev, skb->data, sge0_len, DMA_TO_DEVICE);
if (dma_mapping_error(dev, da))
return -ENOMEM;
ash->dma_handle[0] = da;
ash->size[0] = skb_headlen(skb);
mana_add_sge(tp, ash, 0, da, sge0_len, gd->gpa_mkey);
tp->wqe_req.sgl[0].address = ash->dma_handle[0];
tp->wqe_req.sgl[0].mem_key = gd->gpa_mkey;
tp->wqe_req.sgl[0].size = ash->size[0];
/* Second SGE for the linear bytes that follow the first sge0_len bytes */
if (sge1_len) {
sg_i = 1;
da = dma_map_single(dev, skb->data + sge0_len, sge1_len,
DMA_TO_DEVICE);
if (dma_mapping_error(dev, da))
goto frag_err;
mana_add_sge(tp, ash, sg_i, da, sge1_len, gd->gpa_mkey);
hsg = 2;
}
/* Page fragments occupy the slots after the hsg linear-part SGEs */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
sg_i = hsg + i;
frag = &skb_shinfo(skb)->frags[i];
da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag),
DMA_TO_DEVICE);
if (dma_mapping_error(dev, da))
goto frag_err;
ash->dma_handle[i + 1] = da;
ash->size[i + 1] = skb_frag_size(frag);
tp->wqe_req.sgl[i + 1].address = ash->dma_handle[i + 1];
tp->wqe_req.sgl[i + 1].mem_key = gd->gpa_mkey;
tp->wqe_req.sgl[i + 1].size = ash->size[i + 1];
mana_add_sge(tp, ash, sg_i, da, skb_frag_size(frag),
gd->gpa_mkey);
}
return 0;
/* Unwind: sg_i is the slot whose mapping failed; slots below it that are
 * >= hsg are fragment mappings, slots below hsg are linear-part mappings.
 */
frag_err:
for (i = i - 1; i >= 0; i--)
dma_unmap_page(dev, ash->dma_handle[i + 1], ash->size[i + 1],
for (i = sg_i - 1; i >= hsg; i--)
dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
DMA_TO_DEVICE);
dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);
for (i = hsg - 1; i >= 0; i--)
dma_unmap_single(dev, ash->dma_handle[i], ash->size[i],
DMA_TO_DEVICE);
return -ENOMEM;
}
/* Work out how many SGEs a GSO skb needs.
 *
 * MANA NIC requires GSO packets to put only the packet header to SGE0.
 * If the skb linear part is longer than the header (gso_hs), the linear
 * part therefore takes two SGEs instead of one. A header that is longer
 * than the linear part is rejected with a rate-limited error message.
 *
 * Return: the number of SGEs (linear-part SGEs plus one per fragment) as a
 * positive value, or -EINVAL when gso_hs exceeds the linear length.
 */
static int mana_fix_skb_head(struct net_device *ndev, struct sk_buff *skb,
			     int gso_hs)
{
	int sge_cnt = 1 + skb_shinfo(skb)->nr_frags;
	int linear_len = skb_headlen(skb);

	/* Header spills past the linear area: cannot be handled here. */
	if (gso_hs > linear_len) {
		if (net_ratelimit())
			netdev_err(ndev,
				   "TX nonlinear head: hs:%d, skb_hlen:%d\n",
				   gso_hs, linear_len);
		return -EINVAL;
	}

	/* Linear area carries data beyond the header: one extra SGE. */
	if (gso_hs < linear_len)
		sge_cnt++;

	return sge_cnt;
}
/* Get the GSO packet's header size.
 *
 * - encapsulated skb: length reported by skb_inner_tcp_all_headers()
 * - SKB_GSO_UDP_L4:   transport offset plus the UDP header size
 * - otherwise:        length reported by skb_tcp_all_headers()
 */
static int mana_get_gso_hs(struct sk_buff *skb)
{
	if (skb->encapsulation)
		return skb_inner_tcp_all_headers(skb);

	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
		return skb_transport_offset(skb) + sizeof(struct udphdr);

	return skb_tcp_all_headers(skb);
}
netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
struct mana_port_context *apc = netdev_priv(ndev);
int gso_hs = 0; /* zero for non-GSO pkts */
u16 txq_idx = skb_get_queue_mapping(skb);
struct gdma_dev *gd = apc->ac->gdma_dev;
bool ipv4 = false, ipv6 = false;
......@@ -159,7 +233,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
struct mana_txq *txq;
struct mana_cq *cq;
int err, len;
u16 ihs;
if (unlikely(!apc->port_is_up))
goto tx_drop;
......@@ -209,19 +282,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
pkg.wqe_req.client_data_unit = 0;
pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags;
WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
pkg.wqe_req.sgl = pkg.sgl_array;
} else {
pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge,
sizeof(struct gdma_sge),
GFP_ATOMIC);
if (!pkg.sgl_ptr)
goto tx_drop_count;
pkg.wqe_req.sgl = pkg.sgl_ptr;
}
if (skb->protocol == htons(ETH_P_IP))
ipv4 = true;
......@@ -229,6 +289,26 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
ipv6 = true;
if (skb_is_gso(skb)) {
int num_sge;
gso_hs = mana_get_gso_hs(skb);
num_sge = mana_fix_skb_head(ndev, skb, gso_hs);
if (num_sge > 0)
pkg.wqe_req.num_sge = num_sge;
else
goto tx_drop_count;
u64_stats_update_begin(&tx_stats->syncp);
if (skb->encapsulation) {
tx_stats->tso_inner_packets++;
tx_stats->tso_inner_bytes += skb->len - gso_hs;
} else {
tx_stats->tso_packets++;
tx_stats->tso_bytes += skb->len - gso_hs;
}
u64_stats_update_end(&tx_stats->syncp);
pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;
......@@ -252,28 +332,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
&ipv6_hdr(skb)->daddr, 0,
IPPROTO_TCP, 0);
}
if (skb->encapsulation) {
ihs = skb_inner_tcp_all_headers(skb);
u64_stats_update_begin(&tx_stats->syncp);
tx_stats->tso_inner_packets++;
tx_stats->tso_inner_bytes += skb->len - ihs;
u64_stats_update_end(&tx_stats->syncp);
} else {
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
} else {
ihs = skb_tcp_all_headers(skb);
if (ipv6_has_hopopt_jumbo(skb))
ihs -= sizeof(struct hop_jumbo_hdr);
}
u64_stats_update_begin(&tx_stats->syncp);
tx_stats->tso_packets++;
tx_stats->tso_bytes += skb->len - ihs;
u64_stats_update_end(&tx_stats->syncp);
}
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
csum_type = mana_checksum_info(skb);
......@@ -296,11 +354,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
} else {
/* Can't do offload of this type of checksum */
if (skb_checksum_help(skb))
goto free_sgl_ptr;
goto tx_drop_count;
}
}
if (mana_map_skb(skb, apc, &pkg)) {
WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
pkg.wqe_req.sgl = pkg.sgl_array;
} else {
pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge,
sizeof(struct gdma_sge),
GFP_ATOMIC);
if (!pkg.sgl_ptr)
goto tx_drop_count;
pkg.wqe_req.sgl = pkg.sgl_ptr;
}
if (mana_map_skb(skb, apc, &pkg, gso_hs)) {
u64_stats_update_begin(&tx_stats->syncp);
tx_stats->mana_map_err++;
u64_stats_update_end(&tx_stats->syncp);
......@@ -1258,11 +1330,16 @@ static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
struct device *dev = gc->dev;
int i;
int hsg, i;
dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);
/* Number of SGEs of linear part */
hsg = (skb_is_gso(skb) && skb_headlen(skb) > ash->size[0]) ? 2 : 1;
for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
for (i = 0; i < hsg; i++)
dma_unmap_single(dev, ash->dma_handle[i], ash->size[i],
DMA_TO_DEVICE);
for (i = hsg; i < skb_shinfo(skb)->nr_frags + hsg; i++)
dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
DMA_TO_DEVICE);
}
......@@ -1317,19 +1394,23 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
case CQE_TX_VPORT_IDX_OUT_OF_RANGE:
case CQE_TX_VPORT_DISABLED:
case CQE_TX_VLAN_TAGGING_VIOLATION:
WARN_ONCE(1, "TX: CQE error %d: ignored.\n",
if (net_ratelimit())
netdev_err(ndev, "TX: CQE error %d\n",
cqe_oob->cqe_hdr.cqe_type);
apc->eth_stats.tx_cqe_err++;
break;
default:
/* If the CQE type is unexpected, log an error, assert,
* and go through the error path.
/* If the CQE type is unknown, log an error,
* and still free the SKB, update tail, etc.
*/
WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n",
if (net_ratelimit())
netdev_err(ndev, "TX: unknown CQE type %d\n",
cqe_oob->cqe_hdr.cqe_type);
apc->eth_stats.tx_cqe_unknown_type++;
return;
break;
}
if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num))
......
......@@ -103,9 +103,10 @@ struct mana_txq {
/* skb data and frags dma mappings: dma_handle[i] and size[i] describe the
 * i-th mapped buffer of one skb.
 */
struct mana_skb_head {
dma_addr_t dma_handle[MAX_SKB_FRAGS + 1];
/* GSO pkts may have 2 SGEs for the linear part, hence the extra slot
 * (+ 2 instead of + 1) in both arrays.
 */
dma_addr_t dma_handle[MAX_SKB_FRAGS + 2];
u32 size[MAX_SKB_FRAGS + 1];
u32 size[MAX_SKB_FRAGS + 2];
};
#define MANA_HEADROOM sizeof(struct mana_skb_head)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment