Commit 40ca1988 authored by Eli Cohen, committed by Roland Dreier

IPoIB: Add LSO support

For HCAs that support TCP segmentation offload (IB_DEVICE_UD_TSO), set
NETIF_F_TSO and use HW LSO to offload TCP segmentation.
Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent c93570f2
...@@ -319,6 +319,7 @@ struct ipoib_dev_priv { ...@@ -319,6 +319,7 @@ struct ipoib_dev_priv {
struct dentry *mcg_dentry; struct dentry *mcg_dentry;
struct dentry *path_dentry; struct dentry *path_dentry;
#endif #endif
int hca_caps;
}; };
struct ipoib_ah { struct ipoib_ah {
......
...@@ -1384,7 +1384,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, ...@@ -1384,7 +1384,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
ipoib_warn(priv, "enabling connected mode " ipoib_warn(priv, "enabling connected mode "
"will cause multicast packet drops\n"); "will cause multicast packet drops\n");
dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG); dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO);
priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
ipoib_flush_paths(dev); ipoib_flush_paths(dev);
...@@ -1396,8 +1396,11 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, ...@@ -1396,8 +1396,11 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
dev->mtu = min(priv->mcast_mtu, dev->mtu); dev->mtu = min(priv->mcast_mtu, dev->mtu);
ipoib_flush_paths(dev); ipoib_flush_paths(dev);
if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) {
dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
dev->features |= NETIF_F_TSO;
}
return count; return count;
} }
......
...@@ -39,6 +39,8 @@ ...@@ -39,6 +39,8 @@
#include <linux/dma-mapping.h> #include <linux/dma-mapping.h>
#include <rdma/ib_cache.h> #include <rdma/ib_cache.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include "ipoib.h" #include "ipoib.h"
...@@ -249,29 +251,37 @@ static int ipoib_dma_map_tx(struct ib_device *ca, ...@@ -249,29 +251,37 @@ static int ipoib_dma_map_tx(struct ib_device *ca,
struct sk_buff *skb = tx_req->skb; struct sk_buff *skb = tx_req->skb;
u64 *mapping = tx_req->mapping; u64 *mapping = tx_req->mapping;
int i; int i;
int off;
if (skb_headlen(skb)) {
mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb), mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
DMA_TO_DEVICE); DMA_TO_DEVICE);
if (unlikely(ib_dma_mapping_error(ca, mapping[0]))) if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
return -EIO; return -EIO;
off = 1;
} else
off = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) { for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
mapping[i + 1] = ib_dma_map_page(ca, frag->page, mapping[i + off] = ib_dma_map_page(ca, frag->page,
frag->page_offset, frag->size, frag->page_offset, frag->size,
DMA_TO_DEVICE); DMA_TO_DEVICE);
if (unlikely(ib_dma_mapping_error(ca, mapping[i + 1]))) if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
goto partial_error; goto partial_error;
} }
return 0; return 0;
partial_error: partial_error:
ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
for (; i > 0; --i) { for (; i > 0; --i) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
ib_dma_unmap_page(ca, mapping[i], frag->size, DMA_TO_DEVICE); ib_dma_unmap_page(ca, mapping[i - !off], frag->size, DMA_TO_DEVICE);
} }
if (off)
ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
return -EIO; return -EIO;
} }
...@@ -281,12 +291,17 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca, ...@@ -281,12 +291,17 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
struct sk_buff *skb = tx_req->skb; struct sk_buff *skb = tx_req->skb;
u64 *mapping = tx_req->mapping; u64 *mapping = tx_req->mapping;
int i; int i;
int off;
if (skb_headlen(skb)) {
ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE); ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
off = 1;
} else
off = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) { for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
ib_dma_unmap_page(ca, mapping[i + 1], frag->size, ib_dma_unmap_page(ca, mapping[i + off], frag->size,
DMA_TO_DEVICE); DMA_TO_DEVICE);
} }
} }
...@@ -392,24 +407,40 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) ...@@ -392,24 +407,40 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
static inline int post_send(struct ipoib_dev_priv *priv, static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id, unsigned int wr_id,
struct ib_ah *address, u32 qpn, struct ib_ah *address, u32 qpn,
u64 *mapping, int headlen, struct ipoib_tx_buf *tx_req,
skb_frag_t *frags, void *head, int hlen)
int nr_frags)
{ {
struct ib_send_wr *bad_wr; struct ib_send_wr *bad_wr;
int i; int i, off;
struct sk_buff *skb = tx_req->skb;
skb_frag_t *frags = skb_shinfo(skb)->frags;
int nr_frags = skb_shinfo(skb)->nr_frags;
u64 *mapping = tx_req->mapping;
if (skb_headlen(skb)) {
priv->tx_sge[0].addr = mapping[0]; priv->tx_sge[0].addr = mapping[0];
priv->tx_sge[0].length = headlen; priv->tx_sge[0].length = skb_headlen(skb);
off = 1;
} else
off = 0;
for (i = 0; i < nr_frags; ++i) { for (i = 0; i < nr_frags; ++i) {
priv->tx_sge[i + 1].addr = mapping[i + 1]; priv->tx_sge[i + off].addr = mapping[i + off];
priv->tx_sge[i + 1].length = frags[i].size; priv->tx_sge[i + off].length = frags[i].size;
} }
priv->tx_wr.num_sge = nr_frags + 1; priv->tx_wr.num_sge = nr_frags + off;
priv->tx_wr.wr_id = wr_id; priv->tx_wr.wr_id = wr_id;
priv->tx_wr.wr.ud.remote_qpn = qpn; priv->tx_wr.wr.ud.remote_qpn = qpn;
priv->tx_wr.wr.ud.ah = address; priv->tx_wr.wr.ud.ah = address;
if (head) {
priv->tx_wr.wr.ud.mss = skb_shinfo(skb)->gso_size;
priv->tx_wr.wr.ud.header = head;
priv->tx_wr.wr.ud.hlen = hlen;
priv->tx_wr.opcode = IB_WR_LSO;
} else
priv->tx_wr.opcode = IB_WR_SEND;
return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr); return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
} }
...@@ -418,7 +449,20 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ...@@ -418,7 +449,20 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_tx_buf *tx_req; struct ipoib_tx_buf *tx_req;
int hlen;
void *phead;
if (skb_is_gso(skb)) {
hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
phead = skb->data;
if (unlikely(!skb_pull(skb, hlen))) {
ipoib_warn(priv, "linear data too small\n");
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
} else {
if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) { if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
skb->len, priv->mcast_mtu + IPOIB_ENCAP_LEN); skb->len, priv->mcast_mtu + IPOIB_ENCAP_LEN);
...@@ -427,6 +471,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ...@@ -427,6 +471,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu); ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
return; return;
} }
phead = NULL;
hlen = 0;
}
ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n", ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
skb->len, address, qpn); skb->len, address, qpn);
...@@ -452,9 +499,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ...@@ -452,9 +499,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
address->ah, qpn, address->ah, qpn, tx_req, phead, hlen))) {
tx_req->mapping, skb_headlen(skb),
skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags))) {
ipoib_warn(priv, "post_send failed\n"); ipoib_warn(priv, "post_send failed\n");
++dev->stats.tx_errors; ++dev->stats.tx_errors;
ipoib_dma_unmap_tx(priv->ca, tx_req); ipoib_dma_unmap_tx(priv->ca, tx_req);
......
...@@ -1134,14 +1134,15 @@ static struct net_device *ipoib_add_port(const char *format, ...@@ -1134,14 +1134,15 @@ static struct net_device *ipoib_add_port(const char *format,
kfree(device_attr); kfree(device_attr);
goto device_init_failed; goto device_init_failed;
} }
priv->hca_caps = device_attr->device_cap_flags;
if (device_attr->device_cap_flags & IB_DEVICE_UD_IP_CSUM) { kfree(device_attr);
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
set_bit(IPOIB_FLAG_CSUM, &priv->flags); set_bit(IPOIB_FLAG_CSUM, &priv->flags);
priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
} }
kfree(device_attr);
/* /*
* Set the full membership bit, so that we join the right * Set the full membership bit, so that we join the right
* broadcast group, etc. * broadcast group, etc.
...@@ -1176,6 +1177,9 @@ static struct net_device *ipoib_add_port(const char *format, ...@@ -1176,6 +1177,9 @@ static struct net_device *ipoib_add_port(const char *format,
goto event_failed; goto event_failed;
} }
if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO)
priv->dev->features |= NETIF_F_TSO;
result = register_netdev(priv->dev); result = register_netdev(priv->dev);
if (result) { if (result) {
printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n", printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",
......
...@@ -192,6 +192,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) ...@@ -192,6 +192,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
init_attr.send_cq = priv->cq; init_attr.send_cq = priv->cq;
init_attr.recv_cq = priv->cq; init_attr.recv_cq = priv->cq;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO;
if (dev->features & NETIF_F_SG) if (dev->features & NETIF_F_SG)
init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment