Commit 7af40ad9 authored by Alexander Duyck, committed by Jeff Kirsher

igb: push data into first igb_tx_buffer sooner to reduce stack usage

Instead of storing most of the data for the TX hot path on the stack until
we are ready to write the descriptor, we can save ourselves some time and
effort by pushing the SKB, tx_flags, gso_segs, bytecount, and protocol into
the first igb_tx_buffer, since that is where they will end up anyway.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 1d0861ac
@@ -146,6 +146,7 @@ struct igb_tx_buffer {
        struct sk_buff *skb;
        unsigned int bytecount;
        u16 gso_segs;
+       __be16 protocol;
        dma_addr_t dma;
        u32 length;
        u32 tx_flags;
@@ -3975,10 +3975,11 @@ void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
        context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
 }
 
-static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb,
-                         u32 tx_flags, __be16 protocol, u8 *hdr_len)
+static int igb_tso(struct igb_ring *tx_ring,
+                  struct igb_tx_buffer *first,
+                  u8 *hdr_len)
 {
-       int err;
+       struct sk_buff *skb = first->skb;
        u32 vlan_macip_lens, type_tucmd;
        u32 mss_l4len_idx, l4len;
@@ -3986,7 +3987,7 @@ static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb,
                return 0;
 
        if (skb_header_cloned(skb)) {
-               err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+               int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
                if (err)
                        return err;
        }
@@ -3994,7 +3995,7 @@ static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb,
        /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
        type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
 
-       if (protocol == __constant_htons(ETH_P_IP)) {
+       if (first->protocol == __constant_htons(ETH_P_IP)) {
                struct iphdr *iph = ip_hdr(skb);
                iph->tot_len = 0;
                iph->check = 0;
@@ -4003,16 +4004,26 @@ static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb,
                                          IPPROTO_TCP,
                                          0);
                type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
+               first->tx_flags |= IGB_TX_FLAGS_TSO |
+                                  IGB_TX_FLAGS_CSUM |
+                                  IGB_TX_FLAGS_IPV4;
        } else if (skb_is_gso_v6(skb)) {
                ipv6_hdr(skb)->payload_len = 0;
                tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
                                                       &ipv6_hdr(skb)->daddr,
                                                       0, IPPROTO_TCP, 0);
+               first->tx_flags |= IGB_TX_FLAGS_TSO |
+                                  IGB_TX_FLAGS_CSUM;
        }
 
+       /* compute header lengths */
        l4len = tcp_hdrlen(skb);
        *hdr_len = skb_transport_offset(skb) + l4len;
 
+       /* update gso size and bytecount with header size */
+       first->gso_segs = skb_shinfo(skb)->gso_segs;
+       first->bytecount += (first->gso_segs - 1) * *hdr_len;
+
        /* MSS L4LEN IDX */
        mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
        mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
@@ -4020,26 +4031,26 @@ static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb,
        /* VLAN MACLEN IPLEN */
        vlan_macip_lens = skb_network_header_len(skb);
        vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
-       vlan_macip_lens |= tx_flags & IGB_TX_FLAGS_VLAN_MASK;
+       vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
 
        igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
 
        return 1;
 }
 
-static inline bool igb_tx_csum(struct igb_ring *tx_ring, struct sk_buff *skb,
-                              u32 tx_flags, __be16 protocol)
+static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
 {
+       struct sk_buff *skb = first->skb;
        u32 vlan_macip_lens = 0;
        u32 mss_l4len_idx = 0;
        u32 type_tucmd = 0;
 
        if (skb->ip_summed != CHECKSUM_PARTIAL) {
-               if (!(tx_flags & IGB_TX_FLAGS_VLAN))
-                       return false;
+               if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
+                       return;
        } else {
                u8 l4_hdr = 0;
-               switch (protocol) {
+               switch (first->protocol) {
                case __constant_htons(ETH_P_IP):
                        vlan_macip_lens |= skb_network_header_len(skb);
                        type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
@@ -4053,7 +4064,7 @@ static inline bool igb_tx_csum(struct igb_ring *tx_ring, struct sk_buff *skb,
                        if (unlikely(net_ratelimit())) {
                                dev_warn(tx_ring->dev,
                                 "partial checksum but proto=%x!\n",
-                                protocol);
+                                first->protocol);
                        }
                        break;
                }
@@ -4081,14 +4092,15 @@ static inline bool igb_tx_csum(struct igb_ring *tx_ring, struct sk_buff *skb,
                        }
                        break;
                }
+
+               /* update TX checksum flag */
+               first->tx_flags |= IGB_TX_FLAGS_CSUM;
        }
 
        vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
-       vlan_macip_lens |= tx_flags & IGB_TX_FLAGS_VLAN_MASK;
+       vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
 
        igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
-
-       return (skb->ip_summed == CHECKSUM_PARTIAL);
 }
 
 static __le32 igb_tx_cmd_type(u32 tx_flags)
@@ -4113,8 +4125,9 @@ static __le32 igb_tx_cmd_type(u32 tx_flags)
        return cmd_type;
 }
 
-static __le32 igb_tx_olinfo_status(u32 tx_flags, unsigned int paylen,
-                                  struct igb_ring *tx_ring)
+static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
+                                union e1000_adv_tx_desc *tx_desc,
+                                u32 tx_flags, unsigned int paylen)
 {
        u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
@@ -4132,7 +4145,7 @@ static __le32 igb_tx_olinfo_status(u32 tx_flags, unsigned int paylen,
                olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
        }
 
-       return cpu_to_le32(olinfo_status);
+       tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
 }
 
 /*
@@ -4140,12 +4153,13 @@ static __le32 igb_tx_olinfo_status(u32 tx_flags, unsigned int paylen,
  * maintain a power of two alignment we have to limit ourselves to 32K.
  */
 #define IGB_MAX_TXD_PWR        15
-#define IGB_MAX_DATA_PER_TXD   (1 << IGB_MAX_TXD_PWR)
+#define IGB_MAX_DATA_PER_TXD   (1<<IGB_MAX_TXD_PWR)
 
-static void igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb,
-                      struct igb_tx_buffer *first, u32 tx_flags,
+static void igb_tx_map(struct igb_ring *tx_ring,
+                      struct igb_tx_buffer *first,
                       const u8 hdr_len)
 {
+       struct sk_buff *skb = first->skb;
        struct igb_tx_buffer *tx_buffer_info;
        union e1000_adv_tx_desc *tx_desc;
        dma_addr_t dma;
@@ -4154,24 +4168,12 @@ static void igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb,
        unsigned int size = skb_headlen(skb);
        unsigned int paylen = skb->len - hdr_len;
        __le32 cmd_type;
+       u32 tx_flags = first->tx_flags;
        u16 i = tx_ring->next_to_use;
-       u16 gso_segs;
-
-       if (tx_flags & IGB_TX_FLAGS_TSO)
-               gso_segs = skb_shinfo(skb)->gso_segs;
-       else
-               gso_segs = 1;
-
-       /* multiply data chunks by size of headers */
-       first->bytecount = paylen + (gso_segs * hdr_len);
-       first->gso_segs = gso_segs;
-       first->skb = skb;
 
        tx_desc = IGB_TX_DESC(tx_ring, i);
 
-       tx_desc->read.olinfo_status =
-               igb_tx_olinfo_status(tx_flags, paylen, tx_ring);
+       igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
 
        cmd_type = igb_tx_cmd_type(tx_flags);
 
        dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
@@ -4181,7 +4183,6 @@ static void igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb,
        /* record length, and DMA address */
        first->length = size;
        first->dma = dma;
-       first->tx_flags = tx_flags;
        tx_desc->read.buffer_addr = cpu_to_le64(dma);
 
        for (;;) {
@@ -4336,6 +4337,12 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
                return NETDEV_TX_BUSY;
        }
 
+       /* record the location of the first descriptor for this packet */
+       first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+       first->skb = skb;
+       first->bytecount = skb->len;
+       first->gso_segs = 1;
+
        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
                skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
                tx_flags |= IGB_TX_FLAGS_TSTAMP;
@@ -4346,22 +4353,17 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
                tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
        }
 
-       /* record the location of the first descriptor for this packet */
-       first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+       /* record initial flags and protocol */
+       first->tx_flags = tx_flags;
+       first->protocol = protocol;
 
-       tso = igb_tso(tx_ring, skb, tx_flags, protocol, &hdr_len);
+       tso = igb_tso(tx_ring, first, &hdr_len);
 
-       if (tso < 0) {
+       if (tso < 0)
                goto out_drop;
-       } else if (tso) {
-               tx_flags |= IGB_TX_FLAGS_TSO | IGB_TX_FLAGS_CSUM;
-               if (protocol == htons(ETH_P_IP))
-                       tx_flags |= IGB_TX_FLAGS_IPV4;
-       } else if (igb_tx_csum(tx_ring, skb, tx_flags, protocol) &&
-                  (skb->ip_summed == CHECKSUM_PARTIAL)) {
-               tx_flags |= IGB_TX_FLAGS_CSUM;
-       }
+       else if (!tso)
+               igb_tx_csum(tx_ring, first);
 
-       igb_tx_map(tx_ring, skb, first, tx_flags, hdr_len);
+       igb_tx_map(tx_ring, first, hdr_len);
 
        /* Make sure there is space in the ring for the next send. */
        igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
@@ -4369,7 +4371,8 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
        return NETDEV_TX_OK;
 
 out_drop:
-       dev_kfree_skb_any(skb);
+       igb_unmap_and_free_tx_resource(tx_ring, first);
 
        return NETDEV_TX_OK;
 }