Commit 9208d286 authored by Ilya Ponetayev, committed by David S. Miller

sch_cake: don't try to reallocate or unshare skb unconditionally

cake_handle_diffserv() unconditionally tries to linearize the MAC and network
header parts of the skb and to make it writable. In some cases this leads to a
full skb reallocation, which reduces throughput and increases CPU load.
Measurements of IPv4 forwarding + NAPT were conducted on a MIPS router with a
580 MHz single-core CPU. It appears that on kernel 4.9 skb_try_make_writable()
reallocates the skb if it was allocated in the ethernet driver via the
so-called 'build skb' method from the page cache (this was first noticed as a
strange increase of the kmalloc-2048 slab).

Obtain the DSCP value via a read-only skb_header_pointer() call, and perform
linearization only when DSCP bleaching or ECN CE setting is required. As an
additional optimisation, skip diffserv parsing entirely if it is not needed
by the current configuration.

Fixes: c87b4ecd ("sch_cake: Make sure we can write the IP header before changing DSCP bits")
Signed-off-by: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
[ fix a few style issues, reflow commit message ]
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 1ae71d99
...@@ -1553,30 +1553,49 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) ...@@ -1553,30 +1553,49 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
{ {
int wlen = skb_network_offset(skb); const int offset = skb_network_offset(skb);
u16 *buf, buf_;
u8 dscp; u8 dscp;
switch (tc_skb_protocol(skb)) { switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP): case htons(ETH_P_IP):
wlen += sizeof(struct iphdr); buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
if (unlikely(!buf))
return 0;
/* ToS is in the second byte of iphdr */
dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2;
if (wash && dscp) {
const int wlen = offset + sizeof(struct iphdr);
if (!pskb_may_pull(skb, wlen) || if (!pskb_may_pull(skb, wlen) ||
skb_try_make_writable(skb, wlen)) skb_try_make_writable(skb, wlen))
return 0; return 0;
dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
if (wash && dscp)
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
}
return dscp; return dscp;
case htons(ETH_P_IPV6): case htons(ETH_P_IPV6):
wlen += sizeof(struct ipv6hdr); buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
if (unlikely(!buf))
return 0;
/* Traffic class is in the first and second bytes of ipv6hdr */
dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2;
if (wash && dscp) {
const int wlen = offset + sizeof(struct ipv6hdr);
if (!pskb_may_pull(skb, wlen) || if (!pskb_may_pull(skb, wlen) ||
skb_try_make_writable(skb, wlen)) skb_try_make_writable(skb, wlen))
return 0; return 0;
dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
if (wash && dscp)
ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
}
return dscp; return dscp;
case htons(ETH_P_ARP): case htons(ETH_P_ARP):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment