Commit 87fb4b7b authored by Eric Dumazet, committed by David S. Miller

net: more accurate skb truesize

skb truesize currently accounts for sk_buff struct and part of skb head.
kmalloc() roundings are also ignored.

Considering that skb_shared_info is larger than sk_buff, it's time to
take it into account for better memory accounting.

This patch introduces SKB_TRUESIZE(X) macro to centralize various
assumptions into a single place.

At skb alloc phase, we put skb_shared_info struct at the exact end of
skb head, to allow a better use of memory (lowering number of
reallocations), since kmalloc() gives us power-of-two memory blocks.

Unless SLUB/SLAB debug is active, both skb->head and skb_shared_info are
aligned to cache lines, as before.

Note: This patch might trigger performance regressions because of
misconfigured protocol stacks, hitting per socket or global memory
limits that were previously not reached. But it's a necessary step for a
more accurate memory accounting.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Andi Kleen <ak@linux.intel.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 97ba0eb6
...@@ -46,6 +46,11 @@ ...@@ -46,6 +46,11 @@
#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0))
#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2))
/* SKB_TRUESIZE(X) - return minimum truesize of one skb containing X bytes
 * of data.
 *
 * The result is X plus the SKB_DATA_ALIGN()ed sizes of struct sk_buff and
 * struct skb_shared_info, so both structures are charged in addition to
 * the data bytes themselves.
 */
#define SKB_TRUESIZE(X) ((X) + \
SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
/* A. Checksumming of received packets by device. /* A. Checksumming of received packets by device.
* *
* NONE: device failed to checksum this packet. * NONE: device failed to checksum this packet.
......
...@@ -184,11 +184,20 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, ...@@ -184,11 +184,20 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
goto out; goto out;
prefetchw(skb); prefetchw(skb);
size = SKB_DATA_ALIGN(size); /* We do our best to align skb_shared_info on a separate cache
data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
gfp_mask, node); * aligned memory blocks, unless SLUB/SLAB debug is enabled.
* Both skb->head and skb_shared_info are cache line aligned.
*/
size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
data = kmalloc_node_track_caller(size, gfp_mask, node);
if (!data) if (!data)
goto nodata; goto nodata;
/* kmalloc(size) might give us more room than requested.
* Put skb_shared_info exactly at the end of allocated zone,
* to allow max possible filling before reallocation.
*/
size = SKB_WITH_OVERHEAD(ksize(data));
prefetchw(data + size); prefetchw(data + size);
/* /*
...@@ -197,7 +206,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, ...@@ -197,7 +206,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* the tail pointer in struct sk_buff! * the tail pointer in struct sk_buff!
*/ */
memset(skb, 0, offsetof(struct sk_buff, tail)); memset(skb, 0, offsetof(struct sk_buff, tail));
skb->truesize = size + sizeof(struct sk_buff); /* Account for allocated memory : skb + skb->head */
skb->truesize = SKB_TRUESIZE(size);
atomic_set(&skb->users, 1); atomic_set(&skb->users, 1);
skb->head = data; skb->head = data;
skb->data = data; skb->data = data;
......
...@@ -207,7 +207,7 @@ static struct lock_class_key af_callback_keys[AF_MAX]; ...@@ -207,7 +207,7 @@ static struct lock_class_key af_callback_keys[AF_MAX];
* not depend upon such differences. * not depend upon such differences.
*/ */
#define _SK_MEM_PACKETS 256 #define _SK_MEM_PACKETS 256
#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256) #define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
......
...@@ -1152,10 +1152,9 @@ static int __net_init icmp_sk_init(struct net *net) ...@@ -1152,10 +1152,9 @@ static int __net_init icmp_sk_init(struct net *net)
net->ipv4.icmp_sk[i] = sk; net->ipv4.icmp_sk[i] = sk;
/* Enough space for 2 64K ICMP packets, including /* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead. * sk_buff/skb_shared_info struct overhead.
*/ */
sk->sk_sndbuf = sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
(2 * ((64 * 1024) + sizeof(struct sk_buff)));
/* /*
* Speedup sock_wfree() * Speedup sock_wfree()
......
...@@ -265,8 +265,7 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) ...@@ -265,8 +265,7 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
static void tcp_fixup_sndbuf(struct sock *sk) static void tcp_fixup_sndbuf(struct sock *sk)
{ {
int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
sizeof(struct sk_buff);
if (sk->sk_sndbuf < 3 * sndmem) { if (sk->sk_sndbuf < 3 * sndmem) {
sk->sk_sndbuf = 3 * sndmem; sk->sk_sndbuf = 3 * sndmem;
...@@ -349,7 +348,7 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) ...@@ -349,7 +348,7 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
static void tcp_fixup_rcvbuf(struct sock *sk) static void tcp_fixup_rcvbuf(struct sock *sk)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); int rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
/* Try to select rcvbuf so that 4 mss-sized segments /* Try to select rcvbuf so that 4 mss-sized segments
* will fit to window and corresponding skbs will fit to our rcvbuf. * will fit to window and corresponding skbs will fit to our rcvbuf.
...@@ -540,8 +539,7 @@ void tcp_rcv_space_adjust(struct sock *sk) ...@@ -540,8 +539,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
space /= tp->advmss; space /= tp->advmss;
if (!space) if (!space)
space = 1; space = 1;
rcvmem = (tp->advmss + MAX_TCP_HEADER + rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
16 + sizeof(struct sk_buff));
while (tcp_win_from_space(rcvmem) < tp->advmss) while (tcp_win_from_space(rcvmem) < tp->advmss)
rcvmem += 128; rcvmem += 128;
space *= rcvmem; space *= rcvmem;
...@@ -4950,8 +4948,10 @@ static void tcp_new_space(struct sock *sk) ...@@ -4950,8 +4948,10 @@ static void tcp_new_space(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
if (tcp_should_expand_sndbuf(sk)) { if (tcp_should_expand_sndbuf(sk)) {
int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + int sndmem = SKB_TRUESIZE(max_t(u32,
MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); tp->rx_opt.mss_clamp,
tp->mss_cache) +
MAX_TCP_HEADER);
int demanded = max_t(unsigned int, tp->snd_cwnd, int demanded = max_t(unsigned int, tp->snd_cwnd,
tp->reordering + 1); tp->reordering + 1);
sndmem *= 2 * demanded; sndmem *= 2 * demanded;
......
...@@ -835,8 +835,7 @@ static int __net_init icmpv6_sk_init(struct net *net) ...@@ -835,8 +835,7 @@ static int __net_init icmpv6_sk_init(struct net *net)
/* Enough space for 2 64K ICMP packets, including /* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead. * sk_buff struct overhead.
*/ */
sk->sk_sndbuf = sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
(2 * ((64 * 1024) + sizeof(struct sk_buff)));
} }
return 0; return 0;
......
...@@ -1819,7 +1819,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) ...@@ -1819,7 +1819,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
goto save_message; goto save_message;
len = atomic_read(&sk->sk_rmem_alloc); len = atomic_read(&sk->sk_rmem_alloc);
len += iucv_msg_length(msg) + sizeof(struct sk_buff); len += SKB_TRUESIZE(iucv_msg_length(msg));
if (len > sk->sk_rcvbuf) if (len > sk->sk_rcvbuf)
goto save_message; goto save_message;
......
...@@ -1299,7 +1299,7 @@ SCTP_STATIC __init int sctp_init(void) ...@@ -1299,7 +1299,7 @@ SCTP_STATIC __init int sctp_init(void)
max_share = min(4UL*1024*1024, limit); max_share = min(4UL*1024*1024, limit);
sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */ sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */
sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1);
sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
sysctl_sctp_wmem[0] = SK_MEM_QUANTUM; sysctl_sctp_wmem[0] = SK_MEM_QUANTUM;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment