Commit 4113f36b authored by David S. Miller's avatar David S. Miller

Merge branch 'net-revert-lib-percpu_counter-API-for-fragmentation-mem-accounting'

Jesper Dangaard Brouer says:

====================
net: revert lib/percpu_counter API for fragmentation mem accounting

There is a bug in fragmentation codes use of the percpu_counter API,
that can cause issues on systems with many CPUs, above 24 CPUs.

After much consideration and different attempts at solving the API
usage.  The conclusion is to revert to the simple atomic_t API instead.

The ratio between batch size and threshold size make it a bad use-case
for the lib/percpu_counter API.  As using the correct API calls will
unfortunately cause systems with many CPUs to always execute an
expensive sum across all CPUs. Plus the added complexity is not worth it.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 64327fc8 5a63643e
#ifndef __NET_FRAG_H__
#define __NET_FRAG_H__
#include <linux/percpu_counter.h>
struct netns_frags {
/* The percpu_counter "mem" need to be cacheline aligned.
* mem.count must not share cacheline with other writers
*/
struct percpu_counter mem ____cacheline_aligned_in_smp;
/* Keep atomic mem on separate cachelines in structs that include it */
atomic_t mem ____cacheline_aligned_in_smp;
/* sysctls */
int timeout;
int high_thresh;
......@@ -108,15 +103,10 @@ struct inet_frags {
int inet_frags_init(struct inet_frags *);
void inet_frags_fini(struct inet_frags *);
static inline int inet_frags_init_net(struct netns_frags *nf)
{
return percpu_counter_init(&nf->mem, 0, GFP_KERNEL);
}
static inline void inet_frags_uninit_net(struct netns_frags *nf)
static inline void inet_frags_init_net(struct netns_frags *nf)
{
percpu_counter_destroy(&nf->mem);
atomic_set(&nf->mem, 0);
}
void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
......@@ -140,31 +130,24 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q)
/* Memory Tracking Functions. */
/* The default percpu_counter batch size is not big enough to scale to
* fragmentation mem acct sizes.
* The mem size of a 64K fragment is approx:
* (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes
*/
static unsigned int frag_percpu_counter_batch = 130000;
static inline int frag_mem_limit(struct netns_frags *nf)
{
return percpu_counter_read(&nf->mem);
return atomic_read(&nf->mem);
}
static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
{
percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch);
atomic_sub(i, &nf->mem);
}
static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
{
percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch);
atomic_add(i, &nf->mem);
}
static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)
static inline int sum_frag_mem_limit(struct netns_frags *nf)
{
return percpu_counter_sum_positive(&nf->mem);
return atomic_read(&nf->mem);
}
/* RFC 3168 support :
......
......@@ -580,19 +580,14 @@ static int __net_init lowpan_frags_init_net(struct net *net)
{
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
int res;
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
res = inet_frags_init_net(&ieee802154_lowpan->frags);
if (res)
return res;
res = lowpan_frags_ns_sysctl_register(net);
if (res)
inet_frags_uninit_net(&ieee802154_lowpan->frags);
return res;
inet_frags_init_net(&ieee802154_lowpan->frags);
return lowpan_frags_ns_sysctl_register(net);
}
static void __net_exit lowpan_frags_exit_net(struct net *net)
......
......@@ -234,10 +234,8 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
cond_resched();
if (read_seqretry(&f->rnd_seqlock, seq) ||
percpu_counter_sum(&nf->mem))
sum_frag_mem_limit(nf))
goto evict_again;
percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);
......
......@@ -844,8 +844,6 @@ static void __init ip4_frags_ctl_register(void)
static int __net_init ipv4_frags_init_net(struct net *net)
{
int res;
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
......@@ -871,13 +869,9 @@ static int __net_init ipv4_frags_init_net(struct net *net)
net->ipv4.frags.max_dist = 64;
res = inet_frags_init_net(&net->ipv4.frags);
if (res)
return res;
res = ip4_frags_ns_ctl_register(net);
if (res)
inet_frags_uninit_net(&net->ipv4.frags);
return res;
inet_frags_init_net(&net->ipv4.frags);
return ip4_frags_ns_ctl_register(net);
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
......
......@@ -622,18 +622,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
static int nf_ct_net_init(struct net *net)
{
int res;
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
res = inet_frags_init_net(&net->nf_frag.frags);
if (res)
return res;
res = nf_ct_frag6_sysctl_register(net);
if (res)
inet_frags_uninit_net(&net->nf_frag.frags);
return res;
inet_frags_init_net(&net->nf_frag.frags);
return nf_ct_frag6_sysctl_register(net);
}
static void nf_ct_net_exit(struct net *net)
......
......@@ -714,19 +714,13 @@ static void ip6_frags_sysctl_unregister(void)
static int __net_init ipv6_frags_init_net(struct net *net)
{
int res;
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
res = inet_frags_init_net(&net->ipv6.frags);
if (res)
return res;
res = ip6_frags_ns_sysctl_register(net);
if (res)
inet_frags_uninit_net(&net->ipv6.frags);
return res;
inet_frags_init_net(&net->ipv6.frags);
return ip6_frags_ns_sysctl_register(net);
}
static void __net_exit ipv6_frags_exit_net(struct net *net)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment