Commit f5769fae authored by Eric Dumazet, committed by David S. Miller

net: Namespace-ify sysctl_optmem_max

Since optmem_max is used by TX zerocopy,
we want to be able to control it on a per-netns basis.

The following patch changes two tests.

Tested:

oqq130:~# cat /proc/sys/net/core/optmem_max
131072
oqq130:~# echo 1000000 >/proc/sys/net/core/optmem_max
oqq130:~# cat /proc/sys/net/core/optmem_max
1000000
oqq130:~# unshare -n
oqq130:~# cat /proc/sys/net/core/optmem_max
131072
oqq130:~# exit
logout
oqq130:~# cat /proc/sys/net/core/optmem_max
1000000
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 49445667
...@@ -13,6 +13,7 @@ struct netns_core { ...@@ -13,6 +13,7 @@ struct netns_core {
struct ctl_table_header *sysctl_hdr; struct ctl_table_header *sysctl_hdr;
int sysctl_somaxconn; int sysctl_somaxconn;
int sysctl_optmem_max;
u8 sysctl_txrehash; u8 sysctl_txrehash;
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
......
...@@ -2920,7 +2920,6 @@ extern __u32 sysctl_wmem_max; ...@@ -2920,7 +2920,6 @@ extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max; extern __u32 sysctl_rmem_max;
extern int sysctl_tstamp_allow_data; extern int sysctl_tstamp_allow_data;
extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default; extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default; extern __u32 sysctl_rmem_default;
......
...@@ -275,9 +275,10 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) ...@@ -275,9 +275,10 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap, static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
void *owner, u32 size) void *owner, u32 size)
{ {
int optmem_max = READ_ONCE(sysctl_optmem_max);
struct sock *sk = (struct sock *)owner; struct sock *sk = (struct sock *)owner;
int optmem_max;
optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
/* same check as in sock_kmalloc() */ /* same check as in sock_kmalloc() */
if (size <= optmem_max && if (size <= optmem_max &&
atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
......
...@@ -1219,8 +1219,8 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) ...@@ -1219,8 +1219,8 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
*/ */
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp) static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{ {
int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
u32 filter_size = bpf_prog_size(fp->prog->len); u32 filter_size = bpf_prog_size(fp->prog->len);
int optmem_max = READ_ONCE(sysctl_optmem_max);
/* same check as in sock_kmalloc() */ /* same check as in sock_kmalloc() */
if (filter_size <= optmem_max && if (filter_size <= optmem_max &&
...@@ -1550,12 +1550,13 @@ EXPORT_SYMBOL_GPL(sk_attach_filter); ...@@ -1550,12 +1550,13 @@ EXPORT_SYMBOL_GPL(sk_attach_filter);
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{ {
struct bpf_prog *prog = __get_filter(fprog, sk); struct bpf_prog *prog = __get_filter(fprog, sk);
int err; int err, optmem_max;
if (IS_ERR(prog)) if (IS_ERR(prog))
return PTR_ERR(prog); return PTR_ERR(prog);
if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
if (bpf_prog_size(prog->len) > optmem_max)
err = -ENOMEM; err = -ENOMEM;
else else
err = reuseport_attach_prog(sk, prog); err = reuseport_attach_prog(sk, prog);
...@@ -1594,7 +1595,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) ...@@ -1594,7 +1595,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{ {
struct bpf_prog *prog; struct bpf_prog *prog;
int err; int err, optmem_max;
if (sock_flag(sk, SOCK_FILTER_LOCKED)) if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM; return -EPERM;
...@@ -1622,7 +1623,8 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) ...@@ -1622,7 +1623,8 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
} }
} else { } else {
/* BPF_PROG_TYPE_SOCKET_FILTER */ /* BPF_PROG_TYPE_SOCKET_FILTER */
if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) { optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
if (bpf_prog_size(prog->len) > optmem_max) {
err = -ENOMEM; err = -ENOMEM;
goto err_prog_put; goto err_prog_put;
} }
......
...@@ -372,6 +372,10 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) ...@@ -372,6 +372,10 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
static int __net_init net_defaults_init_net(struct net *net) static int __net_init net_defaults_init_net(struct net *net)
{ {
net->core.sysctl_somaxconn = SOMAXCONN; net->core.sysctl_somaxconn = SOMAXCONN;
/* Limits per socket sk_omem_alloc usage.
* TCP zerocopy regular usage needs 128 KB.
*/
net->core.sysctl_optmem_max = 128 * 1024;
net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED; net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
return 0; return 0;
......
...@@ -283,12 +283,6 @@ EXPORT_SYMBOL(sysctl_rmem_max); ...@@ -283,12 +283,6 @@ EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
/* Limits per socket sk_omem_alloc usage.
* TCP zerocopy regular usage needs 128 KB.
*/
int sysctl_optmem_max __read_mostly = 128 * 1024;
EXPORT_SYMBOL(sysctl_optmem_max);
int sysctl_tstamp_allow_data __read_mostly = 1; int sysctl_tstamp_allow_data __read_mostly = 1;
DEFINE_STATIC_KEY_FALSE(memalloc_socks_key); DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
...@@ -2653,7 +2647,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, ...@@ -2653,7 +2647,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */ /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) > if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
READ_ONCE(sysctl_optmem_max)) READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
return NULL; return NULL;
skb = alloc_skb(size, priority); skb = alloc_skb(size, priority);
...@@ -2671,7 +2665,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, ...@@ -2671,7 +2665,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
*/ */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{ {
int optmem_max = READ_ONCE(sysctl_optmem_max); int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
if ((unsigned int)size <= optmem_max && if ((unsigned int)size <= optmem_max &&
atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
......
...@@ -508,13 +508,6 @@ static struct ctl_table net_core_table[] = { ...@@ -508,13 +508,6 @@ static struct ctl_table net_core_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec, .proc_handler = proc_dointvec,
}, },
{
.procname = "optmem_max",
.data = &sysctl_optmem_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ {
.procname = "tstamp_allow_data", .procname = "tstamp_allow_data",
.data = &sysctl_tstamp_allow_data, .data = &sysctl_tstamp_allow_data,
...@@ -673,6 +666,14 @@ static struct ctl_table netns_core_table[] = { ...@@ -673,6 +666,14 @@ static struct ctl_table netns_core_table[] = {
.extra1 = SYSCTL_ZERO, .extra1 = SYSCTL_ZERO,
.proc_handler = proc_dointvec_minmax .proc_handler = proc_dointvec_minmax
}, },
{
.procname = "optmem_max",
.data = &init_net.core.sysctl_optmem_max,
.maxlen = sizeof(int),
.mode = 0644,
.extra1 = SYSCTL_ZERO,
.proc_handler = proc_dointvec_minmax
},
{ {
.procname = "txrehash", .procname = "txrehash",
.data = &init_net.core.sysctl_txrehash, .data = &init_net.core.sysctl_txrehash,
......
...@@ -775,7 +775,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) ...@@ -775,7 +775,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
if (optlen < GROUP_FILTER_SIZE(0)) if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL; return -EINVAL;
if (optlen > READ_ONCE(sysctl_optmem_max)) if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
return -ENOBUFS; return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen); gsf = memdup_sockptr(optval, optlen);
...@@ -811,7 +811,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, ...@@ -811,7 +811,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0) if (optlen < size0)
return -EINVAL; return -EINVAL;
if (optlen > READ_ONCE(sysctl_optmem_max) - 4) if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4)
return -ENOBUFS; return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL); p = kmalloc(optlen + 4, GFP_KERNEL);
...@@ -1254,7 +1254,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, ...@@ -1254,7 +1254,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (optlen < IP_MSFILTER_SIZE(0)) if (optlen < IP_MSFILTER_SIZE(0))
goto e_inval; goto e_inval;
if (optlen > READ_ONCE(sysctl_optmem_max)) { if (optlen > READ_ONCE(net->core.sysctl_optmem_max)) {
err = -ENOBUFS; err = -ENOBUFS;
break; break;
} }
......
...@@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, ...@@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < GROUP_FILTER_SIZE(0)) if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL; return -EINVAL;
if (optlen > READ_ONCE(sysctl_optmem_max)) if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
return -ENOBUFS; return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen); gsf = memdup_sockptr(optval, optlen);
...@@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, ...@@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0) if (optlen < size0)
return -EINVAL; return -EINVAL;
if (optlen > READ_ONCE(sysctl_optmem_max) - 4) if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4)
return -ENOBUFS; return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL); p = kmalloc(optlen + 4, GFP_KERNEL);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment