Commit 547b9ca8 authored by David S. Miller's avatar David S. Miller

Merge branch 'ip-sysctl-namespaceify'

Nikolay Borisov says:

====================
Namespacify various ip sysctl knobs

This series continues namespacifying more net related knobs.
The focus here is on ip options. Patches 1,3,4,5 namespacify
the respective sysctl knobs. Patch 2 moves some igmp code to the
correct file (and function) and also adds some #ifdef guards to
silence compilation warnings.

Finally, patch 5 exposes the ip fragmentation related sysctls
since all of the knobs are namespaced.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 6cd21d79 52a773d6
......@@ -13,6 +13,7 @@ struct netns_frags {
int timeout;
int high_thresh;
int low_thresh;
int max_dist;
};
/**
......
......@@ -245,12 +245,6 @@ extern int inet_peer_threshold;
extern int inet_peer_minttl;
extern int inet_peer_maxttl;
/* From ip_input.c */
extern int sysctl_ip_early_demux;
/* From ip_output.c */
extern int sysctl_ip_dynaddr;
void ipfrag_init(void);
void ip_static_sysctl_init(void);
......
......@@ -80,9 +80,13 @@ struct netns_ipv4 {
int sysctl_tcp_ecn;
int sysctl_tcp_ecn_fallback;
int sysctl_ip_default_ttl;
int sysctl_ip_no_pmtu_disc;
int sysctl_ip_fwd_use_pmtu;
int sysctl_ip_nonlocal_bind;
/* Shall we try to damage output packets if routing dev changes? */
int sysctl_ip_dynaddr;
int sysctl_ip_early_demux;
int sysctl_fwmark_reflect;
int sysctl_tcp_fwmark_accept;
......
......@@ -329,14 +329,13 @@ static inline int inet_iif(const struct sk_buff *skb)
return skb->skb_iif;
}
extern int sysctl_ip_default_ttl;
static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
{
int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
struct net *net = dev_net(dst->dev);
if (hoplimit == 0)
hoplimit = sysctl_ip_default_ttl;
hoplimit = net->ipv4.sysctl_ip_default_ttl;
return hoplimit;
}
......
......@@ -48,6 +48,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb,
struct iphdr *niph;
const struct tcphdr *oth;
struct tcphdr _oth;
struct net *net = sock_net(oldskb->sk);
if (!nft_bridge_iphdr_validate(oldskb))
return;
......@@ -63,9 +64,9 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb,
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
sysctl_ip_default_ttl);
net->ipv4.sysctl_ip_default_ttl);
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
niph->ttl = sysctl_ip_default_ttl;
niph->ttl = net->ipv4.sysctl_ip_default_ttl;
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
......@@ -85,6 +86,7 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb,
void *payload;
__wsum csum;
u8 proto;
struct net *net = sock_net(oldskb->sk);
if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb))
return;
......@@ -119,7 +121,7 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb,
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP,
sysctl_ip_default_ttl);
net->ipv4.sysctl_ip_default_ttl);
skb_reset_transport_header(nskb);
icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
......
......@@ -1095,12 +1095,6 @@ void inet_unregister_protosw(struct inet_protosw *p)
}
EXPORT_SYMBOL(inet_unregister_protosw);
/*
* Shall we try to damage output packets if routing dev changes?
*/
int sysctl_ip_dynaddr __read_mostly;
static int inet_sk_reselect_saddr(struct sock *sk)
{
struct inet_sock *inet = inet_sk(sk);
......@@ -1131,7 +1125,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
if (new_saddr == old_saddr)
return 0;
if (sysctl_ip_dynaddr > 1) {
if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) {
pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
__func__, &old_saddr, &new_saddr);
}
......@@ -1186,7 +1180,7 @@ int inet_sk_rebuild_header(struct sock *sk)
* Other protocols have to map its equivalent state to TCP_SYN_SENT.
* DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
*/
if (!sysctl_ip_dynaddr ||
if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr ||
sk->sk_state != TCP_SYN_SENT ||
(sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
(err = inet_sk_reselect_saddr(sk)) != 0)
......
......@@ -1224,7 +1224,9 @@ static void igmp_group_dropped(struct ip_mc_list *im)
static void igmp_group_added(struct ip_mc_list *im)
{
struct in_device *in_dev = im->interface;
#ifdef CONFIG_IP_MULTICAST
struct net *net = dev_net(in_dev->dev);
#endif
if (im->loaded == 0) {
im->loaded = 1;
......@@ -1316,7 +1318,9 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
{
struct ip_mc_list *im;
#ifdef CONFIG_IP_MULTICAST
struct net *net = dev_net(in_dev->dev);
#endif
ASSERT_RTNL();
......@@ -1643,7 +1647,9 @@ void ip_mc_down(struct in_device *in_dev)
void ip_mc_init_dev(struct in_device *in_dev)
{
#ifdef CONFIG_IP_MULTICAST
struct net *net = dev_net(in_dev->dev);
#endif
ASSERT_RTNL();
#ifdef CONFIG_IP_MULTICAST
......@@ -1662,7 +1668,9 @@ void ip_mc_init_dev(struct in_device *in_dev)
void ip_mc_up(struct in_device *in_dev)
{
struct ip_mc_list *pmc;
#ifdef CONFIG_IP_MULTICAST
struct net *net = dev_net(in_dev->dev);
#endif
ASSERT_RTNL();
......@@ -2923,6 +2931,12 @@ static int __net_init igmp_net_init(struct net *net)
goto out_sock;
}
/* Sysctl initialization */
net->ipv4.sysctl_igmp_max_memberships = 20;
net->ipv4.sysctl_igmp_max_msf = 10;
/* IGMP reports for link-local multicast groups are enabled by default */
net->ipv4.sysctl_igmp_llm_reports = 1;
net->ipv4.sysctl_igmp_qrv = 2;
return 0;
out_sock:
......
......@@ -54,8 +54,6 @@
* code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
* as well. Or notify me, at least. --ANK
*/
static int sysctl_ipfrag_max_dist __read_mostly = 64;
static const char ip_frag_cache_name[] = "ip4-frags";
struct ipfrag_skb_cb
......@@ -150,7 +148,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
qp->daddr = arg->iph->daddr;
qp->vif = arg->vif;
qp->user = arg->user;
qp->peer = sysctl_ipfrag_max_dist ?
qp->peer = q->net->max_dist ?
inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
NULL;
}
......@@ -275,7 +273,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
static int ip_frag_too_far(struct ipq *qp)
{
struct inet_peer *peer = qp->peer;
unsigned int max = sysctl_ipfrag_max_dist;
unsigned int max = qp->q.net->max_dist;
unsigned int start, end;
int rc;
......@@ -749,6 +747,14 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "ipfrag_max_dist",
.data = &init_net.ipv4.frags.max_dist,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero
},
{ }
};
......@@ -762,14 +768,6 @@ static struct ctl_table ip4_frags_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "ipfrag_max_dist",
.data = &sysctl_ipfrag_max_dist,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero
},
{ }
};
......@@ -790,10 +788,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
table[1].data = &net->ipv4.frags.low_thresh;
table[1].extra2 = &net->ipv4.frags.high_thresh;
table[2].data = &net->ipv4.frags.timeout;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
table[0].procname = NULL;
table[3].data = &net->ipv4.frags.max_dist;
}
hdr = register_net_sysctl(net, "net/ipv4", table);
......@@ -865,6 +860,8 @@ static int __net_init ipv4_frags_init_net(struct net *net)
*/
net->ipv4.frags.timeout = IP_FRAG_TIME;
net->ipv4.frags.max_dist = 64;
res = inet_frags_init_net(&net->ipv4.frags);
if (res)
return res;
......
......@@ -308,15 +308,12 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
return true;
}
int sysctl_ip_early_demux __read_mostly = 1;
EXPORT_SYMBOL(sysctl_ip_early_demux);
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
if (sysctl_ip_early_demux &&
if (net->ipv4.sysctl_ip_early_demux &&
!skb_dst(skb) &&
!skb->sk &&
!ip_is_fragment(iph)) {
......
......@@ -79,9 +79,6 @@
#include <linux/netlink.h>
#include <linux/tcp.h>
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
EXPORT_SYMBOL(sysctl_ip_default_ttl);
static int
ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
unsigned int mtu,
......
......@@ -1341,10 +1341,13 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
val = inet->tos;
break;
case IP_TTL:
{
struct net *net = sock_net(sk);
val = (inet->uc_ttl == -1 ?
sysctl_ip_default_ttl :
net->ipv4.sysctl_ip_default_ttl :
inet->uc_ttl);
break;
}
case IP_HDRINCL:
val = inet->hdrincl;
break;
......
......@@ -21,6 +21,7 @@ static struct iphdr *
synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct iphdr *iph;
struct net *net = sock_net(skb->sk);
skb_reset_network_header(skb);
iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
......@@ -29,7 +30,7 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
iph->tos = 0;
iph->id = 0;
iph->frag_off = htons(IP_DF);
iph->ttl = sysctl_ip_default_ttl;
iph->ttl = net->ipv4.sysctl_ip_default_ttl;
iph->protocol = IPPROTO_TCP;
iph->check = 0;
iph->saddr = saddr;
......
......@@ -390,7 +390,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "\nIp: %d %d",
IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
sysctl_ip_default_ttl);
net->ipv4.sysctl_ip_default_ttl);
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
......
......@@ -282,15 +282,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ip_default_ttl",
.data = &sysctl_ip_default_ttl,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &ip_ttl_min,
.extra2 = &ip_ttl_max,
},
{
.procname = "tcp_max_orphans",
.data = &sysctl_tcp_max_orphans,
......@@ -305,20 +296,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ip_early_demux",
.data = &sysctl_ip_early_demux,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ip_dynaddr",
.data = &sysctl_ip_dynaddr,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_fastopen",
.data = &sysctl_tcp_fastopen,
......@@ -752,6 +729,29 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ip_dynaddr",
.data = &init_net.ipv4.sysctl_ip_dynaddr,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ip_early_demux",
.data = &init_net.ipv4.sysctl_ip_early_demux,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ip_default_ttl",
.data = &init_net.ipv4.sysctl_ip_default_ttl,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &ip_ttl_min,
.extra2 = &ip_ttl_max,
},
{
.procname = "ip_local_port_range",
.maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
......@@ -988,6 +988,10 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
if (!net->ipv4.sysctl_local_reserved_ports)
goto err_ports;
net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
net->ipv4.sysctl_ip_dynaddr = 0;
net->ipv4.sysctl_ip_early_demux = 1;
return 0;
err_ports:
......
......@@ -2399,12 +2399,6 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
net->ipv4.sysctl_igmp_max_memberships = 20;
net->ipv4.sysctl_igmp_max_msf = 10;
/* IGMP reports for link-local multicast groups are enabled by default */
net->ipv4.sysctl_igmp_llm_reports = 1;
net->ipv4.sysctl_igmp_qrv = 2;
return 0;
fail:
tcp_sk_exit(net);
......
......@@ -49,7 +49,7 @@
int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
const struct inet6_protocol *ipprot;
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment