Commit bf277b0c authored by David S. Miller

Merge git://1984.lsi.us.es/nf-next

Pablo Neira Ayuso says:

====================
This is the first batch of Netfilter and IPVS updates for your
net-next tree. Mostly cleanups for the Netfilter side. They are:

* Remove unnecessary RTNL locking now that we have support
  for namespace in nf_conntrack, from Patrick McHardy.

* Cleanup to eliminate unnecessary goto in the initialization
  path of several Netfilter tables, from Jean Sacren.

* Another cleanup from Wu Fengguang, this time to use PTR_RET instead
  of the open-coded "if IS_ERR then return PTR_ERR" pattern.

* Use list_for_each_entry_continue_rcu in nf_iterate, from
  Michael Wang.

* Add pmtu_disc sysctl option to disable PMTU discovery in the IPVS
  tunneling transmitter, from Julian Anastasov.

* Generalize application protocol registration in IPVS and modify
  IPVS FTP helper to use it, from Julian Anastasov.

* Update Kconfig: the IPVS FTP helper depends on the Netfilter FTP
  helper for NAT support, from Julian Anastasov.

* Add logic to update PMTU for IPIP packets in IPVS, again
  from Julian Anastasov.

* A couple of sparse warning fixes for IPVS and Netfilter from
  Claudiu Ghioc and Patrick McHardy respectively.

Patrick's IPv6 NAT changes will follow after this batch, I need
to flush this batch first before refreshing my tree.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents bba6ec7e 90efbed1
...@@ -808,8 +808,6 @@ struct netns_ipvs { ...@@ -808,8 +808,6 @@ struct netns_ipvs {
struct list_head rs_table[IP_VS_RTAB_SIZE]; struct list_head rs_table[IP_VS_RTAB_SIZE];
/* ip_vs_app */ /* ip_vs_app */
struct list_head app_list; struct list_head app_list;
/* ip_vs_ftp */
struct ip_vs_app *ftp_app;
/* ip_vs_proto */ /* ip_vs_proto */
#define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
...@@ -890,6 +888,7 @@ struct netns_ipvs { ...@@ -890,6 +888,7 @@ struct netns_ipvs {
unsigned int sysctl_sync_refresh_period; unsigned int sysctl_sync_refresh_period;
int sysctl_sync_retries; int sysctl_sync_retries;
int sysctl_nat_icmp_send; int sysctl_nat_icmp_send;
int sysctl_pmtu_disc;
/* ip_vs_lblc */ /* ip_vs_lblc */
int sysctl_lblc_expiration; int sysctl_lblc_expiration;
...@@ -976,6 +975,11 @@ static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs) ...@@ -976,6 +975,11 @@ static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
return ipvs->sysctl_sync_sock_size; return ipvs->sysctl_sync_sock_size;
} }
static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_pmtu_disc;
}
#else #else
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
...@@ -1018,6 +1022,11 @@ static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs) ...@@ -1018,6 +1022,11 @@ static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
return 0; return 0;
} }
static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
{
return 1;
}
#endif #endif
/* /*
...@@ -1179,7 +1188,8 @@ extern void ip_vs_service_net_cleanup(struct net *net); ...@@ -1179,7 +1188,8 @@ extern void ip_vs_service_net_cleanup(struct net *net);
* (from ip_vs_app.c) * (from ip_vs_app.c)
*/ */
#define IP_VS_APP_MAX_PORTS 8 #define IP_VS_APP_MAX_PORTS 8
extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app); extern struct ip_vs_app *register_ip_vs_app(struct net *net,
struct ip_vs_app *app);
extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app); extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp); extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
extern void ip_vs_unbind_app(struct ip_vs_conn *cp); extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
......
...@@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { ...@@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
static int __net_init frame_filter_net_init(struct net *net) static int __net_init frame_filter_net_init(struct net *net)
{ {
net->xt.frame_filter = ebt_register_table(net, &frame_filter); net->xt.frame_filter = ebt_register_table(net, &frame_filter);
if (IS_ERR(net->xt.frame_filter)) return PTR_RET(net->xt.frame_filter);
return PTR_ERR(net->xt.frame_filter);
return 0;
} }
static void __net_exit frame_filter_net_exit(struct net *net) static void __net_exit frame_filter_net_exit(struct net *net)
......
...@@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { ...@@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
static int __net_init frame_nat_net_init(struct net *net) static int __net_init frame_nat_net_init(struct net *net)
{ {
net->xt.frame_nat = ebt_register_table(net, &frame_nat); net->xt.frame_nat = ebt_register_table(net, &frame_nat);
if (IS_ERR(net->xt.frame_nat)) return PTR_RET(net->xt.frame_nat);
return PTR_ERR(net->xt.frame_nat);
return 0;
} }
static void __net_exit frame_nat_net_exit(struct net *net) static void __net_exit frame_nat_net_exit(struct net *net)
......
...@@ -69,9 +69,7 @@ static int __net_init iptable_filter_net_init(struct net *net) ...@@ -69,9 +69,7 @@ static int __net_init iptable_filter_net_init(struct net *net)
net->ipv4.iptable_filter = net->ipv4.iptable_filter =
ipt_register_table(net, &packet_filter, repl); ipt_register_table(net, &packet_filter, repl);
kfree(repl); kfree(repl);
if (IS_ERR(net->ipv4.iptable_filter)) return PTR_RET(net->ipv4.iptable_filter);
return PTR_ERR(net->ipv4.iptable_filter);
return 0;
} }
static void __net_exit iptable_filter_net_exit(struct net *net) static void __net_exit iptable_filter_net_exit(struct net *net)
...@@ -96,14 +94,10 @@ static int __init iptable_filter_init(void) ...@@ -96,14 +94,10 @@ static int __init iptable_filter_init(void)
filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
if (IS_ERR(filter_ops)) { if (IS_ERR(filter_ops)) {
ret = PTR_ERR(filter_ops); ret = PTR_ERR(filter_ops);
goto cleanup_table; unregister_pernet_subsys(&iptable_filter_net_ops);
} }
return ret; return ret;
cleanup_table:
unregister_pernet_subsys(&iptable_filter_net_ops);
return ret;
} }
static void __exit iptable_filter_fini(void) static void __exit iptable_filter_fini(void)
......
...@@ -104,9 +104,7 @@ static int __net_init iptable_mangle_net_init(struct net *net) ...@@ -104,9 +104,7 @@ static int __net_init iptable_mangle_net_init(struct net *net)
net->ipv4.iptable_mangle = net->ipv4.iptable_mangle =
ipt_register_table(net, &packet_mangler, repl); ipt_register_table(net, &packet_mangler, repl);
kfree(repl); kfree(repl);
if (IS_ERR(net->ipv4.iptable_mangle)) return PTR_RET(net->ipv4.iptable_mangle);
return PTR_ERR(net->ipv4.iptable_mangle);
return 0;
} }
static void __net_exit iptable_mangle_net_exit(struct net *net) static void __net_exit iptable_mangle_net_exit(struct net *net)
...@@ -131,14 +129,10 @@ static int __init iptable_mangle_init(void) ...@@ -131,14 +129,10 @@ static int __init iptable_mangle_init(void)
mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook);
if (IS_ERR(mangle_ops)) { if (IS_ERR(mangle_ops)) {
ret = PTR_ERR(mangle_ops); ret = PTR_ERR(mangle_ops);
goto cleanup_table; unregister_pernet_subsys(&iptable_mangle_net_ops);
} }
return ret; return ret;
cleanup_table:
unregister_pernet_subsys(&iptable_mangle_net_ops);
return ret;
} }
static void __exit iptable_mangle_fini(void) static void __exit iptable_mangle_fini(void)
......
...@@ -48,9 +48,7 @@ static int __net_init iptable_raw_net_init(struct net *net) ...@@ -48,9 +48,7 @@ static int __net_init iptable_raw_net_init(struct net *net)
net->ipv4.iptable_raw = net->ipv4.iptable_raw =
ipt_register_table(net, &packet_raw, repl); ipt_register_table(net, &packet_raw, repl);
kfree(repl); kfree(repl);
if (IS_ERR(net->ipv4.iptable_raw)) return PTR_RET(net->ipv4.iptable_raw);
return PTR_ERR(net->ipv4.iptable_raw);
return 0;
} }
static void __net_exit iptable_raw_net_exit(struct net *net) static void __net_exit iptable_raw_net_exit(struct net *net)
...@@ -75,14 +73,10 @@ static int __init iptable_raw_init(void) ...@@ -75,14 +73,10 @@ static int __init iptable_raw_init(void)
rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook);
if (IS_ERR(rawtable_ops)) { if (IS_ERR(rawtable_ops)) {
ret = PTR_ERR(rawtable_ops); ret = PTR_ERR(rawtable_ops);
goto cleanup_table; unregister_pernet_subsys(&iptable_raw_net_ops);
} }
return ret; return ret;
cleanup_table:
unregister_pernet_subsys(&iptable_raw_net_ops);
return ret;
} }
static void __exit iptable_raw_fini(void) static void __exit iptable_raw_fini(void)
......
...@@ -66,10 +66,7 @@ static int __net_init iptable_security_net_init(struct net *net) ...@@ -66,10 +66,7 @@ static int __net_init iptable_security_net_init(struct net *net)
net->ipv4.iptable_security = net->ipv4.iptable_security =
ipt_register_table(net, &security_table, repl); ipt_register_table(net, &security_table, repl);
kfree(repl); kfree(repl);
if (IS_ERR(net->ipv4.iptable_security)) return PTR_RET(net->ipv4.iptable_security);
return PTR_ERR(net->ipv4.iptable_security);
return 0;
} }
static void __net_exit iptable_security_net_exit(struct net *net) static void __net_exit iptable_security_net_exit(struct net *net)
......
...@@ -61,9 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net) ...@@ -61,9 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
net->ipv6.ip6table_filter = net->ipv6.ip6table_filter =
ip6t_register_table(net, &packet_filter, repl); ip6t_register_table(net, &packet_filter, repl);
kfree(repl); kfree(repl);
if (IS_ERR(net->ipv6.ip6table_filter)) return PTR_RET(net->ipv6.ip6table_filter);
return PTR_ERR(net->ipv6.ip6table_filter);
return 0;
} }
static void __net_exit ip6table_filter_net_exit(struct net *net) static void __net_exit ip6table_filter_net_exit(struct net *net)
......
...@@ -97,9 +97,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net) ...@@ -97,9 +97,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net)
net->ipv6.ip6table_mangle = net->ipv6.ip6table_mangle =
ip6t_register_table(net, &packet_mangler, repl); ip6t_register_table(net, &packet_mangler, repl);
kfree(repl); kfree(repl);
if (IS_ERR(net->ipv6.ip6table_mangle)) return PTR_RET(net->ipv6.ip6table_mangle);
return PTR_ERR(net->ipv6.ip6table_mangle);
return 0;
} }
static void __net_exit ip6table_mangle_net_exit(struct net *net) static void __net_exit ip6table_mangle_net_exit(struct net *net)
......
...@@ -40,9 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net) ...@@ -40,9 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net)
net->ipv6.ip6table_raw = net->ipv6.ip6table_raw =
ip6t_register_table(net, &packet_raw, repl); ip6t_register_table(net, &packet_raw, repl);
kfree(repl); kfree(repl);
if (IS_ERR(net->ipv6.ip6table_raw)) return PTR_RET(net->ipv6.ip6table_raw);
return PTR_ERR(net->ipv6.ip6table_raw);
return 0;
} }
static void __net_exit ip6table_raw_net_exit(struct net *net) static void __net_exit ip6table_raw_net_exit(struct net *net)
......
...@@ -58,10 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net) ...@@ -58,10 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net)
net->ipv6.ip6table_security = net->ipv6.ip6table_security =
ip6t_register_table(net, &security_table, repl); ip6t_register_table(net, &security_table, repl);
kfree(repl); kfree(repl);
if (IS_ERR(net->ipv6.ip6table_security)) return PTR_RET(net->ipv6.ip6table_security);
return PTR_ERR(net->ipv6.ip6table_security);
return 0;
} }
static void __net_exit ip6table_security_net_exit(struct net *net) static void __net_exit ip6table_security_net_exit(struct net *net)
......
...@@ -131,14 +131,13 @@ unsigned int nf_iterate(struct list_head *head, ...@@ -131,14 +131,13 @@ unsigned int nf_iterate(struct list_head *head,
int hook_thresh) int hook_thresh)
{ {
unsigned int verdict; unsigned int verdict;
struct nf_hook_ops *elem = list_entry_rcu(*i, struct nf_hook_ops, list);
/* /*
* The caller must not block between calls to this * The caller must not block between calls to this
* function because of risk of continuing from deleted element. * function because of risk of continuing from deleted element.
*/ */
list_for_each_continue_rcu(*i, head) { list_for_each_entry_continue_rcu(elem, head, list) {
struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
if (hook_thresh > elem->priority) if (hook_thresh > elem->priority)
continue; continue;
...@@ -155,11 +154,14 @@ unsigned int nf_iterate(struct list_head *head, ...@@ -155,11 +154,14 @@ unsigned int nf_iterate(struct list_head *head,
continue; continue;
} }
#endif #endif
if (verdict != NF_REPEAT) if (verdict != NF_REPEAT) {
*i = &elem->list;
return verdict; return verdict;
}
goto repeat; goto repeat;
} }
} }
*i = &elem->list;
return NF_ACCEPT; return NF_ACCEPT;
} }
......
...@@ -250,7 +250,8 @@ comment 'IPVS application helper' ...@@ -250,7 +250,8 @@ comment 'IPVS application helper'
config IP_VS_FTP config IP_VS_FTP
tristate "FTP protocol helper" tristate "FTP protocol helper"
depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT && \
NF_CONNTRACK_FTP
select IP_VS_NFCT select IP_VS_NFCT
---help--- ---help---
FTP is a protocol that transfers IP address and/or port number in FTP is a protocol that transfers IP address and/or port number in
......
...@@ -180,22 +180,38 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, ...@@ -180,22 +180,38 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
} }
/* /* Register application for netns */
* ip_vs_app registration routine struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app)
*/
int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
{ {
struct netns_ipvs *ipvs = net_ipvs(net); struct netns_ipvs *ipvs = net_ipvs(net);
/* increase the module use count */ struct ip_vs_app *a;
ip_vs_use_count_inc(); int err = 0;
if (!ipvs)
return ERR_PTR(-ENOENT);
mutex_lock(&__ip_vs_app_mutex); mutex_lock(&__ip_vs_app_mutex);
list_add(&app->a_list, &ipvs->app_list); list_for_each_entry(a, &ipvs->app_list, a_list) {
if (!strcmp(app->name, a->name)) {
err = -EEXIST;
goto out_unlock;
}
}
a = kmemdup(app, sizeof(*app), GFP_KERNEL);
if (!a) {
err = -ENOMEM;
goto out_unlock;
}
INIT_LIST_HEAD(&a->incs_list);
list_add(&a->a_list, &ipvs->app_list);
/* increase the module use count */
ip_vs_use_count_inc();
out_unlock:
mutex_unlock(&__ip_vs_app_mutex); mutex_unlock(&__ip_vs_app_mutex);
return 0; return err ? ERR_PTR(err) : a;
} }
...@@ -205,20 +221,29 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app) ...@@ -205,20 +221,29 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
*/ */
void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
{ {
struct ip_vs_app *inc, *nxt; struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_app *a, *anxt, *inc, *nxt;
if (!ipvs)
return;
mutex_lock(&__ip_vs_app_mutex); mutex_lock(&__ip_vs_app_mutex);
list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
if (app && strcmp(app->name, a->name))
continue;
list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
ip_vs_app_inc_release(net, inc); ip_vs_app_inc_release(net, inc);
} }
list_del(&app->a_list); list_del(&a->a_list);
kfree(a);
mutex_unlock(&__ip_vs_app_mutex);
/* decrease the module use count */ /* decrease the module use count */
ip_vs_use_count_dec(); ip_vs_use_count_dec();
}
mutex_unlock(&__ip_vs_app_mutex);
} }
...@@ -586,5 +611,6 @@ int __net_init ip_vs_app_net_init(struct net *net) ...@@ -586,5 +611,6 @@ int __net_init ip_vs_app_net_init(struct net *net)
void __net_exit ip_vs_app_net_cleanup(struct net *net) void __net_exit ip_vs_app_net_cleanup(struct net *net)
{ {
unregister_ip_vs_app(net, NULL /* all */);
proc_net_remove(net, "ip_vs_app"); proc_net_remove(net, "ip_vs_app");
} }
...@@ -1303,7 +1303,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ...@@ -1303,7 +1303,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
struct ip_vs_protocol *pp; struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd; struct ip_vs_proto_data *pd;
unsigned int offset, ihl, verdict; unsigned int offset, offset2, ihl, verdict;
bool ipip;
*related = 1; *related = 1;
...@@ -1345,6 +1346,21 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ...@@ -1345,6 +1346,21 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
net = skb_net(skb); net = skb_net(skb);
/* Special case for errors for IPIP packets */
ipip = false;
if (cih->protocol == IPPROTO_IPIP) {
if (unlikely(cih->frag_off & htons(IP_OFFSET)))
return NF_ACCEPT;
/* Error for our IPIP must arrive at LOCAL_IN */
if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
return NF_ACCEPT;
offset += cih->ihl * 4;
cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
ipip = true;
}
pd = ip_vs_proto_data_get(net, cih->protocol); pd = ip_vs_proto_data_get(net, cih->protocol);
if (!pd) if (!pd)
return NF_ACCEPT; return NF_ACCEPT;
...@@ -1358,11 +1374,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ...@@ -1358,11 +1374,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
"Checking incoming ICMP for"); "Checking incoming ICMP for");
offset2 = offset;
offset += cih->ihl * 4; offset += cih->ihl * 4;
ip_vs_fill_iphdr(AF_INET, cih, &ciph); ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */ /* The embedded headers contain source and dest in reverse order.
cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1); * For IPIP this is error for request, not for reply.
*/
cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1);
if (!cp) if (!cp)
return NF_ACCEPT; return NF_ACCEPT;
...@@ -1376,6 +1395,57 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ...@@ -1376,6 +1395,57 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
goto out; goto out;
} }
if (ipip) {
__be32 info = ic->un.gateway;
/* Update the MTU */
if (ic->type == ICMP_DEST_UNREACH &&
ic->code == ICMP_FRAG_NEEDED) {
struct ip_vs_dest *dest = cp->dest;
u32 mtu = ntohs(ic->un.frag.mtu);
/* Strip outer IP and ICMP, go to IPIP header */
__skb_pull(skb, ihl + sizeof(_icmph));
offset2 -= ihl + sizeof(_icmph);
skb_reset_network_header(skb);
IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
rcu_read_lock();
ipv4_update_pmtu(skb, dev_net(skb->dev),
mtu, 0, 0, 0, 0);
rcu_read_unlock();
/* Client uses PMTUD? */
if (!(cih->frag_off & htons(IP_DF)))
goto ignore_ipip;
/* Prefer the resulting PMTU */
if (dest) {
spin_lock(&dest->dst_lock);
if (dest->dst_cache)
mtu = dst_mtu(dest->dst_cache);
spin_unlock(&dest->dst_lock);
}
if (mtu > 68 + sizeof(struct iphdr))
mtu -= sizeof(struct iphdr);
info = htonl(mtu);
}
/* Strip outer IP, ICMP and IPIP, go to IP header of
* original request.
*/
__skb_pull(skb, offset2);
skb_reset_network_header(skb);
IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n",
&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
ic->type, ic->code, ntohl(info));
icmp_send(skb, ic->type, ic->code, info);
/* ICMP can be shorter but anyways, account it */
ip_vs_out_stats(cp, skb);
ignore_ipip:
consume_skb(skb);
verdict = NF_STOLEN;
goto out;
}
/* do the statistics and put it back */ /* do the statistics and put it back */
ip_vs_in_stats(cp, skb); ip_vs_in_stats(cp, skb);
if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
......
...@@ -1801,6 +1801,12 @@ static struct ctl_table vs_vars[] = { ...@@ -1801,6 +1801,12 @@ static struct ctl_table vs_vars[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec, .proc_handler = proc_dointvec,
}, },
{
.procname = "pmtu_disc",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#ifdef CONFIG_IP_VS_DEBUG #ifdef CONFIG_IP_VS_DEBUG
{ {
.procname = "debug_level", .procname = "debug_level",
...@@ -3676,7 +3682,7 @@ static void ip_vs_genl_unregister(void) ...@@ -3676,7 +3682,7 @@ static void ip_vs_genl_unregister(void)
* per netns intit/exit func. * per netns intit/exit func.
*/ */
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
int __net_init ip_vs_control_net_init_sysctl(struct net *net) static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
{ {
int idx; int idx;
struct netns_ipvs *ipvs = net_ipvs(net); struct netns_ipvs *ipvs = net_ipvs(net);
...@@ -3727,6 +3733,8 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) ...@@ -3727,6 +3733,8 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3); ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
tbl[idx++].data = &ipvs->sysctl_sync_retries; tbl[idx++].data = &ipvs->sysctl_sync_retries;
tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
ipvs->sysctl_pmtu_disc = 1;
tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
...@@ -3744,7 +3752,7 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) ...@@ -3744,7 +3752,7 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
return 0; return 0;
} }
void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
{ {
struct netns_ipvs *ipvs = net_ipvs(net); struct netns_ipvs *ipvs = net_ipvs(net);
...@@ -3755,8 +3763,8 @@ void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) ...@@ -3755,8 +3763,8 @@ void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
#else #else
int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
#endif #endif
......
...@@ -441,16 +441,10 @@ static int __net_init __ip_vs_ftp_init(struct net *net) ...@@ -441,16 +441,10 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
if (!ipvs) if (!ipvs)
return -ENOENT; return -ENOENT;
app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL);
if (!app) app = register_ip_vs_app(net, &ip_vs_ftp);
return -ENOMEM; if (IS_ERR(app))
INIT_LIST_HEAD(&app->a_list); return PTR_ERR(app);
INIT_LIST_HEAD(&app->incs_list);
ipvs->ftp_app = app;
ret = register_ip_vs_app(net, app);
if (ret)
goto err_exit;
for (i = 0; i < ports_count; i++) { for (i = 0; i < ports_count; i++) {
if (!ports[i]) if (!ports[i])
...@@ -464,9 +458,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net) ...@@ -464,9 +458,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
return 0; return 0;
err_unreg: err_unreg:
unregister_ip_vs_app(net, app); unregister_ip_vs_app(net, &ip_vs_ftp);
err_exit:
kfree(ipvs->ftp_app);
return ret; return ret;
} }
/* /*
...@@ -474,10 +466,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net) ...@@ -474,10 +466,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
*/ */
static void __ip_vs_ftp_exit(struct net *net) static void __ip_vs_ftp_exit(struct net *net)
{ {
struct netns_ipvs *ipvs = net_ipvs(net); unregister_ip_vs_app(net, &ip_vs_ftp);
unregister_ip_vs_app(net, ipvs->ftp_app);
kfree(ipvs->ftp_app);
} }
static struct pernet_operations ip_vs_ftp_ops = { static struct pernet_operations ip_vs_ftp_ops = {
......
...@@ -49,6 +49,7 @@ enum { ...@@ -49,6 +49,7 @@ enum {
IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to
* local * local
*/ */
IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */
}; };
/* /*
...@@ -84,6 +85,42 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) ...@@ -84,6 +85,42 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
return dst; return dst;
} }
/* Get route to daddr, update *saddr, optionally bind route to saddr */
static struct rtable *do_output_route4(struct net *net, __be32 daddr,
u32 rtos, int rt_mode, __be32 *saddr)
{
struct flowi4 fl4;
struct rtable *rt;
int loop = 0;
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr;
fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
fl4.flowi4_tos = rtos;
retry:
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt)) {
/* Invalid saddr ? */
if (PTR_ERR(rt) == -EINVAL && *saddr &&
rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
*saddr = 0;
flowi4_update_output(&fl4, 0, rtos, daddr, 0);
goto retry;
}
IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
return NULL;
} else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
ip_rt_put(rt);
*saddr = fl4.saddr;
flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
loop++;
goto retry;
}
*saddr = fl4.saddr;
return rt;
}
/* Get route to destination or remote server */ /* Get route to destination or remote server */
static struct rtable * static struct rtable *
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
...@@ -98,20 +135,13 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, ...@@ -98,20 +135,13 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
spin_lock(&dest->dst_lock); spin_lock(&dest->dst_lock);
if (!(rt = (struct rtable *) if (!(rt = (struct rtable *)
__ip_vs_dst_check(dest, rtos))) { __ip_vs_dst_check(dest, rtos))) {
struct flowi4 fl4; rt = do_output_route4(net, dest->addr.ip, rtos,
rt_mode, &dest->dst_saddr.ip);
memset(&fl4, 0, sizeof(fl4)); if (!rt) {
fl4.daddr = dest->addr.ip;
fl4.flowi4_tos = rtos;
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt)) {
spin_unlock(&dest->dst_lock); spin_unlock(&dest->dst_lock);
IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
&dest->addr.ip);
return NULL; return NULL;
} }
__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
dest->dst_saddr.ip = fl4.saddr;
IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
"rtos=%X\n", "rtos=%X\n",
&dest->addr.ip, &dest->dst_saddr.ip, &dest->addr.ip, &dest->dst_saddr.ip,
...@@ -122,19 +152,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, ...@@ -122,19 +152,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
*ret_saddr = dest->dst_saddr.ip; *ret_saddr = dest->dst_saddr.ip;
spin_unlock(&dest->dst_lock); spin_unlock(&dest->dst_lock);
} else { } else {
struct flowi4 fl4; __be32 saddr = htonl(INADDR_ANY);
memset(&fl4, 0, sizeof(fl4)); /* For such unconfigured boxes avoid many route lookups
fl4.daddr = daddr; * for performance reasons because we do not remember saddr
fl4.flowi4_tos = rtos; */
rt = ip_route_output_key(net, &fl4); rt_mode &= ~IP_VS_RT_MODE_CONNECT;
if (IS_ERR(rt)) { rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", if (!rt)
&daddr);
return NULL; return NULL;
}
if (ret_saddr) if (ret_saddr)
*ret_saddr = fl4.saddr; *ret_saddr = saddr;
} }
local = rt->rt_flags & RTCF_LOCAL; local = rt->rt_flags & RTCF_LOCAL;
...@@ -331,6 +359,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) ...@@ -331,6 +359,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
old_dst = dest->dst_cache; old_dst = dest->dst_cache;
dest->dst_cache = NULL; dest->dst_cache = NULL;
dst_release(old_dst); dst_release(old_dst);
dest->dst_saddr.ip = 0;
} }
#define IP_VS_XMIT_TUNNEL(skb, cp) \ #define IP_VS_XMIT_TUNNEL(skb, cp) \
...@@ -766,12 +795,13 @@ int ...@@ -766,12 +795,13 @@ int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp) struct ip_vs_protocol *pp)
{ {
struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
struct rtable *rt; /* Route to the other host */ struct rtable *rt; /* Route to the other host */
__be32 saddr; /* Source for tunnel */ __be32 saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */ struct net_device *tdev; /* Device to other host */
struct iphdr *old_iph = ip_hdr(skb); struct iphdr *old_iph = ip_hdr(skb);
u8 tos = old_iph->tos; u8 tos = old_iph->tos;
__be16 df = old_iph->frag_off; __be16 df;
struct iphdr *iph; /* Our new IP header */ struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */ unsigned int max_headroom; /* The extra header space needed */
int mtu; int mtu;
...@@ -781,7 +811,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -781,7 +811,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
RT_TOS(tos), IP_VS_RT_MODE_LOCAL | RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL, IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_CONNECT,
&saddr))) &saddr)))
goto tx_error_icmp; goto tx_error_icmp;
if (rt->rt_flags & RTCF_LOCAL) { if (rt->rt_flags & RTCF_LOCAL) {
...@@ -796,13 +827,13 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -796,13 +827,13 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto tx_error_put; goto tx_error_put;
} }
if (skb_dst(skb)) if (rt_is_output_route(skb_rtable(skb)))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
df |= (old_iph->frag_off & htons(IP_DF)); /* Copy DF, reset fragment offset and MF */
df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
if ((old_iph->frag_off & htons(IP_DF) && if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("%s(): frag needed\n", __func__); IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error_put; goto tx_error_put;
......
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_l3proto.h>
...@@ -294,9 +293,7 @@ void nf_conntrack_l3proto_unregister(struct net *net, ...@@ -294,9 +293,7 @@ void nf_conntrack_l3proto_unregister(struct net *net,
nf_ct_l3proto_unregister_sysctl(net, proto); nf_ct_l3proto_unregister_sysctl(net, proto);
/* Remove all contrack entries for this protocol */ /* Remove all contrack entries for this protocol */
rtnl_lock();
nf_ct_iterate_cleanup(net, kill_l3proto, proto); nf_ct_iterate_cleanup(net, kill_l3proto, proto);
rtnl_unlock();
} }
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
...@@ -502,9 +499,7 @@ void nf_conntrack_l4proto_unregister(struct net *net, ...@@ -502,9 +499,7 @@ void nf_conntrack_l4proto_unregister(struct net *net,
nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
/* Remove all contrack entries for this protocol */ /* Remove all contrack entries for this protocol */
rtnl_lock();
nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
rtnl_unlock();
} }
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
......
...@@ -79,11 +79,11 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, ...@@ -79,11 +79,11 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
if (tb[NFACCT_BYTES]) { if (tb[NFACCT_BYTES]) {
atomic64_set(&nfacct->bytes, atomic64_set(&nfacct->bytes,
be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES]))); be64_to_cpu(nla_get_be64(tb[NFACCT_BYTES])));
} }
if (tb[NFACCT_PKTS]) { if (tb[NFACCT_PKTS]) {
atomic64_set(&nfacct->pkts, atomic64_set(&nfacct->pkts,
be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS]))); be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
} }
atomic_set(&nfacct->refcnt, 1); atomic_set(&nfacct->refcnt, 1);
list_add_tail_rcu(&nfacct->head, &nfnl_acct_list); list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
......
...@@ -74,7 +74,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, ...@@ -74,7 +74,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM])
return -EINVAL; return -EINVAL;
tuple->src.l3num = ntohs(nla_get_u16(tb[NFCTH_TUPLE_L3PROTONUM])); tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM]));
tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]);
return 0; return 0;
......
...@@ -43,7 +43,7 @@ static u32 hash_v4(const struct sk_buff *skb) ...@@ -43,7 +43,7 @@ static u32 hash_v4(const struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb); const struct iphdr *iph = ip_hdr(skb);
/* packets in either direction go into same queue */ /* packets in either direction go into same queue */
if (iph->saddr < iph->daddr) if ((__force u32)iph->saddr < (__force u32)iph->daddr)
return jhash_3words((__force u32)iph->saddr, return jhash_3words((__force u32)iph->saddr,
(__force u32)iph->daddr, iph->protocol, jhash_initval); (__force u32)iph->daddr, iph->protocol, jhash_initval);
...@@ -57,7 +57,8 @@ static u32 hash_v6(const struct sk_buff *skb) ...@@ -57,7 +57,8 @@ static u32 hash_v6(const struct sk_buff *skb)
const struct ipv6hdr *ip6h = ipv6_hdr(skb); const struct ipv6hdr *ip6h = ipv6_hdr(skb);
u32 a, b, c; u32 a, b, c;
if (ip6h->saddr.s6_addr32[3] < ip6h->daddr.s6_addr32[3]) { if ((__force u32)ip6h->saddr.s6_addr32[3] <
(__force u32)ip6h->daddr.s6_addr32[3]) {
a = (__force u32) ip6h->saddr.s6_addr32[3]; a = (__force u32) ip6h->saddr.s6_addr32[3];
b = (__force u32) ip6h->daddr.s6_addr32[3]; b = (__force u32) ip6h->daddr.s6_addr32[3];
} else { } else {
...@@ -65,7 +66,8 @@ static u32 hash_v6(const struct sk_buff *skb) ...@@ -65,7 +66,8 @@ static u32 hash_v6(const struct sk_buff *skb)
a = (__force u32) ip6h->daddr.s6_addr32[3]; a = (__force u32) ip6h->daddr.s6_addr32[3];
} }
if (ip6h->saddr.s6_addr32[1] < ip6h->daddr.s6_addr32[1]) if ((__force u32)ip6h->saddr.s6_addr32[1] <
(__force u32)ip6h->daddr.s6_addr32[1])
c = (__force u32) ip6h->saddr.s6_addr32[1]; c = (__force u32) ip6h->saddr.s6_addr32[1];
else else
c = (__force u32) ip6h->daddr.s6_addr32[1]; c = (__force u32) ip6h->daddr.s6_addr32[1];
......
...@@ -269,7 +269,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) ...@@ -269,7 +269,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
mss <<= 8; mss <<= 8;
mss |= optp[2]; mss |= optp[2];
mss = ntohs(mss); mss = ntohs((__force __be16)mss);
break; break;
case OSFOPT_TS: case OSFOPT_TS:
loop_cont = 1; loop_cont = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment