Commit 49d16b23 authored by Andy Zhou's avatar Andy Zhou Committed by David S. Miller

bridge_netfilter: No ICMP packet on IPv4 fragmentation error

When bridge netfilter re-fragments an IP packet for output, all
packets that can not be re-fragmented to their original input size
should be silently discarded.

However, current bridge netfilter output path generates an ICMP packet
with 'size exceeded MTU' message for such packets, this is a bug.

This patch refactors the ip_fragment() API to allow two separate
use cases. The bridge netfilter user case will not
send ICMP, the routing output will, as before.
Signed-off-by: default avatarAndy Zhou <azhou@nicira.com>
Acked-by: default avatarFlorian Westphal <fw@strlen.de>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 8bc04864
...@@ -108,8 +108,8 @@ int ip_local_deliver(struct sk_buff *skb); ...@@ -108,8 +108,8 @@ int ip_local_deliver(struct sk_buff *skb);
int ip_mr_input(struct sk_buff *skb); int ip_mr_input(struct sk_buff *skb);
int ip_output(struct sock *sk, struct sk_buff *skb); int ip_output(struct sock *sk, struct sk_buff *skb);
int ip_mc_output(struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct sock *sk, struct sk_buff *skb);
int ip_fragment(struct sock *sk, struct sk_buff *skb, int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *)); int (*output)(struct sock *, struct sk_buff *));
int ip_do_nat(struct sk_buff *skb); int ip_do_nat(struct sk_buff *skb);
void ip_send_check(struct iphdr *ip); void ip_send_check(struct iphdr *ip);
int __ip_local_out(struct sk_buff *skb); int __ip_local_out(struct sk_buff *skb);
......
...@@ -853,6 +853,25 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) ...@@ -853,6 +853,25 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
return br_dev_queue_push_xmit(sk, skb); return br_dev_queue_push_xmit(sk, skb);
} }
static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *))
{
unsigned int mtu = ip_skb_dst_mtu(skb);
struct iphdr *iph = ip_hdr(skb);
struct rtable *rt = skb_rtable(skb);
struct net_device *dev = rt->dst.dev;
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
return ip_do_fragment(sk, skb, output);
}
static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
{ {
int ret; int ret;
...@@ -886,7 +905,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) ...@@ -886,7 +905,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
skb_copy_from_linear_data_offset(skb, -data->size, data->mac, skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size); data->size);
ret = ip_fragment(sk, skb, br_nf_push_frag_xmit); ret = br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit);
} else { } else {
nf_bridge_info_free(skb); nf_bridge_info_free(skb);
ret = br_dev_queue_push_xmit(sk, skb); ret = br_dev_queue_push_xmit(sk, skb);
......
...@@ -83,6 +83,9 @@ ...@@ -83,6 +83,9 @@
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
EXPORT_SYMBOL(sysctl_ip_default_ttl); EXPORT_SYMBOL(sysctl_ip_default_ttl);
static int ip_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *));
/* Generate a checksum for an outgoing IP datagram. */ /* Generate a checksum for an outgoing IP datagram. */
void ip_send_check(struct iphdr *iph) void ip_send_check(struct iphdr *iph)
{ {
...@@ -478,6 +481,28 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) ...@@ -478,6 +481,28 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from); skb_copy_secmark(to, from);
} }
static int ip_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *))
{
struct iphdr *iph = ip_hdr(skb);
unsigned int mtu = ip_skb_dst_mtu(skb);
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
struct rtable *rt = skb_rtable(skb);
struct net_device *dev = rt->dst.dev;
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
kfree_skb(skb);
return -EMSGSIZE;
}
return ip_do_fragment(sk, skb, output);
}
/* /*
* This IP datagram is too large to be sent in one piece. Break it up into * This IP datagram is too large to be sent in one piece. Break it up into
* smaller pieces (each of size equal to IP header plus * smaller pieces (each of size equal to IP header plus
...@@ -485,8 +510,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) ...@@ -485,8 +510,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
* single device frame, and queue such a frame for sending. * single device frame, and queue such a frame for sending.
*/ */
int ip_fragment(struct sock *sk, struct sk_buff *skb, int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *)) int (*output)(struct sock *, struct sk_buff *))
{ {
struct iphdr *iph; struct iphdr *iph;
int ptr; int ptr;
...@@ -507,15 +532,6 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb, ...@@ -507,15 +532,6 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb,
iph = ip_hdr(skb); iph = ip_hdr(skb);
mtu = ip_skb_dst_mtu(skb); mtu = ip_skb_dst_mtu(skb);
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
kfree_skb(skb);
return -EMSGSIZE;
}
/* /*
* Setup starting values. * Setup starting values.
...@@ -751,7 +767,7 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb, ...@@ -751,7 +767,7 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb,
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
return err; return err;
} }
EXPORT_SYMBOL(ip_fragment); EXPORT_SYMBOL(ip_do_fragment);
int int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment