Commit ba804bb4 authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) The per-network-namespace loopback device, and thus its namespace,
    can have its teardown deferred for a long time if a kernel created
    TCP socket closes and the namespace is exiting meanwhile. The kernel
    keeps trying to finish the close sequence until it times out (which
    takes quite some time).

    Fix this by forcing the socket closed in this situation, from Dan
    Streetman.

 2) Fix regression where we're trying to invoke the update_pmtu method
    on route types (in this case metadata tunnel routes) that don't
    implement the dst_ops method. Fix from Nicolas Dichtel.

 3) Fix long standing memory corruption issues in r8169 driver by
    performing the chip statistics DMA programming more correctly. From
    Francois Romieu.

 4) Handle local broadcast sends over VRF routes properly, from David
    Ahern.

 5) Don't refire the DCCP CCID2 timer endlessly, otherwise the socket
    can never be released. From Alexey Kodanev.

 6) Set poll flags properly in VSOCK protocol layer, from Stefan
    Hajnoczi.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
  VSOCK: set POLLOUT | POLLWRNORM for TCP_CLOSING
  dccp: don't restart ccid2_hc_tx_rto_expire() if sk in closed state
  net: vrf: Add support for sends to local broadcast address
  r8169: fix memory corruption on retrieval of hardware statistics.
  net: don't call update_pmtu unconditionally
  net: tcp: close sock if net namespace is exiting
parents db218549 ba3169fc
...@@ -1456,8 +1456,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, ...@@ -1456,8 +1456,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_dev_priv *priv = ipoib_priv(dev);
int e = skb_queue_empty(&priv->cm.skb_queue); int e = skb_queue_empty(&priv->cm.skb_queue);
if (skb_dst(skb)) skb_dst_update_pmtu(skb, mtu);
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
skb_queue_tail(&priv->cm.skb_queue, skb); skb_queue_tail(&priv->cm.skb_queue, skb);
if (e) if (e)
......
...@@ -2244,19 +2244,14 @@ static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd) ...@@ -2244,19 +2244,14 @@ static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
void __iomem *ioaddr = tp->mmio_addr; void __iomem *ioaddr = tp->mmio_addr;
dma_addr_t paddr = tp->counters_phys_addr; dma_addr_t paddr = tp->counters_phys_addr;
u32 cmd; u32 cmd;
bool ret;
RTL_W32(CounterAddrHigh, (u64)paddr >> 32); RTL_W32(CounterAddrHigh, (u64)paddr >> 32);
RTL_R32(CounterAddrHigh);
cmd = (u64)paddr & DMA_BIT_MASK(32); cmd = (u64)paddr & DMA_BIT_MASK(32);
RTL_W32(CounterAddrLow, cmd); RTL_W32(CounterAddrLow, cmd);
RTL_W32(CounterAddrLow, cmd | counter_cmd); RTL_W32(CounterAddrLow, cmd | counter_cmd);
ret = rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
RTL_W32(CounterAddrLow, 0);
RTL_W32(CounterAddrHigh, 0);
return ret;
} }
static bool rtl8169_reset_counters(struct net_device *dev) static bool rtl8169_reset_counters(struct net_device *dev)
......
...@@ -829,7 +829,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, ...@@ -829,7 +829,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) - int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) -
GENEVE_BASE_HLEN - info->options_len - 14; GENEVE_BASE_HLEN - info->options_len - 14;
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); skb_dst_update_pmtu(skb, mtu);
} }
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
...@@ -875,7 +875,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, ...@@ -875,7 +875,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) - int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) -
GENEVE_BASE_HLEN - info->options_len - 14; GENEVE_BASE_HLEN - info->options_len - 14;
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); skb_dst_update_pmtu(skb, mtu);
} }
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
......
...@@ -673,8 +673,9 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, ...@@ -673,8 +673,9 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
struct sock *sk, struct sock *sk,
struct sk_buff *skb) struct sk_buff *skb)
{ {
/* don't divert multicast */ /* don't divert multicast or local broadcast */
if (ipv4_is_multicast(ip_hdr(skb)->daddr)) if (ipv4_is_multicast(ip_hdr(skb)->daddr) ||
ipv4_is_lbcast(ip_hdr(skb)->daddr))
return skb; return skb;
if (qdisc_tx_is_default(vrf_dev)) if (qdisc_tx_is_default(vrf_dev))
......
...@@ -2158,8 +2158,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -2158,8 +2158,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (skb_dst(skb)) { if (skb_dst(skb)) {
int mtu = dst_mtu(ndst) - VXLAN_HEADROOM; int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb_dst_update_pmtu(skb, mtu);
skb, mtu);
} }
tos = ip_tunnel_ecn_encap(tos, old_iph, skb); tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
...@@ -2200,8 +2199,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -2200,8 +2199,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (skb_dst(skb)) { if (skb_dst(skb)) {
int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM; int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb_dst_update_pmtu(skb, mtu);
skb, mtu);
} }
tos = ip_tunnel_ecn_encap(tos, old_iph, skb); tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
......
...@@ -521,4 +521,12 @@ static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) ...@@ -521,4 +521,12 @@ static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst)
} }
#endif #endif
/*
 * Forward a path-MTU update to the dst entry attached to @skb.
 *
 * Does nothing when the skb carries no dst, or when that dst's ops table
 * leaves the update_pmtu method unset, so callers need not check either
 * condition themselves.
 *
 * @skb: buffer whose attached dst (if any) receives the update
 * @mtu: MTU value handed to the dst's update_pmtu method
 */
static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
{
	struct dst_entry *route = skb_dst(skb);

	/* No dst attached, or this dst type has no update_pmtu method. */
	if (!route || !route->ops->update_pmtu)
		return;

	route->ops->update_pmtu(route, NULL, skb, mtu);
}
#endif /* _NET_DST_H */ #endif /* _NET_DST_H */
...@@ -223,6 +223,11 @@ int net_eq(const struct net *net1, const struct net *net2) ...@@ -223,6 +223,11 @@ int net_eq(const struct net *net1, const struct net *net2)
return net1 == net2; return net1 == net2;
} }
/*
 * Report whether @net still has a non-zero count.
 *
 * Returns 1 while net->count is non-zero and 0 once it has reached zero.
 */
static inline int check_net(const struct net *net)
{
	int users = atomic_read(&net->count);

	return !!users;
}
void net_drop_ns(void *); void net_drop_ns(void *);
#else #else
...@@ -247,6 +252,11 @@ int net_eq(const struct net *net1, const struct net *net2) ...@@ -247,6 +252,11 @@ int net_eq(const struct net *net1, const struct net *net2)
return 1; return 1;
} }
/*
 * Stub variant of check_net() for the #else branch of this header:
 * unconditionally reports 1 without inspecting @net.
 */
static inline int check_net(const struct net *net)
{
	(void)net;	/* parameter intentionally unused in this variant */
	return 1;
}
#define net_drop_ns NULL #define net_drop_ns NULL
#endif #endif
......
...@@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(struct timer_list *t) ...@@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(struct timer_list *t)
ccid2_pr_debug("RTO_EXPIRE\n"); ccid2_pr_debug("RTO_EXPIRE\n");
if (sk->sk_state == DCCP_CLOSED)
goto out;
/* back-off timer */ /* back-off timer */
hc->tx_rto <<= 1; hc->tx_rto <<= 1;
if (hc->tx_rto > DCCP_RTO_MAX) if (hc->tx_rto > DCCP_RTO_MAX)
......
...@@ -520,8 +520,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, ...@@ -520,8 +520,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
else else
mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
if (skb_dst(skb)) skb_dst_update_pmtu(skb, mtu);
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) { if (skb->protocol == htons(ETH_P_IP)) {
if (!skb_is_gso(skb) && if (!skb_is_gso(skb) &&
......
...@@ -200,7 +200,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, ...@@ -200,7 +200,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
mtu = dst_mtu(dst); mtu = dst_mtu(dst);
if (skb->len > mtu) { if (skb->len > mtu) {
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); skb_dst_update_pmtu(skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) { if (skb->protocol == htons(ETH_P_IP)) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu)); htonl(mtu));
......
...@@ -2298,6 +2298,9 @@ void tcp_close(struct sock *sk, long timeout) ...@@ -2298,6 +2298,9 @@ void tcp_close(struct sock *sk, long timeout)
tcp_send_active_reset(sk, GFP_ATOMIC); tcp_send_active_reset(sk, GFP_ATOMIC);
__NET_INC_STATS(sock_net(sk), __NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY); LINUX_MIB_TCPABORTONMEMORY);
} else if (!check_net(sock_net(sk))) {
/* Not possible to send reset; just close */
tcp_set_state(sk, TCP_CLOSE);
} }
} }
......
...@@ -48,11 +48,19 @@ static void tcp_write_err(struct sock *sk) ...@@ -48,11 +48,19 @@ static void tcp_write_err(struct sock *sk)
* to prevent DoS attacks. It is called when a retransmission timeout * to prevent DoS attacks. It is called when a retransmission timeout
* or zero probe timeout occurs on orphaned socket. * or zero probe timeout occurs on orphaned socket.
* *
* Also close if our net namespace is exiting; in that case there is no
* hope of ever communicating again since all netns interfaces are already
* down (or about to be down), and we need to release our dst references,
* which have been moved to the netns loopback interface, so the namespace
* can finish exiting. This condition is only possible if we are a kernel
* socket, as those do not hold references to the namespace.
*
* Criteria is still not confirmed experimentally and may change. * Criteria is still not confirmed experimentally and may change.
* We kill the socket, if: * We kill the socket, if:
* 1. If number of orphaned sockets exceeds an administratively configured * 1. If number of orphaned sockets exceeds an administratively configured
* limit. * limit.
* 2. If we have strong memory pressure. * 2. If we have strong memory pressure.
* 3. If our net namespace is exiting.
*/ */
static int tcp_out_of_resources(struct sock *sk, bool do_reset) static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{ {
...@@ -81,6 +89,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) ...@@ -81,6 +89,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1; return 1;
} }
if (!check_net(sock_net(sk))) {
/* Not possible to send reset; just close */
tcp_done(sk);
return 1;
}
return 0; return 0;
} }
......
...@@ -642,8 +642,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ...@@ -642,8 +642,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_info > dst_mtu(skb_dst(skb2))) if (rel_info > dst_mtu(skb_dst(skb2)))
goto out; goto out;
skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, skb_dst_update_pmtu(skb2, rel_info);
rel_info);
} }
icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
...@@ -1131,8 +1130,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, ...@@ -1131,8 +1130,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
mtu = 576; mtu = 576;
} }
if (skb_dst(skb) && !t->parms.collect_md) skb_dst_update_pmtu(skb, mtu);
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
*pmtu = mtu; *pmtu = mtu;
err = -EMSGSIZE; err = -EMSGSIZE;
......
...@@ -483,7 +483,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) ...@@ -483,7 +483,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
mtu = dst_mtu(dst); mtu = dst_mtu(dst);
if (!skb->ignore_df && skb->len > mtu) { if (!skb->ignore_df && skb->len > mtu) {
skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); skb_dst_update_pmtu(skb, mtu);
if (skb->protocol == htons(ETH_P_IPV6)) { if (skb->protocol == htons(ETH_P_IPV6)) {
if (mtu < IPV6_MIN_MTU) if (mtu < IPV6_MIN_MTU)
......
...@@ -934,8 +934,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ...@@ -934,8 +934,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
df = 0; df = 0;
} }
if (tunnel->parms.iph.daddr && skb_dst(skb)) if (tunnel->parms.iph.daddr)
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); skb_dst_update_pmtu(skb, mtu);
if (skb->len > mtu && !skb_is_gso(skb)) { if (skb->len > mtu && !skb_is_gso(skb)) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
......
...@@ -951,7 +951,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, ...@@ -951,7 +951,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
* POLLOUT|POLLWRNORM when peer is closed and nothing to read, * POLLOUT|POLLWRNORM when peer is closed and nothing to read,
* but local send is not shutdown. * but local send is not shutdown.
*/ */
if (sk->sk_state == TCP_CLOSE) { if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) if (!(sk->sk_shutdown & SEND_SHUTDOWN))
mask |= POLLOUT | POLLWRNORM; mask |= POLLOUT | POLLWRNORM;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment