Commit 8f18e4d0 authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) Various ipvlan fixes from Eric Dumazet and Mahesh Bandewar.

    The most important is to not assume the packet is RX just because
    the destination address matches that of the device. Such an
    assumption causes problems when an interface is put into loopback
    mode.

 2) If we retry when creating a new tc entry (because we dropped the
    RTNL mutex in order to load a module, for example) we end up with
    -EAGAIN and then loop trying to replay the request. But we didn't
    reset some state when looping back to the top like this, and if
    another thread meanwhile inserted the same tc entry we were trying
    to, we re-link it creating an enless loop in the tc chain. Fix from
    Daniel Borkmann.

 3) There are two different WRITE bits in the MDIO address register for
    the stmmac chip, depending upon the chip variant. Due to a bug we
    could set them both, fix from Hock Leong Kweh.

 4) Fix mlx4 bug in XDP_TX handling, from Tariq Toukan.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
  net: stmmac: fix incorrect bit set in gmac4 mdio addr register
  r8169: add support for RTL8168 series add-on card.
  net: xdp: remove unused bfp_warn_invalid_xdp_buffer()
  openvswitch: upcall: Fix vlan handling.
  ipv4: Namespaceify tcp_tw_reuse knob
  net: korina: Fix NAPI versus resources freeing
  net, sched: fix soft lockup in tc_classify
  net/mlx4_en: Fix user prio field in XDP forward
  tipc: don't send FIN message from connectionless socket
  ipvlan: fix multicast processing
  ipvlan: fix various issues in ipvlan_process_multicast()
parents 0dad3a30 5799fc90
......@@ -900,10 +900,10 @@ static void korina_restart_task(struct work_struct *work)
DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR,
&lp->rx_dma_regs->dmasm);
korina_free_ring(dev);
napi_disable(&lp->napi);
korina_free_ring(dev);
if (korina_init(dev) < 0) {
printk(KERN_ERR "%s: cannot restart device\n", dev->name);
return;
......@@ -1064,12 +1064,12 @@ static int korina_close(struct net_device *dev)
tmp = tmp | DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR;
writel(tmp, &lp->rx_dma_regs->dmasm);
korina_free_ring(dev);
napi_disable(&lp->napi);
cancel_work_sync(&lp->restart_task);
korina_free_ring(dev);
free_irq(lp->rx_irq, dev);
free_irq(lp->tx_irq, dev);
free_irq(lp->ovr_irq, dev);
......
......@@ -1638,7 +1638,8 @@ int mlx4_en_start_port(struct net_device *dev)
/* Configure tx cq's and rings */
for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
u8 num_tx_rings_p_up = t == TX ? priv->num_tx_rings_p_up : 1;
u8 num_tx_rings_p_up = t == TX ?
priv->num_tx_rings_p_up : priv->tx_ring_num[t];
for (i = 0; i < priv->tx_ring_num[t]; i++) {
/* Configure cq */
......
......@@ -326,6 +326,7 @@ enum cfg_version {
static const struct pci_device_id rtl8169_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8129), 0, 0, RTL_CFG_0 },
{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8136), 0, 0, RTL_CFG_2 },
{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8161), 0, 0, RTL_CFG_1 },
{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, RTL_CFG_0 },
{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 },
{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 },
......
......@@ -116,7 +116,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
unsigned int mii_address = priv->hw->mii.addr;
unsigned int mii_data = priv->hw->mii.data;
u32 value = MII_WRITE | MII_BUSY;
u32 value = MII_BUSY;
value |= (phyaddr << priv->hw->mii.addr_shift)
& priv->hw->mii.addr_mask;
......@@ -126,6 +126,8 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
& priv->hw->mii.clk_csr_mask;
if (priv->plat->has_gmac4)
value |= MII_GMAC4_WRITE;
else
value |= MII_WRITE;
/* Wait until any existing MII operation is complete */
if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address))
......
......@@ -99,6 +99,11 @@ struct ipvl_port {
int count;
};
struct ipvl_skb_cb {
bool tx_pkt;
};
#define IPVL_SKB_CB(_skb) ((struct ipvl_skb_cb *)&((_skb)->cb[0]))
static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d)
{
return rcu_dereference(d->rx_handler_data);
......
......@@ -198,7 +198,7 @@ void ipvlan_process_multicast(struct work_struct *work)
unsigned int mac_hash;
int ret;
u8 pkt_type;
bool hlocal, dlocal;
bool tx_pkt;
__skb_queue_head_init(&list);
......@@ -207,8 +207,11 @@ void ipvlan_process_multicast(struct work_struct *work)
spin_unlock_bh(&port->backlog.lock);
while ((skb = __skb_dequeue(&list)) != NULL) {
struct net_device *dev = skb->dev;
bool consumed = false;
ethh = eth_hdr(skb);
hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr);
tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
mac_hash = ipvlan_mac_hash(ethh->h_dest);
if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
......@@ -216,41 +219,45 @@ void ipvlan_process_multicast(struct work_struct *work)
else
pkt_type = PACKET_MULTICAST;
dlocal = false;
rcu_read_lock();
list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
if (hlocal && (ipvlan->dev == skb->dev)) {
dlocal = true;
if (tx_pkt && (ipvlan->dev == skb->dev))
continue;
}
if (!test_bit(mac_hash, ipvlan->mac_filters))
continue;
if (!(ipvlan->dev->flags & IFF_UP))
continue;
ret = NET_RX_DROP;
len = skb->len + ETH_HLEN;
nskb = skb_clone(skb, GFP_ATOMIC);
if (!nskb)
goto acct;
nskb->pkt_type = pkt_type;
nskb->dev = ipvlan->dev;
if (hlocal)
ret = dev_forward_skb(ipvlan->dev, nskb);
else
ret = netif_rx(nskb);
acct:
local_bh_disable();
if (nskb) {
consumed = true;
nskb->pkt_type = pkt_type;
nskb->dev = ipvlan->dev;
if (tx_pkt)
ret = dev_forward_skb(ipvlan->dev, nskb);
else
ret = netif_rx(nskb);
}
ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
local_bh_enable();
}
rcu_read_unlock();
if (dlocal) {
if (tx_pkt) {
/* If the packet originated here, send it out. */
skb->dev = port->dev;
skb->pkt_type = pkt_type;
dev_queue_xmit(skb);
} else {
kfree_skb(skb);
if (consumed)
consume_skb(skb);
else
kfree_skb(skb);
}
if (dev)
dev_put(dev);
}
}
......@@ -470,15 +477,24 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
}
static void ipvlan_multicast_enqueue(struct ipvl_port *port,
struct sk_buff *skb)
struct sk_buff *skb, bool tx_pkt)
{
if (skb->protocol == htons(ETH_P_PAUSE)) {
kfree_skb(skb);
return;
}
/* Record that the deferred packet is from TX or RX path. By
* looking at mac-addresses on packet will lead to erronus decisions.
* (This would be true for a loopback-mode on master device or a
* hair-pin mode of the switch.)
*/
IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;
spin_lock(&port->backlog.lock);
if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
if (skb->dev)
dev_hold(skb->dev);
__skb_queue_tail(&port->backlog, skb);
spin_unlock(&port->backlog.lock);
schedule_work(&port->wq);
......@@ -537,7 +553,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
} else if (is_multicast_ether_addr(eth->h_dest)) {
ipvlan_skb_crossing_ns(skb, NULL);
ipvlan_multicast_enqueue(ipvlan->port, skb);
ipvlan_multicast_enqueue(ipvlan->port, skb, true);
return NET_XMIT_SUCCESS;
}
......@@ -634,7 +650,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
*/
if (nskb) {
ipvlan_skb_crossing_ns(nskb, NULL);
ipvlan_multicast_enqueue(port, nskb);
ipvlan_multicast_enqueue(port, nskb, false);
}
}
} else {
......
......@@ -135,6 +135,7 @@ static int ipvlan_port_create(struct net_device *dev)
static void ipvlan_port_destroy(struct net_device *dev)
{
struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
struct sk_buff *skb;
dev->priv_flags &= ~IFF_IPVLAN_MASTER;
if (port->mode == IPVLAN_MODE_L3S) {
......@@ -144,7 +145,11 @@ static void ipvlan_port_destroy(struct net_device *dev)
}
netdev_rx_handler_unregister(dev);
cancel_work_sync(&port->wq);
__skb_queue_purge(&port->backlog);
while ((skb = __skb_dequeue(&port->backlog)) != NULL) {
if (skb->dev)
dev_put(skb->dev);
kfree_skb(skb);
}
kfree(port);
}
......
......@@ -610,7 +610,6 @@ bool bpf_helper_changes_pkt_data(void *func);
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
const struct bpf_insn *patch, u32 len);
void bpf_warn_invalid_xdp_action(u32 act);
void bpf_warn_invalid_xdp_buffer(void);
#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
......
......@@ -110,6 +110,7 @@ struct netns_ipv4 {
int sysctl_tcp_orphan_retries;
int sysctl_tcp_fin_timeout;
unsigned int sysctl_tcp_notsent_lowat;
int sysctl_tcp_tw_reuse;
int sysctl_igmp_max_memberships;
int sysctl_igmp_max_msf;
......
......@@ -252,7 +252,6 @@ extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_tw_reuse;
extern int sysctl_tcp_frto;
extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_nometrics_save;
......
......@@ -2972,12 +2972,6 @@ void bpf_warn_invalid_xdp_action(u32 act)
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
void bpf_warn_invalid_xdp_buffer(void)
{
WARN_ONCE(1, "Illegal XDP buffer encountered, expect throughput degradation\n");
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer);
static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off,
struct bpf_insn *insn_buf,
......
......@@ -432,13 +432,6 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &tcp_adv_win_scale_min,
.extra2 = &tcp_adv_win_scale_max,
},
{
.procname = "tcp_tw_reuse",
.data = &sysctl_tcp_tw_reuse,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_frto",
.data = &sysctl_tcp_frto,
......@@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_tw_reuse",
.data = &init_net.ipv4.sysctl_tcp_tw_reuse,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname = "fib_multipath_use_neigh",
......
......@@ -84,7 +84,6 @@
#include <crypto/hash.h>
#include <linux/scatterlist.h>
int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
#ifdef CONFIG_TCP_MD5SIG
......@@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
and use initial timestamp retrieved from peer table.
*/
if (tcptw->tw_ts_recent_stamp &&
(!twp || (sysctl_tcp_tw_reuse &&
(!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
if (tp->write_seq == 0)
......@@ -2456,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_orphan_retries = 0;
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
net->ipv4.sysctl_tcp_tw_reuse = 0;
return 0;
fail:
......
......@@ -606,7 +606,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
rcu_assign_pointer(flow->sf_acts, acts);
packet->priority = flow->key.phy.priority;
packet->mark = flow->key.phy.skb_mark;
packet->protocol = flow->key.eth.type;
rcu_read_lock();
dp = get_dp_rcu(net, ovs_header->dp_ifindex);
......
......@@ -312,7 +312,8 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
* Returns 0 if it encounters a non-vlan or incomplete packet.
* Returns 1 after successfully parsing vlan tag.
*/
static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh,
bool untag_vlan)
{
struct vlan_head *vh = (struct vlan_head *)skb->data;
......@@ -330,7 +331,20 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT);
key_vh->tpid = vh->tpid;
__skb_pull(skb, sizeof(struct vlan_head));
if (unlikely(untag_vlan)) {
int offset = skb->data - skb_mac_header(skb);
u16 tci;
int err;
__skb_push(skb, offset);
err = __skb_vlan_pop(skb, &tci);
__skb_pull(skb, offset);
if (err)
return err;
__vlan_hwaccel_put_tag(skb, key_vh->tpid, tci);
} else {
__skb_pull(skb, sizeof(struct vlan_head));
}
return 1;
}
......@@ -351,13 +365,13 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
key->eth.vlan.tpid = skb->vlan_proto;
} else {
/* Parse outer vlan tag in the non-accelerated case. */
res = parse_vlan_tag(skb, &key->eth.vlan);
res = parse_vlan_tag(skb, &key->eth.vlan, true);
if (res <= 0)
return res;
}
/* Parse inner vlan tag. */
res = parse_vlan_tag(skb, &key->eth.cvlan);
res = parse_vlan_tag(skb, &key->eth.cvlan, false);
if (res <= 0)
return res;
......@@ -800,29 +814,15 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
if (err)
return err;
if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
/* key_extract assumes that skb->protocol is set-up for
* layer 3 packets which is the case for other callers,
* in particular packets recieved from the network stack.
* Here the correct value can be set from the metadata
* extracted above.
*/
skb->protocol = key->eth.type;
} else {
struct ethhdr *eth;
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
/* Normally, setting the skb 'protocol' field would be
* handled by a call to eth_type_trans(), but it assumes
* there's a sending device, which we may not have.
*/
if (eth_proto_is_802_3(eth->h_proto))
skb->protocol = eth->h_proto;
else
skb->protocol = htons(ETH_P_802_2);
}
/* key_extract assumes that skb->protocol is set-up for
* layer 3 packets which is the case for other callers,
* in particular packets received from the network stack.
* Here the correct value can be set from the metadata
* extracted above.
* For L2 packet key eth type would be zero. skb protocol
* would be set to correct value later during key-extact.
*/
skb->protocol = key->eth.type;
return key_extract(skb, key);
}
......@@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
unsigned long cl;
unsigned long fh;
int err;
int tp_created = 0;
int tp_created;
if ((n->nlmsg_type != RTM_GETTFILTER) &&
!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
replay:
tp_created = 0;
err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
if (err < 0)
return err;
......
......@@ -441,15 +441,19 @@ static void __tipc_shutdown(struct socket *sock, int error)
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
if (TIPC_SKB_CB(skb)->bytes_read) {
kfree_skb(skb);
} else {
if (!tipc_sk_type_connectionless(sk) &&
sk->sk_state != TIPC_DISCONNECTING) {
tipc_set_sk_state(sk, TIPC_DISCONNECTING);
tipc_node_remove_conn(net, dnode, tsk->portid);
}
tipc_sk_respond(sk, skb, error);
continue;
}
if (!tipc_sk_type_connectionless(sk) &&
sk->sk_state != TIPC_DISCONNECTING) {
tipc_set_sk_state(sk, TIPC_DISCONNECTING);
tipc_node_remove_conn(net, dnode, tsk->portid);
}
tipc_sk_respond(sk, skb, error);
}
if (tipc_sk_type_connectionless(sk))
return;
if (sk->sk_state != TIPC_DISCONNECTING) {
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
......@@ -457,10 +461,8 @@ static void __tipc_shutdown(struct socket *sock, int error)
tsk->portid, error);
if (skb)
tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
if (!tipc_sk_type_connectionless(sk)) {
tipc_node_remove_conn(net, dnode, tsk->portid);
tipc_set_sk_state(sk, TIPC_DISCONNECTING);
}
tipc_node_remove_conn(net, dnode, tsk->portid);
tipc_set_sk_state(sk, TIPC_DISCONNECTING);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment