Commit 40d9018e authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) BPF tail call handling bug fixes from Daniel Borkmann.

 2) Fix allowance of too many rx queues in sfc driver, from Bert
    Kenward.

 3) Non-loopback ipv6 packets claiming src of ::1 should be dropped,
    from Florian Westphal.

 4) Statistics requests on KSZ9031 can crash, fix from Grygorii
    Strashko.

 5) TX ring handling fixes in mediatek driver, from Sean Wang.

 6) ip_ra_control can deadlock, fix lock acquisition ordering to fix,
    from Cong WANG.

 7) Fix use after free in ip_recv_error(), from Willem de Buijn.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
  bpf: fix checking xdp_adjust_head on tail calls
  bpf: fix cb access in socket filter programs on tail calls
  ipv6: drop non loopback packets claiming to originate from ::1
  net: ethernet: mediatek: fix inconsistency of port number carried in TXD
  net: ethernet: mediatek: fix inconsistency between TXD and the used buffer
  net: phy: micrel: fix crash when statistic requested for KSZ9031 phy
  net: vrf: Fix setting NLM_F_EXCL flag when adding l3mdev rule
  net: thunderx: Fix set_max_bgx_per_node for 81xx rgx
  net-timestamp: avoid use-after-free in ip_recv_error
  ipv4: fix a deadlock in ip_ra_control
  sfc: limit the number of receive queues
parents fb5e2154 acf167f3
......@@ -134,6 +134,7 @@ static void set_max_bgx_per_node(struct pci_dev *pdev)
pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
switch (sdevid) {
case PCI_SUBSYS_DEVID_81XX_BGX:
case PCI_SUBSYS_DEVID_81XX_RGX:
max_bgx_per_node = MAX_BGX_PER_CN81XX;
break;
case PCI_SUBSYS_DEVID_83XX_BGX:
......
......@@ -16,6 +16,7 @@
/* Subsystem device IDs */
#define PCI_SUBSYS_DEVID_88XX_BGX 0xA126
#define PCI_SUBSYS_DEVID_81XX_BGX 0xA226
#define PCI_SUBSYS_DEVID_81XX_RGX 0xA254
#define PCI_SUBSYS_DEVID_83XX_BGX 0xA326
#define MAX_BGX_THUNDER 8 /* Max 2 nodes, 4 per node */
......
......@@ -613,7 +613,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
struct mtk_mac *mac = netdev_priv(dev);
struct mtk_eth *eth = mac->hw;
struct mtk_tx_dma *itxd, *txd;
struct mtk_tx_buf *tx_buf;
struct mtk_tx_buf *itx_buf, *tx_buf;
dma_addr_t mapped_addr;
unsigned int nr_frags;
int i, n_desc = 1;
......@@ -627,8 +627,8 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
fport = (mac->id + 1) << TX_DMA_FPORT_SHIFT;
txd4 |= fport;
tx_buf = mtk_desc_to_tx_buf(ring, itxd);
memset(tx_buf, 0, sizeof(*tx_buf));
itx_buf = mtk_desc_to_tx_buf(ring, itxd);
memset(itx_buf, 0, sizeof(*itx_buf));
if (gso)
txd4 |= TX_DMA_TSO;
......@@ -647,9 +647,11 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
return -ENOMEM;
WRITE_ONCE(itxd->txd1, mapped_addr);
tx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb));
itx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
itx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
MTK_TX_FLAGS_FPORT1;
dma_unmap_addr_set(itx_buf, dma_addr0, mapped_addr);
dma_unmap_len_set(itx_buf, dma_len0, skb_headlen(skb));
/* TX SG offload */
txd = itxd;
......@@ -685,11 +687,13 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
last_frag * TX_DMA_LS0));
WRITE_ONCE(txd->txd4, fport);
tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
tx_buf = mtk_desc_to_tx_buf(ring, txd);
memset(tx_buf, 0, sizeof(*tx_buf));
tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
tx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
MTK_TX_FLAGS_FPORT1;
dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
dma_unmap_len_set(tx_buf, dma_len0, frag_map_size);
frag_size -= frag_map_size;
......@@ -698,7 +702,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
}
/* store skb to cleanup */
tx_buf->skb = skb;
itx_buf->skb = skb;
WRITE_ONCE(itxd->txd4, txd4);
WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
......@@ -1012,17 +1016,16 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
while ((cpu != dma) && budget) {
u32 next_cpu = desc->txd2;
int mac;
int mac = 0;
desc = mtk_qdma_phys_to_virt(ring, desc->txd2);
if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0)
break;
mac = (desc->txd4 >> TX_DMA_FPORT_SHIFT) &
TX_DMA_FPORT_MASK;
mac--;
tx_buf = mtk_desc_to_tx_buf(ring, desc);
if (tx_buf->flags & MTK_TX_FLAGS_FPORT1)
mac = 1;
skb = tx_buf->skb;
if (!skb) {
condition = 1;
......
......@@ -406,12 +406,18 @@ struct mtk_hw_stats {
struct u64_stats_sync syncp;
};
/* PDMA descriptor can point at 1-2 segments. This enum allows us to track how
* memory was allocated so that it can be freed properly
*/
enum mtk_tx_flags {
/* PDMA descriptor can point at 1-2 segments. This enum allows us to
* track how memory was allocated so that it can be freed properly.
*/
MTK_TX_FLAGS_SINGLE0 = 0x01,
MTK_TX_FLAGS_PAGE0 = 0x02,
/* MTK_TX_FLAGS_FPORTx allows tracking which port the transmitted
* SKB out instead of looking up through hardware TX descriptor.
*/
MTK_TX_FLAGS_FPORT0 = 0x04,
MTK_TX_FLAGS_FPORT1 = 0x08,
};
/* This enum allows us to identify how the clock is defined on the array of the
......
......@@ -1371,6 +1371,13 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
free_cpumask_var(thread_mask);
}
if (count > EFX_MAX_RX_QUEUES) {
netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
"Reducing number of rx queues from %u to %u.\n",
count, EFX_MAX_RX_QUEUES);
count = EFX_MAX_RX_QUEUES;
}
/* If RSS is requested for the PF *and* VFs then we can't write RSS
* table entries that are inaccessible to VFs
*/
......
......@@ -1354,6 +1354,13 @@ static unsigned int ef4_wanted_parallelism(struct ef4_nic *efx)
free_cpumask_var(thread_mask);
}
if (count > EF4_MAX_RX_QUEUES) {
netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
"Reducing number of rx queues from %u to %u.\n",
count, EF4_MAX_RX_QUEUES);
count = EF4_MAX_RX_QUEUES;
}
return count;
}
......
......@@ -798,9 +798,6 @@ static struct phy_driver ksphy_driver[] = {
.read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
......@@ -940,9 +937,6 @@ static struct phy_driver ksphy_driver[] = {
.read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
......@@ -952,6 +946,7 @@ static struct phy_driver ksphy_driver[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
.driver_data = &ksz9021_type,
.probe = kszphy_probe,
.config_init = ksz9021_config_init,
.config_aneg = genphy_config_aneg,
.read_status = genphy_read_status,
......@@ -971,6 +966,7 @@ static struct phy_driver ksphy_driver[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
.driver_data = &ksz9021_type,
.probe = kszphy_probe,
.config_init = ksz9031_config_init,
.config_aneg = genphy_config_aneg,
.read_status = ksz9031_read_status,
......@@ -989,9 +985,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init = kszphy_config_init,
.config_aneg = ksz8873mll_config_aneg,
.read_status = ksz8873mll_read_status,
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
......@@ -1003,9 +996,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init = kszphy_config_init,
.config_aneg = genphy_config_aneg,
.read_status = genphy_read_status,
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
......@@ -1017,9 +1007,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init = kszphy_config_init,
.config_aneg = ksz8873mll_config_aneg,
.read_status = ksz8873mll_read_status,
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
.suspend = genphy_suspend,
.resume = genphy_resume,
} };
......
......@@ -1128,7 +1128,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
goto nla_put_failure;
/* rule only needs to appear once */
nlh->nlmsg_flags &= NLM_F_EXCL;
nlh->nlmsg_flags |= NLM_F_EXCL;
frh = nlmsg_data(nlh);
memset(frh, 0, sizeof(*frh));
......
......@@ -617,6 +617,14 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
if (insn->imm == BPF_FUNC_xdp_adjust_head)
prog->xdp_adjust_head = 1;
if (insn->imm == BPF_FUNC_tail_call) {
/* If we tail call into other programs, we
* cannot make any assumptions since they
* can be replaced dynamically during runtime
* in the program array.
*/
prog->cb_access = 1;
prog->xdp_adjust_head = 1;
/* mark bpf_tail_call as different opcode
* to avoid conditional branch in
* interpeter for every normal call
......
......@@ -3807,6 +3807,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = tstype;
serr->opt_stats = opt_stats;
serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk->sk_protocol == IPPROTO_TCP &&
......
......@@ -488,16 +488,15 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
return false;
/* Support IP_PKTINFO on tstamp packets if requested, to correlate
* timestamp with egress dev. Not possible for packets without dev
* timestamp with egress dev. Not possible for packets without iif
* or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
*/
if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
(!skb->dev))
info = PKTINFO_SKB_CB(skb);
if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
!info->ipi_ifindex)
return false;
info = PKTINFO_SKB_CB(skb);
info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
info->ipi_ifindex = skb->dev->ifindex;
return true;
}
......@@ -591,6 +590,7 @@ static bool setsockopt_needs_rtnl(int optname)
case MCAST_LEAVE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_UNBLOCK_SOURCE:
case IP_ROUTER_ALERT:
return true;
}
return false;
......
......@@ -1278,7 +1278,7 @@ static void mrtsock_destruct(struct sock *sk)
struct net *net = sock_net(sk);
struct mr_table *mrt;
rtnl_lock();
ASSERT_RTNL();
ipmr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
......@@ -1289,7 +1289,6 @@ static void mrtsock_destruct(struct sock *sk)
mroute_clean_tables(mrt, false);
}
}
rtnl_unlock();
}
/* Socket options and virtual interface manipulation. The whole
......@@ -1353,13 +1352,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
if (sk != rcu_access_pointer(mrt->mroute_sk)) {
ret = -EACCES;
} else {
/* We need to unlock here because mrtsock_destruct takes
* care of rtnl itself and we can't change that due to
* the IP_ROUTER_ALERT setsockopt which runs without it.
*/
rtnl_unlock();
ret = ip_ra_control(sk, 0, NULL);
goto out;
goto out_unlock;
}
break;
case MRT_ADD_VIF:
......@@ -1470,7 +1464,6 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
}
out_unlock:
rtnl_unlock();
out:
return ret;
}
......
......@@ -682,7 +682,9 @@ static void raw_close(struct sock *sk, long timeout)
/*
* Raw sockets may have direct kernel references. Kill them.
*/
rtnl_lock();
ip_ra_control(sk, 0, NULL);
rtnl_unlock();
sk_common_release(sk);
}
......
......@@ -405,9 +405,6 @@ static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr)
* At one point, excluding local errors was a quick test to identify icmp/icmp6
* errors. This is no longer true, but the test remained, so the v6 stack,
* unlike v4, also honors cmsg requests on all wifi and timestamp errors.
*
* Timestamp code paths do not initialize the fields expected by cmsg:
* the PKTINFO fields in skb->cb[]. Fill those in here.
*/
static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
struct sock_exterr_skb *serr)
......@@ -419,14 +416,9 @@ static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL)
return false;
if (!skb->dev)
if (!IP6CB(skb)->iif)
return false;
if (skb->protocol == htons(ETH_P_IPV6))
IP6CB(skb)->iif = skb->dev->ifindex;
else
PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex;
return true;
}
......
......@@ -122,11 +122,14 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
/*
* RFC4291 2.5.3
* The loopback address must not be used as the source address in IPv6
* packets that are sent outside of a single node. [..]
* A packet received on an interface with a destination address
* of loopback must be dropped.
*/
if (!(dev->flags & IFF_LOOPBACK) &&
ipv6_addr_loopback(&hdr->daddr))
if ((ipv6_addr_loopback(&hdr->saddr) ||
ipv6_addr_loopback(&hdr->daddr)) &&
!(dev->flags & IFF_LOOPBACK))
goto err;
/* RFC4291 Errata ID: 3480
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment