Commit 3c7a9f32 authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) In order to avoid problems in the future, make cgroup bpf overriding
    explicit using BPF_F_ALLOW_OVERRIDE. From Alexei Starovoitov.

 2) LLC sets skb->sk without proper skb->destructor and this explodes,
    fix from Eric Dumazet.

 3) Make sure when we have an ipv4 mapped source address, the
    destination is either also an ipv4 mapped address or
    ipv6_addr_any(). Fix from Jonathan T. Leighton.

 4) Avoid packet loss in fec driver by programming the multicast filter
    more intelligently. From Rui Sousa.

 5) Handle multiple threads invoking fanout_add(), fix from Eric
    Dumazet.

 6) Since we can invoke the TCP input path in process context, without
    BH being disabled, we have to accommodate that in the locking of the
    TCP probe. Also from Eric Dumazet.

 7) Fix erroneous emission of NETEVENT_DELAY_PROBE_TIME_UPDATE when we
    aren't even updating that sysctl value. From Marcus Huewe.

 8) Fix endian bugs in ibmvnic driver, from Thomas Falcon.

[ This is the second version of the pull that reverts the nested
  rhashtable changes that looked a bit too scary for this late in the
  release  - Linus ]

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (27 commits)
  rhashtable: Revert nested table changes.
  ibmvnic: Fix endian errors in error reporting output
  ibmvnic: Fix endian error when requesting device capabilities
  net: neigh: Fix netevent NETEVENT_DELAY_PROBE_TIME_UPDATE notification
  net: xilinx_emaclite: fix freezes due to unordered I/O
  net: xilinx_emaclite: fix receive buffer overflow
  bpf: kernel header files need to be copied into the tools directory
  tcp: tcp_probe: use spin_lock_bh()
  uapi: fix linux/if_pppol2tp.h userspace compilation errors
  packet: fix races in fanout_add()
  ibmvnic: Fix initial MTU settings
  net: ethernet: ti: cpsw: fix cpsw assignment in resume
  kcm: fix a null pointer dereference in kcm_sendmsg()
  net: fec: fix multicast filtering hardware setup
  ipv6: Handle IPv4-mapped src to in6addr_any dst.
  ipv6: Inhibit IPv4-mapped src address on the wire.
  net/mlx5e: Disable preemption when doing TC statistics upcall
  rhashtable: Add nested tables
  tipc: Fix tipc_sk_reinit race conditions
  gfs2: Use rhashtable walk interface in glock_hash_walk
  ...
parents 747ae0a9 bf3f14d6
...@@ -2478,12 +2478,11 @@ S: D-90453 Nuernberg ...@@ -2478,12 +2478,11 @@ S: D-90453 Nuernberg
S: Germany S: Germany
N: Arnaldo Carvalho de Melo N: Arnaldo Carvalho de Melo
E: acme@ghostprotocols.net E: acme@kernel.org
E: arnaldo.melo@gmail.com E: arnaldo.melo@gmail.com
E: acme@redhat.com E: acme@redhat.com
W: http://oops.ghostprotocols.net:81/blog/
P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01 P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01
D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks D: tools/, IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
S: Brazil S: Brazil
N: Karsten Merker N: Karsten Merker
......
...@@ -877,8 +877,8 @@ S: Odd fixes ...@@ -877,8 +877,8 @@ S: Odd fixes
F: drivers/hwmon/applesmc.c F: drivers/hwmon/applesmc.c
APPLETALK NETWORK LAYER APPLETALK NETWORK LAYER
M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> L: netdev@vger.kernel.org
S: Maintained S: Odd fixes
F: drivers/net/appletalk/ F: drivers/net/appletalk/
F: net/appletalk/ F: net/appletalk/
...@@ -6727,9 +6727,8 @@ S: Odd Fixes ...@@ -6727,9 +6727,8 @@ S: Odd Fixes
F: drivers/tty/ipwireless/ F: drivers/tty/ipwireless/
IPX NETWORK LAYER IPX NETWORK LAYER
M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
L: netdev@vger.kernel.org L: netdev@vger.kernel.org
S: Maintained S: Odd fixes
F: include/net/ipx.h F: include/net/ipx.h
F: include/uapi/linux/ipx.h F: include/uapi/linux/ipx.h
F: net/ipx/ F: net/ipx/
...@@ -7501,8 +7500,8 @@ S: Maintained ...@@ -7501,8 +7500,8 @@ S: Maintained
F: drivers/misc/lkdtm* F: drivers/misc/lkdtm*
LLC (802.2) LLC (802.2)
M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> L: netdev@vger.kernel.org
S: Maintained S: Odd fixes
F: include/linux/llc.h F: include/linux/llc.h
F: include/uapi/linux/llc.h F: include/uapi/linux/llc.h
F: include/net/llc* F: include/net/llc*
...@@ -13373,10 +13372,8 @@ S: Maintained ...@@ -13373,10 +13372,8 @@ S: Maintained
F: drivers/input/misc/wistron_btns.c F: drivers/input/misc/wistron_btns.c
WL3501 WIRELESS PCMCIA CARD DRIVER WL3501 WIRELESS PCMCIA CARD DRIVER
M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
L: linux-wireless@vger.kernel.org L: linux-wireless@vger.kernel.org
W: http://oops.ghostprotocols.net:81/blog S: Odd fixes
S: Maintained
F: drivers/net/wireless/wl3501* F: drivers/net/wireless/wl3501*
WOLFSON MICROELECTRONICS DRIVERS WOLFSON MICROELECTRONICS DRIVERS
......
...@@ -2910,6 +2910,7 @@ static void set_multicast_list(struct net_device *ndev) ...@@ -2910,6 +2910,7 @@ static void set_multicast_list(struct net_device *ndev)
struct netdev_hw_addr *ha; struct netdev_hw_addr *ha;
unsigned int i, bit, data, crc, tmp; unsigned int i, bit, data, crc, tmp;
unsigned char hash; unsigned char hash;
unsigned int hash_high = 0, hash_low = 0;
if (ndev->flags & IFF_PROMISC) { if (ndev->flags & IFF_PROMISC) {
tmp = readl(fep->hwp + FEC_R_CNTRL); tmp = readl(fep->hwp + FEC_R_CNTRL);
...@@ -2932,11 +2933,7 @@ static void set_multicast_list(struct net_device *ndev) ...@@ -2932,11 +2933,7 @@ static void set_multicast_list(struct net_device *ndev)
return; return;
} }
/* Clear filter and add the addresses in hash register /* Add the addresses in hash register */
*/
writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
netdev_for_each_mc_addr(ha, ndev) { netdev_for_each_mc_addr(ha, ndev) {
/* calculate crc32 value of mac address */ /* calculate crc32 value of mac address */
crc = 0xffffffff; crc = 0xffffffff;
...@@ -2954,16 +2951,14 @@ static void set_multicast_list(struct net_device *ndev) ...@@ -2954,16 +2951,14 @@ static void set_multicast_list(struct net_device *ndev)
*/ */
hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f; hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f;
if (hash > 31) { if (hash > 31)
tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH); hash_high |= 1 << (hash - 32);
tmp |= 1 << (hash - 32); else
writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); hash_low |= 1 << hash;
} else {
tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_LOW);
tmp |= 1 << hash;
writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
}
} }
writel(hash_high, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
writel(hash_low, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
} }
/* Set a MAC change in hardware. */ /* Set a MAC change in hardware. */
......
...@@ -189,9 +189,10 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, ...@@ -189,9 +189,10 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
} }
ltb->map_id = adapter->map_id; ltb->map_id = adapter->map_id;
adapter->map_id++; adapter->map_id++;
init_completion(&adapter->fw_done);
send_request_map(adapter, ltb->addr, send_request_map(adapter, ltb->addr,
ltb->size, ltb->map_id); ltb->size, ltb->map_id);
init_completion(&adapter->fw_done);
wait_for_completion(&adapter->fw_done); wait_for_completion(&adapter->fw_done);
return 0; return 0;
} }
...@@ -505,7 +506,7 @@ static int ibmvnic_open(struct net_device *netdev) ...@@ -505,7 +506,7 @@ static int ibmvnic_open(struct net_device *netdev)
adapter->rx_pool = NULL; adapter->rx_pool = NULL;
rx_pool_arr_alloc_failed: rx_pool_arr_alloc_failed:
for (i = 0; i < adapter->req_rx_queues; i++) for (i = 0; i < adapter->req_rx_queues; i++)
napi_enable(&adapter->napi[i]); napi_disable(&adapter->napi[i]);
alloc_napi_failed: alloc_napi_failed:
return -ENOMEM; return -ENOMEM;
} }
...@@ -1121,10 +1122,10 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, ...@@ -1121,10 +1122,10 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
crq.request_statistics.ioba = cpu_to_be32(adapter->stats_token); crq.request_statistics.ioba = cpu_to_be32(adapter->stats_token);
crq.request_statistics.len = crq.request_statistics.len =
cpu_to_be32(sizeof(struct ibmvnic_statistics)); cpu_to_be32(sizeof(struct ibmvnic_statistics));
ibmvnic_send_crq(adapter, &crq);
/* Wait for data to be written */ /* Wait for data to be written */
init_completion(&adapter->stats_done); init_completion(&adapter->stats_done);
ibmvnic_send_crq(adapter, &crq);
wait_for_completion(&adapter->stats_done); wait_for_completion(&adapter->stats_done);
for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++) for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
...@@ -1496,7 +1497,7 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry) ...@@ -1496,7 +1497,7 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
adapter->req_rx_queues = adapter->opt_rx_comp_queues; adapter->req_rx_queues = adapter->opt_rx_comp_queues;
adapter->req_rx_add_queues = adapter->max_rx_add_queues; adapter->req_rx_add_queues = adapter->max_rx_add_queues;
adapter->req_mtu = adapter->max_mtu; adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN;
} }
total_queues = adapter->req_tx_queues + adapter->req_rx_queues; total_queues = adapter->req_tx_queues + adapter->req_rx_queues;
...@@ -2185,12 +2186,12 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq, ...@@ -2185,12 +2186,12 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq,
if (!found) { if (!found) {
dev_err(dev, "Couldn't find error id %x\n", dev_err(dev, "Couldn't find error id %x\n",
crq->request_error_rsp.error_id); be32_to_cpu(crq->request_error_rsp.error_id));
return; return;
} }
dev_err(dev, "Detailed info for error id %x:", dev_err(dev, "Detailed info for error id %x:",
crq->request_error_rsp.error_id); be32_to_cpu(crq->request_error_rsp.error_id));
for (i = 0; i < error_buff->len; i++) { for (i = 0; i < error_buff->len; i++) {
pr_cont("%02x", (int)error_buff->buff[i]); pr_cont("%02x", (int)error_buff->buff[i]);
...@@ -2269,8 +2270,8 @@ static void handle_error_indication(union ibmvnic_crq *crq, ...@@ -2269,8 +2270,8 @@ static void handle_error_indication(union ibmvnic_crq *crq,
dev_err(dev, "Firmware reports %serror id %x, cause %d\n", dev_err(dev, "Firmware reports %serror id %x, cause %d\n",
crq->error_indication. crq->error_indication.
flags & IBMVNIC_FATAL_ERROR ? "FATAL " : "", flags & IBMVNIC_FATAL_ERROR ? "FATAL " : "",
crq->error_indication.error_id, be32_to_cpu(crq->error_indication.error_id),
crq->error_indication.error_cause); be16_to_cpu(crq->error_indication.error_cause));
error_buff = kmalloc(sizeof(*error_buff), GFP_ATOMIC); error_buff = kmalloc(sizeof(*error_buff), GFP_ATOMIC);
if (!error_buff) if (!error_buff)
...@@ -2388,10 +2389,10 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, ...@@ -2388,10 +2389,10 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
case PARTIALSUCCESS: case PARTIALSUCCESS:
dev_info(dev, "req=%lld, rsp=%ld in %s queue, retrying.\n", dev_info(dev, "req=%lld, rsp=%ld in %s queue, retrying.\n",
*req_value, *req_value,
(long int)be32_to_cpu(crq->request_capability_rsp. (long int)be64_to_cpu(crq->request_capability_rsp.
number), name); number), name);
release_sub_crqs_no_irqs(adapter); release_sub_crqs_no_irqs(adapter);
*req_value = be32_to_cpu(crq->request_capability_rsp.number); *req_value = be64_to_cpu(crq->request_capability_rsp.number);
init_sub_crqs(adapter, 1); init_sub_crqs(adapter, 1);
return; return;
default: default:
...@@ -2626,12 +2627,12 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq, ...@@ -2626,12 +2627,12 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq,
break; break;
case MIN_MTU: case MIN_MTU:
adapter->min_mtu = be64_to_cpu(crq->query_capability.number); adapter->min_mtu = be64_to_cpu(crq->query_capability.number);
netdev->min_mtu = adapter->min_mtu; netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu); netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu);
break; break;
case MAX_MTU: case MAX_MTU:
adapter->max_mtu = be64_to_cpu(crq->query_capability.number); adapter->max_mtu = be64_to_cpu(crq->query_capability.number);
netdev->max_mtu = adapter->max_mtu; netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu); netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu);
break; break;
case MAX_MULTICAST_FILTERS: case MAX_MULTICAST_FILTERS:
...@@ -2799,9 +2800,9 @@ static ssize_t trace_read(struct file *file, char __user *user_buf, size_t len, ...@@ -2799,9 +2800,9 @@ static ssize_t trace_read(struct file *file, char __user *user_buf, size_t len,
crq.collect_fw_trace.correlator = adapter->ras_comps[num].correlator; crq.collect_fw_trace.correlator = adapter->ras_comps[num].correlator;
crq.collect_fw_trace.ioba = cpu_to_be32(trace_tok); crq.collect_fw_trace.ioba = cpu_to_be32(trace_tok);
crq.collect_fw_trace.len = adapter->ras_comps[num].trace_buff_size; crq.collect_fw_trace.len = adapter->ras_comps[num].trace_buff_size;
ibmvnic_send_crq(adapter, &crq);
init_completion(&adapter->fw_done); init_completion(&adapter->fw_done);
ibmvnic_send_crq(adapter, &crq);
wait_for_completion(&adapter->fw_done); wait_for_completion(&adapter->fw_done);
if (*ppos + len > be32_to_cpu(adapter->ras_comps[num].trace_buff_size)) if (*ppos + len > be32_to_cpu(adapter->ras_comps[num].trace_buff_size))
...@@ -3581,9 +3582,9 @@ static int ibmvnic_dump_show(struct seq_file *seq, void *v) ...@@ -3581,9 +3582,9 @@ static int ibmvnic_dump_show(struct seq_file *seq, void *v)
memset(&crq, 0, sizeof(crq)); memset(&crq, 0, sizeof(crq));
crq.request_dump_size.first = IBMVNIC_CRQ_CMD; crq.request_dump_size.first = IBMVNIC_CRQ_CMD;
crq.request_dump_size.cmd = REQUEST_DUMP_SIZE; crq.request_dump_size.cmd = REQUEST_DUMP_SIZE;
ibmvnic_send_crq(adapter, &crq);
init_completion(&adapter->fw_done); init_completion(&adapter->fw_done);
ibmvnic_send_crq(adapter, &crq);
wait_for_completion(&adapter->fw_done); wait_for_completion(&adapter->fw_done);
seq_write(seq, adapter->dump_data, adapter->dump_data_size); seq_write(seq, adapter->dump_data, adapter->dump_data_size);
...@@ -3629,8 +3630,8 @@ static void handle_crq_init_rsp(struct work_struct *work) ...@@ -3629,8 +3630,8 @@ static void handle_crq_init_rsp(struct work_struct *work)
} }
} }
send_version_xchg(adapter);
reinit_completion(&adapter->init_done); reinit_completion(&adapter->init_done);
send_version_xchg(adapter);
if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
dev_err(dev, "Passive init timeout\n"); dev_err(dev, "Passive init timeout\n");
goto task_failed; goto task_failed;
...@@ -3640,9 +3641,9 @@ static void handle_crq_init_rsp(struct work_struct *work) ...@@ -3640,9 +3641,9 @@ static void handle_crq_init_rsp(struct work_struct *work)
if (adapter->renegotiate) { if (adapter->renegotiate) {
adapter->renegotiate = false; adapter->renegotiate = false;
release_sub_crqs_no_irqs(adapter); release_sub_crqs_no_irqs(adapter);
send_cap_queries(adapter);
reinit_completion(&adapter->init_done); reinit_completion(&adapter->init_done);
send_cap_queries(adapter);
if (!wait_for_completion_timeout(&adapter->init_done, if (!wait_for_completion_timeout(&adapter->init_done,
timeout)) { timeout)) {
dev_err(dev, "Passive init timeout\n"); dev_err(dev, "Passive init timeout\n");
...@@ -3656,9 +3657,7 @@ static void handle_crq_init_rsp(struct work_struct *work) ...@@ -3656,9 +3657,7 @@ static void handle_crq_init_rsp(struct work_struct *work)
goto task_failed; goto task_failed;
netdev->real_num_tx_queues = adapter->req_tx_queues; netdev->real_num_tx_queues = adapter->req_tx_queues;
netdev->mtu = adapter->req_mtu; netdev->mtu = adapter->req_mtu - ETH_HLEN;
netdev->min_mtu = adapter->min_mtu;
netdev->max_mtu = adapter->max_mtu;
if (adapter->failover) { if (adapter->failover) {
adapter->failover = false; adapter->failover = false;
...@@ -3772,9 +3771,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) ...@@ -3772,9 +3771,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
adapter->debugfs_dump = ent; adapter->debugfs_dump = ent;
} }
} }
ibmvnic_send_crq_init(adapter);
init_completion(&adapter->init_done); init_completion(&adapter->init_done);
ibmvnic_send_crq_init(adapter);
if (!wait_for_completion_timeout(&adapter->init_done, timeout)) if (!wait_for_completion_timeout(&adapter->init_done, timeout))
return 0; return 0;
...@@ -3782,9 +3781,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) ...@@ -3782,9 +3781,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
if (adapter->renegotiate) { if (adapter->renegotiate) {
adapter->renegotiate = false; adapter->renegotiate = false;
release_sub_crqs_no_irqs(adapter); release_sub_crqs_no_irqs(adapter);
send_cap_queries(adapter);
reinit_completion(&adapter->init_done); reinit_completion(&adapter->init_done);
send_cap_queries(adapter);
if (!wait_for_completion_timeout(&adapter->init_done, if (!wait_for_completion_timeout(&adapter->init_done,
timeout)) timeout))
return 0; return 0;
...@@ -3798,7 +3797,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) ...@@ -3798,7 +3797,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
} }
netdev->real_num_tx_queues = adapter->req_tx_queues; netdev->real_num_tx_queues = adapter->req_tx_queues;
netdev->mtu = adapter->req_mtu; netdev->mtu = adapter->req_mtu - ETH_HLEN;
rc = register_netdev(netdev); rc = register_netdev(netdev);
if (rc) { if (rc) {
......
...@@ -1087,10 +1087,14 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, ...@@ -1087,10 +1087,14 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
preempt_disable();
tcf_exts_to_list(f->exts, &actions); tcf_exts_to_list(f->exts, &actions);
list_for_each_entry(a, &actions, list) list_for_each_entry(a, &actions, list)
tcf_action_stats_update(a, bytes, packets, lastuse); tcf_action_stats_update(a, bytes, packets, lastuse);
preempt_enable();
return 0; return 0;
} }
......
...@@ -3160,7 +3160,7 @@ static int cpsw_resume(struct device *dev) ...@@ -3160,7 +3160,7 @@ static int cpsw_resume(struct device *dev)
{ {
struct platform_device *pdev = to_platform_device(dev); struct platform_device *pdev = to_platform_device(dev);
struct net_device *ndev = platform_get_drvdata(pdev); struct net_device *ndev = platform_get_drvdata(pdev);
struct cpsw_common *cpsw = netdev_priv(ndev); struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
/* Select default pin state */ /* Select default pin state */
pinctrl_pm_select_default_state(dev); pinctrl_pm_select_default_state(dev);
......
...@@ -113,10 +113,10 @@ struct xenvif_stats { ...@@ -113,10 +113,10 @@ struct xenvif_stats {
* A subset of struct net_device_stats that contains only the * A subset of struct net_device_stats that contains only the
* fields that are updated in netback.c for each queue. * fields that are updated in netback.c for each queue.
*/ */
unsigned int rx_bytes; u64 rx_bytes;
unsigned int rx_packets; u64 rx_packets;
unsigned int tx_bytes; u64 tx_bytes;
unsigned int tx_packets; u64 tx_packets;
/* Additional stats used by xenvif */ /* Additional stats used by xenvif */
unsigned long rx_gso_checksum_fixup; unsigned long rx_gso_checksum_fixup;
......
...@@ -221,10 +221,10 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev) ...@@ -221,10 +221,10 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
{ {
struct xenvif *vif = netdev_priv(dev); struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL; struct xenvif_queue *queue = NULL;
unsigned long rx_bytes = 0; u64 rx_bytes = 0;
unsigned long rx_packets = 0; u64 rx_packets = 0;
unsigned long tx_bytes = 0; u64 tx_bytes = 0;
unsigned long tx_packets = 0; u64 tx_packets = 0;
unsigned int index; unsigned int index;
spin_lock(&vif->lock); spin_lock(&vif->lock);
......
...@@ -21,20 +21,19 @@ struct cgroup_bpf { ...@@ -21,20 +21,19 @@ struct cgroup_bpf {
*/ */
struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE]; struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE];
bool disallow_override[MAX_BPF_ATTACH_TYPE];
}; };
void cgroup_bpf_put(struct cgroup *cgrp); void cgroup_bpf_put(struct cgroup *cgrp);
void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
void __cgroup_bpf_update(struct cgroup *cgrp, int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
struct cgroup *parent, struct bpf_prog *prog, enum bpf_attach_type type,
struct bpf_prog *prog, bool overridable);
enum bpf_attach_type type);
/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ /* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
void cgroup_bpf_update(struct cgroup *cgrp, int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_prog *prog, enum bpf_attach_type type, bool overridable);
enum bpf_attach_type type);
int __cgroup_bpf_run_filter_skb(struct sock *sk, int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb, struct sk_buff *skb,
......
...@@ -116,6 +116,12 @@ enum bpf_attach_type { ...@@ -116,6 +116,12 @@ enum bpf_attach_type {
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
* to the given target_fd cgroup the descendent cgroup will be able to
* override effective bpf program that was inherited from this cgroup
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
#define BPF_PSEUDO_MAP_FD 1 #define BPF_PSEUDO_MAP_FD 1
/* flags for BPF_MAP_UPDATE_ELEM command */ /* flags for BPF_MAP_UPDATE_ELEM command */
...@@ -171,6 +177,7 @@ union bpf_attr { ...@@ -171,6 +177,7 @@ union bpf_attr {
__u32 target_fd; /* container object to attach to */ __u32 target_fd; /* container object to attach to */
__u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type; __u32 attach_type;
__u32 attach_flags;
}; };
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
......
...@@ -9,9 +9,8 @@ ...@@ -9,9 +9,8 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/socket.h> #include <linux/socket.h>
#ifndef __KERNEL__ #include <linux/in.h>
#include <netinet/in.h> #include <linux/in6.h>
#endif
#define IPPROTO_L2TP 115 #define IPPROTO_L2TP 115
...@@ -31,7 +30,7 @@ struct sockaddr_l2tpip { ...@@ -31,7 +30,7 @@ struct sockaddr_l2tpip {
__u32 l2tp_conn_id; /* Connection ID of tunnel */ __u32 l2tp_conn_id; /* Connection ID of tunnel */
/* Pad to size of `struct sockaddr'. */ /* Pad to size of `struct sockaddr'. */
unsigned char __pad[sizeof(struct sockaddr) - unsigned char __pad[__SOCK_SIZE__ -
sizeof(__kernel_sa_family_t) - sizeof(__kernel_sa_family_t) -
sizeof(__be16) - sizeof(struct in_addr) - sizeof(__be16) - sizeof(struct in_addr) -
sizeof(__u32)]; sizeof(__u32)];
......
...@@ -52,6 +52,7 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) ...@@ -52,6 +52,7 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
e = rcu_dereference_protected(parent->bpf.effective[type], e = rcu_dereference_protected(parent->bpf.effective[type],
lockdep_is_held(&cgroup_mutex)); lockdep_is_held(&cgroup_mutex));
rcu_assign_pointer(cgrp->bpf.effective[type], e); rcu_assign_pointer(cgrp->bpf.effective[type], e);
cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
} }
} }
...@@ -82,30 +83,63 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) ...@@ -82,30 +83,63 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
* *
* Must be called with cgroup_mutex held. * Must be called with cgroup_mutex held.
*/ */
void __cgroup_bpf_update(struct cgroup *cgrp, int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
struct cgroup *parent, struct bpf_prog *prog, enum bpf_attach_type type,
struct bpf_prog *prog, bool new_overridable)
enum bpf_attach_type type)
{ {
struct bpf_prog *old_prog, *effective; struct bpf_prog *old_prog, *effective = NULL;
struct cgroup_subsys_state *pos; struct cgroup_subsys_state *pos;
bool overridable = true;
if (parent) {
overridable = !parent->bpf.disallow_override[type];
effective = rcu_dereference_protected(parent->bpf.effective[type],
lockdep_is_held(&cgroup_mutex));
}
old_prog = xchg(cgrp->bpf.prog + type, prog); if (prog && effective && !overridable)
/* if parent has non-overridable prog attached, disallow
* attaching new programs to descendent cgroup
*/
return -EPERM;
if (prog && effective && overridable != new_overridable)
/* if parent has overridable prog attached, only
* allow overridable programs in descendent cgroup
*/
return -EPERM;
effective = (!prog && parent) ? old_prog = cgrp->bpf.prog[type];
rcu_dereference_protected(parent->bpf.effective[type],
lockdep_is_held(&cgroup_mutex)) : if (prog) {
prog; overridable = new_overridable;
effective = prog;
if (old_prog &&
cgrp->bpf.disallow_override[type] == new_overridable)
/* disallow attaching non-overridable on top
* of existing overridable in this cgroup
* and vice versa
*/
return -EPERM;
}
if (!prog && !old_prog)
/* report error when trying to detach and nothing is attached */
return -ENOENT;
cgrp->bpf.prog[type] = prog;
css_for_each_descendant_pre(pos, &cgrp->self) { css_for_each_descendant_pre(pos, &cgrp->self) {
struct cgroup *desc = container_of(pos, struct cgroup, self); struct cgroup *desc = container_of(pos, struct cgroup, self);
/* skip the subtree if the descendant has its own program */ /* skip the subtree if the descendant has its own program */
if (desc->bpf.prog[type] && desc != cgrp) if (desc->bpf.prog[type] && desc != cgrp) {
pos = css_rightmost_descendant(pos); pos = css_rightmost_descendant(pos);
else } else {
rcu_assign_pointer(desc->bpf.effective[type], rcu_assign_pointer(desc->bpf.effective[type],
effective); effective);
desc->bpf.disallow_override[type] = !overridable;
}
} }
if (prog) if (prog)
...@@ -115,6 +149,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp, ...@@ -115,6 +149,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
bpf_prog_put(old_prog); bpf_prog_put(old_prog);
static_branch_dec(&cgroup_bpf_enabled_key); static_branch_dec(&cgroup_bpf_enabled_key);
} }
return 0;
} }
/** /**
......
...@@ -920,13 +920,14 @@ static int bpf_obj_get(const union bpf_attr *attr) ...@@ -920,13 +920,14 @@ static int bpf_obj_get(const union bpf_attr *attr)
#ifdef CONFIG_CGROUP_BPF #ifdef CONFIG_CGROUP_BPF
#define BPF_PROG_ATTACH_LAST_FIELD attach_type #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
static int bpf_prog_attach(const union bpf_attr *attr) static int bpf_prog_attach(const union bpf_attr *attr)
{ {
enum bpf_prog_type ptype;
struct bpf_prog *prog; struct bpf_prog *prog;
struct cgroup *cgrp; struct cgroup *cgrp;
enum bpf_prog_type ptype; int ret;
if (!capable(CAP_NET_ADMIN)) if (!capable(CAP_NET_ADMIN))
return -EPERM; return -EPERM;
...@@ -934,6 +935,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) ...@@ -934,6 +935,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
if (CHECK_ATTR(BPF_PROG_ATTACH)) if (CHECK_ATTR(BPF_PROG_ATTACH))
return -EINVAL; return -EINVAL;
if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
return -EINVAL;
switch (attr->attach_type) { switch (attr->attach_type) {
case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS: case BPF_CGROUP_INET_EGRESS:
...@@ -956,10 +960,13 @@ static int bpf_prog_attach(const union bpf_attr *attr) ...@@ -956,10 +960,13 @@ static int bpf_prog_attach(const union bpf_attr *attr)
return PTR_ERR(cgrp); return PTR_ERR(cgrp);
} }
cgroup_bpf_update(cgrp, prog, attr->attach_type); ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
if (ret)
bpf_prog_put(prog);
cgroup_put(cgrp); cgroup_put(cgrp);
return 0; return ret;
} }
#define BPF_PROG_DETACH_LAST_FIELD attach_type #define BPF_PROG_DETACH_LAST_FIELD attach_type
...@@ -967,6 +974,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) ...@@ -967,6 +974,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
static int bpf_prog_detach(const union bpf_attr *attr) static int bpf_prog_detach(const union bpf_attr *attr)
{ {
struct cgroup *cgrp; struct cgroup *cgrp;
int ret;
if (!capable(CAP_NET_ADMIN)) if (!capable(CAP_NET_ADMIN))
return -EPERM; return -EPERM;
...@@ -982,7 +990,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) ...@@ -982,7 +990,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
if (IS_ERR(cgrp)) if (IS_ERR(cgrp))
return PTR_ERR(cgrp); return PTR_ERR(cgrp);
cgroup_bpf_update(cgrp, NULL, attr->attach_type); ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
cgroup_put(cgrp); cgroup_put(cgrp);
break; break;
...@@ -990,7 +998,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) ...@@ -990,7 +998,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
return -EINVAL; return -EINVAL;
} }
return 0; return ret;
} }
#endif /* CONFIG_CGROUP_BPF */ #endif /* CONFIG_CGROUP_BPF */
......
...@@ -6498,15 +6498,16 @@ static __init int cgroup_namespaces_init(void) ...@@ -6498,15 +6498,16 @@ static __init int cgroup_namespaces_init(void)
subsys_initcall(cgroup_namespaces_init); subsys_initcall(cgroup_namespaces_init);
#ifdef CONFIG_CGROUP_BPF #ifdef CONFIG_CGROUP_BPF
void cgroup_bpf_update(struct cgroup *cgrp, int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_prog *prog, enum bpf_attach_type type, bool overridable)
enum bpf_attach_type type)
{ {
struct cgroup *parent = cgroup_parent(cgrp); struct cgroup *parent = cgroup_parent(cgrp);
int ret;
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_mutex);
__cgroup_bpf_update(cgrp, parent, prog, type); ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
return ret;
} }
#endif /* CONFIG_CGROUP_BPF */ #endif /* CONFIG_CGROUP_BPF */
......
...@@ -2923,6 +2923,7 @@ static void neigh_proc_update(struct ctl_table *ctl, int write) ...@@ -2923,6 +2923,7 @@ static void neigh_proc_update(struct ctl_table *ctl, int write)
return; return;
set_bit(index, p->data_state); set_bit(index, p->data_state);
if (index == NEIGH_VAR_DELAY_PROBE_TIME)
call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
if (!dev) /* NULL dev means this is default value */ if (!dev) /* NULL dev means this is default value */
neigh_copy_dflt_parms(net, p, index); neigh_copy_dflt_parms(net, p, index);
......
...@@ -1263,7 +1263,7 @@ void __init arp_init(void) ...@@ -1263,7 +1263,7 @@ void __init arp_init(void)
/* /*
* ax25 -> ASCII conversion * ax25 -> ASCII conversion
*/ */
static char *ax2asc2(ax25_address *a, char *buf) static void ax2asc2(ax25_address *a, char *buf)
{ {
char c, *s; char c, *s;
int n; int n;
...@@ -1285,10 +1285,10 @@ static char *ax2asc2(ax25_address *a, char *buf) ...@@ -1285,10 +1285,10 @@ static char *ax2asc2(ax25_address *a, char *buf)
*s++ = n + '0'; *s++ = n + '0';
*s++ = '\0'; *s++ = '\0';
if (*buf == '\0' || *buf == '-') if (*buf == '\0' || *buf == '-') {
return "*"; buf[0] = '*';
buf[1] = '\0';
return buf; }
} }
#endif /* CONFIG_AX25 */ #endif /* CONFIG_AX25 */
...@@ -1322,7 +1322,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, ...@@ -1322,7 +1322,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
} }
#endif #endif
sprintf(tbuf, "%pI4", n->primary_key); sprintf(tbuf, "%pI4", n->primary_key);
seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", seq_printf(seq, "%-16s 0x%-10x0x%-10x%-17s * %s\n",
tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name);
read_unlock(&n->lock); read_unlock(&n->lock);
} }
......
...@@ -117,7 +117,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, ...@@ -117,7 +117,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
(fwmark > 0 && skb->mark == fwmark)) && (fwmark > 0 && skb->mark == fwmark)) &&
(full || tp->snd_cwnd != tcp_probe.lastcwnd)) { (full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
spin_lock(&tcp_probe.lock); spin_lock_bh(&tcp_probe.lock);
/* If log fills, just silently drop */ /* If log fills, just silently drop */
if (tcp_probe_avail() > 1) { if (tcp_probe_avail() > 1) {
struct tcp_log *p = tcp_probe.log + tcp_probe.head; struct tcp_log *p = tcp_probe.log + tcp_probe.head;
...@@ -157,7 +157,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, ...@@ -157,7 +157,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1);
} }
tcp_probe.lastcwnd = tp->snd_cwnd; tcp_probe.lastcwnd = tp->snd_cwnd;
spin_unlock(&tcp_probe.lock); spin_unlock_bh(&tcp_probe.lock);
wake_up(&tcp_probe.wait); wake_up(&tcp_probe.wait);
} }
......
...@@ -167,18 +167,22 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, ...@@ -167,18 +167,22 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
if (np->sndflow) if (np->sndflow)
fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
addr_type = ipv6_addr_type(&usin->sin6_addr); if (ipv6_addr_any(&usin->sin6_addr)) {
if (addr_type == IPV6_ADDR_ANY) {
/* /*
* connect to self * connect to self
*/ */
usin->sin6_addr.s6_addr[15] = 0x01; if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
&usin->sin6_addr);
else
usin->sin6_addr = in6addr_loopback;
} }
addr_type = ipv6_addr_type(&usin->sin6_addr);
daddr = &usin->sin6_addr; daddr = &usin->sin6_addr;
if (addr_type == IPV6_ADDR_MAPPED) { if (addr_type & IPV6_ADDR_MAPPED) {
struct sockaddr_in sin; struct sockaddr_in sin;
if (__ipv6_only_sock(sk)) { if (__ipv6_only_sock(sk)) {
......
...@@ -1021,6 +1021,9 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, ...@@ -1021,6 +1021,9 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
} }
} }
#endif #endif
if (ipv6_addr_v4mapped(&fl6->saddr) &&
!(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr)))
return -EAFNOSUPPORT;
return 0; return 0;
......
...@@ -148,8 +148,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ...@@ -148,8 +148,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
* connect() to INADDR_ANY means loopback (BSD'ism). * connect() to INADDR_ANY means loopback (BSD'ism).
*/ */
if (ipv6_addr_any(&usin->sin6_addr)) if (ipv6_addr_any(&usin->sin6_addr)) {
usin->sin6_addr.s6_addr[15] = 0x1; if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
&usin->sin6_addr);
else
usin->sin6_addr = in6addr_loopback;
}
addr_type = ipv6_addr_type(&usin->sin6_addr); addr_type = ipv6_addr_type(&usin->sin6_addr);
...@@ -188,7 +193,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ...@@ -188,7 +193,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
* TCP over IPv4 * TCP over IPv4
*/ */
if (addr_type == IPV6_ADDR_MAPPED) { if (addr_type & IPV6_ADDR_MAPPED) {
u32 exthdrlen = icsk->icsk_ext_hdr_len; u32 exthdrlen = icsk->icsk_ext_hdr_len;
struct sockaddr_in sin; struct sockaddr_in sin;
......
...@@ -1033,6 +1033,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ...@@ -1033,6 +1033,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (addr_len < SIN6_LEN_RFC2133) if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL; return -EINVAL;
daddr = &sin6->sin6_addr; daddr = &sin6->sin6_addr;
if (ipv6_addr_any(daddr) &&
ipv6_addr_v4mapped(&np->saddr))
ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
daddr);
break; break;
case AF_INET: case AF_INET:
goto do_udp_sendmsg; goto do_udp_sendmsg;
......
...@@ -1044,9 +1044,11 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) ...@@ -1044,9 +1044,11 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
} else { } else {
/* Message not complete, save state */ /* Message not complete, save state */
partial_message: partial_message:
if (head) {
kcm->seq_skb = head; kcm->seq_skb = head;
kcm_tx_msg(head)->last_skb = skb; kcm_tx_msg(head)->last_skb = skb;
} }
}
KCM_STATS_ADD(kcm->stats.tx_bytes, copied); KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
......
...@@ -821,7 +821,10 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) ...@@ -821,7 +821,10 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
* another trick required to cope with how the PROCOM state * another trick required to cope with how the PROCOM state
* machine works. -acme * machine works. -acme
*/ */
skb_orphan(skb);
sock_hold(sk);
skb->sk = sk; skb->sk = sk;
skb->destructor = sock_efree;
} }
if (!sock_owned_by_user(sk)) if (!sock_owned_by_user(sk))
llc_conn_rcv(sk, skb); llc_conn_rcv(sk, skb);
......
...@@ -290,7 +290,10 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, ...@@ -290,7 +290,10 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb,
ev->type = LLC_SAP_EV_TYPE_PDU; ev->type = LLC_SAP_EV_TYPE_PDU;
ev->reason = 0; ev->reason = 0;
skb_orphan(skb);
sock_hold(sk);
skb->sk = sk; skb->sk = sk;
skb->destructor = sock_efree;
llc_sap_state_process(sap, skb); llc_sap_state_process(sap, skb);
} }
......
...@@ -1619,6 +1619,7 @@ static void fanout_release_data(struct packet_fanout *f) ...@@ -1619,6 +1619,7 @@ static void fanout_release_data(struct packet_fanout *f)
static int fanout_add(struct sock *sk, u16 id, u16 type_flags) static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
{ {
struct packet_rollover *rollover = NULL;
struct packet_sock *po = pkt_sk(sk); struct packet_sock *po = pkt_sk(sk);
struct packet_fanout *f, *match; struct packet_fanout *f, *match;
u8 type = type_flags & 0xff; u8 type = type_flags & 0xff;
...@@ -1641,23 +1642,28 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) ...@@ -1641,23 +1642,28 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
return -EINVAL; return -EINVAL;
} }
mutex_lock(&fanout_mutex);
err = -EINVAL;
if (!po->running) if (!po->running)
return -EINVAL; goto out;
err = -EALREADY;
if (po->fanout) if (po->fanout)
return -EALREADY; goto out;
if (type == PACKET_FANOUT_ROLLOVER || if (type == PACKET_FANOUT_ROLLOVER ||
(type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) { (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL); err = -ENOMEM;
if (!po->rollover) rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
return -ENOMEM; if (!rollover)
atomic_long_set(&po->rollover->num, 0); goto out;
atomic_long_set(&po->rollover->num_huge, 0); atomic_long_set(&rollover->num, 0);
atomic_long_set(&po->rollover->num_failed, 0); atomic_long_set(&rollover->num_huge, 0);
atomic_long_set(&rollover->num_failed, 0);
po->rollover = rollover;
} }
mutex_lock(&fanout_mutex);
match = NULL; match = NULL;
list_for_each_entry(f, &fanout_list, list) { list_for_each_entry(f, &fanout_list, list) {
if (f->id == id && if (f->id == id &&
...@@ -1704,11 +1710,11 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) ...@@ -1704,11 +1710,11 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
} }
} }
out: out:
mutex_unlock(&fanout_mutex); if (err && rollover) {
if (err) { kfree(rollover);
kfree(po->rollover);
po->rollover = NULL; po->rollover = NULL;
} }
mutex_unlock(&fanout_mutex);
return err; return err;
} }
...@@ -1717,11 +1723,9 @@ static void fanout_release(struct sock *sk) ...@@ -1717,11 +1723,9 @@ static void fanout_release(struct sock *sk)
struct packet_sock *po = pkt_sk(sk); struct packet_sock *po = pkt_sk(sk);
struct packet_fanout *f; struct packet_fanout *f;
f = po->fanout;
if (!f)
return;
mutex_lock(&fanout_mutex); mutex_lock(&fanout_mutex);
f = po->fanout;
if (f) {
po->fanout = NULL; po->fanout = NULL;
if (atomic_dec_and_test(&f->sk_ref)) { if (atomic_dec_and_test(&f->sk_ref)) {
...@@ -1730,10 +1734,11 @@ static void fanout_release(struct sock *sk) ...@@ -1730,10 +1734,11 @@ static void fanout_release(struct sock *sk)
fanout_release_data(f); fanout_release_data(f);
kfree(f); kfree(f);
} }
mutex_unlock(&fanout_mutex);
if (po->rollover) if (po->rollover)
kfree_rcu(po->rollover, rcu); kfree_rcu(po->rollover, rcu);
}
mutex_unlock(&fanout_mutex);
} }
static bool packet_extra_vlan_len_allowed(const struct net_device *dev, static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
......
...@@ -104,7 +104,7 @@ static int attach_filter(int cg_fd, int type, int verdict) ...@@ -104,7 +104,7 @@ static int attach_filter(int cg_fd, int type, int verdict)
return EXIT_FAILURE; return EXIT_FAILURE;
} }
ret = bpf_prog_attach(prog_fd, cg_fd, type); ret = bpf_prog_attach(prog_fd, cg_fd, type, 0);
if (ret < 0) { if (ret < 0) {
printf("Failed to attach prog to cgroup: '%s'\n", printf("Failed to attach prog to cgroup: '%s'\n",
strerror(errno)); strerror(errno));
......
...@@ -79,11 +79,12 @@ int main(int argc, char **argv) ...@@ -79,11 +79,12 @@ int main(int argc, char **argv)
if (join_cgroup(FOO)) if (join_cgroup(FOO))
goto err; goto err;
if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS)) { if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) {
log_err("Attaching prog to /foo"); log_err("Attaching prog to /foo");
goto err; goto err;
} }
printf("Attached DROP prog. This ping in cgroup /foo should fail...\n");
assert(system(PING_CMD) != 0); assert(system(PING_CMD) != 0);
/* Create cgroup /foo/bar, get fd, and join it */ /* Create cgroup /foo/bar, get fd, and join it */
...@@ -94,24 +95,27 @@ int main(int argc, char **argv) ...@@ -94,24 +95,27 @@ int main(int argc, char **argv)
if (join_cgroup(BAR)) if (join_cgroup(BAR))
goto err; goto err;
printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0); assert(system(PING_CMD) != 0);
if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) { if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
log_err("Attaching prog to /foo/bar"); log_err("Attaching prog to /foo/bar");
goto err; goto err;
} }
printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
assert(system(PING_CMD) == 0); assert(system(PING_CMD) == 0);
if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
log_err("Detaching program from /foo/bar"); log_err("Detaching program from /foo/bar");
goto err; goto err;
} }
printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
"This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0); assert(system(PING_CMD) != 0);
if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) { if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
log_err("Attaching prog to /foo/bar"); log_err("Attaching prog to /foo/bar");
goto err; goto err;
} }
...@@ -121,8 +125,60 @@ int main(int argc, char **argv) ...@@ -121,8 +125,60 @@ int main(int argc, char **argv)
goto err; goto err;
} }
printf("Attached PASS from /foo/bar and detached DROP from /foo.\n"
"This ping in cgroup /foo/bar should pass...\n");
assert(system(PING_CMD) == 0); assert(system(PING_CMD) == 0);
if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
log_err("Attaching prog to /foo/bar");
goto err;
}
if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
errno = 0;
log_err("Unexpected success attaching prog to /foo/bar");
goto err;
}
if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
log_err("Detaching program from /foo/bar");
goto err;
}
if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
errno = 0;
log_err("Unexpected success in double detach from /foo");
goto err;
}
if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
log_err("Attaching non-overridable prog to /foo");
goto err;
}
if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
errno = 0;
log_err("Unexpected success attaching non-overridable prog to /foo/bar");
goto err;
}
if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
errno = 0;
log_err("Unexpected success attaching overridable prog to /foo/bar");
goto err;
}
if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) {
errno = 0;
log_err("Unexpected success attaching overridable prog to /foo");
goto err;
}
if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
log_err("Attaching different non-overridable prog to /foo");
goto err;
}
goto out; goto out;
err: err:
...@@ -132,5 +188,9 @@ int main(int argc, char **argv) ...@@ -132,5 +188,9 @@ int main(int argc, char **argv)
close(foo); close(foo);
close(bar); close(bar);
cleanup_cgroup_environment(); cleanup_cgroup_environment();
if (!rc)
printf("PASS\n");
else
printf("FAIL\n");
return rc; return rc;
} }
...@@ -75,7 +75,7 @@ int main(int argc, char **argv) ...@@ -75,7 +75,7 @@ int main(int argc, char **argv)
return EXIT_FAILURE; return EXIT_FAILURE;
} }
ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE); ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0);
if (ret < 0) { if (ret < 0) {
printf("Failed to attach prog to cgroup: '%s'\n", printf("Failed to attach prog to cgroup: '%s'\n",
strerror(errno)); strerror(errno));
......
...@@ -55,7 +55,7 @@ int main(int argc, char **argv) ...@@ -55,7 +55,7 @@ int main(int argc, char **argv)
} }
ret = bpf_prog_attach(prog_fd[filter_id], cg_fd, ret = bpf_prog_attach(prog_fd[filter_id], cg_fd,
BPF_CGROUP_INET_SOCK_CREATE); BPF_CGROUP_INET_SOCK_CREATE, 0);
if (ret < 0) { if (ret < 0) {
printf("Failed to attach prog to cgroup: '%s'\n", printf("Failed to attach prog to cgroup: '%s'\n",
strerror(errno)); strerror(errno));
......
...@@ -116,6 +116,12 @@ enum bpf_attach_type { ...@@ -116,6 +116,12 @@ enum bpf_attach_type {
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
* to the given target_fd cgroup the descendent cgroup will be able to
* override effective bpf program that was inherited from this cgroup
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
#define BPF_PSEUDO_MAP_FD 1 #define BPF_PSEUDO_MAP_FD 1
/* flags for BPF_MAP_UPDATE_ELEM command */ /* flags for BPF_MAP_UPDATE_ELEM command */
...@@ -171,6 +177,7 @@ union bpf_attr { ...@@ -171,6 +177,7 @@ union bpf_attr {
__u32 target_fd; /* container object to attach to */ __u32 target_fd; /* container object to attach to */
__u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type; __u32 attach_type;
__u32 attach_flags;
}; };
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
......
...@@ -168,7 +168,8 @@ int bpf_obj_get(const char *pathname) ...@@ -168,7 +168,8 @@ int bpf_obj_get(const char *pathname)
return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
} }
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
unsigned int flags)
{ {
union bpf_attr attr; union bpf_attr attr;
...@@ -176,6 +177,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) ...@@ -176,6 +177,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type)
attr.target_fd = target_fd; attr.target_fd = target_fd;
attr.attach_bpf_fd = prog_fd; attr.attach_bpf_fd = prog_fd;
attr.attach_type = type; attr.attach_type = type;
attr.attach_flags = flags;
return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
} }
......
...@@ -41,7 +41,8 @@ int bpf_map_delete_elem(int fd, void *key); ...@@ -41,7 +41,8 @@ int bpf_map_delete_elem(int fd, void *key);
int bpf_map_get_next_key(int fd, void *key, void *next_key); int bpf_map_get_next_key(int fd, void *key, void *next_key);
int bpf_obj_pin(int fd, const char *pathname); int bpf_obj_pin(int fd, const char *pathname);
int bpf_obj_get(const char *pathname); int bpf_obj_get(const char *pathname);
int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type,
unsigned int flags);
int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment