Commit 0b6f164d authored by Jakub Kicinski

Merge branch 'generic-tx-reallocation-for-dsa'

Vladimir Oltean says:

====================
Generic TX reallocation for DSA

Christian has reported buggy usage of skb_put() in tag_ksz.c, which is
only triggerable in real life using his not-yet-published patches for
IEEE 1588 timestamping on Micrel KSZ switches.

The concrete problem there is that the driver can end up calling
skb_put() and exceed the end of the skb data area, because even though
it had reallocated the frame once before, it hadn't reallocated it large
enough. Christian explained it in more detail here:

https://lore.kernel.org/netdev/20201014161719.30289-1-ceggers@arri.de/
https://lore.kernel.org/netdev/20201016200226.23994-1-ceggers@arri.de/
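
For background on the failure mode: skb_put() never grows the underlying buffer, it only advances skb->tail, and it panics via skb_over_panic() if the new tail would pass skb->end. So a tail tagger must guarantee, before tagging, that the tailroom covers both the padding to the minimum frame size and the tag itself. A minimal sketch of that invariant (the helper name is hypothetical, not part of this series):

	#include <linux/if_ether.h>
	#include <linux/skbuff.h>

	/* Hypothetical helper, for illustration only: both the pad to
	 * ETH_ZLEN and the tail tag come out of the same tailroom, so
	 * they must be accounted for together before calling skb_put().
	 */
	static bool tail_tag_room_ok(const struct sk_buff *skb,
				     unsigned int tag_len)
	{
		unsigned int padlen = 0;

		if (skb->len < ETH_ZLEN)
			padlen = ETH_ZLEN - skb->len;

		return skb_tailroom(skb) >= padlen + tag_len;
	}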

But actually there's a bigger problem: taggers which are more rarely
tested tend to do shenanigans that go uncaught for a long time, and in
the meantime their code gets copy-pasted into other taggers, creating a
mess. For example, the tail tagging driver for the Marvell 88E6060
currently reallocates _every_single_frame_ on TX. Is that an obvious
indication that nobody is using it? Sure. Is it a good model to follow
when developing a new tail tagging driver? No.

DSA has all the information it needs to simplify the job of a tagger on
TX. It knows whether the tagger is a normal or a tail tagger, and what
protocol overhead it incurs. So this series performs the reallocation
centrally.
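
To illustrate the new contract (my sketch, not code from this series; all foo_* names are hypothetical): a tail tagger only declares its overhead and sets tail_tag in its dsa_device_ops, and its xmit hook can then append the tag unconditionally, because dsa_slave_xmit() has already reallocated and padded the skb:

	#include <linux/etherdevice.h>

	#include "dsa_priv.h"	/* assuming the tagger lives in net/dsa/ */

	#define FOO_TAG_LEN	1	/* hypothetical 1-byte tail tag */

	static struct sk_buff *foo_xmit(struct sk_buff *skb,
					struct net_device *dev)
	{
		struct dsa_port *dp = dsa_slave_to_port(dev);
		u8 *tag;

		/* No tailroom check needed: the DSA core has already
		 * padded the frame to ETH_ZLEN and reallocated it with
		 * 'overhead' bytes of tailroom.
		 */
		tag = skb_put(skb, FOO_TAG_LEN);
		*tag = BIT(dp->index);	/* hypothetical tag encoding */

		return skb;
	}

	static const struct dsa_device_ops foo_netdev_ops = {
		.name		= "foo",
		.proto		= DSA_TAG_PROTO_TRAILER,	/* placeholder */
		.xmit		= foo_xmit,
		.overhead	= FOO_TAG_LEN,
		.tail_tag	= true,
	};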

Changes in v3:
- Use dev_kfree_skb_any(), due to potential hardirq context in the xmit
  path.

Changes in v2:
- Dropped the tx_realloc counters for now, since the patch was pretty
  controversial and I lack the time at the moment to introduce new UAPI
  for that.
- Do padding for tail taggers irrespective of whether they need to
  reallocate the skb or not.
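
As a worked example of that last point (numbers are mine): with a 2-byte tail tag such as KSZ9477's, a 50-byte frame needs needed_tailroom = 2 + (ETH_ZLEN - 50) = 2 + 10 = 12 bytes in dsa_realloc_skb() below, so a single reallocation covers both the padding and the tag, and the subsequent eth_skb_pad() cannot fail for lack of room.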
====================

Link: https://lore.kernel.org/r/20201101191620.589272-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 92f9e238 86c4ad9a
net/dsa/slave.c
@@ -548,6 +548,30 @@ netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(dsa_enqueue_skb);
 
+static int dsa_realloc_skb(struct sk_buff *skb, struct net_device *dev)
+{
+	int needed_headroom = dev->needed_headroom;
+	int needed_tailroom = dev->needed_tailroom;
+
+	/* For tail taggers, we need to pad short frames ourselves, to ensure
+	 * that the tail tag does not fail at its role of being at the end of
+	 * the packet, once the master interface pads the frame. Account for
+	 * that pad length here, and pad later.
+	 */
+	if (unlikely(needed_tailroom && skb->len < ETH_ZLEN))
+		needed_tailroom += ETH_ZLEN - skb->len;
+	/* skb_headroom() returns unsigned int... */
+	needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0);
+	needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0);
+
+	if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb)))
+		/* No reallocation needed, yay! */
+		return 0;
+
+	return pskb_expand_head(skb, needed_headroom, needed_tailroom,
+				GFP_ATOMIC);
+}
+
 static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -567,6 +591,17 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 	 */
 	dsa_skb_tx_timestamp(p, skb);
 
+	if (dsa_realloc_skb(skb, dev)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* needed_tailroom should still be 'warm' in the cache line from
+	 * dsa_realloc_skb(), which has also ensured that padding is safe.
+	 */
+	if (dev->needed_tailroom)
+		eth_skb_pad(skb);
+
 	/* Transmit function may have to reallocate the original SKB,
 	 * in which case it must have freed it. Only free it here on error.
 	 */
@@ -1791,6 +1826,16 @@ int dsa_slave_create(struct dsa_port *port)
 	slave_dev->netdev_ops = &dsa_slave_netdev_ops;
 	if (ds->ops->port_max_mtu)
 		slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index);
+	if (cpu_dp->tag_ops->tail_tag)
+		slave_dev->needed_tailroom = cpu_dp->tag_ops->overhead;
+	else
+		slave_dev->needed_headroom = cpu_dp->tag_ops->overhead;
+	/* Try to save one extra realloc later in the TX path (in the master)
+	 * by also inheriting the master's needed headroom and tailroom.
+	 * The 8021q driver also does this.
+	 */
+	slave_dev->needed_headroom += master->needed_headroom;
+	slave_dev->needed_tailroom += master->needed_tailroom;
 	SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
 
 	netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
net/dsa/tag_ar9331.c
@@ -31,9 +31,6 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb,
 	__le16 *phdr;
 	u16 hdr;
 
-	if (skb_cow_head(skb, AR9331_HDR_LEN) < 0)
-		return NULL;
-
 	phdr = skb_push(skb, AR9331_HDR_LEN);
 
 	hdr = FIELD_PREP(AR9331_HDR_VERSION_MASK, AR9331_HDR_VERSION);
net/dsa/tag_brcm.c
@@ -66,9 +66,6 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
 	u16 queue = skb_get_queue_mapping(skb);
 	u8 *brcm_tag;
 
-	if (skb_cow_head(skb, BRCM_TAG_LEN) < 0)
-		return NULL;
-
 	/* The Ethernet switch we are interfaced with needs packets to be at
 	 * least 64 bytes (including FCS) otherwise they will be discarded when
 	 * they enter the switch port logic. When Broadcom tags are enabled, we
net/dsa/tag_dsa.c
@@ -23,9 +23,6 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * the ethertype field for untagged packets.
 	 */
 	if (skb->protocol == htons(ETH_P_8021Q)) {
-		if (skb_cow_head(skb, 0) < 0)
-			return NULL;
-
 		/*
 		 * Construct tagged FROM_CPU DSA tag from 802.1q tag.
 		 */
@@ -41,8 +38,6 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
 			dsa_header[2] &= ~0x10;
 		}
 	} else {
-		if (skb_cow_head(skb, DSA_HLEN) < 0)
-			return NULL;
 		skb_push(skb, DSA_HLEN);
 
 		memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN);
net/dsa/tag_edsa.c
@@ -35,8 +35,6 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * current ethertype field if the packet is untagged.
 	 */
 	if (skb->protocol == htons(ETH_P_8021Q)) {
-		if (skb_cow_head(skb, DSA_HLEN) < 0)
-			return NULL;
 		skb_push(skb, DSA_HLEN);
 
 		memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN);
@@ -60,8 +58,6 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
 			edsa_header[6] &= ~0x10;
 		}
 	} else {
-		if (skb_cow_head(skb, EDSA_HLEN) < 0)
-			return NULL;
 		skb_push(skb, EDSA_HLEN);
 
 		memmove(skb->data, skb->data + EDSA_HLEN, 2 * ETH_ALEN);
net/dsa/tag_gswip.c
@@ -60,13 +60,8 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
-	int err;
 	u8 *gswip_tag;
 
-	err = skb_cow_head(skb, GSWIP_TX_HEADER_LEN);
-	if (err)
-		return NULL;
-
 	skb_push(skb, GSWIP_TX_HEADER_LEN);
 
 	gswip_tag = skb->data;
net/dsa/tag_ksz.c
@@ -14,46 +14,6 @@
 #define KSZ_EGRESS_TAG_LEN	1
 #define KSZ_INGRESS_TAG_LEN	1
 
-static struct sk_buff *ksz_common_xmit(struct sk_buff *skb,
-				       struct net_device *dev, int len)
-{
-	struct sk_buff *nskb;
-	int padlen;
-
-	padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
-
-	if (skb_tailroom(skb) >= padlen + len) {
-		/* Let dsa_slave_xmit() free skb */
-		if (__skb_put_padto(skb, skb->len + padlen, false))
-			return NULL;
-
-		nskb = skb;
-	} else {
-		nskb = alloc_skb(NET_IP_ALIGN + skb->len +
-				 padlen + len, GFP_ATOMIC);
-		if (!nskb)
-			return NULL;
-		skb_reserve(nskb, NET_IP_ALIGN);
-		skb_reset_mac_header(nskb);
-		skb_set_network_header(nskb,
-				       skb_network_header(skb) - skb->head);
-		skb_set_transport_header(nskb,
-					 skb_transport_header(skb) - skb->head);
-		skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
-
-		/* Let skb_put_padto() free nskb, and let dsa_slave_xmit() free
-		 * skb
-		 */
-		if (skb_put_padto(nskb, nskb->len + padlen))
-			return NULL;
-
-		consume_skb(skb);
-	}
-
-	return nskb;
-}
-
 static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
 				      struct net_device *dev,
 				      unsigned int port, unsigned int len)
@@ -90,23 +50,18 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
 static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct sk_buff *nskb;
 	u8 *tag;
 	u8 *addr;
 
-	nskb = ksz_common_xmit(skb, dev, KSZ_INGRESS_TAG_LEN);
-	if (!nskb)
-		return NULL;
-
 	/* Tag encoding */
-	tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
-	addr = skb_mac_header(nskb);
+	tag = skb_put(skb, KSZ_INGRESS_TAG_LEN);
+	addr = skb_mac_header(skb);
 
 	*tag = 1 << dp->index;
 	if (is_link_local_ether_addr(addr))
 		*tag |= KSZ8795_TAIL_TAG_OVERRIDE;
 
-	return nskb;
+	return skb;
 }
 
 static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -156,18 +111,13 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
 					struct net_device *dev)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct sk_buff *nskb;
 	__be16 *tag;
 	u8 *addr;
 	u16 val;
 
-	nskb = ksz_common_xmit(skb, dev, KSZ9477_INGRESS_TAG_LEN);
-	if (!nskb)
-		return NULL;
-
 	/* Tag encoding */
-	tag = skb_put(nskb, KSZ9477_INGRESS_TAG_LEN);
-	addr = skb_mac_header(nskb);
+	tag = skb_put(skb, KSZ9477_INGRESS_TAG_LEN);
+	addr = skb_mac_header(skb);
 
 	val = BIT(dp->index);
@@ -176,7 +126,7 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
 
 	*tag = cpu_to_be16(val);
 
-	return nskb;
+	return skb;
 }
 
 static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -213,24 +163,19 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb,
 					struct net_device *dev)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct sk_buff *nskb;
 	u8 *addr;
 	u8 *tag;
 
-	nskb = ksz_common_xmit(skb, dev, KSZ_INGRESS_TAG_LEN);
-	if (!nskb)
-		return NULL;
-
 	/* Tag encoding */
-	tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
-	addr = skb_mac_header(nskb);
+	tag = skb_put(skb, KSZ_INGRESS_TAG_LEN);
+	addr = skb_mac_header(skb);
 
 	*tag = BIT(dp->index);
 
 	if (is_link_local_ether_addr(addr))
 		*tag |= KSZ9893_TAIL_TAG_OVERRIDE;
 
-	return nskb;
+	return skb;
 }
 
 static const struct dsa_device_ops ksz9893_netdev_ops = {
net/dsa/tag_lan9303.c
@@ -58,15 +58,6 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
 	__be16 *lan9303_tag;
 	u16 tag;
 
-	/* insert a special VLAN tag between the MAC addresses
-	 * and the current ethertype field.
-	 */
-	if (skb_cow_head(skb, LAN9303_TAG_LEN) < 0) {
-		dev_dbg(&dev->dev,
-			"Cannot make room for the special tag. Dropping packet\n");
-		return NULL;
-	}
-
 	/* provide 'LAN9303_TAG_LEN' bytes additional space */
 	skb_push(skb, LAN9303_TAG_LEN);
net/dsa/tag_mtk.c
@@ -34,9 +34,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 	 * table with VID.
 	 */
 	if (!skb_vlan_tagged(skb)) {
-		if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
-			return NULL;
-
 		skb_push(skb, MTK_HDR_LEN);
 		memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
 		is_vlan_skb = false;
net/dsa/tag_ocelot.c
@@ -143,13 +143,6 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
 	struct ocelot_port *ocelot_port;
 	u8 *prefix, *injection;
 	u64 qos_class, rew_op;
-	int err;
-
-	err = skb_cow_head(skb, OCELOT_TOTAL_TAG_LEN);
-	if (unlikely(err < 0)) {
-		netdev_err(netdev, "Cannot make room for tag.\n");
-		return NULL;
-	}
 
 	ocelot_port = ocelot->ports[dp->index];
net/dsa/tag_qca.c
@@ -34,9 +34,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
 	__be16 *phdr;
 	u16 hdr;
 
-	if (skb_cow_head(skb, QCA_HDR_LEN) < 0)
-		return NULL;
-
 	skb_push(skb, QCA_HDR_LEN);
 	memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN);
net/dsa/tag_trailer.c
@@ -13,42 +13,15 @@
 static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct sk_buff *nskb;
-	int padlen;
 	u8 *trailer;
 
-	/*
-	 * We have to make sure that the trailer ends up as the very
-	 * last 4 bytes of the packet. This means that we have to pad
-	 * the packet to the minimum ethernet frame size, if necessary,
-	 * before adding the trailer.
-	 */
-	padlen = 0;
-	if (skb->len < 60)
-		padlen = 60 - skb->len;
-
-	nskb = alloc_skb(NET_IP_ALIGN + skb->len + padlen + 4, GFP_ATOMIC);
-	if (!nskb)
-		return NULL;
-	skb_reserve(nskb, NET_IP_ALIGN);
-	skb_reset_mac_header(nskb);
-	skb_set_network_header(nskb, skb_network_header(skb) - skb->head);
-	skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head);
-	skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
-	consume_skb(skb);
-
-	if (padlen) {
-		skb_put_zero(nskb, padlen);
-	}
-
-	trailer = skb_put(nskb, 4);
+	trailer = skb_put(skb, 4);
 	trailer[0] = 0x80;
 	trailer[1] = 1 << dp->index;
 	trailer[2] = 0x10;
 	trailer[3] = 0x00;
 
-	return nskb;
+	return skb;
 }
 
 static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,