Commit a75d1801 authored by David S. Miller

Merge branch 'veth-XDP-stats-improvement'

Toshiaki Makita says:

====================
veth: XDP stats improvement

ndo_xdp_xmit in veth did not update packet counters, as reported in [1].
Also, the current implementation updates counters only on the tx side,
so rx-side events like XDP_DROP were not collected.
This series implements the missing accounting as well as support for
per-queue ethtool stats in veth.

Patch 1: Update the drop counter in ndo_xdp_xmit.
Patch 2: Update packet and byte counters for all XDP paths, and the
         drop counter on XDP_DROP.
Patch 3: Support per-queue ethtool stats for XDP counters.

Note that counters are maintained on a per-queue basis for XDP but not
otherwise (those stay per-cpu and atomic, as before). This is because
1) the tx path in veth is essentially lockless, so we cannot update
per-queue stats on tx, and 2) the rx path is a net core routine
(process_backlog) which cannot update per-queue stats when XDP is
disabled. On the other hand, there are real rxqs and NAPI handlers for
veth XDP, so per-queue stats are updated on rx for XDP packets and are
also used to derive the peer device's tx counters, contrary to the
existing non-XDP counters.

[1] https://patchwork.ozlabs.org/cover/953071/#1967449
====================
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents acad76a5 d397b968
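The cover letter's locking argument boils down to the standard u64_stats_sync
recipe: each rx queue owns a counter block that only that queue's NAPI handler
writes, so the writer needs no lock, while readers take a snapshot through a
seqcount retry loop (which compiles away on 64-bit kernels). Below is a minimal
sketch of that recipe; the demo_* names are hypothetical and not part of this
patch set.

	#include <linux/u64_stats_sync.h>

	struct demo_rq_stats {
		u64			packets;
		u64			bytes;
		struct u64_stats_sync	syncp;	/* initialize with u64_stats_init() */
	};

	/* Writer: runs only in the owning queue's NAPI poll, so it is
	 * single-threaded per queue and needs no lock of its own.
	 */
	static void demo_rx_account(struct demo_rq_stats *s, unsigned int len)
	{
		u64_stats_update_begin(&s->syncp);
		s->packets++;
		s->bytes += len;
		u64_stats_update_end(&s->syncp);
	}

	/* Reader: take a consistent snapshot, retrying if a writer raced us. */
	static void demo_rx_snapshot(const struct demo_rq_stats *s,
				     u64 *packets, u64 *bytes)
	{
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_irq(&s->syncp);
			*packets = s->packets;
			*bytes = s->bytes;
		} while (u64_stats_fetch_retry_irq(&s->syncp, start));
	}

This is exactly the shape of veth_xdp_rcv (writer) and of veth_stats_rx and
veth_get_ethtool_stats (readers) in the diff below.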
@@ -37,11 +37,19 @@
 #define VETH_XDP_TX		BIT(0)
 #define VETH_XDP_REDIR		BIT(1)
 
+struct veth_rq_stats {
+	u64			xdp_packets;
+	u64			xdp_bytes;
+	u64			xdp_drops;
+	struct u64_stats_sync	syncp;
+};
+
 struct veth_rq {
 	struct napi_struct	xdp_napi;
 	struct net_device	*dev;
 	struct bpf_prog __rcu	*xdp_prog;
 	struct xdp_mem_info	xdp_mem;
+	struct veth_rq_stats	stats;
 	bool			rx_notify_masked;
 	struct ptr_ring		xdp_ring;
 	struct xdp_rxq_info	xdp_rxq;
@@ -59,6 +67,21 @@ struct veth_priv {
  * ethtool interface
  */
 
+struct veth_q_stat_desc {
+	char	desc[ETH_GSTRING_LEN];
+	size_t	offset;
+};
+
+#define VETH_RQ_STAT(m)	offsetof(struct veth_rq_stats, m)
+
+static const struct veth_q_stat_desc veth_rq_stats_desc[] = {
+	{ "xdp_packets", VETH_RQ_STAT(xdp_packets) },
+	{ "xdp_bytes",   VETH_RQ_STAT(xdp_bytes) },
+	{ "xdp_drops",   VETH_RQ_STAT(xdp_drops) },
+};
+
+#define VETH_RQ_STATS_LEN	ARRAY_SIZE(veth_rq_stats_desc)
+
 static struct {
 	const char string[ETH_GSTRING_LEN];
 } ethtool_stats_keys[] = {
@@ -83,9 +106,20 @@ static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 
 static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 {
+	char *p = (char *)buf;
+	int i, j;
+
 	switch(stringset) {
 	case ETH_SS_STATS:
-		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+		memcpy(p, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+		p += sizeof(ethtool_stats_keys);
+		for (i = 0; i < dev->real_num_rx_queues; i++) {
+			for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
+				snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s",
+					 i, veth_rq_stats_desc[j].desc);
+				p += ETH_GSTRING_LEN;
+			}
+		}
 		break;
 	}
 }
@@ -94,7 +128,8 @@ static int veth_get_sset_count(struct net_device *dev, int sset)
 {
 	switch (sset) {
 	case ETH_SS_STATS:
-		return ARRAY_SIZE(ethtool_stats_keys);
+		return ARRAY_SIZE(ethtool_stats_keys) +
+		       VETH_RQ_STATS_LEN * dev->real_num_rx_queues;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -105,8 +140,25 @@ static void veth_get_ethtool_stats(struct net_device *dev,
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	struct net_device *peer = rtnl_dereference(priv->peer);
+	int i, j, idx;
 
 	data[0] = peer ? peer->ifindex : 0;
+	idx = 1;
+	for (i = 0; i < dev->real_num_rx_queues; i++) {
+		const struct veth_rq_stats *rq_stats = &priv->rq[i].stats;
+		const void *stats_base = (void *)rq_stats;
+		unsigned int start;
+		size_t offset;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&rq_stats->syncp);
+			for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
+				offset = veth_rq_stats_desc[j].offset;
+				data[idx + j] = *(u64 *)(stats_base + offset);
+			}
+		} while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start));
+		idx += VETH_RQ_STATS_LEN;
+	}
 }
 
 static int veth_get_ts_info(struct net_device *dev,
@@ -211,12 +263,14 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	skb_tx_timestamp(skb);
 	if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) {
-		struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats);
+		if (!rcv_xdp) {
+			struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats);
 
-		u64_stats_update_begin(&stats->syncp);
-		stats->bytes += length;
-		stats->packets++;
-		u64_stats_update_end(&stats->syncp);
+			u64_stats_update_begin(&stats->syncp);
+			stats->bytes += length;
+			stats->packets++;
+			u64_stats_update_end(&stats->syncp);
+		}
 	} else {
 drop:
 		atomic64_inc(&priv->dropped);
@@ -230,7 +284,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static u64 veth_stats_one(struct pcpu_lstats *result, struct net_device *dev)
+static u64 veth_stats_tx(struct pcpu_lstats *result, struct net_device *dev)
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	int cpu;
@@ -253,23 +307,58 @@ static u64 veth_stats_tx(struct pcpu_lstats *result, struct net_device *dev)
 	return atomic64_read(&priv->dropped);
 }
 
+static void veth_stats_rx(struct veth_rq_stats *result, struct net_device *dev)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+	int i;
+
+	result->xdp_packets = 0;
+	result->xdp_bytes = 0;
+	result->xdp_drops = 0;
+	for (i = 0; i < dev->num_rx_queues; i++) {
+		struct veth_rq_stats *stats = &priv->rq[i].stats;
+		u64 packets, bytes, drops;
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->xdp_packets;
+			bytes = stats->xdp_bytes;
+			drops = stats->xdp_drops;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+		result->xdp_packets += packets;
+		result->xdp_bytes += bytes;
+		result->xdp_drops += drops;
+	}
+}
+
 static void veth_get_stats64(struct net_device *dev,
 			     struct rtnl_link_stats64 *tot)
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	struct net_device *peer;
-	struct pcpu_lstats one;
+	struct veth_rq_stats rx;
+	struct pcpu_lstats tx;
 
-	tot->tx_dropped = veth_stats_one(&one, dev);
-	tot->tx_bytes = one.bytes;
-	tot->tx_packets = one.packets;
+	tot->tx_dropped = veth_stats_tx(&tx, dev);
+	tot->tx_bytes = tx.bytes;
+	tot->tx_packets = tx.packets;
+
+	veth_stats_rx(&rx, dev);
+	tot->rx_dropped = rx.xdp_drops;
+	tot->rx_bytes = rx.xdp_bytes;
+	tot->rx_packets = rx.xdp_packets;
 
 	rcu_read_lock();
 	peer = rcu_dereference(priv->peer);
 	if (peer) {
-		tot->rx_dropped = veth_stats_one(&one, peer);
-		tot->rx_bytes = one.bytes;
-		tot->rx_packets = one.packets;
+		tot->rx_dropped += veth_stats_tx(&tx, peer);
+		tot->rx_bytes += tx.bytes;
+		tot->rx_packets += tx.packets;
+
+		veth_stats_rx(&rx, peer);
+		tot->tx_bytes += rx.xdp_bytes;
+		tot->tx_packets += rx.xdp_packets;
 	}
 	rcu_read_unlock();
 }
@@ -308,16 +397,20 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
 {
 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
 	struct net_device *rcv;
+	int i, ret, drops = n;
 	unsigned int max_len;
 	struct veth_rq *rq;
-	int i, drops = 0;
 
-	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
-		return -EINVAL;
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
+		ret = -EINVAL;
+		goto drop;
+	}
 
 	rcv = rcu_dereference(priv->peer);
-	if (unlikely(!rcv))
-		return -ENXIO;
+	if (unlikely(!rcv)) {
+		ret = -ENXIO;
+		goto drop;
+	}
 
 	rcv_priv = netdev_priv(rcv);
 	rq = &rcv_priv->rq[veth_select_rxq(rcv)];
@@ -325,9 +418,12 @@
 	 * side. This means an XDP program is loaded on the peer and the peer
 	 * device is up.
 	 */
-	if (!rcu_access_pointer(rq->xdp_prog))
-		return -ENXIO;
+	if (!rcu_access_pointer(rq->xdp_prog)) {
+		ret = -ENXIO;
+		goto drop;
+	}
 
+	drops = 0;
 	max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN;
 
 	spin_lock(&rq->xdp_ring.producer_lock);
@@ -346,7 +442,14 @@
 	if (flags & XDP_XMIT_FLUSH)
 		__veth_xdp_flush(rq);
 
-	return n - drops;
+	if (likely(!drops))
+		return n;
+
+	ret = n - drops;
+drop:
+	atomic64_add(drops, &priv->dropped);
+
+	return ret;
 }
 
 static void veth_xdp_flush(struct net_device *dev)
@@ -595,28 +698,42 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
 static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit)
 {
-	int i, done = 0;
+	int i, done = 0, drops = 0, bytes = 0;
 
 	for (i = 0; i < budget; i++) {
 		void *ptr = __ptr_ring_consume(&rq->xdp_ring);
+		unsigned int xdp_xmit_one = 0;
 		struct sk_buff *skb;
 
 		if (!ptr)
 			break;
 
 		if (veth_is_xdp_frame(ptr)) {
-			skb = veth_xdp_rcv_one(rq, veth_ptr_to_xdp(ptr),
-					       xdp_xmit);
+			struct xdp_frame *frame = veth_ptr_to_xdp(ptr);
+
+			bytes += frame->len;
+			skb = veth_xdp_rcv_one(rq, frame, &xdp_xmit_one);
 		} else {
-			skb = veth_xdp_rcv_skb(rq, ptr, xdp_xmit);
+			skb = ptr;
+			bytes += skb->len;
+			skb = veth_xdp_rcv_skb(rq, skb, &xdp_xmit_one);
 		}
+		*xdp_xmit |= xdp_xmit_one;
 
 		if (skb)
 			napi_gro_receive(&rq->xdp_napi, skb);
+		else if (!xdp_xmit_one)
+			drops++;
 
 		done++;
 	}
 
+	u64_stats_update_begin(&rq->stats.syncp);
+	rq->stats.xdp_packets += done;
+	rq->stats.xdp_bytes += bytes;
+	rq->stats.xdp_drops += drops;
+	u64_stats_update_end(&rq->stats.syncp);
+
 	return done;
 }
 
@@ -807,8 +924,10 @@ static int veth_alloc_queues(struct net_device *dev)
 	if (!priv->rq)
 		return -ENOMEM;
 
-	for (i = 0; i < dev->num_rx_queues; i++)
+	for (i = 0; i < dev->num_rx_queues; i++) {
 		priv->rq[i].dev = dev;
+		u64_stats_init(&priv->rq[i].stats.syncp);
+	}
 
 	return 0;
 }
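For reference, given the "rx_queue_%u_%s" format in veth_get_strings above, the
new counters should appear in ethtool -S output after the existing peer_ifindex
entry as rx_queue_<n>_xdp_packets, rx_queue_<n>_xdp_bytes and
rx_queue_<n>_xdp_drops, one triple per real rx queue <n>, matching the count
returned by veth_get_sset_count.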