Commit 55a93b3e authored by Eric Dumazet, committed by David S. Miller

qdisc: validate skb without holding lock

Validation of an skb can be pretty expensive: it may involve
GSO segmentation and/or checksum computations.

We can do this without holding qdisc lock, so that other cpus
can queue additional packets.

Trick is that requeued packets were already validated, so we carry
a boolean so that sch_direct_xmit() can validate a fresh skb list,
or directly use an old one.

Tested on 40Gb NIC (8 TX queues) and 200 concurrent flows, 48 threads
host.

Turning TSO on or off had no effect on throughput; it only cost a few
more cpu cycles. Lock contention on the qdisc lock disappeared.

The same holds when TX checksum offload is disabled.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6a05880a
...@@ -2821,7 +2821,7 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *); ...@@ -2821,7 +2821,7 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *);
int dev_change_carrier(struct net_device *, bool new_carrier); int dev_change_carrier(struct net_device *, bool new_carrier);
int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_id(struct net_device *dev,
struct netdev_phys_port_id *ppid); struct netdev_phys_port_id *ppid);
struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, int *ret); struct netdev_queue *txq, int *ret);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
......
...@@ -99,7 +99,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab); ...@@ -99,7 +99,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab);
void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc); void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc);
int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev, struct netdev_queue *txq, struct net_device *dev, struct netdev_queue *txq,
spinlock_t *root_lock); spinlock_t *root_lock, bool validate);
void __qdisc_run(struct Qdisc *q); void __qdisc_run(struct Qdisc *q);
......
...@@ -2655,7 +2655,7 @@ struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t featur ...@@ -2655,7 +2655,7 @@ struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t featur
return skb; return skb;
} }
struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
{ {
netdev_features_t features; netdev_features_t features;
...@@ -2720,6 +2720,30 @@ struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) ...@@ -2720,6 +2720,30 @@ struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
return NULL; return NULL;
} }
/*
 * validate_xmit_skb_list - validate each skb of a ->next linked list
 * @skb: first skb of the list to validate (may be NULL)
 * @dev: device passed through to validate_xmit_skb()
 *
 * Every skb is unlinked from the input list and handed on its own to
 * validate_xmit_skb().  Whatever comes back is appended, in order, to a
 * new list; a NULL result simply contributes nothing.  The result of one
 * validation may itself be a chain linked through ->next, so the chain
 * is walked to its end before the next result is appended.
 *
 * Returns the head of the rebuilt list, or NULL when no skb survived.
 */
struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
{
	struct sk_buff *first = NULL;
	/* Slot that the next validated chain gets stored into. */
	struct sk_buff **link = &first;

	while (skb) {
		struct sk_buff *rest = skb->next;
		struct sk_buff *seg;

		/* Detach before validating so only this skb is processed. */
		skb->next = NULL;
		seg = validate_xmit_skb(skb, dev);
		skb = rest;

		if (!seg)
			continue;

		*link = seg;
		/* Advance to the last element of the chain just appended. */
		while (seg->next)
			seg = seg->next;
		link = &seg->next;
	}

	return first;
}
static void qdisc_pkt_len_init(struct sk_buff *skb) static void qdisc_pkt_len_init(struct sk_buff *skb)
{ {
const struct skb_shared_info *shinfo = skb_shinfo(skb); const struct skb_shared_info *shinfo = skb_shinfo(skb);
...@@ -2786,8 +2810,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, ...@@ -2786,8 +2810,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_bstats_update(q, skb); qdisc_bstats_update(q, skb);
skb = validate_xmit_skb(skb, dev); if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
if (skb && sch_direct_xmit(skb, q, dev, txq, root_lock)) {
if (unlikely(contended)) { if (unlikely(contended)) {
spin_unlock(&q->busylock); spin_unlock(&q->busylock);
contended = false; contended = false;
......
...@@ -56,40 +56,34 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) ...@@ -56,40 +56,34 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
return 0; return 0;
} }
static struct sk_buff *try_bulk_dequeue_skb(struct Qdisc *q, static void try_bulk_dequeue_skb(struct Qdisc *q,
struct sk_buff *head_skb, struct sk_buff *skb,
int bytelimit) const struct netdev_queue *txq)
{ {
struct sk_buff *skb, *tail_skb = head_skb; int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
while (bytelimit > 0) { while (bytelimit > 0) {
skb = q->dequeue(q); struct sk_buff *nskb = q->dequeue(q);
if (!skb)
break;
bytelimit -= skb->len; /* covers GSO len */ if (!nskb)
skb = validate_xmit_skb(skb, qdisc_dev(q));
if (!skb)
break; break;
while (tail_skb->next) /* GSO list goto tail */ bytelimit -= nskb->len; /* covers GSO len */
tail_skb = tail_skb->next; skb->next = nskb;
skb = nskb;
tail_skb->next = skb;
tail_skb = skb;
} }
skb->next = NULL;
return head_skb;
} }
/* Note that dequeue_skb can possibly return a SKB list (via skb->next). /* Note that dequeue_skb can possibly return a SKB list (via skb->next).
* A requeued skb (via q->gso_skb) can also be a SKB list. * A requeued skb (via q->gso_skb) can also be a SKB list.
*/ */
static inline struct sk_buff *dequeue_skb(struct Qdisc *q) static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
{ {
struct sk_buff *skb = q->gso_skb; struct sk_buff *skb = q->gso_skb;
const struct netdev_queue *txq = q->dev_queue; const struct netdev_queue *txq = q->dev_queue;
*validate = true;
if (unlikely(skb)) { if (unlikely(skb)) {
/* check the reason of requeuing without tx lock first */ /* check the reason of requeuing without tx lock first */
txq = skb_get_tx_queue(txq->dev, skb); txq = skb_get_tx_queue(txq->dev, skb);
...@@ -98,21 +92,16 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) ...@@ -98,21 +92,16 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
q->q.qlen--; q->q.qlen--;
} else } else
skb = NULL; skb = NULL;
/* skb in gso_skb were already validated */
*validate = false;
} else { } else {
if (!(q->flags & TCQ_F_ONETXQUEUE) || if (!(q->flags & TCQ_F_ONETXQUEUE) ||
!netif_xmit_frozen_or_stopped(txq)) { !netif_xmit_frozen_or_stopped(txq)) {
int bytelimit = qdisc_avail_bulklimit(txq);
skb = q->dequeue(q); skb = q->dequeue(q);
if (skb) {
bytelimit -= skb->len;
skb = validate_xmit_skb(skb, qdisc_dev(q));
}
if (skb && qdisc_may_bulk(q)) if (skb && qdisc_may_bulk(q))
skb = try_bulk_dequeue_skb(q, skb, bytelimit); try_bulk_dequeue_skb(q, skb, txq);
} }
} }
return skb; return skb;
} }
...@@ -156,19 +145,24 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, ...@@ -156,19 +145,24 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
*/ */
int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev, struct netdev_queue *txq, struct net_device *dev, struct netdev_queue *txq,
spinlock_t *root_lock) spinlock_t *root_lock, bool validate)
{ {
int ret = NETDEV_TX_BUSY; int ret = NETDEV_TX_BUSY;
/* And release qdisc */ /* And release qdisc */
spin_unlock(root_lock); spin_unlock(root_lock);
HARD_TX_LOCK(dev, txq, smp_processor_id()); /* Note that we validate skb (GSO, checksum, ...) outside of locks */
if (!netif_xmit_frozen_or_stopped(txq)) if (validate)
skb = dev_hard_start_xmit(skb, dev, txq, &ret); skb = validate_xmit_skb_list(skb, dev);
HARD_TX_UNLOCK(dev, txq); if (skb) {
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_stopped(txq))
skb = dev_hard_start_xmit(skb, dev, txq, &ret);
HARD_TX_UNLOCK(dev, txq);
}
spin_lock(root_lock); spin_lock(root_lock);
if (dev_xmit_complete(ret)) { if (dev_xmit_complete(ret)) {
...@@ -217,9 +211,10 @@ static inline int qdisc_restart(struct Qdisc *q) ...@@ -217,9 +211,10 @@ static inline int qdisc_restart(struct Qdisc *q)
struct net_device *dev; struct net_device *dev;
spinlock_t *root_lock; spinlock_t *root_lock;
struct sk_buff *skb; struct sk_buff *skb;
bool validate;
/* Dequeue packet */ /* Dequeue packet */
skb = dequeue_skb(q); skb = dequeue_skb(q, &validate);
if (unlikely(!skb)) if (unlikely(!skb))
return 0; return 0;
...@@ -229,7 +224,7 @@ static inline int qdisc_restart(struct Qdisc *q) ...@@ -229,7 +224,7 @@ static inline int qdisc_restart(struct Qdisc *q)
dev = qdisc_dev(q); dev = qdisc_dev(q);
txq = skb_get_tx_queue(dev, skb); txq = skb_get_tx_queue(dev, skb);
return sch_direct_xmit(skb, q, dev, txq, root_lock); return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
} }
void __qdisc_run(struct Qdisc *q) void __qdisc_run(struct Qdisc *q)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment