Commit d518d2ed authored by Paolo Abeni, committed by David S. Miller

net/sched: fix race between deactivation and dequeue for NOLOCK qdisc

The test implemented by some_qdisc_is_busy() is somewhat loose for
NOLOCK qdisc, as we may hit the following scenario:

CPU1						CPU2
// in net_tx_action()
clear_bit(__QDISC_STATE_SCHED...);
						// in some_qdisc_is_busy()
						val = (qdisc_is_running(q) ||
						       test_bit(__QDISC_STATE_SCHED,
								&q->state));
						// here val is 0 but...
qdisc_run(q)
// ... CPU1 is going to run the qdisc next

As a consequence, qdisc_run() in net_tx_action() can race with qdisc_reset()
in dev_qdisc_reset(). Such a race is not possible for !NOLOCK qdiscs, as
both of the above bit operations are performed under the root qdisc lock.

After commit 021a17ed ("pfifo_fast: drop unneeded additional lock on dequeue")
the race can cause use after free and/or null ptr dereference, but the root
cause is likely older.

This patch addresses the issue by explicitly checking for deactivation under
the seqlock for NOLOCK qdiscs, so that the qdisc_run() in the critical
scenario becomes a no-op.

Note that the enqueue() op can still execute concurrently with dev_qdisc_reset(),
but that is safe due to the skb_array() locking, and we can't avoid that
for NOLOCK qdiscs.

Fixes: 021a17ed ("pfifo_fast: drop unneeded additional lock on dequeue")
Reported-by: Li Shuang <shuali@redhat.com>
Reported-and-tested-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 1609d760
...@@ -118,7 +118,12 @@ void __qdisc_run(struct Qdisc *q); ...@@ -118,7 +118,12 @@ void __qdisc_run(struct Qdisc *q);
static inline void qdisc_run(struct Qdisc *q) static inline void qdisc_run(struct Qdisc *q)
{ {
if (qdisc_run_begin(q)) { if (qdisc_run_begin(q)) {
__qdisc_run(q); /* NOLOCK qdisc must check 'state' under the qdisc seqlock
* to avoid racing with dev_qdisc_reset()
*/
if (!(q->flags & TCQ_F_NOLOCK) ||
likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
__qdisc_run(q);
qdisc_run_end(q); qdisc_run_end(q);
} }
} }
......
...@@ -3467,18 +3467,22 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, ...@@ -3467,18 +3467,22 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q); qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) { if (q->flags & TCQ_F_NOLOCK) {
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
__qdisc_drop(skb, &to_free); qdisc_run_begin(q)) {
rc = NET_XMIT_DROP; if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
} else if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty && &q->state))) {
qdisc_run_begin(q)) { __qdisc_drop(skb, &to_free);
rc = NET_XMIT_DROP;
goto end_run;
}
qdisc_bstats_cpu_update(q, skb); qdisc_bstats_cpu_update(q, skb);
rc = NET_XMIT_SUCCESS;
if (sch_direct_xmit(skb, q, dev, txq, NULL, true)) if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
__qdisc_run(q); __qdisc_run(q);
end_run:
qdisc_run_end(q); qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else { } else {
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
qdisc_run(q); qdisc_run(q);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment