Commit e722db8d authored by Sebastian Andrzej Siewior, committed by David S. Miller

net: dev: Make rps_lock() disable interrupts.

Disabling interrupts and, in the RPS case, locking input_pkt_queue is
split into local_irq_disable() and an optional spin_lock().

This breaks on PREEMPT_RT because the spinlock_t typed lock cannot be
acquired with interrupts disabled.
The sections in which the lock is acquired are usually short, in the sense
that they do not cause long and unbounded latencies. One exception is the
skb_flow_limit() invocation, which may invoke a BPF program (and may
require sleeping locks).

By moving local_irq_disable() + spin_lock() into rps_lock(), we can keep
interrupts disabled on !PREEMPT_RT and enabled on PREEMPT_RT kernels.
Without RPS on a PREEMPT_RT kernel, the needed synchronisation happens
as part of local_bh_disable() on the local CPU.
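
For reference, the acquire side of the new helpers (shown in full in the
diff below; comments added here for clarity) reduces to:

/* With RPS, take the input_pkt_queue lock: spin_lock_irqsave() disables
 * interrupts on !PREEMPT_RT, while on PREEMPT_RT the spinlock_t is a
 * sleeping lock and interrupts stay enabled.
 * Without RPS, only !PREEMPT_RT needs interrupts disabled; on PREEMPT_RT
 * the local_bh_disable() in the callers already serialises access on the
 * local CPU.
 */
static inline void rps_lock_irqsave(struct softnet_data *sd,
                                    unsigned long *flags)
{
        if (IS_ENABLED(CONFIG_RPS))
                spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
        else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                local_irq_save(*flags);
}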
____napi_schedule() is only invoked if sd is from the local CPU. Replace
it with __napi_schedule_irqoff() which already disables interrupts on
PREEMPT_RT as needed. Move this call to rps_ipi_queued() and rename the
function to napi_schedule_rps as suggested by Jakub.
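
For reference, the resulting scheduling helper (condensed from the diff
below; the RPS IPI-list handling itself is unchanged by this patch) looks
like:

static int napi_schedule_rps(struct softnet_data *sd)
{
        struct softnet_data *mysd = this_cpu_ptr(&softnet_data);

#ifdef CONFIG_RPS
        if (sd != mysd) {
                /* Remote CPU: chain sd onto this CPU's RPS IPI list;
                 * the IPI is sent later from the RX softirq.
                 */
                sd->rps_ipi_next = mysd->rps_ipi_list;
                mysd->rps_ipi_list = sd;

                __raise_softirq_irqoff(NET_RX_SOFTIRQ);
                return 1;
        }
#endif /* CONFIG_RPS */
        /* sd belongs to the local CPU: schedule its backlog NAPI directly.
         * __napi_schedule_irqoff() disables interrupts on PREEMPT_RT itself.
         */
        __napi_schedule_irqoff(&mysd->backlog);
        return 0;
}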
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent baebdf48
net/core/dev.c

@@ -216,18 +216,38 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
         return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
-static inline void rps_lock(struct softnet_data *sd)
+static inline void rps_lock_irqsave(struct softnet_data *sd,
+                                    unsigned long *flags)
 {
-#ifdef CONFIG_RPS
-        spin_lock(&sd->input_pkt_queue.lock);
-#endif
+        if (IS_ENABLED(CONFIG_RPS))
+                spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
+        else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+                local_irq_save(*flags);
 }
 
-static inline void rps_unlock(struct softnet_data *sd)
+static inline void rps_lock_irq_disable(struct softnet_data *sd)
 {
-#ifdef CONFIG_RPS
-        spin_unlock(&sd->input_pkt_queue.lock);
-#endif
+        if (IS_ENABLED(CONFIG_RPS))
+                spin_lock_irq(&sd->input_pkt_queue.lock);
+        else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+                local_irq_disable();
+}
+
+static inline void rps_unlock_irq_restore(struct softnet_data *sd,
+                                          unsigned long *flags)
+{
+        if (IS_ENABLED(CONFIG_RPS))
+                spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags);
+        else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+                local_irq_restore(*flags);
+}
+
+static inline void rps_unlock_irq_enable(struct softnet_data *sd)
+{
+        if (IS_ENABLED(CONFIG_RPS))
+                spin_unlock_irq(&sd->input_pkt_queue.lock);
+        else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+                local_irq_enable();
 }
 
 static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,

@@ -4456,11 +4476,11 @@ static void rps_trigger_softirq(void *data)
  * If yes, queue it to our IPI list and return 1
  * If no, return 0
  */
-static int rps_ipi_queued(struct softnet_data *sd)
+static int napi_schedule_rps(struct softnet_data *sd)
 {
-#ifdef CONFIG_RPS
         struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
 
+#ifdef CONFIG_RPS
         if (sd != mysd) {
                 sd->rps_ipi_next = mysd->rps_ipi_list;
                 mysd->rps_ipi_list = sd;

@@ -4469,6 +4489,7 @@ static int rps_ipi_queued(struct softnet_data *sd)
                 return 1;
         }
 #endif /* CONFIG_RPS */
+        __napi_schedule_irqoff(&mysd->backlog);
         return 0;
 }

@@ -4525,9 +4546,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
         sd = &per_cpu(softnet_data, cpu);
 
-        local_irq_save(flags);
-
-        rps_lock(sd);
+        rps_lock_irqsave(sd, &flags);
         if (!netif_running(skb->dev))
                 goto drop;
         qlen = skb_queue_len(&sd->input_pkt_queue);

@@ -4536,26 +4555,21 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 enqueue:
                         __skb_queue_tail(&sd->input_pkt_queue, skb);
                         input_queue_tail_incr_save(sd, qtail);
-                        rps_unlock(sd);
-                        local_irq_restore(flags);
+                        rps_unlock_irq_restore(sd, &flags);
                         return NET_RX_SUCCESS;
                 }
 
                 /* Schedule NAPI for backlog device
                  * We can use non atomic operation since we own the queue lock
                  */
-                if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
-                        if (!rps_ipi_queued(sd))
-                                ____napi_schedule(sd, &sd->backlog);
-                }
+                if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
+                        napi_schedule_rps(sd);
                 goto enqueue;
         }
 
 drop:
         sd->dropped++;
-        rps_unlock(sd);
-        local_irq_restore(flags);
+        rps_unlock_irq_restore(sd, &flags);
 
         atomic_long_inc(&skb->dev->rx_dropped);
         kfree_skb(skb);

@@ -5638,8 +5652,7 @@ static void flush_backlog(struct work_struct *work)
         local_bh_disable();
         sd = this_cpu_ptr(&softnet_data);
 
-        local_irq_disable();
-        rps_lock(sd);
+        rps_lock_irq_disable(sd);
         skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
                 if (skb->dev->reg_state == NETREG_UNREGISTERING) {
                         __skb_unlink(skb, &sd->input_pkt_queue);

@@ -5647,8 +5660,7 @@ static void flush_backlog(struct work_struct *work)
                         input_queue_head_incr(sd);
                 }
         }
-        rps_unlock(sd);
-        local_irq_enable();
+        rps_unlock_irq_enable(sd);
 
         skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
                 if (skb->dev->reg_state == NETREG_UNREGISTERING) {

@@ -5666,16 +5678,14 @@ static bool flush_required(int cpu)
         struct softnet_data *sd = &per_cpu(softnet_data, cpu);
         bool do_flush;
 
-        local_irq_disable();
-        rps_lock(sd);
+        rps_lock_irq_disable(sd);
 
         /* as insertion into process_queue happens with the rps lock held,
          * process_queue access may race only with dequeue
          */
         do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
                    !skb_queue_empty_lockless(&sd->process_queue);
-        rps_unlock(sd);
-        local_irq_enable();
+        rps_unlock_irq_enable(sd);
 
         return do_flush;
 #endif

@@ -5790,8 +5800,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
                 }
 
-                local_irq_disable();
-                rps_lock(sd);
+                rps_lock_irq_disable(sd);
                 if (skb_queue_empty(&sd->input_pkt_queue)) {
                         /*
                          * Inline a custom version of __napi_complete().

@@ -5807,8 +5816,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
                         skb_queue_splice_tail_init(&sd->input_pkt_queue,
                                                    &sd->process_queue);
                 }
-                rps_unlock(sd);
-                local_irq_enable();
+                rps_unlock_irq_enable(sd);
         }
 
         return work;