Commit d7ce3692 authored by Eric Dumazet, committed by Linus Torvalds

dump_stack: avoid potential deadlocks

Some servers experienced fatal deadlocks because of a combination of
bugs, leading to multiple cpus calling dump_stack().

The checksumming bug was fixed in commit 34ae6a1a ("ipv6: update
skb->csum when CE mark is propagated").

The second problem is faulty locking in dump_stack().

CPU1 runs in process context, calls dump_stack(), and grabs dump_lock.

   CPU2 receives a TCP packet under softirq, grabs the socket spinlock, and
   calls dump_stack() from netdev_rx_csum_fault().

   dump_stack() spins on atomic_cmpxchg(&dump_lock, -1, 2), since
   dump_lock is owned by CPU1.

While dumping its stack, CPU1 is interrupted by a softirq, and happens
to process a packet for the TCP socket locked by CPU2.

CPU1 spins forever in spin_lock(): deadlock.
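For context, the serialisation that allows this looked roughly as follows
before the patch (a sketch reconstructed from the old side of the diff
below; __dump_stack() is the printing helper in lib/dump_stack.c):

	preempt_disable();	/* pins the task to this cpu, but irqs stay enabled */
retry:
	cpu = smp_processor_id();
	old = atomic_cmpxchg(&dump_lock, -1, cpu);
	if (old == -1) {
		was_locked = 0;		/* we now own dump_lock */
	} else if (old == cpu) {
		was_locked = 1;		/* nested dump on this cpu, allowed */
	} else {
		cpu_relax();
		goto retry;		/* spin until the other cpu releases it */
	}

	__dump_stack();			/* print the actual backtrace */

	if (!was_locked)
		atomic_set(&dump_lock, -1);
	preempt_enable();

Because preempt_disable() leaves interrupts enabled, a softirq can land
between the atomic_cmpxchg() and the final atomic_set() and take arbitrary
locks (here the socket spinlock) while dump_lock is still held, completing
the circular wait described above.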

The stack trace on CPU1 looked like:

    NMI backtrace for cpu 1
    RIP: _raw_spin_lock+0x25/0x30
    ...
    Call Trace:
      <IRQ>
      tcp_v6_rcv+0x243/0x620
      ip6_input_finish+0x11f/0x330
      ip6_input+0x38/0x40
      ip6_rcv_finish+0x3c/0x90
      ipv6_rcv+0x2a9/0x500
      process_backlog+0x461/0xaa0
      net_rx_action+0x147/0x430
      __do_softirq+0x167/0x2d0
      call_softirq+0x1c/0x30
      do_softirq+0x3f/0x80
      irq_exit+0x6e/0xc0
      smp_call_function_single_interrupt+0x35/0x40
      call_function_single_interrupt+0x6a/0x70
      <EOI>
      printk+0x4d/0x4f
      printk_address+0x31/0x33
      print_trace_address+0x33/0x3c
      print_context_stack+0x7f/0x119
      dump_trace+0x26b/0x28e
      show_trace_log_lvl+0x4f/0x5c
      show_stack_log_lvl+0x104/0x113
      show_stack+0x42/0x44
      dump_stack+0x46/0x58
      netdev_rx_csum_fault+0x38/0x3c
      __skb_checksum_complete_head+0x6e/0x80
      __skb_checksum_complete+0x11/0x20
      tcp_rcv_established+0x2bd5/0x2fd0
      tcp_v6_do_rcv+0x13c/0x620
      sk_backlog_rcv+0x15/0x30
      release_sock+0xd2/0x150
      tcp_recvmsg+0x1c1/0xfc0
      inet_recvmsg+0x7d/0x90
      sock_recvmsg+0xaf/0xe0
      ___sys_recvmsg+0x111/0x3b0
      SyS_recvmsg+0x5c/0xb0
      system_call_fastpath+0x16/0x1b

Fixes: b58d9774 ("dump_stack: serialize the output from dump_stack()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alex Thorlton <athorlton@sgi.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent acf128d0
lib/dump_stack.c
@@ -25,6 +25,7 @@ static atomic_t dump_lock = ATOMIC_INIT(-1);
 
 asmlinkage __visible void dump_stack(void)
 {
+	unsigned long flags;
 	int was_locked;
 	int old;
 	int cpu;
@@ -33,9 +34,8 @@ asmlinkage __visible void dump_stack(void)
 	 * Permit this cpu to perform nested stack dumps while serialising
 	 * against other CPUs
 	 */
-	preempt_disable();
-
 retry:
+	local_irq_save(flags);
 	cpu = smp_processor_id();
 	old = atomic_cmpxchg(&dump_lock, -1, cpu);
 	if (old == -1) {
@@ -43,6 +43,7 @@ asmlinkage __visible void dump_stack(void)
 	} else if (old == cpu) {
 		was_locked = 1;
 	} else {
+		local_irq_restore(flags);
 		cpu_relax();
 		goto retry;
 	}
@@ -52,7 +53,7 @@ asmlinkage __visible void dump_stack(void)
 	if (!was_locked)
 		atomic_set(&dump_lock, -1);
 
-	preempt_enable();
+	local_irq_restore(flags);
 }
 #else
 asmlinkage __visible void dump_stack(void)
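Assembled from the hunks above, the patched function ends up roughly as
follows (a sketch; context lines elided by the diff are approximated):

asmlinkage __visible void dump_stack(void)
{
	unsigned long flags;
	int was_locked;
	int old;
	int cpu;

	/*
	 * Permit this cpu to perform nested stack dumps while serialising
	 * against other CPUs
	 */
retry:
	local_irq_save(flags);		/* irqs off: no softirq can preempt the dump */
	cpu = smp_processor_id();
	old = atomic_cmpxchg(&dump_lock, -1, cpu);
	if (old == -1) {
		was_locked = 0;
	} else if (old == cpu) {
		was_locked = 1;
	} else {
		local_irq_restore(flags);	/* don't spin with irqs off */
		cpu_relax();
		goto retry;
	}

	__dump_stack();

	if (!was_locked)
		atomic_set(&dump_lock, -1);

	local_irq_restore(flags);
}

With interrupts disabled for as long as dump_lock is held, the softirq in
the scenario above can no longer interrupt CPU1 mid-dump, which breaks the
cycle; re-enabling irqs around cpu_relax() keeps a waiting cpu from
blocking interrupts indefinitely while another cpu finishes its dump.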