Commit 1ccd85f5 authored by Petr Mladek's avatar Petr Mladek

Merge branch 'for-5.18-panic-deadlocks' into for-linus

parents 0834c6f0 ce06e863
...@@ -93,6 +93,12 @@ EXPORT_SYMBOL_GPL(console_drivers); ...@@ -93,6 +93,12 @@ EXPORT_SYMBOL_GPL(console_drivers);
*/ */
int __read_mostly suppress_printk; int __read_mostly suppress_printk;
/*
* During panic, heavy printk by other CPUs can delay the
* panic and risk deadlock on console resources.
*/
static int __read_mostly suppress_panic_printk;
#ifdef CONFIG_LOCKDEP #ifdef CONFIG_LOCKDEP
static struct lockdep_map console_lock_dep_map = { static struct lockdep_map console_lock_dep_map = {
.name = "console_lock" .name = "console_lock"
...@@ -258,6 +264,11 @@ static void __up_console_sem(unsigned long ip) ...@@ -258,6 +264,11 @@ static void __up_console_sem(unsigned long ip)
} }
#define up_console_sem() __up_console_sem(_RET_IP_) #define up_console_sem() __up_console_sem(_RET_IP_)
static bool panic_in_progress(void)
{
return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
}
/* /*
* This is used for debugging the mess that is the VT code by * This is used for debugging the mess that is the VT code by
* keeping track if we have the console semaphore held. It's * keeping track if we have the console semaphore held. It's
...@@ -1844,6 +1855,16 @@ static int console_trylock_spinning(void) ...@@ -1844,6 +1855,16 @@ static int console_trylock_spinning(void)
if (console_trylock()) if (console_trylock())
return 1; return 1;
/*
* It's unsafe to spin once a panic has begun. If we are the
* panic CPU, we may have already halted the owner of the
* console_sem. If we are not the panic CPU, then we should
* avoid taking console_sem, so the panic CPU has a better
* chance of cleanly acquiring it later.
*/
if (panic_in_progress())
return 0;
printk_safe_enter_irqsave(flags); printk_safe_enter_irqsave(flags);
raw_spin_lock(&console_owner_lock); raw_spin_lock(&console_owner_lock);
...@@ -2219,6 +2240,10 @@ asmlinkage int vprintk_emit(int facility, int level, ...@@ -2219,6 +2240,10 @@ asmlinkage int vprintk_emit(int facility, int level,
if (unlikely(suppress_printk)) if (unlikely(suppress_printk))
return 0; return 0;
if (unlikely(suppress_panic_printk) &&
atomic_read(&panic_cpu) != raw_smp_processor_id())
return 0;
if (level == LOGLEVEL_SCHED) { if (level == LOGLEVEL_SCHED) {
level = LOGLEVEL_DEFAULT; level = LOGLEVEL_DEFAULT;
in_sched = true; in_sched = true;
...@@ -2586,6 +2611,25 @@ static int have_callable_console(void) ...@@ -2586,6 +2611,25 @@ static int have_callable_console(void)
return 0; return 0;
} }
/*
* Return true when this CPU should unlock console_sem without pushing all
* messages to the console. This reduces the chance that the console is
* locked when the panic CPU tries to use it.
*/
static bool abandon_console_lock_in_panic(void)
{
if (!panic_in_progress())
return false;
/*
* We can use raw_smp_processor_id() here because it is impossible for
* the task to be migrated to the panic_cpu, or away from it. If
* panic_cpu has already been set, and we're not currently executing on
* that CPU, then we never will be.
*/
return atomic_read(&panic_cpu) != raw_smp_processor_id();
}
/* /*
* Can we actually use the console at this time on this cpu? * Can we actually use the console at this time on this cpu?
* *
...@@ -2616,6 +2660,7 @@ void console_unlock(void) ...@@ -2616,6 +2660,7 @@ void console_unlock(void)
{ {
static char ext_text[CONSOLE_EXT_LOG_MAX]; static char ext_text[CONSOLE_EXT_LOG_MAX];
static char text[CONSOLE_LOG_MAX]; static char text[CONSOLE_LOG_MAX];
static int panic_console_dropped;
unsigned long flags; unsigned long flags;
bool do_cond_resched, retry; bool do_cond_resched, retry;
struct printk_info info; struct printk_info info;
...@@ -2670,6 +2715,10 @@ void console_unlock(void) ...@@ -2670,6 +2715,10 @@ void console_unlock(void)
if (console_seq != r.info->seq) { if (console_seq != r.info->seq) {
console_dropped += r.info->seq - console_seq; console_dropped += r.info->seq - console_seq;
console_seq = r.info->seq; console_seq = r.info->seq;
if (panic_in_progress() && panic_console_dropped++ > 10) {
suppress_panic_printk = 1;
pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");
}
} }
if (suppress_message_printing(r.info->level)) { if (suppress_message_printing(r.info->level)) {
...@@ -2729,6 +2778,10 @@ void console_unlock(void) ...@@ -2729,6 +2778,10 @@ void console_unlock(void)
if (handover) if (handover)
return; return;
/* Allow panic_cpu to take over the consoles safely */
if (abandon_console_lock_in_panic())
break;
if (do_cond_resched) if (do_cond_resched)
cond_resched(); cond_resched();
} }
...@@ -2746,7 +2799,7 @@ void console_unlock(void) ...@@ -2746,7 +2799,7 @@ void console_unlock(void)
* flush, no worries. * flush, no worries.
*/ */
retry = prb_read_valid(prb, next_seq, NULL); retry = prb_read_valid(prb, next_seq, NULL);
if (retry && console_trylock()) if (retry && !abandon_console_lock_in_panic() && console_trylock())
goto again; goto again;
} }
EXPORT_SYMBOL(console_unlock); EXPORT_SYMBOL(console_unlock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment