Commit 05a4a952 authored by Nicholas Piggin, committed by Linus Torvalds

kernel/watchdog: split up config options

Split SOFTLOCKUP_DETECTOR from LOCKUP_DETECTOR, and split
HARDLOCKUP_DETECTOR_PERF from HARDLOCKUP_DETECTOR.

LOCKUP_DETECTOR implies the general boot, sysctl, and programming
interfaces for the lockup detectors.

An architecture that wants to use a hard lockup detector must define
HAVE_HARDLOCKUP_DETECTOR_PERF or HAVE_HARDLOCKUP_DETECTOR_ARCH.
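
As an illustration (not part of this patch), a minimal Kconfig sketch of an
arch opting in to the generic perf-based detector; the arch name FOO is made
up, and powerpc and x86 in this patch do exactly this:

	config FOO
		bool
		select HAVE_PERF_EVENTS_NMI
		select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI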

Alternatively an arch can define HAVE_NMI_WATCHDOG, which provides the
minimum arch_touch_nmi_watchdog, and it otherwise does its own thing and
does not implement the LOCKUP_DETECTOR interfaces.
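
A rough sketch of that minimal contract, where everything except
arch_touch_nmi_watchdog() itself is hypothetical arch-private detail:

	/* arch/foo/include/asm/nmi.h -- hypothetical HAVE_NMI_WATCHDOG arch */
	extern void arch_touch_nmi_watchdog(void);

	/* arch/foo/kernel/nmi.c */
	#include <linux/export.h>
	#include <linux/percpu.h>
	#include <asm/nmi.h>

	/* consumed and cleared by the arch's own NMI tick handler */
	static DEFINE_PER_CPU(bool, nmi_touched);

	void arch_touch_nmi_watchdog(void)
	{
		/* tell the next NMI on this CPU to skip the lockup check */
		raw_cpu_write(nmi_touched, true);
	}
	EXPORT_SYMBOL(arch_touch_nmi_watchdog);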

sparc is unusual in that it has started to implement some of the
interfaces, but not fully yet.  It should probably be converted to a full
HAVE_HARDLOCKUP_DETECTOR_ARCH.

[npiggin@gmail.com: fix]
  Link: http://lkml.kernel.org/r/20170617223522.66c0ad88@roar.ozlabs.ibm.com
Link: http://lkml.kernel.org/r/20170616065715.18390-4-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Tested-by: Babu Moger <babu.moger@oracle.com>	[sparc]
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent f2e0cff8
@@ -198,9 +198,6 @@ config HAVE_KPROBES_ON_FTRACE
 config HAVE_NMI
 	bool
 
-config HAVE_NMI_WATCHDOG
-	depends on HAVE_NMI
-	bool
-
 #
 # An arch should select this if it provides all these things:
 #
@@ -288,6 +285,28 @@ config HAVE_PERF_EVENTS_NMI
 	  subsystem. Also has support for calculating CPU cycle events
 	  to determine how many clock cycles in a given period.
 
+config HAVE_HARDLOCKUP_DETECTOR_PERF
+	bool
+	depends on HAVE_PERF_EVENTS_NMI
+	help
+	  The arch chooses to use the generic perf-NMI-based hardlockup
+	  detector. Must define HAVE_PERF_EVENTS_NMI.
+
+config HAVE_NMI_WATCHDOG
+	depends on HAVE_NMI
+	bool
+	help
+	  The arch provides a low level NMI watchdog. It provides
+	  asm/nmi.h, and defines its own arch_touch_nmi_watchdog().
+
+config HAVE_HARDLOCKUP_DETECTOR_ARCH
+	bool
+	select HAVE_NMI_WATCHDOG
+	help
+	  The arch chooses to provide its own hardlockup detector, which is
+	  a superset of the HAVE_NMI_WATCHDOG. It also conforms to config
+	  interfaces and parameters provided by hardlockup detector subsystem.
+
 config HAVE_PERF_REGS
 	bool
 	help
...
@@ -197,6 +197,7 @@ config PPC
 	select HAVE_OPTPROBES if PPC64
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI if PPC64
+	select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_RCU_TABLE_FREE if SMP
...
@@ -752,7 +752,7 @@ struct ppc_pci_io ppc_pci_io;
 EXPORT_SYMBOL(ppc_pci_io);
 #endif
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 u64 hw_nmi_get_sample_period(int watchdog_thresh)
 {
 	return ppc_proc_freq * watchdog_thresh;
...
@@ -162,6 +162,7 @@ config X86
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
+	select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
...
@@ -19,7 +19,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 u64 hw_nmi_get_sample_period(int watchdog_thresh)
 {
 	return (u64)(cpu_khz) * 1000 * watchdog_thresh;
...
@@ -11,13 +11,21 @@
 #endif
 
 #ifdef CONFIG_LOCKUP_DETECTOR
+void lockup_detector_init(void);
+#else
+static inline void lockup_detector_init(void)
+{
+}
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 extern void touch_softlockup_watchdog_sched(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern unsigned int softlockup_panic;
-extern unsigned int hardlockup_panic;
-void lockup_detector_init(void);
+extern int soft_watchdog_enabled;
+extern atomic_t watchdog_park_in_progress;
 #else
 static inline void touch_softlockup_watchdog_sched(void)
 {
@@ -31,9 +39,6 @@ static inline void touch_softlockup_watchdog_sync(void)
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
-
-static inline void lockup_detector_init(void)
-{
-}
 #endif
 
 #ifdef CONFIG_DETECT_HUNG_TASK
@@ -63,15 +68,18 @@ static inline void reset_hung_task_detector(void)
 
 #if defined(CONFIG_HARDLOCKUP_DETECTOR)
 extern void hardlockup_detector_disable(void);
+extern unsigned int hardlockup_panic;
 #else
 static inline void hardlockup_detector_disable(void) {}
 #endif
 
-#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
 extern void arch_touch_nmi_watchdog(void);
 #else
+#if !defined(CONFIG_HAVE_NMI_WATCHDOG)
 static inline void arch_touch_nmi_watchdog(void) {}
 #endif
+#endif
 
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
@@ -141,15 +149,18 @@ static inline bool trigger_single_cpu_backtrace(int cpu)
 }
 #endif
 
-#ifdef CONFIG_LOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 u64 hw_nmi_get_sample_period(int watchdog_thresh);
+#endif
+
+#ifdef CONFIG_LOCKUP_DETECTOR
 extern int nmi_watchdog_enabled;
-extern int soft_watchdog_enabled;
 extern int watchdog_user_enabled;
 extern int watchdog_thresh;
 extern unsigned long watchdog_enabled;
+extern struct cpumask watchdog_cpumask;
 extern unsigned long *watchdog_cpumask_bits;
-extern atomic_t watchdog_park_in_progress;
+extern int __read_mostly watchdog_suspended;
 #ifdef CONFIG_SMP
 extern int sysctl_softlockup_all_cpu_backtrace;
 extern int sysctl_hardlockup_all_cpu_backtrace;
...
@@ -82,7 +82,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
-obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
...
@@ -900,6 +900,14 @@ static struct ctl_table kern_table[] = {
 		.extra2		= &zero,
 #endif
 	},
+	{
+		.procname	= "watchdog_cpumask",
+		.data		= &watchdog_cpumask_bits,
+		.maxlen		= NR_CPUS,
+		.mode		= 0644,
+		.proc_handler	= proc_watchdog_cpumask,
+	},
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 	{
 		.procname	= "soft_watchdog",
 		.data		= &soft_watchdog_enabled,
@@ -909,13 +917,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-	{
-		.procname	= "watchdog_cpumask",
-		.data		= &watchdog_cpumask_bits,
-		.maxlen		= NR_CPUS,
-		.mode		= 0644,
-		.proc_handler	= proc_watchdog_cpumask,
-	},
 	{
 		.procname	= "softlockup_panic",
 		.data		= &softlockup_panic,
@@ -925,27 +926,29 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_SMP
 	{
-		.procname	= "hardlockup_panic",
-		.data		= &hardlockup_panic,
+		.procname	= "softlockup_all_cpu_backtrace",
+		.data		= &sysctl_softlockup_all_cpu_backtrace,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#endif /* CONFIG_SMP */
 #endif
-#ifdef CONFIG_SMP
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 	{
-		.procname	= "softlockup_all_cpu_backtrace",
-		.data		= &sysctl_softlockup_all_cpu_backtrace,
+		.procname	= "hardlockup_panic",
+		.data		= &hardlockup_panic,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#ifdef CONFIG_SMP
 	{
 		.procname	= "hardlockup_all_cpu_backtrace",
 		.data		= &sysctl_hardlockup_all_cpu_backtrace,
@@ -957,6 +960,8 @@ static struct ctl_table kern_table[] = {
 	},
 #endif /* CONFIG_SMP */
 #endif
+#endif
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 	{
 		.procname	= "unknown_nmi_panic",
...
@@ -29,15 +29,58 @@
 #include <linux/kvm_para.h>
 #include <linux/kthread.h>
 
+/* Watchdog configuration */
 static DEFINE_MUTEX(watchdog_proc_mutex);
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+
+int __read_mostly nmi_watchdog_enabled;
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED |
+						NMI_WATCHDOG_ENABLED;
 #else
 unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
 #endif
-int __read_mostly nmi_watchdog_enabled;
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+unsigned int __read_mostly hardlockup_panic =
+			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void hardlockup_detector_disable(void)
+{
+	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+}
+
+static int __init hardlockup_panic_setup(char *str)
+{
+	if (!strncmp(str, "panic", 5))
+		hardlockup_panic = 1;
+	else if (!strncmp(str, "nopanic", 7))
+		hardlockup_panic = 0;
+	else if (!strncmp(str, "0", 1))
+		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+	else if (!strncmp(str, "1", 1))
+		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+	return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 int __read_mostly soft_watchdog_enabled;
+#endif
+
 int __read_mostly watchdog_user_enabled;
 int __read_mostly watchdog_thresh = 10;
@@ -45,15 +88,9 @@ int __read_mostly watchdog_thresh = 10;
 int __read_mostly sysctl_softlockup_all_cpu_backtrace;
 int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
 #endif
-static struct cpumask watchdog_cpumask __read_mostly;
+struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 
-/* Helper for online, unparked cpus. */
-#define for_each_watchdog_cpu(cpu) \
-	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
-
-atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
-
 /*
  * The 'watchdog_running' variable is set to 1 when the watchdog threads
  * are registered/started and is set to 0 when the watchdog threads are
@@ -72,7 +109,27 @@ static int __read_mostly watchdog_running;
  * of 'watchdog_running' cannot change while the watchdog is deactivated
  * temporarily (see related code in 'proc' handlers).
  */
-static int __read_mostly watchdog_suspended;
+int __read_mostly watchdog_suspended;
+
+/*
+ * These functions can be overridden if an architecture implements its
+ * own hardlockup detector.
+ */
+int __weak watchdog_nmi_enable(unsigned int cpu)
+{
+	return 0;
+}
+
+void __weak watchdog_nmi_disable(unsigned int cpu)
+{
+}
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+
+/* Helper for online, unparked cpus. */
+#define for_each_watchdog_cpu(cpu) \
+	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
+
+atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
+
 static u64 __read_mostly sample_period;
@@ -120,6 +177,7 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str)
 	return 1;
 }
 __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 static int __init hardlockup_all_cpu_backtrace_setup(char *str)
 {
 	sysctl_hardlockup_all_cpu_backtrace =
@@ -128,6 +186,7 @@ static int __init hardlockup_all_cpu_backtrace_setup(char *str)
 }
 __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
 #endif
+#endif
 
 /*
  * Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -213,18 +272,6 @@ void touch_softlockup_watchdog_sync(void)
 	__this_cpu_write(watchdog_touch_ts, 0);
 }
 
-/* watchdog detector functions */
-bool is_hardlockup(void)
-{
-	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-
-	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
-		return true;
-
-	__this_cpu_write(hrtimer_interrupts_saved, hrint);
-	return false;
-}
-
 static int is_softlockup(unsigned long touch_ts)
 {
 	unsigned long now = get_timestamp();
@@ -237,21 +284,21 @@ static int is_softlockup(unsigned long touch_ts)
 	return 0;
 }
 
-static void watchdog_interrupt_count(void)
-{
-	__this_cpu_inc(hrtimer_interrupts);
-}
-
-/*
- * These two functions are mostly architecture specific
- * defining them as weak here.
- */
-int __weak watchdog_nmi_enable(unsigned int cpu)
-{
-	return 0;
-}
-void __weak watchdog_nmi_disable(unsigned int cpu)
+/* watchdog detector functions */
+bool is_hardlockup(void)
+{
+	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
+
+	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
+		return true;
+
+	__this_cpu_write(hrtimer_interrupts_saved, hrint);
+	return false;
+}
+
+static void watchdog_interrupt_count(void)
 {
+	__this_cpu_inc(hrtimer_interrupts);
 }
 
 static int watchdog_enable_all_cpus(void);
@@ -502,57 +549,6 @@ static void watchdog_unpark_threads(void)
 		kthread_unpark(per_cpu(softlockup_watchdog, cpu));
 }
 
-/*
- * Suspend the hard and soft lockup detector by parking the watchdog threads.
- */
-int lockup_detector_suspend(void)
-{
-	int ret = 0;
-
-	get_online_cpus();
-	mutex_lock(&watchdog_proc_mutex);
-	/*
-	 * Multiple suspend requests can be active in parallel (counted by
-	 * the 'watchdog_suspended' variable). If the watchdog threads are
-	 * running, the first caller takes care that they will be parked.
-	 * The state of 'watchdog_running' cannot change while a suspend
-	 * request is active (see related code in 'proc' handlers).
-	 */
-	if (watchdog_running && !watchdog_suspended)
-		ret = watchdog_park_threads();
-
-	if (ret == 0)
-		watchdog_suspended++;
-	else {
-		watchdog_disable_all_cpus();
-		pr_err("Failed to suspend lockup detectors, disabled\n");
-		watchdog_enabled = 0;
-	}
-
-	mutex_unlock(&watchdog_proc_mutex);
-
-	return ret;
-}
-
-/*
- * Resume the hard and soft lockup detector by unparking the watchdog threads.
- */
-void lockup_detector_resume(void)
-{
-	mutex_lock(&watchdog_proc_mutex);
-
-	watchdog_suspended--;
-	/*
-	 * The watchdog threads are unparked if they were previously running
-	 * and if there is no more active suspend request.
-	 */
-	if (watchdog_running && !watchdog_suspended)
-		watchdog_unpark_threads();
-
-	mutex_unlock(&watchdog_proc_mutex);
-	put_online_cpus();
-}
-
 static int update_watchdog_all_cpus(void)
 {
 	int ret;
@@ -604,6 +600,81 @@ static void watchdog_disable_all_cpus(void)
 	}
 }
 
+#else /* SOFTLOCKUP */
+static int watchdog_park_threads(void)
+{
+	return 0;
+}
+
+static void watchdog_unpark_threads(void)
+{
+}
+
+static int watchdog_enable_all_cpus(void)
+{
+	return 0;
+}
+
+static void watchdog_disable_all_cpus(void)
+{
+}
+
+static void set_sample_period(void)
+{
+}
+#endif /* SOFTLOCKUP */
+
+/*
+ * Suspend the hard and soft lockup detector by parking the watchdog threads.
+ */
+int lockup_detector_suspend(void)
+{
+	int ret = 0;
+
+	get_online_cpus();
+	mutex_lock(&watchdog_proc_mutex);
+	/*
+	 * Multiple suspend requests can be active in parallel (counted by
+	 * the 'watchdog_suspended' variable). If the watchdog threads are
+	 * running, the first caller takes care that they will be parked.
+	 * The state of 'watchdog_running' cannot change while a suspend
+	 * request is active (see related code in 'proc' handlers).
+	 */
+	if (watchdog_running && !watchdog_suspended)
+		ret = watchdog_park_threads();
+
+	if (ret == 0)
+		watchdog_suspended++;
+	else {
+		watchdog_disable_all_cpus();
+		pr_err("Failed to suspend lockup detectors, disabled\n");
+		watchdog_enabled = 0;
+	}
+
+	mutex_unlock(&watchdog_proc_mutex);
+
+	return ret;
+}
+
+/*
+ * Resume the hard and soft lockup detector by unparking the watchdog threads.
+ */
+void lockup_detector_resume(void)
+{
+	mutex_lock(&watchdog_proc_mutex);
+
+	watchdog_suspended--;
+	/*
+	 * The watchdog threads are unparked if they were previously running
+	 * and if there is no more active suspend request.
+	 */
+	if (watchdog_running && !watchdog_suspended)
+		watchdog_unpark_threads();
+
+	mutex_unlock(&watchdog_proc_mutex);
+	put_online_cpus();
+}
+
 #ifdef CONFIG_SYSCTL
 
 /*
@@ -810,9 +881,11 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
 			 * a temporary cpumask, so we are likely not in a
 			 * position to do much else to make things better.
 			 */
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 			if (smpboot_update_cpumask_percpu_thread(
 				    &watchdog_threads, &watchdog_cpumask) != 0)
 				pr_err("cpumask update failed\n");
+#endif
 		}
 	}
 out:
...
@@ -22,39 +22,7 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 
-/* boot commands */
-/*
- * Should we panic when a soft-lockup or hard-lockup occurs:
- */
-unsigned int __read_mostly hardlockup_panic =
-			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
 static unsigned long hardlockup_allcpu_dumped;
-/*
- * We may not want to enable hard lockup detection by default in all cases,
- * for example when running the kernel as a guest on a hypervisor. In these
- * cases this function can be called to disable hard lockup detection. This
- * function should only be executed once by the boot processor before the
- * kernel command line parameters are parsed, because otherwise it is not
- * possible to override this in hardlockup_panic_setup().
- */
-void hardlockup_detector_disable(void)
-{
-	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-}
-
-static int __init hardlockup_panic_setup(char *str)
-{
-	if (!strncmp(str, "panic", 5))
-		hardlockup_panic = 1;
-	else if (!strncmp(str, "nopanic", 7))
-		hardlockup_panic = 0;
-	else if (!strncmp(str, "0", 1))
-		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-	else if (!strncmp(str, "1", 1))
-		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
-	return 1;
-}
-__setup("nmi_watchdog=", hardlockup_panic_setup);
 
 void arch_touch_nmi_watchdog(void)
 {
...
@@ -778,34 +778,45 @@ config DEBUG_SHIRQ
 menu "Debug Lockups and Hangs"
 
 config LOCKUP_DETECTOR
-	bool "Detect Hard and Soft Lockups"
+	bool
+
+config SOFTLOCKUP_DETECTOR
+	bool "Detect Soft Lockups"
 	depends on DEBUG_KERNEL && !S390
+	select LOCKUP_DETECTOR
 	help
 	  Say Y here to enable the kernel to act as a watchdog to detect
-	  hard and soft lockups.
+	  soft lockups.
 
 	  Softlockups are bugs that cause the kernel to loop in kernel
 	  mode for more than 20 seconds, without giving other tasks a
 	  chance to run. The current stack trace is displayed upon
 	  detection and the system will stay locked up.
 
+config HARDLOCKUP_DETECTOR_PERF
+	bool
+	select SOFTLOCKUP_DETECTOR
+
+#
+# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
+# lockup detector rather than the perf based detector.
+#
+config HARDLOCKUP_DETECTOR
+	bool "Detect Hard Lockups"
+	depends on DEBUG_KERNEL && !S390
+	depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH
+	select LOCKUP_DETECTOR
+	select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF
+	select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH
+	help
+	  Say Y here to enable the kernel to act as a watchdog to detect
+	  hard lockups.
+
 	  Hardlockups are bugs that cause the CPU to loop in kernel mode
 	  for more than 10 seconds, without letting other interrupts have a
 	  chance to run. The current stack trace is displayed upon detection
 	  and the system will stay locked up.
 
-	  The overhead should be minimal. A periodic hrtimer runs to
-	  generate interrupts and kick the watchdog task every 4 seconds.
-	  An NMI is generated every 10 seconds or so to check for hardlockups.
-
-	  The frequency of hrtimer and NMI events and the soft and hard lockup
-	  thresholds can be controlled through the sysctl watchdog_thresh.
-
-config HARDLOCKUP_DETECTOR
-	def_bool y
-	depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG
-	depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
-
 config BOOTPARAM_HARDLOCKUP_PANIC
 	bool "Panic (Reboot) On Hard Lockups"
 	depends on HARDLOCKUP_DETECTOR
@@ -826,7 +837,7 @@ config BOOTPARAM_HARDLOCKUP_PANIC_VALUE
 config BOOTPARAM_SOFTLOCKUP_PANIC
 	bool "Panic (Reboot) On Soft Lockups"
-	depends on LOCKUP_DETECTOR
+	depends on SOFTLOCKUP_DETECTOR
 	help
 	  Say Y here to enable the kernel to panic on "soft lockups",
 	  which are bugs that cause the kernel to loop in kernel
@@ -843,7 +854,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
 config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
 	int
-	depends on LOCKUP_DETECTOR
+	depends on SOFTLOCKUP_DETECTOR
 	range 0 1
 	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
 	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
@@ -851,7 +862,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
 config DETECT_HUNG_TASK
 	bool "Detect Hung Tasks"
 	depends on DEBUG_KERNEL
-	default LOCKUP_DETECTOR
+	default SOFTLOCKUP_DETECTOR
 	help
 	  Say Y here to enable the kernel to detect "hung tasks",
 	  which are bugs that cause the task to be stuck in
...