Commit b90d72a6 authored by Will Deacon's avatar Will Deacon Committed by Catalin Marinas

Revert "arm64: Enable perf events based hard lockup detector"

This reverts commit 367c820e.

lockup_detector_init() makes heavy use of per-cpu variables and must be
called with preemption disabled. Usually, it's handled early during boot
in kernel_init_freeable(), before SMP has been initialised.

Since we do not know whether or not our PMU interrupt can be signalled
as an NMI until considerably later in the boot process, the Arm PMU
driver attempts to re-initialise the lockup detector off the back of a
device_initcall(). Unfortunately, this is called from preemptible
context and results in the following splat:

  | BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1
  | caller is debug_smp_processor_id+0x20/0x2c
  | CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.10.0+ #276
  | Hardware name: linux,dummy-virt (DT)
  | Call trace:
  |   dump_backtrace+0x0/0x3c0
  |   show_stack+0x20/0x6c
  |   dump_stack+0x2f0/0x42c
  |   check_preemption_disabled+0x1cc/0x1dc
  |   debug_smp_processor_id+0x20/0x2c
  |   hardlockup_detector_event_create+0x34/0x18c
  |   hardlockup_detector_perf_init+0x2c/0x134
  |   watchdog_nmi_probe+0x18/0x24
  |   lockup_detector_init+0x44/0xa8
  |   armv8_pmu_driver_init+0x54/0x78
  |   do_one_initcall+0x184/0x43c
  |   kernel_init_freeable+0x368/0x380
  |   kernel_init+0x1c/0x1cc
  |   ret_from_fork+0x10/0x30

Rather than bodge this with raw_smp_processor_id() or randomly disabling
preemption, simply revert the culprit for now until we figure out how to
do this properly.
Reported-by: default avatarLecopzer Chen <lecopzer.chen@mediatek.com>
Signed-off-by: default avatarWill Deacon <will@kernel.org>
Acked-by: default avatarMark Rutland <mark.rutland@arm.com>
Cc: Sumit Garg <sumit.garg@linaro.org>
Cc: Alexandru Elisei <alexandru.elisei@arm.com>
Link: https://lore.kernel.org/r/20201221162249.3119-1-lecopzer.chen@mediatek.com
Link: https://lore.kernel.org/r/20210112221855.10666-1-will@kernel.orgSigned-off-by: default avatarCatalin Marinas <catalin.marinas@arm.com>
parent df068247
......@@ -174,8 +174,6 @@ config ARM64
select HAVE_NMI
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI && HW_PERF_EVENTS
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
......
......@@ -23,8 +23,6 @@
#include <linux/platform_device.h>
#include <linux/sched_clock.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <linux/cpufreq.h>
/* ARMv8 Cortex-A53 specific event types. */
#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2
......@@ -1250,21 +1248,10 @@ static struct platform_driver armv8_pmu_driver = {
static int __init armv8_pmu_driver_init(void)
{
int ret;
if (acpi_disabled)
ret = platform_driver_register(&armv8_pmu_driver);
return platform_driver_register(&armv8_pmu_driver);
else
ret = arm_pmu_acpi_probe(armv8_pmuv3_init);
/*
* Try to re-initialize lockup detector after PMU init in
* case PMU events are triggered via NMIs.
*/
if (ret == 0 && arm_pmu_irq_is_nmi())
lockup_detector_init();
return ret;
return arm_pmu_acpi_probe(armv8_pmuv3_init);
}
device_initcall(armv8_pmu_driver_init)
......@@ -1322,27 +1309,3 @@ void arch_perf_update_userpage(struct perf_event *event,
userpg->cap_user_time_zero = 1;
userpg->cap_user_time_short = 1;
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
/*
* Safe maximum CPU frequency in case a particular platform doesn't implement
* cpufreq driver. Although, architecture doesn't put any restrictions on
* maximum frequency but 5 GHz seems to be safe maximum given the available
* Arm CPUs in the market which are clocked much less than 5 GHz. On the other
* hand, we can't make it much higher as it would lead to a large hard-lockup
* detection timeout on parts which are running slower (eg. 1GHz on
* Developerbox) and doesn't possess a cpufreq driver.
*/
#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz
u64 hw_nmi_get_sample_period(int watchdog_thresh)
{
unsigned int cpu = smp_processor_id();
unsigned long max_cpu_freq;
max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
if (!max_cpu_freq)
max_cpu_freq = SAFE_MAX_CPU_FREQ;
return (u64)max_cpu_freq * watchdog_thresh;
}
#endif
......@@ -726,11 +726,6 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
return per_cpu(hw_events->irq, cpu);
}
bool arm_pmu_irq_is_nmi(void)
{
return has_nmi;
}
/*
* PMU hardware loses all context when a CPU goes offline.
* When a CPU is hotplugged back in, since some hardware registers are
......
......@@ -163,8 +163,6 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn);
static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
#endif
bool arm_pmu_irq_is_nmi(void);
/* Internal functions only for core arm_pmu code */
struct arm_pmu *armpmu_alloc(void);
struct arm_pmu *armpmu_alloc_atomic(void);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment