Commit cb957121 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'powerpc-5.9-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:

 - Add perf support for emitting extended registers for power10.

 - A fix for CPU hotplug on pseries, where on large/loaded systems we
   may not wait long enough for the CPU to be offlined, leading to
   crashes.

 - Addition of a raw cputable entry for Power10, which is not required
   to boot, but is required to make our PMU setup work correctly in
   guests.

 - Three fixes for the recent changes on 32-bit Book3S to move modules
   into their own segment for strict RWX.

 - A fix for a recent change in our powernv PCI code that could lead to
   crashes.

 - A change to our perf interrupt accounting to avoid soft lockups when
   using some events, found by syzkaller.

 - A change in the way we handle power loss events from the hypervisor
   on pseries. We no longer immediately shut down if we're told we're
   running on a UPS.

 - A few other minor fixes.

Thanks to Alexey Kardashevskiy, Andreas Schwab, Aneesh Kumar K.V, Anju T
Sudhakar, Athira Rajeev, Christophe Leroy, Frederic Barrat, Greg Kurz,
Kajol Jain, Madhavan Srinivasan, Michael Neuling, Michael Roth,
Nageswara R Sastry, Oliver O'Halloran, Thiago Jung Bauermann,
Vaidyanathan Srinivasan, Vasant Hegde.

* tag 'powerpc-5.9-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/perf/hv-24x7: Move cpumask file to top folder of hv-24x7 driver
  powerpc/32s: Fix module loading failure when VMALLOC_END is over 0xf0000000
  powerpc/pseries: Do not initiate shutdown when system is running on UPS
  powerpc/perf: Fix soft lockups due to missed interrupt accounting
  powerpc/powernv/pci: Fix possible crash when releasing DMA resources
  powerpc/pseries/hotplug-cpu: wait indefinitely for vCPU death
  powerpc/32s: Fix is_module_segment() when MODULES_VADDR is defined
  powerpc/kasan: Fix KASAN_SHADOW_START on BOOK3S_32
  powerpc/fixmap: Fix the size of the early debug area
  powerpc/pkeys: Fix build error with PPC_MEM_KEYS disabled
  powerpc/kernel: Cleanup machine check function declarations
  powerpc: Add POWER10 raw mode cputable entry
  powerpc/perf: Add extended regs support for power10 platform
  powerpc/perf: Add support for outputting extended regs in perf intr_regs
  powerpc: Fix P10 PVR revision in /proc/cpuinfo for SMT4 cores
parents 550c2129 64ef8f2c
......@@ -43,7 +43,7 @@ Description: read only
This sysfs interface exposes the number of cores per chip
present in the system.
What: /sys/devices/hv_24x7/interface/cpumask
What: /sys/devices/hv_24x7/cpumask
Date: July 2020
Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
Description: read only
......
......@@ -9,6 +9,11 @@
#ifndef __ASSEMBLY__
/*
* Added to include __machine_check_early_realmode_* functions
*/
#include <asm/mce.h>
/* This structure can grow, it's real size is used by head.S code
* via the mkdefs mechanism.
*/
......
......@@ -52,7 +52,7 @@ enum fixed_addresses {
FIX_HOLE,
/* reserve the top 128K for early debugging purposes */
FIX_EARLY_DEBUG_TOP = FIX_HOLE,
FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128, PAGE_SIZE)/PAGE_SIZE)-1,
FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1,
#ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
......
......@@ -15,11 +15,18 @@
#ifndef __ASSEMBLY__
#include <asm/page.h>
#include <linux/sizes.h>
#define KASAN_SHADOW_SCALE_SHIFT 3
#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_MODULES) && defined(CONFIG_STRICT_KERNEL_RWX)
#define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
#else
#define KASAN_KERN_START PAGE_OFFSET
#endif
#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \
(PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT))
(KASAN_KERN_START >> KASAN_SHADOW_SCALE_SHIFT))
#define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET)
......
......@@ -210,6 +210,9 @@ struct mce_error_info {
#define MCE_EVENT_RELEASE true
#define MCE_EVENT_DONTRELEASE false
struct pt_regs;
struct notifier_block;
extern void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err, uint64_t nip,
uint64_t addr, uint64_t phys_addr);
......@@ -225,5 +228,9 @@ int mce_register_notifier(struct notifier_block *nb);
int mce_unregister_notifier(struct notifier_block *nb);
#ifdef CONFIG_PPC_BOOK3S_64
void flush_and_reload_slb(void);
long __machine_check_early_realmode_p7(struct pt_regs *regs);
long __machine_check_early_realmode_p8(struct pt_regs *regs);
long __machine_check_early_realmode_p9(struct pt_regs *regs);
long __machine_check_early_realmode_p10(struct pt_regs *regs);
#endif /* CONFIG_PPC_BOOK3S_64 */
#endif /* __ASM_PPC64_MCE_H__ */
......@@ -40,4 +40,7 @@ static inline bool is_sier_available(void) { return false; }
/* To support perf_regs sier update */
extern bool is_sier_available(void);
/* To define perf extended regs mask value */
extern u64 PERF_REG_EXTENDED_MASK;
#define PERF_REG_EXTENDED_MASK PERF_REG_EXTENDED_MASK
#endif
......@@ -62,6 +62,11 @@ struct power_pmu {
int *blacklist_ev;
/* BHRB entries in the PMU */
int bhrb_nr;
/*
* set this flag with `PERF_PMU_CAP_EXTENDED_REGS` if
* the pmu supports extended perf regs capability
*/
int capabilities;
};
/*
......
......@@ -48,6 +48,24 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_DSISR,
PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MMCRA,
PERF_REG_POWERPC_MAX,
/* Extended registers */
PERF_REG_POWERPC_MMCR0,
PERF_REG_POWERPC_MMCR1,
PERF_REG_POWERPC_MMCR2,
PERF_REG_POWERPC_MMCR3,
PERF_REG_POWERPC_SIER2,
PERF_REG_POWERPC_SIER3,
/* Max regs without the extended regs */
PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
};
#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK)
/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */
#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK)
#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1)
#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1)
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
......@@ -72,9 +72,6 @@ extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power9(void);
extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power10(void);
extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_E500)
extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
......@@ -542,6 +539,25 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
{ /* Power10 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00800000,
.cpu_name = "POWER10 (raw)",
.cpu_features = CPU_FTRS_POWER10,
.cpu_user_features = COMMON_USER_POWER10,
.cpu_user_features2 = COMMON_USER2_POWER10,
.mmu_features = MMU_FTRS_POWER10,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power10",
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power10,
.cpu_restore = __restore_cpu_power10,
.machine_check_early = __machine_check_early_realmode_p10,
.platform = "power10",
},
{ /* Cell Broadband Engine */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00700000,
......
......@@ -64,10 +64,6 @@ struct dt_cpu_feature {
* Set up the base CPU
*/
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
extern long __machine_check_early_realmode_p10(struct pt_regs *regs);
static int hv_mode;
static struct {
......
......@@ -311,6 +311,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
min = pvr & 0xFF;
break;
case 0x004e: /* POWER9 bits 12-15 give chip type */
case 0x0080: /* POWER10 bit 12 gives SMT8/4 */
maj = (pvr >> 8) & 0x0F;
min = pvr & 0xFF;
break;
......
......@@ -191,10 +191,17 @@ static bool is_module_segment(unsigned long addr)
{
if (!IS_ENABLED(CONFIG_MODULES))
return false;
#ifdef MODULES_VADDR
if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
return false;
if (addr > ALIGN(MODULES_END, SZ_256M) - 1)
return false;
#else
if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M))
return false;
if (addr >= ALIGN(VMALLOC_END, SZ_256M))
if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1)
return false;
#endif
return true;
}
......
......@@ -1115,8 +1115,10 @@ void hash__early_init_mmu_secondary(void)
&& cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
if (IS_ENABLED(CONFIG_PPC_MEM_KEYS) && mmu_has_feature(MMU_FTR_PKEY))
#ifdef CONFIG_PPC_MEM_KEYS
if (mmu_has_feature(MMU_FTR_PKEY))
mtspr(SPRN_UAMOR, default_uamor);
#endif
}
#endif /* CONFIG_SMP */
......
......@@ -2141,6 +2141,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0);
} else if (period) {
/* Account for interrupt in case of invalid SIAR */
if (perf_event_account_interrupt(event))
power_pmu_stop(event, 0);
}
}
......@@ -2323,6 +2327,7 @@ int register_power_pmu(struct power_pmu *pmu)
pmu->name);
power_pmu.attr_groups = ppmu->attr_groups;
power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS);
#ifdef MSR_HV
/*
......
......@@ -1128,6 +1128,15 @@ static struct bin_attribute *if_bin_attrs[] = {
NULL,
};
static struct attribute *cpumask_attrs[] = {
&dev_attr_cpumask.attr,
NULL,
};
static struct attribute_group cpumask_attr_group = {
.attrs = cpumask_attrs,
};
static struct attribute *if_attrs[] = {
&dev_attr_catalog_len.attr,
&dev_attr_catalog_version.attr,
......@@ -1135,7 +1144,6 @@ static struct attribute *if_attrs[] = {
&dev_attr_sockets.attr,
&dev_attr_chipspersocket.attr,
&dev_attr_coresperchip.attr,
&dev_attr_cpumask.attr,
NULL,
};
......@@ -1151,6 +1159,7 @@ static const struct attribute_group *attr_groups[] = {
&event_desc_group,
&event_long_desc_group,
&if_group,
&cpumask_attr_group,
NULL,
};
......
......@@ -13,9 +13,11 @@
#include <asm/ptrace.h>
#include <asm/perf_regs.h>
u64 PERF_REG_EXTENDED_MASK;
#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
#define REG_RESERVED (~(PERF_REG_EXTENDED_MASK | PERF_REG_PMU_MASK))
static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_R0, gpr[0]),
......@@ -69,10 +71,36 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr),
};
/* Function to return the extended register values */
static u64 get_ext_regs_value(int idx)
{
switch (idx) {
case PERF_REG_POWERPC_MMCR0:
return mfspr(SPRN_MMCR0);
case PERF_REG_POWERPC_MMCR1:
return mfspr(SPRN_MMCR1);
case PERF_REG_POWERPC_MMCR2:
return mfspr(SPRN_MMCR2);
#ifdef CONFIG_PPC64
case PERF_REG_POWERPC_MMCR3:
return mfspr(SPRN_MMCR3);
case PERF_REG_POWERPC_SIER2:
return mfspr(SPRN_SIER2);
case PERF_REG_POWERPC_SIER3:
return mfspr(SPRN_SIER3);
#endif
default: return 0;
}
}
u64 perf_reg_value(struct pt_regs *regs, int idx)
{
if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
return 0;
u64 perf_reg_extended_max = PERF_REG_POWERPC_MAX;
if (cpu_has_feature(CPU_FTR_ARCH_31))
perf_reg_extended_max = PERF_REG_MAX_ISA_31;
else if (cpu_has_feature(CPU_FTR_ARCH_300))
perf_reg_extended_max = PERF_REG_MAX_ISA_300;
if (idx == PERF_REG_POWERPC_SIER &&
(IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
......@@ -85,6 +113,16 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
IS_ENABLED(CONFIG_PPC32)))
return 0;
if (idx >= PERF_REG_POWERPC_MAX && idx < perf_reg_extended_max)
return get_ext_regs_value(idx);
/*
* If the idx is referring to value beyond the
* supported registers, return 0 with a warning
*/
if (WARN_ON_ONCE(idx >= perf_reg_extended_max))
return 0;
return regs_get_register(regs, pt_regs_offset[idx]);
}
......
......@@ -87,6 +87,8 @@
#define POWER10_MMCRA_IFM3 0x00000000C0000000UL
#define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL
extern u64 PERF_REG_EXTENDED_MASK;
/* Table of alternatives, sorted by column 0 */
static const unsigned int power10_event_alternatives[][MAX_ALT] = {
{ PM_RUN_CYC_ALT, PM_RUN_CYC },
......@@ -397,6 +399,7 @@ static struct power_pmu power10_pmu = {
.cache_events = &power10_cache_events,
.attr_groups = power10_pmu_attr_groups,
.bhrb_nr = 32,
.capabilities = PERF_PMU_CAP_EXTENDED_REGS,
};
int init_power10_pmu(void)
......@@ -408,6 +411,9 @@ int init_power10_pmu(void)
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10"))
return -ENODEV;
/* Set the PERF_REG_EXTENDED_MASK here */
PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;
rc = register_power_pmu(&power10_pmu);
if (rc)
return rc;
......
......@@ -90,6 +90,8 @@ enum {
#define POWER9_MMCRA_IFM3 0x00000000C0000000UL
#define POWER9_MMCRA_BHRB_MASK 0x00000000C0000000UL
extern u64 PERF_REG_EXTENDED_MASK;
/* Nasty Power9 specific hack */
#define PVR_POWER9_CUMULUS 0x00002000
......@@ -434,6 +436,7 @@ static struct power_pmu power9_pmu = {
.cache_events = &power9_cache_events,
.attr_groups = power9_pmu_attr_groups,
.bhrb_nr = 32,
.capabilities = PERF_PMU_CAP_EXTENDED_REGS,
};
int init_power9_pmu(void)
......@@ -457,6 +460,9 @@ int init_power9_pmu(void)
}
}
/* Set the PERF_REG_EXTENDED_MASK here */
PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_300;
rc = register_power_pmu(&power9_pmu);
if (rc)
return rc;
......
......@@ -2705,7 +2705,7 @@ void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
struct iommu_table *tbl = pe->table_group.tables[0];
int64_t rc;
if (pe->dma_setup_done)
if (!pe->dma_setup_done)
return;
rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
......
......@@ -107,22 +107,28 @@ static int pseries_cpu_disable(void)
*/
static void pseries_cpu_die(unsigned int cpu)
{
int tries;
int cpu_status = 1;
unsigned int pcpu = get_hard_smp_processor_id(cpu);
unsigned long timeout = jiffies + msecs_to_jiffies(120000);
for (tries = 0; tries < 25; tries++) {
while (true) {
cpu_status = smp_query_cpu_stopped(pcpu);
if (cpu_status == QCSS_STOPPED ||
cpu_status == QCSS_HARDWARE_ERROR)
break;
cpu_relax();
if (time_after(jiffies, timeout)) {
pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n",
cpu, pcpu);
timeout = jiffies + msecs_to_jiffies(120000);
}
cond_resched();
}
if (cpu_status != 0) {
printk("Querying DEAD? cpu %i (%i) shows %i\n",
cpu, pcpu, cpu_status);
if (cpu_status == QCSS_HARDWARE_ERROR) {
pr_warn("CPU %i (hwid %i) reported error while dying\n",
cpu, pcpu);
}
/* Isolation and deallocation are definitely done by
......
......@@ -184,7 +184,6 @@ static void handle_system_shutdown(char event_modifier)
case EPOW_SHUTDOWN_ON_UPS:
pr_emerg("Loss of system power detected. System is running on"
" UPS/battery. Check RTAS error log for details\n");
orderly_poweroff(true);
break;
case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment