Commit ff7b862a authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ras_updates_for_5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov:

 - Do not report spurious MCEs on some Intel platforms caused by errata;
   by Prarit Bhargava.

 - Change dev-mcelog's hardcoded limit of 32 error records to a dynamic
   one, controlled by the number of logical CPUs, by Tony Luck.

 - Add support for the processor identification number (PPIN) on AMD, by
   Wei Huang.

* tag 'ras_updates_for_5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce/amd: Add PPIN support for AMD MCE
  x86/mce/dev-mcelog: Dynamically allocate space for machine check records
  x86/mce: Do not log spurious corrected mce errors
parents aaf985e2 077168e2
...@@ -299,6 +299,7 @@ ...@@ -299,6 +299,7 @@
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ #define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */
#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
......
...@@ -102,7 +102,7 @@ ...@@ -102,7 +102,7 @@
#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
#define MCE_LOG_LEN 32 #define MCE_LOG_MIN_LEN 32U
#define MCE_LOG_SIGNATURE "MACHINECHECK" #define MCE_LOG_SIGNATURE "MACHINECHECK"
/* AMD Scalable MCA */ /* AMD Scalable MCA */
...@@ -135,11 +135,11 @@ ...@@ -135,11 +135,11 @@
*/ */
struct mce_log_buffer { struct mce_log_buffer {
char signature[12]; /* "MACHINECHECK" */ char signature[12]; /* "MACHINECHECK" */
unsigned len; /* = MCE_LOG_LEN */ unsigned len; /* = elements in .mce_entry[] */
unsigned next; unsigned next;
unsigned flags; unsigned flags;
unsigned recordlen; /* length of struct mce */ unsigned recordlen; /* length of struct mce */
struct mce entry[MCE_LOG_LEN]; struct mce entry[];
}; };
enum mce_notifier_prios { enum mce_notifier_prios {
......
...@@ -394,6 +394,35 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) ...@@ -394,6 +394,35 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
} }
/*
 * Probe the AMD Protected Processor Inventory Number (PPIN).
 *
 * CPUID may advertise PPIN support, but the MSR_AMD_PPIN_CTL register has
 * the final say: if the MSR is inaccessible, or PPIN is locked in the
 * disabled state, or enabling it does not stick, the feature bit is
 * cleared so later readers (e.g. mce_setup()) never touch MSR_AMD_PPIN.
 */
static void amd_detect_ppin(struct cpuinfo_x86 *c)
{
	unsigned long long ppin_ctl;

	if (!cpu_has(c, X86_FEATURE_AMD_PPIN))
		return;

	/* CPUID says PPIN exists, but the PPIN_CTL MSR must also cooperate. */
	if (!rdmsrl_safe(MSR_AMD_PPIN_CTL, &ppin_ctl)) {
		/* Bits [1:0] = {Enable, LockEn}. 01b: locked off for good. */
		if ((ppin_ctl & 3UL) != 1UL) {
			/* Enable bit clear? Try to set it, then re-read. */
			if (!(ppin_ctl & 2UL)) {
				wrmsrl_safe(MSR_AMD_PPIN_CTL, ppin_ctl | 2UL);
				rdmsrl_safe(MSR_AMD_PPIN_CTL, &ppin_ctl);
			}

			/* Enabled (already or just now): keep the feature. */
			if (ppin_ctl & 2UL)
				return;
		}
	}

	clear_cpu_cap(c, X86_FEATURE_AMD_PPIN);
}
u16 amd_get_nb_id(int cpu) u16 amd_get_nb_id(int cpu)
{ {
return per_cpu(cpu_llc_id, cpu); return per_cpu(cpu_llc_id, cpu);
...@@ -941,6 +970,7 @@ static void init_amd(struct cpuinfo_x86 *c) ...@@ -941,6 +970,7 @@ static void init_amd(struct cpuinfo_x86 *c)
amd_detect_cmp(c); amd_detect_cmp(c);
amd_get_topology(c); amd_get_topology(c);
srat_detect_node(c); srat_detect_node(c);
amd_detect_ppin(c);
init_amd_cacheinfo(c); init_amd_cacheinfo(c);
......
...@@ -142,6 +142,8 @@ void mce_setup(struct mce *m) ...@@ -142,6 +142,8 @@ void mce_setup(struct mce *m)
if (this_cpu_has(X86_FEATURE_INTEL_PPIN)) if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
rdmsrl(MSR_PPIN, m->ppin); rdmsrl(MSR_PPIN, m->ppin);
else if (this_cpu_has(X86_FEATURE_AMD_PPIN))
rdmsrl(MSR_AMD_PPIN, m->ppin);
m->microcode = boot_cpu_data.microcode; m->microcode = boot_cpu_data.microcode;
} }
...@@ -1877,6 +1879,8 @@ bool filter_mce(struct mce *m) ...@@ -1877,6 +1879,8 @@ bool filter_mce(struct mce *m)
{ {
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
return amd_filter_mce(m); return amd_filter_mce(m);
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
return intel_filter_mce(m);
return false; return false;
} }
......
...@@ -29,11 +29,7 @@ static char *mce_helper_argv[2] = { mce_helper, NULL }; ...@@ -29,11 +29,7 @@ static char *mce_helper_argv[2] = { mce_helper, NULL };
* separate MCEs from kernel messages to avoid bogus bug reports. * separate MCEs from kernel messages to avoid bogus bug reports.
*/ */
static struct mce_log_buffer mcelog = { static struct mce_log_buffer *mcelog;
.signature = MCE_LOG_SIGNATURE,
.len = MCE_LOG_LEN,
.recordlen = sizeof(struct mce),
};
static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
...@@ -45,21 +41,21 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val, ...@@ -45,21 +41,21 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
mutex_lock(&mce_chrdev_read_mutex); mutex_lock(&mce_chrdev_read_mutex);
entry = mcelog.next; entry = mcelog->next;
/* /*
* When the buffer fills up discard new entries. Assume that the * When the buffer fills up discard new entries. Assume that the
* earlier errors are the more interesting ones: * earlier errors are the more interesting ones:
*/ */
if (entry >= MCE_LOG_LEN) { if (entry >= mcelog->len) {
set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog->flags);
goto unlock; goto unlock;
} }
mcelog.next = entry + 1; mcelog->next = entry + 1;
memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
mcelog.entry[entry].finished = 1; mcelog->entry[entry].finished = 1;
/* wake processes polling /dev/mcelog */ /* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait); wake_up_interruptible(&mce_chrdev_wait);
...@@ -214,21 +210,21 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, ...@@ -214,21 +210,21 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
/* Only supports full reads right now */ /* Only supports full reads right now */
err = -EINVAL; err = -EINVAL;
if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) if (*off != 0 || usize < mcelog->len * sizeof(struct mce))
goto out; goto out;
next = mcelog.next; next = mcelog->next;
err = 0; err = 0;
for (i = 0; i < next; i++) { for (i = 0; i < next; i++) {
struct mce *m = &mcelog.entry[i]; struct mce *m = &mcelog->entry[i];
err |= copy_to_user(buf, m, sizeof(*m)); err |= copy_to_user(buf, m, sizeof(*m));
buf += sizeof(*m); buf += sizeof(*m);
} }
memset(mcelog.entry, 0, next * sizeof(struct mce)); memset(mcelog->entry, 0, next * sizeof(struct mce));
mcelog.next = 0; mcelog->next = 0;
if (err) if (err)
err = -EFAULT; err = -EFAULT;
...@@ -242,7 +238,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, ...@@ -242,7 +238,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait) static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{ {
poll_wait(file, &mce_chrdev_wait, wait); poll_wait(file, &mce_chrdev_wait, wait);
if (READ_ONCE(mcelog.next)) if (READ_ONCE(mcelog->next))
return EPOLLIN | EPOLLRDNORM; return EPOLLIN | EPOLLRDNORM;
if (!mce_apei_read_done && apei_check_mce()) if (!mce_apei_read_done && apei_check_mce())
return EPOLLIN | EPOLLRDNORM; return EPOLLIN | EPOLLRDNORM;
...@@ -261,13 +257,13 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd, ...@@ -261,13 +257,13 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
case MCE_GET_RECORD_LEN: case MCE_GET_RECORD_LEN:
return put_user(sizeof(struct mce), p); return put_user(sizeof(struct mce), p);
case MCE_GET_LOG_LEN: case MCE_GET_LOG_LEN:
return put_user(MCE_LOG_LEN, p); return put_user(mcelog->len, p);
case MCE_GETCLEAR_FLAGS: { case MCE_GETCLEAR_FLAGS: {
unsigned flags; unsigned flags;
do { do {
flags = mcelog.flags; flags = mcelog->flags;
} while (cmpxchg(&mcelog.flags, flags, 0) != flags); } while (cmpxchg(&mcelog->flags, flags, 0) != flags);
return put_user(flags, p); return put_user(flags, p);
} }
...@@ -339,8 +335,18 @@ static struct miscdevice mce_chrdev_device = { ...@@ -339,8 +335,18 @@ static struct miscdevice mce_chrdev_device = {
static __init int dev_mcelog_init_device(void) static __init int dev_mcelog_init_device(void)
{ {
int mce_log_len;
int err; int err;
mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus());
mcelog = kzalloc(sizeof(*mcelog) + mce_log_len * sizeof(struct mce), GFP_KERNEL);
if (!mcelog)
return -ENOMEM;
strncpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature));
mcelog->len = mce_log_len;
mcelog->recordlen = sizeof(struct mce);
/* register character device /dev/mcelog */ /* register character device /dev/mcelog */
err = misc_register(&mce_chrdev_device); err = misc_register(&mce_chrdev_device);
if (err) { if (err) {
...@@ -350,6 +356,7 @@ static __init int dev_mcelog_init_device(void) ...@@ -350,6 +356,7 @@ static __init int dev_mcelog_init_device(void)
else else
pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err); pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);
kfree(mcelog);
return err; return err;
} }
......
...@@ -521,3 +521,20 @@ void mce_intel_feature_clear(struct cpuinfo_x86 *c) ...@@ -521,3 +521,20 @@ void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{ {
intel_clear_lmce(); intel_clear_lmce();
} }
/*
 * Filter out known-spurious corrected machine check errors.
 *
 * Per Intel MCE errata HSD131, HSM142, HSW131 and BDM48, Haswell and
 * Broadwell parts can log a benign corrected error in bank 0 with a
 * status of 0x000f0005. Returns true when @m matches that pattern and
 * should be suppressed.
 */
bool intel_filter_mce(struct mce *m)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;

	/* Only family 6, bank 0 records are affected. */
	if (c->x86 != 6 || m->bank != 0)
		return false;

	switch (c->x86_model) {
	case INTEL_FAM6_HASWELL:
	case INTEL_FAM6_HASWELL_L:
	case INTEL_FAM6_HASWELL_G:
	case INTEL_FAM6_BROADWELL:
		/* Mask keeps VAL/UC/MISCV plus the MCA error code field. */
		return (m->status & 0xa0000000ffffffff) == 0x80000000000f0005;
	default:
		return false;
	}
}
...@@ -48,6 +48,7 @@ void cmci_disable_bank(int bank); ...@@ -48,6 +48,7 @@ void cmci_disable_bank(int bank);
void intel_init_cmci(void); void intel_init_cmci(void);
void intel_init_lmce(void); void intel_init_lmce(void);
void intel_clear_lmce(void); void intel_clear_lmce(void);
bool intel_filter_mce(struct mce *m);
#else #else
# define cmci_intel_adjust_timer mce_adjust_timer_default # define cmci_intel_adjust_timer mce_adjust_timer_default
static inline bool mce_intel_cmci_poll(void) { return false; } static inline bool mce_intel_cmci_poll(void) { return false; }
...@@ -56,6 +57,7 @@ static inline void cmci_disable_bank(int bank) { } ...@@ -56,6 +57,7 @@ static inline void cmci_disable_bank(int bank) { }
static inline void intel_init_cmci(void) { } static inline void intel_init_cmci(void) { }
static inline void intel_init_lmce(void) { } static inline void intel_init_lmce(void) { }
static inline void intel_clear_lmce(void) { } static inline void intel_clear_lmce(void) { }
static inline bool intel_filter_mce(struct mce *m) { return false; };
#endif #endif
void mce_timer_kick(unsigned long interval); void mce_timer_kick(unsigned long interval);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment