Commit 650e5455 authored by Linus Torvalds

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "A handful of x86 fixes:

   - a syscall ABI fix for an Android breakage
   - a Xen PV guest fix for an RTC device issue that caused a
     non-working console
   - a Xen guest syscall stack frame fix
   - an MCE hotplug CPU crash fix"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/numachip: Fix NumaConnect2 MMCFG PCI access
  x86/entry: Restore traditional SYSENTER calling convention
  x86/entry: Fix some comments
  x86/paravirt: Prevent rtc_cmos platform device init on PV guests
  x86/xen: Avoid fast syscall path for Xen PV guests
  x86/mce: Ensure offline CPUs don't participate in rendezvous process
parents de030179 dd7a5ab4
arch/x86/entry/common.c
@@ -421,7 +421,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
 	regs->ip = landing_pad;
 
 	/*
-	 * Fetch ECX from where the vDSO stashed it.
+	 * Fetch EBP from where the vDSO stashed it.
 	 *
 	 * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
 	 */
@@ -432,10 +432,10 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
 		 * Micro-optimization: the pointer we're following is explicitly
 		 * 32 bits, so it can't be out of range.
 		 */
-		__get_user(*(u32 *)&regs->cx,
+		__get_user(*(u32 *)&regs->bp,
 			   (u32 __user __force *)(unsigned long)(u32)regs->sp)
 #else
-		get_user(*(u32 *)&regs->cx,
+		get_user(*(u32 *)&regs->bp,
 			 (u32 __user __force *)(unsigned long)(u32)regs->sp)
 #endif
 		) {
......
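A note on that cast chain, with a stand-alone user-space sketch (illustrative, not kernel code): on a 64-bit kernel the high half of regs->sp is not trusted, so the value is truncated to 32 bits and then zero-extended. The result is always below 4 GiB, which is exactly the "can't be out of range" micro-optimization that lets __get_user() skip the range check:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Pretend the high bits of the saved SP are junk. */
	uint64_t sp = 0xdeadbeef00001000ULL;

	/* (unsigned long)(u32)regs->sp: truncate, then zero-extend. */
	uint64_t user_addr = (uint64_t)(uint32_t)sp;

	/* Prints 0x1000 -- guaranteed below 4 GiB. */
	printf("%#llx\n", (unsigned long long)user_addr);
	return 0;
}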
arch/x86/entry/entry_32.S
@@ -292,7 +292,7 @@ ENTRY(entry_SYSENTER_32)
 	movl	TSS_sysenter_sp0(%esp), %esp
 sysenter_past_esp:
 	pushl	$__USER_DS		/* pt_regs->ss */
-	pushl	%ecx			/* pt_regs->cx */
+	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
 	pushfl				/* pt_regs->flags (except IF = 0) */
 	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
 	pushl	$__USER_CS		/* pt_regs->cs */
@@ -308,8 +308,9 @@ sysenter_past_esp:
 	movl	%esp, %eax
 	call	do_fast_syscall_32
-	testl	%eax, %eax
-	jz	.Lsyscall_32_done
+	/* XEN PV guests always use IRET path */
+	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
 
 	/* Opportunistic SYSEXIT */
 	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
......
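The ALTERNATIVE above is boot-time code patching, not a runtime branch: when X86_FEATURE_XENPV is set, the alternatives machinery overwrites the testl/jz pair with an unconditional jump, so Xen PV guests always leave through the IRET path (SYSEXIT/SYSRET are unusable under Xen PV). A minimal user-space model of the idea, with made-up names -- select the code sequence once at "boot" instead of testing the feature on every syscall exit:

#include <stdbool.h>
#include <stdio.h>

static bool feature_xenpv = true;	/* stand-in for X86_FEATURE_XENPV */

/* Native site: fast SYSEXIT allowed when do_fast_syscall_32() != 0. */
static bool sysexit_ok_native(int rc) { return rc != 0; }

/* Patched-for-Xen site: never take the fast exit path. */
static bool sysexit_ok_xenpv(int rc) { (void)rc; return false; }

static bool (*sysexit_ok)(int rc);

static void apply_alternatives_model(void)	/* runs once, like boot patching */
{
	sysexit_ok = feature_xenpv ? sysexit_ok_xenpv : sysexit_ok_native;
}

int main(void)
{
	apply_alternatives_model();
	printf("fast exit taken: %d\n", sysexit_ok(1));	/* 0 on Xen PV */
	return 0;
}

The same ALTERNATIVE pair appears at all three fast-path exits touched by this merge: SYSENTER on 32-bit, and SYSENTER/SYSCALL in the 64-bit compat entry code below.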
arch/x86/entry/entry_64_compat.S
@@ -63,7 +63,7 @@ ENTRY(entry_SYSENTER_compat)
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER32_DS		/* pt_regs->ss */
-	pushq	%rcx			/* pt_regs->sp */
+	pushq	%rbp			/* pt_regs->sp (stashed in bp) */
 
 	/*
 	 * Push flags.  This is nasty.  First, interrupts are currently
@@ -82,14 +82,14 @@ ENTRY(entry_SYSENTER_compat)
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
-	pushq	%rcx			/* pt_regs->cx (will be overwritten) */
+	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq	%r8			/* pt_regs->r8  = 0 */
 	pushq	%r8			/* pt_regs->r9  = 0 */
 	pushq	%r8			/* pt_regs->r10 = 0 */
 	pushq	%r8			/* pt_regs->r11 = 0 */
 	pushq	%rbx			/* pt_regs->rbx */
-	pushq	%rbp			/* pt_regs->rbp */
+	pushq	%rbp			/* pt_regs->rbp (will be overwritten) */
 	pushq	%r8			/* pt_regs->r12 = 0 */
 	pushq	%r8			/* pt_regs->r13 = 0 */
 	pushq	%r8			/* pt_regs->r14 = 0 */
@@ -121,8 +121,9 @@ sysenter_flags_fixed:
 	movq	%rsp, %rdi
 	call	do_fast_syscall_32
-	testl	%eax, %eax
-	jz	.Lsyscall_32_done
+	/* XEN PV guests always use IRET path */
+	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
 	jmp	sysret32_from_system_call
 
 sysenter_fix_flags:
@@ -178,7 +179,7 @@ ENTRY(entry_SYSCALL_compat)
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
-	pushq	%rcx			/* pt_regs->cx (will be overwritten) */
+	pushq	%rbp			/* pt_regs->cx (stashed in bp) */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	xorq	%r8,%r8
 	pushq	%r8			/* pt_regs->r8  = 0 */
@@ -186,7 +187,7 @@ ENTRY(entry_SYSCALL_compat)
 	pushq	%r8			/* pt_regs->r10 = 0 */
 	pushq	%r8			/* pt_regs->r11 = 0 */
 	pushq	%rbx			/* pt_regs->rbx */
-	pushq	%rbp			/* pt_regs->rbp */
+	pushq	%rbp			/* pt_regs->rbp (will be overwritten) */
 	pushq	%r8			/* pt_regs->r12 = 0 */
 	pushq	%r8			/* pt_regs->r13 = 0 */
 	pushq	%r8			/* pt_regs->r14 = 0 */
@@ -200,8 +201,9 @@ ENTRY(entry_SYSCALL_compat)
 	movq	%rsp, %rdi
 	call	do_fast_syscall_32
-	testl	%eax, %eax
-	jz	.Lsyscall_32_done
+	/* XEN PV guests always use IRET path */
+	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
 
 	/* Opportunistic SYSRET */
 sysret32_from_system_call:
......
arch/x86/entry/vdso/vdso32/system_call.S
@@ -1,5 +1,5 @@
 /*
- * Code for the vDSO.  This version uses the old int $0x80 method.
+ * AT_SYSINFO entry point
  */
 
 #include <asm/dwarf2.h>
@@ -21,35 +21,67 @@ __kernel_vsyscall:
 	/*
 	 * Reshuffle regs so that all of any of the entry instructions
 	 * will preserve enough state.
+	 *
+	 * A really nice entry sequence would be:
+	 *  pushl %edx
+	 *  pushl %ecx
+	 *  movl %esp, %ecx
+	 *
+	 * Unfortunately, naughty Android versions between July and December
+	 * 2015 actually hardcode the traditional Linux SYSENTER entry
+	 * sequence.  That is severely broken for a number of reasons (ask
+	 * anyone with an AMD CPU, for example).  Nonetheless, we try to keep
+	 * it working approximately as well as it ever worked.
+	 *
+	 * This link may elucidate some of the history:
+	 *   https://android-review.googlesource.com/#/q/Iac3295376d61ef83e713ac9b528f3b50aa780cd7
+	 * personally, I find it hard to understand what's going on there.
+	 *
+	 * Note to future user developers: DO NOT USE SYSENTER IN YOUR CODE.
+	 * Execute an indirect call to the address in the AT_SYSINFO auxv
+	 * entry.  That is the ONLY correct way to make a fast 32-bit system
+	 * call on Linux.  (Open-coding int $0x80 is also fine, but it's
+	 * slow.)
 	 */
+
+	pushl	%ecx
+	CFI_ADJUST_CFA_OFFSET	4
+	CFI_REL_OFFSET		ecx, 0
 	pushl	%edx
 	CFI_ADJUST_CFA_OFFSET	4
 	CFI_REL_OFFSET		edx, 0
-	pushl	%ecx
+	pushl	%ebp
 	CFI_ADJUST_CFA_OFFSET	4
-	CFI_REL_OFFSET		ecx, 0
-	movl	%esp, %ecx
+	CFI_REL_OFFSET		ebp, 0
+
+	#define SYSENTER_SEQUENCE	"movl %esp, %ebp; sysenter"
+	#define SYSCALL_SEQUENCE	"movl %ecx, %ebp; syscall"
 
 #ifdef CONFIG_X86_64
 	/* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
-	ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
-	                  "syscall",  X86_FEATURE_SYSCALL32
+	ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \
+	                  SYSCALL_SEQUENCE,  X86_FEATURE_SYSCALL32
 #else
-	ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
+	ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP
 #endif
 
 	/* Enter using int $0x80 */
-	movl	(%esp), %ecx
 	int	$0x80
 GLOBAL(int80_landing_pad)
 
-	/* Restore ECX and EDX in case they were clobbered. */
-	popl	%ecx
-	CFI_RESTORE		ecx
+	/*
+	 * Restore EDX and ECX in case they were clobbered.  EBP is not
+	 * clobbered (the kernel restores it), but it's cleaner and
+	 * probably faster to pop it than to adjust ESP using addl.
+	 */
+	popl	%ebp
+	CFI_RESTORE		ebp
 	CFI_ADJUST_CFA_OFFSET	-4
 	popl	%edx
 	CFI_RESTORE		edx
 	CFI_ADJUST_CFA_OFFSET	-4
+	popl	%ecx
+	CFI_RESTORE		ecx
+	CFI_ADJUST_CFA_OFFSET	-4
 	ret
 	CFI_ENDPROC
......
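The new header comment spells out the user-side contract: reach this entry point through an indirect call to the AT_SYSINFO auxv value, never by open-coding SYSENTER. A hedged sketch of that from user space (the only assumptions are glibc's getauxval() and __NR_getpid = 20 on i386; build 32-bit with gcc -m32):

#include <stdio.h>
#include <sys/auxv.h>		/* getauxval(); AT_SYSINFO comes via <elf.h> */

int main(void)
{
	unsigned long vsyscall = getauxval(AT_SYSINFO);
	long ret;

	if (!vsyscall)
		return 1;	/* no vDSO entry; int $0x80 would still work */

	/*
	 * i386 convention: syscall number in %eax, arguments in
	 * %ebx/%ecx/%edx/%esi/%edi/%ebp; getpid (nr 20) takes none.
	 */
	asm volatile("call *%1"
		     : "=a" (ret)
		     : "r" (vsyscall), "0" (20L)
		     : "memory");

	printf("getpid() via AT_SYSINFO: %ld\n", ret);
	return 0;
}

SYSENTER destroys the user ESP and EIP before the kernel can save them, which is why the vDSO has to stash state first -- and why applications that hardcoded the instruction sequence (as the Android versions above did) were never safe.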
arch/x86/include/asm/cpufeature.h
@@ -216,6 +216,7 @@
 #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
 #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
 #define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
......
arch/x86/include/asm/paravirt.h
@@ -19,6 +19,12 @@ static inline int paravirt_enabled(void)
 	return pv_info.paravirt_enabled;
 }
 
+static inline int paravirt_has_feature(unsigned int feature)
+{
+	WARN_ON_ONCE(!pv_info.paravirt_enabled);
+	return (pv_info.features & feature);
+}
+
 static inline void load_sp0(struct tss_struct *tss,
 			    struct thread_struct *thread)
 {
......
arch/x86/include/asm/paravirt_types.h
@@ -70,9 +70,14 @@ struct pv_info {
 #endif
 
 	int paravirt_enabled;
+	unsigned int features;	  /* valid only if paravirt_enabled is set */
 	const char *name;
 };
 
+#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x)
+/* Supported features */
+#define PV_SUPPORTED_RTC        (1<<0)
+
 struct pv_init_ops {
 	/*
 	 * Patch may replace one of the defined code sequences with
......
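The mechanism in the two headers above is a guarded bitmask: pv_info.features is only meaningful when paravirt_enabled is set, and paravirt_has() token-pastes the PV_SUPPORTED_ prefix onto its argument. A stand-alone user-space model of how the pieces fit together (the dom0 check mirrors the xen_start_kernel() hunk further down; the WARN_ON_ONCE guard is omitted here):

#include <stdio.h>

#define PV_SUPPORTED_RTC	(1 << 0)

struct pv_info {
	int paravirt_enabled;
	unsigned int features;	/* valid only if paravirt_enabled is set */
	const char *name;
};

static struct pv_info pv_info = { .paravirt_enabled = 1, .name = "Xen" };

#define paravirt_has(x)	(pv_info.features & PV_SUPPORTED_##x)

int main(void)
{
	int is_initial_domain = 0;	/* a PV domU, not dom0 */

	if (is_initial_domain)
		pv_info.features |= PV_SUPPORTED_RTC;

	/* The add_rtc_cmos() guard from the rtc.c hunk below: */
	if (pv_info.paravirt_enabled && !paravirt_has(RTC))
		printf("%s PV guest: not registering rtc_cmos\n", pv_info.name);
	return 0;
}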
arch/x86/include/asm/processor.h
@@ -472,6 +472,7 @@ static inline unsigned long current_top_of_stack(void)
 #else
 #define __cpuid			native_cpuid
 #define paravirt_enabled()	0
+#define paravirt_has(x)		0
 
 static inline void load_sp0(struct tss_struct *tss,
 			    struct thread_struct *thread)
......
arch/x86/kernel/apic/apic_numachip.c
@@ -193,20 +193,17 @@ static int __init numachip_system_init(void)
 	case 1:
 		init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
 		numachip_apic_icr_write = numachip1_apic_icr_write;
-		x86_init.pci.arch_init = pci_numachip_init;
 		break;
 	case 2:
 		init_extra_mapping_uc(NUMACHIP2_LCSR_BASE, NUMACHIP2_LCSR_SIZE);
 		numachip_apic_icr_write = numachip2_apic_icr_write;
-
-		/* Use MCFG config cycles rather than locked CF8 cycles */
-		raw_pci_ops = &pci_mmcfg;
 		break;
 	default:
 		return 0;
 	}
 
 	x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+	x86_init.pci.arch_init = pci_numachip_init;
 
 	return 0;
 }
......
arch/x86/kernel/cpu/mcheck/mce.c
@@ -999,6 +999,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	int flags = MF_ACTION_REQUIRED;
 	int lmce = 0;
 
+	/* If this CPU is offline, just bail out. */
+	if (cpu_is_offline(smp_processor_id())) {
+		u64 mcgstatus;
+
+		mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+		if (mcgstatus & MCG_STATUS_RIPV) {
+			mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+			return;
+		}
+	}
+
 	ist_enter(regs);
 
 	this_cpu_inc(mce_exception_count);
......
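Why the early bail-out works: a CPU taken offline by hotplug can still receive the broadcast machine-check exception, and letting it join the rendezvous with the online CPUs is what crashed. If MCG_STATUS.RIPV reports that the interrupted context is resumable, the handler acknowledges the MCE and simply returns. A user-space model with stubbed MSR accessors (names mirror the hunk; nothing here is kernel API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MCG_STATUS_RIPV	(1ULL << 0)	/* restart IP valid */

static uint64_t fake_mcg_status = MCG_STATUS_RIPV;	/* fake MSR */
static bool cpu_offline = true;

static uint64_t mce_rdmsrl(void)     { return fake_mcg_status; }
static void mce_wrmsrl(uint64_t v)   { fake_mcg_status = v; }

static void do_machine_check_model(void)
{
	if (cpu_offline) {
		uint64_t mcgstatus = mce_rdmsrl();

		if (mcgstatus & MCG_STATUS_RIPV) {
			mce_wrmsrl(0);	/* ack the MCE, resume, skip rendezvous */
			printf("offline CPU: bailed out of #MC\n");
			return;
		}
	}
	printf("online CPU: entering rendezvous\n");
}

int main(void) { do_machine_check_model(); return 0; }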
arch/x86/kernel/rtc.c
@@ -200,6 +200,9 @@ static __init int add_rtc_cmos(void)
 	}
 #endif
 
+	if (paravirt_enabled() && !paravirt_has(RTC))
+		return -ENODEV;
+
 	platform_device_register(&rtc_device);
 	dev_info(&rtc_device.dev,
 		 "registered platform RTC device (no PNP device found)\n");
......
arch/x86/lguest/boot.c
@@ -1414,6 +1414,7 @@ __init void lguest_init(void)
 	pv_info.kernel_rpl = 1;
 	/* Everyone except Xen runs with this set. */
 	pv_info.shared_kernel_pmd = 1;
+	pv_info.features = 0;
 
 	/*
 	 * We set up all the lguest overrides for sensitive operations.  These
......
arch/x86/xen/enlighten.c
@@ -1192,7 +1192,7 @@ static const struct pv_info xen_info __initconst = {
 #ifdef CONFIG_X86_64
 	.extra_user_64bit_cs = FLAT_USER_CS64,
 #endif
-
+	.features = 0,
 	.name = "Xen",
 };
@@ -1535,6 +1535,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	/* Install Xen paravirt ops */
 	pv_info = xen_info;
+	if (xen_initial_domain())
+		pv_info.features |= PV_SUPPORTED_RTC;
 	pv_init_ops = xen_init_ops;
 	pv_apic_ops = xen_apic_ops;
 
 	if (!xen_pvh_domain()) {
@@ -1886,8 +1888,10 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
 
 static void xen_set_cpu_features(struct cpuinfo_x86 *c)
 {
-	if (xen_pv_domain())
+	if (xen_pv_domain()) {
 		clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+		set_cpu_cap(c, X86_FEATURE_XENPV);
+	}
 }
 
 const struct hypervisor_x86 x86_hyper_xen = {
......