Commit 2f6474e4 authored by Thomas Gleixner's avatar Thomas Gleixner

x86/entry: Switch XEN/PV hypercall entry to IDTENTRY

Convert the XEN/PV hypercall to IDTENTRY:

  - Emit the ASM stub with DECLARE_IDTENTRY
  - Remove the ASM idtentry in 64-bit
  - Remove the open coded ASM entry code in 32-bit
  - Remove the old prototypes

The handler stubs need to stay in ASM code as they need corner case handling
and adjustment of the stack pointer.

Provide a new C function which invokes the entry/exit handling and calls
into the XEN handler on the interrupt stack if required.

The exit code is slightly different from the regular idtentry_exit() on
non-preemptible kernels. If the hypercall is preemptible and need_resched()
is set then XEN provides a preempt hypercall scheduling function.

Move this functionality into the entry code so it can use the existing
idtentry functionality.

[ mingo: Build fixes. ]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Juergen Gross <jgross@suse.com>
Tested-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20200521202118.055270078@linutronix.de
parent 1de16e0c
...@@ -27,6 +27,11 @@ ...@@ -27,6 +27,11 @@
#include <linux/syscalls.h> #include <linux/syscalls.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#ifdef CONFIG_XEN_PV
#include <xen/xen-ops.h>
#include <xen/events.h>
#endif
#include <asm/desc.h> #include <asm/desc.h>
#include <asm/traps.h> #include <asm/traps.h>
#include <asm/vdso.h> #include <asm/vdso.h>
...@@ -35,6 +40,7 @@ ...@@ -35,6 +40,7 @@
#include <asm/nospec-branch.h> #include <asm/nospec-branch.h>
#include <asm/io_bitmap.h> #include <asm/io_bitmap.h>
#include <asm/syscall.h> #include <asm/syscall.h>
#include <asm/irq_stack.h>
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h> #include <trace/events/syscalls.h>
...@@ -680,3 +686,75 @@ void noinstr idtentry_exit_user(struct pt_regs *regs) ...@@ -680,3 +686,75 @@ void noinstr idtentry_exit_user(struct pt_regs *regs)
prepare_exit_to_usermode(regs); prepare_exit_to_usermode(regs);
} }
#ifdef CONFIG_XEN_PV
#ifndef CONFIG_PREEMPTION
/*
* Some hypercalls issued by the toolstack can take many 10s of
* seconds. Allow tasks running hypercalls via the privcmd driver to
* be voluntarily preempted even if full kernel preemption is
* disabled.
*
* Such preemptible hypercalls are bracketed by
* xen_preemptible_hcall_begin() and xen_preemptible_hcall_end()
* calls.
*/
DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
/*
* In case of scheduling the flag must be cleared and restored after
* returning from schedule as the task might move to a different CPU.
*/
/*
 * Read and clear the per-CPU "inside preemptible hypercall" flag.
 *
 * Returns the previous value so the caller can restore it after a
 * possible reschedule. The flag must be cleared before scheduling
 * because the task may resume on a different CPU (see the comment
 * above: the flag is per-CPU state, not per-task state).
 */
static __always_inline bool get_and_clear_inhcall(void)
{
bool inhcall = __this_cpu_read(xen_in_preemptible_hcall);
__this_cpu_write(xen_in_preemptible_hcall, false);
return inhcall;
}
/*
 * Restore the per-CPU "inside preemptible hypercall" flag to the value
 * previously obtained from get_and_clear_inhcall(). Writes to the
 * current CPU, which may differ from the CPU the flag was read on.
 */
static __always_inline void restore_inhcall(bool inhcall)
{
__this_cpu_write(xen_in_preemptible_hcall, inhcall);
}
#else
static __always_inline bool get_and_clear_inhcall(void) { return false; }
static __always_inline void restore_inhcall(bool inhcall) { }
#endif
/*
 * Core of the XEN PV event channel upcall: account the interrupt and
 * dispatch to the XEN event channel handler.
 *
 * Bracketed by irq_enter_rcu()/irq_exit_rcu() so it can run on the
 * interrupt stack via run_on_irqstack_cond() from the caller below.
 * The ordering is fixed: irq_enter_rcu() must precede the handler and
 * irq_exit_rcu() must follow it.
 */
static void __xen_pv_evtchn_do_upcall(void)
{
irq_enter_rcu();
inc_irq_stat(irq_hv_callback_count);
xen_hvm_evtchn_do_upcall();
irq_exit_rcu();
}
/*
 * C entry point for the XEN PV hypercall/event callback, invoked from the
 * ASM stubs (xen_asm_exc_xen_hypervisor_callback / exc_xen_hypervisor_callback).
 *
 * Performs the regular idtentry enter/exit handling around the actual
 * upcall, running the handler on the interrupt stack if required.
 *
 * Exit differs from plain idtentry_exit() on non-preemptible kernels:
 * if the interrupted context was inside a preemptible hypercall
 * (xen_in_preemptible_hcall set) we call idtentry_exit_cond_resched()
 * so long-running toolstack hypercalls can be voluntarily preempted.
 *
 * noinstr: only the explicitly bracketed instrumentation_begin()/
 * instrumentation_end() sections may be instrumented.
 */
__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
{
	struct pt_regs *old_regs;
	bool inhcall, rcu_exit;

	rcu_exit = idtentry_enter_cond_rcu(regs);
	old_regs = set_irq_regs(regs);

	instrumentation_begin();
	run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, NULL, regs);
	/*
	 * Fix: this must be instrumentation_end(), not a second
	 * instrumentation_begin(). An unbalanced begin leaves the rest of
	 * this noinstr function marked instrumentable, which the objtool
	 * noinstr validation rejects.
	 */
	instrumentation_end();

	set_irq_regs(old_regs);

	inhcall = get_and_clear_inhcall();
	if (inhcall && !WARN_ON_ONCE(rcu_exit)) {
		instrumentation_begin();
		idtentry_exit_cond_resched(regs, true);
		instrumentation_end();
		restore_inhcall(inhcall);
	} else {
		idtentry_exit_cond_rcu(regs, rcu_exit);
	}
}
#endif /* CONFIG_XEN_PV */
...@@ -1298,7 +1298,13 @@ SYM_CODE_END(native_iret) ...@@ -1298,7 +1298,13 @@ SYM_CODE_END(native_iret)
#endif #endif
#ifdef CONFIG_XEN_PV #ifdef CONFIG_XEN_PV
SYM_FUNC_START(xen_hypervisor_callback) /*
* See comment in entry_64.S for further explanation
*
* Note: This is not an actual IDT entry point. It's a XEN specific entry
* point and therefore named to match the 64-bit trampoline counterpart.
*/
SYM_FUNC_START(xen_asm_exc_xen_hypervisor_callback)
/* /*
* Check to see if we got the event in the critical * Check to see if we got the event in the critical
* region in xen_iret_direct, after we've reenabled * region in xen_iret_direct, after we've reenabled
...@@ -1315,14 +1321,11 @@ SYM_FUNC_START(xen_hypervisor_callback) ...@@ -1315,14 +1321,11 @@ SYM_FUNC_START(xen_hypervisor_callback)
pushl $-1 /* orig_ax = -1 => not a system call */ pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL SAVE_ALL
ENCODE_FRAME_POINTER ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
mov %esp, %eax mov %esp, %eax
call xen_evtchn_do_upcall call xen_pv_evtchn_do_upcall
#ifndef CONFIG_PREEMPTION jmp handle_exception_return
call xen_maybe_preempt_hcall SYM_FUNC_END(xen_asm_exc_xen_hypervisor_callback)
#endif
jmp ret_from_intr
SYM_FUNC_END(xen_hypervisor_callback)
/* /*
* Hypervisor uses this for application faults while it executes. * Hypervisor uses this for application faults while it executes.
...@@ -1464,6 +1467,7 @@ SYM_CODE_START_LOCAL_NOALIGN(handle_exception) ...@@ -1464,6 +1467,7 @@ SYM_CODE_START_LOCAL_NOALIGN(handle_exception)
movl %esp, %eax # pt_regs pointer movl %esp, %eax # pt_regs pointer
CALL_NOSPEC edi CALL_NOSPEC edi
handle_exception_return:
#ifdef CONFIG_VM86 #ifdef CONFIG_VM86
movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
movb PT_CS(%esp), %al movb PT_CS(%esp), %al
......
...@@ -1067,10 +1067,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt ...@@ -1067,10 +1067,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1 idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1
#ifdef CONFIG_XEN_PV
idtentry 512 /* dummy */ hypervisor_callback xen_do_hypervisor_callback has_error_code=0
#endif
/* /*
* Reload gs selector with exception handling * Reload gs selector with exception handling
* edi: new selector * edi: new selector
...@@ -1158,9 +1154,10 @@ SYM_FUNC_END(asm_call_on_stack) ...@@ -1158,9 +1154,10 @@ SYM_FUNC_END(asm_call_on_stack)
* So, on entry to the handler we detect whether we interrupted an * So, on entry to the handler we detect whether we interrupted an
* existing activation in its critical region -- if so, we pop the current * existing activation in its critical region -- if so, we pop the current
* activation and restart the handler using the previous one. * activation and restart the handler using the previous one.
*
* C calling convention: exc_xen_hypervisor_callback(struct *pt_regs)
*/ */
/* do_hypervisor_callback(struct *pt_regs) */ SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback)
SYM_CODE_START_LOCAL(xen_do_hypervisor_callback)
/* /*
* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
...@@ -1170,15 +1167,10 @@ SYM_CODE_START_LOCAL(xen_do_hypervisor_callback) ...@@ -1170,15 +1167,10 @@ SYM_CODE_START_LOCAL(xen_do_hypervisor_callback)
movq %rdi, %rsp /* we don't return, adjust the stack frame */ movq %rdi, %rsp /* we don't return, adjust the stack frame */
UNWIND_HINT_REGS UNWIND_HINT_REGS
ENTER_IRQ_STACK old_rsp=%r10 call xen_pv_evtchn_do_upcall
call xen_evtchn_do_upcall
LEAVE_IRQ_STACK
#ifndef CONFIG_PREEMPTION jmp error_return
call xen_maybe_preempt_hcall SYM_CODE_END(exc_xen_hypervisor_callback)
#endif
jmp error_exit
SYM_CODE_END(xen_do_hypervisor_callback)
/* /*
* Hypervisor uses this for application faults while it executes. * Hypervisor uses this for application faults while it executes.
......
...@@ -165,6 +165,21 @@ __visible noinstr void func(struct pt_regs *regs) ...@@ -165,6 +165,21 @@ __visible noinstr void func(struct pt_regs *regs)
#define DEFINE_IDTENTRY_RAW_ERRORCODE(func) \ #define DEFINE_IDTENTRY_RAW_ERRORCODE(func) \
__visible noinstr void func(struct pt_regs *regs, unsigned long error_code) __visible noinstr void func(struct pt_regs *regs, unsigned long error_code)
/**
* DECLARE_IDTENTRY_XENCB - Declare functions for XEN HV callback entry point
* @vector: Vector number (ignored for C)
* @func: Function name of the entry point
*
* Declares three functions:
* - The ASM entry point: asm_##func
* - The XEN PV trap entry point: xen_##func (maybe unused)
* - The C handler called from the ASM entry point
*
* Maps to DECLARE_IDTENTRY(). Distinct entry point to handle the 32/64-bit
* difference
*/
#define DECLARE_IDTENTRY_XENCB(vector, func) \
DECLARE_IDTENTRY(vector, func)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/** /**
...@@ -307,6 +322,9 @@ __visible noinstr void func(struct pt_regs *regs, \ ...@@ -307,6 +322,9 @@ __visible noinstr void func(struct pt_regs *regs, \
# define DECLARE_IDTENTRY_DF(vector, func) \ # define DECLARE_IDTENTRY_DF(vector, func) \
idtentry_df vector asm_##func func idtentry_df vector asm_##func func
# define DECLARE_IDTENTRY_XENCB(vector, func) \
DECLARE_IDTENTRY(vector, func)
#else #else
# define DECLARE_IDTENTRY_MCE(vector, func) \ # define DECLARE_IDTENTRY_MCE(vector, func) \
DECLARE_IDTENTRY(vector, func) DECLARE_IDTENTRY(vector, func)
...@@ -317,6 +335,9 @@ __visible noinstr void func(struct pt_regs *regs, \ ...@@ -317,6 +335,9 @@ __visible noinstr void func(struct pt_regs *regs, \
/* No ASM emitted for DF as this goes through a C shim */ /* No ASM emitted for DF as this goes through a C shim */
# define DECLARE_IDTENTRY_DF(vector, func) # define DECLARE_IDTENTRY_DF(vector, func)
/* No ASM emitted for XEN hypervisor callback */
# define DECLARE_IDTENTRY_XENCB(vector, func)
#endif #endif
/* No ASM code emitted for NMI */ /* No ASM code emitted for NMI */
...@@ -337,6 +358,13 @@ __visible noinstr void func(struct pt_regs *regs, \ ...@@ -337,6 +358,13 @@ __visible noinstr void func(struct pt_regs *regs, \
* This avoids duplicate defines and ensures that everything is consistent. * This avoids duplicate defines and ensures that everything is consistent.
*/ */
/*
* Dummy trap number so the low level ASM macro vector number checks do not
* match which results in emitting plain IDTENTRY stubs without bells and
* whistels.
*/
#define X86_TRAP_OTHER 0xFFFF
/* Simple exception entry points. No hardware error code */ /* Simple exception entry points. No hardware error code */
DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error);
DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow);
...@@ -376,4 +404,10 @@ DECLARE_IDTENTRY_XEN(X86_TRAP_DB, debug); ...@@ -376,4 +404,10 @@ DECLARE_IDTENTRY_XEN(X86_TRAP_DB, debug);
/* #DF */ /* #DF */
DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault);
#ifdef CONFIG_XEN_PV
DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback);
#endif
#undef X86_TRAP_OTHER
#endif #endif
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/acpi.h> #include <asm/acpi.h>
#include <asm/numa.h> #include <asm/numa.h>
#include <asm/idtentry.h>
#include <asm/xen/hypervisor.h> #include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h> #include <asm/xen/hypercall.h>
...@@ -993,7 +994,8 @@ static void __init xen_pvmmu_arch_setup(void) ...@@ -993,7 +994,8 @@ static void __init xen_pvmmu_arch_setup(void)
HYPERVISOR_vm_assist(VMASST_CMD_enable, HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_pae_extended_cr3); VMASST_TYPE_pae_extended_cr3);
if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || if (register_callback(CALLBACKTYPE_event,
xen_asm_exc_xen_hypervisor_callback) ||
register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
BUG(); BUG();
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <linux/pgtable.h> #include <linux/pgtable.h>
#include <asm/paravirt.h> #include <asm/paravirt.h>
#include <asm/idtentry.h>
#include <asm/desc.h> #include <asm/desc.h>
#include <asm/cpu.h> #include <asm/cpu.h>
...@@ -348,7 +349,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ...@@ -348,7 +349,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->gs_base_kernel = per_cpu_offset(cpu); ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif #endif
ctxt->event_callback_eip = ctxt->event_callback_eip =
(unsigned long)xen_hypervisor_callback; (unsigned long)xen_asm_exc_xen_hypervisor_callback;
ctxt->failsafe_callback_eip = ctxt->failsafe_callback_eip =
(unsigned long)xen_failsafe_callback; (unsigned long)xen_failsafe_callback;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
......
...@@ -93,7 +93,7 @@ xen_iret_start_crit: ...@@ -93,7 +93,7 @@ xen_iret_start_crit:
/* /*
* If there's something pending, mask events again so we can * If there's something pending, mask events again so we can
* jump back into xen_hypervisor_callback. Otherwise do not * jump back into exc_xen_hypervisor_callback. Otherwise do not
* touch XEN_vcpu_info_mask. * touch XEN_vcpu_info_mask.
*/ */
jne 1f jne 1f
...@@ -113,7 +113,7 @@ iret_restore_end: ...@@ -113,7 +113,7 @@ iret_restore_end:
* Events are masked, so jumping out of the critical region is * Events are masked, so jumping out of the critical region is
* OK. * OK.
*/ */
je xen_hypervisor_callback je xen_asm_exc_xen_hypervisor_callback
1: iret 1: iret
xen_iret_end_crit: xen_iret_end_crit:
...@@ -127,7 +127,7 @@ SYM_CODE_END(xen_iret) ...@@ -127,7 +127,7 @@ SYM_CODE_END(xen_iret)
.globl xen_iret_start_crit, xen_iret_end_crit .globl xen_iret_start_crit, xen_iret_end_crit
/* /*
* This is called by xen_hypervisor_callback in entry_32.S when it sees * This is called by xen_asm_exc_xen_hypervisor_callback in entry_32.S when it sees
* that the EIP at the time of interrupt was between * that the EIP at the time of interrupt was between
* xen_iret_start_crit and xen_iret_end_crit. * xen_iret_start_crit and xen_iret_end_crit.
* *
...@@ -144,7 +144,7 @@ SYM_CODE_END(xen_iret) ...@@ -144,7 +144,7 @@ SYM_CODE_END(xen_iret)
* eflags } * eflags }
* cs } nested exception info * cs } nested exception info
* eip } * eip }
* return address : (into xen_hypervisor_callback) * return address : (into xen_asm_exc_xen_hypervisor_callback)
* *
* In order to deliver the nested exception properly, we need to discard the * In order to deliver the nested exception properly, we need to discard the
* nested exception frame such that when we handle the exception, we do it * nested exception frame such that when we handle the exception, we do it
...@@ -152,7 +152,8 @@ SYM_CODE_END(xen_iret) ...@@ -152,7 +152,8 @@ SYM_CODE_END(xen_iret)
* *
* The only caveat is that if the outer eax hasn't been restored yet (i.e. * The only caveat is that if the outer eax hasn't been restored yet (i.e.
* it's still on stack), we need to restore its value here. * it's still on stack), we need to restore its value here.
*/ */
.pushsection .noinstr.text, "ax"
SYM_CODE_START(xen_iret_crit_fixup) SYM_CODE_START(xen_iret_crit_fixup)
/* /*
* Paranoia: Make sure we're really coming from kernel space. * Paranoia: Make sure we're really coming from kernel space.
...@@ -181,3 +182,4 @@ SYM_CODE_START(xen_iret_crit_fixup) ...@@ -181,3 +182,4 @@ SYM_CODE_START(xen_iret_crit_fixup)
2: 2:
ret ret
SYM_CODE_END(xen_iret_crit_fixup) SYM_CODE_END(xen_iret_crit_fixup)
.popsection
...@@ -54,7 +54,7 @@ xen_pv_trap asm_exc_simd_coprocessor_error ...@@ -54,7 +54,7 @@ xen_pv_trap asm_exc_simd_coprocessor_error
#ifdef CONFIG_IA32_EMULATION #ifdef CONFIG_IA32_EMULATION
xen_pv_trap entry_INT80_compat xen_pv_trap entry_INT80_compat
#endif #endif
xen_pv_trap hypervisor_callback xen_pv_trap asm_exc_xen_hypervisor_callback
__INIT __INIT
SYM_CODE_START(xen_early_idt_handler_array) SYM_CODE_START(xen_early_idt_handler_array)
......
...@@ -8,7 +8,6 @@ ...@@ -8,7 +8,6 @@
#include <xen/xen-ops.h> #include <xen/xen-ops.h>
/* These are code, but not functions. Defined in entry.S */ /* These are code, but not functions. Defined in entry.S */
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[]; extern const char xen_failsafe_callback[];
void xen_sysenter_target(void); void xen_sysenter_target(void);
......
# SPDX-License-Identifier: GPL-2.0 # SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-y += grant-table.o features.o balloon.o manage.o preempt.o time.o obj-y += grant-table.o features.o balloon.o manage.o time.o
obj-y += mem-reservation.o obj-y += mem-reservation.o
obj-y += events/ obj-y += events/
obj-y += xenbus/ obj-y += xenbus/
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Preemptible hypercalls
*
* Copyright (C) 2014 Citrix Systems R&D ltd.
*/
#include <linux/sched.h>
#include <xen/xen-ops.h>
#ifndef CONFIG_PREEMPTION
/*
* Some hypercalls issued by the toolstack can take many 10s of
* seconds. Allow tasks running hypercalls via the privcmd driver to
* be voluntarily preempted even if full kernel preemption is
* disabled.
*
* Such preemptible hypercalls are bracketed by
* xen_preemptible_hcall_begin() and xen_preemptible_hcall_end()
* calls.
*/
DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
/*
 * Voluntarily reschedule in the middle of a long-running preemptible
 * hypercall on non-preemptible kernels.
 *
 * Called from the ASM hypercall return path with interrupts disabled
 * (hence the explicit local_irq_enable()/local_irq_disable() bracket
 * around cond_resched()). Only acts when the current context is inside
 * a preemptible hypercall (xen_in_preemptible_hcall) and a reschedule
 * is pending.
 *
 * The per-CPU flag is cleared before scheduling and set again after,
 * because the task may resume on a different CPU (see comment below).
 *
 * NOTE(review): this is the legacy drivers/xen/preempt.c implementation;
 * the diff above removes preempt.o from the Makefile and moves this
 * logic into the x86 entry code (get_and_clear_inhcall()/restore_inhcall()).
 */
asmlinkage __visible void xen_maybe_preempt_hcall(void)
{
if (unlikely(__this_cpu_read(xen_in_preemptible_hcall)
&& need_resched())) {
/*
* Clear flag as we may be rescheduled on a different
* cpu.
*/
__this_cpu_write(xen_in_preemptible_hcall, false);
local_irq_enable();
cond_resched();
local_irq_disable();
__this_cpu_write(xen_in_preemptible_hcall, true);
}
}
#endif /* CONFIG_PREEMPTION */
...@@ -215,17 +215,7 @@ bool xen_running_on_version_or_later(unsigned int major, unsigned int minor); ...@@ -215,17 +215,7 @@ bool xen_running_on_version_or_later(unsigned int major, unsigned int minor);
void xen_efi_runtime_setup(void); void xen_efi_runtime_setup(void);
#ifdef CONFIG_PREEMPTION #if defined(CONFIG_XEN_PV) && !defined(CONFIG_PREEMPTION)
static inline void xen_preemptible_hcall_begin(void)
{
}
static inline void xen_preemptible_hcall_end(void)
{
}
#else
DECLARE_PER_CPU(bool, xen_in_preemptible_hcall); DECLARE_PER_CPU(bool, xen_in_preemptible_hcall);
...@@ -239,6 +229,11 @@ static inline void xen_preemptible_hcall_end(void) ...@@ -239,6 +229,11 @@ static inline void xen_preemptible_hcall_end(void)
__this_cpu_write(xen_in_preemptible_hcall, false); __this_cpu_write(xen_in_preemptible_hcall, false);
} }
#endif /* CONFIG_PREEMPTION */ #else
static inline void xen_preemptible_hcall_begin(void) { }
static inline void xen_preemptible_hcall_end(void) { }
#endif /* CONFIG_XEN_PV && !CONFIG_PREEMPTION */
#endif /* INCLUDE_XEN_OPS_H */ #endif /* INCLUDE_XEN_OPS_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment