Commit 252b95c0 authored by Linus Torvalds

Merge tag 'for-linus-4.11-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen updates from Juergen Gross:
 "Xen features and fixes:

   - a series from Boris Ostrovsky adding support for booting Linux as
     Xen PVH guest

   - a series from Juergen Gross streamlining the xenbus driver

   - a series from Paul Durrant adding support for the new device model
     hypercall

   - several small corrections"

* tag 'for-linus-4.11-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen/privcmd: add IOCTL_PRIVCMD_RESTRICT
  xen/privcmd: Add IOCTL_PRIVCMD_DM_OP
  xen/privcmd: return -ENOTTY for unimplemented IOCTLs
  xen: optimize xenbus driver for multiple concurrent xenstore accesses
  xen: modify xenstore watch event interface
  xen: clean up xenbus internal headers
  xenbus: Neaten xenbus_va_dev_error
  xen/pvh: Use Xen's emergency_restart op for PVH guests
  xen/pvh: Enable CPU hotplug
  xen/pvh: PVH guests always have PV devices
  xen/pvh: Initialize grant table for PVH guests
  xen/pvh: Make sure we don't use ACPI_IRQ_MODEL_PIC for SCI
  xen/pvh: Bootstrap PVH guest
  xen/pvh: Import PVH-related Xen public interfaces
  xen/x86: Remove PVH support
  x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C
  xen/manage: correct return value check on xenbus_scanf()
  x86/xen: Fix APIC id mismatch warning on Intel
  xen/netback: set default upper limit of tx/rx queues to 8
  xen/netfront: set default upper limit of tx/rx queues to 8
parents b8989bcc 4610d240
...@@ -457,4 +457,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); ...@@ -457,4 +457,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist); EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist);
EXPORT_SYMBOL_GPL(HYPERVISOR_dm_op);
EXPORT_SYMBOL_GPL(privcmd_call); EXPORT_SYMBOL_GPL(privcmd_call);
...@@ -92,6 +92,7 @@ HYPERCALL1(tmem_op); ...@@ -92,6 +92,7 @@ HYPERCALL1(tmem_op);
HYPERCALL1(platform_op_raw); HYPERCALL1(platform_op_raw);
HYPERCALL2(multicall); HYPERCALL2(multicall);
HYPERCALL2(vm_assist); HYPERCALL2(vm_assist);
HYPERCALL3(dm_op);
ENTRY(privcmd_call) ENTRY(privcmd_call)
stmdb sp!, {r4} stmdb sp!, {r4}
......
...@@ -84,6 +84,7 @@ HYPERCALL1(tmem_op); ...@@ -84,6 +84,7 @@ HYPERCALL1(tmem_op);
HYPERCALL1(platform_op_raw); HYPERCALL1(platform_op_raw);
HYPERCALL2(multicall); HYPERCALL2(multicall);
HYPERCALL2(vm_assist); HYPERCALL2(vm_assist);
HYPERCALL3(dm_op);
ENTRY(privcmd_call) ENTRY(privcmd_call)
mov x16, x0 mov x16, x0
......
...@@ -472,6 +472,13 @@ HYPERVISOR_xenpmu_op(unsigned int op, void *arg) ...@@ -472,6 +472,13 @@ HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
return _hypercall2(int, xenpmu_op, op, arg); return _hypercall2(int, xenpmu_op, op, arg);
} }
static inline int
HYPERVISOR_dm_op(
domid_t dom, unsigned int nr_bufs, void *bufs)
{
return _hypercall3(int, dm_op, dom, nr_bufs, bufs);
}
static inline void static inline void
MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
{ {
......
...@@ -53,5 +53,5 @@ config XEN_DEBUG_FS ...@@ -53,5 +53,5 @@ config XEN_DEBUG_FS
config XEN_PVH config XEN_PVH
bool "Support for running as a PVH guest" bool "Support for running as a PVH guest"
depends on X86_64 && XEN && XEN_PVHVM depends on XEN && XEN_PVHVM && ACPI
def_bool n def_bool n
...@@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o ...@@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
obj-$(CONFIG_XEN_DOM0) += vga.o obj-$(CONFIG_XEN_DOM0) += vga.o
obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
obj-$(CONFIG_XEN_EFI) += efi.o obj-$(CONFIG_XEN_EFI) += efi.o
obj-$(CONFIG_XEN_PVH) += xen-pvh.o
...@@ -145,7 +145,7 @@ static void xen_silent_inquire(int apicid) ...@@ -145,7 +145,7 @@ static void xen_silent_inquire(int apicid)
static int xen_cpu_present_to_apicid(int cpu) static int xen_cpu_present_to_apicid(int cpu)
{ {
if (cpu_present(cpu)) if (cpu_present(cpu))
return xen_get_apic_id(xen_apic_read(APIC_ID)); return cpu_data(cpu).apicid;
else else
return BAD_APICID; return BAD_APICID;
} }
......
This diff is collapsed.
...@@ -1792,10 +1792,6 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot, ...@@ -1792,10 +1792,6 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot,
unsigned long pfn = __pa(addr) >> PAGE_SHIFT; unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
pte_t pte = pfn_pte(pfn, prot); pte_t pte = pfn_pte(pfn, prot);
/* For PVH no need to set R/O or R/W to pin them or unpin them. */
if (xen_feature(XENFEAT_auto_translated_physmap))
return;
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
BUG(); BUG();
} }
...@@ -1902,8 +1898,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, ...@@ -1902,8 +1898,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
* level2_ident_pgt, and level2_kernel_pgt. This means that only the * level2_ident_pgt, and level2_kernel_pgt. This means that only the
* kernel has a physical mapping to start with - but that's enough to * kernel has a physical mapping to start with - but that's enough to
* get __va working. We need to fill in the rest of the physical * get __va working. We need to fill in the rest of the physical
* mapping once some sort of allocator has been set up. NOTE: for * mapping once some sort of allocator has been set up.
* PVH, the page tables are native.
*/ */
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
{ {
...@@ -2812,16 +2807,6 @@ static int do_remap_gfn(struct vm_area_struct *vma, ...@@ -2812,16 +2807,6 @@ static int do_remap_gfn(struct vm_area_struct *vma,
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
if (xen_feature(XENFEAT_auto_translated_physmap)) {
#ifdef CONFIG_XEN_PVH
/* We need to update the local page tables and the xen HAP */
return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
prot, domid, pages);
#else
return -EINVAL;
#endif
}
rmd.mfn = gfn; rmd.mfn = gfn;
rmd.prot = prot; rmd.prot = prot;
/* We use the err_ptr to indicate if there we are doing a contiguous /* We use the err_ptr to indicate if there we are doing a contiguous
...@@ -2915,10 +2900,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, ...@@ -2915,10 +2900,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
if (!pages || !xen_feature(XENFEAT_auto_translated_physmap)) if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
return 0; return 0;
#ifdef CONFIG_XEN_PVH
return xen_xlate_unmap_gfn_range(vma, numpgs, pages);
#else
return -EINVAL; return -EINVAL;
#endif
} }
EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
...@@ -73,8 +73,8 @@ bool xen_has_pv_devices(void) ...@@ -73,8 +73,8 @@ bool xen_has_pv_devices(void)
if (!xen_domain()) if (!xen_domain())
return false; return false;
/* PV domains always have them. */ /* PV and PVH domains always have them. */
if (xen_pv_domain()) if (xen_pv_domain() || xen_pvh_domain())
return true; return true;
/* And user has xen_platform_pci=0 set in guest config as /* And user has xen_platform_pci=0 set in guest config as
......
...@@ -914,39 +914,6 @@ char * __init xen_memory_setup(void) ...@@ -914,39 +914,6 @@ char * __init xen_memory_setup(void)
return "Xen"; return "Xen";
} }
/*
* Machine specific memory setup for auto-translated guests.
*/
char * __init xen_auto_xlated_memory_setup(void)
{
struct xen_memory_map memmap;
int i;
int rc;
memmap.nr_entries = ARRAY_SIZE(xen_e820_map);
set_xen_guest_handle(memmap.buffer, xen_e820_map);
rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
if (rc < 0)
panic("No memory map (%d)\n", rc);
xen_e820_map_entries = memmap.nr_entries;
sanitize_e820_map(xen_e820_map, ARRAY_SIZE(xen_e820_map),
&xen_e820_map_entries);
for (i = 0; i < xen_e820_map_entries; i++)
e820_add_region(xen_e820_map[i].addr, xen_e820_map[i].size,
xen_e820_map[i].type);
/* Remove p2m info, it is not needed. */
xen_start_info->mfn_list = 0;
xen_start_info->first_p2m_pfn = 0;
xen_start_info->nr_p2m_frames = 0;
return "Xen";
}
/* /*
* Set the bit indicating "nosegneg" library variants should be used. * Set the bit indicating "nosegneg" library variants should be used.
* We only need to bother in pure 32-bit mode; compat 32-bit processes * We only need to bother in pure 32-bit mode; compat 32-bit processes
...@@ -1032,8 +999,8 @@ void __init xen_pvmmu_arch_setup(void) ...@@ -1032,8 +999,8 @@ void __init xen_pvmmu_arch_setup(void)
void __init xen_arch_setup(void) void __init xen_arch_setup(void)
{ {
xen_panic_handler_init(); xen_panic_handler_init();
if (!xen_feature(XENFEAT_auto_translated_physmap))
xen_pvmmu_arch_setup(); xen_pvmmu_arch_setup();
#ifdef CONFIG_ACPI #ifdef CONFIG_ACPI
if (!(xen_start_info->flags & SIF_INITDOMAIN)) { if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
......
...@@ -99,18 +99,8 @@ static void cpu_bringup(void) ...@@ -99,18 +99,8 @@ static void cpu_bringup(void)
local_irq_enable(); local_irq_enable();
} }
/* asmlinkage __visible void cpu_bringup_and_idle(void)
* Note: cpu parameter is only relevant for PVH. The reason for passing it
* is we can't do smp_processor_id until the percpu segments are loaded, for
* which we need the cpu number! So we pass it in rdi as first parameter.
*/
asmlinkage __visible void cpu_bringup_and_idle(int cpu)
{ {
#ifdef CONFIG_XEN_PVH
if (xen_feature(XENFEAT_auto_translated_physmap) &&
xen_feature(XENFEAT_supervisor_mode_kernel))
xen_pvh_secondary_vcpu_init(cpu);
#endif
cpu_bringup(); cpu_bringup();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
} }
...@@ -404,61 +394,47 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ...@@ -404,61 +394,47 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
gdt = get_cpu_gdt_table(cpu); gdt = get_cpu_gdt_table(cpu);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/* Note: PVH is not yet supported on x86_32. */
ctxt->user_regs.fs = __KERNEL_PERCPU; ctxt->user_regs.fs = __KERNEL_PERCPU;
ctxt->user_regs.gs = __KERNEL_STACK_CANARY; ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif #endif
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
if (!xen_feature(XENFEAT_auto_translated_physmap)) { ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; ctxt->flags = VGCF_IN_KERNEL;
ctxt->flags = VGCF_IN_KERNEL; ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ ctxt->user_regs.ds = __USER_DS;
ctxt->user_regs.ds = __USER_DS; ctxt->user_regs.es = __USER_DS;
ctxt->user_regs.es = __USER_DS; ctxt->user_regs.ss = __KERNEL_DS;
ctxt->user_regs.ss = __KERNEL_DS;
xen_copy_trap_info(ctxt->trap_ctxt); xen_copy_trap_info(ctxt->trap_ctxt);
ctxt->ldt_ents = 0; ctxt->ldt_ents = 0;
BUG_ON((unsigned long)gdt & ~PAGE_MASK); BUG_ON((unsigned long)gdt & ~PAGE_MASK);
gdt_mfn = arbitrary_virt_to_mfn(gdt); gdt_mfn = arbitrary_virt_to_mfn(gdt);
make_lowmem_page_readonly(gdt); make_lowmem_page_readonly(gdt);
make_lowmem_page_readonly(mfn_to_virt(gdt_mfn)); make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
ctxt->gdt_frames[0] = gdt_mfn; ctxt->gdt_frames[0] = gdt_mfn;
ctxt->gdt_ents = GDT_ENTRIES; ctxt->gdt_ents = GDT_ENTRIES;
ctxt->kernel_ss = __KERNEL_DS; ctxt->kernel_ss = __KERNEL_DS;
ctxt->kernel_sp = idle->thread.sp0; ctxt->kernel_sp = idle->thread.sp0;
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
ctxt->event_callback_cs = __KERNEL_CS; ctxt->event_callback_cs = __KERNEL_CS;
ctxt->failsafe_callback_cs = __KERNEL_CS; ctxt->failsafe_callback_cs = __KERNEL_CS;
#else #else
ctxt->gs_base_kernel = per_cpu_offset(cpu); ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
ctxt->event_callback_eip =
(unsigned long)xen_hypervisor_callback;
ctxt->failsafe_callback_eip =
(unsigned long)xen_failsafe_callback;
ctxt->user_regs.cs = __KERNEL_CS;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
}
#ifdef CONFIG_XEN_PVH
else {
/*
* The vcpu comes on kernel page tables which have the NX pte
* bit set. This means before DS/SS is touched, NX in
* EFER must be set. Hence the following assembly glue code.
*/
ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
ctxt->user_regs.rdi = cpu;
ctxt->user_regs.rsi = true; /* entry == true */
}
#endif #endif
ctxt->event_callback_eip =
(unsigned long)xen_hypervisor_callback;
ctxt->failsafe_callback_eip =
(unsigned long)xen_failsafe_callback;
ctxt->user_regs.cs = __KERNEL_CS;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
......
...@@ -21,12 +21,4 @@ static inline int xen_smp_intr_init(unsigned int cpu) ...@@ -21,12 +21,4 @@ static inline int xen_smp_intr_init(unsigned int cpu)
static inline void xen_smp_intr_free(unsigned int cpu) {} static inline void xen_smp_intr_free(unsigned int cpu) {}
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
#ifdef CONFIG_XEN_PVH
extern void xen_pvh_early_cpu_init(int cpu, bool entry);
#else
static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
{
}
#endif
#endif #endif
...@@ -16,25 +16,6 @@ ...@@ -16,25 +16,6 @@
#include <xen/interface/xen-mca.h> #include <xen/interface/xen-mca.h>
#include <asm/xen/interface.h> #include <asm/xen/interface.h>
#ifdef CONFIG_XEN_PVH
#define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel"
/* Note the lack of 'hvm_callback_vector'. Older hypervisor will
* balk at this being part of XEN_ELFNOTE_FEATURES, so we put it in
* XEN_ELFNOTE_SUPPORTED_FEATURES which older hypervisors will ignore.
*/
#define PVH_FEATURES ((1 << XENFEAT_writable_page_tables) | \
(1 << XENFEAT_auto_translated_physmap) | \
(1 << XENFEAT_supervisor_mode_kernel) | \
(1 << XENFEAT_hvm_callback_vector))
/* The XENFEAT_writable_page_tables is not stricly necessary as we set that
* up regardless whether this CONFIG option is enabled or not, but it
* clarifies what the right flags need to be.
*/
#else
#define PVH_FEATURES_STR ""
#define PVH_FEATURES (0)
#endif
__INIT __INIT
ENTRY(startup_xen) ENTRY(startup_xen)
cld cld
...@@ -54,41 +35,6 @@ ENTRY(startup_xen) ...@@ -54,41 +35,6 @@ ENTRY(startup_xen)
__FINIT __FINIT
#ifdef CONFIG_XEN_PVH
/*
* xen_pvh_early_cpu_init() - early PVH VCPU initialization
* @cpu: this cpu number (%rdi)
* @entry: true if this is a secondary vcpu coming up on this entry
* point, false if this is the boot CPU being initialized for
* the first time (%rsi)
*
* Note: This is called as a function on the boot CPU, and is the entry point
* on the secondary CPU.
*/
ENTRY(xen_pvh_early_cpu_init)
mov %rsi, %r11
/* Gather features to see if NX implemented. */
mov $0x80000001, %eax
cpuid
mov %edx, %esi
mov $MSR_EFER, %ecx
rdmsr
bts $_EFER_SCE, %eax
bt $20, %esi
jnc 1f /* No NX, skip setting it */
bts $_EFER_NX, %eax
1: wrmsr
#ifdef CONFIG_SMP
cmp $0, %r11b
jne cpu_bringup_and_idle
#endif
ret
#endif /* CONFIG_XEN_PVH */
.pushsection .text .pushsection .text
.balign PAGE_SIZE .balign PAGE_SIZE
ENTRY(hypercall_page) ENTRY(hypercall_page)
...@@ -114,10 +60,10 @@ ENTRY(hypercall_page) ...@@ -114,10 +60,10 @@ ENTRY(hypercall_page)
#endif #endif
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,
ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) | .ascii "!writable_page_tables|pae_pgdir_above_4gb")
(1 << XENFEAT_writable_page_tables) | ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES,
(1 << XENFEAT_dom0)) .long (1 << XENFEAT_writable_page_tables) | (1 << XENFEAT_dom0))
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
......
...@@ -146,5 +146,4 @@ __visible void xen_adjust_exception_frame(void); ...@@ -146,5 +146,4 @@ __visible void xen_adjust_exception_frame(void);
extern int xen_panic_handler_init(void); extern int xen_panic_handler_init(void);
void xen_pvh_secondary_vcpu_init(int cpu);
#endif /* XEN_OPS_H */ #endif /* XEN_OPS_H */
/*
* Copyright C 2016, Oracle and/or its affiliates. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/
.code32
.text
#define _pa(x) ((x) - __START_KERNEL_map)
#include <linux/elfnote.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/asm.h>
#include <asm/boot.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <xen/interface/elfnote.h>
__HEAD
/*
* Entry point for PVH guests.
*
* Xen ABI specifies the following register state when we come here:
*
* - `ebx`: contains the physical memory address where the loader has placed
* the boot start info structure.
* - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
* - `cr4`: all bits are cleared.
* - `cs `: must be a 32-bit read/execute code segment with a base of 0
* and a limit of 0xFFFFFFFF. The selector value is unspecified.
* - `ds`, `es`: must be a 32-bit read/write data segment with a base of
* 0 and a limit of 0xFFFFFFFF. The selector values are all
* unspecified.
* - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit
* of '0x67'.
* - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared.
* Bit 8 (TF) must be cleared. Other bits are all unspecified.
*
* All other processor registers and flag bits are unspecified. The OS is in
* charge of setting up it's own stack, GDT and IDT.
*/
ENTRY(pvh_start_xen)
cld
lgdt (_pa(gdt))
mov $(__BOOT_DS),%eax
mov %eax,%ds
mov %eax,%es
mov %eax,%ss
/* Stash hvm_start_info. */
mov $_pa(pvh_start_info), %edi
mov %ebx, %esi
mov _pa(pvh_start_info_sz), %ecx
shr $2,%ecx
rep
movsl
mov $_pa(early_stack_end), %esp
/* Enable PAE mode. */
mov %cr4, %eax
orl $X86_CR4_PAE, %eax
mov %eax, %cr4
#ifdef CONFIG_X86_64
/* Enable Long mode. */
mov $MSR_EFER, %ecx
rdmsr
btsl $_EFER_LME, %eax
wrmsr
/* Enable pre-constructed page tables. */
mov $_pa(init_level4_pgt), %eax
mov %eax, %cr3
mov $(X86_CR0_PG | X86_CR0_PE), %eax
mov %eax, %cr0
/* Jump to 64-bit mode. */
ljmp $__KERNEL_CS, $_pa(1f)
/* 64-bit entry point. */
.code64
1:
call xen_prepare_pvh
/* startup_64 expects boot_params in %rsi. */
mov $_pa(pvh_bootparams), %rsi
mov $_pa(startup_64), %rax
jmp *%rax
#else /* CONFIG_X86_64 */
call mk_early_pgtbl_32
mov $_pa(initial_page_table), %eax
mov %eax, %cr3
mov %cr0, %eax
or $(X86_CR0_PG | X86_CR0_PE), %eax
mov %eax, %cr0
ljmp $__BOOT_CS, $1f
1:
call xen_prepare_pvh
mov $_pa(pvh_bootparams), %esi
/* startup_32 doesn't expect paging and PAE to be on. */
ljmp $__BOOT_CS, $_pa(2f)
2:
mov %cr0, %eax
and $~X86_CR0_PG, %eax
mov %eax, %cr0
mov %cr4, %eax
and $~X86_CR4_PAE, %eax
mov %eax, %cr4
ljmp $__BOOT_CS, $_pa(startup_32)
#endif
END(pvh_start_xen)
.section ".init.data","aw"
.balign 8
gdt:
.word gdt_end - gdt_start
.long _pa(gdt_start)
.word 0
gdt_start:
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x0000000000000000 /* reserved */
#ifdef CONFIG_X86_64
.quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* __KERNEL_CS */
#else
.quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* __KERNEL_CS */
#endif
.quad GDT_ENTRY(0xc092, 0, 0xfffff) /* __KERNEL_DS */
gdt_end:
.balign 4
early_stack:
.fill 256, 1, 0
early_stack_end:
ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
_ASM_PTR (pvh_start_xen - __START_KERNEL_map))
...@@ -38,8 +38,8 @@ struct backend_info { ...@@ -38,8 +38,8 @@ struct backend_info {
static struct kmem_cache *xen_blkif_cachep; static struct kmem_cache *xen_blkif_cachep;
static void connect(struct backend_info *); static void connect(struct backend_info *);
static int connect_ring(struct backend_info *); static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **, static void backend_changed(struct xenbus_watch *, const char *,
unsigned int); const char *);
static void xen_blkif_free(struct xen_blkif *blkif); static void xen_blkif_free(struct xen_blkif *blkif);
static void xen_vbd_free(struct xen_vbd *vbd); static void xen_vbd_free(struct xen_vbd *vbd);
...@@ -661,7 +661,7 @@ static int xen_blkbk_probe(struct xenbus_device *dev, ...@@ -661,7 +661,7 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
* ready, connect. * ready, connect.
*/ */
static void backend_changed(struct xenbus_watch *watch, static void backend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len) const char *path, const char *token)
{ {
int err; int err;
unsigned major; unsigned major;
......
...@@ -67,6 +67,7 @@ module_param(rx_drain_timeout_msecs, uint, 0444); ...@@ -67,6 +67,7 @@ module_param(rx_drain_timeout_msecs, uint, 0444);
unsigned int rx_stall_timeout_msecs = 60000; unsigned int rx_stall_timeout_msecs = 60000;
module_param(rx_stall_timeout_msecs, uint, 0444); module_param(rx_stall_timeout_msecs, uint, 0444);
#define MAX_QUEUES_DEFAULT 8
unsigned int xenvif_max_queues; unsigned int xenvif_max_queues;
module_param_named(max_queues, xenvif_max_queues, uint, 0644); module_param_named(max_queues, xenvif_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues, MODULE_PARM_DESC(max_queues,
...@@ -1622,11 +1623,12 @@ static int __init netback_init(void) ...@@ -1622,11 +1623,12 @@ static int __init netback_init(void)
if (!xen_domain()) if (!xen_domain())
return -ENODEV; return -ENODEV;
/* Allow as many queues as there are CPUs if user has not /* Allow as many queues as there are CPUs but max. 8 if user has not
* specified a value. * specified a value.
*/ */
if (xenvif_max_queues == 0) if (xenvif_max_queues == 0)
xenvif_max_queues = num_online_cpus(); xenvif_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
num_online_cpus());
if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
......
...@@ -734,7 +734,7 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) ...@@ -734,7 +734,7 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
} }
static void xen_net_rate_changed(struct xenbus_watch *watch, static void xen_net_rate_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len) const char *path, const char *token)
{ {
struct xenvif *vif = container_of(watch, struct xenvif, credit_watch); struct xenvif *vif = container_of(watch, struct xenvif, credit_watch);
struct xenbus_device *dev = xenvif_to_xenbus_device(vif); struct xenbus_device *dev = xenvif_to_xenbus_device(vif);
...@@ -791,7 +791,7 @@ static void xen_unregister_credit_watch(struct xenvif *vif) ...@@ -791,7 +791,7 @@ static void xen_unregister_credit_watch(struct xenvif *vif)
} }
static void xen_mcast_ctrl_changed(struct xenbus_watch *watch, static void xen_mcast_ctrl_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len) const char *path, const char *token)
{ {
struct xenvif *vif = container_of(watch, struct xenvif, struct xenvif *vif = container_of(watch, struct xenvif,
mcast_ctrl_watch); mcast_ctrl_watch);
...@@ -866,8 +866,8 @@ static void unregister_hotplug_status_watch(struct backend_info *be) ...@@ -866,8 +866,8 @@ static void unregister_hotplug_status_watch(struct backend_info *be)
} }
static void hotplug_status_changed(struct xenbus_watch *watch, static void hotplug_status_changed(struct xenbus_watch *watch,
const char **vec, const char *path,
unsigned int vec_size) const char *token)
{ {
struct backend_info *be = container_of(watch, struct backend_info *be = container_of(watch,
struct backend_info, struct backend_info,
......
...@@ -57,6 +57,7 @@ ...@@ -57,6 +57,7 @@
#include <xen/interface/grant_table.h> #include <xen/interface/grant_table.h>
/* Module parameters */ /* Module parameters */
#define MAX_QUEUES_DEFAULT 8
static unsigned int xennet_max_queues; static unsigned int xennet_max_queues;
module_param_named(max_queues, xennet_max_queues, uint, 0644); module_param_named(max_queues, xennet_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues, MODULE_PARM_DESC(max_queues,
...@@ -2166,11 +2167,12 @@ static int __init netif_init(void) ...@@ -2166,11 +2167,12 @@ static int __init netif_init(void)
pr_info("Initialising Xen virtual ethernet driver\n"); pr_info("Initialising Xen virtual ethernet driver\n");
/* Allow as many queues as there are CPUs if user has not /* Allow as many queues as there are CPUs inut max. 8 if user has not
* specified a value. * specified a value.
*/ */
if (xennet_max_queues == 0) if (xennet_max_queues == 0)
xennet_max_queues = num_online_cpus(); xennet_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
num_online_cpus());
return xenbus_register_frontend(&netfront_driver); return xenbus_register_frontend(&netfront_driver);
} }
......
...@@ -68,13 +68,12 @@ static void vcpu_hotplug(unsigned int cpu) ...@@ -68,13 +68,12 @@ static void vcpu_hotplug(unsigned int cpu)
} }
static void handle_vcpu_hotplug_event(struct xenbus_watch *watch, static void handle_vcpu_hotplug_event(struct xenbus_watch *watch,
const char **vec, unsigned int len) const char *path, const char *token)
{ {
unsigned int cpu; unsigned int cpu;
char *cpustr; char *cpustr;
const char *node = vec[XS_WATCH_PATH];
cpustr = strstr(node, "cpu/"); cpustr = strstr(path, "cpu/");
if (cpustr != NULL) { if (cpustr != NULL) {
sscanf(cpustr, "cpu/%u", &cpu); sscanf(cpustr, "cpu/%u", &cpu);
vcpu_hotplug(cpu); vcpu_hotplug(cpu);
...@@ -107,7 +106,7 @@ static int __init setup_vcpu_hotplug_event(void) ...@@ -107,7 +106,7 @@ static int __init setup_vcpu_hotplug_event(void)
.notifier_call = setup_cpu_watcher }; .notifier_call = setup_cpu_watcher };
#ifdef CONFIG_X86 #ifdef CONFIG_X86
if (!xen_pv_domain()) if (!xen_pv_domain() && !xen_pvh_domain())
#else #else
if (!xen_domain()) if (!xen_domain())
#endif #endif
......
...@@ -1704,7 +1704,6 @@ void __init xen_init_IRQ(void) ...@@ -1704,7 +1704,6 @@ void __init xen_init_IRQ(void)
pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map); eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn); rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
/* TODO: No PVH support for PIRQ EOI */
if (rc != 0) { if (rc != 0) {
free_page((unsigned long) pirq_eoi_map); free_page((unsigned long) pirq_eoi_map);
pirq_eoi_map = NULL; pirq_eoi_map = NULL;
......
...@@ -1146,13 +1146,13 @@ EXPORT_SYMBOL_GPL(gnttab_init); ...@@ -1146,13 +1146,13 @@ EXPORT_SYMBOL_GPL(gnttab_init);
static int __gnttab_init(void) static int __gnttab_init(void)
{ {
if (!xen_domain())
return -ENODEV;
/* Delay grant-table initialization in the PV on HVM case */ /* Delay grant-table initialization in the PV on HVM case */
if (xen_hvm_domain()) if (xen_hvm_domain() && !xen_pvh_domain())
return 0; return 0;
if (!xen_pv_domain())
return -ENODEV;
return gnttab_init(); return gnttab_init();
} }
/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called /* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
......
...@@ -218,7 +218,7 @@ static struct shutdown_handler shutdown_handlers[] = { ...@@ -218,7 +218,7 @@ static struct shutdown_handler shutdown_handlers[] = {
}; };
static void shutdown_handler(struct xenbus_watch *watch, static void shutdown_handler(struct xenbus_watch *watch,
const char **vec, unsigned int len) const char *path, const char *token)
{ {
char *str; char *str;
struct xenbus_transaction xbt; struct xenbus_transaction xbt;
...@@ -266,8 +266,8 @@ static void shutdown_handler(struct xenbus_watch *watch, ...@@ -266,8 +266,8 @@ static void shutdown_handler(struct xenbus_watch *watch,
} }
#ifdef CONFIG_MAGIC_SYSRQ #ifdef CONFIG_MAGIC_SYSRQ
static void sysrq_handler(struct xenbus_watch *watch, const char **vec, static void sysrq_handler(struct xenbus_watch *watch, const char *path,
unsigned int len) const char *token)
{ {
char sysrq_key = '\0'; char sysrq_key = '\0';
struct xenbus_transaction xbt; struct xenbus_transaction xbt;
...@@ -277,7 +277,7 @@ static void sysrq_handler(struct xenbus_watch *watch, const char **vec, ...@@ -277,7 +277,7 @@ static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
err = xenbus_transaction_start(&xbt); err = xenbus_transaction_start(&xbt);
if (err) if (err)
return; return;
if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { if (xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key) < 0) {
pr_err("Unable to read sysrq code in control/sysrq\n"); pr_err("Unable to read sysrq code in control/sysrq\n");
xenbus_transaction_end(xbt, 1); xenbus_transaction_end(xbt, 1);
return; return;
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/miscdevice.h> #include <linux/miscdevice.h>
#include <linux/moduleparam.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
...@@ -32,6 +33,7 @@ ...@@ -32,6 +33,7 @@
#include <xen/xen.h> #include <xen/xen.h>
#include <xen/privcmd.h> #include <xen/privcmd.h>
#include <xen/interface/xen.h> #include <xen/interface/xen.h>
#include <xen/interface/hvm/dm_op.h>
#include <xen/features.h> #include <xen/features.h>
#include <xen/page.h> #include <xen/page.h>
#include <xen/xen-ops.h> #include <xen/xen-ops.h>
...@@ -43,16 +45,36 @@ MODULE_LICENSE("GPL"); ...@@ -43,16 +45,36 @@ MODULE_LICENSE("GPL");
#define PRIV_VMA_LOCKED ((void *)1) #define PRIV_VMA_LOCKED ((void *)1)
static unsigned int privcmd_dm_op_max_num = 16;
module_param_named(dm_op_max_nr_bufs, privcmd_dm_op_max_num, uint, 0644);
MODULE_PARM_DESC(dm_op_max_nr_bufs,
"Maximum number of buffers per dm_op hypercall");
static unsigned int privcmd_dm_op_buf_max_size = 4096;
module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint,
0644);
MODULE_PARM_DESC(dm_op_buf_max_size,
"Maximum size of a dm_op hypercall buffer");
struct privcmd_data {
domid_t domid;
};
static int privcmd_vma_range_is_mapped( static int privcmd_vma_range_is_mapped(
struct vm_area_struct *vma, struct vm_area_struct *vma,
unsigned long addr, unsigned long addr,
unsigned long nr_pages); unsigned long nr_pages);
static long privcmd_ioctl_hypercall(void __user *udata) static long privcmd_ioctl_hypercall(struct file *file, void __user *udata)
{ {
struct privcmd_data *data = file->private_data;
struct privcmd_hypercall hypercall; struct privcmd_hypercall hypercall;
long ret; long ret;
/* Disallow arbitrary hypercalls if restricted */
if (data->domid != DOMID_INVALID)
return -EPERM;
if (copy_from_user(&hypercall, udata, sizeof(hypercall))) if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
return -EFAULT; return -EFAULT;
...@@ -229,8 +251,9 @@ static int mmap_gfn_range(void *data, void *state) ...@@ -229,8 +251,9 @@ static int mmap_gfn_range(void *data, void *state)
return 0; return 0;
} }
static long privcmd_ioctl_mmap(void __user *udata) static long privcmd_ioctl_mmap(struct file *file, void __user *udata)
{ {
struct privcmd_data *data = file->private_data;
struct privcmd_mmap mmapcmd; struct privcmd_mmap mmapcmd;
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;
struct vm_area_struct *vma; struct vm_area_struct *vma;
...@@ -245,6 +268,10 @@ static long privcmd_ioctl_mmap(void __user *udata) ...@@ -245,6 +268,10 @@ static long privcmd_ioctl_mmap(void __user *udata)
if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
return -EFAULT; return -EFAULT;
/* If restriction is in place, check the domid matches */
if (data->domid != DOMID_INVALID && data->domid != mmapcmd.dom)
return -EPERM;
rc = gather_array(&pagelist, rc = gather_array(&pagelist,
mmapcmd.num, sizeof(struct privcmd_mmap_entry), mmapcmd.num, sizeof(struct privcmd_mmap_entry),
mmapcmd.entry); mmapcmd.entry);
...@@ -416,8 +443,10 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) ...@@ -416,8 +443,10 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
static const struct vm_operations_struct privcmd_vm_ops; static const struct vm_operations_struct privcmd_vm_ops;
static long privcmd_ioctl_mmap_batch(void __user *udata, int version) static long privcmd_ioctl_mmap_batch(
struct file *file, void __user *udata, int version)
{ {
struct privcmd_data *data = file->private_data;
int ret; int ret;
struct privcmd_mmapbatch_v2 m; struct privcmd_mmapbatch_v2 m;
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;
...@@ -446,6 +475,10 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) ...@@ -446,6 +475,10 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
return -EINVAL; return -EINVAL;
} }
/* If restriction is in place, check the domid matches */
if (data->domid != DOMID_INVALID && data->domid != m.dom)
return -EPERM;
nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE); nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE);
if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
return -EINVAL; return -EINVAL;
...@@ -548,37 +581,210 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) ...@@ -548,37 +581,210 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
goto out; goto out;
} }
static int lock_pages(
struct privcmd_dm_op_buf kbufs[], unsigned int num,
struct page *pages[], unsigned int nr_pages)
{
unsigned int i;
for (i = 0; i < num; i++) {
unsigned int requested;
int pinned;
requested = DIV_ROUND_UP(
offset_in_page(kbufs[i].uptr) + kbufs[i].size,
PAGE_SIZE);
if (requested > nr_pages)
return -ENOSPC;
pinned = get_user_pages_fast(
(unsigned long) kbufs[i].uptr,
requested, FOLL_WRITE, pages);
if (pinned < 0)
return pinned;
nr_pages -= pinned;
pages += pinned;
}
return 0;
}
static void unlock_pages(struct page *pages[], unsigned int nr_pages)
{
unsigned int i;
if (!pages)
return;
for (i = 0; i < nr_pages; i++) {
if (pages[i])
put_page(pages[i]);
}
}
static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
{
struct privcmd_data *data = file->private_data;
struct privcmd_dm_op kdata;
struct privcmd_dm_op_buf *kbufs;
unsigned int nr_pages = 0;
struct page **pages = NULL;
struct xen_dm_op_buf *xbufs = NULL;
unsigned int i;
long rc;
if (copy_from_user(&kdata, udata, sizeof(kdata)))
return -EFAULT;
/* If restriction is in place, check the domid matches */
if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
return -EPERM;
if (kdata.num == 0)
return 0;
if (kdata.num > privcmd_dm_op_max_num)
return -E2BIG;
kbufs = kcalloc(kdata.num, sizeof(*kbufs), GFP_KERNEL);
if (!kbufs)
return -ENOMEM;
if (copy_from_user(kbufs, kdata.ubufs,
sizeof(*kbufs) * kdata.num)) {
rc = -EFAULT;
goto out;
}
for (i = 0; i < kdata.num; i++) {
if (kbufs[i].size > privcmd_dm_op_buf_max_size) {
rc = -E2BIG;
goto out;
}
if (!access_ok(VERIFY_WRITE, kbufs[i].uptr,
kbufs[i].size)) {
rc = -EFAULT;
goto out;
}
nr_pages += DIV_ROUND_UP(
offset_in_page(kbufs[i].uptr) + kbufs[i].size,
PAGE_SIZE);
}
pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
if (!pages) {
rc = -ENOMEM;
goto out;
}
xbufs = kcalloc(kdata.num, sizeof(*xbufs), GFP_KERNEL);
if (!xbufs) {
rc = -ENOMEM;
goto out;
}
rc = lock_pages(kbufs, kdata.num, pages, nr_pages);
if (rc)
goto out;
for (i = 0; i < kdata.num; i++) {
set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
xbufs[i].size = kbufs[i].size;
}
xen_preemptible_hcall_begin();
rc = HYPERVISOR_dm_op(kdata.dom, kdata.num, xbufs);
xen_preemptible_hcall_end();
out:
unlock_pages(pages, nr_pages);
kfree(xbufs);
kfree(pages);
kfree(kbufs);
return rc;
}
static long privcmd_ioctl_restrict(struct file *file, void __user *udata)
{
struct privcmd_data *data = file->private_data;
domid_t dom;
if (copy_from_user(&dom, udata, sizeof(dom)))
return -EFAULT;
/* Set restriction to the specified domain, or check it matches */
if (data->domid == DOMID_INVALID)
data->domid = dom;
else if (data->domid != dom)
return -EINVAL;
return 0;
}
static long privcmd_ioctl(struct file *file, static long privcmd_ioctl(struct file *file,
unsigned int cmd, unsigned long data) unsigned int cmd, unsigned long data)
{ {
int ret = -ENOSYS; int ret = -ENOTTY;
void __user *udata = (void __user *) data; void __user *udata = (void __user *) data;
switch (cmd) { switch (cmd) {
case IOCTL_PRIVCMD_HYPERCALL: case IOCTL_PRIVCMD_HYPERCALL:
ret = privcmd_ioctl_hypercall(udata); ret = privcmd_ioctl_hypercall(file, udata);
break; break;
case IOCTL_PRIVCMD_MMAP: case IOCTL_PRIVCMD_MMAP:
ret = privcmd_ioctl_mmap(udata); ret = privcmd_ioctl_mmap(file, udata);
break; break;
case IOCTL_PRIVCMD_MMAPBATCH: case IOCTL_PRIVCMD_MMAPBATCH:
ret = privcmd_ioctl_mmap_batch(udata, 1); ret = privcmd_ioctl_mmap_batch(file, udata, 1);
break; break;
case IOCTL_PRIVCMD_MMAPBATCH_V2: case IOCTL_PRIVCMD_MMAPBATCH_V2:
ret = privcmd_ioctl_mmap_batch(udata, 2); ret = privcmd_ioctl_mmap_batch(file, udata, 2);
break;
case IOCTL_PRIVCMD_DM_OP:
ret = privcmd_ioctl_dm_op(file, udata);
break;
case IOCTL_PRIVCMD_RESTRICT:
ret = privcmd_ioctl_restrict(file, udata);
break; break;
default: default:
ret = -EINVAL;
break; break;
} }
return ret; return ret;
} }
static int privcmd_open(struct inode *ino, struct file *file)
{
struct privcmd_data *data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
/* DOMID_INVALID implies no restriction */
data->domid = DOMID_INVALID;
file->private_data = data;
return 0;
}
static int privcmd_release(struct inode *ino, struct file *file)
{
struct privcmd_data *data = file->private_data;
kfree(data);
return 0;
}
static void privcmd_close(struct vm_area_struct *vma) static void privcmd_close(struct vm_area_struct *vma)
{ {
struct page **pages = vma->vm_private_data; struct page **pages = vma->vm_private_data;
...@@ -647,6 +853,8 @@ static int privcmd_vma_range_is_mapped( ...@@ -647,6 +853,8 @@ static int privcmd_vma_range_is_mapped(
const struct file_operations xen_privcmd_fops = { const struct file_operations xen_privcmd_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.unlocked_ioctl = privcmd_ioctl, .unlocked_ioctl = privcmd_ioctl,
.open = privcmd_open,
.release = privcmd_release,
.mmap = privcmd_mmap, .mmap = privcmd_mmap,
}; };
EXPORT_SYMBOL_GPL(xen_privcmd_fops); EXPORT_SYMBOL_GPL(xen_privcmd_fops);
......
...@@ -55,7 +55,7 @@ static int register_balloon(struct device *dev); ...@@ -55,7 +55,7 @@ static int register_balloon(struct device *dev);
/* React to a change in the target key */ /* React to a change in the target key */
static void watch_target(struct xenbus_watch *watch, static void watch_target(struct xenbus_watch *watch,
const char **vec, unsigned int len) const char *path, const char *token)
{ {
unsigned long long new_target; unsigned long long new_target;
int err; int err;
......
...@@ -652,7 +652,7 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev) ...@@ -652,7 +652,7 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
} }
static void xen_pcibk_be_watch(struct xenbus_watch *watch, static void xen_pcibk_be_watch(struct xenbus_watch *watch,
const char **vec, unsigned int len) const char *path, const char *token)
{ {
struct xen_pcibk_device *pdev = struct xen_pcibk_device *pdev =
container_of(watch, struct xen_pcibk_device, be_watch); container_of(watch, struct xen_pcibk_device, be_watch);
......
/****************************************************************************** /*
* xenbus_probe.h * Private include for xenbus communications.
*
* Talks to Xen Store to figure out what devices we have.
* *
* Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 Rusty Russell, IBM Corporation
* Copyright (C) 2005 XenSource Ltd. * Copyright (C) 2005 XenSource Ltd.
...@@ -31,8 +29,12 @@ ...@@ -31,8 +29,12 @@
* IN THE SOFTWARE. * IN THE SOFTWARE.
*/ */
#ifndef _XENBUS_PROBE_H #ifndef _XENBUS_XENBUS_H
#define _XENBUS_PROBE_H #define _XENBUS_XENBUS_H
#include <linux/mutex.h>
#include <linux/uio.h>
#include <xen/xenbus.h>
#define XEN_BUS_ID_SIZE 20 #define XEN_BUS_ID_SIZE 20
...@@ -42,8 +44,8 @@ struct xen_bus_type { ...@@ -42,8 +44,8 @@ struct xen_bus_type {
int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename);
int (*probe)(struct xen_bus_type *bus, const char *type, int (*probe)(struct xen_bus_type *bus, const char *type,
const char *dir); const char *dir);
void (*otherend_changed)(struct xenbus_watch *watch, const char **vec, void (*otherend_changed)(struct xenbus_watch *watch, const char *path,
unsigned int len); const char *token);
struct bus_type bus; struct bus_type bus;
}; };
...@@ -54,35 +56,80 @@ enum xenstore_init { ...@@ -54,35 +56,80 @@ enum xenstore_init {
XS_LOCAL, XS_LOCAL,
}; };
struct xs_watch_event {
struct list_head list;
unsigned int len;
struct xenbus_watch *handle;
const char *path;
const char *token;
char body[];
};
enum xb_req_state {
xb_req_state_queued,
xb_req_state_wait_reply,
xb_req_state_got_reply,
xb_req_state_aborted
};
struct xb_req_data {
struct list_head list;
wait_queue_head_t wq;
struct xsd_sockmsg msg;
enum xsd_sockmsg_type type;
char *body;
const struct kvec *vec;
int num_vecs;
int err;
enum xb_req_state state;
void (*cb)(struct xb_req_data *);
void *par;
};
extern enum xenstore_init xen_store_domain_type;
extern const struct attribute_group *xenbus_dev_groups[]; extern const struct attribute_group *xenbus_dev_groups[];
extern struct mutex xs_response_mutex;
extern struct list_head xs_reply_list;
extern struct list_head xb_write_list;
extern wait_queue_head_t xb_waitq;
extern struct mutex xb_write_mutex;
extern int xenbus_match(struct device *_dev, struct device_driver *_drv); int xs_init(void);
extern int xenbus_dev_probe(struct device *_dev); int xb_init_comms(void);
extern int xenbus_dev_remove(struct device *_dev); void xb_deinit_comms(void);
extern int xenbus_register_driver_common(struct xenbus_driver *drv, int xs_watch_msg(struct xs_watch_event *event);
struct xen_bus_type *bus, void xs_request_exit(struct xb_req_data *req);
struct module *owner,
const char *mod_name);
extern int xenbus_probe_node(struct xen_bus_type *bus,
const char *type,
const char *nodename);
extern int xenbus_probe_devices(struct xen_bus_type *bus);
extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); int xenbus_match(struct device *_dev, struct device_driver *_drv);
int xenbus_dev_probe(struct device *_dev);
int xenbus_dev_remove(struct device *_dev);
int xenbus_register_driver_common(struct xenbus_driver *drv,
struct xen_bus_type *bus,
struct module *owner,
const char *mod_name);
int xenbus_probe_node(struct xen_bus_type *bus,
const char *type,
const char *nodename);
int xenbus_probe_devices(struct xen_bus_type *bus);
extern void xenbus_dev_shutdown(struct device *_dev); void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
extern int xenbus_dev_suspend(struct device *dev); void xenbus_dev_shutdown(struct device *_dev);
extern int xenbus_dev_resume(struct device *dev);
extern int xenbus_dev_cancel(struct device *dev);
extern void xenbus_otherend_changed(struct xenbus_watch *watch, int xenbus_dev_suspend(struct device *dev);
const char **vec, unsigned int len, int xenbus_dev_resume(struct device *dev);
int ignore_on_shutdown); int xenbus_dev_cancel(struct device *dev);
extern int xenbus_read_otherend_details(struct xenbus_device *xendev, void xenbus_otherend_changed(struct xenbus_watch *watch,
char *id_node, char *path_node); const char *path, const char *token,
int ignore_on_shutdown);
int xenbus_read_otherend_details(struct xenbus_device *xendev,
char *id_node, char *path_node);
void xenbus_ring_ops_init(void); void xenbus_ring_ops_init(void);
int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par);
void xenbus_dev_queue_reply(struct xb_req_data *req);
#endif #endif
...@@ -47,7 +47,7 @@ ...@@ -47,7 +47,7 @@
#include <xen/xen.h> #include <xen/xen.h>
#include <xen/features.h> #include <xen/features.h>
#include "xenbus_probe.h" #include "xenbus.h"
#define XENBUS_PAGES(_grants) (DIV_ROUND_UP(_grants, XEN_PFN_PER_PAGE)) #define XENBUS_PAGES(_grants) (DIV_ROUND_UP(_grants, XEN_PFN_PER_PAGE))
...@@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(xenbus_strstate); ...@@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(xenbus_strstate);
int xenbus_watch_path(struct xenbus_device *dev, const char *path, int xenbus_watch_path(struct xenbus_device *dev, const char *path,
struct xenbus_watch *watch, struct xenbus_watch *watch,
void (*callback)(struct xenbus_watch *, void (*callback)(struct xenbus_watch *,
const char **, unsigned int)) const char *, const char *))
{ {
int err; int err;
...@@ -153,7 +153,7 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path); ...@@ -153,7 +153,7 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path);
int xenbus_watch_pathfmt(struct xenbus_device *dev, int xenbus_watch_pathfmt(struct xenbus_device *dev,
struct xenbus_watch *watch, struct xenbus_watch *watch,
void (*callback)(struct xenbus_watch *, void (*callback)(struct xenbus_watch *,
const char **, unsigned int), const char *, const char *),
const char *pathfmt, ...) const char *pathfmt, ...)
{ {
int err; int err;
...@@ -259,53 +259,34 @@ int xenbus_frontend_closed(struct xenbus_device *dev) ...@@ -259,53 +259,34 @@ int xenbus_frontend_closed(struct xenbus_device *dev)
} }
EXPORT_SYMBOL_GPL(xenbus_frontend_closed); EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
/**
* Return the path to the error node for the given device, or NULL on failure.
* If the value returned is non-NULL, then it is the caller's to kfree.
*/
static char *error_path(struct xenbus_device *dev)
{
return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
}
static void xenbus_va_dev_error(struct xenbus_device *dev, int err, static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
const char *fmt, va_list ap) const char *fmt, va_list ap)
{ {
unsigned int len; unsigned int len;
char *printf_buffer = NULL; char *printf_buffer;
char *path_buffer = NULL; char *path_buffer;
#define PRINTF_BUFFER_SIZE 4096 #define PRINTF_BUFFER_SIZE 4096
printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
if (printf_buffer == NULL) if (!printf_buffer)
goto fail; return;
len = sprintf(printf_buffer, "%i ", -err); len = sprintf(printf_buffer, "%i ", -err);
vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); vsnprintf(printf_buffer + len, PRINTF_BUFFER_SIZE - len, fmt, ap);
dev_err(&dev->dev, "%s\n", printf_buffer); dev_err(&dev->dev, "%s\n", printf_buffer);
path_buffer = error_path(dev); path_buffer = kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
if (!path_buffer ||
if (path_buffer == NULL) { xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer))
dev_err(&dev->dev, "failed to write error node for %s (%s)\n", dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
dev->nodename, printf_buffer); dev->nodename, printf_buffer);
goto fail;
}
if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
dev->nodename, printf_buffer);
goto fail;
}
fail:
kfree(printf_buffer); kfree(printf_buffer);
kfree(path_buffer); kfree(path_buffer);
} }
/** /**
* xenbus_dev_error * xenbus_dev_error
* @dev: xenbus device * @dev: xenbus device
......
...@@ -34,19 +34,31 @@ ...@@ -34,19 +34,31 @@
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/err.h> #include <linux/err.h>
#include <xen/xenbus.h> #include <xen/xenbus.h>
#include <asm/xen/hypervisor.h> #include <asm/xen/hypervisor.h>
#include <xen/events.h> #include <xen/events.h>
#include <xen/page.h> #include <xen/page.h>
#include "xenbus_comms.h" #include "xenbus.h"
/* A list of replies. Currently only one will ever be outstanding. */
LIST_HEAD(xs_reply_list);
/* A list of write requests. */
LIST_HEAD(xb_write_list);
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
DEFINE_MUTEX(xb_write_mutex);
/* Protect xenbus reader thread against save/restore. */
DEFINE_MUTEX(xs_response_mutex);
static int xenbus_irq; static int xenbus_irq;
static struct task_struct *xenbus_task;
static DECLARE_WORK(probe_work, xenbus_probe); static DECLARE_WORK(probe_work, xenbus_probe);
static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
static irqreturn_t wake_waiting(int irq, void *unused) static irqreturn_t wake_waiting(int irq, void *unused)
{ {
...@@ -84,30 +96,31 @@ static const void *get_input_chunk(XENSTORE_RING_IDX cons, ...@@ -84,30 +96,31 @@ static const void *get_input_chunk(XENSTORE_RING_IDX cons,
return buf + MASK_XENSTORE_IDX(cons); return buf + MASK_XENSTORE_IDX(cons);
} }
static int xb_data_to_write(void)
{
struct xenstore_domain_interface *intf = xen_store_interface;
return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
!list_empty(&xb_write_list);
}
/** /**
* xb_write - low level write * xb_write - low level write
* @data: buffer to send * @data: buffer to send
* @len: length of buffer * @len: length of buffer
* *
* Returns 0 on success, error otherwise. * Returns number of bytes written or -err.
*/ */
int xb_write(const void *data, unsigned len) static int xb_write(const void *data, unsigned int len)
{ {
struct xenstore_domain_interface *intf = xen_store_interface; struct xenstore_domain_interface *intf = xen_store_interface;
XENSTORE_RING_IDX cons, prod; XENSTORE_RING_IDX cons, prod;
int rc; unsigned int bytes = 0;
while (len != 0) { while (len != 0) {
void *dst; void *dst;
unsigned int avail; unsigned int avail;
rc = wait_event_interruptible(
xb_waitq,
(intf->req_prod - intf->req_cons) !=
XENSTORE_RING_SIZE);
if (rc < 0)
return rc;
/* Read indexes, then verify. */ /* Read indexes, then verify. */
cons = intf->req_cons; cons = intf->req_cons;
prod = intf->req_prod; prod = intf->req_prod;
...@@ -115,6 +128,11 @@ int xb_write(const void *data, unsigned len) ...@@ -115,6 +128,11 @@ int xb_write(const void *data, unsigned len)
intf->req_cons = intf->req_prod = 0; intf->req_cons = intf->req_prod = 0;
return -EIO; return -EIO;
} }
if (!xb_data_to_write())
return bytes;
/* Must write data /after/ reading the consumer index. */
virt_mb();
dst = get_output_chunk(cons, prod, intf->req, &avail); dst = get_output_chunk(cons, prod, intf->req, &avail);
if (avail == 0) if (avail == 0)
...@@ -122,52 +140,45 @@ int xb_write(const void *data, unsigned len) ...@@ -122,52 +140,45 @@ int xb_write(const void *data, unsigned len)
if (avail > len) if (avail > len)
avail = len; avail = len;
/* Must write data /after/ reading the consumer index. */
virt_mb();
memcpy(dst, data, avail); memcpy(dst, data, avail);
data += avail; data += avail;
len -= avail; len -= avail;
bytes += avail;
/* Other side must not see new producer until data is there. */ /* Other side must not see new producer until data is there. */
virt_wmb(); virt_wmb();
intf->req_prod += avail; intf->req_prod += avail;
/* Implies mb(): other side will see the updated producer. */ /* Implies mb(): other side will see the updated producer. */
notify_remote_via_evtchn(xen_store_evtchn); if (prod <= intf->req_cons)
notify_remote_via_evtchn(xen_store_evtchn);
} }
return 0; return bytes;
} }
int xb_data_to_read(void) static int xb_data_to_read(void)
{ {
struct xenstore_domain_interface *intf = xen_store_interface; struct xenstore_domain_interface *intf = xen_store_interface;
return (intf->rsp_cons != intf->rsp_prod); return (intf->rsp_cons != intf->rsp_prod);
} }
int xb_wait_for_data_to_read(void) static int xb_read(void *data, unsigned int len)
{
return wait_event_interruptible(xb_waitq, xb_data_to_read());
}
int xb_read(void *data, unsigned len)
{ {
struct xenstore_domain_interface *intf = xen_store_interface; struct xenstore_domain_interface *intf = xen_store_interface;
XENSTORE_RING_IDX cons, prod; XENSTORE_RING_IDX cons, prod;
int rc; unsigned int bytes = 0;
while (len != 0) { while (len != 0) {
unsigned int avail; unsigned int avail;
const char *src; const char *src;
rc = xb_wait_for_data_to_read();
if (rc < 0)
return rc;
/* Read indexes, then verify. */ /* Read indexes, then verify. */
cons = intf->rsp_cons; cons = intf->rsp_cons;
prod = intf->rsp_prod; prod = intf->rsp_prod;
if (cons == prod)
return bytes;
if (!check_indexes(cons, prod)) { if (!check_indexes(cons, prod)) {
intf->rsp_cons = intf->rsp_prod = 0; intf->rsp_cons = intf->rsp_prod = 0;
return -EIO; return -EIO;
...@@ -185,17 +196,243 @@ int xb_read(void *data, unsigned len) ...@@ -185,17 +196,243 @@ int xb_read(void *data, unsigned len)
memcpy(data, src, avail); memcpy(data, src, avail);
data += avail; data += avail;
len -= avail; len -= avail;
bytes += avail;
/* Other side must not see free space until we've copied out */ /* Other side must not see free space until we've copied out */
virt_mb(); virt_mb();
intf->rsp_cons += avail; intf->rsp_cons += avail;
pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
/* Implies mb(): other side will see the updated consumer. */ /* Implies mb(): other side will see the updated consumer. */
notify_remote_via_evtchn(xen_store_evtchn); if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
notify_remote_via_evtchn(xen_store_evtchn);
}
return bytes;
}
static int process_msg(void)
{
static struct {
struct xsd_sockmsg msg;
char *body;
union {
void *alloc;
struct xs_watch_event *watch;
};
bool in_msg;
bool in_hdr;
unsigned int read;
} state;
struct xb_req_data *req;
int err;
unsigned int len;
if (!state.in_msg) {
state.in_msg = true;
state.in_hdr = true;
state.read = 0;
/*
* We must disallow save/restore while reading a message.
* A partial read across s/r leaves us out of sync with
* xenstored.
* xs_response_mutex is locked as long as we are processing one
* message. state.in_msg will be true as long as we are holding
* the lock here.
*/
mutex_lock(&xs_response_mutex);
if (!xb_data_to_read()) {
/* We raced with save/restore: pending data 'gone'. */
mutex_unlock(&xs_response_mutex);
state.in_msg = false;
return 0;
}
}
if (state.in_hdr) {
if (state.read != sizeof(state.msg)) {
err = xb_read((void *)&state.msg + state.read,
sizeof(state.msg) - state.read);
if (err < 0)
goto out;
state.read += err;
if (state.read != sizeof(state.msg))
return 0;
if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
err = -EINVAL;
goto out;
}
}
len = state.msg.len + 1;
if (state.msg.type == XS_WATCH_EVENT)
len += sizeof(*state.watch);
state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
if (!state.alloc)
return -ENOMEM;
if (state.msg.type == XS_WATCH_EVENT)
state.body = state.watch->body;
else
state.body = state.alloc;
state.in_hdr = false;
state.read = 0;
}
err = xb_read(state.body + state.read, state.msg.len - state.read);
if (err < 0)
goto out;
state.read += err;
if (state.read != state.msg.len)
return 0;
state.body[state.msg.len] = '\0';
if (state.msg.type == XS_WATCH_EVENT) {
state.watch->len = state.msg.len;
err = xs_watch_msg(state.watch);
} else {
err = -ENOENT;
mutex_lock(&xb_write_mutex);
list_for_each_entry(req, &xs_reply_list, list) {
if (req->msg.req_id == state.msg.req_id) {
if (req->state == xb_req_state_wait_reply) {
req->msg.type = state.msg.type;
req->msg.len = state.msg.len;
req->body = state.body;
req->state = xb_req_state_got_reply;
list_del(&req->list);
req->cb(req);
} else {
list_del(&req->list);
kfree(req);
}
err = 0;
break;
}
}
mutex_unlock(&xb_write_mutex);
if (err)
goto out;
} }
mutex_unlock(&xs_response_mutex);
state.in_msg = false;
state.alloc = NULL;
return err;
out:
mutex_unlock(&xs_response_mutex);
state.in_msg = false;
kfree(state.alloc);
state.alloc = NULL;
return err;
}
static int process_writes(void)
{
static struct {
struct xb_req_data *req;
int idx;
unsigned int written;
} state;
void *base;
unsigned int len;
int err = 0;
if (!xb_data_to_write())
return 0;
mutex_lock(&xb_write_mutex);
if (!state.req) {
state.req = list_first_entry(&xb_write_list,
struct xb_req_data, list);
state.idx = -1;
state.written = 0;
}
if (state.req->state == xb_req_state_aborted)
goto out_err;
while (state.idx < state.req->num_vecs) {
if (state.idx < 0) {
base = &state.req->msg;
len = sizeof(state.req->msg);
} else {
base = state.req->vec[state.idx].iov_base;
len = state.req->vec[state.idx].iov_len;
}
err = xb_write(base + state.written, len - state.written);
if (err < 0)
goto out_err;
state.written += err;
if (state.written != len)
goto out;
state.idx++;
state.written = 0;
}
list_del(&state.req->list);
state.req->state = xb_req_state_wait_reply;
list_add_tail(&state.req->list, &xs_reply_list);
state.req = NULL;
out:
mutex_unlock(&xb_write_mutex);
return 0;
out_err:
state.req->msg.type = XS_ERROR;
state.req->err = err;
list_del(&state.req->list);
if (state.req->state == xb_req_state_aborted)
kfree(state.req);
else {
state.req->state = xb_req_state_got_reply;
wake_up(&state.req->wq);
}
mutex_unlock(&xb_write_mutex);
state.req = NULL;
return err;
}
static int xb_thread_work(void)
{
return xb_data_to_read() || xb_data_to_write();
}
static int xenbus_thread(void *unused)
{
int err;
while (!kthread_should_stop()) {
if (wait_event_interruptible(xb_waitq, xb_thread_work()))
continue;
err = process_msg();
if (err == -ENOMEM)
schedule();
else if (err)
pr_warn_ratelimited("error %d while reading message\n",
err);
err = process_writes();
if (err)
pr_warn_ratelimited("error %d while writing message\n",
err);
}
xenbus_task = NULL;
return 0; return 0;
} }
...@@ -223,6 +460,7 @@ int xb_init_comms(void) ...@@ -223,6 +460,7 @@ int xb_init_comms(void)
rebind_evtchn_irq(xen_store_evtchn, xenbus_irq); rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
} else { } else {
int err; int err;
err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
0, "xenbus", &xb_waitq); 0, "xenbus", &xb_waitq);
if (err < 0) { if (err < 0) {
...@@ -231,6 +469,13 @@ int xb_init_comms(void) ...@@ -231,6 +469,13 @@ int xb_init_comms(void)
} }
xenbus_irq = err; xenbus_irq = err;
if (!xenbus_task) {
xenbus_task = kthread_run(xenbus_thread, NULL,
"xenbus");
if (IS_ERR(xenbus_task))
return PTR_ERR(xenbus_task);
}
} }
return 0; return 0;
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#include <xen/events.h> #include <xen/events.h>
#include <asm/xen/hypervisor.h> #include <asm/xen/hypervisor.h>
#include "xenbus_comms.h" #include "xenbus.h"
static int xenbus_backend_open(struct inode *inode, struct file *filp) static int xenbus_backend_open(struct inode *inode, struct file *filp)
{ {
......
...@@ -57,12 +57,12 @@ ...@@ -57,12 +57,12 @@
#include <linux/miscdevice.h> #include <linux/miscdevice.h>
#include <linux/init.h> #include <linux/init.h>
#include "xenbus_comms.h"
#include <xen/xenbus.h> #include <xen/xenbus.h>
#include <xen/xen.h> #include <xen/xen.h>
#include <asm/xen/hypervisor.h> #include <asm/xen/hypervisor.h>
#include "xenbus.h"
/* /*
* An element of a list of outstanding transactions, for which we're * An element of a list of outstanding transactions, for which we're
* still waiting a reply. * still waiting a reply.
...@@ -113,6 +113,7 @@ struct xenbus_file_priv { ...@@ -113,6 +113,7 @@ struct xenbus_file_priv {
struct list_head read_buffers; struct list_head read_buffers;
wait_queue_head_t read_waitq; wait_queue_head_t read_waitq;
struct kref kref;
}; };
/* Read out any raw xenbus messages queued up. */ /* Read out any raw xenbus messages queued up. */
...@@ -258,26 +259,23 @@ static struct watch_adapter *alloc_watch_adapter(const char *path, ...@@ -258,26 +259,23 @@ static struct watch_adapter *alloc_watch_adapter(const char *path,
} }
static void watch_fired(struct xenbus_watch *watch, static void watch_fired(struct xenbus_watch *watch,
const char **vec, const char *path,
unsigned int len) const char *token)
{ {
struct watch_adapter *adap; struct watch_adapter *adap;
struct xsd_sockmsg hdr; struct xsd_sockmsg hdr;
const char *path, *token; const char *token_caller;
int path_len, tok_len, body_len, data_len = 0; int path_len, tok_len, body_len;
int ret; int ret;
LIST_HEAD(staging_q); LIST_HEAD(staging_q);
adap = container_of(watch, struct watch_adapter, watch); adap = container_of(watch, struct watch_adapter, watch);
path = vec[XS_WATCH_PATH]; token_caller = adap->token;
token = adap->token;
path_len = strlen(path) + 1; path_len = strlen(path) + 1;
tok_len = strlen(token) + 1; tok_len = strlen(token_caller) + 1;
if (len > 2) body_len = path_len + tok_len;
data_len = vec[len] - vec[2] + 1;
body_len = path_len + tok_len + data_len;
hdr.type = XS_WATCH_EVENT; hdr.type = XS_WATCH_EVENT;
hdr.len = body_len; hdr.len = body_len;
...@@ -288,9 +286,7 @@ static void watch_fired(struct xenbus_watch *watch, ...@@ -288,9 +286,7 @@ static void watch_fired(struct xenbus_watch *watch,
if (!ret) if (!ret)
ret = queue_reply(&staging_q, path, path_len); ret = queue_reply(&staging_q, path, path_len);
if (!ret) if (!ret)
ret = queue_reply(&staging_q, token, tok_len); ret = queue_reply(&staging_q, token_caller, tok_len);
if (!ret && len > 2)
ret = queue_reply(&staging_q, vec[2], data_len);
if (!ret) { if (!ret) {
/* success: pass reply list onto watcher */ /* success: pass reply list onto watcher */
...@@ -302,6 +298,107 @@ static void watch_fired(struct xenbus_watch *watch, ...@@ -302,6 +298,107 @@ static void watch_fired(struct xenbus_watch *watch,
mutex_unlock(&adap->dev_data->reply_mutex); mutex_unlock(&adap->dev_data->reply_mutex);
} }
static void xenbus_file_free(struct kref *kref)
{
struct xenbus_file_priv *u;
struct xenbus_transaction_holder *trans, *tmp;
struct watch_adapter *watch, *tmp_watch;
struct read_buffer *rb, *tmp_rb;
u = container_of(kref, struct xenbus_file_priv, kref);
/*
* No need for locking here because there are no other users,
* by definition.
*/
list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
xenbus_transaction_end(trans->handle, 1);
list_del(&trans->list);
kfree(trans);
}
list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
unregister_xenbus_watch(&watch->watch);
list_del(&watch->list);
free_watch_adapter(watch);
}
list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) {
list_del(&rb->list);
kfree(rb);
}
kfree(u);
}
static struct xenbus_transaction_holder *xenbus_get_transaction(
struct xenbus_file_priv *u, uint32_t tx_id)
{
struct xenbus_transaction_holder *trans;
list_for_each_entry(trans, &u->transactions, list)
if (trans->handle.id == tx_id)
return trans;
return NULL;
}
void xenbus_dev_queue_reply(struct xb_req_data *req)
{
struct xenbus_file_priv *u = req->par;
struct xenbus_transaction_holder *trans = NULL;
int rc;
LIST_HEAD(staging_q);
xs_request_exit(req);
mutex_lock(&u->msgbuffer_mutex);
if (req->type == XS_TRANSACTION_START) {
trans = xenbus_get_transaction(u, 0);
if (WARN_ON(!trans))
goto out;
if (req->msg.type == XS_ERROR) {
list_del(&trans->list);
kfree(trans);
} else {
rc = kstrtou32(req->body, 10, &trans->handle.id);
if (WARN_ON(rc))
goto out;
}
} else if (req->msg.type == XS_TRANSACTION_END) {
trans = xenbus_get_transaction(u, req->msg.tx_id);
if (WARN_ON(!trans))
goto out;
list_del(&trans->list);
kfree(trans);
}
mutex_unlock(&u->msgbuffer_mutex);
mutex_lock(&u->reply_mutex);
rc = queue_reply(&staging_q, &req->msg, sizeof(req->msg));
if (!rc)
rc = queue_reply(&staging_q, req->body, req->msg.len);
if (!rc) {
list_splice_tail(&staging_q, &u->read_buffers);
wake_up(&u->read_waitq);
} else {
queue_cleanup(&staging_q);
}
mutex_unlock(&u->reply_mutex);
kfree(req->body);
kfree(req);
kref_put(&u->kref, xenbus_file_free);
return;
out:
mutex_unlock(&u->msgbuffer_mutex);
}
static int xenbus_command_reply(struct xenbus_file_priv *u, static int xenbus_command_reply(struct xenbus_file_priv *u,
unsigned int msg_type, const char *reply) unsigned int msg_type, const char *reply)
{ {
...@@ -322,6 +419,9 @@ static int xenbus_command_reply(struct xenbus_file_priv *u, ...@@ -322,6 +419,9 @@ static int xenbus_command_reply(struct xenbus_file_priv *u,
wake_up(&u->read_waitq); wake_up(&u->read_waitq);
mutex_unlock(&u->reply_mutex); mutex_unlock(&u->reply_mutex);
if (!rc)
kref_put(&u->kref, xenbus_file_free);
return rc; return rc;
} }
...@@ -329,57 +429,22 @@ static int xenbus_write_transaction(unsigned msg_type, ...@@ -329,57 +429,22 @@ static int xenbus_write_transaction(unsigned msg_type,
struct xenbus_file_priv *u) struct xenbus_file_priv *u)
{ {
int rc; int rc;
void *reply;
struct xenbus_transaction_holder *trans = NULL; struct xenbus_transaction_holder *trans = NULL;
LIST_HEAD(staging_q);
if (msg_type == XS_TRANSACTION_START) { if (msg_type == XS_TRANSACTION_START) {
trans = kmalloc(sizeof(*trans), GFP_KERNEL); trans = kzalloc(sizeof(*trans), GFP_KERNEL);
if (!trans) { if (!trans) {
rc = -ENOMEM; rc = -ENOMEM;
goto out; goto out;
} }
} else if (u->u.msg.tx_id != 0) { list_add(&trans->list, &u->transactions);
list_for_each_entry(trans, &u->transactions, list) } else if (u->u.msg.tx_id != 0 &&
if (trans->handle.id == u->u.msg.tx_id) !xenbus_get_transaction(u, u->u.msg.tx_id))
break; return xenbus_command_reply(u, XS_ERROR, "ENOENT");
if (&trans->list == &u->transactions)
return xenbus_command_reply(u, XS_ERROR, "ENOENT");
}
reply = xenbus_dev_request_and_reply(&u->u.msg);
if (IS_ERR(reply)) {
if (msg_type == XS_TRANSACTION_START)
kfree(trans);
rc = PTR_ERR(reply);
goto out;
}
if (msg_type == XS_TRANSACTION_START) { rc = xenbus_dev_request_and_reply(&u->u.msg, u);
if (u->u.msg.type == XS_ERROR) if (rc)
kfree(trans);
else {
trans->handle.id = simple_strtoul(reply, NULL, 0);
list_add(&trans->list, &u->transactions);
}
} else if (u->u.msg.type == XS_TRANSACTION_END) {
list_del(&trans->list);
kfree(trans); kfree(trans);
}
mutex_lock(&u->reply_mutex);
rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg));
if (!rc)
rc = queue_reply(&staging_q, reply, u->u.msg.len);
if (!rc) {
list_splice_tail(&staging_q, &u->read_buffers);
wake_up(&u->read_waitq);
} else {
queue_cleanup(&staging_q);
}
mutex_unlock(&u->reply_mutex);
kfree(reply);
out: out:
return rc; return rc;
...@@ -511,6 +576,8 @@ static ssize_t xenbus_file_write(struct file *filp, ...@@ -511,6 +576,8 @@ static ssize_t xenbus_file_write(struct file *filp,
* OK, now we have a complete message. Do something with it. * OK, now we have a complete message. Do something with it.
*/ */
kref_get(&u->kref);
msg_type = u->u.msg.type; msg_type = u->u.msg.type;
switch (msg_type) { switch (msg_type) {
...@@ -525,8 +592,10 @@ static ssize_t xenbus_file_write(struct file *filp, ...@@ -525,8 +592,10 @@ static ssize_t xenbus_file_write(struct file *filp,
ret = xenbus_write_transaction(msg_type, u); ret = xenbus_write_transaction(msg_type, u);
break; break;
} }
if (ret != 0) if (ret != 0) {
rc = ret; rc = ret;
kref_put(&u->kref, xenbus_file_free);
}
/* Buffered message consumed */ /* Buffered message consumed */
u->len = 0; u->len = 0;
...@@ -551,6 +620,8 @@ static int xenbus_file_open(struct inode *inode, struct file *filp) ...@@ -551,6 +620,8 @@ static int xenbus_file_open(struct inode *inode, struct file *filp)
if (u == NULL) if (u == NULL)
return -ENOMEM; return -ENOMEM;
kref_init(&u->kref);
INIT_LIST_HEAD(&u->transactions); INIT_LIST_HEAD(&u->transactions);
INIT_LIST_HEAD(&u->watches); INIT_LIST_HEAD(&u->watches);
INIT_LIST_HEAD(&u->read_buffers); INIT_LIST_HEAD(&u->read_buffers);
...@@ -567,32 +638,8 @@ static int xenbus_file_open(struct inode *inode, struct file *filp) ...@@ -567,32 +638,8 @@ static int xenbus_file_open(struct inode *inode, struct file *filp)
static int xenbus_file_release(struct inode *inode, struct file *filp) static int xenbus_file_release(struct inode *inode, struct file *filp)
{ {
struct xenbus_file_priv *u = filp->private_data; struct xenbus_file_priv *u = filp->private_data;
struct xenbus_transaction_holder *trans, *tmp;
struct watch_adapter *watch, *tmp_watch;
struct read_buffer *rb, *tmp_rb;
/*
* No need for locking here because there are no other users,
* by definition.
*/
list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
xenbus_transaction_end(trans->handle, 1);
list_del(&trans->list);
kfree(trans);
}
list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
unregister_xenbus_watch(&watch->watch);
list_del(&watch->list);
free_watch_adapter(watch);
}
list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { kref_put(&u->kref, xenbus_file_free);
list_del(&rb->list);
kfree(rb);
}
kfree(u);
return 0; return 0;
} }
......
...@@ -62,8 +62,7 @@ ...@@ -62,8 +62,7 @@
#include <xen/hvm.h> #include <xen/hvm.h>
#include "xenbus_comms.h" #include "xenbus.h"
#include "xenbus_probe.h"
int xen_store_evtchn; int xen_store_evtchn;
...@@ -170,7 +169,7 @@ int xenbus_read_otherend_details(struct xenbus_device *xendev, ...@@ -170,7 +169,7 @@ int xenbus_read_otherend_details(struct xenbus_device *xendev,
EXPORT_SYMBOL_GPL(xenbus_read_otherend_details); EXPORT_SYMBOL_GPL(xenbus_read_otherend_details);
void xenbus_otherend_changed(struct xenbus_watch *watch, void xenbus_otherend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len, const char *path, const char *token,
int ignore_on_shutdown) int ignore_on_shutdown)
{ {
struct xenbus_device *dev = struct xenbus_device *dev =
...@@ -181,18 +180,15 @@ void xenbus_otherend_changed(struct xenbus_watch *watch, ...@@ -181,18 +180,15 @@ void xenbus_otherend_changed(struct xenbus_watch *watch,
/* Protect us against watches firing on old details when the otherend /* Protect us against watches firing on old details when the otherend
details change, say immediately after a resume. */ details change, say immediately after a resume. */
if (!dev->otherend || if (!dev->otherend ||
strncmp(dev->otherend, vec[XS_WATCH_PATH], strncmp(dev->otherend, path, strlen(dev->otherend))) {
strlen(dev->otherend))) { dev_dbg(&dev->dev, "Ignoring watch at %s\n", path);
dev_dbg(&dev->dev, "Ignoring watch at %s\n",
vec[XS_WATCH_PATH]);
return; return;
} }
state = xenbus_read_driver_state(dev->otherend); state = xenbus_read_driver_state(dev->otherend);
dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n", dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n",
state, xenbus_strstate(state), dev->otherend_watch.node, state, xenbus_strstate(state), dev->otherend_watch.node, path);
vec[XS_WATCH_PATH]);
/* /*
* Ignore xenbus transitions during shutdown. This prevents us doing * Ignore xenbus transitions during shutdown. This prevents us doing
......
...@@ -53,8 +53,7 @@ ...@@ -53,8 +53,7 @@
#include <xen/xenbus.h> #include <xen/xenbus.h>
#include <xen/features.h> #include <xen/features.h>
#include "xenbus_comms.h" #include "xenbus.h"
#include "xenbus_probe.h"
/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */ /* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
...@@ -182,9 +181,9 @@ static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, ...@@ -182,9 +181,9 @@ static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type,
} }
static void frontend_changed(struct xenbus_watch *watch, static void frontend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len) const char *path, const char *token)
{ {
xenbus_otherend_changed(watch, vec, len, 0); xenbus_otherend_changed(watch, path, token, 0);
} }
static struct xen_bus_type xenbus_backend = { static struct xen_bus_type xenbus_backend = {
...@@ -205,11 +204,11 @@ static struct xen_bus_type xenbus_backend = { ...@@ -205,11 +204,11 @@ static struct xen_bus_type xenbus_backend = {
}; };
static void backend_changed(struct xenbus_watch *watch, static void backend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len) const char *path, const char *token)
{ {
DPRINTK(""); DPRINTK("");
xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); xenbus_dev_changed(path, &xenbus_backend);
} }
static struct xenbus_watch be_watch = { static struct xenbus_watch be_watch = {
......
...@@ -27,8 +27,7 @@ ...@@ -27,8 +27,7 @@
#include <xen/platform_pci.h> #include <xen/platform_pci.h>
#include "xenbus_comms.h" #include "xenbus.h"
#include "xenbus_probe.h"
...@@ -87,9 +86,9 @@ static int xenbus_uevent_frontend(struct device *_dev, ...@@ -87,9 +86,9 @@ static int xenbus_uevent_frontend(struct device *_dev,
static void backend_changed(struct xenbus_watch *watch, static void backend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len) const char *path, const char *token)
{ {
xenbus_otherend_changed(watch, vec, len, 1); xenbus_otherend_changed(watch, path, token, 1);
} }
static void xenbus_frontend_delayed_resume(struct work_struct *w) static void xenbus_frontend_delayed_resume(struct work_struct *w)
...@@ -154,11 +153,11 @@ static struct xen_bus_type xenbus_frontend = { ...@@ -154,11 +153,11 @@ static struct xen_bus_type xenbus_frontend = {
}; };
static void frontend_changed(struct xenbus_watch *watch, static void frontend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len) const char *path, const char *token)
{ {
DPRINTK(""); DPRINTK("");
xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); xenbus_dev_changed(path, &xenbus_frontend);
} }
...@@ -333,13 +332,13 @@ static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); ...@@ -333,13 +332,13 @@ static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
static int backend_state; static int backend_state;
static void xenbus_reset_backend_state_changed(struct xenbus_watch *w, static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
const char **v, unsigned int l) const char *path, const char *token)
{ {
if (xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", if (xenbus_scanf(XBT_NIL, path, "", "%i",
&backend_state) != 1) &backend_state) != 1)
backend_state = XenbusStateUnknown; backend_state = XenbusStateUnknown;
printk(KERN_DEBUG "XENBUS: backend %s %s\n", printk(KERN_DEBUG "XENBUS: backend %s %s\n",
v[XS_WATCH_PATH], xenbus_strstate(backend_state)); path, xenbus_strstate(backend_state));
wake_up(&backend_state_wq); wake_up(&backend_state_wq);
} }
......
This diff is collapsed.
...@@ -16,10 +16,10 @@ ...@@ -16,10 +16,10 @@
#include <linux/magic.h> #include <linux/magic.h>
#include <xen/xen.h> #include <xen/xen.h>
#include <xen/xenbus.h>
#include "xenfs.h" #include "xenfs.h"
#include "../privcmd.h" #include "../privcmd.h"
#include "../xenbus/xenbus_comms.h"
#include <asm/xen/hypervisor.h> #include <asm/xen/hypervisor.h>
......
...@@ -4,9 +4,9 @@ ...@@ -4,9 +4,9 @@
#include <linux/fs.h> #include <linux/fs.h>
#include <xen/page.h> #include <xen/page.h>
#include <xen/xenbus.h>
#include "xenfs.h" #include "xenfs.h"
#include "../xenbus/xenbus_comms.h"
static ssize_t xsd_read(struct file *file, char __user *buf, static ssize_t xsd_read(struct file *file, char __user *buf,
size_t size, loff_t *off) size_t size, loff_t *off)
......
...@@ -77,6 +77,17 @@ struct privcmd_mmapbatch_v2 { ...@@ -77,6 +77,17 @@ struct privcmd_mmapbatch_v2 {
int __user *err; /* array of error codes */ int __user *err; /* array of error codes */
}; };
struct privcmd_dm_op_buf {
void __user *uptr;
size_t size;
};
struct privcmd_dm_op {
domid_t dom;
__u16 num;
const struct privcmd_dm_op_buf __user *ubufs;
};
/* /*
* @cmd: IOCTL_PRIVCMD_HYPERCALL * @cmd: IOCTL_PRIVCMD_HYPERCALL
* @arg: &privcmd_hypercall_t * @arg: &privcmd_hypercall_t
...@@ -98,5 +109,9 @@ struct privcmd_mmapbatch_v2 { ...@@ -98,5 +109,9 @@ struct privcmd_mmapbatch_v2 {
_IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
#define IOCTL_PRIVCMD_MMAPBATCH_V2 \ #define IOCTL_PRIVCMD_MMAPBATCH_V2 \
_IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2)) _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
#define IOCTL_PRIVCMD_DM_OP \
_IOC(_IOC_NONE, 'P', 5, sizeof(struct privcmd_dm_op))
#define IOCTL_PRIVCMD_RESTRICT \
_IOC(_IOC_NONE, 'P', 6, sizeof(domid_t))
#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
...@@ -53,6 +53,7 @@ int HYPERVISOR_physdev_op(int cmd, void *arg); ...@@ -53,6 +53,7 @@ int HYPERVISOR_physdev_op(int cmd, void *arg);
int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args);
int HYPERVISOR_tmem_op(void *arg); int HYPERVISOR_tmem_op(void *arg);
int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type); int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type);
int HYPERVISOR_dm_op(domid_t domid, unsigned int nr_bufs, void *bufs);
int HYPERVISOR_platform_op_raw(void *arg); int HYPERVISOR_platform_op_raw(void *arg);
static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) static inline int HYPERVISOR_platform_op(struct xen_platform_op *op)
{ {
......
...@@ -192,10 +192,20 @@ ...@@ -192,10 +192,20 @@
*/ */
#define XEN_ELFNOTE_SUPPORTED_FEATURES 17 #define XEN_ELFNOTE_SUPPORTED_FEATURES 17
/*
* Physical entry point into the kernel.
*
* 32bit entry point into the kernel. When requested to launch the
* guest kernel in a HVM container, Xen will use this entry point to
* launch the guest in 32bit protected mode with paging disabled.
* Ignored otherwise.
*/
#define XEN_ELFNOTE_PHYS32_ENTRY 18
/* /*
* The number of the highest elfnote defined. * The number of the highest elfnote defined.
*/ */
#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES #define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY
#endif /* __XEN_PUBLIC_ELFNOTE_H__ */ #endif /* __XEN_PUBLIC_ELFNOTE_H__ */
......
/* /*
* Private include for xenbus communications. * Copyright (c) 2016, Citrix Systems Inc
*
* Copyright (C) 2005 Rusty Russell, IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy * Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without * of this software and associated documentation files (the "Software"), to
* restriction, including without limitation the rights to use, copy, modify, * deal in the Software without restriction, including without limitation the
* merge, publish, distribute, sublicense, and/or sell copies of the Software, * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* and to permit persons to whom the Software is furnished to do so, subject to * sell copies of the Software, and to permit persons to whom the Software is
* the following conditions: * furnished to do so, subject to the following conditions:
* *
* The above copyright notice and this permission notice shall be included in * The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software. * all copies or substantial portions of the Software.
...@@ -24,28 +16,17 @@ ...@@ -24,28 +16,17 @@
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* IN THE SOFTWARE. * DEALINGS IN THE SOFTWARE.
*/ */
#ifndef _XENBUS_COMMS_H #ifndef __XEN_PUBLIC_HVM_DM_OP_H__
#define _XENBUS_COMMS_H #define __XEN_PUBLIC_HVM_DM_OP_H__
#include <linux/fs.h>
int xs_init(void);
int xb_init_comms(void);
void xb_deinit_comms(void);
/* Low level routines. */
int xb_write(const void *data, unsigned len);
int xb_read(void *data, unsigned len);
int xb_data_to_read(void);
int xb_wait_for_data_to_read(void);
extern struct xenstore_domain_interface *xen_store_interface;
extern int xen_store_evtchn;
extern enum xenstore_init xen_store_domain_type;
extern const struct file_operations xen_xenbus_fops; struct xen_dm_op_buf {
GUEST_HANDLE(void) h;
xen_ulong_t size;
};
DEFINE_GUEST_HANDLE_STRUCT(xen_dm_op_buf);
#endif /* _XENBUS_COMMS_H */ #endif /* __XEN_PUBLIC_HVM_DM_OP_H__ */
/*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Copyright (c) 2015, Roger Pau Monne <roger.pau@citrix.com>
*/
#ifndef __XEN_PUBLIC_HVM_HVM_VCPU_H__
#define __XEN_PUBLIC_HVM_HVM_VCPU_H__
#include "../xen.h"
struct vcpu_hvm_x86_32 {
uint32_t eax;
uint32_t ecx;
uint32_t edx;
uint32_t ebx;
uint32_t esp;
uint32_t ebp;
uint32_t esi;
uint32_t edi;
uint32_t eip;
uint32_t eflags;
uint32_t cr0;
uint32_t cr3;
uint32_t cr4;
uint32_t pad1;
/*
* EFER should only be used to set the NXE bit (if required)
* when starting a vCPU in 32bit mode with paging enabled or
* to set the LME/LMA bits in order to start the vCPU in
* compatibility mode.
*/
uint64_t efer;
uint32_t cs_base;
uint32_t ds_base;
uint32_t ss_base;
uint32_t es_base;
uint32_t tr_base;
uint32_t cs_limit;
uint32_t ds_limit;
uint32_t ss_limit;
uint32_t es_limit;
uint32_t tr_limit;
uint16_t cs_ar;
uint16_t ds_ar;
uint16_t ss_ar;
uint16_t es_ar;
uint16_t tr_ar;
uint16_t pad2[3];
};
/*
* The layout of the _ar fields of the segment registers is the
* following:
*
* Bits [0,3]: type (bits 40-43).
* Bit 4: s (descriptor type, bit 44).
* Bit [5,6]: dpl (descriptor privilege level, bits 45-46).
* Bit 7: p (segment-present, bit 47).
* Bit 8: avl (available for system software, bit 52).
* Bit 9: l (64-bit code segment, bit 53).
* Bit 10: db (meaning depends on the segment, bit 54).
* Bit 11: g (granularity, bit 55)
* Bits [12,15]: unused, must be blank.
*
* A more complete description of the meaning of this fields can be
* obtained from the Intel SDM, Volume 3, section 3.4.5.
*/
struct vcpu_hvm_x86_64 {
uint64_t rax;
uint64_t rcx;
uint64_t rdx;
uint64_t rbx;
uint64_t rsp;
uint64_t rbp;
uint64_t rsi;
uint64_t rdi;
uint64_t rip;
uint64_t rflags;
uint64_t cr0;
uint64_t cr3;
uint64_t cr4;
uint64_t efer;
/*
* Using VCPU_HVM_MODE_64B implies that the vCPU is launched
* directly in long mode, so the cached parts of the segment
* registers get set to match that environment.
*
* If the user wants to launch the vCPU in compatibility mode
* the 32-bit structure should be used instead.
*/
};
struct vcpu_hvm_context {
#define VCPU_HVM_MODE_32B 0 /* 32bit fields of the structure will be used. */
#define VCPU_HVM_MODE_64B 1 /* 64bit fields of the structure will be used. */
uint32_t mode;
uint32_t pad;
/* CPU registers. */
union {
struct vcpu_hvm_x86_32 x86_32;
struct vcpu_hvm_x86_64 x86_64;
} cpu_regs;
};
typedef struct vcpu_hvm_context vcpu_hvm_context_t;
#endif /* __XEN_PUBLIC_HVM_HVM_VCPU_H__ */
/*
* Local variables:
* mode: C
* c-file-style: "BSD"
* c-basic-offset: 4
* tab-width: 4
* indent-tabs-mode: nil
* End:
*/
/*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Copyright (c) 2016, Citrix Systems, Inc.
*/
#ifndef __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__
#define __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__
/*
* Start of day structure passed to PVH guests and to HVM guests in %ebx.
*
* NOTE: nothing will be loaded at physical address 0, so a 0 value in any
* of the address fields should be treated as not present.
*
* 0 +----------------+
* | magic | Contains the magic value XEN_HVM_START_MAGIC_VALUE
* | | ("xEn3" with the 0x80 bit of the "E" set).
* 4 +----------------+
* | version | Version of this structure. Current version is 0. New
* | | versions are guaranteed to be backwards-compatible.
* 8 +----------------+
* | flags | SIF_xxx flags.
* 12 +----------------+
* | nr_modules | Number of modules passed to the kernel.
* 16 +----------------+
* | modlist_paddr | Physical address of an array of modules
* | | (layout of the structure below).
* 24 +----------------+
* | cmdline_paddr | Physical address of the command line,
* | | a zero-terminated ASCII string.
* 32 +----------------+
* | rsdp_paddr | Physical address of the RSDP ACPI data structure.
* 40 +----------------+
*
* The layout of each entry in the module structure is the following:
*
* 0 +----------------+
* | paddr | Physical address of the module.
* 8 +----------------+
* | size | Size of the module in bytes.
* 16 +----------------+
* | cmdline_paddr | Physical address of the command line,
* | | a zero-terminated ASCII string.
* 24 +----------------+
* | reserved |
* 32 +----------------+
*
* The address and sizes are always a 64bit little endian unsigned integer.
*
* NB: Xen on x86 will always try to place all the data below the 4GiB
* boundary.
*/
#define XEN_HVM_START_MAGIC_VALUE 0x336ec578
/*
* C representation of the x86/HVM start info layout.
*
* The canonical definition of this layout is above, this is just a way to
* represent the layout described there using C types.
*/
struct hvm_start_info {
uint32_t magic; /* Contains the magic value 0x336ec578 */
/* ("xEn3" with the 0x80 bit of the "E" set).*/
uint32_t version; /* Version of this structure. */
uint32_t flags; /* SIF_xxx flags. */
uint32_t nr_modules; /* Number of modules passed to the kernel. */
uint64_t modlist_paddr; /* Physical address of an array of */
/* hvm_modlist_entry. */
uint64_t cmdline_paddr; /* Physical address of the command line. */
uint64_t rsdp_paddr; /* Physical address of the RSDP ACPI data */
/* structure. */
};
struct hvm_modlist_entry {
uint64_t paddr; /* Physical address of the module. */
uint64_t size; /* Size of the module in bytes. */
uint64_t cmdline_paddr; /* Physical address of the command line. */
uint64_t reserved;
};
#endif /* __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ */
...@@ -81,6 +81,7 @@ ...@@ -81,6 +81,7 @@
#define __HYPERVISOR_tmem_op 38 #define __HYPERVISOR_tmem_op 38
#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ #define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */
#define __HYPERVISOR_xenpmu_op 40 #define __HYPERVISOR_xenpmu_op 40
#define __HYPERVISOR_dm_op 41
/* Architecture-specific hypercall definitions. */ /* Architecture-specific hypercall definitions. */
#define __HYPERVISOR_arch_0 48 #define __HYPERVISOR_arch_0 48
......
...@@ -30,16 +30,10 @@ extern enum xen_domain_type xen_domain_type; ...@@ -30,16 +30,10 @@ extern enum xen_domain_type xen_domain_type;
#endif /* CONFIG_XEN_DOM0 */ #endif /* CONFIG_XEN_DOM0 */
#ifdef CONFIG_XEN_PVH #ifdef CONFIG_XEN_PVH
/* This functionality exists only for x86. The XEN_PVHVM support exists extern bool xen_pvh;
* only in x86 world - hence on ARM it will be always disabled. #define xen_pvh_domain() (xen_hvm_domain() && xen_pvh)
* N.B. ARM guests are neither PV nor HVM nor PVHVM.
* It's a bit like PVH but is different also (it's further towards the H
* end of the spectrum than even PVH).
*/
#include <xen/features.h>
#define xen_pvh_domain() (xen_pv_domain() && \
xen_feature(XENFEAT_auto_translated_physmap))
#else #else
#define xen_pvh_domain() (0) #define xen_pvh_domain() (0)
#endif #endif
#endif /* _XEN_XEN_H */ #endif /* _XEN_XEN_H */
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/fs.h>
#include <linux/completion.h> #include <linux/completion.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/slab.h> #include <linux/slab.h>
...@@ -60,7 +61,7 @@ struct xenbus_watch ...@@ -60,7 +61,7 @@ struct xenbus_watch
/* Callback (executed in a process context with no locks held). */ /* Callback (executed in a process context with no locks held). */
void (*callback)(struct xenbus_watch *, void (*callback)(struct xenbus_watch *,
const char **vec, unsigned int len); const char *path, const char *token);
}; };
...@@ -175,16 +176,9 @@ void xs_suspend(void); ...@@ -175,16 +176,9 @@ void xs_suspend(void);
void xs_resume(void); void xs_resume(void);
void xs_suspend_cancel(void); void xs_suspend_cancel(void);
/* Used by xenbus_dev to borrow kernel's store connection. */
void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg);
struct work_struct; struct work_struct;
/* Prepare for domain suspend: then resume or cancel the suspend. */
void xenbus_suspend(void);
void xenbus_resume(void);
void xenbus_probe(struct work_struct *); void xenbus_probe(struct work_struct *);
void xenbus_suspend_cancel(void);
#define XENBUS_IS_ERR_READ(str) ({ \ #define XENBUS_IS_ERR_READ(str) ({ \
if (!IS_ERR(str) && strlen(str) == 0) { \ if (!IS_ERR(str) && strlen(str) == 0) { \
...@@ -199,11 +193,11 @@ void xenbus_suspend_cancel(void); ...@@ -199,11 +193,11 @@ void xenbus_suspend_cancel(void);
int xenbus_watch_path(struct xenbus_device *dev, const char *path, int xenbus_watch_path(struct xenbus_device *dev, const char *path,
struct xenbus_watch *watch, struct xenbus_watch *watch,
void (*callback)(struct xenbus_watch *, void (*callback)(struct xenbus_watch *,
const char **, unsigned int)); const char *, const char *));
__printf(4, 5) __printf(4, 5)
int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
void (*callback)(struct xenbus_watch *, void (*callback)(struct xenbus_watch *,
const char **, unsigned int), const char *, const char *),
const char *pathfmt, ...); const char *pathfmt, ...);
int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
...@@ -235,4 +229,8 @@ const char *xenbus_strstate(enum xenbus_state state); ...@@ -235,4 +229,8 @@ const char *xenbus_strstate(enum xenbus_state state);
int xenbus_dev_is_online(struct xenbus_device *dev); int xenbus_dev_is_online(struct xenbus_device *dev);
int xenbus_frontend_closed(struct xenbus_device *dev); int xenbus_frontend_closed(struct xenbus_device *dev);
extern const struct file_operations xen_xenbus_fops;
extern struct xenstore_domain_interface *xen_store_interface;
extern int xen_store_evtchn;
#endif /* _XEN_XENBUS_H */ #endif /* _XEN_XENBUS_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment