Commit 4b7227ca authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-xen-next

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-xen-next: (52 commits)
  xen: add balloon driver
  xen: allow compilation with non-flat memory
  xen: fold xen_sysexit into xen_iret
  xen: allow set_pte_at on init_mm to be lockless
  xen: disable preemption during tlb flush
  xen pvfb: Para-virtual framebuffer, keyboard and pointer driver
  xen: Add compatibility aliases for frontend drivers
  xen: Module autoprobing support for frontend drivers
  xen blkfront: Delay wait for block devices until after the disk is added
  xen/blkfront: use bdget_disk
  xen: Make xen-blkfront write its protocol ABI to xenstore
  xen: import arch generic part of xencomm
  xen: make grant table arch portable
  xen: replace callers of alloc_vm_area()/free_vm_area() with xen_ prefixed one
  xen: make include/xen/page.h portable moving those definitions under asm dir
  xen: add resend_irq_on_evtchn() definition into events.c
  Xen: make events.c portable for ia64/xen support
  xen: move events.c to drivers/xen for IA64/Xen support
  xen: move features.c from arch/x86/xen/features.c to drivers/xen
  xen: add missing definitions in include/xen/interface/vcpu.h which ia64/xen needs
  ...
parents 5dae61b8 1775826c
@@ -409,7 +409,7 @@ restore_nocheck_notrace:
irq_return:
INTERRUPT_RETURN
.section .fixup,"ax"
-iret_exc:
+ENTRY(iret_exc)
pushl $0 # no error code
pushl $do_iret_error
jmp error_code
@@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper)
ENDPROC(kernel_thread_helper)
#ifdef CONFIG_XEN
+/* Xen doesn't set %esp to be precisely what the normal sysenter
+   entrypoint expects, so fix it up before using the normal path. */
+ENTRY(xen_sysenter_target)
+RING0_INT_FRAME
+addl $5*4, %esp /* remove xen-provided frame */
+jmp sysenter_past_esp
ENTRY(xen_hypervisor_callback)
CFI_STARTPROC
pushl $0
@@ -1035,8 +1042,9 @@ ENTRY(xen_hypervisor_callback)
cmpl $xen_iret_end_crit,%eax
jae 1f
-call xen_iret_crit_fixup
+jmp xen_iret_crit_fixup
+ENTRY(xen_do_upcall)
1: mov %esp, %eax
call xen_evtchn_do_upcall
jmp ret_from_intr
...
@@ -366,11 +366,13 @@ struct pv_mmu_ops pv_mmu_ops = {
.flush_tlb_single = native_flush_tlb_single,
.flush_tlb_others = native_flush_tlb_others,
-.alloc_pt = paravirt_nop,
-.alloc_pd = paravirt_nop,
-.alloc_pd_clone = paravirt_nop,
-.release_pt = paravirt_nop,
-.release_pd = paravirt_nop,
+.alloc_pte = paravirt_nop,
+.alloc_pmd = paravirt_nop,
+.alloc_pmd_clone = paravirt_nop,
+.alloc_pud = paravirt_nop,
+.release_pte = paravirt_nop,
+.release_pmd = paravirt_nop,
+.release_pud = paravirt_nop,
.set_pte = native_set_pte,
.set_pte_at = native_set_pte_at,
...
@@ -8,6 +8,7 @@
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/hpet.h>
+#include <asm/pgtable.h>
#include <asm/reboot_fixups.h>
#include <asm/reboot.h>
@@ -15,7 +16,6 @@
# include <linux/dmi.h>
# include <linux/ctype.h>
# include <linux/mc146818rtc.h>
-# include <asm/pgtable.h>
#else
# include <asm/iommu.h>
#endif
@@ -275,7 +275,7 @@ void machine_real_restart(unsigned char *code, int length)
/* Remap the kernel at virtual address zero, as well as offset zero
from the kernel segment. This assumes the kernel segment starts at
virtual address PAGE_OFFSET. */
-memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
/*
...
@@ -1039,8 +1039,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
#ifdef CONFIG_X86_32
/* init low mem mapping */
-clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
+clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
flush_tlb_all();
#endif
...
@@ -320,7 +320,7 @@ static void check_zeroed_page(u32 pfn, int type, struct page *page)
* pdes need to be zeroed.
*/
if (type & VMI_PAGE_CLONE)
-limit = USER_PTRS_PER_PGD;
+limit = KERNEL_PGD_BOUNDARY;
for (i = 0; i < limit; i++)
BUG_ON(ptr[i]);
}
@@ -392,13 +392,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
}
#endif
-static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn)
{
vmi_set_page_type(pfn, VMI_PAGE_L1);
vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
}
-static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
{
/*
* This call comes in very early, before mem_map is setup.
@@ -409,20 +409,20 @@ static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
}
-static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
+static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
{
vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
vmi_check_page_type(clonepfn, VMI_PAGE_L2);
vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
}
-static void vmi_release_pt(u32 pfn)
+static void vmi_release_pte(u32 pfn)
{
vmi_ops.release_page(pfn, VMI_PAGE_L1);
vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
}
-static void vmi_release_pd(u32 pfn)
+static void vmi_release_pmd(u32 pfn)
{
vmi_ops.release_page(pfn, VMI_PAGE_L2);
vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
@@ -871,15 +871,15 @@ static inline int __init activate_vmi(void)
vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
if (vmi_ops.allocate_page) {
-pv_mmu_ops.alloc_pt = vmi_allocate_pt;
-pv_mmu_ops.alloc_pd = vmi_allocate_pd;
-pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone;
+pv_mmu_ops.alloc_pte = vmi_allocate_pte;
+pv_mmu_ops.alloc_pmd = vmi_allocate_pmd;
+pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone;
}
vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
if (vmi_ops.release_page) {
-pv_mmu_ops.release_pt = vmi_release_pt;
-pv_mmu_ops.release_pd = vmi_release_pd;
+pv_mmu_ops.release_pte = vmi_release_pte;
+pv_mmu_ops.release_pmd = vmi_release_pmd;
}
/* Set linear is needed in all cases */
...
@@ -543,8 +543,8 @@ static void __init do_boot_cpu(__u8 cpu)
hijack_source.idt.Offset, stack_start.sp));
/* init lowmem identity mapping */
-clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
+clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
flush_tlb_all();
if (quad_boot) {
...
obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-pat.o
+pat.o pgtable.o
obj-$(CONFIG_X86_32) += pgtable_32.o
...
@@ -71,7 +71,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
+paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
@@ -100,7 +100,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
}
-paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
+paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
BUG_ON(page_table != pte_offset_kernel(pmd, 0));
}
@@ -365,7 +365,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
pte_clear(NULL, va, pte);
}
-paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
+paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
}
void __init native_pagetable_setup_done(pgd_t *base)
@@ -457,7 +457,7 @@ void zap_low_mappings(void)
* Note that "pgd_clear()" doesn't do it for
* us, because pgd_clear() is a no-op on i386.
*/
-for (i = 0; i < USER_PTRS_PER_PGD; i++) {
+for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
#ifdef CONFIG_X86_PAE
set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
...
@@ -407,7 +407,7 @@ void __init early_ioremap_clear(void)
pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
pmd_clear(pmd);
-paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
+paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
__flush_tlb_all();
}
...
@@ -483,9 +483,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
goto out_unlock;
pbase = (pte_t *)page_address(base);
-#ifdef CONFIG_X86_32
-paravirt_alloc_pt(&init_mm, page_to_pfn(base));
-#endif
+paravirt_alloc_pte(&init_mm, page_to_pfn(base));
ref_prot = pte_pgprot(pte_clrhuge(*kpte));
#ifdef CONFIG_X86_64
...
#include <linux/mm.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
}
pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;
#ifdef CONFIG_HIGHPTE
pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
#else
pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
#endif
if (pte)
pgtable_page_ctor(pte);
return pte;
}
void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
pgtable_page_dtor(pte);
paravirt_release_pte(page_to_pfn(pte));
tlb_remove_page(tlb, pte);
}
#if PAGETABLE_LEVELS > 2
void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
tlb_remove_page(tlb, virt_to_page(pmd));
}
#if PAGETABLE_LEVELS > 3
void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
tlb_remove_page(tlb, virt_to_page(pud));
}
#endif /* PAGETABLE_LEVELS > 3 */
#endif /* PAGETABLE_LEVELS > 2 */
static inline void pgd_list_add(pgd_t *pgd)
{
struct page *page = virt_to_page(pgd);
list_add(&page->lru, &pgd_list);
}
static inline void pgd_list_del(pgd_t *pgd)
{
struct page *page = virt_to_page(pgd);
list_del(&page->lru);
}
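/* With a shared kernel pmd (PAE with SHARED_KERNEL_PMD), only the pgd
   entries below KERNEL_PGD_BOUNDARY are private to each pagetable;
   otherwise all PTRS_PER_PGD entries must be handled per pgd. */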
#define UNSHARED_PTRS_PER_PGD \
(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
static void pgd_ctor(void *p)
{
pgd_t *pgd = p;
unsigned long flags;
/* Clear usermode parts of PGD */
memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
spin_lock_irqsave(&pgd_lock, flags);
/* If the pgd points to a shared pagetable level (either the
ptes in non-PAE, or shared PMD in PAE), then just copy the
references from swapper_pg_dir. */
if (PAGETABLE_LEVELS == 2 ||
(PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
PAGETABLE_LEVELS == 4) {
clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
__pa(swapper_pg_dir) >> PAGE_SHIFT,
KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
}
/* list required to sync kernel mapping updates */
if (!SHARED_KERNEL_PMD)
pgd_list_add(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
}
static void pgd_dtor(void *pgd)
{
unsigned long flags; /* can be called from interrupt context */
if (SHARED_KERNEL_PMD)
return;
spin_lock_irqsave(&pgd_lock, flags);
pgd_list_del(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
}
/*
* List of all pgd's needed for non-PAE so it can invalidate entries
* in both cached and uncached pgd's; not needed for PAE since the
* kernel pmd is shared. If PAE were not to share the pmd a similar
* tactic would be needed. This is essentially codepath-based locking
* against pageattr.c; it is the unique case in which a valid change
* of kernel pagetables can't be lazily synchronized by vmalloc faults.
* vmalloc faults work because attached pagetables are never freed.
* -- wli
*/
#ifdef CONFIG_X86_PAE
/*
* Mop up any pmd pages which may still be attached to the pgd.
* Normally they will be freed by munmap/exit_mmap, but any pmd we
* preallocate which never got a corresponding vma will need to be
* freed manually.
*/
static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
{
int i;
for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
pgd_t pgd = pgdp[i];
if (pgd_val(pgd) != 0) {
pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
pgdp[i] = native_make_pgd(0);
paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
pmd_free(mm, pmd);
}
}
}
/*
* In PAE mode, we need to do a cr3 reload (=tlb flush) when
* updating the top-level pagetable entries to guarantee the
* processor notices the update. Since this is expensive, and
* all 4 top-level entries are used almost immediately in a
* new process's life, we just pre-populate them here.
*
* Also, if we're in a paravirt environment where the kernel pmd is
* not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
* and initialize the kernel pmds here.
*/
static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
{
pud_t *pud;
unsigned long addr;
int i;
pud = pud_offset(pgd, 0);
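/* On 32-bit PAE the pud level is folded into the pgd, so this walks the
   unshared top-level entries and allocates a pmd page for each one. */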
for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
i++, pud++, addr += PUD_SIZE) {
pmd_t *pmd = pmd_alloc_one(mm, addr);
if (!pmd) {
pgd_mop_up_pmds(mm, pgd);
return 0;
}
if (i >= KERNEL_PGD_BOUNDARY)
memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
sizeof(pmd_t) * PTRS_PER_PMD);
pud_populate(mm, pud, pmd);
}
return 1;
}
void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
{
paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
/* Note: almost everything apart from _PAGE_PRESENT is
reserved at the pmd (PDPT) level. */
set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
/*
* According to Intel App note "TLBs, Paging-Structure Caches,
* and Their Invalidation", April 2007, document 317080-001,
* section 8.1: in PAE mode we explicitly have to flush the
* TLB via cr3 if the top-level pgd is changed...
*/
if (mm == current->active_mm)
write_cr3(read_cr3());
}
#else /* !CONFIG_X86_PAE */
/* No need to prepopulate any pagetable entries in non-PAE modes. */
static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
{
return 1;
}
static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
{
}
#endif /* CONFIG_X86_PAE */
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
/* so that alloc_pmd can use it */
mm->pgd = pgd;
if (pgd)
pgd_ctor(pgd);
if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
pgd_dtor(pgd);
free_page((unsigned long)pgd);
pgd = NULL;
}
return pgd;
}
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
pgd_mop_up_pmds(mm, pgd);
pgd_dtor(pgd);
free_page((unsigned long)pgd);
}
int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
pte_t entry, int dirty)
{
int changed = !pte_same(*ptep, entry);
if (changed && dirty) {
*ptep = entry;
pte_update_defer(vma->vm_mm, address, ptep);
flush_tlb_page(vma, address);
}
return changed;
}
int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
int ret = 0;
if (pte_young(*ptep))
ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
&ptep->pte);
if (ret)
pte_update(vma->vm_mm, addr, ptep);
return ret;
}
int ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
int young;
young = ptep_test_and_clear_young(vma, address, ptep);
if (young)
flush_tlb_page(vma, address);
return young;
}
@@ -173,210 +173,6 @@ void reserve_top_address(unsigned long reserve)
__VMALLOC_RESERVE += reserve;
}
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
}
pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;
#ifdef CONFIG_HIGHPTE
pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
#else
pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
#endif
if (pte)
pgtable_page_ctor(pte);
return pte;
}
/*
* List of all pgd's needed for non-PAE so it can invalidate entries
* in both cached and uncached pgd's; not needed for PAE since the
* kernel pmd is shared. If PAE were not to share the pmd a similar
* tactic would be needed. This is essentially codepath-based locking
* against pageattr.c; it is the unique case in which a valid change
* of kernel pagetables can't be lazily synchronized by vmalloc faults.
* vmalloc faults work because attached pagetables are never freed.
* -- wli
*/
static inline void pgd_list_add(pgd_t *pgd)
{
struct page *page = virt_to_page(pgd);
list_add(&page->lru, &pgd_list);
}
static inline void pgd_list_del(pgd_t *pgd)
{
struct page *page = virt_to_page(pgd);
list_del(&page->lru);
}
#define UNSHARED_PTRS_PER_PGD \
(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
static void pgd_ctor(void *p)
{
pgd_t *pgd = p;
unsigned long flags;
/* Clear usermode parts of PGD */
memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
spin_lock_irqsave(&pgd_lock, flags);
/* If the pgd points to a shared pagetable level (either the
ptes in non-PAE, or shared PMD in PAE), then just copy the
references from swapper_pg_dir. */
if (PAGETABLE_LEVELS == 2 ||
(PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
clone_pgd_range(pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
KERNEL_PGD_PTRS);
paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
__pa(swapper_pg_dir) >> PAGE_SHIFT,
USER_PTRS_PER_PGD,
KERNEL_PGD_PTRS);
}
/* list required to sync kernel mapping updates */
if (!SHARED_KERNEL_PMD)
pgd_list_add(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
}
static void pgd_dtor(void *pgd)
{
unsigned long flags; /* can be called from interrupt context */
if (SHARED_KERNEL_PMD)
return;
spin_lock_irqsave(&pgd_lock, flags);
pgd_list_del(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
}
#ifdef CONFIG_X86_PAE
/*
* Mop up any pmd pages which may still be attached to the pgd.
* Normally they will be freed by munmap/exit_mmap, but any pmd we
* preallocate which never got a corresponding vma will need to be
* freed manually.
*/
static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
{
int i;
for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
pgd_t pgd = pgdp[i];
if (pgd_val(pgd) != 0) {
pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
pgdp[i] = native_make_pgd(0);
paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
pmd_free(mm, pmd);
}
}
}
/*
* In PAE mode, we need to do a cr3 reload (=tlb flush) when
* updating the top-level pagetable entries to guarantee the
* processor notices the update. Since this is expensive, and
* all 4 top-level entries are used almost immediately in a
* new process's life, we just pre-populate them here.
*
* Also, if we're in a paravirt environment where the kernel pmd is
* not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
* and initialize the kernel pmds here.
*/
static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
{
pud_t *pud;
unsigned long addr;
int i;
pud = pud_offset(pgd, 0);
for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
i++, pud++, addr += PUD_SIZE) {
pmd_t *pmd = pmd_alloc_one(mm, addr);
if (!pmd) {
pgd_mop_up_pmds(mm, pgd);
return 0;
}
if (i >= USER_PTRS_PER_PGD)
memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
sizeof(pmd_t) * PTRS_PER_PMD);
pud_populate(mm, pud, pmd);
}
return 1;
}
#else /* !CONFIG_X86_PAE */
/* No need to prepopulate any pagetable entries in non-PAE modes. */
static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
{
return 1;
}
static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
{
}
#endif /* CONFIG_X86_PAE */
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
/* so that alloc_pd can use it */
mm->pgd = pgd;
if (pgd)
pgd_ctor(pgd);
if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
pgd_dtor(pgd);
free_page((unsigned long)pgd);
pgd = NULL;
}
return pgd;
}
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
pgd_mop_up_pmds(mm, pgd);
pgd_dtor(pgd);
free_page((unsigned long)pgd);
}
void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
pgtable_page_dtor(pte);
paravirt_release_pt(page_to_pfn(pte));
tlb_remove_page(tlb, pte);
}
#ifdef CONFIG_X86_PAE
void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
tlb_remove_page(tlb, virt_to_page(pmd));
}
#endif
int pmd_bad(pmd_t pmd)
{
WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd));
...
@@ -6,7 +6,7 @@ config XEN
bool "Xen guest support"
select PARAVIRT
depends on X86_32
-depends on X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES && !(X86_VISWS || X86_VOYAGER)
+depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
help
This is the Linux Xen port. Enabling this will allow the
kernel to boot in a paravirtualized environment under the
...
-obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \
-events.o time.o manage.o xen-asm.o
+obj-y := enlighten.o setup.o multicalls.o mmu.o \
+time.o manage.o xen-asm.o grant-table.o
obj-$(CONFIG_SMP) += smp.o
@@ -155,7 +155,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
if (*ax == 1)
maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */
(1 << X86_FEATURE_ACPI) | /* disable ACPI */
-(1 << X86_FEATURE_SEP) | /* disable SEP */
+(1 << X86_FEATURE_MCE) | /* disable MCE */
+(1 << X86_FEATURE_MCA) | /* disable MCA */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
asm(XEN_EMULATE_PREFIX "cpuid"
@@ -531,26 +532,37 @@ static void xen_apic_write(unsigned long reg, u32 val)
static void xen_flush_tlb(void)
{
struct mmuext_op *op;
-struct multicall_space mcs = xen_mc_entry(sizeof(*op));
+struct multicall_space mcs;
+preempt_disable();
+mcs = xen_mc_entry(sizeof(*op));
op = mcs.args;
op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
xen_mc_issue(PARAVIRT_LAZY_MMU);
+preempt_enable();
}
static void xen_flush_tlb_single(unsigned long addr)
{
struct mmuext_op *op;
-struct multicall_space mcs = xen_mc_entry(sizeof(*op));
+struct multicall_space mcs;
+preempt_disable();
+mcs = xen_mc_entry(sizeof(*op));
op = mcs.args;
op->cmd = MMUEXT_INVLPG_LOCAL;
op->arg1.linear_addr = addr & PAGE_MASK;
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
xen_mc_issue(PARAVIRT_LAZY_MMU);
+preempt_enable();
}
static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
@@ -655,15 +667,17 @@ static void xen_write_cr3(unsigned long cr3)
/* Early in boot, while setting up the initial pagetable, assume
everything is pinned. */
-static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
+static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
{
+#ifdef CONFIG_FLATMEM
BUG_ON(mem_map); /* should only be used early */
+#endif
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
}
-/* Early release_pt assumes that all pts are pinned, since there's
+/* Early release_pte assumes that all pts are pinned, since there's
only init_mm and anything attached to that is pinned. */
-static void xen_release_pt_init(u32 pfn)
+static void xen_release_pte_init(u32 pfn)
{
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
@@ -697,12 +711,12 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
}
}
-static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pte(struct mm_struct *mm, u32 pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PTE);
}
-static void xen_alloc_pd(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PMD);
}
@@ -722,12 +736,12 @@ static void xen_release_ptpage(u32 pfn, unsigned level)
}
}
-static void xen_release_pt(u32 pfn)
+static void xen_release_pte(u32 pfn)
{
xen_release_ptpage(pfn, PT_PTE);
}
-static void xen_release_pd(u32 pfn)
+static void xen_release_pmd(u32 pfn)
{
xen_release_ptpage(pfn, PT_PMD);
}
@@ -849,10 +863,10 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
{
/* This will work as long as patching hasn't happened yet
(which it hasn't) */
-pv_mmu_ops.alloc_pt = xen_alloc_pt;
-pv_mmu_ops.alloc_pd = xen_alloc_pd;
-pv_mmu_ops.release_pt = xen_release_pt;
-pv_mmu_ops.release_pd = xen_release_pd;
+pv_mmu_ops.alloc_pte = xen_alloc_pte;
+pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
+pv_mmu_ops.release_pte = xen_release_pte;
+pv_mmu_ops.release_pmd = xen_release_pmd;
pv_mmu_ops.set_pte = xen_set_pte;
setup_shared_info();
@@ -994,7 +1008,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.read_pmc = native_read_pmc,
.iret = xen_iret,
-.irq_enable_syscall_ret = NULL, /* never called */
+.irq_enable_syscall_ret = xen_sysexit,
.load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt,
@@ -1059,11 +1073,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
.pte_update = paravirt_nop,
.pte_update_defer = paravirt_nop,
-.alloc_pt = xen_alloc_pt_init,
-.release_pt = xen_release_pt_init,
-.alloc_pd = xen_alloc_pt_init,
-.alloc_pd_clone = paravirt_nop,
-.release_pd = xen_release_pt_init,
+.alloc_pte = xen_alloc_pte_init,
+.release_pte = xen_release_pte_init,
+.alloc_pmd = xen_alloc_pte_init,
+.alloc_pmd_clone = paravirt_nop,
+.release_pmd = xen_release_pte_init,
#ifdef CONFIG_HIGHPTE
.kmap_atomic_pte = xen_kmap_atomic_pte,
...
/******************************************************************************
* grant_table.c
* x86 specific part
*
* Granting foreign access to our memory reservation.
*
* Copyright (c) 2005-2006, Christopher Clark
* Copyright (c) 2004-2005, K A Fraser
* Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
* VA Linux Systems Japan. Split out x86 specific part.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <xen/interface/xen.h>
#include <xen/page.h>
#include <xen/grant_table.h>
#include <asm/pgtable.h>
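/* Callbacks for apply_to_page_range(): map_pte_fn installs the next
   grant-table machine frame (passed via *data) into the given pte,
   and unmap_pte_fn clears it again. */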
static int map_pte_fn(pte_t *pte, struct page *pmd_page,
unsigned long addr, void *data)
{
unsigned long **frames = (unsigned long **)data;
set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
(*frames)++;
return 0;
}
static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
unsigned long addr, void *data)
{
set_pte_at(&init_mm, addr, pte, __pte(0));
return 0;
}
int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
unsigned long max_nr_gframes,
struct grant_entry **__shared)
{
int rc;
struct grant_entry *shared = *__shared;
if (shared == NULL) {
struct vm_struct *area =
xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes);
BUG_ON(area == NULL);
shared = area->addr;
*__shared = shared;
}
rc = apply_to_page_range(&init_mm, (unsigned long)shared,
PAGE_SIZE * nr_gframes,
map_pte_fn, &frames);
return rc;
}
void arch_gnttab_unmap_shared(struct grant_entry *shared,
unsigned long nr_gframes)
{
apply_to_page_range(&init_mm, (unsigned long)shared,
PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
}
@@ -156,6 +156,10 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
+/* updates to init_mm may be done without lock */
+if (mm == &init_mm)
+preempt_disable();
if (mm == current->mm || mm == &init_mm) {
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
struct multicall_space mcs;
@@ -163,14 +167,61 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
xen_mc_issue(PARAVIRT_LAZY_MMU);
-return;
+goto out;
} else
if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
-return;
+goto out;
}
xen_set_pte(ptep, pteval);
out:
if (mm == &init_mm)
preempt_enable();
}
pteval_t xen_pte_val(pte_t pte)
{
pteval_t ret = pte.pte;
if (ret & _PAGE_PRESENT)
ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
return ret;
}
pgdval_t xen_pgd_val(pgd_t pgd)
{
pgdval_t ret = pgd.pgd;
if (ret & _PAGE_PRESENT)
ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
return ret;
}
pte_t xen_make_pte(pteval_t pte)
{
if (pte & _PAGE_PRESENT) {
pte = phys_to_machine(XPADDR(pte)).maddr;
pte &= ~(_PAGE_PCD | _PAGE_PWT);
}
return (pte_t){ .pte = pte };
}
pgd_t xen_make_pgd(pgdval_t pgd)
{
if (pgd & _PAGE_PRESENT)
pgd = phys_to_machine(XPADDR(pgd)).maddr;
return (pgd_t){ pgd };
}
pmdval_t xen_pmd_val(pmd_t pmd)
{
pmdval_t ret = native_pmd_val(pmd);
if (ret & _PAGE_PRESENT)
ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
return ret;
}
#ifdef CONFIG_X86_PAE
void xen_set_pud(pud_t *ptr, pud_t val)
{
@@ -214,100 +265,18 @@ void xen_pmd_clear(pmd_t *pmdp)
xen_set_pmd(pmdp, __pmd(0));
}
-unsigned long long xen_pte_val(pte_t pte)
+pmd_t xen_make_pmd(pmdval_t pmd)
{
-unsigned long long ret = 0;
-if (pte.pte_low) {
-ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low;
-ret = machine_to_phys(XMADDR(ret)).paddr | 1;
-}
-return ret;
-}
-unsigned long long xen_pmd_val(pmd_t pmd)
-{
-unsigned long long ret = pmd.pmd;
-if (ret)
-ret = machine_to_phys(XMADDR(ret)).paddr | 1;
-return ret;
-}
-unsigned long long xen_pgd_val(pgd_t pgd)
-{
-unsigned long long ret = pgd.pgd;
-if (ret)
-ret = machine_to_phys(XMADDR(ret)).paddr | 1;
-return ret;
-}
-pte_t xen_make_pte(unsigned long long pte)
-{
-if (pte & _PAGE_PRESENT) {
-pte = phys_to_machine(XPADDR(pte)).maddr;
-pte &= ~(_PAGE_PCD | _PAGE_PWT);
-}
-return (pte_t){ .pte = pte };
-}
-pmd_t xen_make_pmd(unsigned long long pmd)
-{
-if (pmd & 1)
+if (pmd & _PAGE_PRESENT)
pmd = phys_to_machine(XPADDR(pmd)).maddr;
-return (pmd_t){ pmd };
-}
-pgd_t xen_make_pgd(unsigned long long pgd)
-{
-if (pgd & _PAGE_PRESENT)
-pgd = phys_to_machine(XPADDR(pgd)).maddr;
-return (pgd_t){ pgd };
+return native_make_pmd(pmd);
}
#else /* !PAE */
void xen_set_pte(pte_t *ptep, pte_t pte)
{
*ptep = pte;
}
unsigned long xen_pte_val(pte_t pte)
{
unsigned long ret = pte.pte_low;
if (ret & _PAGE_PRESENT)
ret = machine_to_phys(XMADDR(ret)).paddr;
return ret;
}
unsigned long xen_pgd_val(pgd_t pgd)
{
unsigned long ret = pgd.pgd;
if (ret)
ret = machine_to_phys(XMADDR(ret)).paddr | 1;
return ret;
}
pte_t xen_make_pte(unsigned long pte)
{
if (pte & _PAGE_PRESENT) {
pte = phys_to_machine(XPADDR(pte)).maddr;
pte &= ~(_PAGE_PCD | _PAGE_PWT);
}
return (pte_t){ pte };
}
pgd_t xen_make_pgd(unsigned long pgd)
{
if (pgd & _PAGE_PRESENT)
pgd = phys_to_machine(XPADDR(pgd)).maddr;
return (pgd_t){ pgd };
}
#endif /* CONFIG_X86_PAE */
/*
...
@@ -16,6 +16,7 @@
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
+#include <xen/interface/callback.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
@@ -68,6 +69,24 @@ static void __init fiddle_vdso(void)
*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
}
void xen_enable_sysenter(void)
{
int cpu = smp_processor_id();
extern void xen_sysenter_target(void);
/* Mask events on entry, even though they get enabled immediately */
static struct callback_register sysenter = {
.type = CALLBACKTYPE_sysenter,
.address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
.flags = CALLBACKF_mask_events,
};
if (!boot_cpu_has(X86_FEATURE_SEP) ||
HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) {
clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
}
}
void __init xen_arch_setup(void)
{
struct physdev_set_iopl set_iopl;
@@ -82,6 +101,8 @@ void __init xen_arch_setup(void)
HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
__KERNEL_CS, (unsigned long)xen_failsafe_callback);
+xen_enable_sysenter();
set_iopl.iopl = 1;
rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
if (rc != 0)
...
@@ -36,8 +36,9 @@
#include "mmu.h"
static cpumask_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq);
-static DEFINE_PER_CPU(int, callfunc_irq);
+static DEFINE_PER_CPU(int, resched_irq) = -1;
+static DEFINE_PER_CPU(int, callfunc_irq) = -1;
+static DEFINE_PER_CPU(int, debug_irq) = -1;
/*
* Structure and data for smp_call_function(). This is designed to minimise
@@ -72,6 +73,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
int cpu = smp_processor_id();
cpu_init();
+xen_enable_sysenter();
preempt_disable();
per_cpu(cpu_state, cpu) = CPU_ONLINE;
@@ -88,9 +90,7 @@ static int xen_smp_intr_init(unsigned int cpu)
static int xen_smp_intr_init(unsigned int cpu)
{
int rc;
-const char *resched_name, *callfunc_name;
-per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
+const char *resched_name, *callfunc_name, *debug_name;
resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -114,6 +114,14 @@ static int xen_smp_intr_init(unsigned int cpu)
goto fail;
per_cpu(callfunc_irq, cpu) = rc;
debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
debug_name, NULL);
if (rc < 0)
goto fail;
per_cpu(debug_irq, cpu) = rc;
return 0;
fail:
@@ -121,6 +129,8 @@ static int xen_smp_intr_init(unsigned int cpu)
unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
if (per_cpu(callfunc_irq, cpu) >= 0)
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
if (per_cpu(debug_irq, cpu) >= 0)
unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
return rc;
}
...
@@ -107,6 +107,20 @@ ENDPATCH(xen_restore_fl_direct)
ENDPROC(xen_restore_fl_direct)
RELOC(xen_restore_fl_direct, 2b+1)
/*
We can't use sysexit directly, because we're not running in ring0.
But we can easily fake it up using iret. Assuming xen_sysexit
is jumped to with a standard stack frame, we can just strip it
back to a standard iret frame and use iret.
*/
ENTRY(xen_sysexit)
movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
lea PT_EIP(%esp), %esp
jmp xen_iret
ENDPROC(xen_sysexit)
/*
This is run where a normal iret would be run, with the same stack setup:
8: eflags
@@ -184,8 +198,12 @@ iret_restore_end:
region is OK. */
je xen_hypervisor_callback
-iret
+1: iret
xen_iret_end_crit:
+.section __ex_table,"a"
+.align 4
+.long 1b,iret_exc
+.previous
hyper_iret:
/* put this out of line since its very rarely used */
@@ -219,9 +237,7 @@ hyper_iret:
ds } SAVE_ALL state
eax }
: :
-ebx }
-----------------
-return addr <- esp
+ebx }<- esp
----------------
In order to deliver the nested exception properly, we need to shift
@@ -236,10 +252,8 @@ hyper_iret:
it's usermode state which we eventually need to restore.
*/
ENTRY(xen_iret_crit_fixup)
-/* offsets +4 for return address */
/*
-Paranoia: Make sure we're really coming from userspace.
+Paranoia: Make sure we're really coming from kernel space.
One could imagine a case where userspace jumps into the
critical range address, but just before the CPU delivers a GP,
it decides to deliver an interrupt instead. Unlikely?
@@ -248,32 +262,32 @@ ENTRY(xen_iret_crit_fixup)
jump instruction itself, not the destination, but some virtual
environments get this wrong.
*/
-movl PT_CS+4(%esp), %ecx
+movl PT_CS(%esp), %ecx
andl $SEGMENT_RPL_MASK, %ecx
cmpl $USER_RPL, %ecx
je 2f
-lea PT_ORIG_EAX+4(%esp), %esi
-lea PT_EFLAGS+4(%esp), %edi
+lea PT_ORIG_EAX(%esp), %esi
+lea PT_EFLAGS(%esp), %edi
/* If eip is before iret_restore_end then stack
hasn't been restored yet. */
cmp $iret_restore_end, %eax
jae 1f
-movl 0+4(%edi),%eax /* copy EAX */
-movl %eax, PT_EAX+4(%esp)
+movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */
+movl %eax, PT_EAX(%esp)
lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */
/* set up the copy */
1: std
-mov $(PT_EIP+4) / 4, %ecx /* copy ret+saved regs up to orig_eax */
+mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */
rep movsl
cld
lea 4(%edi),%esp /* point esp to new frame */
-2: ret
+2: jmp xen_do_upcall
/*
...
@@ -2,6 +2,8 @@
#define XEN_OPS_H
#include <linux/init.h>
+#include <linux/irqreturn.h>
+#include <xen/xen-ops.h>
/* These are code, but not functions. Defined in entry.S */
extern const char xen_hypervisor_callback[];
@@ -9,7 +11,6 @@ extern const char xen_failsafe_callback[];
void xen_copy_trap_info(struct trap_info *traps);
-DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
DECLARE_PER_CPU(unsigned long, xen_cr3);
DECLARE_PER_CPU(unsigned long, xen_current_cr3);
@@ -19,6 +20,7 @@ extern struct shared_info *HYPERVISOR_shared_info;
char * __init xen_memory_setup(void);
void __init xen_arch_setup(void);
void __init xen_init_IRQ(void);
+void xen_enable_sysenter(void);
void xen_setup_timer(int cpu);
void xen_setup_cpu_clockevents(void);
@@ -28,6 +30,8 @@ unsigned long xen_get_wallclock(void);
int xen_set_wallclock(unsigned long time);
unsigned long long xen_sched_clock(void);
+irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
bool xen_vcpu_stolen(int vcpu);
void xen_mark_init_mm_pinned(void);
@@ -64,4 +68,6 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void);
DECL_ASM(void, xen_restore_fl_direct, unsigned long);
void xen_iret(void);
+void xen_sysexit(void);
#endif /* XEN_OPS_H */
@@ -97,4 +97,6 @@ source "drivers/dca/Kconfig"
source "drivers/auxdisplay/Kconfig"
source "drivers/uio/Kconfig"
+source "drivers/xen/Kconfig"
endmenu
@@ -47,6 +47,7 @@
#include <xen/interface/grant_table.h>
#include <xen/interface/io/blkif.h>
+#include <xen/interface/io/protocols.h>
#include <asm/xen/hypervisor.h>
@@ -74,7 +75,6 @@ static struct block_device_operations xlvbd_block_fops;
struct blkfront_info
{
struct xenbus_device *xbdev;
-dev_t dev;
struct gendisk *gd;
int vdevice;
blkif_vdev_t handle;
@@ -88,6 +88,7 @@ struct blkfront_info
struct blk_shadow shadow[BLK_RING_SIZE];
unsigned long shadow_free;
int feature_barrier;
+int is_ready;
/**
* The number of people holding this device open. We won't allow a
@@ -614,6 +615,12 @@ static int talk_to_backend(struct xenbus_device *dev,
message = "writing event-channel";
goto abort_transaction;
}
err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
XEN_IO_PROTO_ABI_NATIVE);
if (err) {
message = "writing protocol";
goto abort_transaction;
}
err = xenbus_transaction_end(xbt, 0);
if (err) {
@@ -833,6 +840,8 @@ static void blkfront_connect(struct blkfront_info *info)
spin_unlock_irq(&blkif_io_lock);
add_disk(info->gd);
+info->is_ready = 1;
}
/**
@@ -896,7 +905,7 @@ static void backend_changed(struct xenbus_device *dev,
break;
case XenbusStateClosing:
-bd = bdget(info->dev);
+bd = bdget_disk(info->gd, 0);
if (bd == NULL)
xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
@@ -925,6 +934,13 @@ static int blkfront_remove(struct xenbus_device *dev)
return 0;
}
static int blkfront_is_ready(struct xenbus_device *dev)
{
struct blkfront_info *info = dev->dev.driver_data;
return info->is_ready;
}
static int blkif_open(struct inode *inode, struct file *filep)
{
struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
@@ -971,6 +987,7 @@ static struct xenbus_driver blkfront = {
.remove = blkfront_remove,
.resume = blkfront_resume,
.otherend_changed = backend_changed,
+.is_ready = blkfront_is_ready,
};
static int __init xlblk_init(void)
@@ -998,3 +1015,5 @@ module_exit(xlblk_exit);
MODULE_DESCRIPTION("Xen virtual block device frontend");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
MODULE_ALIAS("xen:vbd");
MODULE_ALIAS("xenblk");
@@ -149,6 +149,15 @@ config INPUT_APMPOWER
To compile this driver as a module, choose M here: the
module will be called apm-power.
config XEN_KBDDEV_FRONTEND
tristate "Xen virtual keyboard and mouse support"
depends on XEN_FBDEV_FRONTEND
default y
help
This driver implements the front-end of the Xen virtual
keyboard and mouse device driver. It communicates with a back-end
in another domain.
comment "Input Device Drivers" comment "Input Device Drivers"
source "drivers/input/keyboard/Kconfig" source "drivers/input/keyboard/Kconfig"
......
@@ -23,3 +23,5 @@ obj-$(CONFIG_INPUT_TOUCHSCREEN) += touchscreen/
obj-$(CONFIG_INPUT_MISC) += misc/
obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o
obj-$(CONFIG_XEN_KBDDEV_FRONTEND) += xen-kbdfront.o
/*
* Xen para-virtual input device
*
* Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
* Copyright (C) 2006-2008 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
*
* Based on linux/drivers/input/mouse/sermouse.c
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of this archive for
* more details.
*/
/*
* TODO:
*
* Switch to grant tables together with xen-fbfront.c.
*/
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/input.h>
#include <asm/xen/hypervisor.h>
#include <xen/events.h>
#include <xen/page.h>
#include <xen/interface/io/fbif.h>
#include <xen/interface/io/kbdif.h>
#include <xen/xenbus.h>
struct xenkbd_info {
struct input_dev *kbd;
struct input_dev *ptr;
struct xenkbd_page *page;
int irq;
struct xenbus_device *xbdev;
char phys[32];
};
static int xenkbd_remove(struct xenbus_device *);
static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *);
static void xenkbd_disconnect_backend(struct xenkbd_info *);
/*
* Note: if you need to send out events, see xenfb_do_update() for how
* to do that.
*/
static irqreturn_t input_handler(int rq, void *dev_id)
{
struct xenkbd_info *info = dev_id;
struct xenkbd_page *page = info->page;
__u32 cons, prod;
prod = page->in_prod;
if (prod == page->in_cons)
return IRQ_HANDLED;
rmb(); /* ensure we see ring contents up to prod */
for (cons = page->in_cons; cons != prod; cons++) {
union xenkbd_in_event *event;
struct input_dev *dev;
event = &XENKBD_IN_RING_REF(page, cons);
dev = info->ptr;
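/* Motion and position events are reported against the pointer device;
   key events are re-routed below according to the keycode. */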
switch (event->type) {
case XENKBD_TYPE_MOTION:
input_report_rel(dev, REL_X, event->motion.rel_x);
input_report_rel(dev, REL_Y, event->motion.rel_y);
break;
case XENKBD_TYPE_KEY:
dev = NULL;
if (test_bit(event->key.keycode, info->kbd->keybit))
dev = info->kbd;
if (test_bit(event->key.keycode, info->ptr->keybit))
dev = info->ptr;
if (dev)
input_report_key(dev, event->key.keycode,
event->key.pressed);
else
printk(KERN_WARNING
"xenkbd: unhandled keycode 0x%x\n",
event->key.keycode);
break;
case XENKBD_TYPE_POS:
input_report_abs(dev, ABS_X, event->pos.abs_x);
input_report_abs(dev, ABS_Y, event->pos.abs_y);
break;
}
if (dev)
input_sync(dev);
}
mb(); /* ensure we got ring contents */
page->in_cons = cons;
notify_remote_via_irq(info->irq);
return IRQ_HANDLED;
}
static int __devinit xenkbd_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
{
int ret, i;
struct xenkbd_info *info;
struct input_dev *kbd, *ptr;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info) {
xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
return -ENOMEM;
}
dev->dev.driver_data = info;
info->xbdev = dev;
info->irq = -1;
snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename);
info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
if (!info->page)
goto error_nomem;
/* keyboard */
kbd = input_allocate_device();
if (!kbd)
goto error_nomem;
kbd->name = "Xen Virtual Keyboard";
kbd->phys = info->phys;
kbd->id.bustype = BUS_PCI;
kbd->id.vendor = 0x5853;
kbd->id.product = 0xffff;
kbd->evbit[0] = BIT(EV_KEY);
for (i = KEY_ESC; i < KEY_UNKNOWN; i++)
set_bit(i, kbd->keybit);
for (i = KEY_OK; i < KEY_MAX; i++)
set_bit(i, kbd->keybit);
ret = input_register_device(kbd);
if (ret) {
input_free_device(kbd);
xenbus_dev_fatal(dev, ret, "input_register_device(kbd)");
goto error;
}
info->kbd = kbd;
/* pointing device */
ptr = input_allocate_device();
if (!ptr)
goto error_nomem;
ptr->name = "Xen Virtual Pointer";
ptr->phys = info->phys;
ptr->id.bustype = BUS_PCI;
ptr->id.vendor = 0x5853;
ptr->id.product = 0xfffe;
ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS);
for (i = BTN_LEFT; i <= BTN_TASK; i++)
set_bit(i, ptr->keybit);
ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y);
input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0);
input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0);
ret = input_register_device(ptr);
if (ret) {
input_free_device(ptr);
xenbus_dev_fatal(dev, ret, "input_register_device(ptr)");
goto error;
}
info->ptr = ptr;
ret = xenkbd_connect_backend(dev, info);
if (ret < 0)
goto error;
return 0;
error_nomem:
ret = -ENOMEM;
xenbus_dev_fatal(dev, ret, "allocating device memory");
error:
xenkbd_remove(dev);
return ret;
}
static int xenkbd_resume(struct xenbus_device *dev)
{
struct xenkbd_info *info = dev->dev.driver_data;
xenkbd_disconnect_backend(info);
memset(info->page, 0, PAGE_SIZE);
return xenkbd_connect_backend(dev, info);
}
static int xenkbd_remove(struct xenbus_device *dev)
{
struct xenkbd_info *info = dev->dev.driver_data;
xenkbd_disconnect_backend(info);
if (info->kbd)
input_unregister_device(info->kbd);
if (info->ptr)
input_unregister_device(info->ptr);
free_page((unsigned long)info->page);
kfree(info);
return 0;
}
static int xenkbd_connect_backend(struct xenbus_device *dev,
struct xenkbd_info *info)
{
int ret, evtchn;
struct xenbus_transaction xbt;
ret = xenbus_alloc_evtchn(dev, &evtchn);
if (ret)
return ret;
ret = bind_evtchn_to_irqhandler(evtchn, input_handler,
0, dev->devicetype, info);
if (ret < 0) {
xenbus_free_evtchn(dev, evtchn);
xenbus_dev_fatal(dev, ret, "bind_evtchn_to_irqhandler");
return ret;
}
info->irq = ret;
again:
ret = xenbus_transaction_start(&xbt);
if (ret) {
xenbus_dev_fatal(dev, ret, "starting transaction");
return ret;
}
ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
virt_to_mfn(info->page));
if (ret)
goto error_xenbus;
ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
evtchn);
if (ret)
goto error_xenbus;
ret = xenbus_transaction_end(xbt, 0);
if (ret) {
if (ret == -EAGAIN)
goto again;
xenbus_dev_fatal(dev, ret, "completing transaction");
return ret;
}
xenbus_switch_state(dev, XenbusStateInitialised);
return 0;
error_xenbus:
xenbus_transaction_end(xbt, 1);
xenbus_dev_fatal(dev, ret, "writing xenstore");
return ret;
}
static void xenkbd_disconnect_backend(struct xenkbd_info *info)
{
if (info->irq >= 0)
unbind_from_irqhandler(info->irq, info);
info->irq = -1;
}
static void xenkbd_backend_changed(struct xenbus_device *dev,
enum xenbus_state backend_state)
{
struct xenkbd_info *info = dev->dev.driver_data;
int ret, val;
switch (backend_state) {
case XenbusStateInitialising:
case XenbusStateInitialised:
case XenbusStateUnknown:
case XenbusStateClosed:
break;
case XenbusStateInitWait:
InitWait:
ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
"feature-abs-pointer", "%d", &val);
if (ret < 0)
val = 0;
if (val) {
ret = xenbus_printf(XBT_NIL, info->xbdev->nodename,
"request-abs-pointer", "1");
if (ret)
printk(KERN_WARNING
"xenkbd: can't request abs-pointer");
}
xenbus_switch_state(dev, XenbusStateConnected);
break;
case XenbusStateConnected:
/*
* Work around xenbus race condition: If backend goes
* through InitWait to Connected fast enough, we can
* get Connected twice here.
*/
if (dev->state != XenbusStateConnected)
goto InitWait; /* no InitWait seen yet, fudge it */
break;
case XenbusStateClosing:
xenbus_frontend_closed(dev);
break;
}
}
static struct xenbus_device_id xenkbd_ids[] = {
{ "vkbd" },
{ "" }
};
static struct xenbus_driver xenkbd = {
.name = "vkbd",
.owner = THIS_MODULE,
.ids = xenkbd_ids,
.probe = xenkbd_probe,
.remove = xenkbd_remove,
.resume = xenkbd_resume,
.otherend_changed = xenkbd_backend_changed,
};
static int __init xenkbd_init(void)
{
if (!is_running_on_xen())
return -ENODEV;
/* Nothing to do if running in dom0. */
if (is_initial_xendomain())
return -ENODEV;
return xenbus_register_frontend(&xenkbd);
}
static void __exit xenkbd_cleanup(void)
{
xenbus_unregister_driver(&xenkbd);
}
module_init(xenkbd_init);
module_exit(xenkbd_cleanup);
MODULE_LICENSE("GPL");
@@ -1809,3 +1809,5 @@ module_exit(netif_exit);
MODULE_DESCRIPTION("Xen virtual network device frontend");
MODULE_LICENSE("GPL");
MODULE_ALIAS("xen:vif");
MODULE_ALIAS("xennet");
@@ -1930,6 +1930,20 @@ config FB_VIRTUAL
If unsure, say N.
config XEN_FBDEV_FRONTEND
tristate "Xen virtual frame buffer support"
depends on FB && XEN
select FB_SYS_FILLRECT
select FB_SYS_COPYAREA
select FB_SYS_IMAGEBLIT
select FB_SYS_FOPS
select FB_DEFERRED_IO
default y
help
This driver implements the front-end of the Xen virtual
frame buffer driver. It communicates with a back-end
in another domain.
source "drivers/video/omap/Kconfig" source "drivers/video/omap/Kconfig"
source "drivers/video/backlight/Kconfig" source "drivers/video/backlight/Kconfig"
......
...@@ -114,6 +114,7 @@ obj-$(CONFIG_FB_PS3) += ps3fb.o ...@@ -114,6 +114,7 @@ obj-$(CONFIG_FB_PS3) += ps3fb.o
obj-$(CONFIG_FB_SM501) += sm501fb.o obj-$(CONFIG_FB_SM501) += sm501fb.o
obj-$(CONFIG_FB_XILINX) += xilinxfb.o obj-$(CONFIG_FB_XILINX) += xilinxfb.o
obj-$(CONFIG_FB_OMAP) += omap/ obj-$(CONFIG_FB_OMAP) += omap/
obj-$(CONFIG_XEN_FBDEV_FRONTEND) += xen-fbfront.o
# Platform or fallback drivers go here
obj-$(CONFIG_FB_UVESA) += uvesafb.o
...
config XEN_BALLOON
bool "Xen memory balloon driver"
depends on XEN
default y
help
The balloon driver allows the Xen domain to request more memory from
the system to expand the domain's memory allocation, or alternatively
return unneeded memory to the system.
config XEN_SCRUB_PAGES
bool "Scrub pages before returning them to system"
depends on XEN_BALLOON
default y
help
Scrub pages before returning them to the system for reuse by
other domains. This makes sure that any confidential data
is not accidentally visible to other domains. It is more
secure, but slightly less efficient.
If in doubt, say yes.
obj-y += grant-table.o obj-y += grant-table.o features.o events.o
obj-y += xenbus/
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += balloon.o
@@ -33,12 +33,11 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
+#include <xen/xen-ops.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
-#include "xen-ops.h"

/*
 * This lock protects updates to the following mapping and reference-count
 * arrays. The lock does not need to be acquired to read the mapping tables.
@@ -455,6 +454,53 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
	notify_remote_via_irq(irq);
}
irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
{
struct shared_info *sh = HYPERVISOR_shared_info;
int cpu = smp_processor_id();
int i;
unsigned long flags;
static DEFINE_SPINLOCK(debug_lock);
spin_lock_irqsave(&debug_lock, flags);
printk("vcpu %d\n ", cpu);
for_each_online_cpu(i) {
struct vcpu_info *v = per_cpu(xen_vcpu, i);
printk("%d: masked=%d pending=%d event_sel %08lx\n ", i,
(get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask,
v->evtchn_upcall_pending,
v->evtchn_pending_sel);
}
printk("pending:\n ");
for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
printk("%08lx%s", sh->evtchn_pending[i],
i % 8 == 0 ? "\n " : " ");
printk("\nmasks:\n ");
for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
printk("%08lx%s", sh->evtchn_mask[i],
i % 8 == 0 ? "\n " : " ");
printk("\nunmasked:\n ");
for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
i % 8 == 0 ? "\n " : " ");
printk("\npending list:\n");
for(i = 0; i < NR_EVENT_CHANNELS; i++) {
if (sync_test_bit(i, sh->evtchn_pending)) {
printk(" %d: event %d -> irq %d\n",
cpu_evtchn[i], i,
evtchn_to_irq[i]);
}
}
spin_unlock_irqrestore(&debug_lock, flags);
return IRQ_HANDLED;
}
/*
 * Search the CPUs pending events bitmasks. For each one found, map
@@ -470,29 +516,44 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
	int cpu = get_cpu();
	struct shared_info *s = HYPERVISOR_shared_info;
	struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
-	unsigned long pending_words;
+	static DEFINE_PER_CPU(unsigned, nesting_count);
+	unsigned count;

-	vcpu_info->evtchn_upcall_pending = 0;
+	do {
+		unsigned long pending_words;

-	/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
-	pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
-	while (pending_words != 0) {
-		unsigned long pending_bits;
-		int word_idx = __ffs(pending_words);
-		pending_words &= ~(1UL << word_idx);
+		vcpu_info->evtchn_upcall_pending = 0;

-		while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
-			int bit_idx = __ffs(pending_bits);
-			int port = (word_idx * BITS_PER_LONG) + bit_idx;
-			int irq = evtchn_to_irq[port];
+		if (__get_cpu_var(nesting_count)++)
+			goto out;

-			if (irq != -1) {
-				regs->orig_ax = ~irq;
-				do_IRQ(regs);
-			}
+#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
+		/* Clear master flag /before/ clearing selector flag. */
+		rmb();
+#endif
+		pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
+		while (pending_words != 0) {
+			unsigned long pending_bits;
+			int word_idx = __ffs(pending_words);
+			pending_words &= ~(1UL << word_idx);
+
+			while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
+				int bit_idx = __ffs(pending_bits);
+				int port = (word_idx * BITS_PER_LONG) + bit_idx;
+				int irq = evtchn_to_irq[port];
+
+				if (irq != -1)
+					xen_do_IRQ(irq, regs);
+			}
		}
-	}
+
+		BUG_ON(!irqs_disabled());
+
+		count = __get_cpu_var(nesting_count);
+		__get_cpu_var(nesting_count) = 0;
+	} while(count != 1);

+out:
	put_cpu();
}
@@ -525,6 +586,22 @@ static void set_affinity_irq(unsigned irq, cpumask_t dest)
	rebind_irq_to_cpu(irq, tcpu);
}
int resend_irq_on_evtchn(unsigned int irq)
{
int masked, evtchn = evtchn_from_irq(irq);
struct shared_info *s = HYPERVISOR_shared_info;
if (!VALID_EVTCHN(evtchn))
return 1;
masked = sync_test_and_set_bit(evtchn, s->evtchn_mask);
sync_set_bit(evtchn, s->evtchn_pending);
if (!masked)
unmask_evtchn(evtchn);
return 1;
}
static void enable_dynirq(unsigned int irq)
{
	int evtchn = evtchn_from_irq(irq);
@@ -554,10 +631,16 @@ static void ack_dynirq(unsigned int irq)
static int retrigger_dynirq(unsigned int irq)
{
	int evtchn = evtchn_from_irq(irq);
+	struct shared_info *sh = HYPERVISOR_shared_info;
	int ret = 0;

	if (VALID_EVTCHN(evtchn)) {
-		set_evtchn(evtchn);
+		int masked;
+
+		masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask);
+		sync_set_bit(evtchn, sh->evtchn_pending);
+		if (!masked)
+			unmask_evtchn(evtchn);
		ret = 1;
	}
...
@@ -439,24 +439,6 @@ static inline unsigned int max_nr_grant_frames(void)
	return xen_max;
}

-static int map_pte_fn(pte_t *pte, struct page *pmd_page,
-		      unsigned long addr, void *data)
-{
-	unsigned long **frames = (unsigned long **)data;
-
-	set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
-	(*frames)++;
-	return 0;
-}
-
-static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
-			unsigned long addr, void *data)
-{
-	set_pte_at(&init_mm, addr, pte, __pte(0));
-	return 0;
-}
-
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
	struct gnttab_setup_table setup;
@@ -470,7 +452,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
	setup.dom = DOMID_SELF;
	setup.nr_frames = nr_gframes;
-	setup.frame_list = frames;
+	set_xen_guest_handle(setup.frame_list, frames);

	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
	if (rc == -ENOSYS) {
@@ -480,17 +462,9 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
	BUG_ON(rc || setup.status);

-	if (shared == NULL) {
-		struct vm_struct *area;
-		area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
-		BUG_ON(area == NULL);
-		shared = area->addr;
-	}
-	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
-				 PAGE_SIZE * nr_gframes,
-				 map_pte_fn, &frames);
+	rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
+				    &shared);
	BUG_ON(rc);
-	frames -= nr_gframes; /* adjust after map_pte_fn() */

	kfree(frames);
@@ -506,10 +480,7 @@ static int gnttab_resume(void)

static int gnttab_suspend(void)
{
-	apply_to_page_range(&init_mm, (unsigned long)shared,
-			    PAGE_SIZE * nr_grant_frames,
-			    unmap_pte_fn, NULL);
+	arch_gnttab_unmap_shared(shared, nr_grant_frames);

	return 0;
}
...
@@ -399,7 +399,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
	*vaddr = NULL;

-	area = alloc_vm_area(PAGE_SIZE);
+	area = xen_alloc_vm_area(PAGE_SIZE);
	if (!area)
		return -ENOMEM;
@@ -409,7 +409,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
		BUG();

	if (op.status != GNTST_okay) {
-		free_vm_area(area);
+		xen_free_vm_area(area);
		xenbus_dev_fatal(dev, op.status,
				 "mapping in shared page %d from domain %d",
				 gnt_ref, dev->otherend_id);
@@ -508,7 +508,7 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
		BUG();

	if (op.status == GNTST_okay)
-		free_vm_area(area);
+		xen_free_vm_area(area);
	else
		xenbus_dev_error(dev, op.status,
				 "unmapping page at handle %d error %d",
...
@@ -88,6 +88,16 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv)
	return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
}
static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype))
return -ENOMEM;
return 0;
}
/* device/<type>/<id> => <type>-<id> */
static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
{
@@ -166,6 +176,7 @@ static struct xen_bus_type xenbus_frontend = {
	.bus = {
		.name = "xen",
		.match = xenbus_match,
		.uevent = xenbus_uevent,
		.probe = xenbus_dev_probe,
		.remove = xenbus_dev_remove,
		.shutdown = xenbus_dev_shutdown,
@@ -438,6 +449,12 @@ static ssize_t xendev_show_devtype(struct device *dev,
}
DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
static ssize_t xendev_show_modalias(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
}
DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
int xenbus_probe_node(struct xen_bus_type *bus,
		      const char *type,
@@ -492,10 +509,16 @@ int xenbus_probe_node(struct xen_bus_type *bus,
	err = device_create_file(&xendev->dev, &dev_attr_devtype);
	if (err)
-		goto fail_remove_file;
+		goto fail_remove_nodename;
+
+	err = device_create_file(&xendev->dev, &dev_attr_modalias);
+	if (err)
+		goto fail_remove_devtype;

	return 0;

-fail_remove_file:
+fail_remove_devtype:
+	device_remove_file(&xendev->dev, &dev_attr_devtype);
+fail_remove_nodename:
	device_remove_file(&xendev->dev, &dev_attr_nodename);
fail_unregister:
	device_unregister(&xendev->dev);
@@ -846,6 +869,7 @@ static int is_disconnected_device(struct device *dev, void *data)
{
	struct xenbus_device *xendev = to_xenbus_device(dev);
	struct device_driver *drv = data;
+	struct xenbus_driver *xendrv;

	/*
	 * A device with no driver will never connect. We care only about
@@ -858,7 +882,9 @@ static int is_disconnected_device(struct device *dev, void *data)
	if (drv && (dev->driver != drv))
		return 0;

-	return (xendev->state != XenbusStateConnected);
+	xendrv = to_xenbus_driver(dev->driver);
+	return (xendev->state != XenbusStateConnected ||
+		(xendrv->is_ready && !xendrv->is_ready(xendev)));
}

static int exists_disconnected_device(struct device_driver *drv)
...
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Copyright (C) IBM Corp. 2006
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
#include <linux/gfp.h>
#include <linux/mm.h>
#include <asm/page.h>
#include <xen/xencomm.h>
#include <xen/interface/xen.h>
#ifdef __ia64__
#include <asm/xen/xencomm.h> /* for is_kern_addr() */
#endif
#ifdef HAVE_XEN_PLATFORM_COMPAT_H
#include <xen/platform-compat.h>
#endif
static int xencomm_init(struct xencomm_desc *desc,
void *buffer, unsigned long bytes)
{
unsigned long recorded = 0;
int i = 0;
while ((recorded < bytes) && (i < desc->nr_addrs)) {
unsigned long vaddr = (unsigned long)buffer + recorded;
unsigned long paddr;
int offset;
int chunksz;
offset = vaddr % PAGE_SIZE; /* handle partial pages */
chunksz = min(PAGE_SIZE - offset, bytes - recorded);
paddr = xencomm_vtop(vaddr);
if (paddr == ~0UL) {
printk(KERN_DEBUG "%s: couldn't translate vaddr %lx\n",
__func__, vaddr);
return -EINVAL;
}
desc->address[i++] = paddr;
recorded += chunksz;
}
if (recorded < bytes) {
printk(KERN_DEBUG
"%s: could only translate %ld of %ld bytes\n",
__func__, recorded, bytes);
return -ENOSPC;
}
/* mark remaining addresses invalid (just for safety) */
while (i < desc->nr_addrs)
desc->address[i++] = XENCOMM_INVALID;
desc->magic = XENCOMM_MAGIC;
return 0;
}
static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask,
void *buffer, unsigned long bytes)
{
struct xencomm_desc *desc;
unsigned long buffer_ulong = (unsigned long)buffer;
unsigned long start = buffer_ulong & PAGE_MASK;
unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK;
unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT;
unsigned long size = sizeof(*desc) +
sizeof(desc->address[0]) * nr_addrs;
/*
* slab allocator returns at least sizeof(void*) aligned pointer.
* When sizeof(*desc) > sizeof(void*), struct xencomm_desc might
* cross page boundary.
*/
if (sizeof(*desc) > sizeof(void *)) {
unsigned long order = get_order(size);
desc = (struct xencomm_desc *)__get_free_pages(gfp_mask,
order);
if (desc == NULL)
return NULL;
desc->nr_addrs =
((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) /
sizeof(*desc->address);
} else {
desc = kmalloc(size, gfp_mask);
if (desc == NULL)
return NULL;
desc->nr_addrs = nr_addrs;
}
return desc;
}
void xencomm_free(struct xencomm_handle *desc)
{
if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) {
struct xencomm_desc *desc__ = (struct xencomm_desc *)desc;
if (sizeof(*desc__) > sizeof(void *)) {
unsigned long size = sizeof(*desc__) +
sizeof(desc__->address[0]) * desc__->nr_addrs;
unsigned long order = get_order(size);
free_pages((unsigned long)__va(desc), order);
} else
kfree(__va(desc));
}
}
static int xencomm_create(void *buffer, unsigned long bytes,
struct xencomm_desc **ret, gfp_t gfp_mask)
{
struct xencomm_desc *desc;
int rc;
pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes);
if (bytes == 0) {
/* don't create a descriptor; Xen recognizes NULL. */
BUG_ON(buffer != NULL);
*ret = NULL;
return 0;
}
BUG_ON(buffer == NULL); /* 'bytes' is non-zero */
desc = xencomm_alloc(gfp_mask, buffer, bytes);
if (!desc) {
printk(KERN_DEBUG "%s failure\n", "xencomm_alloc");
return -ENOMEM;
}
rc = xencomm_init(desc, buffer, bytes);
if (rc) {
printk(KERN_DEBUG "%s failure: %d\n", "xencomm_init", rc);
xencomm_free((struct xencomm_handle *)__pa(desc));
return rc;
}
*ret = desc;
return 0;
}
/* check that the address is physically contiguous: a direct-mapped kernel address outside the VMALLOC region */
static int is_phys_contiguous(unsigned long addr)
{
if (!is_kernel_addr(addr))
return 0;
return (addr < VMALLOC_START) || (addr >= VMALLOC_END);
}
static struct xencomm_handle *xencomm_create_inline(void *ptr)
{
unsigned long paddr;
BUG_ON(!is_phys_contiguous((unsigned long)ptr));
paddr = (unsigned long)xencomm_pa(ptr);
BUG_ON(paddr & XENCOMM_INLINE_FLAG);
return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG);
}
/* "mini" routine, for stack-based communications: */
static int xencomm_create_mini(void *buffer,
unsigned long bytes, struct xencomm_mini *xc_desc,
struct xencomm_desc **ret)
{
int rc = 0;
struct xencomm_desc *desc;
BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0);
desc = (void *)xc_desc;
desc->nr_addrs = XENCOMM_MINI_ADDRS;
rc = xencomm_init(desc, buffer, bytes);
if (!rc)
*ret = desc;
return rc;
}
struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes)
{
int rc;
struct xencomm_desc *desc;
if (is_phys_contiguous((unsigned long)ptr))
return xencomm_create_inline(ptr);
rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL);
if (rc || desc == NULL)
return NULL;
return xencomm_pa(desc);
}
struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes,
struct xencomm_mini *xc_desc)
{
int rc;
struct xencomm_desc *desc = NULL;
if (is_phys_contiguous((unsigned long)ptr))
return xencomm_create_inline(ptr);
rc = xencomm_create_mini(ptr, bytes, xc_desc,
&desc);
if (rc)
return NULL;
return xencomm_pa(desc);
}
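A short illustration may help here; it is not part of this patch. A hypothetical caller wraps a guest buffer in a xencomm descriptor before issuing a hypercall and releases it afterwards. HYPERVISOR_example_op is a made-up name; the real callers are the ia64 hypercall wrappers that pass the returned handle to Xen.

/* Illustrative sketch only -- HYPERVISOR_example_op does not exist. */
static int example_hypercall(void *buf, unsigned long len)
{
	struct xencomm_handle *desc;
	int rc;

	/* Physically contiguous buffers become inline handles; others get a
	 * page- or kmalloc-backed descriptor built by xencomm_create(). */
	desc = xencomm_map(buf, len);
	if (desc == NULL && len != 0)
		return -ENOMEM;

	rc = HYPERVISOR_example_op(desc);

	/* Freeing is a no-op for inline handles. */
	xencomm_free(desc);
	return rc;
}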
@@ -220,11 +220,13 @@ struct pv_mmu_ops {
			  unsigned long va);

	/* Hooks for allocating/releasing pagetable pages */
-	void (*alloc_pt)(struct mm_struct *mm, u32 pfn);
-	void (*alloc_pd)(struct mm_struct *mm, u32 pfn);
-	void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
-	void (*release_pt)(u32 pfn);
-	void (*release_pd)(u32 pfn);
+	void (*alloc_pte)(struct mm_struct *mm, u32 pfn);
+	void (*alloc_pmd)(struct mm_struct *mm, u32 pfn);
+	void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
+	void (*alloc_pud)(struct mm_struct *mm, u32 pfn);
+	void (*release_pte)(u32 pfn);
+	void (*release_pmd)(u32 pfn);
+	void (*release_pud)(u32 pfn);

	/* Pagetable manipulation functions */
	void (*set_pte)(pte_t *ptep, pte_t pteval);
@@ -910,28 +912,37 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
}

-static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn)
+static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn)
{
-	PVOP_VCALL2(pv_mmu_ops.alloc_pt, mm, pfn);
+	PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
}

-static inline void paravirt_release_pt(unsigned pfn)
+static inline void paravirt_release_pte(unsigned pfn)
{
-	PVOP_VCALL1(pv_mmu_ops.release_pt, pfn);
+	PVOP_VCALL1(pv_mmu_ops.release_pte, pfn);
}

-static inline void paravirt_alloc_pd(struct mm_struct *mm, unsigned pfn)
+static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn)
{
-	PVOP_VCALL2(pv_mmu_ops.alloc_pd, mm, pfn);
+	PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
}

-static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn,
-					   unsigned start, unsigned count)
+static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn,
					    unsigned start, unsigned count)
{
-	PVOP_VCALL4(pv_mmu_ops.alloc_pd_clone, pfn, clonepfn, start, count);
+	PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count);
}

-static inline void paravirt_release_pd(unsigned pfn)
+static inline void paravirt_release_pmd(unsigned pfn)
{
-	PVOP_VCALL1(pv_mmu_ops.release_pd, pfn);
+	PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
}
+
+static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn)
+{
+	PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn);
+}
+
+static inline void paravirt_release_pud(unsigned pfn)
+{
+	PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
+}

#ifdef CONFIG_HIGHPTE
...
-#ifdef CONFIG_X86_32
-# include "pgalloc_32.h"
-#else
-# include "pgalloc_64.h"
-#endif
#ifndef _ASM_X86_PGALLOC_H
#define _ASM_X86_PGALLOC_H

#include <linux/threads.h>
#include <linux/mm.h>		/* for struct page */
#include <linux/pagemap.h>

#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {}
static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {}
static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
					    unsigned long start, unsigned long count) {}
static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {}
static inline void paravirt_release_pte(unsigned long pfn) {}
static inline void paravirt_release_pmd(unsigned long pfn) {}
static inline void paravirt_release_pud(unsigned long pfn) {}
#endif
/*
* Allocate and free page tables.
*/
extern pgd_t *pgd_alloc(struct mm_struct *);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
/* Should really implement gc for free page table pages. This could be
done with a reference count in struct page. */
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
free_page((unsigned long)pte);
}
static inline void pte_free(struct mm_struct *mm, struct page *pte)
{
__free_page(pte);
}
extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
static inline void pmd_populate_kernel(struct mm_struct *mm,
pmd_t *pmd, pte_t *pte)
{
paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
}
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
struct page *pte)
{
unsigned long pfn = page_to_pfn(pte);
paravirt_alloc_pte(mm, pfn);
set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
}
#define pmd_pgtable(pmd) pmd_page(pmd)
#if PAGETABLE_LEVELS > 2
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
}
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
free_page((unsigned long)pmd);
}
extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
#ifdef CONFIG_X86_PAE
extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
#else /* !CONFIG_X86_PAE */
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
}
#endif /* CONFIG_X86_PAE */
#if PAGETABLE_LEVELS > 3
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
{
paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
}
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
free_page((unsigned long)pud);
}
extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
#endif /* PAGETABLE_LEVELS > 3 */
#endif /* PAGETABLE_LEVELS > 2 */
#endif /* _ASM_X86_PGALLOC_H */
#ifndef _I386_PGALLOC_H
#define _I386_PGALLOC_H
#include <linux/threads.h>
#include <linux/mm.h> /* for struct page */
#include <linux/pagemap.h>
#include <asm/tlb.h>
#include <asm-generic/tlb.h>
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
#define paravirt_alloc_pt(mm, pfn) do { } while (0)
#define paravirt_alloc_pd(mm, pfn) do { } while (0)
#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
#define paravirt_release_pt(pfn) do { } while (0)
#define paravirt_release_pd(pfn) do { } while (0)
#endif
static inline void pmd_populate_kernel(struct mm_struct *mm,
pmd_t *pmd, pte_t *pte)
{
paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
}
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
{
unsigned long pfn = page_to_pfn(pte);
paravirt_alloc_pt(mm, pfn);
set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
}
#define pmd_pgtable(pmd) pmd_page(pmd)
/*
* Allocate and free page tables.
*/
extern pgd_t *pgd_alloc(struct mm_struct *);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long)pte);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
pgtable_page_dtor(pte);
__free_page(pte);
}
extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
#ifdef CONFIG_X86_PAE
/*
* In the PAE case we free the pmds as part of the pgd.
*/
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
}
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
free_page((unsigned long)pmd);
}
extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
{
paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT);
/* Note: almost everything apart from _PAGE_PRESENT is
reserved at the pmd (PDPT) level. */
set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
/*
* According to Intel App note "TLBs, Paging-Structure Caches,
* and Their Invalidation", April 2007, document 317080-001,
* section 8.1: in PAE mode we explicitly have to flush the
* TLB via cr3 if the top-level pgd is changed...
*/
if (mm == current->active_mm)
write_cr3(read_cr3());
}
#endif /* CONFIG_X86_PAE */
#endif /* _I386_PGALLOC_H */
#ifndef _X86_64_PGALLOC_H
#define _X86_64_PGALLOC_H
#include <asm/pda.h>
#include <linux/threads.h>
#include <linux/mm.h>
#define pmd_populate_kernel(mm, pmd, pte) \
set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
#define pud_populate(mm, pud, pmd) \
set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)))
#define pgd_populate(mm, pgd, pud) \
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)))
#define pmd_pgtable(pmd) pmd_page(pmd)
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
{
set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
}
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
free_page((unsigned long)pmd);
}
static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
{
return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
}
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
free_page((unsigned long)pud);
}
static inline void pgd_list_add(pgd_t *pgd)
{
struct page *page = virt_to_page(pgd);
unsigned long flags;
spin_lock_irqsave(&pgd_lock, flags);
list_add(&page->lru, &pgd_list);
spin_unlock_irqrestore(&pgd_lock, flags);
}
static inline void pgd_list_del(pgd_t *pgd)
{
struct page *page = virt_to_page(pgd);
unsigned long flags;
spin_lock_irqsave(&pgd_lock, flags);
list_del(&page->lru);
spin_unlock_irqrestore(&pgd_lock, flags);
}
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
unsigned boundary;
pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
if (!pgd)
return NULL;
pgd_list_add(pgd);
/*
* Copy kernel pointers in from init.
* Could keep a freelist or slab cache of those because the kernel
* part never changes.
*/
boundary = pgd_index(__PAGE_OFFSET);
memset(pgd, 0, boundary * sizeof(pgd_t));
memcpy(pgd + boundary,
init_level4_pgt + boundary,
(PTRS_PER_PGD - boundary) * sizeof(pgd_t));
return pgd;
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
pgd_list_del(pgd);
free_page((unsigned long)pgd);
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *page;
void *p;
p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
if (!p)
return NULL;
page = virt_to_page(p);
pgtable_page_ctor(page);
return page;
}
/* Should really implement gc for free page table pages. This could be
done with a reference count in struct page. */
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
free_page((unsigned long)pte);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
pgtable_page_dtor(pte);
__free_page(pte);
}
#define __pte_free_tlb(tlb,pte) \
do { \
pgtable_page_dtor((pte)); \
tlb_remove_page((tlb), (pte)); \
} while (0)
#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
#endif /* _X86_64_PGALLOC_H */
#ifndef _ASM_X86_PGTABLE_H
#define _ASM_X86_PGTABLE_H
#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
#define FIRST_USER_ADDRESS 0
#define _PAGE_BIT_PRESENT 0 /* is present */
@@ -330,6 +329,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
# include "pgtable_64.h"
#endif
#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
#ifndef __ASSEMBLY__
enum {
@@ -389,37 +391,17 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
 * bit at the same time.
 */
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-#define ptep_set_access_flags(vma, address, ptep, entry, dirty)	\
-({									\
-	int __changed = !pte_same(*(ptep), entry);			\
-	if (__changed && dirty) {					\
-		*ptep = entry;						\
-		pte_update_defer((vma)->vm_mm, (address), (ptep));	\
-		flush_tlb_page(vma, address);				\
-	}								\
-	__changed;							\
-})
+extern int ptep_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pte_t *ptep,
+				 pte_t entry, int dirty);

#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define ptep_test_and_clear_young(vma, addr, ptep) ({			\
-	int __ret = 0;							\
-	if (pte_young(*(ptep)))						\
-		__ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,		\
-					   &(ptep)->pte);		\
-	if (__ret)							\
-		pte_update((vma)->vm_mm, addr, ptep);			\
-	__ret;								\
-})
+extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
+				     unsigned long addr, pte_t *ptep);

#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-#define ptep_clear_flush_young(vma, address, ptep)			\
-({									\
-	int __young;							\
-	__young = ptep_test_and_clear_young((vma), (address), (ptep));	\
-	if (__young)							\
-		flush_tlb_page(vma, address);				\
-	__young;							\
-})
+extern int ptep_clear_flush_young(struct vm_area_struct *vma,
+				  unsigned long address, pte_t *ptep);

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
@@ -456,6 +438,22 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
	pte_update(mm, addr, ptep);
}
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
*
* dst - pointer to pgd range anywhere on a pgd page
* src - ""
* count - the number of pgds to copy.
*
* dst and src can be on the same page, but the range must not overlap,
* and must not cross a page boundary.
*/
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
memcpy(dst, src, count * sizeof(pgd_t));
}
#include <asm-generic/pgtable.h>
#endif /* __ASSEMBLY__ */
...
@@ -48,9 +48,6 @@ void paging_init(void);
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
/* Just any arbitrary offset to the start of the vmalloc VM area: the
 * current 8MB value just means that there will be a 8MB "hole" after the
 * physical memory until the kernel virtual memory starts. That means that
@@ -108,21 +105,6 @@ extern int pmd_bad(pmd_t pmd);
# include <asm/pgtable-2level.h>
#endif
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
*
* dst - pointer to pgd range anwhere on a pgd page
* src - ""
* count - the number of pgds to copy.
*
* dst and src can be on the same page, but the range must not overlap,
* and must not cross a page boundary.
*/
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
memcpy(dst, src, count * sizeof(pgd_t));
}
/*
 * Macro to mark a page protection value as "uncacheable".
 * On processors which do not support it, this is a no-op.
...
@@ -24,7 +24,7 @@ extern void paging_init(void);
#endif /* !__ASSEMBLY__ */

-#define SHARED_KERNEL_PMD 1
+#define SHARED_KERNEL_PMD 0

/*
 * PGDIR_SHIFT determines what a top-level page table entry can map
...
#ifndef __XEN_EVENTS_H
#define __XEN_EVENTS_H
enum ipi_vector {
XEN_RESCHEDULE_VECTOR,
XEN_CALL_FUNCTION_VECTOR,
XEN_NR_IPIS,
};
static inline int xen_irqs_disabled(struct pt_regs *regs)
{
return raw_irqs_disabled_flags(regs->flags);
}
static inline void xen_do_IRQ(int irq, struct pt_regs *regs)
{
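	/* do_IRQ() recovers the vector as ~orig_ax, so hand it the bit-inverted irq */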
regs->orig_ax = ~irq;
do_IRQ(regs);
}
#endif /* __XEN_EVENTS_H */
#ifndef __XEN_GRANT_TABLE_H
#define __XEN_GRANT_TABLE_H
#define xen_alloc_vm_area(size) alloc_vm_area(size)
#define xen_free_vm_area(area) free_vm_area(area)
#endif /* __XEN_GRANT_TABLE_H */
@@ -163,6 +163,12 @@ HYPERVISOR_set_callbacks(unsigned long event_selector,
			 failsafe_selector, failsafe_address);
}
static inline int
HYPERVISOR_callback_op(int cmd, void *arg)
{
return _hypercall2(int, callback_op, cmd, arg);
}
static inline int
HYPERVISOR_fpu_taskswitch(int set)
{
...
@@ -22,6 +22,30 @@
#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
#define GUEST_HANDLE(name) __guest_handle_ ## name
#ifdef __XEN__
#if defined(__i386__)
#define set_xen_guest_handle(hnd, val) \
do { \
if (sizeof(hnd) == 8) \
*(uint64_t *)&(hnd) = 0; \
(hnd).p = val; \
} while (0)
#elif defined(__x86_64__)
#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0)
#endif
#else
#if defined(__i386__)
#define set_xen_guest_handle(hnd, val) \
do { \
if (sizeof(hnd) == 8) \
*(uint64_t *)&(hnd) = 0; \
(hnd) = val; \
} while (0)
#elif defined(__x86_64__)
#define set_xen_guest_handle(hnd, val) do { (hnd) = val; } while (0)
#endif
#endif
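The 32-bit variants first clear the full 64-bit handle slot so its upper half never holds stale data when only a 32-bit pointer is stored. As a reference point, this is how the grant-table setup elsewhere in this series fills a handle (sketch reusing the names from drivers/xen/grant-table.c above):

	struct gnttab_setup_table setup;

	setup.dom = DOMID_SELF;
	setup.nr_frames = nr_gframes;
	set_xen_guest_handle(setup.frame_list, frames);
	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);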
#ifndef __ASSEMBLY__
/* Guest handles for primitive C types. */
__DEFINE_GUEST_HANDLE(uchar, unsigned char);
@@ -171,6 +195,10 @@ struct arch_vcpu_info {
	unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */
};
struct xen_callback {
unsigned long cs;
unsigned long eip;
};
#endif /* !__ASSEMBLY__ */
/*
...
#ifndef __XEN_PAGE_H
#define __XEN_PAGE_H
#include <linux/pfn.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <xen/features.h>
/* Xen machine address */
typedef struct xmaddr {
phys_addr_t maddr;
} xmaddr_t;
/* Xen pseudo-physical address */
typedef struct xpaddr {
phys_addr_t paddr;
} xpaddr_t;
#define XMADDR(x) ((xmaddr_t) { .maddr = (x) })
#define XPADDR(x) ((xpaddr_t) { .paddr = (x) })
/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
#define INVALID_P2M_ENTRY (~0UL)
#define FOREIGN_FRAME_BIT (1UL<<31)
#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
extern unsigned long *phys_to_machine_mapping;
static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return pfn;
return phys_to_machine_mapping[(unsigned int)(pfn)] &
~FOREIGN_FRAME_BIT;
}
static inline int phys_to_machine_mapping_valid(unsigned long pfn)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return 1;
return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
}
static inline unsigned long mfn_to_pfn(unsigned long mfn)
{
unsigned long pfn;
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
#if 0
if (unlikely((mfn >> machine_to_phys_order) != 0))
return max_mapnr;
#endif
pfn = 0;
/*
* The array access can fail (e.g., device space beyond end of RAM).
* In such cases it doesn't matter what we return (we return garbage),
* but we must handle the fault without crashing!
*/
__get_user(pfn, &machine_to_phys_mapping[mfn]);
return pfn;
}
static inline xmaddr_t phys_to_machine(xpaddr_t phys)
{
unsigned offset = phys.paddr & ~PAGE_MASK;
return XMADDR(PFN_PHYS((u64)pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset);
}
static inline xpaddr_t machine_to_phys(xmaddr_t machine)
{
unsigned offset = machine.maddr & ~PAGE_MASK;
return XPADDR(PFN_PHYS((u64)mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset);
}
/*
* We detect special mappings in one of two ways:
* 1. If the MFN is an I/O page then Xen will set the m2p entry
* to be outside our maximum possible pseudophys range.
* 2. If the MFN belongs to a different domain then we will certainly
* not have MFN in our p2m table. Conversely, if the page is ours,
* then we'll have p2m(m2p(MFN))==MFN.
* If we detect a special mapping then it doesn't have a 'struct page'.
* We force !pfn_valid() by returning an out-of-range pointer.
*
* NB. These checks require that, for any MFN that is not in our reservation,
* there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
* we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN.
* Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
*
* NB2. When deliberately mapping foreign pages into the p2m table, you *must*
* use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
* require. In all the cases we care about, the FOREIGN_FRAME bit is
* masked (e.g., pfn_to_mfn()) so behaviour there is correct.
*/
static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
{
extern unsigned long max_mapnr;
unsigned long pfn = mfn_to_pfn(mfn);
if ((pfn < max_mapnr)
&& !xen_feature(XENFEAT_auto_translated_physmap)
&& (phys_to_machine_mapping[pfn] != mfn))
return max_mapnr; /* force !pfn_valid() */
return pfn;
}
static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
if (xen_feature(XENFEAT_auto_translated_physmap)) {
BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
return;
}
phys_to_machine_mapping[pfn] = mfn;
}
/* VIRT <-> MACHINE conversion */
#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v))))
#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v))))
#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
static inline unsigned long pte_mfn(pte_t pte)
{
return (pte.pte & ~_PAGE_NX) >> PAGE_SHIFT;
}
static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
{
pte_t pte;
pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) |
(pgprot_val(pgprot) & __supported_pte_mask);
return pte;
}
static inline pteval_t pte_val_ma(pte_t pte)
{
return pte.pte;
}
static inline pte_t __pte_ma(pteval_t x)
{
return (pte_t) { .pte = x };
}
#ifdef CONFIG_X86_PAE
#define pmd_val_ma(v) ((v).pmd)
#define pud_val_ma(v) ((v).pgd.pgd)
#define __pmd_ma(x) ((pmd_t) { (x) } )
#else /* !X86_PAE */
#define pmd_val_ma(v) ((v).pud.pgd.pgd)
#endif /* CONFIG_X86_PAE */
#define pgd_val_ma(x) ((x).pgd)
xmaddr_t arbitrary_virt_to_machine(unsigned long address);
void make_lowmem_page_readonly(void *vaddr);
void make_lowmem_page_readwrite(void *vaddr);
#endif /* __XEN_PAGE_H */
/******************************************************************************
* balloon.h
*
* Xen balloon driver - enables returning/claiming memory to/from Xen.
*
* Copyright (c) 2003, B Dragovic
* Copyright (c) 2003-2004, M Williamson, K Fraser
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef __XEN_BALLOON_H__
#define __XEN_BALLOON_H__
#include <linux/spinlock.h>
#if 0
/*
* Inform the balloon driver that it should allow some slop for device-driver
* memory activities.
*/
void balloon_update_driver_allowance(long delta);
/* Allocate/free a set of empty pages in low memory (i.e., no RAM mapped). */
struct page **alloc_empty_pages_and_pagevec(int nr_pages);
void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages);
void balloon_release_driver_page(struct page *page);
/*
* Prevent the balloon driver from changing the memory reservation during
* a driver critical region.
*/
extern spinlock_t balloon_lock;
#define balloon_lock(__flags) spin_lock_irqsave(&balloon_lock, __flags)
#define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags)
#endif
#endif /* __XEN_BALLOON_H__ */
@@ -5,13 +5,7 @@
#include <xen/interface/event_channel.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/events.h>
enum ipi_vector {
	XEN_RESCHEDULE_VECTOR,
	XEN_CALL_FUNCTION_VECTOR,
	XEN_NR_IPIS,
};
int bind_evtchn_to_irq(unsigned int evtchn);
int bind_evtchn_to_irqhandler(unsigned int evtchn,
@@ -37,6 +31,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
void unbind_from_irqhandler(unsigned int irq, void *dev_id);
void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector);
int resend_irq_on_evtchn(unsigned int irq);
static inline void notify_remote_via_evtchn(int port)
{
...
@@ -39,6 +39,7 @@
#include <asm/xen/hypervisor.h>
#include <xen/interface/grant_table.h>
#include <asm/xen/grant_table.h>
/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
#define NR_GRANT_FRAMES 4
@@ -102,6 +103,12 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
				       unsigned long pfn);
int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
			   unsigned long max_nr_gframes,
			   struct grant_entry **__shared);
void arch_gnttab_unmap_shared(struct grant_entry *shared,
			      unsigned long nr_gframes);
#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
#endif /* __ASM_GNTTAB_H__ */
/******************************************************************************
* callback.h
*
* Register guest OS callbacks with Xen.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Copyright (c) 2006, Ian Campbell
*/
#ifndef __XEN_PUBLIC_CALLBACK_H__
#define __XEN_PUBLIC_CALLBACK_H__
#include "xen.h"
/*
* Prototype for this hypercall is:
* long callback_op(int cmd, void *extra_args)
* @cmd == CALLBACKOP_??? (callback operation).
* @extra_args == Operation-specific extra arguments (NULL if none).
*/
/* ia64, x86: Callback for event delivery. */
#define CALLBACKTYPE_event 0
/* x86: Failsafe callback when guest state cannot be restored by Xen. */
#define CALLBACKTYPE_failsafe 1
/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */
#define CALLBACKTYPE_syscall 2
/*
* x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel
* feature is enabled. Do not use this callback type in new code.
*/
#define CALLBACKTYPE_sysenter_deprecated 3
/* x86: Callback for NMI delivery. */
#define CALLBACKTYPE_nmi 4
/*
* x86: sysenter is only available as follows:
* - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled
* - 64-bit hypervisor: 32-bit guest applications on Intel CPUs
* ('32-on-32-on-64', '32-on-64-on-64')
* [nb. also 64-bit guest applications on Intel CPUs
* ('64-on-64-on-64'), but syscall is preferred]
*/
#define CALLBACKTYPE_sysenter 5
/*
* x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs
* ('32-on-32-on-64', '32-on-64-on-64')
*/
#define CALLBACKTYPE_syscall32 7
/*
 * Disable event delivery during callback? This flag is ignored for event and
* NMI callbacks: event delivery is unconditionally disabled.
*/
#define _CALLBACKF_mask_events 0
#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events)
/*
* Register a callback.
*/
#define CALLBACKOP_register 0
struct callback_register {
uint16_t type;
uint16_t flags;
struct xen_callback address;
};
/*
* Unregister a callback.
*
* Not all callbacks can be unregistered. -EINVAL will be returned if
* you attempt to unregister such a callback.
*/
#define CALLBACKOP_unregister 1
struct callback_unregister {
uint16_t type;
uint16_t _unused;
};
#endif /* __XEN_PUBLIC_CALLBACK_H__ */
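As a usage illustration (not part of the header), a 32-bit PV guest could register, say, a sysenter callback roughly as follows; the HYPERVISOR_callback_op() wrapper, the xen_sysenter_target entry stub and the cs/eip layout of struct xen_callback are assumptions of this sketch:

/* Hedged sketch: register a sysenter callback, masking event
 * delivery while the callback runs (CALLBACKF_mask_events). */
extern void xen_sysenter_target(void);

static int example_register_sysenter(void)
{
	struct callback_register sysenter = {
		.type  = CALLBACKTYPE_sysenter,
		.flags = CALLBACKF_mask_events,
	};

	sysenter.address.cs  = __KERNEL_CS;
	sysenter.address.eip = (unsigned long)xen_sysenter_target;

	return HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter);
}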
...@@ -185,6 +185,7 @@ struct gnttab_map_grant_ref {
	grant_handle_t handle;
	uint64_t dev_bus_addr;
};
DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref);
/*
 * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
...@@ -206,6 +207,7 @@ struct gnttab_unmap_grant_ref {
	/* OUT parameters. */
	int16_t status;		/* GNTST_* */
};
DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref);
/*
 * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
...@@ -223,8 +225,9 @@ struct gnttab_setup_table {
	uint32_t nr_frames;
	/* OUT parameters. */
	int16_t status;		/* GNTST_* */
	ulong *frame_list;
	GUEST_HANDLE(ulong) frame_list;
};
DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table);
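Because frame_list is now a guest handle rather than a bare pointer, callers fill it with set_xen_guest_handle() before issuing the hypercall. A hedged sketch (error handling trimmed, helper name illustrative):

static int example_setup_table(unsigned long *frames, uint32_t nr_frames)
{
	struct gnttab_setup_table setup;
	int rc;

	setup.dom = DOMID_SELF;
	setup.nr_frames = nr_frames;
	set_xen_guest_handle(setup.frame_list, frames);

	/* One-element batch; setup.status carries GNTST_* on return. */
	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
	if (rc == 0)
		rc = setup.status;	/* GNTST_okay == 0 */
	return rc;
}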
/*
 * GNTTABOP_dump_table: Dump the contents of the grant table to the
...@@ -237,6 +240,7 @@ struct gnttab_dump_table {
	/* OUT parameters. */
	int16_t status;		/* GNTST_* */
};
DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table);
/*
 * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
...@@ -255,7 +259,7 @@ struct gnttab_transfer {
	/* OUT parameters. */
	int16_t status;
};
DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer);
/*
 * GNTTABOP_copy: Hypervisor based copy
...@@ -296,6 +300,7 @@ struct gnttab_copy {
	/* OUT parameters. */
	int16_t status;
};
DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy);
/*
 * GNTTABOP_query_size: Query the current and maximum sizes of the shared
...@@ -313,7 +318,7 @@ struct gnttab_query_size {
	uint32_t max_nr_frames;
	int16_t status;		/* GNTST_* */
};
DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
/*
 * Bitfield values for update_pin_status.flags.
...
/*
* fbif.h -- Xen virtual frame buffer device
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
* Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
*/
#ifndef __XEN_PUBLIC_IO_FBIF_H__
#define __XEN_PUBLIC_IO_FBIF_H__
/* Out events (frontend -> backend) */
/*
* Out events may be sent only when requested by backend, and receipt
* of an unknown out event is an error.
*/
/* Event type 1 currently not used */
/*
* Framebuffer update notification event
* Capable frontend sets feature-update in xenstore.
* Backend requests it by setting request-update in xenstore.
*/
#define XENFB_TYPE_UPDATE 2
struct xenfb_update {
uint8_t type; /* XENFB_TYPE_UPDATE */
int32_t x; /* source x */
int32_t y; /* source y */
int32_t width; /* rect width */
int32_t height; /* rect height */
};
#define XENFB_OUT_EVENT_SIZE 40
union xenfb_out_event {
uint8_t type;
struct xenfb_update update;
char pad[XENFB_OUT_EVENT_SIZE];
};
/* In events (backend -> frontend) */
/*
* Frontends should ignore unknown in events.
* No in events currently defined.
*/
#define XENFB_IN_EVENT_SIZE 40
union xenfb_in_event {
uint8_t type;
char pad[XENFB_IN_EVENT_SIZE];
};
/* shared page */
#define XENFB_IN_RING_SIZE 1024
#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE)
#define XENFB_IN_RING_OFFS 1024
#define XENFB_IN_RING(page) \
((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS))
#define XENFB_IN_RING_REF(page, idx) \
(XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN])
#define XENFB_OUT_RING_SIZE 2048
#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE)
#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE)
#define XENFB_OUT_RING(page) \
((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS))
#define XENFB_OUT_RING_REF(page, idx) \
(XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN])
struct xenfb_page {
uint32_t in_cons, in_prod;
uint32_t out_cons, out_prod;
int32_t width; /* width of the framebuffer (in pixels) */
int32_t height; /* height of the framebuffer (in pixels) */
uint32_t line_length; /* length of a row of pixels (in bytes) */
uint32_t mem_length; /* length of the framebuffer (in bytes) */
uint8_t depth; /* depth of a pixel (in bits) */
/*
* Framebuffer page directory
*
* Each directory page holds PAGE_SIZE / sizeof(*pd)
* framebuffer pages, and can thus map up to PAGE_SIZE *
* PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and
* sizeof(unsigned long) == 4, that's 4 Megs. Two directory
* pages should be enough for a while.
*/
unsigned long pd[2];
};
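To illustrate the ring conventions above (this is a sketch, not code from the patch), a frontend might queue an update event like this; only the producer index is advanced here, and event-channel notification is assumed to happen elsewhere:

/* Hedged sketch: push a XENFB_TYPE_UPDATE event onto the out ring. */
static void example_send_update(struct xenfb_page *page,
				int x, int y, int width, int height)
{
	uint32_t prod = page->out_prod;
	union xenfb_out_event *event = &XENFB_OUT_RING_REF(page, prod);

	event->type          = XENFB_TYPE_UPDATE;
	event->update.x      = x;
	event->update.y      = y;
	event->update.width  = width;
	event->update.height = height;

	wmb();			/* publish the event before moving prod */
	page->out_prod = prod + 1;
}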
/*
* Wart: xenkbd needs to know resolution. Put it here until a better
* solution is found, but don't leak it to the backend.
*/
#ifdef __KERNEL__
#define XENFB_WIDTH 800
#define XENFB_HEIGHT 600
#define XENFB_DEPTH 32
#endif
#endif
/*
* kbdif.h -- Xen virtual keyboard/mouse
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
* Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
*/
#ifndef __XEN_PUBLIC_IO_KBDIF_H__
#define __XEN_PUBLIC_IO_KBDIF_H__
/* In events (backend -> frontend) */
/*
* Frontends should ignore unknown in events.
*/
/* Pointer movement event */
#define XENKBD_TYPE_MOTION 1
/* Event type 2 currently not used */
/* Key event (includes pointer buttons) */
#define XENKBD_TYPE_KEY 3
/*
* Pointer position event
* Capable backend sets feature-abs-pointer in xenstore.
 * Frontend requests it instead of XENKBD_TYPE_MOTION by setting
* request-abs-update in xenstore.
*/
#define XENKBD_TYPE_POS 4
struct xenkbd_motion {
uint8_t type; /* XENKBD_TYPE_MOTION */
int32_t rel_x; /* relative X motion */
int32_t rel_y; /* relative Y motion */
};
struct xenkbd_key {
uint8_t type; /* XENKBD_TYPE_KEY */
uint8_t pressed; /* 1 if pressed; 0 otherwise */
uint32_t keycode; /* KEY_* from linux/input.h */
};
struct xenkbd_position {
uint8_t type; /* XENKBD_TYPE_POS */
int32_t abs_x; /* absolute X position (in FB pixels) */
int32_t abs_y; /* absolute Y position (in FB pixels) */
};
#define XENKBD_IN_EVENT_SIZE 40
union xenkbd_in_event {
uint8_t type;
struct xenkbd_motion motion;
struct xenkbd_key key;
struct xenkbd_position pos;
char pad[XENKBD_IN_EVENT_SIZE];
};
/* Out events (frontend -> backend) */
/*
* Out events may be sent only when requested by backend, and receipt
* of an unknown out event is an error.
* No out events currently defined.
*/
#define XENKBD_OUT_EVENT_SIZE 40
union xenkbd_out_event {
uint8_t type;
char pad[XENKBD_OUT_EVENT_SIZE];
};
/* shared page */
#define XENKBD_IN_RING_SIZE 2048
#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
#define XENKBD_IN_RING_OFFS 1024
#define XENKBD_IN_RING(page) \
((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS))
#define XENKBD_IN_RING_REF(page, idx) \
(XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN])
#define XENKBD_OUT_RING_SIZE 1024
#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE)
#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE)
#define XENKBD_OUT_RING(page) \
((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS))
#define XENKBD_OUT_RING_REF(page, idx) \
(XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN])
struct xenkbd_page {
uint32_t in_cons, in_prod;
uint32_t out_cons, out_prod;
};
#endif
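Conversely, a frontend drains the in ring by walking from in_cons to in_prod. A sketch under the assumption of a handle_event() helper, which this header does not define:

/* Hedged sketch: consume all pending keyboard/pointer events. */
static void example_poll_events(struct xenkbd_page *page)
{
	uint32_t cons = page->in_cons;
	uint32_t prod = page->in_prod;

	rmb();			/* see events written up to prod */
	while (cons != prod)
		handle_event(&XENKBD_IN_RING_REF(page, cons++));
	mb();			/* finish reads before releasing slots */
	page->in_cons = cons;
}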
#ifndef __XEN_PROTOCOLS_H__
#define __XEN_PROTOCOLS_H__
#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi"
#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi"
#define XEN_IO_PROTO_ABI_IA64 "ia64-abi"
#define XEN_IO_PROTO_ABI_POWERPC64 "powerpc64-abi"
#if defined(__i386__)
# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
#elif defined(__x86_64__)
# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
#elif defined(__ia64__)
# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64
#elif defined(__powerpc64__)
# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64
#else
# error arch fixup needed here
#endif
#endif
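The native ABI string is what a frontend advertises in xenstore so the backend can pick a matching wire format (several frontends in this series do exactly that). A minimal sketch, assuming a struct xenbus_device *dev and no surrounding xenstore transaction:

/* Hedged sketch: write <frontend-path>/protocol = XEN_IO_PROTO_ABI_NATIVE. */
static int example_advertise_protocol(struct xenbus_device *dev)
{
	return xenbus_printf(XBT_NIL, dev->nodename, "protocol", "%s",
			     XEN_IO_PROTO_ABI_NATIVE);
}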
...@@ -29,7 +29,7 @@ struct xen_memory_reservation {
	 * OUT: GMFN bases of extents that were allocated
	 * (NB. This command also updates the mach_to_phys translation table)
	 */
	GUEST_HANDLE(ulong) extent_start;
	ulong extent_start;
	/* Number of extents, and size/alignment of each (2^extent_order pages). */
	unsigned long nr_extents;
...@@ -50,7 +50,6 @@ struct xen_memory_reservation {
	domid_t domid;
};
DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
/*
 * Returns the maximum machine frame number of mapped RAM in this system.
...@@ -86,7 +85,7 @@ struct xen_machphys_mfn_list {
	 * any large discontiguities in the machine address space, 2MB gaps in
	 * the machphys table will be represented by an MFN base of zero.
	 */
	GUEST_HANDLE(ulong) extent_start;
	ulong extent_start;
	/*
	 * Number of extents written to the above array. This will be smaller
...@@ -94,7 +93,6 @@ struct xen_machphys_mfn_list {
	 */
	unsigned int nr_extents;
};
DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
/*
 * Sets the GPFN at which a particular page appears in the specified guest's
...@@ -117,7 +115,6 @@ struct xen_add_to_physmap {
	/* GPFN where the source mapping page should appear. */
	unsigned long gpfn;
};
DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
/*
 * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error
...@@ -132,14 +129,13 @@ struct xen_translate_gpfn_list {
	unsigned long nr_gpfns;
	/* List of GPFNs to translate. */
	GUEST_HANDLE(ulong) gpfn_list;
	ulong gpfn_list;
	/*
	 * Output list to contain MFN translations. May be the same as the input
	 * list (in which case each input GPFN is overwritten with the output MFN).
	 */
	GUEST_HANDLE(ulong) mfn_list;
	ulong mfn_list;
};
DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
#endif /* __XEN_PUBLIC_MEMORY_H__ */
#ifndef INCLUDE_XEN_OPS_H
#define INCLUDE_XEN_OPS_H
#include <linux/percpu.h>
DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
#endif /* INCLUDE_XEN_OPS_H */