Commit 81ae31d7 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'stable/for-linus-3.18-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull Xen updates from David Vrabel:
 "Features and fixes:

   - Add pvscsi frontend and backend drivers.
   - Remove _PAGE_IOMAP PTE flag, freeing it for alternate uses.
   - Try and keep memory contiguous during PV memory setup (reduces
     SWIOTLB usage).
   - Allow front/back drivers to use threaded irqs.
   - Support large initrds in PV guests.
   - Fix PVH guests in preparation for Xen 4.5"

* tag 'stable/for-linus-3.18-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (22 commits)
  xen: remove DEFINE_XENBUS_DRIVER() macro
  xen/xenbus: Remove BUG_ON() when error string trucated
  xen/xenbus: Correct the comments for xenbus_grant_ring()
  x86/xen: Set EFER.NX and EFER.SCE in PVH guests
  xen: eliminate scalability issues from initrd handling
  xen: sync some headers with xen tree
  xen: make pvscsi frontend dependant on xenbus frontend
  arm{,64}/xen: Remove "EXPERIMENTAL" in the description of the Xen options
  xen-scsifront: don't deadlock if the ring becomes full
  x86: remove the Xen-specific _PAGE_IOMAP PTE flag
  x86/xen: do not use _PAGE_IOMAP PTE flag for I/O mappings
  x86: skip check for spurious faults for non-present faults
  xen/efi: Directly include needed headers
  xen-scsiback: clean up a type issue in scsiback_make_tpg()
  xen-scsifront: use GFP_ATOMIC under spin_lock
  MAINTAINERS: Add xen pvscsi maintainer
  xen-scsiback: Add Xen PV SCSI backend driver
  xen-scsifront: Add Xen PV SCSI frontend driver
  xen: Add Xen pvSCSI protocol description
  xen/events: support threaded irqs for interdomain event channels
  ...
parents ef4a48c5 95afae48
...@@ -10268,6 +10268,15 @@ S: Supported ...@@ -10268,6 +10268,15 @@ S: Supported
F: drivers/block/xen-blkback/* F: drivers/block/xen-blkback/*
F: drivers/block/xen* F: drivers/block/xen*
XEN PVSCSI DRIVERS
M: Juergen Gross <jgross@suse.com>
L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
L: linux-scsi@vger.kernel.org
S: Supported
F: drivers/scsi/xen-scsifront.c
F: drivers/xen/xen-scsiback.c
F: include/xen/interface/io/vscsiif.h
XEN SWIOTLB SUBSYSTEM XEN SWIOTLB SUBSYSTEM
M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
L: xen-devel@lists.xenproject.org (moderated for non-subscribers) L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
......
...@@ -1779,7 +1779,7 @@ config XEN_DOM0 ...@@ -1779,7 +1779,7 @@ config XEN_DOM0
depends on XEN depends on XEN
config XEN config XEN
bool "Xen guest support on ARM (EXPERIMENTAL)" bool "Xen guest support on ARM"
depends on ARM && AEABI && OF depends on ARM && AEABI && OF
depends on CPU_V7 && !CPU_V6 depends on CPU_V7 && !CPU_V6
depends on !GENERIC_ATOMIC64 depends on !GENERIC_ATOMIC64
......
...@@ -349,7 +349,7 @@ config XEN_DOM0 ...@@ -349,7 +349,7 @@ config XEN_DOM0
depends on XEN depends on XEN
config XEN config XEN
bool "Xen guest support on ARM64 (EXPERIMENTAL)" bool "Xen guest support on ARM64"
depends on ARM64 && OF depends on ARM64 && OF
select SWIOTLB_XEN select SWIOTLB_XEN
help help
......
...@@ -23,7 +23,6 @@ ...@@ -23,7 +23,6 @@
#define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
#define _PAGE_BIT_SPLITTING _PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */ #define _PAGE_BIT_SPLITTING _PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */
#define _PAGE_BIT_IOMAP _PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */
#define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */ #define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
...@@ -52,7 +51,7 @@ ...@@ -52,7 +51,7 @@
#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
#define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
...@@ -168,10 +167,10 @@ ...@@ -168,10 +167,10 @@
#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP) #define __PAGE_KERNEL_IO (__PAGE_KERNEL)
#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP) #define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE)
#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP) #define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS)
#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP) #define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC)
#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) #define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
......
...@@ -933,8 +933,17 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) ...@@ -933,8 +933,17 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
* cross-processor TLB flush, even if no stale TLB entries exist * cross-processor TLB flush, even if no stale TLB entries exist
* on other processors. * on other processors.
* *
* Spurious faults may only occur if the TLB contains an entry with
* fewer permissions than the page table entry. Non-present (P = 0)
* and reserved bit (R = 1) faults are never spurious.
*
* There are no security implications to leaving a stale TLB when * There are no security implications to leaving a stale TLB when
* increasing the permissions on a page. * increasing the permissions on a page.
*
* Returns non-zero if a spurious fault was handled, zero otherwise.
*
* See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
* (Optional Invalidation).
*/ */
static noinline int static noinline int
spurious_fault(unsigned long error_code, unsigned long address) spurious_fault(unsigned long error_code, unsigned long address)
...@@ -945,8 +954,17 @@ spurious_fault(unsigned long error_code, unsigned long address) ...@@ -945,8 +954,17 @@ spurious_fault(unsigned long error_code, unsigned long address)
pte_t *pte; pte_t *pte;
int ret; int ret;
/* Reserved-bit violation or user access to kernel space? */ /*
if (error_code & (PF_USER | PF_RSVD)) * Only writes to RO or instruction fetches from NX may cause
* spurious faults.
*
* These could be from user or supervisor accesses but the TLB
* is only lazily flushed after a kernel mapping protection
* change, so user accesses are not expected to cause spurious
* faults.
*/
if (error_code != (PF_WRITE | PF_PROT)
&& error_code != (PF_INSTR | PF_PROT))
return 0; return 0;
pgd = init_mm.pgd + pgd_index(address); pgd = init_mm.pgd + pgd_index(address);
......
...@@ -537,7 +537,7 @@ static void __init pagetable_init(void) ...@@ -537,7 +537,7 @@ static void __init pagetable_init(void)
permanent_kmaps_init(pgd_base); permanent_kmaps_init(pgd_base);
} }
pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
EXPORT_SYMBOL_GPL(__supported_pte_mask); EXPORT_SYMBOL_GPL(__supported_pte_mask);
/* user-defined highmem size */ /* user-defined highmem size */
......
...@@ -151,7 +151,7 @@ early_param("gbpages", parse_direct_gbpages_on); ...@@ -151,7 +151,7 @@ early_param("gbpages", parse_direct_gbpages_on);
* around without checking the pgd every time. * around without checking the pgd every time.
*/ */
pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; pteval_t __supported_pte_mask __read_mostly = ~0;
EXPORT_SYMBOL_GPL(__supported_pte_mask); EXPORT_SYMBOL_GPL(__supported_pte_mask);
int force_personality32; int force_personality32;
......
...@@ -442,8 +442,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, ...@@ -442,8 +442,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
*/ */
prot |= _PAGE_CACHE_UC_MINUS; prot |= _PAGE_CACHE_UC_MINUS;
prot |= _PAGE_IOMAP; /* creating a mapping for IO */
vma->vm_page_prot = __pgprot(prot); vma->vm_page_prot = __pgprot(prot);
if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
......
...@@ -15,12 +15,14 @@ ...@@ -15,12 +15,14 @@
* with this program. If not, see <http://www.gnu.org/licenses/>. * with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <linux/bitops.h>
#include <linux/efi.h> #include <linux/efi.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/string.h> #include <linux/string.h>
#include <xen/xen-ops.h> #include <xen/xen-ops.h>
#include <asm/page.h>
#include <asm/setup.h> #include <asm/setup.h>
void __init xen_efi_init(void) void __init xen_efi_init(void)
......
...@@ -1463,6 +1463,7 @@ static void __ref xen_setup_gdt(int cpu) ...@@ -1463,6 +1463,7 @@ static void __ref xen_setup_gdt(int cpu)
pv_cpu_ops.load_gdt = xen_load_gdt; pv_cpu_ops.load_gdt = xen_load_gdt;
} }
#ifdef CONFIG_XEN_PVH
/* /*
* A PV guest starts with default flags that are not set for PVH, set them * A PV guest starts with default flags that are not set for PVH, set them
* here asap. * here asap.
...@@ -1508,17 +1509,21 @@ static void __init xen_pvh_early_guest_init(void) ...@@ -1508,17 +1509,21 @@ static void __init xen_pvh_early_guest_init(void)
return; return;
xen_have_vector_callback = 1; xen_have_vector_callback = 1;
xen_pvh_early_cpu_init(0, false);
xen_pvh_set_cr_flags(0); xen_pvh_set_cr_flags(0);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
BUG(); /* PVH: Implement proper support. */ BUG(); /* PVH: Implement proper support. */
#endif #endif
} }
#endif /* CONFIG_XEN_PVH */
/* First C function to be called on Xen boot */ /* First C function to be called on Xen boot */
asmlinkage __visible void __init xen_start_kernel(void) asmlinkage __visible void __init xen_start_kernel(void)
{ {
struct physdev_set_iopl set_iopl; struct physdev_set_iopl set_iopl;
unsigned long initrd_start = 0;
int rc; int rc;
if (!xen_start_info) if (!xen_start_info)
...@@ -1527,7 +1532,9 @@ asmlinkage __visible void __init xen_start_kernel(void) ...@@ -1527,7 +1532,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_domain_type = XEN_PV_DOMAIN; xen_domain_type = XEN_PV_DOMAIN;
xen_setup_features(); xen_setup_features();
#ifdef CONFIG_XEN_PVH
xen_pvh_early_guest_init(); xen_pvh_early_guest_init();
#endif
xen_setup_machphys_mapping(); xen_setup_machphys_mapping();
/* Install Xen paravirt ops */ /* Install Xen paravirt ops */
...@@ -1559,8 +1566,6 @@ asmlinkage __visible void __init xen_start_kernel(void) ...@@ -1559,8 +1566,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
#endif #endif
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
__supported_pte_mask |= _PAGE_IOMAP;
/* /*
* Prevent page tables from being allocated in highmem, even * Prevent page tables from being allocated in highmem, even
* if CONFIG_HIGHPTE is enabled. * if CONFIG_HIGHPTE is enabled.
...@@ -1667,10 +1672,16 @@ asmlinkage __visible void __init xen_start_kernel(void) ...@@ -1667,10 +1672,16 @@ asmlinkage __visible void __init xen_start_kernel(void)
new_cpu_data.x86_capability[0] = cpuid_edx(1); new_cpu_data.x86_capability[0] = cpuid_edx(1);
#endif #endif
if (xen_start_info->mod_start) {
if (xen_start_info->flags & SIF_MOD_START_PFN)
initrd_start = PFN_PHYS(xen_start_info->mod_start);
else
initrd_start = __pa(xen_start_info->mod_start);
}
/* Poke various useful things into boot_params */ /* Poke various useful things into boot_params */
boot_params.hdr.type_of_loader = (9 << 4) | 0; boot_params.hdr.type_of_loader = (9 << 4) | 0;
boot_params.hdr.ramdisk_image = xen_start_info->mod_start boot_params.hdr.ramdisk_image = initrd_start;
? __pa(xen_start_info->mod_start) : 0;
boot_params.hdr.ramdisk_size = xen_start_info->mod_len; boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
......
...@@ -399,38 +399,14 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) ...@@ -399,38 +399,14 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (unlikely(mfn == INVALID_P2M_ENTRY)) { if (unlikely(mfn == INVALID_P2M_ENTRY)) {
mfn = 0; mfn = 0;
flags = 0; flags = 0;
} else { } else
/* mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
* Paramount to do this test _after_ the
* INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
* IDENTITY_FRAME_BIT resolves to true.
*/
mfn &= ~FOREIGN_FRAME_BIT;
if (mfn & IDENTITY_FRAME_BIT) {
mfn &= ~IDENTITY_FRAME_BIT;
flags |= _PAGE_IOMAP;
}
}
val = ((pteval_t)mfn << PAGE_SHIFT) | flags; val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
} }
return val; return val;
} }
static pteval_t iomap_pte(pteval_t val)
{
if (val & _PAGE_PRESENT) {
unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
pteval_t flags = val & PTE_FLAGS_MASK;
/* We assume the pte frame number is a MFN, so
just use it as-is. */
val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
}
return val;
}
__visible pteval_t xen_pte_val(pte_t pte) __visible pteval_t xen_pte_val(pte_t pte)
{ {
pteval_t pteval = pte.pte; pteval_t pteval = pte.pte;
...@@ -441,9 +417,6 @@ __visible pteval_t xen_pte_val(pte_t pte) ...@@ -441,9 +417,6 @@ __visible pteval_t xen_pte_val(pte_t pte)
pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT; pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
} }
#endif #endif
if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
return pteval;
return pte_mfn_to_pfn(pteval); return pte_mfn_to_pfn(pteval);
} }
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
...@@ -481,7 +454,6 @@ void xen_set_pat(u64 pat) ...@@ -481,7 +454,6 @@ void xen_set_pat(u64 pat)
__visible pte_t xen_make_pte(pteval_t pte) __visible pte_t xen_make_pte(pteval_t pte)
{ {
phys_addr_t addr = (pte & PTE_PFN_MASK);
#if 0 #if 0
/* If Linux is trying to set a WC pte, then map to the Xen WC. /* If Linux is trying to set a WC pte, then map to the Xen WC.
* If _PAGE_PAT is set, then it probably means it is really * If _PAGE_PAT is set, then it probably means it is really
...@@ -496,19 +468,7 @@ __visible pte_t xen_make_pte(pteval_t pte) ...@@ -496,19 +468,7 @@ __visible pte_t xen_make_pte(pteval_t pte)
pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT; pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
} }
#endif #endif
/*
* Unprivileged domains are allowed to do IOMAPpings for
* PCI passthrough, but not map ISA space. The ISA
* mappings are just dummy local mappings to keep other
* parts of the kernel happy.
*/
if (unlikely(pte & _PAGE_IOMAP) &&
(xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
pte = iomap_pte(pte);
} else {
pte &= ~_PAGE_IOMAP;
pte = pte_pfn_to_mfn(pte); pte = pte_pfn_to_mfn(pte);
}
return native_make_pte(pte); return native_make_pte(pte);
} }
...@@ -2091,7 +2051,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) ...@@ -2091,7 +2051,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
default: default:
/* By default, set_fixmap is used for hardware mappings */ /* By default, set_fixmap is used for hardware mappings */
pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP)); pte = mfn_pte(phys, prot);
break; break;
} }
......
...@@ -173,6 +173,7 @@ ...@@ -173,6 +173,7 @@
#include <xen/balloon.h> #include <xen/balloon.h>
#include <xen/grant_table.h> #include <xen/grant_table.h>
#include "p2m.h"
#include "multicalls.h" #include "multicalls.h"
#include "xen-ops.h" #include "xen-ops.h"
...@@ -180,12 +181,6 @@ static void __init m2p_override_init(void); ...@@ -180,12 +181,6 @@ static void __init m2p_override_init(void);
unsigned long xen_max_p2m_pfn __read_mostly; unsigned long xen_max_p2m_pfn __read_mostly;
#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
/* Placeholders for holes in the address space */ /* Placeholders for holes in the address space */
static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
...@@ -202,16 +197,12 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE); ...@@ -202,16 +197,12 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
/* We might hit two boundary violations at the start and end, at max each /* For each I/O range remapped we may lose up to two leaf pages for the boundary
* boundary violation will require three middle nodes. */ * violations and three mid pages to cover up to 3GB. With
RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3); * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the
* remapped region.
/* When we populate back during bootup, the amount of pages can vary. The */
* max we have is seen is 395979, but that does not mean it can't be more. RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES);
* Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle
* it can re-use Xen provided mfn_list array, so we only need to allocate at
* most three P2M top nodes. */
RESERVE_BRK(p2m_populated, PAGE_SIZE * 3);
static inline unsigned p2m_top_index(unsigned long pfn) static inline unsigned p2m_top_index(unsigned long pfn)
{ {
......
/*
 * Shared p2m definitions: table geometry constants and the identity-range
 * helper prototype, for the files that include "p2m.h".
 */
#ifndef _XEN_P2M_H
#define _XEN_P2M_H
/* Number of unsigned long entries that fit in one page. */
#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
/* Number of (unsigned long *) entries that fit in one page. */
#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
/* Number of (unsigned long **) entries that fit in one page. */
#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
/* Product of the three per-page entry counts above. */
#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
/* Multiplier used when sizing the p2m_identity_remap brk reservation. */
#define MAX_REMAP_RANGES 10
/*
 * NOTE(review): callers add the return value to their count of identity
 * mapped pages, so this presumably returns the number of pfns in
 * [pfn_s, pfn_e) that were set -- confirm against the definition.
 */
extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e);
#endif /* _XEN_P2M_H */
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <xen/features.h> #include <xen/features.h>
#include "xen-ops.h" #include "xen-ops.h"
#include "vdso.h" #include "vdso.h"
#include "p2m.h"
/* These are code, but not functions. Defined in entry.S */ /* These are code, but not functions. Defined in entry.S */
extern const char xen_hypervisor_callback[]; extern const char xen_hypervisor_callback[];
...@@ -46,6 +47,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; ...@@ -46,6 +47,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
/* Number of pages released from the initial allocation. */ /* Number of pages released from the initial allocation. */
unsigned long xen_released_pages; unsigned long xen_released_pages;
/* Buffer used to remap identity mapped pages */
unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
/* /*
* The maximum amount of extra memory compared to the base size. The * The maximum amount of extra memory compared to the base size. The
* main scaling factor is the size of struct page. At extreme ratios * main scaling factor is the size of struct page. At extreme ratios
...@@ -151,107 +155,325 @@ static unsigned long __init xen_do_chunk(unsigned long start, ...@@ -151,107 +155,325 @@ static unsigned long __init xen_do_chunk(unsigned long start,
return len; return len;
} }
static unsigned long __init xen_release_chunk(unsigned long start, /*
unsigned long end) * Finds the next RAM pfn available in the E820 map after min_pfn.
{ * This function updates min_pfn with the pfn found and returns
return xen_do_chunk(start, end, true); * the size of that range or zero if not found.
} */
static unsigned long __init xen_find_pfn_range(
static unsigned long __init xen_populate_chunk(
const struct e820entry *list, size_t map_size, const struct e820entry *list, size_t map_size,
unsigned long max_pfn, unsigned long *last_pfn, unsigned long *min_pfn)
unsigned long credits_left)
{ {
const struct e820entry *entry; const struct e820entry *entry;
unsigned int i; unsigned int i;
unsigned long done = 0; unsigned long done = 0;
unsigned long dest_pfn;
for (i = 0, entry = list; i < map_size; i++, entry++) { for (i = 0, entry = list; i < map_size; i++, entry++) {
unsigned long s_pfn; unsigned long s_pfn;
unsigned long e_pfn; unsigned long e_pfn;
unsigned long pfns;
long capacity;
if (credits_left <= 0)
break;
if (entry->type != E820_RAM) if (entry->type != E820_RAM)
continue; continue;
e_pfn = PFN_DOWN(entry->addr + entry->size); e_pfn = PFN_DOWN(entry->addr + entry->size);
/* We only care about E820 after the xen_start_info->nr_pages */ /* We only care about E820 after this */
if (e_pfn <= max_pfn) if (e_pfn < *min_pfn)
continue; continue;
s_pfn = PFN_UP(entry->addr); s_pfn = PFN_UP(entry->addr);
/* If the E820 falls within the nr_pages, we want to start
* at the nr_pages PFN. /* If min_pfn falls within the E820 entry, we want to start
* If that would mean going past the E820 entry, skip it * at the min_pfn PFN.
*/ */
if (s_pfn <= max_pfn) { if (s_pfn <= *min_pfn) {
capacity = e_pfn - max_pfn; done = e_pfn - *min_pfn;
dest_pfn = max_pfn;
} else { } else {
capacity = e_pfn - s_pfn; done = e_pfn - s_pfn;
dest_pfn = s_pfn; *min_pfn = s_pfn;
} }
if (credits_left < capacity)
capacity = credits_left;
pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
done += pfns;
*last_pfn = (dest_pfn + pfns);
if (pfns < capacity)
break; break;
credits_left -= pfns;
} }
return done; return done;
} }
static void __init xen_set_identity_and_release_chunk( /*
unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, * This releases a chunk of memory and then does the identity map. It's used as
unsigned long *released, unsigned long *identity) * a fallback if the remapping fails.
*/
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
unsigned long *released)
{ {
unsigned long pfn; WARN_ON(start_pfn > end_pfn);
/* Need to release pages first */
*released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true);
*identity += set_phys_range_identity(start_pfn, end_pfn);
}
/*
 * Helper function to update both the p2m and m2p tables so that @pfn and
 * @mfn refer to each other.
 *
 * @pfn: pseudo-physical frame number to map.
 * @mfn: machine frame number that @pfn should map to.
 *
 * The p2m entry is written first with early_set_phys_to_machine(); the m2p
 * entry is then updated through an MMU_MACHPHYS_UPDATE hypercall request.
 * If the p2m update fails the m2p update is not attempted.
 *
 * Returns 1 if both updates succeeded, 0 (after a WARN) if either failed.
 */
static unsigned long __init xen_update_mem_tables(unsigned long pfn,
					unsigned long mfn)
{
	/*
	 * The frame number is widened before shifting so the resulting
	 * machine address is not truncated when unsigned long is narrower
	 * than 64 bits; the low bits carry the request type.
	 */
	struct mmu_update update = {
		.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
		.val = pfn
	};

	/* Update p2m */
	if (!early_set_phys_to_machine(pfn, mfn)) {
		/* pfn and mfn are unsigned long: print them with %lu */
		WARN(1, "Failed to set p2m mapping for pfn=%lu mfn=%lu\n",
		     pfn, mfn);
		return 0;
	}

	/* Update m2p */
	if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
		WARN(1, "Failed to set m2p mapping for mfn=%lu pfn=%lu\n",
		     mfn, pfn);
		return 0;
	}

	return 1;
}
/*
* This function updates the p2m and m2p tables with an identity map from
* start_pfn to start_pfn+size and remaps the underlying RAM of the original
* allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks
* to not exhaust the reserved brk space. Doing it in properly aligned blocks
* ensures we only allocate the minimum required leaf pages in the p2m table. It
* copies the existing mfns from the p2m table under the 1:1 map, overwrites
* them with the identity map and then updates the p2m and m2p tables with the
* remapped memory.
*/
static unsigned long __init xen_do_set_identity_and_remap_chunk(
unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
unsigned long ident_pfn_iter, remap_pfn_iter;
unsigned long ident_start_pfn_align, remap_start_pfn_align;
unsigned long ident_end_pfn_align, remap_end_pfn_align;
unsigned long ident_boundary_pfn, remap_boundary_pfn;
unsigned long ident_cnt = 0;
unsigned long remap_cnt = 0;
unsigned long left = size;
unsigned long mod;
int i;
WARN_ON(size == 0);
BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
/* /*
* If the PFNs are currently mapped, clear the mappings * Determine the proper alignment to remap memory in P2M_PER_PAGE sized
* (except for the ISA region which must be 1:1 mapped) to * blocks. We need to keep track of both the existing pfn mapping and
* release the refcounts (in Xen) on the original frames. * the new pfn remapping.
*/ */
for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { mod = start_pfn % P2M_PER_PAGE;
pte_t pte = __pte_ma(0); ident_start_pfn_align =
mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
mod = remap_pfn % P2M_PER_PAGE;
remap_start_pfn_align =
mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
mod = (start_pfn + size) % P2M_PER_PAGE;
ident_end_pfn_align = start_pfn + size - mod;
mod = (remap_pfn + size) % P2M_PER_PAGE;
remap_end_pfn_align = remap_pfn + size - mod;
/* Iterate over each p2m leaf node in each range */
for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align;
ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align;
ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) {
/* Check we aren't past the end */
BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);
/* Save p2m mappings */
for (i = 0; i < P2M_PER_PAGE; i++)
xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i);
/* Set identity map which will free a p2m leaf */
ident_cnt += set_phys_range_identity(ident_pfn_iter,
ident_pfn_iter + P2M_PER_PAGE);
#ifdef DEBUG
/* Helps verify a p2m leaf has been freed */
for (i = 0; i < P2M_PER_PAGE; i++) {
unsigned int pfn = ident_pfn_iter + i;
BUG_ON(pfn_to_mfn(pfn) != pfn);
}
#endif
/* Now remap memory */
for (i = 0; i < P2M_PER_PAGE; i++) {
unsigned long mfn = xen_remap_buf[i];
/* This will use the p2m leaf freed above */
if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
remap_pfn_iter + i, mfn);
return 0;
}
if (pfn < PFN_UP(ISA_END_ADDRESS)) remap_cnt++;
pte = mfn_pte(pfn, PAGE_KERNEL_IO); }
(void)HYPERVISOR_update_va_mapping( left -= P2M_PER_PAGE;
(unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
} }
if (start_pfn < nr_pages) /* Max boundary space possible */
*released += xen_release_chunk( BUG_ON(left > (P2M_PER_PAGE - 1) * 2);
start_pfn, min(end_pfn, nr_pages));
*identity += set_phys_range_identity(start_pfn, end_pfn); /* Now handle the boundary conditions */
ident_boundary_pfn = start_pfn;
remap_boundary_pfn = remap_pfn;
for (i = 0; i < left; i++) {
unsigned long mfn;
/* These two checks move from the start to end boundaries */
if (ident_boundary_pfn == ident_start_pfn_align)
ident_boundary_pfn = ident_pfn_iter;
if (remap_boundary_pfn == remap_start_pfn_align)
remap_boundary_pfn = remap_pfn_iter;
/* Check we aren't past the end */
BUG_ON(ident_boundary_pfn >= start_pfn + size);
BUG_ON(remap_boundary_pfn >= remap_pfn + size);
mfn = pfn_to_mfn(ident_boundary_pfn);
if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) {
WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
remap_pfn_iter + i, mfn);
return 0;
}
remap_cnt++;
ident_boundary_pfn++;
remap_boundary_pfn++;
}
/* Finish up the identity map */
if (ident_start_pfn_align >= ident_end_pfn_align) {
/*
* In this case we have an identity range which does not span an
* aligned block so everything needs to be identity mapped here.
* If we didn't check this we might remap too many pages since
* the align boundaries are not meaningful in this case.
*/
ident_cnt += set_phys_range_identity(start_pfn,
start_pfn + size);
} else {
/* Remapped above so check each end of the chunk */
if (start_pfn < ident_start_pfn_align)
ident_cnt += set_phys_range_identity(start_pfn,
ident_start_pfn_align);
if (start_pfn + size > ident_pfn_iter)
ident_cnt += set_phys_range_identity(ident_pfn_iter,
start_pfn + size);
}
BUG_ON(ident_cnt != size);
BUG_ON(remap_cnt != size);
return size;
} }
static unsigned long __init xen_set_identity_and_release( /*
const struct e820entry *list, size_t map_size, unsigned long nr_pages) * This function takes a contiguous pfn range that needs to be identity mapped
* and:
*
* 1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
* 2) Calls the do_ function to actually do the mapping/remapping work.
*
* The goal is to not allocate additional memory but to remap the existing
* pages. In the case of an error the underlying memory is simply released back
* to Xen and not remapped.
*/
static unsigned long __init xen_set_identity_and_remap_chunk(
const struct e820entry *list, size_t map_size, unsigned long start_pfn,
unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
unsigned long *identity, unsigned long *remapped,
unsigned long *released)
{
unsigned long pfn;
/* i: pfns of [start_pfn, end_pfn) handled so far; n: length of that range */
unsigned long i = 0;
unsigned long n = end_pfn - start_pfn;
while (i < n) {
unsigned long cur_pfn = start_pfn + i;
unsigned long left = n - i;
unsigned long size = left;
unsigned long remap_range_size;
/* Do not remap pages beyond the current allocation */
if (cur_pfn >= nr_pages) {
/* Identity map remaining pages */
*identity += set_phys_range_identity(cur_pfn,
cur_pfn + size);
break;
}
/*
 * Only the part below nr_pages is a remap candidate; whatever lies
 * at or above nr_pages is identity mapped by a later iteration.
 */
if (cur_pfn + size > nr_pages)
size = nr_pages - cur_pfn;
/* remap_pfn is passed by pointer and may be moved to the range found */
remap_range_size = xen_find_pfn_range(list, map_size,
&remap_pfn);
if (!remap_range_size) {
pr_warning("Unable to find available pfn range, not remapping identity pages\n");
/* Fallback: release and identity map all that is left, then stop */
xen_set_identity_and_release_chunk(cur_pfn,
cur_pfn + left, nr_pages, identity, released);
break;
}
/* Adjust size to fit in current e820 RAM region */
if (size > remap_range_size)
size = remap_range_size;
if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) {
WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n",
cur_pfn, size, remap_pfn);
/* Same fallback as above: release instead of remapping */
xen_set_identity_and_release_chunk(cur_pfn,
cur_pfn + left, nr_pages, identity, released);
break;
}
/* Update variables to reflect new mappings. */
i += size;
remap_pfn += size;
*identity += size;
*remapped += size;
}
/*
 * If the PFNs are currently mapped, the VA mapping also needs
 * to be updated to be 1:1.
 */
for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
(void)HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
mfn_pte(pfn, PAGE_KERNEL_IO), 0);
/* The caller feeds this back in as remap_pfn for the next chunk */
return remap_pfn;
}
static unsigned long __init xen_set_identity_and_remap(
const struct e820entry *list, size_t map_size, unsigned long nr_pages,
unsigned long *released)
{ {
phys_addr_t start = 0; phys_addr_t start = 0;
unsigned long released = 0;
unsigned long identity = 0; unsigned long identity = 0;
unsigned long remapped = 0;
unsigned long last_pfn = nr_pages;
const struct e820entry *entry; const struct e820entry *entry;
unsigned long num_released = 0;
int i; int i;
/* /*
* Combine non-RAM regions and gaps until a RAM region (or the * Combine non-RAM regions and gaps until a RAM region (or the
* end of the map) is reached, then set the 1:1 map and * end of the map) is reached, then set the 1:1 map and
* release the pages (if available) in those non-RAM regions. * remap the memory in those non-RAM regions.
* *
* The combined non-RAM regions are rounded to a whole number * The combined non-RAM regions are rounded to a whole number
* of pages so any partial pages are accessible via the 1:1 * of pages so any partial pages are accessible via the 1:1
...@@ -269,22 +491,24 @@ static unsigned long __init xen_set_identity_and_release( ...@@ -269,22 +491,24 @@ static unsigned long __init xen_set_identity_and_release(
end_pfn = PFN_UP(entry->addr); end_pfn = PFN_UP(entry->addr);
if (start_pfn < end_pfn) if (start_pfn < end_pfn)
xen_set_identity_and_release_chunk( last_pfn = xen_set_identity_and_remap_chunk(
start_pfn, end_pfn, nr_pages, list, map_size, start_pfn,
&released, &identity); end_pfn, nr_pages, last_pfn,
&identity, &remapped,
&num_released);
start = end; start = end;
} }
} }
if (released) *released = num_released;
printk(KERN_INFO "Released %lu pages of unused memory\n", released);
if (identity)
printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
return released; pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
} pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped,
last_pfn);
pr_info("Released %ld page(s)\n", num_released);
return last_pfn;
}
static unsigned long __init xen_get_max_pages(void) static unsigned long __init xen_get_max_pages(void)
{ {
unsigned long max_pages = MAX_DOMAIN_PAGES; unsigned long max_pages = MAX_DOMAIN_PAGES;
...@@ -347,7 +571,6 @@ char * __init xen_memory_setup(void) ...@@ -347,7 +571,6 @@ char * __init xen_memory_setup(void)
unsigned long max_pages; unsigned long max_pages;
unsigned long last_pfn = 0; unsigned long last_pfn = 0;
unsigned long extra_pages = 0; unsigned long extra_pages = 0;
unsigned long populated;
int i; int i;
int op; int op;
...@@ -392,20 +615,11 @@ char * __init xen_memory_setup(void) ...@@ -392,20 +615,11 @@ char * __init xen_memory_setup(void)
extra_pages += max_pages - max_pfn; extra_pages += max_pages - max_pfn;
/* /*
* Set P2M for all non-RAM pages and E820 gaps to be identity * Set identity map on non-RAM pages and remap the underlying RAM.
* type PFNs. Any RAM pages that would be made inaccesible by
* this are first released.
*/ */
xen_released_pages = xen_set_identity_and_release( last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
map, memmap.nr_entries, max_pfn); &xen_released_pages);
/*
* Populate back the non-RAM pages and E820 gaps that had been
* released. */
populated = xen_populate_chunk(map, memmap.nr_entries,
max_pfn, &last_pfn, xen_released_pages);
xen_released_pages -= populated;
extra_pages += xen_released_pages; extra_pages += xen_released_pages;
if (last_pfn > max_pfn) { if (last_pfn > max_pfn) {
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include <xen/hvc-console.h> #include <xen/hvc-console.h>
#include "xen-ops.h" #include "xen-ops.h"
#include "mmu.h" #include "mmu.h"
#include "smp.h"
cpumask_var_t xen_cpu_initialized_map; cpumask_var_t xen_cpu_initialized_map;
...@@ -99,10 +100,14 @@ static void cpu_bringup(void) ...@@ -99,10 +100,14 @@ static void cpu_bringup(void)
wmb(); /* make sure everything is out */ wmb(); /* make sure everything is out */
} }
/* Note: cpu parameter is only relevant for PVH */ /*
static void cpu_bringup_and_idle(int cpu) * Note: cpu parameter is only relevant for PVH. The reason for passing it
* is we can't do smp_processor_id until the percpu segments are loaded, for
* which we need the cpu number! So we pass it in rdi as first parameter.
*/
asmlinkage __visible void cpu_bringup_and_idle(int cpu)
{ {
#ifdef CONFIG_X86_64 #ifdef CONFIG_XEN_PVH
if (xen_feature(XENFEAT_auto_translated_physmap) && if (xen_feature(XENFEAT_auto_translated_physmap) &&
xen_feature(XENFEAT_supervisor_mode_kernel)) xen_feature(XENFEAT_supervisor_mode_kernel))
xen_pvh_secondary_vcpu_init(cpu); xen_pvh_secondary_vcpu_init(cpu);
...@@ -374,11 +379,10 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ...@@ -374,11 +379,10 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->user_regs.fs = __KERNEL_PERCPU; ctxt->user_regs.fs = __KERNEL_PERCPU;
ctxt->user_regs.gs = __KERNEL_STACK_CANARY; ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif #endif
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
if (!xen_feature(XENFEAT_auto_translated_physmap)) { if (!xen_feature(XENFEAT_auto_translated_physmap)) {
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
ctxt->flags = VGCF_IN_KERNEL; ctxt->flags = VGCF_IN_KERNEL;
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
ctxt->user_regs.ds = __USER_DS; ctxt->user_regs.ds = __USER_DS;
...@@ -413,15 +417,18 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ...@@ -413,15 +417,18 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
(unsigned long)xen_failsafe_callback; (unsigned long)xen_failsafe_callback;
ctxt->user_regs.cs = __KERNEL_CS; ctxt->user_regs.cs = __KERNEL_CS;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
#ifdef CONFIG_X86_32
} }
#else #ifdef CONFIG_XEN_PVH
} else else {
/* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with /*
* %rdi having the cpu number - which means are passing in * The vcpu comes on kernel page tables which have the NX pte
* as the first parameter the cpu. Subtle! * bit set. This means before DS/SS is touched, NX in
* EFER must be set. Hence the following assembly glue code.
*/ */
ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
ctxt->user_regs.rdi = cpu; ctxt->user_regs.rdi = cpu;
ctxt->user_regs.rsi = true; /* entry == true */
}
#endif #endif
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
......
...@@ -8,4 +8,12 @@ extern void xen_send_IPI_allbutself(int vector); ...@@ -8,4 +8,12 @@ extern void xen_send_IPI_allbutself(int vector);
extern void xen_send_IPI_all(int vector); extern void xen_send_IPI_all(int vector);
extern void xen_send_IPI_self(int vector); extern void xen_send_IPI_self(int vector);
#ifdef CONFIG_XEN_PVH
extern void xen_pvh_early_cpu_init(int cpu, bool entry);
#else
static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
{
}
#endif
#endif #endif
...@@ -47,6 +47,41 @@ ENTRY(startup_xen) ...@@ -47,6 +47,41 @@ ENTRY(startup_xen)
__FINIT __FINIT
#ifdef CONFIG_XEN_PVH
/*
* xen_pvh_early_cpu_init() - early PVH VCPU initialization
* @cpu: this cpu number (%rdi)
* @entry: true if this is a secondary vcpu coming up on this entry
* point, false if this is the boot CPU being initialized for
* the first time (%rsi)
*
* Note: This is called as a function on the boot CPU, and is the entry point
* on the secondary CPU.
*/
ENTRY(xen_pvh_early_cpu_init)
mov %rsi, %r11
/* Gather features to see if NX implemented. */
mov $0x80000001, %eax
cpuid
mov %edx, %esi
mov $MSR_EFER, %ecx
rdmsr
bts $_EFER_SCE, %eax
bt $20, %esi
jnc 1f /* No NX, skip setting it */
bts $_EFER_NX, %eax
1: wrmsr
#ifdef CONFIG_SMP
cmp $0, %r11b
jne cpu_bringup_and_idle
#endif
ret
#endif /* CONFIG_XEN_PVH */
.pushsection .text .pushsection .text
.balign PAGE_SIZE .balign PAGE_SIZE
ENTRY(hypercall_page) ENTRY(hypercall_page)
...@@ -124,6 +159,7 @@ NEXT_HYPERCALL(arch_6) ...@@ -124,6 +159,7 @@ NEXT_HYPERCALL(arch_6)
ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
.quad _PAGE_PRESENT; .quad _PAGE_PRESENT) .quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
ELFNOTE(Xen, XEN_ELFNOTE_MOD_START_PFN, .long 1)
ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START) ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START)
ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, _ASM_PTR 0) ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, _ASM_PTR 0)
......
...@@ -907,22 +907,17 @@ static int connect_ring(struct backend_info *be) ...@@ -907,22 +907,17 @@ static int connect_ring(struct backend_info *be)
return 0; return 0;
} }
/* ** Driver Registration ** */
static const struct xenbus_device_id xen_blkbk_ids[] = { static const struct xenbus_device_id xen_blkbk_ids[] = {
{ "vbd" }, { "vbd" },
{ "" } { "" }
}; };
static struct xenbus_driver xen_blkbk_driver = {
static DEFINE_XENBUS_DRIVER(xen_blkbk, , .ids = xen_blkbk_ids,
.probe = xen_blkbk_probe, .probe = xen_blkbk_probe,
.remove = xen_blkbk_remove, .remove = xen_blkbk_remove,
.otherend_changed = frontend_changed .otherend_changed = frontend_changed
); };
int xen_blkif_xenbus_init(void) int xen_blkif_xenbus_init(void)
{ {
......
...@@ -2055,13 +2055,14 @@ static const struct xenbus_device_id blkfront_ids[] = { ...@@ -2055,13 +2055,14 @@ static const struct xenbus_device_id blkfront_ids[] = {
{ "" } { "" }
}; };
static DEFINE_XENBUS_DRIVER(blkfront, , static struct xenbus_driver blkfront_driver = {
.ids = blkfront_ids,
.probe = blkfront_probe, .probe = blkfront_probe,
.remove = blkfront_remove, .remove = blkfront_remove,
.resume = blkfront_resume, .resume = blkfront_resume,
.otherend_changed = blkback_changed, .otherend_changed = blkback_changed,
.is_ready = blkfront_is_ready, .is_ready = blkfront_is_ready,
); };
static int __init xlblk_init(void) static int __init xlblk_init(void)
{ {
......
...@@ -367,12 +367,13 @@ static const struct xenbus_device_id tpmfront_ids[] = { ...@@ -367,12 +367,13 @@ static const struct xenbus_device_id tpmfront_ids[] = {
}; };
MODULE_ALIAS("xen:vtpm"); MODULE_ALIAS("xen:vtpm");
static DEFINE_XENBUS_DRIVER(tpmfront, , static struct xenbus_driver tpmfront_driver = {
.ids = tpmfront_ids,
.probe = tpmfront_probe, .probe = tpmfront_probe,
.remove = tpmfront_remove, .remove = tpmfront_remove,
.resume = tpmfront_resume, .resume = tpmfront_resume,
.otherend_changed = backend_changed, .otherend_changed = backend_changed,
); };
static int __init xen_tpmfront_init(void) static int __init xen_tpmfront_init(void)
{ {
......
...@@ -365,12 +365,13 @@ static const struct xenbus_device_id xenkbd_ids[] = { ...@@ -365,12 +365,13 @@ static const struct xenbus_device_id xenkbd_ids[] = {
{ "" } { "" }
}; };
static DEFINE_XENBUS_DRIVER(xenkbd, , static struct xenbus_driver xenkbd_driver = {
.ids = xenkbd_ids,
.probe = xenkbd_probe, .probe = xenkbd_probe,
.remove = xenkbd_remove, .remove = xenkbd_remove,
.resume = xenkbd_resume, .resume = xenkbd_resume,
.otherend_changed = xenkbd_backend_changed, .otherend_changed = xenkbd_backend_changed,
); };
static int __init xenkbd_init(void) static int __init xenkbd_init(void)
{ {
......
...@@ -937,22 +937,18 @@ static int read_xenbus_vif_flags(struct backend_info *be) ...@@ -937,22 +937,18 @@ static int read_xenbus_vif_flags(struct backend_info *be)
return 0; return 0;
} }
/* ** Driver Registration ** */
static const struct xenbus_device_id netback_ids[] = { static const struct xenbus_device_id netback_ids[] = {
{ "vif" }, { "vif" },
{ "" } { "" }
}; };
static struct xenbus_driver netback_driver = {
static DEFINE_XENBUS_DRIVER(netback, , .ids = netback_ids,
.probe = netback_probe, .probe = netback_probe,
.remove = netback_remove, .remove = netback_remove,
.uevent = netback_uevent, .uevent = netback_uevent,
.otherend_changed = frontend_changed, .otherend_changed = frontend_changed,
); };
int xenvif_xenbus_init(void) int xenvif_xenbus_init(void)
{ {
......
...@@ -2300,12 +2300,6 @@ static void xennet_sysfs_delif(struct net_device *netdev) ...@@ -2300,12 +2300,6 @@ static void xennet_sysfs_delif(struct net_device *netdev)
#endif /* CONFIG_SYSFS */ #endif /* CONFIG_SYSFS */
static const struct xenbus_device_id netfront_ids[] = {
{ "vif" },
{ "" }
};
static int xennet_remove(struct xenbus_device *dev) static int xennet_remove(struct xenbus_device *dev)
{ {
struct netfront_info *info = dev_get_drvdata(&dev->dev); struct netfront_info *info = dev_get_drvdata(&dev->dev);
...@@ -2338,12 +2332,18 @@ static int xennet_remove(struct xenbus_device *dev) ...@@ -2338,12 +2332,18 @@ static int xennet_remove(struct xenbus_device *dev)
return 0; return 0;
} }
static DEFINE_XENBUS_DRIVER(netfront, , static const struct xenbus_device_id netfront_ids[] = {
{ "vif" },
{ "" }
};
static struct xenbus_driver netfront_driver = {
.ids = netfront_ids,
.probe = netfront_probe, .probe = netfront_probe,
.remove = xennet_remove, .remove = xennet_remove,
.resume = netfront_resume, .resume = netfront_resume,
.otherend_changed = netback_changed, .otherend_changed = netback_changed,
); };
static int __init netif_init(void) static int __init netif_init(void)
{ {
......
...@@ -1136,11 +1136,13 @@ static const struct xenbus_device_id xenpci_ids[] = { ...@@ -1136,11 +1136,13 @@ static const struct xenbus_device_id xenpci_ids[] = {
{""}, {""},
}; };
static DEFINE_XENBUS_DRIVER(xenpci, "pcifront", static struct xenbus_driver xenpci_driver = {
.name = "pcifront",
.ids = xenpci_ids,
.probe = pcifront_xenbus_probe, .probe = pcifront_xenbus_probe,
.remove = pcifront_xenbus_remove, .remove = pcifront_xenbus_remove,
.otherend_changed = pcifront_backend_changed, .otherend_changed = pcifront_backend_changed,
); };
static int __init pcifront_init(void) static int __init pcifront_init(void)
{ {
......
...@@ -587,6 +587,16 @@ config VMWARE_PVSCSI ...@@ -587,6 +587,16 @@ config VMWARE_PVSCSI
To compile this driver as a module, choose M here: the To compile this driver as a module, choose M here: the
module will be called vmw_pvscsi. module will be called vmw_pvscsi.
config XEN_SCSI_FRONTEND
tristate "XEN SCSI frontend driver"
depends on SCSI && XEN
select XEN_XENBUS_FRONTEND
help
The XEN SCSI frontend driver allows the kernel to access SCSI Devices
within another guest OS (usually Dom0).
Only needed if the kernel is running in a XEN guest and generic
SCSI access to a device is needed.
config HYPERV_STORAGE config HYPERV_STORAGE
tristate "Microsoft Hyper-V virtual storage driver" tristate "Microsoft Hyper-V virtual storage driver"
depends on SCSI && HYPERV depends on SCSI && HYPERV
......
...@@ -141,6 +141,7 @@ obj-$(CONFIG_SCSI_ESAS2R) += esas2r/ ...@@ -141,6 +141,7 @@ obj-$(CONFIG_SCSI_ESAS2R) += esas2r/
obj-$(CONFIG_SCSI_PMCRAID) += pmcraid.o obj-$(CONFIG_SCSI_PMCRAID) += pmcraid.o
obj-$(CONFIG_SCSI_VIRTIO) += virtio_scsi.o obj-$(CONFIG_SCSI_VIRTIO) += virtio_scsi.o
obj-$(CONFIG_VMWARE_PVSCSI) += vmw_pvscsi.o obj-$(CONFIG_VMWARE_PVSCSI) += vmw_pvscsi.o
obj-$(CONFIG_XEN_SCSI_FRONTEND) += xen-scsifront.o
obj-$(CONFIG_HYPERV_STORAGE) += hv_storvsc.o obj-$(CONFIG_HYPERV_STORAGE) += hv_storvsc.o
obj-$(CONFIG_ARM) += arm/ obj-$(CONFIG_ARM) += arm/
......
/*
* Xen SCSI frontend driver
*
* Copyright (c) 2008, FUJITSU Limited
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/pfn.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/grant_table.h>
#include <xen/events.h>
#include <xen/page.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/vscsiif.h>
#include <xen/interface/io/protocols.h>
#include <asm/xen/hypervisor.h>
#define GRANT_INVALID_REF 0
#define VSCSIFRONT_OP_ADD_LUN 1
#define VSCSIFRONT_OP_DEL_LUN 2
/* Tuning point. */
/* Used as .cmd_per_lun in the host template below. */
#define VSCSIIF_DEFAULT_CMD_PER_LUN 10
/* Limits copied into host->max_id and host->max_lun at probe time. */
#define VSCSIIF_MAX_TARGET 64
#define VSCSIIF_MAX_LUN 255
/* Number of request slots in a one-page shared ring. */
#define VSCSIIF_RING_SIZE __CONST_RING_SIZE(vscsiif, PAGE_SIZE)
/*
 * Upper bound on outstanding requests: sizes the request-id bitmap and the
 * shadow[] table in struct vscsifrnt_info, and is used as .can_queue.
 */
#define VSCSIIF_MAX_REQS VSCSIIF_RING_SIZE
/*
 * Number of pages (and therefore grants) needed to hold an array of _sg
 * struct scsiif_request_segment entries, rounded up.
 */
#define vscsiif_grants_sg(_sg) (PFN_UP((_sg) * \
sizeof(struct scsiif_request_segment)))
/*
 * Frontend-side bookkeeping for one request placed on the ring.
 *
 * For SCSI commands this lives in the per-command private area
 * (scsi_cmd_priv(), sized via .cmd_size in the host template); for
 * abort/reset requests it is kmalloc'ed by scsifront_action_handler().
 * It is zeroed by scsifront_command2ring() before each use.
 */
struct vscsifrnt_shadow {
/* command between backend and frontend */
unsigned char act;
/* request id, i.e. the index of this entry in vscsifrnt_info.shadow[] */
uint16_t rqid;
unsigned int nr_grants; /* number of grants in gref[] */
/* kcalloc'ed by map_data_for_request() for large requests, else NULL */
struct scsiif_request_segment *sg; /* scatter/gather elements */
/* Do reset or abort function. */
wait_queue_head_t wq_reset; /* reset work queue */
/* set to 1 by scsifront_sync_cmd_done() when the response arrives */
int wait_reset; /* reset work queue condition */
int32_t rslt_reset; /* reset response status: */
/* SUCCESS or FAILED or: */
/* RSLT_RESET_WAITING: request sent, the issuer is waiting for the answer */
#define RSLT_RESET_WAITING 0
/* RSLT_RESET_ERR: the issuer's wait was interrupted; completion path cleans up */
#define RSLT_RESET_ERR -1
/* Requested struct scsi_cmnd is stored from kernel. */
struct scsi_cmnd *sc;
/* grant references handed out for this request, ended in scsifront_gnttab_done() */
int gref[vscsiif_grants_sg(SG_ALL) + SG_ALL];
};
/*
 * Per-device state of the frontend. It is stored in the Scsi_Host private
 * area (retrieved with shost_priv()).
 */
struct vscsifrnt_info {
struct xenbus_device *dev;
struct Scsi_Host *host;
int host_active;
/* event channel from xenbus_alloc_evtchn() and the irq it is bound to */
unsigned int evtchn;
unsigned int irq;
/* grant reference of the shared ring page, from xenbus_grant_ring() */
grant_ref_t ring_ref;
struct vscsiif_front_ring ring;
struct vscsiif_response ring_rsp;
/* taken around updates of shadow_free_bitmap and shadow[] release */
spinlock_t shadow_lock;
/* a set bit means the request id is free */
DECLARE_BITMAP(shadow_free_bitmap, VSCSIIF_MAX_REQS);
/* shadow entry of each in-flight request, indexed by request id */
struct vscsifrnt_shadow *shadow[VSCSIIF_MAX_REQS];
/* scsifront_action_handler() sleeps here until ring space is available */
wait_queue_head_t wq_sync;
/* set by a sleeper on wq_sync, cleared when it should re-check the ring */
unsigned int wait_ring_available:1;
/* xenstore node written from slave_configure / slave_destroy */
char dev_state_path[64];
/* task compared against 'current' in slave_configure / slave_destroy */
struct task_struct *curr;
};
static DEFINE_MUTEX(scsifront_mutex);
/*
 * Tell tasks sleeping on info->wq_sync that they should re-check the ring:
 * clear the wait flag, then issue the wakeup.
 */
static void scsifront_wake_up(struct vscsifrnt_info *info)
{
	/* The flag is the sleepers' wait condition; update it before waking. */
	info->wait_ring_available = 0;

	wake_up(&info->wq_sync);
}
/*
 * Allocate a free request id.
 *
 * Returns the id (index of the first set bit in shadow_free_bitmap, which
 * is cleared to mark it in use), or VSCSIIF_MAX_REQS when no id is free.
 * Callers must treat any value >= VSCSIIF_MAX_REQS as "none available".
 */
static int scsifront_get_rqid(struct vscsifrnt_info *info)
{
	unsigned long flags;
	int free;

	spin_lock_irqsave(&info->shadow_lock, flags);

	free = find_first_bit(info->shadow_free_bitmap, VSCSIIF_MAX_REQS);
	/*
	 * find_first_bit() returns the bitmap size when no bit is set; that
	 * index is not a valid bit of the bitmap and must not be cleared.
	 */
	if (free < VSCSIIF_MAX_REQS)
		__clear_bit(free, info->shadow_free_bitmap);

	spin_unlock_irqrestore(&info->shadow_lock, flags);

	return free;
}
/*
 * Release request id @id: mark it free and drop its shadow[] pointer.
 * Does no locking itself; the visible callers hold info->shadow_lock.
 *
 * Returns non-zero when a wakeup of wq_sync is warranted, i.e. no id was
 * free before this release or somebody flagged that it waits for the ring.
 */
static int _scsifront_put_rqid(struct vscsifrnt_info *info, uint32_t id)
{
	/* Sample the "all ids in use" state before this id becomes free. */
	int was_exhausted = bitmap_empty(info->shadow_free_bitmap,
					 VSCSIIF_MAX_REQS);

	__set_bit(id, info->shadow_free_bitmap);
	info->shadow[id] = NULL;

	return was_exhausted || info->wait_ring_available;
}
/*
 * Locked wrapper around _scsifront_put_rqid(): release @id under
 * shadow_lock and, if requested, wake waiters after dropping the lock.
 */
static void scsifront_put_rqid(struct vscsifrnt_info *info, uint32_t id)
{
	unsigned long irq_flags;
	int need_wakeup;

	spin_lock_irqsave(&info->shadow_lock, irq_flags);
	need_wakeup = _scsifront_put_rqid(info, id);
	spin_unlock_irqrestore(&info->shadow_lock, irq_flags);

	if (!need_wakeup)
		return;

	scsifront_wake_up(info);
}
static struct vscsiif_request *scsifront_pre_req(struct vscsifrnt_info *info)
{
struct vscsiif_front_ring *ring = &(info->ring);
struct vscsiif_request *ring_req;
uint32_t id;
id = scsifront_get_rqid(info); /* use id in response */
if (id >= VSCSIIF_MAX_REQS)
return NULL;
ring_req = RING_GET_REQUEST(&(info->ring), ring->req_prod_pvt);
ring->req_prod_pvt++;
ring_req->rqid = (uint16_t)id;
return ring_req;
}
/*
 * Publish all privately queued requests to the shared ring and kick the
 * backend through the event channel irq if the ring macro asks for it.
 */
static void scsifront_do_request(struct vscsifrnt_info *info)
{
	struct vscsiif_front_ring *front = &info->ring;
	int must_notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(front, must_notify);
	if (!must_notify)
		return;

	notify_remote_via_irq(info->irq);
}
/*
 * Tear down the grants of a completed request @id and free its
 * scatter/gather array. Nothing is done for commands without data
 * transfer. A grant the backend still has mapped is treated as fatal.
 */
static void scsifront_gnttab_done(struct vscsifrnt_info *info, uint32_t id)
{
	struct vscsifrnt_shadow *shadow = info->shadow[id];
	int n;

	if (shadow->sc->sc_data_direction == DMA_NONE)
		return;

	for (n = 0; n < shadow->nr_grants; n++) {
		if (unlikely(gnttab_query_foreign_access(shadow->gref[n]) != 0)) {
			shost_printk(KERN_ALERT, info->host, KBUILD_MODNAME
				     "grant still in use by backend\n");
			BUG();
		}

		gnttab_end_foreign_access(shadow->gref[n], 0, 0UL);
	}

	/* NULL unless map_data_for_request() allocated a segment array. */
	kfree(shadow->sg);
}
/*
 * Complete a SCSI command for which the backend sent @ring_rsp: release
 * its grants and request id, copy result, residual and sense data into
 * the scsi_cmnd, and hand it back through scsi_done().
 */
static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info,
				   struct vscsiif_response *ring_rsp)
{
	uint32_t rqid = ring_rsp->rqid;
	struct scsi_cmnd *cmd = info->shadow[rqid]->sc;
	uint8_t nsense;

	BUG_ON(cmd == NULL);

	/* Grants must be ended while shadow[rqid] is still set. */
	scsifront_gnttab_done(info, rqid);
	scsifront_put_rqid(info, rqid);

	cmd->result = ring_rsp->rslt;
	scsi_set_resid(cmd, ring_rsp->residual_len);

	/* Never copy more sense bytes than the protocol buffer holds. */
	nsense = min_t(uint8_t, VSCSIIF_SENSE_BUFFERSIZE,
		       ring_rsp->sense_len);
	if (nsense != 0)
		memcpy(cmd->sense_buffer, ring_rsp->sense_buffer, nsense);

	cmd->scsi_done(cmd);
}
/*
 * Handle the response to a request that is not a SCSI CDB (called from
 * scsifront_cmd_done() for every other 'act', i.e. the abort/reset
 * requests issued by scsifront_action_handler()).
 *
 * What happens depends on the state the issuer left in rslt_reset:
 *  - RSLT_RESET_WAITING: the issuer still waits; store the backend result
 *    and wake it. The issuer releases the id and frees the shadow.
 *  - RSLT_RESET_ERR: the issuer's wait was interrupted and it has gone
 *    away; release the id and free the shadow here.
 *  - anything else: unexpected; log it and still wake the wait queue.
 */
static void scsifront_sync_cmd_done(struct vscsifrnt_info *info,
struct vscsiif_response *ring_rsp)
{
uint16_t id = ring_rsp->rqid;
unsigned long flags;
struct vscsifrnt_shadow *shadow = info->shadow[id];
/* only assigned and read on the RSLT_RESET_ERR path */
int kick;
spin_lock_irqsave(&info->shadow_lock, flags);
/* wait condition of wq_reset in scsifront_action_handler() */
shadow->wait_reset = 1;
switch (shadow->rslt_reset) {
case RSLT_RESET_WAITING:
shadow->rslt_reset = ring_rsp->rslt;
break;
case RSLT_RESET_ERR:
/* lock is already held, so use the unlocked release helper */
kick = _scsifront_put_rqid(info, id);
spin_unlock_irqrestore(&info->shadow_lock, flags);
kfree(shadow);
if (kick)
scsifront_wake_up(info);
/* shadow is gone: must not fall through to the wake_up() below */
return;
default:
shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
"bad reset state %d, possibly leaking %u\n",
shadow->rslt_reset, id);
break;
}
spin_unlock_irqrestore(&info->shadow_lock, flags);
wake_up(&shadow->wq_reset);
}
/*
 * Consume all responses currently on the ring, under the host lock, and
 * dispatch each one to the CDB or the abort/reset completion routine.
 *
 * Returns non-zero if RING_FINAL_CHECK_FOR_RESPONSES() reported further
 * responses, in which case the caller is expected to call again.
 */
static int scsifront_cmd_done(struct vscsifrnt_info *info)
{
struct vscsiif_response *ring_rsp;
RING_IDX i, rp;
int more_to_do = 0;
unsigned long flags;
spin_lock_irqsave(info->host->host_lock, flags);
/* snapshot the producer index once; responses up to rp are processed */
rp = info->ring.sring->rsp_prod;
rmb(); /* ordering required respective to dom0 */
for (i = info->ring.rsp_cons; i != rp; i++) {
ring_rsp = RING_GET_RESPONSE(&info->ring, i);
/*
 * The rqid comes from the other end: skip (with a warning) ids that
 * are out of range or not currently allocated.
 */
if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS ||
test_bit(ring_rsp->rqid, info->shadow_free_bitmap),
"illegal rqid %u returned by backend!\n",
ring_rsp->rqid))
continue;
if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB)
scsifront_cdb_cmd_done(info, ring_rsp);
else
scsifront_sync_cmd_done(info, ring_rsp);
}
info->ring.rsp_cons = i;
/*
 * If requests are still outstanding, re-check for responses that raced
 * in; otherwise just ask to be notified for the next response.
 */
if (i != info->ring.req_prod_pvt)
RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
else
info->ring.sring->rsp_event = i + 1;
/* let anybody sleeping on wq_sync re-evaluate the ring state */
info->wait_ring_available = 0;
spin_unlock_irqrestore(info->host->host_lock, flags);
wake_up(&info->wq_sync);
return more_to_do;
}
/*
 * Threaded interrupt handler (registered via request_threaded_irq() in
 * scsifront_alloc_ring()): keep draining responses until the ring reports
 * nothing more to do.
 */
static irqreturn_t scsifront_irq_fn(int irq, void *dev_id)
{
	struct vscsifrnt_info *info = dev_id;

	for (;;) {
		if (!scsifront_cmd_done(info))
			break;

		/* Yield point for this unbounded loop. */
		cond_resched();
	}

	return IRQ_HANDLED;
}
/*
 * Grant the backend access to the data buffer of @sc and describe it in
 * @ring_req.
 *
 * Small requests (at most VSCSIIF_SG_TABLESIZE pages) put one segment per
 * page directly into ring_req->seg[]. Larger requests build the segment
 * list in a separately allocated array (shadow->sg), grant the pages of
 * that array, and put those grants into ring_req->seg[] instead, flagged
 * with VSCSIIF_SG_GRANT in nr_segments.
 *
 * Every grant reference handed out is recorded in shadow->gref[] and
 * counted in shadow->nr_grants so scsifront_gnttab_done() can end it.
 *
 * Returns 0 on success (including "no data to transfer"), -E2BIG if the
 * request needs more pages than the host's sg_tablesize, -ENOMEM if the
 * segment array or the grant references cannot be allocated.
 */
static int map_data_for_request(struct vscsifrnt_info *info,
struct scsi_cmnd *sc,
struct vscsiif_request *ring_req,
struct vscsifrnt_shadow *shadow)
{
grant_ref_t gref_head;
struct page *page;
int err, ref, ref_cnt = 0;
/* the backend only needs to read the buffer for writes to the device */
int grant_ro = (sc->sc_data_direction == DMA_TO_DEVICE);
unsigned int i, off, len, bytes;
unsigned int data_len = scsi_bufflen(sc);
unsigned int data_grants = 0, seg_grants = 0;
struct scatterlist *sg;
unsigned long mfn;
struct scsiif_request_segment *seg;
ring_req->nr_segments = 0;
if (sc->sc_data_direction == DMA_NONE || !data_len)
return 0;
/* count the pages touched by the scatterlist: one grant per page */
scsi_for_each_sg(sc, sg, scsi_sg_count(sc), i)
data_grants += PFN_UP(sg->offset + sg->length);
if (data_grants > VSCSIIF_SG_TABLESIZE) {
if (data_grants > info->host->sg_tablesize) {
shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
"Unable to map request_buffer for command!\n");
return -E2BIG;
}
/* too many segments for the ring slot: use a granted segment array */
seg_grants = vscsiif_grants_sg(data_grants);
shadow->sg = kcalloc(data_grants,
sizeof(struct scsiif_request_segment), GFP_ATOMIC);
if (!shadow->sg)
return -ENOMEM;
}
/* data segments go into shadow->sg if it was allocated, else into the slot */
seg = shadow->sg ? : ring_req->seg;
err = gnttab_alloc_grant_references(seg_grants + data_grants,
&gref_head);
if (err) {
kfree(shadow->sg);
shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
"gnttab_alloc_grant_references() error\n");
return -ENOMEM;
}
if (seg_grants) {
/* grant each page of the segment array; these fill ring_req->seg[] */
page = virt_to_page(seg);
off = (unsigned long)seg & ~PAGE_MASK;
len = sizeof(struct scsiif_request_segment) * data_grants;
while (len > 0) {
bytes = min_t(unsigned int, len, PAGE_SIZE - off);
ref = gnttab_claim_grant_reference(&gref_head);
BUG_ON(ref == -ENOSPC);
mfn = pfn_to_mfn(page_to_pfn(page));
/* last argument 1: presumably read-only access — TODO confirm */
gnttab_grant_foreign_access_ref(ref,
info->dev->otherend_id, mfn, 1);
shadow->gref[ref_cnt] = ref;
ring_req->seg[ref_cnt].gref = ref;
ring_req->seg[ref_cnt].offset = (uint16_t)off;
ring_req->seg[ref_cnt].length = (uint16_t)bytes;
page++;
len -= bytes;
off = 0;
ref_cnt++;
}
BUG_ON(seg_grants < ref_cnt);
/* from here on seg_grants is the number of array pages actually granted */
seg_grants = ref_cnt;
}
/* grant the data pages; each page-sized piece becomes one segment in seg[] */
scsi_for_each_sg(sc, sg, scsi_sg_count(sc), i) {
page = sg_page(sg);
off = sg->offset;
len = sg->length;
while (len > 0 && data_len > 0) {
/*
 * sg sends a scatterlist that is larger than
 * the data_len it wants transferred for certain
 * IO sizes.
 */
bytes = min_t(unsigned int, len, PAGE_SIZE - off);
bytes = min(bytes, data_len);
ref = gnttab_claim_grant_reference(&gref_head);
BUG_ON(ref == -ENOSPC);
mfn = pfn_to_mfn(page_to_pfn(page));
gnttab_grant_foreign_access_ref(ref,
info->dev->otherend_id, mfn, grant_ro);
shadow->gref[ref_cnt] = ref;
seg->gref = ref;
seg->offset = (uint16_t)off;
seg->length = (uint16_t)bytes;
page++;
seg++;
len -= bytes;
data_len -= bytes;
off = 0;
ref_cnt++;
}
}
/*
 * NOTE(review): when data_len runs out before the scatterlist does, fewer
 * references are claimed than were allocated above, and nothing in this
 * function hands the remainder in gref_head back — confirm whether a
 * gnttab_free_grant_references() call is needed here.
 */
if (seg_grants)
ring_req->nr_segments = VSCSIIF_SG_GRANT | seg_grants;
else
ring_req->nr_segments = (uint8_t)ref_cnt;
shadow->nr_grants = ref_cnt;
return 0;
}
/*
 * Start a ring request for @sc: clear @shadow, reserve a request id and
 * ring slot, link the shadow to that id and copy addressing, CDB,
 * direction and timeout into the request.
 *
 * Returns the ring request, or NULL if no request id is available (the
 * shadow has been zeroed by then). 'act' and the segments are left for
 * the caller to fill in.
 */
static struct vscsiif_request *scsifront_command2ring(
		struct vscsifrnt_info *info, struct scsi_cmnd *sc,
		struct vscsifrnt_shadow *shadow)
{
	struct vscsiif_request *req;

	memset(shadow, 0, sizeof(*shadow));

	req = scsifront_pre_req(info);
	if (req == NULL)
		return NULL;

	/* Tie the shadow to the request id so the response can find it. */
	info->shadow[req->rqid] = shadow;
	shadow->rqid = req->rqid;

	/* Device address. */
	req->id = sc->device->id;
	req->lun = sc->device->lun;
	req->channel = sc->device->channel;

	/* Command descriptor block. */
	req->cmd_len = sc->cmd_len;
	BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE);
	memcpy(req->cmnd, sc->cmnd, sc->cmd_len);

	req->sc_data_direction = (uint8_t)sc->sc_data_direction;
	/* The block layer timeout is in jiffies; divide by HZ for the backend. */
	req->timeout_per_command = sc->request->timeout / HZ;

	return req;
}
/*
 * queuecommand entry point of the host template: put @sc on the ring.
 *
 * Returns 0 when the command was queued, or when it was completed
 * immediately with DID_ERROR because its buffer could not be mapped.
 * Returns SCSI_MLQUEUE_HOST_BUSY when the ring is full, no request id is
 * free, or mapping failed with -ENOMEM, so the midlayer retries later.
 *
 * The whole submission runs under the host lock, which is also held by
 * the other producer (scsifront_action_handler()) while it takes a slot.
 */
static int scsifront_queuecommand(struct Scsi_Host *shost,
				  struct scsi_cmnd *sc)
{
	struct vscsifrnt_info *info = shost_priv(shost);
	struct vscsiif_request *ring_req;
	struct vscsifrnt_shadow *shadow = scsi_cmd_priv(sc);
	unsigned long flags;
	int err;
	uint16_t rqid;

	spin_lock_irqsave(shost->host_lock, flags);
	if (RING_FULL(&info->ring))
		goto busy;

	ring_req = scsifront_command2ring(info, sc, shadow);
	if (!ring_req)
		goto busy;

	sc->result = 0;

	rqid = ring_req->rqid;
	ring_req->act = VSCSIIF_ACT_SCSI_CDB;

	shadow->sc = sc;
	shadow->act = VSCSIIF_ACT_SCSI_CDB;

	err = map_data_for_request(info, sc, ring_req, shadow);
	if (err < 0) {
		pr_debug("%s: err %d\n", __func__, err);
		/*
		 * Undo scsifront_pre_req(): give the ring slot back as well
		 * as the request id. Otherwise the half-built request would
		 * be published to the backend by the next push, carrying an
		 * id that has already been released. The host lock has been
		 * held since the slot was taken, so nothing else has advanced
		 * the private producer index in between.
		 */
		info->ring.req_prod_pvt--;
		scsifront_put_rqid(info, rqid);
		spin_unlock_irqrestore(shost->host_lock, flags);
		if (err == -ENOMEM)
			return SCSI_MLQUEUE_HOST_BUSY;
		sc->result = DID_ERROR << 16;
		sc->scsi_done(sc);
		return 0;
	}

	scsifront_do_request(info);
	spin_unlock_irqrestore(shost->host_lock, flags);

	return 0;

busy:
	spin_unlock_irqrestore(shost->host_lock, flags);
	pr_debug("%s: busy\n", __func__);
	return SCSI_MLQUEUE_HOST_BUSY;
}
/*
 * Any exception handling (reset or abort) must be forwarded to the backend.
 * We have to wait until an answer is returned. This answer contains the
 * result to be returned to the requestor.
 *
 * @sc is the command the action refers to, @act the VSCSIIF_ACT_* code to
 * send. A separate shadow is allocated for the action request itself; the
 * shadow of @sc only supplies the request id being referenced.
 *
 * Returns the backend's result on success, FAILED if the shadow cannot be
 * allocated or a wait is interrupted.
 */
static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act)
{
struct Scsi_Host *host = sc->device->host;
struct vscsifrnt_info *info = shost_priv(host);
/* shadow: for the action request; s: shadow of the command acted upon */
struct vscsifrnt_shadow *shadow, *s = scsi_cmd_priv(sc);
struct vscsiif_request *ring_req;
int err = 0;
shadow = kmalloc(sizeof(*shadow), GFP_NOIO);
if (!shadow)
return FAILED;
spin_lock_irq(host->host_lock);
/*
 * Loop until a ring slot and request id are obtained. If none is
 * available, sleep on wq_sync (with the host lock dropped) and retry;
 * give up if that sleep was interrupted and the retry fails too.
 */
for (;;) {
if (!RING_FULL(&info->ring)) {
ring_req = scsifront_command2ring(info, sc, shadow);
if (ring_req)
break;
}
/* err is non-zero only if the previous wait was interrupted */
if (err) {
spin_unlock_irq(host->host_lock);
kfree(shadow);
return FAILED;
}
info->wait_ring_available = 1;
spin_unlock_irq(host->host_lock);
err = wait_event_interruptible(info->wq_sync,
!info->wait_ring_available);
spin_lock_irq(host->host_lock);
}
ring_req->act = act;
/* tell the backend which outstanding request this action targets */
ring_req->ref_rqid = s->rqid;
shadow->act = act;
shadow->rslt_reset = RSLT_RESET_WAITING;
init_waitqueue_head(&shadow->wq_reset);
ring_req->nr_segments = 0;
scsifront_do_request(info);
spin_unlock_irq(host->host_lock);
/* woken by scsifront_sync_cmd_done() once the response has arrived */
err = wait_event_interruptible(shadow->wq_reset, shadow->wait_reset);
spin_lock_irq(host->host_lock);
if (!err) {
err = shadow->rslt_reset;
scsifront_put_rqid(info, shadow->rqid);
kfree(shadow);
} else {
/*
 * Interrupted: leave the shadow and request id in place and mark the
 * request so that scsifront_sync_cmd_done() releases both when the
 * response eventually arrives.
 * NOTE(review): if the response was processed between the interrupted
 * wait returning and this store, nobody appears to free the shadow —
 * confirm.
 */
spin_lock(&info->shadow_lock);
shadow->rslt_reset = RSLT_RESET_ERR;
spin_unlock(&info->shadow_lock);
err = FAILED;
}
spin_unlock_irq(host->host_lock);
return err;
}
/* eh_abort_handler hook: forward an abort of @sc to the backend. */
static int scsifront_eh_abort_handler(struct scsi_cmnd *sc)
{
	int rc;

	pr_debug("%s\n", __func__);

	rc = scsifront_action_handler(sc, VSCSIIF_ACT_SCSI_ABORT);
	return rc;
}
/* eh_device_reset_handler hook: forward a device reset to the backend. */
static int scsifront_dev_reset_handler(struct scsi_cmnd *sc)
{
	int rc;

	pr_debug("%s\n", __func__);

	rc = scsifront_action_handler(sc, VSCSIIF_ACT_SCSI_RESET);
	return rc;
}
/*
 * slave_configure hook. When invoked from the task recorded in
 * info->curr, write XenbusStateConnected to the xenstore node named by
 * info->dev_state_path. The write's result is not checked; always
 * returns 0.
 */
static int scsifront_sdev_configure(struct scsi_device *sdev)
{
	struct vscsifrnt_info *info = shost_priv(sdev->host);

	if (!info || current != info->curr)
		return 0;

	xenbus_printf(XBT_NIL, info->dev->nodename,
		      info->dev_state_path, "%d", XenbusStateConnected);

	return 0;
}
/*
 * slave_destroy hook. When invoked from the task recorded in info->curr,
 * write XenbusStateClosed to the xenstore node named by
 * info->dev_state_path. The write's result is not checked.
 */
static void scsifront_sdev_destroy(struct scsi_device *sdev)
{
	struct vscsifrnt_info *info = shost_priv(sdev->host);

	if (!info || current != info->curr)
		return;

	xenbus_printf(XBT_NIL, info->dev->nodename,
		      info->dev_state_path, "%d", XenbusStateClosed);
}
/* SCSI host template passed to scsi_host_alloc() in scsifront_probe(). */
static struct scsi_host_template scsifront_sht = {
.module = THIS_MODULE,
.name = "Xen SCSI frontend driver",
.queuecommand = scsifront_queuecommand,
/* abort and device reset are both forwarded to the backend */
.eh_abort_handler = scsifront_eh_abort_handler,
.eh_device_reset_handler = scsifront_dev_reset_handler,
.slave_configure = scsifront_sdev_configure,
.slave_destroy = scsifront_sdev_destroy,
.cmd_per_lun = VSCSIIF_DEFAULT_CMD_PER_LUN,
/* one outstanding command per request id */
.can_queue = VSCSIIF_MAX_REQS,
.this_id = -1,
/* per-command private area holds the shadow used by queuecommand */
.cmd_size = sizeof(struct vscsifrnt_shadow),
.sg_tablesize = VSCSIIF_SG_TABLESIZE,
.use_clustering = DISABLE_CLUSTERING,
.proc_name = "scsifront",
};
/*
 * Allocate and initialise the shared ring page, grant it to the backend,
 * allocate an event channel, bind it to an irq and install the threaded
 * handler. On success info->ring, ring_ref, evtchn and irq are valid.
 *
 * Returns 0 on success or a negative error after reporting it with
 * xenbus_dev_fatal() and undoing the earlier steps.
 */
static int scsifront_alloc_ring(struct vscsifrnt_info *info)
{
struct xenbus_device *dev = info->dev;
struct vscsiif_sring *sring;
int err = -ENOMEM;
/***** Frontend to Backend ring start *****/
sring = (struct vscsiif_sring *)__get_free_page(GFP_KERNEL);
if (!sring) {
xenbus_dev_fatal(dev, err,
"fail to allocate shared ring (Front to Back)");
return err;
}
SHARED_RING_INIT(sring);
FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
err = xenbus_grant_ring(dev, virt_to_mfn(sring));
if (err < 0) {
/* nothing granted yet, so the page can be freed directly */
free_page((unsigned long)sring);
xenbus_dev_fatal(dev, err,
"fail to grant shared ring (Front to Back)");
return err;
}
/* a non-negative return value is the grant reference */
info->ring_ref = err;
err = xenbus_alloc_evtchn(dev, &info->evtchn);
if (err) {
xenbus_dev_fatal(dev, err, "xenbus_alloc_evtchn");
goto free_gnttab;
}
err = bind_evtchn_to_irq(info->evtchn);
/*
 * NOTE(review): a return value of exactly 0 takes the error path but
 * then returns 0, which the caller treats as success — confirm whether
 * 0 can actually be returned here.
 */
if (err <= 0) {
xenbus_dev_fatal(dev, err, "bind_evtchn_to_irq");
goto free_gnttab;
}
info->irq = err;
/* no hard-irq handler: all work runs in the irq thread */
err = request_threaded_irq(info->irq, NULL, scsifront_irq_fn,
IRQF_ONESHOT, "scsifront", info);
if (err) {
xenbus_dev_fatal(dev, err, "request_threaded_irq");
goto free_irq;
}
return 0;
/* free resource */
free_irq:
/*
 * NOTE(review): this label is only reached when request_threaded_irq()
 * failed, i.e. no handler is installed — confirm unbind_from_irqhandler()
 * is the right teardown in that situation.
 */
unbind_from_irqhandler(info->irq, info);
free_gnttab:
/* passes the ring page along; presumably the page is released too — TODO confirm */
gnttab_end_foreign_access(info->ring_ref, 0,
(unsigned long)info->ring.sring);
return err;
}
/*
 * Set up the shared ring and advertise it to the backend.
 *
 * After scsifront_alloc_ring() succeeded, "ring-ref" and "event-channel"
 * are written below the device's xenstore node inside one transaction.
 * The transaction is restarted when ending it returns -EAGAIN.
 *
 * Returns 0 on success.  On any failure after the ring was allocated the
 * irq handler is unbound and the ring grant is ended before the negative
 * errno is returned.
 */
static int scsifront_init_ring(struct vscsifrnt_info *info)
{
	struct xenbus_device *dev = info->dev;
	struct xenbus_transaction xbt;
	int err;

	pr_debug("%s\n", __func__);

	err = scsifront_alloc_ring(info);
	if (err)
		return err;
	pr_debug("%s: %u %u\n", __func__, info->ring_ref, info->evtchn);

again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		/*
		 * No transaction was opened, so xbt must not be used for
		 * writing or aborting; just release the ring resources.
		 */
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto free_sring;
	}

	err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u",
			    info->ring_ref);
	if (err) {
		xenbus_dev_fatal(dev, err, "%s", "writing ring-ref");
		goto fail;
	}

	err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
			    info->evtchn);
	if (err) {
		xenbus_dev_fatal(dev, err, "%s", "writing event-channel");
		goto fail;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err) {
		if (err == -EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto free_sring;
	}

	return 0;

fail:
	/* abort the open transaction */
	xenbus_transaction_end(xbt, 1);
free_sring:
	unbind_from_irqhandler(info->irq, info);
	gnttab_end_foreign_access(info->ring_ref, 0,
				  (unsigned long)info->ring.sring);

	return err;
}
/*
 * Xenbus probe callback: create the SCSI host for a new "vscsi" device.
 *
 * Allocates a Scsi_Host whose hostdata is the struct vscsifrnt_info,
 * stores it as the xenbus device's driver data, sets up the shared ring,
 * fills in the host limits and registers the host.  On success the
 * frontend state is switched to XenbusStateInitialised.
 *
 * Returns 0 on success or a negative errno; on failure the ring resources
 * (if they were set up) and the host reference are released again.
 */
static int scsifront_probe(struct xenbus_device *dev,
			   const struct xenbus_device_id *id)
{
	struct vscsifrnt_info *info;
	struct Scsi_Host *host;
	int err = -ENOMEM;

	host = scsi_host_alloc(&scsifront_sht, sizeof(*info));
	if (!host) {
		xenbus_dev_fatal(dev, err, "fail to allocate scsi host");
		return err;
	}
	info = (struct vscsifrnt_info *)host->hostdata;

	dev_set_drvdata(&dev->dev, info);
	info->dev = dev;

	bitmap_fill(info->shadow_free_bitmap, VSCSIIF_MAX_REQS);

	/*
	 * scsifront_init_ring() installs the irq handler with info as its
	 * cookie, so the lock and wait queue must be usable before that.
	 */
	init_waitqueue_head(&info->wq_sync);
	spin_lock_init(&info->shadow_lock);

	err = scsifront_init_ring(info);
	if (err) {
		scsi_host_put(host);
		return err;
	}

	host->max_id      = VSCSIIF_MAX_TARGET;
	host->max_channel = 0;
	host->max_lun     = VSCSIIF_MAX_LUN;
	host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
	host->max_cmd_len = VSCSIIF_MAX_COMMAND_SIZE;

	err = scsi_add_host(host, &dev->dev);
	if (err) {
		dev_err(&dev->dev, "fail to add scsi host %d\n", err);
		goto free_sring;
	}
	info->host = host;
	info->host_active = 1;

	xenbus_switch_state(dev, XenbusStateInitialised);

	return 0;

free_sring:
	unbind_from_irqhandler(info->irq, info);
	gnttab_end_foreign_access(info->ring_ref, 0,
				  (unsigned long)info->ring.sring);
	scsi_host_put(host);
	return err;
}
/*
 * Xenbus remove callback: tear down the SCSI host and the ring.
 *
 * The host is removed under scsifront_mutex unless scsifront_disconnect()
 * already did so (host_active is then 0).  Afterwards the irq handler is
 * unbound, the ring grant is ended and the host reference is dropped.
 *
 * Always returns 0.
 */
static int scsifront_remove(struct xenbus_device *dev)
{
	struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);

	pr_debug("%s: %s removed\n", __func__, dev->nodename);

	mutex_lock(&scsifront_mutex);
	if (info->host_active) {
		/* Scsi_host not yet removed */
		scsi_remove_host(info->host);
		info->host_active = 0;
	}
	mutex_unlock(&scsifront_mutex);

	/*
	 * Unbind the irq handler before giving up the ring, so the handler
	 * cannot run against a ring that has already been released.  This is
	 * the order the error paths of scsifront_init_ring() and
	 * scsifront_probe() use as well.
	 */
	unbind_from_irqhandler(info->irq, info);
	gnttab_end_foreign_access(info->ring_ref, 0,
				  (unsigned long)info->ring.sring);

	scsi_host_put(info->host);

	return 0;
}
/*
 * React to the backend closing: remove the SCSI host (once) and report
 * the frontend as closed on xenbus.
 */
static void scsifront_disconnect(struct vscsifrnt_info *info)
{
	struct Scsi_Host *shost = info->host;
	struct xenbus_device *xbdev = info->dev;

	pr_debug("%s: %s disconnect\n", __func__, xbdev->nodename);

	/*
	 * By the time we get here all devices of the frontend have been
	 * deleted already, so there is no need to block I/O before the
	 * host is removed.
	 */
	mutex_lock(&scsifront_mutex);
	if (info->host_active) {
		scsi_remove_host(shost);
		info->host_active = 0;
	}
	mutex_unlock(&scsifront_mutex);

	xenbus_frontend_closed(xbdev);
}
/*
 * Add or remove LUNs according to the backend's "vscsi-devs" directory.
 *
 * @info: frontend instance
 * @op:   VSCSIFRONT_OP_ADD_LUN or VSCSIFRONT_OP_DEL_LUN; other values make
 *        the walk a no-op
 *
 * For every directory entry the backend's state and virtual address
 * ("v-dev", host:channel:target:lun) are read.  Entries that cannot be
 * read or parsed completely are skipped.  For ADD, devices in state
 * Initialised are added to the host; if that fails the frontend's state
 * node for the device is set to Closed.  For DEL, devices in state Closing
 * are looked up and removed.
 *
 * While the walk runs, info->curr names the current task and
 * info->dev_state_path holds the frontend state path of the entry being
 * processed.
 */
static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
{
	struct xenbus_device *dev = info->dev;
	unsigned int i;
	int err;
	char str[64];
	char **dir;
	unsigned int dir_n = 0;
	unsigned int device_state;
	unsigned int hst, chn, tgt, lun;
	struct scsi_device *sdev;

	dir = xenbus_directory(XBT_NIL, dev->otherend, "vscsi-devs", &dir_n);
	if (IS_ERR(dir))
		return;

	/* mark current task as the one allowed to modify device states */
	BUG_ON(info->curr);
	info->curr = current;

	for (i = 0; i < dir_n; i++) {
		/* read status */
		snprintf(str, sizeof(str), "vscsi-devs/%s/state", dir[i]);
		err = xenbus_scanf(XBT_NIL, dev->otherend, str, "%u",
				   &device_state);
		/*
		 * Anything but exactly one converted field (an error or an
		 * unparsable value) leaves device_state unset: skip it.
		 */
		if (err != 1)
			continue;

		/* virtual SCSI device */
		snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]);
		err = xenbus_scanf(XBT_NIL, dev->otherend, str,
				   "%u:%u:%u:%u", &hst, &chn, &tgt, &lun);
		/* all four address components are needed below */
		if (err != 4)
			continue;

		/*
		 * Front device state path, used in slave_configure called
		 * on successful scsi_add_device, and in slave_destroy called
		 * on remove of a device.
		 */
		snprintf(info->dev_state_path, sizeof(info->dev_state_path),
			 "vscsi-devs/%s/state", dir[i]);

		switch (op) {
		case VSCSIFRONT_OP_ADD_LUN:
			if (device_state != XenbusStateInitialised)
				break;

			if (scsi_add_device(info->host, chn, tgt, lun)) {
				dev_err(&dev->dev, "scsi_add_device\n");
				xenbus_printf(XBT_NIL, dev->nodename,
					      info->dev_state_path,
					      "%d", XenbusStateClosed);
			}
			break;
		case VSCSIFRONT_OP_DEL_LUN:
			if (device_state != XenbusStateClosing)
				break;

			sdev = scsi_device_lookup(info->host, chn, tgt, lun);
			if (sdev) {
				scsi_remove_device(sdev);
				scsi_device_put(sdev);
			}
			break;
		default:
			break;
		}
	}

	info->curr = NULL;

	kfree(dir);
}
static void scsifront_read_backend_params(struct xenbus_device *dev,
struct vscsifrnt_info *info)
{
unsigned int sg_grant;
int ret;
struct Scsi_Host *host = info->host;
ret = xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg-grant", "%u",
&sg_grant);
if (ret == 1 && sg_grant) {
sg_grant = min_t(unsigned int, sg_grant, SG_ALL);
sg_grant = max_t(unsigned int, sg_grant, VSCSIIF_SG_TABLESIZE);
host->sg_tablesize = min_t(unsigned int, sg_grant,
VSCSIIF_SG_TABLESIZE * PAGE_SIZE /
sizeof(struct scsiif_request_segment));
host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
}
dev_info(&dev->dev, "using up to %d SG entries\n", host->sg_tablesize);
}
/*
 * Xenbus otherend_changed callback: follow the backend's state machine.
 *
 * Connected      - read backend parameters, add LUNs if we are still in
 *                  Initialised, then switch to Connected.
 * Closing/Closed - disconnect (skipped if we are already Closed and the
 *                  backend reports Closed).
 * Reconfiguring  - remove LUNs the backend is closing, then acknowledge.
 * Reconfigured   - add new LUNs and return to Connected.
 * The early initialisation states need no action.
 */
static void scsifront_backend_changed(struct xenbus_device *dev,
				      enum xenbus_state backend_state)
{
	struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);

	pr_debug("%s: %p %u %u\n", __func__, dev, dev->state, backend_state);

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
		/* nothing to do yet */
		break;

	case XenbusStateConnected:
		scsifront_read_backend_params(dev, info);
		if (xenbus_read_driver_state(dev->nodename) ==
		    XenbusStateInitialised)
			scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);

		if (dev->state != XenbusStateConnected)
			xenbus_switch_state(dev, XenbusStateConnected);
		break;

	case XenbusStateClosed:
		/* Missed the backend's Closing state: handle it like Closing */
		if (dev->state != XenbusStateClosed)
			scsifront_disconnect(info);
		break;

	case XenbusStateClosing:
		scsifront_disconnect(info);
		break;

	case XenbusStateReconfiguring:
		scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_DEL_LUN);
		xenbus_switch_state(dev, XenbusStateReconfiguring);
		break;

	case XenbusStateReconfigured:
		scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
		xenbus_switch_state(dev, XenbusStateConnected);
		break;
	}
}
/*
 * Xenbus device types this frontend binds to.  The trailing entry with an
 * empty string presumably terminates the table (TODO confirm against the
 * xenbus core).
 */
static const struct xenbus_device_id scsifront_ids[] = {
{ "vscsi" },
{ "" }
};
/*
 * Xenbus driver description: the device id table plus the probe, remove
 * and backend-state-change callbacks defined in this file.  Registered by
 * scsifront_init() and unregistered by scsifront_exit().
 */
static struct xenbus_driver scsifront_driver = {
.ids = scsifront_ids,
.probe = scsifront_probe,
.remove = scsifront_remove,
.otherend_changed = scsifront_backend_changed,
};
/*
 * Module entry point: register the frontend with xenbus.
 * Returns -ENODEV when not running in a Xen domain, otherwise whatever
 * xenbus_register_frontend() returns.
 */
static int __init scsifront_init(void)
{
	int ret = -ENODEV;

	if (xen_domain())
		ret = xenbus_register_frontend(&scsifront_driver);

	return ret;
}
module_init(scsifront_init);
/* Module exit point: unregister the xenbus driver registered at init. */
static void __exit scsifront_exit(void)
{
xenbus_unregister_driver(&scsifront_driver);
}
module_exit(scsifront_exit);
MODULE_DESCRIPTION("Xen SCSI frontend driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS("xen:vscsi");
MODULE_AUTHOR("Juergen Gross <jgross@suse.com>");
...@@ -347,8 +347,6 @@ static int xen_console_remove(struct xencons_info *info) ...@@ -347,8 +347,6 @@ static int xen_console_remove(struct xencons_info *info)
} }
#ifdef CONFIG_HVC_XEN_FRONTEND #ifdef CONFIG_HVC_XEN_FRONTEND
static struct xenbus_driver xencons_driver;
static int xencons_remove(struct xenbus_device *dev) static int xencons_remove(struct xenbus_device *dev)
{ {
return xen_console_remove(dev_get_drvdata(&dev->dev)); return xen_console_remove(dev_get_drvdata(&dev->dev));
...@@ -499,13 +497,14 @@ static const struct xenbus_device_id xencons_ids[] = { ...@@ -499,13 +497,14 @@ static const struct xenbus_device_id xencons_ids[] = {
{ "" } { "" }
}; };
static struct xenbus_driver xencons_driver = {
static DEFINE_XENBUS_DRIVER(xencons, "xenconsole", .name = "xenconsole",
.ids = xencons_ids,
.probe = xencons_probe, .probe = xencons_probe,
.remove = xencons_remove, .remove = xencons_remove,
.resume = xencons_resume, .resume = xencons_resume,
.otherend_changed = xencons_backend_changed, .otherend_changed = xencons_backend_changed,
); };
#endif /* CONFIG_HVC_XEN_FRONTEND */ #endif /* CONFIG_HVC_XEN_FRONTEND */
static int __init xen_hvc_init(void) static int __init xen_hvc_init(void)
......
...@@ -684,12 +684,13 @@ static const struct xenbus_device_id xenfb_ids[] = { ...@@ -684,12 +684,13 @@ static const struct xenbus_device_id xenfb_ids[] = {
{ "" } { "" }
}; };
static DEFINE_XENBUS_DRIVER(xenfb, , static struct xenbus_driver xenfb_driver = {
.ids = xenfb_ids,
.probe = xenfb_probe, .probe = xenfb_probe,
.remove = xenfb_remove, .remove = xenfb_remove,
.resume = xenfb_resume, .resume = xenfb_resume,
.otherend_changed = xenfb_backend_changed, .otherend_changed = xenfb_backend_changed,
); };
static int __init xenfb_init(void) static int __init xenfb_init(void)
{ {
......
...@@ -172,6 +172,15 @@ config XEN_PCIDEV_BACKEND ...@@ -172,6 +172,15 @@ config XEN_PCIDEV_BACKEND
If in doubt, say m. If in doubt, say m.
config XEN_SCSI_BACKEND
tristate "XEN SCSI backend driver"
depends on XEN && XEN_BACKEND && TARGET_CORE
help
The SCSI backend driver allows the kernel to export its SCSI Devices
to other guests via a high-performance shared-memory interface.
Only needed for systems running as XEN driver domains (e.g. Dom0) and
if guests need generic access to SCSI devices.
config XEN_PRIVCMD config XEN_PRIVCMD
tristate tristate
depends on XEN depends on XEN
......
...@@ -36,6 +36,7 @@ obj-$(CONFIG_XEN_ACPI_HOTPLUG_MEMORY) += xen-acpi-memhotplug.o ...@@ -36,6 +36,7 @@ obj-$(CONFIG_XEN_ACPI_HOTPLUG_MEMORY) += xen-acpi-memhotplug.o
obj-$(CONFIG_XEN_ACPI_HOTPLUG_CPU) += xen-acpi-cpuhotplug.o obj-$(CONFIG_XEN_ACPI_HOTPLUG_CPU) += xen-acpi-cpuhotplug.o
obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
obj-$(CONFIG_XEN_EFI) += efi.o obj-$(CONFIG_XEN_EFI) += efi.o
obj-$(CONFIG_XEN_SCSI_BACKEND) += xen-scsiback.o
xen-evtchn-y := evtchn.o xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o xen-gntdev-y := gntdev.o
xen-gntalloc-y := gntalloc.o xen-gntalloc-y := gntalloc.o
......
...@@ -27,6 +27,8 @@ ...@@ -27,6 +27,8 @@
#include <xen/interface/platform.h> #include <xen/interface/platform.h>
#include <xen/xen.h> #include <xen/xen.h>
#include <asm/page.h>
#include <asm/xen/hypercall.h> #include <asm/xen/hypercall.h>
#define INIT_EFI_OP(name) \ #define INIT_EFI_OP(name) \
......
...@@ -900,7 +900,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) ...@@ -900,7 +900,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
return irq; return irq;
} }
static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
unsigned int remote_port) unsigned int remote_port)
{ {
struct evtchn_bind_interdomain bind_interdomain; struct evtchn_bind_interdomain bind_interdomain;
...@@ -914,6 +914,7 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, ...@@ -914,6 +914,7 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
} }
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq);
static int find_virq(unsigned int virq, unsigned int cpu) static int find_virq(unsigned int virq, unsigned int cpu)
{ {
......
...@@ -592,7 +592,7 @@ static int grow_gnttab_list(unsigned int more_frames) ...@@ -592,7 +592,7 @@ static int grow_gnttab_list(unsigned int more_frames)
return 0; return 0;
grow_nomem: grow_nomem:
for ( ; i >= nr_glist_frames; i--) while (i-- > nr_glist_frames)
free_page((unsigned long) gnttab_list[i]); free_page((unsigned long) gnttab_list[i]);
return -ENOMEM; return -ENOMEM;
} }
......
...@@ -719,11 +719,13 @@ static const struct xenbus_device_id xen_pcibk_ids[] = { ...@@ -719,11 +719,13 @@ static const struct xenbus_device_id xen_pcibk_ids[] = {
{""}, {""},
}; };
static DEFINE_XENBUS_DRIVER(xen_pcibk, DRV_NAME, static struct xenbus_driver xen_pcibk_driver = {
.name = DRV_NAME,
.ids = xen_pcibk_ids,
.probe = xen_pcibk_xenbus_probe, .probe = xen_pcibk_xenbus_probe,
.remove = xen_pcibk_xenbus_remove, .remove = xen_pcibk_xenbus_remove,
.otherend_changed = xen_pcibk_frontend_changed, .otherend_changed = xen_pcibk_frontend_changed,
); };
const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend; const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
......
/*
* Xen SCSI backend driver
*
* Copyright (c) 2008, FUJITSU Limited
*
* Based on the blkback driver code.
* Adaption to kernel target core infrastructure taken from vhost/scsi.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdarg.h>
#include <linux/module.h>
#include <linux/utsname.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/gfp.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/configfs.h>
#include <generated/utsrelease.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_tcq.h>
#include <target/target_core_base.h>
#include <target/target_core_fabric.h>
#include <target/target_core_configfs.h>
#include <target/target_core_fabric_configfs.h>
#include <asm/hypervisor.h>
#include <xen/xen.h>
#include <xen/balloon.h>
#include <xen/events.h>
#include <xen/xenbus.h>
#include <xen/grant_table.h>
#include <xen/page.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/vscsiif.h>
/* Debug print helper: pr_debug() prefixed with the source file and line. */
#define DPRINTK(_f, _a...) \
pr_debug("(file=%s, line=%d) " _f, __FILE__ , __LINE__ , ## _a)
/* Driver version string. */
#define VSCSI_VERSION "v0.1"
/* Size of the name buffers in scsiback_tport and scsiback_tpg. */
#define VSCSI_NAMELEN 32
/*
 * Address of a virtual SCSI device as seen by the frontend.
 * scsiback_do_translation() compares chn, tgt and lun only.
 */
struct ids_tuple {
unsigned int hst; /* host */
unsigned int chn; /* channel */
unsigned int tgt; /* target */
unsigned int lun; /* LUN */
};
/*
 * One virtual-to-physical translation: maps a frontend address to a
 * target portal group and LUN.
 */
struct v2p_entry {
struct ids_tuple v; /* translate from */
struct scsiback_tpg *tpg; /* translate to */
unsigned int lun; /* LUN handed to the target core for this entry */
/* taken per request in scsiback_do_translation(), dropped on response */
struct kref kref;
struct list_head l; /* link in vscsibk_info.v2p_entry_lists */
};
/* Per-frontend backend instance state. */
struct vscsibk_info {
struct xenbus_device *dev;
domid_t domid; /* frontend domain, used when mapping its grants */
unsigned int irq; /* irq used to notify the frontend of responses */
struct vscsiif_back_ring ring; /* shared request/response ring */
int ring_error; /* set once the frontend supplied bogus ring indices */
spinlock_t ring_lock; /* held while producing a response on the ring */
atomic_t nr_unreplied_reqs; /* see scsiback_get()/scsiback_put() */
spinlock_t v2p_lock; /* protects v2p_entry_lists */
struct list_head v2p_entry_lists; /* list of struct v2p_entry */
/* woken when nr_unreplied_reqs drops to zero */
wait_queue_head_t waiting_to_free;
};
/*
 * Theoretical maximum of grants for one request; sizes the grant_handles[]
 * and pages[] arrays of struct vscsibk_pend.
 */
#define VSCSI_MAX_GRANTS (SG_ALL + VSCSIIF_SG_TABLESIZE)
/*
 * VSCSI_GRANT_BATCH is the maximum number of grants to be processed in one
 * call to map/unmap grants. Don't choose it too large, as there are arrays
 * with VSCSI_GRANT_BATCH elements allocated on the stack.
 */
#define VSCSI_GRANT_BATCH 16
/*
 * State of one request taken off the ring, allocated from scsiback_cachep.
 */
struct vscsibk_pend {
uint16_t rqid; /* request id copied from the ring, echoed in the response */
uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; /* copy of the CDB */
uint8_t cmd_len; /* number of valid bytes in cmnd[] */
uint8_t sc_data_direction; /* DMA_* direction requested by the frontend */
uint16_t n_sg; /* real length of SG list */
uint16_t n_grants; /* SG pages and potentially SG list */
uint32_t data_len; /* sum of all segment lengths */
uint32_t result; /* result reported by scsiback_cmd_done() */
struct vscsibk_info *info; /* owning backend instance */
struct v2p_entry *v2p; /* translation entry, NULL if lookup failed */
struct scatterlist *sgl; /* freed in scsiback_fast_flush_area() */
uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
/* handle and page for each mapped grant; first n_grants are in use */
grant_handle_t grant_handles[VSCSI_MAX_GRANTS];
struct page *pages[VSCSI_MAX_GRANTS];
struct se_cmd se_cmd; /* target core command embedded in the request */
};
/*
 * Completion tracking for a task management request;
 * scsiback_device_action() sleeps on tmr_wait until tmr_complete is
 * non-zero.
 */
struct scsiback_tmr {
atomic_t tmr_complete;
wait_queue_head_t tmr_wait;
};
/* I_T nexus of a target portal group; wraps the TCM session. */
struct scsiback_nexus {
/* Pointer to TCM session for I_T Nexus */
struct se_session *tvn_se_sess;
};
/* One pvscsi target port. */
struct scsiback_tport {
/* SCSI protocol the tport is providing */
u8 tport_proto_id;
/* Binary World Wide unique Port Name for pvscsi Target port */
u64 tport_wwpn;
/* ASCII formatted WWPN for pvscsi Target port */
char tport_name[VSCSI_NAMELEN];
/* Returned by scsiback_make_tport() */
struct se_wwn tport_wwn;
};
/* One target portal group below a scsiback_tport. */
struct scsiback_tpg {
/* scsiback port target portal group tag for TCM */
u16 tport_tpgt;
/* track number of TPG Port/Lun Links wrt explicit I_T Nexus shutdown */
int tv_tpg_port_count;
/*
 * xen-pvscsi references to tpg_nexus, protected by tv_tpg_mutex;
 * decremented when a v2p_entry is freed
 */
int tv_tpg_fe_count;
/* list for scsiback_list */
struct list_head tv_tpg_list;
/* Used to protect access for tpg_nexus */
struct mutex tv_tpg_mutex;
/* Pointer to the TCM pvscsi I_T Nexus for this TPG endpoint */
struct scsiback_nexus *tpg_nexus;
/* Pointer back to scsiback_tport */
struct scsiback_tport *tport;
/* Returned by scsiback_make_tpg() */
struct se_portal_group se_tpg;
/* alias used in xenstore */
char param_alias[VSCSI_NAMELEN];
/* list of info structures related to this target portal group */
struct list_head info_list;
};
/* Marks a grant_handles[] slot that holds no mapping. */
#define SCSIBACK_INVALID_HANDLE (~0)
/* When set, failed commands are logged by scsiback_print_status(). */
static bool log_print_stat;
module_param(log_print_stat, bool, 0644);
/* Upper bound for the number of pages kept on scsiback_free_pages. */
static int scsiback_max_buffer_pages = 1024;
module_param_named(max_buffer_pages, scsiback_max_buffer_pages, int, 0644);
MODULE_PARM_DESC(max_buffer_pages,
"Maximum number of free pages to keep in backend buffer");
/* Slab cache for struct vscsibk_pend allocations. */
static struct kmem_cache *scsiback_cachep;
/* Pool of ballooned pages kept for reuse; list and counter are protected
 * by free_pages_lock. */
static DEFINE_SPINLOCK(free_pages_lock);
static int free_pages_num;
static LIST_HEAD(scsiback_free_pages);
/* Global mutex to protect scsiback TPG list */
static DEFINE_MUTEX(scsiback_mutex);
static LIST_HEAD(scsiback_list);
/* Local pointer to allocated TCM configfs fabric module */
static struct target_fabric_configfs *scsiback_fabric_configfs;
/* Count one more request that has not been answered yet. */
static void scsiback_get(struct vscsibk_info *info)
{
atomic_inc(&info->nr_unreplied_reqs);
}
/*
 * Drop one outstanding-request count; the last one wakes anybody sleeping
 * on waiting_to_free (see scsiback_disconnect()).
 */
static void scsiback_put(struct vscsibk_info *info)
{
	if (!atomic_dec_and_test(&info->nr_unreplied_reqs))
		return;

	wake_up(&info->waiting_to_free);
}
/*
 * Return @num pages to the free-page pool.
 *
 * If the pool would grow beyond scsiback_max_buffer_pages, the surplus is
 * taken from the end of @page and handed back to the balloon driver; the
 * remaining leading pages are put on scsiback_free_pages.
 */
static void put_free_pages(struct page **page, int num)
{
	unsigned long flags;
	int total = free_pages_num + num;
	int keep = num;
	int idx;

	if (num == 0)
		return;

	if (total > scsiback_max_buffer_pages) {
		int surplus = min(num, total - scsiback_max_buffer_pages);

		free_xenballooned_pages(surplus, page + num - surplus);
		keep = num - surplus;
	}

	spin_lock_irqsave(&free_pages_lock, flags);
	for (idx = 0; idx < keep; idx++)
		list_add(&page[idx]->lru, &scsiback_free_pages);
	free_pages_num += keep;
	spin_unlock_irqrestore(&free_pages_lock, flags);
}
/*
 * Get one page for mapping a grant into page[0].
 *
 * A page from the free-page pool is used when available; otherwise a
 * new page is requested from the balloon driver.  Returns 0 on success or
 * the error of alloc_xenballooned_pages().
 */
static int get_free_page(struct page **page)
{
	unsigned long flags;
	struct page *cached;

	spin_lock_irqsave(&free_pages_lock, flags);
	if (!list_empty(&scsiback_free_pages)) {
		cached = list_first_entry(&scsiback_free_pages,
					  struct page, lru);
		page[0] = cached;
		list_del(&cached->lru);
		free_pages_num--;
		spin_unlock_irqrestore(&free_pages_lock, flags);
		return 0;
	}
	spin_unlock_irqrestore(&free_pages_lock, flags);

	/* pool is empty: fall back to the balloon driver */
	return alloc_xenballooned_pages(1, page, false);
}
/* Kernel virtual address of @page, as an integer. */
static unsigned long vaddr_page(struct page *page)
{
	return (unsigned long)pfn_to_kaddr(page_to_pfn(page));
}
/* Kernel virtual address of the page backing grant number @seg of @req. */
static unsigned long vaddr(struct vscsibk_pend *req, int seg)
{
	struct page *pg = req->pages[seg];

	return vaddr_page(pg);
}
static void scsiback_print_status(char *sense_buffer, int errors,
struct vscsibk_pend *pending_req)
{
struct scsiback_tpg *tpg = pending_req->v2p->tpg;
pr_err("xen-pvscsi[%s:%d] cmnd[0]=%02x -> st=%02x msg=%02x host=%02x drv=%02x\n",
tpg->tport->tport_name, pending_req->v2p->lun,
pending_req->cmnd[0], status_byte(errors), msg_byte(errors),
host_byte(errors), driver_byte(errors));
if (CHECK_CONDITION & status_byte(errors))
__scsi_print_sense("xen-pvscsi", sense_buffer,
SCSI_SENSE_BUFFERSIZE);
}
/*
 * Release everything scsiback_gnttab_data_map() set up for @req: free the
 * scatterlist, unmap all grants that are still mapped (in batches of at
 * most VSCSI_GRANT_BATCH) and return the pages to the free-page pool.
 */
static void scsiback_fast_flush_area(struct vscsibk_pend *req)
{
	struct gnttab_unmap_grant_ref unmap[VSCSI_GRANT_BATCH];
	struct page *pages[VSCSI_GRANT_BATCH];
	unsigned int idx, batched = 0;
	int err;

	kfree(req->sgl);
	req->sgl = NULL;
	req->n_sg = 0;

	if (req->n_grants == 0)
		return;

	for (idx = 0; idx < req->n_grants; idx++) {
		grant_handle_t handle = req->grant_handles[idx];

		/* slot was never mapped (or mapping failed) */
		if (handle == SCSIBACK_INVALID_HANDLE)
			continue;

		gnttab_set_unmap_op(&unmap[batched], vaddr(req, idx),
				    GNTMAP_host_map, handle);
		req->grant_handles[idx] = SCSIBACK_INVALID_HANDLE;
		pages[batched] = req->pages[idx];
		put_page(pages[batched]);
		batched++;

		/* batch full: flush it and start a new one */
		if (batched >= VSCSI_GRANT_BATCH) {
			err = gnttab_unmap_refs(unmap, NULL, pages, batched);
			BUG_ON(err);
			batched = 0;
		}
	}

	/* flush the last, partially filled batch */
	if (batched) {
		err = gnttab_unmap_refs(unmap, NULL, pages, batched);
		BUG_ON(err);
	}

	put_free_pages(req->pages, req->n_grants);
	req->n_grants = 0;
}
/*
 * kref release function for a v2p_entry: drop the frontend reference
 * count of the entry's portal group under its mutex and free the entry.
 */
static void scsiback_free_translation_entry(struct kref *kref)
{
	struct v2p_entry *v2p = container_of(kref, struct v2p_entry, kref);
	struct scsiback_tpg *group = v2p->tpg;

	mutex_lock(&group->tv_tpg_mutex);
	group->tv_tpg_fe_count--;
	mutex_unlock(&group->tv_tpg_mutex);

	kfree(v2p);
}
/*
 * Put a response for @pending_req on the ring and notify the frontend.
 *
 * @sense_buffer: sense data to return, or NULL for none
 * @result:       result value stored in the response
 * @resid:        residual length stored in the response
 * @pending_req:  request being answered; supplies rqid, info and v2p
 *
 * Sense data is copied only if scsi_normalize_sense() accepts it; the
 * copied length is 8 plus byte 7 of the sense data, capped at
 * VSCSIIF_SENSE_BUFFERSIZE.  The response is produced under ring_lock.
 * Finally the request's reference on its translation entry, if any, is
 * dropped.
 */
static void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
			uint32_t resid, struct vscsibk_pend *pending_req)
{
	struct vscsiif_response *ring_res;
	struct vscsibk_info *info = pending_req->info;
	int notify;
	struct scsi_sense_hdr sshdr;
	unsigned long flags;
	unsigned len;

	spin_lock_irqsave(&info->ring_lock, flags);

	ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt);
	info->ring.rsp_prod_pvt++;

	ring_res->rslt   = result;
	ring_res->rqid   = pending_req->rqid;

	if (sense_buffer != NULL &&
	    scsi_normalize_sense(sense_buffer, VSCSIIF_SENSE_BUFFERSIZE,
				 &sshdr)) {
		/*
		 * sense_buffer is plain char, which may be signed: read the
		 * length byte as unsigned so values >= 0x80 do not turn
		 * into a negative (and thus wrong) length.
		 */
		len = min_t(unsigned, 8 + (u8)sense_buffer[7],
			    VSCSIIF_SENSE_BUFFERSIZE);
		memcpy(ring_res->sense_buffer, sense_buffer, len);
		ring_res->sense_len = len;
	} else {
		ring_res->sense_len = 0;
	}

	ring_res->residual_len = resid;

	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&info->ring, notify);
	spin_unlock_irqrestore(&info->ring_lock, flags);

	if (notify)
		notify_remote_via_irq(info->irq);

	/* v2p is NULL when the address lookup for the request failed */
	if (pending_req->v2p)
		kref_put(&pending_req->v2p->kref,
			 scsiback_free_translation_entry);
}
/*
 * Finish a command: optionally log a failure, release the request's grant
 * mappings, send the response (with sense data and residual count) and
 * drop the outstanding-request count of the instance.
 */
static void scsiback_cmd_done(struct vscsibk_pend *pending_req)
{
	struct vscsibk_info *info = pending_req->info;
	unsigned char *sense = pending_req->sense_buffer;
	unsigned int residual = pending_req->se_cmd.residual_count;
	int result = pending_req->result;

	if (log_print_stat && result)
		scsiback_print_status(sense, result, pending_req);

	scsiback_fast_flush_area(pending_req);
	scsiback_do_resp_with_sense(sense, result, residual, pending_req);
	scsiback_put(info);
}
/*
 * Hand a prepared request to the target core.
 *
 * The sense buffer and the embedded se_cmd are cleared, the request is
 * counted as outstanding and submitted together with its scatterlist.
 * If submission fails, a check condition is sent and the command freed.
 */
static void scsiback_cmd_exec(struct vscsibk_pend *pending_req)
{
	struct se_cmd *cmd = &pending_req->se_cmd;
	struct se_session *session =
		pending_req->v2p->tpg->tpg_nexus->tvn_se_sess;
	int ret;

	memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE);
	memset(cmd, 0, sizeof(*cmd));

	scsiback_get(pending_req->info);

	ret = target_submit_cmd_map_sgls(cmd, session, pending_req->cmnd,
			pending_req->sense_buffer, pending_req->v2p->lun,
			pending_req->data_len, 0,
			pending_req->sc_data_direction, 0,
			pending_req->sgl, pending_req->n_sg,
			NULL, 0, NULL, 0);
	if (ret >= 0)
		return;

	transport_send_check_condition_and_sense(cmd,
			TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0);
	transport_generic_free_cmd(cmd, 0);
}
/*
 * Map one batch of @cnt prepared grant operations.
 *
 * For every entry the resulting handle is stored in @grant; entries that
 * failed to map get SCSIBACK_INVALID_HANDLE, successfully mapped pages get
 * an extra page reference.  Returns 0 if all entries were mapped (or
 * @cnt is 0), -ENOMEM if at least one failed.
 */
static int scsiback_gnttab_data_map_batch(struct gnttab_map_grant_ref *map,
	struct page **pg, grant_handle_t *grant, int cnt)
{
	int rc, idx;

	if (cnt == 0)
		return 0;

	rc = gnttab_map_refs(map, NULL, pg, cnt);
	BUG_ON(rc);

	for (idx = 0; idx < cnt; idx++) {
		if (likely(map[idx].status == GNTST_okay)) {
			get_page(pg[idx]);
		} else {
			pr_err("xen-pvscsi: invalid buffer -- could not remap it\n");
			map[idx].handle = SCSIBACK_INVALID_HANDLE;
			rc = -ENOMEM;
		}
		grant[idx] = map[idx].handle;
	}

	return rc;
}
/*
 * Map the grants of @cnt segment descriptors starting at @seg.
 *
 * A page is obtained for each descriptor and stored from @pg onwards;
 * handles go to @grant.  Mapping is done in batches of at most
 * VSCSI_GRANT_BATCH, and pending_req->n_grants is advanced by the size of
 * every batch submitted, also when the batch reports an error.
 *
 * Returns 0 on success, -ENOMEM if no page could be obtained (pages of
 * the not yet submitted batch are given back) or the error of a batch.
 */
static int scsiback_gnttab_data_map_list(struct vscsibk_pend *pending_req,
	struct scsiif_request_segment *seg, struct page **pg,
	grant_handle_t *grant, int cnt, u32 flags)
{
	struct gnttab_map_grant_ref map[VSCSI_GRANT_BATCH];
	struct vscsibk_info *info = pending_req->info;
	int queued = 0, idx, rc = 0;

	for (idx = 0; idx < cnt; idx++) {
		if (get_free_page(pg + queued)) {
			put_free_pages(pg, queued);
			pr_err("xen-pvscsi: no grant page\n");
			return -ENOMEM;
		}
		gnttab_set_map_op(&map[queued], vaddr_page(pg[queued]),
				  flags, seg[idx].gref, info->domid);
		queued++;

		/* submit once the on-stack batch is full */
		if (queued >= VSCSI_GRANT_BATCH) {
			rc = scsiback_gnttab_data_map_batch(map, pg, grant,
							    queued);
			pg += queued;
			grant += queued;
			pending_req->n_grants += queued;
			if (rc)
				return rc;
			queued = 0;
		}
	}

	/* submit whatever is left over (may be nothing) */
	rc = scsiback_gnttab_data_map_batch(map, pg, grant, queued);
	pending_req->n_grants += queued;

	return rc;
}
/*
 * Map the data buffers of a request and build its scatterlist.
 *
 * @ring_req:    request as found on the ring
 * @pending_req: backend state of the request; n_sg, n_grants, data_len,
 *               sgl, pages[] and grant_handles[] are filled in here
 *
 * Two layouts are handled.  Without VSCSIIF_SG_GRANT set in nr_segments,
 * ring_req->seg[] describes the data pages directly.  With the flag set,
 * ring_req->seg[] describes pages that themselves hold arrays of segment
 * descriptors; those pages are mapped read-only first and the data pages
 * they describe are mapped afterwards.
 *
 * Returns 0 on success (also when the request has no segments) or a
 * negative errno.  Nothing is unmapped here on failure; the caller in
 * scsiback_do_cmd_fn() calls scsiback_fast_flush_area() for that.
 *
 * NOTE(review): ring_req points into the ring shared with the frontend
 * and its fields are read several times below; it looks like the frontend
 * could change them between the checks and their use -- confirm.
 */
static int scsiback_gnttab_data_map(struct vscsiif_request *ring_req,
struct vscsibk_pend *pending_req)
{
u32 flags;
int i, err, n_segs, i_seg = 0;
struct page **pg;
struct scsiif_request_segment *seg;
unsigned long end_seg = 0;
unsigned int nr_segments = (unsigned int)ring_req->nr_segments;
unsigned int nr_sgl = 0;
struct scatterlist *sg;
grant_handle_t *grant;
pending_req->n_sg = 0;
pending_req->n_grants = 0;
pending_req->data_len = 0;
/* strip the flag bit, leaving the plain count */
nr_segments &= ~VSCSIIF_SG_GRANT;
if (!nr_segments)
return 0;
if (nr_segments > VSCSIIF_SG_TABLESIZE) {
DPRINTK("xen-pvscsi: invalid parameter nr_seg = %d\n",
ring_req->nr_segments);
return -EINVAL;
}
if (ring_req->nr_segments & VSCSIIF_SG_GRANT) {
/* map the pages holding the segment descriptors, read-only */
err = scsiback_gnttab_data_map_list(pending_req, ring_req->seg,
pending_req->pages, pending_req->grant_handles,
nr_segments, GNTMAP_host_map | GNTMAP_readonly);
if (err)
return err;
/* from here on nr_sgl counts descriptor pages, nr_segments data segments */
nr_sgl = nr_segments;
nr_segments = 0;
for (i = 0; i < nr_sgl; i++) {
n_segs = ring_req->seg[i].length /
sizeof(struct scsiif_request_segment);
/*
 * each descriptor array must lie within its page and hold a
 * whole number of descriptors
 */
if ((unsigned)ring_req->seg[i].offset +
(unsigned)ring_req->seg[i].length > PAGE_SIZE ||
n_segs * sizeof(struct scsiif_request_segment) !=
ring_req->seg[i].length)
return -EINVAL;
nr_segments += n_segs;
}
if (nr_segments > SG_ALL) {
DPRINTK("xen-pvscsi: invalid nr_seg = %d\n",
nr_segments);
return -EINVAL;
}
/*
 * NOTE(review): if every descriptor array has length 0,
 * nr_segments is 0 here and the allocation and sg_init_table()
 * below run with a count of 0 -- confirm this is safe.
 */
}
/* sgl is freed in scsiback_fast_flush_area() */
pending_req->sgl = kmalloc_array(nr_segments,
sizeof(struct scatterlist), GFP_KERNEL);
if (!pending_req->sgl)
return -ENOMEM;
sg_init_table(pending_req->sgl, nr_segments);
pending_req->n_sg = nr_segments;
flags = GNTMAP_host_map;
/* data that is only sent to the device can be mapped read-only */
if (pending_req->sc_data_direction == DMA_TO_DEVICE)
flags |= GNTMAP_readonly;
/* data pages follow the nr_sgl descriptor pages in pages[] */
pg = pending_req->pages + nr_sgl;
grant = pending_req->grant_handles + nr_sgl;
if (!nr_sgl) {
seg = ring_req->seg;
err = scsiback_gnttab_data_map_list(pending_req, seg,
pg, grant, nr_segments, flags);
if (err)
return err;
} else {
for (i = 0; i < nr_sgl; i++) {
/* descriptor array inside the i-th mapped descriptor page */
seg = (struct scsiif_request_segment *)(
vaddr(pending_req, i) + ring_req->seg[i].offset);
n_segs = ring_req->seg[i].length /
sizeof(struct scsiif_request_segment);
err = scsiback_gnttab_data_map_list(pending_req, seg,
pg, grant, n_segs, flags);
if (err)
return err;
pg += n_segs;
grant += n_segs;
}
/* rewind to the first descriptor for the scatterlist walk below */
end_seg = vaddr(pending_req, 0) + ring_req->seg[0].offset;
seg = (struct scsiif_request_segment *)end_seg;
end_seg += ring_req->seg[0].length;
pg = pending_req->pages + nr_sgl;
}
for_each_sg(pending_req->sgl, sg, nr_segments, i) {
sg_set_page(sg, pg[i], seg->length, seg->offset);
pending_req->data_len += seg->length;
seg++;
/*
 * past the end of the current descriptor array: continue with
 * the array in the next descriptor page.
 * NOTE(review): after the very last descriptor this still
 * advances i_seg and reads ring_req->seg[i_seg] and
 * pages[i_seg] once more -- confirm the indices stay in range.
 */
if (nr_sgl && (unsigned long)seg >= end_seg) {
i_seg++;
end_seg = vaddr(pending_req, i_seg) +
ring_req->seg[i_seg].offset;
seg = (struct scsiif_request_segment *)end_seg;
end_seg += ring_req->seg[i_seg].length;
}
/* every data segment must fit into a single page */
if (sg->offset >= PAGE_SIZE ||
sg->length > PAGE_SIZE ||
sg->offset + sg->length > PAGE_SIZE)
return -EINVAL;
}
return 0;
}
/*
 * Detach from the frontend: wait until no request is outstanding any
 * more, then unbind the irq handler and unmap the shared ring.
 */
static void scsiback_disconnect(struct vscsibk_info *info)
{
	/* scsiback_put() wakes us when the counter reaches zero */
	wait_event(info->waiting_to_free,
		   atomic_read(&info->nr_unreplied_reqs) == 0);

	unbind_from_irqhandler(info->irq, info);
	info->irq = 0;

	xenbus_unmap_ring_vfree(info->dev, info->ring.sring);
}
/*
 * Execute a task management function (abort or LUN reset) for a request.
 *
 * @pending_req: request carrying the function; freed before returning
 * @act:         task management function to perform
 * @tag:         tag of the task the function refers to
 *
 * The function is submitted to the target core and waited for.  The
 * frontend is answered with SUCCESS if the core reports
 * TMR_FUNCTION_COMPLETE and with FAILED in all other cases, including
 * setup failures.
 */
static void scsiback_device_action(struct vscsibk_pend *pending_req,
	enum tcm_tmreq_table act, int tag)
{
	struct se_cmd *se_cmd = &pending_req->se_cmd;
	struct scsiback_tpg *tpg = pending_req->v2p->tpg;
	struct scsiback_tmr *tmr;
	int result = FAILED;
	int rc;

	tmr = kzalloc(sizeof(*tmr), GFP_KERNEL);
	if (!tmr)
		goto respond;

	init_waitqueue_head(&tmr->tmr_wait);

	transport_init_se_cmd(se_cmd, tpg->se_tpg.se_tpg_tfo,
		tpg->tpg_nexus->tvn_se_sess, 0, DMA_NONE, MSG_SIMPLE_TAG,
		&pending_req->sense_buffer[0]);

	rc = core_tmr_alloc_req(se_cmd, tmr, act, GFP_KERNEL);
	if (rc < 0)
		goto release;

	se_cmd->se_tmr_req->ref_task_tag = tag;

	if (transport_lookup_tmr_lun(se_cmd, pending_req->v2p->lun) < 0)
		goto release;

	transport_generic_handle_tmr(se_cmd);
	wait_event(tmr->tmr_wait, atomic_read(&tmr->tmr_complete));

	if (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE)
		result = SUCCESS;

release:
	transport_generic_free_cmd(&pending_req->se_cmd, 1);
	kfree(tmr);
respond:
	scsiback_do_resp_with_sense(NULL, result, 0, pending_req);

	kmem_cache_free(scsiback_cachep, pending_req);
}
/*
Perform virtual to physical translation
*/
static struct v2p_entry *scsiback_do_translation(struct vscsibk_info *info,
struct ids_tuple *v)
{
struct v2p_entry *entry;
struct list_head *head = &(info->v2p_entry_lists);
unsigned long flags;
spin_lock_irqsave(&info->v2p_lock, flags);
list_for_each_entry(entry, head, l) {
if ((entry->v.chn == v->chn) &&
(entry->v.tgt == v->tgt) &&
(entry->v.lun == v->lun)) {
kref_get(&entry->kref);
goto out;
}
}
entry = NULL;
out:
spin_unlock_irqrestore(&info->v2p_lock, flags);
return entry;
}
/*
 * Fill @pending_req from the ring request and validate it.
 *
 * Copies the request id, resolves the addressed device, and checks and
 * copies data direction and CDB.  Returns 0 on success, -ENODEV if the
 * address is unknown (v2p is NULL then) or -EINVAL for an invalid data
 * direction or command length (v2p stays set and referenced).
 */
static int prepare_pending_reqs(struct vscsibk_info *info,
				struct vscsiif_request *ring_req,
				struct vscsibk_pend *pending_req)
{
	struct ids_tuple vir;
	struct v2p_entry *v2p;

	pending_req->rqid = ring_req->rqid;
	pending_req->info = info;

	vir.chn = ring_req->channel;
	vir.tgt = ring_req->id;
	vir.lun = ring_req->lun;

	v2p = scsiback_do_translation(info, &vir);
	pending_req->v2p = v2p;
	if (!v2p) {
		DPRINTK("xen-pvscsi: doesn't exist.\n");
		return -ENODEV;
	}

	/* request range check from frontend */
	pending_req->sc_data_direction = ring_req->sc_data_direction;
	switch (pending_req->sc_data_direction) {
	case DMA_BIDIRECTIONAL:
	case DMA_TO_DEVICE:
	case DMA_FROM_DEVICE:
	case DMA_NONE:
		break;
	default:
		DPRINTK("xen-pvscsi: invalid parameter data_dir = %d\n",
			pending_req->sc_data_direction);
		return -EINVAL;
	}

	pending_req->cmd_len = ring_req->cmd_len;
	if (pending_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE) {
		DPRINTK("xen-pvscsi: invalid parameter cmd_len = %d\n",
			pending_req->cmd_len);
		return -EINVAL;
	}
	memcpy(pending_req->cmnd, ring_req->cmnd, pending_req->cmd_len);

	return 0;
}
/*
 * Consume requests from the ring and dispatch them.
 *
 * Returns non-zero if the caller should call again (more requests may be
 * pending, or processing stopped early), 0 if the ring is drained or has
 * been halted because of bogus indices.
 */
static int scsiback_do_cmd_fn(struct vscsibk_info *info)
{
struct vscsiif_back_ring *ring = &info->ring;
struct vscsiif_request *ring_req;
struct vscsibk_pend *pending_req;
RING_IDX rc, rp;
int err, more_to_do;
uint32_t result;
uint8_t act;
rc = ring->req_cons;
rp = ring->sring->req_prod;
rmb(); /* guest system is accessing ring, too */
if (RING_REQUEST_PROD_OVERFLOW(ring, rp)) {
/* rc is reused here to report the private response producer index */
rc = ring->rsp_prod_pvt;
pr_warn("xen-pvscsi: Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n",
info->domid, rp, rc, rp - rc);
/* scsiback_irq_fn() ignores further events once this is set */
info->ring_error = 1;
return 0;
}
while ((rc != rp)) {
if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
break;
pending_req = kmem_cache_alloc(scsiback_cachep, GFP_KERNEL);
/* out of memory: the request stays on the ring, caller retries */
if (!pending_req)
return 1;
ring_req = RING_GET_REQUEST(ring, rc);
/* the request counts as consumed from here on */
ring->req_cons = ++rc;
act = ring_req->act;
err = prepare_pending_reqs(info, ring_req, pending_req);
if (err) {
/* answer the bad request directly and let the caller re-enter */
switch (err) {
case -ENODEV:
result = DID_NO_CONNECT;
break;
default:
result = DRIVER_ERROR;
break;
}
scsiback_do_resp_with_sense(NULL, result << 24, 0,
pending_req);
kmem_cache_free(scsiback_cachep, pending_req);
return 1;
}
switch (act) {
case VSCSIIF_ACT_SCSI_CDB:
if (scsiback_gnttab_data_map(ring_req, pending_req)) {
/* undo any partial mapping before reporting the error */
scsiback_fast_flush_area(pending_req);
scsiback_do_resp_with_sense(NULL,
DRIVER_ERROR << 24, 0, pending_req);
kmem_cache_free(scsiback_cachep, pending_req);
} else {
scsiback_cmd_exec(pending_req);
}
break;
case VSCSIIF_ACT_SCSI_ABORT:
/* scsiback_device_action() responds and frees pending_req */
scsiback_device_action(pending_req, TMR_ABORT_TASK,
ring_req->ref_rqid);
break;
case VSCSIIF_ACT_SCSI_RESET:
scsiback_device_action(pending_req, TMR_LUN_RESET, 0);
break;
default:
pr_err_ratelimited("xen-pvscsi: invalid request\n");
scsiback_do_resp_with_sense(NULL, DRIVER_ERROR << 24,
0, pending_req);
kmem_cache_free(scsiback_cachep, pending_req);
break;
}
/* Yield point for this unbounded loop. */
cond_resched();
}
RING_FINAL_CHECK_FOR_REQUESTS(&info->ring, more_to_do);
return more_to_do;
}
/*
 * Threaded interrupt handler for the backend event channel.
 *
 * @dev_id is the struct vscsibk_info registered with the irq.  Unless the
 * ring has been flagged with ring_error, requests are processed until
 * scsiback_do_cmd_fn() reports nothing more to do, rescheduling between
 * passes.  Always returns IRQ_HANDLED.
 */
static irqreturn_t scsiback_irq_fn(int irq, void *dev_id)
{
	struct vscsibk_info *info = dev_id;

	if (!info->ring_error) {
		while (scsiback_do_cmd_fn(info))
			cond_resched();
	}

	return IRQ_HANDLED;
}
/*
 * Map the frontend's shared ring and bind its event channel.
 *
 * Return: 0 on success; -1 when info->irq is already set; otherwise the
 * error from xenbus_map_ring_valloc(), bind_interdomain_evtchn_to_irq() or
 * request_threaded_irq().  On failure after mapping, the ring is unmapped
 * again and info->irq is left at 0.
 */
static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref,
			evtchn_port_t evtchn)
{
	struct vscsiif_sring *sring;
	void *area;
	int irq;
	int err;

	if (info->irq)
		return -1;

	err = xenbus_map_ring_valloc(info->dev, ring_ref, &area);
	if (err)
		return err;

	sring = (struct vscsiif_sring *)area;
	BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);

	irq = bind_interdomain_evtchn_to_irq(info->domid, evtchn);
	if (irq < 0) {
		err = irq;
		goto unmap_page;
	}
	info->irq = irq;

	err = request_threaded_irq(info->irq, NULL, scsiback_irq_fn,
				   IRQF_ONESHOT, "vscsiif-backend", info);
	if (!err)
		return 0;

	unbind_from_irqhandler(info->irq, info);
	info->irq = 0;
unmap_page:
	xenbus_unmap_ring_vfree(info->dev, area);

	return err;
}
/*
 * Read "ring-ref" and "event-channel" from the other end's xenstore
 * directory and set up the shared ring with them.
 *
 * Return: the xenbus_gather() error (also reported via xenbus_dev_fatal())
 * or the result of scsiback_init_sring().
 */
static int scsiback_map(struct vscsibk_info *info)
{
	struct xenbus_device *dev = info->dev;
	unsigned int ring_ref, evtchn;
	int err;

	err = xenbus_gather(XBT_NIL, dev->otherend,
			    "ring-ref", "%u", &ring_ref,
			    "event-channel", "%u", &evtchn, NULL);
	if (!err)
		return scsiback_init_sring(info, ring_ref, evtchn);

	xenbus_dev_fatal(dev, err, "reading %s ring", dev->otherend);
	return err;
}
/*
 * Add a new translation entry.
 *
 * @info: backend instance whose v2p list receives the entry
 * @phy:  physical device as "<name>:<lun>"; the string is modified in place
 *        (the last ':' is replaced by a NUL to split off the LUN)
 * @v:    virtual channel/target/lun to map
 *
 * <name> is matched against each registered TPG's tport name and alias; the
 * LUN must be active on that TPG and the TPG must have a nexus.
 *
 * Return: 0 on success, -EINVAL for a malformed @phy or LUN, -ENODEV when no
 * usable TPG is found, -ENOMEM on allocation failure, -EEXIST when the
 * virtual ID is already mapped.
 */
static int scsiback_add_translation_entry(struct vscsibk_info *info,
					  char *phy, struct ids_tuple *v)
{
	int err = 0;
	struct v2p_entry *entry;
	struct v2p_entry *new;
	struct list_head *head = &(info->v2p_entry_lists);
	unsigned long flags;
	char *lunp;
	unsigned int lun;
	struct scsiback_tpg *tpg_entry, *tpg = NULL;
	char *error = "doesn't exist";

	lunp = strrchr(phy, ':');
	if (!lunp) {
		pr_err("xen-pvscsi: illegal format of physical device %s\n",
			phy);
		return -EINVAL;
	}
	*lunp = 0;
	lunp++;
	if (kstrtouint(lunp, 10, &lun) || lun >= TRANSPORT_MAX_LUNS_PER_TPG) {
		pr_err("xen-pvscsi: lun number not valid: %s\n", lunp);
		return -EINVAL;
	}

	mutex_lock(&scsiback_mutex);
	list_for_each_entry(tpg_entry, &scsiback_list, tv_tpg_list) {
		if (!strcmp(phy, tpg_entry->tport->tport_name) ||
		    !strcmp(phy, tpg_entry->param_alias)) {
			spin_lock(&tpg_entry->se_tpg.tpg_lun_lock);
			if (tpg_entry->se_tpg.tpg_lun_list[lun]->lun_status ==
			    TRANSPORT_LUN_STATUS_ACTIVE) {
				if (!tpg_entry->tpg_nexus)
					error = "nexus undefined";
				else
					tpg = tpg_entry;
			}
			spin_unlock(&tpg_entry->se_tpg.tpg_lun_lock);
			break;
		}
	}
	if (tpg) {
		/* Account for this frontend while scsiback_mutex is held. */
		mutex_lock(&tpg->tv_tpg_mutex);
		tpg->tv_tpg_fe_count++;
		mutex_unlock(&tpg->tv_tpg_mutex);
	}
	mutex_unlock(&scsiback_mutex);

	if (!tpg) {
		pr_err("xen-pvscsi: %s:%d %s\n", phy, lun, error);
		return -ENODEV;
	}

	new = kmalloc(sizeof(struct v2p_entry), GFP_KERNEL);
	if (new == NULL) {
		err = -ENOMEM;
		goto out_free;
	}

	spin_lock_irqsave(&info->v2p_lock, flags);

	/* Check double assignment to identical virtual ID */
	list_for_each_entry(entry, head, l) {
		if ((entry->v.chn == v->chn) &&
		    (entry->v.tgt == v->tgt) &&
		    (entry->v.lun == v->lun)) {
			pr_warn("xen-pvscsi: Virtual ID is already used. Assignment was not performed.\n");
			err = -EEXIST;
			goto out;
		}
	}

	/* Create a new translation entry and add to the list */
	kref_init(&new->kref);
	new->v = *v;
	new->tpg = tpg;
	new->lun = lun;
	list_add_tail(&new->l, head);

out:
	spin_unlock_irqrestore(&info->v2p_lock, flags);

out_free:
	/*
	 * Undo the frontend accounting only when the entry was NOT added.
	 * On success the count must stay raised for as long as the entry
	 * exists; scsiback_drop_nexus() relies on tv_tpg_fe_count to refuse
	 * removal of a nexus that is still in use.
	 * NOTE(review): the matching decrement for a live entry is expected
	 * in the kref release callback (scsiback_free_translation_entry,
	 * not visible in this chunk) - confirm.
	 */
	if (err) {
		mutex_lock(&tpg->tv_tpg_mutex);
		tpg->tv_tpg_fe_count--;
		mutex_unlock(&tpg->tv_tpg_mutex);
		kfree(new);	/* NULL on the -ENOMEM path; kfree() accepts it */
	}

	return err;
}
/*
 * Unlink @entry from its v2p list and drop one reference on it;
 * scsiback_free_translation_entry() is the kref release callback.
 * Both callers in this file invoke it with info->v2p_lock held.
 */
static void __scsiback_del_translation_entry(struct v2p_entry *entry)
{
	struct kref *ref = &entry->kref;

	list_del(&entry->l);
	kref_put(ref, scsiback_free_translation_entry);
}
/*
 * Delete the translation entry specified by the virtual address @v.
 *
 * Return: 0 when a matching entry was found and removed, 1 when no entry
 * on @info matches.
 */
static int scsiback_del_translation_entry(struct vscsibk_info *info,
					  struct ids_tuple *v)
{
	struct v2p_entry *cur;
	unsigned long flags;
	int ret = 1;

	spin_lock_irqsave(&info->v2p_lock, flags);
	list_for_each_entry(cur, &info->v2p_entry_lists, l) {
		if (cur->v.chn != v->chn ||
		    cur->v.tgt != v->tgt ||
		    cur->v.lun != v->lun)
			continue;

		/* Leave the loop right away: cur must not be touched again. */
		__scsiback_del_translation_entry(cur);
		ret = 0;
		break;
	}
	spin_unlock_irqrestore(&info->v2p_lock, flags);

	return ret;
}
/*
 * Add the mapping @phy -> @vir and publish the outcome in xenstore node
 * @state: XenbusStateInitialised on success, XenbusStateClosed when the
 * mapping could not be created.  If writing Initialised fails, the freshly
 * added mapping is removed again.
 */
static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state,
				char *phy, struct ids_tuple *vir)
{
	if (scsiback_add_translation_entry(info, phy, vir)) {
		xenbus_printf(XBT_NIL, info->dev->nodename, state,
			      "%d", XenbusStateClosed);
		return;
	}

	if (xenbus_printf(XBT_NIL, info->dev->nodename, state,
			  "%d", XenbusStateInitialised)) {
		pr_err("xen-pvscsi: xenbus_printf error %s\n", state);
		scsiback_del_translation_entry(info, vir);
	}
}
/*
 * Remove the mapping for @vir; when one was removed, write
 * XenbusStateClosed to xenstore node @state and log a failure of that write.
 */
static void scsiback_do_del_lun(struct vscsibk_info *info, const char *state,
				struct ids_tuple *vir)
{
	if (scsiback_del_translation_entry(info, vir))
		return;

	if (xenbus_printf(XBT_NIL, info->dev->nodename, state,
			  "%d", XenbusStateClosed))
		pr_err("xen-pvscsi: xenbus_printf error %s\n", state);
}
#define VSCSIBACK_OP_ADD_OR_DEL_LUN 1
#define VSCSIBACK_OP_UPDATEDEV_STATE 2
/*
 * Process one "vscsi-devs/<ent>" xenstore entry.
 *
 * @info: backend instance
 * @op:   VSCSIBACK_OP_ADD_OR_DEL_LUN or VSCSIBACK_OP_UPDATEDEV_STATE
 * @ent:  directory name below "vscsi-devs"
 *
 * Reads the entry's state, physical device (p-dev) and virtual address
 * (v-dev).  For ADD_OR_DEL, an Initialising entry is added and a Closing
 * entry is deleted.  For UPDATEDEV_STATE, an Initialised entry is moved to
 * Connected.  An unreadable p-dev or v-dev sets the entry to Closed.
 */
static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op,
				     char *ent)
{
	int err;
	struct ids_tuple vir;
	char *val;
	unsigned int device_state;	/* scanned with "%u" below */
	char phy[VSCSI_NAMELEN];
	char str[64];
	char state[64];
	struct xenbus_device *dev = info->dev;

	/* read status */
	snprintf(state, sizeof(state), "vscsi-devs/%s/state", ent);
	err = xenbus_scanf(XBT_NIL, dev->nodename, state, "%u", &device_state);
	/*
	 * Bail out on every failure, not only the "does not exist" ones:
	 * device_state is left uninitialized whenever the read fails.
	 */
	if (err < 0)
		return;

	/* physical SCSI device */
	snprintf(str, sizeof(str), "vscsi-devs/%s/p-dev", ent);
	val = xenbus_read(XBT_NIL, dev->nodename, str, NULL);
	if (IS_ERR(val)) {
		xenbus_printf(XBT_NIL, dev->nodename, state,
			      "%d", XenbusStateClosed);
		return;
	}
	strlcpy(phy, val, VSCSI_NAMELEN);
	kfree(val);

	/* virtual SCSI device */
	snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", ent);
	err = xenbus_scanf(XBT_NIL, dev->nodename, str, "%u:%u:%u:%u",
			   &vir.hst, &vir.chn, &vir.tgt, &vir.lun);
	/*
	 * All four fields must have been parsed; anything else (an error or
	 * a partial match) would leave part of vir uninitialized.
	 */
	if (err != 4) {
		xenbus_printf(XBT_NIL, dev->nodename, state,
			      "%d", XenbusStateClosed);
		return;
	}

	switch (op) {
	case VSCSIBACK_OP_ADD_OR_DEL_LUN:
		if (device_state == XenbusStateInitialising)
			scsiback_do_add_lun(info, state, phy, &vir);
		if (device_state == XenbusStateClosing)
			scsiback_do_del_lun(info, state, &vir);
		break;

	case VSCSIBACK_OP_UPDATEDEV_STATE:
		if (device_state == XenbusStateInitialised) {
			/* modify vscsi-devs/dev-x/state */
			if (xenbus_printf(XBT_NIL, dev->nodename, state,
					  "%d", XenbusStateConnected)) {
				/* Log the node whose write failed (state). */
				pr_err("xen-pvscsi: xenbus_printf error %s\n",
				       state);
				scsiback_del_translation_entry(info, &vir);
				xenbus_printf(XBT_NIL, dev->nodename, state,
					      "%d", XenbusStateClosed);
			}
		}
		break;
	/* When it is necessary, processing is added here. */
	default:
		break;
	}
}
/*
 * Apply @op to every entry listed under "vscsi-devs" in this device's
 * xenstore directory.  Does nothing when the directory cannot be read.
 */
static void scsiback_do_lun_hotplug(struct vscsibk_info *info, int op)
{
	unsigned int idx;
	unsigned int ndir = 0;
	char **dir;

	dir = xenbus_directory(XBT_NIL, info->dev->nodename, "vscsi-devs",
			       &ndir);
	if (IS_ERR(dir))
		return;

	for (idx = 0; idx < ndir; idx++)
		scsiback_do_1lun_hotplug(info, op, dir[idx]);

	kfree(dir);
}
/*
 * xenbus otherend_changed callback (installed in scsiback_driver): react to
 * the frontend entering @frontend_state.
 */
static void scsiback_frontend_changed(struct xenbus_device *dev,
enum xenbus_state frontend_state)
{
struct vscsibk_info *info = dev_get_drvdata(&dev->dev);
switch (frontend_state) {
case XenbusStateInitialising:
break;
case XenbusStateInitialised:
/* Map the ring first; stay in the current state if that fails. */
if (scsiback_map(info))
break;
scsiback_do_lun_hotplug(info, VSCSIBACK_OP_ADD_OR_DEL_LUN);
xenbus_switch_state(dev, XenbusStateConnected);
break;
case XenbusStateConnected:
scsiback_do_lun_hotplug(info, VSCSIBACK_OP_UPDATEDEV_STATE);
/* Only switch state when the backend is not already Connected. */
if (dev->state == XenbusStateConnected)
break;
xenbus_switch_state(dev, XenbusStateConnected);
break;
case XenbusStateClosing:
/* A nonzero irq means the ring was set up by scsiback_init_sring(). */
if (info->irq)
scsiback_disconnect(info);
xenbus_switch_state(dev, XenbusStateClosing);
break;
case XenbusStateClosed:
xenbus_switch_state(dev, XenbusStateClosed);
if (xenbus_dev_is_online(dev))
break;
/* fall through if not online */
case XenbusStateUnknown:
device_unregister(&dev->dev);
break;
case XenbusStateReconfiguring:
/* Hotplug request: add/remove LUNs, then acknowledge. */
scsiback_do_lun_hotplug(info, VSCSIBACK_OP_ADD_OR_DEL_LUN);
xenbus_switch_state(dev, XenbusStateReconfigured);
break;
default:
xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
frontend_state);
break;
}
}
/*
 * Release every translation entry of @info: each one is unlinked and has
 * one reference dropped while info->v2p_lock is held.
 */
static void scsiback_release_translation_entry(struct vscsibk_info *info)
{
	struct v2p_entry *cur, *next;
	unsigned long flags;

	spin_lock_irqsave(&info->v2p_lock, flags);

	list_for_each_entry_safe(cur, next, &info->v2p_entry_lists, l)
		__scsiback_del_translation_entry(cur);

	spin_unlock_irqrestore(&info->v2p_lock, flags);
}
/*
 * xenbus remove callback: disconnect the ring when an irq is bound, drop
 * all translation entries and clear the device's driver data.
 * Always returns 0.
 */
static int scsiback_remove(struct xenbus_device *dev)
{
	struct vscsibk_info *info;

	info = dev_get_drvdata(&dev->dev);
	if (info->irq)
		scsiback_disconnect(info);

	scsiback_release_translation_entry(info);
	dev_set_drvdata(&dev->dev, NULL);

	return 0;
}
/*
 * xenbus probe callback: allocate and initialise the per-device
 * struct vscsibk_info, advertise "feature-sg-grant" and switch the backend
 * to XenbusStateInitWait.
 *
 * Return: 0 on success, -ENOMEM when the info structure cannot be
 * allocated, or the xenbus_switch_state() error (after scsiback_remove()).
 * A failure to write "feature-sg-grant" is reported but is not fatal.
 */
static int scsiback_probe(struct xenbus_device *dev,
			   const struct xenbus_device_id *id)
{
	struct vscsibk_info *info;
	int err;

	info = kzalloc(sizeof(*info), GFP_KERNEL);

	DPRINTK("%p %d\n", dev, dev->otherend_id);

	if (!info) {
		xenbus_dev_fatal(dev, -ENOMEM, "allocating backend structure");
		return -ENOMEM;
	}

	info->dev = dev;
	dev_set_drvdata(&dev->dev, info);

	info->domid = dev->otherend_id;
	spin_lock_init(&info->ring_lock);
	info->ring_error = 0;
	atomic_set(&info->nr_unreplied_reqs, 0);
	init_waitqueue_head(&info->waiting_to_free);
	info->irq = 0;
	INIT_LIST_HEAD(&info->v2p_entry_lists);
	spin_lock_init(&info->v2p_lock);

	err = xenbus_printf(XBT_NIL, dev->nodename, "feature-sg-grant", "%u",
			    SG_ALL);
	if (err)
		xenbus_dev_error(dev, err, "writing feature-sg-grant");

	err = xenbus_switch_state(dev, XenbusStateInitWait);
	if (!err)
		return 0;

	pr_warn("xen-pvscsi: %s failed\n", __func__);
	scsiback_remove(dev);

	return err;
}
/*
 * Return a printable name for tport->tport_proto_id: "SAS", "FCP", "iSCSI",
 * or "Unknown" for any other value.
 */
static char *scsiback_dump_proto_id(struct scsiback_tport *tport)
{
	char *label = "Unknown";

	switch (tport->tport_proto_id) {
	case SCSI_PROTOCOL_SAS:
		label = "SAS";
		break;
	case SCSI_PROTOCOL_FCP:
		label = "FCP";
		break;
	case SCSI_PROTOCOL_ISCSI:
		label = "iSCSI";
		break;
	default:
		break;
	}

	return label;
}
/*
 * Return the fabric protocol identifier for @se_tpg by delegating to the
 * FC, iSCSI or SAS helper matching the tport's protocol id.  Unknown ids
 * are logged and handled by the SAS helper.
 */
static u8 scsiback_get_fabric_proto_ident(struct se_portal_group *se_tpg)
{
	struct scsiback_tpg *tpg;
	struct scsiback_tport *tport;

	tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg);
	tport = tpg->tport;

	switch (tport->tport_proto_id) {
	case SCSI_PROTOCOL_FCP:
		return fc_get_fabric_proto_ident(se_tpg);
	case SCSI_PROTOCOL_ISCSI:
		return iscsi_get_fabric_proto_ident(se_tpg);
	case SCSI_PROTOCOL_SAS:
		break;
	default:
		pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n",
			tport->tport_proto_id);
		break;
	}

	return sas_get_fabric_proto_ident(se_tpg);
}
/* Return the tport name of the TPG that embeds @se_tpg. */
static char *scsiback_get_fabric_wwn(struct se_portal_group *se_tpg)
{
	struct scsiback_tpg *tpg;

	tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg);

	return tpg->tport->tport_name;
}
/* Return the tport_tpgt value of the TPG that embeds @se_tpg. */
static u16 scsiback_get_tag(struct se_portal_group *se_tpg)
{
	return container_of(se_tpg, struct scsiback_tpg, se_tpg)->tport_tpgt;
}
/* tpg_get_default_depth callback: constant depth of 1; @se_tpg is unused. */
static u32 scsiback_get_default_depth(struct se_portal_group *se_tpg)
{
	return 1;
}
/*
 * Build the persistent-reservation transport id by delegating to the FC,
 * iSCSI or SAS helper matching the tport's protocol id.  Unknown ids are
 * logged and handled by the SAS helper.
 */
static u32
scsiback_get_pr_transport_id(struct se_portal_group *se_tpg,
			     struct se_node_acl *se_nacl,
			     struct t10_pr_registration *pr_reg,
			     int *format_code,
			     unsigned char *buf)
{
	struct scsiback_tpg *tpg;
	struct scsiback_tport *tport;

	tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg);
	tport = tpg->tport;

	switch (tport->tport_proto_id) {
	case SCSI_PROTOCOL_FCP:
		return fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
					format_code, buf);
	case SCSI_PROTOCOL_ISCSI:
		return iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
					format_code, buf);
	case SCSI_PROTOCOL_SAS:
		break;
	default:
		pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n",
			tport->tport_proto_id);
		break;
	}

	return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
				format_code, buf);
}
/*
 * Return the persistent-reservation transport id length by delegating to
 * the FC, iSCSI or SAS helper matching the tport's protocol id.  Unknown
 * ids are logged and handled by the SAS helper.
 */
static u32
scsiback_get_pr_transport_id_len(struct se_portal_group *se_tpg,
				 struct se_node_acl *se_nacl,
				 struct t10_pr_registration *pr_reg,
				 int *format_code)
{
	struct scsiback_tpg *tpg;
	struct scsiback_tport *tport;

	tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg);
	tport = tpg->tport;

	switch (tport->tport_proto_id) {
	case SCSI_PROTOCOL_FCP:
		return fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
					format_code);
	case SCSI_PROTOCOL_ISCSI:
		return iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
					format_code);
	case SCSI_PROTOCOL_SAS:
		break;
	default:
		pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n",
			tport->tport_proto_id);
		break;
	}

	return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
				format_code);
}
/*
 * Parse a PR OUT transport id by delegating to the FC, iSCSI or SAS helper
 * matching the tport's protocol id.  Unknown ids are logged and handled by
 * the SAS helper.
 */
static char *
scsiback_parse_pr_out_transport_id(struct se_portal_group *se_tpg,
				   const char *buf,
				   u32 *out_tid_len,
				   char **port_nexus_ptr)
{
	struct scsiback_tpg *tpg;
	struct scsiback_tport *tport;

	tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg);
	tport = tpg->tport;

	switch (tport->tport_proto_id) {
	case SCSI_PROTOCOL_FCP:
		return fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
					port_nexus_ptr);
	case SCSI_PROTOCOL_ISCSI:
		return iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
					port_nexus_ptr);
	case SCSI_PROTOCOL_SAS:
		break;
	default:
		pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n",
			tport->tport_proto_id);
		break;
	}

	return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
				port_nexus_ptr);
}
/*
 * fabric_make_wwn callback: allocate a target port for configfs directory
 * @name.
 *
 * The emulated protocol is chosen by the first of "naa." (SAS), "fc." (FCP)
 * or "iqn." (iSCSI) found in @name.  For FCP the first three characters of
 * @name are skipped when the port name is stored.
 *
 * Return: pointer to the embedded se_wwn, ERR_PTR(-ENOMEM) on allocation
 * failure, ERR_PTR(-EINVAL) when no known prefix is present or @name is
 * VSCSI_NAMELEN characters or longer.
 */
static struct se_wwn *
scsiback_make_tport(struct target_fabric_configfs *tf,
		     struct config_group *group,
		     const char *name)
{
	struct scsiback_tport *tport;
	char *ptr;
	u64 wwpn = 0;
	int off = 0;

	tport = kzalloc(sizeof(struct scsiback_tport), GFP_KERNEL);
	if (!tport)
		return ERR_PTR(-ENOMEM);

	tport->tport_wwpn = wwpn;
	/*
	 * Determine the emulated Protocol Identifier and Target Port Name
	 * based on the incoming configfs directory name.
	 */
	ptr = strstr(name, "naa.");
	if (ptr) {
		tport->tport_proto_id = SCSI_PROTOCOL_SAS;
		goto check_len;
	}
	ptr = strstr(name, "fc.");
	if (ptr) {
		tport->tport_proto_id = SCSI_PROTOCOL_FCP;
		off = 3; /* Skip over "fc." */
		goto check_len;
	}
	ptr = strstr(name, "iqn.");
	if (ptr) {
		tport->tport_proto_id = SCSI_PROTOCOL_ISCSI;
		goto check_len;
	}

	pr_err("Unable to locate prefix for emulated Target Port: %s\n", name);
	kfree(tport);
	return ERR_PTR(-EINVAL);

check_len:
	if (strlen(name) >= VSCSI_NAMELEN) {
		/* Protocol name first, then the address, as the format says. */
		pr_err("Emulated %s Address: %s, exceeds max: %d\n",
			scsiback_dump_proto_id(tport), name, VSCSI_NAMELEN);
		kfree(tport);
		return ERR_PTR(-EINVAL);
	}
	snprintf(&tport->tport_name[0], VSCSI_NAMELEN, "%s", &name[off]);

	pr_debug("xen-pvscsi: Allocated emulated Target %s Address: %s\n",
		 scsiback_dump_proto_id(tport), name);

	return &tport->tport_wwn;
}
/* fabric_drop_wwn callback: free the tport that embeds @wwn. */
static void scsiback_drop_tport(struct se_wwn *wwn)
{
	struct scsiback_tport *tport;

	tport = container_of(wwn, struct scsiback_tport, tport_wwn);

	pr_debug("xen-pvscsi: Deallocating emulated Target %s Address: %s\n",
		 scsiback_dump_proto_id(tport), tport->tport_name);

	kfree(tport);
}
/*
 * tpg_alloc_fabric_acl callback: return a zeroed struct se_node_acl, or
 * NULL when the allocation fails.  @se_tpg is unused.
 */
static struct se_node_acl *
scsiback_alloc_fabric_acl(struct se_portal_group *se_tpg)
{
	struct se_node_acl *nacl;

	nacl = kzalloc(sizeof(struct se_node_acl), GFP_KERNEL);

	return nacl;
}
/* tpg_release_fabric_acl callback: free @se_nacl.  @se_tpg is unused. */
static void
scsiback_release_fabric_acl(struct se_portal_group *se_tpg,
			    struct se_node_acl *se_nacl)
{
	kfree(se_nacl);
}
/* tpg_get_inst_index callback: constant index 1; @se_tpg is unused. */
static u32 scsiback_tpg_get_inst_index(struct se_portal_group *se_tpg)
{
	return 1;
}
/*
 * check_stop_free callback.
 *
 * Commands flagged SCF_SCSI_TMR_CDB are left alone (return 0); the comment
 * in the original code states they are released in
 * scsiback_device_action().  Every other command is freed here via
 * transport_generic_free_cmd() and 1 is returned.
 */
static int scsiback_check_stop_free(struct se_cmd *se_cmd)
{
	if (!(se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) {
		transport_generic_free_cmd(se_cmd, 0);
		return 1;
	}

	return 0;
}
/*
 * release_cmd callback: return the vscsibk_pend that embeds @se_cmd to
 * scsiback_cachep.
 */
static void scsiback_release_cmd(struct se_cmd *se_cmd)
{
	kmem_cache_free(scsiback_cachep,
			container_of(se_cmd, struct vscsibk_pend, se_cmd));
}
/* shutdown_session callback: no work to do, always returns 0. */
static int scsiback_shutdown_session(struct se_session *se_sess)
{
	return 0;
}
/* close_session callback: intentionally empty. */
static void scsiback_close_session(struct se_session *se_sess)
{
}
/* sess_get_index callback: constant index 0; @se_sess is unused. */
static u32 scsiback_sess_get_index(struct se_session *se_sess)
{
	return 0;
}
/*
 * write_pending callback: the write is processed immediately by handing
 * @se_cmd to target_execute_cmd().  Always returns 0.
 */
static int scsiback_write_pending(struct se_cmd *se_cmd)
{
	target_execute_cmd(se_cmd);

	return 0;
}
/* write_pending_status callback: always returns 0. */
static int scsiback_write_pending_status(struct se_cmd *se_cmd)
{
	return 0;
}
/* set_default_node_attributes callback: intentionally empty. */
static void scsiback_set_default_node_attrs(struct se_node_acl *nacl)
{
}
/*
 * get_task_tag callback: the tag is the rqid stored in the vscsibk_pend
 * that embeds @se_cmd.
 */
static u32 scsiback_get_task_tag(struct se_cmd *se_cmd)
{
	return container_of(se_cmd, struct vscsibk_pend, se_cmd)->rqid;
}
/* get_cmd_state callback: always returns 0. */
static int scsiback_get_cmd_state(struct se_cmd *se_cmd)
{
	return 0;
}
/*
 * queue_data_in callback: record SAM_STAT_GOOD as the result of the
 * pending request embedding @se_cmd and complete it.  Always returns 0.
 */
static int scsiback_queue_data_in(struct se_cmd *se_cmd)
{
	struct vscsibk_pend *pend;

	pend = container_of(se_cmd, struct vscsibk_pend, se_cmd);
	pend->result = SAM_STAT_GOOD;
	scsiback_cmd_done(pend);

	return 0;
}
/*
 * queue_status callback: set the result of the pending request embedding
 * @se_cmd and complete it.
 *
 * When a sense buffer is present and either sense flag is set on the
 * command, the result is DRIVER_SENSE (bits 24+) combined with
 * SAM_STAT_CHECK_CONDITION; otherwise it is se_cmd->scsi_status.
 * Always returns 0.
 */
static int scsiback_queue_status(struct se_cmd *se_cmd)
{
	struct vscsibk_pend *pend;

	pend = container_of(se_cmd, struct vscsibk_pend, se_cmd);

	if (se_cmd->sense_buffer &&
	    (se_cmd->se_cmd_flags &
	     (SCF_TRANSPORT_TASK_SENSE | SCF_EMULATED_TASK_SENSE))) {
		pend->result = (DRIVER_SENSE << 24) |
			       SAM_STAT_CHECK_CONDITION;
	} else {
		pend->result = se_cmd->scsi_status;
	}

	scsiback_cmd_done(pend);

	return 0;
}
/*
 * queue_tm_rsp callback: mark the fabric TMR attached to @se_cmd as
 * complete and wake anyone sleeping on its wait queue.
 */
static void scsiback_queue_tm_rsp(struct se_cmd *se_cmd)
{
	struct scsiback_tmr *tmr = se_cmd->se_tmr_req->fabric_tmr_ptr;

	atomic_set(&tmr->tmr_complete, 1);
	wake_up(&tmr->tmr_wait);
}
/* aborted_task callback: intentionally empty. */
static void scsiback_aborted_task(struct se_cmd *se_cmd)
{
}
/*
 * configfs show handler for the "alias" parameter: print the TPG's alias
 * followed by a newline into @page, under tv_tpg_mutex.
 * Returns the snprintf() result.
 */
static ssize_t scsiback_tpg_param_show_alias(struct se_portal_group *se_tpg,
					     char *page)
{
	struct scsiback_tpg *tpg;
	ssize_t len;

	tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg);

	mutex_lock(&tpg->tv_tpg_mutex);
	len = snprintf(page, PAGE_SIZE, "%s\n", tpg->param_alias);
	mutex_unlock(&tpg->tv_tpg_mutex);

	return len;
}
/*
 * configfs store handler for the "alias" parameter.
 *
 * Copies @page into tpg->param_alias under tv_tpg_mutex and strips one
 * trailing newline.
 *
 * Return: @count on success, -EINVAL when @page is VSCSI_NAMELEN characters
 * or longer.
 */
static ssize_t scsiback_tpg_param_store_alias(struct se_portal_group *se_tpg,
					      const char *page, size_t count)
{
	struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg,
						se_tpg);
	int len;

	if (strlen(page) >= VSCSI_NAMELEN) {
		pr_err("param alias: %s, exceeds max: %d\n", page,
			VSCSI_NAMELEN);
		return -EINVAL;
	}

	mutex_lock(&tpg->tv_tpg_mutex);
	len = snprintf(tpg->param_alias, VSCSI_NAMELEN, "%s", page);
	/*
	 * An empty @page yields len == 0; without the guard the newline
	 * check would read (and possibly write) param_alias[-1].
	 */
	if (len > 0 && tpg->param_alias[len - 1] == '\n')
		tpg->param_alias[len - 1] = '\0';
	mutex_unlock(&tpg->tv_tpg_mutex);

	return count;
}
/*
 * Declare the "alias" TPG parameter attribute with mode S_IRUGO | S_IWUSR.
 * NOTE(review): TF_TPG_PARAM_ATTR is defined outside this chunk; presumably
 * it generates scsiback_tpg_param_alias from the show/store handlers.
 */
TF_TPG_PARAM_ATTR(scsiback, alias, S_IRUGO | S_IWUSR);
/*
 * NULL-terminated TPG parameter attribute list; installed as
 * tfc_tpg_param_cit.ct_attrs by scsiback_register_configfs().
 */
static struct configfs_attribute *scsiback_param_attrs[] = {
&scsiback_tpg_param_alias.attr,
NULL,
};
static int scsiback_make_nexus(struct scsiback_tpg *tpg,
const char *name)
{
struct se_portal_group *se_tpg;
struct se_session *se_sess;
struct scsiback_nexus *tv_nexus;
mutex_lock(&tpg->tv_tpg_mutex);
if (tpg->tpg_nexus) {
mutex_unlock(&tpg->tv_tpg_mutex);
pr_debug("tpg->tpg_nexus already exists\n");
return -EEXIST;
}
se_tpg = &tpg->se_tpg;
tv_nexus = kzalloc(sizeof(struct scsiback_nexus), GFP_KERNEL);
if (!tv_nexus) {
mutex_unlock(&tpg->tv_tpg_mutex);
return -ENOMEM;
}
/*
* Initialize the struct se_session pointer
*/
tv_nexus->tvn_se_sess = transport_init_session(TARGET_PROT_NORMAL);
if (IS_ERR(tv_nexus->tvn_se_sess)) {
mutex_unlock(&tpg->tv_tpg_mutex);
kfree(tv_nexus);
return -ENOMEM;
}
se_sess = tv_nexus->tvn_se_sess;
/*
* Since we are running in 'demo mode' this call with generate a
* struct se_node_acl for the scsiback struct se_portal_group with
* the SCSI Initiator port name of the passed configfs group 'name'.
*/
tv_nexus->tvn_se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
se_tpg, (unsigned char *)name);
if (!tv_nexus->tvn_se_sess->se_node_acl) {
mutex_unlock(&tpg->tv_tpg_mutex);
pr_debug("core_tpg_check_initiator_node_acl() failed for %s\n",
name);
goto out;
}
/*
* Now register the TCM pvscsi virtual I_T Nexus as active with the
* call to __transport_register_session()
*/
__transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
tv_nexus->tvn_se_sess, tv_nexus);
tpg->tpg_nexus = tv_nexus;
mutex_unlock(&tpg->tv_tpg_mutex);
return 0;
out:
transport_free_session(se_sess);
kfree(tv_nexus);
return -ENOMEM;
}
static int scsiback_drop_nexus(struct scsiback_tpg *tpg)
{
struct se_session *se_sess;
struct scsiback_nexus *tv_nexus;
mutex_lock(&tpg->tv_tpg_mutex);
tv_nexus = tpg->tpg_nexus;
if (!tv_nexus) {
mutex_unlock(&tpg->tv_tpg_mutex);
return -ENODEV;
}
se_sess = tv_nexus->tvn_se_sess;
if (!se_sess) {
mutex_unlock(&tpg->tv_tpg_mutex);
return -ENODEV;
}
if (tpg->tv_tpg_port_count != 0) {
mutex_unlock(&tpg->tv_tpg_mutex);
pr_err("Unable to remove xen-pvscsi I_T Nexus with active TPG port count: %d\n",
tpg->tv_tpg_port_count);
return -EBUSY;
}
if (tpg->tv_tpg_fe_count != 0) {
mutex_unlock(&tpg->tv_tpg_mutex);
pr_err("Unable to remove xen-pvscsi I_T Nexus with active TPG frontend count: %d\n",
tpg->tv_tpg_fe_count);
return -EBUSY;
}
pr_debug("xen-pvscsi: Removing I_T Nexus to emulated %s Initiator Port: %s\n",
scsiback_dump_proto_id(tpg->tport),
tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
/*
* Release the SCSI I_T Nexus to the emulated xen-pvscsi Target Port
*/
transport_deregister_session(tv_nexus->tvn_se_sess);
tpg->tpg_nexus = NULL;
mutex_unlock(&tpg->tv_tpg_mutex);
kfree(tv_nexus);
return 0;
}
/*
 * configfs show handler for the "nexus" attribute: print the initiator
 * name of the TPG's nexus plus a newline into @page, under tv_tpg_mutex.
 *
 * Return: the snprintf() result, or -ENODEV when the TPG has no nexus.
 */
static ssize_t scsiback_tpg_show_nexus(struct se_portal_group *se_tpg,
				       char *page)
{
	struct scsiback_tpg *tpg;
	struct scsiback_nexus *tv_nexus;
	ssize_t ret = -ENODEV;

	tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg);

	mutex_lock(&tpg->tv_tpg_mutex);
	tv_nexus = tpg->tpg_nexus;
	if (tv_nexus)
		ret = snprintf(page, PAGE_SIZE, "%s\n",
			tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
	mutex_unlock(&tpg->tv_tpg_mutex);

	return ret;
}
/*
 * configfs store handler for the "nexus" attribute.
 *
 * Input starting with "NULL" drops the active nexus.  Any other input is
 * taken as an initiator port name: it must contain "naa.", "fc." or "iqn."
 * (checked in that order) and the prefix must agree with the protocol id of
 * the target port; a trailing newline is removed and the nexus is created.
 * For "fc." the first three characters of the input are skipped.
 *
 * Return: @count on success, -EINVAL for an over-long, unrecognised or
 * mismatching name, or the error from scsiback_drop_nexus() /
 * scsiback_make_nexus().
 */
static ssize_t scsiback_tpg_store_nexus(struct se_portal_group *se_tpg,
					const char *page,
					size_t count)
{
	struct scsiback_tpg *tpg = container_of(se_tpg,
				struct scsiback_tpg, se_tpg);
	struct scsiback_tport *tport_wwn = tpg->tport;
	unsigned char i_port[VSCSI_NAMELEN], *port_ptr;
	size_t last;
	int ret;

	/* Shutdown the active I_T nexus if 'NULL' is passed. */
	if (!strncmp(page, "NULL", 4)) {
		ret = scsiback_drop_nexus(tpg);
		if (ret)
			return ret;
		return count;
	}

	/*
	 * Otherwise make sure the passed virtual Initiator port WWN matches
	 * the fabric protocol_id set in scsiback_make_tport(), and call
	 * scsiback_make_nexus().
	 */
	if (strlen(page) >= VSCSI_NAMELEN) {
		pr_err("Emulated NAA Sas Address: %s, exceeds max: %d\n",
			page, VSCSI_NAMELEN);
		return -EINVAL;
	}
	snprintf(&i_port[0], VSCSI_NAMELEN, "%s", page);

	if (strstr(i_port, "naa.")) {
		if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_SAS) {
			pr_err("Passed SAS Initiator Port %s does not match target port protoid: %s\n",
				i_port, scsiback_dump_proto_id(tport_wwn));
			return -EINVAL;
		}
		port_ptr = &i_port[0];
	} else if (strstr(i_port, "fc.")) {
		if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_FCP) {
			pr_err("Passed FCP Initiator Port %s does not match target port protoid: %s\n",
				i_port, scsiback_dump_proto_id(tport_wwn));
			return -EINVAL;
		}
		port_ptr = &i_port[3]; /* Skip over "fc." */
	} else if (strstr(i_port, "iqn.")) {
		if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_ISCSI) {
			pr_err("Passed iSCSI Initiator Port %s does not match target port protoid: %s\n",
				i_port, scsiback_dump_proto_id(tport_wwn));
			return -EINVAL;
		}
		port_ptr = &i_port[0];
	} else {
		pr_err("Unable to locate prefix for emulated Initiator Port: %s\n",
			i_port);
		return -EINVAL;
	}

	/*
	 * Clear any trailing newline.  i_port cannot be empty here because
	 * one of the prefixes was found in it.
	 */
	last = strlen(i_port) - 1;
	if (i_port[last] == '\n')
		i_port[last] = '\0';

	ret = scsiback_make_nexus(tpg, port_ptr);
	if (ret < 0)
		return ret;

	return count;
}
/*
 * Declare the "nexus" TPG base attribute with mode S_IRUGO | S_IWUSR.
 * NOTE(review): TF_TPG_BASE_ATTR is defined outside this chunk; presumably
 * it generates scsiback_tpg_nexus from the show/store handlers.
 */
TF_TPG_BASE_ATTR(scsiback, nexus, S_IRUGO | S_IWUSR);
/*
 * NULL-terminated TPG base attribute list; installed as
 * tfc_tpg_base_cit.ct_attrs by scsiback_register_configfs().
 */
static struct configfs_attribute *scsiback_tpg_attrs[] = {
&scsiback_tpg_nexus.attr,
NULL,
};
/*
 * configfs show handler for the read-only "version" attribute: print the
 * module version, the utsname sysname/machine and UTS_RELEASE into @page.
 * Returns the sprintf() result.
 */
static ssize_t
scsiback_wwn_show_attr_version(struct target_fabric_configfs *tf,
				char *page)
{
	ssize_t len;

	len = sprintf(page, "xen-pvscsi fabric module %s on %s/%s on "
		      UTS_RELEASE"\n",
		      VSCSI_VERSION, utsname()->sysname, utsname()->machine);

	return len;
}
/*
 * Declare the read-only "version" WWN attribute.
 * NOTE(review): TF_WWN_ATTR_RO is defined outside this chunk; presumably it
 * generates scsiback_wwn_version from the show handler.
 */
TF_WWN_ATTR_RO(scsiback, version);
/*
 * NULL-terminated WWN attribute list; installed as tfc_wwn_cit.ct_attrs by
 * scsiback_register_configfs().
 */
static struct configfs_attribute *scsiback_wwn_attrs[] = {
&scsiback_wwn_version.attr,
NULL,
};
/* get_fabric_name callback: the fabric is always called "xen-pvscsi". */
static char *scsiback_get_fabric_name(void)
{
	char *fabric_name = "xen-pvscsi";

	return fabric_name;
}
/*
 * fabric_post_link callback: count one more linked port on the TPG, under
 * tv_tpg_mutex.  @lun is unused.  Always returns 0.
 */
static int scsiback_port_link(struct se_portal_group *se_tpg,
			      struct se_lun *lun)
{
	struct scsiback_tpg *tpg;

	tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg);

	mutex_lock(&tpg->tv_tpg_mutex);
	tpg->tv_tpg_port_count++;
	mutex_unlock(&tpg->tv_tpg_mutex);

	return 0;
}
/*
 * fabric_pre_unlink callback: count one linked port less on the TPG, under
 * tv_tpg_mutex.  @lun is unused.
 */
static void scsiback_port_unlink(struct se_portal_group *se_tpg,
				 struct se_lun *lun)
{
	struct scsiback_tpg *tpg;

	tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg);

	mutex_lock(&tpg->tv_tpg_mutex);
	tpg->tv_tpg_port_count--;
	mutex_unlock(&tpg->tv_tpg_mutex);
}
/*
 * fabric_make_tpg callback: create a TPG for configfs directory @name.
 *
 * @name must start with "tpgt_" followed by a decimal u16.  The new TPG is
 * registered with the target core and appended to scsiback_list.
 *
 * Return: the embedded se_portal_group; ERR_PTR(-EINVAL) for a wrong
 * prefix; ERR_PTR() of the kstrtou16() error; ERR_PTR(-ENOMEM) on
 * allocation failure; NULL when core_tpg_register() fails.
 */
static struct se_portal_group *
scsiback_make_tpg(struct se_wwn *wwn,
		   struct config_group *group,
		   const char *name)
{
	struct scsiback_tport *tport;
	struct scsiback_tpg *tpg;
	u16 tpgt;
	int ret;

	tport = container_of(wwn, struct scsiback_tport, tport_wwn);

	if (strncmp(name, "tpgt_", 5) != 0)
		return ERR_PTR(-EINVAL);

	ret = kstrtou16(name + 5, 10, &tpgt);
	if (ret)
		return ERR_PTR(ret);

	tpg = kzalloc(sizeof(*tpg), GFP_KERNEL);
	if (!tpg)
		return ERR_PTR(-ENOMEM);

	mutex_init(&tpg->tv_tpg_mutex);
	INIT_LIST_HEAD(&tpg->tv_tpg_list);
	INIT_LIST_HEAD(&tpg->info_list);
	tpg->tport = tport;
	tpg->tport_tpgt = tpgt;

	ret = core_tpg_register(&scsiback_fabric_configfs->tf_ops, wwn,
				&tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL);
	if (ret < 0) {
		kfree(tpg);
		return NULL;
	}

	mutex_lock(&scsiback_mutex);
	list_add_tail(&tpg->tv_tpg_list, &scsiback_list);
	mutex_unlock(&scsiback_mutex);

	return &tpg->se_tpg;
}
/*
 * fabric_drop_tpg callback: unlink the TPG from scsiback_list, drop its
 * nexus (the result is ignored), deregister it from the target core and
 * free it.
 */
static void scsiback_drop_tpg(struct se_portal_group *se_tpg)
{
	struct scsiback_tpg *tpg;

	tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg);

	mutex_lock(&scsiback_mutex);
	list_del(&tpg->tv_tpg_list);
	mutex_unlock(&scsiback_mutex);

	/* Release the virtual I_T Nexus for this xen-pvscsi TPG. */
	scsiback_drop_nexus(tpg);

	/* Deregister the se_tpg from TCM. */
	core_tpg_deregister(se_tpg);

	kfree(tpg);
}
/* Fabric predicate that is always true (1); @se_tpg is unused. */
static int scsiback_check_true(struct se_portal_group *se_tpg)
{
	return 1;
}
/* Fabric predicate that is always false (0); @se_tpg is unused. */
static int scsiback_check_false(struct se_portal_group *se_tpg)
{
	return 0;
}
/*
 * Fabric operations table for xen-pvscsi.  It is copied into
 * fabric->tf_ops by scsiback_register_configfs().  Demo mode and demo-mode
 * caching are enabled (scsiback_check_true); both write-protect checks are
 * disabled (scsiback_check_false).
 */
static struct target_core_fabric_ops scsiback_ops = {
.get_fabric_name = scsiback_get_fabric_name,
.get_fabric_proto_ident = scsiback_get_fabric_proto_ident,
.tpg_get_wwn = scsiback_get_fabric_wwn,
.tpg_get_tag = scsiback_get_tag,
.tpg_get_default_depth = scsiback_get_default_depth,
.tpg_get_pr_transport_id = scsiback_get_pr_transport_id,
.tpg_get_pr_transport_id_len = scsiback_get_pr_transport_id_len,
.tpg_parse_pr_out_transport_id = scsiback_parse_pr_out_transport_id,
.tpg_check_demo_mode = scsiback_check_true,
.tpg_check_demo_mode_cache = scsiback_check_true,
.tpg_check_demo_mode_write_protect = scsiback_check_false,
.tpg_check_prod_mode_write_protect = scsiback_check_false,
.tpg_alloc_fabric_acl = scsiback_alloc_fabric_acl,
.tpg_release_fabric_acl = scsiback_release_fabric_acl,
.tpg_get_inst_index = scsiback_tpg_get_inst_index,
.check_stop_free = scsiback_check_stop_free,
.release_cmd = scsiback_release_cmd,
.put_session = NULL,
.shutdown_session = scsiback_shutdown_session,
.close_session = scsiback_close_session,
.sess_get_index = scsiback_sess_get_index,
.sess_get_initiator_sid = NULL,
.write_pending = scsiback_write_pending,
.write_pending_status = scsiback_write_pending_status,
.set_default_node_attributes = scsiback_set_default_node_attrs,
.get_task_tag = scsiback_get_task_tag,
.get_cmd_state = scsiback_get_cmd_state,
.queue_data_in = scsiback_queue_data_in,
.queue_status = scsiback_queue_status,
.queue_tm_rsp = scsiback_queue_tm_rsp,
.aborted_task = scsiback_aborted_task,
/*
* Setup callers for generic logic in target_core_fabric_configfs.c
*/
.fabric_make_wwn = scsiback_make_tport,
.fabric_drop_wwn = scsiback_drop_tport,
.fabric_make_tpg = scsiback_make_tpg,
.fabric_drop_tpg = scsiback_drop_tpg,
.fabric_post_link = scsiback_port_link,
.fabric_pre_unlink = scsiback_port_unlink,
.fabric_make_np = NULL,
.fabric_drop_np = NULL,
/* Node ACL callbacks are compiled out; no such handlers appear in this chunk. */
#if 0
.fabric_make_nodeacl = scsiback_make_nodeacl,
.fabric_drop_nodeacl = scsiback_drop_nodeacl,
#endif
};
/*
 * Register the "xen-pvscsi" fabric with the target core.
 *
 * Creates the fabric configfs object, installs scsiback_ops and the
 * attribute lists, registers it and stores it in scsiback_fabric_configfs.
 *
 * Return: 0 on success, the PTR_ERR() of target_fabric_configfs_init() or
 * the negative result of target_fabric_configfs_register() (the fabric is
 * freed in that case).
 */
static int scsiback_register_configfs(void)
{
	struct target_fabric_configfs *fabric;
	int ret;

	pr_debug("xen-pvscsi: fabric module %s on %s/%s on "UTS_RELEASE"\n",
		 VSCSI_VERSION, utsname()->sysname, utsname()->machine);

	/* Register the top level struct config_item_type with TCM core. */
	fabric = target_fabric_configfs_init(THIS_MODULE, "xen-pvscsi");
	if (IS_ERR(fabric))
		return PTR_ERR(fabric);

	/* Setup fabric->tf_ops from our local scsiback_ops. */
	fabric->tf_ops = scsiback_ops;

	/* Setup default attribute lists for various fabric->tf_cit_tmpl. */
	fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = scsiback_wwn_attrs;
	fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = scsiback_tpg_attrs;
	fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
	fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = scsiback_param_attrs;
	fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
	fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
	fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
	fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
	fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;

	/* Register the fabric for use within TCM. */
	ret = target_fabric_configfs_register(fabric);
	if (ret >= 0) {
		/* Setup our local pointer to *fabric. */
		scsiback_fabric_configfs = fabric;
		pr_debug("xen-pvscsi: Set fabric -> scsiback_fabric_configfs\n");
		return 0;
	}

	target_fabric_configfs_free(fabric);
	return ret;
}
/*
 * Deregister the fabric stored in scsiback_fabric_configfs and clear the
 * pointer.  Does nothing when no fabric is registered.
 */
static void scsiback_deregister_configfs(void)
{
	if (scsiback_fabric_configfs) {
		target_fabric_configfs_deregister(scsiback_fabric_configfs);
		scsiback_fabric_configfs = NULL;
		pr_debug("xen-pvscsi: Cleared scsiback_fabric_configfs\n");
	}
}
/*
 * Device id table referenced by scsiback_driver.ids: the "vscsi" device
 * type, followed by an empty string as the last entry.
 */
static const struct xenbus_device_id scsiback_ids[] = {
{ "vscsi" },
{ "" }
};
/*
 * xenbus driver description; registered by scsiback_init() through
 * xenbus_register_backend() and unregistered by scsiback_exit().
 */
static struct xenbus_driver scsiback_driver = {
.ids = scsiback_ids,
.probe = scsiback_probe,
.remove = scsiback_remove,
.otherend_changed = scsiback_frontend_changed
};
/*
 * Put a struct vscsibk_pend into its pristine state: all bytes zero and
 * every grant handle slot set to SCSIBACK_INVALID_HANDLE.
 *
 * @p: untyped pointer to the struct vscsibk_pend to initialise (the void *
 *     signature lets it be passed as the kmem_cache constructor).
 */
static void scsiback_init_pend(void *p)
{
	struct vscsibk_pend *req = p;
	int slot = 0;

	memset(req, 0, sizeof(*req));

	while (slot < VSCSI_MAX_GRANTS) {
		req->grant_handles[slot] = SCSIBACK_INVALID_HANDLE;
		slot++;
	}
}
/*
 * Module init: create the vscsibk_pend cache, register the xenbus backend
 * driver and then the configfs fabric. A failure in one of the two
 * registrations unwinds whatever was set up before it and logs the error.
 *
 * Returns 0 on success, -ENODEV when not running in a Xen domain, -ENOMEM
 * when the cache cannot be created, or the error of the failing registration.
 */
static int __init scsiback_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	scsiback_cachep = kmem_cache_create("vscsiif_cache",
					    sizeof(struct vscsibk_pend),
					    0, 0, scsiback_init_pend);
	if (scsiback_cachep == NULL)
		return -ENOMEM;

	err = xenbus_register_backend(&scsiback_driver);
	if (err)
		goto fail_cache;

	err = scsiback_register_configfs();
	if (err)
		goto fail_xenbus;

	return 0;

fail_xenbus:
	xenbus_unregister_driver(&scsiback_driver);
fail_cache:
	kmem_cache_destroy(scsiback_cachep);
	pr_err("xen-pvscsi: %s: error %d\n", __func__, err);
	return err;
}
/*
 * Module exit: hand back every page still counted in free_pages_num, then
 * tear down configfs, the xenbus driver and the cache (reverse of init).
 */
static void __exit scsiback_exit(void)
{
	struct page *pg;

	/*
	 * Each pass takes one page via get_free_page() and returns it with
	 * free_xenballooned_pages(); a failing get while the counter is still
	 * non-zero is treated as fatal.
	 */
	while (free_pages_num != 0) {
		if (get_free_page(&pg) != 0)
			BUG();
		free_xenballooned_pages(1, &pg);
	}

	scsiback_deregister_configfs();
	xenbus_unregister_driver(&scsiback_driver);
	kmem_cache_destroy(scsiback_cachep);
}
/* Module entry/exit hooks followed by the module's descriptive metadata. */
module_init(scsiback_init);
module_exit(scsiback_exit);
MODULE_DESCRIPTION("Xen SCSI backend driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vscsi");
MODULE_AUTHOR("Juergen Gross <jgross@suse.com>");
...@@ -259,7 +259,6 @@ static char *error_path(struct xenbus_device *dev) ...@@ -259,7 +259,6 @@ static char *error_path(struct xenbus_device *dev)
static void xenbus_va_dev_error(struct xenbus_device *dev, int err, static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
const char *fmt, va_list ap) const char *fmt, va_list ap)
{ {
int ret;
unsigned int len; unsigned int len;
char *printf_buffer = NULL; char *printf_buffer = NULL;
char *path_buffer = NULL; char *path_buffer = NULL;
...@@ -270,9 +269,7 @@ static void xenbus_va_dev_error(struct xenbus_device *dev, int err, ...@@ -270,9 +269,7 @@ static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
goto fail; goto fail;
len = sprintf(printf_buffer, "%i ", -err); len = sprintf(printf_buffer, "%i ", -err);
ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1);
dev_err(&dev->dev, "%s\n", printf_buffer); dev_err(&dev->dev, "%s\n", printf_buffer);
...@@ -361,8 +358,8 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err, ...@@ -361,8 +358,8 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
* @ring_mfn: mfn of ring to grant * @ring_mfn: mfn of ring to grant
* Grant access to the given @ring_mfn to the peer of the given device. Return * Grant access to the given @ring_mfn to the peer of the given device. Return
* 0 on success, or -errno on error. On error, the device will switch to * a grant reference on success, or -errno on error. On error, the device will
* XenbusStateClosing, and the error will be saved in the store. * switch to XenbusStateClosing, and the error will be saved in the store.
*/ */
int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
{ {
......
...@@ -297,9 +297,13 @@ void xenbus_dev_shutdown(struct device *_dev) ...@@ -297,9 +297,13 @@ void xenbus_dev_shutdown(struct device *_dev)
EXPORT_SYMBOL_GPL(xenbus_dev_shutdown); EXPORT_SYMBOL_GPL(xenbus_dev_shutdown);
int xenbus_register_driver_common(struct xenbus_driver *drv, int xenbus_register_driver_common(struct xenbus_driver *drv,
struct xen_bus_type *bus) struct xen_bus_type *bus,
struct module *owner, const char *mod_name)
{ {
drv->driver.name = drv->name ? drv->name : drv->ids[0].devicetype;
drv->driver.bus = &bus->bus; drv->driver.bus = &bus->bus;
drv->driver.owner = owner;
drv->driver.mod_name = mod_name;
return driver_register(&drv->driver); return driver_register(&drv->driver);
} }
......
...@@ -60,7 +60,9 @@ extern int xenbus_match(struct device *_dev, struct device_driver *_drv); ...@@ -60,7 +60,9 @@ extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
extern int xenbus_dev_probe(struct device *_dev); extern int xenbus_dev_probe(struct device *_dev);
extern int xenbus_dev_remove(struct device *_dev); extern int xenbus_dev_remove(struct device *_dev);
extern int xenbus_register_driver_common(struct xenbus_driver *drv, extern int xenbus_register_driver_common(struct xenbus_driver *drv,
struct xen_bus_type *bus); struct xen_bus_type *bus,
struct module *owner,
const char *mod_name);
extern int xenbus_probe_node(struct xen_bus_type *bus, extern int xenbus_probe_node(struct xen_bus_type *bus,
const char *type, const char *type,
const char *nodename); const char *nodename);
......
...@@ -234,13 +234,15 @@ int xenbus_dev_is_online(struct xenbus_device *dev) ...@@ -234,13 +234,15 @@ int xenbus_dev_is_online(struct xenbus_device *dev)
} }
EXPORT_SYMBOL_GPL(xenbus_dev_is_online); EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
int xenbus_register_backend(struct xenbus_driver *drv) int __xenbus_register_backend(struct xenbus_driver *drv, struct module *owner,
const char *mod_name)
{ {
drv->read_otherend_details = read_frontend_details; drv->read_otherend_details = read_frontend_details;
return xenbus_register_driver_common(drv, &xenbus_backend); return xenbus_register_driver_common(drv, &xenbus_backend,
owner, mod_name);
} }
EXPORT_SYMBOL_GPL(xenbus_register_backend); EXPORT_SYMBOL_GPL(__xenbus_register_backend);
static int backend_probe_and_watch(struct notifier_block *notifier, static int backend_probe_and_watch(struct notifier_block *notifier,
unsigned long event, unsigned long event,
......
...@@ -317,13 +317,15 @@ static void wait_for_devices(struct xenbus_driver *xendrv) ...@@ -317,13 +317,15 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
print_device_status); print_device_status);
} }
int xenbus_register_frontend(struct xenbus_driver *drv) int __xenbus_register_frontend(struct xenbus_driver *drv, struct module *owner,
const char *mod_name)
{ {
int ret; int ret;
drv->read_otherend_details = read_backend_details; drv->read_otherend_details = read_backend_details;
ret = xenbus_register_driver_common(drv, &xenbus_frontend); ret = xenbus_register_driver_common(drv, &xenbus_frontend,
owner, mod_name);
if (ret) if (ret)
return ret; return ret;
...@@ -332,7 +334,7 @@ int xenbus_register_frontend(struct xenbus_driver *drv) ...@@ -332,7 +334,7 @@ int xenbus_register_frontend(struct xenbus_driver *drv)
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(xenbus_register_frontend); EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
static int backend_state; static int backend_state;
......
...@@ -28,6 +28,8 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, ...@@ -28,6 +28,8 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
unsigned long irqflags, unsigned long irqflags,
const char *devname, const char *devname,
void *dev_id); void *dev_id);
int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
unsigned int remote_port);
int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
unsigned int remote_port, unsigned int remote_port,
irq_handler_t handler, irq_handler_t handler,
......
...@@ -3,6 +3,24 @@ ...@@ -3,6 +3,24 @@
* *
* Definitions used for the Xen ELF notes. * Definitions used for the Xen ELF notes.
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Copyright (c) 2006, Ian Campbell, XenSource Ltd. * Copyright (c) 2006, Ian Campbell, XenSource Ltd.
*/ */
...@@ -18,12 +36,13 @@ ...@@ -18,12 +36,13 @@
* *
* LEGACY indicated the fields in the legacy __xen_guest string which * LEGACY indicated the fields in the legacy __xen_guest string which
* this a note type replaces. * this a note type replaces.
*
* String values (for non-legacy) are NULL terminated ASCII, also known
* as ASCIZ type.
*/ */
/* /*
* NAME=VALUE pair (string). * NAME=VALUE pair (string).
*
* LEGACY: FEATURES and PAE
*/ */
#define XEN_ELFNOTE_INFO 0 #define XEN_ELFNOTE_INFO 0
...@@ -137,9 +156,29 @@ ...@@ -137,9 +156,29 @@
/* /*
* Whether or not the guest supports cooperative suspend cancellation. * Whether or not the guest supports cooperative suspend cancellation.
* This is a numeric value.
*
* Default is 0
*/ */
#define XEN_ELFNOTE_SUSPEND_CANCEL 14 #define XEN_ELFNOTE_SUSPEND_CANCEL 14
/*
* The (non-default) location the initial phys-to-machine map should be
* placed at by the hypervisor (Dom0) or the tools (DomU).
* The kernel must be prepared for this mapping to be established using
* large pages, despite such otherwise not being available to guests.
* The kernel must also be able to handle the page table pages used for
* this mapping not being accessible through the initial mapping.
* (Only x86-64 supports this at present.)
*/
#define XEN_ELFNOTE_INIT_P2M 15
/*
* Whether or not the guest can deal with being passed an initrd not
* mapped through its initial page tables.
*/
#define XEN_ELFNOTE_MOD_START_PFN 16
/* /*
* The features supported by this kernel (numeric). * The features supported by this kernel (numeric).
* *
...@@ -153,6 +192,11 @@ ...@@ -153,6 +192,11 @@
*/ */
#define XEN_ELFNOTE_SUPPORTED_FEATURES 17 #define XEN_ELFNOTE_SUPPORTED_FEATURES 17
/*
* The number of the highest elfnote defined.
*/
#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES
#endif /* __XEN_PUBLIC_ELFNOTE_H__ */ #endif /* __XEN_PUBLIC_ELFNOTE_H__ */
/* /*
......
/******************************************************************************
* vscsiif.h
*
* Based on the blkif.h code.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Copyright(c) FUJITSU Limited 2008.
*/
#ifndef __XEN__PUBLIC_IO_SCSI_H__
#define __XEN__PUBLIC_IO_SCSI_H__
#include "ring.h"
#include "../grant_table.h"
/*
* Feature and Parameter Negotiation
* =================================
* The two halves of a Xen pvSCSI driver utilize nodes within the XenStore to
* communicate capabilities and to negotiate operating parameters. This
* section enumerates these nodes which reside in the respective front and
* backend portions of the XenStore, following the XenBus convention.
*
* Any specified default value is in effect if the corresponding XenBus node
* is not present in the XenStore.
*
* XenStore nodes in sections marked "PRIVATE" are solely for use by the
* driver side whose XenBus tree contains them.
*
*****************************************************************************
* Backend XenBus Nodes
*****************************************************************************
*
*------------------ Backend Device Identification (PRIVATE) ------------------
*
* p-devname
* Values: string
*
* A free string used to identify the physical device (e.g. a disk name).
*
* p-dev
* Values: string
*
* A string specifying the backend device: either a 4-tuple "h:c:t:l"
* (host, controller, target, lun, all integers), or a WWN (e.g.
* "naa.60014054ac780582").
*
* v-dev
* Values: string
*
* A string specifying the frontend device in form of a 4-tuple "h:c:t:l"
* (host, controller, target, lun, all integers).
*
*--------------------------------- Features ---------------------------------
*
* feature-sg-grant
* Values: unsigned [VSCSIIF_SG_TABLESIZE...65535]
* Default Value: 0
*
* Specifies the maximum number of scatter/gather elements in grant pages
* supported. If not set, the backend supports up to VSCSIIF_SG_TABLESIZE
* SG elements specified directly in the request.
*
*****************************************************************************
* Frontend XenBus Nodes
*****************************************************************************
*
*----------------------- Request Transport Parameters -----------------------
*
* event-channel
* Values: unsigned
*
* The identifier of the Xen event channel used to signal activity
* in the ring buffer.
*
* ring-ref
* Values: unsigned
*
* The Xen grant reference granting permission for the backend to map
* the sole page in a single page sized ring buffer.
*
* protocol
* Values: string (XEN_IO_PROTO_ABI_*)
* Default Value: XEN_IO_PROTO_ABI_NATIVE
*
* The machine ABI rules governing the format of all ring request and
* response structures.
*/
/* Requests from the frontend to the backend */
/*
* Request a SCSI operation specified via a CDB in vscsiif_request.cmnd.
* The target is specified via channel, id and lun.
*
* The operation to be performed is specified via a CDB in cmnd[], the length
* of the CDB is in cmd_len. sc_data_direction specifies the direction of data
* (to the device, from the device, or none at all).
*
* If data is to be transferred to or from the device the buffer(s) in the
* guest memory is/are specified via one or multiple scsiif_request_segment
* descriptors each specifying a memory page via a grant_ref_t, an offset into
* the page and the length of the area in that page. All scsiif_request_segment
* areas concatenated form the resulting data buffer used by the operation.
* If the number of scsiif_request_segment areas is not too large (less than
* or equal to VSCSIIF_SG_TABLESIZE) the areas can be specified directly in the
* seg[] array and the number of valid scsiif_request_segment elements is to be
* set in nr_segments.
*
* If "feature-sg-grant" in the Xenstore is set it is possible to specify more
* than VSCSIIF_SG_TABLESIZE scsiif_request_segment elements via indirection.
* The maximum number of allowed scsiif_request_segment elements is the value
* of the "feature-sg-grant" entry from Xenstore. When using indirection the
* seg[] array doesn't contain specifications of the data buffers, but
* references to scsiif_request_segment arrays, which in turn reference the
* data buffers. While nr_segments holds the number of populated seg[] entries
* (plus the set VSCSIIF_SG_GRANT bit), the number of scsiif_request_segment
* elements referencing the target data buffers is calculated from the lengths
* of the seg[] elements (the sum of all valid seg[].length divided by the
* size of one scsiif_request_segment structure).
*/
#define VSCSIIF_ACT_SCSI_CDB 1
/*
* Request abort of a running operation for the specified target given by
* channel, id, lun and the operation's rqid in ref_rqid.
*/
#define VSCSIIF_ACT_SCSI_ABORT 2
/*
* Request a device reset of the specified target (channel and id).
*/
#define VSCSIIF_ACT_SCSI_RESET 3
/*
* Preset scatter/gather elements for a following request. Deprecated.
* Keeping the define only to avoid usage of the value "4" for other actions.
*/
#define VSCSIIF_ACT_SCSI_SG_PRESET 4
/*
* Maximum scatter/gather segments per request.
*
* Considering balance between allocating at least 16 "vscsiif_request"
* structures on one page (4096 bytes) and the number of scatter/gather
* elements needed, we decided to use 26 as a magic number.
*
* If "feature-sg-grant" is set, more scatter/gather elements can be specified
* by placing them in one or more (up to VSCSIIF_SG_TABLESIZE) granted pages.
* In this case the vscsiif_request seg elements don't contain references to
* the user data, but to the SG elements referencing the user data.
*/
#define VSCSIIF_SG_TABLESIZE 26
/*
* based on Linux kernel 2.6.18, still valid
* Changing these values requires support of multiple protocols via the rings
* as "old clients" will blindly use these values and the resulting structure
* sizes.
*/
/* Size in bytes of the cmnd[] CDB array in struct vscsiif_request. */
#define VSCSIIF_MAX_COMMAND_SIZE 16
/* Size in bytes of the sense_buffer[] array in struct vscsiif_response. */
#define VSCSIIF_SENSE_BUFFERSIZE 96
/*
 * One scatter/gather element: an area of a granted page.
 * The field order is part of the ring ABI and must not change.
 */
struct scsiif_request_segment {
grant_ref_t gref; /* grant reference of the page */
uint16_t offset; /* offset of the area within the page */
uint16_t length; /* length of the area in that page */
};
/* Number of struct scsiif_request_segment entries that fit into one page. */
#define VSCSIIF_SG_PER_PAGE (PAGE_SIZE / sizeof(struct scsiif_request_segment))
/*
 * Request placed on the ring by the frontend.
 *
 * Size of one request is 252 bytes: 32 bytes of fixed fields, 26 segment
 * entries of 8 bytes each (assuming a 32-bit grant_ref_t) and 12 reserved
 * bytes. The layout is part of the ring ABI and must not change.
 */
struct vscsiif_request {
uint16_t rqid; /* private guest value, echoed in resp */
uint8_t act; /* command between backend and frontend
		(presumably one of VSCSIIF_ACT_*) */
uint8_t cmd_len; /* valid CDB bytes */
uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; /* the CDB */
uint16_t timeout_per_command; /* deprecated */
uint16_t channel, id, lun; /* (virtual) device specification */
uint16_t ref_rqid; /* command abort reference */
uint8_t sc_data_direction; /* data direction of the request:
			      DMA_TO_DEVICE(1), DMA_FROM_DEVICE(2)
			      or DMA_NONE(3) */
uint8_t nr_segments; /* Number of pieces of scatter-gather */
/*
 * flag in nr_segments: SG elements via grant page
 *
 * If VSCSIIF_SG_GRANT is set, the low 7 bits of nr_segments specify the number
 * of grant pages containing SG elements. Usable if "feature-sg-grant" set.
 */
#define VSCSIIF_SG_GRANT 0x80
struct scsiif_request_segment seg[VSCSIIF_SG_TABLESIZE];
uint32_t reserved[3];
};
/*
 * Response placed on the ring for a request.
 *
 * Size of one response is 252 bytes: 4 bytes of header, 96 bytes of sense
 * data, 8 bytes of result/residual and 144 reserved bytes, which makes it the
 * same size as struct vscsiif_request. The layout is part of the ring ABI.
 */
struct vscsiif_response {
uint16_t rqid; /* identifies request */
uint8_t padding;
uint8_t sense_len; /* presumably the number of valid bytes in sense_buffer */
uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
int32_t rslt; /* presumably the result code of the operation */
uint32_t residual_len; /* request bufflen -
return the value from physical device */
uint32_t reserved[36];
};
/*
 * Instantiate the ring types for the vscsiif request/response pair.
 * NOTE(review): DEFINE_RING_TYPES is presumably provided by "ring.h";
 * see there for the names it generates.
 */
DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response);
#endif /*__XEN__PUBLIC_IO_SCSI_H__*/
...@@ -3,6 +3,24 @@ ...@@ -3,6 +3,24 @@
* *
* Guest OS interface to Xen. * Guest OS interface to Xen.
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Copyright (c) 2004, K A Fraser * Copyright (c) 2004, K A Fraser
*/ */
...@@ -73,13 +91,23 @@ ...@@ -73,13 +91,23 @@
* VIRTUAL INTERRUPTS * VIRTUAL INTERRUPTS
* *
* Virtual interrupts that a guest OS may receive from Xen. * Virtual interrupts that a guest OS may receive from Xen.
* In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a
* global VIRQ. The former can be bound once per VCPU and cannot be re-bound.
* The latter can be allocated only once per guest: they must initially be
* allocated to VCPU0 but can subsequently be re-bound.
*/ */
#define VIRQ_TIMER 0 /* Timebase update, and/or requested timeout. */ #define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */
#define VIRQ_DEBUG 1 /* Request guest to dump debug info. */ #define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */
#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ #define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */
#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ #define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */
#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ #define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */
#define VIRQ_PCPU_STATE 9 /* (DOM0) PCPU state changed */ #define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */
#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */
#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */
#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */
#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */
#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */
#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */
/* Architecture-specific VIRQ definitions. */ /* Architecture-specific VIRQ definitions. */
#define VIRQ_ARCH_0 16 #define VIRQ_ARCH_0 16
...@@ -92,24 +120,68 @@ ...@@ -92,24 +120,68 @@
#define VIRQ_ARCH_7 23 #define VIRQ_ARCH_7 23
#define NR_VIRQS 24 #define NR_VIRQS 24
/* /*
* MMU-UPDATE REQUESTS * enum neg_errnoval HYPERVISOR_mmu_update(const struct mmu_update reqs[],
* * unsigned count, unsigned *done_out,
* HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs. * unsigned foreigndom)
* A foreigndom (FD) can be specified (or DOMID_SELF for none). * @reqs is an array of mmu_update_t structures ((ptr, val) pairs).
* Where the FD has some effect, it is described below. * @count is the length of the above array.
* ptr[1:0] specifies the appropriate MMU_* command. * @pdone is an output parameter indicating number of completed operations
* @foreigndom[15:0]: FD, the expected owner of data pages referenced in this
* hypercall invocation. Can be DOMID_SELF.
* @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced
* in this hypercall invocation. The value of this field
* (x) encodes the PFD as follows:
* x == 0 => PFD == DOMID_SELF
* x != 0 => PFD == x - 1
* *
* Sub-commands: ptr[1:0] specifies the appropriate MMU_* command.
* -------------
* ptr[1:0] == MMU_NORMAL_PT_UPDATE: * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
* Updates an entry in a page table. If updating an L1 table, and the new * Updates an entry in a page table belonging to PFD. If updating an L1 table,
* table entry is valid/present, the mapped frame must belong to the FD, if * and the new table entry is valid/present, the mapped frame must belong to
* an FD has been specified. If attempting to map an I/O page then the * FD. If attempting to map an I/O page then the caller assumes the privilege
* caller assumes the privilege of the FD. * of the FD.
* FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
* FD == DOMID_XEN: Map restricted areas of Xen's heap space. * FD == DOMID_XEN: Map restricted areas of Xen's heap space.
* ptr[:2] -- Machine address of the page-table entry to modify. * ptr[:2] -- Machine address of the page-table entry to modify.
* val -- Value to write. * val -- Value to write.
* *
* There are also certain implicit requirements when using this hypercall. The
* pages that make up a pagetable must be mapped read-only in the guest.
* This prevents uncontrolled guest updates to the pagetable. Xen strictly
* enforces this, and will disallow any pagetable update which will end up
* mapping pagetable page RW, and will disallow using any writable page as a
* pagetable. In practice it means that when constructing a page table for a
* process, thread, etc, we MUST be very diligent in following these rules:
* 1). Start with top-level page (PGD or in Xen language: L4). Fill out
* the entries.
* 2). Keep on going, filling out the upper (PUD or L3), and middle (PMD
* or L2).
* 3). Start filling out the PTE table (L1) with the PTE entries. Once
* done, make sure to set each of those entries to RO (so writeable bit
* is unset). Once that has been completed, set the PMD (L2) for this
* PTE table as RO.
* 4). When completed with all of the PMD (L2) entries, and all of them have
* been set to RO, make sure to set RO the PUD (L3). Do the same
* operation on PGD (L4) pagetable entries that have a PUD (L3) entry.
* 5). Now before you can use those pages (so setting the cr3), you MUST also
* pin them so that the hypervisor can verify the entries. This is done
* via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame
* number of the PGD (L4)). At this point the HYPERVISOR_mmuext_op(
* MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be
* issued.
* For 32-bit guests, the L4 is not used (as there is less pagetables), so
* instead use L3.
* At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE
* hypercall. Also if so desired the OS can also try to write to the PTE
* and be trapped by the hypervisor (as the PTE entry is RO).
*
* To deallocate the pages, the operations are the reverse of the steps
* mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the
* pagetable MUST not be in use (meaning that the cr3 is not set to it).
*
* ptr[1:0] == MMU_MACHPHYS_UPDATE: * ptr[1:0] == MMU_MACHPHYS_UPDATE:
* Updates an entry in the machine->pseudo-physical mapping table. * Updates an entry in the machine->pseudo-physical mapping table.
* ptr[:2] -- Machine address within the frame whose mapping to modify. * ptr[:2] -- Machine address within the frame whose mapping to modify.
...@@ -119,6 +191,72 @@ ...@@ -119,6 +191,72 @@
* ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD:
* As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed
* with those in @val. * with those in @val.
*
* @val is usually the machine frame number along with some attributes.
* The attributes by default follow the architecture defined bits. Meaning that
* if this is a X86_64 machine and four page table layout is used, the layout
* of val is:
* - 63 if set means No execute (NX)
* - 46-13 the machine frame number
* - 12 available for guest
* - 11 available for guest
* - 10 available for guest
* - 9 available for guest
* - 8 global
* - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages)
* - 6 dirty
* - 5 accessed
* - 4 page cached disabled
* - 3 page write through
* - 2 userspace accessible
* - 1 writeable
* - 0 present
*
* The one bit that does not fit with the default layout is the PAGE_PSE
* (also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the
* HYPERVISOR_mmuext_op serve as mechanism to set a pagetable to be 4MB
* (or 2MB) instead of using the PAGE_PSE bit.
*
* The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen
* using it as the Page Attribute Table (PAT) bit - for details on it please
* refer to Intel SDM 10.12. The PAT allows to set the caching attributes of
* pages instead of using MTRRs.
*
* The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits):
* PAT4 PAT0
* +-----+-----+----+----+----+-----+----+----+
* | UC | UC- | WC | WB | UC | UC- | WC | WB | <= Linux
* +-----+-----+----+----+----+-----+----+----+
* | UC | UC- | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots)
* +-----+-----+----+----+----+-----+----+----+
* | rsv | rsv | WP | WC | UC | UC- | WT | WB | <= Xen
* +-----+-----+----+----+----+-----+----+----+
*
* The lookup of this index table translates to looking up
* Bit 7, Bit 4, and Bit 3 of val entry:
*
* PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3).
*
* If all bits are off, then we are using PAT0. If bit 3 turned on,
* then we are using PAT1, if bit 3 and bit 4, then PAT2..
*
* As you can see, the Linux PAT1 translates to PAT4 under Xen. Which means
* that if a guest that follows Linux's PAT setup and would like to set Write
* Combined on pages it MUST use PAT4 entry. Meaning that Bit 7 (PAGE_PAT) is
* set. For example, under Linux it only uses PAT0, PAT1, and PAT2 for the
* caching as:
*
* WB = none (so PAT0)
* WC = PWT (bit 3 on)
* UC = PWT | PCD (bit 3 and 4 are on).
*
* To make it work with Xen, it needs to translate the WC bit as so:
*
* PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3
*
* And to translate back it would:
*
* PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7.
*/ */
#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ #define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ #define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */
...@@ -127,7 +265,12 @@ ...@@ -127,7 +265,12 @@
/* /*
* MMU EXTENDED OPERATIONS * MMU EXTENDED OPERATIONS
* *
* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. * enum neg_errnoval HYPERVISOR_mmuext_op(mmuext_op_t uops[],
* unsigned int count,
* unsigned int *pdone,
* unsigned int foreigndom)
*/
/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
* A foreigndom (FD) can be specified (or DOMID_SELF for none). * A foreigndom (FD) can be specified (or DOMID_SELF for none).
* Where the FD has some effect, it is described below. * Where the FD has some effect, it is described below.
* *
...@@ -164,9 +307,23 @@ ...@@ -164,9 +307,23 @@
* cmd: MMUEXT_FLUSH_CACHE * cmd: MMUEXT_FLUSH_CACHE
* No additional arguments. Writes back and flushes cache contents. * No additional arguments. Writes back and flushes cache contents.
* *
* cmd: MMUEXT_FLUSH_CACHE_GLOBAL
* No additional arguments. Writes back and flushes cache contents
* on all CPUs in the system.
*
* cmd: MMUEXT_SET_LDT * cmd: MMUEXT_SET_LDT
* linear_addr: Linear address of LDT base (NB. must be page-aligned). * linear_addr: Linear address of LDT base (NB. must be page-aligned).
* nr_ents: Number of entries in LDT. * nr_ents: Number of entries in LDT.
*
* cmd: MMUEXT_CLEAR_PAGE
* mfn: Machine frame number to be cleared.
*
* cmd: MMUEXT_COPY_PAGE
* mfn: Machine frame number of the destination page.
* src_mfn: Machine frame number of the source page.
*
* cmd: MMUEXT_[UN]MARK_SUPER
* mfn: Machine frame number of head of superpage to be [un]marked.
*/ */
#define MMUEXT_PIN_L1_TABLE 0 #define MMUEXT_PIN_L1_TABLE 0
#define MMUEXT_PIN_L2_TABLE 1 #define MMUEXT_PIN_L2_TABLE 1
...@@ -183,12 +340,18 @@ ...@@ -183,12 +340,18 @@
#define MMUEXT_FLUSH_CACHE 12 #define MMUEXT_FLUSH_CACHE 12
#define MMUEXT_SET_LDT 13 #define MMUEXT_SET_LDT 13
#define MMUEXT_NEW_USER_BASEPTR 15 #define MMUEXT_NEW_USER_BASEPTR 15
#define MMUEXT_CLEAR_PAGE 16
#define MMUEXT_COPY_PAGE 17
#define MMUEXT_FLUSH_CACHE_GLOBAL 18
#define MMUEXT_MARK_SUPER 19
#define MMUEXT_UNMARK_SUPER 20
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
struct mmuext_op { struct mmuext_op {
unsigned int cmd; unsigned int cmd;
union { union {
/* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
* CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */
xen_pfn_t mfn; xen_pfn_t mfn;
/* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
unsigned long linear_addr; unsigned long linear_addr;
...@@ -198,6 +361,8 @@ struct mmuext_op { ...@@ -198,6 +361,8 @@ struct mmuext_op {
unsigned int nr_ents; unsigned int nr_ents;
/* TLB_FLUSH_MULTI, INVLPG_MULTI */ /* TLB_FLUSH_MULTI, INVLPG_MULTI */
void *vcpumask; void *vcpumask;
/* COPY_PAGE */
xen_pfn_t src_mfn;
} arg2; } arg2;
}; };
DEFINE_GUEST_HANDLE_STRUCT(mmuext_op); DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
...@@ -225,10 +390,23 @@ DEFINE_GUEST_HANDLE_STRUCT(mmuext_op); ...@@ -225,10 +390,23 @@ DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
*/ */
#define VMASST_CMD_enable 0 #define VMASST_CMD_enable 0
#define VMASST_CMD_disable 1 #define VMASST_CMD_disable 1
/* x86/32 guests: simulate full 4GB segment limits. */
#define VMASST_TYPE_4gb_segments 0 #define VMASST_TYPE_4gb_segments 0
/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
#define VMASST_TYPE_4gb_segments_notify 1 #define VMASST_TYPE_4gb_segments_notify 1
/*
* x86 guests: support writes to bottom-level PTEs.
* NB1. Page-directory entries cannot be written.
* NB2. Guest must continue to remove all writable mappings of PTEs.
*/
#define VMASST_TYPE_writable_pagetables 2 #define VMASST_TYPE_writable_pagetables 2
/* x86/PAE guests: support PDPTs above 4GB. */
#define VMASST_TYPE_pae_extended_cr3 3 #define VMASST_TYPE_pae_extended_cr3 3
#define MAX_VMASST_TYPE 3 #define MAX_VMASST_TYPE 3
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
...@@ -260,6 +438,15 @@ typedef uint16_t domid_t; ...@@ -260,6 +438,15 @@ typedef uint16_t domid_t;
*/ */
#define DOMID_XEN (0x7FF2U) #define DOMID_XEN (0x7FF2U)
/* DOMID_COW is used as the owner of sharable pages */
#define DOMID_COW (0x7FF3U)
/* DOMID_INVALID is used to identify pages with unknown owner. */
#define DOMID_INVALID (0x7FF4U)
/* Idle domain. */
#define DOMID_IDLE (0x7FFFU)
/* /*
* Send an array of these to HYPERVISOR_mmu_update(). * Send an array of these to HYPERVISOR_mmu_update().
* NB. The fields are natural pointer/address size for this architecture. * NB. The fields are natural pointer/address size for this architecture.
...@@ -272,7 +459,9 @@ DEFINE_GUEST_HANDLE_STRUCT(mmu_update); ...@@ -272,7 +459,9 @@ DEFINE_GUEST_HANDLE_STRUCT(mmu_update);
/* /*
* Send an array of these to HYPERVISOR_multicall(). * Send an array of these to HYPERVISOR_multicall().
* NB. The fields are natural register size for this architecture. * NB. The fields are logically the natural register size for this
* architecture. In cases where xen_ulong_t is larger than this then
* any unused bits in the upper portion must be zero.
*/ */
struct multicall_entry { struct multicall_entry {
xen_ulong_t op; xen_ulong_t op;
...@@ -442,8 +631,48 @@ struct start_info { ...@@ -442,8 +631,48 @@ struct start_info {
unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */
unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ unsigned long mod_len; /* Size (bytes) of pre-loaded module. */
int8_t cmd_line[MAX_GUEST_CMDLINE]; int8_t cmd_line[MAX_GUEST_CMDLINE];
/* The pfn range here covers both page table and p->m table frames. */
unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */
unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */
}; };
/* These flags are passed in the 'flags' field of start_info_t. */
#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */
#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */
#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */
/*
* A multiboot module is a package containing modules very similar to a
* multiboot module array. The only differences are:
* - the array of module descriptors is by convention simply at the beginning
* of the multiboot module,
* - addresses in the module descriptors are based on the beginning of the
* multiboot module,
* - the number of modules is determined by a termination descriptor that has
* mod_start == 0.
*
* This makes it possible both to build it statically and to reference it in a
* configuration file, and lets the PV guest easily rebase the addresses to
* virtual addresses while counting the number of modules at the same time.
*/
struct xen_multiboot_mod_list {
	uint32_t mod_start;	/* address of the first byte of the module */
	uint32_t mod_end;	/* address of the last byte of the module (inclusive) */
	uint32_t cmdline;	/* address of the zero-terminated command line */
	uint32_t pad;		/* unused, must be zero */
};
/*
* The console structure in start_info.console.dom0
*
* This structure includes a variety of information required to
* have a working VGA/VESA console.
*/
struct dom0_vga_console_info { struct dom0_vga_console_info {
uint8_t video_type; uint8_t video_type;
#define XEN_VGATYPE_TEXT_MODE_3 0x03 #define XEN_VGATYPE_TEXT_MODE_3 0x03
...@@ -484,11 +713,6 @@ struct dom0_vga_console_info { ...@@ -484,11 +713,6 @@ struct dom0_vga_console_info {
} u; } u;
}; };
/* These flags are passed in the 'flags' field of start_info_t. */
#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */
typedef uint64_t cpumap_t; typedef uint64_t cpumap_t;
typedef uint8_t xen_domain_handle_t[16]; typedef uint8_t xen_domain_handle_t[16];
......
...@@ -86,6 +86,7 @@ struct xenbus_device_id ...@@ -86,6 +86,7 @@ struct xenbus_device_id
/* A xenbus driver. */ /* A xenbus driver. */
struct xenbus_driver { struct xenbus_driver {
const char *name; /* defaults to ids[0].devicetype */
const struct xenbus_device_id *ids; const struct xenbus_device_id *ids;
int (*probe)(struct xenbus_device *dev, int (*probe)(struct xenbus_device *dev,
const struct xenbus_device_id *id); const struct xenbus_device_id *id);
...@@ -100,20 +101,22 @@ struct xenbus_driver { ...@@ -100,20 +101,22 @@ struct xenbus_driver {
int (*is_ready)(struct xenbus_device *dev); int (*is_ready)(struct xenbus_device *dev);
}; };
#define DEFINE_XENBUS_DRIVER(var, drvname, methods...) \
struct xenbus_driver var ## _driver = { \
.driver.name = drvname + 0 ?: var ## _ids->devicetype, \
.driver.owner = THIS_MODULE, \
.ids = var ## _ids, ## methods \
}
static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv) static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv)
{ {
return container_of(drv, struct xenbus_driver, driver); return container_of(drv, struct xenbus_driver, driver);
} }
int __must_check xenbus_register_frontend(struct xenbus_driver *); int __must_check __xenbus_register_frontend(struct xenbus_driver *drv,
int __must_check xenbus_register_backend(struct xenbus_driver *); struct module *owner,
const char *mod_name);
int __must_check __xenbus_register_backend(struct xenbus_driver *drv,
struct module *owner,
const char *mod_name);
/*
 * Convenience wrappers around __xenbus_register_frontend() and
 * __xenbus_register_backend() that supply THIS_MODULE as the owner and
 * KBUILD_MODNAME as the module name.
 *
 * The expansions deliberately carry no trailing semicolon, so the macros
 * behave like ordinary function calls and can be used inside expressions
 * (e.g. in a condition, or directly before an "else").
 */
#define xenbus_register_frontend(drv) \
	__xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME)
#define xenbus_register_backend(drv) \
	__xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME)
void xenbus_unregister_driver(struct xenbus_driver *drv); void xenbus_unregister_driver(struct xenbus_driver *drv);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment