Commit eb64c3c6 authored by Linus Torvalds

Merge tag 'stable/for-linus-3.19-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull additional xen update from David Vrabel:
 "Xen: additional features for 3.19-rc0

   - Linear p2m for x86 PV guests which simplifies the p2m code,
     improves performance and will allow for > 512 GB PV guests in the
     future.

  A last-minute, configuration specific issue was discovered with this
  change which is why it was not included in my previous pull request.
  This has now been fixed and tested"

* tag 'stable/for-linus-3.19-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen: switch to post-init routines in xen mmu.c earlier
  Revert "swiotlb-xen: pass dev_addr to swiotlb_tbl_unmap_single"
  xen: annotate xen_set_identity_and_remap_chunk() with __init
  xen: introduce helper functions to do safe read and write accesses
  xen: Speed up set_phys_to_machine() by using read-only mappings
  xen: switch to linear virtual mapped sparse p2m list
  xen: Hide get_phys_to_machine() to be able to tune common path
  x86: Introduce function to get pmd entry pointer
  xen: Delay invalidating extra memory
  xen: Delay m2p_override initialization
  xen: Delay remapping memory of pv-domain
  xen: use common page allocation function in p2m.c
  xen: Make functions static
  xen: fix some style issues in p2m.c
parents 61de8e53 f1d04b23
...@@ -452,6 +452,7 @@ static inline void update_page_count(int level, unsigned long pages) { } ...@@ -452,6 +452,7 @@ static inline void update_page_count(int level, unsigned long pages) { }
extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level); unsigned int *level);
extern pmd_t *lookup_pmd_address(unsigned long address);
extern phys_addr_t slow_virt_to_phys(void *__address); extern phys_addr_t slow_virt_to_phys(void *__address);
extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
unsigned numpages, unsigned long page_flags); unsigned numpages, unsigned long page_flags);
......
...@@ -41,10 +41,12 @@ typedef struct xpaddr { ...@@ -41,10 +41,12 @@ typedef struct xpaddr {
extern unsigned long *machine_to_phys_mapping; extern unsigned long *machine_to_phys_mapping;
extern unsigned long machine_to_phys_nr; extern unsigned long machine_to_phys_nr;
extern unsigned long *xen_p2m_addr;
extern unsigned long xen_p2m_size;
extern unsigned long xen_max_p2m_pfn;
extern unsigned long get_phys_to_machine(unsigned long pfn); extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern unsigned long set_phys_range_identity(unsigned long pfn_s, extern unsigned long set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e); unsigned long pfn_e);
...@@ -52,17 +54,52 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, ...@@ -52,17 +54,52 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops, struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count); struct page **pages, unsigned int count);
extern int m2p_add_override(unsigned long mfn, struct page *page,
struct gnttab_map_grant_ref *kmap_op);
extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
struct gnttab_map_grant_ref *kmap_ops, struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count); struct page **pages, unsigned int count);
extern int m2p_remove_override(struct page *page,
struct gnttab_map_grant_ref *kmap_op,
unsigned long mfn);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
/*
* Helper functions to write or read unsigned long values to/from
* memory, when the access may fault.
*/
static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
{
	int rc;

	/*
	 * Store val at addr through __put_user() and hand its status back
	 * to the caller (presumably 0 on success and a negative error code
	 * if the access faulted -- confirm against the uaccess API).
	 */
	rc = __put_user(val, (unsigned long __user *)addr);

	return rc;
}
static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val)
{
	int rc;

	/*
	 * Load the value at addr into *val through __get_user() and hand its
	 * status back to the caller (presumably 0 on success and a negative
	 * error code if the access faulted -- confirm against the uaccess API).
	 */
	rc = __get_user(*val, (unsigned long __user *)addr);

	return rc;
}
/*
* When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine():
* - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. No indicator
* bits (identity or foreign) are set.
* - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set
* identity or foreign indicator will be still set. __pfn_to_mfn() is
* encapsulating get_phys_to_machine() which is called in special cases only.
* - get_phys_to_machine() is to be called by __pfn_to_mfn() only in special
* cases needing an extended handling.
*/
static inline unsigned long __pfn_to_mfn(unsigned long pfn)
{
	unsigned long entry;

	/* pfn lies outside the directly indexable part of the p2m list. */
	if (pfn >= xen_p2m_size) {
		/* Still below the maximum p2m pfn: use the extended lookup. */
		if (unlikely(pfn < xen_max_p2m_pfn))
			return get_phys_to_machine(pfn);

		/* Everything above is reported as an identity frame. */
		return IDENTITY_FRAME(pfn);
	}

	/* Common path: plain read from the linear p2m list. */
	entry = xen_p2m_addr[pfn];

	/* An invalid entry is passed on to get_phys_to_machine(). */
	if (unlikely(entry == INVALID_P2M_ENTRY))
		return get_phys_to_machine(pfn);

	return entry;
}
static inline unsigned long pfn_to_mfn(unsigned long pfn) static inline unsigned long pfn_to_mfn(unsigned long pfn)
{ {
unsigned long mfn; unsigned long mfn;
...@@ -70,7 +107,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) ...@@ -70,7 +107,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
if (xen_feature(XENFEAT_auto_translated_physmap)) if (xen_feature(XENFEAT_auto_translated_physmap))
return pfn; return pfn;
mfn = get_phys_to_machine(pfn); mfn = __pfn_to_mfn(pfn);
if (mfn != INVALID_P2M_ENTRY) if (mfn != INVALID_P2M_ENTRY)
mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
...@@ -83,7 +120,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) ...@@ -83,7 +120,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
if (xen_feature(XENFEAT_auto_translated_physmap)) if (xen_feature(XENFEAT_auto_translated_physmap))
return 1; return 1;
return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY;
} }
static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
...@@ -102,7 +139,7 @@ static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) ...@@ -102,7 +139,7 @@ static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
* In such cases it doesn't matter what we return (we return garbage), * In such cases it doesn't matter what we return (we return garbage),
* but we must handle the fault without crashing! * but we must handle the fault without crashing!
*/ */
ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); ret = xen_safe_read_ulong(&machine_to_phys_mapping[mfn], &pfn);
if (ret < 0) if (ret < 0)
return ~0; return ~0;
...@@ -117,7 +154,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) ...@@ -117,7 +154,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
return mfn; return mfn;
pfn = mfn_to_pfn_no_overrides(mfn); pfn = mfn_to_pfn_no_overrides(mfn);
if (get_phys_to_machine(pfn) != mfn) { if (__pfn_to_mfn(pfn) != mfn) {
/* /*
* If this appears to be a foreign mfn (because the pfn * If this appears to be a foreign mfn (because the pfn
* doesn't map back to the mfn), then check the local override * doesn't map back to the mfn), then check the local override
...@@ -133,8 +170,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) ...@@ -133,8 +170,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
* entry doesn't map back to the mfn and m2p_override doesn't have a * entry doesn't map back to the mfn and m2p_override doesn't have a
* valid entry for it. * valid entry for it.
*/ */
if (pfn == ~0 && if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn))
get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
pfn = mfn; pfn = mfn;
return pfn; return pfn;
...@@ -180,7 +216,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) ...@@ -180,7 +216,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
return mfn; return mfn;
pfn = mfn_to_pfn(mfn); pfn = mfn_to_pfn(mfn);
if (get_phys_to_machine(pfn) != mfn) if (__pfn_to_mfn(pfn) != mfn)
return -1; /* force !pfn_valid() */ return -1; /* force !pfn_valid() */
return pfn; return pfn;
} }
......
...@@ -383,6 +383,26 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, ...@@ -383,6 +383,26 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
return lookup_address(address, level); return lookup_address(address, level);
} }
/*
 * Walk the kernel page tables for a virtual address down to the PMD level.
 *
 * Returns a pointer to the PMD entry covering @address, or NULL when the
 * PGD entry is empty or the PUD entry is empty, large or not present.
 */
pmd_t *lookup_pmd_address(unsigned long address)
{
	pgd_t *pgd_entry;
	pud_t *pud_entry;

	pgd_entry = pgd_offset_k(address);
	if (pgd_none(*pgd_entry))
		return NULL;

	pud_entry = pud_offset(pgd_entry, address);

	/* No PMD table below this PUD entry to hand out. */
	if (pud_none(*pud_entry))
		return NULL;
	if (pud_large(*pud_entry))
		return NULL;
	if (!pud_present(*pud_entry))
		return NULL;

	return pmd_offset(pud_entry, address);
}
/* /*
* This is necessary because __pa() does not work on some * This is necessary because __pa() does not work on some
* kinds of memory, like vmalloc() or the alloc_remap() * kinds of memory, like vmalloc() or the alloc_remap()
......
...@@ -387,7 +387,7 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) ...@@ -387,7 +387,7 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
unsigned long mfn; unsigned long mfn;
if (!xen_feature(XENFEAT_auto_translated_physmap)) if (!xen_feature(XENFEAT_auto_translated_physmap))
mfn = get_phys_to_machine(pfn); mfn = __pfn_to_mfn(pfn);
else else
mfn = pfn; mfn = pfn;
/* /*
...@@ -1113,20 +1113,16 @@ static void __init xen_cleanhighmap(unsigned long vaddr, ...@@ -1113,20 +1113,16 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
* instead of somewhere later and be confusing. */ * instead of somewhere later and be confusing. */
xen_mc_flush(); xen_mc_flush();
} }
static void __init xen_pagetable_p2m_copy(void)
static void __init xen_pagetable_p2m_free(void)
{ {
unsigned long size; unsigned long size;
unsigned long addr; unsigned long addr;
unsigned long new_mfn_list;
if (xen_feature(XENFEAT_auto_translated_physmap))
return;
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
new_mfn_list = xen_revector_p2m_tree();
/* No memory or already called. */ /* No memory or already called. */
if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
return; return;
/* using __ka address and sticking INVALID_P2M_ENTRY! */ /* using __ka address and sticking INVALID_P2M_ENTRY! */
...@@ -1144,8 +1140,6 @@ static void __init xen_pagetable_p2m_copy(void) ...@@ -1144,8 +1140,6 @@ static void __init xen_pagetable_p2m_copy(void)
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
memblock_free(__pa(xen_start_info->mfn_list), size); memblock_free(__pa(xen_start_info->mfn_list), size);
/* And revector! Bye bye old array */
xen_start_info->mfn_list = new_mfn_list;
/* At this stage, cleanup_highmap has already cleaned __ka space /* At this stage, cleanup_highmap has already cleaned __ka space
* from _brk_limit way up to the max_pfn_mapped (which is the end of * from _brk_limit way up to the max_pfn_mapped (which is the end of
...@@ -1169,17 +1163,35 @@ static void __init xen_pagetable_p2m_copy(void) ...@@ -1169,17 +1163,35 @@ static void __init xen_pagetable_p2m_copy(void)
} }
#endif #endif
static void __init xen_pagetable_init(void) static void __init xen_pagetable_p2m_setup(void)
{ {
paging_init(); if (xen_feature(XENFEAT_auto_translated_physmap))
return;
xen_vmalloc_p2m_tree();
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
xen_pagetable_p2m_copy(); xen_pagetable_p2m_free();
#endif #endif
/* And revector! Bye bye old array */
xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
}
static void __init xen_pagetable_init(void)
{
paging_init();
xen_post_allocator_init();
xen_pagetable_p2m_setup();
/* Allocate and initialize top and mid mfn levels for p2m structure */ /* Allocate and initialize top and mid mfn levels for p2m structure */
xen_build_mfn_list_list(); xen_build_mfn_list_list();
/* Remap memory freed due to conflicts with E820 map */
if (!xen_feature(XENFEAT_auto_translated_physmap))
xen_remap_memory();
xen_setup_shared_info(); xen_setup_shared_info();
xen_post_allocator_init();
} }
static void xen_write_cr2(unsigned long cr2) static void xen_write_cr2(unsigned long cr2)
{ {
......
...@@ -3,21 +3,22 @@ ...@@ -3,21 +3,22 @@
* guests themselves, but it must also access and update the p2m array * guests themselves, but it must also access and update the p2m array
* during suspend/resume when all the pages are reallocated. * during suspend/resume when all the pages are reallocated.
* *
* The p2m table is logically a flat array, but we implement it as a * The logical flat p2m table is mapped to a linear kernel memory area.
* three-level tree to allow the address space to be sparse. * For accesses by Xen a three-level tree linked via mfns only is set up to
* allow the address space to be sparse.
* *
* Xen * Xen
* | * |
* p2m_top p2m_top_mfn * p2m_top_mfn
* / \ / \ * / \
* p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn * p2m_mid_mfn p2m_mid_mfn
* / \ / \ / / * / /
* p2m p2m p2m p2m p2m p2m p2m ... * p2m p2m p2m ...
* *
* The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
* *
* The p2m_top and p2m_top_mfn levels are limited to 1 page, so the * The p2m_top_mfn level is limited to 1 page, so the maximum representable
* maximum representable pseudo-physical address space is: * pseudo-physical address space is:
* P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
* *
* P2M_PER_PAGE depends on the architecture, as a mfn is always * P2M_PER_PAGE depends on the architecture, as a mfn is always
...@@ -30,6 +31,9 @@ ...@@ -30,6 +31,9 @@
* leaf entries, or for the top root, or middle one, for which there is a void * leaf entries, or for the top root, or middle one, for which there is a void
* entry, we assume it is "missing". So (for example) * entry, we assume it is "missing". So (for example)
* pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
* We have a dedicated page p2m_missing with all entries being
* INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m
* list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns.
* *
* We also have the possibility of setting 1-1 mappings on certain regions, so * We also have the possibility of setting 1-1 mappings on certain regions, so
* that: * that:
...@@ -39,11 +43,9 @@ ...@@ -39,11 +43,9 @@
* PCI BARs, or ACPI spaces), we can create mappings easily because we * PCI BARs, or ACPI spaces), we can create mappings easily because we
* get the PFN value to match the MFN. * get the PFN value to match the MFN.
* *
* For this to work efficiently we have one new page p2m_identity and * For this to work efficiently we have one new page p2m_identity. All entries
* allocate (via reserved_brk) any other pages we need to cover the sides * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only
* (1GB or 4MB boundary violations). All entries in p2m_identity are set to * recognizes that and MFNs, no other fancy value).
* INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
* no other fancy value).
* *
* On lookup we spot that the entry points to p2m_identity and return the * On lookup we spot that the entry points to p2m_identity and return the
* identity value instead of dereferencing and returning INVALID_P2M_ENTRY. * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
...@@ -55,106 +57,6 @@ ...@@ -55,106 +57,6 @@
* PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
* non-identity pfn. To protect ourselves against we elect to set (and get) the * non-identity pfn. To protect ourselves against we elect to set (and get) the
* IDENTITY_FRAME_BIT on all identity mapped PFNs. * IDENTITY_FRAME_BIT on all identity mapped PFNs.
*
* This simplistic diagram is used to explain the more subtle piece of code.
* There is also a digram of the P2M at the end that can help.
* Imagine your E820 looking as so:
*
* 1GB 2GB 4GB
* /-------------------+---------\/----\ /----------\ /---+-----\
* | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
* \-------------------+---------/\----/ \----------/ \---+-----/
* ^- 1029MB ^- 2001MB
*
* [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
* 2048MB = 524288 (0x80000)]
*
* And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
* is actually not present (would have to kick the balloon driver to put it in).
*
* When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
* Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
* of the PFN and the end PFN (263424 and 512256 respectively). The first step
* is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
* covers 512^2 of page estate (1GB) and in case the start or end PFN is not
* aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as
* required to split any existing p2m_mid_missing middle pages.
*
* With the E820 example above, 263424 is not 1GB aligned so we allocate a
* reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
* Each entry in the allocate page is "missing" (points to p2m_missing).
*
* Next stage is to determine if we need to do a more granular boundary check
* on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
* We check if the start pfn and end pfn violate that boundary check, and if
* so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer
* granularity of setting which PFNs are missing and which ones are identity.
* In our example 263424 and 512256 both fail the check so we reserve_brk two
* pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
* values) and assign them to p2m[1][2] and p2m[1][488] respectively.
*
* At this point we would at minimum reserve_brk one page, but could be up to
* three. Each call to set_phys_range_identity has at maximum a three page
* cost. If we were to query the P2M at this stage, all those entries from
* start PFN through end PFN (so 1029MB -> 2001MB) would return
* INVALID_P2M_ENTRY ("missing").
*
* The next step is to walk from the start pfn to the end pfn setting
* the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
* If we find that the middle entry is pointing to p2m_missing we can swap it
* over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and
* similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions).
* At this point we do not need to worry about boundary aligment (so no need to
* reserve_brk a middle page, figure out which PFNs are "missing" and which
* ones are identity), as that has been done earlier. If we find that the
* middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
* that page (which covers 512 PFNs) and set the appropriate PFN with
* IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
* set from p2m[1][2][256->511] and p2m[1][488][0->256] with
* IDENTITY_FRAME_BIT set.
*
* All other regions that are void (or not filled) either point to p2m_missing
* (considered missing) or have the default value of INVALID_P2M_ENTRY (also
* considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
* contain the INVALID_P2M_ENTRY value and are considered "missing."
*
* Finally, the region beyond the end of of the E820 (4 GB in this example)
* is set to be identity (in case there are MMIO regions placed here).
*
* This is what the p2m ends up looking (for the E820 above) with this
* fabulous drawing:
*
* p2m /--------------\
* /-----\ | &mfn_list[0],| /-----------------\
* | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
* |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
* | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
* |-----| \ | [p2m_identity]+\\ | .... |
* | 2 |--\ \-------------------->| ... | \\ \----------------/
* |-----| \ \---------------/ \\
* | 3 |-\ \ \\ p2m_identity [1]
* |-----| \ \-------------------->/---------------\ /-----------------\
* | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... |
* \-----/ | | | [p2m_identity]+-->| ..., ~0 |
* | | | .... | \-----------------/
* | | +-[x], ~0, ~0.. +\
* | | \---------------/ \
* | | \-> /---------------\
* | V p2m_mid_missing p2m_missing | IDENTITY[@0] |
* | /-----------------\ /------------\ | IDENTITY[@256]|
* | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... |
* | | [p2m_missing] +---->| ..., ~0 | \---------------/
* | | ... | \------------/
* | \-----------------/
* |
* | p2m_mid_identity
* | /-----------------\
* \-->| [p2m_identity] +---->[1]
* | [p2m_identity] +---->[1]
* | ... |
* \-----------------/
*
* where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
*/ */
#include <linux/init.h> #include <linux/init.h>
...@@ -164,9 +66,11 @@ ...@@ -164,9 +66,11 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/bootmem.h> #include <linux/bootmem.h>
#include <linux/slab.h>
#include <asm/cache.h> #include <asm/cache.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/uaccess.h>
#include <asm/xen/page.h> #include <asm/xen/page.h>
#include <asm/xen/hypercall.h> #include <asm/xen/hypercall.h>
...@@ -178,31 +82,26 @@ ...@@ -178,31 +82,26 @@
#include "multicalls.h" #include "multicalls.h"
#include "xen-ops.h" #include "xen-ops.h"
#define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE)
static void __init m2p_override_init(void); static void __init m2p_override_init(void);
unsigned long *xen_p2m_addr __read_mostly;
EXPORT_SYMBOL_GPL(xen_p2m_addr);
unsigned long xen_p2m_size __read_mostly;
EXPORT_SYMBOL_GPL(xen_p2m_size);
unsigned long xen_max_p2m_pfn __read_mostly; unsigned long xen_max_p2m_pfn __read_mostly;
EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);
static DEFINE_SPINLOCK(p2m_update_lock);
static unsigned long *p2m_mid_missing_mfn; static unsigned long *p2m_mid_missing_mfn;
static unsigned long *p2m_top_mfn; static unsigned long *p2m_top_mfn;
static unsigned long **p2m_top_mfn_p; static unsigned long **p2m_top_mfn_p;
static unsigned long *p2m_missing;
/* Placeholders for holes in the address space */ static unsigned long *p2m_identity;
static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); static pte_t *p2m_missing_pte;
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); static pte_t *p2m_identity_pte;
static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
/* For each I/O range remapped we may lose up to two leaf pages for the boundary
* violations and three mid pages to cover up to 3GB. With
* early_can_reuse_p2m_middle() most of the leaf pages will be reused by the
* remapped region.
*/
RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES);
static inline unsigned p2m_top_index(unsigned long pfn) static inline unsigned p2m_top_index(unsigned long pfn)
{ {
...@@ -220,14 +119,6 @@ static inline unsigned p2m_index(unsigned long pfn) ...@@ -220,14 +119,6 @@ static inline unsigned p2m_index(unsigned long pfn)
return pfn % P2M_PER_PAGE; return pfn % P2M_PER_PAGE;
} }
static void p2m_top_init(unsigned long ***top)
{
unsigned i;
for (i = 0; i < P2M_TOP_PER_PAGE; i++)
top[i] = p2m_mid_missing;
}
static void p2m_top_mfn_init(unsigned long *top) static void p2m_top_mfn_init(unsigned long *top)
{ {
unsigned i; unsigned i;
...@@ -244,28 +135,43 @@ static void p2m_top_mfn_p_init(unsigned long **top) ...@@ -244,28 +135,43 @@ static void p2m_top_mfn_p_init(unsigned long **top)
top[i] = p2m_mid_missing_mfn; top[i] = p2m_mid_missing_mfn;
} }
static void p2m_mid_init(unsigned long **mid, unsigned long *leaf) static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
{ {
unsigned i; unsigned i;
for (i = 0; i < P2M_MID_PER_PAGE; i++) for (i = 0; i < P2M_MID_PER_PAGE; i++)
mid[i] = leaf; mid[i] = virt_to_mfn(leaf);
} }
static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) static void p2m_init(unsigned long *p2m)
{ {
unsigned i; unsigned i;
for (i = 0; i < P2M_MID_PER_PAGE; i++) for (i = 0; i < P2M_PER_PAGE; i++)
mid[i] = virt_to_mfn(leaf); p2m[i] = INVALID_P2M_ENTRY;
} }
static void p2m_init(unsigned long *p2m) static void p2m_init_identity(unsigned long *p2m, unsigned long pfn)
{ {
unsigned i; unsigned i;
for (i = 0; i < P2M_MID_PER_PAGE; i++) for (i = 0; i < P2M_PER_PAGE; i++)
p2m[i] = INVALID_P2M_ENTRY; p2m[i] = IDENTITY_FRAME(pfn + i);
}
/*
 * Allocate one page for p2m use.
 *
 * While slab_is_available() reports false the page is taken via
 * alloc_bootmem_align(); afterwards it comes from __get_free_page().
 * The result is returned unchecked.
 */
static void * __ref alloc_p2m_page(void)
{
	void *page;

	if (unlikely(!slab_is_available()))
		page = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
	else
		page = (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);

	return page;
}
/*
 * Release a page that was just allocated but lost a race and is not needed.
 * Must only be used in that situation; triggers BUG() if the slab allocator
 * is not available.
 */
static void free_p2m_page(void *p)
{
	unsigned long page_addr = (unsigned long)p;

	BUG_ON(!slab_is_available());
	free_page(page_addr);
}
/* /*
...@@ -280,40 +186,46 @@ static void p2m_init(unsigned long *p2m) ...@@ -280,40 +186,46 @@ static void p2m_init(unsigned long *p2m)
*/ */
void __ref xen_build_mfn_list_list(void) void __ref xen_build_mfn_list_list(void)
{ {
unsigned long pfn; unsigned long pfn, mfn;
pte_t *ptep;
unsigned int level, topidx, mididx;
unsigned long *mid_mfn_p;
if (xen_feature(XENFEAT_auto_translated_physmap)) if (xen_feature(XENFEAT_auto_translated_physmap))
return; return;
/* Pre-initialize p2m_top_mfn to be completely missing */ /* Pre-initialize p2m_top_mfn to be completely missing */
if (p2m_top_mfn == NULL) { if (p2m_top_mfn == NULL) {
p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); p2m_mid_missing_mfn = alloc_p2m_page();
p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); p2m_top_mfn_p = alloc_p2m_page();
p2m_top_mfn_p_init(p2m_top_mfn_p); p2m_top_mfn_p_init(p2m_top_mfn_p);
p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); p2m_top_mfn = alloc_p2m_page();
p2m_top_mfn_init(p2m_top_mfn); p2m_top_mfn_init(p2m_top_mfn);
} else { } else {
/* Reinitialise, mfn's all change after migration */ /* Reinitialise, mfn's all change after migration */
p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
} }
for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN;
unsigned topidx = p2m_top_index(pfn); pfn += P2M_PER_PAGE) {
unsigned mididx = p2m_mid_index(pfn); topidx = p2m_top_index(pfn);
unsigned long **mid; mididx = p2m_mid_index(pfn);
unsigned long *mid_mfn_p;
mid = p2m_top[topidx];
mid_mfn_p = p2m_top_mfn_p[topidx]; mid_mfn_p = p2m_top_mfn_p[topidx];
ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn),
&level);
BUG_ON(!ptep || level != PG_LEVEL_4K);
mfn = pte_mfn(*ptep);
ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
/* Don't bother allocating any mfn mid levels if /* Don't bother allocating any mfn mid levels if
* they're just missing, just update the stored mfn, * they're just missing, just update the stored mfn,
* since all could have changed over a migrate. * since all could have changed over a migrate.
*/ */
if (mid == p2m_mid_missing) { if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) {
BUG_ON(mididx); BUG_ON(mididx);
BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
...@@ -322,19 +234,14 @@ void __ref xen_build_mfn_list_list(void) ...@@ -322,19 +234,14 @@ void __ref xen_build_mfn_list_list(void)
} }
if (mid_mfn_p == p2m_mid_missing_mfn) { if (mid_mfn_p == p2m_mid_missing_mfn) {
/* mid_mfn_p = alloc_p2m_page();
* XXX boot-time only! We should never find
* missing parts of the mfn tree after
* runtime.
*/
mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
p2m_mid_mfn_init(mid_mfn_p, p2m_missing); p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
p2m_top_mfn_p[topidx] = mid_mfn_p; p2m_top_mfn_p[topidx] = mid_mfn_p;
} }
p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); mid_mfn_p[mididx] = mfn;
} }
} }
...@@ -353,171 +260,235 @@ void xen_setup_mfn_list_list(void) ...@@ -353,171 +260,235 @@ void xen_setup_mfn_list_list(void)
/* Set up p2m_top to point to the domain-builder provided p2m pages */ /* Set up p2m_top to point to the domain-builder provided p2m pages */
void __init xen_build_dynamic_phys_to_machine(void) void __init xen_build_dynamic_phys_to_machine(void)
{ {
unsigned long *mfn_list;
unsigned long max_pfn;
unsigned long pfn; unsigned long pfn;
if (xen_feature(XENFEAT_auto_translated_physmap)) if (xen_feature(XENFEAT_auto_translated_physmap))
return; return;
mfn_list = (unsigned long *)xen_start_info->mfn_list; xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list;
max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE);
xen_max_p2m_pfn = max_pfn;
p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++)
p2m_init(p2m_missing); xen_p2m_addr[pfn] = INVALID_P2M_ENTRY;
p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_init(p2m_identity);
p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); xen_max_p2m_pfn = xen_p2m_size;
p2m_mid_init(p2m_mid_missing, p2m_missing); }
p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_init(p2m_mid_identity, p2m_identity);
p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); #define P2M_TYPE_IDENTITY 0
p2m_top_init(p2m_top); #define P2M_TYPE_MISSING 1
#define P2M_TYPE_PFN 2
#define P2M_TYPE_UNKNOWN 3
/* static int xen_p2m_elem_type(unsigned long pfn)
* The domain builder gives us a pre-constructed p2m array in {
* mfn_list for all the pages initially given to us, so we just unsigned long mfn;
* need to graft that into our tree structure.
*/
for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
unsigned topidx = p2m_top_index(pfn);
unsigned mididx = p2m_mid_index(pfn);
if (p2m_top[topidx] == p2m_mid_missing) { if (pfn >= xen_p2m_size)
unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); return P2M_TYPE_IDENTITY;
p2m_mid_init(mid, p2m_missing);
p2m_top[topidx] = mid; mfn = xen_p2m_addr[pfn];
}
/* if (mfn == INVALID_P2M_ENTRY)
* As long as the mfn_list has enough entries to completely return P2M_TYPE_MISSING;
* fill a p2m page, pointing into the array is ok. But if
* not the entries beyond the last pfn will be undefined.
*/
if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) {
unsigned long p2midx;
p2midx = max_pfn % P2M_PER_PAGE; if (mfn & IDENTITY_FRAME_BIT)
for ( ; p2midx < P2M_PER_PAGE; p2midx++) return P2M_TYPE_IDENTITY;
mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY;
}
p2m_top[topidx][mididx] = &mfn_list[pfn];
}
m2p_override_init(); return P2M_TYPE_PFN;
} }
#ifdef CONFIG_X86_64
unsigned long __init xen_revector_p2m_tree(void) static void __init xen_rebuild_p2m_list(unsigned long *p2m)
{ {
unsigned long va_start; unsigned int i, chunk;
unsigned long va_end;
unsigned long pfn; unsigned long pfn;
unsigned long pfn_free = 0; unsigned long *mfns;
unsigned long *mfn_list = NULL; pte_t *ptep;
unsigned long size; pmd_t *pmdp;
int type;
va_start = xen_start_info->mfn_list;
/*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
* so make sure it is rounded up to that */
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
va_end = va_start + size;
/* If we were revectored already, don't do it again. */
if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
return 0;
mfn_list = alloc_bootmem_align(size, PAGE_SIZE); p2m_missing = alloc_p2m_page();
if (!mfn_list) { p2m_init(p2m_missing);
pr_warn("Could not allocate space for a new P2M tree!\n"); p2m_identity = alloc_p2m_page();
return xen_start_info->mfn_list; p2m_init(p2m_identity);
p2m_missing_pte = alloc_p2m_page();
paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT);
p2m_identity_pte = alloc_p2m_page();
paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT);
for (i = 0; i < PTRS_PER_PTE; i++) {
set_pte(p2m_missing_pte + i,
pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO));
set_pte(p2m_identity_pte + i,
pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO));
} }
/* Fill it out with INVALID_P2M_ENTRY value */
memset(mfn_list, 0xFF, size);
for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) {
unsigned topidx = p2m_top_index(pfn); /*
unsigned mididx; * Try to map missing/identity PMDs or p2m-pages if possible.
unsigned long *mid_p; * We have to respect the structure of the mfn_list_list
* which will be built just afterwards.
* Chunk size to test is one p2m page if we are in the middle
* of a mfn_list_list mid page and the complete mid page area
* if we are at index 0 of the mid page. Please note that a
* mid page might cover more than one PMD, e.g. on 32 bit PAE
* kernels.
*/
chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ?
P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE;
type = xen_p2m_elem_type(pfn);
i = 0;
if (type != P2M_TYPE_PFN)
for (i = 1; i < chunk; i++)
if (xen_p2m_elem_type(pfn + i) != type)
break;
if (i < chunk)
/* Reset to minimal chunk size. */
chunk = P2M_PER_PAGE;
if (!p2m_top[topidx]) if (type == P2M_TYPE_PFN || i < chunk) {
/* Use initial p2m page contents. */
#ifdef CONFIG_X86_64
mfns = alloc_p2m_page();
copy_page(mfns, xen_p2m_addr + pfn);
#else
mfns = xen_p2m_addr + pfn;
#endif
ptep = populate_extra_pte((unsigned long)(p2m + pfn));
set_pte(ptep,
pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
continue; continue;
}
if (p2m_top[topidx] == p2m_mid_missing) if (chunk == P2M_PER_PAGE) {
/* Map complete missing or identity p2m-page. */
mfns = (type == P2M_TYPE_MISSING) ?
p2m_missing : p2m_identity;
ptep = populate_extra_pte((unsigned long)(p2m + pfn));
set_pte(ptep,
pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO));
continue; continue;
}
mididx = p2m_mid_index(pfn); /* Complete missing or identity PMD(s) can be mapped. */
mid_p = p2m_top[topidx][mididx]; ptep = (type == P2M_TYPE_MISSING) ?
if (!mid_p) p2m_missing_pte : p2m_identity_pte;
continue; for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) pmdp = populate_extra_pmd(
continue; (unsigned long)(p2m + pfn + i * PTRS_PER_PTE));
set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
}
}
}
if ((unsigned long)mid_p == INVALID_P2M_ENTRY) void __init xen_vmalloc_p2m_tree(void)
continue; {
static struct vm_struct vm;
/* The old va. Rebase it on mfn_list */ vm.flags = VM_ALLOC;
if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn,
unsigned long *new; PMD_SIZE * PMDS_PER_MID_PAGE);
vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE);
pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size);
if (pfn_free > (size / sizeof(unsigned long))) { xen_max_p2m_pfn = vm.size / sizeof(unsigned long);
WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
size / sizeof(unsigned long), pfn_free);
return 0;
}
new = &mfn_list[pfn_free];
copy_page(new, mid_p); xen_rebuild_p2m_list(vm.addr);
p2m_top[topidx][mididx] = &mfn_list[pfn_free];
pfn_free += P2M_PER_PAGE; xen_p2m_addr = vm.addr;
xen_p2m_size = xen_max_p2m_pfn;
} xen_inv_extra_mem();
/* This should be the leafs allocated for identity from _brk. */
}
return (unsigned long)mfn_list;
m2p_override_init();
} }
#else
unsigned long __init xen_revector_p2m_tree(void)
{
return 0;
}
#endif
unsigned long get_phys_to_machine(unsigned long pfn) unsigned long get_phys_to_machine(unsigned long pfn)
{ {
unsigned topidx, mididx, idx; pte_t *ptep;
unsigned int level;
if (unlikely(pfn >= xen_p2m_size)) {
if (pfn < xen_max_p2m_pfn)
return xen_chk_extra_mem(pfn);
if (unlikely(pfn >= MAX_P2M_PFN))
return IDENTITY_FRAME(pfn); return IDENTITY_FRAME(pfn);
}
topidx = p2m_top_index(pfn); ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
mididx = p2m_mid_index(pfn); BUG_ON(!ptep || level != PG_LEVEL_4K);
idx = p2m_index(pfn);
/* /*
* The INVALID_P2M_ENTRY is filled in both p2m_*identity * The INVALID_P2M_ENTRY is filled in both p2m_*identity
* and in p2m_*missing, so returning the INVALID_P2M_ENTRY * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
* would be wrong. * would be wrong.
*/ */
if (p2m_top[topidx][mididx] == p2m_identity) if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
return IDENTITY_FRAME(pfn); return IDENTITY_FRAME(pfn);
return p2m_top[topidx][mididx][idx]; return xen_p2m_addr[pfn];
} }
EXPORT_SYMBOL_GPL(get_phys_to_machine); EXPORT_SYMBOL_GPL(get_phys_to_machine);
static void *alloc_p2m_page(void) /*
* Allocate new pmd(s). It is checked whether the old pmd is still in place.
* If not, nothing is changed. This is okay as the only reason for allocating
* a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual
* pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
*/
static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg)
{ {
return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); pte_t *ptechk;
} pte_t *pteret = ptep;
pte_t *pte_newpg[PMDS_PER_MID_PAGE];
pmd_t *pmdp;
unsigned int level;
unsigned long flags;
unsigned long vaddr;
int i;
static void free_p2m_page(void *p) /* Do all allocations first to bail out in error case. */
{ for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
free_page((unsigned long)p); pte_newpg[i] = alloc_p2m_page();
if (!pte_newpg[i]) {
for (i--; i >= 0; i--)
free_p2m_page(pte_newpg[i]);
return NULL;
}
}
vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1);
for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
copy_page(pte_newpg[i], pte_pg);
paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT);
pmdp = lookup_pmd_address(vaddr);
BUG_ON(!pmdp);
spin_lock_irqsave(&p2m_update_lock, flags);
ptechk = lookup_address(vaddr, &level);
if (ptechk == pte_pg) {
set_pmd(pmdp,
__pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
if (vaddr == (addr & ~(PMD_SIZE - 1)))
pteret = pte_offset_kernel(pmdp, addr);
pte_newpg[i] = NULL;
}
spin_unlock_irqrestore(&p2m_update_lock, flags);
if (pte_newpg[i]) {
paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT);
free_p2m_page(pte_newpg[i]);
}
vaddr += PMD_SIZE;
}
return pteret;
} }
/* /*
...@@ -530,28 +501,28 @@ static void free_p2m_page(void *p) ...@@ -530,28 +501,28 @@ static void free_p2m_page(void *p)
static bool alloc_p2m(unsigned long pfn) static bool alloc_p2m(unsigned long pfn)
{ {
unsigned topidx, mididx; unsigned topidx, mididx;
unsigned long ***top_p, **mid;
unsigned long *top_mfn_p, *mid_mfn; unsigned long *top_mfn_p, *mid_mfn;
unsigned long *p2m_orig; pte_t *ptep, *pte_pg;
unsigned int level;
unsigned long flags;
unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
unsigned long p2m_pfn;
topidx = p2m_top_index(pfn); topidx = p2m_top_index(pfn);
mididx = p2m_mid_index(pfn); mididx = p2m_mid_index(pfn);
top_p = &p2m_top[topidx]; ptep = lookup_address(addr, &level);
mid = ACCESS_ONCE(*top_p); BUG_ON(!ptep || level != PG_LEVEL_4K);
pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
if (mid == p2m_mid_missing) { if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) {
/* Mid level is missing, allocate a new one */ /* PMD level is missing, allocate a new one */
mid = alloc_p2m_page(); ptep = alloc_p2m_pmd(addr, ptep, pte_pg);
if (!mid) if (!ptep)
return false; return false;
p2m_mid_init(mid, p2m_missing);
if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
free_p2m_page(mid);
} }
if (p2m_top_mfn) {
top_mfn_p = &p2m_top_mfn[topidx]; top_mfn_p = &p2m_top_mfn[topidx];
mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
...@@ -579,9 +550,13 @@ static bool alloc_p2m(unsigned long pfn) ...@@ -579,9 +550,13 @@ static bool alloc_p2m(unsigned long pfn)
p2m_top_mfn_p[topidx] = mid_mfn; p2m_top_mfn_p[topidx] = mid_mfn;
} }
} }
} else {
mid_mfn = NULL;
}
p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]); p2m_pfn = pte_pfn(ACCESS_ONCE(*ptep));
if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) { if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) ||
p2m_pfn == PFN_DOWN(__pa(p2m_missing))) {
/* p2m leaf page is missing */ /* p2m leaf page is missing */
unsigned long *p2m; unsigned long *p2m;
...@@ -589,183 +564,36 @@ static bool alloc_p2m(unsigned long pfn) ...@@ -589,183 +564,36 @@ static bool alloc_p2m(unsigned long pfn)
if (!p2m) if (!p2m)
return false; return false;
if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
p2m_init(p2m); p2m_init(p2m);
if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
free_p2m_page(p2m);
else else
mid_mfn[mididx] = virt_to_mfn(p2m); p2m_init_identity(p2m, pfn);
}
return true;
}
static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
{
unsigned topidx, mididx, idx;
unsigned long *p2m;
topidx = p2m_top_index(pfn);
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
/* Pfff.. No boundary cross-over, lets get out. */
if (!idx && check_boundary)
return false;
WARN(p2m_top[topidx][mididx] == p2m_identity,
"P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
topidx, mididx);
/* spin_lock_irqsave(&p2m_update_lock, flags);
* Could be done by xen_build_dynamic_phys_to_machine..
*/
if (p2m_top[topidx][mididx] != p2m_missing)
return false;
/* Boundary cross-over for the edges: */
p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_init(p2m);
p2m_top[topidx][mididx] = p2m; if (pte_pfn(*ptep) == p2m_pfn) {
set_pte(ptep,
return true; pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL));
} if (mid_mfn)
mid_mfn[mididx] = virt_to_mfn(p2m);
static bool __init early_alloc_p2m_middle(unsigned long pfn) p2m = NULL;
{
unsigned topidx = p2m_top_index(pfn);
unsigned long **mid;
mid = p2m_top[topidx];
if (mid == p2m_mid_missing) {
mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_init(mid, p2m_missing);
p2m_top[topidx] = mid;
}
return true;
}
/*
* Skim over the P2M tree looking at pages that are either filled with
* INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and
* replace the P2M leaf with a p2m_missing or p2m_identity.
* Stick the old page in the new P2M tree location.
*/
static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn)
{
unsigned topidx;
unsigned mididx;
unsigned ident_pfns;
unsigned inv_pfns;
unsigned long *p2m;
unsigned idx;
unsigned long pfn;
/* We only look when this entails a P2M middle layer */
if (p2m_index(set_pfn))
return false;
for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
topidx = p2m_top_index(pfn);
if (!p2m_top[topidx])
continue;
if (p2m_top[topidx] == p2m_mid_missing)
continue;
mididx = p2m_mid_index(pfn);
p2m = p2m_top[topidx][mididx];
if (!p2m)
continue;
if ((p2m == p2m_missing) || (p2m == p2m_identity))
continue;
if ((unsigned long)p2m == INVALID_P2M_ENTRY)
continue;
ident_pfns = 0;
inv_pfns = 0;
for (idx = 0; idx < P2M_PER_PAGE; idx++) {
/* IDENTITY_PFNs are 1:1 */
if (p2m[idx] == IDENTITY_FRAME(pfn + idx))
ident_pfns++;
else if (p2m[idx] == INVALID_P2M_ENTRY)
inv_pfns++;
else
break;
}
if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE))
goto found;
} }
return false;
found:
/* Found one, replace old with p2m_identity or p2m_missing */
p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
/* Reset where we want to stick the old page in. */
topidx = p2m_top_index(set_pfn);
mididx = p2m_mid_index(set_pfn);
/* This shouldn't happen */
if (WARN_ON(p2m_top[topidx] == p2m_mid_missing))
early_alloc_p2m_middle(set_pfn);
if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing))
return false;
p2m_init(p2m);
p2m_top[topidx][mididx] = p2m;
return true;
}
bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
if (!early_alloc_p2m_middle(pfn))
return false;
if (early_can_reuse_p2m_middle(pfn)) spin_unlock_irqrestore(&p2m_update_lock, flags);
return __set_phys_to_machine(pfn, mfn);
if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/))
return false;
if (!__set_phys_to_machine(pfn, mfn)) if (p2m)
return false; free_p2m_page(p2m);
} }
return true; return true;
} }
static void __init early_split_p2m(unsigned long pfn)
{
unsigned long mididx, idx;
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
/*
* Allocate new middle and leaf pages if this pfn lies in the
* middle of one.
*/
if (mididx || idx)
early_alloc_p2m_middle(pfn);
if (idx)
early_alloc_p2m(pfn, false);
}
unsigned long __init set_phys_range_identity(unsigned long pfn_s, unsigned long __init set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e) unsigned long pfn_e)
{ {
unsigned long pfn; unsigned long pfn;
if (unlikely(pfn_s >= MAX_P2M_PFN)) if (unlikely(pfn_s >= xen_p2m_size))
return 0; return 0;
if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
...@@ -774,91 +602,42 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, ...@@ -774,91 +602,42 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
if (pfn_s > pfn_e) if (pfn_s > pfn_e)
return 0; return 0;
if (pfn_e > MAX_P2M_PFN) if (pfn_e > xen_p2m_size)
pfn_e = MAX_P2M_PFN; pfn_e = xen_p2m_size;
early_split_p2m(pfn_s); for (pfn = pfn_s; pfn < pfn_e; pfn++)
early_split_p2m(pfn_e); xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn);
for (pfn = pfn_s; pfn < pfn_e;) {
unsigned topidx = p2m_top_index(pfn);
unsigned mididx = p2m_mid_index(pfn);
if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
break;
pfn++;
/*
* If the PFN was set to a middle or leaf identity
* page the remainder must also be identity, so skip
* ahead to the next middle or leaf entry.
*/
if (p2m_top[topidx] == p2m_mid_identity)
pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE);
else if (p2m_top[topidx][mididx] == p2m_identity)
pfn = ALIGN(pfn, P2M_PER_PAGE);
}
WARN((pfn - pfn_s) != (pfn_e - pfn_s),
"Identity mapping failed. We are %ld short of 1-1 mappings!\n",
(pfn_e - pfn_s) - (pfn - pfn_s));
return pfn - pfn_s; return pfn - pfn_s;
} }
/* Try to install p2m mapping; fail if intermediate bits missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{ {
unsigned topidx, mididx, idx; pte_t *ptep;
unsigned int level;
/* don't track P2M changes in autotranslate guests */ /* don't track P2M changes in autotranslate guests */
if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
return true; return true;
if (unlikely(pfn >= MAX_P2M_PFN)) { if (unlikely(pfn >= xen_p2m_size)) {
BUG_ON(mfn != INVALID_P2M_ENTRY); BUG_ON(mfn != INVALID_P2M_ENTRY);
return true; return true;
} }
topidx = p2m_top_index(pfn); if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn)))
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
/* For sparse holes were the p2m leaf has real PFN along with
* PCI holes, stick in the PFN as the MFN value.
*
* set_phys_range_identity() will have allocated new middle
* and leaf pages as required so an existing p2m_mid_missing
* or p2m_missing mean that whole range will be identity so
* these can be switched to p2m_mid_identity or p2m_identity.
*/
if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
if (p2m_top[topidx] == p2m_mid_identity)
return true;
if (p2m_top[topidx] == p2m_mid_missing) {
WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing,
p2m_mid_identity) != p2m_mid_missing);
return true;
}
if (p2m_top[topidx][mididx] == p2m_identity)
return true; return true;
/* Swap over from MISSING to IDENTITY if needed. */ ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
if (p2m_top[topidx][mididx] == p2m_missing) { BUG_ON(!ptep || level != PG_LEVEL_4K);
WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
p2m_identity) != p2m_missing);
return true;
}
}
if (p2m_top[topidx][mididx] == p2m_missing) if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing)))
return mfn == INVALID_P2M_ENTRY; return mfn == INVALID_P2M_ENTRY;
p2m_top[topidx][mididx][idx] = mfn; if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
return mfn == IDENTITY_FRAME(pfn);
return true; return false;
} }
bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
...@@ -867,8 +646,7 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) ...@@ -867,8 +646,7 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
if (!alloc_p2m(pfn)) if (!alloc_p2m(pfn))
return false; return false;
if (!__set_phys_to_machine(pfn, mfn)) return __set_phys_to_machine(pfn, mfn);
return false;
} }
return true; return true;
...@@ -877,14 +655,15 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) ...@@ -877,14 +655,15 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
#define M2P_OVERRIDE_HASH_SHIFT 10 #define M2P_OVERRIDE_HASH_SHIFT 10
#define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT) #define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT)
static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH); static struct list_head *m2p_overrides;
static DEFINE_SPINLOCK(m2p_override_lock); static DEFINE_SPINLOCK(m2p_override_lock);
static void __init m2p_override_init(void) static void __init m2p_override_init(void)
{ {
unsigned i; unsigned i;
m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, m2p_overrides = alloc_bootmem_align(
sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
sizeof(unsigned long)); sizeof(unsigned long));
for (i = 0; i < M2P_OVERRIDE_HASH; i++) for (i = 0; i < M2P_OVERRIDE_HASH; i++)
...@@ -896,67 +675,8 @@ static unsigned long mfn_hash(unsigned long mfn) ...@@ -896,67 +675,8 @@ static unsigned long mfn_hash(unsigned long mfn)
return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
} }
int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count)
{
int i, ret = 0;
bool lazy = false;
pte_t *pte;
if (xen_feature(XENFEAT_auto_translated_physmap))
return 0;
if (kmap_ops &&
!in_interrupt() &&
paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
arch_enter_lazy_mmu_mode();
lazy = true;
}
for (i = 0; i < count; i++) {
unsigned long mfn, pfn;
/* Do not add to override if the map failed. */
if (map_ops[i].status)
continue;
if (map_ops[i].flags & GNTMAP_contains_pte) {
pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
(map_ops[i].host_addr & ~PAGE_MASK));
mfn = pte_mfn(*pte);
} else {
mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
}
pfn = page_to_pfn(pages[i]);
WARN_ON(PagePrivate(pages[i]));
SetPagePrivate(pages[i]);
set_page_private(pages[i], mfn);
pages[i]->index = pfn_to_mfn(pfn);
if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
ret = -ENOMEM;
goto out;
}
if (kmap_ops) {
ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
if (ret)
goto out;
}
}
out:
if (lazy)
arch_leave_lazy_mmu_mode();
return ret;
}
EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
/* Add an MFN override for a particular page */ /* Add an MFN override for a particular page */
int m2p_add_override(unsigned long mfn, struct page *page, static int m2p_add_override(unsigned long mfn, struct page *page,
struct gnttab_map_grant_ref *kmap_op) struct gnttab_map_grant_ref *kmap_op)
{ {
unsigned long flags; unsigned long flags;
...@@ -1004,19 +724,19 @@ int m2p_add_override(unsigned long mfn, struct page *page, ...@@ -1004,19 +724,19 @@ int m2p_add_override(unsigned long mfn, struct page *page,
* because mfn_to_pfn (that ends up being called by GUPF) will * because mfn_to_pfn (that ends up being called by GUPF) will
* return the backend pfn rather than the frontend pfn. */ * return the backend pfn rather than the frontend pfn. */
pfn = mfn_to_pfn_no_overrides(mfn); pfn = mfn_to_pfn_no_overrides(mfn);
if (get_phys_to_machine(pfn) == mfn) if (__pfn_to_mfn(pfn) == mfn)
set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(m2p_add_override);
int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops, struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count) struct page **pages, unsigned int count)
{ {
int i, ret = 0; int i, ret = 0;
bool lazy = false; bool lazy = false;
pte_t *pte;
if (xen_feature(XENFEAT_auto_translated_physmap)) if (xen_feature(XENFEAT_auto_translated_physmap))
return 0; return 0;
...@@ -1029,33 +749,73 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, ...@@ -1029,33 +749,73 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
} }
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
unsigned long mfn = get_phys_to_machine(page_to_pfn(pages[i])); unsigned long mfn, pfn;
unsigned long pfn = page_to_pfn(pages[i]);
if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { /* Do not add to override if the map failed. */
ret = -EINVAL; if (map_ops[i].status)
goto out; continue;
if (map_ops[i].flags & GNTMAP_contains_pte) {
pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
(map_ops[i].host_addr & ~PAGE_MASK));
mfn = pte_mfn(*pte);
} else {
mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
} }
pfn = page_to_pfn(pages[i]);
set_page_private(pages[i], INVALID_P2M_ENTRY); WARN_ON(PagePrivate(pages[i]));
WARN_ON(!PagePrivate(pages[i])); SetPagePrivate(pages[i]);
ClearPagePrivate(pages[i]); set_page_private(pages[i], mfn);
set_phys_to_machine(pfn, pages[i]->index); pages[i]->index = pfn_to_mfn(pfn);
if (kmap_ops) if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn); ret = -ENOMEM;
goto out;
}
if (kmap_ops) {
ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
if (ret) if (ret)
goto out; goto out;
} }
}
out: out:
if (lazy) if (lazy)
arch_leave_lazy_mmu_mode(); arch_leave_lazy_mmu_mode();
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
static struct page *m2p_find_override(unsigned long mfn)
{
unsigned long flags;
struct list_head *bucket;
struct page *p, *ret;
if (unlikely(!m2p_overrides))
return NULL;
ret = NULL;
bucket = &m2p_overrides[mfn_hash(mfn)];
int m2p_remove_override(struct page *page, spin_lock_irqsave(&m2p_override_lock, flags);
list_for_each_entry(p, bucket, lru) {
if (page_private(p) == mfn) {
ret = p;
break;
}
}
spin_unlock_irqrestore(&m2p_override_lock, flags);
return ret;
}
static int m2p_remove_override(struct page *page,
struct gnttab_map_grant_ref *kmap_op, struct gnttab_map_grant_ref *kmap_op,
unsigned long mfn) unsigned long mfn)
{ {
...@@ -1102,8 +862,7 @@ int m2p_remove_override(struct page *page, ...@@ -1102,8 +862,7 @@ int m2p_remove_override(struct page *page,
* hypercall actually returned an error. * hypercall actually returned an error.
*/ */
if (kmap_op->handle == GNTST_general_error) { if (kmap_op->handle == GNTST_general_error) {
printk(KERN_WARNING "m2p_remove_override: " pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings",
"pfn %lx mfn %lx, failed to modify kernel mappings",
pfn, mfn); pfn, mfn);
put_balloon_scratch_page(); put_balloon_scratch_page();
return -1; return -1;
...@@ -1145,35 +904,56 @@ int m2p_remove_override(struct page *page, ...@@ -1145,35 +904,56 @@ int m2p_remove_override(struct page *page,
* pfn again. */ * pfn again. */
mfn &= ~FOREIGN_FRAME_BIT; mfn &= ~FOREIGN_FRAME_BIT;
pfn = mfn_to_pfn_no_overrides(mfn); pfn = mfn_to_pfn_no_overrides(mfn);
if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) &&
m2p_find_override(mfn) == NULL) m2p_find_override(mfn) == NULL)
set_phys_to_machine(pfn, mfn); set_phys_to_machine(pfn, mfn);
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(m2p_remove_override);
struct page *m2p_find_override(unsigned long mfn) int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count)
{ {
unsigned long flags; int i, ret = 0;
struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; bool lazy = false;
struct page *p, *ret;
ret = NULL;
spin_lock_irqsave(&m2p_override_lock, flags); if (xen_feature(XENFEAT_auto_translated_physmap))
return 0;
list_for_each_entry(p, bucket, lru) { if (kmap_ops &&
if (page_private(p) == mfn) { !in_interrupt() &&
ret = p; paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
break; arch_enter_lazy_mmu_mode();
lazy = true;
} }
for (i = 0; i < count; i++) {
unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
unsigned long pfn = page_to_pfn(pages[i]);
if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
ret = -EINVAL;
goto out;
} }
spin_unlock_irqrestore(&m2p_override_lock, flags); set_page_private(pages[i], INVALID_P2M_ENTRY);
WARN_ON(!PagePrivate(pages[i]));
ClearPagePrivate(pages[i]);
set_phys_to_machine(pfn, pages[i]->index);
if (kmap_ops)
ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn);
if (ret)
goto out;
}
out:
if (lazy)
arch_leave_lazy_mmu_mode();
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
{ {
...@@ -1192,79 +972,29 @@ EXPORT_SYMBOL_GPL(m2p_find_override_pfn); ...@@ -1192,79 +972,29 @@ EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
#include "debugfs.h" #include "debugfs.h"
static int p2m_dump_show(struct seq_file *m, void *v) static int p2m_dump_show(struct seq_file *m, void *v)
{ {
static const char * const level_name[] = { "top", "middle",
"entry", "abnormal", "error"};
#define TYPE_IDENTITY 0
#define TYPE_MISSING 1
#define TYPE_PFN 2
#define TYPE_UNKNOWN 3
static const char * const type_name[] = { static const char * const type_name[] = {
[TYPE_IDENTITY] = "identity", [P2M_TYPE_IDENTITY] = "identity",
[TYPE_MISSING] = "missing", [P2M_TYPE_MISSING] = "missing",
[TYPE_PFN] = "pfn", [P2M_TYPE_PFN] = "pfn",
[TYPE_UNKNOWN] = "abnormal"}; [P2M_TYPE_UNKNOWN] = "abnormal"};
unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; unsigned long pfn, first_pfn;
unsigned int uninitialized_var(prev_level); int type, prev_type;
unsigned int uninitialized_var(prev_type);
prev_type = xen_p2m_elem_type(0);
if (!p2m_top) first_pfn = 0;
return 0;
for (pfn = 0; pfn < xen_p2m_size; pfn++) {
for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) { type = xen_p2m_elem_type(pfn);
unsigned topidx = p2m_top_index(pfn); if (type != prev_type) {
unsigned mididx = p2m_mid_index(pfn); seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
unsigned idx = p2m_index(pfn); type_name[prev_type]);
unsigned lvl, type;
lvl = 4;
type = TYPE_UNKNOWN;
if (p2m_top[topidx] == p2m_mid_missing) {
lvl = 0; type = TYPE_MISSING;
} else if (p2m_top[topidx] == NULL) {
lvl = 0; type = TYPE_UNKNOWN;
} else if (p2m_top[topidx][mididx] == NULL) {
lvl = 1; type = TYPE_UNKNOWN;
} else if (p2m_top[topidx][mididx] == p2m_identity) {
lvl = 1; type = TYPE_IDENTITY;
} else if (p2m_top[topidx][mididx] == p2m_missing) {
lvl = 1; type = TYPE_MISSING;
} else if (p2m_top[topidx][mididx][idx] == 0) {
lvl = 2; type = TYPE_UNKNOWN;
} else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
lvl = 2; type = TYPE_IDENTITY;
} else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
lvl = 2; type = TYPE_MISSING;
} else if (p2m_top[topidx][mididx][idx] == pfn) {
lvl = 2; type = TYPE_PFN;
} else if (p2m_top[topidx][mididx][idx] != pfn) {
lvl = 2; type = TYPE_PFN;
}
if (pfn == 0) {
prev_level = lvl;
prev_type = type;
}
if (pfn == MAX_DOMAIN_PAGES-1) {
lvl = 3;
type = TYPE_UNKNOWN;
}
if (prev_type != type) {
seq_printf(m, " [0x%lx->0x%lx] %s\n",
prev_pfn_type, pfn, type_name[prev_type]);
prev_pfn_type = pfn;
prev_type = type; prev_type = type;
} first_pfn = pfn;
if (prev_level != lvl) {
seq_printf(m, " [0x%lx->0x%lx] level %s\n",
prev_pfn_level, pfn, level_name[prev_level]);
prev_pfn_level = pfn;
prev_level = lvl;
} }
} }
seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
type_name[prev_type]);
return 0; return 0;
#undef TYPE_IDENTITY
#undef TYPE_MISSING
#undef TYPE_PFN
#undef TYPE_UNKNOWN
} }
static int p2m_dump_open(struct inode *inode, struct file *filp) static int p2m_dump_open(struct inode *inode, struct file *filp)
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "xen-ops.h" #include "xen-ops.h"
#include "vdso.h" #include "vdso.h"
#include "p2m.h" #include "p2m.h"
#include "mmu.h"
/* These are code, but not functions. Defined in entry.S */ /* These are code, but not functions. Defined in entry.S */
extern const char xen_hypervisor_callback[]; extern const char xen_hypervisor_callback[];
...@@ -47,8 +48,19 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; ...@@ -47,8 +48,19 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
/* Number of pages released from the initial allocation. */ /* Number of pages released from the initial allocation. */
unsigned long xen_released_pages; unsigned long xen_released_pages;
/* Buffer used to remap identity mapped pages */ /*
unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata; * Buffer used to remap identity mapped pages. We only need the virtual space.
* The physical page behind this address is remapped as needed to different
* buffer pages.
*/
#define REMAP_SIZE (P2M_PER_PAGE - 3)
static struct {
unsigned long next_area_mfn;
unsigned long target_pfn;
unsigned long size;
unsigned long mfns[REMAP_SIZE];
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
/* /*
* The maximum amount of extra memory compared to the base size. The * The maximum amount of extra memory compared to the base size. The
...@@ -64,7 +76,6 @@ unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata; ...@@ -64,7 +76,6 @@ unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
static void __init xen_add_extra_mem(u64 start, u64 size) static void __init xen_add_extra_mem(u64 start, u64 size)
{ {
unsigned long pfn;
int i; int i;
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
...@@ -84,75 +95,76 @@ static void __init xen_add_extra_mem(u64 start, u64 size) ...@@ -84,75 +95,76 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
printk(KERN_WARNING "Warning: not enough extra memory regions\n"); printk(KERN_WARNING "Warning: not enough extra memory regions\n");
memblock_reserve(start, size); memblock_reserve(start, size);
}
xen_max_p2m_pfn = PFN_DOWN(start + size); static void __init xen_del_extra_mem(u64 start, u64 size)
for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { {
unsigned long mfn = pfn_to_mfn(pfn); int i;
u64 start_r, size_r;
if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
continue;
WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
pfn, mfn);
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY); for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
start_r = xen_extra_mem[i].start;
size_r = xen_extra_mem[i].size;
/* Start of region. */
if (start_r == start) {
BUG_ON(size > size_r);
xen_extra_mem[i].start += size;
xen_extra_mem[i].size -= size;
break;
}
/* End of region. */
if (start_r + size_r == start + size) {
BUG_ON(size > size_r);
xen_extra_mem[i].size -= size;
break;
} }
/* Mid of region. */
if (start > start_r && start < start_r + size_r) {
BUG_ON(start + size > start_r + size_r);
xen_extra_mem[i].size = start - start_r;
/* Calling memblock_reserve() again is okay. */
xen_add_extra_mem(start + size, start_r + size_r -
(start + size));
break;
}
}
memblock_free(start, size);
} }
static unsigned long __init xen_do_chunk(unsigned long start, /*
unsigned long end, bool release) * Called during boot before the p2m list can take entries beyond the
* hypervisor supplied p2m list. Entries in extra mem are to be regarded as
* invalid.
*/
unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
{ {
struct xen_memory_reservation reservation = { int i;
.address_bits = 0, unsigned long addr = PFN_PHYS(pfn);
.extent_order = 0,
.domid = DOMID_SELF
};
unsigned long len = 0;
unsigned long pfn;
int ret;
for (pfn = start; pfn < end; pfn++) {
unsigned long frame;
unsigned long mfn = pfn_to_mfn(pfn);
if (release) { for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
/* Make sure pfn exists to start with */ if (addr >= xen_extra_mem[i].start &&
if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) addr < xen_extra_mem[i].start + xen_extra_mem[i].size)
continue; return INVALID_P2M_ENTRY;
frame = mfn;
} else {
if (mfn != INVALID_P2M_ENTRY)
continue;
frame = pfn;
} }
set_xen_guest_handle(reservation.extent_start, &frame);
reservation.nr_extents = 1;
ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap, return IDENTITY_FRAME(pfn);
&reservation); }
WARN(ret != 1, "Failed to %s pfn %lx err=%d\n",
release ? "release" : "populate", pfn, ret);
if (ret == 1) { /*
if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) { * Mark all pfns of extra mem as invalid in p2m list.
if (release) */
break; void __init xen_inv_extra_mem(void)
set_xen_guest_handle(reservation.extent_start, &frame); {
reservation.nr_extents = 1; unsigned long pfn, pfn_s, pfn_e;
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, int i;
&reservation);
break;
}
len++;
} else
break;
}
if (len)
printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n",
release ? "Freeing" : "Populating",
start, end, len,
release ? "freed" : "added");
return len; for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
pfn_s = PFN_DOWN(xen_extra_mem[i].start);
pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size);
for (pfn = pfn_s; pfn < pfn_e; pfn++)
set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
} }
/* /*
...@@ -198,26 +210,62 @@ static unsigned long __init xen_find_pfn_range( ...@@ -198,26 +210,62 @@ static unsigned long __init xen_find_pfn_range(
return done; return done;
} }
/*
 * Give one machine frame back via XENMEM_decrease_reservation.
 *
 * The request describes exactly one order-0 extent (@mfn) for DOMID_SELF.
 * The raw hypercall result is returned unchanged; the caller in this file
 * treats a return value of 1 as success.
 */
static int __init xen_free_mfn(unsigned long mfn)
{
	struct xen_memory_reservation req = {
		.nr_extents   = 1,
		.extent_order = 0,
		.address_bits = 0,
		.domid        = DOMID_SELF
	};

	/* The extent list is just the single mfn passed in. */
	set_xen_guest_handle(req.extent_start, &mfn);

	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &req);
}
/* /*
* This releases a chunk of memory and then does the identity map. It's used as * This releases a chunk of memory and then does the identity map. It's used
* as a fallback if the remapping fails. * as a fallback if the remapping fails.
*/ */
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity, unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
unsigned long *released) unsigned long *released)
{ {
unsigned long len = 0;
unsigned long pfn, end;
int ret;
WARN_ON(start_pfn > end_pfn); WARN_ON(start_pfn > end_pfn);
end = min(end_pfn, nr_pages);
for (pfn = start_pfn; pfn < end; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
/* Make sure pfn exists to start with */
if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
continue;
ret = xen_free_mfn(mfn);
WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
if (ret == 1) {
if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
break;
len++;
} else
break;
}
/* Need to release pages first */ /* Need to release pages first */
*released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true); *released += len;
*identity += set_phys_range_identity(start_pfn, end_pfn); *identity += set_phys_range_identity(start_pfn, end_pfn);
} }
/* /*
* Helper function to update both the p2m and m2p tables. * Helper function to update the p2m and m2p tables and kernel mapping.
*/ */
static unsigned long __init xen_update_mem_tables(unsigned long pfn, static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
unsigned long mfn)
{ {
struct mmu_update update = { struct mmu_update update = {
.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
...@@ -225,161 +273,88 @@ static unsigned long __init xen_update_mem_tables(unsigned long pfn, ...@@ -225,161 +273,88 @@ static unsigned long __init xen_update_mem_tables(unsigned long pfn,
}; };
/* Update p2m */ /* Update p2m */
if (!early_set_phys_to_machine(pfn, mfn)) { if (!set_phys_to_machine(pfn, mfn)) {
WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n", WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
pfn, mfn); pfn, mfn);
return false; BUG();
} }
/* Update m2p */ /* Update m2p */
if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) { if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n", WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
mfn, pfn); mfn, pfn);
return false; BUG();
} }
return true; /* Update kernel mapping, but not for highmem. */
if ((pfn << PAGE_SHIFT) >= __pa(high_memory))
return;
if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
mfn_pte(mfn, PAGE_KERNEL), 0)) {
WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
mfn, pfn);
BUG();
}
} }
/* /*
* This function updates the p2m and m2p tables with an identity map from * This function updates the p2m and m2p tables with an identity map from
* start_pfn to start_pfn+size and remaps the underlying RAM of the original * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
* allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks * original allocation at remap_pfn. The information needed for remapping is
* to not exhaust the reserved brk space. Doing it in properly aligned blocks * saved in the memory itself to avoid the need for allocating buffers. The
* ensures we only allocate the minimum required leaf pages in the p2m table. It * complete remap information is contained in a list of MFNs each containing
* copies the existing mfns from the p2m table under the 1:1 map, overwrites * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
* them with the identity map and then updates the p2m and m2p tables with the * This enables us to preserve the original mfn sequence while doing the
* remapped memory. * remapping at a time when the memory management is capable of allocating
* virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
* its callers.
*/ */
static unsigned long __init xen_do_set_identity_and_remap_chunk( static void __init xen_do_set_identity_and_remap_chunk(
unsigned long start_pfn, unsigned long size, unsigned long remap_pfn) unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{ {
unsigned long buf = (unsigned long)&xen_remap_buf;
unsigned long mfn_save, mfn;
unsigned long ident_pfn_iter, remap_pfn_iter; unsigned long ident_pfn_iter, remap_pfn_iter;
unsigned long ident_start_pfn_align, remap_start_pfn_align; unsigned long ident_end_pfn = start_pfn + size;
unsigned long ident_end_pfn_align, remap_end_pfn_align;
unsigned long ident_boundary_pfn, remap_boundary_pfn;
unsigned long ident_cnt = 0;
unsigned long remap_cnt = 0;
unsigned long left = size; unsigned long left = size;
unsigned long mod; unsigned long ident_cnt = 0;
int i; unsigned int i, chunk;
WARN_ON(size == 0); WARN_ON(size == 0);
BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
/* mfn_save = virt_to_mfn(buf);
* Determine the proper alignment to remap memory in P2M_PER_PAGE sized
* blocks. We need to keep track of both the existing pfn mapping and
* the new pfn remapping.
*/
mod = start_pfn % P2M_PER_PAGE;
ident_start_pfn_align =
mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
mod = remap_pfn % P2M_PER_PAGE;
remap_start_pfn_align =
mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
mod = (start_pfn + size) % P2M_PER_PAGE;
ident_end_pfn_align = start_pfn + size - mod;
mod = (remap_pfn + size) % P2M_PER_PAGE;
remap_end_pfn_align = remap_pfn + size - mod;
/* Iterate over each p2m leaf node in each range */
for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align;
ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align;
ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) {
/* Check we aren't past the end */
BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);
/* Save p2m mappings */
for (i = 0; i < P2M_PER_PAGE; i++)
xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i);
/* Set identity map which will free a p2m leaf */
ident_cnt += set_phys_range_identity(ident_pfn_iter,
ident_pfn_iter + P2M_PER_PAGE);
#ifdef DEBUG
/* Helps verify a p2m leaf has been freed */
for (i = 0; i < P2M_PER_PAGE; i++) {
unsigned int pfn = ident_pfn_iter + i;
BUG_ON(pfn_to_mfn(pfn) != pfn);
}
#endif
/* Now remap memory */
for (i = 0; i < P2M_PER_PAGE; i++) {
unsigned long mfn = xen_remap_buf[i];
/* This will use the p2m leaf freed above */
if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
remap_pfn_iter + i, mfn);
return 0;
}
remap_cnt++;
}
left -= P2M_PER_PAGE;
}
/* Max boundary space possible */
BUG_ON(left > (P2M_PER_PAGE - 1) * 2);
/* Now handle the boundary conditions */
ident_boundary_pfn = start_pfn;
remap_boundary_pfn = remap_pfn;
for (i = 0; i < left; i++) {
unsigned long mfn;
/* These two checks move from the start to end boundaries */
if (ident_boundary_pfn == ident_start_pfn_align)
ident_boundary_pfn = ident_pfn_iter;
if (remap_boundary_pfn == remap_start_pfn_align)
remap_boundary_pfn = remap_pfn_iter;
/* Check we aren't past the end */ for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
BUG_ON(ident_boundary_pfn >= start_pfn + size); ident_pfn_iter < ident_end_pfn;
BUG_ON(remap_boundary_pfn >= remap_pfn + size); ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;
mfn = pfn_to_mfn(ident_boundary_pfn); /* Map first pfn to xen_remap_buf */
mfn = pfn_to_mfn(ident_pfn_iter);
set_pte_mfn(buf, mfn, PAGE_KERNEL);
if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) { /* Save mapping information in page */
WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n", xen_remap_buf.next_area_mfn = xen_remap_mfn;
remap_pfn_iter + i, mfn); xen_remap_buf.target_pfn = remap_pfn_iter;
return 0; xen_remap_buf.size = chunk;
} for (i = 0; i < chunk; i++)
remap_cnt++; xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);
ident_boundary_pfn++; /* Put remap buf into list. */
remap_boundary_pfn++; xen_remap_mfn = mfn;
}
/* Finish up the identity map */ /* Set identity map */
if (ident_start_pfn_align >= ident_end_pfn_align) {
/*
* In this case we have an identity range which does not span an
* aligned block so everything needs to be identity mapped here.
* If we didn't check this we might remap too many pages since
* the align boundaries are not meaningful in this case.
*/
ident_cnt += set_phys_range_identity(start_pfn,
start_pfn + size);
} else {
/* Remapped above so check each end of the chunk */
if (start_pfn < ident_start_pfn_align)
ident_cnt += set_phys_range_identity(start_pfn,
ident_start_pfn_align);
if (start_pfn + size > ident_pfn_iter)
ident_cnt += set_phys_range_identity(ident_pfn_iter, ident_cnt += set_phys_range_identity(ident_pfn_iter,
start_pfn + size); ident_pfn_iter + chunk);
}
BUG_ON(ident_cnt != size); left -= chunk;
BUG_ON(remap_cnt != size); }
return size; /* Restore old xen_remap_buf mapping */
set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
} }
/* /*
...@@ -396,8 +371,7 @@ static unsigned long __init xen_do_set_identity_and_remap_chunk( ...@@ -396,8 +371,7 @@ static unsigned long __init xen_do_set_identity_and_remap_chunk(
static unsigned long __init xen_set_identity_and_remap_chunk( static unsigned long __init xen_set_identity_and_remap_chunk(
const struct e820entry *list, size_t map_size, unsigned long start_pfn, const struct e820entry *list, size_t map_size, unsigned long start_pfn,
unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn, unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
unsigned long *identity, unsigned long *remapped, unsigned long *identity, unsigned long *released)
unsigned long *released)
{ {
unsigned long pfn; unsigned long pfn;
unsigned long i = 0; unsigned long i = 0;
...@@ -431,19 +405,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk( ...@@ -431,19 +405,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
if (size > remap_range_size) if (size > remap_range_size)
size = remap_range_size; size = remap_range_size;
if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) { xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);
WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n",
cur_pfn, size, remap_pfn);
xen_set_identity_and_release_chunk(cur_pfn,
cur_pfn + left, nr_pages, identity, released);
break;
}
/* Update variables to reflect new mappings. */ /* Update variables to reflect new mappings. */
i += size; i += size;
remap_pfn += size; remap_pfn += size;
*identity += size; *identity += size;
*remapped += size;
} }
/* /*
...@@ -458,13 +425,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk( ...@@ -458,13 +425,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
return remap_pfn; return remap_pfn;
} }
static unsigned long __init xen_set_identity_and_remap( static void __init xen_set_identity_and_remap(
const struct e820entry *list, size_t map_size, unsigned long nr_pages, const struct e820entry *list, size_t map_size, unsigned long nr_pages,
unsigned long *released) unsigned long *released)
{ {
phys_addr_t start = 0; phys_addr_t start = 0;
unsigned long identity = 0; unsigned long identity = 0;
unsigned long remapped = 0;
unsigned long last_pfn = nr_pages; unsigned long last_pfn = nr_pages;
const struct e820entry *entry; const struct e820entry *entry;
unsigned long num_released = 0; unsigned long num_released = 0;
...@@ -494,8 +460,7 @@ static unsigned long __init xen_set_identity_and_remap( ...@@ -494,8 +460,7 @@ static unsigned long __init xen_set_identity_and_remap(
last_pfn = xen_set_identity_and_remap_chunk( last_pfn = xen_set_identity_and_remap_chunk(
list, map_size, start_pfn, list, map_size, start_pfn,
end_pfn, nr_pages, last_pfn, end_pfn, nr_pages, last_pfn,
&identity, &remapped, &identity, &num_released);
&num_released);
start = end; start = end;
} }
} }
...@@ -503,12 +468,63 @@ static unsigned long __init xen_set_identity_and_remap( ...@@ -503,12 +468,63 @@ static unsigned long __init xen_set_identity_and_remap(
*released = num_released; *released = num_released;
pr_info("Set %ld page(s) to 1-1 mapping\n", identity); pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped,
last_pfn);
pr_info("Released %ld page(s)\n", num_released); pr_info("Released %ld page(s)\n", num_released);
}
return last_pfn; /*
* Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
* The remap information (which mfn remap to which pfn) is contained in the
* to be remapped memory itself in a linked list anchored at xen_remap_mfn.
* This scheme allows remapping the different chunks in arbitrary order while
* the resulting mapping will be independent of the order.
*/
void __init xen_remap_memory(void)
{
unsigned long buf = (unsigned long)&xen_remap_buf;
unsigned long mfn_save, mfn, pfn;
unsigned long remapped = 0;
unsigned int i;
unsigned long pfn_s = ~0UL;
unsigned long len = 0;
mfn_save = virt_to_mfn(buf);
while (xen_remap_mfn != INVALID_P2M_ENTRY) {
/* Map the remap information */
set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);
BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);
pfn = xen_remap_buf.target_pfn;
for (i = 0; i < xen_remap_buf.size; i++) {
mfn = xen_remap_buf.mfns[i];
xen_update_mem_tables(pfn, mfn);
remapped++;
pfn++;
}
if (pfn_s == ~0UL || pfn == pfn_s) {
pfn_s = xen_remap_buf.target_pfn;
len += xen_remap_buf.size;
} else if (pfn_s + len == xen_remap_buf.target_pfn) {
len += xen_remap_buf.size;
} else {
xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
pfn_s = xen_remap_buf.target_pfn;
len = xen_remap_buf.size;
}
mfn = xen_remap_mfn;
xen_remap_mfn = xen_remap_buf.next_area_mfn;
}
if (pfn_s != ~0UL && len)
xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
pr_info("Remapped %ld page(s)\n", remapped);
} }
static unsigned long __init xen_get_max_pages(void) static unsigned long __init xen_get_max_pages(void)
{ {
unsigned long max_pages = MAX_DOMAIN_PAGES; unsigned long max_pages = MAX_DOMAIN_PAGES;
...@@ -569,7 +585,6 @@ char * __init xen_memory_setup(void) ...@@ -569,7 +585,6 @@ char * __init xen_memory_setup(void)
int rc; int rc;
struct xen_memory_map memmap; struct xen_memory_map memmap;
unsigned long max_pages; unsigned long max_pages;
unsigned long last_pfn = 0;
unsigned long extra_pages = 0; unsigned long extra_pages = 0;
int i; int i;
int op; int op;
...@@ -616,17 +631,14 @@ char * __init xen_memory_setup(void) ...@@ -616,17 +631,14 @@ char * __init xen_memory_setup(void)
extra_pages += max_pages - max_pfn; extra_pages += max_pages - max_pfn;
/* /*
* Set identity map on non-RAM pages and remap the underlying RAM. * Set identity map on non-RAM pages and prepare remapping the
* underlying RAM.
*/ */
last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn, xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
&xen_released_pages); &xen_released_pages);
extra_pages += xen_released_pages; extra_pages += xen_released_pages;
if (last_pfn > max_pfn) {
max_pfn = min(MAX_DOMAIN_PAGES, last_pfn);
mem_end = PFN_PHYS(max_pfn);
}
/* /*
* Clamp the amount of extra memory to a EXTRA_MEM_RATIO * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
* factor the base size. On non-highmem systems, the base * factor the base size. On non-highmem systems, the base
...@@ -653,6 +665,7 @@ char * __init xen_memory_setup(void) ...@@ -653,6 +665,7 @@ char * __init xen_memory_setup(void)
size = min(size, (u64)extra_pages * PAGE_SIZE); size = min(size, (u64)extra_pages * PAGE_SIZE);
extra_pages -= size / PAGE_SIZE; extra_pages -= size / PAGE_SIZE;
xen_add_extra_mem(addr, size); xen_add_extra_mem(addr, size);
xen_max_p2m_pfn = PFN_DOWN(addr + size);
} else } else
type = E820_UNUSABLE; type = E820_UNUSABLE;
} }
......
...@@ -29,11 +29,13 @@ void xen_build_mfn_list_list(void); ...@@ -29,11 +29,13 @@ void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void); void xen_setup_machphys_mapping(void);
void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
void xen_reserve_top(void); void xen_reserve_top(void);
extern unsigned long xen_max_p2m_pfn;
void xen_mm_pin_all(void); void xen_mm_pin_all(void);
void xen_mm_unpin_all(void); void xen_mm_unpin_all(void);
unsigned long __ref xen_chk_extra_mem(unsigned long pfn);
void __init xen_inv_extra_mem(void);
void __init xen_remap_memory(void);
char * __init xen_memory_setup(void); char * __init xen_memory_setup(void);
char * xen_auto_xlated_memory_setup(void); char * xen_auto_xlated_memory_setup(void);
void __init xen_arch_setup(void); void __init xen_arch_setup(void);
...@@ -46,7 +48,7 @@ void xen_hvm_init_shared_info(void); ...@@ -46,7 +48,7 @@ void xen_hvm_init_shared_info(void);
void xen_unplug_emulated_devices(void); void xen_unplug_emulated_devices(void);
void __init xen_build_dynamic_phys_to_machine(void); void __init xen_build_dynamic_phys_to_machine(void);
unsigned long __init xen_revector_p2m_tree(void); void __init xen_vmalloc_p2m_tree(void);
void xen_init_irq_ops(void); void xen_init_irq_ops(void);
void xen_setup_timer(int cpu); void xen_setup_timer(int cpu);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment