Commit 31018acd authored by Linus Torvalds

Merge branches 'stable/bug.fixes-3.2' and 'stable/mmu.fixes' of...

Merge branches 'stable/bug.fixes-3.2' and 'stable/mmu.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen

* 'stable/bug.fixes-3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen/p2m/debugfs: Make type_name more obvious.
  xen/p2m/debugfs: Fix potential pointer exception.
  xen/enlighten: Fix compile warnings and set cx to known value.
  xen/xenbus: Remove the unnecessary check.
  xen/irq: If we fail during msi_capability_init return proper error code.
  xen/events: Don't check the info for NULL as it is already done.
  xen/events: BUG() when we can't allocate our event->irq array.

* 'stable/mmu.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen: Fix selfballooning and ensure it doesn't go too far
  xen/gntdev: Fix sleep-inside-spinlock
  xen: modify kernel mappings corresponding to granted pages
  xen: add an "highmem" parameter to alloc_xenballooned_pages
  xen/p2m: Use SetPagePrivate and its friends for M2P overrides.
  xen/p2m: Make debug/xen/mmu/p2m visible again.
  Revert "xen/debug: WARN_ON when identity PFN has no _PAGE_IOMAP flag set."
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <xen/interface/xen.h> #include <xen/interface/xen.h>
#include <xen/grant_table.h>
#include <xen/features.h> #include <xen/features.h>
/* Xen machine address */ /* Xen machine address */
...@@ -48,14 +49,11 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, ...@@ -48,14 +49,11 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e); unsigned long pfn_e);
extern int m2p_add_override(unsigned long mfn, struct page *page, extern int m2p_add_override(unsigned long mfn, struct page *page,
bool clear_pte); struct gnttab_map_grant_ref *kmap_op);
extern int m2p_remove_override(struct page *page, bool clear_pte); extern int m2p_remove_override(struct page *page, bool clear_pte);
extern struct page *m2p_find_override(unsigned long mfn); extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
#ifdef CONFIG_XEN_DEBUG_FS
extern int p2m_dump_show(struct seq_file *m, void *v);
#endif
static inline unsigned long pfn_to_mfn(unsigned long pfn) static inline unsigned long pfn_to_mfn(unsigned long pfn)
{ {
unsigned long mfn; unsigned long mfn;
......
...@@ -175,8 +175,10 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) ...@@ -175,8 +175,10 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
"pcifront-msi-x" : "pcifront-msi-x" :
"pcifront-msi", "pcifront-msi",
DOMID_SELF); DOMID_SELF);
if (irq < 0) if (irq < 0) {
ret = irq;
goto free; goto free;
}
i++; i++;
} }
kfree(v); kfree(v);
...@@ -221,8 +223,10 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) ...@@ -221,8 +223,10 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (msg.data != XEN_PIRQ_MSI_DATA || if (msg.data != XEN_PIRQ_MSI_DATA ||
xen_irq_from_pirq(pirq) < 0) { xen_irq_from_pirq(pirq) < 0) {
pirq = xen_allocate_pirq_msi(dev, msidesc); pirq = xen_allocate_pirq_msi(dev, msidesc);
if (pirq < 0) if (pirq < 0) {
irq = -ENODEV;
goto error; goto error;
}
xen_msi_compose_msg(dev, pirq, &msg); xen_msi_compose_msg(dev, pirq, &msg);
__write_msi_msg(msidesc, &msg); __write_msi_msg(msidesc, &msg);
dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
...@@ -244,7 +248,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) ...@@ -244,7 +248,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
error: error:
dev_err(&dev->dev, dev_err(&dev->dev,
"Xen PCI frontend has not registered MSI/MSI-X support!\n"); "Xen PCI frontend has not registered MSI/MSI-X support!\n");
return -ENODEV; return irq;
} }
#ifdef CONFIG_XEN_DOM0 #ifdef CONFIG_XEN_DOM0
......
...@@ -49,11 +49,3 @@ config XEN_DEBUG_FS ...@@ -49,11 +49,3 @@ config XEN_DEBUG_FS
help help
Enable statistics output and various tuning options in debugfs. Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead. Enabling this option may incur a significant performance overhead.
config XEN_DEBUG
bool "Enable Xen debug checks"
depends on XEN
default n
help
Enable various WARN_ON checks in the Xen MMU code.
Enabling this option WILL incur a significant performance overhead.
...@@ -251,6 +251,7 @@ static void __init xen_init_cpuid_mask(void) ...@@ -251,6 +251,7 @@ static void __init xen_init_cpuid_mask(void)
~((1 << X86_FEATURE_APIC) | /* disable local APIC */ ~((1 << X86_FEATURE_APIC) | /* disable local APIC */
(1 << X86_FEATURE_ACPI)); /* disable ACPI */ (1 << X86_FEATURE_ACPI)); /* disable ACPI */
ax = 1; ax = 1;
cx = 0;
xen_cpuid(&ax, &bx, &cx, &dx); xen_cpuid(&ax, &bx, &cx, &dx);
xsave_mask = xsave_mask =
......
...@@ -495,41 +495,6 @@ static pte_t xen_make_pte(pteval_t pte) ...@@ -495,41 +495,6 @@ static pte_t xen_make_pte(pteval_t pte)
} }
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
#ifdef CONFIG_XEN_DEBUG
pte_t xen_make_pte_debug(pteval_t pte)
{
phys_addr_t addr = (pte & PTE_PFN_MASK);
phys_addr_t other_addr;
bool io_page = false;
pte_t _pte;
if (pte & _PAGE_IOMAP)
io_page = true;
_pte = xen_make_pte(pte);
if (!addr)
return _pte;
if (io_page &&
(xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
WARN_ONCE(addr != other_addr,
"0x%lx is using VM_IO, but it is 0x%lx!\n",
(unsigned long)addr, (unsigned long)other_addr);
} else {
pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
other_addr = (_pte.pte & PTE_PFN_MASK);
WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
"0x%lx is missing VM_IO (and wasn't fixed)!\n",
(unsigned long)addr);
}
return _pte;
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
#endif
static pgd_t xen_make_pgd(pgdval_t pgd) static pgd_t xen_make_pgd(pgdval_t pgd)
{ {
pgd = pte_pfn_to_mfn(pgd); pgd = pte_pfn_to_mfn(pgd);
...@@ -1992,9 +1957,6 @@ void __init xen_ident_map_ISA(void) ...@@ -1992,9 +1957,6 @@ void __init xen_ident_map_ISA(void)
static void __init xen_post_allocator_init(void) static void __init xen_post_allocator_init(void)
{ {
#ifdef CONFIG_XEN_DEBUG
pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
#endif
pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud; pv_mmu_ops.set_pud = xen_set_pud;
...@@ -2404,17 +2366,3 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, ...@@ -2404,17 +2366,3 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
return err; return err;
} }
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
#ifdef CONFIG_XEN_DEBUG_FS
static int p2m_dump_open(struct inode *inode, struct file *filp)
{
return single_open(filp, p2m_dump_show, NULL);
}
static const struct file_operations p2m_dump_fops = {
.open = p2m_dump_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
#endif /* CONFIG_XEN_DEBUG_FS */
...@@ -161,7 +161,9 @@ ...@@ -161,7 +161,9 @@
#include <asm/xen/page.h> #include <asm/xen/page.h>
#include <asm/xen/hypercall.h> #include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h> #include <asm/xen/hypervisor.h>
#include <xen/grant_table.h>
#include "multicalls.h"
#include "xen-ops.h" #include "xen-ops.h"
static void __init m2p_override_init(void); static void __init m2p_override_init(void);
...@@ -676,7 +678,8 @@ static unsigned long mfn_hash(unsigned long mfn) ...@@ -676,7 +678,8 @@ static unsigned long mfn_hash(unsigned long mfn)
} }
/* Add an MFN override for a particular page */ /* Add an MFN override for a particular page */
int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) int m2p_add_override(unsigned long mfn, struct page *page,
struct gnttab_map_grant_ref *kmap_op)
{ {
unsigned long flags; unsigned long flags;
unsigned long pfn; unsigned long pfn;
...@@ -692,16 +695,28 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) ...@@ -692,16 +695,28 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
"m2p_add_override: pfn %lx not mapped", pfn)) "m2p_add_override: pfn %lx not mapped", pfn))
return -EINVAL; return -EINVAL;
} }
WARN_ON(PagePrivate(page));
page->private = mfn; SetPagePrivate(page);
set_page_private(page, mfn);
page->index = pfn_to_mfn(pfn); page->index = pfn_to_mfn(pfn);
if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
return -ENOMEM; return -ENOMEM;
if (clear_pte && !PageHighMem(page)) if (kmap_op != NULL) {
/* Just zap old mapping for now */ if (!PageHighMem(page)) {
pte_clear(&init_mm, address, ptep); struct multicall_space mcs =
xen_mc_entry(sizeof(*kmap_op));
MULTI_grant_table_op(mcs.mc,
GNTTABOP_map_grant_ref, kmap_op, 1);
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
/* let's use dev_bus_addr to record the old mfn instead */
kmap_op->dev_bus_addr = page->index;
page->index = (unsigned long) kmap_op;
}
spin_lock_irqsave(&m2p_override_lock, flags); spin_lock_irqsave(&m2p_override_lock, flags);
list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
spin_unlock_irqrestore(&m2p_override_lock, flags); spin_unlock_irqrestore(&m2p_override_lock, flags);
...@@ -735,13 +750,56 @@ int m2p_remove_override(struct page *page, bool clear_pte) ...@@ -735,13 +750,56 @@ int m2p_remove_override(struct page *page, bool clear_pte)
spin_lock_irqsave(&m2p_override_lock, flags); spin_lock_irqsave(&m2p_override_lock, flags);
list_del(&page->lru); list_del(&page->lru);
spin_unlock_irqrestore(&m2p_override_lock, flags); spin_unlock_irqrestore(&m2p_override_lock, flags);
set_phys_to_machine(pfn, page->index); WARN_ON(!PagePrivate(page));
ClearPagePrivate(page);
if (clear_pte) {
struct gnttab_map_grant_ref *map_op =
(struct gnttab_map_grant_ref *) page->index;
set_phys_to_machine(pfn, map_op->dev_bus_addr);
if (!PageHighMem(page)) {
struct multicall_space mcs;
struct gnttab_unmap_grant_ref *unmap_op;
/*
* It might be that we queued all the m2p grant table
* hypercalls in a multicall, then m2p_remove_override
* get called before the multicall has actually been
* issued. In this case handle is going to -1 because
* it hasn't been modified yet.
*/
if (map_op->handle == -1)
xen_mc_flush();
/*
* Now if map_op->handle is negative it means that the
* hypercall actually returned an error.
*/
if (map_op->handle == GNTST_general_error) {
printk(KERN_WARNING "m2p_remove_override: "
"pfn %lx mfn %lx, failed to modify kernel mappings",
pfn, mfn);
return -1;
}
mcs = xen_mc_entry(
sizeof(struct gnttab_unmap_grant_ref));
unmap_op = mcs.args;
unmap_op->host_addr = map_op->host_addr;
unmap_op->handle = map_op->handle;
unmap_op->dev_bus_addr = 0;
MULTI_grant_table_op(mcs.mc,
GNTTABOP_unmap_grant_ref, unmap_op, 1);
xen_mc_issue(PARAVIRT_LAZY_MMU);
if (clear_pte && !PageHighMem(page))
set_pte_at(&init_mm, address, ptep, set_pte_at(&init_mm, address, ptep,
pfn_pte(pfn, PAGE_KERNEL)); pfn_pte(pfn, PAGE_KERNEL));
/* No tlb flush necessary because the caller already __flush_tlb_single(address);
* left the pte unmapped. */ map_op->host_addr = 0;
}
} else
set_phys_to_machine(pfn, page->index);
return 0; return 0;
} }
...@@ -758,7 +816,7 @@ struct page *m2p_find_override(unsigned long mfn) ...@@ -758,7 +816,7 @@ struct page *m2p_find_override(unsigned long mfn)
spin_lock_irqsave(&m2p_override_lock, flags); spin_lock_irqsave(&m2p_override_lock, flags);
list_for_each_entry(p, bucket, lru) { list_for_each_entry(p, bucket, lru) {
if (p->private == mfn) { if (page_private(p) == mfn) {
ret = p; ret = p;
break; break;
} }
...@@ -782,17 +840,21 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) ...@@ -782,17 +840,21 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
EXPORT_SYMBOL_GPL(m2p_find_override_pfn); EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
#ifdef CONFIG_XEN_DEBUG_FS #ifdef CONFIG_XEN_DEBUG_FS
#include <linux/debugfs.h>
int p2m_dump_show(struct seq_file *m, void *v) #include "debugfs.h"
static int p2m_dump_show(struct seq_file *m, void *v)
{ {
static const char * const level_name[] = { "top", "middle", static const char * const level_name[] = { "top", "middle",
"entry", "abnormal" }; "entry", "abnormal", "error"};
static const char * const type_name[] = { "identity", "missing",
"pfn", "abnormal"};
#define TYPE_IDENTITY 0 #define TYPE_IDENTITY 0
#define TYPE_MISSING 1 #define TYPE_MISSING 1
#define TYPE_PFN 2 #define TYPE_PFN 2
#define TYPE_UNKNOWN 3 #define TYPE_UNKNOWN 3
static const char * const type_name[] = {
[TYPE_IDENTITY] = "identity",
[TYPE_MISSING] = "missing",
[TYPE_PFN] = "pfn",
[TYPE_UNKNOWN] = "abnormal"};
unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
unsigned int uninitialized_var(prev_level); unsigned int uninitialized_var(prev_level);
unsigned int uninitialized_var(prev_type); unsigned int uninitialized_var(prev_type);
...@@ -856,4 +918,32 @@ int p2m_dump_show(struct seq_file *m, void *v) ...@@ -856,4 +918,32 @@ int p2m_dump_show(struct seq_file *m, void *v)
#undef TYPE_PFN #undef TYPE_PFN
#undef TYPE_UNKNOWN #undef TYPE_UNKNOWN
} }
#endif
static int p2m_dump_open(struct inode *inode, struct file *filp)
{
return single_open(filp, p2m_dump_show, NULL);
}
static const struct file_operations p2m_dump_fops = {
.open = p2m_dump_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static struct dentry *d_mmu_debug;
static int __init xen_p2m_debugfs(void)
{
struct dentry *d_xen = xen_init_debugfs();
if (d_xen == NULL)
return -ENOMEM;
d_mmu_debug = debugfs_create_dir("mmu", d_xen);
debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
return 0;
}
fs_initcall(xen_p2m_debugfs);
#endif /* CONFIG_XEN_DEBUG_FS */
...@@ -396,7 +396,7 @@ static int xen_blkbk_map(struct blkif_request *req, ...@@ -396,7 +396,7 @@ static int xen_blkbk_map(struct blkif_request *req,
continue; continue;
ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
blkbk->pending_page(pending_req, i), false); blkbk->pending_page(pending_req, i), NULL);
if (ret) { if (ret) {
pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n", pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
(unsigned long)map[i].dev_bus_addr, ret); (unsigned long)map[i].dev_bus_addr, ret);
......
...@@ -501,20 +501,24 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target); ...@@ -501,20 +501,24 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target);
* alloc_xenballooned_pages - get pages that have been ballooned out * alloc_xenballooned_pages - get pages that have been ballooned out
* @nr_pages: Number of pages to get * @nr_pages: Number of pages to get
* @pages: pages returned * @pages: pages returned
* @highmem: highmem or lowmem pages
* @return 0 on success, error otherwise * @return 0 on success, error otherwise
*/ */
int alloc_xenballooned_pages(int nr_pages, struct page** pages) int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
{ {
int pgno = 0; int pgno = 0;
struct page* page; struct page* page;
mutex_lock(&balloon_mutex); mutex_lock(&balloon_mutex);
while (pgno < nr_pages) { while (pgno < nr_pages) {
page = balloon_retrieve(true); page = balloon_retrieve(highmem);
if (page) { if (page && PageHighMem(page) == highmem) {
pages[pgno++] = page; pages[pgno++] = page;
} else { } else {
enum bp_state st; enum bp_state st;
st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER); if (page)
balloon_append(page);
st = decrease_reservation(nr_pages - pgno,
highmem ? GFP_HIGHUSER : GFP_USER);
if (st != BP_DONE) if (st != BP_DONE)
goto out_undo; goto out_undo;
} }
......
...@@ -432,6 +432,7 @@ static int __must_check xen_allocate_irq_dynamic(void) ...@@ -432,6 +432,7 @@ static int __must_check xen_allocate_irq_dynamic(void)
irq = irq_alloc_desc_from(first, -1); irq = irq_alloc_desc_from(first, -1);
if (irq >= 0)
xen_irq_init(irq); xen_irq_init(irq);
return irq; return irq;
...@@ -713,7 +714,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, ...@@ -713,7 +714,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
mutex_lock(&irq_mapping_update_lock); mutex_lock(&irq_mapping_update_lock);
irq = xen_allocate_irq_dynamic(); irq = xen_allocate_irq_dynamic();
if (irq == -1) if (irq < 0)
goto out; goto out;
irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq, irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
...@@ -729,7 +730,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, ...@@ -729,7 +730,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
error_irq: error_irq:
mutex_unlock(&irq_mapping_update_lock); mutex_unlock(&irq_mapping_update_lock);
xen_free_irq(irq); xen_free_irq(irq);
return -1; return ret;
} }
#endif #endif
...@@ -779,7 +780,7 @@ int xen_irq_from_pirq(unsigned pirq) ...@@ -779,7 +780,7 @@ int xen_irq_from_pirq(unsigned pirq)
mutex_lock(&irq_mapping_update_lock); mutex_lock(&irq_mapping_update_lock);
list_for_each_entry(info, &xen_irq_list_head, list) { list_for_each_entry(info, &xen_irq_list_head, list) {
if (info == NULL || info->type != IRQT_PIRQ) if (info->type != IRQT_PIRQ)
continue; continue;
irq = info->irq; irq = info->irq;
if (info->u.pirq.pirq == pirq) if (info->u.pirq.pirq == pirq)
...@@ -1670,6 +1671,7 @@ void __init xen_init_IRQ(void) ...@@ -1670,6 +1671,7 @@ void __init xen_init_IRQ(void)
evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
GFP_KERNEL); GFP_KERNEL);
BUG_ON(!evtchn_to_irq);
for (i = 0; i < NR_EVENT_CHANNELS; i++) for (i = 0; i < NR_EVENT_CHANNELS; i++)
evtchn_to_irq[i] = -1; evtchn_to_irq[i] = -1;
......
...@@ -83,6 +83,7 @@ struct grant_map { ...@@ -83,6 +83,7 @@ struct grant_map {
struct ioctl_gntdev_grant_ref *grants; struct ioctl_gntdev_grant_ref *grants;
struct gnttab_map_grant_ref *map_ops; struct gnttab_map_grant_ref *map_ops;
struct gnttab_unmap_grant_ref *unmap_ops; struct gnttab_unmap_grant_ref *unmap_ops;
struct gnttab_map_grant_ref *kmap_ops;
struct page **pages; struct page **pages;
}; };
...@@ -116,19 +117,22 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) ...@@ -116,19 +117,22 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL); add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL);
add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL); add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL);
add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL); add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL);
add->kmap_ops = kzalloc(sizeof(add->kmap_ops[0]) * count, GFP_KERNEL);
add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL); add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL);
if (NULL == add->grants || if (NULL == add->grants ||
NULL == add->map_ops || NULL == add->map_ops ||
NULL == add->unmap_ops || NULL == add->unmap_ops ||
NULL == add->kmap_ops ||
NULL == add->pages) NULL == add->pages)
goto err; goto err;
if (alloc_xenballooned_pages(count, add->pages)) if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */))
goto err; goto err;
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
add->map_ops[i].handle = -1; add->map_ops[i].handle = -1;
add->unmap_ops[i].handle = -1; add->unmap_ops[i].handle = -1;
add->kmap_ops[i].handle = -1;
} }
add->index = 0; add->index = 0;
...@@ -142,6 +146,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) ...@@ -142,6 +146,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
kfree(add->grants); kfree(add->grants);
kfree(add->map_ops); kfree(add->map_ops);
kfree(add->unmap_ops); kfree(add->unmap_ops);
kfree(add->kmap_ops);
kfree(add); kfree(add);
return NULL; return NULL;
} }
...@@ -243,10 +248,35 @@ static int map_grant_pages(struct grant_map *map) ...@@ -243,10 +248,35 @@ static int map_grant_pages(struct grant_map *map)
gnttab_set_unmap_op(&map->unmap_ops[i], addr, gnttab_set_unmap_op(&map->unmap_ops[i], addr,
map->flags, -1 /* handle */); map->flags, -1 /* handle */);
} }
} else {
/*
* Setup the map_ops corresponding to the pte entries pointing
* to the kernel linear addresses of the struct pages.
* These ptes are completely different from the user ptes dealt
* with find_grant_ptes.
*/
for (i = 0; i < map->count; i++) {
unsigned level;
unsigned long address = (unsigned long)
pfn_to_kaddr(page_to_pfn(map->pages[i]));
pte_t *ptep;
u64 pte_maddr = 0;
BUG_ON(PageHighMem(map->pages[i]));
ptep = lookup_address(address, &level);
pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
map->flags |
GNTMAP_host_map |
GNTMAP_contains_pte,
map->grants[i].ref,
map->grants[i].domid);
}
} }
pr_debug("map %d+%d\n", map->index, map->count); pr_debug("map %d+%d\n", map->index, map->count);
err = gnttab_map_refs(map->map_ops, map->pages, map->count); err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
map->pages, map->count);
if (err) if (err)
return err; return err;
...@@ -462,13 +492,11 @@ static int gntdev_release(struct inode *inode, struct file *flip) ...@@ -462,13 +492,11 @@ static int gntdev_release(struct inode *inode, struct file *flip)
pr_debug("priv %p\n", priv); pr_debug("priv %p\n", priv);
spin_lock(&priv->lock);
while (!list_empty(&priv->maps)) { while (!list_empty(&priv->maps)) {
map = list_entry(priv->maps.next, struct grant_map, next); map = list_entry(priv->maps.next, struct grant_map, next);
list_del(&map->next); list_del(&map->next);
gntdev_put_map(map); gntdev_put_map(map);
} }
spin_unlock(&priv->lock);
if (use_ptemod) if (use_ptemod)
mmu_notifier_unregister(&priv->mn, priv->mm); mmu_notifier_unregister(&priv->mn, priv->mm);
...@@ -532,10 +560,11 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, ...@@ -532,10 +560,11 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
if (map) { if (map) {
list_del(&map->next); list_del(&map->next);
gntdev_put_map(map);
err = 0; err = 0;
} }
spin_unlock(&priv->lock); spin_unlock(&priv->lock);
if (map)
gntdev_put_map(map);
return err; return err;
} }
......
...@@ -448,6 +448,7 @@ unsigned int gnttab_max_grant_frames(void) ...@@ -448,6 +448,7 @@ unsigned int gnttab_max_grant_frames(void)
EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count) struct page **pages, unsigned int count)
{ {
int i, ret; int i, ret;
...@@ -488,8 +489,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, ...@@ -488,8 +489,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
*/ */
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
ret = m2p_add_override(mfn, pages[i], ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
map_ops[i].flags & GNTMAP_contains_pte);
if (ret) if (ret)
return ret; return ret;
} }
......
...@@ -68,6 +68,8 @@ ...@@ -68,6 +68,8 @@
*/ */
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/bootmem.h>
#include <linux/swap.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/mman.h> #include <linux/mman.h>
#include <linux/module.h> #include <linux/module.h>
...@@ -93,6 +95,15 @@ static unsigned int selfballoon_uphysteresis __read_mostly = 1; ...@@ -93,6 +95,15 @@ static unsigned int selfballoon_uphysteresis __read_mostly = 1;
/* In HZ, controls frequency of worker invocation. */ /* In HZ, controls frequency of worker invocation. */
static unsigned int selfballoon_interval __read_mostly = 5; static unsigned int selfballoon_interval __read_mostly = 5;
/*
* Minimum usable RAM in MB for selfballooning target for balloon.
* If non-zero, it is added to totalreserve_pages and self-ballooning
* will not balloon below the sum. If zero, a piecewise linear function
* is calculated as a minimum and added to totalreserve_pages. Note that
* setting this value indiscriminately may cause OOMs and crashes.
*/
static unsigned int selfballoon_min_usable_mb;
static void selfballoon_process(struct work_struct *work); static void selfballoon_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
...@@ -189,20 +200,23 @@ static int __init xen_selfballooning_setup(char *s) ...@@ -189,20 +200,23 @@ static int __init xen_selfballooning_setup(char *s)
__setup("selfballooning", xen_selfballooning_setup); __setup("selfballooning", xen_selfballooning_setup);
#endif /* CONFIG_FRONTSWAP */ #endif /* CONFIG_FRONTSWAP */
#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
/* /*
* Use current balloon size, the goal (vm_committed_as), and hysteresis * Use current balloon size, the goal (vm_committed_as), and hysteresis
* parameters to set a new target balloon size * parameters to set a new target balloon size
*/ */
static void selfballoon_process(struct work_struct *work) static void selfballoon_process(struct work_struct *work)
{ {
unsigned long cur_pages, goal_pages, tgt_pages; unsigned long cur_pages, goal_pages, tgt_pages, floor_pages;
unsigned long useful_pages;
bool reset_timer = false; bool reset_timer = false;
if (xen_selfballooning_enabled) { if (xen_selfballooning_enabled) {
cur_pages = balloon_stats.current_pages; cur_pages = totalram_pages;
tgt_pages = cur_pages; /* default is no change */ tgt_pages = cur_pages; /* default is no change */
goal_pages = percpu_counter_read_positive(&vm_committed_as) + goal_pages = percpu_counter_read_positive(&vm_committed_as) +
balloon_stats.current_pages - totalram_pages; totalreserve_pages;
#ifdef CONFIG_FRONTSWAP #ifdef CONFIG_FRONTSWAP
/* allow space for frontswap pages to be repatriated */ /* allow space for frontswap pages to be repatriated */
if (frontswap_selfshrinking && frontswap_enabled) if (frontswap_selfshrinking && frontswap_enabled)
...@@ -217,7 +231,26 @@ static void selfballoon_process(struct work_struct *work) ...@@ -217,7 +231,26 @@ static void selfballoon_process(struct work_struct *work)
((goal_pages - cur_pages) / ((goal_pages - cur_pages) /
selfballoon_uphysteresis); selfballoon_uphysteresis);
/* else if cur_pages == goal_pages, no change */ /* else if cur_pages == goal_pages, no change */
balloon_set_new_target(tgt_pages); useful_pages = max_pfn - totalreserve_pages;
if (selfballoon_min_usable_mb != 0)
floor_pages = totalreserve_pages +
MB2PAGES(selfballoon_min_usable_mb);
/* piecewise linear function ending in ~3% slope */
else if (useful_pages < MB2PAGES(16))
floor_pages = max_pfn; /* not worth ballooning */
else if (useful_pages < MB2PAGES(64))
floor_pages = totalreserve_pages + MB2PAGES(16) +
((useful_pages - MB2PAGES(16)) >> 1);
else if (useful_pages < MB2PAGES(512))
floor_pages = totalreserve_pages + MB2PAGES(40) +
((useful_pages - MB2PAGES(40)) >> 3);
else /* useful_pages >= MB2PAGES(512) */
floor_pages = totalreserve_pages + MB2PAGES(99) +
((useful_pages - MB2PAGES(99)) >> 5);
if (tgt_pages < floor_pages)
tgt_pages = floor_pages;
balloon_set_new_target(tgt_pages +
balloon_stats.current_pages - totalram_pages);
reset_timer = true; reset_timer = true;
} }
#ifdef CONFIG_FRONTSWAP #ifdef CONFIG_FRONTSWAP
...@@ -340,6 +373,31 @@ static ssize_t store_selfballoon_uphys(struct sys_device *dev, ...@@ -340,6 +373,31 @@ static ssize_t store_selfballoon_uphys(struct sys_device *dev,
static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
show_selfballoon_uphys, store_selfballoon_uphys); show_selfballoon_uphys, store_selfballoon_uphys);
SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n",
selfballoon_min_usable_mb);
static ssize_t store_selfballoon_min_usable_mb(struct sys_device *dev,
struct sysdev_attribute *attr,
const char *buf,
size_t count)
{
unsigned long val;
int err;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
err = strict_strtoul(buf, 10, &val);
if (err || val == 0)
return -EINVAL;
selfballoon_min_usable_mb = val;
return count;
}
static SYSDEV_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR,
show_selfballoon_min_usable_mb,
store_selfballoon_min_usable_mb);
#ifdef CONFIG_FRONTSWAP #ifdef CONFIG_FRONTSWAP
SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
...@@ -421,6 +479,7 @@ static struct attribute *selfballoon_attrs[] = { ...@@ -421,6 +479,7 @@ static struct attribute *selfballoon_attrs[] = {
&attr_selfballoon_interval.attr, &attr_selfballoon_interval.attr,
&attr_selfballoon_downhysteresis.attr, &attr_selfballoon_downhysteresis.attr,
&attr_selfballoon_uphysteresis.attr, &attr_selfballoon_uphysteresis.attr,
&attr_selfballoon_min_usable_mb.attr,
#ifdef CONFIG_FRONTSWAP #ifdef CONFIG_FRONTSWAP
&attr_frontswap_selfshrinking.attr, &attr_frontswap_selfshrinking.attr,
&attr_frontswap_hysteresis.attr, &attr_frontswap_hysteresis.attr,
......
...@@ -104,8 +104,6 @@ static int xenbus_uevent_backend(struct device *dev, ...@@ -104,8 +104,6 @@ static int xenbus_uevent_backend(struct device *dev,
xdev = to_xenbus_device(dev); xdev = to_xenbus_device(dev);
bus = container_of(xdev->dev.bus, struct xen_bus_type, bus); bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
if (xdev == NULL)
return -ENODEV;
if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype)) if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
return -ENOMEM; return -ENOMEM;
......
...@@ -25,8 +25,9 @@ extern struct balloon_stats balloon_stats; ...@@ -25,8 +25,9 @@ extern struct balloon_stats balloon_stats;
void balloon_set_new_target(unsigned long target); void balloon_set_new_target(unsigned long target);
int alloc_xenballooned_pages(int nr_pages, struct page** pages); int alloc_xenballooned_pages(int nr_pages, struct page **pages,
void free_xenballooned_pages(int nr_pages, struct page** pages); bool highmem);
void free_xenballooned_pages(int nr_pages, struct page **pages);
struct sys_device; struct sys_device;
#ifdef CONFIG_XEN_SELFBALLOONING #ifdef CONFIG_XEN_SELFBALLOONING
......
...@@ -156,6 +156,7 @@ unsigned int gnttab_max_grant_frames(void); ...@@ -156,6 +156,7 @@ unsigned int gnttab_max_grant_frames(void);
#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count); struct page **pages, unsigned int count);
int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
struct page **pages, unsigned int count); struct page **pages, unsigned int count);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment