Commit cc5f2704 authored by David Hildenbrand, committed by Linus Torvalds

proc/vmcore: convert oldmem_pfn_is_ram callback to more generic vmcore callbacks

Let's support multiple registered callbacks, making sure that
registering vmcore callbacks cannot fail.  Make the callback return a
bool instead of an int, handling how to deal with errors internally.
Drop unused HAVE_OLDMEM_PFN_IS_RAM.

We soon want to make use of this infrastructure from other drivers:
virtio-mem, registering one callback for each virtio-mem device, to
prevent reading unplugged virtio-mem memory.

Handle it via a generic vmcore_cb structure, prepared for future
extensions: for example, once we support virtio-mem on s390x where the
vmcore is completely constructed in the second kernel, we want to detect
and add plugged virtio-mem memory ranges to the vmcore in order for them
to get dumped properly.

Handle corner cases that are unexpected and shouldn't happen in sane
setups: registering a callback after the vmcore has already been opened
(warn only) and unregistering a callback after the vmcore has already been
opened (warn and essentially read only zeroes from that point on).

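For illustration only (not part of this patch; the foo_* names and the
excluded-range logic are hypothetical), a driver using the new interface
would look roughly like this, guarding registration the same way the
in-tree callers below do:

#include <linux/crash_dump.h>
#include <linux/module.h>

/* Hypothetical pfn range this driver knows is not backed by readable RAM. */
static unsigned long foo_excluded_start_pfn;
static unsigned long foo_excluded_nr_pfns;

static bool foo_vmcore_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
{
        /* Return "false" only for pfns this driver knows must not be read. */
        return pfn < foo_excluded_start_pfn ||
               pfn >= foo_excluded_start_pfn + foo_excluded_nr_pfns;
}

static struct vmcore_cb foo_vmcore_cb = {
        .pfn_is_ram = foo_vmcore_pfn_is_ram,
};

static int __init foo_init(void)
{
#ifdef CONFIG_PROC_VMCORE
        /* Cannot fail, unlike register_oldmem_pfn_is_ram(). */
        register_vmcore_cb(&foo_vmcore_cb);
#endif
        return 0;
}

static void __exit foo_exit(void)
{
#ifdef CONFIG_PROC_VMCORE
        unregister_vmcore_cb(&foo_vmcore_cb);
#endif
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");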
Link: https://lkml.kernel.org/r/20211005121430.30136-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 2c9feeae
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -73,12 +73,23 @@ static int gart_mem_pfn_is_ram(unsigned long pfn)
 		      (pfn >= aperture_pfn_start + aperture_page_count));
 }
 
+#ifdef CONFIG_PROC_VMCORE
+static bool gart_oldmem_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
+{
+	return !!gart_mem_pfn_is_ram(pfn);
+}
+
+static struct vmcore_cb gart_vmcore_cb = {
+	.pfn_is_ram = gart_oldmem_pfn_is_ram,
+};
+#endif
+
 static void __init exclude_from_core(u64 aper_base, u32 aper_order)
 {
 	aperture_pfn_start = aper_base >> PAGE_SHIFT;
 	aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
 #ifdef CONFIG_PROC_VMCORE
-	WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
+	register_vmcore_cb(&gart_vmcore_cb);
 #endif
 #ifdef CONFIG_PROC_KCORE
 	WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
--- a/arch/x86/xen/mmu_hvm.c
+++ b/arch/x86/xen/mmu_hvm.c
@@ -12,10 +12,10 @@
  * The kdump kernel has to check whether a pfn of the crashed kernel
  * was a ballooned page. vmcore is using this function to decide
  * whether to access a pfn of the crashed kernel.
- * Returns 0 if the pfn is not backed by a RAM page, the caller may
+ * Returns "false" if the pfn is not backed by a RAM page, the caller may
  * handle the pfn special in this case.
  */
-static int xen_oldmem_pfn_is_ram(unsigned long pfn)
+static bool xen_vmcore_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
 {
 	struct xen_hvm_get_mem_type a = {
 		.domid = DOMID_SELF,
@@ -24,10 +24,13 @@ static int xen_oldmem_pfn_is_ram(unsigned long pfn)
 
 	if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) {
 		pr_warn_once("Unexpected HVMOP_get_mem_type failure\n");
-		return -ENXIO;
+		return true;
 	}
 	return a.mem_type != HVMMEM_mmio_dm;
 }
+static struct vmcore_cb xen_vmcore_cb = {
+	.pfn_is_ram = xen_vmcore_pfn_is_ram,
+};
 #endif
 
 static void xen_hvm_exit_mmap(struct mm_struct *mm)
@@ -61,6 +64,6 @@ void __init xen_hvm_init_mmu_ops(void)
 	if (is_pagetable_dying_supported())
 		pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap;
 #ifdef CONFIG_PROC_VMCORE
-	WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram));
+	register_vmcore_cb(&xen_vmcore_cb);
 #endif
 }
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -62,46 +62,75 @@ core_param(novmcoredd, vmcoredd_disabled, bool, 0);
 /* Device Dump Size */
 static size_t vmcoredd_orig_sz;
 
-/*
- * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
- * The called function has to take care of module refcounting.
- */
-static int (*oldmem_pfn_is_ram)(unsigned long pfn);
-
-int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn))
+static DECLARE_RWSEM(vmcore_cb_rwsem);
+/* List of registered vmcore callbacks. */
+static LIST_HEAD(vmcore_cb_list);
+/* Whether we had a surprise unregistration of a callback. */
+static bool vmcore_cb_unstable;
+/* Whether the vmcore has been opened once. */
+static bool vmcore_opened;
+
+void register_vmcore_cb(struct vmcore_cb *cb)
 {
-	if (oldmem_pfn_is_ram)
-		return -EBUSY;
-	oldmem_pfn_is_ram = fn;
-	return 0;
+	down_write(&vmcore_cb_rwsem);
+	INIT_LIST_HEAD(&cb->next);
+	list_add_tail(&cb->next, &vmcore_cb_list);
+	/*
+	 * Registering a vmcore callback after the vmcore was opened is
+	 * very unusual (e.g., manual driver loading).
+	 */
+	if (vmcore_opened)
+		pr_warn_once("Unexpected vmcore callback registration\n");
+	up_write(&vmcore_cb_rwsem);
 }
-EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram);
+EXPORT_SYMBOL_GPL(register_vmcore_cb);
 
-void unregister_oldmem_pfn_is_ram(void)
+void unregister_vmcore_cb(struct vmcore_cb *cb)
 {
-	oldmem_pfn_is_ram = NULL;
-	wmb();
+	down_write(&vmcore_cb_rwsem);
+	list_del(&cb->next);
+	/*
+	 * Unregistering a vmcore callback after the vmcore was opened is
+	 * very unusual (e.g., forced driver removal), but we cannot stop
+	 * unregistering.
+	 */
+	if (vmcore_opened) {
+		pr_warn_once("Unexpected vmcore callback unregistration\n");
+		vmcore_cb_unstable = true;
+	}
+	up_write(&vmcore_cb_rwsem);
 }
-EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram);
+EXPORT_SYMBOL_GPL(unregister_vmcore_cb);
 
 static bool pfn_is_ram(unsigned long pfn)
 {
-	int (*fn)(unsigned long pfn);
-	/* pfn is ram unless fn() checks pagetype */
+	struct vmcore_cb *cb;
 	bool ret = true;
 
-	/*
-	 * Ask hypervisor if the pfn is really ram.
-	 * A ballooned page contains no data and reading from such a page
-	 * will cause high load in the hypervisor.
-	 */
-	fn = oldmem_pfn_is_ram;
-	if (fn)
-		ret = !!fn(pfn);
+	lockdep_assert_held_read(&vmcore_cb_rwsem);
+	if (unlikely(vmcore_cb_unstable))
+		return false;
+
+	list_for_each_entry(cb, &vmcore_cb_list, next) {
+		if (unlikely(!cb->pfn_is_ram))
+			continue;
+		ret = cb->pfn_is_ram(cb, pfn);
+		if (!ret)
+			break;
+	}
 
 	return ret;
 }
 
+static int open_vmcore(struct inode *inode, struct file *file)
+{
+	down_read(&vmcore_cb_rwsem);
+	vmcore_opened = true;
+	up_read(&vmcore_cb_rwsem);
+	return 0;
+}
+
 /* Reads a page from the oldmem device from given offset. */
 ssize_t read_from_oldmem(char *buf, size_t count,
 			 u64 *ppos, int userbuf,
@@ -117,6 +146,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
 	offset = (unsigned long)(*ppos % PAGE_SIZE);
 	pfn = (unsigned long)(*ppos / PAGE_SIZE);
 
+	down_read(&vmcore_cb_rwsem);
 	do {
 		if (count > (PAGE_SIZE - offset))
 			nr_bytes = PAGE_SIZE - offset;
@@ -136,8 +166,10 @@ ssize_t read_from_oldmem(char *buf, size_t count,
 				tmp = copy_oldmem_page(pfn, buf, nr_bytes,
 						       offset, userbuf);
 
-			if (tmp < 0)
+			if (tmp < 0) {
+				up_read(&vmcore_cb_rwsem);
 				return tmp;
+			}
 		}
 		*ppos += nr_bytes;
 		count -= nr_bytes;
@@ -147,6 +179,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
 		offset = 0;
 	} while (count);
 
+	up_read(&vmcore_cb_rwsem);
 	return read;
 }
 
@@ -537,14 +570,19 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
 			    unsigned long from, unsigned long pfn,
 			    unsigned long size, pgprot_t prot)
 {
+	int ret;
+
 	/*
 	 * Check if oldmem_pfn_is_ram was registered to avoid
 	 * looping over all pages without a reason.
 	 */
-	if (oldmem_pfn_is_ram)
-		return remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
+	down_read(&vmcore_cb_rwsem);
+	if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable)
+		ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
 	else
-		return remap_oldmem_pfn_range(vma, from, pfn, size, prot);
+		ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot);
+	up_read(&vmcore_cb_rwsem);
+	return ret;
 }
 
 static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
@@ -668,6 +706,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 #endif
 
 static const struct proc_ops vmcore_proc_ops = {
+	.proc_open	= open_vmcore,
 	.proc_read	= read_vmcore,
 	.proc_lseek	= default_llseek,
 	.proc_mmap	= mmap_vmcore,
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -91,9 +91,29 @@ static inline void vmcore_unusable(void)
 		elfcorehdr_addr = ELFCORE_ADDR_ERR;
 }
 
-#define HAVE_OLDMEM_PFN_IS_RAM 1
-extern int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn));
-extern void unregister_oldmem_pfn_is_ram(void);
+/**
+ * struct vmcore_cb - driver callbacks for /proc/vmcore handling
+ * @pfn_is_ram: check whether a PFN really is RAM and should be accessed when
+ *              reading the vmcore. Will return "true" if it is RAM or if the
+ *              callback cannot tell. If any callback returns "false", it's not
+ *              RAM and the page must not be accessed; zeroes should be
+ *              indicated in the vmcore instead. For example, a ballooned page
+ *              contains no data and reading from such a page will cause high
+ *              load in the hypervisor.
+ * @next: List head to manage registered callbacks internally; initialized by
+ *        register_vmcore_cb().
+ *
+ * vmcore callbacks allow drivers managing physical memory ranges to
+ * coordinate with vmcore handling code, for example, to prevent accessing
+ * physical memory ranges that should not be accessed when reading the vmcore,
+ * although included in the vmcore header as memory ranges to dump.
+ */
+struct vmcore_cb {
+	bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn);
+	struct list_head next;
+};
+extern void register_vmcore_cb(struct vmcore_cb *cb);
+extern void unregister_vmcore_cb(struct vmcore_cb *cb);
 
 #else /* !CONFIG_CRASH_DUMP */
 static inline bool is_kdump_kernel(void) { return 0; }
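The callback receives the vmcore_cb pointer so that a driver registering one
callback per device (as planned for virtio-mem) can recover its per-device
state via container_of(). A minimal sketch of that pattern, with all bar_*
names hypothetical and not part of this patch:

#include <linux/crash_dump.h>
#include <linux/kernel.h>

/* Hypothetical per-device state; one vmcore_cb is embedded per device. */
struct bar_device {
        unsigned long first_pfn;        /* start of the device-managed range */
        unsigned long nr_pfns;          /* size of the device-managed range */
        unsigned long plugged_pfns;     /* leading part that is actually backed */
        struct vmcore_cb vmcore_cb;
};

static bool bar_vmcore_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
{
        struct bar_device *dev = container_of(cb, struct bar_device, vmcore_cb);

        /* Outside the managed range this device cannot tell: treat as RAM. */
        if (pfn < dev->first_pfn || pfn >= dev->first_pfn + dev->nr_pfns)
                return true;

        /* Inside the range, only plugged (backed) pfns may be read. */
        return pfn - dev->first_pfn < dev->plugged_pfns;
}

static void bar_device_init_vmcore(struct bar_device *dev)
{
        dev->vmcore_cb.pfn_is_ram = bar_vmcore_pfn_is_ram;
        register_vmcore_cb(&dev->vmcore_cb);
}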