Commit 28b2ee20 authored by Rik van Riel's avatar Rik van Riel Committed by Linus Torvalds

access_process_vm device memory infrastructure

In order to be able to debug things like the X server and programs using
the PPC Cell SPUs, the debugger needs to be able to access device memory
through ptrace and /proc/pid/mem.

This patch:

Add the generic_access_phys access function and put the hooks in place
to allow access_process_vm to access device or PPC Cell SPU memory.

[riel@redhat.com: Add documentation for the vm_ops->access function]
Signed-off-by: default avatarRik van Riel <riel@redhat.com>
Signed-off-by: default avatarBenjamin Herrensmidt <benh@kernel.crashing.org>
Cc: Dave Airlie <airlied@linux.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Acked-by: default avatarPeter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 0d71d10a
......@@ -510,6 +510,7 @@ prototypes:
void (*close)(struct vm_area_struct*);
int (*fault)(struct vm_area_struct*, struct vm_fault *);
int (*page_mkwrite)(struct vm_area_struct *, struct page *);
int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
locking rules:
BKL mmap_sem PageLocked(page)
......@@ -517,6 +518,7 @@ open: no yes
close: no yes
fault: no yes
page_mkwrite: no yes no
access: no yes
->page_mkwrite() is called when a previously read-only page is
about to become writeable. The file system is responsible for
......@@ -525,6 +527,11 @@ taking to lock out truncate, the page range should be verified to be
within i_size. The page mapping should also be checked that it is not
NULL.
->access() is called when get_user_pages() fails in
acces_process_vm(), typically used to debug a process through
/proc/pid/mem or ptrace. This function is needed only for
VM_IO | VM_PFNMAP VMAs.
================================================================================
Dubious stuff
......
......@@ -31,6 +31,9 @@ config KRETPROBES
def_bool y
depends on KPROBES && HAVE_KRETPROBES
config HAVE_IOREMAP_PROT
def_bool n
config HAVE_KPROBES
def_bool n
......
......@@ -21,6 +21,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_DYNAMIC_FTRACE
......
......@@ -330,6 +330,14 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
return (void __iomem *)ret;
}
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
unsigned long prot_val)
{
return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK),
__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_prot);
/**
* iounmap - Free a IO remapping
* @addr: virtual address from ioremap_*
......
......@@ -110,6 +110,8 @@ static inline void *phys_to_virt(unsigned long address)
*/
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
unsigned long prot_val);
/*
* The default ioremap() behavior is non-cached:
......
......@@ -175,6 +175,8 @@ extern void early_iounmap(void *addr, unsigned long size);
*/
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
unsigned long prot_val);
/*
* The default ioremap() behavior is non-cached:
......
......@@ -170,6 +170,12 @@ struct vm_operations_struct {
/* notification that a previously read-only page is about to become
* writable, if an error is returned it will cause a SIGBUS */
int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
/* called by access_process_vm when get_user_pages() fails, typically
* for use by special VMAs that can switch between memory and hardware
*/
int (*access)(struct vm_area_struct *vma, unsigned long addr,
void *buf, int len, int write);
#ifdef CONFIG_NUMA
/*
* set_policy() op must add a reference to any non-NULL @new mempolicy
......@@ -771,6 +777,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma);
void unmap_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen, int even_cows);
int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
void *buf, int len, int write);
static inline void unmap_shared_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen)
......
......@@ -2751,6 +2751,86 @@ int in_gate_area_no_task(unsigned long addr)
#endif /* __HAVE_ARCH_GATE_AREA */
#ifdef CONFIG_HAVE_IOREMAP_PROT
static resource_size_t follow_phys(struct vm_area_struct *vma,
unsigned long address, unsigned int flags,
unsigned long *prot)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *ptep, pte;
spinlock_t *ptl;
resource_size_t phys_addr = 0;
struct mm_struct *mm = vma->vm_mm;
VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP)));
pgd = pgd_offset(mm, address);
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
goto no_page_table;
pud = pud_offset(pgd, address);
if (pud_none(*pud) || unlikely(pud_bad(*pud)))
goto no_page_table;
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
goto no_page_table;
/* We cannot handle huge page PFN maps. Luckily they don't exist. */
if (pmd_huge(*pmd))
goto no_page_table;
ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!ptep)
goto out;
pte = *ptep;
if (!pte_present(pte))
goto unlock;
if ((flags & FOLL_WRITE) && !pte_write(pte))
goto unlock;
phys_addr = pte_pfn(pte);
phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
*prot = pgprot_val(pte_pgprot(pte));
unlock:
pte_unmap_unlock(ptep, ptl);
out:
return phys_addr;
no_page_table:
return 0;
}
int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
void *buf, int len, int write)
{
resource_size_t phys_addr;
unsigned long prot = 0;
void *maddr;
int offset = addr & (PAGE_SIZE-1);
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
return -EINVAL;
phys_addr = follow_phys(vma, addr, write, &prot);
if (!phys_addr)
return -EINVAL;
maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
if (write)
memcpy_toio(maddr + offset, buf, len);
else
memcpy_fromio(buf, maddr + offset, len);
iounmap(maddr);
return len;
}
#endif
/*
* Access another process' address space.
* Source/target buffer must be kernel space,
......@@ -2760,7 +2840,6 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
{
struct mm_struct *mm;
struct vm_area_struct *vma;
struct page *page;
void *old_buf = buf;
mm = get_task_mm(tsk);
......@@ -2772,28 +2851,44 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
while (len) {
int bytes, ret, offset;
void *maddr;
struct page *page = NULL;
ret = get_user_pages(tsk, mm, addr, 1,
write, 1, &page, &vma);
if (ret <= 0)
break;
bytes = len;
offset = addr & (PAGE_SIZE-1);
if (bytes > PAGE_SIZE-offset)
bytes = PAGE_SIZE-offset;
maddr = kmap(page);
if (write) {
copy_to_user_page(vma, page, addr,
maddr + offset, buf, bytes);
set_page_dirty_lock(page);
if (ret <= 0) {
/*
* Check if this is a VM_IO | VM_PFNMAP VMA, which
* we can access using slightly different code.
*/
#ifdef CONFIG_HAVE_IOREMAP_PROT
vma = find_vma(mm, addr);
if (!vma)
break;
if (vma->vm_ops && vma->vm_ops->access)
ret = vma->vm_ops->access(vma, addr, buf,
len, write);
if (ret <= 0)
#endif
break;
bytes = ret;
} else {
copy_from_user_page(vma, page, addr,
buf, maddr + offset, bytes);
bytes = len;
offset = addr & (PAGE_SIZE-1);
if (bytes > PAGE_SIZE-offset)
bytes = PAGE_SIZE-offset;
maddr = kmap(page);
if (write) {
copy_to_user_page(vma, page, addr,
maddr + offset, buf, bytes);
set_page_dirty_lock(page);
} else {
copy_from_user_page(vma, page, addr,
buf, maddr + offset, bytes);
}
kunmap(page);
page_cache_release(page);
}
kunmap(page);
page_cache_release(page);
len -= bytes;
buf += bytes;
addr += bytes;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment