Commit 7e0165b2 authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge fixes from Andrew Morton:
 "6 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  lib/Kconfig.debug: fix some messed up configurations
  mm: vmscan: protect shrinker idr replace with CONFIG_MEMCG
  kasan: don't assume percpu shadow allocations will succeed
  kasan: use apply_to_existing_page_range() for releasing vmalloc shadow
  mm/memory.c: add apply_to_existing_page_range() helper
  kasan: fix crashes on access to memory mapped by vm_map_ram()
parents 5f096c0e 045f6d79
......@@ -205,20 +205,23 @@ static inline void *kasan_reset_tag(const void *addr)
#endif /* CONFIG_KASAN_SW_TAGS */
#ifdef CONFIG_KASAN_VMALLOC
int kasan_populate_vmalloc(unsigned long requested_size,
struct vm_struct *area);
void kasan_poison_vmalloc(void *start, unsigned long size);
int kasan_populate_vmalloc(unsigned long addr, unsigned long size);
void kasan_poison_vmalloc(const void *start, unsigned long size);
void kasan_unpoison_vmalloc(const void *start, unsigned long size);
void kasan_release_vmalloc(unsigned long start, unsigned long end,
unsigned long free_region_start,
unsigned long free_region_end);
#else
static inline int kasan_populate_vmalloc(unsigned long requested_size,
struct vm_struct *area)
static inline int kasan_populate_vmalloc(unsigned long start,
unsigned long size)
{
return 0;
}
static inline void kasan_poison_vmalloc(void *start, unsigned long size) {}
static inline void kasan_poison_vmalloc(const void *start, unsigned long size)
{ }
static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size)
{ }
static inline void kasan_release_vmalloc(unsigned long start,
unsigned long end,
unsigned long free_region_start,
......
......@@ -2621,6 +2621,9 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data);
extern int apply_to_existing_page_range(struct mm_struct *mm,
unsigned long address, unsigned long size,
pte_fn_t fn, void *data);
#ifdef CONFIG_PAGE_POISONING
extern bool page_poisoning_enabled(void);
......
......@@ -1483,6 +1483,55 @@ config PROVIDE_OHCI1394_DMA_INIT
See Documentation/debugging-via-ohci1394.txt for more information.
source "samples/Kconfig"
config ARCH_HAS_DEVMEM_IS_ALLOWED
bool
config STRICT_DEVMEM
bool "Filter access to /dev/mem"
depends on MMU && DEVMEM
depends on ARCH_HAS_DEVMEM_IS_ALLOWED
default y if PPC || X86 || ARM64
help
If this option is disabled, you allow userspace (root) access to all
of memory, including kernel and userspace memory. Accidental
access to this is obviously disastrous, but specific access can
be used by people debugging the kernel. Note that with PAT support
enabled, even in this case there are restrictions on /dev/mem
use due to the cache aliasing requirements.
If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem
file only allows userspace access to PCI space and the BIOS code and
data regions. This is sufficient for dosemu and X and all common
users of /dev/mem.
If in doubt, say Y.
config IO_STRICT_DEVMEM
bool "Filter I/O access to /dev/mem"
depends on STRICT_DEVMEM
help
If this option is disabled, you allow userspace (root) access to all
io-memory regardless of whether a driver is actively using that
range. Accidental access to this is obviously disastrous, but
specific access can be used by people debugging kernel drivers.
If this option is switched on, the /dev/mem file only allows
userspace access to *idle* io-memory ranges (see /proc/iomem) This
may break traditional users of /dev/mem (dosemu, legacy X, etc...)
if the driver using a given range cannot be disabled.
If in doubt, say Y.
menu "$(SRCARCH) Debugging"
source "arch/$(SRCARCH)/Kconfig.debug"
endmenu
menu "Kernel Testing and Coverage"
source "lib/kunit/Kconfig"
config NOTIFIER_ERROR_INJECTION
......@@ -1643,10 +1692,6 @@ config FAULT_INJECTION_STACKTRACE_FILTER
help
Provide stacktrace filter for fault-injection capabilities
endmenu # "Kernel Testing and Coverage"
menu "Kernel Testing and Coverage"
config ARCH_HAS_KCOV
bool
help
......@@ -2130,52 +2175,7 @@ config MEMTEST
memtest=17, mean do 17 test patterns.
If you are unsure how to answer this question, answer N.
source "samples/Kconfig"
config ARCH_HAS_DEVMEM_IS_ALLOWED
bool
config STRICT_DEVMEM
bool "Filter access to /dev/mem"
depends on MMU && DEVMEM
depends on ARCH_HAS_DEVMEM_IS_ALLOWED
default y if PPC || X86 || ARM64
---help---
If this option is disabled, you allow userspace (root) access to all
of memory, including kernel and userspace memory. Accidental
access to this is obviously disastrous, but specific access can
be used by people debugging the kernel. Note that with PAT support
enabled, even in this case there are restrictions on /dev/mem
use due to the cache aliasing requirements.
If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem
file only allows userspace access to PCI space and the BIOS code and
data regions. This is sufficient for dosemu and X and all common
users of /dev/mem.
If in doubt, say Y.
config IO_STRICT_DEVMEM
bool "Filter I/O access to /dev/mem"
depends on STRICT_DEVMEM
---help---
If this option is disabled, you allow userspace (root) access to all
io-memory regardless of whether a driver is actively using that
range. Accidental access to this is obviously disastrous, but
specific access can be used by people debugging kernel drivers.
If this option is switched on, the /dev/mem file only allows
userspace access to *idle* io-memory ranges (see /proc/iomem) This
may break traditional users of /dev/mem (dosemu, legacy X, etc...)
if the driver using a given range cannot be disabled.
If in doubt, say Y.
menu "$(SRCARCH) Debugging"
source "arch/$(SRCARCH)/Kconfig.debug"
endmenu
config HYPERV_TESTING
bool "Microsoft Hyper-V driver testing"
......@@ -2184,4 +2184,6 @@ config HYPERV_TESTING
help
Select this option to enable Hyper-V vmbus testing.
endmenu # "Kernel Testing and Coverage"
endmenu # Kernel hacking
......@@ -778,15 +778,17 @@ static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
return 0;
}
int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
{
unsigned long shadow_start, shadow_end;
int ret;
shadow_start = (unsigned long)kasan_mem_to_shadow(area->addr);
if (!is_vmalloc_or_module_addr((void *)addr))
return 0;
shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr);
shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
shadow_end = (unsigned long)kasan_mem_to_shadow(area->addr +
area->size);
shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size);
shadow_end = ALIGN(shadow_end, PAGE_SIZE);
ret = apply_to_page_range(&init_mm, shadow_start,
......@@ -797,10 +799,6 @@ int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
flush_cache_vmap(shadow_start, shadow_end);
kasan_unpoison_shadow(area->addr, requested_size);
area->flags |= VM_KASAN;
/*
* We need to be careful about inter-cpu effects here. Consider:
*
......@@ -843,12 +841,23 @@ int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
* Poison the shadow for a vmalloc region. Called as part of the
* freeing process at the time the region is freed.
*/
void kasan_poison_vmalloc(void *start, unsigned long size)
void kasan_poison_vmalloc(const void *start, unsigned long size)
{
if (!is_vmalloc_or_module_addr(start))
return;
size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID);
}
/*
 * Unpoison @size bytes starting at @start via kasan_unpoison_shadow(),
 * but only when @start is a vmalloc or module address. Any other
 * address is left untouched.
 */
void kasan_unpoison_vmalloc(const void *start, unsigned long size)
{
	if (is_vmalloc_or_module_addr(start))
		kasan_unpoison_shadow(start, size);
}
static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
void *unused)
{
......@@ -948,6 +957,7 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
{
void *shadow_start, *shadow_end;
unsigned long region_start, region_end;
unsigned long size;
region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
......@@ -970,9 +980,11 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
shadow_end = kasan_mem_to_shadow((void *)region_end);
if (shadow_end > shadow_start) {
apply_to_page_range(&init_mm, (unsigned long)shadow_start,
(unsigned long)(shadow_end - shadow_start),
kasan_depopulate_vmalloc_pte, NULL);
size = shadow_end - shadow_start;
apply_to_existing_page_range(&init_mm,
(unsigned long)shadow_start,
size, kasan_depopulate_vmalloc_pte,
NULL);
flush_tlb_kernel_range((unsigned long)shadow_start,
(unsigned long)shadow_end);
}
......
......@@ -2021,26 +2021,34 @@ EXPORT_SYMBOL(vm_iomap_memory);
static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, unsigned long end,
pte_fn_t fn, void *data)
pte_fn_t fn, void *data, bool create)
{
pte_t *pte;
int err;
int err = 0;
spinlock_t *uninitialized_var(ptl);
if (create) {
pte = (mm == &init_mm) ?
pte_alloc_kernel(pmd, addr) :
pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (!pte)
return -ENOMEM;
} else {
pte = (mm == &init_mm) ?
pte_offset_kernel(pmd, addr) :
pte_offset_map_lock(mm, pmd, addr, &ptl);
}
BUG_ON(pmd_huge(*pmd));
arch_enter_lazy_mmu_mode();
do {
if (create || !pte_none(*pte)) {
err = fn(pte++, addr, data);
if (err)
break;
}
} while (addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
......@@ -2052,77 +2060,95 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
unsigned long addr, unsigned long end,
pte_fn_t fn, void *data)
pte_fn_t fn, void *data, bool create)
{
pmd_t *pmd;
unsigned long next;
int err;
int err = 0;
BUG_ON(pud_huge(*pud));
if (create) {
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
return -ENOMEM;
} else {
pmd = pmd_offset(pud, addr);
}
do {
next = pmd_addr_end(addr, end);
err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
if (create || !pmd_none_or_clear_bad(pmd)) {
err = apply_to_pte_range(mm, pmd, addr, next, fn, data,
create);
if (err)
break;
}
} while (pmd++, addr = next, addr != end);
return err;
}
static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
unsigned long addr, unsigned long end,
pte_fn_t fn, void *data)
pte_fn_t fn, void *data, bool create)
{
pud_t *pud;
unsigned long next;
int err;
int err = 0;
if (create) {
pud = pud_alloc(mm, p4d, addr);
if (!pud)
return -ENOMEM;
} else {
pud = pud_offset(p4d, addr);
}
do {
next = pud_addr_end(addr, end);
err = apply_to_pmd_range(mm, pud, addr, next, fn, data);
if (create || !pud_none_or_clear_bad(pud)) {
err = apply_to_pmd_range(mm, pud, addr, next, fn, data,
create);
if (err)
break;
}
} while (pud++, addr = next, addr != end);
return err;
}
static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
unsigned long addr, unsigned long end,
pte_fn_t fn, void *data)
pte_fn_t fn, void *data, bool create)
{
p4d_t *p4d;
unsigned long next;
int err;
int err = 0;
if (create) {
p4d = p4d_alloc(mm, pgd, addr);
if (!p4d)
return -ENOMEM;
} else {
p4d = p4d_offset(pgd, addr);
}
do {
next = p4d_addr_end(addr, end);
err = apply_to_pud_range(mm, p4d, addr, next, fn, data);
if (create || !p4d_none_or_clear_bad(p4d)) {
err = apply_to_pud_range(mm, p4d, addr, next, fn, data,
create);
if (err)
break;
}
} while (p4d++, addr = next, addr != end);
return err;
}
/*
* Scan a region of virtual memory, filling in page tables as necessary
* and calling a provided function on each leaf page table.
*/
int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
unsigned long size, pte_fn_t fn, void *data)
static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
unsigned long size, pte_fn_t fn,
void *data, bool create)
{
pgd_t *pgd;
unsigned long next;
unsigned long end = addr + size;
int err;
int err = 0;
if (WARN_ON(addr >= end))
return -EINVAL;
......@@ -2130,15 +2156,41 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, end);
err = apply_to_p4d_range(mm, pgd, addr, next, fn, data);
if (!create && pgd_none_or_clear_bad(pgd))
continue;
err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create);
if (err)
break;
} while (pgd++, addr = next, addr != end);
return err;
}
/*
 * Walk a region of virtual memory and call @fn on each leaf page table
 * entry, allocating any page tables that are missing along the way.
 *
 * Returns the result of __apply_to_page_range() with creation enabled.
 */
int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
			unsigned long size, pte_fn_t fn, void *data)
{
	const bool create = true;

	return __apply_to_page_range(mm, addr, size, fn, data, create);
}
EXPORT_SYMBOL_GPL(apply_to_page_range);
/*
 * Walk a region of virtual memory and call @fn on each leaf page table
 * entry that already exists.
 *
 * In contrast to apply_to_page_range(), page tables that are absent are
 * skipped rather than filled in.
 *
 * Returns the result of __apply_to_page_range() with creation disabled.
 */
int apply_to_existing_page_range(struct mm_struct *mm, unsigned long addr,
				 unsigned long size, pte_fn_t fn, void *data)
{
	const bool create = false;

	return __apply_to_page_range(mm, addr, size, fn, data, create);
}
EXPORT_SYMBOL_GPL(apply_to_existing_page_range);
/*
* handle_pte_fault chooses page fault handler according to an entry which was
* read non-atomically. Before making any commitment, on those architectures
......
......@@ -1061,6 +1061,26 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
return nva_start_addr;
}
/*
 * Free a region of KVA allocated by alloc_vmap_area.
 *
 * The area is first unlinked from the busy tree while holding
 * vmap_area_lock, then merged into (or added to) the free tree/list while
 * holding free_vmap_area_lock. The two locks are taken one after the
 * other and are never held at the same time here.
 */
static void free_vmap_area(struct vmap_area *va)
{
/*
 * Remove from the busy tree/list; vmap_area_lock protects vmap_area_root.
 */
spin_lock(&vmap_area_lock);
unlink_va(va, &vmap_area_root);
spin_unlock(&vmap_area_lock);
/*
 * Insert/Merge it back to the free tree/list, under free_vmap_area_lock.
 */
spin_lock(&free_vmap_area_lock);
merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list);
spin_unlock(&free_vmap_area_lock);
}
/*
* Allocate a region of KVA of the specified size and alignment, within the
* vstart and vend.
......@@ -1073,6 +1093,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
struct vmap_area *va, *pva;
unsigned long addr;
int purged = 0;
int ret;
BUG_ON(!size);
BUG_ON(offset_in_page(size));
......@@ -1139,6 +1160,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
va->va_end = addr + size;
va->vm = NULL;
spin_lock(&vmap_area_lock);
insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
spin_unlock(&vmap_area_lock);
......@@ -1147,6 +1169,12 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
BUG_ON(va->va_start < vstart);
BUG_ON(va->va_end > vend);
ret = kasan_populate_vmalloc(addr, size);
if (ret) {
free_vmap_area(va);
return ERR_PTR(ret);
}
return va;
overflow:
......@@ -1185,26 +1213,6 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
/*
* Free a region of KVA allocated by alloc_vmap_area
*/
static void free_vmap_area(struct vmap_area *va)
{
/*
* Remove from the busy tree/list.
*/
spin_lock(&vmap_area_lock);
unlink_va(va, &vmap_area_root);
spin_unlock(&vmap_area_lock);
/*
* Insert/Merge it back to the free tree/list.
*/
spin_lock(&free_vmap_area_lock);
merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list);
spin_unlock(&free_vmap_area_lock);
}
/*
* Clear the pagetable entries of a given vmap_area
*/
......@@ -1771,6 +1779,8 @@ void vm_unmap_ram(const void *mem, unsigned int count)
BUG_ON(addr > VMALLOC_END);
BUG_ON(!PAGE_ALIGNED(addr));
kasan_poison_vmalloc(mem, size);
if (likely(count <= VMAP_MAX_ALLOC)) {
debug_check_no_locks_freed(mem, size);
vb_free(mem, size);
......@@ -1821,6 +1831,9 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
addr = va->va_start;
mem = (void *)addr;
}
kasan_unpoison_vmalloc(mem, size);
if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
vm_unmap_ram(mem, count);
return NULL;
......@@ -2075,6 +2088,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
{
struct vmap_area *va;
struct vm_struct *area;
unsigned long requested_size = size;
BUG_ON(in_interrupt());
size = PAGE_ALIGN(size);
......@@ -2098,23 +2112,9 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
return NULL;
}
setup_vmalloc_vm(area, va, flags, caller);
kasan_unpoison_vmalloc((void *)va->va_start, requested_size);
/*
* For KASAN, if we are in vmalloc space, we need to cover the shadow
* area with real memory. If we come here through VM_ALLOC, this is
* done by a higher level function that has access to the true size,
* which might not be a full page.
*
* We assume module space comes via VM_ALLOC path.
*/
if (is_vmalloc_addr(area->addr) && !(area->flags & VM_ALLOC)) {
if (kasan_populate_vmalloc(area->size, area)) {
unmap_vmap_area(va);
kfree(area);
return NULL;
}
}
setup_vmalloc_vm(area, va, flags, caller);
return area;
}
......@@ -2293,7 +2293,6 @@ static void __vunmap(const void *addr, int deallocate_pages)
debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
if (area->flags & VM_KASAN)
kasan_poison_vmalloc(area->addr, area->size);
vm_remove_mappings(area, deallocate_pages);
......@@ -2539,7 +2538,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if (!size || (size >> PAGE_SHIFT) > totalram_pages())
goto fail;
area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED |
vm_flags, start, end, node, gfp_mask, caller);
if (!area)
goto fail;
......@@ -2548,11 +2547,6 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if (!addr)
return NULL;
if (is_vmalloc_or_module_addr(area->addr)) {
if (kasan_populate_vmalloc(real_size, area))
return NULL;
}
/*
* In this function, newly allocated vm_struct has VM_UNINITIALIZED
* flag. It means that vm_struct is not fully initialized.
......@@ -3294,7 +3288,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
struct vmap_area **vas, *va;
struct vm_struct **vms;
int area, area2, last_area, term_area;
unsigned long base, start, size, end, last_end;
unsigned long base, start, size, end, last_end, orig_start, orig_end;
bool purged = false;
enum fit_type type;
......@@ -3424,6 +3418,15 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
spin_unlock(&free_vmap_area_lock);
/* populate the kasan shadow space */
for (area = 0; area < nr_vms; area++) {
if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
goto err_free_shadow;
kasan_unpoison_vmalloc((void *)vas[area]->va_start,
sizes[area]);
}
/* insert all vm's */
spin_lock(&vmap_area_lock);
for (area = 0; area < nr_vms; area++) {
......@@ -3434,12 +3437,6 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
}
spin_unlock(&vmap_area_lock);
/* populate the shadow space outside of the lock */
for (area = 0; area < nr_vms; area++) {
/* assume success here */
kasan_populate_vmalloc(sizes[area], vms[area]);
}
kfree(vas);
return vms;
......@@ -3451,8 +3448,12 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
* and when pcpu_get_vm_areas() is success.
*/
while (area--) {
merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
orig_start = vas[area]->va_start;
orig_end = vas[area]->va_end;
va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
&free_vmap_area_list);
kasan_release_vmalloc(orig_start, orig_end,
va->va_start, va->va_end);
vas[area] = NULL;
}
......@@ -3487,6 +3488,28 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
kfree(vas);
kfree(vms);
return NULL;
err_free_shadow:
spin_lock(&free_vmap_area_lock);
/*
* We release all the vmalloc shadows, even the ones for regions that
* hadn't been successfully added. This relies on kasan_release_vmalloc
* being able to tolerate this case.
*/
for (area = 0; area < nr_vms; area++) {
orig_start = vas[area]->va_start;
orig_end = vas[area]->va_end;
va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
&free_vmap_area_list);
kasan_release_vmalloc(orig_start, orig_end,
va->va_start, va->va_end);
vas[area] = NULL;
kfree(vms[area]);
}
spin_unlock(&free_vmap_area_lock);
kfree(vas);
kfree(vms);
return NULL;
}
/**
......
......@@ -387,7 +387,7 @@ void register_shrinker_prepared(struct shrinker *shrinker)
{
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
#ifdef CONFIG_MEMCG_KMEM
#ifdef CONFIG_MEMCG
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
idr_replace(&shrinker_idr, shrinker, shrinker->id);
#endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment