Commit 5bc52f64 authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge more fixes from Andrew Morton:
 "The usual shower of hotfixes and some followups to the recently merged
  page_owner enhancements"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm/memory-failure: poison read receives SIGKILL instead of SIGBUS if mmaped more than once
  mm/slab.c: fix kernel-doc warning for __ksize()
  xarray.h: fix kernel-doc warning
  bitmap.h: fix kernel-doc warning and typo
  fs/fs-writeback.c: fix kernel-doc warning
  fs/libfs.c: fix kernel-doc warning
  fs/direct-io.c: fix kernel-doc warning
  mm, compaction: fix wrong pfn handling in __reset_isolation_pfn()
  mm, hugetlb: allow hugepage allocations to reclaim as needed
  lib/test_meminit: add a kmem_cache_alloc_bulk() test
  mm/slub.c: init_on_free=1 should wipe freelist ptr for bulk allocations
  lib/generic-radix-tree.c: add kmemleak annotations
  mm/slub: fix a deadlock in show_slab_objects()
  mm, page_owner: rename flag indicating that page is allocated
  mm, page_owner: decouple freeing stack trace from debug_pagealloc
  mm, page_owner: fix off-by-one error in __set_page_owner_handle()
parents 2abd839a 3d7fed4a
@@ -41,6 +41,9 @@ smaller binary while the latter is 1.1 - 2 times faster.
 Both KASAN modes work with both SLUB and SLAB memory allocators.
 
 For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
 
+To augment reports with last allocation and freeing stack of the physical page,
+it is recommended to enable also CONFIG_PAGE_OWNER and boot with page_owner=on.
+
 To disable instrumentation for specific files or directories, add a line
 similar to the following to the respective kernel Makefile:
...
@@ -241,9 +241,8 @@ void dio_warn_stale_pagecache(struct file *filp)
         }
 }
 
-/**
+/*
  * dio_complete() - called when all DIO BIO I/O has been completed
- * @offset: the byte offset in the file of the completed operation
  *
  * This drops i_dio_count, lets interested parties know that a DIO operation
  * has completed, and calculates the resulting return code for the operation.
...
@@ -905,7 +905,7 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
  * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
  * @bdi_id: target bdi id
  * @memcg_id: target memcg css id
- * @nr_pages: number of pages to write, 0 for best-effort dirty flushing
+ * @nr: number of pages to write, 0 for best-effort dirty flushing
  * @reason: reason why some writeback work initiated
  * @done: target wb_completion
  *
...
@@ -473,8 +473,7 @@ EXPORT_SYMBOL(simple_write_begin);
 
 /**
  * simple_write_end - .write_end helper for non-block-device FSes
- * @available: See .write_end of address_space_operations
- * @file: "
+ * @file: See .write_end of address_space_operations
  * @mapping: "
  * @pos: "
  * @len: "
...
@@ -326,10 +326,11 @@ static inline int bitmap_equal(const unsigned long *src1,
 }
 
 /**
- * bitmap_or_equal - Check whether the or of two bitnaps is equal to a third
+ * bitmap_or_equal - Check whether the or of two bitmaps is equal to a third
  * @src1: Pointer to bitmap 1
  * @src2: Pointer to bitmap 2 will be or'ed with bitmap 1
  * @src3: Pointer to bitmap 3. Compare to the result of *@src1 | *@src2
+ * @nbits: number of bits in each of these bitmaps
  *
  * Returns: True if (*@src1 | *@src2) == *@src3, false otherwise
  */
...
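The kernel-doc fix above documents the @nbits argument that bitmap_or_equal() already takes. As a reminder of how the helper is called, here is a minimal, illustrative sketch (not part of the patch; the bitmap names and the 64-bit width are made up for the example):

#include <linux/bitmap.h>

static bool example_all_work_covered(void)
{
        /* three hypothetical bitmaps of 64 bits each */
        DECLARE_BITMAP(pending, 64);
        DECLARE_BITMAP(running, 64);
        DECLARE_BITMAP(all, 64);

        bitmap_zero(pending, 64);
        bitmap_zero(running, 64);
        bitmap_fill(all, 64);

        __set_bit(3, pending);
        __set_bit(5, running);

        /* true only if (pending | running) has exactly the bits of "all" set */
        return bitmap_or_equal(pending, running, all, 64);
}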
@@ -18,7 +18,7 @@ struct page_ext_operations {
 
 enum page_ext_flags {
         PAGE_EXT_OWNER,
-        PAGE_EXT_OWNER_ACTIVE,
+        PAGE_EXT_OWNER_ALLOCATED,
 #if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
         PAGE_EXT_YOUNG,
         PAGE_EXT_IDLE,
@@ -36,6 +36,7 @@ struct page_ext {
         unsigned long flags;
 };
 
+extern unsigned long page_ext_size;
 extern void pgdat_page_ext_init(struct pglist_data *pgdat);
 
 #ifdef CONFIG_SPARSEMEM
@@ -52,6 +53,13 @@ static inline void page_ext_init(void)
 
 struct page_ext *lookup_page_ext(const struct page *page);
 
+static inline struct page_ext *page_ext_next(struct page_ext *curr)
+{
+        void *next = curr;
+
+        next += page_ext_size;
+        return next;
+}
+
 #else /* !CONFIG_PAGE_EXTENSION */
 
 struct page_ext;
...
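The new page_ext_next() helper above advances by page_ext_size, the runtime size of one page_ext entry including all registered clients, so callers can walk the entries of a high-order page's subpages with plain pointer arithmetic instead of one lookup_page_ext() call per subpage. A rough sketch of the intended iteration pattern (illustrative only; it mirrors how mm/page_owner.c uses the helper further down in this diff):

#include <linux/mm.h>
#include <linux/page_ext.h>

/* Visit the page_ext entry of every subpage of a 2^order page (sketch). */
static void example_walk_page_ext(struct page *page, unsigned int order)
{
        struct page_ext *page_ext = lookup_page_ext(page);
        unsigned int i;

        if (unlikely(!page_ext))
                return;

        for (i = 0; i < (1U << order); i++) {
                /* a client would reach its data at page_ext + its offset here */
                page_ext = page_ext_next(page_ext);
        }
}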
@@ -2,6 +2,7 @@
 #include <linux/export.h>
 #include <linux/generic-radix-tree.h>
 #include <linux/gfp.h>
+#include <linux/kmemleak.h>
 
 #define GENRADIX_ARY            (PAGE_SIZE / sizeof(struct genradix_node *))
 #define GENRADIX_ARY_SHIFT      ilog2(GENRADIX_ARY)
@@ -75,6 +76,27 @@ void *__genradix_ptr(struct __genradix *radix, size_t offset)
 }
 EXPORT_SYMBOL(__genradix_ptr);
 
+static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask)
+{
+        struct genradix_node *node;
+
+        node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO);
+
+        /*
+         * We're using pages (not slab allocations) directly for kernel data
+         * structures, so we need to explicitly inform kmemleak of them in order
+         * to avoid false positive memory leak reports.
+         */
+        kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask);
+        return node;
+}
+
+static inline void genradix_free_node(struct genradix_node *node)
+{
+        kmemleak_free(node);
+        free_page((unsigned long)node);
+}
+
 /*
  * Returns pointer to the specified byte @offset within @radix, allocating it if
  * necessary - newly allocated slots are always zeroed out:
@@ -97,8 +119,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
                         break;
 
                 if (!new_node) {
-                        new_node = (void *)
-                                __get_free_page(gfp_mask|__GFP_ZERO);
+                        new_node = genradix_alloc_node(gfp_mask);
                         if (!new_node)
                                 return NULL;
                 }
@@ -121,8 +142,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
                 n = READ_ONCE(*p);
                 if (!n) {
                         if (!new_node) {
-                                new_node = (void *)
-                                        __get_free_page(gfp_mask|__GFP_ZERO);
+                                new_node = genradix_alloc_node(gfp_mask);
                                 if (!new_node)
                                         return NULL;
                         }
@@ -133,7 +153,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
         }
 
         if (new_node)
-                free_page((unsigned long) new_node);
+                genradix_free_node(new_node);
 
         return &n->data[offset];
 }
@@ -191,7 +211,7 @@ static void genradix_free_recurse(struct genradix_node *n, unsigned level)
                                 genradix_free_recurse(n->children[i], level - 1);
         }
 
-        free_page((unsigned long) n);
+        genradix_free_node(n);
 }
 
 int __genradix_prealloc(struct __genradix *radix, size_t size,
...
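The two helpers added above exist so that the pages backing genradix nodes are visible to kmemleak; raw __get_free_page() allocations are otherwise invisible to it, and objects referenced only from those pages would be reported as leaks. The same pairing applies to any page-backed kernel data structure; a generic sketch, with illustrative names that are not part of the patch:

#include <linux/gfp.h>
#include <linux/kmemleak.h>

/* Allocate one zeroed page used as a data structure and register it. */
static void *example_alloc_tracked_page(gfp_t gfp)
{
        void *p = (void *)__get_free_page(gfp | __GFP_ZERO);

        if (p)
                /* min_count = 1: warn if this page becomes unreferenced */
                kmemleak_alloc(p, PAGE_SIZE, 1, gfp);
        return p;
}

static void example_free_tracked_page(void *p)
{
        kmemleak_free(p);
        free_page((unsigned long)p);
}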
@@ -297,6 +297,32 @@ static int __init do_kmem_cache_rcu_persistent(int size, int *total_failures)
         return 1;
 }
 
+static int __init do_kmem_cache_size_bulk(int size, int *total_failures)
+{
+        struct kmem_cache *c;
+        int i, iter, maxiter = 1024;
+        int num, bytes;
+        bool fail = false;
+        void *objects[10];
+
+        c = kmem_cache_create("test_cache", size, size, 0, NULL);
+        for (iter = 0; (iter < maxiter) && !fail; iter++) {
+                num = kmem_cache_alloc_bulk(c, GFP_KERNEL, ARRAY_SIZE(objects),
+                                            objects);
+                for (i = 0; i < num; i++) {
+                        bytes = count_nonzero_bytes(objects[i], size);
+                        if (bytes)
+                                fail = true;
+                        fill_with_garbage(objects[i], size);
+                }
+
+                if (num)
+                        kmem_cache_free_bulk(c, num, objects);
+        }
+        *total_failures += fail;
+        return 1;
+}
+
 /*
  * Test kmem_cache allocation by creating caches of different sizes, with and
  * without constructors, with and without SLAB_TYPESAFE_BY_RCU.
@@ -318,6 +344,7 @@ static int __init test_kmemcache(int *total_failures)
                                 num_tests += do_kmem_cache_size(size, ctor, rcu, zero,
                                                                 &failures);
                         }
+                num_tests += do_kmem_cache_size_bulk(size, &failures);
         }
         REPORT_FAILURES_IN_FN();
         *total_failures += failures;
...
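The new test drives kmem_cache_alloc_bulk()/kmem_cache_free_bulk(), the path fixed by the init_on_free patch further down, and checks that bulk-allocated objects come back fully wiped. For reference, a minimal round trip through the same API looks roughly like this (the cache name and sizes are arbitrary; a sketch, not part of the patch):

#include <linux/errno.h>
#include <linux/slab.h>

/* Allocate and free a small batch of objects in one call each. */
static int example_bulk_roundtrip(void)
{
        struct kmem_cache *cache;
        void *objs[8];
        int n;

        cache = kmem_cache_create("example_cache", 64, 0, 0, NULL);
        if (!cache)
                return -ENOMEM;

        /* returns the number of objects actually allocated (0 on failure) */
        n = kmem_cache_alloc_bulk(cache, GFP_KERNEL, ARRAY_SIZE(objs), objs);
        if (n)
                kmem_cache_free_bulk(cache, n, objs);

        kmem_cache_destroy(cache);
        return 0;
}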
@@ -270,14 +270,15 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
 
         /* Ensure the start of the pageblock or zone is online and valid */
         block_pfn = pageblock_start_pfn(pfn);
-        block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn));
+        block_pfn = max(block_pfn, zone->zone_start_pfn);
+        block_page = pfn_to_online_page(block_pfn);
         if (block_page) {
                 page = block_page;
                 pfn = block_pfn;
         }
 
         /* Ensure the end of the pageblock or zone is online and valid */
-        block_pfn += pageblock_nr_pages;
+        block_pfn = pageblock_end_pfn(pfn) - 1;
         block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
         end_page = pfn_to_online_page(block_pfn);
         if (!end_page)
@@ -303,7 +304,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
 
                 page += (1 << PAGE_ALLOC_COSTLY_ORDER);
                 pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
-        } while (page < end_page);
+        } while (page <= end_page);
 
         return false;
 }
...
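The fix keeps the clamped start pfn and derives the end from pageblock_end_pfn(), so both bounds describe the same pageblock, and the loop's new `<=` also visits the final PAGE_ALLOC_COSTLY_ORDER-sized step. A worked example of the corrected bounds, assuming pageblock_order == 9, i.e. 512 pages per block (this value is configuration dependent; the helper below only mimics the macro arithmetic and is not kernel code):

#define EXAMPLE_PAGEBLOCK_NR_PAGES 512UL

static unsigned long example_block_bounds(unsigned long pfn,
                                          unsigned long zone_start_pfn,
                                          unsigned long zone_end_pfn)
{
        unsigned long start, last;

        /* e.g. pfn = 1300, zone_start_pfn = 1200, zone_end_pfn = 4096 */
        start = pfn & ~(EXAMPLE_PAGEBLOCK_NR_PAGES - 1);        /* 1024 */
        if (start < zone_start_pfn)
                start = zone_start_pfn;                         /* clamped to 1200 and kept */

        last = pfn | (EXAMPLE_PAGEBLOCK_NR_PAGES - 1);          /* 1535, the block's last pfn */
        if (last > zone_end_pfn - 1)
                last = zone_end_pfn - 1;

        /* the caller then scans while (pfn <= last), so 1535 itself is visited */
        return last - start;
}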
@@ -199,7 +199,6 @@ struct to_kill {
         struct task_struct *tsk;
         unsigned long addr;
         short size_shift;
-        char addr_valid;
 };
 
 /*
@@ -324,22 +323,27 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
                 }
         }
         tk->addr = page_address_in_vma(p, vma);
-        tk->addr_valid = 1;
         if (is_zone_device_page(p))
                 tk->size_shift = dev_pagemap_mapping_shift(p, vma);
         else
                 tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
 
         /*
-         * In theory we don't have to kill when the page was
-         * munmaped. But it could be also a mremap. Since that's
-         * likely very rare kill anyways just out of paranoia, but use
-         * a SIGKILL because the error is not contained anymore.
+         * Send SIGKILL if "tk->addr == -EFAULT". Also, as
+         * "tk->size_shift" is always non-zero for !is_zone_device_page(),
+         * so "tk->size_shift == 0" effectively checks no mapping on
+         * ZONE_DEVICE. Indeed, when a devdax page is mmapped N times
+         * to a process' address space, it's possible not all N VMAs
+         * contain mappings for the page, but at least one VMA does.
+         * Only deliver SIGBUS with payload derived from the VMA that
+         * has a mapping for the page.
          */
-        if (tk->addr == -EFAULT || tk->size_shift == 0) {
+        if (tk->addr == -EFAULT) {
                 pr_info("Memory failure: Unable to find user space address %lx in %s\n",
                         page_to_pfn(p), tsk->comm);
-                tk->addr_valid = 0;
+        } else if (tk->size_shift == 0) {
+                kfree(tk);
+                return;
         }
         get_task_struct(tsk);
         tk->tsk = tsk;
@@ -366,7 +370,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
                          * make sure the process doesn't catch the
                          * signal and then access the memory. Just kill it.
                          */
-                        if (fail || tk->addr_valid == 0) {
+                        if (fail || tk->addr == -EFAULT) {
                                 pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
                                        pfn, tk->tsk->comm, tk->tsk->pid);
                                 do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
...
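With this change, a devdax page that is mmapped more than once no longer gets the task force-killed just because one of its VMAs has no mapping for the page; the SIGBUS payload is built from a VMA that does map it, and SIGKILL remains the fallback only when no usable address is found or unmapping failed. For context, a user-space program that wants to log or survive such an event typically installs an SA_SIGINFO handler and inspects si_addr and si_addr_lsb; an illustrative user-space sketch (not part of the patch):

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

/* Inspect the payload delivered for a hardware memory failure. */
static void mce_handler(int sig, siginfo_t *info, void *ucontext)
{
        if (info->si_code == BUS_MCEERR_AR || info->si_code == BUS_MCEERR_AO)
                /* fprintf is not async-signal-safe; fine for a demo only */
                fprintf(stderr, "poison at %p, lsb %d\n",
                        info->si_addr, (int)info->si_addr_lsb);
        _exit(1);
}

int main(void)
{
        struct sigaction sa = { 0 };

        sa.sa_sigaction = mce_handler;
        sa.sa_flags = SA_SIGINFO;
        sigaction(SIGBUS, &sa, NULL);
        /* ... touch memory the kernel has marked hardware-poisoned ... */
        return 0;
}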
@@ -4473,12 +4473,14 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                 if (page)
                         goto got_pg;
 
-                if (order >= pageblock_order && (gfp_mask & __GFP_IO)) {
+                if (order >= pageblock_order && (gfp_mask & __GFP_IO) &&
+                     !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
                         /*
                          * If allocating entire pageblock(s) and compaction
                          * failed because all zones are below low watermarks
                          * or is prohibited because it recently failed at this
-                         * order, fail immediately.
+                         * order, fail immediately unless the allocator has
+                         * requested compaction and reclaim retry.
                          *
                          * Reclaim is
                          * - potentially very expensive because zones are far
...
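The extra !(gfp_mask & __GFP_RETRY_MAYFAIL) test means a costly, pageblock-sized allocation that explicitly opted into retrying (as the hugetlb path does per the "mm, hugetlb: allow hugepage allocations to reclaim as needed" patch in the series above) is no longer failed at this point. A sketch of such a caller; the order value is an assumption for illustration:

#include <linux/gfp.h>

/* Ask for an order-9 block (2 MB with 4 KB base pages, an assumption here),
 * letting reclaim/compaction retry but still failing rather than OOM-killing. */
static struct page *example_alloc_costly(int nid)
{
        gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;

        return alloc_pages_node(nid, gfp, 9);
}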
@@ -67,8 +67,9 @@ static struct page_ext_operations *page_ext_ops[] = {
 #endif
 };
 
+unsigned long page_ext_size = sizeof(struct page_ext);
+
 static unsigned long total_usage;
-static unsigned long extra_mem;
 
 static bool __init invoke_need_callbacks(void)
 {
@@ -78,9 +79,8 @@ static bool __init invoke_need_callbacks(void)
 
         for (i = 0; i < entries; i++) {
                 if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
-                        page_ext_ops[i]->offset = sizeof(struct page_ext) +
-                                        extra_mem;
-                        extra_mem += page_ext_ops[i]->size;
+                        page_ext_ops[i]->offset = page_ext_size;
+                        page_ext_size += page_ext_ops[i]->size;
                         need = true;
                 }
         }
@@ -99,14 +99,9 @@ static void __init invoke_init_callbacks(void)
         }
 }
 
-static unsigned long get_entry_size(void)
-{
-        return sizeof(struct page_ext) + extra_mem;
-}
-
 static inline struct page_ext *get_entry(void *base, unsigned long index)
 {
-        return base + get_entry_size() * index;
+        return base + page_ext_size * index;
 }
 
 #if !defined(CONFIG_SPARSEMEM)
@@ -156,7 +151,7 @@ static int __init alloc_node_page_ext(int nid)
             !IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
                 nr_pages += MAX_ORDER_NR_PAGES;
 
-        table_size = get_entry_size() * nr_pages;
+        table_size = page_ext_size * nr_pages;
 
         base = memblock_alloc_try_nid(
                         table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
@@ -234,7 +229,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
         if (section->page_ext)
                 return 0;
 
-        table_size = get_entry_size() * PAGES_PER_SECTION;
+        table_size = page_ext_size * PAGES_PER_SECTION;
         base = alloc_page_ext(table_size, nid);
 
         /*
@@ -254,7 +249,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
          * we need to apply a mask.
          */
         pfn &= PAGE_SECTION_MASK;
-        section->page_ext = (void *)base - get_entry_size() * pfn;
+        section->page_ext = (void *)base - page_ext_size * pfn;
         total_usage += table_size;
         return 0;
 }
@@ -267,7 +262,7 @@ static void free_page_ext(void *addr)
                 struct page *page = virt_to_page(addr);
                 size_t table_size;
 
-                table_size = get_entry_size() * PAGES_PER_SECTION;
+                table_size = page_ext_size * PAGES_PER_SECTION;
 
                 BUG_ON(PageReserved(page));
                 kmemleak_free(addr);
...
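With extra_mem and get_entry_size() folded into the exported page_ext_size, registered clients still work the same way: at boot each client's size is appended to page_ext_size and the running total becomes its offset, which the client later adds to a struct page_ext pointer to reach its private data. A hypothetical client, sketched only to show the offset arithmetic (page_ext clients are in fact listed in the static page_ext_ops[] array in this file; the foo_ext names are made up):

#include <linux/page_ext.h>

struct foo_ext {                                /* hypothetical per-page data */
        unsigned long stamp;
};

static bool need_foo_ext(void)
{
        return true;                            /* usually decided from a boot parameter */
}

static struct page_ext_operations foo_ext_ops = {
        .size = sizeof(struct foo_ext),
        .need = need_foo_ext,
};

static struct foo_ext *get_foo_ext(struct page_ext *page_ext)
{
        /* invoke_need_callbacks() stored this client's offset at boot */
        return (void *)page_ext + foo_ext_ops.offset;
}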
@@ -24,12 +24,10 @@ struct page_owner {
         short last_migrate_reason;
         gfp_t gfp_mask;
         depot_stack_handle_t handle;
-#ifdef CONFIG_DEBUG_PAGEALLOC
         depot_stack_handle_t free_handle;
-#endif
 };
 
-static bool page_owner_disabled = true;
+static bool page_owner_enabled = false;
 DEFINE_STATIC_KEY_FALSE(page_owner_inited);
 
 static depot_stack_handle_t dummy_handle;
@@ -44,7 +42,7 @@ static int __init early_page_owner_param(char *buf)
                 return -EINVAL;
 
         if (strcmp(buf, "on") == 0)
-                page_owner_disabled = false;
+                page_owner_enabled = true;
 
         return 0;
 }
@@ -52,10 +50,7 @@ early_param("page_owner", early_page_owner_param);
 
 static bool need_page_owner(void)
 {
-        if (page_owner_disabled)
-                return false;
-
-        return true;
+        return page_owner_enabled;
 }
 
 static __always_inline depot_stack_handle_t create_dummy_stack(void)
@@ -84,7 +79,7 @@ static noinline void register_early_stack(void)
 
 static void init_page_owner(void)
 {
-        if (page_owner_disabled)
+        if (!page_owner_enabled)
                 return;
 
         register_dummy_stack();
@@ -148,25 +143,19 @@ void __reset_page_owner(struct page *page, unsigned int order)
 {
         int i;
         struct page_ext *page_ext;
-#ifdef CONFIG_DEBUG_PAGEALLOC
         depot_stack_handle_t handle = 0;
         struct page_owner *page_owner;
 
-        if (debug_pagealloc_enabled())
-                handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
-#endif
+        handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
 
-        for (i = 0; i < (1 << order); i++) {
-                page_ext = lookup_page_ext(page + i);
-                if (unlikely(!page_ext))
-                        continue;
-                __clear_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
-#ifdef CONFIG_DEBUG_PAGEALLOC
-                if (debug_pagealloc_enabled()) {
-                        page_owner = get_page_owner(page_ext);
-                        page_owner->free_handle = handle;
-                }
-#endif
+        page_ext = lookup_page_ext(page);
+        if (unlikely(!page_ext))
+                return;
+
+        for (i = 0; i < (1 << order); i++) {
+                __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+                page_owner = get_page_owner(page_ext);
+                page_owner->free_handle = handle;
+                page_ext = page_ext_next(page_ext);
         }
 }
@@ -184,9 +173,9 @@ static inline void __set_page_owner_handle(struct page *page,
                 page_owner->gfp_mask = gfp_mask;
                 page_owner->last_migrate_reason = -1;
                 __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
-                __set_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
+                __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
 
-                page_ext = lookup_page_ext(page + i);
+                page_ext = page_ext_next(page_ext);
         }
 }
@@ -224,12 +213,10 @@ void __split_page_owner(struct page *page, unsigned int order)
         if (unlikely(!page_ext))
                 return;
 
-        page_owner = get_page_owner(page_ext);
-        page_owner->order = 0;
-        for (i = 1; i < (1 << order); i++) {
-                page_ext = lookup_page_ext(page + i);
+        for (i = 0; i < (1 << order); i++) {
                 page_owner = get_page_owner(page_ext);
                 page_owner->order = 0;
+                page_ext = page_ext_next(page_ext);
         }
 }
@@ -260,7 +247,7 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
          * the new page, which will be freed.
          */
         __set_bit(PAGE_EXT_OWNER, &new_ext->flags);
-        __set_bit(PAGE_EXT_OWNER_ACTIVE, &new_ext->flags);
+        __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
 }
 
 void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -320,7 +307,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
                         if (unlikely(!page_ext))
                                 continue;
 
-                        if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+                        if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
                                 continue;
 
                         page_owner = get_page_owner(page_ext);
@@ -435,7 +422,7 @@ void __dump_page_owner(struct page *page)
                 return;
         }
 
-        if (test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+        if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
                 pr_alert("page_owner tracks the page as allocated\n");
         else
                 pr_alert("page_owner tracks the page as freed\n");
@@ -451,7 +438,6 @@ void __dump_page_owner(struct page *page)
                 stack_trace_print(entries, nr_entries, 0);
         }
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
         handle = READ_ONCE(page_owner->free_handle);
         if (!handle) {
                 pr_alert("page_owner free stack trace missing\n");
@@ -460,7 +446,6 @@ void __dump_page_owner(struct page *page)
                 pr_alert("page last free stack trace:\n");
                 stack_trace_print(entries, nr_entries, 0);
         }
-#endif
 
         if (page_owner->last_migrate_reason != -1)
                 pr_alert("page has been migrated, last migrate reason: %s\n",
@@ -527,7 +512,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
                  * Although we do have the info about past allocation of free
                  * pages, it's not relevant for current memory usage.
                  */
-                if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+                if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
                         continue;
 
                 page_owner = get_page_owner(page_ext);
...
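Now that free_handle is recorded whenever page_owner=on is set, rather than only under CONFIG_DEBUG_PAGEALLOC, both the allocation and the free stack of a page go through the stack depot. The depot pattern used by save_stack() and __dump_page_owner() above boils down to the following sketch; the example_ function names are illustrative, not part of the patch:

#include <linux/kernel.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

/* Capture the current call stack and deduplicate it in the stack depot. */
static depot_stack_handle_t example_capture_stack(gfp_t gfp)
{
        unsigned long entries[16];
        unsigned int nr;

        nr = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
        return stack_depot_save(entries, nr, gfp);
}

/* Later, turn the compact handle back into a printable trace. */
static void example_print_stack(depot_stack_handle_t handle)
{
        unsigned long *entries;
        unsigned int nr;

        if (!handle)
                return;
        nr = stack_depot_fetch(handle, &entries);
        stack_trace_print(entries, nr, 0);
}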
@@ -4206,9 +4206,12 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
 
 /**
  * __ksize -- Uninstrumented ksize.
+ * @objp: pointer to the object
  *
  * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
  * safety checks as ksize() with KASAN instrumentation enabled.
+ *
+ * Return: size of the actual memory used by @objp in bytes
 */
 size_t __ksize(const void *objp)
 {
...
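The new Return line documents that __ksize(), like ksize(), reports the usable size of the underlying slab object rather than the size originally requested. A small illustration of that distinction (a sketch; the exact value depends on the configured kmalloc size classes):

#include <linux/printk.h>
#include <linux/slab.h>

static void example_ksize(void)
{
        char *p = kmalloc(13, GFP_KERNEL);

        if (p) {
                /* a 13-byte request typically comes from the 16-byte bucket,
                 * so ksize() would report 16 here */
                pr_info("requested 13, usable %zu\n", ksize(p));
                kfree(p);
        }
}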
@@ -2671,6 +2671,17 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
         return p;
 }
 
+/*
+ * If the object has been wiped upon free, make sure it's fully initialized by
+ * zeroing out freelist pointer.
+ */
+static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
+                                                   void *obj)
+{
+        if (unlikely(slab_want_init_on_free(s)) && obj)
+                memset((void *)((char *)obj + s->offset), 0, sizeof(void *));
+}
+
 /*
  * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
  * have the fastpath folded into their functions. So no function call
@@ -2759,12 +2770,8 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
                 prefetch_freepointer(s, next_object);
                 stat(s, ALLOC_FASTPATH);
         }
-        /*
-         * If the object has been wiped upon free, make sure it's fully
-         * initialized by zeroing out freelist pointer.
-         */
-        if (unlikely(slab_want_init_on_free(s)) && object)
-                memset(object + s->offset, 0, sizeof(void *));
+
+        maybe_wipe_obj_freeptr(s, object);
 
         if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
                 memset(object, 0, s->object_size);
@@ -3178,10 +3185,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
                                 goto error;
 
                         c = this_cpu_ptr(s->cpu_slab);
+                        maybe_wipe_obj_freeptr(s, p[i]);
+
                         continue; /* goto for-loop */
                 }
                 c->freelist = get_freepointer(s, object);
                 p[i] = object;
+                maybe_wipe_obj_freeptr(s, p[i]);
         }
         c->tid = next_tid(c->tid);
         local_irq_enable();
@@ -4846,7 +4856,17 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
                 }
         }
 
-        get_online_mems();
+        /*
+         * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
+         * already held which will conflict with an existing lock order:
+         *
+         * mem_hotplug_lock->slab_mutex->kernfs_mutex
+         *
+         * We don't really need mem_hotplug_lock (to hold off
+         * slab_mem_going_offline_callback) here because slab's memory hot
+         * unplug code doesn't destroy the kmem_cache->node[] data.
+         */
+
 #ifdef CONFIG_SLUB_DEBUG
         if (flags & SO_ALL) {
                 struct kmem_cache_node *n;
@@ -4887,7 +4907,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
                 x += sprintf(buf + x, " N%d=%lu",
                              node, nodes[node]);
 #endif
-        put_online_mems();
         kfree(nodes);
         return x + sprintf(buf + x, "\n");
 }
...
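The comment that replaces get_online_mems()/put_online_mems() describes a classic ABBA ordering problem: the sysfs read path already holds kernfs_mutex and would take mem_hotplug_lock, while the hotplug path acquires them in the opposite order via slab_mutex. A generic illustration of that deadlock class with plain mutexes (these are stand-in locks for the sketch, not the actual kernel locks, one of which is a percpu rwsem):

#include <linux/mutex.h>

static DEFINE_MUTEX(lock_a);            /* stands in for kernfs_mutex */
static DEFINE_MUTEX(lock_b);            /* stands in for mem_hotplug_lock */

static void path_one(void)              /* e.g. reading a slab sysfs file */
{
        mutex_lock(&lock_a);
        mutex_lock(&lock_b);            /* A then B */
        mutex_unlock(&lock_b);
        mutex_unlock(&lock_a);
}

static void path_two(void)              /* e.g. memory hot-unplug */
{
        mutex_lock(&lock_b);
        mutex_lock(&lock_a);            /* B then A: lockdep flags the inversion */
        mutex_unlock(&lock_a);
        mutex_unlock(&lock_b);
}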