Commit 0f47c942 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux

Pull slab changes from Pekka Enberg:
 "The bulk of the changes are more slab unification from Christoph.

  There's also a few fixes from Aaron, Glauber, and Joonsoo thrown into
  the mix."

* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux: (24 commits)
  mm, slab_common: Fix bootstrap creation of kmalloc caches
  slab: Return NULL for oversized allocations
  mm: slab: Verify the nodeid passed to ____cache_alloc_node
  slub: tid must be retrieved from the percpu area of the current processor
  slub: Do not dereference NULL pointer in node_match
  slub: add 'likely' macro to inc_slabs_node()
  slub: correct to calculate num of acquired objects in get_partial_node()
  slub: correctly bootstrap boot caches
  mm/sl[au]b: correct allocation type check in kmalloc_slab()
  slab: Fixup CONFIG_PAGE_ALLOC/DEBUG_SLAB_LEAK sections
  slab: Handle ARCH_DMA_MINALIGN correctly
  slab: Common definition for kmem_cache_node
  slab: Rename list3/l3 to node
  slab: Common Kmalloc cache determination
  stat: Use size_t for sizes instead of unsigned
  slab: Common function to create the kmalloc array
  slab: Common definition for the array of kmalloc caches
  slab: Common constants for kmalloc boundaries
  slab: Rename nodelists to node
  slab: Common name for the per node structures
  ...
parents b9e306e0 69df2ac1
...@@ -184,7 +184,7 @@ static int show_stat(struct seq_file *p, void *v) ...@@ -184,7 +184,7 @@ static int show_stat(struct seq_file *p, void *v)
static int stat_open(struct inode *inode, struct file *file) static int stat_open(struct inode *inode, struct file *file)
{ {
unsigned size = 1024 + 128 * num_possible_cpus(); size_t size = 1024 + 128 * num_possible_cpus();
char *buf; char *buf;
struct seq_file *m; struct seq_file *m;
int res; int res;
......
/*
 * X-macro table of the general kmalloc cache sizes (legacy kmalloc_sizes.h).
 * The includer defines CACHE(x) before #including this file; each CACHE()
 * line then expands to one entry for a cache of that many bytes.
 */
/* A 32-byte cache is only worthwhile on 4k pages — presumably to bound
 * per-slab overhead on larger page sizes; confirm against git history. */
#if (PAGE_SIZE == 4096)
CACHE(32)
#endif
CACHE(64)
/* 96/192-byte caches exist only when they exceed the L1 line size */
#if L1_CACHE_BYTES < 64
CACHE(96)
#endif
CACHE(128)
#if L1_CACHE_BYTES < 128
CACHE(192)
#endif
CACHE(256)
CACHE(512)
CACHE(1024)
CACHE(2048)
CACHE(4096)
CACHE(8192)
CACHE(16384)
CACHE(32768)
CACHE(65536)
CACHE(131072)
/* Sizes above 128k exist only up to the arch's KMALLOC_MAX_SIZE */
#if KMALLOC_MAX_SIZE >= 262144
CACHE(262144)
#endif
#if KMALLOC_MAX_SIZE >= 524288
CACHE(524288)
#endif
#if KMALLOC_MAX_SIZE >= 1048576
CACHE(1048576)
#endif
#if KMALLOC_MAX_SIZE >= 2097152
CACHE(2097152)
#endif
#if KMALLOC_MAX_SIZE >= 4194304
CACHE(4194304)
#endif
#if KMALLOC_MAX_SIZE >= 8388608
CACHE(8388608)
#endif
#if KMALLOC_MAX_SIZE >= 16777216
CACHE(16777216)
#endif
#if KMALLOC_MAX_SIZE >= 33554432
CACHE(33554432)
#endif
...@@ -94,29 +94,6 @@ ...@@ -94,29 +94,6 @@
#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ #define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \
(unsigned long)ZERO_SIZE_PTR) (unsigned long)ZERO_SIZE_PTR)
/*
 * Common fields provided in kmem_cache by all slab allocators
 * This struct is either used directly by the allocator (SLOB)
 * or the allocator must include definitions for all fields
 * provided in kmem_cache_common in their definition of kmem_cache.
 *
 * Once we can do anonymous structs (C11 standard) we could put a
 * anonymous struct definition in these allocators so that the
 * separate allocations in the kmem_cache structure of SLAB and
 * SLUB is no longer needed.
 */
#ifdef CONFIG_SLOB
/* SLOB needs no allocator-private fields, so it uses the common set as-is. */
struct kmem_cache {
	unsigned int object_size;/* The original size of the object */
	unsigned int size;	/* The aligned/padded/added on size */
	unsigned int align;	/* Alignment as calculated */
	unsigned long flags;	/* Active flags on the slab */
	const char *name;	/* Slab name for sysfs */
	int refcount;		/* Use counter */
	void (*ctor)(void *);	/* Called on object slot creation */
	struct list_head list;	/* List of all slab caches on the system */
};
#endif
struct mem_cgroup; struct mem_cgroup;
/* /*
...@@ -148,7 +125,63 @@ void kmem_cache_free(struct kmem_cache *, void *); ...@@ -148,7 +125,63 @@ void kmem_cache_free(struct kmem_cache *, void *);
(__flags), NULL) (__flags), NULL)
/* /*
* The largest kmalloc size supported by the slab allocators is * Common kmalloc functions provided by all allocators
*/
void * __must_check __krealloc(const void *, size_t, gfp_t);
void * __must_check krealloc(const void *, size_t, gfp_t);
void kfree(const void *);
void kzfree(const void *);
size_t ksize(const void *);
/*
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
* Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
*/
#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN
#define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN)
#else
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif
#ifdef CONFIG_SLOB
/*
* Common fields provided in kmem_cache by all slab allocators
* This struct is either used directly by the allocator (SLOB)
* or the allocator must include definitions for all fields
* provided in kmem_cache_common in their definition of kmem_cache.
*
* Once we can do anonymous structs (C11 standard) we could put a
* anonymous struct definition in these allocators so that the
* separate allocations in the kmem_cache structure of SLAB and
* SLUB is no longer needed.
*/
struct kmem_cache {
unsigned int object_size;/* The original size of the object */
unsigned int size; /* The aligned/padded/added on size */
unsigned int align; /* Alignment as calculated */
unsigned long flags; /* Active flags on the slab */
const char *name; /* Slab name for sysfs */
int refcount; /* Use counter */
void (*ctor)(void *); /* Called on object slot creation */
struct list_head list; /* List of all slab caches on the system */
};
#define KMALLOC_MAX_SIZE (1UL << 30)
#include <linux/slob_def.h>
#else /* CONFIG_SLOB */
/*
* Kmalloc array related definitions
*/
#ifdef CONFIG_SLAB
/*
* The largest kmalloc size supported by the SLAB allocators is
* 32 megabyte (2^25) or the maximum allocatable page order if that is * 32 megabyte (2^25) or the maximum allocatable page order if that is
* less than 32 MB. * less than 32 MB.
* *
...@@ -158,21 +191,119 @@ void kmem_cache_free(struct kmem_cache *, void *); ...@@ -158,21 +191,119 @@ void kmem_cache_free(struct kmem_cache *, void *);
*/ */
#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \ #define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
(MAX_ORDER + PAGE_SHIFT - 1) : 25) (MAX_ORDER + PAGE_SHIFT - 1) : 25)
#define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 5
#endif
#else
/*
* SLUB allocates up to order 2 pages directly and otherwise
* passes the request to the page allocator.
*/
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 3
#endif
#endif
#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_HIGH) /* Maximum allocatable size */
#define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_HIGH - PAGE_SHIFT) #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX)
/* Maximum size for which we actually use a slab cache */
#define KMALLOC_MAX_CACHE_SIZE (1UL << KMALLOC_SHIFT_HIGH)
/* Maximum order allocatable via the slab allocator */
#define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_MAX - PAGE_SHIFT)
/* /*
* Some archs want to perform DMA into kmalloc caches and need a guaranteed * Kmalloc subsystem.
* alignment larger than the alignment of a 64-bit integer.
* Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
*/ */
#ifdef ARCH_DMA_MINALIGN #ifndef KMALLOC_MIN_SIZE
#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN #define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
#endif
extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
#ifdef CONFIG_ZONE_DMA
extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
#endif
/*
 * Figure out which kmalloc slab an allocation of a certain size
 * belongs to.
 * 0 = zero alloc
 * 1 =  65 .. 96 bytes
 * 2 = 120 .. 192 bytes
 * n = 2^(n-1) .. 2^n -1
 *
 * NOTE: the 96/192 range checks must stay ahead of the power-of-two
 * chain, and the chain is unrolled rather than looped because some
 * compilers fail to constant-fold a loop here (see the old SLUB copy
 * of this function for the historical rationale).
 */
static __always_inline int kmalloc_index(size_t size)
{
	if (!size)
		return 0;	/* caller maps index 0 to ZERO_SIZE_PTR */

	if (size <= KMALLOC_MIN_SIZE)
		return KMALLOC_SHIFT_LOW;

	/* the 96/192-byte caches exist only for small enough minimum sizes */
	if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
		return 1;
	if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
		return 2;
	if (size <= 8) return 3;
	if (size <= 16) return 4;
	if (size <= 32) return 5;
	if (size <= 64) return 6;
	if (size <= 128) return 7;
	if (size <= 256) return 8;
	if (size <= 512) return 9;
	if (size <= 1024) return 10;
	if (size <= 2 * 1024) return 11;
	if (size <= 4 * 1024) return 12;
	if (size <= 8 * 1024) return 13;
	if (size <= 16 * 1024) return 14;
	if (size <= 32 * 1024) return 15;
	if (size <= 64 * 1024) return 16;
	if (size <= 128 * 1024) return 17;
	if (size <= 256 * 1024) return 18;
	if (size <= 512 * 1024) return 19;
	if (size <= 1024 * 1024) return 20;
	if (size <= 2 * 1024 * 1024) return 21;
	if (size <= 4 * 1024 * 1024) return 22;
	if (size <= 8 * 1024 * 1024) return 23;
	if (size <= 16 * 1024 * 1024) return 24;
	if (size <= 32 * 1024 * 1024) return 25;
	if (size <= 64 * 1024 * 1024) return 26;
	BUG();

	/* Will never be reached. Needed because the compiler may complain */
	return -1;
}
#ifdef CONFIG_SLAB
#include <linux/slab_def.h>
#elif defined(CONFIG_SLUB)
#include <linux/slub_def.h>
#else #else
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) #error "Unknown slab allocator"
#endif #endif
/*
 * Object size backing the n-th kmalloc cache, or 0 when no cache
 * exists for that index (index 0, or a 96/192-byte cache that the
 * minimum allocation size rules out).
 */
static __always_inline int kmalloc_size(int n)
{
	switch (n) {
	case 1:
		/* index 1 is the odd 96-byte cache */
		return (KMALLOC_MIN_SIZE <= 32) ? 96 : 0;
	case 2:
		/* index 2 is the odd 192-byte cache */
		return (KMALLOC_MIN_SIZE <= 64) ? 192 : 0;
	default:
		/* power-of-two caches from index 3 upwards */
		return (n > 2) ? 1 << n : 0;
	}
}
#endif /* !CONFIG_SLOB */
/* /*
* Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
* Intended for arches that get misalignment faults even for 64 bit integer * Intended for arches that get misalignment faults even for 64 bit integer
...@@ -224,42 +355,6 @@ struct seq_file; ...@@ -224,42 +355,6 @@ struct seq_file;
int cache_show(struct kmem_cache *s, struct seq_file *m); int cache_show(struct kmem_cache *s, struct seq_file *m);
void print_slabinfo_header(struct seq_file *m); void print_slabinfo_header(struct seq_file *m);
/*
* Common kmalloc functions provided by all allocators
*/
void * __must_check __krealloc(const void *, size_t, gfp_t);
void * __must_check krealloc(const void *, size_t, gfp_t);
void kfree(const void *);
void kzfree(const void *);
size_t ksize(const void *);
/*
* Allocator specific definitions. These are mainly used to establish optimized
* ways to convert kmalloc() calls to kmem_cache_alloc() invocations by
* selecting the appropriate general cache at compile time.
*
* Allocators must define at least:
*
* kmem_cache_alloc()
* __kmalloc()
* kmalloc()
*
* Those wishing to support NUMA must also define:
*
* kmem_cache_alloc_node()
* kmalloc_node()
*
* See each allocator definition file for additional comments and
* implementation notes.
*/
#ifdef CONFIG_SLUB
#include <linux/slub_def.h>
#elif defined(CONFIG_SLOB)
#include <linux/slob_def.h>
#else
#include <linux/slab_def.h>
#endif
/** /**
* kmalloc_array - allocate memory for an array. * kmalloc_array - allocate memory for an array.
* @n: number of elements. * @n: number of elements.
......
...@@ -11,8 +11,6 @@ ...@@ -11,8 +11,6 @@
*/ */
#include <linux/init.h> #include <linux/init.h>
#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
#include <linux/compiler.h> #include <linux/compiler.h>
/* /*
...@@ -97,23 +95,13 @@ struct kmem_cache { ...@@ -97,23 +95,13 @@ struct kmem_cache {
* pointer for each node since "nodelists" uses the remainder of * pointer for each node since "nodelists" uses the remainder of
* available pointers. * available pointers.
*/ */
struct kmem_list3 **nodelists; struct kmem_cache_node **node;
struct array_cache *array[NR_CPUS + MAX_NUMNODES]; struct array_cache *array[NR_CPUS + MAX_NUMNODES];
/* /*
* Do not add fields after array[] * Do not add fields after array[]
*/ */
}; };
/* Size description struct for general caches. */
struct cache_sizes {
size_t cs_size;
struct kmem_cache *cs_cachep;
#ifdef CONFIG_ZONE_DMA
struct kmem_cache *cs_dmacachep;
#endif
};
extern struct cache_sizes malloc_sizes[];
void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
void *__kmalloc(size_t size, gfp_t flags); void *__kmalloc(size_t size, gfp_t flags);
...@@ -133,26 +121,22 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) ...@@ -133,26 +121,22 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
void *ret; void *ret;
if (__builtin_constant_p(size)) { if (__builtin_constant_p(size)) {
int i = 0; int i;
if (!size) if (!size)
return ZERO_SIZE_PTR; return ZERO_SIZE_PTR;
#define CACHE(x) \ if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
if (size <= x) \ return NULL;
goto found; \
else \ i = kmalloc_index(size);
i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
return NULL;
found:
#ifdef CONFIG_ZONE_DMA #ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA) if (flags & GFP_DMA)
cachep = malloc_sizes[i].cs_dmacachep; cachep = kmalloc_dma_caches[i];
else else
#endif #endif
cachep = malloc_sizes[i].cs_cachep; cachep = kmalloc_caches[i];
ret = kmem_cache_alloc_trace(cachep, flags, size); ret = kmem_cache_alloc_trace(cachep, flags, size);
...@@ -186,26 +170,22 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) ...@@ -186,26 +170,22 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
struct kmem_cache *cachep; struct kmem_cache *cachep;
if (__builtin_constant_p(size)) { if (__builtin_constant_p(size)) {
int i = 0; int i;
if (!size) if (!size)
return ZERO_SIZE_PTR; return ZERO_SIZE_PTR;
#define CACHE(x) \ if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
if (size <= x) \ return NULL;
goto found; \
else \ i = kmalloc_index(size);
i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
return NULL;
found:
#ifdef CONFIG_ZONE_DMA #ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA) if (flags & GFP_DMA)
cachep = malloc_sizes[i].cs_dmacachep; cachep = kmalloc_dma_caches[i];
else else
#endif #endif
cachep = malloc_sizes[i].cs_cachep; cachep = kmalloc_caches[i];
return kmem_cache_alloc_node_trace(cachep, flags, node, size); return kmem_cache_alloc_node_trace(cachep, flags, node, size);
} }
......
...@@ -53,17 +53,6 @@ struct kmem_cache_cpu { ...@@ -53,17 +53,6 @@ struct kmem_cache_cpu {
#endif #endif
}; };
/*
 * Per-NUMA-node state of a SLUB cache: the partial-slab list plus,
 * when debugging is enabled, slab/object counters and the full list.
 */
struct kmem_cache_node {
	spinlock_t list_lock;	/* Protect partial list and nr_partial */
	unsigned long nr_partial;	/* length of the partial list */
	struct list_head partial;	/* partially allocated slabs */
#ifdef CONFIG_SLUB_DEBUG
	atomic_long_t nr_slabs;		/* presumably all slabs on this node — debug accounting */
	atomic_long_t total_objects;
	struct list_head full;		/* fully allocated slabs (debug only) */
#endif
};
/* /*
* Word size structure that can be atomically updated or read and that * Word size structure that can be atomically updated or read and that
* contains both the order and the number of objects that a slab of the * contains both the order and the number of objects that a slab of the
...@@ -115,111 +104,6 @@ struct kmem_cache { ...@@ -115,111 +104,6 @@ struct kmem_cache {
struct kmem_cache_node *node[MAX_NUMNODES]; struct kmem_cache_node *node[MAX_NUMNODES];
}; };
/*
* Kmalloc subsystem.
*/
#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN
#else
#define KMALLOC_MIN_SIZE 8
#endif
#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
/*
* Maximum kmalloc object size handled by SLUB. Larger object allocations
* are passed through to the page allocator. The page allocator "fastpath"
* is relatively slow so we need this value sufficiently high so that
* performance critical objects are allocated through the SLUB fastpath.
*
* This should be dropped to PAGE_SIZE / 2 once the page allocator
* "fastpath" becomes competitive with the slab allocator fastpaths.
*/
#define SLUB_MAX_SIZE (2 * PAGE_SIZE)
#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2)
#ifdef CONFIG_ZONE_DMA
#define SLUB_DMA __GFP_DMA
#else
/* Disable DMA functionality */
#define SLUB_DMA (__force gfp_t)0
#endif
/*
* We keep the general caches in an array of slab caches that are used for
* 2^x bytes of allocations.
*/
extern struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
/*
 * Map an allocation size to its kmalloc cache index at compile time.
 *
 * Sorry that the following has to be that ugly but some versions of GCC
 * have trouble with constant propagation and loops.
 */
static __always_inline int kmalloc_index(size_t size)
{
	if (!size)
		return 0;	/* no cache; caller returns ZERO_SIZE_PTR */

	if (size <= KMALLOC_MIN_SIZE)
		return KMALLOC_SHIFT_LOW;

	/* the non-power-of-two 96/192 caches must be checked first */
	if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
		return 1;
	if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
		return 2;
	if (size <= 8) return 3;
	if (size <= 16) return 4;
	if (size <= 32) return 5;
	if (size <= 64) return 6;
	if (size <= 128) return 7;
	if (size <= 256) return 8;
	if (size <= 512) return 9;
	if (size <= 1024) return 10;
	if (size <= 2 * 1024) return 11;
	if (size <= 4 * 1024) return 12;
/*
 * The following is only needed to support architectures with a larger page
 * size than 4k. We need to support 2 * PAGE_SIZE here. So for a 64k page
 * size we would have to go up to 128k.
 */
	if (size <= 8 * 1024) return 13;
	if (size <= 16 * 1024) return 14;
	if (size <= 32 * 1024) return 15;
	if (size <= 64 * 1024) return 16;
	if (size <= 128 * 1024) return 17;
	if (size <= 256 * 1024) return 18;
	if (size <= 512 * 1024) return 19;
	if (size <= 1024 * 1024) return 20;
	if (size <= 2 * 1024 * 1024) return 21;
	BUG();
	return -1; /* Will never be reached */

/*
 * What we really wanted to do and cannot do because of compiler issues is:
 *	int i;
 *	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
 *		if (size <= (1 << i))
 *			return i;
 */
}
/*
 * Find the slab cache for a given combination of allocation flags and size.
 *
 * This ought to end up with a global pointer to the right cache
 * in kmalloc_caches. Index 0 (a zero-sized request) has no cache,
 * so NULL is returned and the caller substitutes ZERO_SIZE_PTR.
 */
static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
{
	int idx = kmalloc_index(size);

	return idx ? kmalloc_caches[idx] : NULL;
}
void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
void *__kmalloc(size_t size, gfp_t flags); void *__kmalloc(size_t size, gfp_t flags);
...@@ -274,16 +158,17 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) ...@@ -274,16 +158,17 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
static __always_inline void *kmalloc(size_t size, gfp_t flags) static __always_inline void *kmalloc(size_t size, gfp_t flags)
{ {
if (__builtin_constant_p(size)) { if (__builtin_constant_p(size)) {
if (size > SLUB_MAX_SIZE) if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large(size, flags); return kmalloc_large(size, flags);
if (!(flags & SLUB_DMA)) { if (!(flags & GFP_DMA)) {
struct kmem_cache *s = kmalloc_slab(size); int index = kmalloc_index(size);
if (!s) if (!index)
return ZERO_SIZE_PTR; return ZERO_SIZE_PTR;
return kmem_cache_alloc_trace(s, flags, size); return kmem_cache_alloc_trace(kmalloc_caches[index],
flags, size);
} }
} }
return __kmalloc(size, flags); return __kmalloc(size, flags);
...@@ -310,13 +195,14 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, ...@@ -310,13 +195,14 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{ {
if (__builtin_constant_p(size) && if (__builtin_constant_p(size) &&
size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) { size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) {
struct kmem_cache *s = kmalloc_slab(size); int index = kmalloc_index(size);
if (!s) if (!index)
return ZERO_SIZE_PTR; return ZERO_SIZE_PTR;
return kmem_cache_alloc_node_trace(s, flags, node, size); return kmem_cache_alloc_node_trace(kmalloc_caches[index],
flags, node, size);
} }
return __kmalloc_node(size, flags, node); return __kmalloc_node(size, flags, node);
} }
......
...@@ -285,69 +285,28 @@ struct arraycache_init { ...@@ -285,69 +285,28 @@ struct arraycache_init {
void *entries[BOOT_CPUCACHE_ENTRIES]; void *entries[BOOT_CPUCACHE_ENTRIES];
}; };
/*
 * The slab lists for all objects.
 * Per-NUMA-node bookkeeping for one cache; this series renames it to
 * kmem_cache_node (see init_kmem_cache_node below).
 */
struct kmem_list3 {
	struct list_head slabs_partial;	/* partial list first, better asm code */
	struct list_head slabs_full;
	struct list_head slabs_free;
	unsigned long free_objects;	/* free objects across the lists */
	unsigned int free_limit;
	unsigned int colour_next;	/* Per-node cache coloring */
	spinlock_t list_lock;
	struct array_cache *shared;	/* shared per node */
	struct array_cache **alien;	/* on other nodes */
	unsigned long next_reap;	/* updated without locking */
	int free_touched;		/* updated without locking */
};
/* /*
* Need this for bootstrapping a per node allocator. * Need this for bootstrapping a per node allocator.
*/ */
#define NUM_INIT_LISTS (3 * MAX_NUMNODES) #define NUM_INIT_LISTS (3 * MAX_NUMNODES)
static struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
#define CACHE_CACHE 0 #define CACHE_CACHE 0
#define SIZE_AC MAX_NUMNODES #define SIZE_AC MAX_NUMNODES
#define SIZE_L3 (2 * MAX_NUMNODES) #define SIZE_NODE (2 * MAX_NUMNODES)
static int drain_freelist(struct kmem_cache *cache, static int drain_freelist(struct kmem_cache *cache,
struct kmem_list3 *l3, int tofree); struct kmem_cache_node *n, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len, static void free_block(struct kmem_cache *cachep, void **objpp, int len,
int node); int node);
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused); static void cache_reap(struct work_struct *unused);
/*
 * This function must be completely optimized away if a constant is passed to
 * it. Mostly the same as what is in linux/slab.h except it returns an index.
 *
 * The CACHE() X-macro expansion below generates one range check per entry
 * of kmalloc_sizes.h. __bad_size() is deliberately left undefined —
 * presumably so a non-constant-folded call fails at link time; confirm
 * against the build setup.
 */
static __always_inline int index_of(const size_t size)
{
	extern void __bad_size(void);

	if (__builtin_constant_p(size)) {
		int i = 0;

#define CACHE(x) \
	if (size <=x) \
		return i; \
	else \
		i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
		__bad_size();
	} else
		__bad_size();
	return 0;
}
static int slab_early_init = 1; static int slab_early_init = 1;
#define INDEX_AC index_of(sizeof(struct arraycache_init)) #define INDEX_AC kmalloc_index(sizeof(struct arraycache_init))
#define INDEX_L3 index_of(sizeof(struct kmem_list3)) #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
static void kmem_list3_init(struct kmem_list3 *parent) static void kmem_cache_node_init(struct kmem_cache_node *parent)
{ {
INIT_LIST_HEAD(&parent->slabs_full); INIT_LIST_HEAD(&parent->slabs_full);
INIT_LIST_HEAD(&parent->slabs_partial); INIT_LIST_HEAD(&parent->slabs_partial);
...@@ -363,7 +322,7 @@ static void kmem_list3_init(struct kmem_list3 *parent) ...@@ -363,7 +322,7 @@ static void kmem_list3_init(struct kmem_list3 *parent)
#define MAKE_LIST(cachep, listp, slab, nodeid) \ #define MAKE_LIST(cachep, listp, slab, nodeid) \
do { \ do { \
INIT_LIST_HEAD(listp); \ INIT_LIST_HEAD(listp); \
list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ list_splice(&(cachep->node[nodeid]->slab), listp); \
} while (0) } while (0)
#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
...@@ -524,30 +483,6 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache, ...@@ -524,30 +483,6 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
return reciprocal_divide(offset, cache->reciprocal_buffer_size); return reciprocal_divide(offset, cache->reciprocal_buffer_size);
} }
/*
 * These are the default caches for kmalloc. Custom caches can have other sizes.
 * The trailing ULONG_MAX entry is a sentinel: its cache pointers stay NULL,
 * which lets __find_general_cachep() return NULL for oversized requests
 * without a special case.
 */
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
	CACHE(ULONG_MAX)
#undef CACHE
};
EXPORT_SYMBOL(malloc_sizes);
/* Must match cache_sizes above. Out of line to keep cache footprint low. */
struct cache_names {
	char *name;	/* e.g. "size-4096" */
	char *name_dma;	/* e.g. "size-4096(DMA)" */
};

/* __initdata: the names are copied during boot and then discarded */
static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
	{NULL,}
#undef CACHE
};
static struct arraycache_init initarray_generic = static struct arraycache_init initarray_generic =
{ {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
...@@ -586,15 +521,15 @@ static void slab_set_lock_classes(struct kmem_cache *cachep, ...@@ -586,15 +521,15 @@ static void slab_set_lock_classes(struct kmem_cache *cachep,
int q) int q)
{ {
struct array_cache **alc; struct array_cache **alc;
struct kmem_list3 *l3; struct kmem_cache_node *n;
int r; int r;
l3 = cachep->nodelists[q]; n = cachep->node[q];
if (!l3) if (!n)
return; return;
lockdep_set_class(&l3->list_lock, l3_key); lockdep_set_class(&n->list_lock, l3_key);
alc = l3->alien; alc = n->alien;
/* /*
* FIXME: This check for BAD_ALIEN_MAGIC * FIXME: This check for BAD_ALIEN_MAGIC
* should go away when common slab code is taught to * should go away when common slab code is taught to
...@@ -625,28 +560,30 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) ...@@ -625,28 +560,30 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
static void init_node_lock_keys(int q) static void init_node_lock_keys(int q)
{ {
struct cache_sizes *s = malloc_sizes; int i;
if (slab_state < UP) if (slab_state < UP)
return; return;
for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) {
struct kmem_list3 *l3; struct kmem_cache_node *n;
struct kmem_cache *cache = kmalloc_caches[i];
if (!cache)
continue;
l3 = s->cs_cachep->nodelists[q]; n = cache->node[q];
if (!l3 || OFF_SLAB(s->cs_cachep)) if (!n || OFF_SLAB(cache))
continue; continue;
slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key, slab_set_lock_classes(cache, &on_slab_l3_key,
&on_slab_alc_key, q); &on_slab_alc_key, q);
} }
} }
static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q) static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
{ {
struct kmem_list3 *l3; if (!cachep->node[q])
l3 = cachep->nodelists[q];
if (!l3)
return; return;
slab_set_lock_classes(cachep, &on_slab_l3_key, slab_set_lock_classes(cachep, &on_slab_l3_key,
...@@ -702,41 +639,6 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) ...@@ -702,41 +639,6 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
return cachep->array[smp_processor_id()]; return cachep->array[smp_processor_id()];
} }
/*
 * Walk malloc_sizes[] and return the smallest general cache able to hold
 * @size bytes (the DMA variant when GFP_DMA is set). Returns ZERO_SIZE_PTR
 * for zero-sized requests and NULL for oversized ones, via the ULONG_MAX
 * sentinel entry whose cache pointers are NULL.
 */
static inline struct kmem_cache *__find_general_cachep(size_t size,
							gfp_t gfpflags)
{
	struct cache_sizes *csizep = malloc_sizes;

#if DEBUG
	/* This happens if someone tries to call
	 * kmem_cache_create(), or __kmalloc(), before
	 * the generic caches are initialized.
	 */
	BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
#endif
	if (!size)
		return ZERO_SIZE_PTR;

	/* sentinel cs_size of ULONG_MAX guarantees the walk terminates */
	while (size > csizep->cs_size)
		csizep++;

	/*
	 * Really subtle: The last entry with cs->cs_size==ULONG_MAX
	 * has cs_{dma,}cachep==NULL. Thus no special case
	 * for large kmalloc calls required.
	 */
#ifdef CONFIG_ZONE_DMA
	if (unlikely(gfpflags & GFP_DMA))
		return csizep->cs_dmacachep;
#endif
	return csizep->cs_cachep;
}
/* Out-of-line wrapper around __find_general_cachep(); same contract. */
static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
{
	return __find_general_cachep(size, gfpflags);
}
static size_t slab_mgmt_size(size_t nr_objs, size_t align) static size_t slab_mgmt_size(size_t nr_objs, size_t align)
{ {
return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
...@@ -938,29 +840,29 @@ static inline bool is_slab_pfmemalloc(struct slab *slabp) ...@@ -938,29 +840,29 @@ static inline bool is_slab_pfmemalloc(struct slab *slabp)
static void recheck_pfmemalloc_active(struct kmem_cache *cachep, static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
struct array_cache *ac) struct array_cache *ac)
{ {
struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()]; struct kmem_cache_node *n = cachep->node[numa_mem_id()];
struct slab *slabp; struct slab *slabp;
unsigned long flags; unsigned long flags;
if (!pfmemalloc_active) if (!pfmemalloc_active)
return; return;
spin_lock_irqsave(&l3->list_lock, flags); spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(slabp, &l3->slabs_full, list) list_for_each_entry(slabp, &n->slabs_full, list)
if (is_slab_pfmemalloc(slabp)) if (is_slab_pfmemalloc(slabp))
goto out; goto out;
list_for_each_entry(slabp, &l3->slabs_partial, list) list_for_each_entry(slabp, &n->slabs_partial, list)
if (is_slab_pfmemalloc(slabp)) if (is_slab_pfmemalloc(slabp))
goto out; goto out;
list_for_each_entry(slabp, &l3->slabs_free, list) list_for_each_entry(slabp, &n->slabs_free, list)
if (is_slab_pfmemalloc(slabp)) if (is_slab_pfmemalloc(slabp))
goto out; goto out;
pfmemalloc_active = false; pfmemalloc_active = false;
out: out:
spin_unlock_irqrestore(&l3->list_lock, flags); spin_unlock_irqrestore(&n->list_lock, flags);
} }
static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
...@@ -971,7 +873,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, ...@@ -971,7 +873,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
/* Ensure the caller is allowed to use objects from PFMEMALLOC slab */ /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */
if (unlikely(is_obj_pfmemalloc(objp))) { if (unlikely(is_obj_pfmemalloc(objp))) {
struct kmem_list3 *l3; struct kmem_cache_node *n;
if (gfp_pfmemalloc_allowed(flags)) { if (gfp_pfmemalloc_allowed(flags)) {
clear_obj_pfmemalloc(&objp); clear_obj_pfmemalloc(&objp);
...@@ -993,8 +895,8 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, ...@@ -993,8 +895,8 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
* If there are empty slabs on the slabs_free list and we are * If there are empty slabs on the slabs_free list and we are
* being forced to refill the cache, mark this one !pfmemalloc. * being forced to refill the cache, mark this one !pfmemalloc.
*/ */
l3 = cachep->nodelists[numa_mem_id()]; n = cachep->node[numa_mem_id()];
if (!list_empty(&l3->slabs_free) && force_refill) { if (!list_empty(&n->slabs_free) && force_refill) {
struct slab *slabp = virt_to_slab(objp); struct slab *slabp = virt_to_slab(objp);
ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem)); ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem));
clear_obj_pfmemalloc(&objp); clear_obj_pfmemalloc(&objp);
...@@ -1071,7 +973,7 @@ static int transfer_objects(struct array_cache *to, ...@@ -1071,7 +973,7 @@ static int transfer_objects(struct array_cache *to,
#ifndef CONFIG_NUMA #ifndef CONFIG_NUMA
#define drain_alien_cache(cachep, alien) do { } while (0) #define drain_alien_cache(cachep, alien) do { } while (0)
#define reap_alien(cachep, l3) do { } while (0) #define reap_alien(cachep, n) do { } while (0)
static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{ {
...@@ -1143,33 +1045,33 @@ static void free_alien_cache(struct array_cache **ac_ptr) ...@@ -1143,33 +1045,33 @@ static void free_alien_cache(struct array_cache **ac_ptr)
static void __drain_alien_cache(struct kmem_cache *cachep, static void __drain_alien_cache(struct kmem_cache *cachep,
struct array_cache *ac, int node) struct array_cache *ac, int node)
{ {
struct kmem_list3 *rl3 = cachep->nodelists[node]; struct kmem_cache_node *n = cachep->node[node];
if (ac->avail) { if (ac->avail) {
spin_lock(&rl3->list_lock); spin_lock(&n->list_lock);
/* /*
* Stuff objects into the remote nodes shared array first. * Stuff objects into the remote nodes shared array first.
* That way we could avoid the overhead of putting the objects * That way we could avoid the overhead of putting the objects
* into the free lists and getting them back later. * into the free lists and getting them back later.
*/ */
if (rl3->shared) if (n->shared)
transfer_objects(rl3->shared, ac, ac->limit); transfer_objects(n->shared, ac, ac->limit);
free_block(cachep, ac->entry, ac->avail, node); free_block(cachep, ac->entry, ac->avail, node);
ac->avail = 0; ac->avail = 0;
spin_unlock(&rl3->list_lock); spin_unlock(&n->list_lock);
} }
} }
/* /*
* Called from cache_reap() to regularly drain alien caches round robin. * Called from cache_reap() to regularly drain alien caches round robin.
*/ */
static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
{ {
int node = __this_cpu_read(slab_reap_node); int node = __this_cpu_read(slab_reap_node);
if (l3->alien) { if (n->alien) {
struct array_cache *ac = l3->alien[node]; struct array_cache *ac = n->alien[node];
if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
__drain_alien_cache(cachep, ac, node); __drain_alien_cache(cachep, ac, node);
...@@ -1199,7 +1101,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) ...@@ -1199,7 +1101,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{ {
struct slab *slabp = virt_to_slab(objp); struct slab *slabp = virt_to_slab(objp);
int nodeid = slabp->nodeid; int nodeid = slabp->nodeid;
struct kmem_list3 *l3; struct kmem_cache_node *n;
struct array_cache *alien = NULL; struct array_cache *alien = NULL;
int node; int node;
...@@ -1212,10 +1114,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) ...@@ -1212,10 +1114,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
if (likely(slabp->nodeid == node)) if (likely(slabp->nodeid == node))
return 0; return 0;
l3 = cachep->nodelists[node]; n = cachep->node[node];
STATS_INC_NODEFREES(cachep); STATS_INC_NODEFREES(cachep);
if (l3->alien && l3->alien[nodeid]) { if (n->alien && n->alien[nodeid]) {
alien = l3->alien[nodeid]; alien = n->alien[nodeid];
spin_lock(&alien->lock); spin_lock(&alien->lock);
if (unlikely(alien->avail == alien->limit)) { if (unlikely(alien->avail == alien->limit)) {
STATS_INC_ACOVERFLOW(cachep); STATS_INC_ACOVERFLOW(cachep);
...@@ -1224,28 +1126,28 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) ...@@ -1224,28 +1126,28 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
ac_put_obj(cachep, alien, objp); ac_put_obj(cachep, alien, objp);
spin_unlock(&alien->lock); spin_unlock(&alien->lock);
} else { } else {
spin_lock(&(cachep->nodelists[nodeid])->list_lock); spin_lock(&(cachep->node[nodeid])->list_lock);
free_block(cachep, &objp, 1, nodeid); free_block(cachep, &objp, 1, nodeid);
spin_unlock(&(cachep->nodelists[nodeid])->list_lock); spin_unlock(&(cachep->node[nodeid])->list_lock);
} }
return 1; return 1;
} }
#endif #endif
/* /*
* Allocates and initializes nodelists for a node on each slab cache, used for * Allocates and initializes node for a node on each slab cache, used for
* either memory or cpu hotplug. If memory is being hot-added, the kmem_list3 * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
* will be allocated off-node since memory is not yet online for the new node. * will be allocated off-node since memory is not yet online for the new node.
* When hotplugging memory or a cpu, existing nodelists are not replaced if * When hotplugging memory or a cpu, existing node are not replaced if
* already in use. * already in use.
* *
* Must hold slab_mutex. * Must hold slab_mutex.
*/ */
static int init_cache_nodelists_node(int node) static int init_cache_node_node(int node)
{ {
struct kmem_cache *cachep; struct kmem_cache *cachep;
struct kmem_list3 *l3; struct kmem_cache_node *n;
const int memsize = sizeof(struct kmem_list3); const int memsize = sizeof(struct kmem_cache_node);
list_for_each_entry(cachep, &slab_caches, list) { list_for_each_entry(cachep, &slab_caches, list) {
/* /*
...@@ -1253,12 +1155,12 @@ static int init_cache_nodelists_node(int node) ...@@ -1253,12 +1155,12 @@ static int init_cache_nodelists_node(int node)
* begin anything. Make sure some other cpu on this * begin anything. Make sure some other cpu on this
* node has not already allocated this * node has not already allocated this
*/ */
if (!cachep->nodelists[node]) { if (!cachep->node[node]) {
l3 = kmalloc_node(memsize, GFP_KERNEL, node); n = kmalloc_node(memsize, GFP_KERNEL, node);
if (!l3) if (!n)
return -ENOMEM; return -ENOMEM;
kmem_list3_init(l3); kmem_cache_node_init(n);
l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + n->next_reap = jiffies + REAPTIMEOUT_LIST3 +
((unsigned long)cachep) % REAPTIMEOUT_LIST3; ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
/* /*
...@@ -1266,14 +1168,14 @@ static int init_cache_nodelists_node(int node) ...@@ -1266,14 +1168,14 @@ static int init_cache_nodelists_node(int node)
* go. slab_mutex is sufficient * go. slab_mutex is sufficient
* protection here. * protection here.
*/ */
cachep->nodelists[node] = l3; cachep->node[node] = n;
} }
spin_lock_irq(&cachep->nodelists[node]->list_lock); spin_lock_irq(&cachep->node[node]->list_lock);
cachep->nodelists[node]->free_limit = cachep->node[node]->free_limit =
(1 + nr_cpus_node(node)) * (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num; cachep->batchcount + cachep->num;
spin_unlock_irq(&cachep->nodelists[node]->list_lock); spin_unlock_irq(&cachep->node[node]->list_lock);
} }
return 0; return 0;
} }
...@@ -1281,7 +1183,7 @@ static int init_cache_nodelists_node(int node) ...@@ -1281,7 +1183,7 @@ static int init_cache_nodelists_node(int node)
static void __cpuinit cpuup_canceled(long cpu) static void __cpuinit cpuup_canceled(long cpu)
{ {
struct kmem_cache *cachep; struct kmem_cache *cachep;
struct kmem_list3 *l3 = NULL; struct kmem_cache_node *n = NULL;
int node = cpu_to_mem(cpu); int node = cpu_to_mem(cpu);
const struct cpumask *mask = cpumask_of_node(node); const struct cpumask *mask = cpumask_of_node(node);
...@@ -1293,34 +1195,34 @@ static void __cpuinit cpuup_canceled(long cpu) ...@@ -1293,34 +1195,34 @@ static void __cpuinit cpuup_canceled(long cpu)
/* cpu is dead; no one can alloc from it. */ /* cpu is dead; no one can alloc from it. */
nc = cachep->array[cpu]; nc = cachep->array[cpu];
cachep->array[cpu] = NULL; cachep->array[cpu] = NULL;
l3 = cachep->nodelists[node]; n = cachep->node[node];
if (!l3) if (!n)
goto free_array_cache; goto free_array_cache;
spin_lock_irq(&l3->list_lock); spin_lock_irq(&n->list_lock);
/* Free limit for this kmem_list3 */ /* Free limit for this kmem_cache_node */
l3->free_limit -= cachep->batchcount; n->free_limit -= cachep->batchcount;
if (nc) if (nc)
free_block(cachep, nc->entry, nc->avail, node); free_block(cachep, nc->entry, nc->avail, node);
if (!cpumask_empty(mask)) { if (!cpumask_empty(mask)) {
spin_unlock_irq(&l3->list_lock); spin_unlock_irq(&n->list_lock);
goto free_array_cache; goto free_array_cache;
} }
shared = l3->shared; shared = n->shared;
if (shared) { if (shared) {
free_block(cachep, shared->entry, free_block(cachep, shared->entry,
shared->avail, node); shared->avail, node);
l3->shared = NULL; n->shared = NULL;
} }
alien = l3->alien; alien = n->alien;
l3->alien = NULL; n->alien = NULL;
spin_unlock_irq(&l3->list_lock); spin_unlock_irq(&n->list_lock);
kfree(shared); kfree(shared);
if (alien) { if (alien) {
...@@ -1336,17 +1238,17 @@ static void __cpuinit cpuup_canceled(long cpu) ...@@ -1336,17 +1238,17 @@ static void __cpuinit cpuup_canceled(long cpu)
* shrink each nodelist to its limit. * shrink each nodelist to its limit.
*/ */
list_for_each_entry(cachep, &slab_caches, list) { list_for_each_entry(cachep, &slab_caches, list) {
l3 = cachep->nodelists[node]; n = cachep->node[node];
if (!l3) if (!n)
continue; continue;
drain_freelist(cachep, l3, l3->free_objects); drain_freelist(cachep, n, n->free_objects);
} }
} }
static int __cpuinit cpuup_prepare(long cpu) static int __cpuinit cpuup_prepare(long cpu)
{ {
struct kmem_cache *cachep; struct kmem_cache *cachep;
struct kmem_list3 *l3 = NULL; struct kmem_cache_node *n = NULL;
int node = cpu_to_mem(cpu); int node = cpu_to_mem(cpu);
int err; int err;
...@@ -1354,9 +1256,9 @@ static int __cpuinit cpuup_prepare(long cpu) ...@@ -1354,9 +1256,9 @@ static int __cpuinit cpuup_prepare(long cpu)
* We need to do this right in the beginning since * We need to do this right in the beginning since
* alloc_arraycache's are going to use this list. * alloc_arraycache's are going to use this list.
* kmalloc_node allows us to add the slab to the right * kmalloc_node allows us to add the slab to the right
* kmem_list3 and not this cpu's kmem_list3 * kmem_cache_node and not this cpu's kmem_cache_node
*/ */
err = init_cache_nodelists_node(node); err = init_cache_node_node(node);
if (err < 0) if (err < 0)
goto bad; goto bad;
...@@ -1391,25 +1293,25 @@ static int __cpuinit cpuup_prepare(long cpu) ...@@ -1391,25 +1293,25 @@ static int __cpuinit cpuup_prepare(long cpu)
} }
} }
cachep->array[cpu] = nc; cachep->array[cpu] = nc;
l3 = cachep->nodelists[node]; n = cachep->node[node];
BUG_ON(!l3); BUG_ON(!n);
spin_lock_irq(&l3->list_lock); spin_lock_irq(&n->list_lock);
if (!l3->shared) { if (!n->shared) {
/* /*
* We are serialised from CPU_DEAD or * We are serialised from CPU_DEAD or
* CPU_UP_CANCELLED by the cpucontrol lock * CPU_UP_CANCELLED by the cpucontrol lock
*/ */
l3->shared = shared; n->shared = shared;
shared = NULL; shared = NULL;
} }
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
if (!l3->alien) { if (!n->alien) {
l3->alien = alien; n->alien = alien;
alien = NULL; alien = NULL;
} }
#endif #endif
spin_unlock_irq(&l3->list_lock); spin_unlock_irq(&n->list_lock);
kfree(shared); kfree(shared);
free_alien_cache(alien); free_alien_cache(alien);
if (cachep->flags & SLAB_DEBUG_OBJECTS) if (cachep->flags & SLAB_DEBUG_OBJECTS)
...@@ -1464,9 +1366,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, ...@@ -1464,9 +1366,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
case CPU_DEAD_FROZEN: case CPU_DEAD_FROZEN:
/* /*
* Even if all the cpus of a node are down, we don't free the * Even if all the cpus of a node are down, we don't free the
* kmem_list3 of any cache. This to avoid a race between * kmem_cache_node of any cache. This to avoid a race between
* cpu_down, and a kmalloc allocation from another cpu for * cpu_down, and a kmalloc allocation from another cpu for
* memory from the node of the cpu going down. The list3 * memory from the node of the cpu going down. The node
* structure is usually allocated from kmem_cache_create() and * structure is usually allocated from kmem_cache_create() and
* gets destroyed at kmem_cache_destroy(). * gets destroyed at kmem_cache_destroy().
*/ */
...@@ -1494,22 +1396,22 @@ static struct notifier_block __cpuinitdata cpucache_notifier = { ...@@ -1494,22 +1396,22 @@ static struct notifier_block __cpuinitdata cpucache_notifier = {
* *
* Must hold slab_mutex. * Must hold slab_mutex.
*/ */
static int __meminit drain_cache_nodelists_node(int node) static int __meminit drain_cache_node_node(int node)
{ {
struct kmem_cache *cachep; struct kmem_cache *cachep;
int ret = 0; int ret = 0;
list_for_each_entry(cachep, &slab_caches, list) { list_for_each_entry(cachep, &slab_caches, list) {
struct kmem_list3 *l3; struct kmem_cache_node *n;
l3 = cachep->nodelists[node]; n = cachep->node[node];
if (!l3) if (!n)
continue; continue;
drain_freelist(cachep, l3, l3->free_objects); drain_freelist(cachep, n, n->free_objects);
if (!list_empty(&l3->slabs_full) || if (!list_empty(&n->slabs_full) ||
!list_empty(&l3->slabs_partial)) { !list_empty(&n->slabs_partial)) {
ret = -EBUSY; ret = -EBUSY;
break; break;
} }
...@@ -1531,12 +1433,12 @@ static int __meminit slab_memory_callback(struct notifier_block *self, ...@@ -1531,12 +1433,12 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
switch (action) { switch (action) {
case MEM_GOING_ONLINE: case MEM_GOING_ONLINE:
mutex_lock(&slab_mutex); mutex_lock(&slab_mutex);
ret = init_cache_nodelists_node(nid); ret = init_cache_node_node(nid);
mutex_unlock(&slab_mutex); mutex_unlock(&slab_mutex);
break; break;
case MEM_GOING_OFFLINE: case MEM_GOING_OFFLINE:
mutex_lock(&slab_mutex); mutex_lock(&slab_mutex);
ret = drain_cache_nodelists_node(nid); ret = drain_cache_node_node(nid);
mutex_unlock(&slab_mutex); mutex_unlock(&slab_mutex);
break; break;
case MEM_ONLINE: case MEM_ONLINE:
...@@ -1551,37 +1453,37 @@ static int __meminit slab_memory_callback(struct notifier_block *self, ...@@ -1551,37 +1453,37 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */ #endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
/* /*
* swap the static kmem_list3 with kmalloced memory * swap the static kmem_cache_node with kmalloced memory
*/ */
static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list, static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list,
int nodeid) int nodeid)
{ {
struct kmem_list3 *ptr; struct kmem_cache_node *ptr;
ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid); ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid);
BUG_ON(!ptr); BUG_ON(!ptr);
memcpy(ptr, list, sizeof(struct kmem_list3)); memcpy(ptr, list, sizeof(struct kmem_cache_node));
/* /*
* Do not assume that spinlocks can be initialized via memcpy: * Do not assume that spinlocks can be initialized via memcpy:
*/ */
spin_lock_init(&ptr->list_lock); spin_lock_init(&ptr->list_lock);
MAKE_ALL_LISTS(cachep, ptr, nodeid); MAKE_ALL_LISTS(cachep, ptr, nodeid);
cachep->nodelists[nodeid] = ptr; cachep->node[nodeid] = ptr;
} }
/* /*
* For setting up all the kmem_list3s for cache whose buffer_size is same as * For setting up all the kmem_cache_node for cache whose buffer_size is same as
* size of kmem_list3. * size of kmem_cache_node.
*/ */
static void __init set_up_list3s(struct kmem_cache *cachep, int index) static void __init set_up_node(struct kmem_cache *cachep, int index)
{ {
int node; int node;
for_each_online_node(node) { for_each_online_node(node) {
cachep->nodelists[node] = &initkmem_list3[index + node]; cachep->node[node] = &init_kmem_cache_node[index + node];
cachep->nodelists[node]->next_reap = jiffies + cachep->node[node]->next_reap = jiffies +
REAPTIMEOUT_LIST3 + REAPTIMEOUT_LIST3 +
((unsigned long)cachep) % REAPTIMEOUT_LIST3; ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
} }
...@@ -1589,11 +1491,11 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index) ...@@ -1589,11 +1491,11 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index)
/* /*
* The memory after the last cpu cache pointer is used for the * The memory after the last cpu cache pointer is used for the
* the nodelists pointer. * the node pointer.
*/ */
static void setup_nodelists_pointer(struct kmem_cache *cachep) static void setup_node_pointer(struct kmem_cache *cachep)
{ {
cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; cachep->node = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids];
} }
/* /*
...@@ -1602,20 +1504,18 @@ static void setup_nodelists_pointer(struct kmem_cache *cachep) ...@@ -1602,20 +1504,18 @@ static void setup_nodelists_pointer(struct kmem_cache *cachep)
*/ */
void __init kmem_cache_init(void) void __init kmem_cache_init(void)
{ {
struct cache_sizes *sizes;
struct cache_names *names;
int i; int i;
kmem_cache = &kmem_cache_boot; kmem_cache = &kmem_cache_boot;
setup_nodelists_pointer(kmem_cache); setup_node_pointer(kmem_cache);
if (num_possible_nodes() == 1) if (num_possible_nodes() == 1)
use_alien_caches = 0; use_alien_caches = 0;
for (i = 0; i < NUM_INIT_LISTS; i++) for (i = 0; i < NUM_INIT_LISTS; i++)
kmem_list3_init(&initkmem_list3[i]); kmem_cache_node_init(&init_kmem_cache_node[i]);
set_up_list3s(kmem_cache, CACHE_CACHE); set_up_node(kmem_cache, CACHE_CACHE);
/* /*
* Fragmentation resistance on low memory - only use bigger * Fragmentation resistance on low memory - only use bigger
...@@ -1631,7 +1531,7 @@ void __init kmem_cache_init(void) ...@@ -1631,7 +1531,7 @@ void __init kmem_cache_init(void)
* kmem_cache structures of all caches, except kmem_cache itself: * kmem_cache structures of all caches, except kmem_cache itself:
* kmem_cache is statically allocated. * kmem_cache is statically allocated.
* Initially an __init data area is used for the head array and the * Initially an __init data area is used for the head array and the
* kmem_list3 structures, it's replaced with a kmalloc allocated * kmem_cache_node structures, it's replaced with a kmalloc allocated
* array at the end of the bootstrap. * array at the end of the bootstrap.
* 2) Create the first kmalloc cache. * 2) Create the first kmalloc cache.
* The struct kmem_cache for the new cache is allocated normally. * The struct kmem_cache for the new cache is allocated normally.
...@@ -1640,7 +1540,7 @@ void __init kmem_cache_init(void) ...@@ -1640,7 +1540,7 @@ void __init kmem_cache_init(void)
* head arrays. * head arrays.
* 4) Replace the __init data head arrays for kmem_cache and the first * 4) Replace the __init data head arrays for kmem_cache and the first
* kmalloc cache with kmalloc allocated arrays. * kmalloc cache with kmalloc allocated arrays.
* 5) Replace the __init data for kmem_list3 for kmem_cache and * 5) Replace the __init data for kmem_cache_node for kmem_cache and
* the other cache's with kmalloc allocated memory. * the other cache's with kmalloc allocated memory.
* 6) Resize the head arrays of the kmalloc caches to their final sizes. * 6) Resize the head arrays of the kmalloc caches to their final sizes.
*/ */
...@@ -1652,50 +1552,28 @@ void __init kmem_cache_init(void) ...@@ -1652,50 +1552,28 @@ void __init kmem_cache_init(void)
*/ */
create_boot_cache(kmem_cache, "kmem_cache", create_boot_cache(kmem_cache, "kmem_cache",
offsetof(struct kmem_cache, array[nr_cpu_ids]) + offsetof(struct kmem_cache, array[nr_cpu_ids]) +
nr_node_ids * sizeof(struct kmem_list3 *), nr_node_ids * sizeof(struct kmem_cache_node *),
SLAB_HWCACHE_ALIGN); SLAB_HWCACHE_ALIGN);
list_add(&kmem_cache->list, &slab_caches); list_add(&kmem_cache->list, &slab_caches);
/* 2+3) create the kmalloc caches */ /* 2+3) create the kmalloc caches */
sizes = malloc_sizes;
names = cache_names;
/* /*
* Initialize the caches that provide memory for the array cache and the * Initialize the caches that provide memory for the array cache and the
* kmem_list3 structures first. Without this, further allocations will * kmem_cache_node structures first. Without this, further allocations will
* bug. * bug.
*/ */
sizes[INDEX_AC].cs_cachep = create_kmalloc_cache(names[INDEX_AC].name, kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac",
sizes[INDEX_AC].cs_size, ARCH_KMALLOC_FLAGS); kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS);
if (INDEX_AC != INDEX_L3) if (INDEX_AC != INDEX_NODE)
sizes[INDEX_L3].cs_cachep = kmalloc_caches[INDEX_NODE] =
create_kmalloc_cache(names[INDEX_L3].name, create_kmalloc_cache("kmalloc-node",
sizes[INDEX_L3].cs_size, ARCH_KMALLOC_FLAGS); kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);
slab_early_init = 0; slab_early_init = 0;
while (sizes->cs_size != ULONG_MAX) {
/*
* For performance, all the general caches are L1 aligned.
* This should be particularly beneficial on SMP boxes, as it
* eliminates "false sharing".
* Note for systems short on memory removing the alignment will
* allow tighter packing of the smaller caches.
*/
if (!sizes->cs_cachep)
sizes->cs_cachep = create_kmalloc_cache(names->name,
sizes->cs_size, ARCH_KMALLOC_FLAGS);
#ifdef CONFIG_ZONE_DMA
sizes->cs_dmacachep = create_kmalloc_cache(
names->name_dma, sizes->cs_size,
SLAB_CACHE_DMA|ARCH_KMALLOC_FLAGS);
#endif
sizes++;
names++;
}
/* 4) Replace the bootstrap head arrays */ /* 4) Replace the bootstrap head arrays */
{ {
struct array_cache *ptr; struct array_cache *ptr;
...@@ -1713,36 +1591,35 @@ void __init kmem_cache_init(void) ...@@ -1713,36 +1591,35 @@ void __init kmem_cache_init(void)
ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) BUG_ON(cpu_cache_get(kmalloc_caches[INDEX_AC])
!= &initarray_generic.cache); != &initarray_generic.cache);
memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
sizeof(struct arraycache_init)); sizeof(struct arraycache_init));
/* /*
* Do not assume that spinlocks can be initialized via memcpy: * Do not assume that spinlocks can be initialized via memcpy:
*/ */
spin_lock_init(&ptr->lock); spin_lock_init(&ptr->lock);
malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
ptr;
} }
/* 5) Replace the bootstrap kmem_list3's */ /* 5) Replace the bootstrap kmem_cache_node */
{ {
int nid; int nid;
for_each_online_node(nid) { for_each_online_node(nid) {
init_list(kmem_cache, &initkmem_list3[CACHE_CACHE + nid], nid); init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
init_list(malloc_sizes[INDEX_AC].cs_cachep, init_list(kmalloc_caches[INDEX_AC],
&initkmem_list3[SIZE_AC + nid], nid); &init_kmem_cache_node[SIZE_AC + nid], nid);
if (INDEX_AC != INDEX_L3) { if (INDEX_AC != INDEX_NODE) {
init_list(malloc_sizes[INDEX_L3].cs_cachep, init_list(kmalloc_caches[INDEX_NODE],
&initkmem_list3[SIZE_L3 + nid], nid); &init_kmem_cache_node[SIZE_NODE + nid], nid);
} }
} }
} }
slab_state = UP; create_kmalloc_caches(ARCH_KMALLOC_FLAGS);
} }
void __init kmem_cache_init_late(void) void __init kmem_cache_init_late(void)
...@@ -1773,7 +1650,7 @@ void __init kmem_cache_init_late(void) ...@@ -1773,7 +1650,7 @@ void __init kmem_cache_init_late(void)
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
/* /*
* Register a memory hotplug callback that initializes and frees * Register a memory hotplug callback that initializes and frees
* nodelists. * node.
*/ */
hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif #endif
...@@ -1803,7 +1680,7 @@ __initcall(cpucache_init); ...@@ -1803,7 +1680,7 @@ __initcall(cpucache_init);
static noinline void static noinline void
slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
{ {
struct kmem_list3 *l3; struct kmem_cache_node *n;
struct slab *slabp; struct slab *slabp;
unsigned long flags; unsigned long flags;
int node; int node;
...@@ -1818,24 +1695,24 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) ...@@ -1818,24 +1695,24 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
unsigned long active_objs = 0, num_objs = 0, free_objects = 0; unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
unsigned long active_slabs = 0, num_slabs = 0; unsigned long active_slabs = 0, num_slabs = 0;
l3 = cachep->nodelists[node]; n = cachep->node[node];
if (!l3) if (!n)
continue; continue;
spin_lock_irqsave(&l3->list_lock, flags); spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(slabp, &l3->slabs_full, list) { list_for_each_entry(slabp, &n->slabs_full, list) {
active_objs += cachep->num; active_objs += cachep->num;
active_slabs++; active_slabs++;
} }
list_for_each_entry(slabp, &l3->slabs_partial, list) { list_for_each_entry(slabp, &n->slabs_partial, list) {
active_objs += slabp->inuse; active_objs += slabp->inuse;
active_slabs++; active_slabs++;
} }
list_for_each_entry(slabp, &l3->slabs_free, list) list_for_each_entry(slabp, &n->slabs_free, list)
num_slabs++; num_slabs++;
free_objects += l3->free_objects; free_objects += n->free_objects;
spin_unlock_irqrestore(&l3->list_lock, flags); spin_unlock_irqrestore(&n->list_lock, flags);
num_slabs += active_slabs; num_slabs += active_slabs;
num_objs = num_slabs * cachep->num; num_objs = num_slabs * cachep->num;
...@@ -2258,7 +2135,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) ...@@ -2258,7 +2135,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
if (slab_state == DOWN) { if (slab_state == DOWN) {
/* /*
* Note: Creation of first cache (kmem_cache). * Note: Creation of first cache (kmem_cache).
* The setup_list3s is taken care * The setup_node is taken care
* of by the caller of __kmem_cache_create * of by the caller of __kmem_cache_create
*/ */
cachep->array[smp_processor_id()] = &initarray_generic.cache; cachep->array[smp_processor_id()] = &initarray_generic.cache;
...@@ -2272,13 +2149,13 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) ...@@ -2272,13 +2149,13 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
cachep->array[smp_processor_id()] = &initarray_generic.cache; cachep->array[smp_processor_id()] = &initarray_generic.cache;
/* /*
* If the cache that's used by kmalloc(sizeof(kmem_list3)) is * If the cache that's used by kmalloc(sizeof(kmem_cache_node)) is
* the second cache, then we need to set up all its list3s, * the second cache, then we need to set up all its node/,
* otherwise the creation of further caches will BUG(). * otherwise the creation of further caches will BUG().
*/ */
set_up_list3s(cachep, SIZE_AC); set_up_node(cachep, SIZE_AC);
if (INDEX_AC == INDEX_L3) if (INDEX_AC == INDEX_NODE)
slab_state = PARTIAL_L3; slab_state = PARTIAL_NODE;
else else
slab_state = PARTIAL_ARRAYCACHE; slab_state = PARTIAL_ARRAYCACHE;
} else { } else {
...@@ -2287,20 +2164,20 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) ...@@ -2287,20 +2164,20 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
kmalloc(sizeof(struct arraycache_init), gfp); kmalloc(sizeof(struct arraycache_init), gfp);
if (slab_state == PARTIAL_ARRAYCACHE) { if (slab_state == PARTIAL_ARRAYCACHE) {
set_up_list3s(cachep, SIZE_L3); set_up_node(cachep, SIZE_NODE);
slab_state = PARTIAL_L3; slab_state = PARTIAL_NODE;
} else { } else {
int node; int node;
for_each_online_node(node) { for_each_online_node(node) {
cachep->nodelists[node] = cachep->node[node] =
kmalloc_node(sizeof(struct kmem_list3), kmalloc_node(sizeof(struct kmem_cache_node),
gfp, node); gfp, node);
BUG_ON(!cachep->nodelists[node]); BUG_ON(!cachep->node[node]);
kmem_list3_init(cachep->nodelists[node]); kmem_cache_node_init(cachep->node[node]);
} }
} }
} }
cachep->nodelists[numa_mem_id()]->next_reap = cachep->node[numa_mem_id()]->next_reap =
jiffies + REAPTIMEOUT_LIST3 + jiffies + REAPTIMEOUT_LIST3 +
((unsigned long)cachep) % REAPTIMEOUT_LIST3; ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
...@@ -2403,7 +2280,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) ...@@ -2403,7 +2280,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
else else
gfp = GFP_NOWAIT; gfp = GFP_NOWAIT;
setup_nodelists_pointer(cachep); setup_node_pointer(cachep);
#if DEBUG #if DEBUG
/* /*
...@@ -2426,7 +2303,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) ...@@ -2426,7 +2303,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
size += BYTES_PER_WORD; size += BYTES_PER_WORD;
} }
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
if (size >= malloc_sizes[INDEX_L3 + 1].cs_size if (size >= kmalloc_size(INDEX_NODE + 1)
&& cachep->object_size > cache_line_size() && cachep->object_size > cache_line_size()
&& ALIGN(size, cachep->align) < PAGE_SIZE) { && ALIGN(size, cachep->align) < PAGE_SIZE) {
cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
...@@ -2497,7 +2374,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) ...@@ -2497,7 +2374,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
cachep->reciprocal_buffer_size = reciprocal_value(size); cachep->reciprocal_buffer_size = reciprocal_value(size);
if (flags & CFLGS_OFF_SLAB) { if (flags & CFLGS_OFF_SLAB) {
cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); cachep->slabp_cache = kmalloc_slab(slab_size, 0u);
/* /*
* This is a possibility for one of the malloc_sizes caches. * This is a possibility for one of the malloc_sizes caches.
* But since we go off slab only for object size greater than * But since we go off slab only for object size greater than
...@@ -2543,7 +2420,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep) ...@@ -2543,7 +2420,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
{ {
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
check_irq_off(); check_irq_off();
assert_spin_locked(&cachep->nodelists[numa_mem_id()]->list_lock); assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock);
#endif #endif
} }
...@@ -2551,7 +2428,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) ...@@ -2551,7 +2428,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{ {
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
check_irq_off(); check_irq_off();
assert_spin_locked(&cachep->nodelists[node]->list_lock); assert_spin_locked(&cachep->node[node]->list_lock);
#endif #endif
} }
...@@ -2562,7 +2439,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) ...@@ -2562,7 +2439,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
#define check_spinlock_acquired_node(x, y) do { } while(0) #define check_spinlock_acquired_node(x, y) do { } while(0)
#endif #endif
static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
struct array_cache *ac, struct array_cache *ac,
int force, int node); int force, int node);
...@@ -2574,29 +2451,29 @@ static void do_drain(void *arg) ...@@ -2574,29 +2451,29 @@ static void do_drain(void *arg)
check_irq_off(); check_irq_off();
ac = cpu_cache_get(cachep); ac = cpu_cache_get(cachep);
spin_lock(&cachep->nodelists[node]->list_lock); spin_lock(&cachep->node[node]->list_lock);
free_block(cachep, ac->entry, ac->avail, node); free_block(cachep, ac->entry, ac->avail, node);
spin_unlock(&cachep->nodelists[node]->list_lock); spin_unlock(&cachep->node[node]->list_lock);
ac->avail = 0; ac->avail = 0;
} }
static void drain_cpu_caches(struct kmem_cache *cachep) static void drain_cpu_caches(struct kmem_cache *cachep)
{ {
struct kmem_list3 *l3; struct kmem_cache_node *n;
int node; int node;
on_each_cpu(do_drain, cachep, 1); on_each_cpu(do_drain, cachep, 1);
check_irq_on(); check_irq_on();
for_each_online_node(node) { for_each_online_node(node) {
l3 = cachep->nodelists[node]; n = cachep->node[node];
if (l3 && l3->alien) if (n && n->alien)
drain_alien_cache(cachep, l3->alien); drain_alien_cache(cachep, n->alien);
} }
for_each_online_node(node) { for_each_online_node(node) {
l3 = cachep->nodelists[node]; n = cachep->node[node];
if (l3) if (n)
drain_array(cachep, l3, l3->shared, 1, node); drain_array(cachep, n, n->shared, 1, node);
} }
} }
...@@ -2607,19 +2484,19 @@ static void drain_cpu_caches(struct kmem_cache *cachep) ...@@ -2607,19 +2484,19 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
* Returns the actual number of slabs released. * Returns the actual number of slabs released.
*/ */
static int drain_freelist(struct kmem_cache *cache, static int drain_freelist(struct kmem_cache *cache,
struct kmem_list3 *l3, int tofree) struct kmem_cache_node *n, int tofree)
{ {
struct list_head *p; struct list_head *p;
int nr_freed; int nr_freed;
struct slab *slabp; struct slab *slabp;
nr_freed = 0; nr_freed = 0;
while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
spin_lock_irq(&l3->list_lock); spin_lock_irq(&n->list_lock);
p = l3->slabs_free.prev; p = n->slabs_free.prev;
if (p == &l3->slabs_free) { if (p == &n->slabs_free) {
spin_unlock_irq(&l3->list_lock); spin_unlock_irq(&n->list_lock);
goto out; goto out;
} }
...@@ -2632,8 +2509,8 @@ static int drain_freelist(struct kmem_cache *cache, ...@@ -2632,8 +2509,8 @@ static int drain_freelist(struct kmem_cache *cache,
* Safe to drop the lock. The slab is no longer linked * Safe to drop the lock. The slab is no longer linked
* to the cache. * to the cache.
*/ */
l3->free_objects -= cache->num; n->free_objects -= cache->num;
spin_unlock_irq(&l3->list_lock); spin_unlock_irq(&n->list_lock);
slab_destroy(cache, slabp); slab_destroy(cache, slabp);
nr_freed++; nr_freed++;
} }
...@@ -2645,20 +2522,20 @@ static int drain_freelist(struct kmem_cache *cache, ...@@ -2645,20 +2522,20 @@ static int drain_freelist(struct kmem_cache *cache,
static int __cache_shrink(struct kmem_cache *cachep) static int __cache_shrink(struct kmem_cache *cachep)
{ {
int ret = 0, i = 0; int ret = 0, i = 0;
struct kmem_list3 *l3; struct kmem_cache_node *n;
drain_cpu_caches(cachep); drain_cpu_caches(cachep);
check_irq_on(); check_irq_on();
for_each_online_node(i) { for_each_online_node(i) {
l3 = cachep->nodelists[i]; n = cachep->node[i];
if (!l3) if (!n)
continue; continue;
drain_freelist(cachep, l3, l3->free_objects); drain_freelist(cachep, n, n->free_objects);
ret += !list_empty(&l3->slabs_full) || ret += !list_empty(&n->slabs_full) ||
!list_empty(&l3->slabs_partial); !list_empty(&n->slabs_partial);
} }
return (ret ? 1 : 0); return (ret ? 1 : 0);
} }
...@@ -2687,7 +2564,7 @@ EXPORT_SYMBOL(kmem_cache_shrink); ...@@ -2687,7 +2564,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
int __kmem_cache_shutdown(struct kmem_cache *cachep) int __kmem_cache_shutdown(struct kmem_cache *cachep)
{ {
int i; int i;
struct kmem_list3 *l3; struct kmem_cache_node *n;
int rc = __cache_shrink(cachep); int rc = __cache_shrink(cachep);
if (rc) if (rc)
...@@ -2696,13 +2573,13 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep) ...@@ -2696,13 +2573,13 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
for_each_online_cpu(i) for_each_online_cpu(i)
kfree(cachep->array[i]); kfree(cachep->array[i]);
/* NUMA: free the list3 structures */ /* NUMA: free the node structures */
for_each_online_node(i) { for_each_online_node(i) {
l3 = cachep->nodelists[i]; n = cachep->node[i];
if (l3) { if (n) {
kfree(l3->shared); kfree(n->shared);
free_alien_cache(l3->alien); free_alien_cache(n->alien);
kfree(l3); kfree(n);
} }
} }
return 0; return 0;
...@@ -2884,7 +2761,7 @@ static int cache_grow(struct kmem_cache *cachep, ...@@ -2884,7 +2761,7 @@ static int cache_grow(struct kmem_cache *cachep,
struct slab *slabp; struct slab *slabp;
size_t offset; size_t offset;
gfp_t local_flags; gfp_t local_flags;
struct kmem_list3 *l3; struct kmem_cache_node *n;
/* /*
* Be lazy and only check for valid flags here, keeping it out of the * Be lazy and only check for valid flags here, keeping it out of the
...@@ -2893,17 +2770,17 @@ static int cache_grow(struct kmem_cache *cachep, ...@@ -2893,17 +2770,17 @@ static int cache_grow(struct kmem_cache *cachep,
BUG_ON(flags & GFP_SLAB_BUG_MASK); BUG_ON(flags & GFP_SLAB_BUG_MASK);
local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
/* Take the l3 list lock to change the colour_next on this node */ /* Take the node list lock to change the colour_next on this node */
check_irq_off(); check_irq_off();
l3 = cachep->nodelists[nodeid]; n = cachep->node[nodeid];
spin_lock(&l3->list_lock); spin_lock(&n->list_lock);
/* Get colour for the slab, and cal the next value. */ /* Get colour for the slab, and cal the next value. */
offset = l3->colour_next; offset = n->colour_next;
l3->colour_next++; n->colour_next++;
if (l3->colour_next >= cachep->colour) if (n->colour_next >= cachep->colour)
l3->colour_next = 0; n->colour_next = 0;
spin_unlock(&l3->list_lock); spin_unlock(&n->list_lock);
offset *= cachep->colour_off; offset *= cachep->colour_off;
...@@ -2940,13 +2817,13 @@ static int cache_grow(struct kmem_cache *cachep, ...@@ -2940,13 +2817,13 @@ static int cache_grow(struct kmem_cache *cachep,
if (local_flags & __GFP_WAIT) if (local_flags & __GFP_WAIT)
local_irq_disable(); local_irq_disable();
check_irq_off(); check_irq_off();
spin_lock(&l3->list_lock); spin_lock(&n->list_lock);
/* Make slab active. */ /* Make slab active. */
list_add_tail(&slabp->list, &(l3->slabs_free)); list_add_tail(&slabp->list, &(n->slabs_free));
STATS_INC_GROWN(cachep); STATS_INC_GROWN(cachep);
l3->free_objects += cachep->num; n->free_objects += cachep->num;
spin_unlock(&l3->list_lock); spin_unlock(&n->list_lock);
return 1; return 1;
opps1: opps1:
kmem_freepages(cachep, objp); kmem_freepages(cachep, objp);
...@@ -3074,7 +2951,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, ...@@ -3074,7 +2951,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
bool force_refill) bool force_refill)
{ {
int batchcount; int batchcount;
struct kmem_list3 *l3; struct kmem_cache_node *n;
struct array_cache *ac; struct array_cache *ac;
int node; int node;
...@@ -3093,14 +2970,14 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, ...@@ -3093,14 +2970,14 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
*/ */
batchcount = BATCHREFILL_LIMIT; batchcount = BATCHREFILL_LIMIT;
} }
l3 = cachep->nodelists[node]; n = cachep->node[node];
BUG_ON(ac->avail > 0 || !l3); BUG_ON(ac->avail > 0 || !n);
spin_lock(&l3->list_lock); spin_lock(&n->list_lock);
/* See if we can refill from the shared array */ /* See if we can refill from the shared array */
if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) { if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
l3->shared->touched = 1; n->shared->touched = 1;
goto alloc_done; goto alloc_done;
} }
...@@ -3108,11 +2985,11 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, ...@@ -3108,11 +2985,11 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
struct list_head *entry; struct list_head *entry;
struct slab *slabp; struct slab *slabp;
/* Get slab alloc is to come from. */ /* Get slab alloc is to come from. */
entry = l3->slabs_partial.next; entry = n->slabs_partial.next;
if (entry == &l3->slabs_partial) { if (entry == &n->slabs_partial) {
l3->free_touched = 1; n->free_touched = 1;
entry = l3->slabs_free.next; entry = n->slabs_free.next;
if (entry == &l3->slabs_free) if (entry == &n->slabs_free)
goto must_grow; goto must_grow;
} }
...@@ -3140,15 +3017,15 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, ...@@ -3140,15 +3017,15 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
/* move slabp to correct slabp list: */ /* move slabp to correct slabp list: */
list_del(&slabp->list); list_del(&slabp->list);
if (slabp->free == BUFCTL_END) if (slabp->free == BUFCTL_END)
list_add(&slabp->list, &l3->slabs_full); list_add(&slabp->list, &n->slabs_full);
else else
list_add(&slabp->list, &l3->slabs_partial); list_add(&slabp->list, &n->slabs_partial);
} }
must_grow: must_grow:
l3->free_objects -= ac->avail; n->free_objects -= ac->avail;
alloc_done: alloc_done:
spin_unlock(&l3->list_lock); spin_unlock(&n->list_lock);
if (unlikely(!ac->avail)) { if (unlikely(!ac->avail)) {
int x; int x;
...@@ -3315,7 +3192,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) ...@@ -3315,7 +3192,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
/* /*
* Fallback function if there was no memory available and no objects on a * Fallback function if there was no memory available and no objects on a
* certain node and fall back is permitted. First we scan all the * certain node and fall back is permitted. First we scan all the
* available nodelists for available objects. If that fails then we * available node for available objects. If that fails then we
* perform an allocation without specifying a node. This allows the page * perform an allocation without specifying a node. This allows the page
* allocator to do its reclaim / fallback magic. We then insert the * allocator to do its reclaim / fallback magic. We then insert the
* slab into the proper nodelist and then allocate from it. * slab into the proper nodelist and then allocate from it.
...@@ -3349,8 +3226,8 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) ...@@ -3349,8 +3226,8 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
nid = zone_to_nid(zone); nid = zone_to_nid(zone);
if (cpuset_zone_allowed_hardwall(zone, flags) && if (cpuset_zone_allowed_hardwall(zone, flags) &&
cache->nodelists[nid] && cache->node[nid] &&
cache->nodelists[nid]->free_objects) { cache->node[nid]->free_objects) {
obj = ____cache_alloc_node(cache, obj = ____cache_alloc_node(cache,
flags | GFP_THISNODE, nid); flags | GFP_THISNODE, nid);
if (obj) if (obj)
...@@ -3406,21 +3283,22 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, ...@@ -3406,21 +3283,22 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
{ {
struct list_head *entry; struct list_head *entry;
struct slab *slabp; struct slab *slabp;
struct kmem_list3 *l3; struct kmem_cache_node *n;
void *obj; void *obj;
int x; int x;
l3 = cachep->nodelists[nodeid]; VM_BUG_ON(nodeid > num_online_nodes());
BUG_ON(!l3); n = cachep->node[nodeid];
BUG_ON(!n);
retry: retry:
check_irq_off(); check_irq_off();
spin_lock(&l3->list_lock); spin_lock(&n->list_lock);
entry = l3->slabs_partial.next; entry = n->slabs_partial.next;
if (entry == &l3->slabs_partial) { if (entry == &n->slabs_partial) {
l3->free_touched = 1; n->free_touched = 1;
entry = l3->slabs_free.next; entry = n->slabs_free.next;
if (entry == &l3->slabs_free) if (entry == &n->slabs_free)
goto must_grow; goto must_grow;
} }
...@@ -3436,20 +3314,20 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, ...@@ -3436,20 +3314,20 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
obj = slab_get_obj(cachep, slabp, nodeid); obj = slab_get_obj(cachep, slabp, nodeid);
check_slabp(cachep, slabp); check_slabp(cachep, slabp);
l3->free_objects--; n->free_objects--;
/* move slabp to correct slabp list: */ /* move slabp to correct slabp list: */
list_del(&slabp->list); list_del(&slabp->list);
if (slabp->free == BUFCTL_END) if (slabp->free == BUFCTL_END)
list_add(&slabp->list, &l3->slabs_full); list_add(&slabp->list, &n->slabs_full);
else else
list_add(&slabp->list, &l3->slabs_partial); list_add(&slabp->list, &n->slabs_partial);
spin_unlock(&l3->list_lock); spin_unlock(&n->list_lock);
goto done; goto done;
must_grow: must_grow:
spin_unlock(&l3->list_lock); spin_unlock(&n->list_lock);
x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
if (x) if (x)
goto retry; goto retry;
...@@ -3495,7 +3373,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, ...@@ -3495,7 +3373,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
if (nodeid == NUMA_NO_NODE) if (nodeid == NUMA_NO_NODE)
nodeid = slab_node; nodeid = slab_node;
if (unlikely(!cachep->nodelists[nodeid])) { if (unlikely(!cachep->node[nodeid])) {
/* Node not bootstrapped yet */ /* Node not bootstrapped yet */
ptr = fallback_alloc(cachep, flags); ptr = fallback_alloc(cachep, flags);
goto out; goto out;
...@@ -3601,7 +3479,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, ...@@ -3601,7 +3479,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
int node) int node)
{ {
int i; int i;
struct kmem_list3 *l3; struct kmem_cache_node *n;
for (i = 0; i < nr_objects; i++) { for (i = 0; i < nr_objects; i++) {
void *objp; void *objp;
...@@ -3611,19 +3489,19 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, ...@@ -3611,19 +3489,19 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
objp = objpp[i]; objp = objpp[i];
slabp = virt_to_slab(objp); slabp = virt_to_slab(objp);
l3 = cachep->nodelists[node]; n = cachep->node[node];
list_del(&slabp->list); list_del(&slabp->list);
check_spinlock_acquired_node(cachep, node); check_spinlock_acquired_node(cachep, node);
check_slabp(cachep, slabp); check_slabp(cachep, slabp);
slab_put_obj(cachep, slabp, objp, node); slab_put_obj(cachep, slabp, objp, node);
STATS_DEC_ACTIVE(cachep); STATS_DEC_ACTIVE(cachep);
l3->free_objects++; n->free_objects++;
check_slabp(cachep, slabp); check_slabp(cachep, slabp);
/* fixup slab chains */ /* fixup slab chains */
if (slabp->inuse == 0) { if (slabp->inuse == 0) {
if (l3->free_objects > l3->free_limit) { if (n->free_objects > n->free_limit) {
l3->free_objects -= cachep->num; n->free_objects -= cachep->num;
/* No need to drop any previously held /* No need to drop any previously held
* lock here, even if we have a off-slab slab * lock here, even if we have a off-slab slab
* descriptor it is guaranteed to come from * descriptor it is guaranteed to come from
...@@ -3632,14 +3510,14 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, ...@@ -3632,14 +3510,14 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
*/ */
slab_destroy(cachep, slabp); slab_destroy(cachep, slabp);
} else { } else {
list_add(&slabp->list, &l3->slabs_free); list_add(&slabp->list, &n->slabs_free);
} }
} else { } else {
/* Unconditionally move a slab to the end of the /* Unconditionally move a slab to the end of the
* partial list on free - maximum time for the * partial list on free - maximum time for the
* other objects to be freed, too. * other objects to be freed, too.
*/ */
list_add_tail(&slabp->list, &l3->slabs_partial); list_add_tail(&slabp->list, &n->slabs_partial);
} }
} }
} }
...@@ -3647,7 +3525,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, ...@@ -3647,7 +3525,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
{ {
int batchcount; int batchcount;
struct kmem_list3 *l3; struct kmem_cache_node *n;
int node = numa_mem_id(); int node = numa_mem_id();
batchcount = ac->batchcount; batchcount = ac->batchcount;
...@@ -3655,10 +3533,10 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) ...@@ -3655,10 +3533,10 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
BUG_ON(!batchcount || batchcount > ac->avail); BUG_ON(!batchcount || batchcount > ac->avail);
#endif #endif
check_irq_off(); check_irq_off();
l3 = cachep->nodelists[node]; n = cachep->node[node];
spin_lock(&l3->list_lock); spin_lock(&n->list_lock);
if (l3->shared) { if (n->shared) {
struct array_cache *shared_array = l3->shared; struct array_cache *shared_array = n->shared;
int max = shared_array->limit - shared_array->avail; int max = shared_array->limit - shared_array->avail;
if (max) { if (max) {
if (batchcount > max) if (batchcount > max)
...@@ -3677,8 +3555,8 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) ...@@ -3677,8 +3555,8 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
int i = 0; int i = 0;
struct list_head *p; struct list_head *p;
p = l3->slabs_free.next; p = n->slabs_free.next;
while (p != &(l3->slabs_free)) { while (p != &(n->slabs_free)) {
struct slab *slabp; struct slab *slabp;
slabp = list_entry(p, struct slab, list); slabp = list_entry(p, struct slab, list);
...@@ -3690,7 +3568,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) ...@@ -3690,7 +3568,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
STATS_SET_FREEABLE(cachep, i); STATS_SET_FREEABLE(cachep, i);
} }
#endif #endif
spin_unlock(&l3->list_lock); spin_unlock(&n->list_lock);
ac->avail -= batchcount; ac->avail -= batchcount;
memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
} }
...@@ -3800,7 +3678,7 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) ...@@ -3800,7 +3678,7 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
{ {
struct kmem_cache *cachep; struct kmem_cache *cachep;
cachep = kmem_find_general_cachep(size, flags); cachep = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep))) if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep; return cachep;
return kmem_cache_alloc_node_trace(cachep, flags, node, size); return kmem_cache_alloc_node_trace(cachep, flags, node, size);
...@@ -3845,7 +3723,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, ...@@ -3845,7 +3723,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
* Then kmalloc uses the uninlined functions instead of the inline * Then kmalloc uses the uninlined functions instead of the inline
* functions. * functions.
*/ */
cachep = __find_general_cachep(size, flags); cachep = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep))) if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep; return cachep;
ret = slab_alloc(cachep, flags, caller); ret = slab_alloc(cachep, flags, caller);
...@@ -3934,12 +3812,12 @@ void kfree(const void *objp) ...@@ -3934,12 +3812,12 @@ void kfree(const void *objp)
EXPORT_SYMBOL(kfree); EXPORT_SYMBOL(kfree);
/* /*
* This initializes kmem_list3 or resizes various caches for all nodes. * This initializes kmem_cache_node or resizes various caches for all nodes.
*/ */
static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
{ {
int node; int node;
struct kmem_list3 *l3; struct kmem_cache_node *n;
struct array_cache *new_shared; struct array_cache *new_shared;
struct array_cache **new_alien = NULL; struct array_cache **new_alien = NULL;
...@@ -3962,43 +3840,43 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) ...@@ -3962,43 +3840,43 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
} }
} }
l3 = cachep->nodelists[node]; n = cachep->node[node];
if (l3) { if (n) {
struct array_cache *shared = l3->shared; struct array_cache *shared = n->shared;
spin_lock_irq(&l3->list_lock); spin_lock_irq(&n->list_lock);
if (shared) if (shared)
free_block(cachep, shared->entry, free_block(cachep, shared->entry,
shared->avail, node); shared->avail, node);
l3->shared = new_shared; n->shared = new_shared;
if (!l3->alien) { if (!n->alien) {
l3->alien = new_alien; n->alien = new_alien;
new_alien = NULL; new_alien = NULL;
} }
l3->free_limit = (1 + nr_cpus_node(node)) * n->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num; cachep->batchcount + cachep->num;
spin_unlock_irq(&l3->list_lock); spin_unlock_irq(&n->list_lock);
kfree(shared); kfree(shared);
free_alien_cache(new_alien); free_alien_cache(new_alien);
continue; continue;
} }
l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node); n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
if (!l3) { if (!n) {
free_alien_cache(new_alien); free_alien_cache(new_alien);
kfree(new_shared); kfree(new_shared);
goto fail; goto fail;
} }
kmem_list3_init(l3); kmem_cache_node_init(n);
l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + n->next_reap = jiffies + REAPTIMEOUT_LIST3 +
((unsigned long)cachep) % REAPTIMEOUT_LIST3; ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
l3->shared = new_shared; n->shared = new_shared;
l3->alien = new_alien; n->alien = new_alien;
l3->free_limit = (1 + nr_cpus_node(node)) * n->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num; cachep->batchcount + cachep->num;
cachep->nodelists[node] = l3; cachep->node[node] = n;
} }
return 0; return 0;
...@@ -4007,13 +3885,13 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) ...@@ -4007,13 +3885,13 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
/* Cache is not active yet. Roll back what we did */ /* Cache is not active yet. Roll back what we did */
node--; node--;
while (node >= 0) { while (node >= 0) {
if (cachep->nodelists[node]) { if (cachep->node[node]) {
l3 = cachep->nodelists[node]; n = cachep->node[node];
kfree(l3->shared); kfree(n->shared);
free_alien_cache(l3->alien); free_alien_cache(n->alien);
kfree(l3); kfree(n);
cachep->nodelists[node] = NULL; cachep->node[node] = NULL;
} }
node--; node--;
} }
...@@ -4073,9 +3951,9 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit, ...@@ -4073,9 +3951,9 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
struct array_cache *ccold = new->new[i]; struct array_cache *ccold = new->new[i];
if (!ccold) if (!ccold)
continue; continue;
spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock); spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i)); free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock); spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
kfree(ccold); kfree(ccold);
} }
kfree(new); kfree(new);
...@@ -4176,11 +4054,11 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) ...@@ -4176,11 +4054,11 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
} }
/* /*
* Drain an array if it contains any elements taking the l3 lock only if * Drain an array if it contains any elements taking the node lock only if
* necessary. Note that the l3 listlock also protects the array_cache * necessary. Note that the node listlock also protects the array_cache
* if drain_array() is used on the shared array. * if drain_array() is used on the shared array.
*/ */
static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
struct array_cache *ac, int force, int node) struct array_cache *ac, int force, int node)
{ {
int tofree; int tofree;
...@@ -4190,7 +4068,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, ...@@ -4190,7 +4068,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
if (ac->touched && !force) { if (ac->touched && !force) {
ac->touched = 0; ac->touched = 0;
} else { } else {
spin_lock_irq(&l3->list_lock); spin_lock_irq(&n->list_lock);
if (ac->avail) { if (ac->avail) {
tofree = force ? ac->avail : (ac->limit + 4) / 5; tofree = force ? ac->avail : (ac->limit + 4) / 5;
if (tofree > ac->avail) if (tofree > ac->avail)
...@@ -4200,7 +4078,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, ...@@ -4200,7 +4078,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
memmove(ac->entry, &(ac->entry[tofree]), memmove(ac->entry, &(ac->entry[tofree]),
sizeof(void *) * ac->avail); sizeof(void *) * ac->avail);
} }
spin_unlock_irq(&l3->list_lock); spin_unlock_irq(&n->list_lock);
} }
} }
...@@ -4219,7 +4097,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, ...@@ -4219,7 +4097,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
static void cache_reap(struct work_struct *w) static void cache_reap(struct work_struct *w)
{ {
struct kmem_cache *searchp; struct kmem_cache *searchp;
struct kmem_list3 *l3; struct kmem_cache_node *n;
int node = numa_mem_id(); int node = numa_mem_id();
struct delayed_work *work = to_delayed_work(w); struct delayed_work *work = to_delayed_work(w);
...@@ -4231,33 +4109,33 @@ static void cache_reap(struct work_struct *w) ...@@ -4231,33 +4109,33 @@ static void cache_reap(struct work_struct *w)
check_irq_on(); check_irq_on();
/* /*
* We only take the l3 lock if absolutely necessary and we * We only take the node lock if absolutely necessary and we
* have established with reasonable certainty that * have established with reasonable certainty that
* we can do some work if the lock was obtained. * we can do some work if the lock was obtained.
*/ */
l3 = searchp->nodelists[node]; n = searchp->node[node];
reap_alien(searchp, l3); reap_alien(searchp, n);
drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); drain_array(searchp, n, cpu_cache_get(searchp), 0, node);
/* /*
* These are racy checks but it does not matter * These are racy checks but it does not matter
* if we skip one check or scan twice. * if we skip one check or scan twice.
*/ */
if (time_after(l3->next_reap, jiffies)) if (time_after(n->next_reap, jiffies))
goto next; goto next;
l3->next_reap = jiffies + REAPTIMEOUT_LIST3; n->next_reap = jiffies + REAPTIMEOUT_LIST3;
drain_array(searchp, l3, l3->shared, 0, node); drain_array(searchp, n, n->shared, 0, node);
if (l3->free_touched) if (n->free_touched)
l3->free_touched = 0; n->free_touched = 0;
else { else {
int freed; int freed;
freed = drain_freelist(searchp, l3, (l3->free_limit + freed = drain_freelist(searchp, n, (n->free_limit +
5 * searchp->num - 1) / (5 * searchp->num)); 5 * searchp->num - 1) / (5 * searchp->num));
STATS_ADD_REAPED(searchp, freed); STATS_ADD_REAPED(searchp, freed);
} }
...@@ -4283,25 +4161,25 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) ...@@ -4283,25 +4161,25 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
const char *name; const char *name;
char *error = NULL; char *error = NULL;
int node; int node;
struct kmem_list3 *l3; struct kmem_cache_node *n;
active_objs = 0; active_objs = 0;
num_slabs = 0; num_slabs = 0;
for_each_online_node(node) { for_each_online_node(node) {
l3 = cachep->nodelists[node]; n = cachep->node[node];
if (!l3) if (!n)
continue; continue;
check_irq_on(); check_irq_on();
spin_lock_irq(&l3->list_lock); spin_lock_irq(&n->list_lock);
list_for_each_entry(slabp, &l3->slabs_full, list) { list_for_each_entry(slabp, &n->slabs_full, list) {
if (slabp->inuse != cachep->num && !error) if (slabp->inuse != cachep->num && !error)
error = "slabs_full accounting error"; error = "slabs_full accounting error";
active_objs += cachep->num; active_objs += cachep->num;
active_slabs++; active_slabs++;
} }
list_for_each_entry(slabp, &l3->slabs_partial, list) { list_for_each_entry(slabp, &n->slabs_partial, list) {
if (slabp->inuse == cachep->num && !error) if (slabp->inuse == cachep->num && !error)
error = "slabs_partial inuse accounting error"; error = "slabs_partial inuse accounting error";
if (!slabp->inuse && !error) if (!slabp->inuse && !error)
...@@ -4309,16 +4187,16 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) ...@@ -4309,16 +4187,16 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
active_objs += slabp->inuse; active_objs += slabp->inuse;
active_slabs++; active_slabs++;
} }
list_for_each_entry(slabp, &l3->slabs_free, list) { list_for_each_entry(slabp, &n->slabs_free, list) {
if (slabp->inuse && !error) if (slabp->inuse && !error)
error = "slabs_free/inuse accounting error"; error = "slabs_free/inuse accounting error";
num_slabs++; num_slabs++;
} }
free_objects += l3->free_objects; free_objects += n->free_objects;
if (l3->shared) if (n->shared)
shared_avail += l3->shared->avail; shared_avail += n->shared->avail;
spin_unlock_irq(&l3->list_lock); spin_unlock_irq(&n->list_lock);
} }
num_slabs += active_slabs; num_slabs += active_slabs;
num_objs = num_slabs * cachep->num; num_objs = num_slabs * cachep->num;
...@@ -4344,7 +4222,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) ...@@ -4344,7 +4222,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep) void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep)
{ {
#if STATS #if STATS
{ /* list3 stats */ { /* node stats */
unsigned long high = cachep->high_mark; unsigned long high = cachep->high_mark;
unsigned long allocs = cachep->num_allocations; unsigned long allocs = cachep->num_allocations;
unsigned long grown = cachep->grown; unsigned long grown = cachep->grown;
...@@ -4497,9 +4375,9 @@ static int leaks_show(struct seq_file *m, void *p) ...@@ -4497,9 +4375,9 @@ static int leaks_show(struct seq_file *m, void *p)
{ {
struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
struct slab *slabp; struct slab *slabp;
struct kmem_list3 *l3; struct kmem_cache_node *n;
const char *name; const char *name;
unsigned long *n = m->private; unsigned long *x = m->private;
int node; int node;
int i; int i;
...@@ -4510,43 +4388,43 @@ static int leaks_show(struct seq_file *m, void *p) ...@@ -4510,43 +4388,43 @@ static int leaks_show(struct seq_file *m, void *p)
/* OK, we can do it */ /* OK, we can do it */
n[1] = 0; x[1] = 0;
for_each_online_node(node) { for_each_online_node(node) {
l3 = cachep->nodelists[node]; n = cachep->node[node];
if (!l3) if (!n)
continue; continue;
check_irq_on(); check_irq_on();
spin_lock_irq(&l3->list_lock); spin_lock_irq(&n->list_lock);
list_for_each_entry(slabp, &l3->slabs_full, list) list_for_each_entry(slabp, &n->slabs_full, list)
handle_slab(n, cachep, slabp); handle_slab(x, cachep, slabp);
list_for_each_entry(slabp, &l3->slabs_partial, list) list_for_each_entry(slabp, &n->slabs_partial, list)
handle_slab(n, cachep, slabp); handle_slab(x, cachep, slabp);
spin_unlock_irq(&l3->list_lock); spin_unlock_irq(&n->list_lock);
} }
name = cachep->name; name = cachep->name;
if (n[0] == n[1]) { if (x[0] == x[1]) {
/* Increase the buffer size */ /* Increase the buffer size */
mutex_unlock(&slab_mutex); mutex_unlock(&slab_mutex);
m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL); m->private = kzalloc(x[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
if (!m->private) { if (!m->private) {
/* Too bad, we are really out */ /* Too bad, we are really out */
m->private = n; m->private = x;
mutex_lock(&slab_mutex); mutex_lock(&slab_mutex);
return -ENOMEM; return -ENOMEM;
} }
*(unsigned long *)m->private = n[0] * 2; *(unsigned long *)m->private = x[0] * 2;
kfree(n); kfree(x);
mutex_lock(&slab_mutex); mutex_lock(&slab_mutex);
/* Now make sure this entry will be retried */ /* Now make sure this entry will be retried */
m->count = m->size; m->count = m->size;
return 0; return 0;
} }
for (i = 0; i < n[1]; i++) { for (i = 0; i < x[1]; i++) {
seq_printf(m, "%s: %lu ", name, n[2*i+3]); seq_printf(m, "%s: %lu ", name, x[2*i+3]);
show_symbol(m, n[2*i+2]); show_symbol(m, x[2*i+2]);
seq_putc(m, '\n'); seq_putc(m, '\n');
} }
......
...@@ -16,7 +16,7 @@ enum slab_state { ...@@ -16,7 +16,7 @@ enum slab_state {
DOWN, /* No slab functionality yet */ DOWN, /* No slab functionality yet */
PARTIAL, /* SLUB: kmem_cache_node available */ PARTIAL, /* SLUB: kmem_cache_node available */
PARTIAL_ARRAYCACHE, /* SLAB: kmalloc size for arraycache available */ PARTIAL_ARRAYCACHE, /* SLAB: kmalloc size for arraycache available */
PARTIAL_L3, /* SLAB: kmalloc size for l3 struct available */ PARTIAL_NODE, /* SLAB: kmalloc size for node struct available */
UP, /* Slab caches usable but not all extras yet */ UP, /* Slab caches usable but not all extras yet */
FULL /* Everything is working */ FULL /* Everything is working */
}; };
...@@ -35,6 +35,15 @@ extern struct kmem_cache *kmem_cache; ...@@ -35,6 +35,15 @@ extern struct kmem_cache *kmem_cache;
unsigned long calculate_alignment(unsigned long flags, unsigned long calculate_alignment(unsigned long flags,
unsigned long align, unsigned long size); unsigned long align, unsigned long size);
#ifndef CONFIG_SLOB
/* Kmalloc array related functions */
void create_kmalloc_caches(unsigned long);
/* Find the kmalloc slab corresponding for a certain size */
struct kmem_cache *kmalloc_slab(size_t, gfp_t);
#endif
/* Functions provided by the slab allocators */ /* Functions provided by the slab allocators */
extern int __kmem_cache_create(struct kmem_cache *, unsigned long flags); extern int __kmem_cache_create(struct kmem_cache *, unsigned long flags);
...@@ -230,3 +239,35 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) ...@@ -230,3 +239,35 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
return s; return s;
} }
#endif #endif
/*
* The slab lists for all objects.
*/
struct kmem_cache_node {
spinlock_t list_lock;
#ifdef CONFIG_SLAB
struct list_head slabs_partial; /* partial list first, better asm code */
struct list_head slabs_full;
struct list_head slabs_free;
unsigned long free_objects;
unsigned int free_limit;
unsigned int colour_next; /* Per-node cache coloring */
struct array_cache *shared; /* shared per node */
struct array_cache **alien; /* on other nodes */
unsigned long next_reap; /* updated without locking */
int free_touched; /* updated without locking */
#endif
#ifdef CONFIG_SLUB
unsigned long nr_partial;
struct list_head partial;
#ifdef CONFIG_SLUB_DEBUG
atomic_long_t nr_slabs;
atomic_long_t total_objects;
struct list_head full;
#endif
#endif
};
...@@ -299,7 +299,7 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz ...@@ -299,7 +299,7 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz
err = __kmem_cache_create(s, flags); err = __kmem_cache_create(s, flags);
if (err) if (err)
panic("Creation of kmalloc slab %s size=%zd failed. Reason %d\n", panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
name, size, err); name, size, err);
s->refcount = -1; /* Exempt from merging for now */ s->refcount = -1; /* Exempt from merging for now */
...@@ -319,6 +319,178 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, ...@@ -319,6 +319,178 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
return s; return s;
} }
struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_caches);
#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif
/*
* Conversion table for small slabs sizes / 8 to the index in the
* kmalloc array. This is necessary for slabs < 192 since we have non power
* of two cache sizes there. The size of larger slabs can be determined using
* fls.
*/
static s8 size_index[24] = {
3, /* 8 */
4, /* 16 */
5, /* 24 */
5, /* 32 */
6, /* 40 */
6, /* 48 */
6, /* 56 */
6, /* 64 */
1, /* 72 */
1, /* 80 */
1, /* 88 */
1, /* 96 */
7, /* 104 */
7, /* 112 */
7, /* 120 */
7, /* 128 */
2, /* 136 */
2, /* 144 */
2, /* 152 */
2, /* 160 */
2, /* 168 */
2, /* 176 */
2, /* 184 */
2 /* 192 */
};
static inline int size_index_elem(size_t bytes)
{
return (bytes - 1) / 8;
}
/*
* Find the kmem_cache structure that serves a given size of
* allocation
*/
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
int index;
if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
return NULL;
if (size <= 192) {
if (!size)
return ZERO_SIZE_PTR;
index = size_index[size_index_elem(size)];
} else
index = fls(size - 1);
#ifdef CONFIG_ZONE_DMA
if (unlikely((flags & GFP_DMA)))
return kmalloc_dma_caches[index];
#endif
return kmalloc_caches[index];
}
/*
* Create the kmalloc array. Some of the regular kmalloc arrays
* may already have been created because they were needed to
* enable allocations for slab creation.
*/
void __init create_kmalloc_caches(unsigned long flags)
{
int i;
/*
* Patch up the size_index table if we have strange large alignment
* requirements for the kmalloc array. This is only the case for
* MIPS it seems. The standard arches will not generate any code here.
*
* Largest permitted alignment is 256 bytes due to the way we
* handle the index determination for the smaller caches.
*
* Make sure that nothing crazy happens if someone starts tinkering
* around with ARCH_KMALLOC_MINALIGN
*/
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
int elem = size_index_elem(i);
if (elem >= ARRAY_SIZE(size_index))
break;
size_index[elem] = KMALLOC_SHIFT_LOW;
}
if (KMALLOC_MIN_SIZE >= 64) {
/*
* The 96 byte size cache is not used if the alignment
* is 64 byte.
*/
for (i = 64 + 8; i <= 96; i += 8)
size_index[size_index_elem(i)] = 7;
}
if (KMALLOC_MIN_SIZE >= 128) {
/*
* The 192 byte sized cache is not used if the alignment
* is 128 byte. Redirect kmalloc to use the 256 byte cache
* instead.
*/
for (i = 128 + 8; i <= 192; i += 8)
size_index[size_index_elem(i)] = 8;
}
for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
if (!kmalloc_caches[i]) {
kmalloc_caches[i] = create_kmalloc_cache(NULL,
1 << i, flags);
/*
* Caches that are not of the two-to-the-power-of size.
* These have to be created immediately after the
* earlier power of two caches
*/
if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags);
if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags);
}
}
/* Kmalloc array is now usable */
slab_state = UP;
for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
struct kmem_cache *s = kmalloc_caches[i];
char *n;
if (s) {
n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i));
BUG_ON(!n);
s->name = n;
}
}
#ifdef CONFIG_ZONE_DMA
for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
struct kmem_cache *s = kmalloc_caches[i];
if (s) {
int size = kmalloc_size(i);
char *n = kasprintf(GFP_NOWAIT,
"dma-kmalloc-%d", size);
BUG_ON(!n);
kmalloc_dma_caches[i] = create_kmalloc_cache(n,
size, SLAB_CACHE_DMA | flags);
}
}
#endif
}
#endif /* !CONFIG_SLOB */ #endif /* !CONFIG_SLOB */
......
...@@ -1006,7 +1006,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) ...@@ -1006,7 +1006,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
* dilemma by deferring the increment of the count during * dilemma by deferring the increment of the count during
* bootstrap (see early_kmem_cache_node_alloc). * bootstrap (see early_kmem_cache_node_alloc).
*/ */
if (n) { if (likely(n)) {
atomic_long_inc(&n->nr_slabs); atomic_long_inc(&n->nr_slabs);
atomic_long_add(objects, &n->total_objects); atomic_long_add(objects, &n->total_objects);
} }
...@@ -1494,7 +1494,7 @@ static inline void remove_partial(struct kmem_cache_node *n, ...@@ -1494,7 +1494,7 @@ static inline void remove_partial(struct kmem_cache_node *n,
*/ */
static inline void *acquire_slab(struct kmem_cache *s, static inline void *acquire_slab(struct kmem_cache *s,
struct kmem_cache_node *n, struct page *page, struct kmem_cache_node *n, struct page *page,
int mode) int mode, int *objects)
{ {
void *freelist; void *freelist;
unsigned long counters; unsigned long counters;
...@@ -1508,6 +1508,7 @@ static inline void *acquire_slab(struct kmem_cache *s, ...@@ -1508,6 +1508,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
freelist = page->freelist; freelist = page->freelist;
counters = page->counters; counters = page->counters;
new.counters = counters; new.counters = counters;
*objects = new.objects - new.inuse;
if (mode) { if (mode) {
new.inuse = page->objects; new.inuse = page->objects;
new.freelist = NULL; new.freelist = NULL;
...@@ -1529,7 +1530,7 @@ static inline void *acquire_slab(struct kmem_cache *s, ...@@ -1529,7 +1530,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
return freelist; return freelist;
} }
static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
/* /*
...@@ -1540,6 +1541,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, ...@@ -1540,6 +1541,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
{ {
struct page *page, *page2; struct page *page, *page2;
void *object = NULL; void *object = NULL;
int available = 0;
int objects;
/* /*
* Racy check. If we mistakenly see no partial slabs then we * Racy check. If we mistakenly see no partial slabs then we
...@@ -1553,22 +1556,21 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, ...@@ -1553,22 +1556,21 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
spin_lock(&n->list_lock); spin_lock(&n->list_lock);
list_for_each_entry_safe(page, page2, &n->partial, lru) { list_for_each_entry_safe(page, page2, &n->partial, lru) {
void *t; void *t;
int available;
if (!pfmemalloc_match(page, flags)) if (!pfmemalloc_match(page, flags))
continue; continue;
t = acquire_slab(s, n, page, object == NULL); t = acquire_slab(s, n, page, object == NULL, &objects);
if (!t) if (!t)
break; break;
available += objects;
if (!object) { if (!object) {
c->page = page; c->page = page;
stat(s, ALLOC_FROM_PARTIAL); stat(s, ALLOC_FROM_PARTIAL);
object = t; object = t;
available = page->objects - page->inuse;
} else { } else {
available = put_cpu_partial(s, page, 0); put_cpu_partial(s, page, 0);
stat(s, CPU_PARTIAL_NODE); stat(s, CPU_PARTIAL_NODE);
} }
if (kmem_cache_debug(s) || available > s->cpu_partial / 2) if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
...@@ -1947,7 +1949,7 @@ static void unfreeze_partials(struct kmem_cache *s, ...@@ -1947,7 +1949,7 @@ static void unfreeze_partials(struct kmem_cache *s,
* If we did not find a slot then simply move all the partials to the * If we did not find a slot then simply move all the partials to the
* per node partial list. * per node partial list.
*/ */
static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
{ {
struct page *oldpage; struct page *oldpage;
int pages; int pages;
...@@ -1985,7 +1987,6 @@ static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) ...@@ -1985,7 +1987,6 @@ static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
page->next = oldpage; page->next = oldpage;
} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
return pobjects;
} }
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
...@@ -2042,7 +2043,7 @@ static void flush_all(struct kmem_cache *s) ...@@ -2042,7 +2043,7 @@ static void flush_all(struct kmem_cache *s)
static inline int node_match(struct page *page, int node) static inline int node_match(struct page *page, int node)
{ {
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
if (node != NUMA_NO_NODE && page_to_nid(page) != node) if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
return 0; return 0;
#endif #endif
return 1; return 1;
...@@ -2332,13 +2333,18 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, ...@@ -2332,13 +2333,18 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
s = memcg_kmem_get_cache(s, gfpflags); s = memcg_kmem_get_cache(s, gfpflags);
redo: redo:
/* /*
* Must read kmem_cache cpu data via this cpu ptr. Preemption is * Must read kmem_cache cpu data via this cpu ptr. Preemption is
* enabled. We may switch back and forth between cpus while * enabled. We may switch back and forth between cpus while
* reading from one cpu area. That does not matter as long * reading from one cpu area. That does not matter as long
* as we end up on the original cpu again when doing the cmpxchg. * as we end up on the original cpu again when doing the cmpxchg.
*
* Preemption is disabled for the retrieval of the tid because that
* must occur from the current processor. We cannot allow rescheduling
* on a different processor between the determination of the pointer
* and the retrieval of the tid.
*/ */
preempt_disable();
c = __this_cpu_ptr(s->cpu_slab); c = __this_cpu_ptr(s->cpu_slab);
/* /*
...@@ -2348,7 +2354,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, ...@@ -2348,7 +2354,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
* linked list in between. * linked list in between.
*/ */
tid = c->tid; tid = c->tid;
barrier(); preempt_enable();
object = c->freelist; object = c->freelist;
page = c->page; page = c->page;
...@@ -2595,10 +2601,11 @@ static __always_inline void slab_free(struct kmem_cache *s, ...@@ -2595,10 +2601,11 @@ static __always_inline void slab_free(struct kmem_cache *s,
* data is retrieved via this pointer. If we are on the same cpu * data is retrieved via this pointer. If we are on the same cpu
* during the cmpxchg then the free will succedd. * during the cmpxchg then the free will succedd.
*/ */
preempt_disable();
c = __this_cpu_ptr(s->cpu_slab); c = __this_cpu_ptr(s->cpu_slab);
tid = c->tid; tid = c->tid;
barrier(); preempt_enable();
if (likely(page == c->page)) { if (likely(page == c->page)) {
set_freepointer(s, object, c->freelist); set_freepointer(s, object, c->freelist);
...@@ -2776,7 +2783,7 @@ init_kmem_cache_node(struct kmem_cache_node *n) ...@@ -2776,7 +2783,7 @@ init_kmem_cache_node(struct kmem_cache_node *n)
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{ {
BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
/* /*
* Must align to double word boundary for the double cmpxchg * Must align to double word boundary for the double cmpxchg
...@@ -2983,7 +2990,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) ...@@ -2983,7 +2990,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
s->allocflags |= __GFP_COMP; s->allocflags |= __GFP_COMP;
if (s->flags & SLAB_CACHE_DMA) if (s->flags & SLAB_CACHE_DMA)
s->allocflags |= SLUB_DMA; s->allocflags |= GFP_DMA;
if (s->flags & SLAB_RECLAIM_ACCOUNT) if (s->flags & SLAB_RECLAIM_ACCOUNT)
s->allocflags |= __GFP_RECLAIMABLE; s->allocflags |= __GFP_RECLAIMABLE;
...@@ -3175,13 +3182,6 @@ int __kmem_cache_shutdown(struct kmem_cache *s) ...@@ -3175,13 +3182,6 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
* Kmalloc subsystem * Kmalloc subsystem
*******************************************************************/ *******************************************************************/
struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
EXPORT_SYMBOL(kmalloc_caches);
#ifdef CONFIG_ZONE_DMA
static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
#endif
static int __init setup_slub_min_order(char *str) static int __init setup_slub_min_order(char *str)
{ {
get_option(&str, &slub_min_order); get_option(&str, &slub_min_order);
...@@ -3218,73 +3218,15 @@ static int __init setup_slub_nomerge(char *str) ...@@ -3218,73 +3218,15 @@ static int __init setup_slub_nomerge(char *str)
__setup("slub_nomerge", setup_slub_nomerge); __setup("slub_nomerge", setup_slub_nomerge);
/*
* Conversion table for small slabs sizes / 8 to the index in the
* kmalloc array. This is necessary for slabs < 192 since we have non power
* of two cache sizes there. The size of larger slabs can be determined using
* fls.
*/
static s8 size_index[24] = {
3, /* 8 */
4, /* 16 */
5, /* 24 */
5, /* 32 */
6, /* 40 */
6, /* 48 */
6, /* 56 */
6, /* 64 */
1, /* 72 */
1, /* 80 */
1, /* 88 */
1, /* 96 */
7, /* 104 */
7, /* 112 */
7, /* 120 */
7, /* 128 */
2, /* 136 */
2, /* 144 */
2, /* 152 */
2, /* 160 */
2, /* 168 */
2, /* 176 */
2, /* 184 */
2 /* 192 */
};
static inline int size_index_elem(size_t bytes)
{
return (bytes - 1) / 8;
}
static struct kmem_cache *get_slab(size_t size, gfp_t flags)
{
int index;
if (size <= 192) {
if (!size)
return ZERO_SIZE_PTR;
index = size_index[size_index_elem(size)];
} else
index = fls(size - 1);
#ifdef CONFIG_ZONE_DMA
if (unlikely((flags & SLUB_DMA)))
return kmalloc_dma_caches[index];
#endif
return kmalloc_caches[index];
}
void *__kmalloc(size_t size, gfp_t flags) void *__kmalloc(size_t size, gfp_t flags)
{ {
struct kmem_cache *s; struct kmem_cache *s;
void *ret; void *ret;
if (unlikely(size > SLUB_MAX_SIZE)) if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
return kmalloc_large(size, flags); return kmalloc_large(size, flags);
s = get_slab(size, flags); s = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(s))) if (unlikely(ZERO_OR_NULL_PTR(s)))
return s; return s;
...@@ -3317,7 +3259,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) ...@@ -3317,7 +3259,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
struct kmem_cache *s; struct kmem_cache *s;
void *ret; void *ret;
if (unlikely(size > SLUB_MAX_SIZE)) { if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, flags, node); ret = kmalloc_large_node(size, flags, node);
trace_kmalloc_node(_RET_IP_, ret, trace_kmalloc_node(_RET_IP_, ret,
...@@ -3327,7 +3269,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) ...@@ -3327,7 +3269,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
return ret; return ret;
} }
s = get_slab(size, flags); s = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(s))) if (unlikely(ZERO_OR_NULL_PTR(s)))
return s; return s;
...@@ -3620,6 +3562,12 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache) ...@@ -3620,6 +3562,12 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
memcpy(s, static_cache, kmem_cache->object_size); memcpy(s, static_cache, kmem_cache->object_size);
/*
* This runs very early, and only the boot processor is supposed to be
* up. Even if it weren't true, IRQs are not up so we couldn't fire
* IPIs around.
*/
__flush_cpu_slab(s, smp_processor_id());
for_each_node_state(node, N_NORMAL_MEMORY) { for_each_node_state(node, N_NORMAL_MEMORY) {
struct kmem_cache_node *n = get_node(s, node); struct kmem_cache_node *n = get_node(s, node);
struct page *p; struct page *p;
...@@ -3642,8 +3590,6 @@ void __init kmem_cache_init(void) ...@@ -3642,8 +3590,6 @@ void __init kmem_cache_init(void)
{ {
static __initdata struct kmem_cache boot_kmem_cache, static __initdata struct kmem_cache boot_kmem_cache,
boot_kmem_cache_node; boot_kmem_cache_node;
int i;
int caches = 2;
if (debug_guardpage_minorder()) if (debug_guardpage_minorder())
slub_max_order = 0; slub_max_order = 0;
...@@ -3674,103 +3620,16 @@ void __init kmem_cache_init(void) ...@@ -3674,103 +3620,16 @@ void __init kmem_cache_init(void)
kmem_cache_node = bootstrap(&boot_kmem_cache_node); kmem_cache_node = bootstrap(&boot_kmem_cache_node);
/* Now we can use the kmem_cache to allocate kmalloc slabs */ /* Now we can use the kmem_cache to allocate kmalloc slabs */
create_kmalloc_caches(0);
/*
* Patch up the size_index table if we have strange large alignment
* requirements for the kmalloc array. This is only the case for
* MIPS it seems. The standard arches will not generate any code here.
*
* Largest permitted alignment is 256 bytes due to the way we
* handle the index determination for the smaller caches.
*
* Make sure that nothing crazy happens if someone starts tinkering
* around with ARCH_KMALLOC_MINALIGN
*/
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
int elem = size_index_elem(i);
if (elem >= ARRAY_SIZE(size_index))
break;
size_index[elem] = KMALLOC_SHIFT_LOW;
}
if (KMALLOC_MIN_SIZE == 64) {
/*
* The 96 byte size cache is not used if the alignment
* is 64 byte.
*/
for (i = 64 + 8; i <= 96; i += 8)
size_index[size_index_elem(i)] = 7;
} else if (KMALLOC_MIN_SIZE == 128) {
/*
* The 192 byte sized cache is not used if the alignment
* is 128 byte. Redirect kmalloc to use the 256 byte cache
* instead.
*/
for (i = 128 + 8; i <= 192; i += 8)
size_index[size_index_elem(i)] = 8;
}
/* Caches that are not of the two-to-the-power-of size */
if (KMALLOC_MIN_SIZE <= 32) {
kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
caches++;
}
if (KMALLOC_MIN_SIZE <= 64) {
kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
caches++;
}
for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
caches++;
}
slab_state = UP;
/* Provide the correct kmalloc names now that the caches are up */
if (KMALLOC_MIN_SIZE <= 32) {
kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
BUG_ON(!kmalloc_caches[1]->name);
}
if (KMALLOC_MIN_SIZE <= 64) {
kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
BUG_ON(!kmalloc_caches[2]->name);
}
for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
BUG_ON(!s);
kmalloc_caches[i]->name = s;
}
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
register_cpu_notifier(&slab_notifier); register_cpu_notifier(&slab_notifier);
#endif #endif
#ifdef CONFIG_ZONE_DMA
for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
struct kmem_cache *s = kmalloc_caches[i];
if (s && s->size) {
char *name = kasprintf(GFP_NOWAIT,
"dma-kmalloc-%d", s->object_size);
BUG_ON(!name);
kmalloc_dma_caches[i] = create_kmalloc_cache(name,
s->object_size, SLAB_CACHE_DMA);
}
}
#endif
printk(KERN_INFO printk(KERN_INFO
"SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," "SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d,"
" CPUs=%d, Nodes=%d\n", " CPUs=%d, Nodes=%d\n",
caches, cache_line_size(), cache_line_size(),
slub_min_order, slub_max_order, slub_min_objects, slub_min_order, slub_max_order, slub_min_objects,
nr_cpu_ids, nr_node_ids); nr_cpu_ids, nr_node_ids);
} }
...@@ -3933,10 +3792,10 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) ...@@ -3933,10 +3792,10 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
struct kmem_cache *s; struct kmem_cache *s;
void *ret; void *ret;
if (unlikely(size > SLUB_MAX_SIZE)) if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
return kmalloc_large(size, gfpflags); return kmalloc_large(size, gfpflags);
s = get_slab(size, gfpflags); s = kmalloc_slab(size, gfpflags);
if (unlikely(ZERO_OR_NULL_PTR(s))) if (unlikely(ZERO_OR_NULL_PTR(s)))
return s; return s;
...@@ -3956,7 +3815,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, ...@@ -3956,7 +3815,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
struct kmem_cache *s; struct kmem_cache *s;
void *ret; void *ret;
if (unlikely(size > SLUB_MAX_SIZE)) { if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, gfpflags, node); ret = kmalloc_large_node(size, gfpflags, node);
trace_kmalloc_node(caller, ret, trace_kmalloc_node(caller, ret,
...@@ -3966,7 +3825,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, ...@@ -3966,7 +3825,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
return ret; return ret;
} }
s = get_slab(size, gfpflags); s = kmalloc_slab(size, gfpflags);
if (unlikely(ZERO_OR_NULL_PTR(s))) if (unlikely(ZERO_OR_NULL_PTR(s)))
return s; return s;
...@@ -4315,7 +4174,7 @@ static void resiliency_test(void) ...@@ -4315,7 +4174,7 @@ static void resiliency_test(void)
{ {
u8 *p; u8 *p;
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10); BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
printk(KERN_ERR "SLUB resiliency testing\n"); printk(KERN_ERR "SLUB resiliency testing\n");
printk(KERN_ERR "-----------------------\n"); printk(KERN_ERR "-----------------------\n");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment