Commit e0c22e53 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] Move slab objects to the end of the real allocation

From: Manfred Spraul <manfred@colorfullife.com>

The real memory allocation is usually larger than the actual object size:
either due to L1 cache line padding, or due to page padding with
CONFIG_DEBUG_PAGEALLOC.  Right now objects are placed to the beginning of
the real allocation, but to trigger bugs it's better to move objects to the
end of the real allocation: that way accesses behind the end of the
allocation have a larger chance of hitting the (unmapped) next page.  The
attached patch moves the objects to align them with the end of the real
allocation.

Actually it contains 4 separate changes:

- Do not page-pad allocations that are <= SMP_CACHE_LINE_SIZE: page-padding
  such small allocations crashes.  Right now the limit is hardcoded to 128
  bytes, but sooner or later an arch will appear with 256 byte cache lines.

- cleanup: redzone bytes are now accessed through inline helper functions
  instead of via magic offsets scattered throughout slab.c.

- main change: move objects to the end of the allocation - trivial after
  the cleanup.

- Print old redzone value if a redzone mismatch happens: This makes it
  simpler to figure out what happened [single bit error, wrong redzone
  code, overwritten]
parent d6dbfa23
...@@ -293,6 +293,10 @@ struct kmem_cache_s { ...@@ -293,6 +293,10 @@ struct kmem_cache_s {
atomic_t freehit; atomic_t freehit;
atomic_t freemiss; atomic_t freemiss;
#endif #endif
#if DEBUG
int dbghead;
int reallen;
#endif
}; };
#define CFLGS_OFF_SLAB (0x80000000UL) #define CFLGS_OFF_SLAB (0x80000000UL)
...@@ -356,32 +360,68 @@ struct kmem_cache_s { ...@@ -356,32 +360,68 @@ struct kmem_cache_s {
#define POISON_AFTER 0x6b /* for use-after-free poisoning */ #define POISON_AFTER 0x6b /* for use-after-free poisoning */
#define POISON_END 0xa5 /* end-byte of poisoning */ #define POISON_END 0xa5 /* end-byte of poisoning */
/* memory layout of objects:
* 0 : objp
* 0 .. cachep->dbghead - BYTES_PER_WORD - 1: padding. This ensures that
* the end of an object is aligned with the end of the real
* allocation. Catches writes behind the end of the allocation.
* cachep->dbghead - BYTES_PER_WORD .. cachep->dbghead - 1:
* redzone word.
* cachep->dbghead: The real object.
* cachep->objsize - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
* cachep->objsize - 1* BYTES_PER_WORD: last caller address [BYTES_PER_WORD long]
*/
static inline int obj_dbghead(kmem_cache_t *cachep) static inline int obj_dbghead(kmem_cache_t *cachep)
{ {
if (cachep->flags & SLAB_RED_ZONE) return cachep->dbghead;
return BYTES_PER_WORD;
return 0;
} }
static inline int obj_dbglen(kmem_cache_t *cachep) static inline int obj_reallen(kmem_cache_t *cachep)
{ {
int len = 0; return cachep->reallen;
}
if (cachep->flags & SLAB_RED_ZONE) { static unsigned long *dbg_redzone1(kmem_cache_t *cachep, void *objp)
len += 2*BYTES_PER_WORD; {
} BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
if (cachep->flags & SLAB_STORE_USER) { return (unsigned long*) (objp+obj_dbghead(cachep)-BYTES_PER_WORD);
len += BYTES_PER_WORD; }
}
return len; static unsigned long *dbg_redzone2(kmem_cache_t *cachep, void *objp)
{
BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
if (cachep->flags & SLAB_STORE_USER)
return (unsigned long*) (objp+cachep->objsize-2*BYTES_PER_WORD);
return (unsigned long*) (objp+cachep->objsize-BYTES_PER_WORD);
}
static void **dbg_userword(kmem_cache_t *cachep, void *objp)
{
BUG_ON(!(cachep->flags & SLAB_STORE_USER));
return (void**)(objp+cachep->objsize-BYTES_PER_WORD);
} }
#else #else
static inline int obj_dbghead(kmem_cache_t *cachep) static inline int obj_dbghead(kmem_cache_t *cachep)
{ {
return 0; return 0;
} }
static inline int obj_dbglen(kmem_cache_t *cachep) static inline int obj_reallen(kmem_cache_t *cachep)
{
return cachep->objsize;
}
static inline unsigned long *dbg_redzone1(kmem_cache_t *cachep, void *objp)
{
BUG();
return 0;
}
static inline unsigned long *dbg_redzone2(kmem_cache_t *cachep, void *objp)
{ {
BUG();
return 0;
}
static inline void **dbg_userword(kmem_cache_t *cachep, void *objp)
{
BUG();
return 0; return 0;
} }
#endif #endif
...@@ -804,7 +844,7 @@ static inline void kmem_freepages (kmem_cache_t *cachep, void *addr) ...@@ -804,7 +844,7 @@ static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
#ifdef CONFIG_DEBUG_PAGEALLOC #ifdef CONFIG_DEBUG_PAGEALLOC
static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, unsigned long caller) static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, unsigned long caller)
{ {
int size = cachep->objsize-obj_dbglen(cachep); int size = obj_reallen(cachep);
addr = (unsigned long *)&((char*)addr)[obj_dbghead(cachep)]; addr = (unsigned long *)&((char*)addr)[obj_dbghead(cachep)];
...@@ -836,7 +876,7 @@ static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, unsigned ...@@ -836,7 +876,7 @@ static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, unsigned
static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val) static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val)
{ {
int size = cachep->objsize-obj_dbglen(cachep); int size = obj_reallen(cachep);
addr = &((char*)addr)[obj_dbghead(cachep)]; addr = &((char*)addr)[obj_dbghead(cachep)];
memset(addr, val, size); memset(addr, val, size);
...@@ -858,47 +898,42 @@ static void *scan_poisoned_obj(unsigned char* addr, unsigned int size) ...@@ -858,47 +898,42 @@ static void *scan_poisoned_obj(unsigned char* addr, unsigned int size)
return NULL; return NULL;
} }
static void check_poison_obj(kmem_cache_t *cachep, void *addr) static void check_poison_obj(kmem_cache_t *cachep, void *objp)
{ {
void *end; void *end;
int size = cachep->objsize-obj_dbglen(cachep); void *realobj;
int size = obj_reallen(cachep);
addr = &((char*)addr)[obj_dbghead(cachep)]; realobj = objp+obj_dbghead(cachep);
end = scan_poisoned_obj(addr, size); end = scan_poisoned_obj(realobj, size);
if (end) { if (end) {
int s; int s;
printk(KERN_ERR "Slab corruption: start=%p, expend=%p, " printk(KERN_ERR "Slab corruption: start=%p, expend=%p, "
"problemat=%p\n", addr, addr+size-1, end); "problemat=%p\n", realobj, realobj+size-1, end);
if (cachep->flags & SLAB_STORE_USER) { if (cachep->flags & SLAB_STORE_USER) {
void *pc; printk(KERN_ERR "Last user: [<%p>]", *dbg_userword(cachep, objp));
print_symbol("(%s)", (unsigned long)*dbg_userword(cachep, objp));
if (cachep->flags & SLAB_RED_ZONE)
pc = *(void**)(addr+size+BYTES_PER_WORD);
else
pc = *(void**)(addr+size);
printk(KERN_ERR "Last user: [<%p>]", pc);
print_symbol("(%s)", (unsigned long)pc);
printk("\n"); printk("\n");
} }
printk(KERN_ERR "Data: "); printk(KERN_ERR "Data: ");
for (s = 0; s < size; s++) { for (s = 0; s < size; s++) {
if (((char*)addr)[s] == POISON_BEFORE) if (((char*)realobj)[s] == POISON_BEFORE)
printk("."); printk(".");
else if (((char*)addr)[s] == POISON_AFTER) else if (((char*)realobj)[s] == POISON_AFTER)
printk("*"); printk("*");
else else
printk("%02X ", ((unsigned char*)addr)[s]); printk("%02X ", ((unsigned char*)realobj)[s]);
} }
printk("\n"); printk("\n");
printk(KERN_ERR "Next: "); printk(KERN_ERR "Next: ");
for (; s < size + 32; s++) { for (; s < size + 32; s++) {
if (((char*)addr)[s] == POISON_BEFORE) if (((char*)realobj)[s] == POISON_BEFORE)
printk("."); printk(".");
else if (((char*)addr)[s] == POISON_AFTER) else if (((char*)realobj)[s] == POISON_AFTER)
printk("*"); printk("*");
else else
printk("%02X ", ((unsigned char*)addr)[s]); printk("%02X ", ((unsigned char*)realobj)[s]);
} }
printk("\n"); printk("\n");
slab_error(cachep, "object was modified after freeing"); slab_error(cachep, "object was modified after freeing");
...@@ -916,7 +951,6 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp) ...@@ -916,7 +951,6 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
int i; int i;
for (i = 0; i < cachep->num; i++) { for (i = 0; i < cachep->num; i++) {
void *objp = slabp->s_mem + cachep->objsize * i; void *objp = slabp->s_mem + cachep->objsize * i;
int objlen = cachep->objsize;
if (cachep->flags & SLAB_POISON) { if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC #ifdef CONFIG_DEBUG_PAGEALLOC
...@@ -928,21 +962,16 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp) ...@@ -928,21 +962,16 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
check_poison_obj(cachep, objp); check_poison_obj(cachep, objp);
#endif #endif
} }
if (cachep->flags & SLAB_STORE_USER)
objlen -= BYTES_PER_WORD;
if (cachep->flags & SLAB_RED_ZONE) { if (cachep->flags & SLAB_RED_ZONE) {
if (*((unsigned long*)(objp)) != RED_INACTIVE) if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
slab_error(cachep, "start of a freed object " slab_error(cachep, "start of a freed object "
"was overwritten"); "was overwritten");
if (*((unsigned long*)(objp + objlen - BYTES_PER_WORD)) if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
!= RED_INACTIVE)
slab_error(cachep, "end of a freed object " slab_error(cachep, "end of a freed object "
"was overwritten"); "was overwritten");
objp += BYTES_PER_WORD;
} }
if (cachep->dtor && !(cachep->flags & SLAB_POISON)) if (cachep->dtor && !(cachep->flags & SLAB_POISON))
(cachep->dtor)(objp, cachep, 0); (cachep->dtor)(objp+obj_dbghead(cachep), cachep, 0);
} }
#else #else
if (cachep->dtor) { if (cachep->dtor) {
...@@ -1020,10 +1049,6 @@ kmem_cache_create (const char *name, size_t size, size_t offset, ...@@ -1020,10 +1049,6 @@ kmem_cache_create (const char *name, size_t size, size_t offset,
} }
#if FORCED_DEBUG #if FORCED_DEBUG
#ifdef CONFIG_DEBUG_PAGEALLOC
if (size < PAGE_SIZE-3*BYTES_PER_WORD && size > 128)
size = PAGE_SIZE-3*BYTES_PER_WORD;
#endif
/* /*
* Enable redzoning and last user accounting, except * Enable redzoning and last user accounting, except
* - for caches with forced alignment: redzoning would violate the * - for caches with forced alignment: redzoning would violate the
...@@ -1054,6 +1079,9 @@ kmem_cache_create (const char *name, size_t size, size_t offset, ...@@ -1054,6 +1079,9 @@ kmem_cache_create (const char *name, size_t size, size_t offset,
goto opps; goto opps;
memset(cachep, 0, sizeof(kmem_cache_t)); memset(cachep, 0, sizeof(kmem_cache_t));
#if DEBUG
cachep->reallen = size;
#endif
/* Check that size is in terms of words. This is needed to avoid /* Check that size is in terms of words. This is needed to avoid
* unaligned accesses for some archs when redzoning is used, and makes * unaligned accesses for some archs when redzoning is used, and makes
* sure any on-slab bufctl's are also correctly aligned. * sure any on-slab bufctl's are also correctly aligned.
...@@ -1071,12 +1099,20 @@ kmem_cache_create (const char *name, size_t size, size_t offset, ...@@ -1071,12 +1099,20 @@ kmem_cache_create (const char *name, size_t size, size_t offset,
* when redzoning. * when redzoning.
*/ */
flags &= ~SLAB_HWCACHE_ALIGN; flags &= ~SLAB_HWCACHE_ALIGN;
size += 2*BYTES_PER_WORD; /* words for redzone */ /* add space for red zone words */
cachep->dbghead += BYTES_PER_WORD;
size += 2*BYTES_PER_WORD;
} }
if (flags & SLAB_STORE_USER) { if (flags & SLAB_STORE_USER) {
flags &= ~SLAB_HWCACHE_ALIGN; flags &= ~SLAB_HWCACHE_ALIGN;
size += BYTES_PER_WORD; /* word for kfree caller address */ size += BYTES_PER_WORD; /* add space */
}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
if (size > 128 && cachep->reallen > L1_CACHE_BYTES && size < PAGE_SIZE) {
cachep->dbghead += PAGE_SIZE - size;
size = PAGE_SIZE;
} }
#endif
#endif #endif
align = BYTES_PER_WORD; align = BYTES_PER_WORD;
if (flags & SLAB_HWCACHE_ALIGN) if (flags & SLAB_HWCACHE_ALIGN)
...@@ -1444,20 +1480,15 @@ static void cache_init_objs (kmem_cache_t * cachep, ...@@ -1444,20 +1480,15 @@ static void cache_init_objs (kmem_cache_t * cachep,
for (i = 0; i < cachep->num; i++) { for (i = 0; i < cachep->num; i++) {
void* objp = slabp->s_mem+cachep->objsize*i; void* objp = slabp->s_mem+cachep->objsize*i;
#if DEBUG #if DEBUG
int objlen = cachep->objsize;
/* need to poison the objs? */ /* need to poison the objs? */
if (cachep->flags & SLAB_POISON) if (cachep->flags & SLAB_POISON)
poison_obj(cachep, objp, POISON_BEFORE); poison_obj(cachep, objp, POISON_BEFORE);
if (cachep->flags & SLAB_STORE_USER) { if (cachep->flags & SLAB_STORE_USER)
objlen -= BYTES_PER_WORD; *dbg_userword(cachep, objp) = NULL;
((unsigned long*)(objp+objlen))[0] = 0;
}
if (cachep->flags & SLAB_RED_ZONE) { if (cachep->flags & SLAB_RED_ZONE) {
*((unsigned long*)(objp)) = RED_INACTIVE; *dbg_redzone1(cachep, objp) = RED_INACTIVE;
objp += BYTES_PER_WORD; *dbg_redzone2(cachep, objp) = RED_INACTIVE;
objlen -= 2* BYTES_PER_WORD;
*((unsigned long*)(objp + objlen)) = RED_INACTIVE;
} }
/* /*
* Constructors are not allowed to allocate memory from * Constructors are not allowed to allocate memory from
...@@ -1465,14 +1496,13 @@ static void cache_init_objs (kmem_cache_t * cachep, ...@@ -1465,14 +1496,13 @@ static void cache_init_objs (kmem_cache_t * cachep,
* Otherwise, deadlock. They must also be threaded. * Otherwise, deadlock. They must also be threaded.
*/ */
if (cachep->ctor && !(cachep->flags & SLAB_POISON)) if (cachep->ctor && !(cachep->flags & SLAB_POISON))
cachep->ctor(objp, cachep, ctor_flags); cachep->ctor(objp+obj_dbghead(cachep), cachep, ctor_flags);
if (cachep->flags & SLAB_RED_ZONE) { if (cachep->flags & SLAB_RED_ZONE) {
if (*((unsigned long*)(objp + objlen)) != RED_INACTIVE) if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
slab_error(cachep, "constructor overwrote the" slab_error(cachep, "constructor overwrote the"
" end of an object"); " end of an object");
objp -= BYTES_PER_WORD; if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
if (*((unsigned long*)(objp)) != RED_INACTIVE)
slab_error(cachep, "constructor overwrote the" slab_error(cachep, "constructor overwrote the"
" start of an object"); " start of an object");
} }
...@@ -1623,9 +1653,9 @@ static inline void *cache_free_debugcheck (kmem_cache_t * cachep, void * objp, v ...@@ -1623,9 +1653,9 @@ static inline void *cache_free_debugcheck (kmem_cache_t * cachep, void * objp, v
#if DEBUG #if DEBUG
struct page *page; struct page *page;
unsigned int objnr; unsigned int objnr;
int objlen = cachep->objsize;
struct slab *slabp; struct slab *slabp;
objp -= obj_dbghead(cachep);
kfree_debugcheck(objp); kfree_debugcheck(objp);
page = virt_to_page(objp); page = virt_to_page(objp);
...@@ -1638,21 +1668,18 @@ static inline void *cache_free_debugcheck (kmem_cache_t * cachep, void * objp, v ...@@ -1638,21 +1668,18 @@ static inline void *cache_free_debugcheck (kmem_cache_t * cachep, void * objp, v
} }
slabp = GET_PAGE_SLAB(page); slabp = GET_PAGE_SLAB(page);
if (cachep->flags & SLAB_STORE_USER) {
objlen -= BYTES_PER_WORD;
}
if (cachep->flags & SLAB_RED_ZONE) { if (cachep->flags & SLAB_RED_ZONE) {
objp -= BYTES_PER_WORD; if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || *dbg_redzone2(cachep, objp) != RED_ACTIVE) {
if (xchg((unsigned long *)objp, RED_INACTIVE) != RED_ACTIVE) slab_error(cachep, "double free, or memory outside"
slab_error(cachep, "double free, or memory before"
" object was overwritten");
if (xchg((unsigned long *)(objp+objlen-BYTES_PER_WORD), RED_INACTIVE) != RED_ACTIVE)
slab_error(cachep, "double free, or memory after "
" object was overwritten"); " object was overwritten");
printk(KERN_ERR "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n",
objp, *dbg_redzone1(cachep, objp), *dbg_redzone2(cachep, objp));
} }
if (cachep->flags & SLAB_STORE_USER) { *dbg_redzone1(cachep, objp) = RED_INACTIVE;
*((void**)(objp+objlen)) = caller; *dbg_redzone2(cachep, objp) = RED_INACTIVE;
} }
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = caller;
objnr = (objp-slabp->s_mem)/cachep->objsize; objnr = (objp-slabp->s_mem)/cachep->objsize;
...@@ -1825,8 +1852,6 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep, ...@@ -1825,8 +1852,6 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep,
unsigned long flags, void *objp, void *caller) unsigned long flags, void *objp, void *caller)
{ {
#if DEBUG #if DEBUG
int objlen = cachep->objsize;
if (!objp) if (!objp)
return objp; return objp;
if (cachep->flags & SLAB_POISON) { if (cachep->flags & SLAB_POISON) {
...@@ -1840,24 +1865,20 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep, ...@@ -1840,24 +1865,20 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep,
#endif #endif
poison_obj(cachep, objp, POISON_BEFORE); poison_obj(cachep, objp, POISON_BEFORE);
} }
if (cachep->flags & SLAB_STORE_USER) { if (cachep->flags & SLAB_STORE_USER)
objlen -= BYTES_PER_WORD; *dbg_userword(cachep, objp) = caller;
*((void **)(objp+objlen)) = caller;
}
if (cachep->flags & SLAB_RED_ZONE) { if (cachep->flags & SLAB_RED_ZONE) {
/* Set alloc red-zone, and check old one. */ if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
if (xchg((unsigned long *)objp, RED_ACTIVE) != RED_INACTIVE) { slab_error(cachep, "double free, or memory outside"
slab_error(cachep, "memory before object was " " object was overwritten");
"overwritten"); printk(KERN_ERR "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n",
} objp, *dbg_redzone1(cachep, objp), *dbg_redzone2(cachep, objp));
if (xchg((unsigned long *)(objp+objlen - BYTES_PER_WORD),
RED_ACTIVE) != RED_INACTIVE) {
slab_error(cachep, "memory after object was "
"overwritten");
} }
objp += BYTES_PER_WORD; *dbg_redzone1(cachep, objp) = RED_ACTIVE;
*dbg_redzone2(cachep, objp) = RED_ACTIVE;
} }
objp += obj_dbghead(cachep);
if (cachep->ctor && cachep->flags & SLAB_POISON) { if (cachep->ctor && cachep->flags & SLAB_POISON) {
unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR; unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR;
...@@ -2175,7 +2196,7 @@ free_percpu(const void *objp) ...@@ -2175,7 +2196,7 @@ free_percpu(const void *objp)
unsigned int kmem_cache_size(kmem_cache_t *cachep) unsigned int kmem_cache_size(kmem_cache_t *cachep)
{ {
return cachep->objsize-obj_dbglen(cachep); return obj_reallen(cachep);
} }
kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags) kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
...@@ -2763,12 +2784,17 @@ void ptrinfo(unsigned long addr) ...@@ -2763,12 +2784,17 @@ void ptrinfo(unsigned long addr)
if (objnr >= c->num) { if (objnr >= c->num) {
printk("Bad obj number.\n"); printk("Bad obj number.\n");
} else { } else {
kernel_map_pages(virt_to_page(objp), c->objsize/PAGE_SIZE, 1); kernel_map_pages(virt_to_page(objp),
c->objsize/PAGE_SIZE, 1);
if (c->flags & SLAB_RED_ZONE)
printk("redzone: 0x%lx/0x%lx.\n",
*dbg_redzone1(c, objp),
*dbg_redzone2(c, objp));
printk("redzone: %lxh/%lxh/%lxh.\n", if (c->flags & SLAB_STORE_USER)
((unsigned long*)objp)[0], printk("Last user: %p.\n",
((unsigned long*)(objp+c->objsize))[-2], *dbg_userword(c, objp));
((unsigned long*)(objp+c->objsize))[-1]);
} }
spin_unlock_irqrestore(&c->spinlock, flags); spin_unlock_irqrestore(&c->spinlock, flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment