Commit 0687a454 authored by Kevin Modzelewski

Add per-thread caches to the allocator

Threads now claim an entire block at a time and put it in a
thread-local cache.  In the common case they can allocate out of
this block, and only need to take the heap lock when it runs out.
parent 51cf8080
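
In outline, the scheme works as in the following minimal, self-contained sketch. Everything in it is illustrative rather than the commit's code: it assumes a single size class and bump-pointer blocks, whereas the real allocator below keeps per-bucket block lists and a per-block free bitmap.

// Minimal sketch of the per-thread block cache idea. Block, claimBlock,
// and gcAlloc are stand-ins, and blocks bump-allocate here instead of
// using a free bitmap.
#include <cstddef>
#include <mutex>
#include <vector>

struct Block {
    char storage[4096];
    size_t used = 0;
    void* tryAlloc(size_t n) {
        if (used + n > sizeof(storage))
            return nullptr;                  // this block is exhausted
        void* p = storage + used;
        used += n;
        return p;
    }
};

std::mutex heap_lock;
std::vector<Block*> all_blocks;              // shared state, guarded by heap_lock

Block* claimBlock() {
    std::lock_guard<std::mutex> g(heap_lock);  // slow path: lock is taken
    Block* b = new Block;
    all_blocks.push_back(b);                 // so a collector could still find it
    return b;
}

void* gcAlloc(size_t n) {
    static thread_local Block* cached = nullptr;  // the per-thread cache
    if (cached) {
        if (void* p = cached->tryAlloc(n))   // common case: no locking
            return p;
    }
    cached = claimBlock();                   // cache miss: claim a fresh block
    return cached->tryAlloc(n);
}

The fast path touches only thread-local state; the heap lock is reached only when the cached block runs out, which is exactly the case the commit message describes.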
@@ -109,10 +109,8 @@ static void markPhase() {
     while (void* p = stack.pop()) {
         assert(((intptr_t)p) % 8 == 0);
         GCObjectHeader* header = headerFromObject(p);
-        // printf("%p\n", p);
         if (isMarked(header)) {
-            // printf("Already marked, skipping\n");
             continue;
         }
@@ -173,7 +171,7 @@ void runCollection() {
     markPhase();
     sweepPhase();
     if (VERBOSITY("gc") >= 2)
-        printf("Collection #%d done\n", ++ncollections);
+        printf("Collection #%d done\n\n", ncollections);
     long us = _t.end();
     static StatCounter sc_us("gc_collections_us");
@@ -29,11 +29,6 @@ namespace gc {
 inline void* gc_alloc(size_t bytes) __attribute__((visibility("default")));
 inline void* gc_alloc(size_t bytes) {
-    // if ((++numAllocs) >= ALLOCS_PER_COLLECTION) {
-    // numAllocs = 0;
-    // runCollection();
-    //}
 #ifndef NVALGRIND
     // Adding a redzone will confuse the allocator, so disable it for now.
@@ -65,6 +60,8 @@ inline void* gc_alloc(size_t bytes) {
     // if (VERBOSITY()) printf("Allocated %ld bytes at [%p, %p)\n", bytes, r, (char*)r + bytes);
 #endif
+    // printf("Allocated %p\n", r);
     return r;
 }
@@ -20,6 +20,7 @@
 #include <sys/mman.h>
 #include "core/common.h"
+#include "core/util.h"
 #include "gc/gc_alloc.h"
 #ifndef NVALGRIND
@@ -147,98 +148,124 @@ static Block* alloc_block(uint64_t size, Block** prev) {
     return rtn;
 }
 
+static void insertIntoLL(Block** next_pointer, Block* next) {
+    assert(next_pointer);
+    assert(next);
+    assert(!next->next);
+    assert(!next->prev);
+
+    next->next = *next_pointer;
+    if (next->next)
+        next->next->prev = &next->next;
+    *next_pointer = next;
+    next->prev = next_pointer;
+}
+
+static void removeFromLL(Block* b) {
+    if (b->next)
+        b->next->prev = b->prev;
+    *b->prev = b->next;
+
+    b->next = NULL;
+    b->prev = NULL;
+}
+
 Heap::ThreadBlockCache::~ThreadBlockCache() {
     LOCK_REGION(heap->lock);
 
     for (int i = 0; i < NUM_BUCKETS; i++) {
-        if (cache_heads[i] == NULL)
+        Block* b = cache_heads[i];
+        if (b == NULL)
             continue;
-        assert(0);
+
+        removeFromLL(b);
+
+        // This should have been the only block in the list.
+        // Well, we could cache multiple blocks if we want, and maybe we should,
+        // but for now this routine only supports caching a single one, and would
+        // need to get updated:
+        assert(cache_heads[i] == NULL);
+
+        insertIntoLL(&heap->heads[i], b);
     }
 }
 
-void* Heap::allocSmall(size_t rounded_size, Block** prev, Block** full_head) {
-    _collectIfNeeded(rounded_size);
-
-    ThreadBlockCache* cache = thread_caches.get();
-
-    LOCK_REGION(lock);
-
-    Block* cur = *prev;
-    assert(!cur || prev == cur->prev);
-    int scanned = 0;
-
-    // printf("alloc(%ld)\n", rounded_size);
-
-    // Block **full_prev = full_head;
-    while (true) {
-        // printf("cur = %p, prev = %p\n", cur, prev);
-        if (cur == NULL) {
-            Block* next = alloc_block(rounded_size, &cur->next);
-            // printf("allocated new block %p\n", next);
-            *prev = next;
-            next->prev = prev;
-            prev = &cur->next;
-
-            next->next = *full_head;
-            *full_head = NULL;
-            prev = full_head;
-
-            cur = next;
-        }
-
-        int i = 0;
-        uint64_t mask = 0;
-        for (; i < BITFIELD_ELTS; i++) {
-            mask = cur->isfree[i];
-            if (mask != 0L) {
-                break;
-            }
-        }
-
-        if (i == BITFIELD_ELTS) {
-            scanned++;
-            // printf("moving on\n");
-
-            Block* t = *prev = cur->next;
-            cur->next = NULL;
-            if (t)
-                t->prev = prev;
-            cur->prev = full_head;
-            cur->next = *full_head;
-            *full_head = cur;
-            cur = t;
-            scanned++;
-            continue;
-        }
-
-        // printf("scanned %d\n", scanned);
-        int first = __builtin_ctzll(mask);
-        assert(first < 64);
-        // printf("mask: %lx, first: %d\n", mask, first);
-        cur->isfree[i] ^= (1L << first);
-
-        int idx = first + i * 64;
-
-        // printf("Using index %d\n", idx);
-
-        void* rtn = &cur->atoms[idx];
-#ifndef NDEBUG
-        Block* b = Block::forPointer(rtn);
-        assert(b == cur);
-        int offset = (char*)rtn - (char*)b;
-        assert(offset % rounded_size == 0);
-#endif
-#ifndef NVALGRIND
-        // VALGRIND_MEMPOOL_ALLOC(cur, rtn, rounded_size);
-#endif
-        return rtn;
-    }
-}
+static void* allocFromBlock(Block* b) {
+    int i = 0;
+    uint64_t mask = 0;
+    for (; i < BITFIELD_ELTS; i++) {
+        mask = b->isfree[i];
+        if (mask != 0L) {
+            break;
+        }
+    }
+
+    if (i == BITFIELD_ELTS) {
+        return NULL;
+    }
+
+    int first = __builtin_ctzll(mask);
+    assert(first < 64);
+    assert(b->isfree[i] & (1L << first));
+    b->isfree[i] ^= (1L << first);
+    // printf("Marking %d:%d: %p=%lx\n", i, first, &b->isfree[i], b->isfree[i]);
+
+    int idx = first + i * 64;
+
+    void* rtn = &b->atoms[idx];
+    return rtn;
+}
+
+static Block* claimBlock(size_t rounded_size, Block** free_head) {
+    Block* free_block = *free_head;
+    if (free_block) {
+        removeFromLL(free_block);
+        return free_block;
+    }
+
+    return alloc_block(rounded_size, NULL);
+}
+
+void* Heap::allocSmall(size_t rounded_size, int bucket_idx) {
+    _collectIfNeeded(rounded_size);
+
+    Block** free_head = &heads[bucket_idx];
+    Block** full_head = &full_heads[bucket_idx];
+
+    ThreadBlockCache* cache = thread_caches.get();
+
+    Block** cache_head = &cache->cache_heads[bucket_idx];
+
+    static StatCounter sc_total("gc_total");
+    sc_total.log();
+
+    while (true) {
+        Block* cache_block = *cache_head;
+        if (cache_block) {
+            void* rtn = allocFromBlock(cache_block);
+            if (rtn)
+                return rtn;
+
+            removeFromLL(cache_block);
+        }
+
+        static StatCounter sc_fallback("gc_nocache");
+        sc_fallback.log();
+
+        LOCK_REGION(lock);
+
+        if (cache_block) {
+            insertIntoLL(full_head, cache_block);
+        }
+
+        assert(*cache_head == NULL);
+
+        // should probably be called allocBlock:
+        Block* myblock = claimBlock(rounded_size, &heads[bucket_idx]);
+        assert(myblock);
+        assert(!myblock->next);
+        assert(!myblock->prev);
+
+        insertIntoLL(cache_head, myblock);
    }
}
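
An aside on the allocFromBlock helper added above: it finds the first free slot in a multi-word bitmap by counting trailing zeros with __builtin_ctzll (a GCC/Clang builtin). A standalone sketch of the same scan, with an assumed BITFIELD_ELTS value:

// Standalone sketch of the bitmap scan in allocFromBlock: find the first
// set bit across an array of 64-bit words, clear it, and return its overall
// index (or -1 if every word is zero). The constant is assumed.
#include <cstdint>

constexpr int BITFIELD_ELTS = 4;  // assumed; corresponds to uint64_t isfree[N]

int allocIndex(uint64_t (&isfree)[BITFIELD_ELTS]) {
    for (int i = 0; i < BITFIELD_ELTS; i++) {
        uint64_t mask = isfree[i];
        if (mask == 0)
            continue;                        // this word is fully allocated
        int first = __builtin_ctzll(mask);   // index of the lowest set bit
        isfree[i] ^= (1ULL << first);        // mark that slot as in use
        return i * 64 + first;               // atom index within the block
    }
    return -1;                               // block is full
}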
@@ -392,8 +419,20 @@ void Heap::freeUnmarked() {
     for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
         bytes_freed += freeChain(heads[bidx]);
         bytes_freed += freeChain(full_heads[bidx]);
+
+        while (Block* b = full_heads[bidx]) {
+            // these should be added at the end...
+            removeFromLL(b);
+            insertIntoLL(&heads[bidx], b);
+        }
     }
 
+    thread_caches.forEachValue([&bytes_freed](ThreadBlockCache* cache) {
+        for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
+            bytes_freed += freeChain(cache->cache_heads[bidx]);
+        }
+    });
+
     LargeObj* cur = large_head;
     while (cur) {
         void* p = cur->data;
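
The new insertIntoLL/removeFromLL helpers use a pointer-to-a-pointer intrusive list: each node's prev field stores the address of whatever pointer points at it (a list head or the previous node's next field), so unlinking needs no special case for the head. A self-contained sketch of the same pattern, with hypothetical Node/insert/remove names:

// Sketch of the intrusive-list pattern behind insertIntoLL/removeFromLL:
// node->prev holds the address of the pointer that points at the node, so
// removal works identically for the head and for interior nodes.
#include <cassert>
#include <cstdio>

struct Node {
    Node* next = nullptr;
    Node** prev = nullptr;  // address of the pointer pointing at us
};

void insert(Node** head, Node* n) {
    n->next = *head;
    if (n->next)
        n->next->prev = &n->next;
    *head = n;
    n->prev = head;
}

void remove(Node* n) {
    if (n->next)
        n->next->prev = n->prev;
    *n->prev = n->next;   // works whether *prev is a head or a next field
    n->next = nullptr;
    n->prev = nullptr;
}

int main() {
    Node* head = nullptr;
    Node a, b;
    insert(&head, &a);
    insert(&head, &b);    // list: b -> a
    remove(&b);           // unlink the head with no special casing
    assert(head == &a && a.prev == &head);
    printf("ok\n");
}

This is what lets the ThreadBlockCache destructor and freeUnmarked move blocks between lists with plain removeFromLL/insertIntoLL calls.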
@@ -80,7 +80,7 @@ private:
     Block* full_heads[NUM_BUCKETS];
     LargeObj* large_head = NULL;
 
-    void* allocSmall(size_t rounded_size, Block** head, Block** full_head);
+    void* allocSmall(size_t rounded_size, int bucket_idx);
     void* allocLarge(size_t bytes);
 
     // DS_DEFINE_MUTEX(lock);
@@ -106,16 +106,16 @@ public:
         void* rtn;
         // assert(bytes >= 16);
         if (bytes <= 16)
-            rtn = allocSmall(16, &heads[0], &full_heads[0]);
+            rtn = allocSmall(16, 0);
         else if (bytes <= 32)
-            rtn = allocSmall(32, &heads[1], &full_heads[1]);
+            rtn = allocSmall(32, 1);
         else if (bytes > sizes[NUM_BUCKETS - 1])
             rtn = allocLarge(bytes);
         else {
             rtn = NULL;
             for (int i = 2; i < NUM_BUCKETS; i++) {
                 if (sizes[i] >= bytes) {
-                    rtn = allocSmall(sizes[i], &heads[i], &full_heads[i]);
+                    rtn = allocSmall(sizes[i], i);
                     break;
                 }
             }
@@ -129,7 +129,9 @@ public:
     void free(void* ptr);
 
+    // not thread safe:
     void* getAllocationFromInteriorPointer(void* ptr);
+    // not thread safe:
     void freeUnmarked();
 };
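
For reference, the dispatch that Heap::alloc() performs above is a first-fit search over an ascending size-class table. A sketch of the same mapping follows; the sizes[] values are assumed for illustration (the diff does not show the real table), and the explicit checks for the 16- and 32-byte classes are folded into the loop:

// Sketch of the bucket dispatch in Heap::alloc(), under assumed sizes[].
#include <cstddef>

constexpr int NUM_BUCKETS = 8;
constexpr size_t sizes[NUM_BUCKETS] = { 16, 32, 48, 64, 96, 128, 192, 256 };

// Returns the bucket index for a small allocation, or -1 when the request
// has to go through the large-object path instead.
int bucketFor(size_t bytes) {
    if (bytes > sizes[NUM_BUCKETS - 1])
        return -1;                       // handled by allocLarge()
    for (int i = 0; i < NUM_BUCKETS; i++)
        if (sizes[i] >= bytes)
            return i;                    // first size class that fits
    return -1;                           // unreachable
}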