Commit 081c502b authored by Julien Muchembled

client: new cache algorithm

parent c84c48ee
@@ -15,26 +15,46 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 from __future__ import division
-import math
-from bisect import insort
+from BTrees.LOBTree import LOBTree
+from gc import get_referents
+from struct import Struct
+from sys import getsizeof
+
+s = Struct('d')
+pack_double = s.pack
+unpack_double = s.unpack
+s = Struct('q')
+pack_long = s.pack
+unpack_long = s.unpack
+del s
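
This block prepares an order-preserving double<->long conversion. For
non-negative IEEE-754 doubles, the 64-bit pattern read as a signed integer
sorts exactly like the doubles themselves, and integer +1 yields the next
representable double, which is what _add below exploits. A small
demonstration (not part of the commit):

    from struct import Struct

    def as_long(x, _pack=Struct('d').pack, _unpack=Struct('q').unpack):
        return _unpack(_pack(x))[0]

    keys = [0.5, 1.0, 2.0, 1e300]
    encoded = [as_long(k) for k in keys]
    assert encoded == sorted(encoded)  # order preserved while doubles are >= 0
    assert as_long(1.0) + 1 == as_long(1.0000000000000002)  # "+1" == nextafter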
+
+def internalSizeOfBTree(x):
+    module = type(x).__module__
+    seen = set()
+    left = [x]
+    size = 0
+    while left:
+        x = left.pop()
+        seen.add(x)
+        size += getsizeof(x)
+        left.extend(x for x in get_referents(x)
+                    if type(x).__module__ == module and x not in seen)
+    return size
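
internalSizeOfBTree estimates the memory overhead of the index itself: it
walks, via gc.get_referents, every sub-object whose type lives in the same
BTrees module as the tree (internal nodes and buckets) and sums
sys.getsizeof over them; keys and values of foreign types are not counted.
A hypothetical usage sketch (not part of the commit):

    from BTrees.LOBTree import LOBTree

    tree = LOBTree()
    for i in range(10000):
        tree[i] = None
    print(internalSizeOfBTree(tree))  # bytes used by the tree structure itself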
 class CacheItem(object):
 
-    __slots__ = ('oid', 'tid', 'next_tid', 'data',
-                 'counter', 'level', 'expire',
-                 'prev', 'next')
+    __slots__ = 'oid', 'tid', 'next_tid', 'data', 'counter', 'expire'
 
     def __repr__(self):
         s = ''
         for attr in self.__slots__:
             try:
                 value = getattr(self, attr)
-                if value:
-                    if attr in ('prev', 'next'):
-                        s += ' %s=<...>' % attr
-                        continue
-                    elif attr == 'data':
-                        value = '...'
+                if attr == 'data':
+                    s += ' len(%s)=%s' % (attr, len(value))
+                    continue
+                if attr == 'expire':
+                    value = unpack_double(pack_long(value))[0]
                 s += ' %s=%r' % (attr, value)
             except AttributeError:
                 pass
@@ -44,261 +64,186 @@ class CacheItem(object):
         return self.tid < other.tid
 
 class ClientCache(object):
-    """In-memory pickle cache based on Multi-Queue cache algorithm
+    """In-memory pickle cache based on LFRU cache algorithm
 
-    Multi-Queue algorithm for Second Level Buffer Caches:
-    https://www.usenix.org/event/usenix01/full_papers/zhou/zhou_html/index.html
-
-    Quick description:
-    - There are multiple "regular" queues, plus a history queue
-    - The queue to store an object in depends on its access frequency
-    - The queue an object is in defines its lifespan (higher-index queue eq.
-      longer lifespan)
-      -> The more often an object is accessed, the higher lifespan it will
-         have
-    - Upon cache or history hit, object frequency is increased and object
-      might get moved to longer-lived queue
-    - Each access "ages" objects in cache, and an aging object is moved to
-      shorter-lived queue as it ages without being accessed, or in the
-      history queue if it's really too old.
-    - The history queue only contains items with counter > 0
+    This Least Frequent Recently Used implementation is adapted to handle
+    records of different sizes. This is possible thanks to a B+Tree: the use
+    of such a complex structure is quite unusual for a cache, but we use a C
+    implementation that's relatively fast compared to the cost of a cache
+    miss.
+
+    This algorithm adapts well, regardless of its maximum allowed size,
+    without any tweaking.
     """
-    __slots__ = ('max_size', '_life_time', '_max_history_size',
-                 '_queue_list', '_oid_dict', '_time', '_size', '_history_size',
+    __slots__ = ('max_size', '_oid_dict', '_size', '_added', '_items',
                  '_nhit', '_nmiss')
 
-    def __init__(self, life_time=10000, max_history_size=100000,
-                 max_size=20*1024*1024):
-        self._life_time = life_time
-        self._max_history_size = max_history_size
+    def __init__(self, max_size=20*1024*1024):
         self.max_size = max_size
         self.clear()
 
     def clear(self):
         """Reset cache"""
-        self._queue_list = [None] # first is history
         self._oid_dict = {}
-        self._time = 0
-        self._size = 0
-        self._history_size = 0
-        self._nhit = self._nmiss = 0
+        self._size = self._nhit = self._nmiss = 0
+        # Make sure to never produce negative keys, else
+        # we could not manipulate them when encoded as integers.
+        self._added = self.max_size
+        self._items = LOBTree()
 
     def __repr__(self):
         nload = self._nhit + self._nmiss
-        return ("<%s #loads=%s #oids=%s size=%s time=%s queue_length=%r"
-                " (life_time=%s max_history_size=%s max_size=%s)>") % (
+        return ("<%s #loads=%s #oids=%s size=%s #items=%s"
+                " btree_overhead=%s (max_size=%s)>") % (
             self.__class__.__name__,
             nload and '%s (%.3g%% hit)' % (nload, 100 * self._nhit / nload),
-            len(self._oid_dict), self._size, self._time,
-            [self._history_size] + [
-                sum(1 for _ in self._iterQueue(level))
-                for level in xrange(1, len(self._queue_list))],
-            self._life_time, self._max_history_size, self.max_size)
+            len(self._oid_dict), self._size, len(self._items),
+            internalSizeOfBTree(self._items),
+            self.max_size)
-    def _iterQueue(self, level):
-        """for debugging purpose"""
-        if level < len(self._queue_list):
-            # Lockless iteration of the queue.
-            # XXX: In case of race condition, the result is wrong but at least,
-            #      it won't loop endlessly. If one want to collect accurate
-            #      statistics, a lock should be used.
-            expire = 0
-            item = self._queue_list[level]
-            while item and item.level == level and expire < item.expire:
-                yield item
-                expire = item.expire
-                item = item.next
-
-    def _remove_from_oid_dict(self, item):
-        item_list = self._oid_dict[item.oid]
-        item_list.remove(item)
-        if not item_list:
-            del self._oid_dict[item.oid]
-
-    def _add(self, item):
-        level = item.level
-        try:
-            head = self._queue_list[level]
-        except IndexError:
-            assert len(self._queue_list) == level
-            self._queue_list.append(item)
-            item.prev = item.next = item
-        else:
-            if head:
-                item.prev = tail = head.prev
-                tail.next = head.prev = item
-                item.next = head
-            else:
-                self._queue_list[level] = item
-                item.prev = item.next = item
-        if level:
-            item.expire = self._time + self._life_time
-        else:
-            self._empty(item)
-            self._history_size += 1
-            if self._max_history_size < self._history_size:
-                self._remove(head)
-                self._remove_from_oid_dict(head)
-
-    def _empty(self, item):
-        self._size -= len(item.data)
-        item.data = None
-
-    def _remove(self, item):
-        level = item.level
-        if level is not None:
-            if level:
-                item.level = level - 1
-            else:
-                self._history_size -= 1
-            next = item.next
-            if next is item:
-                self._queue_list[level] = next = None
-            else:
-                item.prev.next = next
-                next.prev = item.prev
-                if self._queue_list[level] is item:
-                    self._queue_list[level] = next
-            return next
-
-    def _fetched(self, item, _log=math.log):
-        self._remove(item)
-        item.counter = counter = item.counter + 1
-        # XXX It might be better to adjust the level according to the object
-        #     size. See commented factor for example.
-        item.level = 1 + int(_log(counter, 2)
-            # * (1.01 - len(item.data) / self.max_size)
-            )
-        self._add(item)
-        self._time = time = self._time + 1
-        for head in self._queue_list[1:]:
-            if head and head.expire < time:
-                self._remove(head)
-                if head.level or head.counter:
-                    self._add(head)
-                else:
-                    self._empty(head)
-                    self._remove_from_oid_dict(head)
-                break
     def _load(self, oid, before_tid=None):
         item_list = self._oid_dict.get(oid)
         if item_list:
             if before_tid:
-                for item in reversed(item_list):
+                for item in item_list:
                     if item.tid < before_tid:
                         next_tid = item.next_tid
                         if next_tid and next_tid < before_tid:
                             break
                         return item
             else:
-                item = item_list[-1]
+                item = item_list[0]
                 if not item.next_tid:
                     return item
 
-    def load(self, oid, before_tid=None):
+    def load(self, oid, before_tid):
         """Return a revision of oid that was current before given tid"""
         item = self._load(oid, before_tid)
         if item:
-            data = item.data
-            if data is not None:
-                self._nhit += 1
-                self._fetched(item)
-                return data, item.tid, item.next_tid
+            del self._items[item.expire]
+            item.counter += 1
+            self._add(item)
+            self._nhit += 1
+            return item.data, item.tid, item.next_tid
         self._nmiss += 1
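
Note that per-oid lists are now ordered newest first (item_list[0] is the
latest revision). The read path can be exercised as follows, in a minimal
sketch consistent with the tests further down (assumes the ClientCache from
this commit is importable; not part of the commit):

    cache = ClientCache()
    cache.store(1, 'a', 10, 15)    # oid 1, data 'a', valid for tids [10, 15)
    cache.store(1, 'b', 15, None)  # current revision since tid 15
    assert cache.load(1, 12) == ('a', 10, 15)      # current before tid 12
    assert cache.load(1, None) == ('b', 15, None)  # latest revision
    assert cache.load(2, None) is None             # miss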
+    def _forget(self, item):
+        items = self._oid_dict[item.oid]
+        items.remove(item)
+        if not items:
+            del self._oid_dict[item.oid]
+        self._size -= len(item.data)
+        del self._items[item.expire]
+
+    def _add(self, item):
+        # The initial idea was to compute keys as follows:
+        #   (added - size) * item.counter
+        # However, after running for a long time, this tends to degenerate:
+        # - size becomes more and more negligible over time
+        # - objects that are most often accessed become impossible to remove,
+        #   making the cache too slow to adapt after a change of workload
+        # - 64 bits is not enough
+        # This was solved in several ways, by using the following formula:
+        #   min_key - size + (added - min_key) * item.counter
+        # and doubles.
+        # BTrees does not have an optimized class for doubles so we encode
+        # them as integers, which preserves the same order as long as they're
+        # positive (hence some extra tweak to avoid negative numbers in some
+        # rare cases) and it becomes easier to compute the next double
+        # (+1 instead of libm.nextafter). The downside is that conversion
+        # between double and long is a bit expensive in Python.
+        added = self._added
+        items = self._items
+        try:
+            x = items.minKey()
+        except ValueError:
+            x = added
+        else:
+            # Most of the time, the smallest key is smaller than `added`. In
+            # the very rare case it isn't, make sure to produce a positive key.
+            x = min(added, unpack_double(pack_long(x))[0])
+        size = len(item.data)
+        expire = unpack_long(pack_double(
+            x - size + (added - x) * item.counter
+            ))[0]
+        for x in items.iterkeys(expire):
+            if x != expire:
+                break
+            expire += 1
+        self._added = added + size
+        item.expire = expire
+        items[expire] = item
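
A worked example of the comment's formula with made-up numbers, showing why
bigger records are evicted sooner and frequently-read ones later (not part
of the commit):

    # key = min_key - size + (added - min_key) * counter
    min_key, added = 90.0, 110.0
    key = lambda size, counter: min_key - size + (added - min_key) * counter
    assert key(5, 1) == 105.0   # small record, read once
    assert key(50, 1) == 60.0   # ten times bigger: much smaller key, dies first
    assert key(5, 2) == 125.0   # read twice: larger key, survives longer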
     def store(self, oid, data, tid, next_tid):
         """Store a new data record in the cache"""
         size = len(data)
         max_size = self.max_size
         if size < max_size:
-            item = self._load(oid, next_tid)
-            if item:
-                # We don't handle late invalidations for cached oids, because
-                # the caller is not supposed to explicitly asks for tids after
-                # app.last_tid (and the cache should be empty when app.last_tid
-                # is still None).
-                assert item.tid == tid, (item, tid)
-                if item.level: # already stored
-                    assert item.next_tid == next_tid and item.data == data
-                    return
-                assert not item.data
-                # Possible case of late invalidation.
-                item.next_tid = next_tid
+            i = 0
+            try:
+                items = self._oid_dict[oid]
+            except KeyError:
+                items = self._oid_dict[oid] = []
+                counter = 1
             else:
-                item = CacheItem()
-                item.oid = oid
-                item.tid = tid
-                item.next_tid = next_tid
-                item.counter = 0
-                item.level = None
-                try:
-                    item_list = self._oid_dict[oid]
-                except KeyError:
-                    self._oid_dict[oid] = [item]
-                else:
-                    if next_tid:
-                        insort(item_list, item)
-                    else:
-                        prev = item_list[-1]
-                        assert prev.next_tid <= tid, (prev, item)
-                        item.counter = prev.counter
-                        if prev.level:
-                            prev.counter = 0
-                            if prev.level > 1:
-                                self._fetched(prev)
-                            item_list.append(item)
-                        else:
-                            self._remove(prev)
-                            item_list[-1] = item
+                for item in items:
+                    if item.tid < tid:
+                        assert None is not item.next_tid <= tid
+                        break
+                    if item.tid == tid:
+                        # We don't handle late invalidations for cached oids,
+                        # because the caller is not supposed to explicitly ask
+                        # for tids after app.last_tid (and the cache should be
+                        # empty when app.last_tid is still None).
+                        assert item.next_tid == next_tid and item.data == data
+                        return
+                    i += 1
+                if next_tid:
+                    counter = 1
+                else:
+                    counter = item.counter
+                    if counter != 1:
+                        del self._items[item.expire]
+                        item.counter = 1
+                        self._add(item)
+            item = CacheItem()
+            item.oid = oid
+            item.tid = tid
+            item.next_tid = next_tid
             item.data = data
-            self._fetched(item)
+            item.counter = counter
+            items.insert(i, item)
             self._size += size
-            if max_size < self._size:
-                for head in self._queue_list[1:]:
-                    while head:
-                        next = self._remove(head)
-                        if head.counter:
-                            head.level = 0
-                            self._add(head)
-                        else:
-                            self._empty(head)
-                            self._remove_from_oid_dict(head)
-                            if self._size <= max_size:
-                                return
-                        head = next
+            self._add(item)
+            while max_size < self._size:
+                items = self._items
+                self._forget(items[items.minKey()])
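
The two-line loop at the end is the whole size bound: entries with the
smallest keys are forgotten until the payload fits again. A sketch of the
effect, mirroring the updated test below (assumes the ClientCache from this
commit; not part of the commit):

    cache = ClientCache(max_size=10)
    cache.store(1, 'x', 1, None)        # 1 byte
    cache.store(2, 'x' * 5, 1, None)    # 5 bytes, total 6
    cache.store(3, 'x' * 5, 1, None)    # total would be 11: evict smallest key
    assert cache.load(2, None) is None  # the older big record was dropped
    assert cache._size <= cache.max_size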
     def invalidate(self, oid, tid):
         """Mark data record as being valid only up to given tid"""
-        try:
-            item = self._oid_dict[oid][-1]
-        except KeyError:
-            pass
-        else:
+        items = self._oid_dict.get(oid)
+        if items:
+            item = items[0]
             if item.next_tid is None:
                 item.next_tid = tid
             else:
                 assert item.next_tid <= tid, (item, oid, tid)
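
invalidate only caps the currently-valid revision; historical reads keep
working. A minimal sketch (not from the commit):

    cache = ClientCache()
    cache.store(1, 'a', 10, None)  # current revision
    cache.invalidate(1, 20)        # a newer tid 20 was committed elsewhere
    assert cache.load(1, None) is None         # no longer current
    assert cache.load(1, 15) == ('a', 10, 20)  # historical read still served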
     def clear_current(self):
-        for oid, item_list in self._oid_dict.items():
-            item = item_list[-1]
+        for oid, items in self._oid_dict.items():
+            item = items[0]
             if item.next_tid is None:
-                if item.level:
-                    self._empty(item)
-                self._remove(item)
-                del item_list[-1]
-                # We don't preserve statistics of removed items. This could be
-                # done easily when previous versions are cached, by copying
-                # counters, but it would not be fair for other oids, so it's
-                # probably not worth it.
-                if not item_list:
-                    del self._oid_dict[oid]
+                self._forget(item)
 def test(self):
+    orig_add = ClientCache._add
+    def _add(cache, item):
+        orig_add(cache, item)
+        self.assertLessEqual(0, cache._items.minKey())
+    ClientCache._add = _add
     cache = ClientCache()
     repr(cache)
     self.assertEqual(cache.load(1, 10), None)
@@ -324,24 +269,26 @@ def test(self):
     self.assertEqual(cache.load(1, 20), ('15', 15, 20))
     cache.store(1, '10', 10, 15)
     cache.store(1, '20', 20, 21)
-    self.assertEqual([5, 10, 15, 20], [x.tid for x in cache._oid_dict[1]])
+    self.assertEqual([20, 15, 10, 5], [x.tid for x in cache._oid_dict[1]])
     self.assertRaises(AssertionError, cache.store, 1, '20', 20, None)
     repr(cache)
-    map(repr, cache._queue_list)
-    # Test late invalidations.
-    cache.clear()
-    cache.store(1, '10*', 10, None)
-    cache.max_size = cache._size
-    cache.store(2, '10', 10, 15)
-    self.assertEqual(cache._queue_list[0].oid, 1)
-    cache.store(2, '15', 15, None)
-    self.assertEqual(cache._queue_list[2].oid, 2)
-    data = '10', 10, 15
-    cache.store(1, *data)
-    self.assertEqual(cache.load(1, 15), data)
-    self.assertEqual(1, cache._history_size)
+    cache = ClientCache(10)
+    data1 = "x", 1, None
+    cache.store(1, "x", 1, None)
+    repr(*cache._oid_dict[1])
+    data = "xxxxx", 1, None
+    cache.store(2, *data)
+    cache.store(3, *data)
+    self.assertEqual(cache.load(1, None), data1)
+    self.assertEqual(cache.load(2, None), None) # bigger records removed faster
+    self.assertEqual(cache.load(3, None), data)
+    self.assertEqual(cache._size, 6)
     cache.clear_current()
-    self.assertEqual(0, cache._history_size)
+    for oid in 0, 1:
+        cache.store(oid, 'x', 1, None)
+        cache.load(oid, None)
+        cache.load(oid, None)
+    cache.load(0, None)
 
 if __name__ == '__main__':
     import unittest
...
@@ -931,7 +931,7 @@ class Test(NEOThreadedTest):
             ll()
         x2._p_deactivate()
         # Remove last version of x from cache
-        cache._remove(cache._oid_dict[x2._p_oid].pop())
+        cache._forget(cache._oid_dict[x2._p_oid][0])
         with ll, Patch(cluster.client, _loadFromStorage=break_after):
             t = self.newThread(x2._p_activate)
             ll()
...