Commit 7af948cf authored by Julien Muchembled's avatar Julien Muchembled

Lockless stores/checks during replication

parent b7a5bc99
...@@ -61,7 +61,9 @@ ...@@ -61,7 +61,9 @@
partitions. Currently, reads succeed because feeding nodes don't delete partitions. Currently, reads succeed because feeding nodes don't delete
anything while the cluster is operational, for performance reasons: anything while the cluster is operational, for performance reasons:
deletion of dropped partitions must be reimplemented in a scalable way. deletion of dropped partitions must be reimplemented in a scalable way.
(HIGH AVAILABILITY) The same thing happens for writes: storage nodes must discard
stores/checks of dropped partitions (in lockObject, that can be done by
raising ConflictError(None)). (HIGH AVAILABILITY)
Storage Storage
- Use libmysqld instead of a stand-alone MySQL server. - Use libmysqld instead of a stand-alone MySQL server.
......
...@@ -410,6 +410,8 @@ class Application(ThreadedApplication): ...@@ -410,6 +410,8 @@ class Application(ThreadedApplication):
def store(self, oid, serial, data, version, transaction): def store(self, oid, serial, data, version, transaction):
"""Store object.""" """Store object."""
logging.debug('storing oid %s serial %s', dump(oid), dump(serial)) logging.debug('storing oid %s serial %s', dump(oid), dump(serial))
if not serial: # BBB
serial = ZERO_TID
self._store(self._txn_container.get(transaction), oid, serial, data) self._store(self._txn_container.get(transaction), oid, serial, data)
def _store(self, txn_context, oid, serial, data, data_serial=None): def _store(self, txn_context, oid, serial, data, data_serial=None):
...@@ -472,7 +474,7 @@ class Application(ThreadedApplication): ...@@ -472,7 +474,7 @@ class Application(ThreadedApplication):
oid, (serial, conflict_serial) = pop_conflict() oid, (serial, conflict_serial) = pop_conflict()
except KeyError: except KeyError:
return return
if conflict_serial == ZERO_TID: if conflict_serial == MAX_TID:
if 1: if 1:
# XXX: disable deadlock avoidance code until it is fixed # XXX: disable deadlock avoidance code until it is fixed
logging.info('Deadlock avoidance on %r:%r', logging.info('Deadlock avoidance on %r:%r',
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
from ZODB.TimeStamp import TimeStamp from ZODB.TimeStamp import TimeStamp
from neo.lib import logging from neo.lib import logging
from neo.lib.protocol import ZERO_TID from neo.lib.protocol import MAX_TID
from neo.lib.util import dump from neo.lib.util import dump
from neo.lib.exception import NodeNotReady from neo.lib.exception import NodeNotReady
from neo.lib.handler import MTEventHandler from neo.lib.handler import MTEventHandler
...@@ -62,10 +62,13 @@ class StorageAnswersHandler(AnswerBaseHandler): ...@@ -62,10 +62,13 @@ class StorageAnswersHandler(AnswerBaseHandler):
self.app.setHandlerData(args) self.app.setHandlerData(args)
def answerStoreObject(self, conn, conflict, oid, serial): def answerStoreObject(self, conn, conflict, oid, serial):
if not conflict:
# Ignore if not locked on storage side.
return
txn_context = self.app.getHandlerData() txn_context = self.app.getHandlerData()
object_stored_counter_dict = txn_context[ object_stored_counter_dict = txn_context[
'object_stored_counter_dict'][oid] 'object_stored_counter_dict'][oid]
if conflict: if conflict != serial:
# Conflicts can not be resolved now because 'conn' is locked. # Conflicts can not be resolved now because 'conn' is locked.
# We must postpone the resolution (by queuing the conflict in # We must postpone the resolution (by queuing the conflict in
# 'conflict_dict') to avoid any deadlock with another thread that # 'conflict_dict') to avoid any deadlock with another thread that
...@@ -76,10 +79,10 @@ class StorageAnswersHandler(AnswerBaseHandler): ...@@ -76,10 +79,10 @@ class StorageAnswersHandler(AnswerBaseHandler):
# receive the conflict answer from the first store on S2. # receive the conflict answer from the first store on S2.
logging.info('%r report a conflict for %r with %r', logging.info('%r report a conflict for %r with %r',
conn, dump(oid), dump(conflict)) conn, dump(oid), dump(conflict))
if conflict != ZERO_TID: if conflict != MAX_TID:
# If this conflict is not already resolved, mark it for # If this conflict is not already resolved, mark it for
# resolution. # resolution.
if conflict <= txn_context['resolved_dict'].get(oid, ZERO_TID): if conflict <= txn_context['resolved_dict'].get(oid, ''):
return return
if conflict in object_stored_counter_dict: if conflict in object_stored_counter_dict:
raise NEOStorageError('Storages %s accepted object %s' raise NEOStorageError('Storages %s accepted object %s'
......
...@@ -940,10 +940,12 @@ class StoreObject(Packet): ...@@ -940,10 +940,12 @@ class StoreObject(Packet):
""" """
Ask to store an object. Send an OID, an original serial, a current Ask to store an object. Send an OID, an original serial, a current
transaction ID, and data. C -> S. transaction ID, and data. C -> S.
Answer if an object has been stored. If an object is in conflict, As for IStorage, 'serial' is ZERO_TID for new objects.
a serial of the conflicting transaction is returned. In this case, Answered 'conflict' value means:
if this serial is newer than the current transaction ID, a client - None: lockless
node must not try to resolve the conflict. S -> C. - serial: ok
- MAX_TID: deadlock
- else: conflict
""" """
_fmt = PStruct('ask_store_object', _fmt = PStruct('ask_store_object',
POID('oid'), POID('oid'),
......
...@@ -38,13 +38,14 @@ from neo.lib.debug import register as registerLiveDebugger ...@@ -38,13 +38,14 @@ from neo.lib.debug import register as registerLiveDebugger
class Application(BaseApplication): class Application(BaseApplication):
"""The storage node application.""" """The storage node application."""
tm = None
def __init__(self, config): def __init__(self, config):
super(Application, self).__init__( super(Application, self).__init__(
config.getSSL(), config.getDynamicMasterList()) config.getSSL(), config.getDynamicMasterList())
# set the cluster name # set the cluster name
self.name = config.getCluster() self.name = config.getCluster()
self.tm = TransactionManager(self)
self.dm = buildDatabaseManager(config.getAdapter(), self.dm = buildDatabaseManager(config.getAdapter(),
(config.getDatabase(), config.getEngine(), config.getWait()), (config.getDatabase(), config.getEngine(), config.getWait()),
) )
...@@ -93,6 +94,7 @@ class Application(BaseApplication): ...@@ -93,6 +94,7 @@ class Application(BaseApplication):
def log(self): def log(self):
self.em.log() self.em.log()
self.nm.log() self.nm.log()
if self.tm:
self.tm.log() self.tm.log()
if self.pt is not None: if self.pt is not None:
self.pt.log() self.pt.log()
...@@ -184,6 +186,7 @@ class Application(BaseApplication): ...@@ -184,6 +186,7 @@ class Application(BaseApplication):
for conn in self.em.getConnectionList(): for conn in self.em.getConnectionList():
if conn not in (self.listening_conn, self.master_conn): if conn not in (self.listening_conn, self.master_conn):
conn.close() conn.close()
self.tm = TransactionManager(self)
try: try:
self.initialize() self.initialize()
self.doOperation() self.doOperation()
...@@ -194,6 +197,7 @@ class Application(BaseApplication): ...@@ -194,6 +197,7 @@ class Application(BaseApplication):
logging.error('primary master is down: %s', msg) logging.error('primary master is down: %s', msg)
finally: finally:
self.checker = Checker(self) self.checker = Checker(self)
del self.tm
def connectToPrimary(self): def connectToPrimary(self):
"""Find a primary master node, and connect to it. """Find a primary master node, and connect to it.
...@@ -256,7 +260,6 @@ class Application(BaseApplication): ...@@ -256,7 +260,6 @@ class Application(BaseApplication):
# Forget all unfinished data. # Forget all unfinished data.
self.dm.dropUnfinishedData() self.dm.dropUnfinishedData()
self.tm.reset()
self.task_queue = task_queue = deque() self.task_queue = task_queue = deque()
try: try:
......
...@@ -72,7 +72,7 @@ class ClientOperationHandler(EventHandler): ...@@ -72,7 +72,7 @@ class ClientOperationHandler(EventHandler):
def _askStoreObject(self, conn, oid, serial, compression, checksum, data, def _askStoreObject(self, conn, oid, serial, compression, checksum, data,
data_serial, ttid, request_time): data_serial, ttid, request_time):
try: try:
self.app.tm.storeObject(ttid, serial, oid, compression, locked = self.app.tm.storeObject(ttid, serial, oid, compression,
checksum, data, data_serial) checksum, data, data_serial)
except ConflictError, err: except ConflictError, err:
# resolvable or not # resolvable or not
...@@ -93,7 +93,7 @@ class ClientOperationHandler(EventHandler): ...@@ -93,7 +93,7 @@ class ClientOperationHandler(EventHandler):
duration = time.time() - request_time duration = time.time() - request_time
if duration > SLOW_STORE: if duration > SLOW_STORE:
logging.info('StoreObject delay: %.02fs', duration) logging.info('StoreObject delay: %.02fs', duration)
conn.answer(Packets.AnswerStoreObject(None)) conn.answer(Packets.AnswerStoreObject(locked))
def askStoreObject(self, conn, oid, serial, def askStoreObject(self, conn, oid, serial,
compression, checksum, data, data_serial, ttid): compression, checksum, data, data_serial, ttid):
...@@ -171,7 +171,7 @@ class ClientOperationHandler(EventHandler): ...@@ -171,7 +171,7 @@ class ClientOperationHandler(EventHandler):
def _askCheckCurrentSerial(self, conn, ttid, serial, oid, request_time): def _askCheckCurrentSerial(self, conn, ttid, serial, oid, request_time):
try: try:
self.app.tm.checkCurrentSerial(ttid, serial, oid) locked = self.app.tm.checkCurrentSerial(ttid, serial, oid)
except ConflictError, err: except ConflictError, err:
# resolvable or not # resolvable or not
conn.answer(Packets.AnswerCheckCurrentSerial(err.tid)) conn.answer(Packets.AnswerCheckCurrentSerial(err.tid))
...@@ -191,7 +191,7 @@ class ClientOperationHandler(EventHandler): ...@@ -191,7 +191,7 @@ class ClientOperationHandler(EventHandler):
duration = time.time() - request_time duration = time.time() - request_time
if duration > SLOW_STORE: if duration > SLOW_STORE:
logging.info('CheckCurrentSerial delay: %.02fs', duration) logging.info('CheckCurrentSerial delay: %.02fs', duration)
conn.answer(Packets.AnswerCheckCurrentSerial(None)) conn.answer(Packets.AnswerCheckCurrentSerial(locked))
# like ClientOperationHandler but read-only & only for tid <= backup_tid # like ClientOperationHandler but read-only & only for tid <= backup_tid
......
...@@ -31,8 +31,8 @@ class MasterOperationHandler(BaseMasterHandler): ...@@ -31,8 +31,8 @@ class MasterOperationHandler(BaseMasterHandler):
dm._setBackupTID(dm.getLastIDs()[0] or ZERO_TID) dm._setBackupTID(dm.getLastIDs()[0] or ZERO_TID)
dm.commit() dm.commit()
def notifyTransactionFinished(self, conn, *args, **kw): def notifyTransactionFinished(self, conn, *args):
self.app.replicator.transactionFinished(*args, **kw) self.app.replicator.transactionFinished(*args)
def notifyPartitionChanges(self, conn, ptid, cell_list): def notifyPartitionChanges(self, conn, ptid, cell_list):
"""This is very similar to Send Partition Table, except that """This is very similar to Send Partition Table, except that
......
...@@ -136,7 +136,7 @@ class Replicator(object): ...@@ -136,7 +136,7 @@ class Replicator(object):
app = self.app app = self.app
pt = app.pt pt = app.pt
uuid = app.uuid uuid = app.uuid
self.partition_dict = p = {} self.partition_dict = {}
self.replicate_dict = {} self.replicate_dict = {}
self.source_dict = {} self.source_dict = {}
self.ttid_set = set() self.ttid_set = set()
...@@ -160,8 +160,7 @@ class Replicator(object): ...@@ -160,8 +160,7 @@ class Replicator(object):
p.next_trans = p.next_obj = next_tid p.next_trans = p.next_obj = next_tid
p.max_ttid = None p.max_ttid = None
if outdated_list: if outdated_list:
self.app.master_conn.ask(Packets.AskUnfinishedTransactions(), self.app.tm.replicating(outdated_list)
offset_list=outdated_list)
def notifyPartitionChanges(self, cell_list): def notifyPartitionChanges(self, cell_list):
"""This is a callback from MasterOperationHandler.""" """This is a callback from MasterOperationHandler."""
...@@ -190,8 +189,7 @@ class Replicator(object): ...@@ -190,8 +189,7 @@ class Replicator(object):
p.max_ttid = INVALID_TID p.max_ttid = INVALID_TID
added_list.append(offset) added_list.append(offset)
if added_list: if added_list:
self.app.master_conn.ask(Packets.AskUnfinishedTransactions(), self.app.tm.replicating(added_list)
offset_list=added_list)
if abort: if abort:
self.abort() self.abort()
...@@ -326,8 +324,7 @@ class Replicator(object): ...@@ -326,8 +324,7 @@ class Replicator(object):
p.next_obj = add64(tid, 1) p.next_obj = add64(tid, 1)
self.updateBackupTID() self.updateBackupTID()
if not p.max_ttid: if not p.max_ttid:
p = Packets.NotifyReplicationDone(offset, tid) self.app.tm.replicated(offset, tid)
self.app.master_conn.notify(p)
logging.debug("partition %u replicated up to %s from %r", logging.debug("partition %u replicated up to %s from %r",
offset, dump(tid), self.current_node) offset, dump(tid), self.current_node)
self.getCurrentConnection().setReconnectionNoDelay() self.getCurrentConnection().setReconnectionNoDelay()
......
This diff is collapsed.
...@@ -28,7 +28,7 @@ class TransactionManagerTests(NeoUnitTestBase): ...@@ -28,7 +28,7 @@ class TransactionManagerTests(NeoUnitTestBase):
self.app = Mock() self.app = Mock()
# no history # no history
self.app.dm = Mock({'getObjectHistory': []}) self.app.dm = Mock({'getObjectHistory': []})
self.app.pt = Mock({'isAssigned': True}) self.app.pt = Mock({'isAssigned': True, 'getPartitions': 2})
self.app.em = Mock({'setTimeout': None}) self.app.em = Mock({'setTimeout': None})
self.manager = TransactionManager(self.app) self.manager = TransactionManager(self.app)
......
...@@ -33,7 +33,7 @@ from neo.lib.exception import DatabaseFailure, StoppedOperation ...@@ -33,7 +33,7 @@ from neo.lib.exception import DatabaseFailure, StoppedOperation
from neo.lib.protocol import CellStates, ClusterStates, NodeStates, Packets, \ from neo.lib.protocol import CellStates, ClusterStates, NodeStates, Packets, \
ZERO_OID, ZERO_TID ZERO_OID, ZERO_TID
from .. import expectedFailure, Patch from .. import expectedFailure, Patch
from . import LockLock, NEOThreadedTest, with_cluster from . import ConnectionFilter, LockLock, NEOThreadedTest, with_cluster
from neo.lib.util import add64, makeChecksum, p64, u64 from neo.lib.util import add64, makeChecksum, p64, u64
from neo.client.exception import NEOPrimaryMasterLost, NEOStorageError from neo.client.exception import NEOPrimaryMasterLost, NEOStorageError
from neo.client.pool import CELL_CONNECTED, CELL_GOOD from neo.client.pool import CELL_CONNECTED, CELL_GOOD
...@@ -1351,11 +1351,11 @@ class Test(NEOThreadedTest): ...@@ -1351,11 +1351,11 @@ class Test(NEOThreadedTest):
reports a conflict after that this conflict was fully resolved with reports a conflict after that this conflict was fully resolved with
another node. another node.
""" """
def answerStoreObject(orig, conn, conflict, **kw): def answerStoreObject(orig, conn, conflict, oid, serial):
if not conflict: if conflict == serial:
p.revert() p.revert()
ll() ll()
orig(conn, conflict, **kw) orig(conn, conflict, oid, serial)
if 1: if 1:
s0, s1 = cluster.storage_list s0, s1 = cluster.storage_list
t1, c1 = cluster.getTransaction() t1, c1 = cluster.getTransaction()
...@@ -1389,6 +1389,36 @@ class Test(NEOThreadedTest): ...@@ -1389,6 +1389,36 @@ class Test(NEOThreadedTest):
storage.store(oid, None, '*' * storage._cache._max_size, '', txn) storage.store(oid, None, '*' * storage._cache._max_size, '', txn)
self.assertRaises(POSException.ConflictError, storage.tpc_vote, txn) self.assertRaises(POSException.ConflictError, storage.tpc_vote, txn)
@with_cluster(replicas=1)
def testConflictWithOutOfDateCell(self, cluster):
"""
C1 S1 S0 C2
begin down begin
U <------- commit
up (remaining out-of-date due to suspended replication)
store ---> O (stored lockless)
`--------------> conflict
resolve -> stored lockless
`------------> locked
committed
"""
s0, s1 = cluster.storage_list
t1, c1 = cluster.getTransaction()
c1.root()['x'] = x = PCounterWithResolution()
t1.commit()
s1.stop()
cluster.join((s1,))
x.value += 1
t2, c2 = cluster.getTransaction()
c2.root()['x'].value += 2
t2.commit()
with ConnectionFilter() as f:
f.delayAskFetchTransactions()
s1.resetNode()
s1.start()
self.tic()
t1.commit()
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment