Commit 6b799777 authored by Vincent Pelletier

Serialise the whole 2PC at cluster level.

This ensures invalidations are sent in strictly ascending TID order.

git-svn-id: https://svn.erp5.org/repos/neo/trunk@2530 71dcc9de-d417-0410-9af5-da40c76e7ee4
parent 6adc92fb
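
To make the guarantee above concrete: the master now keeps a single cluster-wide "transaction in progress" slot. TransactionManager.begin() raises DelayedError while another transaction is still between begin and finish/abort, the delayed AskBeginTransaction is queued on the master, and it is replayed once the running transaction completes. Below is a minimal, self-contained sketch of that pattern; it is illustrative toy code rather than the NEO implementation, and only the DelayedError, begin and remove names are taken from the diff that follows.

class DelayedError(Exception):
    """Raised while another transaction already holds the 2PC slot."""

class ToyTransactionManager(object):
    """Toy model of the serialisation scheme (not the real TransactionManager)."""
    def __init__(self):
        self._locked = None   # TID currently between begin() and remove()
        self._counter = 0     # stand-in for real TID generation

    def begin(self, tid=None):
        if self._locked is not None:
            # Another transaction is in its 2PC: the caller must queue the
            # request and replay it later (the master uses queueEvent for this).
            raise DelayedError
        if tid is None:
            self._counter += 1
            tid = self._counter
        self._locked = tid
        return tid

    def remove(self, tid):
        # Called when the transaction is finished or aborted: release the slot.
        assert self._locked == tid, (self._locked, tid)
        self._locked = None

tm = ToyTransactionManager()
tid1 = tm.begin()
try:
    tm.begin()        # delayed: tid1 is still running
except DelayedError:
    pass
tm.remove(tid1)       # finish/abort releases the slot...
tid2 = tm.begin()     # ...so the next transaction can begin

The actual change goes further: queueEvent()/executeQueuedEvent() on the master park the delayed request, so the client simply keeps waiting for its AnswerBeginTransaction instead of retrying.
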
@@ -601,11 +601,10 @@ class Application(object):
         if self.local_var.txn is not None:
             raise NeoException, 'local_var is not clean in tpc_begin'
         # use the given TID or request a new one to the master
-        self.local_var.tid = tid
-        if tid is None:
-            self._askPrimary(Packets.AskBeginTransaction())
+        self._askPrimary(Packets.AskBeginTransaction(tid))
         if self.local_var.tid is None:
             raise NEOStorageError('tpc_begin failed')
+        assert tid in (None, self.local_var.tid), (tid, self.local_var.tid)
         self.local_var.txn = transaction

     @profiler_decorator
@@ -837,6 +836,7 @@ class Application(object):
             except:
                 neo.logging.error('Exception in tpc_abort while notifying ' \
                     'storage node %r of abortion, ignoring.', conn, exc_info=1)
+        self._getMasterConnection().notify(p)
         # Just wait for responses to arrive. If any leads to an exception,
         # log it and continue: we *must* eat all answers to not disturb the
......
@@ -196,7 +196,7 @@ class EventHandler(object):
     def commitTransaction(self, conn, tid):
         raise UnexpectedPacketError

-    def askBeginTransaction(self, conn):
+    def askBeginTransaction(self, conn, tid):
         raise UnexpectedPacketError

     def answerBeginTransaction(self, conn, tid):
......
@@ -45,6 +45,7 @@ class Application(object):
     last_transaction = ZERO_TID

     def __init__(self, config):
+        self._queued_events = []
         # always use default connector for now
         self.connector_handler = getConnectorHandler()
@@ -578,3 +579,17 @@ class Application(object):
     def isStorageReady(self, uuid):
         return uuid in self.storage_readiness

+    def queueEvent(self, func, conn, *args, **kw):
+        msg_id = conn.getPeerId()
+        self._queued_events.append((func, msg_id, conn, args, kw))
+
+    def executeQueuedEvent(self):
+        queue = self._queued_events
+        while queue:
+            func, msg_id, conn, args, kw = queue.pop(0)
+            if conn.isAborted() or conn.isClosed():
+                continue
+            conn.setPeerId(msg_id)
+            func(conn, *args, **kw)
+            break
@@ -20,7 +20,7 @@ import neo
 from neo.protocol import NodeStates, Packets, ProtocolError
 from neo.master.handlers import MasterHandler
 from neo.util import dump
+from neo.master.transactions import DelayedError

 class ClientServiceHandler(MasterHandler):
     """ Handler dedicated to client during service state """
@@ -47,11 +47,14 @@ class ClientServiceHandler(MasterHandler):
         conn.notify(Packets.NotifyNodeInformation(node_list))
         conn.answer(Packets.AnswerNodeInformation())

-    def askBeginTransaction(self, conn):
+    def askBeginTransaction(self, conn, tid):
         """
         A client request a TID, nothing is kept about it until the finish.
         """
-        conn.answer(Packets.AnswerBeginTransaction(self.app.tm.begin()))
+        try:
+            conn.answer(Packets.AnswerBeginTransaction(self.app.tm.begin(tid)))
+        except DelayedError:
+            self.app.queueEvent(self.askBeginTransaction, conn, tid)

     def askNewOIDs(self, conn, num_oids):
         app = self.app
@@ -108,3 +111,8 @@ class ClientServiceHandler(MasterHandler):
         conn.answer(Packets.AnswerLastTransaction(
             self.app.getLastTransaction()))

+    def abortTransaction(self, conn, tid):
+        app = self.app
+        app.tm.remove(tid)
+        app.executeQueuedEvent()
@@ -107,6 +107,7 @@ class StorageServiceHandler(BaseServiceHandler):
         # remove transaction from manager
         tm.remove(tid)
         app.setLastTransaction(tid)
+        app.executeQueuedEvent()

     def notifyReplicationDone(self, conn, offset):
         node = self.app.nm.getByUUID(conn.getUUID())
......
@@ -84,6 +84,9 @@ def addTID(ptid, offset):
     higher = (d.year, d.month, d.day, d.hour, d.minute)
     return packTID((higher, lower))

+class DelayedError(Exception):
+    pass
+
 class Transaction(object):
     """
     A pending transaction
@@ -177,6 +180,9 @@ class TransactionManager(object):
     Manage current transactions
     """
     _last_tid = ZERO_TID
+    # Transaction serialisation
+    # We don't need to use a real lock, as we are mono-threaded.
+    _locked = None

     def __init__(self):
         # tid -> transaction
@@ -272,11 +278,16 @@ class TransactionManager(object):
         """
         return self._tid_dict.keys()

-    def begin(self):
+    def begin(self, tid=None):
         """
         Generate a new TID
         """
-        return self._nextTID()
+        if self._locked is not None:
+            raise DelayedError()
+        if tid is None:
+            tid = self._nextTID()
+        self._locked = tid
+        return tid

     def prepare(self, node, tid, oid_list, uuid_list, msg_id):
         """
@@ -291,8 +302,13 @@ class TransactionManager(object):
         """
         Remove a transaction, commited or aborted
         """
-        node = self._tid_dict[tid].getNode()
-        del self._tid_dict[tid]
-        del self._node_dict[node][tid]
+        assert self._locked == tid, (self._locked, tid)
+        self._locked = None
+        tid_dict = self._tid_dict
+        if tid in tid_dict:
+            # ...and tried to finish
+            node = tid_dict[tid].getNode()
+            del tid_dict[tid]
+            del self._node_dict[node][tid]

     def lock(self, tid, uuid):
......
@@ -742,6 +742,11 @@ class AskBeginTransaction(Packet):
     """
     Ask to begin a new transaction. C -> PM.
     """
+    def _encode(self, tid):
+        return _encodeTID(tid)
+
+    def _decode(self, body):
+        return (_decodeTID(unpack('8s', body)[0]), )

 class AnswerBeginTransaction(Packet):
     """
......
@@ -70,7 +70,7 @@ class MasterClientHandlerTests(NeoUnitTestBase):
         # client call it
         client_uuid = self.identifyToMasterNode(node_type=NodeTypes.CLIENT, port=self.client_port)
         conn = self.getFakeConnection(client_uuid, self.client_address)
-        service.askBeginTransaction(conn)
+        service.askBeginTransaction(conn, None)
         self.assertTrue(ltid < self.app.tm.getLastTID())

     def test_08_askNewOIDs(self):
@@ -102,7 +102,7 @@ class MasterClientHandlerTests(NeoUnitTestBase):
             'getPartition': 0,
             'getCellList': [Mock({'getUUID': storage_uuid})],
         })
-        service.askBeginTransaction(conn)
+        service.askBeginTransaction(conn, None)
         oid_list = []
         tid = self.app.tm.getLastTID()
         conn = self.getFakeConnection(client_uuid, self.client_address)
......
@@ -201,9 +201,6 @@ class MasterStorageHandlerTests(NeoUnitTestBase):
         client1, cconn1 = self._getClient()
         client2, cconn2 = self._getClient()
         client3, cconn3 = self._getClient()
-        tid1 = self.getNextTID()
-        tid2 = self.getNextTID(tid1)
-        tid3 = self.getNextTID(tid2)
         oid_list = [self.getOID(), ]
         # Some shortcuts to simplify test code
@@ -215,25 +212,13 @@ class MasterStorageHandlerTests(NeoUnitTestBase):
         # Transaction 1: 2 storage nodes involved, one will die and the other
         # already answered node lock
         msg_id_1 = 1
-        tm.prepare(client1, tid1, oid_list, [node1.getUUID(), node2.getUUID()], msg_id_1)
+        tid1 = tm.begin()
+        tm.prepare(client1, tid1, oid_list,
+            [node1.getUUID(), node2.getUUID()], msg_id_1)
         tm.lock(tid1, node2.getUUID())
-        # Transaction 2: 2 storage nodes involved, one will die
-        msg_id_2 = 2
-        tm.prepare(client2, tid2, oid_list, [node1.getUUID(), node2.getUUID()], msg_id_2)
-        # Transaction 3: 1 storage node involved, which won't die
-        msg_id_3 = 3
-        tm.prepare(client3, tid3, oid_list, [node2.getUUID(), ], msg_id_3)
-        # Assert initial state
-        self.checkNoPacketSent(cconn1)
-        self.checkNoPacketSent(cconn2)
-        self.checkNoPacketSent(cconn3)
         # Storage 1 dies
         node1.setTemporarilyDown()
         self.service.nodeLost(conn1, node1)
-        # Check state after node lost
         # T1: last locking node lost, client receives AnswerTransactionFinished
         self.checkAnswerTransactionFinished(cconn1)
         self.checkNotifyUnlockInformation(conn2)
@@ -241,10 +226,24 @@ class MasterStorageHandlerTests(NeoUnitTestBase):
         # ...and notifications are sent to other clients
         self.checkInvalidateObjects(cconn2)
         self.checkInvalidateObjects(cconn3)
+        # Transaction 2: 2 storage nodes involved, one will die
+        msg_id_2 = 2
+        tid2 = tm.begin()
+        tm.prepare(client2, tid2, oid_list,
+            [node1.getUUID(), node2.getUUID()], msg_id_2)
         # T2: pending locking answer, client keeps waiting
         self.checkNoPacketSent(cconn2, check_notify=False)
+        tm.remove(tid2)
+        # Transaction 3: 1 storage node involved, which won't die
+        msg_id_3 = 3
+        tid3 = tm.begin()
+        tm.prepare(client3, tid3, oid_list,
+            [node2.getUUID(), ], msg_id_3)
         # T3: action not significant to this transacion, so no response
         self.checkNoPacketSent(cconn3, check_notify=False)
+        tm.remove(tid3)

     def test_answerPack(self):
         # Note: incomming status has no meaning here, so it's left to False.
......
@@ -22,7 +22,7 @@ from neo.tests import NeoUnitTestBase
 from neo.protocol import ZERO_TID
 from neo.master.transactions import Transaction, TransactionManager
-from neo.master.transactions import packTID, unpackTID, addTID
+from neo.master.transactions import packTID, unpackTID, addTID, DelayedError

 class testTransactionManager(NeoUnitTestBase):
@@ -90,26 +90,19 @@ class testTransactionManager(NeoUnitTestBase):
         storage_2_uuid = self.makeUUID(2)
         txnman = TransactionManager()
         # register 4 transactions made by two nodes
-        tid11 = txnman.begin()
-        txnman.prepare(node1, tid11, oid_list, [storage_1_uuid], 1)
-        tid12 = txnman.begin()
-        txnman.prepare(node1, tid12, oid_list, [storage_1_uuid], 2)
-        tid21 = txnman.begin()
-        txnman.prepare(node2, tid21, oid_list, [storage_2_uuid], 3)
-        tid22 = txnman.begin()
-        txnman.prepare(node2, tid22, oid_list, [storage_2_uuid], 4)
-        self.assertTrue(tid11 < tid12 < tid21 < tid22)
-        self.assertEqual(len(txnman.getPendingList()), 4)
-        # abort transactions of one node
-        txnman.abortFor(node1)
-        tid_list = txnman.getPendingList()
-        self.assertEqual(len(tid_list), 2)
-        self.assertTrue(tid21 in tid_list)
-        self.assertTrue(tid22 in tid_list)
-        # then the other
+        self.assertEqual(txnman.getPendingList(), [])
+        tid1 = txnman.begin()
+        txnman.prepare(node1, tid1, oid_list, [storage_1_uuid], 1)
+        self.assertEqual(txnman.getPendingList(), [tid1])
+        # abort transactions of another node, transaction stays
         txnman.abortFor(node2)
+        self.assertEqual(txnman.getPendingList(), [tid1])
+        # abort transactions of requesting node, transaction is removed
+        txnman.abortFor(node1)
         self.assertEqual(txnman.getPendingList(), [])
         self.assertFalse(txnman.hasPending())
+        # ...and we can start another transaction
+        tid2 = txnman.begin()

     def test_getNextOIDList(self):
         txnman = TransactionManager()
@@ -138,6 +131,9 @@ class testTransactionManager(NeoUnitTestBase):
         txnman.setLastTID(ntid)
         self.assertEqual(txnman.getLastTID(), ntid)
         self.assertTrue(ntid > tid1)
+        # If a new TID is generated, DelayedError is raised
+        self.assertRaises(DelayedError, txnman.begin)
+        txnman.remove(tid1)
         # new trancation
         node2 = Mock({'__hash__': 2})
         tid2 = txnman.begin()
@@ -159,36 +155,39 @@ class testTransactionManager(NeoUnitTestBase):
         tid1 = tm.begin()
         tm.prepare(client1, tid1, oid_list, [storage_1_uuid, storage_2_uuid], msg_id_1)
         tm.lock(tid1, storage_2_uuid)
+        t1 = tm[tid1]
+        self.assertFalse(t1.locked())
+        # Storage 1 dies:
+        # t1 is over
+        self.assertTrue(t1.forget(storage_1_uuid))
+        self.assertEqual(t1.getUUIDList(), [storage_2_uuid])
+        tm.remove(tid1)
         # Transaction 2: 2 storage nodes involved, one will die
         msg_id_2 = 2
         tid2 = tm.begin()
         tm.prepare(client2, tid2, oid_list, [storage_1_uuid, storage_2_uuid], msg_id_2)
-        # Transaction 3: 1 storage node involved, which won't die
-        msg_id_3 = 3
-        tid3 = tm.begin()
-        tm.prepare(client3, tid3, oid_list, [storage_2_uuid, ], msg_id_3)
-        t1 = tm[tid1]
         t2 = tm[tid2]
-        t3 = tm[tid3]
-        # Assert initial state
-        self.assertFalse(t1.locked())
         self.assertFalse(t2.locked())
-        self.assertFalse(t3.locked())
         # Storage 1 dies:
-        # t1 is over
-        self.assertTrue(t1.forget(storage_1_uuid))
-        self.assertEqual(t1.getUUIDList(), [storage_2_uuid])
         # t2 still waits for storage 2
         self.assertFalse(t2.forget(storage_1_uuid))
         self.assertEqual(t2.getUUIDList(), [storage_2_uuid])
         self.assertTrue(t2.lock(storage_2_uuid))
+        tm.remove(tid2)
+        # Transaction 3: 1 storage node involved, which won't die
+        msg_id_3 = 3
+        tid3 = tm.begin()
+        tm.prepare(client3, tid3, oid_list, [storage_2_uuid, ], msg_id_3)
+        t3 = tm[tid3]
+        self.assertFalse(t3.locked())
+        # Storage 1 dies:
         # t3 doesn't care
         self.assertFalse(t3.forget(storage_1_uuid))
         self.assertEqual(t3.getUUIDList(), [storage_2_uuid])
         self.assertTrue(t3.lock(storage_2_uuid))
+        tm.remove(tid3)

     def testTIDUtils(self):
         """
@@ -216,5 +215,19 @@ class testTransactionManager(NeoUnitTestBase):
             unpackTID(addTID(packTID(((2010, 11, 30, 23, 59), 2**32 - 1)), 1)),
             ((2010, 12, 1, 0, 0), 0))

+    def testTransactionLock(self):
+        """
+        Transaction lock is present to ensure invalidation TIDs are sent in
+        strictly increasing order.
+        Note: this implementation might change later, to allow more paralelism.
+        """
+        tm = TransactionManager()
+        tid1 = tm.begin()
+        # Further calls fail with DelayedError
+        self.assertRaises(DelayedError, tm.begin)
+        # ...until tid1 gets removed
+        tm.remove(tid1)
+        tid2 = tm.begin()

 if __name__ == '__main__':
     unittest.main()
@@ -236,8 +236,10 @@ class ProtocolTests(NeoUnitTestBase):
     def test_32_askBeginTransaction(self):
-        p = Packets.AskBeginTransaction()
-        self.assertEqual(p.decode(), ())
+        tid = self.getNextTID()
+        p = Packets.AskBeginTransaction(tid)
+        ptid = p.decode()[0]
+        self.assertEqual(tid, ptid)

     def test_33_answerBeginTransaction(self):
         tid = self.getNextTID()
......