nexedi / neoppod · Commits

Commit a4c06242, authored Jan 10, 2017 by Julien Muchembled
parent dc7a129c

Review aborting of transactions

Showing 12 changed files with 182 additions and 33 deletions (+182, -33)
neo/client/app.py               +10  -13
neo/lib/protocol.py              +2   -1
neo/master/app.py                +1   -1
neo/master/handlers/client.py   +24   -3
neo/master/transactions.py       +3   -1
neo/storage/handlers/client.py   +0   -4
neo/storage/handlers/master.py   +4   -0
neo/storage/replicator.py       +11   -4
neo/storage/transactions.py      +2   -3
neo/tests/__init__.py           +25   -0
neo/tests/threaded/__init__.py   +7   -2
neo/tests/threaded/test.py      +93   -1
neo/client/app.py

@@ -595,19 +595,16 @@ class Application(ThreadedApplication):
         txn_context = self._txn_container.pop(transaction)
         if txn_context is None:
             return
-        p = Packets.AbortTransaction(txn_context['ttid'])
-        # cancel transaction on all those nodes
-        conns = [self.master_conn]
-        for uuid in txn_context['involved_nodes']:
-            node = self.nm.getByUUID(uuid)
-            if node is not None:
-                conns.append(self.cp.getConnForNode(node))
-        for conn in conns:
-            if conn is not None:
-                try:
-                    conn.notify(p)
-                except ConnectionClosed:
-                    pass
+        try:
+            notify = self.master_conn.notify
+        except AttributeError:
+            pass
+        else:
+            try:
+                notify(Packets.AbortTransaction(txn_context['ttid'],
+                    txn_context['involved_nodes']))
+            except ConnectionClosed:
+                pass
         # We don't need to flush queue, as it won't be reused by future
         # transactions (deleted on next line & indexed by transaction object
         # instance).
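The gist of this hunk, as a self-contained sketch (plain functions, not NEO's Application API; 'master', 'S1' and 'S2' are placeholder names): before, tpc_abort notified the master and every reachable involved storage itself; now it sends a single AbortTransaction carrying the involved-node list to the primary master only, which fans the abort out (see neo/master/handlers/client.py below).

def abort_messages_before(involved_nodes):
    # one packet per reachable destination, sent by the client itself
    return [('master', ('ttid',))] + [(n, ('ttid',)) for n in involved_nodes]

def abort_messages_after(involved_nodes):
    # a single packet to the master; the uuid list travels inside it
    return [('master', ('ttid', tuple(involved_nodes)))]

assert abort_messages_before(['S1', 'S2']) == \
    [('master', ('ttid',)), ('S1', ('ttid',)), ('S2', ('ttid',))]
assert abort_messages_after(['S1', 'S2']) == [('master', ('ttid', ('S1', 'S2')))]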
neo/lib/protocol.py

@@ -976,10 +976,11 @@ class StoreObject(Packet):
 class AbortTransaction(Packet):
     """
-    Abort a transaction. C -> S, PM.
+    Abort a transaction. C -> PM -> S.
     """
     _fmt = PStruct('abort_transaction',
         PTID('tid'),
+        PFUUIDList,  # unused for PM -> S
     )

 class StoreTransaction(Packet):
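To make the two legs concrete, here is an illustration with a plain namedtuple instead of NEO's PStruct machinery (an assumption for readability, not the real wire encoding): the uuid list is only meaningful on the client-to-master leg.

from collections import namedtuple

# stand-in for the packet defined above; field names follow the PStruct
AbortTransaction = namedtuple('AbortTransaction', 'tid uuid_list')

client_to_master = AbortTransaction(tid='<ttid>', uuid_list=('S1', 'S2'))
master_to_storage = AbortTransaction(tid='<ttid>', uuid_list=())  # "unused for PM -> S"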
neo/master/app.py

@@ -585,6 +585,6 @@ class Application(BaseApplication):
     def isStorageReady(self, uuid):
         return uuid in self.storage_ready_dict

-    def getStorageReadySet(self, readiness):
+    def getStorageReadySet(self, readiness=float('inf')):
         return {k for k, v in self.storage_ready_dict.iteritems()
                 if v <= readiness}
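A quick illustration of the new default (plain dict and items() here; the master uses storage_ready_dict and Python 2's iteritems(), and the readiness values are made up): with readiness=float('inf'), every registered storage is returned, which is what the abort path below relies on when it calls getStorageReadySet() with no argument.

storage_ready_dict = {'S1': 1, 'S2': 2, 'S3': 3}   # uuid -> readiness counter (example values)
assert {k for k, v in storage_ready_dict.items() if v <= 2} == {'S1', 'S2'}
assert {k for k, v in storage_ready_dict.items() if v <= float('inf')} == {'S1', 'S2', 'S3'}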
neo/master/handlers/client.py

@@ -74,6 +74,12 @@ class ClientServiceHandler(MasterHandler):
             node.ask(p, timeout=60)
         else:
             conn.answer(Errors.IncompleteTransaction())
+            # It's simpler to abort automatically rather than asking the client
+            # to send a notification on tpc_abort, since that would have kept
+            # the transaction longer in the list of transactions.
+            # This should happen so rarely that we don't try to minimize the
+            # number of abort notifications by looking at the modified partitions.
+            self.abortTransaction(conn, ttid, app.getStorageReadySet())

     def askFinalTID(self, conn, ttid):
         tm = self.app.tm
@@ -102,9 +108,24 @@ class ClientServiceHandler(MasterHandler):
         else:
             conn.answer(Packets.AnswerPack(False))

-    def abortTransaction(self, conn, tid):
-        # BUG: The replicator may wait for this transaction to be finished.
-        self.app.tm.abort(tid, conn.getUUID())
+    def abortTransaction(self, conn, tid, uuid_list):
+        # Consider a failure when the connection between the storage and the
+        # client breaks while the answer to the first write is sent back.
+        # In other words, the client cannot know the exact set of nodes that
+        # know this transaction, so it sends us all nodes it considered for
+        # writing.
+        # We must also add those that are waiting for this transaction to be
+        # finished (returned by tm.abort), because they may have joined the
+        # cluster after the client started to abort.
+        app = self.app
+        involved = app.tm.abort(tid, conn.getUUID())
+        involved.update(uuid_list)
+        involved.intersection_update(app.getStorageReadySet())
+        if involved:
+            p = Packets.AbortTransaction(tid, ())
+            getByUUID = app.nm.getByUUID
+            for involved in involved:
+                getByUUID(involved).notify(p)

 # like ClientServiceHandler but read-only & only for tid <= backup_tid
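The set arithmetic of the new abortTransaction(), as a self-contained model (not the handler itself; the uuids are placeholder strings): union the nodes the client considered for writing with the nodes tm.abort() reports as waiting for the transaction, then keep only the storages that are currently ready.

def storages_to_notify(uuid_list, waiting_uuids, ready_uuids):
    involved = set(waiting_uuids)        # returned by tm.abort() in the hunk above
    involved.update(uuid_list)           # everything the client considered for writing
    involved.intersection_update(ready_uuids)
    return involved

# Client wrote to S1/S2, S3 joined later and waits for the transaction,
# S2 is not ready: only S1 and S3 receive AbortTransaction(tid, ()).
assert storages_to_notify({'S1', 'S2'}, {'S3'}, {'S1', 'S3'}) == {'S1', 'S3'}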
neo/master/transactions.py

@@ -393,10 +393,12 @@ class TransactionManager(EventQueue):
         Abort a transaction
         """
         logging.debug('Abort TXN %s for %s', dump(ttid), uuid_str(uuid))
-        if self[ttid].isPrepared():
+        txn = self[ttid]
+        if txn.isPrepared():
             raise ProtocolError("commit already requested for ttid %s"
                                 % dump(ttid))
         del self[ttid]
+        return txn._notification_set

     def lock(self, ttid, uuid):
         """
neo/storage/handlers/client.py

@@ -57,9 +57,6 @@ class ClientOperationHandler(EventHandler):
                 compression, checksum, data, data_serial)
         conn.answer(p)

-    def abortTransaction(self, conn, ttid):
-        self.app.tm.abort(ttid)
-
     def askStoreTransaction(self, conn, ttid, *txn_info):
         self.app.tm.register(conn, ttid)
         self.app.tm.vote(ttid, txn_info)
@@ -201,7 +198,6 @@ class ClientReadOnlyOperationHandler(ClientOperationHandler):
             conn.answer(Errors.ReadOnlyAccess(
                 'read-only access because cluster is in backuping mode'))

-    abortTransaction = _readOnly
     askStoreTransaction = _readOnly
     askVoteTransaction = _readOnly
     askStoreObject = _readOnly
neo/storage/handlers/master.py

@@ -57,6 +57,10 @@ class MasterOperationHandler(BaseMasterHandler):
     def notifyUnlockInformation(self, conn, ttid):
         self.app.tm.unlock(ttid)

+    def abortTransaction(self, conn, ttid, _):
+        self.app.tm.abort(ttid)
+        self.app.replicator.transactionFinished(ttid)
+
     def askPack(self, conn, tid):
         app = self.app
         logging.info('Pack started, up to %s...', dump(tid))
neo/storage/replicator.py

@@ -135,14 +135,19 @@ class Replicator(object):
             self.replicate_dict[offset] = max_tid
         self._nextPartition()

-    def transactionFinished(self, ttid, max_tid):
+    def transactionFinished(self, ttid, max_tid=None):
         """ Callback from MasterOperationHandler """
-        self.ttid_set.remove(ttid)
+        try:
+            self.ttid_set.remove(ttid)
+        except KeyError:
+            assert max_tid is None, max_tid
+            return
         min_ttid = min(self.ttid_set) if self.ttid_set else INVALID_TID
         for offset, p in self.partition_dict.iteritems():
             if p.max_ttid and p.max_ttid < min_ttid:
                 p.max_ttid = None
-                self.replicate_dict[offset] = max_tid
+                if max_tid:
+                    self.replicate_dict[offset] = max_tid
         self._nextPartition()

     def getBackupTID(self):
@@ -355,7 +360,9 @@ class Replicator(object):
         p = self.partition_dict[offset]
         p.next_obj = add64(tid, 1)
         self.updateBackupTID()
-        if not p.max_ttid:
+        if p.max_ttid:
+            logging.debug("unfinished transactions: %r", self.ttid_set)
+        else:
             self.app.tm.replicated(offset, tid)
             logging.debug("partition %u replicated up to %s from %r",
                           offset, dump(tid), self.current_node)
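A minimal model of the guard added to transactionFinished() (a plain set instead of the Replicator state): when the master's abort notification reaches a storage for a transaction the replicator never tracked, the callback must return quietly, and on that path max_tid is necessarily None.

def transaction_finished(ttid_set, ttid, max_tid=None):
    try:
        ttid_set.remove(ttid)
    except KeyError:
        assert max_tid is None, max_tid
        return False        # unknown ttid: nothing else to update
    return True             # known ttid: pending replications may resume

tracked = {10, 11}
assert transaction_finished(tracked, 10, max_tid=12)   # normal unlock notification
assert not transaction_finished(tracked, 99)           # abort of an untracked ttid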
neo/storage/transactions.py

@@ -324,9 +324,8 @@ class TransactionManager(EventQueue):
         Note: does not alter persistent content.
         """
         if ttid not in self._transaction_dict:
-            # the tid may be unknown as the transaction is aborted on every node
-            # of the partition, even if no data was received (eg. conflict on
-            # another node)
+            assert not even_if_locked
+            # See how the master processes AbortTransaction from the client.
             return
         logging.debug('Abort TXN %s', dump(ttid))
         transaction = self._transaction_dict[ttid]
neo/tests/__init__.py

@@ -37,6 +37,7 @@ from time import time
 from struct import pack, unpack
 from unittest.case import _ExpectedFailure, _UnexpectedSuccess
 try:
+    from transaction.interfaces import IDataManager
     from ZODB.utils import newTid
 except ImportError:
     pass
@@ -378,6 +379,30 @@ class NeoUnitTestBase(NeoTestBase):
         return packet


+class TransactionalResource(object):
+
+    class _sortKey(object):
+
+        def __init__(self, last):
+            self._last = last
+
+        def __cmp__(self, other):
+            assert type(self) is not type(other), other
+            return 1 if self._last else -1
+
+    def __init__(self, txn, last, **kw):
+        self.sortKey = lambda: self._sortKey(last)
+        for k in kw:
+            assert callable(IDataManager.get(k)), k
+        self.__dict__.update(kw)
+        txn.get().join(self)
+
+    def __getattr__(self, attr):
+        if callable(IDataManager.get(attr)):
+            return lambda *_: None
+        return self.__getattribute__(attr)
+
+
 class Patch(object):
     """
     Patch attributes and revert later automatically.
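A usage sketch of the new helper, assuming the optional transaction package is importable (the test below uses it as TransactionalResource(t1, 1, tpc_vote=...)): it joins a dummy data manager to a transaction so that one chosen two-phase-commit hook runs a callback, while every other IDataManager method becomes a no-op.

import transaction

voted = []
# sort last (second argument), and record the transaction when the vote phase runs
TransactionalResource(transaction, 1, tpc_vote=voted.append)
transaction.commit()        # the hook fires between tpc_begin and tpc_finish
assert len(voted) == 1      # the vote callback ran exactly once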
neo/tests/threaded/__init__.py

@@ -956,13 +956,12 @@ class NEOThreadedTest(NeoTestBase):
                 return obj
             return unpickler

-    class newThread(threading.Thread):
+    class newPausedThread(threading.Thread):

         def __init__(self, func, *args, **kw):
             threading.Thread.__init__(self)
             self.__target = func, args, kw
             self.daemon = True
-            self.start()

         def run(self):
             try:
@@ -978,6 +977,12 @@ class NEOThreadedTest(NeoTestBase):
                 del self.__exc_info
                 raise etype, value, tb

+    class newThread(newPausedThread):
+
+        def __init__(self, *args, **kw):
+            NEOThreadedTest.newPausedThread.__init__(self, *args, **kw)
+            self.start()
+
     def commitWithStorageFailure(self, client, txn):
         with Patch(client, _getFinalTID=lambda *_: None):
             self.assertRaises(ConnectionClosed, txn.commit)
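The split exists so a test can create the worker first, arm its patches and packet filters, and only then let the background commit run. A standalone sketch with plain threading, outside the NEOThreadedTest class (the payload is a placeholder):

import threading

class PausedThread(threading.Thread):
    """Like newPausedThread above: constructed paused, started explicitly."""
    def __init__(self, func, *args, **kw):
        threading.Thread.__init__(self)
        self._target_call = func, args, kw
        self.daemon = True              # note: no self.start() here any more
    def run(self):
        func, args, kw = self._target_call
        func(*args, **kw)

done = []
t = PausedThread(done.append, 'commit finished')
# ... a test would install Patch / ConnectionFilter objects here ...
t.start()
t.join()
assert done == ['commit finished']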
neo/tests/threaded/test.py

@@ -32,7 +32,7 @@ from neo.lib.connection import ServerConnection, MTClientConnection
 from neo.lib.exception import DatabaseFailure, StoppedOperation
 from neo.lib.protocol import CellStates, ClusterStates, NodeStates, Packets, \
     ZERO_OID, ZERO_TID
-from .. import expectedFailure, Patch
+from .. import expectedFailure, Patch, TransactionalResource
 from . import ConnectionFilter, LockLock, NEOThreadedTest, with_cluster
 from neo.lib.util import add64, makeChecksum, p64, u64
 from neo.client.exception import NEOPrimaryMasterLost, NEOStorageError
@@ -1463,5 +1463,97 @@ class Test(NEOThreadedTest):
             value_list.append(r[x].value)
         self.assertEqual(value_list, range(3))

+    @with_cluster(replicas=1, partitions=3, storage_count=3)
+    def testMasterArbitratingVote(self, cluster):
+        """
+        p\S 1 2 3
+         0  U U .
+         1  . U U
+         2  U . U
+
+        With the above setup, check when a client C1 fails to connect to S2
+        and another C2 fails to connect to S1.
+
+        For the first 2 scenarios:
+        - C1 first votes (the master accepts)
+        - C2 vote is delayed until C1 decides to finish or abort
+        """
+        def delayAbort(conn, packet):
+            return isinstance(packet, Packets.AbortTransaction)
+        def noConnection(jar, storage):
+            return Patch(jar.db().storage.app.cp,
+                getConnForNode=lambda orig, node:
+                    None if node.getUUID() == storage.uuid else orig(node))
+        def c1_vote(txn):
+            def vote(orig, *args):
+                result = orig(*args)
+                ll()
+                return result
+            with LockLock() as ll, Patch(cluster.master.tm, vote=vote):
+                commit2.start()
+                ll()
+            if c1_aborts:
+                raise Exception
+        pt = [{x.getUUID() for x in x}
+              for x in cluster.master.pt.partition_list]
+        cluster.storage_list.sort(key=lambda x:
+            (x.uuid not in pt[0], x.uuid in pt[1]))
+        pt = 'UU.|.UU|U.U'
+        self.assertPartitionTable(cluster, pt)
+        s1, s2, s3 = cluster.storage_list
+        t1, c1 = cluster.getTransaction()
+        with cluster.newClient(1) as db:
+            t2, c2 = cluster.getTransaction(db)
+            with noConnection(c1, s2), noConnection(c2, s1):
+                cluster.client.cp.connection_dict[s2.uuid].close()
+                self.tic()
+                for c1_aborts in 0, 1:
+                    # 0: C1 finishes, C2 vote fails
+                    # 1: C1 aborts, C2 finishes
+                    #
+                    # Although we try to modify the same oid, there's no
+                    # conflict because each storage node sees a single
+                    # and different transaction: vote to storages is done
+                    # in parallel, and the master must be involved as an
+                    # arbitrator, which ultimately rejects 1 of the 2
+                    # transactions, preferably before the second phase of
+                    # the commit.
+                    t1.begin(); c1.root()._p_changed = 1
+                    t2.begin(); c2.root()._p_changed = 1
+                    commit2 = self.newPausedThread(t2.commit)
+                    TransactionalResource(t1, 1, tpc_vote=c1_vote)
+                    with ConnectionFilter() as f:
+                        if not c1_aborts:
+                            f.add(delayAbort)
+                        f.delayAskFetchTransactions(lambda _:
+                            f.discard(delayAbort))
+                        try:
+                            t1.commit()
+                            self.assertFalse(c1_aborts)
+                        except Exception:
+                            self.assertTrue(c1_aborts)
+                        try:
+                            commit2.join()
+                            self.assertTrue(c1_aborts)
+                        except NEOStorageError:
+                            self.assertFalse(c1_aborts)
+                        self.tic()
+                        self.assertPartitionTable(cluster,
+                            'OU.|.UU|O.U' if c1_aborts else 'UO.|.OU|U.U')
+                    self.tic()
+                    self.assertPartitionTable(cluster, pt)
+            # S3 fails while C1 starts to finish
+            with ConnectionFilter() as f:
+                f.add(lambda conn, packet:
+                    conn.getUUID() == s3.uuid and
+                    isinstance(packet, Packets.AcceptIdentification))
+                t1.begin(); c1.root()._p_changed = 1
+                TransactionalResource(t1, 0, tpc_finish=lambda *_:
+                    cluster.master.nm.getByUUID(s3.uuid)
+                    .getConnection().close())
+                self.assertRaises(NEOStorageError, t1.commit)
+                self.assertPartitionTable(cluster, 'UU.|.UO|U.O')
+            self.tic()
+            self.assertPartitionTable(cluster, pt)

 if __name__ == "__main__":
     unittest.main()
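Reading aid for the partition-table strings asserted above (an assumption about the test helper's notation, not something stated in this diff): one group per partition, one character per storage in storage_list order, with 'U' an up-to-date cell, 'O' an out-of-date cell and '.' no cell.

def parse_pt(pt, storages=('S1', 'S2', 'S3')):
    # e.g. 'UU.|.UU|U.U' -> cell marker per storage, per partition
    return [{s: c for s, c in zip(storages, row) if c != '.'}
            for row in pt.split('|')]

assert parse_pt('UU.|.UU|U.U') == [
    {'S1': 'U', 'S2': 'U'},     # partition 0 on S1, S2 (matches the docstring table)
    {'S2': 'U', 'S3': 'U'},     # partition 1 on S2, S3
    {'S1': 'U', 'S3': 'U'},     # partition 2 on S1, S3
]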
Julien Muchembled (@jm) mentioned this in commit 9b33b1db82e3ab4b8e668b17f9491dbe30a0a9c7 (Feb 27, 2017).