More bugfixes to backup mode
- catch OperationFailure
- reset the transaction manager when leaving backup mode
- send the appropriate target tid to a storage that updates an outdated cell
- clean up the partition table when leaving the BACKINGUP state unexpectedly
- make sure all readable cells of a partition have the same 'backup_tid' if
  they have the same data, so that we know when internal replication is
  finished when leaving backup mode (see the sketch after this list)
- fix handling of storages that had not finished internal replication when
  leaving backup mode
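To make the 'backup_tid' invariant above concrete, here is a minimal
standalone sketch; Cell and replication_finished are hypothetical stand-ins
for NEO's partition-table objects, not code from this commit:

    from collections import namedtuple

    # Hypothetical stand-in for a partition-table cell; only the
    # 'backup_tid' attribute matters for this illustration.
    Cell = namedtuple('Cell', 'backup_tid')

    def replication_finished(cells, backup_tid):
        # Internal replication of a partition is complete once every
        # readable cell has caught up to the reference backup TID.
        return all(cell.backup_tid == backup_tid for cell in cells)

    # Readable cells holding the same data must expose the same
    # 'backup_tid'; otherwise this test could remain false even though
    # nothing is left to replicate.
    cells = [Cell(backup_tid='\x00' * 8), Cell(backup_tid='\x00' * 8)]
    assert replication_finished(cells, '\x00' * 8)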
@@ -21,9 +21,10 @@ from neo.lib import logging
 from neo.lib.bootstrap import BootstrapManager
 from neo.lib.connector import getConnectorHandler
 from neo.lib.exception import PrimaryFailure
+from neo.lib.handler import EventHandler
 from neo.lib.node import NodeManager
-from neo.lib.protocol import CellStates, ClusterStates, NodeTypes, Packets
-from neo.lib.protocol import uuid_str, INVALID_TID, ZERO_TID
+from neo.lib.protocol import CellStates, ClusterStates, \
+    NodeStates, NodeTypes, Packets, uuid_str, INVALID_TID, ZERO_TID
 from neo.lib.util import add64, dump
 from .app import StateChangedException
 from .pt import PartitionTable
@@ -144,26 +145,32 @@ class BackupApplication(object):
                     while pt.getCheckTid(xrange(pt.getPartitions())) < tid:
                         poll(1)
                     last_tid = app.getLastTransaction()
+                    handler = EventHandler(app)
                     if tid < last_tid:
                         assert tid != ZERO_TID
                         logging.warning("Truncating at %s (last_tid was %s)",
                             dump(app.backup_tid), dump(last_tid))
-                    p = Packets.AskTruncate(tid)
-                    connection_list = []
+                    # XXX: We want to go through a recovery phase in order to
+                    # initialize the transaction manager, but this is only
+                    # possible if storages already know that we left backup
+                    # mode. To that purpose, we always send a Truncate packet,
+                    # even if there's nothing to truncate.
+                    p = Packets.Truncate(tid)
                     for node in app.nm.getStorageList(only_identified=True):
                         conn = node.getConnection()
-                        conn.ask(p)
-                        connection_list.append(conn)
-                    for conn in connection_list:
-                        while conn.isPending():
-                            poll(1)
-                    app.setLastTransaction(tid)
+                        conn.setHandler(handler)
+                        node.setState(NodeStates.TEMPORARILY_DOWN)
+                        # Packets will be sent at the beginning of the recovery
+                        # phase.
+                        conn.notify(p)
+                        conn.abort()
                     # If any error happened before reaching this line, we'd go back
                     # to backup mode, which is the right mode to recover.
                     del app.backup_tid
                     break
             finally:
                 del self.primary_partition_dict, self.tid_list
+                pt.clearReplicating()
 
     def nodeLost(self, node):
         getCellList = self.app.pt.getCellList
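The hunk above relies on an ordering guarantee: a packet passed to
conn.notify() just before conn.abort() is not lost, it is written out later
(as the in-diff comment says, at the beginning of the recovery phase) before
the connection is finally closed. A toy model of that notify-then-abort
ordering, with a made-up ToyConnection class (the real logic lives in
neo.lib.connection):

    class ToyConnection(object):
        def __init__(self):
            self.queue = []
            self.closed = False

        def notify(self, packet):
            # One-way packet: queued for sending, no answer expected
            # (unlike ask(), which the old code used).
            self.queue.append(packet)

        def abort(self):
            # Close only after pending writes are flushed, so a packet
            # notified just before abort() is still delivered.
            while self.queue:
                print('sending %s' % self.queue.pop(0))
            self.closed = True

    conn = ToyConnection()
    conn.notify('Truncate(tid)')
    conn.abort()                    # prints: sending Truncate(tid)
    assert conn.closed and not conn.queue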
@@ -205,9 +212,25 @@ class BackupApplication(object):
                 node_list = []
                 for cell in pt.getCellList(offset, readable=True):
                     node = cell.getNode()
-                    assert node.isConnected()
-                    assert cell.backup_tid < last_max_tid or \
-                        cell.backup_tid == prev_tid
+                    assert node.isConnected(), node
+                    if cell.backup_tid == prev_tid:
+                        # Given 4 TIDs t0, t1, t2, t3: if a cell is only
+                        # modified by t0 & t3 and has all data for t0, 4 values
+                        # are possible for its 'backup_tid' until it replicates
+                        # up to t3: t0, t1, t2 or t3 - 1.
+                        # Choosing the smallest one (t0) is easier to implement
+                        # but when leaving backup mode, we would always lose
+                        # data if the last full transaction does not modify
+                        # all partitions. t1 is wrong for the same reason.
+                        # So we have chosen the highest one (t3 - 1).
+                        # t2 should also work but may be harder to implement.
+                        cell.backup_tid = add64(tid, -1)
+                        logging.debug(
+                            "partition %u: updating backup_tid of %r to %s",
+                            offset, cell, dump(cell.backup_tid))
+                    else:
+                        assert cell.backup_tid < last_max_tid, (
+                            cell.backup_tid, last_max_tid, prev_tid, tid)
                     if app.isStorageReady(node.getUUID()):
                         node_list.append(node)
                 assert node_list
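The add64(tid, -1) above is plain 64-bit arithmetic on packed TIDs. A
standalone sketch, re-implementing the helper here under the assumption that
TIDs are 8-byte big-endian integers (the real helper is neo.lib.util.add64,
imported in the first hunk):

    from struct import pack, unpack

    def add64(packed, offset):
        # Add a Python integer to an 8-byte big-endian packed value.
        return pack('!Q', unpack('!Q', packed)[0] + offset)

    t3 = pack('!Q', 1000)           # made-up TID value for illustration
    assert unpack('!Q', add64(t3, -1))[0] == 999

Per the t0..t3 argument in the comment, a cell not modified by any
transaction between t0 and t3 may advance its 'backup_tid' straight to
t3 - 1 without replicating anything, which is the highest value that is
still correct.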