Commit efb4da42 authored by Grégory Wisniewski

Remove dropNode() calls where it's not safe to change the partition table

without giving an admin the choice to discard the node's content. Now a node
can be dropped only when requested by the admin node or when a storage has
changed its address (this case should be checked and discussed).


git-svn-id: https://svn.erp5.org/repos/neo/branches/prototype3@732 71dcc9de-d417-0410-9af5-da40c76e7ee4
parent 2cc58316
......@@ -646,11 +646,15 @@ class Application(object):
for node in nm.getStorageNodeList():
if node.getState() == TEMPORARILY_DOWN_STATE \
and node.getLastStateChange() + expiration < current_time:
logging.info('%s is down' % (node, ))
node.setState(DOWN_STATE)
self.broadcastNodeInformation(node)
cell_list = self.pt.dropNode(node)
self.broadcastPartitionChanges(self.pt.setNextID(), cell_list)
logging.warning('%s is down, have to notify the admin' % (node, ))
# XXX: here we should notify the administrator that
# a node seems dead and should be dropped from the
# partition table. This should not be done
# automatically, to avoid data loss.
#node.setState(DOWN_STATE)
#self.broadcastNodeInformation(node)
#cell_list = self.pt.dropNode(node)
#self.broadcastPartitionChanges(self.pt.setNextID(), cell_list)
if not self.pt.operational():
# Catastrophic.
raise OperationFailure, 'cannot continue operation'
......
......@@ -42,10 +42,10 @@ class IdentificationEventHandler(MasterEventHandler):
node_by_addr = nm.getNodeByServer(server)
def changeNodeAddress(node, server):
from copy import copy
if node_type == protocol.STORAGE_NODE_TYPE:
args = (node.getServer(), server)
# remove storage from partition table
# XXX: this should be safe but needs to be checked
cell_list = app.pt.dropNode(node)
if cell_list:
ptid = app.pt.setNextID()
......@@ -70,11 +70,15 @@ class IdentificationEventHandler(MasterEventHandler):
if node.getState() == protocol.RUNNING_STATE:
# still running, reject this new node
raise protocol.ProtocolError('invalid server address')
# this node has changed its address
node = changeNodeAddress(node, server)
if node_by_uuid is None and node_by_addr is not None:
if node.getState() == protocol.RUNNING_STATE:
# still running, reject this new node
raise protocol.ProtocolError('invalid server address')
# FIXME: here the node was known with a different uuid but with the
# same address; is it safe to forget the old one, even if it is not
# running?
node = changeNodeAddress(node, server)
# ask the app the node identification, if refused, an exception is raised
......
......@@ -31,7 +31,7 @@ class RecoveryEventHandler(MasterEventHandler):
"""This class deals with events for a recovery phase."""
def connectionCompleted(self, conn):
# ask the last IDs to perform the the recovery
# ask the last IDs to perform the recovery
conn.ask(protocol.askLastIDs())
def connectionClosed(self, conn):
......
......@@ -99,14 +99,7 @@ class ServiceEventHandler(MasterEventHandler):
app.broadcastNodeInformation(node)
if node.getNodeType() == STORAGE_NODE_TYPE:
if state in (DOWN_STATE, BROKEN_STATE):
# XXX still required to change here ??? who can send
# this kind of message with these status except admin node
cell_list = app.pt.dropNode(node)
if len(cell_list) != 0:
ptid = app.pt.setNextID()
app.broadcastPartitionChanges(ptid, cell_list)
elif state == TEMPORARILY_DOWN_STATE:
if state == TEMPORARILY_DOWN_STATE:
cell_list = app.pt.outdate()
if len(cell_list) != 0:
ptid = app.pt.setNextID()
......
......@@ -103,14 +103,7 @@ class ShutdownEventHandler(ServiceEventHandler):
logging.debug('broadcasting node information')
app.broadcastNodeInformation(node)
if node.getNodeType() == STORAGE_NODE_TYPE:
if state in (DOWN_STATE, BROKEN_STATE):
# XXX still required to change here ??? who can send
# this kind of message with these status except admin node
cell_list = app.pt.dropNode(node)
if len(cell_list) != 0:
ptid = app.pt.setNextID()
app.broadcastPartitionChanges(ptid, cell_list)
elif state == TEMPORARILY_DOWN_STATE:
if state == TEMPORARILY_DOWN_STATE:
cell_list = app.pt.outdate()
if len(cell_list) != 0:
ptid = app.pt.setNextID()
......
......@@ -33,43 +33,33 @@ class VerificationEventHandler(MasterEventHandler):
def connectionCompleted(self, conn):
pass
def connectionClosed(self, conn):
def _dropIt(self, conn, node, new_state):
app = self.app
uuid = conn.getUUID()
node = app.nm.getNodeByUUID(uuid)
if node.getState() == RUNNING_STATE:
node.setState(TEMPORARILY_DOWN_STATE)
node.setState(new_state)
app.broadcastNodeInformation(node)
if not app.pt.operational():
# Catastrophic.
raise VerificationFailure, 'cannot continue verification'
def connectionClosed(self, conn):
node = self.app.nm.getNodeByUUID(conn.getUUID())
if node.getState() == RUNNING_STATE:
self._dropIt(conn, node, TEMPORARILY_DOWN_STATE)
MasterEventHandler.connectionClosed(self, conn)
def timeoutExpired(self, conn):
app = self.app
uuid = conn.getUUID()
node = app.nm.getNodeByUUID(uuid)
node = self.app.nm.getNodeByUUID(conn.getUUID())
if node.getState() == RUNNING_STATE:
node.setState(TEMPORARILY_DOWN_STATE)
app.broadcastNodeInformation(node)
if not app.pt.operational():
# Catastrophic.
raise VerificationFailure, 'cannot continue verification'
self._dropIt(conn, node, TEMPORARILY_DOWN_STATE)
MasterEventHandler.timeoutExpired(self, conn)
def peerBroken(self, conn):
app = self.app
uuid = conn.getUUID()
node = app.nm.getNodeByUUID(uuid)
node = self.app.nm.getNodeByUUID(conn.getUUID())
if node.getState() != BROKEN_STATE:
node.setState(BROKEN_STATE)
app.broadcastNodeInformation(node)
cell_list = app.pt.dropNode(node)
ptid = app.pt.setNextID()
app.broadcastPartitionChanges(ptid, cell_list)
if not app.pt.operational():
# Catastrophic.
raise VerificationFailure, 'cannot continue verification'
self._dropIt(conn, node, BROKEN_STATE)
# here the node is no longer dropped from the partition table,
# because it is the responsibility of an administrator to
# restore the node, back up the node content, or drop it definitively
# and lose all its content.
MasterEventHandler.peerBroken(self, conn)
def handleNotifyNodeInformation(self, conn, packet, node_list):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment