Commit 142ad156 authored by Vincent Pelletier

Don't wait for unreachable nodes in election.

parent b1123667
......@@ -116,8 +116,6 @@ class Application(object):
def _run(self):
"""Make sure that the status is sane and start a loop."""
bootstrap = True
# Make a listening port.
self.listening_conn = ListeningConnection(self.em, None,
addr=self.server, connector=self.connector_handler())
......@@ -127,8 +125,7 @@ class Application(object):
# (Re)elect a new primary master.
self.primary = not self.nm.getMasterList()
if not self.primary:
self.electPrimary(bootstrap=bootstrap)
bootstrap = False
self.electPrimary()
try:
if self.primary:
self.playPrimaryRole()
......@@ -141,7 +138,7 @@ class Application(object):
conn.close()
def electPrimary(self, bootstrap = True):
def electPrimary(self):
"""Elect a primary master node.
The difficulty is that a master node must accept connections from
......@@ -166,34 +163,13 @@ class Application(object):
self.primary = None
self.primary_master_node = None
try:
# Wait at most 20 seconds at bootstrap. Otherwise, wait at most
# 10 seconds to avoid stopping the whole cluster for a long time.
# Note that even if not all master are up in the first 20 seconds
# this is not an issue because the first up will timeout and take
# the primary role.
if bootstrap:
expiration = 20
else:
expiration = 10
t = 0
while (self.unconnected_master_node_set or
self.negotiating_master_node_set):
current_time = time()
if current_time >= t:
t = current_time + 1
for node in self.nm.getMasterList():
if not node.isRunning() and node.getLastStateChange() + \
expiration < current_time:
neo.lib.logging.info('%s is down' % (node, ))
node.setDown()
self.unconnected_master_node_set.discard(
node.getAddress())
# Try to connect to master nodes.
for addr in self.unconnected_master_node_set.difference(
x.getAddress() for x in self.em.getClientList()):
ClientConnection(self.em, client_handler, addr=addr,
connector=self.connector_handler())
for addr in self.unconnected_master_node_set:
ClientConnection(self.em, client_handler, addr=addr,
connector=self.connector_handler())
self.negotiating_master_node_set.add(addr)
self.unconnected_master_node_set.clear()
self.em.poll(1)
except ElectionFailure, m:
# something goes wrong, clean then restart
......@@ -217,7 +193,6 @@ class Application(object):
# Close all connections.
for conn in self.em.getClientList() + self.em.getServerList():
conn.close()
bootstrap = False
else:
# election succeed, stop the process
self.primary = self.primary is None
......
......@@ -32,13 +32,6 @@ class ClientElectionHandler(MasterHandler):
def askPrimary(self, conn):
raise UnexpectedPacketError, "askPrimary on server connection"
def connectionStarted(self, conn):
addr = conn.getAddress()
# connection in progress
self.app.unconnected_master_node_set.remove(addr)
self.app.negotiating_master_node_set.add(addr)
super(ClientElectionHandler, self).connectionStarted(conn)
def connectionFailed(self, conn):
addr = conn.getAddress()
node = self.app.nm.getByAddress(addr)
......@@ -47,7 +40,6 @@ class ClientElectionHandler(MasterHandler):
node.getState())
# connection never success, node is still in unknown state
self.app.negotiating_master_node_set.discard(addr)
self.app.unconnected_master_node_set.add(addr)
super(ClientElectionHandler, self).connectionFailed(conn)
def connectionCompleted(self, conn):
......@@ -120,7 +112,6 @@ class ClientElectionHandler(MasterHandler):
app.primary_master_node = primary_node
# Stop waiting for connections than primary master's to
# complete to exit election phase ASAP.
app.unconnected_master_node_set.clear()
app.negotiating_master_node_set.clear()
primary_node = app.primary_master_node
......@@ -200,7 +191,6 @@ class ServerElectionHandler(MasterHandler):
node = app.nm.getByUUID(uuid)
app.primary = False
app.primary_master_node = node
app.unconnected_master_node_set.clear()
app.negotiating_master_node_set.clear()
neo.lib.logging.info('%s is the primary', node)
......@@ -47,8 +47,6 @@ class MasterClientElectionTests(NeoUnitTestBase):
self.election = ClientElectionHandler(self.app)
self.app.unconnected_master_node_set = set()
self.app.negotiating_master_node_set = set()
for node in self.app.nm.getMasterList():
self.app.unconnected_master_node_set.add(node.getAddress())
# define some variable to simulate client and storage node
self.storage_port = 10021
self.master_port = 10011
......@@ -70,22 +68,8 @@ class MasterClientElectionTests(NeoUnitTestBase):
def _checkUnconnected(self, node):
addr = node.getAddress()
self.assertTrue(addr in self.app.unconnected_master_node_set)
self.assertFalse(addr in self.app.negotiating_master_node_set)
def _checkNegociating(self, node):
addr = node.getAddress()
self.assertTrue(addr in self.app.negotiating_master_node_set)
self.assertFalse(addr in self.app.unconnected_master_node_set)
def test_connectionStarted(self):
node, conn = self.identifyToMasterNode()
self.assertTrue(node.isUnknown())
self._checkUnconnected(node)
self.election.connectionStarted(conn)
self.assertTrue(node.isUnknown())
self._checkNegociating(node)
def test_connectionFailed(self):
node, conn = self.identifyToMasterNode()
self.assertTrue(node.isUnknown())
......@@ -107,7 +91,6 @@ class MasterClientElectionTests(NeoUnitTestBase):
self._checkUnconnected(node)
addr = node.getAddress()
self.app.negotiating_master_node_set.add(addr)
self.app.unconnected_master_node_set.discard(addr)
def test_connectionClosed(self):
node, conn = self.identifyToMasterNode()
......@@ -115,7 +98,6 @@ class MasterClientElectionTests(NeoUnitTestBase):
self.election.connectionClosed(conn)
self.assertTrue(node.isUnknown())
addr = node.getAddress()
self.assertFalse(addr in self.app.unconnected_master_node_set)
self.assertFalse(addr in self.app.negotiating_master_node_set)
def test_acceptIdentification1(self):
......@@ -124,7 +106,6 @@ class MasterClientElectionTests(NeoUnitTestBase):
args = (node.getUUID(), 0, 10, self.app.uuid)
self.election.acceptIdentification(conn,
NodeTypes.CLIENT, *args)
self.assertFalse(node in self.app.unconnected_master_node_set)
self.assertFalse(node in self.app.negotiating_master_node_set)
self.checkClosed(conn)
......@@ -173,7 +154,6 @@ class MasterClientElectionTests(NeoUnitTestBase):
node, conn = self.identifyToMasterNode()
master_list = self._getMasterList()
self.election.answerPrimary(conn, node.getUUID(), master_list)
self.assertEqual(len(self.app.unconnected_master_node_set), 0)
self.assertEqual(len(self.app.negotiating_master_node_set), 0)
self.assertFalse(self.app.primary)
self.assertEqual(self.app.primary_master_node, node)
......@@ -194,7 +174,6 @@ class MasterServerElectionTests(NeoUnitTestBase):
self.app.unconnected_master_node_set = set()
self.app.negotiating_master_node_set = set()
for node in self.app.nm.getMasterList():
self.app.unconnected_master_node_set.add(node.getAddress())
node.setState(NodeStates.RUNNING)
# define some variable to simulate client and storage node
self.client_address = (self.local_ip, 1000)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment