Commit 7e8ca9ec authored by Julien Muchembled

master: fix crash of backup master when disconnected from upstream while serving clients

This fixes:

  Traceback (most recent call last):
    File "neo/master/app.py", line 172, in run
      self._run()
    File "neo/master/app.py", line 182, in _run
      self.playPrimaryRole()
    File "neo/master/app.py", line 314, in playPrimaryRole
      self.backup_app.provideService())
    File "neo/master/backup_app.py", line 101, in provideService
      app.changeClusterState(ClusterStates.STARTING_BACKUP)
    File "neo/master/app.py", line 474, in changeClusterState
      ) or not node.isClient(), (state, node)
  AssertionError: (<EnumItem STARTING_BACKUP (4)>, <ClientNode(uuid=C1, state=RUNNING, connection=<ServerConnection(nid=C1, address=127.0.0.1:52430, handler=ClientReadOnlyServiceHandler, fd=59, on_close=onConnectionClosed, server) at 7f38f5628390>) at 7f38f5628ad0>)
parent e2b11d54
...@@ -136,6 +136,8 @@ class BackupApplication(object): ...@@ -136,6 +136,8 @@ class BackupApplication(object):
del self.pt del self.pt
except AttributeError: except AttributeError:
pass pass
for node in app.nm.getClientList(True):
node.getConnection().close()
except StateChangedException, e: except StateChangedException, e:
if e.args[0] != ClusterStates.STOPPING_BACKUP: if e.args[0] != ClusterStates.STOPPING_BACKUP:
raise raise
......
...@@ -1128,11 +1128,12 @@ class ReplicationTests(NEOThreadedTest): ...@@ -1128,11 +1128,12 @@ class ReplicationTests(NEOThreadedTest):
# S -> Sb link stops working during [cutoff, recover) test iterations # S -> Sb link stops working during [cutoff, recover) test iterations
cutoff = 4 cutoff = 4
recover = 7 recover = 7
loop = 10
def delayReplication(conn, packet): def delayReplication(conn, packet):
return isinstance(packet, Packets.AnswerFetchTransactions) return isinstance(packet, Packets.AnswerFetchTransactions)
with ConnectionFilter() as f: with ConnectionFilter() as f:
for i in xrange(10): for i in xrange(loop):
if i == cutoff: if i == cutoff:
f.add(delayReplication) f.add(delayReplication)
if i == recover: if i == recover:
...@@ -1202,6 +1203,14 @@ class ReplicationTests(NEOThreadedTest): ...@@ -1202,6 +1203,14 @@ class ReplicationTests(NEOThreadedTest):
# thus not ReadOnlyError # thus not ReadOnlyError
self.assertRaises(NEOStorageError, Zb.tpc_vote, txn) self.assertRaises(NEOStorageError, Zb.tpc_vote, txn)
if i == loop // 2:
# Check that we survive a disconnection from upstream
# when we are serving clients. The client must be
# disconnected before leaving BACKINGUP state.
conn, = U.master.getConnectionList(B.master)
conn.close()
self.tic()
# close storage because client app is otherwise shared in # close storage because client app is otherwise shared in
# threaded tests and we need to refresh last_tid on next run # threaded tests and we need to refresh last_tid on next run
# (XXX see above about invalidations not working) # (XXX see above about invalidations not working)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment