Commit 8296c3c7 authored by Yoshinori Okuji

A lot of bugfixes.

git-svn-id: https://svn.erp5.org/repos/neo/branches/prototype3@222 71dcc9de-d417-0410-9af5-da40c76e7ee4
parent 440d0981
This diff is collapsed.
......@@ -66,7 +66,7 @@ class Dispatcher(Thread):
if app.pt is not None and app.pt.operational():
# Connected to the primary master node and received all the information
break
app.node_not_ready = 0
app.local_var.node_not_ready = 0
if app.primary_master_node is None:
# Try with master node defined in config
addr, port = app.master_node_list[master_index].split(':')
......@@ -110,7 +110,7 @@ class Dispatcher(Thread):
elif app.primary_master_node.getServer() != (addr, port):
# Master node changed, connect to new one
break
elif app.node_not_ready:
elif app.local_var.node_not_ready:
# Wait a bit and reask again
break
elif app.pt is not None and app.pt.operational():
......
......@@ -45,6 +45,36 @@ class ClientEventHandler(EventHandler):
# put message in request queue
dispatcher._request_queue.put((conn, packet))
def _dealWithStorageFailure(self, conn, node, state):
    """Clean up after a failed storage node connection and report it.

    Three things happen, in this order:

    1. ``node`` is removed from the connection pool (``app.cp``).
    2. Every entry of the dispatcher's message table whose key starts
       with ``id(conn)`` is dropped, and each distinct queue that was
       registered there receives one fake packet ``(conn, None)``.
    3. If a primary master connection exists, it is sent a
       notifyNodeInformation packet describing ``node`` with ``state``.

    conn -- the storage connection that failed.
    node -- the storage node behind ``conn``.
    state -- the node state to report to the primary master node.
    """
    app = self.app

    # Remove from pool connection
    app.cp.removeConnection(node)

    # Put fake packets to task queues.
    message_table = self.dispatcher.message_table
    conn_id = id(conn)
    queue_set = set()
    # Iterate over a snapshot of the keys: entries are popped inside the
    # loop, and mutating a dict while walking a live key view raises
    # RuntimeError.
    for key in list(message_table.keys()):
        if key[0] == conn_id:
            queue_set.add(message_table.pop(key))
    # A queue may be registered under several message ids; the set makes
    # sure it gets a single fake packet.
    for queue in queue_set:
        queue.put((conn, None))

    # Notify the primary master node of the failure.
    # A separate name is used so the failed connection is not shadowed.
    master_conn = app.master_conn
    if master_conn is None:
        return
    master_conn.lock()
    try:
        msg_id = master_conn.getNextId()
        p = Packet()
        ip_address, port = node.getServer()
        node_list = [(STORAGE_NODE_TYPE, ip_address, port,
                      node.getUUID(), state)]
        p.notifyNodeInformation(msg_id, node_list)
        master_conn.addPacket(p)
    finally:
        master_conn.unlock()
def connectionFailed(self, conn):
app = self.app
uuid = conn.getUUID()
......@@ -59,15 +89,16 @@ class ClientEventHandler(EventHandler):
self.dispatcher.connectToPrimaryMasterNode(app)
else:
# Connection to a storage node failed
app.storage_node = -1
node = app.nm.getNodeByServer(conn.getAddress())
if isinstance(node, StorageNode):
self._dealWithStorageFailure(conn, node, TEMPORARILY_DOWN_STATE)
EventHandler.connectionFailed(self, conn)
def connectionClosed(self, conn):
uuid = conn.getUUID()
app = self.app
if app.master_conn is None:
EventHandler.connectionClosed(self, conn)
elif uuid == app.master_conn.getUUID():
if app.master_conn is not None and uuid == app.master_conn.getUUID():
logging.critical("connection to primary master node closed")
# Close connection
app.master_conn.close()
......@@ -76,29 +107,14 @@ class ClientEventHandler(EventHandler):
logging.critical("trying reconnection to master node...")
self.dispatcher.connectToPrimaryMasterNode(app)
else:
app = self.app
node = app.nm.getNodeByUUID(uuid)
if node is not None:
logging.info("connection to storage node %s closed",
node.getServer())
node = app.nm.getNodeByServer(conn.getAddress())
if isinstance(node, StorageNode):
# Notify primary master node that a storage node is temporarily down
conn = app.master_conn
if conn is not None:
conn.lock()
try:
msg_id = conn.getNextId()
p = Packet()
ip_address, port = node.getServer()
node_list = [(STORAGE_NODE_TYPE, ip_address, port, node.getUUID(),
TEMPORARILY_DOWN_STATE),]
p.notifyNodeInformation(msg_id, node_list)
conn.addPacket(p)
finally:
conn.unlock()
# Remove from pool connection
app.cp.removeConnection(node)
EventHandler.connectionClosed(self, conn)
logging.info("connection to storage node %s closed",
node.getServer())
self._dealWithStorageFailure(conn, node, TEMPORARILY_DOWN_STATE)
EventHandler.connectionClosed(self, conn)
def timeoutExpired(self, conn):
uuid = conn.getUUID()
......@@ -109,24 +125,12 @@ class ClientEventHandler(EventHandler):
logging.critical("trying reconnection to master node...")
self.dispatcher.connectToPrimaryMasterNode(app)
else:
node = app.nm.getNodeByUUID(uuid)
node = app.nm.getNodeByServer(conn.getAddress())
if isinstance(node, StorageNode):
# Notify primary master node that a storage node is temporarily down
conn = app.master_conn
if conn is not None:
conn.lock()
try:
msg_id = conn.getNextId()
p = Packet()
ip_address, port = node.getServer()
node_list = [(STORAGE_NODE_TYPE, ip_address, port, node.getUUID(),
TEMPORARILY_DOWN_STATE),]
p.notifyNodeInformation(msg_id, node_list)
conn.addPacket(p)
finally:
conn.unlock()
# Remove from pool connection
app.cp.removeConnection(node)
# Notify primary master node that a storage node is
# temporarily down.
self._dealWithStorageFailure(conn, node, TEMPORARILY_DOWN_STATE)
EventHandler.timeoutExpired(self, conn)
def peerBroken(self, conn):
......@@ -138,31 +142,17 @@ class ClientEventHandler(EventHandler):
logging.critical("trying reconnection to master node...")
self.dispatcher.connectToPrimaryMasterNode(app)
else:
node = app.nm.getNodeByUUID(uuid)
node = app.nm.getNodeByServer(conn.getAddress())
if isinstance(node, StorageNode):
# Notify primary master node that a storage node is broken
conn = app.master_conn
if conn is not None:
conn.lock()
try:
msg_id = conn.getNextId()
p = Packet()
ip_address, port = node.getServer()
node_list = [(STORAGE_NODE_TYPE, ip_address, port, node.getUUID(),
BROKEN_STATE),]
p.notifyNodeInformation(msg_id, node_list)
conn.addPacket(p)
finally:
conn.unlock()
# Remove from pool connection
app.cp.removeConnection(node)
self._dealWithStorageFailure(conn, node, BROKEN_STATE)
EventHandler.peerBroken(self, conn)
def handleNotReady(self, conn, packet, message):
if isinstance(conn, MTClientConnection):
app = self.app
app.node_not_ready = 1
app.local_var.node_not_ready = 1
else:
self.handleUnexpectedPacket(conn, packet)
......
......@@ -167,12 +167,15 @@ class EpollEventManager(object):
def poll(self, timeout = 1):
rlist, wlist = self.epoll.poll(timeout)
for fd in rlist:
conn = self.connection_dict[fd]
conn.lock()
try:
conn.readable()
finally:
conn.unlock()
conn = self.connection_dict[fd]
conn.lock()
try:
conn.readable()
finally:
conn.unlock()
except KeyError:
pass
for fd in wlist:
# This can fail, if a connection is closed in readable().
......
from time import time
import logging
from neo.protocol import RUNNING_STATE, TEMPORARILY_DOWN_STATE, DOWN_STATE, BROKEN_STATE, \
MASTER_NODE_TYPE, STORAGE_NODE_TYPE, CLIENT_NODE_TYPE
from neo.util import dump
class Node(object):
"""This class represents a node."""
......
......@@ -221,10 +221,6 @@ class Application(object):
for conn in em.getConnectionList():
conn.setHandler(handler)
# Forget all client nodes.
for node in nm.getClientNodeList():
nm.remove(node)
# Forget all unfinished data.
self.dm.dropUnfinishedData()
......
......@@ -99,6 +99,7 @@ class StorageEventHandler(EventHandler):
self.handleUnexpectedPacket(conn, packet)
return
logging.debug('handleNotifyNodeInformation: node_list = %r', node_list)
app = self.app
node = app.nm.getNodeByUUID(uuid)
if not isinstance(node, MasterNode) \
......@@ -142,12 +143,15 @@ class StorageEventHandler(EventHandler):
if state == RUNNING_STATE:
n = app.nm.getNodeByUUID(uuid)
if n is None:
logging.debug('adding client node %s', dump(uuid))
n = ClientNode(uuid = uuid)
app.nm.add(n)
assert app.nm.getNodeByUUID(uuid) is n
else:
self.dealWithClientFailure(uuid)
n = app.nm.getNodeByUUID(uuid)
if n is not None:
logging.debug('removing client node %s', dump(uuid))
app.nm.remove(n)
def handleAskLastIDs(self, conn, packet):
......
......@@ -137,7 +137,7 @@ class OperationEventHandler(StorageEventHandler):
else:
# If I do not know such a node, and it is not even a master
# node, simply reject it.
logging.error('reject an unknown node')
logging.error('reject an unknown node %s', dump(uuid))
conn.addPacket(Packet().notReady(packet.getId(),
'unknown node'))
conn.abort()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment