Commit 03bcb91c authored by Yoshinori Okuji's avatar Yoshinori Okuji

The election part is rewritten. Somehow tested.

git-svn-id: https://svn.erp5.org/repos/neo/branches/prototype3@25 71dcc9de-d417-0410-9af5-da40c76e7ee4
parent dc413da3
...@@ -38,7 +38,7 @@ class BaseConnection(object): ...@@ -38,7 +38,7 @@ class BaseConnection(object):
def getHandler(self): def getHandler(self):
return self.handler return self.handler
def setHandler(self): def setHandler(self, handler):
self.handler = handler self.handler = handler
def getEventManager(self): def getEventManager(self):
...@@ -73,7 +73,7 @@ class ListeningConnection(BaseConnection): ...@@ -73,7 +73,7 @@ class ListeningConnection(BaseConnection):
class Connection(BaseConnection): class Connection(BaseConnection):
"""A connection.""" """A connection."""
def __init__(self, event_manager, handler, s = None, addr = None): def __init__(self, event_manager, handler, s = None, addr = None):
BaseConnection.__init__(self, handler, event_manager, s = s, addr = addr) BaseConnection.__init__(self, event_manager, handler, s = s, addr = addr)
if s is not None: if s is not None:
event_manager.addReader(self) event_manager.addReader(self)
self.read_buf = [] self.read_buf = []
...@@ -231,7 +231,7 @@ class Connection(BaseConnection): ...@@ -231,7 +231,7 @@ class Connection(BaseConnection):
# If this is the first time, enable polling for writing. # If this is the first time, enable polling for writing.
if len(self.write_buf) == 1: if len(self.write_buf) == 1:
self.em.addWriter(self.s) self.em.addWriter(self)
def expectMessage(self, msg_id = None, timeout = 5, additional_timeout = 30): def expectMessage(self, msg_id = None, timeout = 5, additional_timeout = 30):
"""Expect a message for a reply to a given message ID or any message. """Expect a message for a reply to a given message ID or any message.
...@@ -288,13 +288,13 @@ class ClientConnection(Connection): ...@@ -288,13 +288,13 @@ class ClientConnection(Connection):
if self.connecting: if self.connecting:
err = self.s.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR) err = self.s.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR)
if err: if err:
self.connectionFailed() self.handler.connectionFailed(self)
self.close() self.close()
return return
else: else:
self.connecting = False self.connecting = False
self.handler.connectionCompleted(self) self.handler.connectionCompleted(self)
self.cm.addReader(self.s) self.em.addReader(self)
else: else:
Connection.writable(self) Connection.writable(self)
......
class NeoException(Exception): pass
class ElectionFailure(NeoException): pass
class PrimaryFailure(NeoException): pass
...@@ -20,18 +20,19 @@ class EventHandler(object): ...@@ -20,18 +20,19 @@ class EventHandler(object):
def connectionStarted(self, conn): def connectionStarted(self, conn):
"""Called when a connection is started.""" """Called when a connection is started."""
pass logging.debug('connection started for %s:%d', *(conn.getAddress()))
def connectionCompleted(self, conn): def connectionCompleted(self, conn):
"""Called when a connection is completed.""" """Called when a connection is completed."""
pass logging.debug('connection completed for %s:%d', *(conn.getAddress()))
def connectionFailed(self, conn): def connectionFailed(self, conn):
"""Called when a connection failed.""" """Called when a connection failed."""
pass logging.debug('connection failed for %s:%d', *(conn.getAddress()))
def connectionAccepted(self, conn, s, addr): def connectionAccepted(self, conn, s, addr):
"""Called when a connection is accepted.""" """Called when a connection is accepted."""
logging.debug('connection accepted from %s:%d', *addr)
new_conn = ServerConnection(conn.getEventManager(), conn.getHandler(), new_conn = ServerConnection(conn.getEventManager(), conn.getHandler(),
s = s, addr = addr) s = s, addr = addr)
# A request for a node identification should arrive. # A request for a node identification should arrive.
...@@ -39,19 +40,21 @@ class EventHandler(object): ...@@ -39,19 +40,21 @@ class EventHandler(object):
def timeoutExpired(self, conn): def timeoutExpired(self, conn):
"""Called when a timeout event occurs.""" """Called when a timeout event occurs."""
pass logging.debug('timeout expired for %s:%d', *(conn.getAddress()))
def connectionClosed(self, conn): def connectionClosed(self, conn):
"""Called when a connection is closed by the peer.""" """Called when a connection is closed by the peer."""
pass logging.debug('connection closed for %s:%d', *(conn.getAddress()))
def packetReceived(self, conn, packet): def packetReceived(self, conn, packet):
"""Called when a packet is received.""" """Called when a packet is received."""
logging.debug('packet received from %s:%d', *(conn.getAddress()))
self.dispatch(conn, packet) self.dispatch(conn, packet)
def packetMalformed(self, conn, packet, error_message): def packetMalformed(self, conn, packet, error_message):
"""Called when a packet is malformed.""" """Called when a packet is malformed."""
logging.info('malformed packet: %s', error_message) logging.info('malformed packet from %s:%d: %s',
conn.getAddress()[0], conn.getAddress()[1], error_message)
conn.addPacket(Packet().protocolError(packet.getId(), error_message)) conn.addPacket(Packet().protocolError(packet.getId(), error_message))
conn.abort() conn.abort()
self.peerBroken(conn) self.peerBroken(conn)
...@@ -65,7 +68,7 @@ class EventHandler(object): ...@@ -65,7 +68,7 @@ class EventHandler(object):
t = packet.getType() t = packet.getType()
try: try:
method = self.packet_dispatch_table[t] method = self.packet_dispatch_table[t]
args = packet.decode() args = packet.decode() or ()
method(conn, packet, *args) method(conn, packet, *args)
except ValueError: except ValueError:
self.handleUnexpectedPacket(conn, packet) self.handleUnexpectedPacket(conn, packet)
...@@ -107,10 +110,10 @@ class EventHandler(object): ...@@ -107,10 +110,10 @@ class EventHandler(object):
def handlePong(self, conn, packet): def handlePong(self, conn, packet):
pass pass
def handleAskPrimaryNode(self, conn, packet): def handleAskPrimaryMaster(self, conn, packet):
self.handleUnexpectedPacket(conn, packet) self.handleUnexpectedPacket(conn, packet)
def handleAnswerPrimaryNode(self, conn, packet, primary_uuid, known_master_list): def handleAnswerPrimaryMaster(self, conn, packet, primary_uuid, known_master_list):
self.handleUnexpectedPacket(conn, packet) self.handleUnexpectedPacket(conn, packet)
def handleAnnouncePrimaryMaster(self, conn, packet): def handleAnnouncePrimaryMaster(self, conn, packet):
......
This diff is collapsed.
import logging
from neo.protocol import MASTER_NODE_TYPE, \
RUNNING_STATE, BROKEN_STATE, TEMPORARILY_DOWN_STATE, DOWN_STATE
from neo.master.handler import MasterEventHandler
from neo.connection import ClientConnection
from neo.exception import ElectionFailure
from neo.protocol import Packet, INVALID_UUID
from neo.util import dump
class ElectionEventHandler(MasterEventHandler):
"""This class deals with events for a primary master election."""
def connectionStarted(self, conn):
app = self.app
addr = conn.getAddress()
app.unconnected_master_node_set.remove(addr)
app.negotiating_master_node_set.add(addr)
MasterEventHandler.connectionStarted(self, conn)
def connectionCompleted(self, conn):
app = self.app
addr = conn.getAddress()
# Request a node idenfitication.
p = Packet()
msg_id = conn.getNextId()
p.requestNodeIdentification(msg_id, MASTER_NODE_TYPE, app.uuid,
app.server[0], app.server[1], app.name)
conn.addPacket(p)
conn.expectMessage(msg_id)
MasterEventHandler.connectionCompleted(self, conn)
def connectionFailed(self, conn):
app = self.app
addr = conn.getAddress()
app.negotiating_master_node_set.discard(addr)
node = app.nm.getNodeByServer(addr)
if node.getState() not in (DOWN_STATE, BROKEN_STATE):
app.unconnected_master_node_set.add(addr)
node.setState(TEMPORARILY_DOWN_STATE)
MasterEventHandler.connectionFailed(self, conn)
def connectionClosed(self, conn):
if isinstance(conn, ClientConnection):
self.connectionFailed(conn)
MasterEventHandler.connectionClosed(self, conn)
def timeoutExpired(self, conn):
if isinstance(conn, ClientConnection):
self.connectionFailed(conn)
MasterEventHandler.timeoutExpired(self, conn)
def peerBroken(self, conn):
app = self.app
addr = conn.getAddress()
node = app.nm.getNodeByServer(addr)
if isinstance(conn, ClientConnection):
node.setState(DOWN_STATE)
app.negotiating_master_node_set.discard(addr)
else:
if node.getUUID() is not None:
node.setState(BROKEN_STATE)
MasterEventHandler.peerBroken(self, conn)
def packetReceived(self, conn, packet):
if isinstance(conn, ClientConnection):
node = self.app.nm.getNodeByServer(conn.getAddress())
if node.getState() != BROKEN_STATE:
node.setState(RUNNING_STATE)
MasterEventHandler.packetReceived(self, conn, packet)
def handleAcceptNodeIdentification(self, conn, packet, node_type,
uuid, ip_address, port):
if isinstance(conn, ClientConnection):
app = self.app
node = app.nm.getNodeByServer(conn.getAddress())
if node_type != MASTER_NODE_TYPE:
# The peer is not a master node!
logging.error('%s:%d is not a master node', ip_address, port)
app.nm.remove(node)
app.negotiating_master_node_set.discard(node.getAddress())
conn.close()
return
if conn.getAddress() != (ip_address, port):
# The server address is different! Then why was
# the connection successful?
logging.error('%s:%d is waiting for %s:%d',
conn.getAddress()[0], conn.getAddress()[1], ip_address, port)
app.nm.remove(node)
app.negotiating_master_node_set.discard(node.getServer())
conn.close()
return
conn.setUUID(uuid)
node.setUUID(uuid)
# Ask a primary master.
msg_id = conn.getNextId()
conn.addPacket(Packet().askPrimaryMaster(msg_id))
conn.expectMessage(msg_id)
else:
self.handleUnexpectedPacket(conn, packet)
def handleAnswerPrimaryMaster(self, conn, packet, primary_uuid, known_master_list):
if isinstance(conn, ClientConnection):
app = self.app
# Register new master nodes.
for ip_address, port, uuid in known_master_list:
addr = (ip_address, port)
if app.server == addr:
# This is self.
continue
else:
n = app.nm.getNodeByServer(addr)
if n is None:
n = MasterNode(server = addr)
app.nm.add(n)
app.unconnected_master_node_set.add(addr)
if uuid != INVALID_UUID:
# If I don't know the UUID yet, believe what the peer
# told me at the moment.
if n.getUUID() is None:
n.setUUID(uuid)
if primary_uuid != INVALID_UUID:
# The primary master is defined.
if app.primary_master_node is not None \
and app.primary_master_node.getUUID() != primary_uuid:
# There are multiple primary master nodes. This is
# dangerous.
raise ElectionFailure, 'multiple primary master nodes'
primary_node = app.nm.getNodeByUUID(primary_uuid)
if primary_node is None:
# I don't know such a node. Probably this information
# is old. So ignore it.
pass
else:
if node.getUUID() == primary_uuid:
# Whatever the situation is, I trust this master.
app.primary = False
app.primary_master_node = node
else:
if app.uuid < uuid:
# I lost.
app.primary = False
app.negotiating_master_node_set.discard(conn.getAddress())
else:
self.handleUnexpectedPacket(conn, packet)
def handleRequestNodeIdentification(self, conn, packet, node_type,
uuid, ip_address, port, name):
if isinstance(conn, ClientConnection):
self.handleUnexpectedPacket(conn, packet)
else:
app = self.app
if node_type != MASTER_NODE_TYPE:
logging.info('reject a connection from a non-master')
conn.addPacket(Packet().notReady(packet.getId(), 'retry later'))
conn.abort()
return
if name != app.name:
logging.error('reject an alien cluster')
conn.addPacket(Packet().protocolError(packet.getId(),
'invalid cluster name'))
conn.abort()
return
addr = (ip_address, port)
node = app.nm.getNodeByServer(addr)
if node is None:
node = MasterNode(server = addr, uuid = uuid)
app.nm.add(node)
ap.unconnected_master_node_set.add(addr)
else:
# If this node is broken, reject it.
if node.getUUID() == uuid:
if node.getState() == BROKEN_STATE:
p = Packet()
p.brokenNodeDisallowedError(packet.getId(), 'go away')
conn.addPacket(p)
conn.abort()
return
# Trust the UUID sent by the peer.
node.setUUID(uuid)
conn.setUUID(uuid)
p = Packet()
p.acceptNodeIdentification(packet.getId(), MASTER_NODE_TYPE,
app.uuid, app.server[0], app.server[1])
conn.addPacket(p)
# Next, the peer should ask a primary master node.
conn.expectMessage()
def handleAskPrimaryMaster(self, conn, packet):
if isinstance(conn, ClientConnection):
self.handleUnexpectedPacket(conn, packet)
else:
uuid = conn.getUUID()
if uuid is None:
self.handleUnexpectedPacket(conn, packet)
return
app = self.app
if app.primary:
primary_uuid = app.uuid
elif app.primary_master_node is not None:
primary_uuid = app.primary_master_node.getUUID()
else:
primary_uuid = INVALID_UUID
known_master_list = []
for n in app.nm.getMasterNodeList():
if n.getState() == BROKEN_STATE:
continue
info = n.getServer() + (n.getUUID() or INVALID_UUID,)
known_master_list.append(info)
p = Packet()
p.answerPrimaryMaster(packet.getId(), primary_uuid, known_master_list)
conn.addPacket(p)
def handleAnnouncePrimaryMaster(self, conn, packet):
if isinstance(conn, ClientConnection):
self.handleUnexpectedPacket(conn, packet)
else:
uuid = conn.getUUID()
if uuid is None:
self.handleUnexpectedPacket(conn, packet)
return
app = self.app
if app.primary:
# I am also the primary... So restart the election.
raise ElectionFailure, 'another primary arises'
node = app.nm.getNodeByUUID(uuid)
app.primary = False
app.primary_master_node = node
logging.info('%s:%d is the primary', *(node.getServer()))
def handleReelectPrimaryMaster(self, conn, packet):
raise ElectionFailure, 'reelection requested'
from neo.handler import EventHandler
class MasterEventHandler(EventHandler):
"""This class implements a generic part of the event handlers."""
def __init__(self, app):
self.app = app
EventHandler.__init__(self)
from time import time from time import time
import logging
from neo.protocol import RUNNING_STATE, TEMPORARILY_DOWN_STATE, DOWN_STATE, BROKEN_STATE, \ from neo.protocol import RUNNING_STATE, TEMPORARILY_DOWN_STATE, DOWN_STATE, BROKEN_STATE, \
MASTER_NODE_TYPE, STORAGE_NODE_TYPE, CLIENT_NODE_TYPE MASTER_NODE_TYPE, STORAGE_NODE_TYPE, CLIENT_NODE_TYPE
from neo.util import dump
class Node(object): class Node(object):
"""This class represents a node.""" """This class represents a node."""
...@@ -28,6 +30,7 @@ class Node(object): ...@@ -28,6 +30,7 @@ class Node(object):
self.last_state_change = time() self.last_state_change = time()
def setServer(self, server): def setServer(self, server):
if self.server != server:
if self.server is not None: if self.server is not None:
self.manager.unregisterServer(self) self.manager.unregisterServer(self)
...@@ -38,6 +41,7 @@ class Node(object): ...@@ -38,6 +41,7 @@ class Node(object):
return self.server return self.server
def setUUID(self, uuid): def setUUID(self, uuid):
if self.uuid != uuid:
if self.uuid is not None: if self.uuid is not None:
self.manager.unregisterUUID(self) self.manager.unregisterUUID(self)
...@@ -96,11 +100,11 @@ class NodeManager(object): ...@@ -96,11 +100,11 @@ class NodeManager(object):
pass pass
def registerUUID(self, node): def registerUUID(self, node):
self.server_dict[node.getUUID()] = node self.uuid_dict[node.getUUID()] = node
def unregisterUUID(self, node): def unregisterUUID(self, node):
try: try:
del self.server_dict[node.getUUID()] del self.uuid_dict[node.getUUID()]
except KeyError: except KeyError:
pass pass
......
...@@ -125,6 +125,10 @@ class Packet(object): ...@@ -125,6 +125,10 @@ class Packet(object):
def notReady(self, msg_id, error_message): def notReady(self, msg_id, error_message):
return self.error(msg_id, NOT_READY, 'not ready: ' + error_message) return self.error(msg_id, NOT_READY, 'not ready: ' + error_message)
def brokenNodeDisallowedError(self, msg_id, error_message):
return self.error(msg_id, BROKEN_NODE_DISALLOWED_ERROR,
'broken node disallowed error: ' + error_message)
def ping(self, msg_id): def ping(self, msg_id):
self._id = msg_id self._id = msg_id
self._type = PING self._type = PING
...@@ -252,10 +256,10 @@ class Packet(object): ...@@ -252,10 +256,10 @@ class Packet(object):
def _decodeAnswerPrimaryMaster(self): def _decodeAnswerPrimaryMaster(self):
try: try:
primary_uuid, n = unpack('!16sL', self._body[:36]) primary_uuid, n = unpack('!16sL', self._body[:20])
known_master_list = [] known_master_list = []
for i in xrange(n): for i in xrange(n):
ip_address, port, uuid = unpack('!4sH16s', self._body[36+i*22:58+i*22]) ip_address, port, uuid = unpack('!4sH16s', self._body[20+i*22:42+i*22])
ip_address = inet_ntoa(ip_address) ip_address = inet_ntoa(ip_address)
known_master_list.append((ip_address, port, uuid)) known_master_list.append((ip_address, port, uuid))
except: except:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment