#
# Copyright (C) 2006-2015  Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

17
from neo.lib import logging
18 19 20 21
from neo.lib.node import NodeManager
from neo.lib.event import EventManager
from neo.lib.connection import ListeningConnection
from neo.lib.exception import PrimaryFailure
22
from .handler import AdminEventHandler, MasterEventHandler, \
23
    MasterRequestEventHandler
24 25
from neo.lib.bootstrap import BootstrapManager
from neo.lib.pt import PartitionTable
26 27
from neo.lib.protocol import ClusterStates, Errors, \
    NodeTypes, NodeStates, Packets
28
from neo.lib.debug import register as registerLiveDebugger
29

30 31 32
class Application(object):
    """The admin node application.

    Listens on the configured bind address with an ``AdminEventHandler``,
    identifies itself to the primary master as ``NodeTypes.ADMIN`` and asks
    it for the cluster state, the node information and the partition table.
    """

    def __init__(self, config):
        """Build the managers and handlers from *config*.

        *config* must provide ``getDynamicMasterList()``, ``getCluster()``,
        ``getBind()``, ``getMasters()`` and ``getUUID()``.
        """
        # Internal attributes.
        self.em = EventManager()
        self.nm = NodeManager(config.getDynamicMasterList())

        self.name = config.getCluster()
        self.server = config.getBind()

        self.master_addresses = config.getMasters()
        logging.debug('IP address is %s, port is %d', *self.server)

        # The partition table is initialized after getting the number of
        # partitions.
        self.pt = None
        self.uuid = config.getUUID()
        self.primary_master_node = None
        self.request_handler = MasterRequestEventHandler(self)
        self.master_event_handler = MasterEventHandler(self)
        self.cluster_state = None
        self.reset()
        registerLiveDebugger(on_log=self.log)

    def close(self):
        """Close the node and event managers and drop all instance state."""
        self.listening_conn = None
        self.nm.close()
        self.em.close()
        # Drop every instance attribute at once.
        del self.__dict__

    def reset(self):
        """Forget the bootstrap status and the current master connection."""
        self.bootstrapped = False
        self.master_conn = None
        self.master_node = None

    def log(self):
        """Log the event manager, the node manager and, if any, the
        partition table."""
        self.em.log()
        self.nm.log()
        if self.pt is not None:
            self.pt.log()

    def run(self):
        """Run the application, logging its state before propagating any
        exception."""
        try:
            self._run()
        except Exception:
            logging.exception('Pre-mortem data:')
            self.log()
            logging.flush()
            raise

    def _run(self):
        """Make sure that the status is sane and start a loop.

        Raises RuntimeError if the cluster name is empty.
        """
        # A truthiness test also rejects a missing (None) name with the
        # intended error instead of an unrelated TypeError from len().
        if not self.name:
            raise RuntimeError('cluster name must be non-empty')

        # Make a listening port.
        handler = AdminEventHandler(self)
        self.listening_conn = ListeningConnection(self.em, handler, self.server)

        while self.cluster_state != ClusterStates.STOPPING:
            self.connectToPrimary()
            try:
                # Only an exception leaves this loop; PrimaryFailure brings
                # us back to the cluster state check above.
                while True:
                    self.em.poll(1)
            except PrimaryFailure:
                logging.error('primary master is down')
        self.listening_conn.close()
        # Keep polling until the event manager reports it is idle.
        while not self.em.isIdle():
            self.em.poll(1)

    def connectToPrimary(self):
        """Find a primary master node, and connect to it.

        If a primary master node is not elected or ready, repeat
        the attempt of a connection periodically.

        Note that I do not accept any connection from non-master nodes
        at this stage.

        Raises RuntimeError if the number of partitions or replicas given
        by the master differs from the existing partition table.
        """
        nm = self.nm
        nm.init()
        self.cluster_state = None

        for address in self.master_addresses:
            nm.createMaster(address=address)

        # search, find, connect and identify to the primary master
        bootstrap = BootstrapManager(self, self.name, NodeTypes.ADMIN,
                self.uuid, self.server)
        data = bootstrap.getPrimaryConnection()
        (node, conn, uuid, num_partitions, num_replicas) = data
        nm.update([(node.getType(), node.getAddress(), node.getUUID(),
                    NodeStates.RUNNING)])
        self.master_node = node
        self.master_conn = conn
        self.uuid = uuid

        if self.pt is None:
            self.pt = PartitionTable(num_partitions, num_replicas)
        elif self.pt.getPartitions() != num_partitions:
            # XXX: shouldn't we recover instead of raising ?
            raise RuntimeError('the number of partitions is inconsistent')
        elif self.pt.getReplicas() != num_replicas:
            # XXX: shouldn't we recover instead of raising ?
            raise RuntimeError('the number of replicas is inconsistent')

        # passive handler
        self.master_conn.setHandler(self.master_event_handler)
        self.master_conn.ask(Packets.AskClusterState())
        self.master_conn.ask(Packets.AskNodeInformation())
        self.master_conn.ask(Packets.AskPartitionTable())

    def sendPartitionTable(self, conn, min_offset, max_offset, uuid):
        """Answer *conn* with the rows of offsets [min_offset, max_offset).

        A *max_offset* of 0 means "up to the number of partitions". When
        *uuid* is not None, only the cells whose UUID equals it are listed.
        If an IndexError is raised while building the rows, a protocol error
        is notified instead of the answer.
        """
        # we have a pt
        self.pt.log()
        row_list = []
        if max_offset == 0:
            max_offset = self.pt.getPartitions()
        try:
            for offset in xrange(min_offset, max_offset):
                row = []
                try:
                    for cell in self.pt.getCellList(offset):
                        if uuid is None or cell.getUUID() == uuid:
                            row.append((cell.getUUID(), cell.getState()))
                except TypeError:
                    # NOTE(review): presumably getCellList() can return a
                    # non-iterable (e.g. None) for a partition without
                    # cells; the row is then sent as collected so far.
                    # Confirm against PartitionTable.
                    pass
                row_list.append((offset, row))
        except IndexError:
            conn.notify(Errors.ProtocolError('invalid partition table offset'))
        else:
            conn.answer(Packets.AnswerPartitionList(self.pt.getID(), row_list))