Commit b9a3c365 authored by Xavier Thompson

node boot: Format even with unreachable master

Use the format return code to distinguish between failure to format and
failure to report to master at the end. Then make boot run format even
if the master is unreachable, and:
* if format fails to report, wait until master is reachable, and retry
* if format fails for another reason, retry after a fixed 15s delay
* once format succeeds fully, run bang
parent 1c5f8833
......@@ -40,7 +40,7 @@ from netaddr import valid_ipv4, valid_ipv6
from slapos.cli.command import check_root_user
from slapos.cli.entry import SlapOSApp
from slapos.cli.config import ConfigCommand
from slapos.format import isGlobalScopeAddress
from slapos.format import isGlobalScopeAddress, FormatReturn
from slapos.util import string_to_boolean
import argparse
import logging
......@@ -65,10 +65,7 @@ def _runBang(app):
Launch slapos node format.
"""
logger.info("[BOOT] Invoking slapos node bang...")
result = app.run(['node', 'bang', '-m', 'Reboot'])
if result == 1:
return 0
return 1
return app.run(['node', 'bang', '-m', 'Reboot'])
def _runFormat(app):
......@@ -76,10 +73,7 @@ def _runFormat(app):
Launch slapos node format.
"""
logger.info("[BOOT] Invoking slapos node format...")
result = app.run(['node', 'format', '--now', '--verbose'])
if result == 1:
return 0
return 1
return app.run(['node', 'format', '--now', '--verbose'])
def _ping(hostname):
......@@ -139,6 +133,16 @@ def _ping_hostname(hostname):
is_ready = _ping6(hostname)
def _ping_master(master_hostname):
    """
    Probe the master, picking the probe from the form of master_hostname.

    A literal IPv4 address is handed to _test_ping, a literal IPv6 address
    to _test_ping6, and anything else is treated as a hostname and handed
    to _ping_hostname.
    """
    # NOTE(review): the probes presumably block until the master answers;
    # their bodies are not visible here -- confirm before relying on it.
    if valid_ipv4(master_hostname):
        probe = _test_ping
    elif valid_ipv6(master_hostname):
        probe = _test_ping6
    else:
        # Neither address family matched: treat the value as a hostname.
        probe = _ping_hostname
    probe(master_hostname)
def _waitIpv6Ready(ipv6_interface):
"""
test if ipv6 is ready on ipv6_interface
......@@ -154,6 +158,7 @@ def _waitIpv6Ready(ipv6_interface):
"try again in 5 seconds...", ipv6_interface)
sleep(5)
class BootCommand(ConfigCommand):
"""
Test network and invoke simple format and bang (Use on Linux startup)
......@@ -196,23 +201,29 @@ class BootCommand(ConfigCommand):
if ipv6_interface is not None:
_waitIpv6Ready(ipv6_interface)
# Check that node can ping master
if valid_ipv4(master_hostname):
_test_ping(master_hostname)
elif valid_ipv6(master_hostname):
_test_ping6(master_hostname)
else:
# hostname
_ping_hostname(master_hostname)
app = SlapOSApp()
# Make sure slapos node format returns ok
while not _runFormat(app):
logger.error("[BOOT] Fail to format, try again in 15 seconds...")
sleep(15)
while True:
# Make sure slapos node format returns ok
result = _runFormat(app)
if result == FormatReturn.FAILURE:
logger.error("[BOOT] Fail to format, try again in 15 seconds...")
sleep(15)
continue
if result == FormatReturn.OFFLINE_SUCCESS:
logger.error(
"[BOOT] Fail to post format information"
", try again when connection to master is up..."
)
sleep(15)
_ping_master(master_hostname)
continue
break
# Make sure slapos node bang returns ok
while not _runBang(app):
while _runBang(app):
logger.error("[BOOT] Fail to bang, try again in 15 seconds...")
sleep(15)
......
......@@ -125,4 +125,4 @@ class FormatCommand(ConfigCommand):
tracing_monkeypatch(conf)
do_format(conf=conf)
return do_format(conf=conf)
......@@ -30,6 +30,7 @@
from six.moves import configparser
import distro
import enum
import errno
import fcntl
import grp
......@@ -68,6 +69,12 @@ from slapos import version
from slapos import manager as slapmanager
class FormatReturn(enum.IntEnum):
    """
    Result codes returned by do_format.

    The members are plain integers, so a value can be passed straight
    through as a command return code and compared against ints.
    """
    # The computer was prepared and nothing failed.
    SUCCESS = 0
    # Preparing the computer itself raised an exception.
    FAILURE = 1
    # The computer was prepared, but sending its information to the
    # master failed.
    OFFLINE_SUCCESS = 2
logger = logging.getLogger("slapos.format")
......@@ -1578,39 +1585,50 @@ def random_delay(conf):
def do_format(conf):
    """
    Prepare the computer described by conf and report it to the master.

    The computer is loaded from conf.input_definition_file when that is
    set, otherwise from conf.computer_xml. It is then formatted and
    updated. Unless conf.dry_run is set, its description is dumped to
    conf.computer_xml / conf.computer_json and sent to the master.

    Returns a FormatReturn and never lets an Exception escape:
      * SUCCESS: the computer was prepared and, when not a dry run, its
        information was sent to the master.
      * OFFLINE_SUCCESS: the computer was prepared but sending the
        information to the master raised.
      * FAILURE: any other step raised.
    """
    try:
        random_delay(conf)

        if conf.input_definition_file:
            computer = parse_computer_definition(conf, conf.input_definition_file)
        else:
            # no definition file, figure out computer
            computer = parse_computer_xml(conf, conf.computer_xml)

        computer.instance_storage_home = conf.instance_storage_home
        conf.logger.info('Updating computer')
        address = computer.getAddress()
        computer.address = address['addr']
        computer.netmask = address['netmask']

        if conf.output_definition_file:
            write_computer_definition(conf, computer)

        computer.format(alter_user=conf.alter_user,
                        alter_network=conf.alter_network,
                        create_tap=conf.create_tap)

        if getattr(conf, 'certificate_repository_path', None):
            mkdir_p(conf.certificate_repository_path, mode=0o700)

        computer.update()

        # A dry run has nothing to report, so it counts as a full success
        # rather than falling through and returning None.
        result = FormatReturn.SUCCESS

        # Dumping and sending to the erp5 the current configuration
        if not conf.dry_run:
            computer.dump(path_to_xml=conf.computer_xml,
                          path_to_json=conf.computer_json,
                          logger=conf.logger)
            conf.logger.info('Posting information to %r' % conf.master_url)
            try:
                computer.send(conf)
            except Exception:
                # The local preparation is done; only the report failed.
                # Callers can retry once the master is reachable again.
                conf.logger.exception('failed to transfer information to %r' % conf.master_url)
                result = FormatReturn.OFFLINE_SUCCESS

        # Logged on the normal path only (not from a `finally`), so an
        # interrupt during send is not reported as a success.
        conf.logger.info('slapos successfully prepared the computer.')
        return result
    except Exception:
        # Boundary handler: turn any failure into a return code so that
        # callers can decide whether and when to retry.
        conf.logger.exception('slapos failed to prepare the computer.')
        return FormatReturn.FAILURE
class FormatConfig(object):
......
......@@ -436,8 +436,9 @@ class TestCliBoot(CliMixin):
# run slapos node boot
app = slapos.cli.entry.SlapOSApp()
fake = mock.Mock(return_value=mock.Mock(**{'run.return_value': 0}))
with patch('slapos.cli.boot.check_root_user', return_value=True) as check_root_user,\
patch('slapos.cli.boot.SlapOSApp') as SlapOSApp,\
patch('slapos.cli.boot.SlapOSApp', new=fake) as SlapOSApp,\
patch('slapos.cli.boot.ConfigCommand.config_path', return_value=slapos_conf.name), \
patch(
'slapos.cli.boot.netifaces.ifaddresses',
......@@ -477,8 +478,8 @@ class TestCliBoot(CliMixin):
patch('slapos.cli.format.check_root_user', return_value=True),\
patch('slapos.cli.format.logging.FileHandler', return_value=logging.NullHandler()),\
patch('slapos.cli.bang.check_root_user', return_value=True),\
patch('slapos.cli.format.do_format', side_effect=[Exception, Exception, None]) as do_format,\
patch('slapos.cli.bang.do_bang', side_effect=[Exception, Exception, None]) as do_bang:
patch('slapos.cli.format.do_format', side_effect=[Exception, Exception, 0]) as do_format,\
patch('slapos.cli.bang.do_bang', side_effect=[Exception, Exception, 0]) as do_bang:
app.run(('node', 'boot'))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment