Commit ef9ef9b1 authored by Xavier Thompson

node boot: Drop offline partition start feature

Partitions no longer need to be restarted at boot since now slapgrid
will start the services even if there is no connection to master.

This reverts commit cada4581.
parent 3c5f5f90
......@@ -31,7 +31,6 @@ from __future__ import print_function
import subprocess
from six.moves.urllib.parse import urlparse
from six.moves import xmlrpc_client as xmlrpclib
from time import sleep
import glob
import os
......@@ -42,11 +41,6 @@ from slapos.cli.command import check_root_user
from slapos.cli.entry import SlapOSApp
from slapos.cli.config import ConfigCommand
from slapos.format import isGlobalScopeAddress
from slapos.grid.slapgrid import (COMPUTER_PARTITION_REQUESTED_STATE_FILENAME,
COMPUTER_PARTITION_STARTED_STATE)
from slapos.grid.svcbackend import (_getSupervisordSocketPath,
getSupervisorRPC,
launchSupervisord)
from slapos.util import string_to_boolean
import argparse
import logging
......@@ -65,48 +59,6 @@ def _removeTimestamp(instancehome, partition_base_name):
logger.info("Removing %s", timestamp_path)
os.remove(timestamp_path)
def _startComputerPartition(partition_id, supervisord_socket):
    """
    Ask supervisord to start the process group named after a partition.

    The RPC proxy is obtained from ``getSupervisorRPC(supervisord_socket)``
    and ``startProcessGroup(partition_id, False)`` is invoked on it.

    An ``xmlrpclib.Fault`` whose ``faultString`` starts with ``'BAD_NAME:'``
    is only logged ("Nothing to start"); any other fault is re-raised to the
    caller.  On success the request is logged.
    """
    try:
        with getSupervisorRPC(supervisord_socket) as supervisor:
            # NOTE(review): the second argument is presumably supervisor's
            # "wait" flag (False = do not block until started) -- confirm
            # against the supervisor XML-RPC API.
            supervisor.startProcessGroup(partition_id, False)
    except xmlrpclib.Fault as fault:
        if not fault.faultString.startswith('BAD_NAME:'):
            # Anything other than an unknown group name is a real failure.
            raise
        logger.info("Nothing to start on %s...", partition_id)
        return
    logger.info("Requested start of %s...", partition_id)
def _startComputerPartitionList(instance_root, partition_base_name):
    """
    Start the services of every partition whose requested state is 'started'.

    Supervisord is launched for ``instance_root`` first.  Then each path
    matching ``<instance_root>/<partition_base_name>*`` is inspected: when it
    contains a COMPUTER_PARTITION_REQUESTED_STATE_FILENAME file whose content
    equals COMPUTER_PARTITION_STARTED_STATE exactly, ``_startComputerPartition``
    is called with the directory's base name and the supervisord socket path.
    Partitions without that file, or with any other content, are left alone.
    """
    # NOTE(review): indentation was lost in the text this block was recovered
    # from; the state comparison is assumed to be nested under the
    # "state file exists" check -- confirm against the original file.
    glob_pattern = os.path.join(instance_root, "%s*" % partition_base_name)

    launchSupervisord(instance_root=instance_root, logger=logger)
    for partition_dir in glob.glob(glob_pattern):
        state_file = os.path.join(
            partition_dir, COMPUTER_PARTITION_REQUESTED_STATE_FILENAME)
        # Looked up on every iteration, exactly as the original did.
        socket_path = _getSupervisordSocketPath(instance_root, logger)

        if not os.path.exists(state_file):
            continue
        with open(state_file) as state_fd:
            requested_state = state_fd.read()
        if requested_state != COMPUTER_PARTITION_STARTED_STATE:
            continue

        # The supervisord group is named after the partition directory.
        partition_id = os.path.basename(partition_dir.rstrip('/'))
        _startComputerPartition(partition_id, socket_path)
def _runBang(app):
"""
......@@ -124,9 +76,7 @@ def _runFormat(app):
Launch slapos node format.
"""
logger.info("[BOOT] Invoking slapos node format...")
# '--local' parameter is to prevent node format command to post data to
# master, so this command can work without internet and setup partitions IP.
result = app.run(['node', 'format', '--now', '--local', '--verbose'])
result = app.run(['node', 'format', '--now', '--verbose'])
if result == 1:
return 0
return 1
......@@ -246,15 +196,6 @@ class BootCommand(ConfigCommand):
if ipv6_interface is not None:
_waitIpv6Ready(ipv6_interface)
app = SlapOSApp()
# Make sure slapos node format returns ok
while not _runFormat(app):
logger.error("[BOOT] Fail to format, try again in 15 seconds...")
sleep(15)
# Start computer partition services
_startComputerPartitionList(instance_root, partition_base_name)
# Check that node can ping master
if valid_ipv4(master_hostname):
_test_ping(master_hostname)
......@@ -264,6 +205,12 @@ class BootCommand(ConfigCommand):
# hostname
_ping_hostname(master_hostname)
app = SlapOSApp()
# Make sure slapos node format returns ok
while not _runFormat(app):
logger.error("[BOOT] Fail to format, try again in 15 seconds...")
sleep(15)
# Make sure slapos node bang returns ok
while not _runBang(app):
logger.error("[BOOT] Fail to bang, try again in 15 seconds...")
......
......@@ -83,12 +83,6 @@ class FormatCommand(ConfigCommand):
help='Launch slapformat without delay'
' (default: %(default)s)')
ap.add_argument('--local',
default=False, # can have a default as it is not in .cfg
action="store_true",
help='Keep format data locally, do not post xml to master'
' (default: %(default)s)')
ap.add_argument('-n', '--dry_run',
default=False, # can have a default as it is not in .cfg
action="store_true",
......
......@@ -1609,9 +1609,8 @@ def do_format(conf):
computer.dump(path_to_xml=conf.computer_xml,
path_to_json=conf.computer_json,
logger=conf.logger)
if not conf.local:
conf.logger.info('Posting information to %r' % conf.master_url)
computer.send(conf)
conf.logger.info('Posting information to %r' % conf.master_url)
computer.send(conf)
conf.logger.info('slapos successfully prepared the computer.')
......
......@@ -97,7 +97,6 @@ SLAPGRID_OFFLINE_SUCCESS = 3
PROMISE_TIMEOUT = 20
COMPUTER_PARTITION_TIMESTAMP_FILENAME = '.timestamp'
COMPUTER_PARTITION_REQUESTED_STATE_FILENAME = '.requested_state'
COMPUTER_PARTITION_LATEST_BANG_TIMESTAMP_FILENAME = '.slapos_latest_bang_timestamp'
COMPUTER_PARTITION_INSTALL_ERROR_FILENAME = '.slapgrid-%s-error.log'
COMPUTER_PARTITION_WAIT_LIST_FILENAME = '.slapos-report-wait-service-list'
......@@ -1136,10 +1135,6 @@ stderr_logfile_backups=1
instance_path,
COMPUTER_PARTITION_TIMESTAMP_FILENAME
)
partition_state_path = os.path.join(
instance_path,
COMPUTER_PARTITION_REQUESTED_STATE_FILENAME
)
parameter_dict = computer_partition.getInstanceParameterDict()
timestamp = parameter_dict.get('timestamp')
......@@ -1241,7 +1236,6 @@ stderr_logfile_backups=1
return
os.remove(timestamp_path)
os.remove(partition_state_path)
# Include Partition Logging
log_folder_path = "%s/.slapgrid/log" % instance_path
......@@ -1356,8 +1350,6 @@ stderr_logfile_backups=1
if timestamp:
with open(timestamp_path, 'w') as f:
f.write(str(timestamp))
with open(partition_state_path, 'w') as f:
f.write(str(computer_partition_state))
def FilterComputerPartitionList(self, computer_partition_list):
"""
......
......@@ -416,13 +416,8 @@ class TestCliBoot(CliMixin):
os.mkdir(os.path.join(instance_root, partition_base_name + '1'))
timestamp = os.path.join(
instance_root, partition_base_name + '1', '.timestamp')
requested_state_path = os.path.join(instance_root,
partition_base_name + '1',
'.requested_state')
with open(timestamp, 'w') as f:
f.write("1578552471")
with open(requested_state_path, 'w') as f:
f.write("started")
# make a config file using this instance root
with tempfile.NamedTemporaryFile(mode='w') as slapos_conf:
......@@ -447,21 +442,17 @@ class TestCliBoot(CliMixin):
patch(
'slapos.cli.boot.netifaces.ifaddresses',
return_value={socket.AF_INET6: ({'addr': '2000::1'},),},) as ifaddresses,\
patch('slapos.cli.boot._startComputerPartition', return_value=None) as start_partition,\
patch('slapos.cli.boot.launchSupervisord', return_value=None),\
patch('slapos.cli.boot._ping_hostname', return_value=1) as _ping_hostname:
app.run(('node', 'boot'))
# boot command runs as root
check_root_user.assert_called_once()
# Computer partition was started during boot
start_partition.assert_called_once()
# it waits for interface to have an IPv6 address
ifaddresses.assert_called_once_with('interface_name_from_config')
# then ping master hostname to wait for connectivity
_ping_hostname.assert_called_once_with('slap.vifib.com')
# then format and bang
SlapOSApp().run.assert_any_call(['node', 'format', '--now', '--local', '--verbose'])
SlapOSApp().run.assert_any_call(['node', 'format', '--now', '--verbose'])
SlapOSApp().run.assert_any_call(['node', 'bang', '-m', 'Reboot'])
# timestamp files have been removed
......@@ -483,7 +474,6 @@ class TestCliBoot(CliMixin):
patch('slapos.cli.boot.netifaces.ifaddresses',
side_effect=[net1, net2, net3]),\
patch('slapos.cli.boot._ping_hostname', return_value=0),\
patch('slapos.cli.boot._startComputerPartitionList', return_value=None) as start_partition,\
patch('slapos.cli.format.check_root_user', return_value=True),\
patch('slapos.cli.format.logging.FileHandler', return_value=logging.NullHandler()),\
patch('slapos.cli.bang.check_root_user', return_value=True),\
......@@ -493,7 +483,6 @@ class TestCliBoot(CliMixin):
app.run(('node', 'boot'))
check_root_user.assert_called_once()
start_partition.assert_called_once()
self.assertEqual(do_format.call_count, 3)
self.assertEqual(do_bang.call_count, 3)
......
......@@ -1466,7 +1466,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.assertInstanceDirectoryListEqual(['0'])
partition = os.path.join(self.instance_root, '0')
six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay'])
['.slapgrid', '.timestamp', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay'])
six.assertCountEqual(self, os.listdir(self.software_root), [instance.software.software_hash])
timestamp_path = os.path.join(instance.partition_path, '.timestamp')
self.setSlapgrid()
......@@ -1487,7 +1487,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.assertInstanceDirectoryListEqual(['0'])
partition = os.path.join(self.instance_root, '0')
six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg',
['.slapgrid', '.timestamp', 'buildout.cfg',
'software_release', 'worked', '.slapos-retention-lock-delay'])
six.assertCountEqual(self, os.listdir(self.software_root), [instance.software.software_hash])
......@@ -1510,7 +1510,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.assertInstanceDirectoryListEqual(['0'])
partition = os.path.join(self.instance_root, '0')
six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay'])
['.slapgrid', '.timestamp', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay'])
six.assertCountEqual(self, os.listdir(self.software_root), [instance.software.software_hash])
instance.timestamp = str(int(timestamp) - 1)
self.assertEqual(self.launchSlapgrid(), slapgrid.SLAPGRID_SUCCESS)
......@@ -1528,7 +1528,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.assertInstanceDirectoryListEqual(['0'])
partition = os.path.join(self.instance_root, '0')
six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay'])
['.slapgrid', '.timestamp', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay'])
six.assertCountEqual(self, os.listdir(self.software_root), [instance.software.software_hash])
instance.timestamp = str(int(timestamp) + 1)
self.assertEqual(self.launchSlapgrid(), slapgrid.SLAPGRID_SUCCESS)
......@@ -1556,7 +1556,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.assertInstanceDirectoryListEqual(['0'])
partition = os.path.join(self.instance_root, '0')
six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay'])
['.slapgrid', '.timestamp', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay'])
six.assertCountEqual(self, os.listdir(self.software_root),
[instance.software.software_hash])
instance.timestamp = None
......@@ -1588,7 +1588,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.launchSlapgrid()
partition = os.path.join(self.instance_root, '0')
six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg',
['.slapgrid', '.timestamp', 'buildout.cfg',
'software_release', 'worked', '.slapos-retention-lock-delay'])
time.sleep(2)
......@@ -1598,7 +1598,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.launchSlapgrid()
six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg',
['.slapgrid', '.timestamp', 'buildout.cfg',
'software_release', 'worked', '.slapos-retention-lock-delay'])
def test_one_partition_periodicity_from_file_does_not_disturb_others(self):
......@@ -1775,43 +1775,6 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.launchSlapgrid()
self.assertEqual(mock_method.call_count, 2)
def test_partition_requested_state_created(self):
    """
    A successful partition run leaves a '.requested_state' file behind.

    Checks that processComputerPartitionList() returns SLAPGRID_SUCCESS, that
    partition '0' contains exactly the expected entries (including
    '.requested_state'), that the file content equals
    COMPUTER_PARTITION_STOPPED_STATE, and that the only recorded request is
    '/stoppedComputerPartition'.
    """
    # NOTE(review): indentation was lost in the text this block was recovered
    # from; the whole body is assumed to run inside the HTTMock context --
    # confirm against the original file.
    computer = self.getTestComputerClass()(self.software_root, self.instance_root)
    with httmock.HTTMock(computer.request_handler):
        instance = computer.instance_list[0]
        instance.timestamp = str(int(time.time()))

        status = self.grid.processComputerPartitionList()
        self.assertEqual(status, slapgrid.SLAPGRID_SUCCESS)
        self.assertInstanceDirectoryListEqual(['0'])

        partition_dir = os.path.join(self.instance_root, '0')
        expected_entries = [
            '.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg',
            'software_release', 'worked', '.slapos-retention-lock-delay',
        ]
        six.assertCountEqual(self, os.listdir(partition_dir), expected_entries)
        six.assertCountEqual(
            self,
            os.listdir(self.software_root),
            [instance.software.software_hash])

        state_file = os.path.join(instance.partition_path, '.requested_state')
        with open(state_file) as state_fd:
            self.assertEqual(
                state_fd.read(), slapgrid.COMPUTER_PARTITION_STOPPED_STATE)
        self.assertEqual(instance.sequence, ['/stoppedComputerPartition'])
def test_partition_requested_state_not_created_if_failed(self):
    """
    A failed partition run must not leave a '.requested_state' file.

    The instance's buildout is replaced by a shell script that exits with
    status 3.  The test then checks that processComputerPartitionList()
    returns SLAPGRID_FAIL, that the only recorded request is
    '/softwareInstanceError', and that '.requested_state' does not exist in
    the partition.
    """
    # NOTE(review): indentation was lost in the text this block was recovered
    # from; the whole body is assumed to run inside the HTTMock context --
    # confirm against the original file.
    computer = self.getTestComputerClass()(self.software_root, self.instance_root)
    with httmock.HTTMock(computer.request_handler):
        instance = computer.instance_list[0]
        instance.timestamp = str(int(time.time()))
        # The script body is a runtime string: keep it exactly as is.
        instance.software.setBuildout("""#!/bin/sh
exit 3""")

        status = self.grid.processComputerPartitionList()
        self.assertEqual(status, slapgrid.SLAPGRID_FAIL)
        self.assertInstanceDirectoryListEqual(['0'])
        self.assertEqual(instance.sequence, ['/softwareInstanceError'])

        state_file = os.path.join(instance.partition_path, '.requested_state')
        self.assertFalse(os.path.exists(state_file))
def test_one_partition_buildout_fail_does_not_disturb_others(self):
"""
1. We set up two instance one using a corrupted buildout
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment