Commit 25e46196 authored by Xavier Thompson's avatar Xavier Thompson

node boot: Drop offline partition start feature

Partitions no longer need to be restarted at boot since now slapgrid
will start the services even if there is no connection to master.

This reverts commit cada4581.
parent 48b23e3d
...@@ -31,7 +31,6 @@ from __future__ import print_function ...@@ -31,7 +31,6 @@ from __future__ import print_function
import subprocess import subprocess
from six.moves.urllib.parse import urlparse from six.moves.urllib.parse import urlparse
from six.moves import xmlrpc_client as xmlrpclib
from time import sleep from time import sleep
import glob import glob
import os import os
...@@ -42,11 +41,6 @@ from slapos.cli.command import check_root_user ...@@ -42,11 +41,6 @@ from slapos.cli.command import check_root_user
from slapos.cli.entry import SlapOSApp from slapos.cli.entry import SlapOSApp
from slapos.cli.config import ConfigCommand from slapos.cli.config import ConfigCommand
from slapos.format import isGlobalScopeAddress from slapos.format import isGlobalScopeAddress
from slapos.grid.slapgrid import (COMPUTER_PARTITION_REQUESTED_STATE_FILENAME,
COMPUTER_PARTITION_STARTED_STATE)
from slapos.grid.svcbackend import (_getSupervisordSocketPath,
getSupervisorRPC,
launchSupervisord)
from slapos.util import string_to_boolean from slapos.util import string_to_boolean
import argparse import argparse
import logging import logging
...@@ -65,48 +59,6 @@ def _removeTimestamp(instancehome, partition_base_name): ...@@ -65,48 +59,6 @@ def _removeTimestamp(instancehome, partition_base_name):
logger.info("Removing %s", timestamp_path) logger.info("Removing %s", timestamp_path)
os.remove(timestamp_path) os.remove(timestamp_path)
def _startComputerPartition(partition_id, supervisord_socket):
"""
With supervisord, start the instance that was deployed
"""
try:
with getSupervisorRPC(supervisord_socket) as supervisor:
supervisor.startProcessGroup(partition_id, False)
except xmlrpclib.Fault as exc:
if exc.faultString.startswith('BAD_NAME:'):
logger.info("Nothing to start on %s...", partition_id)
else:
raise
else:
logger.info("Requested start of %s...", partition_id)
def _startComputerPartitionList(instance_root, partition_base_name):
"""
Start services for partition which has requested state to 'started'
"""
partition_glob_path = os.path.join(
instance_root,
"%s*" % partition_base_name)
launchSupervisord(instance_root=instance_root, logger=logger)
for partition_path in glob.glob(partition_glob_path):
partition_state_path = os.path.join(
partition_path,
COMPUTER_PARTITION_REQUESTED_STATE_FILENAME
)
supervisord_socket_path = _getSupervisordSocketPath(
instance_root,
logger
)
if os.path.exists(partition_state_path):
partition_state = ""
with open(partition_state_path) as f:
partition_state = f.read()
if partition_state == COMPUTER_PARTITION_STARTED_STATE:
# Call start for this computer partition
_startComputerPartition(
os.path.basename(partition_path.rstrip('/')),
supervisord_socket_path
)
def _runBang(app): def _runBang(app):
""" """
...@@ -124,9 +76,7 @@ def _runFormat(app): ...@@ -124,9 +76,7 @@ def _runFormat(app):
Launch slapos node format. Launch slapos node format.
""" """
logger.info("[BOOT] Invoking slapos node format...") logger.info("[BOOT] Invoking slapos node format...")
# '--local' parameter is to prevent node format command to post data to result = app.run(['node', 'format', '--now', '--verbose'])
# master, so this command can work without internet and setup partitions IP.
result = app.run(['node', 'format', '--now', '--local', '--verbose'])
if result == 1: if result == 1:
return 0 return 0
return 1 return 1
...@@ -246,15 +196,6 @@ class BootCommand(ConfigCommand): ...@@ -246,15 +196,6 @@ class BootCommand(ConfigCommand):
if ipv6_interface is not None: if ipv6_interface is not None:
_waitIpv6Ready(ipv6_interface) _waitIpv6Ready(ipv6_interface)
app = SlapOSApp()
# Make sure slapos node format returns ok
while not _runFormat(app):
logger.error("[BOOT] Fail to format, try again in 15 seconds...")
sleep(15)
# Start computer partition services
_startComputerPartitionList(instance_root, partition_base_name)
# Check that node can ping master # Check that node can ping master
if valid_ipv4(master_hostname): if valid_ipv4(master_hostname):
_test_ping(master_hostname) _test_ping(master_hostname)
...@@ -264,6 +205,12 @@ class BootCommand(ConfigCommand): ...@@ -264,6 +205,12 @@ class BootCommand(ConfigCommand):
# hostname # hostname
_ping_hostname(master_hostname) _ping_hostname(master_hostname)
app = SlapOSApp()
# Make sure slapos node format returns ok
while not _runFormat(app):
logger.error("[BOOT] Fail to format, try again in 15 seconds...")
sleep(15)
# Make sure slapos node bang returns ok # Make sure slapos node bang returns ok
while not _runBang(app): while not _runBang(app):
logger.error("[BOOT] Fail to bang, try again in 15 seconds...") logger.error("[BOOT] Fail to bang, try again in 15 seconds...")
......
...@@ -83,12 +83,6 @@ class FormatCommand(ConfigCommand): ...@@ -83,12 +83,6 @@ class FormatCommand(ConfigCommand):
help='Launch slapformat without delay' help='Launch slapformat without delay'
' (default: %(default)s)') ' (default: %(default)s)')
ap.add_argument('--local',
default=False, # can have a default as it is not in .cfg
action="store_true",
help='Keep format data locally, do not post xml to master'
' (default: %(default)s)')
ap.add_argument('-n', '--dry_run', ap.add_argument('-n', '--dry_run',
default=False, # can have a default as it is not in .cfg default=False, # can have a default as it is not in .cfg
action="store_true", action="store_true",
......
...@@ -1609,7 +1609,6 @@ def do_format(conf): ...@@ -1609,7 +1609,6 @@ def do_format(conf):
computer.dump(path_to_xml=conf.computer_xml, computer.dump(path_to_xml=conf.computer_xml,
path_to_json=conf.computer_json, path_to_json=conf.computer_json,
logger=conf.logger) logger=conf.logger)
if not conf.local:
conf.logger.info('Posting information to %r' % conf.master_url) conf.logger.info('Posting information to %r' % conf.master_url)
computer.send(conf) computer.send(conf)
conf.logger.info('slapos successfully prepared the computer.') conf.logger.info('slapos successfully prepared the computer.')
......
...@@ -97,7 +97,6 @@ SLAPGRID_OFFLINE_SUCCESS = 3 ...@@ -97,7 +97,6 @@ SLAPGRID_OFFLINE_SUCCESS = 3
PROMISE_TIMEOUT = 20 PROMISE_TIMEOUT = 20
COMPUTER_PARTITION_TIMESTAMP_FILENAME = '.timestamp' COMPUTER_PARTITION_TIMESTAMP_FILENAME = '.timestamp'
COMPUTER_PARTITION_REQUESTED_STATE_FILENAME = '.requested_state'
COMPUTER_PARTITION_LATEST_BANG_TIMESTAMP_FILENAME = '.slapos_latest_bang_timestamp' COMPUTER_PARTITION_LATEST_BANG_TIMESTAMP_FILENAME = '.slapos_latest_bang_timestamp'
COMPUTER_PARTITION_INSTALL_ERROR_FILENAME = '.slapgrid-%s-error.log' COMPUTER_PARTITION_INSTALL_ERROR_FILENAME = '.slapgrid-%s-error.log'
COMPUTER_PARTITION_WAIT_LIST_FILENAME = '.slapos-report-wait-service-list' COMPUTER_PARTITION_WAIT_LIST_FILENAME = '.slapos-report-wait-service-list'
...@@ -1136,10 +1135,6 @@ stderr_logfile_backups=1 ...@@ -1136,10 +1135,6 @@ stderr_logfile_backups=1
instance_path, instance_path,
COMPUTER_PARTITION_TIMESTAMP_FILENAME COMPUTER_PARTITION_TIMESTAMP_FILENAME
) )
partition_state_path = os.path.join(
instance_path,
COMPUTER_PARTITION_REQUESTED_STATE_FILENAME
)
parameter_dict = computer_partition.getInstanceParameterDict() parameter_dict = computer_partition.getInstanceParameterDict()
timestamp = parameter_dict.get('timestamp') timestamp = parameter_dict.get('timestamp')
...@@ -1241,7 +1236,6 @@ stderr_logfile_backups=1 ...@@ -1241,7 +1236,6 @@ stderr_logfile_backups=1
return return
os.remove(timestamp_path) os.remove(timestamp_path)
os.remove(partition_state_path)
# Include Partition Logging # Include Partition Logging
log_folder_path = "%s/.slapgrid/log" % instance_path log_folder_path = "%s/.slapgrid/log" % instance_path
...@@ -1356,8 +1350,6 @@ stderr_logfile_backups=1 ...@@ -1356,8 +1350,6 @@ stderr_logfile_backups=1
if timestamp: if timestamp:
with open(timestamp_path, 'w') as f: with open(timestamp_path, 'w') as f:
f.write(str(timestamp)) f.write(str(timestamp))
with open(partition_state_path, 'w') as f:
f.write(str(computer_partition_state))
def FilterComputerPartitionList(self, computer_partition_list): def FilterComputerPartitionList(self, computer_partition_list):
""" """
......
...@@ -416,13 +416,8 @@ class TestCliBoot(CliMixin): ...@@ -416,13 +416,8 @@ class TestCliBoot(CliMixin):
os.mkdir(os.path.join(instance_root, partition_base_name + '1')) os.mkdir(os.path.join(instance_root, partition_base_name + '1'))
timestamp = os.path.join( timestamp = os.path.join(
instance_root, partition_base_name + '1', '.timestamp') instance_root, partition_base_name + '1', '.timestamp')
requested_state_path = os.path.join(instance_root,
partition_base_name + '1',
'.requested_state')
with open(timestamp, 'w') as f: with open(timestamp, 'w') as f:
f.write("1578552471") f.write("1578552471")
with open(requested_state_path, 'w') as f:
f.write("started")
# make a config file using this instance root # make a config file using this instance root
with tempfile.NamedTemporaryFile(mode='w') as slapos_conf: with tempfile.NamedTemporaryFile(mode='w') as slapos_conf:
...@@ -447,21 +442,17 @@ class TestCliBoot(CliMixin): ...@@ -447,21 +442,17 @@ class TestCliBoot(CliMixin):
patch( patch(
'slapos.cli.boot.netifaces.ifaddresses', 'slapos.cli.boot.netifaces.ifaddresses',
return_value={socket.AF_INET6: ({'addr': '2000::1'},),},) as ifaddresses,\ return_value={socket.AF_INET6: ({'addr': '2000::1'},),},) as ifaddresses,\
patch('slapos.cli.boot._startComputerPartition', return_value=None) as start_partition,\
patch('slapos.cli.boot.launchSupervisord', return_value=None),\
patch('slapos.cli.boot._ping_hostname', return_value=1) as _ping_hostname: patch('slapos.cli.boot._ping_hostname', return_value=1) as _ping_hostname:
app.run(('node', 'boot')) app.run(('node', 'boot'))
# boot command runs as root # boot command runs as root
check_root_user.assert_called_once() check_root_user.assert_called_once()
# Computer partition was started during boot
start_partition.assert_called_once()
# it waits for interface to have an IPv6 address # it waits for interface to have an IPv6 address
ifaddresses.assert_called_once_with('interface_name_from_config') ifaddresses.assert_called_once_with('interface_name_from_config')
# then ping master hostname to wait for connectivity # then ping master hostname to wait for connectivity
_ping_hostname.assert_called_once_with('slap.vifib.com') _ping_hostname.assert_called_once_with('slap.vifib.com')
# then format and bang # then format and bang
SlapOSApp().run.assert_any_call(['node', 'format', '--now', '--local', '--verbose']) SlapOSApp().run.assert_any_call(['node', 'format', '--now', '--verbose'])
SlapOSApp().run.assert_any_call(['node', 'bang', '-m', 'Reboot']) SlapOSApp().run.assert_any_call(['node', 'bang', '-m', 'Reboot'])
# timestamp files have been removed # timestamp files have been removed
...@@ -483,7 +474,6 @@ class TestCliBoot(CliMixin): ...@@ -483,7 +474,6 @@ class TestCliBoot(CliMixin):
patch('slapos.cli.boot.netifaces.ifaddresses', patch('slapos.cli.boot.netifaces.ifaddresses',
side_effect=[net1, net2, net3]),\ side_effect=[net1, net2, net3]),\
patch('slapos.cli.boot._ping_hostname', return_value=0),\ patch('slapos.cli.boot._ping_hostname', return_value=0),\
patch('slapos.cli.boot._startComputerPartitionList', return_value=None) as start_partition,\
patch('slapos.cli.format.check_root_user', return_value=True),\ patch('slapos.cli.format.check_root_user', return_value=True),\
patch('slapos.cli.format.logging.FileHandler', return_value=logging.NullHandler()),\ patch('slapos.cli.format.logging.FileHandler', return_value=logging.NullHandler()),\
patch('slapos.cli.bang.check_root_user', return_value=True),\ patch('slapos.cli.bang.check_root_user', return_value=True),\
...@@ -493,7 +483,6 @@ class TestCliBoot(CliMixin): ...@@ -493,7 +483,6 @@ class TestCliBoot(CliMixin):
app.run(('node', 'boot')) app.run(('node', 'boot'))
check_root_user.assert_called_once() check_root_user.assert_called_once()
start_partition.assert_called_once()
self.assertEqual(do_format.call_count, 3) self.assertEqual(do_format.call_count, 3)
self.assertEqual(do_bang.call_count, 3) self.assertEqual(do_bang.call_count, 3)
......
...@@ -1466,7 +1466,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase): ...@@ -1466,7 +1466,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.assertInstanceDirectoryListEqual(['0']) self.assertInstanceDirectoryListEqual(['0'])
partition = os.path.join(self.instance_root, '0') partition = os.path.join(self.instance_root, '0')
six.assertCountEqual(self, os.listdir(partition), six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay']) ['.slapgrid', '.timestamp', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay'])
six.assertCountEqual(self, os.listdir(self.software_root), [instance.software.software_hash]) six.assertCountEqual(self, os.listdir(self.software_root), [instance.software.software_hash])
timestamp_path = os.path.join(instance.partition_path, '.timestamp') timestamp_path = os.path.join(instance.partition_path, '.timestamp')
self.setSlapgrid() self.setSlapgrid()
...@@ -1487,7 +1487,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase): ...@@ -1487,7 +1487,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.assertInstanceDirectoryListEqual(['0']) self.assertInstanceDirectoryListEqual(['0'])
partition = os.path.join(self.instance_root, '0') partition = os.path.join(self.instance_root, '0')
six.assertCountEqual(self, os.listdir(partition), six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg', ['.slapgrid', '.timestamp', 'buildout.cfg',
'software_release', 'worked', '.slapos-retention-lock-delay']) 'software_release', 'worked', '.slapos-retention-lock-delay'])
six.assertCountEqual(self, os.listdir(self.software_root), [instance.software.software_hash]) six.assertCountEqual(self, os.listdir(self.software_root), [instance.software.software_hash])
...@@ -1510,7 +1510,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase): ...@@ -1510,7 +1510,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.assertInstanceDirectoryListEqual(['0']) self.assertInstanceDirectoryListEqual(['0'])
partition = os.path.join(self.instance_root, '0') partition = os.path.join(self.instance_root, '0')
six.assertCountEqual(self, os.listdir(partition), six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay']) ['.slapgrid', '.timestamp', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay'])
six.assertCountEqual(self, os.listdir(self.software_root), [instance.software.software_hash]) six.assertCountEqual(self, os.listdir(self.software_root), [instance.software.software_hash])
instance.timestamp = str(int(timestamp) - 1) instance.timestamp = str(int(timestamp) - 1)
self.assertEqual(self.launchSlapgrid(), slapgrid.SLAPGRID_SUCCESS) self.assertEqual(self.launchSlapgrid(), slapgrid.SLAPGRID_SUCCESS)
...@@ -1528,7 +1528,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase): ...@@ -1528,7 +1528,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.assertInstanceDirectoryListEqual(['0']) self.assertInstanceDirectoryListEqual(['0'])
partition = os.path.join(self.instance_root, '0') partition = os.path.join(self.instance_root, '0')
six.assertCountEqual(self, os.listdir(partition), six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay']) ['.slapgrid', '.timestamp', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay'])
six.assertCountEqual(self, os.listdir(self.software_root), [instance.software.software_hash]) six.assertCountEqual(self, os.listdir(self.software_root), [instance.software.software_hash])
instance.timestamp = str(int(timestamp) + 1) instance.timestamp = str(int(timestamp) + 1)
self.assertEqual(self.launchSlapgrid(), slapgrid.SLAPGRID_SUCCESS) self.assertEqual(self.launchSlapgrid(), slapgrid.SLAPGRID_SUCCESS)
...@@ -1556,7 +1556,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase): ...@@ -1556,7 +1556,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.assertInstanceDirectoryListEqual(['0']) self.assertInstanceDirectoryListEqual(['0'])
partition = os.path.join(self.instance_root, '0') partition = os.path.join(self.instance_root, '0')
six.assertCountEqual(self, os.listdir(partition), six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay']) ['.slapgrid', '.timestamp', 'buildout.cfg', 'software_release', 'worked', '.slapos-retention-lock-delay'])
six.assertCountEqual(self, os.listdir(self.software_root), six.assertCountEqual(self, os.listdir(self.software_root),
[instance.software.software_hash]) [instance.software.software_hash])
instance.timestamp = None instance.timestamp = None
...@@ -1588,7 +1588,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase): ...@@ -1588,7 +1588,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.launchSlapgrid() self.launchSlapgrid()
partition = os.path.join(self.instance_root, '0') partition = os.path.join(self.instance_root, '0')
six.assertCountEqual(self, os.listdir(partition), six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg', ['.slapgrid', '.timestamp', 'buildout.cfg',
'software_release', 'worked', '.slapos-retention-lock-delay']) 'software_release', 'worked', '.slapos-retention-lock-delay'])
time.sleep(2) time.sleep(2)
...@@ -1598,7 +1598,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase): ...@@ -1598,7 +1598,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.launchSlapgrid() self.launchSlapgrid()
six.assertCountEqual(self, os.listdir(partition), six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg', ['.slapgrid', '.timestamp', 'buildout.cfg',
'software_release', 'worked', '.slapos-retention-lock-delay']) 'software_release', 'worked', '.slapos-retention-lock-delay'])
def test_one_partition_periodicity_from_file_does_not_disturb_others(self): def test_one_partition_periodicity_from_file_does_not_disturb_others(self):
...@@ -1775,43 +1775,6 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase): ...@@ -1775,43 +1775,6 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self.launchSlapgrid() self.launchSlapgrid()
self.assertEqual(mock_method.call_count, 2) self.assertEqual(mock_method.call_count, 2)
def test_partition_requested_state_created(self):
computer = self.getTestComputerClass()(self.software_root, self.instance_root)
with httmock.HTTMock(computer.request_handler):
instance = computer.instance_list[0]
timestamp = str(int(time.time()))
instance.timestamp = timestamp
self.assertEqual(self.grid.processComputerPartitionList(), slapgrid.SLAPGRID_SUCCESS)
self.assertInstanceDirectoryListEqual(['0'])
partition = os.path.join(self.instance_root, '0')
six.assertCountEqual(self, os.listdir(partition),
['.slapgrid', '.timestamp', '.requested_state', 'buildout.cfg',
'software_release', 'worked', '.slapos-retention-lock-delay'])
six.assertCountEqual(self, os.listdir(self.software_root), [instance.software.software_hash])
requested_state_path = os.path.join(instance.partition_path, '.requested_state')
with open(requested_state_path) as f:
self.assertEqual(f.read(), slapgrid.COMPUTER_PARTITION_STOPPED_STATE)
self.assertEqual(instance.sequence,
['/stoppedComputerPartition'])
def test_partition_requested_state_not_created_if_failed(self):
computer = self.getTestComputerClass()(self.software_root, self.instance_root)
with httmock.HTTMock(computer.request_handler):
instance = computer.instance_list[0]
timestamp = str(int(time.time()))
instance.timestamp = timestamp
instance.software.setBuildout("""#!/bin/sh
exit 3""")
self.assertEqual(self.grid.processComputerPartitionList(), slapgrid.SLAPGRID_FAIL)
self.assertInstanceDirectoryListEqual(['0'])
self.assertEqual(instance.sequence,
['/softwareInstanceError'])
requested_state_path = os.path.join(instance.partition_path, '.requested_state')
self.assertFalse(os.path.exists(requested_state_path))
def test_one_partition_buildout_fail_does_not_disturb_others(self): def test_one_partition_buildout_fail_does_not_disturb_others(self):
""" """
1. We set up two instance one using a corrupted buildout 1. We set up two instance one using a corrupted buildout
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment