Commit 31a3c142 authored by Matthias Käppler's avatar Matthias Käppler Committed by Bob Van Landuyt

Use external metrics server process for Puma

This moves us away from a process-internal server thread
and toward an external process that we supervise instead.
parent ddddfe2b
# frozen_string_literal: true # frozen_string_literal: true
PUMA_EXTERNAL_METRICS_SERVER = Gitlab::Utils.to_boolean(ENV['PUMA_EXTERNAL_METRICS_SERVER'])
# Keep separate directories for separate processes # Keep separate directories for separate processes
def prometheus_default_multiproc_dir def prometheus_default_multiproc_dir
return unless Rails.env.development? || Rails.env.test? return unless Rails.env.development? || Rails.env.test?
...@@ -72,8 +74,12 @@ Gitlab::Cluster::LifecycleEvents.on_master_start do ...@@ -72,8 +74,12 @@ Gitlab::Cluster::LifecycleEvents.on_master_start do
if Gitlab::Runtime.puma? if Gitlab::Runtime.puma?
Gitlab::Metrics::Samplers::PumaSampler.instance.start Gitlab::Metrics::Samplers::PumaSampler.instance.start
# Starts a metrics server to export metrics from the Puma primary. if Settings.monitoring.web_exporter.enabled && PUMA_EXTERNAL_METRICS_SERVER
Gitlab::Metrics::Exporter::WebExporter.instance.start require_relative '../../metrics_server/metrics_server'
MetricsServer.start_for_puma
else
Gitlab::Metrics::Exporter::WebExporter.instance.start
end
end end
Gitlab::Ci::Parsers.instrument! Gitlab::Ci::Parsers.instrument!
...@@ -90,11 +96,13 @@ Gitlab::Cluster::LifecycleEvents.on_worker_start do ...@@ -90,11 +96,13 @@ Gitlab::Cluster::LifecycleEvents.on_worker_start do
Gitlab::Metrics::Samplers::ThreadsSampler.initialize_instance(logger: logger).start Gitlab::Metrics::Samplers::ThreadsSampler.initialize_instance(logger: logger).start
if Gitlab::Runtime.puma? if Gitlab::Runtime.puma?
# Since we are running a metrics server on the Puma primary, we would inherit # Since we are observing a metrics server from the Puma primary, we would inherit
# this thread after forking into workers, so we need to explicitly stop it here. # this supervision thread after forking into workers, so we need to explicitly stop it here.
# NOTE: This will not be necessary anymore after moving to an external server if PUMA_EXTERNAL_METRICS_SERVER
# process via https://gitlab.com/gitlab-org/gitlab/-/issues/350548 ::MetricsServer::PumaProcessSupervisor.instance.stop
Gitlab::Metrics::Exporter::WebExporter.instance.stop else
Gitlab::Metrics::Exporter::WebExporter.instance.stop
end
Gitlab::Metrics::Samplers::ActionCableSampler.instance(logger: logger).start Gitlab::Metrics::Samplers::ActionCableSampler.instance(logger: logger).start
end end
...@@ -112,16 +120,24 @@ end ...@@ -112,16 +120,24 @@ end
if Gitlab::Runtime.puma? if Gitlab::Runtime.puma?
Gitlab::Cluster::LifecycleEvents.on_before_graceful_shutdown do Gitlab::Cluster::LifecycleEvents.on_before_graceful_shutdown do
# We need to ensure that before we re-exec or shutdown server # We need to ensure that before we re-exec or shutdown server
# we do stop the exporter # we also stop the metrics server
Gitlab::Metrics::Exporter::WebExporter.instance.stop if PUMA_EXTERNAL_METRICS_SERVER
::MetricsServer::PumaProcessSupervisor.instance.shutdown
else
Gitlab::Metrics::Exporter::WebExporter.instance.stop
end
end end
Gitlab::Cluster::LifecycleEvents.on_before_master_restart do Gitlab::Cluster::LifecycleEvents.on_before_master_restart do
# We need to ensure that before we re-exec server # We need to ensure that before we re-exec server
# we do stop the exporter # we also stop the metrics server
# #
# We do it again, for being extra safe, # We do it again, for being extra safe,
# but it should not be needed # but it should not be needed
Gitlab::Metrics::Exporter::WebExporter.instance.stop if PUMA_EXTERNAL_METRICS_SERVER
::MetricsServer::PumaProcessSupervisor.instance.shutdown
else
Gitlab::Metrics::Exporter::WebExporter.instance.stop
end
end end
end end
...@@ -9,7 +9,7 @@ module Gitlab ...@@ -9,7 +9,7 @@ module Gitlab
# The supervisor will also trap termination signals if provided and # The supervisor will also trap termination signals if provided and
# propagate those to the supervised processes. Any supervised processes # propagate those to the supervised processes. Any supervised processes
# that do not terminate within a specified grace period will be killed. # that do not terminate within a specified grace period will be killed.
class ProcessSupervisor class ProcessSupervisor < Gitlab::Daemon
DEFAULT_HEALTH_CHECK_INTERVAL_SECONDS = 5 DEFAULT_HEALTH_CHECK_INTERVAL_SECONDS = 5
DEFAULT_TERMINATE_INTERVAL_SECONDS = 1 DEFAULT_TERMINATE_INTERVAL_SECONDS = 1
DEFAULT_TERMINATE_TIMEOUT_SECONDS = 10 DEFAULT_TERMINATE_TIMEOUT_SECONDS = 10
...@@ -21,13 +21,18 @@ module Gitlab ...@@ -21,13 +21,18 @@ module Gitlab
check_terminate_interval_seconds: DEFAULT_TERMINATE_INTERVAL_SECONDS, check_terminate_interval_seconds: DEFAULT_TERMINATE_INTERVAL_SECONDS,
terminate_timeout_seconds: DEFAULT_TERMINATE_TIMEOUT_SECONDS, terminate_timeout_seconds: DEFAULT_TERMINATE_TIMEOUT_SECONDS,
term_signals: %i(INT TERM), term_signals: %i(INT TERM),
forwarded_signals: []) forwarded_signals: [],
**options)
super(**options)
@term_signals = term_signals @term_signals = term_signals
@forwarded_signals = forwarded_signals @forwarded_signals = forwarded_signals
@health_check_interval_seconds = health_check_interval_seconds @health_check_interval_seconds = health_check_interval_seconds
@check_terminate_interval_seconds = check_terminate_interval_seconds @check_terminate_interval_seconds = check_terminate_interval_seconds
@terminate_timeout_seconds = terminate_timeout_seconds @terminate_timeout_seconds = terminate_timeout_seconds
@pids = []
@alive = false
end end
# Starts a supervision loop for the given process ID(s). # Starts a supervision loop for the given process ID(s).
...@@ -39,32 +44,66 @@ module Gitlab ...@@ -39,32 +44,66 @@ module Gitlab
# start observing those processes instead. Otherwise it will shut down. # start observing those processes instead. Otherwise it will shut down.
def supervise(pid_or_pids, &on_process_death) def supervise(pid_or_pids, &on_process_death)
@pids = Array(pid_or_pids) @pids = Array(pid_or_pids)
@on_process_death = on_process_death
trap_signals! trap_signals!
start
end
# Shuts down the supervisor and all supervised processes with the given signal.
def shutdown(signal = :TERM)
return unless @alive
stop_processes(signal)
stop
end
def supervised_pids
@pids
end
private
def start_working
@alive = true @alive = true
end
def stop_working
@alive = false
end
def run_thread
while @alive while @alive
sleep(@health_check_interval_seconds) sleep(@health_check_interval_seconds)
check_process_health(&on_process_death) check_process_health
end end
end end
private def check_process_health
def check_process_health(&on_process_death)
unless all_alive? unless all_alive?
dead_pids = @pids - live_pids existing_pids = live_pids # Capture this value for the duration of the block.
@pids = Array(yield(dead_pids)) dead_pids = @pids - existing_pids
new_pids = Array(@on_process_death.call(dead_pids))
@pids = existing_pids + new_pids
@alive = @pids.any? @alive = @pids.any?
end end
end end
def stop_processes(signal)
# Set this prior to shutting down so that shutdown hooks which read `alive`
# know the supervisor is about to shut down.
@alive = false
# Shut down supervised processes.
signal_all(signal)
wait_for_termination
end
def trap_signals! def trap_signals!
ProcessManagement.trap_signals(@term_signals) do |signal| ProcessManagement.trap_signals(@term_signals) do |signal|
@alive = false stop_processes(signal)
signal_all(signal)
wait_for_termination
end end
ProcessManagement.trap_signals(@forwarded_signals) do |signal| ProcessManagement.trap_signals(@forwarded_signals) do |signal|
......
...@@ -31,5 +31,6 @@ require_relative '../lib/gitlab/metrics/exporter/gc_request_middleware' ...@@ -31,5 +31,6 @@ require_relative '../lib/gitlab/metrics/exporter/gc_request_middleware'
require_relative '../lib/gitlab/health_checks/probes/collection' require_relative '../lib/gitlab/health_checks/probes/collection'
require_relative '../lib/gitlab/health_checks/probes/status' require_relative '../lib/gitlab/health_checks/probes/status'
require_relative '../lib/gitlab/process_management' require_relative '../lib/gitlab/process_management'
require_relative '../lib/gitlab/process_supervisor'
# rubocop:enable Naming/FileName # rubocop:enable Naming/FileName
...@@ -5,7 +5,28 @@ require_relative '../config/boot' ...@@ -5,7 +5,28 @@ require_relative '../config/boot'
require_relative 'dependencies' require_relative 'dependencies'
class MetricsServer # rubocop:disable Gitlab/NamespacedClass class MetricsServer # rubocop:disable Gitlab/NamespacedClass
# The singleton instance used to supervise the Puma metrics server.
PumaProcessSupervisor = Class.new(Gitlab::ProcessSupervisor)
class << self class << self
def start_for_puma
metrics_dir = ::Prometheus::Client.configuration.multiprocess_files_dir
start_server = proc do
MetricsServer.spawn('puma', metrics_dir: metrics_dir).tap do |pid|
Gitlab::AppLogger.info("Starting Puma metrics server with pid #{pid}")
end
end
supervisor = PumaProcessSupervisor.instance
supervisor.supervise(start_server.call) do
next unless supervisor.alive
Gitlab::AppLogger.info('Puma metrics server terminated, restarting...')
start_server.call
end
end
def spawn(target, metrics_dir:, gitlab_config: nil, wipe_metrics_dir: false) def spawn(target, metrics_dir:, gitlab_config: nil, wipe_metrics_dir: false)
ensure_valid_target!(target) ensure_valid_target!(target)
......
...@@ -10,6 +10,7 @@ require 'time' ...@@ -10,6 +10,7 @@ require 'time'
# we may run into "already initialized" warnings, hence the check. # we may run into "already initialized" warnings, hence the check.
require_relative '../lib/gitlab' unless Object.const_defined?('Gitlab') require_relative '../lib/gitlab' unless Object.const_defined?('Gitlab')
require_relative '../lib/gitlab/utils' require_relative '../lib/gitlab/utils'
require_relative '../lib/gitlab/daemon'
require_relative '../lib/gitlab/sidekiq_config/cli_methods' require_relative '../lib/gitlab/sidekiq_config/cli_methods'
require_relative '../lib/gitlab/sidekiq_config/worker_matcher' require_relative '../lib/gitlab/sidekiq_config/worker_matcher'
require_relative '../lib/gitlab/sidekiq_logging/json_formatter' require_relative '../lib/gitlab/sidekiq_logging/json_formatter'
...@@ -121,11 +122,12 @@ module Gitlab ...@@ -121,11 +122,12 @@ module Gitlab
ProcessManagement.write_pid(@pid) if @pid ProcessManagement.write_pid(@pid) if @pid
supervisor = Gitlab::ProcessSupervisor.new( supervisor = SidekiqProcessSupervisor.instance(
health_check_interval_seconds: @interval, health_check_interval_seconds: @interval,
terminate_timeout_seconds: @soft_timeout_seconds + TIMEOUT_GRACE_PERIOD_SECONDS, terminate_timeout_seconds: @soft_timeout_seconds + TIMEOUT_GRACE_PERIOD_SECONDS,
term_signals: TERMINATE_SIGNALS, term_signals: TERMINATE_SIGNALS,
forwarded_signals: FORWARD_SIGNALS forwarded_signals: FORWARD_SIGNALS,
synchronous: true
) )
metrics_server_pid = start_metrics_server metrics_server_pid = start_metrics_server
...@@ -136,7 +138,7 @@ module Gitlab ...@@ -136,7 +138,7 @@ module Gitlab
# If we're not in the process of shutting down the cluster, # If we're not in the process of shutting down the cluster,
# and the metrics server died, restart it. # and the metrics server died, restart it.
if supervisor.alive && dead_pids.include?(metrics_server_pid) if supervisor.alive && dead_pids.include?(metrics_server_pid)
@logger.info('Metrics server terminated, restarting...') @logger.info('Sidekiq metrics server terminated, restarting...')
metrics_server_pid = restart_metrics_server(wipe_metrics_dir: false) metrics_server_pid = restart_metrics_server(wipe_metrics_dir: false)
all_pids = worker_pids + Array(metrics_server_pid) all_pids = worker_pids + Array(metrics_server_pid)
else else
......
...@@ -16,6 +16,9 @@ module Gitlab ...@@ -16,6 +16,9 @@ module Gitlab
# before we kill the process. # before we kill the process.
TIMEOUT_GRACE_PERIOD_SECONDS = 5 TIMEOUT_GRACE_PERIOD_SECONDS = 5
# The singleton instance used to supervise cluster processes.
SidekiqProcessSupervisor = Class.new(Gitlab::ProcessSupervisor)
# Starts Sidekiq workers for the pairs of processes. # Starts Sidekiq workers for the pairs of processes.
# #
# Example: # Example:
......
...@@ -40,6 +40,8 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo ...@@ -40,6 +40,8 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo
} }
end end
let(:supervisor) { instance_double(Gitlab::SidekiqCluster::SidekiqProcessSupervisor) }
before do before do
stub_env('RAILS_ENV', 'test') stub_env('RAILS_ENV', 'test')
...@@ -47,8 +49,11 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo ...@@ -47,8 +49,11 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo
config_file.close config_file.close
allow(::Settings).to receive(:source).and_return(config_file.path) allow(::Settings).to receive(:source).and_return(config_file.path)
::Settings.reload! ::Settings.reload!
allow(Gitlab::ProcessManagement).to receive(:write_pid)
allow(Gitlab::SidekiqCluster::SidekiqProcessSupervisor).to receive(:instance).and_return(supervisor)
allow(supervisor).to receive(:supervise)
end end
after do after do
...@@ -63,11 +68,6 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo ...@@ -63,11 +68,6 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo
end end
context 'with arguments' do context 'with arguments' do
before do
allow(Gitlab::ProcessManagement).to receive(:write_pid)
allow_next_instance_of(Gitlab::ProcessSupervisor) { |it| allow(it).to receive(:supervise) }
end
it 'starts the Sidekiq workers' do it 'starts the Sidekiq workers' do
expect(Gitlab::SidekiqCluster).to receive(:start) expect(Gitlab::SidekiqCluster).to receive(:start)
.with([['foo']], default_options) .with([['foo']], default_options)
...@@ -298,8 +298,6 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo ...@@ -298,8 +298,6 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo
context 'without --dryrun' do context 'without --dryrun' do
before do before do
allow(Gitlab::SidekiqCluster).to receive(:start).and_return([]) allow(Gitlab::SidekiqCluster).to receive(:start).and_return([])
allow(Gitlab::ProcessManagement).to receive(:write_pid)
allow_next_instance_of(Gitlab::ProcessSupervisor) { |it| allow(it).to receive(:supervise) }
end end
context 'when there are no sidekiq_health_checks settings set' do context 'when there are no sidekiq_health_checks settings set' do
...@@ -429,14 +427,11 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo ...@@ -429,14 +427,11 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo
before do before do
allow(Gitlab::SidekiqCluster).to receive(:start).and_return(sidekiq_worker_pids) allow(Gitlab::SidekiqCluster).to receive(:start).and_return(sidekiq_worker_pids)
allow(Gitlab::ProcessManagement).to receive(:write_pid)
end end
it 'stops the entire process cluster if one of the workers has been terminated' do it 'stops the entire process cluster if one of the workers has been terminated' do
allow_next_instance_of(Gitlab::ProcessSupervisor) do |it| expect(supervisor).to receive(:alive).and_return(true)
allow(it).to receive(:supervise).and_yield([2]) expect(supervisor).to receive(:supervise).and_yield([2])
end
expect(MetricsServer).to receive(:fork).once.and_return(metrics_server_pid) expect(MetricsServer).to receive(:fork).once.and_return(metrics_server_pid)
expect(Gitlab::ProcessManagement).to receive(:signal_processes).with([42, 99], :TERM) expect(Gitlab::ProcessManagement).to receive(:signal_processes).with([42, 99], :TERM)
...@@ -445,11 +440,8 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo ...@@ -445,11 +440,8 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo
context 'when the supervisor is alive' do context 'when the supervisor is alive' do
it 'restarts the metrics server when it is down' do it 'restarts the metrics server when it is down' do
allow_next_instance_of(Gitlab::ProcessSupervisor) do |it| expect(supervisor).to receive(:alive).and_return(true)
allow(it).to receive(:alive).and_return(true) expect(supervisor).to receive(:supervise).and_yield([metrics_server_pid])
allow(it).to receive(:supervise).and_yield([metrics_server_pid])
end
expect(MetricsServer).to receive(:fork).twice.and_return(metrics_server_pid) expect(MetricsServer).to receive(:fork).twice.and_return(metrics_server_pid)
cli.run(%w(foo)) cli.run(%w(foo))
...@@ -458,11 +450,8 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo ...@@ -458,11 +450,8 @@ RSpec.describe Gitlab::SidekiqCluster::CLI, stub_settings_source: true do # rubo
context 'when the supervisor is shutting down' do context 'when the supervisor is shutting down' do
it 'does not restart the metrics server' do it 'does not restart the metrics server' do
allow_next_instance_of(Gitlab::ProcessSupervisor) do |it| expect(supervisor).to receive(:alive).and_return(false)
allow(it).to receive(:alive).and_return(false) expect(supervisor).to receive(:supervise).and_yield([metrics_server_pid])
allow(it).to receive(:supervise).and_yield([metrics_server_pid])
end
expect(MetricsServer).to receive(:fork).once.and_return(metrics_server_pid) expect(MetricsServer).to receive(:fork).once.and_return(metrics_server_pid)
cli.run(%w(foo)) cli.run(%w(foo))
......
...@@ -6,7 +6,9 @@ RSpec.describe Gitlab::ProcessSupervisor do ...@@ -6,7 +6,9 @@ RSpec.describe Gitlab::ProcessSupervisor do
let(:health_check_interval_seconds) { 0.1 } let(:health_check_interval_seconds) { 0.1 }
let(:check_terminate_interval_seconds) { 1 } let(:check_terminate_interval_seconds) { 1 }
let(:forwarded_signals) { [] } let(:forwarded_signals) { [] }
let(:process_id) do let(:process_ids) { [spawn_process, spawn_process] }
def spawn_process
Process.spawn('while true; do sleep 1; done').tap do |pid| Process.spawn('while true; do sleep 1; done').tap do |pid|
Process.detach(pid) Process.detach(pid)
end end
...@@ -22,54 +24,52 @@ RSpec.describe Gitlab::ProcessSupervisor do ...@@ -22,54 +24,52 @@ RSpec.describe Gitlab::ProcessSupervisor do
end end
after do after do
if Gitlab::ProcessManagement.process_alive?(process_id) process_ids.each do |pid|
Process.kill('KILL', process_id) Process.kill('KILL', pid)
rescue Errno::ESRCH
# Ignore if a process wasn't actually alive.
end end
end end
describe '#supervise' do describe '#supervise' do
context 'while supervised process is alive' do context 'while supervised processes are alive' do
it 'does not invoke callback' do it 'does not invoke callback' do
expect(Gitlab::ProcessManagement.process_alive?(process_id)).to be(true) expect(Gitlab::ProcessManagement.all_alive?(process_ids)).to be(true)
pids_killed = [] pids_killed = []
thread = Thread.new do supervisor.supervise(process_ids) do |dead_pids|
supervisor.supervise(process_id) do |dead_pids| pids_killed = dead_pids
pids_killed = dead_pids []
[]
end
end end
# Wait several times the poll frequency of the supervisor. # Wait several times the poll frequency of the supervisor.
sleep health_check_interval_seconds * 10 sleep health_check_interval_seconds * 10
thread.terminate
expect(pids_killed).to be_empty expect(pids_killed).to be_empty
expect(Gitlab::ProcessManagement.process_alive?(process_id)).to be(true) expect(Gitlab::ProcessManagement.all_alive?(process_ids)).to be(true)
end end
end end
context 'when supervised process dies' do context 'when a supervised process dies' do
it 'triggers callback with the dead PIDs' do it 'triggers callback with the dead PIDs and adds new PIDs to supervised PIDs' do
expect(Gitlab::ProcessManagement.process_alive?(process_id)).to be(true) expect(Gitlab::ProcessManagement.all_alive?(process_ids)).to be(true)
pids_killed = [] pids_killed = []
thread = Thread.new do supervisor.supervise(process_ids) do |dead_pids|
supervisor.supervise(process_id) do |dead_pids| pids_killed = dead_pids
pids_killed = dead_pids [42] # Fake starting a new process in place of the terminated one.
[]
end
end end
# Terminate the supervised process. # Terminate the supervised process.
Process.kill('TERM', process_id) Process.kill('TERM', process_ids.first)
await_condition(sleep_sec: health_check_interval_seconds) do await_condition(sleep_sec: health_check_interval_seconds) do
pids_killed == [process_id] pids_killed == [process_ids.first]
end end
thread.terminate
expect(Gitlab::ProcessManagement.process_alive?(process_id)).to be(false) expect(Gitlab::ProcessManagement.process_alive?(process_ids.first)).to be(false)
expect(Gitlab::ProcessManagement.process_alive?(process_ids.last)).to be(true)
expect(supervisor.supervised_pids).to match_array([process_ids.last, 42])
end end
end end
...@@ -78,7 +78,7 @@ RSpec.describe Gitlab::ProcessSupervisor do ...@@ -78,7 +78,7 @@ RSpec.describe Gitlab::ProcessSupervisor do
allow(supervisor).to receive(:sleep) allow(supervisor).to receive(:sleep)
allow(Gitlab::ProcessManagement).to receive(:trap_signals) allow(Gitlab::ProcessManagement).to receive(:trap_signals)
allow(Gitlab::ProcessManagement).to receive(:all_alive?).and_return(false) allow(Gitlab::ProcessManagement).to receive(:all_alive?).and_return(false)
allow(Gitlab::ProcessManagement).to receive(:signal_processes).with([process_id], anything) allow(Gitlab::ProcessManagement).to receive(:signal_processes).with(process_ids, anything)
end end
context 'termination signals' do context 'termination signals' do
...@@ -87,21 +87,21 @@ RSpec.describe Gitlab::ProcessSupervisor do ...@@ -87,21 +87,21 @@ RSpec.describe Gitlab::ProcessSupervisor do
allow(Gitlab::ProcessManagement).to receive(:any_alive?).and_return(false) allow(Gitlab::ProcessManagement).to receive(:any_alive?).and_return(false)
expect(Gitlab::ProcessManagement).to receive(:trap_signals).ordered.with(%i(INT TERM)).and_yield(:TERM) expect(Gitlab::ProcessManagement).to receive(:trap_signals).ordered.with(%i(INT TERM)).and_yield(:TERM)
expect(Gitlab::ProcessManagement).to receive(:signal_processes).ordered.with([process_id], :TERM) expect(Gitlab::ProcessManagement).to receive(:signal_processes).ordered.with(process_ids, :TERM)
expect(supervisor).not_to receive(:sleep).with(check_terminate_interval_seconds) expect(supervisor).not_to receive(:sleep).with(check_terminate_interval_seconds)
supervisor.supervise(process_id) { [] } supervisor.supervise(process_ids) { [] }
end end
end end
context 'when TERM does not result in timely shutdown of processes' do context 'when TERM does not result in timely shutdown of processes' do
it 'issues a KILL signal after the grace period expires' do it 'issues a KILL signal after the grace period expires' do
expect(Gitlab::ProcessManagement).to receive(:trap_signals).with(%i(INT TERM)).and_yield(:TERM) expect(Gitlab::ProcessManagement).to receive(:trap_signals).with(%i(INT TERM)).and_yield(:TERM)
expect(Gitlab::ProcessManagement).to receive(:signal_processes).ordered.with([process_id], :TERM) expect(Gitlab::ProcessManagement).to receive(:signal_processes).ordered.with(process_ids, :TERM)
expect(supervisor).to receive(:sleep).ordered.with(check_terminate_interval_seconds).at_least(:once) expect(supervisor).to receive(:sleep).ordered.with(check_terminate_interval_seconds).at_least(:once)
expect(Gitlab::ProcessManagement).to receive(:signal_processes).ordered.with([process_id], '-KILL') expect(Gitlab::ProcessManagement).to receive(:signal_processes).ordered.with(process_ids, '-KILL')
supervisor.supervise(process_id) { [] } supervisor.supervise(process_ids) { [] }
end end
end end
end end
...@@ -111,10 +111,53 @@ RSpec.describe Gitlab::ProcessSupervisor do ...@@ -111,10 +111,53 @@ RSpec.describe Gitlab::ProcessSupervisor do
it 'forwards given signals to the observed processes' do it 'forwards given signals to the observed processes' do
expect(Gitlab::ProcessManagement).to receive(:trap_signals).with(%i(USR1)).and_yield(:USR1) expect(Gitlab::ProcessManagement).to receive(:trap_signals).with(%i(USR1)).and_yield(:USR1)
expect(Gitlab::ProcessManagement).to receive(:signal_processes).ordered.with([process_id], :USR1) expect(Gitlab::ProcessManagement).to receive(:signal_processes).ordered.with(process_ids, :USR1)
supervisor.supervise(process_ids) { [] }
end
end
end
end
describe '#shutdown' do
context 'when supervisor is supervising processes' do
before do
supervisor.supervise(process_ids)
end
context 'when supervisor is alive' do
it 'signals TERM then KILL to all supervised processes' do
expect(Gitlab::ProcessManagement).to receive(:signal_processes).ordered.with(process_ids, :TERM)
expect(Gitlab::ProcessManagement).to receive(:signal_processes).ordered.with(process_ids, '-KILL')
supervisor.supervise(process_id) { [] } supervisor.shutdown
end end
it 'stops the supervisor' do
expect { supervisor.shutdown }.to change { supervisor.alive }.from(true).to(false)
end
end
context 'when supervisor has already shut down' do
before do
supervisor.shutdown
end
it 'does nothing' do
expect(supervisor.alive).to be(false)
expect(Gitlab::ProcessManagement).not_to receive(:signal_processes)
supervisor.shutdown
end
end
end
context 'when supervisor never started' do
it 'does nothing' do
expect(supervisor.alive).to be(false)
expect(Gitlab::ProcessManagement).not_to receive(:signal_processes)
supervisor.shutdown
end end
end end
end end
......
# frozen_string_literal: true # frozen_string_literal: true
require 'fast_spec_helper' require 'spec_helper'
require_relative '../../metrics_server/metrics_server' require_relative '../../metrics_server/metrics_server'
require_relative '../support/helpers/next_instance_of'
RSpec.describe MetricsServer do # rubocop:disable RSpec/FilePath RSpec.describe MetricsServer do # rubocop:disable RSpec/FilePath
include NextInstanceOf
let(:prometheus_config) { ::Prometheus::Client.configuration } let(:prometheus_config) { ::Prometheus::Client.configuration }
let(:metrics_dir) { Dir.mktmpdir } let(:metrics_dir) { Dir.mktmpdir }
...@@ -205,4 +202,47 @@ RSpec.describe MetricsServer do # rubocop:disable RSpec/FilePath ...@@ -205,4 +202,47 @@ RSpec.describe MetricsServer do # rubocop:disable RSpec/FilePath
it_behaves_like 'a metrics exporter', 'sidekiq', 'sidekiq_exporter' it_behaves_like 'a metrics exporter', 'sidekiq', 'sidekiq_exporter'
end end
describe '.start_for_puma' do
let(:supervisor) { instance_double(Gitlab::ProcessSupervisor) }
before do
allow(Gitlab::ProcessSupervisor).to receive(:instance).and_return(supervisor)
end
it 'spawns a server process and supervises it' do
expect(Process).to receive(:spawn).with(
include('METRICS_SERVER_TARGET' => 'puma'), end_with('bin/metrics-server'), anything
).once.and_return(42)
expect(supervisor).to receive(:supervise).with(42)
described_class.start_for_puma
end
context 'when the supervisor callback is invoked' do
context 'and the supervisor is alive' do
it 'restarts the metrics server' do
expect(supervisor).to receive(:alive).and_return(true)
expect(supervisor).to receive(:supervise).and_yield
expect(Process).to receive(:spawn).with(
include('METRICS_SERVER_TARGET' => 'puma'), end_with('bin/metrics-server'), anything
).twice.and_return(42)
described_class.start_for_puma
end
end
context 'and the supervisor is not alive' do
it 'does not restart the server' do
expect(supervisor).to receive(:alive).and_return(false)
expect(supervisor).to receive(:supervise).and_yield
expect(Process).to receive(:spawn).with(
include('METRICS_SERVER_TARGET' => 'puma'), end_with('bin/metrics-server'), anything
).once.and_return(42)
described_class.start_for_puma
end
end
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment