Commit 921fd6ab authored by James Fargher's avatar James Fargher

Merge branch '347199-fork-into-metrics-server' into 'master'

Sidekiq: Fork into metrics-server instead of `exec`ing

See merge request gitlab-org/gitlab!76228
parents 449a370b f50f508b
...@@ -3,14 +3,10 @@ ...@@ -3,14 +3,10 @@
require_relative '../metrics_server/metrics_server' require_relative '../metrics_server/metrics_server'
begin target = ENV['METRICS_SERVER_TARGET']
target = ENV['METRICS_SERVER_TARGET'] raise "METRICS_SERVER_TARGET cannot be blank" if target.blank?
raise "Required: METRICS_SERVER_TARGET=[sidekiq]" unless target == 'sidekiq'
metrics_dir = ENV["prometheus_multiproc_dir"] || File.absolute_path("tmp/prometheus_multiproc_dir/#{target}") metrics_dir = ENV["prometheus_multiproc_dir"] || File.absolute_path("tmp/prometheus_multiproc_dir/#{target}")
wipe_metrics_dir = Gitlab::Utils.to_boolean(ENV['WIPE_METRICS_DIR']) || false wipe_metrics_dir = Gitlab::Utils.to_boolean(ENV['WIPE_METRICS_DIR']) || false
# Re-raise exceptions in threads on the main thread. Process.wait(MetricsServer.spawn(target, metrics_dir: metrics_dir, wipe_metrics_dir: wipe_metrics_dir))
Thread.abort_on_exception = true
MetricsServer.new(target, metrics_dir, wipe_metrics_dir).start
end
...@@ -2,12 +2,6 @@ ...@@ -2,12 +2,6 @@
module Gitlab module Gitlab
module ProcessManagement module ProcessManagement
# The signals that should terminate both the master and workers.
TERMINATE_SIGNALS = %i(INT TERM).freeze
# The signals that should simply be forwarded to the workers.
FORWARD_SIGNALS = %i(TTIN USR1 USR2 HUP).freeze
# Traps the given signals and yields the block whenever these signals are # Traps the given signals and yields the block whenever these signals are
# received. # received.
# #
...@@ -26,12 +20,13 @@ module Gitlab ...@@ -26,12 +20,13 @@ module Gitlab
end end
end end
def self.trap_terminate(&block) # Traps the given signals with the given command.
trap_signals(TERMINATE_SIGNALS, &block) #
end # Example:
#
def self.trap_forward(&block) # modify_signals(%i(HUP TERM), 'DEFAULT')
trap_signals(FORWARD_SIGNALS, &block) def self.modify_signals(signals, command)
signals.each { |signal| trap(signal, command) }
end end
def self.signal(pid, signal) def self.signal(pid, signal)
......
...@@ -22,5 +22,6 @@ require_relative '../lib/gitlab/metrics/exporter/base_exporter' ...@@ -22,5 +22,6 @@ require_relative '../lib/gitlab/metrics/exporter/base_exporter'
require_relative '../lib/gitlab/metrics/exporter/sidekiq_exporter' require_relative '../lib/gitlab/metrics/exporter/sidekiq_exporter'
require_relative '../lib/gitlab/health_checks/probes/collection' require_relative '../lib/gitlab/health_checks/probes/collection'
require_relative '../lib/gitlab/health_checks/probes/status' require_relative '../lib/gitlab/health_checks/probes/status'
require_relative '../lib/gitlab/process_management'
# rubocop:enable Naming/FileName # rubocop:enable Naming/FileName
...@@ -6,17 +6,28 @@ require_relative 'dependencies' ...@@ -6,17 +6,28 @@ require_relative 'dependencies'
class MetricsServer # rubocop:disable Gitlab/NamespacedClass class MetricsServer # rubocop:disable Gitlab/NamespacedClass
class << self class << self
def spawn(target, gitlab_config: nil, wipe_metrics_dir: false) def spawn(target, metrics_dir:, wipe_metrics_dir: false, trapped_signals: [])
cmd = "#{Rails.root}/bin/metrics-server" raise "The only valid target is 'sidekiq' currently" unless target == 'sidekiq'
env = {
'METRICS_SERVER_TARGET' => target, pid = Process.fork
'GITLAB_CONFIG' => gitlab_config,
'WIPE_METRICS_DIR' => wipe_metrics_dir.to_s if pid.nil? # nil means we're inside the fork
} # Remove any custom signal handlers the parent process had registered, since we do
# not want to inherit them, and Ruby forks with a `clone` that has the `CLONE_SIGHAND`
Process.spawn(env, cmd, err: $stderr, out: $stdout).tap do |pid| # flag set.
Gitlab::ProcessManagement.modify_signals(trapped_signals, 'DEFAULT')
server = MetricsServer.new(target, metrics_dir, wipe_metrics_dir)
# This rewrites /proc/cmdline, since otherwise tools like `top` will show the
# parent process `cmdline` which is really confusing.
$0 = server.name
server.start
else
Process.detach(pid) Process.detach(pid)
end end
pid
end end
end end
...@@ -34,10 +45,15 @@ class MetricsServer # rubocop:disable Gitlab/NamespacedClass ...@@ -34,10 +45,15 @@ class MetricsServer # rubocop:disable Gitlab/NamespacedClass
FileUtils.mkdir_p(@metrics_dir, mode: 0700) FileUtils.mkdir_p(@metrics_dir, mode: 0700)
::Prometheus::CleanupMultiprocDirService.new.execute if @wipe_metrics_dir ::Prometheus::CleanupMultiprocDirService.new.execute if @wipe_metrics_dir
settings = Settings.monitoring.sidekiq_exporter settings = Settings.new(Settings.monitoring[name])
exporter_class = "Gitlab::Metrics::Exporter::#{@target.camelize}Exporter".constantize exporter_class = "Gitlab::Metrics::Exporter::#{@target.camelize}Exporter".constantize
server = exporter_class.instance(settings, synchronous: true) server = exporter_class.instance(settings, synchronous: true)
server.start server.start
end end
def name
"#{@target}_exporter"
end
end end
...@@ -20,6 +20,14 @@ require_relative 'sidekiq_cluster' ...@@ -20,6 +20,14 @@ require_relative 'sidekiq_cluster'
module Gitlab module Gitlab
module SidekiqCluster module SidekiqCluster
class CLI class CLI
THREAD_NAME = 'supervisor'
# The signals that should terminate both the master and workers.
TERMINATE_SIGNALS = %i(INT TERM).freeze
# The signals that should simply be forwarded to the workers.
FORWARD_SIGNALS = %i(TTIN USR1 USR2 HUP).freeze
CommandError = Class.new(StandardError) CommandError = Class.new(StandardError)
def initialize(log_output = $stderr) def initialize(log_output = $stderr)
...@@ -27,6 +35,7 @@ module Gitlab ...@@ -27,6 +35,7 @@ module Gitlab
@max_concurrency = 50 @max_concurrency = 50
@min_concurrency = 0 @min_concurrency = 0
@environment = ENV['RAILS_ENV'] || 'development' @environment = ENV['RAILS_ENV'] || 'development'
@metrics_dir = ENV["prometheus_multiproc_dir"] || File.absolute_path("tmp/prometheus_multiproc_dir/sidekiq")
@pid = nil @pid = nil
@interval = 5 @interval = 5
@alive = true @alive = true
...@@ -39,6 +48,8 @@ module Gitlab ...@@ -39,6 +48,8 @@ module Gitlab
end end
def run(argv = ARGV) def run(argv = ARGV)
Thread.current.name = THREAD_NAME
if argv.empty? if argv.empty?
raise CommandError, raise CommandError,
'You must specify at least one queue to start a worker for' 'You must specify at least one queue to start a worker for'
...@@ -144,13 +155,13 @@ module Gitlab ...@@ -144,13 +155,13 @@ module Gitlab
end end
def trap_signals def trap_signals
ProcessManagement.trap_terminate do |signal| ProcessManagement.trap_signals(TERMINATE_SIGNALS) do |signal|
@alive = false @alive = false
ProcessManagement.signal_processes(@processes, signal) ProcessManagement.signal_processes(@processes, signal)
wait_for_termination wait_for_termination
end end
ProcessManagement.trap_forward do |signal| ProcessManagement.trap_signals(FORWARD_SIGNALS) do |signal|
ProcessManagement.signal_processes(@processes, signal) ProcessManagement.signal_processes(@processes, signal)
end end
end end
...@@ -180,7 +191,12 @@ module Gitlab ...@@ -180,7 +191,12 @@ module Gitlab
return unless metrics_server_enabled? return unless metrics_server_enabled?
@logger.info("Starting metrics server on port #{sidekiq_exporter_port}") @logger.info("Starting metrics server on port #{sidekiq_exporter_port}")
@metrics_server_pid = MetricsServer.spawn('sidekiq', wipe_metrics_dir: wipe_metrics_dir) @metrics_server_pid = MetricsServer.spawn(
'sidekiq',
metrics_dir: @metrics_dir,
wipe_metrics_dir: wipe_metrics_dir,
trapped_signals: TERMINATE_SIGNALS + FORWARD_SIGNALS
)
end end
def sidekiq_exporter_enabled? def sidekiq_exporter_enabled?
......
...@@ -29,17 +29,27 @@ RSpec.describe 'bin/metrics-server', :aggregate_failures do ...@@ -29,17 +29,27 @@ RSpec.describe 'bin/metrics-server', :aggregate_failures do
config_file.write(YAML.dump(config)) config_file.write(YAML.dump(config))
config_file.close config_file.close
@pid = MetricsServer.spawn('sidekiq', gitlab_config: config_file.path, wipe_metrics_dir: true)
env = {
'GITLAB_CONFIG' => config_file.path,
'METRICS_SERVER_TARGET' => 'sidekiq',
'WIPE_METRICS_DIR' => '1'
}
@pid = Process.spawn(env, 'bin/metrics-server', pgroup: true)
end end
after do after do
webmock_enable! webmock_enable!
if @pid if @pid
pgrp = Process.getpgid(@pid)
Timeout.timeout(5) do Timeout.timeout(5) do
Process.kill('TERM', @pid) Process.kill('TERM', -pgrp)
Process.waitpid(@pid) Process.waitpid(@pid)
end end
expect(Gitlab::ProcessManagement.process_alive?(@pid)).to be(false)
end end
rescue Errno::ESRCH => _ rescue Errno::ESRCH => _
# 'No such process' means the process died before # 'No such process' means the process died before
......
...@@ -258,6 +258,17 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath ...@@ -258,6 +258,17 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath
end end
context 'metrics server' do context 'metrics server' do
let(:trapped_signals) { described_class::TERMINATE_SIGNALS + described_class::FORWARD_SIGNALS }
let(:metrics_dir) { Dir.mktmpdir }
before do
stub_env('prometheus_multiproc_dir', metrics_dir)
end
after do
FileUtils.rm_rf(metrics_dir, secure: true)
end
context 'starting the server' do context 'starting the server' do
context 'without --dryrun' do context 'without --dryrun' do
context 'when there are no sidekiq_health_checks settings set' do context 'when there are no sidekiq_health_checks settings set' do
...@@ -342,31 +353,33 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath ...@@ -342,31 +353,33 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath
end end
end end
using RSpec::Parameterized::TableSyntax context 'with valid settings' do
using RSpec::Parameterized::TableSyntax
where(:sidekiq_exporter_enabled, :sidekiq_exporter_port, :sidekiq_health_checks_port, :start_metrics_server) do
true | '3807' | '3907' | true
true | '3807' | '3807' | false
false | '3807' | '3907' | false
false | '3807' | '3907' | false
end
with_them do where(:sidekiq_exporter_enabled, :sidekiq_exporter_port, :sidekiq_health_checks_port, :start_metrics_server) do
before do true | '3807' | '3907' | true
allow(Gitlab::SidekiqCluster).to receive(:start) true | '3807' | '3807' | false
allow(cli).to receive(:write_pid) false | '3807' | '3907' | false
allow(cli).to receive(:trap_signals) false | '3807' | '3907' | false
allow(cli).to receive(:start_loop)
end end
specify do with_them do
if start_metrics_server before do
expect(MetricsServer).to receive(:spawn).with('sidekiq', wipe_metrics_dir: true) allow(Gitlab::SidekiqCluster).to receive(:start)
else allow(cli).to receive(:write_pid)
expect(MetricsServer).not_to receive(:spawn) allow(cli).to receive(:trap_signals)
allow(cli).to receive(:start_loop)
end end
cli.run(%w(foo)) specify do
if start_metrics_server
expect(MetricsServer).to receive(:spawn).with('sidekiq', metrics_dir: metrics_dir, wipe_metrics_dir: true, trapped_signals: trapped_signals)
else
expect(MetricsServer).not_to receive(:spawn)
end
cli.run(%w(foo))
end
end end
end end
end end
...@@ -388,7 +401,7 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath ...@@ -388,7 +401,7 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath
before do before do
allow(cli).to receive(:sleep).with(a_kind_of(Numeric)) allow(cli).to receive(:sleep).with(a_kind_of(Numeric))
allow(MetricsServer).to receive(:spawn).with('sidekiq', wipe_metrics_dir: false).and_return(99) allow(MetricsServer).to receive(:spawn).and_return(99)
cli.start_metrics_server cli.start_metrics_server
end end
...@@ -407,7 +420,9 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath ...@@ -407,7 +420,9 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath
allow(Gitlab::ProcessManagement).to receive(:all_alive?).with(an_instance_of(Array)).and_return(false) allow(Gitlab::ProcessManagement).to receive(:all_alive?).with(an_instance_of(Array)).and_return(false)
allow(cli).to receive(:stop_metrics_server) allow(cli).to receive(:stop_metrics_server)
expect(MetricsServer).to receive(:spawn).with('sidekiq', wipe_metrics_dir: false) expect(MetricsServer).to receive(:spawn).with(
'sidekiq', metrics_dir: metrics_dir, wipe_metrics_dir: false, trapped_signals: trapped_signals
)
cli.start_loop cli.start_loop
end end
...@@ -484,9 +499,9 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath ...@@ -484,9 +499,9 @@ RSpec.describe Gitlab::SidekiqCluster::CLI do # rubocop:disable RSpec/FilePath
end end
describe '#trap_signals' do describe '#trap_signals' do
it 'traps the termination and forwarding signals' do it 'traps termination and sidekiq specific signals' do
expect(Gitlab::ProcessManagement).to receive(:trap_terminate) expect(Gitlab::ProcessManagement).to receive(:trap_signals).with(%i[INT TERM])
expect(Gitlab::ProcessManagement).to receive(:trap_forward) expect(Gitlab::ProcessManagement).to receive(:trap_signals).with(%i[TTIN USR1 USR2 HUP])
cli.trap_signals cli.trap_signals
end end
......
...@@ -12,21 +12,12 @@ RSpec.describe Gitlab::ProcessManagement do ...@@ -12,21 +12,12 @@ RSpec.describe Gitlab::ProcessManagement do
end end
end end
describe '.trap_terminate' do describe '.modify_signals' do
it 'traps the termination signals' do it 'traps the given signals with the given command' do
expect(described_class).to receive(:trap_signals) expect(described_class).to receive(:trap).ordered.with(:INT, 'DEFAULT')
.with(described_class::TERMINATE_SIGNALS) expect(described_class).to receive(:trap).ordered.with(:HUP, 'DEFAULT')
described_class.trap_terminate { } described_class.modify_signals(%i(INT HUP), 'DEFAULT')
end
end
describe '.trap_forward' do
it 'traps the signals to forward' do
expect(described_class).to receive(:trap_signals)
.with(described_class::FORWARD_SIGNALS)
described_class.trap_forward { }
end end
end end
......
...@@ -8,52 +8,67 @@ require_relative '../support/helpers/next_instance_of' ...@@ -8,52 +8,67 @@ require_relative '../support/helpers/next_instance_of'
RSpec.describe MetricsServer do # rubocop:disable RSpec/FilePath RSpec.describe MetricsServer do # rubocop:disable RSpec/FilePath
include NextInstanceOf include NextInstanceOf
before do
# We do not want this to have knock-on effects on the test process.
allow(Gitlab::ProcessManagement).to receive(:modify_signals)
end
describe '.spawn' do describe '.spawn' do
let(:env) do context 'when in parent process' do
{ it 'forks into a new process and detaches it' do
'METRICS_SERVER_TARGET' => 'sidekiq', expect(Process).to receive(:fork).and_return(99)
'GITLAB_CONFIG' => nil, expect(Process).to receive(:detach).with(99)
'WIPE_METRICS_DIR' => 'false'
} described_class.spawn('sidekiq', metrics_dir: 'path/to/metrics')
end
end end
it 'spawns a process with the correct environment variables and detaches it' do context 'when in child process' do
expect(Process).to receive(:spawn).with(env, anything, err: $stderr, out: $stdout).and_return(99) before do
expect(Process).to receive(:detach).with(99) # This signals the process that it's "inside" the fork
expect(Process).to receive(:fork).and_return(nil)
expect(Process).not_to receive(:detach)
end
it 'starts the metrics server with the given arguments' do
expect_next_instance_of(MetricsServer) do |server|
expect(server).to receive(:start)
end
described_class.spawn('sidekiq', metrics_dir: 'path/to/metrics')
end
it 'resets signal handlers from parent process' do
expect(Gitlab::ProcessManagement).to receive(:modify_signals).with(%i[A B], 'DEFAULT')
described_class.spawn('sidekiq') described_class.spawn('sidekiq', metrics_dir: 'path/to/metrics', trapped_signals: %i[A B])
end
end end
end end
describe '#start' do describe '#start' do
let(:exporter_class) { Class.new(Gitlab::Metrics::Exporter::BaseExporter) } let(:exporter_class) { Class.new(Gitlab::Metrics::Exporter::BaseExporter) }
let(:exporter_double) { double('fake_exporter', start: true) } let(:exporter_double) { double('fake_exporter', start: true) }
let(:prometheus_client_double) { double(::Prometheus::Client) } let(:prometheus_config) { ::Prometheus::Client.configuration }
let(:prometheus_config) { ::Prometheus::Client::Configuration.new }
let(:metrics_dir) { Dir.mktmpdir } let(:metrics_dir) { Dir.mktmpdir }
let(:settings_double) { double(:settings, sidekiq_exporter: {}) } let(:settings) { { "fake_exporter" => { "enabled" => true } } }
let!(:old_metrics_dir) { ::Prometheus::Client.configuration.multiprocess_files_dir } let!(:old_metrics_dir) { prometheus_config.multiprocess_files_dir }
subject(:metrics_server) { described_class.new('fake', metrics_dir, true)} subject(:metrics_server) { described_class.new('fake', metrics_dir, true)}
before do before do
stub_env('prometheus_multiproc_dir', metrics_dir)
stub_const('Gitlab::Metrics::Exporter::FakeExporter', exporter_class) stub_const('Gitlab::Metrics::Exporter::FakeExporter', exporter_class)
allow(exporter_class).to receive(:instance).with({}, synchronous: true).and_return(exporter_double) expect(exporter_class).to receive(:instance).with(settings['fake_exporter'], synchronous: true).and_return(exporter_double)
allow(Settings).to receive(:monitoring).and_return(settings_double) expect(Settings).to receive(:monitoring).and_return(settings)
end end
after do after do
Gitlab::Metrics.reset_registry! Gitlab::Metrics.reset_registry!
FileUtils.rm_rf(metrics_dir, secure: true)
::Prometheus::CleanupMultiprocDirService.new.execute prometheus_config.multiprocess_files_dir = old_metrics_dir
Dir.rmdir(metrics_dir)
::Prometheus::Client.configuration.multiprocess_files_dir = old_metrics_dir
end end
it 'configures ::Prometheus::Client' do it 'configures ::Prometheus::Client' do
allow(prometheus_client_double).to receive(:configuration).and_return(prometheus_config)
metrics_server.start metrics_server.start
expect(prometheus_config.multiprocess_files_dir).to eq metrics_dir expect(prometheus_config.multiprocess_files_dir).to eq metrics_dir
...@@ -90,12 +105,5 @@ RSpec.describe MetricsServer do # rubocop:disable RSpec/FilePath ...@@ -90,12 +105,5 @@ RSpec.describe MetricsServer do # rubocop:disable RSpec/FilePath
metrics_server.start metrics_server.start
end end
it 'sends the correct Settings to the exporter instance' do
expect(Settings).to receive(:monitoring).and_return(settings_double)
expect(settings_double).to receive(:sidekiq_exporter)
metrics_server.start
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment