Commit 887cab1a authored by Oswaldo Ferreira's avatar Oswaldo Ferreira

Support graceful timeout for Sidekiq Cluster processes

Considering we're moving towards using Sidekiq Cluster
only and customers using Omnibus are allowed to configure
a Sidekiq timeout through -t flag, here we support a
-t flag for `bin/sidekiq-cluster`.

Here's how it works:

Internally, we'll pass the given value to Sidekiq (`-t`) flag.
For whatever value given, Sidekiq Cluster will wait for 5
extra seconds in order to hard stop all remaining alive processes.

So for instance, if `10` is given, Sidekiq will try to gracefully
terminate within this time. If it gets stuck for some reason, the
processes will be sigkilled within `15` seconds (total wait time).
parent 9e713e25
---
title: Support custom graceful timeout for Sidekiq Cluster processes
merge_request: 27710
author:
type: added
......@@ -62,21 +62,28 @@ module Gitlab
# directory - The directory of the Rails application.
#
# Returns an Array containing the PIDs of the started processes.
def self.start(queues, env: :development, directory: Dir.pwd, max_concurrency: 50, min_concurrency: 0, dryrun: false)
def self.start(queues, env: :development, directory: Dir.pwd, max_concurrency: 50, min_concurrency: 0, timeout: CLI::DEFAULT_SOFT_TIMEOUT_SECONDS, dryrun: false)
queues.map.with_index do |pair, index|
start_sidekiq(pair, env: env, directory: directory, max_concurrency: max_concurrency, min_concurrency: min_concurrency, worker_id: index, dryrun: dryrun)
start_sidekiq(pair, env: env,
directory: directory,
max_concurrency: max_concurrency,
min_concurrency: min_concurrency,
worker_id: index,
timeout: timeout,
dryrun: dryrun)
end
end
# Starts a Sidekiq process that processes _only_ the given queues.
#
# Returns the PID of the started process.
def self.start_sidekiq(queues, env:, directory:, max_concurrency:, min_concurrency:, worker_id:, dryrun:)
def self.start_sidekiq(queues, env:, directory:, max_concurrency:, min_concurrency:, worker_id:, timeout:, dryrun:)
counts = count_by_queue(queues)
cmd = %w[bundle exec sidekiq]
cmd << "-c#{self.concurrency(queues, min_concurrency, max_concurrency)}"
cmd << "-e#{env}"
cmd << "-t#{timeout}"
cmd << "-gqueues:#{proc_details(counts)}"
cmd << "-r#{directory}"
......
......@@ -8,9 +8,17 @@ module Gitlab
module SidekiqCluster
class CLI
CHECK_TERMINATE_INTERVAL_SECONDS = 1
# How long to wait in total when asking for a clean termination
# Sidekiq default to self-terminate is 25s
TERMINATE_TIMEOUT_SECONDS = 30
# How long to wait when asking for a clean termination.
# It maps the Sidekiq default timeout:
# https://github.com/mperham/sidekiq/wiki/Signals#term
#
# This value is passed to Sidekiq's `-t` if none
# is given through arguments.
DEFAULT_SOFT_TIMEOUT_SECONDS = 25
# After surpassing the soft timeout.
DEFAULT_HARD_TIMEOUT_SECONDS = 5
CommandError = Class.new(StandardError)
......@@ -74,7 +82,8 @@ module Gitlab
directory: @rails_path,
max_concurrency: @max_concurrency,
min_concurrency: @min_concurrency,
dryrun: @dryrun
dryrun: @dryrun,
timeout: soft_timeout_seconds
)
return if @dryrun
......@@ -88,6 +97,15 @@ module Gitlab
SidekiqCluster.write_pid(@pid) if @pid
end
def soft_timeout_seconds
@soft_timeout_seconds || DEFAULT_SOFT_TIMEOUT_SECONDS
end
# The amount of time it'll wait for killing the alive Sidekiq processes.
def hard_timeout_seconds
soft_timeout_seconds + DEFAULT_HARD_TIMEOUT_SECONDS
end
def monotonic_time
Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second)
end
......@@ -101,7 +119,7 @@ module Gitlab
end
def wait_for_termination
deadline = monotonic_time + TERMINATE_TIMEOUT_SECONDS
deadline = monotonic_time + hard_timeout_seconds
sleep(CHECK_TERMINATE_INTERVAL_SECONDS) while continue_waiting?(deadline)
hard_stop_stuck_pids
......@@ -176,6 +194,10 @@ module Gitlab
@interval = int.to_i
end
opt.on('-t', '--timeout INT', 'Graceful timeout for all running processes') do |timeout|
@soft_timeout_seconds = timeout.to_i
end
opt.on('-d', '--dryrun', 'Print commands that would be run without this flag, and quit') do |int|
@dryrun = true
end
......
......@@ -5,8 +5,9 @@ require 'rspec-parameterized'
describe Gitlab::SidekiqCluster::CLI do
let(:cli) { described_class.new('/dev/null') }
let(:timeout) { described_class::DEFAULT_SOFT_TIMEOUT_SECONDS }
let(:default_options) do
{ env: 'test', directory: Dir.pwd, max_concurrency: 50, min_concurrency: 0, dryrun: false }
{ env: 'test', directory: Dir.pwd, max_concurrency: 50, min_concurrency: 0, dryrun: false, timeout: timeout }
end
before do
......@@ -80,6 +81,22 @@ describe Gitlab::SidekiqCluster::CLI do
end
end
context '-timeout flag' do
it 'when given', 'starts Sidekiq workers with given timeout' do
expect(Gitlab::SidekiqCluster).to receive(:start)
.with([['foo']], default_options.merge(timeout: 10))
cli.run(%w(foo --timeout 10))
end
it 'when not given', 'starts Sidekiq workers with default timeout' do
expect(Gitlab::SidekiqCluster).to receive(:start)
.with([['foo']], default_options.merge(timeout: described_class::DEFAULT_SOFT_TIMEOUT_SECONDS))
cli.run(%w(foo))
end
end
context 'queue namespace expansion' do
it 'starts Sidekiq workers for all queues in all_queues.yml with a namespace in argv' do
expect(Gitlab::SidekiqConfig::CliMethods).to receive(:worker_queues).and_return(['cronjob:foo', 'cronjob:bar'])
......@@ -222,7 +239,8 @@ describe Gitlab::SidekiqCluster::CLI do
.with([], :KILL)
stub_const("Gitlab::SidekiqCluster::CLI::CHECK_TERMINATE_INTERVAL_SECONDS", 0.1)
stub_const("Gitlab::SidekiqCluster::CLI::TERMINATE_TIMEOUT_SECONDS", 1)
allow(cli).to receive(:terminate_timeout_seconds) { 1 }
cli.wait_for_termination
end
......@@ -251,7 +269,8 @@ describe Gitlab::SidekiqCluster::CLI do
cli.run(%w(foo))
stub_const("Gitlab::SidekiqCluster::CLI::CHECK_TERMINATE_INTERVAL_SECONDS", 0.1)
stub_const("Gitlab::SidekiqCluster::CLI::TERMINATE_TIMEOUT_SECONDS", 1)
allow(cli).to receive(:terminate_timeout_seconds) { 1 }
cli.wait_for_termination
end
end
......
......@@ -58,6 +58,7 @@ describe Gitlab::SidekiqCluster do
directory: 'foo/bar',
max_concurrency: 20,
min_concurrency: 10,
timeout: 25,
dryrun: true
}
......@@ -74,6 +75,7 @@ describe Gitlab::SidekiqCluster do
max_concurrency: 50,
min_concurrency: 0,
worker_id: an_instance_of(Integer),
timeout: 25,
dryrun: false
}
......@@ -87,10 +89,10 @@ describe Gitlab::SidekiqCluster do
describe '.start_sidekiq' do
let(:first_worker_id) { 0 }
let(:options) do
{ env: :production, directory: 'foo/bar', max_concurrency: 20, min_concurrency: 0, worker_id: first_worker_id, dryrun: false }
{ env: :production, directory: 'foo/bar', max_concurrency: 20, min_concurrency: 0, worker_id: first_worker_id, timeout: 10, dryrun: false }
end
let(:env) { { "ENABLE_SIDEKIQ_CLUSTER" => "1", "SIDEKIQ_WORKER_ID" => first_worker_id.to_s } }
let(:args) { ['bundle', 'exec', 'sidekiq', anything, '-eproduction', *([anything] * 5)] }
let(:args) { ['bundle', 'exec', 'sidekiq', anything, '-eproduction', '-t10', *([anything] * 5)] }
it 'starts a Sidekiq process' do
allow(Process).to receive(:spawn).and_return(1)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment