Commit 5c80bbb3 authored by Stan Hu's avatar Stan Hu

Merge branch '40396-sidekiq-in-process-group' into 'master'

sidekiq: terminate child processes at shutdown

See merge request gitlab-org/gitlab-ce!25669
parents 088128e6 f0c52df5
---
title: 'sidekiq: terminate child processes at shutdown'
merge_request: 25669
author:
type: added
...@@ -77,6 +77,9 @@ Sidekiq.configure_server do |config| ...@@ -77,6 +77,9 @@ Sidekiq.configure_server do |config|
# Avoid autoload issue such as 'Mail::Parsers::AddressStruct' # Avoid autoload issue such as 'Mail::Parsers::AddressStruct'
# https://github.com/mikel/mail/issues/912#issuecomment-214850355 # https://github.com/mikel/mail/issues/912#issuecomment-214850355
Mail.eager_autoload! Mail.eager_autoload!
# Ensure the whole process group is terminated if possible
Gitlab::SidekiqSignals.install!(Sidekiq::CLI::SIGNAL_HANDLERS)
end end
Sidekiq.configure_client do |config| Sidekiq.configure_client do |config|
......
...@@ -17,6 +17,11 @@ With the default settings, the MemoryKiller will cause a Sidekiq restart no ...@@ -17,6 +17,11 @@ With the default settings, the MemoryKiller will cause a Sidekiq restart no
more often than once every 15 minutes, with the restart causing about one more often than once every 15 minutes, with the restart causing about one
minute of delay for incoming background jobs. minute of delay for incoming background jobs.
Some background jobs rely on long-running external processes. To ensure these
are cleanly terminated when Sidekiq is restarted, each Sidekiq process should be
run as a process group leader (e.g., using `chpst -P`). If using Omnibus or the
`bin/background_jobs` script with `runit` installed, this is handled for you.
## Configuring the MemoryKiller ## Configuring the MemoryKiller
The MemoryKiller is controlled using environment variables. The MemoryKiller is controlled using environment variables.
......
...@@ -36,11 +36,13 @@ module Gitlab ...@@ -36,11 +36,13 @@ module Gitlab
# Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish. # Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish.
# Then, tell Sidekiq to gracefully shut down by giving jobs a few more # Then, tell Sidekiq to gracefully shut down by giving jobs a few more
# moments to finish, killing and requeuing them if they didn't, and # moments to finish, killing and requeuing them if they didn't, and
# then terminating itself. # then terminating itself. Sidekiq will replicate the TERM to all its
# children if it can.
wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down') wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down')
# Wait for Sidekiq to shutdown gracefully, and kill it if it didn't. # Wait for Sidekiq to shutdown gracefully, and kill it if it didn't.
wait_and_signal(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die') # Kill the whole pgroup, so we can be sure no children are left behind
wait_and_signal_pgroup(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die')
end end
end end
...@@ -53,6 +55,17 @@ module Gitlab ...@@ -53,6 +55,17 @@ module Gitlab
output.to_i output.to_i
end end
# If this sidekiq process is pgroup leader, signal to the whole pgroup
def wait_and_signal_pgroup(time, signal, explanation)
return wait_and_signal(time, signal, explanation) unless Process.getpgrp == pid
Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PGRP-#{pid} #{signal} (#{explanation})"
sleep(time)
Sidekiq.logger.warn "sending Sidekiq worker PGRP-#{pid} #{signal} (#{explanation})"
Process.kill(signal, "-#{pid}")
end
def wait_and_signal(time, signal, explanation) def wait_and_signal(time, signal, explanation)
Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})" Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})"
sleep(time) sleep(time)
......
# frozen_string_literal: true
module Gitlab
# As a process group leader, we can ensure that children of sidekiq are killed
# at the same time as sidekiq itself, to stop long-lived children from being
# reparented to init and "escaping". To do this, we override the default
# handlers used by sidekiq for INT and TERM signals
module SidekiqSignals
REPLACE_SIGNALS = %w[INT TERM].freeze
SIDEKIQ_CHANGED_MESSAGE =
"Intercepting signal handlers: #{REPLACE_SIGNALS.join(", ")} failed. " \
"Sidekiq should have registered them, but appears not to have done so."
def self.install!(sidekiq_handlers)
# This only works if we're process group leader
return unless Process.getpgrp == Process.pid
raise SIDEKIQ_CHANGED_MESSAGE unless
REPLACE_SIGNALS == sidekiq_handlers.keys & REPLACE_SIGNALS
REPLACE_SIGNALS.each do |signal|
old_handler = sidekiq_handlers[signal]
sidekiq_handlers[signal] = ->(cli) do
blindly_signal_pgroup!(signal)
old_handler.call(cli)
end
end
end
# The process group leader can forward INT and TERM signals to the whole
# group. However, the forwarded signal is *also* received by the leader,
# which could lead to an infinite loop. We can avoid this by temporarily
# ignoring the forwarded signal. This may cause us to miss some repeated
# signals from outside the process group, but that isn't fatal.
def self.blindly_signal_pgroup!(signal)
old_trap = trap(signal, 'IGNORE')
Process.kill(signal, "-#{Process.getpgrp}")
trap(signal, old_trap)
end
end
end
...@@ -35,7 +35,7 @@ describe Gitlab::SidekiqMiddleware::MemoryKiller do ...@@ -35,7 +35,7 @@ describe Gitlab::SidekiqMiddleware::MemoryKiller do
stub_const("#{described_class}::MAX_RSS", 5.kilobytes) stub_const("#{described_class}::MAX_RSS", 5.kilobytes)
end end
it 'sends the STP, TERM and KILL signals at expected times' do it 'sends the TSTP, TERM and KILL signals at expected times' do
expect(subject).to receive(:sleep).with(15 * 60).ordered expect(subject).to receive(:sleep).with(15 * 60).ordered
expect(Process).to receive(:kill).with('SIGTSTP', pid).ordered expect(Process).to receive(:kill).with('SIGTSTP', pid).ordered
...@@ -47,6 +47,17 @@ describe Gitlab::SidekiqMiddleware::MemoryKiller do ...@@ -47,6 +47,17 @@ describe Gitlab::SidekiqMiddleware::MemoryKiller do
run run
end end
it 'sends TSTP and TERM to the pid, but KILL to the pgroup, when running as process leader' do
allow(Process).to receive(:getpgrp) { pid }
allow(subject).to receive(:sleep)
expect(Process).to receive(:kill).with('SIGTSTP', pid).ordered
expect(Process).to receive(:kill).with('SIGTERM', pid).ordered
expect(Process).to receive(:kill).with('SIGKILL', "-#{pid}").ordered
run
end
end end
context 'when MAX_RSS is not exceeded' do context 'when MAX_RSS is not exceeded' do
......
require 'spec_helper'
describe Gitlab::SidekiqSignals do
describe '.install' do
let(:result) { Hash.new { |h, k| h[k] = 0 } }
let(:int_handler) { -> (_) { result['INT'] += 1 } }
let(:term_handler) { -> (_) { result['TERM'] += 1 } }
let(:other_handler) { -> (_) { result['OTHER'] += 1 } }
let(:signals) { { 'INT' => int_handler, 'TERM' => term_handler, 'OTHER' => other_handler } }
context 'not a process group leader' do
before do
allow(Process).to receive(:getpgrp) { Process.pid * 2 }
end
it 'does nothing' do
expect { described_class.install!(signals) }
.not_to change { signals }
end
end
context 'as a process group leader' do
before do
allow(Process).to receive(:getpgrp) { Process.pid }
end
it 'installs its own signal handlers for TERM and INT only' do
described_class.install!(signals)
expect(signals['INT']).not_to eq(int_handler)
expect(signals['TERM']).not_to eq(term_handler)
expect(signals['OTHER']).to eq(other_handler)
end
%w[INT TERM].each do |signal|
it "installs a forwarding signal handler for #{signal}" do
described_class.install!(signals)
expect(described_class)
.to receive(:trap)
.with(signal, 'IGNORE')
.and_return(:original_trap)
.ordered
expect(Process)
.to receive(:kill)
.with(signal, "-#{Process.pid}")
.ordered
expect(described_class)
.to receive(:trap)
.with(signal, :original_trap)
.ordered
signals[signal].call(:cli)
expect(result[signal]).to eq(1)
end
it "raises if sidekiq no longer traps SIG#{signal}" do
signals.delete(signal)
expect { described_class.install!(signals) }
.to raise_error(/Sidekiq should have registered/)
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment