Commit 7b146ab6 authored by Grzegorz Bizon's avatar Grzegorz Bizon

Recover from all exceptions when stealing bg migration

It also makes it possible to gracefully retry a migration in order to
avoid problems like deadlocks.
parent 01c55ffc
......@@ -7,6 +7,12 @@ module Gitlab
# Begins stealing jobs from the background migrations queue, blocking the
# caller until all jobs have been completed.
#
# When a migration raises a StandardError is is going to be retries up to
# three times, for example, to recover from a deadlock.
#
# When Exception is being raised, it enqueues the migration again, and
# re-raises the exception.
#
# steal_class - The name of the class for which to steal jobs.
def self.steal(steal_class)
enqueued = Sidekiq::Queue.new(self.queue)
......@@ -20,22 +26,34 @@ module Gitlab
next unless migration_class == steal_class
begin
perform(migration_class, migration_args) if job.delete
rescue => e
Logger.new($stdout).warn(e.message)
perform(migration_class, migration_args, retries: 3) if job.delete
rescue StandardError
next
rescue Exception
BackgroundMigrationWorker # enqueue this migration again
.perform_async(migration_class, migration_args)
raise
end
end
end
end
##
# Performs a background migration. In case of `StandardError` being caught
# this will retry a migration up to three times.
#
# class_name - The name of the background migration class as defined in the
# Gitlab::BackgroundMigration namespace.
#
# arguments - The arguments to pass to the background migration's "perform"
# method.
def self.perform(class_name, arguments)
def self.perform(class_name, arguments, retries: 1)
const_get(class_name).new.perform(*arguments)
rescue => e
Rails.logger.warn("Retrying background migration #{class_name} " \
"with #{arguments}")
(retries -= 1) > 0 ? retry : raise
end
end
end
......@@ -24,7 +24,8 @@ describe Gitlab::BackgroundMigration do
it 'steals jobs from a queue' do
expect(queue[0]).to receive(:delete).and_return(true)
expect(described_class).to receive(:perform).with('Foo', [10, 20])
expect(described_class).to receive(:perform)
.with('Foo', [10, 20], anything)
described_class.steal('Foo')
end
......@@ -32,7 +33,7 @@ describe Gitlab::BackgroundMigration do
it 'does not steal job that has already been taken' do
expect(queue[0]).to receive(:delete).and_return(false)
expect(described_class).not_to receive(:perform).with('Foo', [10, 20])
expect(described_class).not_to receive(:perform)
described_class.steal('Foo')
end
......@@ -57,17 +58,40 @@ describe Gitlab::BackgroundMigration do
before do
stub_const("#{described_class}::Foo", migration)
allow(queue[0]).to receive(:delete).and_return(true)
allow(queue[1]).to receive(:delete).and_return(true)
end
context 'when standard error is being raised' do
before do
allow(migration).to receive(:perform).with(10, 20)
.and_raise(StandardError, 'Migration error')
end
allow(queue[0]).to receive(:delete).and_return(true)
allow(queue[1]).to receive(:delete).and_return(true)
it 'recovers from an exception and retries the migration' do
expect(migration).to receive(:perform).with(10, 20)
.exactly(3).times.ordered
expect(migration).to receive(:perform).with(20, 30)
.once.ordered
expect(Rails.logger).to receive(:warn)
.with(/Retrying background migration/).exactly(3).times
described_class.steal('Foo')
end
end
context 'when top level exception is being raised' do
it 'enqueues the migration again and reraises the error' do
allow(migration).to receive(:perform).with(10, 20)
.and_raise(Exception, 'Migration error').once
expect(BackgroundMigrationWorker).to receive(:perform_async)
.with('Foo', [10, 20]).once
it 'recovers from an exceptions and continues' do
expect(migration).to receive(:perform).twice
expect(Rails.logger).not_to receive(:warn)
expect { described_class.steal('Foo') }
.to output(/Migration error/).to_stdout
.to raise_error(Exception)
end
end
end
end
......@@ -91,9 +115,9 @@ describe Gitlab::BackgroundMigration do
it 'steals from the scheduled sets queue first' do
Sidekiq::Testing.disable! do
expect(described_class).to receive(:perform)
.with('Object', [1]).ordered
.with('Object', [1], anything).ordered
expect(described_class).to receive(:perform)
.with('Object', [2]).ordered
.with('Object', [2], anything).ordered
BackgroundMigrationWorker.perform_async('Object', [2])
BackgroundMigrationWorker.perform_in(10.minutes, 'Object', [1])
......@@ -105,17 +129,38 @@ describe Gitlab::BackgroundMigration do
end
describe '.perform' do
it 'performs a background migration' do
instance = double(:instance)
klass = double(:klass, new: instance)
let(:migration) { spy(:migration) }
expect(described_class).to receive(:const_get)
.with('Foo')
.and_return(klass)
before do
stub_const("#{described_class.name}::Foo", migration)
end
expect(instance).to receive(:perform).with(10, 20)
context 'when retries count is not specified' do
it 'performs a background migration' do
expect(migration).to receive(:perform).with(10, 20).once
described_class.perform('Foo', [10, 20])
end
end
context 'when retries count is zero' do
it 'perform a background migration only once' do
expect(migration).to receive(:perform).with(10, 20)
.and_raise(StandardError).once
expect { described_class.perform('Foo', [10, 20], retries: 0) }
.to raise_error(StandardError)
end
end
context 'when retries count is larger than zero' do
it 'retries a background migration when needed' do
expect(migration).to receive(:perform).with(10, 20)
.and_raise(StandardError).exactly(3).times
expect { described_class.perform('Foo', [10, 20], retries: 3) }
.to raise_error(StandardError)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment