Commit e344cdc2 authored by Mayra Cabrera's avatar Mayra Cabrera

Merge branch 'sabrams-bg-migration-helper' into 'master'

Add option to background migration helper

See merge request gitlab-org/gitlab!47585
parents d9f79071 d06a57c4
......@@ -55,7 +55,8 @@ module Gitlab
bulk_migrate_async(jobs) unless jobs.empty?
end
# Queues background migration jobs for an entire table, batched by ID range.
# Queues background migration jobs for an entire table in batches.
# The default batching column used is the standard primary key `id`.
# Each job is scheduled with a `delay_interval` in between.
# If you use a small interval, then some jobs may run at the same time.
#
......@@ -68,6 +69,7 @@ module Gitlab
# is scheduled to be run. These records can be used to trace execution of the background job, but there is no
# builtin support to manage that automatically at this time. You should only set this flag if you are aware of
# how it works, and intend to manually cleanup the database records in your background job.
# primary_column_name - The name of the primary key column if the primary key is not `id`
#
# *Returns the final migration delay*
#
......@@ -87,8 +89,9 @@ module Gitlab
# # do something
# end
# end
def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE, other_job_arguments: [], initial_delay: 0, track_jobs: false)
raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE, other_job_arguments: [], initial_delay: 0, track_jobs: false, primary_column_name: :id)
raise "#{model_class} does not have an ID column of #{primary_column_name} to use for batch ranges" unless model_class.column_names.include?(primary_column_name.to_s)
raise "#{primary_column_name} is not an integer column" unless model_class.columns_hash[primary_column_name.to_s].type == :integer
# To not overload the worker too much we enforce a minimum interval both
# when scheduling and performing jobs.
......@@ -99,7 +102,7 @@ module Gitlab
final_delay = 0
model_class.each_batch(of: batch_size) do |relation, index|
start_id, end_id = relation.pluck(Arel.sql('MIN(id), MAX(id)')).first
start_id, end_id = relation.pluck(Arel.sql("MIN(#{primary_column_name}), MAX(#{primary_column_name})")).first
# `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
# the same time, which is not helpful in most cases where we wish to
......
......@@ -189,7 +189,51 @@ RSpec.describe Gitlab::Database::Migrations::BackgroundMigrationHelpers do
end
end
context "when the model doesn't have an ID column" do
context 'when the model specifies a primary_column_name' do
let!(:id1) { create(:container_expiration_policy).id }
let!(:id2) { create(:container_expiration_policy).id }
let!(:id3) { create(:container_expiration_policy).id }
around do |example|
freeze_time { example.run }
end
before do
ContainerExpirationPolicy.class_eval do
include EachBatch
end
end
it 'returns the final expected delay', :aggregate_failures do
Sidekiq::Testing.fake! do
final_delay = model.queue_background_migration_jobs_by_range_at_intervals(ContainerExpirationPolicy, 'FooJob', 10.minutes, batch_size: 2, primary_column_name: :project_id)
expect(final_delay.to_f).to eq(20.minutes.to_f)
expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]])
expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.minutes.from_now.to_f)
expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]])
expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(20.minutes.from_now.to_f)
end
end
context "when the primary_column_name is not an integer" do
it 'raises error' do
expect do
model.queue_background_migration_jobs_by_range_at_intervals(ContainerExpirationPolicy, 'FooJob', 10.minutes, primary_column_name: :enabled)
end.to raise_error(StandardError, /is not an integer column/)
end
end
context "when the primary_column_name does not exist" do
it 'raises error' do
expect do
model.queue_background_migration_jobs_by_range_at_intervals(ContainerExpirationPolicy, 'FooJob', 10.minutes, primary_column_name: :foo)
end.to raise_error(StandardError, /does not have an ID column of foo/)
end
end
end
context "when the model doesn't have an ID or primary_column_name column" do
it 'raises error (for now)' do
expect do
model.queue_background_migration_jobs_by_range_at_intervals(ProjectAuthorization, 'FooJob', 10.seconds)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment