Commit 1a447fd5 authored by Andreas Brandl's avatar Andreas Brandl

Merge branch 'pb-track-background-job-executions' into 'master'

Support auto-batching background migrations

See merge request gitlab-org/gitlab!54628
parents c2afaab7 99aa25b9
---
title: Create tables to track auto-batched background migrations
merge_request: 54628
author:
type: added
# frozen_string_literal: true
class CreateBackgroundMigrationTrackingTables < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
create_table_with_constraints :batched_background_migrations do |t|
t.timestamps_with_timezone
t.bigint :min_value, null: false, default: 1
t.bigint :max_value, null: false
t.integer :batch_size, null: false
t.integer :sub_batch_size, null: false
t.integer :interval, limit: 2, null: false
t.integer :status, limit: 2, null: false, default: 0
t.text :job_class_name, null: false
t.text :batch_class_name, null: false,
default: 'Gitlab::Database::BackgroundMigration::PrimaryKeyBatchingStrategy'
t.text :table_name, null: false
t.text :column_name, null: false
t.jsonb :job_arguments, null: false, default: '[]'
t.text_limit :job_class_name, 100
t.text_limit :batch_class_name, 100
t.text_limit :table_name, 63
t.text_limit :column_name, 63
t.check_constraint :check_positive_min_value, 'min_value > 0'
t.check_constraint :check_max_value_in_range, 'max_value >= min_value'
t.check_constraint :check_positive_sub_batch_size, 'sub_batch_size > 0'
t.check_constraint :check_batch_size_in_range, 'batch_size >= sub_batch_size'
t.index %i[job_class_name table_name column_name], name: :index_batched_migrations_on_job_table_and_column_name
end
create_table :batched_background_migration_jobs do |t|
t.timestamps_with_timezone
t.datetime_with_timezone :started_at
t.datetime_with_timezone :finished_at
t.references :batched_background_migration, null: false, index: false, foreign_key: { on_delete: :cascade }
t.bigint :min_value, null: false
t.bigint :max_value, null: false
t.integer :batch_size, null: false
t.integer :sub_batch_size, null: false
t.integer :status, limit: 2, null: false, default: 0
t.integer :attempts, limit: 2, null: false, default: 0
t.index [:batched_background_migration_id, :id], name: :index_batched_jobs_by_batched_migration_id_and_id
end
end
def down
drop_table :batched_background_migration_jobs
drop_table :batched_background_migrations
end
end
1cf1305ad5eaaef51f99f057b8a2e81731d69a6d02629c0c9a7d94dfdecbea47
\ No newline at end of file
...@@ -9768,6 +9768,64 @@ CREATE SEQUENCE badges_id_seq ...@@ -9768,6 +9768,64 @@ CREATE SEQUENCE badges_id_seq
ALTER SEQUENCE badges_id_seq OWNED BY badges.id; ALTER SEQUENCE badges_id_seq OWNED BY badges.id;
CREATE TABLE batched_background_migration_jobs (
id bigint NOT NULL,
created_at timestamp with time zone NOT NULL,
updated_at timestamp with time zone NOT NULL,
started_at timestamp with time zone,
finished_at timestamp with time zone,
batched_background_migration_id bigint NOT NULL,
min_value bigint NOT NULL,
max_value bigint NOT NULL,
batch_size integer NOT NULL,
sub_batch_size integer NOT NULL,
status smallint DEFAULT 0 NOT NULL,
attempts smallint DEFAULT 0 NOT NULL
);
CREATE SEQUENCE batched_background_migration_jobs_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE batched_background_migration_jobs_id_seq OWNED BY batched_background_migration_jobs.id;
CREATE TABLE batched_background_migrations (
id bigint NOT NULL,
created_at timestamp with time zone NOT NULL,
updated_at timestamp with time zone NOT NULL,
min_value bigint DEFAULT 1 NOT NULL,
max_value bigint NOT NULL,
batch_size integer NOT NULL,
sub_batch_size integer NOT NULL,
"interval" smallint NOT NULL,
status smallint DEFAULT 0 NOT NULL,
job_class_name text NOT NULL,
batch_class_name text DEFAULT 'Gitlab::Database::BackgroundMigration::PrimaryKeyBatchingStrategy'::text NOT NULL,
table_name text NOT NULL,
column_name text NOT NULL,
job_arguments jsonb DEFAULT '"[]"'::jsonb NOT NULL,
CONSTRAINT check_5bb0382d6f CHECK ((char_length(column_name) <= 63)),
CONSTRAINT check_6b6a06254a CHECK ((char_length(table_name) <= 63)),
CONSTRAINT check_batch_size_in_range CHECK ((batch_size >= sub_batch_size)),
CONSTRAINT check_e6c75b1e29 CHECK ((char_length(job_class_name) <= 100)),
CONSTRAINT check_fe10674721 CHECK ((char_length(batch_class_name) <= 100)),
CONSTRAINT check_max_value_in_range CHECK ((max_value >= min_value)),
CONSTRAINT check_positive_min_value CHECK ((min_value > 0)),
CONSTRAINT check_positive_sub_batch_size CHECK ((sub_batch_size > 0))
);
CREATE SEQUENCE batched_background_migrations_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE batched_background_migrations_id_seq OWNED BY batched_background_migrations.id;
CREATE TABLE board_assignees ( CREATE TABLE board_assignees (
id integer NOT NULL, id integer NOT NULL,
board_id integer NOT NULL, board_id integer NOT NULL,
...@@ -18812,6 +18870,10 @@ ALTER TABLE ONLY background_migration_jobs ALTER COLUMN id SET DEFAULT nextval(' ...@@ -18812,6 +18870,10 @@ ALTER TABLE ONLY background_migration_jobs ALTER COLUMN id SET DEFAULT nextval('
ALTER TABLE ONLY badges ALTER COLUMN id SET DEFAULT nextval('badges_id_seq'::regclass); ALTER TABLE ONLY badges ALTER COLUMN id SET DEFAULT nextval('badges_id_seq'::regclass);
ALTER TABLE ONLY batched_background_migration_jobs ALTER COLUMN id SET DEFAULT nextval('batched_background_migration_jobs_id_seq'::regclass);
ALTER TABLE ONLY batched_background_migrations ALTER COLUMN id SET DEFAULT nextval('batched_background_migrations_id_seq'::regclass);
ALTER TABLE ONLY board_assignees ALTER COLUMN id SET DEFAULT nextval('board_assignees_id_seq'::regclass); ALTER TABLE ONLY board_assignees ALTER COLUMN id SET DEFAULT nextval('board_assignees_id_seq'::regclass);
ALTER TABLE ONLY board_group_recent_visits ALTER COLUMN id SET DEFAULT nextval('board_group_recent_visits_id_seq'::regclass); ALTER TABLE ONLY board_group_recent_visits ALTER COLUMN id SET DEFAULT nextval('board_group_recent_visits_id_seq'::regclass);
...@@ -19893,6 +19955,12 @@ ALTER TABLE ONLY background_migration_jobs ...@@ -19893,6 +19955,12 @@ ALTER TABLE ONLY background_migration_jobs
ALTER TABLE ONLY badges ALTER TABLE ONLY badges
ADD CONSTRAINT badges_pkey PRIMARY KEY (id); ADD CONSTRAINT badges_pkey PRIMARY KEY (id);
ALTER TABLE ONLY batched_background_migration_jobs
ADD CONSTRAINT batched_background_migration_jobs_pkey PRIMARY KEY (id);
ALTER TABLE ONLY batched_background_migrations
ADD CONSTRAINT batched_background_migrations_pkey PRIMARY KEY (id);
ALTER TABLE ONLY board_assignees ALTER TABLE ONLY board_assignees
ADD CONSTRAINT board_assignees_pkey PRIMARY KEY (id); ADD CONSTRAINT board_assignees_pkey PRIMARY KEY (id);
...@@ -21641,6 +21709,10 @@ CREATE INDEX index_badges_on_group_id ON badges USING btree (group_id); ...@@ -21641,6 +21709,10 @@ CREATE INDEX index_badges_on_group_id ON badges USING btree (group_id);
CREATE INDEX index_badges_on_project_id ON badges USING btree (project_id); CREATE INDEX index_badges_on_project_id ON badges USING btree (project_id);
CREATE INDEX index_batched_jobs_by_batched_migration_id_and_id ON batched_background_migration_jobs USING btree (batched_background_migration_id, id);
CREATE INDEX index_batched_migrations_on_job_table_and_column_name ON batched_background_migrations USING btree (job_class_name, table_name, column_name);
CREATE INDEX index_board_assignees_on_assignee_id ON board_assignees USING btree (assignee_id); CREATE INDEX index_board_assignees_on_assignee_id ON board_assignees USING btree (assignee_id);
CREATE UNIQUE INDEX index_board_assignees_on_board_id_and_assignee_id ON board_assignees USING btree (board_id, assignee_id); CREATE UNIQUE INDEX index_board_assignees_on_board_id_and_assignee_id ON board_assignees USING btree (board_id, assignee_id);
...@@ -25383,6 +25455,9 @@ ALTER TABLE ONLY ci_resources ...@@ -25383,6 +25455,9 @@ ALTER TABLE ONLY ci_resources
ALTER TABLE ONLY clusters_applications_fluentd ALTER TABLE ONLY clusters_applications_fluentd
ADD CONSTRAINT fk_rails_4319b1dcd2 FOREIGN KEY (cluster_id) REFERENCES clusters(id) ON DELETE CASCADE; ADD CONSTRAINT fk_rails_4319b1dcd2 FOREIGN KEY (cluster_id) REFERENCES clusters(id) ON DELETE CASCADE;
ALTER TABLE ONLY batched_background_migration_jobs
ADD CONSTRAINT fk_rails_432153b86d FOREIGN KEY (batched_background_migration_id) REFERENCES batched_background_migrations(id) ON DELETE CASCADE;
ALTER TABLE ONLY operations_strategies_user_lists ALTER TABLE ONLY operations_strategies_user_lists
ADD CONSTRAINT fk_rails_43241e8d29 FOREIGN KEY (strategy_id) REFERENCES operations_strategies(id) ON DELETE CASCADE; ADD CONSTRAINT fk_rails_43241e8d29 FOREIGN KEY (strategy_id) REFERENCES operations_strategies(id) ON DELETE CASCADE;
...@@ -2,13 +2,11 @@ ...@@ -2,13 +2,11 @@
module Gitlab module Gitlab
module BackgroundMigration module BackgroundMigration
# Background migration that extends CopyColumn to update the value of a # Background migration that updates the value of a
# column using the value of another column in the same table. # column using the value of another column in the same table.
# #
# - The {start_id, end_id} arguments are at the start so that it can be used # - The {start_id, end_id} arguments are at the start so that it can be used
# with `queue_background_migration_jobs_by_range_at_intervals` # with `queue_batched_background_migration`
# - Provides support for background job tracking through the use of
# Gitlab::Database::BackgroundMigrationJob
# - Uses sub-batching so that we can keep each update's execution time at # - Uses sub-batching so that we can keep each update's execution time at
# low 100s ms, while being able to update more records per 2 minutes # low 100s ms, while being able to update more records per 2 minutes
# that we allow background migration jobs to be scheduled one after the other # that we allow background migration jobs to be scheduled one after the other
...@@ -22,28 +20,24 @@ module Gitlab ...@@ -22,28 +20,24 @@ module Gitlab
# start_id - The start ID of the range of rows to update. # start_id - The start ID of the range of rows to update.
# end_id - The end ID of the range of rows to update. # end_id - The end ID of the range of rows to update.
# table - The name of the table that contains the columns. # batch_table - The name of the table that contains the columns.
# primary_key - The primary key column of the table. # batch_column - The name of the column we use to batch over the table.
# copy_from - The column containing the data to copy.
# copy_to - The column to copy the data to.
# sub_batch_size - We don't want updates to take more than ~100ms # sub_batch_size - We don't want updates to take more than ~100ms
# This allows us to run multiple smaller batches during # This allows us to run multiple smaller batches during
# the minimum 2.minute interval that we can schedule jobs # the minimum 2.minute interval that we can schedule jobs
def perform(start_id, end_id, table, primary_key, copy_from, copy_to, sub_batch_size) # copy_from - The column containing the data to copy.
# copy_to - The column to copy the data to.
def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, copy_from, copy_to)
quoted_copy_from = connection.quote_column_name(copy_from) quoted_copy_from = connection.quote_column_name(copy_from)
quoted_copy_to = connection.quote_column_name(copy_to) quoted_copy_to = connection.quote_column_name(copy_to)
parent_batch_relation = relation_scoped_to_range(table, primary_key, start_id, end_id) parent_batch_relation = relation_scoped_to_range(batch_table, batch_column, start_id, end_id)
parent_batch_relation.each_batch(column: primary_key, of: sub_batch_size) do |sub_batch| parent_batch_relation.each_batch(column: batch_column, of: sub_batch_size) do |sub_batch|
sub_batch.update_all("#{quoted_copy_to}=#{quoted_copy_from}") sub_batch.update_all("#{quoted_copy_to}=#{quoted_copy_from}")
sleep(PAUSE_SECONDS) sleep(PAUSE_SECONDS)
end end
# We have to add all arguments when marking a job as succeeded as they
# are all used to track the job by `queue_background_migration_jobs_by_range_at_intervals`
mark_job_as_succeeded(start_id, end_id, table, primary_key, copy_from, copy_to, sub_batch_size)
end end
private private
...@@ -52,10 +46,6 @@ module Gitlab ...@@ -52,10 +46,6 @@ module Gitlab
ActiveRecord::Base.connection ActiveRecord::Base.connection
end end
def mark_job_as_succeeded(*arguments)
Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(self.class.name, arguments)
end
def relation_scoped_to_range(source_table, source_key_column, start_id, stop_id) def relation_scoped_to_range(source_table, source_key_column, start_id, stop_id)
define_batchable_model(source_table).where(source_key_column => start_id..stop_id) define_batchable_model(source_table).where(source_key_column => start_id..stop_id)
end end
......
# frozen_string_literal: true
module Gitlab
module Database
module BackgroundMigration
class BatchedJob < ActiveRecord::Base # rubocop:disable Rails/ApplicationRecord
self.table_name = :batched_background_migration_jobs
belongs_to :batched_migration, foreign_key: :batched_background_migration_id
enum status: {
pending: 0,
running: 1,
failed: 2,
succeeded: 3
}
delegate :aborted?, :job_class, :table_name, :column_name, :job_arguments,
to: :batched_migration, prefix: :migration
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Database
module BackgroundMigration
class BatchedMigration < ActiveRecord::Base # rubocop:disable Rails/ApplicationRecord
self.table_name = :batched_background_migrations
has_many :batched_jobs, foreign_key: :batched_background_migration_id
has_one :last_job, -> { order(id: :desc) },
class_name: 'Gitlab::Database::BackgroundMigration::BatchedJob',
foreign_key: :batched_background_migration_id
scope :queue_order, -> { order(id: :asc) }
enum status: {
paused: 0,
active: 1,
aborted: 2,
finished: 3
}
def self.remove_toplevel_prefix(name)
name&.sub(/\A::/, '')
end
def interval_elapsed?
last_job.nil? || last_job.created_at <= Time.current - interval
end
def create_batched_job!(min, max)
batched_jobs.create!(min_value: min, max_value: max, batch_size: batch_size, sub_batch_size: sub_batch_size)
end
def next_min_value
last_job&.max_value&.next || min_value
end
def job_class
job_class_name.constantize
end
def batch_class
batch_class_name.constantize
end
def job_class_name=(class_name)
write_attribute(:job_class_name, self.class.remove_toplevel_prefix(class_name))
end
def batch_class_name=(class_name)
write_attribute(:batch_class_name, self.class.remove_toplevel_prefix(class_name))
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Database
module BackgroundMigration
class BatchedMigrationWrapper
def perform(batch_tracking_record)
start_tracking_execution(batch_tracking_record)
execute_batch(batch_tracking_record)
batch_tracking_record.status = :succeeded
rescue => e
batch_tracking_record.status = :failed
raise e
ensure
finish_tracking_execution(batch_tracking_record)
end
private
def start_tracking_execution(tracking_record)
tracking_record.update!(attempts: tracking_record.attempts + 1, status: :running, started_at: Time.current)
end
def execute_batch(tracking_record)
job_instance = tracking_record.migration_job_class.new
job_instance.perform(
tracking_record.min_value,
tracking_record.max_value,
tracking_record.migration_table_name,
tracking_record.migration_column_name,
tracking_record.sub_batch_size,
*tracking_record.migration_job_arguments)
end
def finish_tracking_execution(tracking_record)
tracking_record.finished_at = Time.current
tracking_record.save!
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Database
module BackgroundMigration
class PrimaryKeyBatchingStrategy
include Gitlab::Database::DynamicModelHelpers
def next_batch(table_name, column_name, batch_min_value:, batch_size:)
model_class = define_batchable_model(table_name)
quoted_column_name = model_class.connection.quote_column_name(column_name)
relation = model_class.where("#{quoted_column_name} >= ?", batch_min_value)
next_batch_bounds = nil
relation.each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first
break
end
next_batch_bounds
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Database
module BackgroundMigration
class Scheduler
def perform(migration_wrapper: BatchedMigrationWrapper.new)
active_migration = BatchedMigration.active.queue_order.first
return unless active_migration&.interval_elapsed?
if next_batched_job = create_next_batched_job!(active_migration)
migration_wrapper.perform(next_batched_job)
else
finish_active_migration(active_migration)
end
end
private
def create_next_batched_job!(active_migration)
next_batch_range = find_next_batch_range(active_migration)
return if next_batch_range.nil?
active_migration.create_batched_job!(next_batch_range.min, next_batch_range.max)
end
def find_next_batch_range(active_migration)
batching_strategy = active_migration.batch_class.new
batch_min_value = active_migration.next_min_value
next_batch_bounds = batching_strategy.next_batch(
active_migration.table_name,
active_migration.column_name,
batch_min_value: batch_min_value,
batch_size: active_migration.batch_size)
return if next_batch_bounds.nil?
clamped_batch_range(active_migration, next_batch_bounds)
end
def clamped_batch_range(active_migration, next_bounds)
min_value, max_value = next_bounds
return if min_value > active_migration.max_value
max_value = max_value.clamp(min_value, active_migration.max_value)
(min_value..max_value)
end
def finish_active_migration(active_migration)
active_migration.finished!
end
end
end
end
end
...@@ -1015,7 +1015,7 @@ module Gitlab ...@@ -1015,7 +1015,7 @@ module Gitlab
'CopyColumnUsingBackgroundMigrationJob', 'CopyColumnUsingBackgroundMigrationJob',
interval, interval,
batch_size: batch_size, batch_size: batch_size,
other_job_arguments: [table, primary_key, column, tmp_column, sub_batch_size], other_job_arguments: [table, primary_key, sub_batch_size, column, tmp_column],
track_jobs: true, track_jobs: true,
primary_column_name: primary_key primary_column_name: primary_key
) )
......
# frozen_string_literal: true
FactoryBot.define do
factory :batched_background_migration_job, class: '::Gitlab::Database::BackgroundMigration::BatchedJob' do
batched_migration factory: :batched_background_migration
min_value { 1 }
max_value { 10 }
batch_size { 5 }
sub_batch_size { 1 }
end
end
# frozen_string_literal: true
FactoryBot.define do
factory :batched_background_migration, class: '::Gitlab::Database::BackgroundMigration::BatchedMigration' do
max_value { 10 }
batch_size { 5 }
sub_batch_size { 1 }
interval { 2.minutes }
job_class_name { 'Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob' }
table_name { :events }
column_name { :id }
end
end
...@@ -38,22 +38,9 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo ...@@ -38,22 +38,9 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo
describe '#perform' do describe '#perform' do
let(:migration_class) { described_class.name } let(:migration_class) { described_class.name }
let!(:job1) do
table(:background_migration_jobs).create!(
class_name: migration_class,
arguments: [1, 10, table_name, 'id', 'id', 'id_convert_to_bigint', sub_batch_size]
)
end
let!(:job2) do
table(:background_migration_jobs).create!(
class_name: migration_class,
arguments: [11, 20, table_name, 'id', 'id', 'id_convert_to_bigint', sub_batch_size]
)
end
it 'copies all primary keys in range' do it 'copies all primary keys in range' do
subject.perform(12, 15, table_name, 'id', 'id', 'id_convert_to_bigint', sub_batch_size) subject.perform(12, 15, table_name, 'id', sub_batch_size, 'id', 'id_convert_to_bigint')
expect(test_table.where('id = id_convert_to_bigint').pluck(:id)).to contain_exactly(12, 15) expect(test_table.where('id = id_convert_to_bigint').pluck(:id)).to contain_exactly(12, 15)
expect(test_table.where(id_convert_to_bigint: 0).pluck(:id)).to contain_exactly(11, 19) expect(test_table.where(id_convert_to_bigint: 0).pluck(:id)).to contain_exactly(11, 19)
...@@ -61,7 +48,7 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo ...@@ -61,7 +48,7 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo
end end
it 'copies all foreign keys in range' do it 'copies all foreign keys in range' do
subject.perform(10, 14, table_name, 'id', 'fk', 'fk_convert_to_bigint', sub_batch_size) subject.perform(10, 14, table_name, 'id', sub_batch_size, 'fk', 'fk_convert_to_bigint')
expect(test_table.where('fk = fk_convert_to_bigint').pluck(:id)).to contain_exactly(11, 12) expect(test_table.where('fk = fk_convert_to_bigint').pluck(:id)).to contain_exactly(11, 12)
expect(test_table.where(fk_convert_to_bigint: 0).pluck(:id)).to contain_exactly(15, 19) expect(test_table.where(fk_convert_to_bigint: 0).pluck(:id)).to contain_exactly(15, 19)
...@@ -71,21 +58,11 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo ...@@ -71,21 +58,11 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo
it 'copies columns with NULLs' do it 'copies columns with NULLs' do
expect(test_table.where("name_convert_to_text = 'no name'").count).to eq(4) expect(test_table.where("name_convert_to_text = 'no name'").count).to eq(4)
subject.perform(10, 20, table_name, 'id', 'name', 'name_convert_to_text', sub_batch_size) subject.perform(10, 20, table_name, 'id', sub_batch_size, 'name', 'name_convert_to_text')
expect(test_table.where('name = name_convert_to_text').pluck(:id)).to contain_exactly(11, 12, 19) expect(test_table.where('name = name_convert_to_text').pluck(:id)).to contain_exactly(11, 12, 19)
expect(test_table.where('name is NULL and name_convert_to_text is NULL').pluck(:id)).to contain_exactly(15) expect(test_table.where('name is NULL and name_convert_to_text is NULL').pluck(:id)).to contain_exactly(15)
expect(test_table.where("name_convert_to_text = 'no name'").count).to eq(0) expect(test_table.where("name_convert_to_text = 'no name'").count).to eq(0)
end end
it 'tracks completion with BackgroundMigrationJob' do
expect do
subject.perform(11, 20, table_name, 'id', 'id', 'id_convert_to_bigint', sub_batch_size)
end.to change { Gitlab::Database::BackgroundMigrationJob.succeeded.count }.from(0).to(1)
expect(job1.reload.status).to eq(0)
expect(job2.reload.status).to eq(1)
expect(test_table.where('id = id_convert_to_bigint').count).to eq(4)
end
end end
end end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::BackgroundMigration::BatchedJob, type: :model do
it_behaves_like 'having unique enum values'
describe 'associations' do
it { is_expected.to belong_to(:batched_migration).with_foreign_key(:batched_background_migration_id) }
end
describe 'delegated batched_migration attributes' do
let(:batched_job) { build(:batched_background_migration_job) }
let(:batched_migration) { batched_job.batched_migration }
describe '#migration_aborted?' do
before do
batched_migration.status = :aborted
end
it 'returns the migration aborted?' do
expect(batched_job.migration_aborted?).to eq(batched_migration.aborted?)
end
end
describe '#migration_job_class' do
it 'returns the migration job_class' do
expect(batched_job.migration_job_class).to eq(batched_migration.job_class)
end
end
describe '#migration_table_name' do
it 'returns the migration table_name' do
expect(batched_job.migration_table_name).to eq(batched_migration.table_name)
end
end
describe '#migration_column_name' do
it 'returns the migration column_name' do
expect(batched_job.migration_column_name).to eq(batched_migration.column_name)
end
end
describe '#migration_job_arguments' do
it 'returns the migration job_arguments' do
expect(batched_job.migration_job_arguments).to eq(batched_migration.job_arguments)
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigration, type: :model do
it_behaves_like 'having unique enum values'
describe 'associations' do
it { is_expected.to have_many(:batched_jobs).with_foreign_key(:batched_background_migration_id) }
describe '#last_job' do
let!(:batched_migration) { create(:batched_background_migration) }
let!(:batched_job1) { create(:batched_background_migration_job, batched_migration: batched_migration) }
let!(:batched_job2) { create(:batched_background_migration_job, batched_migration: batched_migration) }
it 'returns the most recent (in order of id) batched job' do
expect(batched_migration.last_job).to eq(batched_job2)
end
end
end
describe '.queue_order' do
let!(:migration1) { create(:batched_background_migration) }
let!(:migration2) { create(:batched_background_migration) }
let!(:migration3) { create(:batched_background_migration) }
it 'returns batched migrations ordered by their id' do
expect(described_class.queue_order.all).to eq([migration1, migration2, migration3])
end
end
describe '#interval_elapsed?' do
context 'when the migration has no last_job' do
let(:batched_migration) { build(:batched_background_migration) }
it 'returns true' do
expect(batched_migration.interval_elapsed?).to eq(true)
end
end
context 'when the migration has a last_job' do
let(:interval) { 2.minutes }
let(:batched_migration) { create(:batched_background_migration, interval: interval) }
context 'when the last_job is less than an interval old' do
it 'returns false' do
freeze_time do
create(:batched_background_migration_job,
batched_migration: batched_migration,
created_at: Time.current - 1.minute)
expect(batched_migration.interval_elapsed?).to eq(false)
end
end
end
context 'when the last_job is exactly an interval old' do
it 'returns true' do
freeze_time do
create(:batched_background_migration_job,
batched_migration: batched_migration,
created_at: Time.current - 2.minutes)
expect(batched_migration.interval_elapsed?).to eq(true)
end
end
end
context 'when the last_job is more than an interval old' do
it 'returns true' do
freeze_time do
create(:batched_background_migration_job,
batched_migration: batched_migration,
created_at: Time.current - 3.minutes)
expect(batched_migration.interval_elapsed?).to eq(true)
end
end
end
end
end
describe '#create_batched_job!' do
let(:batched_migration) { create(:batched_background_migration) }
it 'creates a batched_job with the correct batch configuration' do
batched_job = batched_migration.create_batched_job!(1, 5)
expect(batched_job).to have_attributes(
min_value: 1,
max_value: 5,
batch_size: batched_migration.batch_size,
sub_batch_size: batched_migration.sub_batch_size)
end
end
describe '#next_min_value' do
let!(:batched_migration) { create(:batched_background_migration) }
context 'when a previous job exists' do
let!(:batched_job) { create(:batched_background_migration_job, batched_migration: batched_migration) }
it 'returns the next value after the previous maximum' do
expect(batched_migration.next_min_value).to eq(batched_job.max_value + 1)
end
end
context 'when a previous job does not exist' do
it 'returns the migration minimum value' do
expect(batched_migration.next_min_value).to eq(batched_migration.min_value)
end
end
end
describe '#job_class' do
let(:job_class) { Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob }
let(:batched_migration) { build(:batched_background_migration) }
it 'returns the class of the job for the migration' do
expect(batched_migration.job_class).to eq(job_class)
end
end
describe '#batch_class' do
let(:batch_class) { Gitlab::Database::BackgroundMigration::PrimaryKeyBatchingStrategy}
let(:batched_migration) { build(:batched_background_migration) }
it 'returns the class of the batch strategy for the migration' do
expect(batched_migration.batch_class).to eq(batch_class)
end
end
shared_examples_for 'an attr_writer that normalizes assigned class names' do |attribute_name|
let(:batched_migration) { build(:batched_background_migration) }
context 'when the toplevel namespace prefix exists' do
it 'removes the leading prefix' do
batched_migration.public_send(:"#{attribute_name}=", '::Foo::Bar')
expect(batched_migration[attribute_name]).to eq('Foo::Bar')
end
end
context 'when the toplevel namespace prefix does not exist' do
it 'does not change the given class name' do
batched_migration.public_send(:"#{attribute_name}=", '::Foo::Bar')
expect(batched_migration[attribute_name]).to eq('Foo::Bar')
end
end
end
describe '#job_class_name=' do
it_behaves_like 'an attr_writer that normalizes assigned class names', :job_class_name
end
describe '#batch_class_name=' do
it_behaves_like 'an attr_writer that normalizes assigned class names', :batch_class_name
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '#perform' do
let(:migration_wrapper) { described_class.new }
let(:job_class) { Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob }
let_it_be(:active_migration) { create(:batched_background_migration, :active, job_arguments: [:id, :other_id]) }
let!(:job_record) { create(:batched_background_migration_job, batched_migration: active_migration) }
it 'runs the migration job' do
expect_next_instance_of(job_class) do |job_instance|
expect(job_instance).to receive(:perform).with(1, 10, 'events', 'id', 1, 'id', 'other_id')
end
migration_wrapper.perform(job_record)
end
it 'updates the the tracking record in the database' do
expect(job_record).to receive(:update!).with(hash_including(attempts: 1, status: :running)).and_call_original
freeze_time do
migration_wrapper.perform(job_record)
reloaded_job_record = job_record.reload
expect(reloaded_job_record).not_to be_pending
expect(reloaded_job_record.attempts).to eq(1)
expect(reloaded_job_record.started_at).to eq(Time.current)
end
end
context 'when the migration job does not raise an error' do
it 'marks the tracking record as succeeded' do
expect_next_instance_of(job_class) do |job_instance|
expect(job_instance).to receive(:perform).with(1, 10, 'events', 'id', 1, 'id', 'other_id')
end
freeze_time do
migration_wrapper.perform(job_record)
reloaded_job_record = job_record.reload
expect(reloaded_job_record).to be_succeeded
expect(reloaded_job_record.finished_at).to eq(Time.current)
end
end
end
context 'when the migration job raises an error' do
it 'marks the tracking record as failed before raising the error' do
expect_next_instance_of(job_class) do |job_instance|
expect(job_instance).to receive(:perform)
.with(1, 10, 'events', 'id', 1, 'id', 'other_id')
.and_raise(RuntimeError, 'Something broke!')
end
freeze_time do
expect { migration_wrapper.perform(job_record) }.to raise_error(RuntimeError, 'Something broke!')
reloaded_job_record = job_record.reload
expect(reloaded_job_record).to be_failed
expect(reloaded_job_record.finished_at).to eq(Time.current)
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::BackgroundMigration::PrimaryKeyBatchingStrategy, '#next_batch' do
let(:batching_strategy) { described_class.new }
let_it_be(:event1) { create(:event) }
let_it_be(:event2) { create(:event) }
let_it_be(:event3) { create(:event) }
let_it_be(:event4) { create(:event) }
context 'when starting on the first batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:events, :id, batch_min_value: event1.id, batch_size: 3)
expect(batch_bounds).to eq([event1.id, event3.id])
end
end
context 'when additional batches remain' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:events, :id, batch_min_value: event2.id, batch_size: 3)
expect(batch_bounds).to eq([event2.id, event4.id])
end
end
context 'when on the final batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:events, :id, batch_min_value: event4.id, batch_size: 3)
expect(batch_bounds).to eq([event4.id, event4.id])
end
end
context 'when no additional batches remain' do
it 'returns nil' do
batch_bounds = batching_strategy.next_batch(:events, :id, batch_min_value: event4.id + 1, batch_size: 1)
expect(batch_bounds).to be_nil
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::BackgroundMigration::Scheduler, '#perform' do
let(:scheduler) { described_class.new }
shared_examples_for 'it has no jobs to run' do
it 'does not create and run a migration job' do
test_wrapper = double('test wrapper')
expect(test_wrapper).not_to receive(:perform)
expect do
scheduler.perform(migration_wrapper: test_wrapper)
end.not_to change { Gitlab::Database::BackgroundMigration::BatchedJob.count }
end
end
context 'when there are no active migrations' do
let!(:migration) { create(:batched_background_migration, :finished) }
it_behaves_like 'it has no jobs to run'
end
shared_examples_for 'it has completed the migration' do
it 'marks the migration as finished' do
relation = Gitlab::Database::BackgroundMigration::BatchedMigration.finished.where(id: first_migration.id)
expect { scheduler.perform }.to change { relation.count }.by(1)
end
end
context 'when there are active migrations' do
let!(:first_migration) { create(:batched_background_migration, :active, batch_size: 2) }
let!(:last_migration) { create(:batched_background_migration, :active) }
let(:job_relation) do
Gitlab::Database::BackgroundMigration::BatchedJob.where(batched_background_migration_id: first_migration.id)
end
context 'when the migration interval has not elapsed' do
before do
expect_next_found_instance_of(Gitlab::Database::BackgroundMigration::BatchedMigration) do |migration|
expect(migration).to receive(:interval_elapsed?).and_return(false)
end
end
it_behaves_like 'it has no jobs to run'
end
context 'when the interval has elapsed' do
before do
expect_next_found_instance_of(Gitlab::Database::BackgroundMigration::BatchedMigration) do |migration|
expect(migration).to receive(:interval_elapsed?).and_return(true)
end
end
context 'when the first migration has no previous jobs' do
context 'when the migration has batches to process' do
let!(:event1) { create(:event) }
let!(:event2) { create(:event) }
let!(:event3) { create(:event) }
it 'runs the job for the first batch' do
first_migration.update!(min_value: event1.id, max_value: event3.id)
expect_next_instance_of(Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper) do |wrapper|
expect(wrapper).to receive(:perform).and_wrap_original do |_, job_record|
expect(job_record).to eq(job_relation.first)
end
end
expect { scheduler.perform }.to change { job_relation.count }.by(1)
expect(job_relation.first).to have_attributes(
min_value: event1.id,
max_value: event2.id,
batch_size: first_migration.batch_size,
sub_batch_size: first_migration.sub_batch_size)
end
end
context 'when the migration has no batches to process' do
it_behaves_like 'it has no jobs to run'
it_behaves_like 'it has completed the migration'
end
end
context 'when the first migration has previous jobs' do
let!(:event1) { create(:event) }
let!(:event2) { create(:event) }
let!(:event3) { create(:event) }
let!(:previous_job) do
create(:batched_background_migration_job,
batched_migration: first_migration,
min_value: event1.id,
max_value: event2.id,
batch_size: 2,
sub_batch_size: 1)
end
context 'when the migration is ready to process another job' do
it 'runs the migration job for the next batch' do
first_migration.update!(min_value: event1.id, max_value: event3.id)
expect_next_instance_of(Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper) do |wrapper|
expect(wrapper).to receive(:perform).and_wrap_original do |_, job_record|
expect(job_record).to eq(job_relation.last)
end
end
expect { scheduler.perform }.to change { job_relation.count }.by(1)
expect(job_relation.last).to have_attributes(
min_value: event3.id,
max_value: event3.id,
batch_size: first_migration.batch_size,
sub_batch_size: first_migration.sub_batch_size)
end
end
context 'when the migration has no batches remaining' do
let!(:final_job) do
create(:batched_background_migration_job,
batched_migration: first_migration,
min_value: event3.id,
max_value: event3.id,
batch_size: 2,
sub_batch_size: 1)
end
it_behaves_like 'it has no jobs to run'
it_behaves_like 'it has completed the migration'
end
end
context 'when the bounds of the next batch exceed the migration maximum value' do
let!(:events) { create_list(:event, 3) }
let(:event1) { events[0] }
let(:event2) { events[1] }
context 'when the batch maximum exceeds the migration maximum' do
it 'clamps the batch maximum to the migration maximum' do
first_migration.update!(batch_size: 5, min_value: event1.id, max_value: event2.id)
expect_next_instance_of(Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper) do |wrapper|
expect(wrapper).to receive(:perform)
end
expect { scheduler.perform }.to change { job_relation.count }.by(1)
expect(job_relation.first).to have_attributes(
min_value: event1.id,
max_value: event2.id,
batch_size: first_migration.batch_size,
sub_batch_size: first_migration.sub_batch_size)
end
end
context 'when the batch minimum exceeds the migration maximum' do
let!(:previous_job) do
create(:batched_background_migration_job,
batched_migration: first_migration,
min_value: event1.id,
max_value: event2.id,
batch_size: 5,
sub_batch_size: 1)
end
before do
first_migration.update!(batch_size: 5, min_value: 1, max_value: event2.id)
end
it_behaves_like 'it has no jobs to run'
it_behaves_like 'it has completed the migration'
end
end
end
end
end
...@@ -1720,7 +1720,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do ...@@ -1720,7 +1720,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do
.with( .with(
2.minutes, 2.minutes,
'CopyColumnUsingBackgroundMigrationJob', 'CopyColumnUsingBackgroundMigrationJob',
[event.id, event.id, :events, :id, :id, 'id_convert_to_bigint', 100] [event.id, event.id, :events, :id, 100, :id, 'id_convert_to_bigint']
) )
expect(Gitlab::BackgroundMigration) expect(Gitlab::BackgroundMigration)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment