Commit 6cdba731 authored by Sean McGivern's avatar Sean McGivern

Merge branch '338004-next-batch-not-null-strategy' into 'master'

Makes BackfillWorkItemTypeIdOnIssues migration more efficient

See merge request gitlab-org/gitlab!84213
parents 9c2e405f 10ba7bc3
......@@ -2,6 +2,7 @@
class BackfillWorkItemTypeIdOnIssues < Gitlab::Database::Migration[1.0]
MIGRATION = 'BackfillWorkItemTypeIdForIssues'
BATCH_CLASS_NAME = 'BackfillIssueWorkItemTypeBatchingStrategy'
BATCH_SIZE = 10_000
MAX_BATCH_SIZE = 30_000
SUB_BATCH_SIZE = 100
......@@ -27,7 +28,8 @@ class BackfillWorkItemTypeIdOnIssues < Gitlab::Database::Migration[1.0]
job_interval: INTERVAL,
batch_size: BATCH_SIZE,
max_batch_size: MAX_BATCH_SIZE,
sub_batch_size: SUB_BATCH_SIZE
sub_batch_size: SUB_BATCH_SIZE,
batch_class_name: BATCH_CLASS_NAME
)
end
end
......
# frozen_string_literal: true
class ReplaceWorkItemTypeBackfillNextBatchStrategy < Gitlab::Database::Migration[1.0]
JOB_CLASS_NAME = 'BackfillWorkItemTypeIdForIssues'
NEW_STRATEGY_CLASS = 'BackfillIssueWorkItemTypeBatchingStrategy'
OLD_STRATEGY_CLASS = 'PrimaryKeyBatchingStrategy'
class InlineBatchedMigration < ApplicationRecord
self.table_name = :batched_background_migrations
end
def up
InlineBatchedMigration.where(job_class_name: JOB_CLASS_NAME)
.update_all(batch_class_name: NEW_STRATEGY_CLASS)
end
def down
InlineBatchedMigration.where(job_class_name: JOB_CLASS_NAME)
.update_all(batch_class_name: OLD_STRATEGY_CLASS)
end
end
7897da66c2a941a6a09db6f62fa9069caef235603663077e5dd342a72ac5c7c3
\ No newline at end of file
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
module BatchingStrategies
# Batching class to use for back-filling issue's work_item_type_id for a single issue type.
# Batches will be scoped to records where the foreign key is NULL and only of a given issue type
#
# If no more batches exist in the table, returns nil.
class BackfillIssueWorkItemTypeBatchingStrategy < PrimaryKeyBatchingStrategy
def apply_additional_filters(relation, job_arguments:)
issue_type = job_arguments.first
relation.where(issue_type: issue_type)
end
end
end
end
end
......@@ -23,7 +23,7 @@ module Gitlab
quoted_column_name = model_class.connection.quote_column_name(column_name)
relation = model_class.where("#{quoted_column_name} >= ?", batch_min_value)
relation = apply_additional_filters(relation)
relation = apply_additional_filters(relation, job_arguments: job_arguments)
next_batch_bounds = nil
relation.each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
......@@ -40,12 +40,14 @@ module Gitlab
#
# Example:
#
# class TypeIsNotNull < PrimaryKeyBatchingStrategy
# def apply_additional_filters(relation)
# relation.where.not(type: nil)
# class MatchingType < PrimaryKeyBatchingStrategy
# def apply_additional_filters(relation, job_arguments:)
# type = job_arguments.first
#
# relation.where(type: type)
# end
# end
def apply_additional_filters(relation)
def apply_additional_filters(relation, job_arguments: [])
relation
end
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::BackfillIssueWorkItemTypeBatchingStrategy,
'#next_batch' do
# let! can't be used in migration specs because all tables but `work_item_types` are deleted after each spec
let!(:issue_type_enum) { { issue: 0, incident: 1, test_case: 2, requirement: 3, task: 4 } }
let!(:namespace) { table(:namespaces).create!(name: 'namespace', path: 'namespace') }
let!(:project) { table(:projects).create!(namespace_id: namespace.id) }
let!(:issues_table) { table(:issues) }
let!(:task_type) { table(:work_item_types).find_by!(namespace_id: nil, base_type: issue_type_enum[:task]) }
let!(:issue1) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:issue]) }
let!(:task1) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:task]) }
let!(:issue2) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:issue]) }
let!(:issue3) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:issue]) }
let!(:task2) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:task]) }
let!(:incident1) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:incident]) }
# test_case is EE only, but enum values exist on the FOSS model
let!(:test_case1) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:test_case]) }
let!(:task3) do
issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:task], work_item_type_id: task_type.id)
end
let!(:task4) { issues_table.create!(project_id: project.id, issue_type: issue_type_enum[:task]) }
let!(:batching_strategy) { described_class.new(connection: ActiveRecord::Base.connection) }
context 'when issue_type is issue' do
let(:job_arguments) { [issue_type_enum[:issue], 'irrelevant_work_item_id'] }
context 'when starting on the first batch' do
it 'returns the bounds of the next batch' do
batch_bounds = next_batch(issue1.id, 2)
expect(batch_bounds).to match_array([issue1.id, issue2.id])
end
end
context 'when additional batches remain' do
it 'returns the bounds of the next batch' do
batch_bounds = next_batch(issue2.id, 2)
expect(batch_bounds).to match_array([issue2.id, issue3.id])
end
end
context 'when on the final batch' do
it 'returns the bounds of the next batch' do
batch_bounds = next_batch(issue3.id, 2)
expect(batch_bounds).to match_array([issue3.id, issue3.id])
end
end
context 'when no additional batches remain' do
it 'returns nil' do
batch_bounds = next_batch(issue3.id + 1, 1)
expect(batch_bounds).to be_nil
end
end
end
context 'when issue_type is incident' do
let(:job_arguments) { [issue_type_enum[:incident], 'irrelevant_work_item_id'] }
context 'when starting on the first batch' do
it 'returns the bounds of the next batch with only one element' do
batch_bounds = next_batch(incident1.id, 2)
expect(batch_bounds).to match_array([incident1.id, incident1.id])
end
end
end
context 'when issue_type is requirement and there are no matching records' do
let(:job_arguments) { [issue_type_enum[:requirement], 'irrelevant_work_item_id'] }
context 'when starting on the first batch' do
it 'returns nil' do
batch_bounds = next_batch(1, 2)
expect(batch_bounds).to be_nil
end
end
end
context 'when issue_type is task' do
let(:job_arguments) { [issue_type_enum[:task], 'irrelevant_work_item_id'] }
context 'when starting on the first batch' do
it 'returns the bounds of the next batch' do
batch_bounds = next_batch(task1.id, 2)
expect(batch_bounds).to match_array([task1.id, task2.id])
end
end
context 'when additional batches remain' do
it 'returns the bounds of the next batch, does not skip records where FK is already set' do
batch_bounds = next_batch(task2.id, 2)
expect(batch_bounds).to match_array([task2.id, task3.id])
end
end
context 'when on the final batch' do
it 'returns the bounds of the next batch' do
batch_bounds = next_batch(task4.id, 2)
expect(batch_bounds).to match_array([task4.id, task4.id])
end
end
context 'when no additional batches remain' do
it 'returns nil' do
batch_bounds = next_batch(task4.id + 1, 1)
expect(batch_bounds).to be_nil
end
end
end
def next_batch(min_value, batch_size)
batching_strategy.next_batch(
:issues,
:id,
batch_min_value: min_value,
batch_size: batch_size,
job_arguments: job_arguments
)
end
end
......@@ -15,7 +15,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when starting on the first batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace1.id, batch_size: 3, job_arguments: nil)
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace1.id, batch_size: 3, job_arguments: [])
expect(batch_bounds).to eq([namespace1.id, namespace3.id])
end
......@@ -23,7 +23,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when additional batches remain' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace2.id, batch_size: 3, job_arguments: nil)
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace2.id, batch_size: 3, job_arguments: [])
expect(batch_bounds).to eq([namespace2.id, namespace4.id])
end
......@@ -31,7 +31,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when on the final batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id, batch_size: 3, job_arguments: nil)
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id, batch_size: 3, job_arguments: [])
expect(batch_bounds).to eq([namespace4.id, namespace4.id])
end
......@@ -39,7 +39,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when no additional batches remain' do
it 'returns nil' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id + 1, batch_size: 1, job_arguments: nil)
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id + 1, batch_size: 1, job_arguments: [])
expect(batch_bounds).to be_nil
end
......@@ -48,8 +48,10 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'additional filters' do
let(:strategy_with_filters) do
Class.new(described_class) do
def apply_additional_filters(relation)
relation.where.not(type: 'Project')
def apply_additional_filters(relation, job_arguments:)
min_id = job_arguments.first
relation.where.not(type: 'Project').where('id >= ?', min_id)
end
end
end
......@@ -58,7 +60,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
let!(:namespace5) { namespaces.create!(name: 'batchtest5', path: 'batch-test5', type: 'Project') }
it 'applies additional filters' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id, batch_size: 3, job_arguments: nil)
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id, batch_size: 3, job_arguments: [1])
expect(batch_bounds).to eq([namespace4.id, namespace4.id])
end
......
......@@ -32,7 +32,8 @@ RSpec.describe BackfillWorkItemTypeIdOnIssues, :migration do
interval: interval,
batch_size: described_class::BATCH_SIZE,
max_batch_size: described_class::MAX_BATCH_SIZE,
sub_batch_size: described_class::SUB_BATCH_SIZE
sub_batch_size: described_class::SUB_BATCH_SIZE,
batch_class_name: described_class::BATCH_CLASS_NAME
)
end
end
......
# frozen_string_literal: true
require 'spec_helper'
require_migration!
RSpec.describe ReplaceWorkItemTypeBackfillNextBatchStrategy, :migration do
describe '#up' do
it 'sets the new strategy for existing migrations' do
migrations = create_migrations(described_class::OLD_STRATEGY_CLASS, 2)
expect do
migrate!
migrations.each(&:reload)
end.to change { migrations.pluck(:batch_class_name).uniq }.from([described_class::OLD_STRATEGY_CLASS])
.to([described_class::NEW_STRATEGY_CLASS])
end
end
describe '#down' do
it 'sets the old strategy for existing migrations' do
migrations = create_migrations(described_class::NEW_STRATEGY_CLASS, 2)
expect do
migrate!
schema_migrate_down!
migrations.each(&:reload)
end.to change { migrations.pluck(:batch_class_name).uniq }.from([described_class::NEW_STRATEGY_CLASS])
.to([described_class::OLD_STRATEGY_CLASS])
end
end
def create_migrations(batch_class_name, count)
Array.new(2) { |index| create_background_migration(batch_class_name, [index]) }
end
def create_background_migration(batch_class_name, job_arguments)
migrations_table = table(:batched_background_migrations)
migrations_table.create!(
batch_class_name: batch_class_name,
job_class_name: described_class::JOB_CLASS_NAME,
max_value: 10,
batch_size: 5,
sub_batch_size: 1,
interval: 2.minutes,
table_name: :issues,
column_name: :id,
total_tuple_count: 10_000,
pause_ms: 100,
job_arguments: job_arguments
)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment