Commit b8cab3e8 authored by Alexandru Croitor's avatar Alexandru Croitor Committed by Jan Provaznik

Add different batching strategy

Use a different batching strategy for backfilling project
namespaces for a single group.
parent b88c1e81
...@@ -3,23 +3,23 @@ ...@@ -3,23 +3,23 @@
class BackfillProjectNamespacesForGroup < Gitlab::Database::Migration[1.0] class BackfillProjectNamespacesForGroup < Gitlab::Database::Migration[1.0]
MIGRATION = 'ProjectNamespaces::BackfillProjectNamespaces' MIGRATION = 'ProjectNamespaces::BackfillProjectNamespaces'
DELAY_INTERVAL = 2.minutes DELAY_INTERVAL = 2.minutes
GROUP_ID = 9970 # pick a test group id here GROUP_ID = 9970 # picking gitlab-org group.
disable_ddl_transaction! disable_ddl_transaction!
def up def up
# return unless Gitlab.com? return unless Gitlab.com?
projects_table = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models::Project.arel_table projects_table = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models::Project.arel_table
hierarchy_cte_sql = Arel::Nodes::SqlLiteral.new(::Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNamespaces.hierarchy_cte(GROUP_ID)) hierarchy_cte_sql = Arel.sql(::Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNamespaces.hierarchy_cte(GROUP_ID))
group_projects = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models::Project.where(projects_table[:namespace_id].in(hierarchy_cte_sql)) group_projects = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models::Project.where(projects_table[:namespace_id].in(hierarchy_cte_sql))
min_id = group_projects&.minimum(:id) min_id = group_projects.minimum(:id)
max_id = group_projects&.maximum(:id) max_id = group_projects.maximum(:id)
return if min_id.blank? || max_id.blank? return if min_id.blank? || max_id.blank?
migration = queue_batched_background_migration( queue_batched_background_migration(
MIGRATION, MIGRATION,
:projects, :projects,
:id, :id,
...@@ -28,14 +28,13 @@ class BackfillProjectNamespacesForGroup < Gitlab::Database::Migration[1.0] ...@@ -28,14 +28,13 @@ class BackfillProjectNamespacesForGroup < Gitlab::Database::Migration[1.0]
job_interval: DELAY_INTERVAL, job_interval: DELAY_INTERVAL,
batch_min_value: min_id, batch_min_value: min_id,
batch_max_value: max_id, batch_max_value: max_id,
sub_batch_size: 50 sub_batch_size: 25,
batch_class_name: 'BackfillProjectNamespacePerGroupBatchingStrategy'
) )
Gitlab::Database::BackgroundMigration::BatchedMigrationRunner.new.run_entire_migration(migration)
end end
def down def down
# return unless Gitlab.com? return unless Gitlab.com?
Gitlab::Database::BackgroundMigration::BatchedMigration Gitlab::Database::BackgroundMigration::BatchedMigration
.for_configuration(MIGRATION, :projects, :id, [GROUP_ID, 'up']).delete_all .for_configuration(MIGRATION, :projects, :id, [GROUP_ID, 'up']).delete_all
......
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
module BatchingStrategies
# Batching class to use for back-filling project namespaces for a single group.
# Batches over the projects table and id column combination, scoped to a given group returning the MIN() and MAX()
# values for the next batch as an array.
#
# If no more batches exist in the table, returns nil.
class BackfillProjectNamespacePerGroupBatchingStrategy < PrimaryKeyBatchingStrategy
# Finds and returns the next batch in the table.
#
# batch_min_value - The minimum value which the next batch will start at
# batch_size - The size of the next batch
def next_batch(batch_min_value:, batch_size:)
next_batch_bounds = nil
model_class = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models::Project
quoted_column_name = model_class.connection.quote_column_name(batched_migration.column_name)
projects_table = model_class.arel_table
hierarchy_cte_sql = Arel::Nodes::SqlLiteral.new(::Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNamespaces.hierarchy_cte(batched_migration.job_arguments.first))
relation = model_class.where(projects_table[:namespace_id].in(hierarchy_cte_sql)).where("#{quoted_column_name} >= ?", batch_min_value)
relation.each_batch(of: batch_size, column: batched_migration.column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first
break
end
next_batch_bounds
end
end
end
end
end
...@@ -11,20 +11,22 @@ module Gitlab ...@@ -11,20 +11,22 @@ module Gitlab
class PrimaryKeyBatchingStrategy class PrimaryKeyBatchingStrategy
include Gitlab::Database::DynamicModelHelpers include Gitlab::Database::DynamicModelHelpers
def initialize(batched_migration)
@batched_migration = batched_migration
end
# Finds and returns the next batch in the table. # Finds and returns the next batch in the table.
# #
# table_name - The table to batch over
# column_name - The column to batch over
# batch_min_value - The minimum value which the next batch will start at # batch_min_value - The minimum value which the next batch will start at
# batch_size - The size of the next batch # batch_size - The size of the next batch
def next_batch(table_name, column_name, batch_min_value:, batch_size:) def next_batch(batch_min_value:, batch_size:)
model_class = define_batchable_model(table_name) model_class = define_batchable_model(batched_migration.table_name)
quoted_column_name = model_class.connection.quote_column_name(column_name) quoted_column_name = model_class.connection.quote_column_name(batched_migration.column_name)
relation = model_class.where("#{quoted_column_name} >= ?", batch_min_value) relation = model_class.where("#{quoted_column_name} >= ?", batch_min_value)
next_batch_bounds = nil next_batch_bounds = nil
relation.each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop relation.each_batch(of: batch_size, column: batched_migration.column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first
break break
...@@ -32,6 +34,10 @@ module Gitlab ...@@ -32,6 +34,10 @@ module Gitlab
next_batch_bounds next_batch_bounds
end end
private
attr_reader :batched_migration
end end
end end
end end
......
...@@ -51,10 +51,7 @@ module Gitlab ...@@ -51,10 +51,7 @@ module Gitlab
raise 'Job cannot be split further' if new_batch_size < 1 raise 'Job cannot be split further' if new_batch_size < 1
batching_strategy = batched_migration.batch_class.new next_batch_bounds = batched_migration.batching_strategy.next_batch(
next_batch_bounds = batching_strategy.next_batch(
batched_migration.table_name,
batched_migration.column_name,
batch_min_value: min_value, batch_min_value: min_value,
batch_size: new_batch_size batch_size: new_batch_size
) )
......
...@@ -93,6 +93,10 @@ module Gitlab ...@@ -93,6 +93,10 @@ module Gitlab
"#{BATCH_CLASS_MODULE}::#{batch_class_name}".constantize "#{BATCH_CLASS_MODULE}::#{batch_class_name}".constantize
end end
def batching_strategy
@batching_strategy ||= batch_class.new(self)
end
def job_class_name=(class_name) def job_class_name=(class_name)
write_attribute(:job_class_name, class_name.delete_prefix("::")) write_attribute(:job_class_name, class_name.delete_prefix("::"))
end end
...@@ -133,7 +137,7 @@ module Gitlab ...@@ -133,7 +137,7 @@ module Gitlab
end end
def optimize! def optimize!
BatchOptimizer.new(self).optimize! ::Gitlab::Database::BackgroundMigration::BatchOptimizer.new(self).optimize!
end end
private private
......
...@@ -88,14 +88,12 @@ module Gitlab ...@@ -88,14 +88,12 @@ module Gitlab
end end
def find_next_batch_range(active_migration) def find_next_batch_range(active_migration)
batching_strategy = active_migration.batch_class.new
batch_min_value = active_migration.next_min_value batch_min_value = active_migration.next_min_value
next_batch_bounds = batching_strategy.next_batch( next_batch_bounds = active_migration.batching_strategy.next_batch(
active_migration.table_name,
active_migration.column_name,
batch_min_value: batch_min_value, batch_min_value: batch_min_value,
batch_size: active_migration.batch_size) batch_size: active_migration.batch_size
)
return if next_batch_bounds.nil? return if next_batch_bounds.nil?
......
...@@ -56,7 +56,7 @@ RSpec.describe "Admin > Admin sees background migrations" do ...@@ -56,7 +56,7 @@ RSpec.describe "Admin > Admin sees background migrations" do
context 'when there are failed migrations' do context 'when there are failed migrations' do
before do before do
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class| allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 10]) allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 10])
end end
end end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::BackfillProjectNamespacePerGroupBatchingStrategy, '#next_batch' do
let!(:namespaces) { table(:namespaces) }
let!(:projects) { table(:projects) }
let!(:background_migrations) { table(:batched_background_migrations) }
let!(:namespace1) { namespaces.create!(name: 'batchtest1', type: 'Group', path: 'batch-test1') }
let!(:namespace2) { namespaces.create!(name: 'batchtest2', type: 'Group', parent_id: namespace1.id, path: 'batch-test2') }
let!(:namespace3) { namespaces.create!(name: 'batchtest3', type: 'Group', parent_id: namespace2.id, path: 'batch-test3') }
let!(:project1) { projects.create!(name: 'project1', path: 'project1', namespace_id: namespace1.id, visibility_level: 20) }
let!(:project2) { projects.create!(name: 'project2', path: 'project2', namespace_id: namespace2.id, visibility_level: 20) }
let!(:project3) { projects.create!(name: 'project3', path: 'project3', namespace_id: namespace3.id, visibility_level: 20) }
let!(:project4) { projects.create!(name: 'project4', path: 'project4', namespace_id: namespace3.id, visibility_level: 20) }
let!(:batched_migration) do
background_migrations.create!(
created_at: Time.current,
updated_at: Time.current,
min_value: 1,
max_value: Project.maximum(:id),
batch_size: 1_000,
sub_batch_size: 100,
interval: 120,
status: 0,
job_class_name: 'ProjectNamespaces::BackfillProjectNamespaces',
table_name: :projects,
column_name: :id,
total_tuple_count: nil,
job_arguments: [namespace1.id, 'up']
)
end
let!(:batching_strategy) { described_class.new(batched_migration) }
context 'when starting on the first batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(batch_min_value: project1.id, batch_size: 3)
expect(batch_bounds).to match_array([project1.id, project3.id])
end
end
context 'when additional batches remain' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(batch_min_value: project2.id, batch_size: 3)
expect(batch_bounds).to match_array([project2.id, project4.id])
end
end
context 'when on the final batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(batch_min_value: project4.id, batch_size: 3)
expect(batch_bounds).to match_array([project4.id, project4.id])
end
end
context 'when no additional batches remain' do
it 'returns nil' do
batch_bounds = batching_strategy.next_batch(batch_min_value: project4.id + 1, batch_size: 1)
expect(batch_bounds).to be_nil
end
end
end
...@@ -3,17 +3,35 @@ ...@@ -3,17 +3,35 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy, '#next_batch' do RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy, '#next_batch' do
let(:batching_strategy) { described_class.new }
let(:namespaces) { table(:namespaces) } let(:namespaces) { table(:namespaces) }
let(:background_migrations) { table(:batched_background_migrations) }
let(:batching_strategy) { described_class.new(batched_migration) }
let!(:namespace1) { namespaces.create!(name: 'batchtest1', path: 'batch-test1') } let!(:namespace1) { namespaces.create!(name: 'batchtest1', path: 'batch-test1') }
let!(:namespace2) { namespaces.create!(name: 'batchtest2', path: 'batch-test2') } let!(:namespace2) { namespaces.create!(name: 'batchtest2', path: 'batch-test2') }
let!(:namespace3) { namespaces.create!(name: 'batchtest3', path: 'batch-test3') } let!(:namespace3) { namespaces.create!(name: 'batchtest3', path: 'batch-test3') }
let!(:namespace4) { namespaces.create!(name: 'batchtest4', path: 'batch-test4') } let!(:namespace4) { namespaces.create!(name: 'batchtest4', path: 'batch-test4') }
let!(:batched_migration) do
background_migrations.create!(
created_at: Time.current,
updated_at: Time.current,
min_value: 1,
max_value: 10_000,
batch_size: 1_000,
sub_batch_size: 100,
interval: 120,
status: 0,
job_class_name: 'Foo',
table_name: :namespaces,
column_name: :id,
total_tuple_count: nil
)
end
context 'when starting on the first batch' do context 'when starting on the first batch' do
it 'returns the bounds of the next batch' do it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace1.id, batch_size: 3) batch_bounds = batching_strategy.next_batch(batch_min_value: namespace1.id, batch_size: 3)
expect(batch_bounds).to eq([namespace1.id, namespace3.id]) expect(batch_bounds).to eq([namespace1.id, namespace3.id])
end end
...@@ -21,7 +39,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi ...@@ -21,7 +39,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when additional batches remain' do context 'when additional batches remain' do
it 'returns the bounds of the next batch' do it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace2.id, batch_size: 3) batch_bounds = batching_strategy.next_batch(batch_min_value: namespace2.id, batch_size: 3)
expect(batch_bounds).to eq([namespace2.id, namespace4.id]) expect(batch_bounds).to eq([namespace2.id, namespace4.id])
end end
...@@ -29,7 +47,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi ...@@ -29,7 +47,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when on the final batch' do context 'when on the final batch' do
it 'returns the bounds of the next batch' do it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id, batch_size: 3) batch_bounds = batching_strategy.next_batch(batch_min_value: namespace4.id, batch_size: 3)
expect(batch_bounds).to eq([namespace4.id, namespace4.id]) expect(batch_bounds).to eq([namespace4.id, namespace4.id])
end end
...@@ -37,7 +55,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi ...@@ -37,7 +55,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when no additional batches remain' do context 'when no additional batches remain' do
it 'returns nil' do it 'returns nil' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id + 1, batch_size: 1) batch_bounds = batching_strategy.next_batch(batch_min_value: namespace4.id + 1, batch_size: 1)
expect(batch_bounds).to be_nil expect(batch_bounds).to be_nil
end end
......
...@@ -135,7 +135,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedJob, type: :model d ...@@ -135,7 +135,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedJob, type: :model d
context 'when job can be split' do context 'when job can be split' do
before do before do
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class| allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 10]) allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 10])
end end
end end
...@@ -195,7 +195,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedJob, type: :model d ...@@ -195,7 +195,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedJob, type: :model d
context 'when computed midpoint is larger than the max value of the batch' do context 'when computed midpoint is larger than the max value of the batch' do
before do before do
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class| allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 16]) allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 16])
end end
end end
......
...@@ -272,7 +272,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigration, type: :m ...@@ -272,7 +272,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigration, type: :m
before do before do
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class| allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 10]) allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 10])
end end
end end
......
...@@ -16,7 +16,7 @@ RSpec.describe Admin::BackgroundMigrationsController, :enable_admin_mode do ...@@ -16,7 +16,7 @@ RSpec.describe Admin::BackgroundMigrationsController, :enable_admin_mode do
create(:batched_background_migration_job, batched_migration: migration, batch_size: 10, min_value: 6, max_value: 15, status: :failed, attempts: 3) create(:batched_background_migration_job, batched_migration: migration, batch_size: 10, min_value: 6, max_value: 15, status: :failed, attempts: 3)
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class| allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 10]) allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 10])
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment