Commit b8cab3e8 authored by Alexandru Croitor's avatar Alexandru Croitor Committed by Jan Provaznik

Add different batching strategy

Use a different batching strategy for backfilling project
namespaces for a single group.
parent b88c1e81
......@@ -3,23 +3,23 @@
class BackfillProjectNamespacesForGroup < Gitlab::Database::Migration[1.0]
MIGRATION = 'ProjectNamespaces::BackfillProjectNamespaces'
DELAY_INTERVAL = 2.minutes
GROUP_ID = 9970 # pick a test group id here
GROUP_ID = 9970 # picking gitlab-org group.
disable_ddl_transaction!
def up
# return unless Gitlab.com?
return unless Gitlab.com?
projects_table = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models::Project.arel_table
hierarchy_cte_sql = Arel::Nodes::SqlLiteral.new(::Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNamespaces.hierarchy_cte(GROUP_ID))
hierarchy_cte_sql = Arel.sql(::Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNamespaces.hierarchy_cte(GROUP_ID))
group_projects = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models::Project.where(projects_table[:namespace_id].in(hierarchy_cte_sql))
min_id = group_projects&.minimum(:id)
max_id = group_projects&.maximum(:id)
min_id = group_projects.minimum(:id)
max_id = group_projects.maximum(:id)
return if min_id.blank? || max_id.blank?
migration = queue_batched_background_migration(
queue_batched_background_migration(
MIGRATION,
:projects,
:id,
......@@ -28,14 +28,13 @@ class BackfillProjectNamespacesForGroup < Gitlab::Database::Migration[1.0]
job_interval: DELAY_INTERVAL,
batch_min_value: min_id,
batch_max_value: max_id,
sub_batch_size: 50
sub_batch_size: 25,
batch_class_name: 'BackfillProjectNamespacePerGroupBatchingStrategy'
)
Gitlab::Database::BackgroundMigration::BatchedMigrationRunner.new.run_entire_migration(migration)
end
def down
# return unless Gitlab.com?
return unless Gitlab.com?
Gitlab::Database::BackgroundMigration::BatchedMigration
.for_configuration(MIGRATION, :projects, :id, [GROUP_ID, 'up']).delete_all
......
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
module BatchingStrategies
# Batching class to use for back-filling project namespaces for a single group.
# Batches over the projects table and id column combination, scoped to a given group returning the MIN() and MAX()
# values for the next batch as an array.
#
# If no more batches exist in the table, returns nil.
class BackfillProjectNamespacePerGroupBatchingStrategy < PrimaryKeyBatchingStrategy
# Finds and returns the next batch in the table.
#
# batch_min_value - The minimum value which the next batch will start at
# batch_size - The size of the next batch
def next_batch(batch_min_value:, batch_size:)
next_batch_bounds = nil
model_class = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models::Project
quoted_column_name = model_class.connection.quote_column_name(batched_migration.column_name)
projects_table = model_class.arel_table
hierarchy_cte_sql = Arel::Nodes::SqlLiteral.new(::Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNamespaces.hierarchy_cte(batched_migration.job_arguments.first))
relation = model_class.where(projects_table[:namespace_id].in(hierarchy_cte_sql)).where("#{quoted_column_name} >= ?", batch_min_value)
relation.each_batch(of: batch_size, column: batched_migration.column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first
break
end
next_batch_bounds
end
end
end
end
end
......@@ -11,20 +11,22 @@ module Gitlab
class PrimaryKeyBatchingStrategy
include Gitlab::Database::DynamicModelHelpers
def initialize(batched_migration)
@batched_migration = batched_migration
end
# Finds and returns the next batch in the table.
#
# table_name - The table to batch over
# column_name - The column to batch over
# batch_min_value - The minimum value which the next batch will start at
# batch_size - The size of the next batch
def next_batch(table_name, column_name, batch_min_value:, batch_size:)
model_class = define_batchable_model(table_name)
def next_batch(batch_min_value:, batch_size:)
model_class = define_batchable_model(batched_migration.table_name)
quoted_column_name = model_class.connection.quote_column_name(column_name)
quoted_column_name = model_class.connection.quote_column_name(batched_migration.column_name)
relation = model_class.where("#{quoted_column_name} >= ?", batch_min_value)
next_batch_bounds = nil
relation.each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
relation.each_batch(of: batch_size, column: batched_migration.column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first
break
......@@ -32,6 +34,10 @@ module Gitlab
next_batch_bounds
end
private
attr_reader :batched_migration
end
end
end
......
......@@ -51,10 +51,7 @@ module Gitlab
raise 'Job cannot be split further' if new_batch_size < 1
batching_strategy = batched_migration.batch_class.new
next_batch_bounds = batching_strategy.next_batch(
batched_migration.table_name,
batched_migration.column_name,
next_batch_bounds = batched_migration.batching_strategy.next_batch(
batch_min_value: min_value,
batch_size: new_batch_size
)
......
......@@ -93,6 +93,10 @@ module Gitlab
"#{BATCH_CLASS_MODULE}::#{batch_class_name}".constantize
end
def batching_strategy
@batching_strategy ||= batch_class.new(self)
end
def job_class_name=(class_name)
write_attribute(:job_class_name, class_name.delete_prefix("::"))
end
......@@ -133,7 +137,7 @@ module Gitlab
end
def optimize!
BatchOptimizer.new(self).optimize!
::Gitlab::Database::BackgroundMigration::BatchOptimizer.new(self).optimize!
end
private
......
......@@ -88,14 +88,12 @@ module Gitlab
end
def find_next_batch_range(active_migration)
batching_strategy = active_migration.batch_class.new
batch_min_value = active_migration.next_min_value
next_batch_bounds = batching_strategy.next_batch(
active_migration.table_name,
active_migration.column_name,
next_batch_bounds = active_migration.batching_strategy.next_batch(
batch_min_value: batch_min_value,
batch_size: active_migration.batch_size)
batch_size: active_migration.batch_size
)
return if next_batch_bounds.nil?
......
......@@ -56,7 +56,7 @@ RSpec.describe "Admin > Admin sees background migrations" do
context 'when there are failed migrations' do
before do
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 10])
allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 10])
end
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::BackfillProjectNamespacePerGroupBatchingStrategy, '#next_batch' do
let!(:namespaces) { table(:namespaces) }
let!(:projects) { table(:projects) }
let!(:background_migrations) { table(:batched_background_migrations) }
let!(:namespace1) { namespaces.create!(name: 'batchtest1', type: 'Group', path: 'batch-test1') }
let!(:namespace2) { namespaces.create!(name: 'batchtest2', type: 'Group', parent_id: namespace1.id, path: 'batch-test2') }
let!(:namespace3) { namespaces.create!(name: 'batchtest3', type: 'Group', parent_id: namespace2.id, path: 'batch-test3') }
let!(:project1) { projects.create!(name: 'project1', path: 'project1', namespace_id: namespace1.id, visibility_level: 20) }
let!(:project2) { projects.create!(name: 'project2', path: 'project2', namespace_id: namespace2.id, visibility_level: 20) }
let!(:project3) { projects.create!(name: 'project3', path: 'project3', namespace_id: namespace3.id, visibility_level: 20) }
let!(:project4) { projects.create!(name: 'project4', path: 'project4', namespace_id: namespace3.id, visibility_level: 20) }
let!(:batched_migration) do
background_migrations.create!(
created_at: Time.current,
updated_at: Time.current,
min_value: 1,
max_value: Project.maximum(:id),
batch_size: 1_000,
sub_batch_size: 100,
interval: 120,
status: 0,
job_class_name: 'ProjectNamespaces::BackfillProjectNamespaces',
table_name: :projects,
column_name: :id,
total_tuple_count: nil,
job_arguments: [namespace1.id, 'up']
)
end
let!(:batching_strategy) { described_class.new(batched_migration) }
context 'when starting on the first batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(batch_min_value: project1.id, batch_size: 3)
expect(batch_bounds).to match_array([project1.id, project3.id])
end
end
context 'when additional batches remain' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(batch_min_value: project2.id, batch_size: 3)
expect(batch_bounds).to match_array([project2.id, project4.id])
end
end
context 'when on the final batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(batch_min_value: project4.id, batch_size: 3)
expect(batch_bounds).to match_array([project4.id, project4.id])
end
end
context 'when no additional batches remain' do
it 'returns nil' do
batch_bounds = batching_strategy.next_batch(batch_min_value: project4.id + 1, batch_size: 1)
expect(batch_bounds).to be_nil
end
end
end
......@@ -3,17 +3,35 @@
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy, '#next_batch' do
let(:batching_strategy) { described_class.new }
let(:namespaces) { table(:namespaces) }
let(:background_migrations) { table(:batched_background_migrations) }
let(:batching_strategy) { described_class.new(batched_migration) }
let!(:namespace1) { namespaces.create!(name: 'batchtest1', path: 'batch-test1') }
let!(:namespace2) { namespaces.create!(name: 'batchtest2', path: 'batch-test2') }
let!(:namespace3) { namespaces.create!(name: 'batchtest3', path: 'batch-test3') }
let!(:namespace4) { namespaces.create!(name: 'batchtest4', path: 'batch-test4') }
let!(:batched_migration) do
background_migrations.create!(
created_at: Time.current,
updated_at: Time.current,
min_value: 1,
max_value: 10_000,
batch_size: 1_000,
sub_batch_size: 100,
interval: 120,
status: 0,
job_class_name: 'Foo',
table_name: :namespaces,
column_name: :id,
total_tuple_count: nil
)
end
context 'when starting on the first batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace1.id, batch_size: 3)
batch_bounds = batching_strategy.next_batch(batch_min_value: namespace1.id, batch_size: 3)
expect(batch_bounds).to eq([namespace1.id, namespace3.id])
end
......@@ -21,7 +39,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when additional batches remain' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace2.id, batch_size: 3)
batch_bounds = batching_strategy.next_batch(batch_min_value: namespace2.id, batch_size: 3)
expect(batch_bounds).to eq([namespace2.id, namespace4.id])
end
......@@ -29,7 +47,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when on the final batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id, batch_size: 3)
batch_bounds = batching_strategy.next_batch(batch_min_value: namespace4.id, batch_size: 3)
expect(batch_bounds).to eq([namespace4.id, namespace4.id])
end
......@@ -37,7 +55,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when no additional batches remain' do
it 'returns nil' do
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id + 1, batch_size: 1)
batch_bounds = batching_strategy.next_batch(batch_min_value: namespace4.id + 1, batch_size: 1)
expect(batch_bounds).to be_nil
end
......
......@@ -135,7 +135,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedJob, type: :model d
context 'when job can be split' do
before do
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 10])
allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 10])
end
end
......@@ -195,7 +195,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedJob, type: :model d
context 'when computed midpoint is larger than the max value of the batch' do
before do
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 16])
allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 16])
end
end
......
......@@ -272,7 +272,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigration, type: :m
before do
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 10])
allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 10])
end
end
......
......@@ -16,7 +16,7 @@ RSpec.describe Admin::BackgroundMigrationsController, :enable_admin_mode do
create(:batched_background_migration_job, batched_migration: migration, batch_size: 10, min_value: 6, max_value: 15, status: :failed, attempts: 3)
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 10])
allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 10])
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment