Commit b88c1e81 authored by Alexandru Croitor's avatar Alexandru Croitor Committed by Jan Provaznik

Add migration for backfilling project namespaces

Add the ability to backfill a project namespace for each
project, starting with backfilling the project namespaces
of a single root namespace.

Changelog: added
parent 347a44a5
# frozen_string_literal: true
# Queues the batched background migration that back-fills project namespaces
# for the projects found under a single root group (GROUP_ID), then runs it
# to completion.
#
# GROUP_ID is hard-coded, so it can only identify the intended group on one
# installation; both directions are therefore skipped unless `Gitlab.com?`.
class BackfillProjectNamespacesForGroup < Gitlab::Database::Migration[1.0]
  MIGRATION = 'ProjectNamespaces::BackfillProjectNamespaces'
  DELAY_INTERVAL = 2.minutes
  GROUP_ID = 9970 # pick a test group id here

  disable_ddl_transaction!

  # Queues the batched migration over the id range of the projects that sit in
  # the hierarchy of GROUP_ID. Does nothing when no such projects exist.
  def up
    return unless Gitlab.com?

    project_model = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models::Project
    projects_table = project_model.arel_table
    hierarchy_cte_sql = Arel::Nodes::SqlLiteral.new(
      ::Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNamespaces.hierarchy_cte(GROUP_ID)
    )
    group_projects = project_model.where(projects_table[:namespace_id].in(hierarchy_cte_sql))

    # `where` always returns a relation, so no safe navigation is needed here;
    # the aggregates themselves are nil when the relation is empty.
    min_id = group_projects.minimum(:id)
    max_id = group_projects.maximum(:id)

    return if min_id.blank? || max_id.blank?

    migration = queue_batched_background_migration(
      MIGRATION,
      :projects,
      :id,
      GROUP_ID,
      'up',
      job_interval: DELAY_INTERVAL,
      batch_min_value: min_id,
      batch_max_value: max_id,
      sub_batch_size: 50
    )

    # NOTE(review): this runs every batch synchronously inside the schema
    # migration, which presumably makes `job_interval` irrelevant for this
    # run - confirm this is intended before shipping.
    Gitlab::Database::BackgroundMigration::BatchedMigrationRunner.new.run_entire_migration(migration)
  end

  # Removes the batched migration record(s) queued by `up`. This only deletes
  # the scheduling records; it does not invoke the background job's 'down' path.
  def down
    return unless Gitlab.com?

    Gitlab::Database::BackgroundMigration::BatchedMigration
      .for_configuration(MIGRATION, :projects, :id, [GROUP_ID, 'up'])
      .delete_all
  end
end
d2d270a335b3a2441a20673bf19d47553f607533d4503e3a01bc3d6d108bcdb3
\ No newline at end of file
...@@ -5,19 +5,15 @@ module Gitlab ...@@ -5,19 +5,15 @@ module Gitlab
module ProjectNamespaces module ProjectNamespaces
# Back-fill project namespaces for projects that do not yet have a namespace. # Back-fill project namespaces for projects that do not yet have a namespace.
# #
# TODO: remove this comment when an actuall backfill migration is added.
#
# This is first being added without an actual migration as we need to initially test
# if backfilling project namespaces affects performance in any significant way.
# rubocop: disable Metrics/ClassLength # rubocop: disable Metrics/ClassLength
class BackfillProjectNamespaces class BackfillProjectNamespaces
BATCH_SIZE = 100 SUB_BATCH_SIZE = 100
DELETE_BATCH_SIZE = 10
PROJECT_NAMESPACE_STI_NAME = 'Project' PROJECT_NAMESPACE_STI_NAME = 'Project'
IsolatedModels = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models IsolatedModels = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models
def perform(start_id, end_id, namespace_id, migration_type = 'up') def perform(start_id, end_id, migration_table_name, migration_column_name, sub_batch_size, pause_ms, namespace_id, migration_type = 'up')
@sub_batch_size = sub_batch_size || SUB_BATCH_SIZE
load_project_ids(start_id, end_id, namespace_id) load_project_ids(start_id, end_id, namespace_id)
case migration_type case migration_type
...@@ -34,10 +30,10 @@ module Gitlab ...@@ -34,10 +30,10 @@ module Gitlab
private private
attr_accessor :project_ids attr_accessor :project_ids, :sub_batch_size
def backfill_project_namespaces(namespace_id) def backfill_project_namespaces(namespace_id)
project_ids.each_slice(BATCH_SIZE) do |project_ids| project_ids.each_slice(sub_batch_size) do |project_ids|
# We need to lock these project records for the period when we create project namespaces # We need to lock these project records for the period when we create project namespaces
# and link them to projects so that if a project is modified in the time between creating # and link them to projects so that if a project is modified in the time between creating
# project namespaces `batch_insert_namespaces` and linking them to projects `batch_update_projects` # project namespaces `batch_insert_namespaces` and linking them to projects `batch_update_projects`
...@@ -56,7 +52,7 @@ module Gitlab ...@@ -56,7 +52,7 @@ module Gitlab
end end
def cleanup_backfilled_project_namespaces(namespace_id) def cleanup_backfilled_project_namespaces(namespace_id)
project_ids.each_slice(BATCH_SIZE) do |project_ids| project_ids.each_slice(sub_batch_size) do |project_ids|
# IMPORTANT: first nullify project_namespace_id in projects table to avoid removing projects when records # IMPORTANT: first nullify project_namespace_id in projects table to avoid removing projects when records
# from namespaces are deleted due to FK/triggers # from namespaces are deleted due to FK/triggers
nullify_project_namespaces_in_projects(project_ids) nullify_project_namespaces_in_projects(project_ids)
...@@ -109,7 +105,10 @@ module Gitlab ...@@ -109,7 +105,10 @@ module Gitlab
end end
def delete_project_namespace_records(project_ids) def delete_project_namespace_records(project_ids)
project_ids.each_slice(DELETE_BATCH_SIZE) do |p_ids| # keep the deletes a 10x smaller batch as deletes seem to be much more expensive
delete_batch_size = (sub_batch_size / 10).to_i + 1
project_ids.each_slice(delete_batch_size) do |p_ids|
IsolatedModels::Namespace.where(type: PROJECT_NAMESPACE_STI_NAME).where(tmp_project_id: p_ids).delete_all IsolatedModels::Namespace.where(type: PROJECT_NAMESPACE_STI_NAME).where(tmp_project_id: p_ids).delete_all
end end
end end
...@@ -117,7 +116,7 @@ module Gitlab ...@@ -117,7 +116,7 @@ module Gitlab
def load_project_ids(start_id, end_id, namespace_id) def load_project_ids(start_id, end_id, namespace_id)
projects = IsolatedModels::Project.arel_table projects = IsolatedModels::Project.arel_table
relation = IsolatedModels::Project.where(projects[:id].between(start_id..end_id)) relation = IsolatedModels::Project.where(projects[:id].between(start_id..end_id))
relation = relation.where(projects[:namespace_id].in(Arel::Nodes::SqlLiteral.new(hierarchy_cte(namespace_id)))) if namespace_id relation = relation.where(projects[:namespace_id].in(Arel::Nodes::SqlLiteral.new(self.class.hierarchy_cte(namespace_id)))) if namespace_id
@project_ids = relation.pluck(:id) @project_ids = relation.pluck(:id)
end end
...@@ -126,7 +125,7 @@ module Gitlab ...@@ -126,7 +125,7 @@ module Gitlab
::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillProjectNamespaces', arguments) ::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillProjectNamespaces', arguments)
end end
def hierarchy_cte(root_namespace_id) def self.hierarchy_cte(root_namespace_id)
<<-SQL <<-SQL
WITH RECURSIVE "base_and_descendants" AS ( WITH RECURSIVE "base_and_descendants" AS (
( (
......
...@@ -30,7 +30,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -30,7 +30,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
start_id = ::Project.minimum(:id) start_id = ::Project.minimum(:id)
end_id = ::Project.maximum(:id) end_id = ::Project.maximum(:id)
projects_count = ::Project.count projects_count = ::Project.count
batches_count = (projects_count / described_class::BATCH_SIZE.to_f).ceil batches_count = (projects_count / described_class::SUB_BATCH_SIZE.to_f).ceil
project_namespaces_count = ::Namespace.where(type: 'Project').count project_namespaces_count = ::Namespace.where(type: 'Project').count
migration = described_class.new migration = described_class.new
...@@ -39,7 +39,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -39,7 +39,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
expect(migration).to receive(:batch_update_projects).exactly(batches_count).and_call_original expect(migration).to receive(:batch_update_projects).exactly(batches_count).and_call_original
expect(migration).to receive(:batch_update_project_namespaces_traversal_ids).exactly(batches_count).and_call_original expect(migration).to receive(:batch_update_project_namespaces_traversal_ids).exactly(batches_count).and_call_original
expect { migration.perform(start_id, end_id, nil, 'up') }.to change(Namespace.where(type: 'Project'), :count) expect { migration.perform(start_id, end_id, nil, nil, nil, nil, nil, 'up') }.to change(Namespace.where(type: 'Project'), :count)
expect(projects_count).to eq(::Namespace.where(type: 'Project').count) expect(projects_count).to eq(::Namespace.where(type: 'Project').count)
check_projects_in_sync_with(Namespace.where(type: 'Project')) check_projects_in_sync_with(Namespace.where(type: 'Project'))
...@@ -53,7 +53,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -53,7 +53,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
start_id = backfilled_namespace_projects.minimum(:id) start_id = backfilled_namespace_projects.minimum(:id)
end_id = backfilled_namespace_projects.maximum(:id) end_id = backfilled_namespace_projects.maximum(:id)
group_projects_count = backfilled_namespace_projects.count group_projects_count = backfilled_namespace_projects.count
batches_count = (group_projects_count / described_class::BATCH_SIZE.to_f).ceil batches_count = (group_projects_count / described_class::SUB_BATCH_SIZE.to_f).ceil
project_namespaces_in_hierarchy = project_namespaces_in_hierarchy(base_ancestor(backfilled_namespace)) project_namespaces_in_hierarchy = project_namespaces_in_hierarchy(base_ancestor(backfilled_namespace))
migration = described_class.new migration = described_class.new
...@@ -66,7 +66,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -66,7 +66,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
expect(group_projects_count).to eq(14) expect(group_projects_count).to eq(14)
expect(project_namespaces_in_hierarchy.count).to eq(0) expect(project_namespaces_in_hierarchy.count).to eq(0)
migration.perform(start_id, end_id, backfilled_namespace.id, 'up') migration.perform(start_id, end_id, nil, nil, nil, nil, backfilled_namespace.id, 'up')
expect(project_namespaces_in_hierarchy.count).to eq(14) expect(project_namespaces_in_hierarchy.count).to eq(14)
check_projects_in_sync_with(project_namespaces_in_hierarchy) check_projects_in_sync_with(project_namespaces_in_hierarchy)
...@@ -79,7 +79,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -79,7 +79,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
start_id = hierarchy1_projects.minimum(:id) start_id = hierarchy1_projects.minimum(:id)
end_id = hierarchy1_projects.maximum(:id) end_id = hierarchy1_projects.maximum(:id)
described_class.new.perform(start_id, end_id, parent_group1.id, 'up') described_class.new.perform(start_id, end_id, nil, nil, nil, nil, parent_group1.id, 'up')
end end
it 'does not duplicate project namespaces' do it 'does not duplicate project namespaces' do
...@@ -87,7 +87,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -87,7 +87,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
projects_count = ::Project.count projects_count = ::Project.count
start_id = ::Project.minimum(:id) start_id = ::Project.minimum(:id)
end_id = ::Project.maximum(:id) end_id = ::Project.maximum(:id)
batches_count = (projects_count / described_class::BATCH_SIZE.to_f).ceil batches_count = (projects_count / described_class::SUB_BATCH_SIZE.to_f).ceil
project_namespaces = ::Namespace.where(type: 'Project') project_namespaces = ::Namespace.where(type: 'Project')
migration = described_class.new migration = described_class.new
...@@ -100,7 +100,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -100,7 +100,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
expect(migration).to receive(:batch_update_projects).exactly(batches_count).and_call_original expect(migration).to receive(:batch_update_projects).exactly(batches_count).and_call_original
expect(migration).to receive(:batch_update_project_namespaces_traversal_ids).exactly(batches_count).and_call_original expect(migration).to receive(:batch_update_project_namespaces_traversal_ids).exactly(batches_count).and_call_original
expect { migration.perform(start_id, end_id, nil, 'up') }.to change(project_namespaces, :count).by(14) expect { migration.perform(start_id, end_id, nil, nil, nil, nil, nil, 'up') }.to change(project_namespaces, :count).by(14)
expect(projects_count).to eq(project_namespaces.count) expect(projects_count).to eq(project_namespaces.count)
end end
...@@ -125,7 +125,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -125,7 +125,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
context 'back-fill project namespaces in batches' do context 'back-fill project namespaces in batches' do
before do before do
stub_const("#{described_class.name}::BATCH_SIZE", 2) stub_const("#{described_class.name}::SUB_BATCH_SIZE", 2)
end end
it_behaves_like 'back-fill project namespaces' it_behaves_like 'back-fill project namespaces'
...@@ -137,7 +137,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -137,7 +137,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
start_id = ::Project.minimum(:id) start_id = ::Project.minimum(:id)
end_id = ::Project.maximum(:id) end_id = ::Project.maximum(:id)
# back-fill first # back-fill first
described_class.new.perform(start_id, end_id, nil, 'up') described_class.new.perform(start_id, end_id, nil, nil, nil, nil, nil, 'up')
end end
shared_examples 'cleanup project namespaces' do shared_examples 'cleanup project namespaces' do
...@@ -146,7 +146,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -146,7 +146,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
start_id = ::Project.minimum(:id) start_id = ::Project.minimum(:id)
end_id = ::Project.maximum(:id) end_id = ::Project.maximum(:id)
migration = described_class.new migration = described_class.new
batches_count = (projects_count / described_class::BATCH_SIZE.to_f).ceil batches_count = (projects_count / described_class::SUB_BATCH_SIZE.to_f).ceil
expect(projects_count).to be > 0 expect(projects_count).to be > 0
expect(projects_count).to eq(::Namespace.where(type: 'Project').count) expect(projects_count).to eq(::Namespace.where(type: 'Project').count)
...@@ -154,7 +154,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -154,7 +154,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
expect(migration).to receive(:nullify_project_namespaces_in_projects).exactly(batches_count).and_call_original expect(migration).to receive(:nullify_project_namespaces_in_projects).exactly(batches_count).and_call_original
expect(migration).to receive(:delete_project_namespace_records).exactly(batches_count).and_call_original expect(migration).to receive(:delete_project_namespace_records).exactly(batches_count).and_call_original
migration.perform(start_id, end_id, nil, 'down') migration.perform(start_id, end_id, nil, nil, nil, nil, nil, 'down')
expect(::Project.count).to be > 0 expect(::Project.count).to be > 0
expect(::Namespace.where(type: 'Project').count).to eq(0) expect(::Namespace.where(type: 'Project').count).to eq(0)
...@@ -168,7 +168,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -168,7 +168,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
start_id = backfilled_namespace_projects.minimum(:id) start_id = backfilled_namespace_projects.minimum(:id)
end_id = backfilled_namespace_projects.maximum(:id) end_id = backfilled_namespace_projects.maximum(:id)
group_projects_count = backfilled_namespace_projects.count group_projects_count = backfilled_namespace_projects.count
batches_count = (group_projects_count / described_class::BATCH_SIZE.to_f).ceil batches_count = (group_projects_count / described_class::SUB_BATCH_SIZE.to_f).ceil
project_namespaces_in_hierarchy = project_namespaces_in_hierarchy(base_ancestor(backfilled_namespace)) project_namespaces_in_hierarchy = project_namespaces_in_hierarchy(base_ancestor(backfilled_namespace))
migration = described_class.new migration = described_class.new
...@@ -176,7 +176,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -176,7 +176,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
expect(migration).to receive(:nullify_project_namespaces_in_projects).exactly(batches_count).and_call_original expect(migration).to receive(:nullify_project_namespaces_in_projects).exactly(batches_count).and_call_original
expect(migration).to receive(:delete_project_namespace_records).exactly(batches_count).and_call_original expect(migration).to receive(:delete_project_namespace_records).exactly(batches_count).and_call_original
migration.perform(start_id, end_id, backfilled_namespace.id, 'down') migration.perform(start_id, end_id, nil, nil, nil, nil, backfilled_namespace.id, 'down')
expect(::Namespace.where(type: 'Project').count).to be > 0 expect(::Namespace.where(type: 'Project').count).to be > 0
expect(project_namespaces_in_hierarchy.count).to eq(0) expect(project_namespaces_in_hierarchy.count).to eq(0)
...@@ -190,7 +190,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa ...@@ -190,7 +190,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
context 'cleanup project namespaces in batches' do context 'cleanup project namespaces in batches' do
before do before do
stub_const("#{described_class.name}::BATCH_SIZE", 2) stub_const("#{described_class.name}::SUB_BATCH_SIZE", 2)
end end
it_behaves_like 'cleanup project namespaces' it_behaves_like 'cleanup project namespaces'
......
# frozen_string_literal: true
require 'spec_helper'
require_migration!
# Verifies that the migration queues a batched background migration for the
# projects of the configured group and that rolling back removes it again.
RSpec.describe BackfillProjectNamespacesForGroup do
  let_it_be(:migration) { described_class::MIGRATION }

  let(:projects) { table(:projects) }
  let(:namespaces) { table(:namespaces) }

  let(:parent_group1) { namespaces.create!(name: 'parent_group1', path: 'parent_group1', visibility_level: 20, type: 'Group') }
  let!(:parent_group1_project) { projects.create!(name: 'parent_group1_project', path: 'parent_group1_project', namespace_id: parent_group1.id, visibility_level: 20) }

  before do
    allow(Gitlab).to receive(:com?).and_return(true)

    # Point the migration at the group created above for every example.
    # Without this, `up` finds no projects under the hard-coded GROUP_ID and
    # returns early, so the `#down` example would pass without queuing anything.
    stub_const("BackfillProjectNamespacesForGroup::GROUP_ID", parent_group1.id)
  end

  describe '#up' do
    it 'schedules background jobs for each batch of namespaces' do
      migrate!

      expect(migration).to have_scheduled_batched_migration(
        table_name: :projects,
        column_name: :id,
        job_arguments: [described_class::GROUP_ID, 'up'],
        interval: described_class::DELAY_INTERVAL
      )
    end
  end

  describe '#down' do
    it 'deletes all batched migration records' do
      migrate!

      # Precondition: there must be a record for `down` to delete.
      expect(migration).to have_scheduled_batched_migration(
        table_name: :projects,
        column_name: :id,
        job_arguments: [described_class::GROUP_ID, 'up'],
        interval: described_class::DELAY_INTERVAL
      )

      schema_migrate_down!

      expect(migration).not_to have_scheduled_batched_migration
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment