Commit b88c1e81 authored by Alexandru Croitor's avatar Alexandru Croitor Committed by Jan Provaznik

Add migration for backfilling project namespaces

Add the ability to backfill a project namespace for each
project, starting with backfilling project namespaces
for a single root namespace.

Changelog: added
parent 347a44a5
# frozen_string_literal: true
# Queues the `ProjectNamespaces::BackfillProjectNamespaces` batched background
# migration for the projects found in the namespace hierarchy of GROUP_ID, and
# then hands the queued migration to the batched migration runner.
class BackfillProjectNamespacesForGroup < Gitlab::Database::Migration[1.0]
  MIGRATION = 'ProjectNamespaces::BackfillProjectNamespaces'
  DELAY_INTERVAL = 2.minutes
  GROUP_ID = 9970 # pick a test group id here

  disable_ddl_transaction!

  # Queues the batched migration over the id range of the projects that belong
  # to the GROUP_ID hierarchy. Does nothing when no such project exists.
  def up
    # NOTE(review): this guard is disabled while GROUP_ID is a hard-coded id;
    # confirm whether it should be re-enabled before the migration ships.
    # return unless Gitlab.com?

    project_model = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models::Project
    backfill_class = ::Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNamespaces

    # `hierarchy_cte` returns raw SQL, so it is wrapped in a SqlLiteral to be
    # usable as the right-hand side of the `IN` predicate.
    hierarchy_cte_sql = Arel::Nodes::SqlLiteral.new(backfill_class.hierarchy_cte(GROUP_ID))
    group_projects = project_model.where(project_model.arel_table[:namespace_id].in(hierarchy_cte_sql))

    # `where` always returns a relation, so no safe navigation is needed here;
    # `minimum`/`maximum` themselves yield nil when the relation is empty.
    min_id = group_projects.minimum(:id)
    max_id = group_projects.maximum(:id)

    return if min_id.blank? || max_id.blank?

    migration = queue_batched_background_migration(
      MIGRATION,
      :projects,
      :id,
      GROUP_ID,
      'up',
      job_interval: DELAY_INTERVAL,
      batch_min_value: min_id,
      batch_max_value: max_id,
      sub_batch_size: 50
    )

    Gitlab::Database::BackgroundMigration::BatchedMigrationRunner.new.run_entire_migration(migration)
  end

  # Deletes the batched migration records that match the configuration queued
  # by `up` (same job class, table, column and job arguments).
  def down
    # NOTE(review): same disabled guard as in `up`; keep both in sync.
    # return unless Gitlab.com?

    Gitlab::Database::BackgroundMigration::BatchedMigration
      .for_configuration(MIGRATION, :projects, :id, [GROUP_ID, 'up'])
      .delete_all
  end
end
d2d270a335b3a2441a20673bf19d47553f607533d4503e3a01bc3d6d108bcdb3
\ No newline at end of file
......@@ -5,19 +5,15 @@ module Gitlab
module ProjectNamespaces
# Back-fill project namespaces for projects that do not yet have a namespace.
#
# TODO: remove this comment when an actual backfill migration is added.
#
# This is first being added without an actual migration as we need to initially test
# if backfilling project namespaces affects performance in any significant way.
# rubocop: disable Metrics/ClassLength
class BackfillProjectNamespaces
BATCH_SIZE = 100
DELETE_BATCH_SIZE = 10
SUB_BATCH_SIZE = 100
PROJECT_NAMESPACE_STI_NAME = 'Project'
IsolatedModels = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models
def perform(start_id, end_id, namespace_id, migration_type = 'up')
def perform(start_id, end_id, migration_table_name, migration_column_name, sub_batch_size, pause_ms, namespace_id, migration_type = 'up')
@sub_batch_size = sub_batch_size || SUB_BATCH_SIZE
load_project_ids(start_id, end_id, namespace_id)
case migration_type
......@@ -34,10 +30,10 @@ module Gitlab
private
attr_accessor :project_ids
attr_accessor :project_ids, :sub_batch_size
def backfill_project_namespaces(namespace_id)
project_ids.each_slice(BATCH_SIZE) do |project_ids|
project_ids.each_slice(sub_batch_size) do |project_ids|
# We need to lock these project records for the period when we create project namespaces
# and link them to projects so that if a project is modified in the time between creating
# project namespaces `batch_insert_namespaces` and linking them to projects `batch_update_projects`
......@@ -56,7 +52,7 @@ module Gitlab
end
def cleanup_backfilled_project_namespaces(namespace_id)
project_ids.each_slice(BATCH_SIZE) do |project_ids|
project_ids.each_slice(sub_batch_size) do |project_ids|
# IMPORTANT: first nullify project_namespace_id in projects table to avoid removing projects when records
# from namespaces are deleted due to FK/triggers
nullify_project_namespaces_in_projects(project_ids)
......@@ -109,7 +105,10 @@ module Gitlab
end
def delete_project_namespace_records(project_ids)
project_ids.each_slice(DELETE_BATCH_SIZE) do |p_ids|
# keep the deletes a 10x smaller batch as deletes seem to be much more expensive
delete_batch_size = (sub_batch_size / 10).to_i + 1
project_ids.each_slice(delete_batch_size) do |p_ids|
IsolatedModels::Namespace.where(type: PROJECT_NAMESPACE_STI_NAME).where(tmp_project_id: p_ids).delete_all
end
end
......@@ -117,7 +116,7 @@ module Gitlab
def load_project_ids(start_id, end_id, namespace_id)
projects = IsolatedModels::Project.arel_table
relation = IsolatedModels::Project.where(projects[:id].between(start_id..end_id))
relation = relation.where(projects[:namespace_id].in(Arel::Nodes::SqlLiteral.new(hierarchy_cte(namespace_id)))) if namespace_id
relation = relation.where(projects[:namespace_id].in(Arel::Nodes::SqlLiteral.new(self.class.hierarchy_cte(namespace_id)))) if namespace_id
@project_ids = relation.pluck(:id)
end
......@@ -126,7 +125,7 @@ module Gitlab
::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillProjectNamespaces', arguments)
end
def hierarchy_cte(root_namespace_id)
def self.hierarchy_cte(root_namespace_id)
<<-SQL
WITH RECURSIVE "base_and_descendants" AS (
(
......
......@@ -30,7 +30,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
start_id = ::Project.minimum(:id)
end_id = ::Project.maximum(:id)
projects_count = ::Project.count
batches_count = (projects_count / described_class::BATCH_SIZE.to_f).ceil
batches_count = (projects_count / described_class::SUB_BATCH_SIZE.to_f).ceil
project_namespaces_count = ::Namespace.where(type: 'Project').count
migration = described_class.new
......@@ -39,7 +39,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
expect(migration).to receive(:batch_update_projects).exactly(batches_count).and_call_original
expect(migration).to receive(:batch_update_project_namespaces_traversal_ids).exactly(batches_count).and_call_original
expect { migration.perform(start_id, end_id, nil, 'up') }.to change(Namespace.where(type: 'Project'), :count)
expect { migration.perform(start_id, end_id, nil, nil, nil, nil, nil, 'up') }.to change(Namespace.where(type: 'Project'), :count)
expect(projects_count).to eq(::Namespace.where(type: 'Project').count)
check_projects_in_sync_with(Namespace.where(type: 'Project'))
......@@ -53,7 +53,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
start_id = backfilled_namespace_projects.minimum(:id)
end_id = backfilled_namespace_projects.maximum(:id)
group_projects_count = backfilled_namespace_projects.count
batches_count = (group_projects_count / described_class::BATCH_SIZE.to_f).ceil
batches_count = (group_projects_count / described_class::SUB_BATCH_SIZE.to_f).ceil
project_namespaces_in_hierarchy = project_namespaces_in_hierarchy(base_ancestor(backfilled_namespace))
migration = described_class.new
......@@ -66,7 +66,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
expect(group_projects_count).to eq(14)
expect(project_namespaces_in_hierarchy.count).to eq(0)
migration.perform(start_id, end_id, backfilled_namespace.id, 'up')
migration.perform(start_id, end_id, nil, nil, nil, nil, backfilled_namespace.id, 'up')
expect(project_namespaces_in_hierarchy.count).to eq(14)
check_projects_in_sync_with(project_namespaces_in_hierarchy)
......@@ -79,7 +79,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
start_id = hierarchy1_projects.minimum(:id)
end_id = hierarchy1_projects.maximum(:id)
described_class.new.perform(start_id, end_id, parent_group1.id, 'up')
described_class.new.perform(start_id, end_id, nil, nil, nil, nil, parent_group1.id, 'up')
end
it 'does not duplicate project namespaces' do
......@@ -87,7 +87,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
projects_count = ::Project.count
start_id = ::Project.minimum(:id)
end_id = ::Project.maximum(:id)
batches_count = (projects_count / described_class::BATCH_SIZE.to_f).ceil
batches_count = (projects_count / described_class::SUB_BATCH_SIZE.to_f).ceil
project_namespaces = ::Namespace.where(type: 'Project')
migration = described_class.new
......@@ -100,7 +100,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
expect(migration).to receive(:batch_update_projects).exactly(batches_count).and_call_original
expect(migration).to receive(:batch_update_project_namespaces_traversal_ids).exactly(batches_count).and_call_original
expect { migration.perform(start_id, end_id, nil, 'up') }.to change(project_namespaces, :count).by(14)
expect { migration.perform(start_id, end_id, nil, nil, nil, nil, nil, 'up') }.to change(project_namespaces, :count).by(14)
expect(projects_count).to eq(project_namespaces.count)
end
......@@ -125,7 +125,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
context 'back-fill project namespaces in batches' do
before do
stub_const("#{described_class.name}::BATCH_SIZE", 2)
stub_const("#{described_class.name}::SUB_BATCH_SIZE", 2)
end
it_behaves_like 'back-fill project namespaces'
......@@ -137,7 +137,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
start_id = ::Project.minimum(:id)
end_id = ::Project.maximum(:id)
# back-fill first
described_class.new.perform(start_id, end_id, nil, 'up')
described_class.new.perform(start_id, end_id, nil, nil, nil, nil, nil, 'up')
end
shared_examples 'cleanup project namespaces' do
......@@ -146,7 +146,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
start_id = ::Project.minimum(:id)
end_id = ::Project.maximum(:id)
migration = described_class.new
batches_count = (projects_count / described_class::BATCH_SIZE.to_f).ceil
batches_count = (projects_count / described_class::SUB_BATCH_SIZE.to_f).ceil
expect(projects_count).to be > 0
expect(projects_count).to eq(::Namespace.where(type: 'Project').count)
......@@ -154,7 +154,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
expect(migration).to receive(:nullify_project_namespaces_in_projects).exactly(batches_count).and_call_original
expect(migration).to receive(:delete_project_namespace_records).exactly(batches_count).and_call_original
migration.perform(start_id, end_id, nil, 'down')
migration.perform(start_id, end_id, nil, nil, nil, nil, nil, 'down')
expect(::Project.count).to be > 0
expect(::Namespace.where(type: 'Project').count).to eq(0)
......@@ -168,7 +168,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
start_id = backfilled_namespace_projects.minimum(:id)
end_id = backfilled_namespace_projects.maximum(:id)
group_projects_count = backfilled_namespace_projects.count
batches_count = (group_projects_count / described_class::BATCH_SIZE.to_f).ceil
batches_count = (group_projects_count / described_class::SUB_BATCH_SIZE.to_f).ceil
project_namespaces_in_hierarchy = project_namespaces_in_hierarchy(base_ancestor(backfilled_namespace))
migration = described_class.new
......@@ -176,7 +176,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
expect(migration).to receive(:nullify_project_namespaces_in_projects).exactly(batches_count).and_call_original
expect(migration).to receive(:delete_project_namespace_records).exactly(batches_count).and_call_original
migration.perform(start_id, end_id, backfilled_namespace.id, 'down')
migration.perform(start_id, end_id, nil, nil, nil, nil, backfilled_namespace.id, 'down')
expect(::Namespace.where(type: 'Project').count).to be > 0
expect(project_namespaces_in_hierarchy.count).to eq(0)
......@@ -190,7 +190,7 @@ RSpec.describe Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNa
context 'cleanup project namespaces in batches' do
before do
stub_const("#{described_class.name}::BATCH_SIZE", 2)
stub_const("#{described_class.name}::SUB_BATCH_SIZE", 2)
end
it_behaves_like 'cleanup project namespaces'
......
# frozen_string_literal: true
require 'spec_helper'
require_migration!
# Migration spec: `up` must register a batched background migration for the
# configured group, and `down` must leave no such record behind.
RSpec.describe BackfillProjectNamespacesForGroup do
  let_it_be(:migration) { described_class::MIGRATION }

  let(:projects) { table(:projects) }
  let(:namespaces) { table(:namespaces) }

  let(:parent_group1) do
    namespaces.create!(
      name: 'parent_group1',
      path: 'parent_group1',
      visibility_level: 20,
      type: 'Group'
    )
  end

  let!(:parent_group1_project) do
    projects.create!(
      name: 'parent_group1_project',
      path: 'parent_group1_project',
      namespace_id: parent_group1.id,
      visibility_level: 20
    )
  end

  before do
    allow(Gitlab).to receive(:com?).and_return(true)
  end

  describe '#up' do
    before do
      # Point the migration at the group created above rather than at the
      # value of its GROUP_ID constant.
      stub_const('BackfillProjectNamespacesForGroup::GROUP_ID', parent_group1.id)
    end

    it 'schedules background jobs for each batch of namespaces' do
      migrate!

      expect(migration).to have_scheduled_batched_migration(
        table_name: :projects,
        column_name: :id,
        job_arguments: [described_class::GROUP_ID, 'up'],
        interval: described_class::DELAY_INTERVAL
      )
    end
  end

  describe '#down' do
    it 'deletes all batched migration records' do
      migrate!
      schema_migrate_down!

      expect(migration).not_to have_scheduled_batched_migration
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment