Commit 163051ac authored by Mayra Cabrera

Merge branch '230815-move-partitioning-backfill-migration-to-post-deploy' into 'master'

Move partitioning backfill migration to post-deploy

See merge request gitlab-org/gitlab!37633
parents f7549fc3 392c9465
---
title: Move partitioning backfill migration to post-deployment
merge_request: 37633
author:
type: fixed
# frozen_string_literal: true
# Migration that schedules (and, on rollback, cleans up) the data backfill for
# the partitioned copy of the `audit_events` table. Both directions do nothing
# when `::Gitlab.com?` is true.
class BackfillPartitionedAuditEvents < ActiveRecord::Migration[6.0]
  include Gitlab::Database::PartitioningMigrationHelpers

  DOWNTIME = false

  # The enqueue helper asserts it is not running inside a transaction block.
  disable_ddl_transaction!

  # Enqueues the partitioning data migration for `audit_events`.
  def up
    enqueue_partitioning_data_migration(:audit_events) unless ::Gitlab.com?
  end

  # Cleans up the partitioning data migration for `audit_events`.
  def down
    cleanup_partitioning_data_migration(:audit_events) unless ::Gitlab.com?
  end
end
7a1344af4736eb369996398332653b29ec93fbda92b258fbe95093476c01d29e
\ No newline at end of file
...@@ -16,7 +16,9 @@ module Gitlab ...@@ -16,7 +16,9 @@ module Gitlab
BATCH_SIZE = 50_000 BATCH_SIZE = 50_000
# Creates a partitioned copy of an existing table, using a RANGE partitioning strategy on a timestamp column. # Creates a partitioned copy of an existing table, using a RANGE partitioning strategy on a timestamp column.
# One partition is created per month between the given `min_date` and `max_date`. # One partition is created per month between the given `min_date` and `max_date`. Also installs a trigger on
# the original table to copy writes into the partitioned table. To copy over historic data from before creation
# of the partitioned table, use the `enqueue_partitioning_data_migration` helper in a post-deploy migration.
# #
# A copy of the original table is required as PG currently does not support partitioning existing tables. # A copy of the original table is required as PG currently does not support partitioning existing tables.
# #
...@@ -56,10 +58,10 @@ module Gitlab ...@@ -56,10 +58,10 @@ module Gitlab
create_range_partitioned_copy(table_name, partitioned_table_name, partition_column, primary_key) create_range_partitioned_copy(table_name, partitioned_table_name, partition_column, primary_key)
create_daterange_partitions(partitioned_table_name, partition_column.name, min_date, max_date) create_daterange_partitions(partitioned_table_name, partition_column.name, min_date, max_date)
create_trigger_to_sync_tables(table_name, partitioned_table_name, primary_key) create_trigger_to_sync_tables(table_name, partitioned_table_name, primary_key)
enqueue_background_migration(table_name, partitioned_table_name, primary_key)
end end
# Clean up a partitioned copy of an existing table. This deletes the partitioned table and all partitions. # Clean up a partitioned copy of an existing table. First, deletes the database function and trigger that were
# used to copy writes to the partitioned table, then removes the partitioned table (also removing partitions).
# #
# Example: # Example:
# #
...@@ -69,8 +71,6 @@ module Gitlab ...@@ -69,8 +71,6 @@ module Gitlab
assert_table_is_allowed(table_name) assert_table_is_allowed(table_name)
assert_not_in_transaction_block(scope: ERROR_SCOPE) assert_not_in_transaction_block(scope: ERROR_SCOPE)
cleanup_migration_jobs(table_name)
with_lock_retries do with_lock_retries do
trigger_name = make_sync_trigger_name(table_name) trigger_name = make_sync_trigger_name(table_name)
drop_trigger(table_name, trigger_name) drop_trigger(table_name, trigger_name)
...@@ -83,6 +83,38 @@ module Gitlab ...@@ -83,6 +83,38 @@ module Gitlab
drop_table(partitioned_table_name) drop_table(partitioned_table_name)
end end
# Schedules the background jobs that backfill the partitioned table by
# batch-copying records from the original table. Intended to be called from a
# post-deploy migration.
#
# The table must be allowed for partitioning, and the call must not happen
# inside a transaction block; both conditions are asserted before anything is
# enqueued.
#
# Example:
#
#   enqueue_partitioning_data_migration :audit_events
#
def enqueue_partitioning_data_migration(table_name)
  assert_table_is_allowed(table_name)
  assert_not_in_transaction_block(scope: ERROR_SCOPE)

  # Arguments are evaluated left to right: the partitioned table name is
  # derived first, then the primary key is looked up on the source table.
  enqueue_background_migration(
    table_name,
    make_partitioned_table_name(table_name),
    connection.primary_key(table_name)
  )
end
# Clean up after a previously enqueued background migration that copies data
# into a partitioned table. Jobs that are already enqueued are not prevented
# from running; only the database records used to track the state of the
# background migration are removed. Removing the partitioned table while jobs
# are still in progress should also be safe, since the jobs exit safely when
# the table is absent.
#
# The table must be allowed for partitioning; this is asserted before any
# tracking records are removed.
#
# Example:
#
#   cleanup_partitioning_data_migration :audit_events
#
def cleanup_partitioning_data_migration(table_name)
  assert_table_is_allowed(table_name)

  cleanup_migration_jobs(table_name)
end
def create_hash_partitions(table_name, number_of_partitions) def create_hash_partitions(table_name, number_of_partitions)
transaction do transaction do
(0..number_of_partitions - 1).each do |partition| (0..number_of_partitions - 1).each do |partition|
......
...@@ -315,36 +315,6 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe ...@@ -315,36 +315,6 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
expect(model.find(second_todo.id).attributes).to eq(second_todo.attributes) expect(model.find(second_todo.id).attributes).to eq(second_todo.attributes)
end end
end end
describe 'copying historic data to the partitioned table' do
let(:source_table) { 'todos' }
let(:migration_class) { '::Gitlab::Database::PartitioningMigrationHelpers::BackfillPartitionedTable' }
let(:sub_batch_size) { described_class::SUB_BATCH_SIZE }
let(:pause_seconds) { described_class::PAUSE_SECONDS }
let!(:first_id) { create(:todo).id }
let!(:second_id) { create(:todo).id }
let!(:third_id) { create(:todo).id }
before do
stub_const("#{described_class.name}::BATCH_SIZE", 2)
expect(migration).to receive(:queue_background_migration_jobs_by_range_at_intervals).and_call_original
end
it 'enqueues jobs to copy each batch of data' do
Sidekiq::Testing.fake! do
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
first_job_arguments = [first_id, second_id, source_table, partitioned_table, 'id']
expect(BackgroundMigrationWorker.jobs[0]['args']).to eq([migration_class, first_job_arguments])
second_job_arguments = [third_id, third_id, source_table, partitioned_table, 'id']
expect(BackgroundMigrationWorker.jobs[1]['args']).to eq([migration_class, second_job_arguments])
end
end
end
end end
describe '#drop_partitioned_table_for' do describe '#drop_partitioned_table_for' do
...@@ -390,16 +360,85 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe ...@@ -390,16 +360,85 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
expect(connection.table_exists?(table)).to be(false) expect(connection.table_exists?(table)).to be(false)
end end
end end
end
# Specs for the helper that enqueues the background jobs which copy existing
# rows from a source table into its partitioned copy.
describe '#enqueue_partitioning_data_migration' do
  context 'when the table is not allowed' do
    let(:source_table) { :this_table_is_not_allowed }

    it 'raises an error' do
      expect(migration).to receive(:assert_table_is_allowed).with(source_table).and_call_original

      expect do
        migration.enqueue_partitioning_data_migration source_table
      end.to raise_error(/#{source_table} is not allowed for use/)
    end
  end

  context 'when run inside a transaction block' do
    it 'raises an error' do
      expect(migration).to receive(:transaction_open?).and_return(true)

      expect do
        migration.enqueue_partitioning_data_migration source_table
      end.to raise_error(/can not be run inside a transaction/)
    end
  end

  context 'when records exist in the source table' do
    let(:source_table) { 'todos' }
    let(:migration_class) { '::Gitlab::Database::PartitioningMigrationHelpers::BackfillPartitionedTable' }
    let(:sub_batch_size) { described_class::SUB_BATCH_SIZE }
    let(:pause_seconds) { described_class::PAUSE_SECONDS }
    let!(:first_id) { create(:todo).id }
    let!(:second_id) { create(:todo).id }
    let!(:third_id) { create(:todo).id }

    before do
      # With a batch size of 2 the three todos are expected to split into two
      # jobs: [first_id, second_id] and [third_id, third_id].
      stub_const("#{described_class.name}::BATCH_SIZE", 2)

      expect(migration).to receive(:queue_background_migration_jobs_by_range_at_intervals).and_call_original
    end

    it 'enqueues jobs to copy each batch of data' do
      # Create the partitioned copy first, then enqueue the backfill separately.
      migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date

      Sidekiq::Testing.fake! do
        migration.enqueue_partitioning_data_migration source_table

        expect(BackgroundMigrationWorker.jobs.size).to eq(2)

        first_job_arguments = [first_id, second_id, source_table, partitioned_table, 'id']
        expect(BackgroundMigrationWorker.jobs[0]['args']).to eq([migration_class, first_job_arguments])

        second_job_arguments = [third_id, third_id, source_table, partitioned_table, 'id']
        expect(BackgroundMigrationWorker.jobs[1]['args']).to eq([migration_class, second_job_arguments])
      end
    end
  end
end
describe '#cleanup_partitioning_data_migration' do
context 'when the table is not allowed' do
let(:source_table) { :this_table_is_not_allowed }
it 'raises an error' do
expect(migration).to receive(:assert_table_is_allowed).with(source_table).and_call_original
expect do
migration.cleanup_partitioning_data_migration source_table
end.to raise_error(/#{source_table} is not allowed for use/)
end
end
context 'when tracking records exist in the background_migration_jobs table' do
let(:migration_class) { 'Gitlab::Database::PartitioningMigrationHelpers::BackfillPartitionedTable' }
let!(:job1) { create(:background_migration_job, class_name: migration_class, arguments: [1, 10, source_table]) } let!(:job1) { create(:background_migration_job, class_name: migration_class, arguments: [1, 10, source_table]) }
let!(:job2) { create(:background_migration_job, class_name: migration_class, arguments: [11, 20, source_table]) } let!(:job2) { create(:background_migration_job, class_name: migration_class, arguments: [11, 20, source_table]) }
let!(:job3) { create(:background_migration_job, class_name: migration_class, arguments: [1, 10, 'other_table']) } let!(:job3) { create(:background_migration_job, class_name: migration_class, arguments: [1, 10, 'other_table']) }
it 'deletes any tracking records from the background_migration_jobs table' do it 'deletes those pertaining to the given table' do
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date expect { migration.cleanup_partitioning_data_migration(source_table) }
expect { migration.drop_partitioned_table_for(source_table) }
.to change { ::Gitlab::Database::BackgroundMigrationJob.count }.from(3).to(1) .to change { ::Gitlab::Database::BackgroundMigrationJob.count }.from(3).to(1)
remaining_record = ::Gitlab::Database::BackgroundMigrationJob.first remaining_record = ::Gitlab::Database::BackgroundMigrationJob.first
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment