Commit e44572c1 authored by Patrick Bair

Merge branch 'loose-fk-multi-partition-query' into 'master'

Query all partitions when loading loose FK records

See merge request gitlab-org/gitlab!74072
parents 85e63b76 44af21f9
...@@ -4,29 +4,29 @@ class LooseForeignKeys::DeletedRecord < ApplicationRecord ...@@ -4,29 +4,29 @@ class LooseForeignKeys::DeletedRecord < ApplicationRecord
self.primary_key = :id self.primary_key = :id
scope :for_table, -> (table) { where(fully_qualified_table_name: table) } scope :for_table, -> (table) { where(fully_qualified_table_name: table) }
scope :ordered_by_id, -> { order(:id, :primary_key_value) } scope :consume_order, -> { order(:partition, :consume_after, :id) }
# This needs to be parameterized once we start adding partitions
scope :for_partition, -> { where(partition: 1) }
enum status: { pending: 1, processed: 2 }, _prefix: :status enum status: { pending: 1, processed: 2 }, _prefix: :status
def self.load_batch_for_table(table, batch_size) def self.load_batch_for_table(table, batch_size)
for_table(table) for_table(table)
.for_partition
.status_pending .status_pending
.ordered_by_id .consume_order
.limit(batch_size) .limit(batch_size)
.to_a .to_a
end end
def self.mark_records_processed_for_table_between(table, from_record, to_record) def self.mark_records_processed(all_records)
from = from_record.id # Run a query for each partition to optimize the row lookup by primary key (partition, id)
to = to_record.id update_count = 0
for_table(table) all_records.group_by(&:partition).each do |partition, records_within_partition|
.for_partition update_count += status_pending
.status_pending .where(partition: partition)
.where(id: from..to) .where(id: records_within_partition.pluck(:id))
.update_all(status: :processed) .update_all(status: :processed)
end
update_count
end end
end end
...@@ -25,8 +25,7 @@ module LooseForeignKeys ...@@ -25,8 +25,7 @@ module LooseForeignKeys
return if modification_tracker.over_limit? return if modification_tracker.over_limit?
# At this point, all associations are cleaned up, we can update the status of the parent records # At this point, all associations are cleaned up, we can update the status of the parent records
update_count = LooseForeignKeys::DeletedRecord update_count = LooseForeignKeys::DeletedRecord.mark_records_processed(deleted_parent_records)
.mark_records_processed_for_table_between(deleted_parent_records.first.fully_qualified_table_name, deleted_parent_records.first, deleted_parent_records.last)
deleted_records_counter.increment({ table: parent_klass.table_name, db_config_name: LooseForeignKeys::DeletedRecord.connection.pool.db_config.name }, update_count) deleted_records_counter.increment({ table: parent_klass.table_name, db_config_name: LooseForeignKeys::DeletedRecord.connection.pool.db_config.name }, update_count)
end end
......
# frozen_string_literal: true
# Adds the `consume_after` timestamp column to the
# `loose_foreign_keys_deleted_records` table.
#
# The column is created with a database-side default of NOW(), so rows
# that do not set it explicitly receive the current timestamp.
class AddConsumeAfterToLooseFkDeletedRecords < Gitlab::Database::Migration[1.0]
  enable_lock_retries!

  TABLE_NAME = :loose_foreign_keys_deleted_records
  COLUMN_NAME = :consume_after

  # Creates the column; the lambda makes the default a SQL expression
  # rather than a quoted string literal.
  def up
    add_column(TABLE_NAME, COLUMN_NAME, :datetime_with_timezone, default: -> { 'NOW()' })
  end

  # Reverts the migration by dropping the column again.
  def down
    remove_column(TABLE_NAME, COLUMN_NAME)
  end
end
# frozen_string_literal: true
# Adds a partial index on `loose_foreign_keys_deleted_records` covering
# (partition, fully_qualified_table_name, consume_after, id) for rows
# where `status = 1`.
#
# The index is managed through the partitioning migration helpers and the
# migration runs outside of a DDL transaction.
class SupportPartitionQueryInLooseFkTable < Gitlab::Database::Migration[1.0]
  include Gitlab::Database::PartitioningMigrationHelpers

  disable_ddl_transaction!

  TABLE_NAME = :loose_foreign_keys_deleted_records
  INDEX_NAME = 'index_loose_foreign_keys_deleted_records_for_partitioned_query'

  # Builds the partial index on the partitioned table.
  def up
    add_concurrent_partitioned_index(
      TABLE_NAME,
      [:partition, :fully_qualified_table_name, :consume_after, :id],
      where: 'status = 1',
      name: INDEX_NAME
    )
  end

  # Drops the index by name.
  def down
    remove_concurrent_partitioned_index_by_name(TABLE_NAME, INDEX_NAME)
  end
end
# frozen_string_literal: true
# Removes the previous partial index used for loading records from
# `loose_foreign_keys_deleted_records`.
#
# Rolling back recreates that index on
# (fully_qualified_table_name, id, primary_key_value, partition) for rows
# where `status = 1`.
class DropOldLooseFkDeletedRecordsIndex < Gitlab::Database::Migration[1.0]
  include Gitlab::Database::PartitioningMigrationHelpers

  disable_ddl_transaction!

  TABLE_NAME = :loose_foreign_keys_deleted_records
  INDEX_NAME = 'index_loose_foreign_keys_deleted_records_for_loading_records'

  # Drops the old index by name.
  def up
    remove_concurrent_partitioned_index_by_name(TABLE_NAME, INDEX_NAME)
  end

  # Restores the old index with its original column order and predicate.
  def down
    add_concurrent_partitioned_index(
      TABLE_NAME,
      [:fully_qualified_table_name, :id, :primary_key_value, :partition],
      where: 'status = 1',
      name: INDEX_NAME
    )
  end
end
86aa6ad1759a00c2cc5cb6dc2e381aead2910a24f0e37933a5e72af56d08101a
\ No newline at end of file
30eb98b8fdb24bc5de357b0ec14a6b92d520db025c82bd7b9448f71542c7d7e3
\ No newline at end of file
1bc48cdae55eea5a5963edd3a138d7d6859afa6caafe0b793c553fdfabe9f488
\ No newline at end of file
...@@ -1021,6 +1021,7 @@ CREATE TABLE loose_foreign_keys_deleted_records ( ...@@ -1021,6 +1021,7 @@ CREATE TABLE loose_foreign_keys_deleted_records (
status smallint DEFAULT 1 NOT NULL, status smallint DEFAULT 1 NOT NULL,
created_at timestamp with time zone DEFAULT now() NOT NULL, created_at timestamp with time zone DEFAULT now() NOT NULL,
fully_qualified_table_name text NOT NULL, fully_qualified_table_name text NOT NULL,
consume_after timestamp with time zone DEFAULT now(),
CONSTRAINT check_1a541f3235 CHECK ((char_length(fully_qualified_table_name) <= 150)) CONSTRAINT check_1a541f3235 CHECK ((char_length(fully_qualified_table_name) <= 150))
) )
PARTITION BY LIST (partition); PARTITION BY LIST (partition);
...@@ -1041,6 +1042,7 @@ CREATE TABLE gitlab_partitions_static.loose_foreign_keys_deleted_records_1 ( ...@@ -1041,6 +1042,7 @@ CREATE TABLE gitlab_partitions_static.loose_foreign_keys_deleted_records_1 (
status smallint DEFAULT 1 NOT NULL, status smallint DEFAULT 1 NOT NULL,
created_at timestamp with time zone DEFAULT now() NOT NULL, created_at timestamp with time zone DEFAULT now() NOT NULL,
fully_qualified_table_name text NOT NULL, fully_qualified_table_name text NOT NULL,
consume_after timestamp with time zone DEFAULT now(),
CONSTRAINT check_1a541f3235 CHECK ((char_length(fully_qualified_table_name) <= 150)) CONSTRAINT check_1a541f3235 CHECK ((char_length(fully_qualified_table_name) <= 150))
); );
ALTER TABLE ONLY loose_foreign_keys_deleted_records ATTACH PARTITION gitlab_partitions_static.loose_foreign_keys_deleted_records_1 FOR VALUES IN ('1'); ALTER TABLE ONLY loose_foreign_keys_deleted_records ATTACH PARTITION gitlab_partitions_static.loose_foreign_keys_deleted_records_1 FOR VALUES IN ('1');
...@@ -24073,6 +24075,10 @@ CREATE INDEX index_merge_request_stage_events_project_duration ON ONLY analytics ...@@ -24073,6 +24075,10 @@ CREATE INDEX index_merge_request_stage_events_project_duration ON ONLY analytics
CREATE INDEX index_006f943df6 ON gitlab_partitions_static.analytics_cycle_analytics_merge_request_stage_events_16 USING btree (stage_event_hash_id, project_id, end_event_timestamp, merge_request_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL); CREATE INDEX index_006f943df6 ON gitlab_partitions_static.analytics_cycle_analytics_merge_request_stage_events_16 USING btree (stage_event_hash_id, project_id, end_event_timestamp, merge_request_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL);
CREATE INDEX index_loose_foreign_keys_deleted_records_for_partitioned_query ON ONLY loose_foreign_keys_deleted_records USING btree (partition, fully_qualified_table_name, consume_after, id) WHERE (status = 1);
CREATE INDEX index_01e3390fac ON gitlab_partitions_static.loose_foreign_keys_deleted_records_1 USING btree (partition, fully_qualified_table_name, consume_after, id) WHERE (status = 1);
CREATE INDEX index_02749b504c ON gitlab_partitions_static.analytics_cycle_analytics_merge_request_stage_events_11 USING btree (stage_event_hash_id, project_id, end_event_timestamp, merge_request_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL); CREATE INDEX index_02749b504c ON gitlab_partitions_static.analytics_cycle_analytics_merge_request_stage_events_11 USING btree (stage_event_hash_id, project_id, end_event_timestamp, merge_request_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL);
CREATE INDEX index_merge_request_stage_events_group_duration ON ONLY analytics_cycle_analytics_merge_request_stage_events USING btree (stage_event_hash_id, group_id, end_event_timestamp, merge_request_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL); CREATE INDEX index_merge_request_stage_events_group_duration ON ONLY analytics_cycle_analytics_merge_request_stage_events USING btree (stage_event_hash_id, group_id, end_event_timestamp, merge_request_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL);
...@@ -24391,10 +24397,6 @@ CREATE INDEX index_8a0fc3de4f ON gitlab_partitions_static.analytics_cycle_analyt ...@@ -24391,10 +24397,6 @@ CREATE INDEX index_8a0fc3de4f ON gitlab_partitions_static.analytics_cycle_analyt
CREATE INDEX index_8b9f9a19a4 ON gitlab_partitions_static.analytics_cycle_analytics_merge_request_stage_events_18 USING btree (stage_event_hash_id, group_id, end_event_timestamp, merge_request_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL); CREATE INDEX index_8b9f9a19a4 ON gitlab_partitions_static.analytics_cycle_analytics_merge_request_stage_events_18 USING btree (stage_event_hash_id, group_id, end_event_timestamp, merge_request_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL);
CREATE INDEX index_loose_foreign_keys_deleted_records_for_loading_records ON ONLY loose_foreign_keys_deleted_records USING btree (fully_qualified_table_name, id, primary_key_value, partition) WHERE (status = 1);
CREATE INDEX index_8be8640437 ON gitlab_partitions_static.loose_foreign_keys_deleted_records_1 USING btree (fully_qualified_table_name, id, primary_key_value, partition) WHERE (status = 1);
CREATE INDEX index_8fb48e72ce ON gitlab_partitions_static.analytics_cycle_analytics_issue_stage_events_26 USING btree (stage_event_hash_id, group_id, end_event_timestamp, issue_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL); CREATE INDEX index_8fb48e72ce ON gitlab_partitions_static.analytics_cycle_analytics_issue_stage_events_26 USING btree (stage_event_hash_id, group_id, end_event_timestamp, issue_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL);
CREATE INDEX index_9201b952a0 ON gitlab_partitions_static.analytics_cycle_analytics_issue_stage_events_13 USING btree (stage_event_hash_id, group_id, end_event_timestamp, issue_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL); CREATE INDEX index_9201b952a0 ON gitlab_partitions_static.analytics_cycle_analytics_issue_stage_events_13 USING btree (stage_event_hash_id, group_id, end_event_timestamp, issue_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL);
...@@ -27911,6 +27913,8 @@ ALTER INDEX index_issue_stage_events_project_duration ATTACH PARTITION gitlab_pa ...@@ -27911,6 +27913,8 @@ ALTER INDEX index_issue_stage_events_project_duration ATTACH PARTITION gitlab_pa
ALTER INDEX index_merge_request_stage_events_project_duration ATTACH PARTITION gitlab_partitions_static.index_006f943df6; ALTER INDEX index_merge_request_stage_events_project_duration ATTACH PARTITION gitlab_partitions_static.index_006f943df6;
ALTER INDEX index_loose_foreign_keys_deleted_records_for_partitioned_query ATTACH PARTITION gitlab_partitions_static.index_01e3390fac;
ALTER INDEX index_merge_request_stage_events_project_duration ATTACH PARTITION gitlab_partitions_static.index_02749b504c; ALTER INDEX index_merge_request_stage_events_project_duration ATTACH PARTITION gitlab_partitions_static.index_02749b504c;
ALTER INDEX index_merge_request_stage_events_group_duration ATTACH PARTITION gitlab_partitions_static.index_0287f5ba09; ALTER INDEX index_merge_request_stage_events_group_duration ATTACH PARTITION gitlab_partitions_static.index_0287f5ba09;
...@@ -28217,8 +28221,6 @@ ALTER INDEX index_issue_stage_events_project_in_progress_duration ATTACH PARTITI ...@@ -28217,8 +28221,6 @@ ALTER INDEX index_issue_stage_events_project_in_progress_duration ATTACH PARTITI
ALTER INDEX index_merge_request_stage_events_group_duration ATTACH PARTITION gitlab_partitions_static.index_8b9f9a19a4; ALTER INDEX index_merge_request_stage_events_group_duration ATTACH PARTITION gitlab_partitions_static.index_8b9f9a19a4;
ALTER INDEX index_loose_foreign_keys_deleted_records_for_loading_records ATTACH PARTITION gitlab_partitions_static.index_8be8640437;
ALTER INDEX index_issue_stage_events_group_duration ATTACH PARTITION gitlab_partitions_static.index_8fb48e72ce; ALTER INDEX index_issue_stage_events_group_duration ATTACH PARTITION gitlab_partitions_static.index_8fb48e72ce;
ALTER INDEX index_issue_stage_events_group_duration ATTACH PARTITION gitlab_partitions_static.index_9201b952a0; ALTER INDEX index_issue_stage_events_group_duration ATTACH PARTITION gitlab_partitions_static.index_9201b952a0;
...@@ -24,26 +24,9 @@ RSpec.describe LooseForeignKeys::DeletedRecord, type: :model do ...@@ -24,26 +24,9 @@ RSpec.describe LooseForeignKeys::DeletedRecord, type: :model do
end end
end end
describe '.mark_records_processed_for_table_between' do describe '.mark_records_processed' do
it 'marks processed exactly one record' do it 'updates all records' do
described_class.mark_records_processed_for_table_between(table, deleted_record_2, deleted_record_2) described_class.mark_records_processed([deleted_record_1, deleted_record_2, deleted_record_4])
expect(described_class.status_pending.count).to eq(3)
expect(described_class.status_processed.count).to eq(1)
processed_record = described_class.status_processed.first
expect(processed_record).to eq(deleted_record_2)
end
it 'deletes two records' do
described_class.mark_records_processed_for_table_between(table, deleted_record_2, deleted_record_4)
expect(described_class.status_pending.count).to eq(2)
expect(described_class.status_processed.count).to eq(2)
end
it 'deletes all records' do
described_class.mark_records_processed_for_table_between(table, deleted_record_1, deleted_record_4)
expect(described_class.status_pending.count).to eq(1) expect(described_class.status_pending.count).to eq(1)
expect(described_class.status_processed.count).to eq(3) expect(described_class.status_processed.count).to eq(3)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment