Commit 361274cd authored by Andreas Brandl

Merge branch '241267-swap-audit-events-migration' into 'master'

Add migration to swap partitioned audit_events

See merge request gitlab-org/gitlab!47581
parents 226c1b96 0dd449a8
......@@ -4,6 +4,7 @@ class AuditEvent < ApplicationRecord
include CreatedAtFilterable
include BulkInsertSafe
include EachBatch
include PartitionedTable
PARALLEL_PERSISTENCE_COLUMNS = [
:author_name,
......@@ -15,6 +16,8 @@ class AuditEvent < ApplicationRecord
self.primary_key = :id
partitioned_by :created_at, strategy: :monthly
serialize :details, Hash # rubocop:disable Cop/ActiveRecordSerialize
belongs_to :user, foreign_key: :author_id
......
# frozen_string_literal: true
# This model is not intended to be used.
# It is a temporary reference to the pre-partitioned
# audit_events table.
# Please refer to https://gitlab.com/groups/gitlab-org/-/epics/3206
# for details.
class AuditEventArchived < ApplicationRecord
  # The table name is set explicitly: it cannot be derived from the
  # class name, so ActiveRecord's default inference is overridden here.
  self.table_name = 'audit_events_archived'
end
# frozen_string_literal: true
# This model is not yet intended to be used.
# It is in a transitioning phase while we are partitioning
# the table on the database-side.
# Please refer to https://gitlab.com/groups/gitlab-org/-/epics/3206
# for details.
class AuditEventPartitioned < ApplicationRecord
  # Mixin that supplies the `partitioned_by` macro used below.
  include PartitionedTable

  # Explicit binding to the generated table name; it cannot be inferred
  # from the class name.
  self.table_name = 'audit_events_part_5fc467ac26'

  # Partition key is created_at, using the :monthly strategy.
  partitioned_by :created_at, strategy: :monthly
end
---
title: Add migration to swap partitioned audit_events
merge_request: 47581
author:
type: added
......@@ -3,7 +3,7 @@
# Make sure we have loaded partitioned models here
# (even with eager loading disabled).
Gitlab::Database::Partitioning::PartitionCreator.register(AuditEventPartitioned)
Gitlab::Database::Partitioning::PartitionCreator.register(AuditEvent)
begin
Gitlab::Database::Partitioning::PartitionCreator.new.create_partitions unless ENV['DISABLE_POSTGRES_PARTITION_CREATION_ON_STARTUP']
......
# frozen_string_literal: true
# Migration that hands the :audit_events table to the partitioning
# helpers: `up` replaces it with its partitioned table and `down`
# rolls that replacement back.
class SwapPartitionedAuditEvents < ActiveRecord::Migration[6.0]
  include Gitlab::Database::PartitioningMigrationHelpers

  # NOTE(review): presumably the project's marker that this migration
  # can run without downtime - confirm against the migration guidelines.
  DOWNTIME = false

  # Forward direction: delegate the swap to the partitioning helper.
  def up
    replace_with_partitioned_table(:audit_events)
  end

  # Reverse direction: delegate to the helper's rollback counterpart.
  def down
    rollback_replace_with_partitioned_table(:audit_events)
  end
end
a436597e876a6d9efc2c1558e05dc576cbbc6f829dc8059d62fc231bbf0ce2fa
\ No newline at end of file
......@@ -15,46 +15,46 @@ CREATE FUNCTION table_sync_function_2be879775d() RETURNS trigger
AS $$
BEGIN
IF (TG_OP = 'DELETE') THEN
DELETE FROM audit_events_part_5fc467ac26 where id = OLD.id;
DELETE FROM audit_events_archived where id = OLD.id;
ELSIF (TG_OP = 'UPDATE') THEN
UPDATE audit_events_part_5fc467ac26
UPDATE audit_events_archived
SET author_id = NEW.author_id,
entity_id = NEW.entity_id,
entity_type = NEW.entity_type,
details = NEW.details,
created_at = NEW.created_at,
ip_address = NEW.ip_address,
author_name = NEW.author_name,
entity_path = NEW.entity_path,
target_details = NEW.target_details,
target_type = NEW.target_type,
target_id = NEW.target_id,
created_at = NEW.created_at
WHERE audit_events_part_5fc467ac26.id = NEW.id;
target_id = NEW.target_id
WHERE audit_events_archived.id = NEW.id;
ELSIF (TG_OP = 'INSERT') THEN
INSERT INTO audit_events_part_5fc467ac26 (id,
INSERT INTO audit_events_archived (id,
author_id,
entity_id,
entity_type,
details,
created_at,
ip_address,
author_name,
entity_path,
target_details,
target_type,
target_id,
created_at)
target_id)
VALUES (NEW.id,
NEW.author_id,
NEW.entity_id,
NEW.entity_type,
NEW.details,
NEW.created_at,
NEW.ip_address,
NEW.author_name,
NEW.entity_path,
NEW.target_details,
NEW.target_type,
NEW.target_id,
NEW.created_at);
NEW.target_id);
END IF;
RETURN NULL;
......@@ -63,7 +63,7 @@ $$;
COMMENT ON FUNCTION table_sync_function_2be879775d() IS 'Partitioning migration: table sync for audit_events table';
CREATE TABLE audit_events_part_5fc467ac26 (
CREATE TABLE audit_events (
id bigint NOT NULL,
author_id integer NOT NULL,
entity_id integer NOT NULL,
......@@ -9623,7 +9623,7 @@ CREATE SEQUENCE atlassian_identities_user_id_seq
ALTER SEQUENCE atlassian_identities_user_id_seq OWNED BY atlassian_identities.user_id;
CREATE TABLE audit_events (
CREATE TABLE audit_events_archived (
id integer NOT NULL,
author_id integer NOT NULL,
entity_id integer NOT NULL,
......@@ -19159,11 +19159,11 @@ ALTER TABLE ONLY ar_internal_metadata
ALTER TABLE ONLY atlassian_identities
ADD CONSTRAINT atlassian_identities_pkey PRIMARY KEY (user_id);
ALTER TABLE ONLY audit_events_part_5fc467ac26
ADD CONSTRAINT audit_events_part_5fc467ac26_pkey PRIMARY KEY (id, created_at);
ALTER TABLE ONLY audit_events_archived
ADD CONSTRAINT audit_events_archived_pkey PRIMARY KEY (id);
ALTER TABLE ONLY audit_events
ADD CONSTRAINT audit_events_pkey PRIMARY KEY (id);
ADD CONSTRAINT audit_events_pkey PRIMARY KEY (id, created_at);
ALTER TABLE ONLY authentication_events
ADD CONSTRAINT authentication_events_pkey PRIMARY KEY (id);
......@@ -20536,9 +20536,9 @@ CREATE INDEX product_analytics_events_experi_project_id_collector_tstamp_idx ON
CREATE INDEX active_billable_users ON users USING btree (id) WHERE (((state)::text = 'active'::text) AND ((user_type IS NULL) OR (user_type = ANY (ARRAY[NULL::integer, 6, 4]))) AND ((user_type IS NULL) OR (user_type <> ALL ('{2,6,1,3,7,8}'::smallint[]))));
CREATE INDEX analytics_index_audit_events_on_created_at_and_author_id ON audit_events USING btree (created_at, author_id);
CREATE INDEX analytics_index_audit_events_on_created_at_and_author_id ON audit_events_archived USING btree (created_at, author_id);
CREATE INDEX analytics_index_audit_events_part_on_created_at_and_author_id ON ONLY audit_events_part_5fc467ac26 USING btree (created_at, author_id);
CREATE INDEX analytics_index_audit_events_part_on_created_at_and_author_id ON ONLY audit_events USING btree (created_at, author_id);
CREATE INDEX analytics_index_events_on_created_at_and_author_id ON events USING btree (created_at, author_id);
......@@ -20582,9 +20582,9 @@ CREATE UNIQUE INDEX epic_user_mentions_on_epic_id_index ON epic_user_mentions US
CREATE INDEX finding_links_on_vulnerability_occurrence_id ON vulnerability_finding_links USING btree (vulnerability_occurrence_id);
CREATE INDEX idx_audit_events_on_entity_id_desc_author_id_created_at ON audit_events USING btree (entity_id, entity_type, id DESC, author_id, created_at);
CREATE INDEX idx_audit_events_on_entity_id_desc_author_id_created_at ON audit_events_archived USING btree (entity_id, entity_type, id DESC, author_id, created_at);
CREATE INDEX idx_audit_events_part_on_entity_id_desc_author_id_created_at ON ONLY audit_events_part_5fc467ac26 USING btree (entity_id, entity_type, id DESC, author_id, created_at);
CREATE INDEX idx_audit_events_part_on_entity_id_desc_author_id_created_at ON ONLY audit_events USING btree (entity_id, entity_type, id DESC, author_id, created_at);
CREATE INDEX idx_ci_pipelines_artifacts_locked ON ci_pipelines USING btree (ci_ref_id, id) WHERE (locked = 1);
......
......@@ -19,7 +19,7 @@ RSpec.describe 'Database schema' do
approver_groups: %w[target_id],
approvers: %w[target_id user_id],
audit_events: %w[author_id entity_id target_id],
audit_events_part_5fc467ac26: %w[author_id entity_id target_id],
audit_events_archived: %w[author_id entity_id target_id],
award_emoji: %w[awardable_id user_id],
aws_roles: %w[role_external_id],
boards: %w[milestone_id iteration_id],
......
......@@ -12,16 +12,27 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
let_it_be(:connection) { ActiveRecord::Base.connection }
let(:source_table) { :audit_events }
let(:source_table) { :_test_original_table }
let(:partitioned_table) { '_test_migration_partitioned_table' }
let(:function_name) { '_test_migration_function_name' }
let(:trigger_name) { '_test_migration_trigger_name' }
let(:partition_column) { 'created_at' }
let(:min_date) { Date.new(2019, 12) }
let(:max_date) { Date.new(2020, 3) }
let(:source_model) { Class.new(ActiveRecord::Base) }
before do
allow(migration).to receive(:puts)
migration.create_table source_table do |t|
t.string :name, null: false
t.integer :age, null: false
t.datetime partition_column
t.datetime :updated_at
end
source_model.table_name = source_table
allow(migration).to receive(:transaction_open?).and_return(false)
allow(migration).to receive(:make_partitioned_table_name).and_return(partitioned_table)
allow(migration).to receive(:make_sync_function_name).and_return(function_name)
......@@ -81,14 +92,11 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
context 'when the given table does not have a primary key' do
let(:source_table) { :_partitioning_migration_helper_test_table }
let(:partition_column) { :some_field }
it 'raises an error' do
migration.create_table source_table, id: false do |t|
t.integer :id
t.datetime partition_column
end
migration.execute(<<~SQL)
ALTER TABLE #{source_table}
DROP CONSTRAINT #{source_table}_pkey
SQL
expect do
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
......@@ -97,12 +105,12 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
context 'when an invalid partition column is given' do
let(:partition_column) { :_this_is_not_real }
let(:invalid_column) { :_this_is_not_real }
it 'raises an error' do
expect do
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
end.to raise_error(/partition column #{partition_column} does not exist/)
migration.partition_table_by_date source_table, invalid_column, min_date: min_date, max_date: max_date
end.to raise_error(/partition column #{invalid_column} does not exist/)
end
end
......@@ -126,19 +134,19 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
context 'with a non-integer primary key datatype' do
before do
connection.create_table :another_example, id: false do |t|
connection.create_table non_int_table, id: false do |t|
t.string :identifier, primary_key: true
t.timestamp :created_at
end
end
let(:source_table) { :another_example }
let(:non_int_table) { :another_example }
let(:old_primary_key) { 'identifier' }
it 'does not change the primary key datatype' do
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date non_int_table, partition_column, min_date: min_date, max_date: max_date
original_pk_column = connection.columns(source_table).find { |c| c.name == old_primary_key }
original_pk_column = connection.columns(non_int_table).find { |c| c.name == old_primary_key }
pk_column = connection.columns(partitioned_table).find { |c| c.name == old_primary_key }
expect(pk_column).not_to be_nil
......@@ -176,11 +184,9 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
context 'when min_date is not given' do
let(:source_table) { :todos }
context 'with records present already' do
before do
create(:todo, created_at: Date.parse('2019-11-05'))
source_model.create!(name: 'Test', age: 10, created_at: Date.parse('2019-11-05'))
end
it 'creates a partition spanning over each month from the first record' do
......@@ -248,13 +254,12 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
describe 'keeping data in sync with the partitioned table' do
let(:source_table) { :todos }
let(:model) { Class.new(ActiveRecord::Base) }
let(:partitioned_model) { Class.new(ActiveRecord::Base) }
let(:timestamp) { Time.utc(2019, 12, 1, 12).round }
before do
model.primary_key = :id
model.table_name = partitioned_table
partitioned_model.primary_key = :id
partitioned_model.table_name = partitioned_table
end
it 'creates a trigger function on the original table' do
......@@ -270,50 +275,50 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
it 'syncs inserts to the partitioned tables' do
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
expect(model.count).to eq(0)
expect(partitioned_model.count).to eq(0)
first_todo = create(:todo, created_at: timestamp, updated_at: timestamp)
second_todo = create(:todo, created_at: timestamp, updated_at: timestamp)
first_record = source_model.create!(name: 'Bob', age: 20, created_at: timestamp, updated_at: timestamp)
second_record = source_model.create!(name: 'Alice', age: 30, created_at: timestamp, updated_at: timestamp)
expect(model.count).to eq(2)
expect(model.find(first_todo.id).attributes).to eq(first_todo.attributes)
expect(model.find(second_todo.id).attributes).to eq(second_todo.attributes)
expect(partitioned_model.count).to eq(2)
expect(partitioned_model.find(first_record.id).attributes).to eq(first_record.attributes)
expect(partitioned_model.find(second_record.id).attributes).to eq(second_record.attributes)
end
it 'syncs updates to the partitioned tables' do
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
first_todo = create(:todo, :pending, commit_id: nil, created_at: timestamp, updated_at: timestamp)
second_todo = create(:todo, created_at: timestamp, updated_at: timestamp)
first_record = source_model.create!(name: 'Bob', age: 20, created_at: timestamp, updated_at: timestamp)
second_record = source_model.create!(name: 'Alice', age: 30, created_at: timestamp, updated_at: timestamp)
expect(model.count).to eq(2)
expect(partitioned_model.count).to eq(2)
first_copy = model.find(first_todo.id)
second_copy = model.find(second_todo.id)
first_copy = partitioned_model.find(first_record.id)
second_copy = partitioned_model.find(second_record.id)
expect(first_copy.attributes).to eq(first_todo.attributes)
expect(second_copy.attributes).to eq(second_todo.attributes)
expect(first_copy.attributes).to eq(first_record.attributes)
expect(second_copy.attributes).to eq(second_record.attributes)
first_todo.update(state_event: 'done', commit_id: 'abc123', updated_at: timestamp + 1.second)
first_record.update!(age: 21, updated_at: timestamp + 1.hour)
expect(model.count).to eq(2)
expect(first_copy.reload.attributes).to eq(first_todo.attributes)
expect(second_copy.reload.attributes).to eq(second_todo.attributes)
expect(partitioned_model.count).to eq(2)
expect(first_copy.reload.attributes).to eq(first_record.attributes)
expect(second_copy.reload.attributes).to eq(second_record.attributes)
end
it 'syncs deletes to the partitioned tables' do
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
first_todo = create(:todo, created_at: timestamp, updated_at: timestamp)
second_todo = create(:todo, created_at: timestamp, updated_at: timestamp)
first_record = source_model.create!(name: 'Bob', age: 20, created_at: timestamp, updated_at: timestamp)
second_record = source_model.create!(name: 'Alice', age: 30, created_at: timestamp, updated_at: timestamp)
expect(model.count).to eq(2)
expect(partitioned_model.count).to eq(2)
first_todo.destroy
first_record.destroy!
expect(model.count).to eq(1)
expect(model.find_by_id(first_todo.id)).to be_nil
expect(model.find(second_todo.id).attributes).to eq(second_todo.attributes)
expect(partitioned_model.count).to eq(1)
expect(partitioned_model.find_by_id(first_record.id)).to be_nil
expect(partitioned_model.find(second_record.id).attributes).to eq(second_record.attributes)
end
end
end
......@@ -388,13 +393,12 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
context 'when records exist in the source table' do
let(:source_table) { 'todos' }
let(:migration_class) { '::Gitlab::Database::PartitioningMigrationHelpers::BackfillPartitionedTable' }
let(:sub_batch_size) { described_class::SUB_BATCH_SIZE }
let(:pause_seconds) { described_class::PAUSE_SECONDS }
let!(:first_id) { create(:todo).id }
let!(:second_id) { create(:todo).id }
let!(:third_id) { create(:todo).id }
let!(:first_id) { source_model.create!(name: 'Bob', age: 20).id }
let!(:second_id) { source_model.create!(name: 'Alice', age: 30).id }
let!(:third_id) { source_model.create!(name: 'Sam', age: 40).id }
before do
stub_const("#{described_class.name}::BATCH_SIZE", 2)
......@@ -410,10 +414,10 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
first_job_arguments = [first_id, second_id, source_table, partitioned_table, 'id']
first_job_arguments = [first_id, second_id, source_table.to_s, partitioned_table, 'id']
expect(BackgroundMigrationWorker.jobs[0]['args']).to eq([migration_class, first_job_arguments])
second_job_arguments = [third_id, third_id, source_table, partitioned_table, 'id']
second_job_arguments = [third_id, third_id, source_table.to_s, partitioned_table, 'id']
expect(BackgroundMigrationWorker.jobs[1]['args']).to eq([migration_class, second_job_arguments])
end
end
......@@ -482,7 +486,6 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
describe '#finalize_backfilling_partitioned_table' do
let(:source_table) { 'todos' }
let(:source_column) { 'id' }
context 'when the table is not allowed' do
......@@ -536,27 +539,27 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
context 'when there is missed data' do
let(:partitioned_model) { Class.new(ActiveRecord::Base) }
let(:timestamp) { Time.utc(2019, 12, 1, 12).round }
let!(:todo1) { create(:todo, created_at: timestamp, updated_at: timestamp) }
let!(:todo2) { create(:todo, created_at: timestamp, updated_at: timestamp) }
let!(:todo3) { create(:todo, created_at: timestamp, updated_at: timestamp) }
let!(:todo4) { create(:todo, created_at: timestamp, updated_at: timestamp) }
let!(:record1) { source_model.create!(name: 'Bob', age: 20, created_at: timestamp, updated_at: timestamp) }
let!(:record2) { source_model.create!(name: 'Alice', age: 30, created_at: timestamp, updated_at: timestamp) }
let!(:record3) { source_model.create!(name: 'Sam', age: 40, created_at: timestamp, updated_at: timestamp) }
let!(:record4) { source_model.create!(name: 'Sue', age: 50, created_at: timestamp, updated_at: timestamp) }
let!(:pending_job1) do
create(:background_migration_job,
class_name: described_class::MIGRATION_CLASS_NAME,
arguments: [todo1.id, todo2.id, source_table, partitioned_table, source_column])
arguments: [record1.id, record2.id, source_table, partitioned_table, source_column])
end
let!(:pending_job2) do
create(:background_migration_job,
class_name: described_class::MIGRATION_CLASS_NAME,
arguments: [todo3.id, todo3.id, source_table, partitioned_table, source_column])
arguments: [record3.id, record3.id, source_table, partitioned_table, source_column])
end
let!(:succeeded_job) do
create(:background_migration_job, :succeeded,
class_name: described_class::MIGRATION_CLASS_NAME,
arguments: [todo4.id, todo4.id, source_table, partitioned_table, source_column])
arguments: [record4.id, record4.id, source_table, partitioned_table, source_column])
end
before do
......@@ -575,17 +578,17 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
it 'idempotently cleans up after failed background migrations' do
expect(partitioned_model.count).to eq(0)
partitioned_model.insert!(todo2.attributes)
partitioned_model.insert!(record2.attributes)
expect_next_instance_of(Gitlab::Database::PartitioningMigrationHelpers::BackfillPartitionedTable) do |backfill|
allow(backfill).to receive(:transaction_open?).and_return(false)
expect(backfill).to receive(:perform)
.with(todo1.id, todo2.id, source_table, partitioned_table, source_column)
.with(record1.id, record2.id, source_table, partitioned_table, source_column)
.and_call_original
expect(backfill).to receive(:perform)
.with(todo3.id, todo3.id, source_table, partitioned_table, source_column)
.with(record3.id, record3.id, source_table, partitioned_table, source_column)
.and_call_original
end
......@@ -593,12 +596,12 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
expect(partitioned_model.count).to eq(3)
[todo1, todo2, todo3].each do |original|
[record1, record2, record3].each do |original|
copy = partitioned_model.find(original.id)
expect(copy.attributes).to eq(original.attributes)
end
expect(partitioned_model.find_by_id(todo4.id)).to be_nil
expect(partitioned_model.find_by_id(record4.id)).to be_nil
[pending_job1, pending_job2].each do |job|
expect(job.reload).to be_succeeded
......
......@@ -2,35 +2,35 @@
require 'spec_helper'
RSpec.describe AuditEventPartitioned do
RSpec.describe AuditEventArchived do
let(:source_table) { AuditEvent }
let(:partitioned_table) { described_class }
let(:destination_table) { described_class }
it 'has the same columns as the source table' do
column_names_from_source_table = column_names(source_table)
column_names_from_partioned_table = column_names(partitioned_table)
column_names_from_destination_table = column_names(destination_table)
expect(column_names_from_partioned_table).to match_array(column_names_from_source_table)
expect(column_names_from_destination_table).to match_array(column_names_from_source_table)
end
it 'has the same null constraints as the source table' do
constraints_from_source_table = null_constraints(source_table)
constraints_from_partitioned_table = null_constraints(partitioned_table)
constraints_from_destination_table = null_constraints(destination_table)
expect(constraints_from_partitioned_table.to_a).to match_array(constraints_from_source_table.to_a)
expect(constraints_from_destination_table.to_a).to match_array(constraints_from_source_table.to_a)
end
it 'inserts the same record as the one in the source table', :aggregate_failures do
expect { create(:audit_event) }.to change { partitioned_table.count }.by(1)
expect { create(:audit_event) }.to change { destination_table.count }.by(1)
event_from_source_table = source_table.connection.select_one(
"SELECT * FROM #{source_table.table_name} ORDER BY created_at desc LIMIT 1"
)
event_from_partitioned_table = partitioned_table.connection.select_one(
"SELECT * FROM #{partitioned_table.table_name} ORDER BY created_at desc LIMIT 1"
event_from_destination_table = destination_table.connection.select_one(
"SELECT * FROM #{destination_table.table_name} ORDER BY created_at desc LIMIT 1"
)
expect(event_from_partitioned_table).to eq(event_from_source_table)
expect(event_from_destination_table).to eq(event_from_source_table)
end
def column_names(table)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment