Commit 0dc03363 authored by Adam Hegyi's avatar Adam Hegyi

Dedup issue_metrics table

This change adds a migration to de-duplicate issue_metrics table and
prevent further duplication by adding a unique index on issue_id.
parent eb3a3410
...@@ -24,6 +24,8 @@ class Issue < ApplicationRecord ...@@ -24,6 +24,8 @@ class Issue < ApplicationRecord
include Todoable include Todoable
include FromUnion include FromUnion
extend ::Gitlab::Utils::Override
DueDateStruct = Struct.new(:title, :name).freeze DueDateStruct = Struct.new(:title, :name).freeze
NoDueDate = DueDateStruct.new('No Due Date', '0').freeze NoDueDate = DueDateStruct.new('No Due Date', '0').freeze
AnyDueDate = DueDateStruct.new('Any Due Date', '').freeze AnyDueDate = DueDateStruct.new('Any Due Date', '').freeze
...@@ -448,8 +450,14 @@ class Issue < ApplicationRecord ...@@ -448,8 +450,14 @@ class Issue < ApplicationRecord
private private
# Ensure that the metrics association is safely created and respecting the unique constraint on issue_id
override :ensure_metrics
def ensure_metrics def ensure_metrics
super if !association(:metrics).loaded? || metrics.blank?
metrics_record = Issue::Metrics.safe_find_or_create_by(issue: self)
self.metrics = metrics_record
end
metrics.record! metrics.record!
end end
......
---
title: Deduplicate issue_metrics table
merge_request: 55285
author:
type: other
# frozen_string_literal: true
class DedupIssueMetrics < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
TMP_INDEX_NAME = 'tmp_unique_issue_metrics_by_issue_id'
OLD_INDEX_NAME = 'index_issue_metrics'
INDEX_NAME = 'index_unique_issue_metrics_issue_id'
BATCH_SIZE = 1_000
disable_ddl_transaction!
class IssueMetrics < ActiveRecord::Base
self.table_name = 'issue_metrics'
include EachBatch
end
def up
IssueMetrics.reset_column_information
last_metrics_record_id = IssueMetrics.maximum(:id) || 0
# This index will disallow further duplicates while we're deduplicating the data.
add_concurrent_index(:issue_metrics, :issue_id, where: "id > #{Integer(last_metrics_record_id)}", unique: true, name: TMP_INDEX_NAME)
IssueMetrics.each_batch(of: BATCH_SIZE) do |relation|
duplicated_issue_ids = IssueMetrics
.where(issue_id: relation.select(:issue_id))
.select(:issue_id)
.group(:issue_id)
.having('COUNT(issue_metrics.issue_id) > 1')
.pluck(:issue_id)
duplicated_issue_ids.each do |issue_id|
deduplicate_item(issue_id)
end
end
add_concurrent_index(:issue_metrics, :issue_id, unique: true, name: INDEX_NAME)
remove_concurrent_index_by_name(:issue_metrics, TMP_INDEX_NAME)
remove_concurrent_index_by_name(:issue_metrics, OLD_INDEX_NAME)
end
def down
add_concurrent_index(:issue_metrics, :issue_id, name: OLD_INDEX_NAME)
remove_concurrent_index_by_name(:issue_metrics, TMP_INDEX_NAME)
remove_concurrent_index_by_name(:issue_metrics, INDEX_NAME)
end
private
def deduplicate_item(issue_id)
issue_metrics_records = IssueMetrics.where(issue_id: issue_id).order(updated_at: :asc).to_a
attributes = {}
issue_metrics_records.each do |issue_metrics_record|
params = issue_metrics_record.attributes.except('id')
attributes.merge!(params.compact)
end
ActiveRecord::Base.transaction do
record_to_keep = issue_metrics_records.pop
records_to_delete = issue_metrics_records
IssueMetrics.where(id: records_to_delete.map(&:id)).delete_all
record_to_keep.update!(attributes)
end
end
end
400dd521f5c462afdcb3c556815f840e916df7576a6d6dd301fe5a49a1fe6011
\ No newline at end of file
...@@ -22816,8 +22816,6 @@ CREATE UNIQUE INDEX index_issue_links_on_source_id_and_target_id ON issue_links ...@@ -22816,8 +22816,6 @@ CREATE UNIQUE INDEX index_issue_links_on_source_id_and_target_id ON issue_links
CREATE INDEX index_issue_links_on_target_id ON issue_links USING btree (target_id); CREATE INDEX index_issue_links_on_target_id ON issue_links USING btree (target_id);
CREATE INDEX index_issue_metrics ON issue_metrics USING btree (issue_id);
CREATE INDEX index_issue_metrics_on_issue_id_and_timestamps ON issue_metrics USING btree (issue_id, first_mentioned_in_commit_at, first_associated_with_milestone_at, first_added_to_board_at); CREATE INDEX index_issue_metrics_on_issue_id_and_timestamps ON issue_metrics USING btree (issue_id, first_mentioned_in_commit_at, first_associated_with_milestone_at, first_added_to_board_at);
CREATE INDEX index_issue_on_project_id_state_id_and_blocking_issues_count ON issues USING btree (project_id, state_id, blocking_issues_count); CREATE INDEX index_issue_on_project_id_state_id_and_blocking_issues_count ON issues USING btree (project_id, state_id, blocking_issues_count);
...@@ -23966,6 +23964,8 @@ CREATE INDEX index_u2f_registrations_on_key_handle ON u2f_registrations USING bt ...@@ -23966,6 +23964,8 @@ CREATE INDEX index_u2f_registrations_on_key_handle ON u2f_registrations USING bt
CREATE INDEX index_u2f_registrations_on_user_id ON u2f_registrations USING btree (user_id); CREATE INDEX index_u2f_registrations_on_user_id ON u2f_registrations USING btree (user_id);
CREATE UNIQUE INDEX index_unique_issue_metrics_issue_id ON issue_metrics USING btree (issue_id);
CREATE INDEX index_unit_test_failures_failed_at ON ci_unit_test_failures USING btree (failed_at DESC); CREATE INDEX index_unit_test_failures_failed_at ON ci_unit_test_failures USING btree (failed_at DESC);
CREATE UNIQUE INDEX index_unit_test_failures_unique_columns ON ci_unit_test_failures USING btree (unit_test_id, failed_at DESC, build_id); CREATE UNIQUE INDEX index_unit_test_failures_unique_columns ON ci_unit_test_failures USING btree (unit_test_id, failed_at DESC, build_id);
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20210226141517_dedup_issue_metrics.rb')
RSpec.describe DedupIssueMetrics, :migration, schema: 20210205104425 do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:issues) { table(:issues) }
let(:metrics) { table(:issue_metrics) }
let(:issue_params) { { title: 'title', project_id: project.id } }
let!(:namespace) { namespaces.create!(name: 'foo', path: 'foo') }
let!(:project) { projects.create!(namespace_id: namespace.id) }
let!(:issue_1) { issues.create!(issue_params) }
let!(:issue_2) { issues.create!(issue_params) }
let!(:issue_3) { issues.create!(issue_params) }
let!(:duplicated_metrics_1) { metrics.create!(issue_id: issue_1.id, first_mentioned_in_commit_at: 1.day.ago, first_added_to_board_at: 5.days.ago, updated_at: 2.months.ago) }
let!(:duplicated_metrics_2) { metrics.create!(issue_id: issue_1.id, first_mentioned_in_commit_at: Time.now, first_associated_with_milestone_at: Time.now, updated_at: 1.month.ago) }
let!(:duplicated_metrics_3) { metrics.create!(issue_id: issue_3.id, first_mentioned_in_commit_at: 1.day.ago, updated_at: 2.months.ago) }
let!(:duplicated_metrics_4) { metrics.create!(issue_id: issue_3.id, first_added_to_board_at: 1.day.ago, updated_at: 1.month.ago) }
let!(:non_duplicated_metrics) { metrics.create!(issue_id: issue_2.id, first_added_to_board_at: 2.days.ago) }
it 'deduplicates issue_metrics table' do
expect { migrate! }.to change { metrics.count }.from(5).to(3)
end
it 'merges `duplicated_metrics_1` with `duplicated_metrics_2`' do
migrate!
expect(metrics.where(id: duplicated_metrics_1.id)).not_to exist
merged_metrics = metrics.find_by(id: duplicated_metrics_2.id)
expect(merged_metrics).to be_present
expect(merged_metrics.first_mentioned_in_commit_at).to be_like_time(duplicated_metrics_2.first_mentioned_in_commit_at)
expect(merged_metrics.first_added_to_board_at).to be_like_time(duplicated_metrics_1.first_added_to_board_at)
end
it 'merges `duplicated_metrics_3` with `duplicated_metrics_4`' do
migrate!
expect(metrics.where(id: duplicated_metrics_3.id)).not_to exist
merged_metrics = metrics.find_by(id: duplicated_metrics_4.id)
expect(merged_metrics).to be_present
expect(merged_metrics.first_mentioned_in_commit_at).to be_like_time(duplicated_metrics_3.first_mentioned_in_commit_at)
expect(merged_metrics.first_added_to_board_at).to be_like_time(duplicated_metrics_4.first_added_to_board_at)
end
it 'does not change non duplicated records' do
expect { migrate! }.not_to change { non_duplicated_metrics.reload.attributes }
end
it 'does nothing when there are no metrics' do
metrics.delete_all
migrate!
expect(metrics.count).to eq(0)
end
end
...@@ -85,18 +85,14 @@ RSpec.describe Issue do ...@@ -85,18 +85,14 @@ RSpec.describe Issue do
describe 'callbacks' do describe 'callbacks' do
describe '#ensure_metrics' do describe '#ensure_metrics' do
it 'creates metrics after saving' do it 'creates metrics after saving' do
issue = create(:issue, project: reusable_project) expect(subject.metrics).to be_persisted
expect(issue.metrics).to be_persisted
expect(Issue::Metrics.count).to eq(1) expect(Issue::Metrics.count).to eq(1)
end end
it 'does not create duplicate metrics for an issue' do it 'does not create duplicate metrics for an issue' do
issue = create(:issue, project: reusable_project) subject.close!
issue.close! expect(subject.metrics).to be_persisted
expect(issue.metrics).to be_persisted
expect(Issue::Metrics.count).to eq(1) expect(Issue::Metrics.count).to eq(1)
end end
...@@ -105,6 +101,20 @@ RSpec.describe Issue do ...@@ -105,6 +101,20 @@ RSpec.describe Issue do
create(:issue, project: reusable_project) create(:issue, project: reusable_project)
end end
context 'when metrics record is missing' do
before do
subject.metrics.delete
subject.reload
subject.metrics # make sure metrics association is cached (currently nil)
end
it 'creates the metrics record' do
subject.update!(title: 'title')
expect(subject.metrics).to be_present
end
end
end end
describe '#record_create_action' do describe '#record_create_action' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment