Commit 6927d13a authored by Mayra Cabrera's avatar Mayra Cabrera

Merge branch '290008-remove-duplicate-services' into 'master'

Remove duplicate services in background migration

See merge request gitlab-org/gitlab!49463
parents 0e10ba46 2d16217a
---
title: Remove duplicate service records
merge_request: 49463
author:
type: fixed
# frozen_string_literal: true
class RemoveDuplicateServices < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
INTERVAL = 2.minutes
BATCH_SIZE = 5_000
MIGRATION = 'RemoveDuplicateServices'
disable_ddl_transaction!
def up
project_ids_with_duplicates = Gitlab::BackgroundMigration::RemoveDuplicateServices::Service.project_ids_with_duplicates
project_ids_with_duplicates.each_batch(of: BATCH_SIZE, column: :project_id) do |batch, index|
migrate_in(
INTERVAL * index,
MIGRATION,
batch.pluck(:project_id)
)
end
end
def down
end
end
1ad19d6b4bc37d24f61f158aa58e4ce6be75cc54722cdc59427c00522fd40b4c
\ No newline at end of file
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# Remove duplicated service records with the same project and type.
# These were created in the past for unknown reasons, and should be blocked
# now by the uniqueness validation in the Service model.
class RemoveDuplicateServices
# See app/models/service
class Service < ActiveRecord::Base
include EachBatch
self.table_name = 'services'
self.inheritance_column = :_type_disabled
scope :project_ids_with_duplicates, -> do
select(:project_id)
.distinct
.where.not(project_id: nil)
.group(:project_id, :type)
.having('count(*) > 1')
end
scope :types_with_duplicates, -> (project_ids) do
select(:project_id, :type)
.where(project_id: project_ids)
.group(:project_id, :type)
.having('count(*) > 1')
end
end
def perform(project_ids)
types_with_duplicates = Service.types_with_duplicates(project_ids).pluck(:project_id, :type)
types_with_duplicates.each do |project_id, type|
remove_duplicates(project_id, type)
end
end
private
def remove_duplicates(project_id, type)
scope = Service.where(project_id: project_id, type: type)
# Build a subquery to determine which service record is actually in use,
# by querying for it without specifying an order.
#
# This should match the record returned by `Project#find_service`,
# and the `has_one` service associations on `Project`.
correct_service = scope.select(:id).limit(1)
# Delete all other services with the same `project_id` and `type`
duplicate_services = scope.where.not(id: correct_service)
duplicate_services.delete_all
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::RemoveDuplicateServices, :migration, schema: 20201207165956 do
let_it_be(:users) { table(:users) }
let_it_be(:namespaces) { table(:namespaces) }
let_it_be(:projects) { table(:projects) }
let_it_be(:services) { table(:services) }
let_it_be(:alerts_service_data) { table(:alerts_service_data) }
let_it_be(:chat_names) { table(:chat_names) }
let_it_be(:issue_tracker_data) { table(:issue_tracker_data) }
let_it_be(:jira_tracker_data) { table(:jira_tracker_data) }
let_it_be(:open_project_tracker_data) { table(:open_project_tracker_data) }
let_it_be(:slack_integrations) { table(:slack_integrations) }
let_it_be(:web_hooks) { table(:web_hooks) }
let_it_be(:data_tables) do
[alerts_service_data, chat_names, issue_tracker_data, jira_tracker_data, open_project_tracker_data, slack_integrations, web_hooks]
end
let!(:user) { users.create!(id: 1, projects_limit: 100) }
let!(:namespace) { namespaces.create!(id: 1, name: 'group', path: 'group') }
# project without duplicate services
let!(:project1) { projects.create!(id: 1, namespace_id: namespace.id) }
let!(:service1) { services.create!(id: 1, project_id: project1.id, type: 'AsanaService') }
let!(:service2) { services.create!(id: 2, project_id: project1.id, type: 'JiraService') }
let!(:service3) { services.create!(id: 3, project_id: project1.id, type: 'SlackService') }
# project with duplicate services
let!(:project2) { projects.create!(id: 2, namespace_id: namespace.id) }
let!(:service4) { services.create!(id: 4, project_id: project2.id, type: 'AsanaService') }
let!(:service5) { services.create!(id: 5, project_id: project2.id, type: 'JiraService') }
let!(:service6) { services.create!(id: 6, project_id: project2.id, type: 'JiraService') }
let!(:service7) { services.create!(id: 7, project_id: project2.id, type: 'SlackService') }
let!(:service8) { services.create!(id: 8, project_id: project2.id, type: 'SlackService') }
let!(:service9) { services.create!(id: 9, project_id: project2.id, type: 'SlackService') }
# project with duplicate services and dependant records
let!(:project3) { projects.create!(id: 3, namespace_id: namespace.id) }
let!(:service10) { services.create!(id: 10, project_id: project3.id, type: 'AlertsService') }
let!(:service11) { services.create!(id: 11, project_id: project3.id, type: 'AlertsService') }
let!(:service12) { services.create!(id: 12, project_id: project3.id, type: 'SlashCommandsService') }
let!(:service13) { services.create!(id: 13, project_id: project3.id, type: 'SlashCommandsService') }
let!(:service14) { services.create!(id: 14, project_id: project3.id, type: 'IssueTrackerService') }
let!(:service15) { services.create!(id: 15, project_id: project3.id, type: 'IssueTrackerService') }
let!(:service16) { services.create!(id: 16, project_id: project3.id, type: 'JiraService') }
let!(:service17) { services.create!(id: 17, project_id: project3.id, type: 'JiraService') }
let!(:service18) { services.create!(id: 18, project_id: project3.id, type: 'OpenProjectService') }
let!(:service19) { services.create!(id: 19, project_id: project3.id, type: 'OpenProjectService') }
let!(:service20) { services.create!(id: 20, project_id: project3.id, type: 'SlackService') }
let!(:service21) { services.create!(id: 21, project_id: project3.id, type: 'SlackService') }
let!(:dependant_records) do
alerts_service_data.create!(id: 1, service_id: service10.id)
alerts_service_data.create!(id: 2, service_id: service11.id)
chat_names.create!(id: 1, service_id: service12.id, user_id: user.id, team_id: 'team1', chat_id: 'chat1')
chat_names.create!(id: 2, service_id: service13.id, user_id: user.id, team_id: 'team2', chat_id: 'chat2')
issue_tracker_data.create!(id: 1, service_id: service14.id)
issue_tracker_data.create!(id: 2, service_id: service15.id)
jira_tracker_data.create!(id: 1, service_id: service16.id)
jira_tracker_data.create!(id: 2, service_id: service17.id)
open_project_tracker_data.create!(id: 1, service_id: service18.id)
open_project_tracker_data.create!(id: 2, service_id: service19.id)
slack_integrations.create!(id: 1, service_id: service20.id, user_id: user.id, team_id: 'team1', team_name: 'team1', alias: 'alias1')
slack_integrations.create!(id: 2, service_id: service21.id, user_id: user.id, team_id: 'team2', team_name: 'team2', alias: 'alias2')
web_hooks.create!(id: 1, service_id: service20.id)
web_hooks.create!(id: 2, service_id: service21.id)
end
# project without services
let!(:project4) { projects.create!(id: 4, namespace_id: namespace.id) }
it 'removes duplicate services and dependant records' do
# Determine which services we expect to keep
expected_services = projects.pluck(:id).each_with_object({}) do |project_id, map|
project_services = services.where(project_id: project_id)
types = project_services.distinct.pluck(:type)
map[project_id] = types.map { |type| project_services.where(type: type).take!.id }
end
expect do
subject.perform([project2.id, project3.id])
end.to change { services.count }.from(21).to(12)
services1 = services.where(project_id: project1.id)
expect(services1.count).to be(3)
expect(services1.pluck(:type)).to contain_exactly('AsanaService', 'JiraService', 'SlackService')
expect(services1.pluck(:id)).to contain_exactly(*expected_services[project1.id])
services2 = services.where(project_id: project2.id)
expect(services2.count).to be(3)
expect(services2.pluck(:type)).to contain_exactly('AsanaService', 'JiraService', 'SlackService')
expect(services2.pluck(:id)).to contain_exactly(*expected_services[project2.id])
services3 = services.where(project_id: project3.id)
expect(services3.count).to be(6)
expect(services3.pluck(:type)).to contain_exactly('AlertsService', 'SlashCommandsService', 'IssueTrackerService', 'JiraService', 'OpenProjectService', 'SlackService')
expect(services3.pluck(:id)).to contain_exactly(*expected_services[project3.id])
kept_services = expected_services.values.flatten
data_tables.each do |table|
expect(table.count).to be(1)
expect(kept_services).to include(table.pluck(:service_id).first)
end
end
it 'does not delete services without duplicates' do
expect do
subject.perform([project1.id, project4.id])
end.not_to change { services.count }
end
it 'only deletes duplicate services for the current batch' do
expect do
subject.perform([project2.id])
end.to change { services.count }.by(-3)
end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20201207165956_remove_duplicate_services.rb')
RSpec.describe RemoveDuplicateServices do
let_it_be(:namespaces) { table(:namespaces) }
let_it_be(:projects) { table(:projects) }
let_it_be(:services) { table(:services) }
describe '#up' do
before do
stub_const("#{described_class}::BATCH_SIZE", 2)
namespaces.create!(id: 1, name: 'group', path: 'group')
projects.create!(id: 1, namespace_id: 1) # duplicate services
projects.create!(id: 2, namespace_id: 1) # normal services
projects.create!(id: 3, namespace_id: 1) # no services
projects.create!(id: 4, namespace_id: 1) # duplicate services
projects.create!(id: 5, namespace_id: 1) # duplicate services
services.create!(id: 1, project_id: 1, type: 'JiraService')
services.create!(id: 2, project_id: 1, type: 'JiraService')
services.create!(id: 3, project_id: 2, type: 'JiraService')
services.create!(id: 4, project_id: 4, type: 'AsanaService')
services.create!(id: 5, project_id: 4, type: 'AsanaService')
services.create!(id: 6, project_id: 4, type: 'JiraService')
services.create!(id: 7, project_id: 4, type: 'JiraService')
services.create!(id: 8, project_id: 4, type: 'SlackService')
services.create!(id: 9, project_id: 4, type: 'SlackService')
services.create!(id: 10, project_id: 5, type: 'JiraService')
services.create!(id: 11, project_id: 5, type: 'JiraService')
# Services without a project_id should be ignored
services.create!(id: 12, type: 'JiraService')
services.create!(id: 13, type: 'JiraService')
end
it 'schedules background jobs for all projects with duplicate services' do
Sidekiq::Testing.fake! do
freeze_time do
migrate!
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(2.minutes, 1, 4)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(4.minutes, 5)
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment