Commit 5a727a4d authored by Douwe Maan's avatar Douwe Maan

Merge branch 'osw-update-mr-metrics-with-events-data' into 'master'

Populate MR metrics with events table information (migration)

Closes #41587

See merge request gitlab-org/gitlab-ce!23564
parents 853ba4b6 4f6999fa
---
title: Populate MR metrics with events table information (migration)
merge_request: 23564
author:
type: performance
# frozen_string_literal: true
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class PopulateMrMetricsWithEventsData < ActiveRecord::Migration[4.2]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
BATCH_SIZE = 10_000
MIGRATION = 'PopulateMergeRequestMetricsWithEventsDataImproved'
PREVIOUS_MIGRATION = 'PopulateMergeRequestMetricsWithEventsData'
disable_ddl_transaction!
def up
# Perform any ongoing background migration that might still be running from
# previous try (see https://gitlab.com/gitlab-org/gitlab-ce/issues/47676).
Gitlab::BackgroundMigration.steal(PREVIOUS_MIGRATION)
say 'Scheduling `PopulateMergeRequestMetricsWithEventsData` jobs'
# It will update around 4_000_000 records in batches of 10_000 merge
# requests (running between 5 minutes) and should take around 53 hours to complete.
# Apparently, production PostgreSQL is able to vacuum 10k-20k dead_tuples
# per minute. So this should give us enough space.
#
# More information about the updates in `PopulateMergeRequestMetricsWithEventsDataImproved` class.
#
MergeRequest.all.each_batch(of: BATCH_SIZE) do |relation, index|
range = relation.pluck('MIN(id)', 'MAX(id)').first
BackgroundMigrationWorker.perform_in(index * 8.minutes, MIGRATION, range)
end
end
def down
end
end
......@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20181203002526) do
ActiveRecord::Schema.define(version: 20181204154019) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......
# frozen_string_literal: true
# rubocop:disable Style/Documentation
module Gitlab
module BackgroundMigration
class PopulateMergeRequestMetricsWithEventsDataImproved
CLOSED_EVENT_ACTION = 3
MERGED_EVENT_ACTION = 7
def perform(min_merge_request_id, max_merge_request_id)
insert_metrics_for_range(min_merge_request_id, max_merge_request_id)
update_metrics_with_events_data(min_merge_request_id, max_merge_request_id)
end
# Inserts merge_request_metrics records for merge_requests without it for
# a given merge request batch.
def insert_metrics_for_range(min, max)
metrics_not_exists_clause =
<<-SQL.strip_heredoc
NOT EXISTS (SELECT 1 FROM merge_request_metrics
WHERE merge_request_metrics.merge_request_id = merge_requests.id)
SQL
MergeRequest.where(metrics_not_exists_clause).where(id: min..max).each_batch do |batch|
select_sql = batch.select(:id, :created_at, :updated_at).to_sql
execute("INSERT INTO merge_request_metrics (merge_request_id, created_at, updated_at) #{select_sql}")
end
end
def update_metrics_with_events_data(min, max)
if Gitlab::Database.postgresql?
psql_update_metrics_with_events_data(min, max)
else
mysql_update_metrics_with_events_data(min, max)
end
end
def psql_update_metrics_with_events_data(min, max)
update_sql = <<-SQL.strip_heredoc
UPDATE merge_request_metrics
SET (latest_closed_at,
latest_closed_by_id) =
( SELECT updated_at,
author_id
FROM events
WHERE target_id = merge_request_id
AND target_type = 'MergeRequest'
AND action = #{CLOSED_EVENT_ACTION}
ORDER BY id DESC
LIMIT 1 ),
merged_by_id =
( SELECT author_id
FROM events
WHERE target_id = merge_request_id
AND target_type = 'MergeRequest'
AND action = #{MERGED_EVENT_ACTION}
ORDER BY id DESC
LIMIT 1 )
WHERE merge_request_id BETWEEN #{min} AND #{max}
SQL
execute(update_sql)
end
def mysql_update_metrics_with_events_data(min, max)
closed_updated_at_subquery = mysql_events_select(:updated_at, CLOSED_EVENT_ACTION)
closed_author_id_subquery = mysql_events_select(:author_id, CLOSED_EVENT_ACTION)
merged_author_id_subquery = mysql_events_select(:author_id, MERGED_EVENT_ACTION)
update_sql = <<-SQL.strip_heredoc
UPDATE merge_request_metrics
SET latest_closed_at = (#{closed_updated_at_subquery}),
latest_closed_by_id = (#{closed_author_id_subquery}),
merged_by_id = (#{merged_author_id_subquery})
WHERE merge_request_id BETWEEN #{min} AND #{max}
SQL
execute(update_sql)
end
def mysql_events_select(column, action)
<<-SQL.strip_heredoc
SELECT #{column} FROM events
WHERE target_id = merge_request_id
AND target_type = 'MergeRequest'
AND action = #{action}
ORDER BY id DESC
LIMIT 1
SQL
end
def execute(sql)
@connection ||= ActiveRecord::Base.connection
@connection.execute(sql)
end
end
end
end
# frozen_string_literal: true
require 'rails_helper'
describe Gitlab::BackgroundMigration::PopulateMergeRequestMetricsWithEventsDataImproved, :migration, schema: 20181204154019 do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:users) { table(:users) }
let(:events) { table(:events) }
let(:user) { users.create!(email: 'test@example.com', projects_limit: 100, username: 'test') }
let(:namespace) { namespaces.create(name: 'gitlab', path: 'gitlab-org') }
let(:project) { projects.create(namespace_id: namespace.id, name: 'foo') }
let(:merge_requests) { table(:merge_requests) }
def create_merge_request(id, params = {})
params.merge!(id: id,
target_project_id: project.id,
target_branch: 'master',
source_project_id: project.id,
source_branch: 'mr name',
title: "mr name#{id}")
merge_requests.create(params)
end
def create_merge_request_event(id, params = {})
params.merge!(id: id,
project_id: project.id,
author_id: user.id,
target_type: 'MergeRequest')
events.create(params)
end
describe '#perform' do
it 'creates and updates closed and merged events' do
timestamp = Time.new('2018-01-01 12:00:00').utc
create_merge_request(1)
create_merge_request_event(1, target_id: 1, action: 3, updated_at: timestamp)
create_merge_request_event(2, target_id: 1, action: 3, updated_at: timestamp + 10.seconds)
create_merge_request_event(3, target_id: 1, action: 7, updated_at: timestamp)
create_merge_request_event(4, target_id: 1, action: 7, updated_at: timestamp + 10.seconds)
subject.perform(1, 1)
merge_request = MergeRequest.first
expect(merge_request.metrics).to have_attributes(latest_closed_by_id: user.id,
latest_closed_at: timestamp + 10.seconds,
merged_by_id: user.id)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20181204154019_populate_mr_metrics_with_events_data.rb')
describe PopulateMrMetricsWithEventsData, :migration, :sidekiq do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:namespace) { namespaces.create(name: 'gitlab', path: 'gitlab-org') }
let(:project) { projects.create(namespace_id: namespace.id, name: 'foo') }
let(:merge_requests) { table(:merge_requests) }
def create_merge_request(id)
params = {
id: id,
target_project_id: project.id,
target_branch: 'master',
source_project_id: project.id,
source_branch: 'mr name',
title: "mr name#{id}"
}
merge_requests.create!(params)
end
it 'correctly schedules background migrations' do
create_merge_request(1)
create_merge_request(2)
create_merge_request(3)
stub_const("#{described_class.name}::BATCH_SIZE", 2)
Sidekiq::Testing.fake! do
Timecop.freeze do
migrate!
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(8.minutes, 1, 2)
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(16.minutes, 3, 3)
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment