Commit e66535e8 authored by Oswaldo Ferreira's avatar Oswaldo Ferreira

Create a diff deletion worker scheduler to avoid long-running post-migration

parent 80a7be87
......@@ -11,8 +11,7 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
DOWNTIME = false
BATCH_SIZE = 1000
MIGRATION = 'DeleteDiffFiles'
DELAY_INTERVAL = 10.minutes
SCHEDULER = 'ScheduleDiffFilesDeletion'.freeze
TMP_INDEX = 'tmp_partial_diff_id_with_files_index'.freeze
disable_ddl_transaction!
......@@ -38,23 +37,10 @@ class EnqueueDeleteDiffFilesWorkers < ActiveRecord::Migration
# Planning time: 0.752 ms
# Execution time: 12.430 ms
#
diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, outer_index|
# We slice the batches in groups of 5 and schedule each group of 5 at
# once. This should make writings on Redis go 5x faster.
job_batches = relation.pluck(:id).in_groups_of(5, false).map do |ids|
ids.map { |id| [MIGRATION, [id]] }
end
job_batches.each_with_index do |jobs, inner_index|
# This will give some space between batches of workers.
interval = DELAY_INTERVAL * outer_index + inner_index.minutes
diffs_with_files.each_batch(of: BATCH_SIZE) do |relation, scheduler_index|
ids = relation.pluck(:id).map { |id| [id] }
# A single `merge_request_diff` can be associated with way too many
# `merge_request_diff_files`. It's better to avoid scheduling big
# batches and go with 5 at a time.
#
BackgroundMigrationWorker.bulk_perform_in(interval, jobs)
end
BackgroundMigrationWorker.perform_async(SCHEDULER, [ids, scheduler_index])
end
# We remove temporary index, because it is not required during standard
......
# frozen_string_literal: true
# rubocop:disable Metrics/AbcSize
# rubocop:disable Style/Documentation
module Gitlab
module BackgroundMigration
class ScheduleDiffFilesDeletion
BATCH_SIZE = 5
MIGRATION = 'DeleteDiffFiles'
DELAY_INTERVAL = 10.minutes
def perform(diff_ids, scheduler_index)
relation = MergeRequestDiff.where(id: diff_ids)
job_batches = relation.pluck(:id).in_groups_of(BATCH_SIZE, false).map do |ids|
ids.map { |id| [MIGRATION, [id]] }
end
job_batches.each_with_index do |jobs, inner_index|
# This will give some space between batches of workers.
interval = DELAY_INTERVAL * scheduler_index + inner_index.minutes
# A single `merge_request_diff` can be associated with way too many
# `merge_request_diff_files`. It's better to avoid scheduling big
# batches and go with 5 at a time.
#
BackgroundMigrationWorker.bulk_perform_in(interval, jobs)
end
end
end
end
end
require 'spec_helper'
describe Gitlab::BackgroundMigration::ScheduleDiffFilesDeletion, :migration, schema: 20180619121030 do
describe '#perform' do
let(:merge_request_diffs) { table(:merge_request_diffs) }
let(:merge_requests) { table(:merge_requests) }
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
before do
stub_const("#{described_class.name}::BATCH_SIZE", 3)
namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab')
projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab')
merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master', state: 'merged')
merge_request_diffs.create!(id: 1, merge_request_id: 1)
merge_request_diffs.create!(id: 2, merge_request_id: 1)
merge_request_diffs.create!(id: 3, merge_request_id: 1)
merge_request_diffs.create!(id: 4, merge_request_id: 1)
merge_request_diffs.create!(id: 5, merge_request_id: 1)
end
it 'correctly schedules diff file deletion workers' do
Sidekiq::Testing.fake! do
Timecop.freeze do
described_class.new.perform([1, 2, 3, 4, 5], 1)
[1, 2, 3].each do |id|
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, id)
end
[4, 5].each do |id|
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(11.minutes, id)
end
expect(BackgroundMigrationWorker.jobs.size).to eq(5)
end
end
end
end
end
......@@ -8,7 +8,7 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
let(:projects) { table(:projects) }
before do
stub_const("#{described_class.name}::BATCH_SIZE", 7)
stub_const("#{described_class.name}::BATCH_SIZE", 4)
namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab')
projects.create!(id: 1, namespace_id: 1, name: 'gitlab', path: 'gitlab')
......@@ -22,35 +22,25 @@ describe EnqueueDeleteDiffFilesWorkers, :migration, :sidekiq do
merge_request_diffs.create!(id: 5, merge_request_id: 1, state: 'empty')
merge_request_diffs.create!(id: 6, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 7, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 8, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 9, merge_request_id: 1, state: 'collected')
merge_request_diffs.create!(id: 10, merge_request_id: 1, state: 'collected')
merge_requests.update(1, latest_merge_request_diff_id: 6)
end
it 'correctly schedules diff file deletion workers' do
it 'correctly schedules diff file deletion workers schedulers' do
Sidekiq::Testing.fake! do
Timecop.freeze do
migrate!
# First scheduling batch
expect(BackgroundMigrationWorker).to receive(:perform_async)
.with(described_class::SCHEDULER, [[[1], [3], [4], [6]], 1])
.and_call_original
# 1st batch schedule
[1, 3, 4, 6, 7].each do |id|
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, id)
end
[8, 9].each do |id|
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(11.minutes, id)
end
# Second scheduling batch
expect(BackgroundMigrationWorker).to receive(:perform_async)
.with(described_class::SCHEDULER, [[[7]], 2])
.and_call_original
# 2nd batch schedule
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(20.minutes, 10)
expect(BackgroundMigrationWorker.jobs.size).to eq(8)
end
end
end
migrate!
it 'migrates the data' do
expect { migrate! }.to change { merge_request_diffs.where(state: 'without_files').count }
.from(1).to(4)
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment