Commit b6f2d9b6 authored by Heinrich Lee Yu

Merge branch 'ab/optimize-batched-migrations' into 'master'

Change batch size depending on how far off we are

See merge request gitlab-org/gitlab!60501
parents dee163de 61e8fc2c
...@@ -17,22 +17,26 @@ module Gitlab ...@@ -17,22 +17,26 @@ module Gitlab
class BatchOptimizer class BatchOptimizer
# Target time efficiency for a job # Target time efficiency for a job
# Time efficiency is defined as: job duration / interval # Time efficiency is defined as: job duration / interval
TARGET_EFFICIENCY = (0.8..0.98).freeze TARGET_EFFICIENCY = (0.9..0.95).freeze
# Lower and upper bound for the batch size # Lower and upper bound for the batch size
ALLOWED_BATCH_SIZE = (1_000..1_000_000).freeze ALLOWED_BATCH_SIZE = (1_000..2_000_000).freeze
# Use this batch_size multiplier to increase batch size # Limit for the multiplier of the batch size
INCREASE_MULTIPLIER = 1.1 MAX_MULTIPLIER = 1.2
# Use this batch_size multiplier to decrease batch size # When smoothing time efficiency, use this many jobs
DECREASE_MULTIPLIER = 0.8 NUMBER_OF_JOBS = 20
attr_reader :migration, :number_of_jobs # Smoothing factor for exponential moving average
EMA_ALPHA = 0.4
def initialize(migration, number_of_jobs: 10) attr_reader :migration, :number_of_jobs, :ema_alpha
def initialize(migration, number_of_jobs: NUMBER_OF_JOBS, ema_alpha: EMA_ALPHA)
@migration = migration @migration = migration
@number_of_jobs = number_of_jobs @number_of_jobs = number_of_jobs
@ema_alpha = ema_alpha
end end
def optimize! def optimize!
...@@ -47,20 +51,15 @@ module Gitlab ...@@ -47,20 +51,15 @@ module Gitlab
private private
def batch_size_multiplier def batch_size_multiplier
efficiency = migration.smoothed_time_efficiency(number_of_jobs: number_of_jobs) efficiency = migration.smoothed_time_efficiency(number_of_jobs: number_of_jobs, alpha: ema_alpha)
return unless efficiency return if efficiency.nil? || efficiency == 0
if TARGET_EFFICIENCY.include?(efficiency) # We hit the range - no change
# We hit the range - no change return if TARGET_EFFICIENCY.include?(efficiency)
nil
elsif efficiency > TARGET_EFFICIENCY.max # Assumption: time efficiency is linear in the batch size
# We're above the range - decrease by 20% [TARGET_EFFICIENCY.max / efficiency, MAX_MULTIPLIER].min
DECREASE_MULTIPLIER
else
# We're below the range - increase by 10%
INCREASE_MULTIPLIER
end
end end
end end
end end
......
...@@ -4,16 +4,19 @@ require 'spec_helper' ...@@ -4,16 +4,19 @@ require 'spec_helper'
RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do
describe '#optimize' do describe '#optimize' do
subject { described_class.new(migration, number_of_jobs: number_of_jobs).optimize! } subject { described_class.new(migration, number_of_jobs: number_of_jobs, ema_alpha: ema_alpha).optimize! }
let(:migration) { create(:batched_background_migration, batch_size: batch_size, sub_batch_size: 100, interval: 120) } let(:migration) { create(:batched_background_migration, batch_size: batch_size, sub_batch_size: 100, interval: 120) }
let(:batch_size) { 10_000 } let(:batch_size) { 10_000 }
let_it_be(:number_of_jobs) { 5 } let_it_be(:number_of_jobs) { 5 }
let_it_be(:ema_alpha) { 0.4 }
let_it_be(:target_efficiency) { described_class::TARGET_EFFICIENCY.max }
def mock_efficiency(eff) def mock_efficiency(eff)
expect(migration).to receive(:smoothed_time_efficiency).with(number_of_jobs: number_of_jobs).and_return(eff) expect(migration).to receive(:smoothed_time_efficiency).with(number_of_jobs: number_of_jobs, alpha: ema_alpha).and_return(eff)
end end
it 'with unknown time efficiency, it keeps the batch size' do it 'with unknown time efficiency, it keeps the batch size' do
...@@ -34,25 +37,55 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do ...@@ -34,25 +37,55 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do
expect { subject }.not_to change { migration.reload.batch_size } expect { subject }.not_to change { migration.reload.batch_size }
end end
it 'with a time efficiency of 70%, it increases the batch size by 10%' do it 'with a time efficiency of 85%, it increases the batch size' do
mock_efficiency(0.7) time_efficiency = 0.85
mock_efficiency(time_efficiency)
new_batch_size = ((target_efficiency / time_efficiency) * batch_size).to_i
expect { subject }.to change { migration.reload.batch_size }.from(batch_size).to(new_batch_size)
end
it 'with a time efficiency of 110%, it decreases the batch size' do
time_efficiency = 1.1
mock_efficiency(time_efficiency)
new_batch_size = ((target_efficiency / time_efficiency) * batch_size).to_i
expect { subject }.to change { migration.reload.batch_size }.from(10_000).to(11_000) expect { subject }.to change { migration.reload.batch_size }.from(batch_size).to(new_batch_size)
end end
it 'with a time efficiency of 110%, it decreases the batch size by 20%' do context 'reaching the upper limit for an increase' do
mock_efficiency(1.1) it 'caps the batch size multiplier at 20% when increasing' do
time_efficiency = 0.1 # this would result in a factor of 10 if not limited
expect { subject }.to change { migration.reload.batch_size }.from(10_000).to(8_000) mock_efficiency(time_efficiency)
new_batch_size = (1.2 * batch_size).to_i
expect { subject }.to change { migration.reload.batch_size }.from(batch_size).to(new_batch_size)
end
it 'does not limit the decrease multiplier' do
time_efficiency = 10
mock_efficiency(time_efficiency)
new_batch_size = (0.1 * batch_size).to_i
expect { subject }.to change { migration.reload.batch_size }.from(batch_size).to(new_batch_size)
end
end end
context 'reaching the upper limit for the batch size' do context 'reaching the upper limit for the batch size' do
let(:batch_size) { 950_000 } let(:batch_size) { 1_950_000 }
it 'caps the batch size at 10M' do it 'caps the batch size at 10M' do
mock_efficiency(0.7) mock_efficiency(0.7)
expect { subject }.to change { migration.reload.batch_size }.to(1_000_000) expect { subject }.to change { migration.reload.batch_size }.to(2_000_000)
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment