Merge branch 'ab/optimize-batched-migrations' into 'master'

Change batch size depending on how far off we are. See merge request gitlab-org/gitlab!60501

Merge branch 'ab/optimize-batched-migrations' into 'master'
Change batch size depending on how far off we are. See merge request gitlab-org/gitlab!60501
b6f2d9b6 · Heinrich Lee Yu · dee163de · 61e8fc2c · b6f2d9b6 · b6f2d9b6
Commit b6f2d9b6 authored Apr 29, 2021 by Heinrich Lee Yu
2 changed files
--- a/lib/gitlab/database/background_migration/batch_optimizer.rb
+++ b/lib/gitlab/database/background_migration/batch_optimizer.rb
@@ -17,22 +17,26 @@ module Gitlab
      class BatchOptimizer
        # Target time efficiency for a job
        # Time efficiency is defined as: job duration / interval
-        TARGET_EFFICIENCY = (0.8..0.98).freeze
+        TARGET_EFFICIENCY = (0.9..0.95).freeze
        # Lower and upper bound for the batch size
-        ALLOWED_BATCH_SIZE = (1_000..1_000_000).freeze
+        ALLOWED_BATCH_SIZE = (1_000..2_000_000).freeze
-        # Use this batch_size multiplier to increase batch size
+        # Limit for the multiplier of the batch size
-        INCREASE_MULTIPLIER = 1.1
+        MAX_MULTIPLIER = 1.2
-        # Use this batch_size multiplier to decrease batch size
+        # When smoothing time efficiency, use this many jobs
-        DECREASE_MULTIPLIER = 0.8
+        NUMBER_OF_JOBS = 20
-        attr_reader :migration, :number_of_jobs
+        # Smoothing factor for exponential moving average
+        EMA_ALPHA = 0.4
-        def initialize(migration, number_of_jobs: 10)
+        attr_reader :migration, :number_of_jobs, :ema_alpha
+        def initialize(migration, number_of_jobs: NUMBER_OF_JOBS, ema_alpha: EMA_ALPHA)
          @migration = migration
          @number_of_jobs = number_of_jobs
+          @ema_alpha = ema_alpha
        end
        def optimize!
@@ -47,20 +51,15 @@ module Gitlab
        private
        def batch_size_multiplier
-          efficiency = migration.smoothed_time_efficiency(number_of_jobs: number_of_jobs)
+          efficiency = migration.smoothed_time_efficiency(number_of_jobs: number_of_jobs, alpha: ema_alpha)
-          return unless efficiency
+          return if efficiency.nil? || efficiency == 0
-          if TARGET_EFFICIENCY.include?(efficiency)
+          # We hit the range - no change
-            # We hit the range - no change
+          return if TARGET_EFFICIENCY.include?(efficiency)
-            nil
-          elsif efficiency > TARGET_EFFICIENCY.max
+          # Assumption: time efficiency is linear in the batch size
-            # We're above the range - decrease by 20%
+          [TARGET_EFFICIENCY.max / efficiency, MAX_MULTIPLIER].min
-            DECREASE_MULTIPLIER
-          else
-            # We're below the range - increase by 10%
-            INCREASE_MULTIPLIER
-          end
        end
      end
    end

--- a/spec/lib/gitlab/database/background_migration/batch_optimizer_spec.rb
+++ b/spec/lib/gitlab/database/background_migration/batch_optimizer_spec.rb
@@ -4,16 +4,19 @@ require 'spec_helper'
 RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do
  describe '#optimize' do
-    subject { described_class.new(migration, number_of_jobs: number_of_jobs).optimize! }
+    subject { described_class.new(migration, number_of_jobs: number_of_jobs, ema_alpha: ema_alpha).optimize! }
    let(:migration) { create(:batched_background_migration, batch_size: batch_size, sub_batch_size: 100, interval: 120) }
    let(:batch_size) { 10_000 }
    let_it_be(:number_of_jobs) { 5 }
+    let_it_be(:ema_alpha) { 0.4 }
+    let_it_be(:target_efficiency) { described_class::TARGET_EFFICIENCY.max }
    def mock_efficiency(eff)
-      expect(migration).to receive(:smoothed_time_efficiency).with(number_of_jobs: number_of_jobs).and_return(eff)
+      expect(migration).to receive(:smoothed_time_efficiency).with(number_of_jobs: number_of_jobs, alpha: ema_alpha).and_return(eff)
    end
    it 'with unknown time efficiency, it keeps the batch size' do
@@ -34,25 +37,55 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do
      expect { subject }.not_to change { migration.reload.batch_size }
    end
-    it 'with a time efficiency of 70%, it increases the batch size by 10%' do
+    it 'with a time efficiency of 85%, it increases the batch size' do
-      mock_efficiency(0.7)
+      time_efficiency = 0.85
+      mock_efficiency(time_efficiency)
+      new_batch_size = ((target_efficiency / time_efficiency) * batch_size).to_i
+      expect { subject }.to change { migration.reload.batch_size }.from(batch_size).to(new_batch_size)
+    end
+    it 'with a time efficiency of 110%, it decreases the batch size' do
+      time_efficiency = 1.1
+      mock_efficiency(time_efficiency)
+      new_batch_size = ((target_efficiency / time_efficiency) * batch_size).to_i
-      expect { subject }.to change { migration.reload.batch_size }.from(10_000).to(11_000)
+      expect { subject }.to change { migration.reload.batch_size }.from(batch_size).to(new_batch_size)
    end
-    it 'with a time efficiency of 110%, it decreases the batch size by 20%' do
+    context 'reaching the upper limit for an increase' do
-      mock_efficiency(1.1)
+      it 'caps the batch size multiplier at 20% when increasing' do
+        time_efficiency = 0.1  # this would result in a factor of 10 if not limited
-      expect { subject }.to change { migration.reload.batch_size }.from(10_000).to(8_000)
+        mock_efficiency(time_efficiency)
+        new_batch_size = (1.2 * batch_size).to_i
+        expect { subject }.to change { migration.reload.batch_size }.from(batch_size).to(new_batch_size)
+      end
+      it 'does not limit the decrease multiplier' do
+        time_efficiency = 10
+        mock_efficiency(time_efficiency)
+        new_batch_size = (0.1 * batch_size).to_i
+        expect { subject }.to change { migration.reload.batch_size }.from(batch_size).to(new_batch_size)
+      end
    end
    context 'reaching the upper limit for the batch size' do
-      let(:batch_size) { 950_000 }
+      let(:batch_size) { 1_950_000 }
      it 'caps the batch size at 10M' do
        mock_efficiency(0.7)
-        expect { subject }.to change { migration.reload.batch_size }.to(1_000_000)
+        expect { subject }.to change { migration.reload.batch_size }.to(2_000_000)
      end
    end