Commit 25befa50 authored by Alexandru Croitor's avatar Alexandru Croitor Committed by Adam Hegyi

Issue rebalancing with retries

Add a retry updates mechanism as well as lock with retries.
parent e5e9d5d3
......@@ -3,8 +3,18 @@
class IssueRebalancingService
MAX_ISSUE_COUNT = 10_000
BATCH_SIZE = 100
SMALLEST_BATCH_SIZE = 5
RETRIES_LIMIT = 3
TooManyIssues = Class.new(StandardError)
TIMING_CONFIGURATION = [
[0.1.seconds, 0.05.seconds], # short timings, lock_timeout: 100ms, sleep after LockWaitTimeout: 50ms
[0.5.seconds, 0.05.seconds],
[1.second, 0.5.seconds],
[1.second, 0.5.seconds],
[5.seconds, 1.second]
].freeze
def initialize(issue)
@issue = issue
@base = Issue.relative_positioning_query_base(issue)
......@@ -23,18 +33,27 @@ class IssueRebalancingService
assign_positions(start, indexed_ids)
.sort_by(&:first)
.each_slice(BATCH_SIZE) do |pairs_with_position|
if Feature.enabled?(:issue_rebalancing_with_retry)
update_positions_with_retry(pairs_with_position, 'rebalance issue positions in batches ordered by id')
else
update_positions(pairs_with_position, 'rebalance issue positions in batches ordered by id')
end
end
end
else
Issue.transaction do
indexed_ids.each_slice(BATCH_SIZE) do |pairs|
pairs_with_position = assign_positions(start, pairs)
if Feature.enabled?(:issue_rebalancing_with_retry)
update_positions_with_retry(pairs_with_position, 'rebalance issue positions')
else
update_positions(pairs_with_position, 'rebalance issue positions')
end
end
end
end
end
private
......@@ -52,13 +71,38 @@ class IssueRebalancingService
end
end
def update_positions_with_retry(pairs_with_position, query_name)
retries = 0
batch_size = pairs_with_position.size
until pairs_with_position.empty?
begin
update_positions(pairs_with_position.first(batch_size), query_name)
pairs_with_position = pairs_with_position.drop(batch_size)
retries = 0
rescue ActiveRecord::StatementTimeout, ActiveRecord::QueryCanceled => ex
raise ex if batch_size < SMALLEST_BATCH_SIZE
if (retries += 1) == RETRIES_LIMIT
# shrink the batch size in half when RETRIES limit is reached and update still fails perhaps because batch size is still too big
batch_size = (batch_size / 2).to_i
retries = 0
end
retry
end
end
end
def update_positions(pairs_with_position, query_name)
values = pairs_with_position.map do |id, index|
"(#{id}, #{index})"
end.join(', ')
Gitlab::Database::WithLockRetries.new(timing_configuration: TIMING_CONFIGURATION, klass: self.class).run do
run_update_query(values, query_name)
end
end
def run_update_query(values, query_name)
Issue.connection.exec_query(<<~SQL, query_name)
......
---
name: issue_rebalancing_with_retry
introduced_by_url:
rollout_issue_url:
milestone: '13.11'
type: development
group: group::project management
default_enabled: false
......@@ -3,31 +3,35 @@
require 'spec_helper'
RSpec.describe IssueRebalancingService do
let_it_be(:project) { create(:project) }
let_it_be(:project, reload: true) { create(:project) }
let_it_be(:user) { project.creator }
let_it_be(:start) { RelativePositioning::START_POSITION }
let_it_be(:max_pos) { RelativePositioning::MAX_POSITION }
let_it_be(:min_pos) { RelativePositioning::MIN_POSITION }
let_it_be(:clump_size) { 300 }
let_it_be(:unclumped) do
(0..clump_size).to_a.map do |i|
let_it_be(:unclumped, reload: true) do
(1..clump_size).to_a.map do |i|
create(:issue, project: project, author: user, relative_position: start + (1024 * i))
end
end
let_it_be(:end_clump) do
(0..clump_size).to_a.map do |i|
let_it_be(:end_clump, reload: true) do
(1..clump_size).to_a.map do |i|
create(:issue, project: project, author: user, relative_position: max_pos - i)
end
end
let_it_be(:start_clump) do
(0..clump_size).to_a.map do |i|
let_it_be(:start_clump, reload: true) do
(1..clump_size).to_a.map do |i|
create(:issue, project: project, author: user, relative_position: min_pos + i)
end
end
before do
stub_feature_flags(issue_rebalancing_with_retry: false)
end
def issues_in_position_order
project.reload.issues.reorder(relative_position: :asc).to_a
end
......@@ -101,19 +105,70 @@ RSpec.describe IssueRebalancingService do
end
end
shared_examples 'rebalancing is retried on statement timeout exceptions' do
subject { described_class.new(project.issues.first) }
it 'retries update statement' do
call_count = 0
allow(subject).to receive(:run_update_query) do
call_count += 1
if call_count < 13
raise(ActiveRecord::QueryCanceled)
else
call_count = 0 if call_count == 13 + 16 # 16 = 17 sub-batches - 1 call that succeeded as part of 5th batch
true
end
end
# call math:
# batches start at 100 and are split in half after every 3 retries if ActiveRecord::StatementTimeout exception is raised.
# We raise ActiveRecord::StatementTimeout exception for 13 calls:
# 1. 100 => 3 calls
# 2. 100/2=50 => 3 calls + 3 above = 6 calls, raise ActiveRecord::StatementTimeout
# 3. 50/2=25 => 3 calls + 6 above = 9 calls, raise ActiveRecord::StatementTimeout
# 4. 25/2=12 => 3 calls + 9 above = 12 calls, raise ActiveRecord::StatementTimeout
# 5. 12/2=6 => 1 call + 12 above = 13 calls, run successfully
#
# so out of 100 elements we created batches of 6 items => 100/6 = 17 sub-batches of 6 or less elements
#
# project.issues.count: 900 issues, so 9 batches of 100 => 9 * (13+16) = 261
expect(subject).to receive(:update_positions).exactly(261).times.and_call_original
subject.execute
end
end
context 'when issue_rebalancing_optimization feature flag is on' do
before do
stub_feature_flags(issue_rebalancing_optimization: true)
end
it_behaves_like 'IssueRebalancingService shared examples'
context 'when issue_rebalancing_with_retry feature flag is on' do
before do
stub_feature_flags(issue_rebalancing_with_retry: true)
end
it_behaves_like 'IssueRebalancingService shared examples'
it_behaves_like 'rebalancing is retried on statement timeout exceptions'
end
end
context 'when issue_rebalancing_optimization feature flag is on' do
context 'when issue_rebalancing_optimization feature flag is off' do
before do
stub_feature_flags(issue_rebalancing_optimization: false)
end
it_behaves_like 'IssueRebalancingService shared examples'
context 'when issue_rebalancing_with_retry feature flag is on' do
before do
stub_feature_flags(issue_rebalancing_with_retry: true)
end
it_behaves_like 'IssueRebalancingService shared examples'
it_behaves_like 'rebalancing is retried on statement timeout exceptions'
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment