Commit ac7315f1 authored by Stan Hu's avatar Stan Hu

Add metrics for tracking stale secondaries for merge requests

To fix the merge requests failing to match due to replication lag
(https://gitlab.com/gitlab-org/gitlab/-/issues/247857) we added a
mechanism to track the WAL pointer of the merge request to decide
whether to use the secondary or fall back to the primary. This commit
adds metrics to better understand how often this fallback is occurring.
parent 545f50f8
---
title: Add metrics for tracking stale secondaries for merge requests
merge_request: 44813
author:
type: other
......@@ -6,6 +6,9 @@ module EE
module MatchingMergeRequest
extend ::Gitlab::Utils::Override
TOTAL_METRIC = :gitlab_merge_request_match_total
STALE_METRIC = :gitlab_merge_request_match_stale_secondary
override :match?
def match?
return super unless ::Feature.enabled?(:matching_merge_request_db_sync)
......@@ -24,9 +27,44 @@ module EE
# sessions, replication lag could erroneously cause step 5 to
# report no matching merge requests. To avoid this, we check
# the write location to ensure the replica can make this query.
::Gitlab::Database::LoadBalancing::Sticking.unstick_or_continue_sticking(:project, @project.id) # rubocop:disable Gitlab/ModuleWithInstanceVariables
track_session_metrics do
::Gitlab::Database::LoadBalancing::Sticking.unstick_or_continue_sticking(:project, @project.id) # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
super
end
private
def track_session_metrics
before = ::Gitlab::Database::LoadBalancing::Session.current.use_primary?
yield
after = ::Gitlab::Database::LoadBalancing::Session.current.use_primary?
increment_attempt_count
if !before && after
increment_stale_secondary_count
end
end
def increment_attempt_count
total_counter.increment
end
def increment_stale_secondary_count
stale_counter.increment
end
def total_counter
@total_counter ||= ::Gitlab::Metrics.counter(TOTAL_METRIC, 'Total number of merge request match attempts')
end
def stale_counter
@stale_counter ||= ::Gitlab::Metrics.counter(STALE_METRIC, 'Total number of merge request match attempts with lagging secondary')
end
end
end
end
......
......@@ -15,46 +15,90 @@ RSpec.describe Gitlab::Checks::MatchingMergeRequest do
target_branch: target_branch,
in_progress_merge_commit_sha: newrev)
end
let(:total_counter) { subject.send(:total_counter) }
let(:stale_counter) { subject.send(:stale_counter) }
subject { described_class.new(newrev, target_branch, project) }
context 'with load balancing disabled', :request_store, :redis do
before do
expect(::Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(false)
expect(::Gitlab::Database::LoadBalancing::Sticking).not_to receive(:unstick_or_continue_sticking)
end
it 'does not attempt to stick to primary' do
expect(::Gitlab::Database::LoadBalancing::Sticking).not_to receive(:unstick_or_continue_sticking)
expect(subject.match?).to be true
end
it 'increments no counters' do
expect { subject.match? }
.to change { total_counter.get }.by(0)
.and change { stale_counter.get }.by(0)
end
end
context 'with load balancing enabled', :request_store, :redis do
before do
allow(::Gitlab::Database::LoadBalancing::Sticking).to receive(:all_caught_up?).and_return(false)
end
let(:session) { ::Gitlab::Database::LoadBalancing::Session.current }
context 'with feature flag disabled' do
context 'on secondary that has caught up to primary' do
before do
stub_feature_flags(matching_merge_request_db_sync: false)
allow(::Gitlab::Database::LoadBalancing::Sticking).to receive(:all_caught_up?).and_return(true)
expect(::Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(true)
expect(::Gitlab::Database::LoadBalancing::Sticking).to receive(:unstick_or_continue_sticking).and_call_original
end
it 'does not check load balancing state' do
expect(::Gitlab::Database::LoadBalancing).not_to receive(:enable?)
expect(::Gitlab::Database::LoadBalancing::Sticking).not_to receive(:unstick_or_continue_sticking)
it 'continues to use the secondary' do
expect(session.use_primary?).to be false
expect(subject.match?).to be true
end
it 'only increments total counter' do
expect { subject.match? }
.to change { total_counter.get }.by(1)
.and change { stale_counter.get }.by(0)
end
end
it 'sticks to the primary' do
session = ::Gitlab::Database::LoadBalancing::Session.current
expect(::Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(true)
expect(::Gitlab::Database::LoadBalancing::Sticking).to receive(:unstick_or_continue_sticking).and_call_original
context 'on secondary behind primary' do
before do
allow(::Gitlab::Database::LoadBalancing::Sticking).to receive(:all_caught_up?).and_return(false)
end
expect(subject.match?).to be true
expect(session.use_primary?).to be true
context 'with feature flag disabled' do
before do
stub_feature_flags(matching_merge_request_db_sync: false)
expect(::Gitlab::Database::LoadBalancing).not_to receive(:enable?)
expect(::Gitlab::Database::LoadBalancing::Sticking).not_to receive(:unstick_or_continue_sticking)
end
it 'does not check load balancing state' do
expect(subject.match?).to be true
end
it 'increments no counters' do
expect { subject.match? }
.to change { total_counter.get }.by(0)
.and change { stale_counter.get }.by(0)
end
end
context 'load balancing enabled' do
before do
expect(::Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(true)
expect(::Gitlab::Database::LoadBalancing::Sticking).to receive(:unstick_or_continue_sticking).and_call_original
end
it 'sticks to the primary' do
expect(subject.match?).to be true
expect(session.use_primary?).to be true
end
it 'increments both total and stale counters' do
expect { subject.match? }
.to change { total_counter.get }.by(1)
.and change { stale_counter.get }.by(1)
end
end
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment