Commit e91d515d authored by Heinrich Lee Yu's avatar Heinrich Lee Yu

Check WAL locations before sleeping

This saves us from the unnecessary sleep if the replica is already up to
date.

Based on previous tests, 50% of the time, the replica is already up to
date so we don't need the sleep.

This also reduces the sleep time because we did not see any stale
replica errors with the previous value. This suggests that the delay may
be too high.
parent 1d68034f
...@@ -6,7 +6,7 @@ module Gitlab ...@@ -6,7 +6,7 @@ module Gitlab
class SidekiqServerMiddleware class SidekiqServerMiddleware
JobReplicaNotUpToDate = Class.new(StandardError) JobReplicaNotUpToDate = Class.new(StandardError)
MINIMUM_DELAY_INTERVAL = 1 MINIMUM_DELAY_INTERVAL_SECONDS = 0.8
def call(worker, job, _queue) def call(worker, job, _queue)
worker_class = worker.class worker_class = worker.class
...@@ -46,11 +46,13 @@ module Gitlab ...@@ -46,11 +46,13 @@ module Gitlab
return :primary_no_wal if wal_locations.blank? return :primary_no_wal if wal_locations.blank?
# Happy case: we can read from a replica.
return replica_strategy(worker_class, job) if databases_in_sync?(wal_locations)
sleep_if_needed(job) sleep_if_needed(job)
if databases_in_sync?(wal_locations) if databases_in_sync?(wal_locations)
# Happy case: we can read from a replica. replica_strategy(worker_class, job)
retried_before?(worker_class, job) ? :replica_retried : :replica
elsif can_retry?(worker_class, job) elsif can_retry?(worker_class, job)
# Optimistic case: The worker allows retries and we have retries left. # Optimistic case: The worker allows retries and we have retries left.
:retry :retry
...@@ -61,9 +63,9 @@ module Gitlab ...@@ -61,9 +63,9 @@ module Gitlab
end end
def sleep_if_needed(job) def sleep_if_needed(job)
remaining_delay = MINIMUM_DELAY_INTERVAL - (Time.current.to_f - job['created_at'].to_f) remaining_delay = MINIMUM_DELAY_INTERVAL_SECONDS - (Time.current.to_f - job['created_at'].to_f)
sleep remaining_delay if remaining_delay > 0 && remaining_delay < MINIMUM_DELAY_INTERVAL sleep remaining_delay if remaining_delay > 0 && remaining_delay < MINIMUM_DELAY_INTERVAL_SECONDS
end end
def get_wal_locations(job) def get_wal_locations(job)
...@@ -80,6 +82,10 @@ module Gitlab ...@@ -80,6 +82,10 @@ module Gitlab
worker_class.get_data_consistency == :delayed && not_yet_retried?(job) worker_class.get_data_consistency == :delayed && not_yet_retried?(job)
end end
def replica_strategy(worker_class, job)
retried_before?(worker_class, job) ? :replica_retried : :replica
end
def retried_before?(worker_class, job) def retried_before?(worker_class, job)
worker_class.get_data_consistency == :delayed && !not_yet_retried?(job) worker_class.get_data_consistency == :delayed && !not_yet_retried?(job)
end end
......
...@@ -122,7 +122,7 @@ RSpec.describe Gitlab::Database::LoadBalancing::SidekiqServerMiddleware, :clean_ ...@@ -122,7 +122,7 @@ RSpec.describe Gitlab::Database::LoadBalancing::SidekiqServerMiddleware, :clean_
shared_examples_for 'sleeps when necessary' do shared_examples_for 'sleeps when necessary' do
context 'when WAL locations are blank', :freeze_time do context 'when WAL locations are blank', :freeze_time do
let(:job) { { "retry" => 3, "job_id" => "a180b47c-3fd6-41b8-81e9-34da61c3400e", "wal_locations" => {}, "created_at" => Time.current.to_f - (described_class::MINIMUM_DELAY_INTERVAL - 0.3) } } let(:job) { { "retry" => 3, "job_id" => "a180b47c-3fd6-41b8-81e9-34da61c3400e", "wal_locations" => {}, "created_at" => Time.current.to_f - (described_class::MINIMUM_DELAY_INTERVAL_SECONDS - 0.3) } }
it 'does not sleep' do it 'does not sleep' do
expect(middleware).not_to receive(:sleep) expect(middleware).not_to receive(:sleep)
...@@ -135,17 +135,39 @@ RSpec.describe Gitlab::Database::LoadBalancing::SidekiqServerMiddleware, :clean_ ...@@ -135,17 +135,39 @@ RSpec.describe Gitlab::Database::LoadBalancing::SidekiqServerMiddleware, :clean_
let(:job) { { "retry" => 3, "job_id" => "a180b47c-3fd6-41b8-81e9-34da61c3400e", 'wal_locations' => wal_locations, "created_at" => Time.current.to_f - elapsed_time } } let(:job) { { "retry" => 3, "job_id" => "a180b47c-3fd6-41b8-81e9-34da61c3400e", 'wal_locations' => wal_locations, "created_at" => Time.current.to_f - elapsed_time } }
context 'when delay interval has not elapsed' do context 'when delay interval has not elapsed' do
let(:elapsed_time) { described_class::MINIMUM_DELAY_INTERVAL - 0.3 } let(:elapsed_time) { described_class::MINIMUM_DELAY_INTERVAL_SECONDS - 0.3 }
it 'sleeps until the minimum delay is reached' do context 'when replica is up to date' do
expect(middleware).to receive(:sleep).with(be_within(0.01).of(described_class::MINIMUM_DELAY_INTERVAL - elapsed_time)) before do
Gitlab::Database::LoadBalancing.each_load_balancer do |lb|
allow(lb).to receive(:select_up_to_date_host).and_return(true)
end
end
run_middleware it 'does not sleep' do
expect(middleware).not_to receive(:sleep)
run_middleware
end
end
context 'when replica is not up to date' do
before do
Gitlab::Database::LoadBalancing.each_load_balancer do |lb|
allow(lb).to receive(:select_up_to_date_host).and_return(false, true)
end
end
it 'sleeps until the minimum delay is reached' do
expect(middleware).to receive(:sleep).with(be_within(0.01).of(described_class::MINIMUM_DELAY_INTERVAL_SECONDS - elapsed_time))
run_middleware
end
end end
end end
context 'when delay interval has elapsed' do context 'when delay interval has elapsed' do
let(:elapsed_time) { described_class::MINIMUM_DELAY_INTERVAL + 0.3 } let(:elapsed_time) { described_class::MINIMUM_DELAY_INTERVAL_SECONDS + 0.3 }
it 'does not sleep' do it 'does not sleep' do
expect(middleware).not_to receive(:sleep) expect(middleware).not_to receive(:sleep)
...@@ -179,7 +201,7 @@ RSpec.describe Gitlab::Database::LoadBalancing::SidekiqServerMiddleware, :clean_ ...@@ -179,7 +201,7 @@ RSpec.describe Gitlab::Database::LoadBalancing::SidekiqServerMiddleware, :clean_
context 'when delay interval has not elapsed', :freeze_time do context 'when delay interval has not elapsed', :freeze_time do
let(:job) { { "retry" => 3, "job_id" => "a180b47c-3fd6-41b8-81e9-34da61c3400e", 'wal_locations' => wal_locations, "created_at" => Time.current.to_f - elapsed_time } } let(:job) { { "retry" => 3, "job_id" => "a180b47c-3fd6-41b8-81e9-34da61c3400e", 'wal_locations' => wal_locations, "created_at" => Time.current.to_f - elapsed_time } }
let(:elapsed_time) { described_class::MINIMUM_DELAY_INTERVAL - 0.3 } let(:elapsed_time) { described_class::MINIMUM_DELAY_INTERVAL_SECONDS - 0.3 }
it 'does not sleep' do it 'does not sleep' do
expect(middleware).not_to receive(:sleep) expect(middleware).not_to receive(:sleep)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment