Commit 7f407002 authored by Simon Tomlinson's avatar Simon Tomlinson

Ensure service discovery runs before results are used

Fixes a race condition where database queries could start before the
first round of service discovery completes. By running service discovery
once on the main thread before starting the worker thread, we can be
sure that it runs at least once before the application needs the
results.

Changelog: fixed
parent 96757b5f
...@@ -63,31 +63,36 @@ module Gitlab ...@@ -63,31 +63,36 @@ module Gitlab
end end
def start def start
# We run service discovery once in the current thread so that the application's main thread
# does not race this thread to use the results of initial service discovery.
next_sleep_duration = perform_service_discovery
Thread.new do Thread.new do
loop do loop do
interval =
begin
refresh_if_necessary
rescue StandardError => error
# Any exceptions that might occur should be reported to
# Sentry, instead of silently terminating this thread.
Gitlab::ErrorTracking.track_exception(error)
Gitlab::AppLogger.error(
"Service discovery encountered an error: #{error.message}"
)
self.interval
end
# We slightly randomize the sleep() interval. This should reduce # We slightly randomize the sleep() interval. This should reduce
# the likelihood of _all_ processes refreshing at the same time, # the likelihood of _all_ processes refreshing at the same time,
# possibly putting unnecessary pressure on the DNS server. # possibly putting unnecessary pressure on the DNS server.
sleep(interval + rand(MAX_SLEEP_ADJUSTMENT)) sleep(next_sleep_duration + rand(MAX_SLEEP_ADJUSTMENT))
next_sleep_duration = perform_service_discovery
end end
end end
end end
def perform_service_discovery
refresh_if_necessary
rescue StandardError => error
# Any exceptions that might occur should be reported to
# Sentry, instead of silently terminating this thread.
Gitlab::ErrorTracking.track_exception(error)
Gitlab::AppLogger.error(
"Service discovery encountered an error: #{error.message}"
)
interval
end
# Refreshes the hosts, but only if the DNS record returned a new list of # Refreshes the hosts, but only if the DNS record returned a new list of
# addresses. # addresses.
# #
......
...@@ -57,22 +57,21 @@ RSpec.describe Gitlab::Database::LoadBalancing::ServiceDiscovery do ...@@ -57,22 +57,21 @@ RSpec.describe Gitlab::Database::LoadBalancing::ServiceDiscovery do
.and_yield .and_yield
end end
it 'starts service discovery in a new thread' do it 'runs service discovery once before starting the worker thread' do
expect(service) expect(service).to receive(:perform_service_discovery).ordered.and_return(5)
.to receive(:refresh_if_necessary)
.and_return(5)
expect(service) expect(Thread).to receive(:new).ordered.and_call_original # Thread starts
.to receive(:rand)
.and_return(2)
expect(service) expect(service).to receive(:rand).ordered.and_return(2)
.to receive(:sleep) expect(service).to receive(:sleep).ordered.with(7) # Sleep runs after thread starts
.with(7)
expect(service).to receive(:perform_service_discovery).ordered.and_return(1)
service.start.join service.start.join
end end
end
describe '#perform_service_discovery' do
it 'reports exceptions to Sentry' do it 'reports exceptions to Sentry' do
error = StandardError.new error = StandardError.new
...@@ -84,15 +83,7 @@ RSpec.describe Gitlab::Database::LoadBalancing::ServiceDiscovery do ...@@ -84,15 +83,7 @@ RSpec.describe Gitlab::Database::LoadBalancing::ServiceDiscovery do
.to receive(:track_exception) .to receive(:track_exception)
.with(error) .with(error)
expect(service) service.perform_service_discovery
.to receive(:rand)
.and_return(2)
expect(service)
.to receive(:sleep)
.with(62)
service.start.join
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment