Commit e5c5dff0 authored by Yorick Peterse's avatar Yorick Peterse

Prepare the DB LB for always being enabled

This commits prepares the database load balancing code for always being
enabled, even when no replicas are configured.

To achieve this, the load balancer now supports the "primary_only"
option. When enabled, the list of replicas is ignored (which is empty
anyway), and a fake replica is used instead. This fake replica simply
reuses the primary connection of the load balancer it belongs to. We
chose this approach as there's simply too much code that assumes at
least one replica is present when load balancing is enabled.

In addition, we make various changes to ConnectionProxy to allow us to
always enable the use of this class. As an example, some tests use code
like this:

    ActiveRecord::Schema.define do
      create_table :foo do
        ...
      end
    end

Internally Rails ends up performing some `respond_to?` checks to expose
certain methods (or not). Without defining a custom
`respond_to_missing?`, these checks return `false` and thus the method
isn't available.

We've also had to change a few more methods in the ConnectionProxy
object to handle forwarding trailing hashes. In the previous setup
certain tests would still trigger deprecation warnings. This can be
avoided by using `...` everywhere, and getting rid of the `sticky:`
keyword argument.

Changelog: changed
parent 443d1219
...@@ -108,13 +108,14 @@ module Gitlab ...@@ -108,13 +108,14 @@ module Gitlab
end end
# Configures proxying of requests. # Configures proxying of requests.
def self.configure_proxy(proxy = ConnectionProxy.new(hosts)) def self.configure_proxy
ActiveRecord::Base.load_balancing_proxy = proxy lb = LoadBalancer.new(hosts, primary_only: !enable?)
ActiveRecord::Base.load_balancing_proxy = ConnectionProxy.new(lb)
# Populate service discovery immediately if it is configured # Populate service discovery immediately if it is configured
if service_discovery_enabled? if service_discovery_enabled?
ServiceDiscovery ServiceDiscovery
.new(proxy.load_balancer, **service_discovery_configuration) .new(lb, **service_discovery_configuration)
.perform_service_discovery .perform_service_discovery
end end
end end
......
...@@ -34,15 +34,15 @@ module Gitlab ...@@ -34,15 +34,15 @@ module Gitlab
).freeze ).freeze
# hosts - The hosts to use for load balancing. # hosts - The hosts to use for load balancing.
def initialize(hosts = []) def initialize(load_balancer)
@load_balancer = LoadBalancer.new(hosts) @load_balancer = load_balancer
end end
def select_all(arel, name = nil, binds = [], preparable: nil) def select_all(arel, name = nil, binds = [], preparable: nil)
if arel.respond_to?(:locked) && arel.locked if arel.respond_to?(:locked) && arel.locked
# SELECT ... FOR UPDATE queries should be sent to the primary. # SELECT ... FOR UPDATE queries should be sent to the primary.
write_using_load_balancer(:select_all, arel, name, binds, current_session.write!
sticky: true) write_using_load_balancer(:select_all, arel, name, binds)
else else
read_using_load_balancer(:select_all, arel, name, binds) read_using_load_balancer(:select_all, arel, name, binds)
end end
...@@ -56,7 +56,8 @@ module Gitlab ...@@ -56,7 +56,8 @@ module Gitlab
STICKY_WRITES.each do |name| STICKY_WRITES.each do |name|
define_method(name) do |*args, **kwargs, &block| define_method(name) do |*args, **kwargs, &block|
write_using_load_balancer(name, *args, sticky: true, **kwargs, &block) current_session.write!
write_using_load_balancer(name, *args, **kwargs, &block)
end end
end end
...@@ -65,13 +66,20 @@ module Gitlab ...@@ -65,13 +66,20 @@ module Gitlab
track_read_only_transaction! track_read_only_transaction!
read_using_load_balancer(:transaction, *args, **kwargs, &block) read_using_load_balancer(:transaction, *args, **kwargs, &block)
else else
write_using_load_balancer(:transaction, *args, sticky: true, **kwargs, &block) current_session.write!
write_using_load_balancer(:transaction, *args, **kwargs, &block)
end end
ensure ensure
untrack_read_only_transaction! untrack_read_only_transaction!
end end
def respond_to_missing?(name, include_private = false)
@load_balancer.read_write do |connection|
connection.respond_to?(name, include_private)
end
end
# Delegates all unknown messages to a read-write connection. # Delegates all unknown messages to a read-write connection.
def method_missing(...) def method_missing(...)
if current_session.fallback_to_replicas_for_ambiguous_queries? if current_session.fallback_to_replicas_for_ambiguous_queries?
...@@ -102,18 +110,13 @@ module Gitlab ...@@ -102,18 +110,13 @@ module Gitlab
# name - The name of the method to call on a connection object. # name - The name of the method to call on a connection object.
# sticky - If set to true the session will stick to the master after # sticky - If set to true the session will stick to the master after
# the write. # the write.
def write_using_load_balancer(name, *args, sticky: false, **kwargs, &block) def write_using_load_balancer(...)
if read_only_transaction? if read_only_transaction?
raise WriteInsideReadOnlyTransactionError, 'A write query is performed inside a read-only transaction' raise WriteInsideReadOnlyTransactionError, 'A write query is performed inside a read-only transaction'
end end
@load_balancer.read_write do |connection| @load_balancer.read_write do |connection|
# Sticking has to be enabled before calling the method. Not doing so connection.send(...)
# could lead to methods called in a block still being performed on a
# secondary instead of on a primary (when necessary).
current_session.write! if sticky
connection.send(name, *args, **kwargs, &block)
end end
end end
......
...@@ -15,9 +15,18 @@ module Gitlab ...@@ -15,9 +15,18 @@ module Gitlab
attr_reader :host_list attr_reader :host_list
# hosts - The hostnames/addresses of the additional databases. # hosts - The hostnames/addresses of the additional databases.
def initialize(hosts = [], model = ActiveRecord::Base) # model - The ActiveRecord base model the load balancer is enabled for.
# primary_only - If set, the replicas are ignored and the primary is
# always used.
def initialize(hosts = [], model = ActiveRecord::Base, primary_only: false)
@primary_only = primary_only
@model = model @model = model
@host_list = HostList.new(hosts.map { |addr| Host.new(addr, self) }) @host_list =
if primary_only
HostList.new([PrimaryHost.new(self)])
else
HostList.new(hosts.map { |addr| Host.new(addr, self) })
end
end end
def disconnect!(timeout: 120) def disconnect!(timeout: 120)
...@@ -217,8 +226,6 @@ module Gitlab ...@@ -217,8 +226,6 @@ module Gitlab
.establish_connection(replica_db_config) .establish_connection(replica_db_config)
end end
private
# ActiveRecord::ConnectionAdapters::ConnectionHandler handles fetching, # ActiveRecord::ConnectionAdapters::ConnectionHandler handles fetching,
# and caching for connections pools for each "connection", so we # and caching for connections pools for each "connection", so we
# leverage that. # leverage that.
...@@ -230,13 +237,15 @@ module Gitlab ...@@ -230,13 +237,15 @@ module Gitlab
) )
end end
private
def ensure_caching! def ensure_caching!
host.enable_query_cache! unless host.query_cache_enabled host.enable_query_cache! unless host.query_cache_enabled
end end
def request_cache def request_cache
base = RequestStore[:gitlab_load_balancer] ||= {} base = RequestStore[:gitlab_load_balancer] ||= {}
base[pool] ||= {} base[self] ||= {}
end end
end end
end end
......
# frozen_string_literal: true
module Gitlab
module Database
module LoadBalancing
# A host that wraps the primary database connection.
#
# This class is used to always enable load balancing as if replicas exist,
# without the need for extra database connections. This ensures that code
# using the load balancer doesn't have to handle the case where load
# balancing is enabled, but no replicas have been configured (= the
# default case).
class PrimaryHost
def initialize(load_balancer)
@load_balancer = load_balancer
end
def release_connection
@load_balancer.release_primary_connection
end
def enable_query_cache!
# This could mess up the primary connection, so we make this a no-op
nil
end
def disable_query_cache!
# This could mess up the primary connection, so we make this a no-op
nil
end
def query_cache_enabled
@load_balancer.pool.query_cache_enabled
end
def connection
@load_balancer.pool.connection
end
def disconnect!(timeout: 120)
nil
end
def offline!
nil
end
def online?
true
end
def primary_write_location
@load_balancer.primary_write_location
ensure
@load_balancer.release_primary_connection
end
def database_replica_location
row = query_and_release(<<-SQL.squish)
SELECT pg_last_wal_replay_lsn()::text AS location
SQL
row['location'] if row.any?
rescue *Host::CONNECTION_ERRORS
nil
end
def caught_up?(_location)
true
end
def query_and_release(sql)
connection.select_all(sql).first || {}
rescue StandardError
{}
ensure
release_connection
end
end
end
end
end
...@@ -3,7 +3,9 @@ ...@@ -3,7 +3,9 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do
let(:proxy) { described_class.new } let(:proxy) do
described_class.new(Gitlab::Database::LoadBalancing::LoadBalancer.new([]))
end
describe '#select' do describe '#select' do
it 'performs a read' do it 'performs a read' do
...@@ -35,9 +37,15 @@ RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do ...@@ -35,9 +37,15 @@ RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do
describe 'using a SELECT FOR UPDATE query' do describe 'using a SELECT FOR UPDATE query' do
it 'runs the query on the primary and sticks to it' do it 'runs the query on the primary and sticks to it' do
arel = double(:arel, locked: true) arel = double(:arel, locked: true)
session = Gitlab::Database::LoadBalancing::Session.new
allow(Gitlab::Database::LoadBalancing::Session).to receive(:current)
.and_return(session)
expect(session).to receive(:write!)
expect(proxy).to receive(:write_using_load_balancer) expect(proxy).to receive(:write_using_load_balancer)
.with(:select_all, arel, 'foo', [], sticky: true) .with(:select_all, arel, 'foo', [])
proxy.select_all(arel, 'foo') proxy.select_all(arel, 'foo')
end end
...@@ -58,8 +66,13 @@ RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do ...@@ -58,8 +66,13 @@ RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do
Gitlab::Database::LoadBalancing::ConnectionProxy::STICKY_WRITES.each do |name| Gitlab::Database::LoadBalancing::ConnectionProxy::STICKY_WRITES.each do |name|
describe "#{name}" do describe "#{name}" do
it 'runs the query on the primary and sticks to it' do it 'runs the query on the primary and sticks to it' do
expect(proxy).to receive(:write_using_load_balancer) session = Gitlab::Database::LoadBalancing::Session.new
.with(name, 'foo', sticky: true)
allow(Gitlab::Database::LoadBalancing::Session).to receive(:current)
.and_return(session)
expect(session).to receive(:write!)
expect(proxy).to receive(:write_using_load_balancer).with(name, 'foo')
proxy.send(name, 'foo') proxy.send(name, 'foo')
end end
...@@ -108,7 +121,7 @@ RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do ...@@ -108,7 +121,7 @@ RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do
# We have an extra test for #transaction here to make sure that nested queries # We have an extra test for #transaction here to make sure that nested queries
# are also sent to a primary. # are also sent to a primary.
describe '#transaction' do describe '#transaction' do
let(:session) { double(:session) } let(:session) { Gitlab::Database::LoadBalancing::Session.new }
before do before do
allow(Gitlab::Database::LoadBalancing::Session).to receive(:current) allow(Gitlab::Database::LoadBalancing::Session).to receive(:current)
...@@ -192,7 +205,7 @@ RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do ...@@ -192,7 +205,7 @@ RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do
proxy.foo('foo') proxy.foo('foo')
end end
it 'properly forwards trailing hash arguments' do it 'properly forwards keyword arguments' do
allow(proxy.load_balancer).to receive(:read_write) allow(proxy.load_balancer).to receive(:read_write)
expect(proxy).to receive(:write_using_load_balancer).and_call_original expect(proxy).to receive(:write_using_load_balancer).and_call_original
...@@ -217,7 +230,7 @@ RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do ...@@ -217,7 +230,7 @@ RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do
proxy.foo('foo') proxy.foo('foo')
end end
it 'properly forwards trailing hash arguments' do it 'properly forwards keyword arguments' do
allow(proxy.load_balancer).to receive(:read) allow(proxy.load_balancer).to receive(:read)
expect(proxy).to receive(:read_using_load_balancer).and_call_original expect(proxy).to receive(:read_using_load_balancer).and_call_original
...@@ -297,20 +310,12 @@ RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do ...@@ -297,20 +310,12 @@ RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do
.and_return(session) .and_return(session)
end end
it 'uses but does not stick to the primary when sticking is disabled' do it 'uses but does not stick to the primary' do
expect(proxy.load_balancer).to receive(:read_write).and_yield(connection) expect(proxy.load_balancer).to receive(:read_write).and_yield(connection)
expect(connection).to receive(:foo).with('foo') expect(connection).to receive(:foo).with('foo')
expect(session).not_to receive(:write!) expect(session).not_to receive(:write!)
proxy.write_using_load_balancer(:foo, 'foo') proxy.write_using_load_balancer(:foo, 'foo')
end end
it 'sticks to the primary when sticking is enabled' do
expect(proxy.load_balancer).to receive(:read_write).and_yield(connection)
expect(connection).to receive(:foo).with('foo')
expect(session).to receive(:write!)
proxy.write_using_load_balancer(:foo, 'foo', sticky: true)
end
end end
end end
...@@ -41,6 +41,17 @@ RSpec.describe Gitlab::Database::LoadBalancing::LoadBalancer, :request_store do ...@@ -41,6 +41,17 @@ RSpec.describe Gitlab::Database::LoadBalancing::LoadBalancer, :request_store do
top_error top_error
end end
describe '#initialize' do
it 'ignores the hosts when the primary_only option is enabled' do
lb = described_class.new([db_host], primary_only: true)
hosts = lb.host_list.hosts
expect(hosts.length).to eq(1)
expect(hosts.first)
.to be_instance_of(Gitlab::Database::LoadBalancing::PrimaryHost)
end
end
describe '#read' do describe '#read' do
it 'yields a connection for a read' do it 'yields a connection for a read' do
connection = double(:connection) connection = double(:connection)
...@@ -121,6 +132,17 @@ RSpec.describe Gitlab::Database::LoadBalancing::LoadBalancer, :request_store do ...@@ -121,6 +132,17 @@ RSpec.describe Gitlab::Database::LoadBalancing::LoadBalancer, :request_store do
expect { |b| lb.read(&b) } expect { |b| lb.read(&b) }
.to yield_with_args(ActiveRecord::Base.retrieve_connection) .to yield_with_args(ActiveRecord::Base.retrieve_connection)
end end
it 'uses the primary when the primary_only option is enabled' do
lb = described_class.new(primary_only: true)
# When no hosts are configured, we don't want to produce any warnings, as
# they aren't useful/too noisy.
expect(Gitlab::Database::LoadBalancing::Logger).not_to receive(:warn)
expect { |b| lb.read(&b) }
.to yield_with_args(ActiveRecord::Base.retrieve_connection)
end
end end
describe '#read_write' do describe '#read_write' do
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::LoadBalancing::PrimaryHost do
let(:load_balancer) { Gitlab::Database::LoadBalancing::LoadBalancer.new }
let(:host) { Gitlab::Database::LoadBalancing::PrimaryHost.new(load_balancer) }
describe '#connection' do
it 'returns a connection from the pool' do
expect(load_balancer.pool).to receive(:connection)
host.connection
end
end
describe '#release_connection' do
it 'releases a connection from the pool' do
expect(load_balancer).to receive(:release_primary_connection)
host.release_connection
end
end
describe '#enable_query_cache!' do
it 'does nothing' do
expect(host.enable_query_cache!).to be_nil
end
end
describe '#disable_query_cache!' do
it 'does nothing' do
expect(host.disable_query_cache!).to be_nil
end
end
describe '#query_cache_enabled' do
it 'delegates to the primary connection pool' do
expect(host.query_cache_enabled)
.to eq(load_balancer.pool.query_cache_enabled)
end
end
describe '#disconnect!' do
it 'does nothing' do
expect(host.disconnect!).to be_nil
end
end
describe '#offline!' do
it 'does nothing' do
expect(host.offline!).to be_nil
end
end
describe '#online?' do
it 'returns true' do
expect(host.online?).to eq(true)
end
end
describe '#primary_write_location' do
it 'returns the write location of the primary' do
expect(host.primary_write_location).to be_an_instance_of(String)
expect(host.primary_write_location).not_to be_empty
end
end
describe '#caught_up?' do
it 'returns true' do
expect(host.caught_up?('foo')).to eq(true)
end
end
describe '#database_replica_location' do
let(:connection) { double(:connection) }
it 'returns the write ahead location of the replica', :aggregate_failures do
expect(host)
.to receive(:query_and_release)
.and_return({ 'location' => '0/D525E3A8' })
expect(host.database_replica_location).to be_an_instance_of(String)
end
it 'returns nil when the database query returned no rows' do
expect(host).to receive(:query_and_release).and_return({})
expect(host.database_replica_location).to be_nil
end
it 'returns nil when the database connection fails' do
allow(host).to receive(:connection).and_raise(PG::Error)
expect(host.database_replica_location).to be_nil
end
end
describe '#query_and_release' do
it 'executes a SQL query' do
results = host.query_and_release('SELECT 10 AS number')
expect(results).to be_an_instance_of(Hash)
expect(results['number'].to_i).to eq(10)
end
it 'releases the connection after running the query' do
expect(host)
.to receive(:release_connection)
.once
host.query_and_release('SELECT 10 AS number')
end
it 'returns an empty Hash in the event of an error' do
expect(host.connection)
.to receive(:select_all)
.and_raise(RuntimeError, 'kittens')
expect(host.query_and_release('SELECT 10 AS number')).to eq({})
end
end
end
...@@ -306,10 +306,10 @@ RSpec.describe Gitlab::Database::LoadBalancing do ...@@ -306,10 +306,10 @@ RSpec.describe Gitlab::Database::LoadBalancing do
.and_return(true) .and_return(true)
instance = double(:instance) instance = double(:instance)
lb = instance_spy(Gitlab::Database::LoadBalancing::LoadBalancer) lb = Gitlab::Database::LoadBalancing::LoadBalancer.new([])
proxy = double(:proxy, load_balancer: lb) proxy = Gitlab::Database::LoadBalancing::ConnectionProxy.new(lb)
allow(Gitlab::Database::LoadBalancing) allow(described_class)
.to receive(:proxy) .to receive(:proxy)
.and_return(proxy) .and_return(proxy)
...@@ -345,7 +345,8 @@ RSpec.describe Gitlab::Database::LoadBalancing do ...@@ -345,7 +345,8 @@ RSpec.describe Gitlab::Database::LoadBalancing do
context 'when the load balancing is configured' do context 'when the load balancing is configured' do
let(:db_host) { ActiveRecord::Base.connection_pool.db_config.host } let(:db_host) { ActiveRecord::Base.connection_pool.db_config.host }
let(:proxy) { described_class::ConnectionProxy.new([db_host]) } let(:load_balancer) { described_class::LoadBalancer.new([db_host]) }
let(:proxy) { described_class::ConnectionProxy.new(load_balancer) }
context 'when a proxy connection is used' do context 'when a proxy connection is used' do
it 'returns :unknown' do it 'returns :unknown' do
...@@ -785,6 +786,16 @@ RSpec.describe Gitlab::Database::LoadBalancing do ...@@ -785,6 +786,16 @@ RSpec.describe Gitlab::Database::LoadBalancing do
it 'redirects queries to the right roles' do it 'redirects queries to the right roles' do
roles = [] roles = []
# If we don't run any queries, the pool may be a NullPool. This can
# result in some tests reporting a role as `:unknown`, even though the
# tests themselves are correct.
#
# To prevent this from happening we simply run a simple query to
# ensure the proper pool type is put in place. The exact query doesn't
# matter, provided it actually runs a query and thus creates a proper
# connection pool.
model.count
subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |event| subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |event|
role = ::Gitlab::Database::LoadBalancing.db_role_for_connection(event.payload[:connection]) role = ::Gitlab::Database::LoadBalancing.db_role_for_connection(event.payload[:connection])
roles << role if role.present? roles << role if role.present?
......
...@@ -4,7 +4,9 @@ RSpec.configure do |config| ...@@ -4,7 +4,9 @@ RSpec.configure do |config|
config.before(:each, :db_load_balancing) do config.before(:each, :db_load_balancing) do
allow(Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(true) allow(Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(true)
proxy = ::Gitlab::Database::LoadBalancing::ConnectionProxy.new([Gitlab::Database.main.config['host']]) lb = ::Gitlab::Database::LoadBalancing::LoadBalancer
.new([Gitlab::Database.main.config['host']])
proxy = ::Gitlab::Database::LoadBalancing::ConnectionProxy.new(lb)
allow(ActiveRecord::Base).to receive(:load_balancing_proxy).and_return(proxy) allow(ActiveRecord::Base).to receive(:load_balancing_proxy).and_return(proxy)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment