Commit 6de28e1d authored by Patrick Bair

Merge branch '336898-support-partitioning-over-multiple-dbs' into 'master'

Support multiple databases for partitioning

See merge request gitlab-org/gitlab!68795
parents f361ce1f 452d554d
......@@ -14,8 +14,6 @@ module PartitionedTable
strategy_class = PARTITIONING_STRATEGIES[strategy.to_sym] || raise(ArgumentError, "Unknown partitioning strategy: #{strategy}")
@partitioning_strategy = strategy_class.new(self, partitioning_key, **kwargs)
Gitlab::Database::Partitioning::PartitionManager.register(self)
end
end
end
# frozen_string_literal: true
module Postgresql
class DetachedPartition < ApplicationRecord
class DetachedPartition < ::Gitlab::Database::SharedModel
scope :ready_to_drop, -> { where('drop_after < ?', Time.current) }
end
end
......@@ -12,7 +12,7 @@ module Database
idempotent!
def perform
Gitlab::Database::Partitioning::PartitionManager.new.sync_partitions
Gitlab::Database::Partitioning.sync_partitions
ensure
Gitlab::Database::Partitioning::PartitionMonitoring.new.report_metrics
end
......
# frozen_string_literal: true
# Make sure we have loaded partitioned models here
# (even with eager loading disabled).
Gitlab::Database::Partitioning::PartitionManager.register(AuditEvent)
Gitlab::Database::Partitioning::PartitionManager.register(WebHookLog)
Gitlab::Database::Partitioning::PartitionManager.register(LooseForeignKeys::DeletedRecord)
Gitlab::Database::Partitioning.register_models([
AuditEvent,
WebHookLog,
LooseForeignKeys::DeletedRecord
])
if Gitlab.ee?
Gitlab::Database::Partitioning::PartitionManager.register(IncidentManagement::PendingEscalations::Alert)
Gitlab::Database::Partitioning::PartitionManager.register(IncidentManagement::PendingEscalations::Issue)
Gitlab::Database::Partitioning.register_models([
IncidentManagement::PendingEscalations::Alert,
IncidentManagement::PendingEscalations::Issue
])
end
begin
Gitlab::Database::Partitioning::PartitionManager.new.sync_partitions unless ENV['DISABLE_POSTGRES_PARTITION_CREATION_ON_STARTUP']
Gitlab::Database::Partitioning.sync_partitions unless ENV['DISABLE_POSTGRES_PARTITION_CREATION_ON_STARTUP']
rescue ActiveRecord::ActiveRecordError, PG::Error
# Ignore: this happens when Rake tasks have yet to create a database, e.g. for testing
end
# frozen_string_literal: true
module Gitlab
  module Database
    # Entry point for dynamic partition management: keeps a registry of
    # partitioned models and hands them to MultiDatabasePartitionManager.
    module Partitioning
      class << self
        # The registry of models added through .register_models.
        #
        # @return [Set] lazily created, memoized on the module itself
        def registered_models
          @registered_models ||= Set.new
        end

        # Adds every element of +models+ to the registry.
        #
        # @param models [Enumerable] models to remember for later syncs
        # @return [Set] the registry after the merge
        def register_models(models)
          registered_models.merge(models)
        end

        # Builds a MultiDatabasePartitionManager for the given models and
        # runs its sync.
        #
        # @param models_to_sync [Enumerable] defaults to the registered models
        # @return whatever MultiDatabasePartitionManager#sync_partitions returns
        def sync_partitions(models_to_sync = registered_models)
          manager = MultiDatabasePartitionManager.new(models_to_sync)
          manager.sync_partitions
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module Database
    module Partitioning
      # Runs PartitionManager for each given model, with SharedModel's
      # connection overridden to that model's connection for the duration.
      class MultiDatabasePartitionManager
        # @param models [Enumerable] objects responding to #connection and #table_name
        def initialize(models)
          @models = models
        end

        # Syncs partitions for every model in turn, logging the start and
        # the end of the whole run.
        def sync_partitions
          Gitlab::AppLogger.info(message: "Syncing dynamic postgres partitions")

          models.each { |partitioned_model| sync_model(partitioned_model) }

          Gitlab::AppLogger.info(message: "Finished sync of dynamic postgres partitions")
        end

        private

        attr_reader :models

        # Switches the shared-model connection to the model's own connection
        # and syncs that single model inside the override.
        def sync_model(partitioned_model)
          Gitlab::Database::SharedModel.using_connection(partitioned_model.connection) do
            Gitlab::AppLogger.debug(
              message: "Switched database connection",
              connection_name: connection_name,
              table_name: partitioned_model.table_name
            )

            PartitionManager.new(partitioned_model).sync_partitions
          end
        end

        # Name of the database config behind the connection SharedModel
        # currently resolves to.
        def connection_name
          Gitlab::Database::SharedModel.connection.pool.db_config.name
        end
      end
    end
  end
end
......@@ -6,60 +6,49 @@ module Gitlab
class PartitionManager
UnsafeToDetachPartitionError = Class.new(StandardError)
def self.register(model)
raise ArgumentError, "Only models with a #partitioning_strategy can be registered." unless model.respond_to?(:partitioning_strategy)
models << model
end
def self.models
@models ||= Set.new
end
LEASE_TIMEOUT = 1.minute
MANAGEMENT_LEASE_KEY = 'database_partition_management_%s'
RETAIN_DETACHED_PARTITIONS_FOR = 1.week
attr_reader :models
def initialize(models = self.class.models)
@models = models
def initialize(model)
@model = model
end
def sync_partitions
Gitlab::AppLogger.info("Checking state of dynamic postgres partitions")
Gitlab::AppLogger.info(message: "Checking state of dynamic postgres partitions", table_name: model.table_name)
models.each do |model|
# Double-checking before getting the lease:
# The prevailing situation is no missing partitions and no extra partitions
next if missing_partitions(model).empty? && extra_partitions(model).empty?
# Double-checking before getting the lease:
# The prevailing situation is no missing partitions and no extra partitions
return if missing_partitions.empty? && extra_partitions.empty?
only_with_exclusive_lease(model, lease_key: MANAGEMENT_LEASE_KEY) do
partitions_to_create = missing_partitions(model)
create(partitions_to_create) unless partitions_to_create.empty?
only_with_exclusive_lease(model, lease_key: MANAGEMENT_LEASE_KEY) do
partitions_to_create = missing_partitions
create(partitions_to_create) unless partitions_to_create.empty?
if Feature.enabled?(:partition_pruning, default_enabled: :yaml)
partitions_to_detach = extra_partitions(model)
detach(partitions_to_detach) unless partitions_to_detach.empty?
end
if Feature.enabled?(:partition_pruning, default_enabled: :yaml)
partitions_to_detach = extra_partitions
detach(partitions_to_detach) unless partitions_to_detach.empty?
end
rescue StandardError => e
Gitlab::AppLogger.error(message: "Failed to create / detach partition(s)",
table_name: model.table_name,
exception_class: e.class,
exception_message: e.message)
end
rescue StandardError => e
Gitlab::AppLogger.error(message: "Failed to create / detach partition(s)",
table_name: model.table_name,
exception_class: e.class,
exception_message: e.message)
end
private
def missing_partitions(model)
attr_reader :model
delegate :connection, to: :model
def missing_partitions
return [] unless connection.table_exists?(model.table_name)
model.partitioning_strategy.missing_partitions
end
def extra_partitions(model)
def extra_partitions
return [] unless connection.table_exists?(model.table_name)
model.partitioning_strategy.extra_partitions
......@@ -121,13 +110,10 @@ module Gitlab
def with_lock_retries(&block)
Gitlab::Database::WithLockRetries.new(
klass: self.class,
logger: Gitlab::AppLogger
logger: Gitlab::AppLogger,
connection: connection
).run(&block)
end
def connection
ActiveRecord::Base.connection
end
end
end
end
......
......@@ -6,7 +6,7 @@ module Gitlab
class PartitionMonitoring
attr_reader :models
def initialize(models = PartitionManager.models)
def initialize(models = Gitlab::Database::Partitioning.registered_models)
@models = models
end
......
......@@ -2,7 +2,7 @@
module Gitlab
module Database
class PostgresForeignKey < ApplicationRecord
class PostgresForeignKey < SharedModel
self.primary_key = :oid
scope :by_referenced_table_identifier, ->(identifier) do
......
......@@ -2,7 +2,7 @@
module Gitlab
module Database
class PostgresPartition < ActiveRecord::Base
class PostgresPartition < SharedModel
self.primary_key = :identifier
belongs_to :postgres_partitioned_table, foreign_key: 'parent_identifier', primary_key: 'identifier'
......
......@@ -2,7 +2,7 @@
module Gitlab
module Database
class PostgresPartitionedTable < ActiveRecord::Base
class PostgresPartitionedTable < SharedModel
DYNAMIC_PARTITION_STRATEGIES = %w[range list].freeze
self.primary_key = :identifier
......
# frozen_string_literal: true
module Gitlab
  module Database
    # Abstract ActiveRecord base class whose connection can be temporarily
    # replaced for the duration of a block via .using_connection.
    class SharedModel < ActiveRecord::Base
      self.abstract_class = true

      class << self
        # Makes .connection return +connection+ while the block runs, then
        # clears the override again (also when the block raises).
        #
        # @param connection the connection to use inside the block
        # @raise [RuntimeError] if an override is already active; in that case
        #   the active override is left untouched
        # @return the value of the block
        def using_connection(connection)
          raise 'cannot nest connection overrides for shared models' unless overriding_connection.nil?

          # The cleanup is scoped to the section that actually set the
          # override. With a method-level `ensure`, a rejected nested call
          # would also reset the *outer* override to nil.
          begin
            self.overriding_connection = connection

            yield
          ensure
            self.overriding_connection = nil
          end
        end

        # @return the overriding connection when one is set, otherwise the
        #   connection ActiveRecord would normally provide
        def connection
          overriding_connection || super
        end

        private

        # The override lives in Thread.current, so it is not visible from
        # other threads.
        def overriding_connection
          Thread.current[:overriding_connection]
        end

        def overriding_connection=(connection)
          Thread.current[:overriding_connection] = connection
        end
      end
    end
  end
end
......@@ -118,7 +118,7 @@ namespace :gitlab do
desc 'Create missing dynamic database partitions'
task create_dynamic_partitions: :environment do
Gitlab::Database::Partitioning::PartitionManager.new.sync_partitions
Gitlab::Database::Partitioning.sync_partitions
end
# This is targeted towards deploys and upgrades of GitLab.
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::Partitioning::MultiDatabasePartitionManager, '#sync_partitions' do
  subject(:sync_partitions) { manager.sync_partitions }

  let(:manager) { described_class.new(models) }
  let(:models) { [model1, model2] }

  # Each model is backed by a distinct connection double. If both shared one
  # connection, the example below could not tell whether the manager switched
  # to the connection belonging to the model being synced.
  let(:model1) { double('model1', connection: connection1, table_name: 'table1') }
  let(:model2) { double('model2', connection: connection2, table_name: 'table2') }

  let(:connection1) { double('connection1') }
  let(:connection2) { double('connection2') }

  let(:target_manager_class) { Gitlab::Database::Partitioning::PartitionManager }
  let(:target_manager1) { double('partition manager') }
  let(:target_manager2) { double('partition manager') }

  before do
    # connection_name digs into a real connection pool, which the doubles
    # above do not provide.
    allow(manager).to receive(:connection_name).and_return('name')
  end

  it 'syncs model partitions, setting up the appropriate connection for each', :aggregate_failures do
    expect(Gitlab::Database::SharedModel).to receive(:using_connection).with(model1.connection).and_yield.ordered
    expect(target_manager_class).to receive(:new).with(model1).and_return(target_manager1).ordered
    expect(target_manager1).to receive(:sync_partitions)

    expect(Gitlab::Database::SharedModel).to receive(:using_connection).with(model2.connection).and_yield.ordered
    expect(target_manager_class).to receive(:new).with(model2).and_return(target_manager2).ordered
    expect(target_manager2).to receive(:sync_partitions)

    sync_partitions
  end
end
......@@ -12,31 +12,18 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionManager do
end
end
describe '.register' do
let(:model) { double(partitioning_strategy: nil) }
it 'remembers registered models' do
expect { described_class.register(model) }.to change { described_class.models }.to include(model)
end
after do
# Do not leak the double to other specs
described_class.models.delete(model)
end
end
context 'creating partitions (mocked)' do
subject(:sync_partitions) { described_class.new(models).sync_partitions }
subject(:sync_partitions) { described_class.new(model).sync_partitions }
let(:models) { [model] }
let(:model) { double(partitioning_strategy: partitioning_strategy, table_name: table) }
let(:model) { double(partitioning_strategy: partitioning_strategy, table_name: table, connection: connection) }
let(:partitioning_strategy) { double(missing_partitions: partitions, extra_partitions: []) }
let(:connection) { ActiveRecord::Base.connection }
let(:table) { "some_table" }
before do
allow(ActiveRecord::Base.connection).to receive(:table_exists?).and_call_original
allow(ActiveRecord::Base.connection).to receive(:table_exists?).with(table).and_return(true)
allow(ActiveRecord::Base.connection).to receive(:execute).and_call_original
allow(connection).to receive(:table_exists?).and_call_original
allow(connection).to receive(:table_exists?).with(table).and_return(true)
allow(connection).to receive(:execute).and_call_original
stub_exclusive_lease(described_class::MANAGEMENT_LEASE_KEY % table, timeout: described_class::LEASE_TIMEOUT)
end
......@@ -49,35 +36,23 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionManager do
end
it 'creates the partition' do
expect(ActiveRecord::Base.connection).to receive(:execute).with(partitions.first.to_sql)
expect(ActiveRecord::Base.connection).to receive(:execute).with(partitions.second.to_sql)
expect(connection).to receive(:execute).with(partitions.first.to_sql)
expect(connection).to receive(:execute).with(partitions.second.to_sql)
sync_partitions
end
context 'error handling with 2 models' do
let(:models) do
[
double(partitioning_strategy: strategy1, table_name: table),
double(partitioning_strategy: strategy2, table_name: table)
]
end
let(:strategy1) { double('strategy1', missing_partitions: nil, extra_partitions: []) }
let(:strategy2) { double('strategy2', missing_partitions: partitions, extra_partitions: []) }
it 'still creates partitions for the second table' do
expect(strategy1).to receive(:missing_partitions).and_raise('this should never happen (tm)')
expect(ActiveRecord::Base.connection).to receive(:execute).with(partitions.first.to_sql)
expect(ActiveRecord::Base.connection).to receive(:execute).with(partitions.second.to_sql)
context 'when an error occurs during partition management' do
it 'does not raise an error' do
expect(partitioning_strategy).to receive(:missing_partitions).and_raise('this should never happen (tm)')
sync_partitions
expect { sync_partitions }.not_to raise_error
end
end
end
context 'creating partitions' do
subject(:sync_partitions) { described_class.new([my_model]).sync_partitions }
subject(:sync_partitions) { described_class.new(my_model).sync_partitions }
let(:connection) { ActiveRecord::Base.connection }
let(:my_model) do
......@@ -106,15 +81,15 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionManager do
context 'detaching partitions (mocked)' do
subject(:sync_partitions) { manager.sync_partitions }
let(:manager) { described_class.new(models) }
let(:models) { [model] }
let(:model) { double(partitioning_strategy: partitioning_strategy, table_name: table)}
let(:manager) { described_class.new(model) }
let(:model) { double(partitioning_strategy: partitioning_strategy, table_name: table, connection: connection) }
let(:partitioning_strategy) { double(extra_partitions: extra_partitions, missing_partitions: []) }
let(:connection) { ActiveRecord::Base.connection }
let(:table) { "foo" }
before do
allow(ActiveRecord::Base.connection).to receive(:table_exists?).and_call_original
allow(ActiveRecord::Base.connection).to receive(:table_exists?).with(table).and_return(true)
allow(connection).to receive(:table_exists?).and_call_original
allow(connection).to receive(:table_exists?).with(table).and_return(true)
stub_exclusive_lease(described_class::MANAGEMENT_LEASE_KEY % table, timeout: described_class::LEASE_TIMEOUT)
end
......@@ -136,24 +111,6 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionManager do
sync_partitions
end
context 'error handling' do
let(:models) do
[
double(partitioning_strategy: error_strategy, table_name: table),
model
]
end
let(:error_strategy) { double(extra_partitions: nil, missing_partitions: []) }
it 'still drops partitions for the other model' do
expect(error_strategy).to receive(:extra_partitions).and_raise('injected error!')
extra_partitions.each { |p| expect(manager).to receive(:detach_one_partition).with(p) }
sync_partitions
end
end
end
context 'with the partition_pruning feature flag disabled' do
......@@ -176,7 +133,7 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionManager do
end
end
subject { described_class.new([my_model]).sync_partitions }
subject { described_class.new(my_model).sync_partitions }
let(:connection) { ActiveRecord::Base.connection }
let(:my_model) do
......@@ -285,11 +242,11 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionManager do
it 'creates partitions for the future then drops the oldest one after a month' do
# 1 month for the current month, 1 month for the old month that we're retaining data for, headroom
expected_num_partitions = (Gitlab::Database::Partitioning::MonthlyStrategy::HEADROOM + 2.months) / 1.month
expect { described_class.new([my_model]).sync_partitions }.to change { num_partitions(my_model) }.from(0).to(expected_num_partitions)
expect { described_class.new(my_model).sync_partitions }.to change { num_partitions(my_model) }.from(0).to(expected_num_partitions)
travel 1.month
expect { described_class.new([my_model]).sync_partitions }.to change { has_partition(my_model, 2.months.ago.beginning_of_month) }.from(true).to(false).and(change { num_partitions(my_model) }.by(0))
expect { described_class.new(my_model).sync_partitions }.to change { has_partition(my_model, 2.months.ago.beginning_of_month) }.from(true).to(false).and(change { num_partitions(my_model) }.by(0))
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::Partitioning do
  describe '.sync_partitions' do
    let(:manager_class) { described_class::MultiDatabasePartitionManager }
    let(:manager) { double('partition manager') }

    # Expects a manager to be built with exactly +expected_models+ and then
    # asked to sync.
    def expect_sync_through_manager(expected_models)
      expect(manager_class).to receive(:new)
        .with(expected_models)
        .and_return(manager)

      expect(manager).to receive(:sync_partitions)
    end

    context 'when no partitioned models are given' do
      it 'calls the partition manager with the registered models' do
        expect_sync_through_manager(described_class.registered_models)

        described_class.sync_partitions
      end
    end

    context 'when partitioned models are given' do
      it 'calls the partition manager with the given models' do
        given_models = ['my special model']

        expect_sync_through_manager(given_models)

        described_class.sync_partitions(given_models)
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::SharedModel do
  describe 'using an external connection' do
    let!(:original_connection) { described_class.connection }
    let(:new_connection) { double('connection') }

    # Wraps the given block with checks that the class resolves to its
    # original connection both before and after the block has run.
    def expect_original_connection_around(&example_body)
      # Guard against leakage: the double must never be the original connection,
      # otherwise the before/after checks would prove nothing.
      expect(original_connection).not_to be(new_connection)

      expect(described_class.connection).to be(original_connection)
      example_body.call
      expect(described_class.connection).to be(original_connection)
    end

    it 'overrides the connection for the duration of the block', :aggregate_failures do
      expect_original_connection_around do
        described_class.using_connection(new_connection) do
          expect(described_class.connection).to be(new_connection)
        end
      end
    end

    it 'does not affect connections in other threads', :aggregate_failures do
      expect_original_connection_around do
        described_class.using_connection(new_connection) do
          expect(described_class.connection).to be(new_connection)

          other_thread = Thread.new do
            expect(described_class.connection).not_to be(new_connection)
          end
          other_thread.join
        end
      end
    end

    context 'when the block raises an error', :aggregate_failures do
      it 're-raises the error, removing the overridden connection' do
        expect_original_connection_around do
          failing_override = proc do
            described_class.using_connection(new_connection) do
              expect(described_class.connection).to be(new_connection)

              raise 'here comes an error!'
            end
          end

          expect(&failing_override).to raise_error(RuntimeError, 'here comes an error!')
        end
      end
    end
  end
end
......@@ -35,11 +35,5 @@ RSpec.describe PartitionedTable do
expect(my_class.partitioning_strategy.partitioning_key).to eq(key)
end
it 'registers itself with the PartitionCreator' do
expect(Gitlab::Database::Partitioning::PartitionManager).to receive(:register).with(my_class)
subject
end
end
end
......@@ -30,7 +30,7 @@ module MigrationsHelpers
end
end
klass.tap { Gitlab::Database::Partitioning::PartitionManager.new.sync_partitions }
klass.tap { Gitlab::Database::Partitioning.sync_partitions([klass]) }
end
def migrations_paths
......
......@@ -6,16 +6,14 @@ RSpec.describe Database::PartitionManagementWorker do
describe '#perform' do
subject { described_class.new.perform }
let(:manager) { instance_double('PartitionManager', sync_partitions: nil) }
let(:monitoring) { instance_double('PartitionMonitoring', report_metrics: nil) }
before do
allow(Gitlab::Database::Partitioning::PartitionManager).to receive(:new).and_return(manager)
allow(Gitlab::Database::Partitioning::PartitionMonitoring).to receive(:new).and_return(monitoring)
end
it 'delegates to PartitionManager' do
expect(manager).to receive(:sync_partitions)
it 'delegates to Partitioning' do
expect(Gitlab::Database::Partitioning).to receive(:sync_partitions)
subject
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment