Commit 982f2257 authored by Andreas Brandl's avatar Andreas Brandl

Support reindexing unique indexes

This enables >=PG12-compatible reindexing behavior leveraging
REINDEX CONCURRENTLY which supports reindexing unique indexes.

Changelog: other
parent 0599a0c1
---
name: database_reindexing_pg12
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/64695
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/334372
milestone: '14.1'
type: development
group: group::database
default_enabled: false
......@@ -348,7 +348,6 @@ sudo gitlab-rake gitlab:db:reindex['public.a_specific_index']
The following index types are not supported:
1. Unique and primary key indexes
1. Indexes used for constraint exclusion
1. Partitioned indexes
1. Expression indexes
......
......@@ -18,19 +18,12 @@ module Gitlab
find(identifier)
end
# A 'regular' index is a non-unique index,
# that does not serve an exclusion constraint and
# is defined on a table that is not partitioned.
#
# Deprecated: Switch to scope .reindexing_support
scope :regular, -> { where(unique: false, partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES)}
# Indexes for reindexing with PG12
# Indexes with reindexing support
scope :reindexing_support, -> { where(partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES) }
scope :not_match, ->(regex) { where("name !~ ?", regex)}
scope :not_match, ->(regex) { where("name !~ ?", regex) }
scope :match, ->(regex) { where("name ~* ?", regex)}
scope :match, ->(regex) { where("name ~* ?", regex) }
scope :not_recently_reindexed, -> do
recent_actions = Reindexing::ReindexAction.recent.where('index_identifier = identifier')
......
......@@ -16,16 +16,9 @@ module Gitlab
end
def self.candidate_indexes
indexes = Gitlab::Database::PostgresIndex
.not_match("^#{ConcurrentReindex::TEMPORARY_INDEX_PREFIX}")
.not_match("^#{ConcurrentReindex::REPLACED_INDEX_PREFIX}")
Gitlab::Database::PostgresIndex
.not_match("#{ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$")
if Feature.enabled?(:database_reindexing_pg12, type: :development)
indexes.reindexing_support
else
indexes.regular
end
.reindexing_support
end
end
end
......
# frozen_string_literal: true
module Gitlab
module Database
module Reindexing
class ConcurrentReindex
include Gitlab::Utils::StrongMemoize
ReindexError = Class.new(StandardError)
PG_IDENTIFIER_LENGTH = 63
TEMPORARY_INDEX_PREFIX = 'tmp_reindex_'
REPLACED_INDEX_PREFIX = 'old_reindex_'
STATEMENT_TIMEOUT = 9.hours
# When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock,
# which only conflicts with DDL and vacuum. We therefore execute this with a rather
# high lock timeout and a long pause in between retries. This is an alternative to
# setting a high statement timeout, which would lead to a long running query with effects
# on e.g. vacuum.
REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
attr_reader :index, :logger
def initialize(index, logger: Gitlab::AppLogger)
@index = index
@logger = logger
end
def perform
raise ReindexError, 'UNIQUE indexes are currently not supported' if index.unique?
raise ReindexError, 'partitioned indexes are currently not supported' if index.partitioned?
raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion?
raise ReindexError, 'index is a left-over temporary index from a previous reindexing run' if index.name.start_with?(TEMPORARY_INDEX_PREFIX, REPLACED_INDEX_PREFIX)
logger.info "Starting reindex of #{index}"
with_rebuilt_index do |replacement_index|
swap_index(replacement_index)
end
end
private
def with_rebuilt_index
if Gitlab::Database::PostgresIndex.find_by(schema: index.schema, name: replacement_index_name)
logger.debug("dropping dangling index from previous run (if it exists): #{replacement_index_name}")
remove_index(index.schema, replacement_index_name)
end
create_replacement_index_statement = index.definition
.sub(/CREATE INDEX #{index.name}/, "CREATE INDEX CONCURRENTLY #{replacement_index_name}")
logger.info("creating replacement index #{replacement_index_name}")
logger.debug("replacement index definition: #{create_replacement_index_statement}")
set_statement_timeout do
connection.execute(create_replacement_index_statement)
end
replacement_index = Gitlab::Database::PostgresIndex.find_by(schema: index.schema, name: replacement_index_name)
unless replacement_index.valid_index?
message = 'replacement index was created as INVALID'
logger.error("#{message}, cleaning up")
raise ReindexError, "failed to reindex #{index}: #{message}"
end
# Some expression indexes (aka functional indexes)
# require additional statistics. The existing statistics
# are tightly bound to the original index. We have to
# rebuild statistics for the new index before dropping
# the original one.
rebuild_statistics if index.expression?
yield replacement_index
ensure
begin
remove_index(index.schema, replacement_index_name)
rescue StandardError => e
logger.error(e)
end
end
def swap_index(replacement_index)
logger.info("swapping replacement index #{replacement_index} with #{index}")
with_lock_retries do
rename_index(index.schema, index.name, replaced_index_name)
rename_index(replacement_index.schema, replacement_index.name, index.name)
rename_index(index.schema, replaced_index_name, replacement_index.name)
end
end
def rename_index(schema, old_index_name, new_index_name)
connection.execute(<<~SQL)
ALTER INDEX #{quote_table_name(schema)}.#{quote_table_name(old_index_name)}
RENAME TO #{quote_table_name(new_index_name)}
SQL
end
def remove_index(schema, name)
logger.info("Removing index #{schema}.#{name}")
retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
klass: self.class,
logger: logger
)
retries.run(raise_on_exhaustion: false) do
connection.execute(<<~SQL)
DROP INDEX CONCURRENTLY
IF EXISTS #{quote_table_name(schema)}.#{quote_table_name(name)}
SQL
end
end
def rebuild_statistics
logger.info("rebuilding table statistics for #{index.schema}.#{index.tablename}")
connection.execute(<<~SQL)
ANALYZE #{quote_table_name(index.schema)}.#{quote_table_name(index.tablename)}
SQL
end
def replacement_index_name
@replacement_index_name ||= "#{TEMPORARY_INDEX_PREFIX}#{index.indexrelid}"
end
def replaced_index_name
@replaced_index_name ||= "#{REPLACED_INDEX_PREFIX}#{index.indexrelid}"
end
def with_lock_retries(&block)
arguments = { klass: self.class, logger: logger }
Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
end
def set_statement_timeout
execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
yield
ensure
execute('RESET statement_timeout')
end
delegate :execute, :quote_table_name, to: :connection
def connection
@connection ||= ActiveRecord::Base.connection
end
end
end
end
end
......@@ -41,13 +41,7 @@ module Gitlab
end
def perform_for(index, action)
strategy = if Feature.enabled?(:database_reindexing_pg12, type: :development)
ReindexConcurrently
else
ConcurrentReindex
end
strategy.new(index).perform
ReindexConcurrently.new(index).perform
rescue StandardError
action.state = :failed
......
......@@ -154,7 +154,7 @@ namespace :gitlab do
Rake::Task['gitlab:db:create_dynamic_partitions'].invoke
end
desc 'reindex a regular (non-unique) index without downtime to eliminate bloat'
desc 'reindex a regular index without downtime to eliminate bloat'
task :reindex, [:index_name] => :environment do |_, args|
unless Feature.enabled?(:database_reindexing, type: :ops)
puts "This feature (database_reindexing) is currently disabled.".color(:yellow)
......
......@@ -22,30 +22,6 @@ RSpec.describe Gitlab::Database::PostgresIndex do
it_behaves_like 'a postgres model'
describe '.regular' do
it 'only non-unique indexes' do
expect(described_class.regular).to all(have_attributes(unique: false))
end
it 'only non partitioned indexes' do
expect(described_class.regular).to all(have_attributes(partitioned: false))
end
it 'only indexes that dont serve an exclusion constraint' do
expect(described_class.regular).to all(have_attributes(exclusion: false))
end
it 'only non-expression indexes' do
expect(described_class.regular).to all(have_attributes(expression: false))
end
it 'only btree and gist indexes' do
types = described_class.regular.map(&:type).uniq
expect(types & %w(btree gist)).to eq(types)
end
end
describe '.reindexing_support' do
it 'only non partitioned indexes' do
expect(described_class.reindexing_support).to all(have_attributes(partitioned: false))
......
......@@ -11,7 +11,7 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do
let(:index) { create(:postgres_index) }
let(:notifier) { instance_double(Gitlab::Database::Reindexing::GrafanaNotifier, notify_start: nil, notify_end: nil) }
let(:reindexer) { instance_double(Gitlab::Database::Reindexing::ConcurrentReindex, perform: nil) }
let(:reindexer) { instance_double(Gitlab::Database::Reindexing::ReindexConcurrently, perform: nil) }
let(:action) { create(:reindex_action, index: index) }
let!(:lease) { stub_exclusive_lease(lease_key, uuid, timeout: lease_timeout) }
......@@ -19,83 +19,64 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do
let(:lease_timeout) { 1.day }
let(:uuid) { 'uuid' }
shared_examples_for 'reindexing coordination' do
context 'locking' do
it 'acquires a lock while reindexing' do
expect(lease).to receive(:try_obtain).ordered.and_return(uuid)
before do
swapout_view_for_table(:postgres_indexes)
expect(reindexer).to receive(:perform).ordered
allow(Gitlab::Database::Reindexing::ReindexConcurrently).to receive(:new).with(index).and_return(reindexer)
allow(Gitlab::Database::Reindexing::ReindexAction).to receive(:create_for).with(index).and_return(action)
end
expect(Gitlab::ExclusiveLease).to receive(:cancel).ordered.with(lease_key, uuid)
context 'locking' do
it 'acquires a lock while reindexing' do
expect(lease).to receive(:try_obtain).ordered.and_return(uuid)
subject
end
expect(reindexer).to receive(:perform).ordered
it 'does not perform reindexing actions if lease is not granted' do
expect(lease).to receive(:try_obtain).ordered.and_return(false)
expect(Gitlab::Database::Reindexing::ConcurrentReindex).not_to receive(:new)
expect(Gitlab::ExclusiveLease).to receive(:cancel).ordered.with(lease_key, uuid)
subject
end
subject
end
context 'notifications' do
it 'sends #notify_start before reindexing' do
expect(notifier).to receive(:notify_start).with(action).ordered
expect(reindexer).to receive(:perform).ordered
subject
end
it 'does not perform reindexing actions if lease is not granted' do
expect(lease).to receive(:try_obtain).ordered.and_return(false)
expect(Gitlab::Database::Reindexing::ReindexConcurrently).not_to receive(:new)
it 'sends #notify_end after reindexing and updating the action is done' do
expect(action).to receive(:finish).ordered
expect(notifier).to receive(:notify_end).with(action).ordered
subject
end
subject
end
end
context 'action tracking' do
it 'calls #finish on the action' do
expect(reindexer).to receive(:perform).ordered
expect(action).to receive(:finish).ordered
subject
end
context 'notifications' do
it 'sends #notify_start before reindexing' do
expect(notifier).to receive(:notify_start).with(action).ordered
expect(reindexer).to receive(:perform).ordered
it 'upon error, it still calls finish and raises the error' do
expect(reindexer).to receive(:perform).ordered.and_raise('something went wrong')
expect(action).to receive(:finish).ordered
subject
end
expect { subject }.to raise_error(/something went wrong/)
it 'sends #notify_end after reindexing and updating the action is done' do
expect(action).to receive(:finish).ordered
expect(notifier).to receive(:notify_end).with(action).ordered
expect(action).to be_failed
end
subject
end
end
context 'legacy reindexing method (< PG12) - to be removed' do
before do
stub_feature_flags(database_reindexing_pg12: false)
swapout_view_for_table(:postgres_indexes)
context 'action tracking' do
it 'calls #finish on the action' do
expect(reindexer).to receive(:perform).ordered
expect(action).to receive(:finish).ordered
allow(Gitlab::Database::Reindexing::ConcurrentReindex).to receive(:new).with(index).and_return(reindexer)
allow(Gitlab::Database::Reindexing::ReindexAction).to receive(:create_for).with(index).and_return(action)
subject
end
it_behaves_like 'reindexing coordination'
end
it 'upon error, it still calls finish and raises the error' do
expect(reindexer).to receive(:perform).ordered.and_raise('something went wrong')
expect(action).to receive(:finish).ordered
context 'PG12 reindexing method' do
before do
stub_feature_flags(database_reindexing_pg12: true)
swapout_view_for_table(:postgres_indexes)
expect { subject }.to raise_error(/something went wrong/)
allow(Gitlab::Database::Reindexing::ReindexConcurrently).to receive(:new).with(index).and_return(reindexer)
allow(Gitlab::Database::Reindexing::ReindexAction).to receive(:create_for).with(index).and_return(action)
expect(action).to be_failed
end
it_behaves_like 'reindexing coordination'
end
end
end
......@@ -29,30 +29,11 @@ RSpec.describe Gitlab::Database::Reindexing do
describe '.candidate_indexes' do
subject { described_class.candidate_indexes }
context 'with deprecated method for < PG12' do
before do
stub_feature_flags(database_reindexing_pg12: false)
end
it 'retrieves regular indexes that are no left-overs from previous runs' do
result = double
expect(Gitlab::Database::PostgresIndex).to receive_message_chain('not_match.not_match.not_match.regular').with('^tmp_reindex_').with('^old_reindex_').with('\_ccnew[0-9]*$').with(no_args).and_return(result)
expect(subject).to eq(result)
end
end
it 'retrieves regular indexes that are no left-overs from previous runs' do
result = double
expect(Gitlab::Database::PostgresIndex).to receive_message_chain('not_match.reindexing_support').with('\_ccnew[0-9]*$').with(no_args).and_return(result)
context 'with deprecated method for >= PG12' do
before do
stub_feature_flags(database_reindexing_pg12: true)
end
it 'retrieves regular indexes that are no left-overs from previous runs' do
result = double
expect(Gitlab::Database::PostgresIndex).to receive_message_chain('not_match.not_match.not_match.reindexing_support').with('^tmp_reindex_').with('^old_reindex_').with('\_ccnew[0-9]*$').with(no_args).and_return(result)
expect(subject).to eq(result)
end
expect(subject).to eq(result)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment