Commit 982f2257 authored by Andreas Brandl

Support reindexing unique indexes

This enables reindexing behavior compatible with PG12 and later by leveraging
REINDEX CONCURRENTLY, which supports reindexing unique indexes.

Changelog: other
parent 0599a0c1
---
name: database_reindexing_pg12
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/64695
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/334372
milestone: '14.1'
type: development
group: group::database
default_enabled: false
...@@ -348,7 +348,6 @@ sudo gitlab-rake gitlab:db:reindex['public.a_specific_index'] ...@@ -348,7 +348,6 @@ sudo gitlab-rake gitlab:db:reindex['public.a_specific_index']
The following index types are not supported: The following index types are not supported:
1. Unique and primary key indexes
1. Indexes used for constraint exclusion 1. Indexes used for constraint exclusion
1. Partitioned indexes 1. Partitioned indexes
1. Expression indexes 1. Expression indexes
......
...@@ -18,19 +18,12 @@ module Gitlab ...@@ -18,19 +18,12 @@ module Gitlab
find(identifier) find(identifier)
end end
# A 'regular' index is a non-unique index, # Indexes with reindexing support
# that does not serve an exclusion constraint and
# is defined on a table that is not partitioned.
#
# Deprecated: Switch to scope .reindexing_support
scope :regular, -> { where(unique: false, partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES)}
# Indexes for reindexing with PG12
scope :reindexing_support, -> { where(partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES) } scope :reindexing_support, -> { where(partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES) }
scope :not_match, ->(regex) { where("name !~ ?", regex)} scope :not_match, ->(regex) { where("name !~ ?", regex) }
scope :match, ->(regex) { where("name ~* ?", regex)} scope :match, ->(regex) { where("name ~* ?", regex) }
scope :not_recently_reindexed, -> do scope :not_recently_reindexed, -> do
recent_actions = Reindexing::ReindexAction.recent.where('index_identifier = identifier') recent_actions = Reindexing::ReindexAction.recent.where('index_identifier = identifier')
......
...@@ -16,16 +16,9 @@ module Gitlab ...@@ -16,16 +16,9 @@ module Gitlab
end end
def self.candidate_indexes def self.candidate_indexes
indexes = Gitlab::Database::PostgresIndex Gitlab::Database::PostgresIndex
.not_match("^#{ConcurrentReindex::TEMPORARY_INDEX_PREFIX}")
.not_match("^#{ConcurrentReindex::REPLACED_INDEX_PREFIX}")
.not_match("#{ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$") .not_match("#{ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$")
.reindexing_support
if Feature.enabled?(:database_reindexing_pg12, type: :development)
indexes.reindexing_support
else
indexes.regular
end
end end
end end
end end
......
# frozen_string_literal: true
module Gitlab
  module Database
    module Reindexing
      # Rebuilds a single index by creating a replacement with
      # CREATE INDEX CONCURRENTLY, swapping the two by renaming, and dropping
      # the superseded index afterwards.
      class ConcurrentReindex
        include Gitlab::Utils::StrongMemoize

        ReindexError = Class.new(StandardError)

        PG_IDENTIFIER_LENGTH = 63
        TEMPORARY_INDEX_PREFIX = 'tmp_reindex_'
        REPLACED_INDEX_PREFIX = 'old_reindex_'
        STATEMENT_TIMEOUT = 9.hours

        # When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock,
        # which only conflicts with DDL and vacuum. We therefore execute this with a rather
        # high lock timeout and a long pause in between retries. This is an alternative to
        # setting a high statement timeout, which would lead to a long running query with effects
        # on e.g. vacuum.
        REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30

        attr_reader :index, :logger

        # index  - the index record to rebuild; this class reads its name,
        #          schema, tablename, definition, indexrelid and the
        #          unique?/partitioned?/exclusion?/expression? predicates.
        # logger - receives progress and error messages.
        def initialize(index, logger: Gitlab::AppLogger)
          @index = index
          @logger = logger
        end

        # Rebuilds the index and swaps it in place of the original.
        #
        # Raises ReindexError for unique and partitioned indexes, indexes
        # serving an exclusion constraint, and left-over indexes from a
        # previous run (recognised by their name prefix).
        def perform
          raise ReindexError, 'UNIQUE indexes are currently not supported' if index.unique?
          raise ReindexError, 'partitioned indexes are currently not supported' if index.partitioned?
          raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion?
          raise ReindexError, 'index is a left-over temporary index from a previous reindexing run' if index.name.start_with?(TEMPORARY_INDEX_PREFIX, REPLACED_INDEX_PREFIX)

          logger.info "Starting reindex of #{index}"

          with_rebuilt_index do |replacement_index|
            swap_index(replacement_index)
          end
        end

        private

        # Creates the replacement index, yields it to the caller and always
        # drops whatever index carries the replacement name afterwards.
        #
        # Raises ReindexError when the replacement statement cannot be derived,
        # or when the replacement index is missing or invalid after creation.
        def with_rebuilt_index
          if Gitlab::Database::PostgresIndex.find_by(schema: index.schema, name: replacement_index_name)
            logger.debug("dropping dangling index from previous run (if it exists): #{replacement_index_name}")
            remove_index(index.schema, replacement_index_name)
          end

          create_replacement_index_statement = build_replacement_index_statement

          logger.info("creating replacement index #{replacement_index_name}")
          logger.debug("replacement index definition: #{create_replacement_index_statement}")

          set_statement_timeout do
            connection.execute(create_replacement_index_statement)
          end

          replacement_index = Gitlab::Database::PostgresIndex.find_by(schema: index.schema, name: replacement_index_name)

          # find_by returns nil when no record matches, so guard the lookup
          # instead of failing with a NoMethodError on nil.
          unless replacement_index&.valid_index?
            message = if replacement_index
                        'replacement index was created as INVALID'
                      else
                        'replacement index was not found after creation'
                      end

            logger.error("#{message}, cleaning up")
            raise ReindexError, "failed to reindex #{index}: #{message}"
          end

          # Some expression indexes (aka functional indexes)
          # require additional statistics. The existing statistics
          # are tightly bound to the original index. We have to
          # rebuild statistics for the new index before dropping
          # the original one.
          rebuild_statistics if index.expression?

          yield replacement_index
        ensure
          # After a successful swap the replacement name refers to the former
          # original index, so this drops the superseded one. After a failure
          # it drops the half-built replacement. Errors are logged only, so
          # they do not mask the primary exception.
          begin
            remove_index(index.schema, replacement_index_name)
          rescue StandardError => e
            logger.error(e)
          end
        end

        # Derives the CREATE INDEX CONCURRENTLY statement for the replacement
        # index from the original index definition.
        #
        # The index name is escaped before being used as a pattern. If the
        # definition does not contain the expected "CREATE INDEX <name>" prefix,
        # running it unchanged would attempt a plain (non-concurrent)
        # CREATE INDEX under the existing name, so we fail fast instead.
        def build_replacement_index_statement
          definition = index.definition
          pattern = /CREATE INDEX #{Regexp.escape(index.name)}/

          unless definition.match?(pattern)
            raise ReindexError, "failed to reindex #{index}: could not derive replacement index definition"
          end

          definition.sub(pattern, "CREATE INDEX CONCURRENTLY #{replacement_index_name}")
        end

        # Exchanges the names of the original and the replacement index using
        # three renames (original -> old_reindex_*, replacement -> original
        # name, old_reindex_* -> replacement name) inside a lock-retry wrapper.
        def swap_index(replacement_index)
          logger.info("swapping replacement index #{replacement_index} with #{index}")

          with_lock_retries do
            rename_index(index.schema, index.name, replaced_index_name)
            rename_index(replacement_index.schema, replacement_index.name, index.name)
            rename_index(index.schema, replaced_index_name, replacement_index.name)
          end
        end

        # Issues ALTER INDEX ... RENAME TO with quoted identifiers.
        def rename_index(schema, old_index_name, new_index_name)
          connection.execute(<<~SQL)
            ALTER INDEX #{quote_table_name(schema)}.#{quote_table_name(old_index_name)}
            RENAME TO #{quote_table_name(new_index_name)}
          SQL
        end

        # Drops the given index with DROP INDEX CONCURRENTLY IF EXISTS,
        # retrying according to REMOVE_INDEX_RETRY_CONFIG. Exhausting the
        # retries does not raise (raise_on_exhaustion: false).
        def remove_index(schema, name)
          logger.info("Removing index #{schema}.#{name}")

          retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
            timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
            klass: self.class,
            logger: logger
          )

          retries.run(raise_on_exhaustion: false) do
            connection.execute(<<~SQL)
              DROP INDEX CONCURRENTLY
              IF EXISTS #{quote_table_name(schema)}.#{quote_table_name(name)}
            SQL
          end
        end

        # Runs ANALYZE on the table the index belongs to.
        def rebuild_statistics
          logger.info("rebuilding table statistics for #{index.schema}.#{index.tablename}")

          connection.execute(<<~SQL)
            ANALYZE #{quote_table_name(index.schema)}.#{quote_table_name(index.tablename)}
          SQL
        end

        # Name of the temporary replacement index, derived from the index OID.
        def replacement_index_name
          @replacement_index_name ||= "#{TEMPORARY_INDEX_PREFIX}#{index.indexrelid}"
        end

        # Intermediate name given to the original index during the swap.
        def replaced_index_name
          @replaced_index_name ||= "#{REPLACED_INDEX_PREFIX}#{index.indexrelid}"
        end

        # Runs the block through WithLockRetries; raises once retries are
        # exhausted (raise_on_exhaustion: true).
        def with_lock_retries(&block)
          arguments = { klass: self.class, logger: logger }

          Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
        end

        # Sets statement_timeout to STATEMENT_TIMEOUT (in seconds) for the
        # duration of the block and always resets it afterwards.
        def set_statement_timeout
          execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
          yield
        ensure
          execute('RESET statement_timeout')
        end

        delegate :execute, :quote_table_name, to: :connection

        def connection
          @connection ||= ActiveRecord::Base.connection
        end
      end
    end
  end
end
...@@ -41,13 +41,7 @@ module Gitlab ...@@ -41,13 +41,7 @@ module Gitlab
end end
def perform_for(index, action) def perform_for(index, action)
strategy = if Feature.enabled?(:database_reindexing_pg12, type: :development) ReindexConcurrently.new(index).perform
ReindexConcurrently
else
ConcurrentReindex
end
strategy.new(index).perform
rescue StandardError rescue StandardError
action.state = :failed action.state = :failed
......
...@@ -154,7 +154,7 @@ namespace :gitlab do ...@@ -154,7 +154,7 @@ namespace :gitlab do
Rake::Task['gitlab:db:create_dynamic_partitions'].invoke Rake::Task['gitlab:db:create_dynamic_partitions'].invoke
end end
desc 'reindex a regular (non-unique) index without downtime to eliminate bloat' desc 'reindex a regular index without downtime to eliminate bloat'
task :reindex, [:index_name] => :environment do |_, args| task :reindex, [:index_name] => :environment do |_, args|
unless Feature.enabled?(:database_reindexing, type: :ops) unless Feature.enabled?(:database_reindexing, type: :ops)
puts "This feature (database_reindexing) is currently disabled.".color(:yellow) puts "This feature (database_reindexing) is currently disabled.".color(:yellow)
......
...@@ -22,30 +22,6 @@ RSpec.describe Gitlab::Database::PostgresIndex do ...@@ -22,30 +22,6 @@ RSpec.describe Gitlab::Database::PostgresIndex do
it_behaves_like 'a postgres model' it_behaves_like 'a postgres model'
describe '.regular' do
it 'only non-unique indexes' do
expect(described_class.regular).to all(have_attributes(unique: false))
end
it 'only non partitioned indexes' do
expect(described_class.regular).to all(have_attributes(partitioned: false))
end
it 'only indexes that dont serve an exclusion constraint' do
expect(described_class.regular).to all(have_attributes(exclusion: false))
end
it 'only non-expression indexes' do
expect(described_class.regular).to all(have_attributes(expression: false))
end
it 'only btree and gist indexes' do
types = described_class.regular.map(&:type).uniq
expect(types & %w(btree gist)).to eq(types)
end
end
describe '.reindexing_support' do describe '.reindexing_support' do
it 'only non partitioned indexes' do it 'only non partitioned indexes' do
expect(described_class.reindexing_support).to all(have_attributes(partitioned: false)) expect(described_class.reindexing_support).to all(have_attributes(partitioned: false))
......
...@@ -11,7 +11,7 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do ...@@ -11,7 +11,7 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do
let(:index) { create(:postgres_index) } let(:index) { create(:postgres_index) }
let(:notifier) { instance_double(Gitlab::Database::Reindexing::GrafanaNotifier, notify_start: nil, notify_end: nil) } let(:notifier) { instance_double(Gitlab::Database::Reindexing::GrafanaNotifier, notify_start: nil, notify_end: nil) }
let(:reindexer) { instance_double(Gitlab::Database::Reindexing::ConcurrentReindex, perform: nil) } let(:reindexer) { instance_double(Gitlab::Database::Reindexing::ReindexConcurrently, perform: nil) }
let(:action) { create(:reindex_action, index: index) } let(:action) { create(:reindex_action, index: index) }
let!(:lease) { stub_exclusive_lease(lease_key, uuid, timeout: lease_timeout) } let!(:lease) { stub_exclusive_lease(lease_key, uuid, timeout: lease_timeout) }
...@@ -19,7 +19,13 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do ...@@ -19,7 +19,13 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do
let(:lease_timeout) { 1.day } let(:lease_timeout) { 1.day }
let(:uuid) { 'uuid' } let(:uuid) { 'uuid' }
shared_examples_for 'reindexing coordination' do before do
swapout_view_for_table(:postgres_indexes)
allow(Gitlab::Database::Reindexing::ReindexConcurrently).to receive(:new).with(index).and_return(reindexer)
allow(Gitlab::Database::Reindexing::ReindexAction).to receive(:create_for).with(index).and_return(action)
end
context 'locking' do context 'locking' do
it 'acquires a lock while reindexing' do it 'acquires a lock while reindexing' do
expect(lease).to receive(:try_obtain).ordered.and_return(uuid) expect(lease).to receive(:try_obtain).ordered.and_return(uuid)
...@@ -33,7 +39,7 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do ...@@ -33,7 +39,7 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do
it 'does not perform reindexing actions if lease is not granted' do it 'does not perform reindexing actions if lease is not granted' do
expect(lease).to receive(:try_obtain).ordered.and_return(false) expect(lease).to receive(:try_obtain).ordered.and_return(false)
expect(Gitlab::Database::Reindexing::ConcurrentReindex).not_to receive(:new) expect(Gitlab::Database::Reindexing::ReindexConcurrently).not_to receive(:new)
subject subject
end end
...@@ -73,29 +79,4 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do ...@@ -73,29 +79,4 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do
end end
end end
end end
context 'legacy reindexing method (< PG12) - to be removed' do
before do
stub_feature_flags(database_reindexing_pg12: false)
swapout_view_for_table(:postgres_indexes)
allow(Gitlab::Database::Reindexing::ConcurrentReindex).to receive(:new).with(index).and_return(reindexer)
allow(Gitlab::Database::Reindexing::ReindexAction).to receive(:create_for).with(index).and_return(action)
end
it_behaves_like 'reindexing coordination'
end
context 'PG12 reindexing method' do
before do
stub_feature_flags(database_reindexing_pg12: true)
swapout_view_for_table(:postgres_indexes)
allow(Gitlab::Database::Reindexing::ReindexConcurrently).to receive(:new).with(index).and_return(reindexer)
allow(Gitlab::Database::Reindexing::ReindexAction).to receive(:create_for).with(index).and_return(action)
end
it_behaves_like 'reindexing coordination'
end
end
end end
...@@ -29,30 +29,11 @@ RSpec.describe Gitlab::Database::Reindexing do ...@@ -29,30 +29,11 @@ RSpec.describe Gitlab::Database::Reindexing do
describe '.candidate_indexes' do describe '.candidate_indexes' do
subject { described_class.candidate_indexes } subject { described_class.candidate_indexes }
context 'with deprecated method for < PG12' do
before do
stub_feature_flags(database_reindexing_pg12: false)
end
it 'retrieves regular indexes that are no left-overs from previous runs' do
result = double
expect(Gitlab::Database::PostgresIndex).to receive_message_chain('not_match.not_match.not_match.regular').with('^tmp_reindex_').with('^old_reindex_').with('\_ccnew[0-9]*$').with(no_args).and_return(result)
expect(subject).to eq(result)
end
end
context 'with deprecated method for >= PG12' do
before do
stub_feature_flags(database_reindexing_pg12: true)
end
it 'retrieves regular indexes that are no left-overs from previous runs' do it 'retrieves regular indexes that are no left-overs from previous runs' do
result = double result = double
expect(Gitlab::Database::PostgresIndex).to receive_message_chain('not_match.not_match.not_match.reindexing_support').with('^tmp_reindex_').with('^old_reindex_').with('\_ccnew[0-9]*$').with(no_args).and_return(result) expect(Gitlab::Database::PostgresIndex).to receive_message_chain('not_match.reindexing_support').with('\_ccnew[0-9]*$').with(no_args).and_return(result)
expect(subject).to eq(result) expect(subject).to eq(result)
end end
end end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment