Commit 888c52b2 authored by Bob Van Landuyt's avatar Bob Van Landuyt

Merge branch 'ab/reindexing-cleanup' into 'master'

Cleanup temp indexes from reindexing

See merge request gitlab-org/gitlab!65971
parents 25b4ebd8 8333786e
......@@ -19,7 +19,12 @@ module Gitlab
end
# Indexes with reindexing support
scope :reindexing_support, -> { where(partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES) }
scope :reindexing_support, -> do
where(partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES)
.not_match("#{Gitlab::Database::Reindexing::ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$")
end
scope :reindexing_leftovers, -> { match("#{Gitlab::Database::Reindexing::ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$") }
scope :not_match, ->(regex) { where("name !~ ?", regex) }
......
......@@ -8,6 +8,13 @@ module Gitlab
SUPPORTED_TYPES = %w(btree gist).freeze
# When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock,
# which only conflicts with DDL and vacuum. We therefore execute this with a rather
# high lock timeout and a long pause in between retries. This is an alternative to
# setting a high statement timeout, which would lead to a long running query with effects
# on e.g. vacuum.
REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
# candidate_indexes: Array of Gitlab::Database::PostgresIndex
def self.perform(candidate_indexes, how_many: DEFAULT_INDEXES_PER_INVOCATION)
IndexSelection.new(candidate_indexes).take(how_many).each do |index|
......@@ -15,10 +22,22 @@ module Gitlab
end
end
def self.candidate_indexes
Gitlab::Database::PostgresIndex
.not_match("#{ReindexConcurrently::TEMPORARY_INDEX_PATTERN}$")
.reindexing_support
def self.cleanup_leftovers!
PostgresIndex.reindexing_leftovers.each do |index|
Gitlab::AppLogger.info("Removing index #{index.identifier} which is a leftover, temporary index from previous reindexing activity")
retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
klass: self.class,
logger: Gitlab::AppLogger
)
retries.run(raise_on_exhaustion: false) do
ApplicationRecord.connection.tap do |conn|
conn.execute("DROP INDEX CONCURRENTLY IF EXISTS #{conn.quote_table_name(index.schema)}.#{conn.quote_table_name(index.name)}")
end
end
end
end
end
end
......
......@@ -11,13 +11,6 @@ module Gitlab
STATEMENT_TIMEOUT = 9.hours
PG_MAX_INDEX_NAME_LENGTH = 63
# When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock,
# which only conflicts with DDL and vacuum. We therefore execute this with a rather
# high lock timeout and a long pause in between retries. This is an alternative to
# setting a high statement timeout, which would lead to a long running query with effects
# on e.g. vacuum.
REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
attr_reader :index, :logger
def initialize(index, logger: Gitlab::AppLogger)
......
......@@ -161,7 +161,7 @@ namespace :gitlab do
exit
end
indexes = Gitlab::Database::Reindexing.candidate_indexes
indexes = Gitlab::Database::PostgresIndex.reindexing_support
if identifier = args[:index_name]
raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/
......@@ -173,6 +173,9 @@ namespace :gitlab do
ActiveRecord::Base.logger = Logger.new($stdout) if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false)
# Cleanup leftover temporary indexes from previous, possibly aborted runs (if any)
Gitlab::Database::Reindexing.cleanup_leftovers!
Gitlab::Database::Reindexing.perform(indexes)
rescue StandardError => e
Gitlab::AppLogger.error(e)
......
......@@ -40,6 +40,37 @@ RSpec.describe Gitlab::Database::PostgresIndex do
expect(types & %w(btree gist)).to eq(types)
end
context 'with leftover indexes' do
before do
ActiveRecord::Base.connection.execute(<<~SQL)
CREATE INDEX foobar_ccnew ON users (id);
CREATE INDEX foobar_ccnew1 ON users (id);
SQL
end
subject { described_class.reindexing_support.map(&:name) }
it 'excludes temporary indexes from reindexing' do
expect(subject).not_to include('foobar_ccnew')
expect(subject).not_to include('foobar_ccnew1')
end
end
end
describe '.reindexing_leftovers' do
subject { described_class.reindexing_leftovers }
before do
ActiveRecord::Base.connection.execute(<<~SQL)
CREATE INDEX foobar_ccnew ON users (id);
CREATE INDEX foobar_ccnew1 ON users (id);
SQL
end
it 'retrieves leftover indexes matching the /_ccnew[0-9]*$/ pattern' do
expect(subject.map(&:name)).to eq(%w(foobar_ccnew foobar_ccnew1))
end
end
describe '.not_match' do
......
......@@ -26,14 +26,31 @@ RSpec.describe Gitlab::Database::Reindexing do
end
end
describe '.candidate_indexes' do
subject { described_class.candidate_indexes }
describe '.cleanup_leftovers!' do
subject { described_class.cleanup_leftovers! }
before do
ApplicationRecord.connection.execute(<<~SQL)
CREATE INDEX foobar_ccnew ON users (id);
CREATE INDEX foobar_ccnew1 ON users (id);
SQL
end
it 'retrieves regular indexes that are no left-overs from previous runs' do
result = double
expect(Gitlab::Database::PostgresIndex).to receive_message_chain('not_match.reindexing_support').with('\_ccnew[0-9]*$').with(no_args).and_return(result)
it 'drops both leftover indexes' do
expect_query("SET lock_timeout TO '60000ms'")
expect_query("DROP INDEX CONCURRENTLY IF EXISTS \"public\".\"foobar_ccnew\"")
expect_query("RESET idle_in_transaction_session_timeout; RESET lock_timeout")
expect_query("SET lock_timeout TO '60000ms'")
expect_query("DROP INDEX CONCURRENTLY IF EXISTS \"public\".\"foobar_ccnew1\"")
expect_query("RESET idle_in_transaction_session_timeout; RESET lock_timeout")
expect(subject).to eq(result)
subject
end
def expect_query(sql)
expect(ApplicationRecord.connection).to receive(:execute).ordered.with(sql).and_wrap_original do |method, sql|
method.call(sql.sub(/CONCURRENTLY/, ''))
end
end
end
end
......@@ -201,9 +201,15 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do
let(:reindex) { double('reindex') }
let(:indexes) { double('indexes') }
it 'cleans up any leftover indexes' do
expect(Gitlab::Database::Reindexing).to receive(:cleanup_leftovers!)
run_rake_task('gitlab:db:reindex')
end
context 'when no index_name is given' do
it 'uses all candidate indexes' do
expect(Gitlab::Database::Reindexing).to receive(:candidate_indexes).and_return(indexes)
expect(Gitlab::Database::PostgresIndex).to receive(:reindexing_support).and_return(indexes)
expect(Gitlab::Database::Reindexing).to receive(:perform).with(indexes)
run_rake_task('gitlab:db:reindex')
......@@ -214,7 +220,7 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do
let(:index) { double('index') }
before do
allow(Gitlab::Database::Reindexing).to receive(:candidate_indexes).and_return(indexes)
allow(Gitlab::Database::PostgresIndex).to receive(:reindexing_support).and_return(indexes)
end
it 'calls the index rebuilder with the proper arguments' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment