Commit 446ebbc9 authored by Andreas Brandl's avatar Andreas Brandl Committed by Yannis Roussos

Add queuing mechanic for reindexing

This adds an explicit queuing mechanis for reindexing. The rake task
works as follows:

1. Trigger async index creation if necessary (still a hack)
1. Cleanup leftovers from previous run
1. Consume from the queue if non-empty
1. Execute heuristic to choose indexes for regular reindexing

Relates to https://gitlab.com/gitlab-org/gitlab/-/issues/330922

Changelog: added
parent 7c0d178a
# frozen_string_literal: true
class AddReindexingQueue < Gitlab::Database::Migration[1.0]
def change
create_table :postgres_reindex_queued_actions do |t|
t.text :index_identifier, null: false, limit: 255
t.integer :state, limit: 2, null: false, default: 0
t.timestamps_with_timezone null: false
t.index :state
end
change_column_default :postgres_reindex_queued_actions, :created_at, from: nil, to: -> { 'NOW()' }
change_column_default :postgres_reindex_queued_actions, :updated_at, from: nil, to: -> { 'NOW()' }
end
end
3e02605ce307d0ce37c3830e6909e7cfe5632408a757adf59209a70da92c0bc6
\ No newline at end of file
...@@ -17856,6 +17856,24 @@ CREATE SEQUENCE postgres_reindex_actions_id_seq ...@@ -17856,6 +17856,24 @@ CREATE SEQUENCE postgres_reindex_actions_id_seq
ALTER SEQUENCE postgres_reindex_actions_id_seq OWNED BY postgres_reindex_actions.id; ALTER SEQUENCE postgres_reindex_actions_id_seq OWNED BY postgres_reindex_actions.id;
CREATE TABLE postgres_reindex_queued_actions (
id bigint NOT NULL,
index_identifier text NOT NULL,
state smallint DEFAULT 0 NOT NULL,
created_at timestamp with time zone DEFAULT now() NOT NULL,
updated_at timestamp with time zone DEFAULT now() NOT NULL,
CONSTRAINT check_e4b01395c0 CHECK ((char_length(index_identifier) <= 255))
);
CREATE SEQUENCE postgres_reindex_queued_actions_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE postgres_reindex_queued_actions_id_seq OWNED BY postgres_reindex_queued_actions.id;
CREATE TABLE programming_languages ( CREATE TABLE programming_languages (
id integer NOT NULL, id integer NOT NULL,
name character varying NOT NULL, name character varying NOT NULL,
...@@ -21756,6 +21774,8 @@ ALTER TABLE ONLY postgres_async_indexes ALTER COLUMN id SET DEFAULT nextval('pos ...@@ -21756,6 +21774,8 @@ ALTER TABLE ONLY postgres_async_indexes ALTER COLUMN id SET DEFAULT nextval('pos
ALTER TABLE ONLY postgres_reindex_actions ALTER COLUMN id SET DEFAULT nextval('postgres_reindex_actions_id_seq'::regclass); ALTER TABLE ONLY postgres_reindex_actions ALTER COLUMN id SET DEFAULT nextval('postgres_reindex_actions_id_seq'::regclass);
ALTER TABLE ONLY postgres_reindex_queued_actions ALTER COLUMN id SET DEFAULT nextval('postgres_reindex_queued_actions_id_seq'::regclass);
ALTER TABLE ONLY product_analytics_events_experimental ALTER COLUMN id SET DEFAULT nextval('product_analytics_events_experimental_id_seq'::regclass); ALTER TABLE ONLY product_analytics_events_experimental ALTER COLUMN id SET DEFAULT nextval('product_analytics_events_experimental_id_seq'::regclass);
ALTER TABLE ONLY programming_languages ALTER COLUMN id SET DEFAULT nextval('programming_languages_id_seq'::regclass); ALTER TABLE ONLY programming_languages ALTER COLUMN id SET DEFAULT nextval('programming_languages_id_seq'::regclass);
...@@ -23583,6 +23603,9 @@ ALTER TABLE ONLY postgres_async_indexes ...@@ -23583,6 +23603,9 @@ ALTER TABLE ONLY postgres_async_indexes
ALTER TABLE ONLY postgres_reindex_actions ALTER TABLE ONLY postgres_reindex_actions
ADD CONSTRAINT postgres_reindex_actions_pkey PRIMARY KEY (id); ADD CONSTRAINT postgres_reindex_actions_pkey PRIMARY KEY (id);
ALTER TABLE ONLY postgres_reindex_queued_actions
ADD CONSTRAINT postgres_reindex_queued_actions_pkey PRIMARY KEY (id);
ALTER TABLE ONLY programming_languages ALTER TABLE ONLY programming_languages
ADD CONSTRAINT programming_languages_pkey PRIMARY KEY (id); ADD CONSTRAINT programming_languages_pkey PRIMARY KEY (id);
...@@ -26264,6 +26287,8 @@ CREATE UNIQUE INDEX index_postgres_async_indexes_on_name ON postgres_async_index ...@@ -26264,6 +26287,8 @@ CREATE UNIQUE INDEX index_postgres_async_indexes_on_name ON postgres_async_index
CREATE INDEX index_postgres_reindex_actions_on_index_identifier ON postgres_reindex_actions USING btree (index_identifier); CREATE INDEX index_postgres_reindex_actions_on_index_identifier ON postgres_reindex_actions USING btree (index_identifier);
CREATE INDEX index_postgres_reindex_queued_actions_on_state ON postgres_reindex_queued_actions USING btree (state);
CREATE UNIQUE INDEX index_programming_languages_on_name ON programming_languages USING btree (name); CREATE UNIQUE INDEX index_programming_languages_on_name ON programming_languages USING btree (name);
CREATE INDEX index_project_access_tokens_on_project_id ON project_access_tokens USING btree (project_id); CREATE INDEX index_project_access_tokens_on_project_id ON project_access_tokens USING btree (project_id);
...@@ -381,6 +381,7 @@ postgres_indexes: :gitlab_shared ...@@ -381,6 +381,7 @@ postgres_indexes: :gitlab_shared
postgres_partitioned_tables: :gitlab_shared postgres_partitioned_tables: :gitlab_shared
postgres_partitions: :gitlab_shared postgres_partitions: :gitlab_shared
postgres_reindex_actions: :gitlab_shared postgres_reindex_actions: :gitlab_shared
postgres_reindex_queued_actions: :gitlab_main
product_analytics_events_experimental: :gitlab_main product_analytics_events_experimental: :gitlab_main
programming_languages: :gitlab_main programming_languages: :gitlab_main
project_access_tokens: :gitlab_main project_access_tokens: :gitlab_main
......
...@@ -11,6 +11,7 @@ module Gitlab ...@@ -11,6 +11,7 @@ module Gitlab
has_one :bloat_estimate, class_name: 'Gitlab::Database::PostgresIndexBloatEstimate', foreign_key: :identifier has_one :bloat_estimate, class_name: 'Gitlab::Database::PostgresIndexBloatEstimate', foreign_key: :identifier
has_many :reindexing_actions, class_name: 'Gitlab::Database::Reindexing::ReindexAction', foreign_key: :index_identifier has_many :reindexing_actions, class_name: 'Gitlab::Database::Reindexing::ReindexAction', foreign_key: :index_identifier
has_many :queued_reindexing_actions, class_name: 'Gitlab::Database::Reindexing::QueuedAction', foreign_key: :index_identifier
scope :by_identifier, ->(identifier) do scope :by_identifier, ->(identifier) do
raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/ raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/
......
...@@ -15,13 +15,45 @@ module Gitlab ...@@ -15,13 +15,45 @@ module Gitlab
# on e.g. vacuum. # on e.g. vacuum.
REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30 REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
# candidate_indexes: Array of Gitlab::Database::PostgresIndex # Performs automatic reindexing for a limited number of indexes per call
def self.perform(candidate_indexes, how_many: DEFAULT_INDEXES_PER_INVOCATION) # 1. Consume from the explicit reindexing queue
IndexSelection.new(candidate_indexes).take(how_many).each do |index| # 2. Apply bloat heuristic to find most bloated indexes and reindex those
def self.automatic_reindexing(maximum_records: DEFAULT_INDEXES_PER_INVOCATION)
# Cleanup leftover temporary indexes from previous, possibly aborted runs (if any)
cleanup_leftovers!
# Consume from the explicit reindexing queue first
done_counter = perform_from_queue(maximum_records: maximum_records)
return if done_counter >= maximum_records
# Execute reindexing based on bloat heuristic
perform_with_heuristic(maximum_records: maximum_records - done_counter)
end
# Reindex based on bloat heuristic for a limited number of indexes per call
#
# We use a bloat heuristic to estimate the index bloat and pick the
# most bloated indexes for reindexing.
def self.perform_with_heuristic(candidate_indexes = Gitlab::Database::PostgresIndex.reindexing_support, maximum_records: DEFAULT_INDEXES_PER_INVOCATION)
IndexSelection.new(candidate_indexes).take(maximum_records).each do |index|
Coordinator.new(index).perform Coordinator.new(index).perform
end end
end end
# Reindex indexes that have been explicitly enqueued (for a limited number of indexes per call)
def self.perform_from_queue(maximum_records: DEFAULT_INDEXES_PER_INVOCATION)
QueuedAction.in_queue_order.limit(maximum_records).each do |queued_entry|
Coordinator.new(queued_entry.index).perform
queued_entry.done!
rescue StandardError => e
queued_entry.failed!
Gitlab::AppLogger.error("Failed to perform reindexing action on queued entry #{queued_entry}: #{e}")
end.size
end
def self.cleanup_leftovers! def self.cleanup_leftovers!
PostgresIndex.reindexing_leftovers.each do |index| PostgresIndex.reindexing_leftovers.each do |index|
Gitlab::AppLogger.info("Removing index #{index.identifier} which is a leftover, temporary index from previous reindexing activity") Gitlab::AppLogger.info("Removing index #{index.identifier} which is a leftover, temporary index from previous reindexing activity")
......
# frozen_string_literal: true
module Gitlab
module Database
module Reindexing
class QueuedAction < SharedModel
self.table_name = 'postgres_reindex_queued_actions'
enum state: { queued: 0, done: 1, failed: 2 }
belongs_to :index, foreign_key: :index_identifier, class_name: 'Gitlab::Database::PostgresIndex'
scope :in_queue_order, -> { queued.order(:created_at) }
def to_s
"queued action [ id = #{id}, index: #{index_identifier} ]"
end
end
end
end
end
...@@ -160,39 +160,46 @@ namespace :gitlab do ...@@ -160,39 +160,46 @@ namespace :gitlab do
Rake::Task['gitlab:db:create_dynamic_partitions'].invoke Rake::Task['gitlab:db:create_dynamic_partitions'].invoke
end end
desc 'reindex a regular index without downtime to eliminate bloat' desc 'execute reindexing without downtime to eliminate bloat'
task :reindex, [:index_name, :database] => :environment do |_, args| task reindex: :environment do
unless Feature.enabled?(:database_reindexing, type: :ops) unless Feature.enabled?(:database_reindexing, type: :ops, default_enabled: :yaml)
puts "This feature (database_reindexing) is currently disabled.".color(:yellow) puts "This feature (database_reindexing) is currently disabled.".color(:yellow)
exit exit
end end
Gitlab::Database::EachDatabase.each_database_connection do |connection, connection_name| Gitlab::Database::EachDatabase.each_database_connection do |connection, connection_name|
indexes = Gitlab::Database::PostgresIndex.reindexing_support
if (identifier = args[:index_name]) && (args.fetch(:database, 'main') == connection_name)
raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/
indexes = indexes.where(identifier: identifier)
raise "Index #{args[:index_name]} for #{connection_name} database not found or not supported" if indexes.empty?
end
Gitlab::Database::SharedModel.logger = Logger.new($stdout) if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false) Gitlab::Database::SharedModel.logger = Logger.new($stdout) if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false)
# Cleanup leftover temporary indexes from previous, possibly aborted runs (if any)
Gitlab::Database::Reindexing.cleanup_leftovers!
# Hack: Before we do actual reindexing work, create async indexes # Hack: Before we do actual reindexing work, create async indexes
Gitlab::Database::AsyncIndexes.create_pending_indexes! if Feature.enabled?(:database_async_index_creation, type: :ops) Gitlab::Database::AsyncIndexes.create_pending_indexes! if Feature.enabled?(:database_async_index_creation, type: :ops)
Gitlab::Database::Reindexing.perform(indexes) Gitlab::Database::Reindexing.automatic_reindexing
end end
rescue StandardError => e rescue StandardError => e
Gitlab::AppLogger.error(e) Gitlab::AppLogger.error(e)
raise raise
end end
desc 'Enqueue an index for reindexing'
task :enqueue_reindexing_action, [:index_name, :database] => :environment do |_, args|
connection = Gitlab::Database.databases[args.fetch(:database, Gitlab::Database::PRIMARY_DATABASE_NAME)]
Gitlab::Database::SharedModel.using_connection(connection.scope.connection) do
queued_action = Gitlab::Database::PostgresIndex.find(args[:index_name]).queued_reindexing_actions.create!
puts "Queued reindexing action: #{queued_action}"
puts "There are #{Gitlab::Database::Reindexing::QueuedAction.queued.size} queued actions in total."
end
unless Feature.enabled?(:database_reindexing, type: :ops, default_enabled: :yaml)
puts <<~NOTE.color(:yellow)
Note: database_reindexing feature is currently disabled.
Enable with: Feature.enable(:database_reindexing)
NOTE
end
end
desc 'Check if there have been user additions to the database' desc 'Check if there have been user additions to the database'
task active: :environment do task active: :environment do
if ActiveRecord::Base.connection.migration_context.needs_migration? if ActiveRecord::Base.connection.migration_context.needs_migration?
......
# frozen_string_literal: true
FactoryBot.define do
factory :reindexing_queued_action, class: 'Gitlab::Database::Reindexing::QueuedAction' do
association :index, factory: :postgres_index
state { Gitlab::Database::Reindexing::QueuedAction.states[:queued] }
index_identifier { index.identifier }
end
end
...@@ -4,10 +4,63 @@ require 'spec_helper' ...@@ -4,10 +4,63 @@ require 'spec_helper'
RSpec.describe Gitlab::Database::Reindexing do RSpec.describe Gitlab::Database::Reindexing do
include ExclusiveLeaseHelpers include ExclusiveLeaseHelpers
include Database::DatabaseHelpers
describe '.perform' do describe '.automatic_reindexing' do
subject { described_class.perform(candidate_indexes) } subject { described_class.automatic_reindexing(maximum_records: limit) }
let(:limit) { 5 }
before_all do
swapout_view_for_table(:postgres_indexes)
end
before do
allow(Gitlab::Database::Reindexing).to receive(:cleanup_leftovers!)
allow(Gitlab::Database::Reindexing).to receive(:perform_from_queue).and_return(0)
allow(Gitlab::Database::Reindexing).to receive(:perform_with_heuristic).and_return(0)
end
it 'cleans up leftovers, before consuming the queue' do
expect(Gitlab::Database::Reindexing).to receive(:cleanup_leftovers!).ordered
expect(Gitlab::Database::Reindexing).to receive(:perform_from_queue).ordered
subject
end
context 'with records in the queue' do
before do
create(:reindexing_queued_action)
end
context 'with enough records in the queue to reach limit' do
let(:limit) { 1 }
it 'does not perform reindexing with heuristic' do
expect(Gitlab::Database::Reindexing).to receive(:perform_from_queue).and_return(limit)
expect(Gitlab::Database::Reindexing).not_to receive(:perform_with_heuristic)
subject
end
end
context 'without enough records in the queue to reach limit' do
let(:limit) { 2 }
it 'continues if the queue did not have enough records' do
expect(Gitlab::Database::Reindexing).to receive(:perform_from_queue).ordered.and_return(1)
expect(Gitlab::Database::Reindexing).to receive(:perform_with_heuristic).with(maximum_records: 1).ordered
subject
end
end
end
end
describe '.perform_with_heuristic' do
subject { described_class.perform_with_heuristic(candidate_indexes, maximum_records: limit) }
let(:limit) { 2 }
let(:coordinator) { instance_double(Gitlab::Database::Reindexing::Coordinator) } let(:coordinator) { instance_double(Gitlab::Database::Reindexing::Coordinator) }
let(:index_selection) { instance_double(Gitlab::Database::Reindexing::IndexSelection) } let(:index_selection) { instance_double(Gitlab::Database::Reindexing::IndexSelection) }
let(:candidate_indexes) { double } let(:candidate_indexes) { double }
...@@ -15,7 +68,7 @@ RSpec.describe Gitlab::Database::Reindexing do ...@@ -15,7 +68,7 @@ RSpec.describe Gitlab::Database::Reindexing do
it 'delegates to Coordinator' do it 'delegates to Coordinator' do
expect(Gitlab::Database::Reindexing::IndexSelection).to receive(:new).with(candidate_indexes).and_return(index_selection) expect(Gitlab::Database::Reindexing::IndexSelection).to receive(:new).with(candidate_indexes).and_return(index_selection)
expect(index_selection).to receive(:take).with(2).and_return(indexes) expect(index_selection).to receive(:take).with(limit).and_return(indexes)
indexes.each do |index| indexes.each do |index|
expect(Gitlab::Database::Reindexing::Coordinator).to receive(:new).with(index).and_return(coordinator) expect(Gitlab::Database::Reindexing::Coordinator).to receive(:new).with(index).and_return(coordinator)
...@@ -26,6 +79,59 @@ RSpec.describe Gitlab::Database::Reindexing do ...@@ -26,6 +79,59 @@ RSpec.describe Gitlab::Database::Reindexing do
end end
end end
describe '.perform_from_queue' do
subject { described_class.perform_from_queue(maximum_records: limit) }
before_all do
swapout_view_for_table(:postgres_indexes)
end
let(:limit) { 2 }
let(:queued_actions) { create_list(:reindexing_queued_action, 3) }
let(:coordinator) { instance_double(Gitlab::Database::Reindexing::Coordinator) }
before do
queued_actions.take(limit).each do |action|
allow(Gitlab::Database::Reindexing::Coordinator).to receive(:new).with(action.index).and_return(coordinator)
allow(coordinator).to receive(:perform)
end
end
it 'consumes the queue in order of created_at and applies the limit' do
queued_actions.take(limit).each do |action|
expect(Gitlab::Database::Reindexing::Coordinator).to receive(:new).ordered.with(action.index).and_return(coordinator)
expect(coordinator).to receive(:perform)
end
subject
end
it 'updates queued action and sets state to done' do
subject
queue = queued_actions
queue.shift(limit).each do |action|
expect(action.reload.state).to eq('done')
end
queue.each do |action|
expect(action.reload.state).to eq('queued')
end
end
it 'updates queued action upon error and sets state to failed' do
expect(Gitlab::Database::Reindexing::Coordinator).to receive(:new).ordered.with(queued_actions.first.index).and_return(coordinator)
expect(coordinator).to receive(:perform).and_raise('something went wrong')
subject
states = queued_actions.map(&:reload).map(&:state)
expect(states).to eq(%w(failed done queued))
end
end
describe '.cleanup_leftovers!' do describe '.cleanup_leftovers!' do
subject { described_class.cleanup_leftovers! } subject { described_class.cleanup_leftovers! }
......
...@@ -215,7 +215,7 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do ...@@ -215,7 +215,7 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do
stub_feature_flags(database_async_index_creation: true) stub_feature_flags(database_async_index_creation: true)
expect(Gitlab::Database::AsyncIndexes).to receive(:create_pending_indexes!).ordered.exactly(databases_count).times expect(Gitlab::Database::AsyncIndexes).to receive(:create_pending_indexes!).ordered.exactly(databases_count).times
expect(Gitlab::Database::Reindexing).to receive(:perform).ordered.exactly(databases_count).times expect(Gitlab::Database::Reindexing).to receive(:automatic_reindexing).ordered.once
run_rake_task('gitlab:db:reindex') run_rake_task('gitlab:db:reindex')
end end
...@@ -231,56 +231,30 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do ...@@ -231,56 +231,30 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do
end end
end end
context 'when no index_name is given' do context 'calls automatic reindexing' do
it 'uses all candidate indexes' do it 'uses all candidate indexes' do
expect(Gitlab::Database::PostgresIndex).to receive(:reindexing_support).exactly(databases_count).times.and_return(indexes) expect(Gitlab::Database::Reindexing).to receive(:automatic_reindexing).once
expect(Gitlab::Database::Reindexing).to receive(:perform).with(indexes).exactly(databases_count).times
run_rake_task('gitlab:db:reindex') run_rake_task('gitlab:db:reindex')
end end
end end
context 'with index name given' do
let(:index) { double('index') }
before do
allow(Gitlab::Database::PostgresIndex).to receive(:reindexing_support).and_return(indexes)
end end
it 'calls the index rebuilder with the proper arguments' do describe 'enqueue_reindexing_action' do
allow(indexes).to receive(:where).with(identifier: 'public.foo_idx').and_return([index]) let(:index_name) { 'public.users_pkey' }
expect(Gitlab::Database::Reindexing).to receive(:perform).with([index]).ordered
expect(Gitlab::Database::Reindexing).to receive(:perform).with(indexes).ordered if databases.many?
run_rake_task('gitlab:db:reindex', '[public.foo_idx]') it 'creates an entry in the queue' do
expect do
run_rake_task('gitlab:db:enqueue_reindexing_action', "[#{index_name}, main]")
end.to change { Gitlab::Database::PostgresIndex.find(index_name).queued_reindexing_actions.size }.from(0).to(1)
end end
context 'when database name is provided' do it 'defaults to main database' do
it 'calls the index rebuilder with the proper arguments when the database name match' do expect(Gitlab::Database::SharedModel).to receive(:using_connection).with(Gitlab::Database.main.scope.connection).and_call_original
allow(indexes).to receive(:where).with(identifier: 'public.foo_idx').and_return([index])
expect(Gitlab::Database::Reindexing).to receive(:perform).with([index]).ordered
expect(Gitlab::Database::Reindexing).to receive(:perform).with(indexes).ordered if databases.many?
run_rake_task('gitlab:db:reindex', '[public.foo_idx,main]') expect do
end run_rake_task('gitlab:db:enqueue_reindexing_action', "[#{index_name}]")
end.to change { Gitlab::Database::PostgresIndex.find(index_name).queued_reindexing_actions.size }.from(0).to(1)
it 'ignores the index and uses all candidate indexes if database name does not match' do
expect(Gitlab::Database::PostgresIndex).to receive(:reindexing_support).exactly(databases_count).times.and_return(indexes)
expect(Gitlab::Database::Reindexing).to receive(:perform).with(indexes).exactly(databases_count).times
run_rake_task('gitlab:db:reindex', '[public.foo_idx,no_such_database]')
end
end
it 'raises an error if the index does not exist' do
allow(indexes).to receive(:where).with(identifier: 'public.absent_index').and_return([])
expect { run_rake_task('gitlab:db:reindex', '[public.absent_index]') }.to raise_error(/Index public.absent_index for main database not found or not supported/)
end
it 'raises an error if the index is not fully qualified with a schema' do
expect { run_rake_task('gitlab:db:reindex', '[foo_idx]') }.to raise_error(/Index name is not fully qualified/)
end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment