Commit 086abdea authored by Bob Van Landuyt

Merge branch 'ab/reindex-heuristic' into 'master'

Heuristic to choose indexes for reindexing

See merge request gitlab-org/gitlab!48698
parents 49da3932 42a4a74c
---
title: Add btree bloat estimation view
merge_request: 48698
author:
type: other
# frozen_string_literal: true
# Creates the `postgres_index_bloat_estimates` view, which exposes one row per
# btree index with two columns:
#
# identifier       - "<schema>.<index name>"
# bloat_size_bytes - estimated bloat in bytes (0 when the index has no more
#                    pages than the estimate says it needs)
#
# Only indexes in the current schema and the two gitlab partition schemas are
# included, and rows flagged `is_na` (indexes on "name"-typed columns) are
# filtered out. The estimate is derived from `pg_stats`, which is inner-joined,
# so an index whose columns have no statistics produces no row in this view.
class AddIndexBloatEstimateView < ActiveRecord::Migration[6.0]
# NOTE(review): presumably read by the migration tooling to mark this
# migration as not requiring downtime - confirm against project conventions.
DOWNTIME = false
# Creates the view. The SQL is adapted from the upstream bloat estimation
# query referenced in the comment at the top of the statement.
def up
execute(<<~SQL)
CREATE VIEW postgres_index_bloat_estimates AS
-- Originally from: https://github.com/ioguix/pgsql-bloat-estimation/blob/master/btree/btree_bloat.sql
-- WARNING: executed with a non-superuser role, the query inspect only index on tables you are granted to read.
-- WARNING: rows with is_na = 't' are known to have bad statistics ("name" type is not supported).
-- This query is compatible with PostgreSQL 8.2 and after
SELECT nspname || '.' || idxname as identifier,
CASE WHEN relpages > est_pages_ff
THEN bs*(relpages-est_pages_ff)
ELSE 0
END::bigint AS bloat_size_bytes
FROM (
SELECT
coalesce(1 +
ceil(reltuples/floor((bs-pageopqdata-pagehdr)*fillfactor/(100*(4+nulldatahdrwidth)::float))), 0
) AS est_pages_ff,
bs, nspname, tblname, idxname, relpages, is_na
FROM (
SELECT maxalign, bs, nspname, tblname, idxname, reltuples, relpages, idxoid, fillfactor,
( index_tuple_hdr_bm +
maxalign - CASE -- Add padding to the index tuple header to align on MAXALIGN
WHEN index_tuple_hdr_bm%maxalign = 0 THEN maxalign
ELSE index_tuple_hdr_bm%maxalign
END
+ nulldatawidth + maxalign - CASE -- Add padding to the data to align on MAXALIGN
WHEN nulldatawidth = 0 THEN 0
WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign
ELSE nulldatawidth::integer%maxalign
END
)::numeric AS nulldatahdrwidth, pagehdr, pageopqdata, is_na
FROM (
SELECT n.nspname, i.tblname, i.idxname, i.reltuples, i.relpages,
i.idxoid, i.fillfactor, current_setting('block_size')::numeric AS bs,
CASE -- MAXALIGN: 4 on 32bits, 8 on 64bits (and mingw32 ?)
WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' THEN 8
ELSE 4
END AS maxalign,
/* per page header, fixed size: 20 for 7.X, 24 for others */
24 AS pagehdr,
/* per page btree opaque data */
16 AS pageopqdata,
/* per tuple header: add IndexAttributeBitMapData if some cols are null-able */
CASE WHEN max(coalesce(s.null_frac,0)) = 0
THEN 2 -- IndexTupleData size
ELSE 2 + (( 32 + 8 - 1 ) / 8) -- IndexTupleData size + IndexAttributeBitMapData size ( max num filed per index + 8 - 1 /8)
END AS index_tuple_hdr_bm,
/* data len: we remove null values save space using it fractionnal part from stats */
sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS nulldatawidth,
max( CASE WHEN i.atttypid = 'pg_catalog.name'::regtype THEN 1 ELSE 0 END ) > 0 AS is_na
FROM (
SELECT ct.relname AS tblname, ct.relnamespace, ic.idxname, ic.attpos, ic.indkey, ic.indkey[ic.attpos], ic.reltuples, ic.relpages, ic.tbloid, ic.idxoid, ic.fillfactor,
coalesce(a1.attnum, a2.attnum) AS attnum, coalesce(a1.attname, a2.attname) AS attname, coalesce(a1.atttypid, a2.atttypid) AS atttypid,
CASE WHEN a1.attnum IS NULL
THEN ic.idxname
ELSE ct.relname
END AS attrelname
FROM (
SELECT idxname, reltuples, relpages, tbloid, idxoid, fillfactor, indkey,
pg_catalog.generate_series(1,indnatts) AS attpos
FROM (
SELECT ci.relname AS idxname, ci.reltuples, ci.relpages, i.indrelid AS tbloid,
i.indexrelid AS idxoid,
coalesce(substring(
array_to_string(ci.reloptions, ' ')
from 'fillfactor=([0-9]+)')::smallint, 90) AS fillfactor,
i.indnatts,
pg_catalog.string_to_array(pg_catalog.textin(
pg_catalog.int2vectorout(i.indkey)),' ')::int[] AS indkey
FROM pg_catalog.pg_index i
JOIN pg_catalog.pg_class ci ON ci.oid = i.indexrelid
WHERE ci.relam=(SELECT oid FROM pg_am WHERE amname = 'btree')
AND ci.relpages > 0
) AS idx_data
) AS ic
JOIN pg_catalog.pg_class ct ON ct.oid = ic.tbloid
LEFT JOIN pg_catalog.pg_attribute a1 ON
ic.indkey[ic.attpos] <> 0
AND a1.attrelid = ic.tbloid
AND a1.attnum = ic.indkey[ic.attpos]
LEFT JOIN pg_catalog.pg_attribute a2 ON
ic.indkey[ic.attpos] = 0
AND a2.attrelid = ic.idxoid
AND a2.attnum = ic.attpos
) i
JOIN pg_catalog.pg_namespace n ON n.oid = i.relnamespace
JOIN pg_catalog.pg_stats s ON s.schemaname = n.nspname
AND s.tablename = i.attrelname
AND s.attname = i.attname
GROUP BY 1,2,3,4,5,6,7,8,9,10,11
) AS rows_data_stats
) AS rows_hdr_pdg_stats
) AS relation_stats
WHERE nspname IN ("current_schema"(), 'gitlab_partitions_dynamic', 'gitlab_partitions_static') AND NOT is_na
ORDER BY nspname, tblname, idxname;
SQL
end
# Reverts `up` by dropping the view.
def down
execute(<<~SQL)
DROP VIEW postgres_index_bloat_estimates
SQL
end
end
ef378c2512a2c3940016bcc82bc8885762ed1a982e38cba1c452a9063e0717e2
\ No newline at end of file
...@@ -15033,6 +15033,118 @@ CREATE SEQUENCE pool_repositories_id_seq ...@@ -15033,6 +15033,118 @@ CREATE SEQUENCE pool_repositories_id_seq
ALTER SEQUENCE pool_repositories_id_seq OWNED BY pool_repositories.id; ALTER SEQUENCE pool_repositories_id_seq OWNED BY pool_repositories.id;
CREATE VIEW postgres_index_bloat_estimates AS
SELECT (((relation_stats.nspname)::text || '.'::text) || (relation_stats.idxname)::text) AS identifier,
(
CASE
WHEN ((relation_stats.relpages)::double precision > relation_stats.est_pages_ff) THEN ((relation_stats.bs)::double precision * ((relation_stats.relpages)::double precision - relation_stats.est_pages_ff))
ELSE (0)::double precision
END)::bigint AS bloat_size_bytes
FROM ( SELECT COALESCE(((1)::double precision + ceil((rows_hdr_pdg_stats.reltuples / floor((((((rows_hdr_pdg_stats.bs - (rows_hdr_pdg_stats.pageopqdata)::numeric) - (rows_hdr_pdg_stats.pagehdr)::numeric) * (rows_hdr_pdg_stats.fillfactor)::numeric))::double precision / ((100)::double precision * (((4)::numeric + rows_hdr_pdg_stats.nulldatahdrwidth))::double precision)))))), (0)::double precision) AS est_pages_ff,
rows_hdr_pdg_stats.bs,
rows_hdr_pdg_stats.nspname,
rows_hdr_pdg_stats.tblname,
rows_hdr_pdg_stats.idxname,
rows_hdr_pdg_stats.relpages,
rows_hdr_pdg_stats.is_na
FROM ( SELECT rows_data_stats.maxalign,
rows_data_stats.bs,
rows_data_stats.nspname,
rows_data_stats.tblname,
rows_data_stats.idxname,
rows_data_stats.reltuples,
rows_data_stats.relpages,
rows_data_stats.idxoid,
rows_data_stats.fillfactor,
(((((((rows_data_stats.index_tuple_hdr_bm + rows_data_stats.maxalign) -
CASE
WHEN ((rows_data_stats.index_tuple_hdr_bm % rows_data_stats.maxalign) = 0) THEN rows_data_stats.maxalign
ELSE (rows_data_stats.index_tuple_hdr_bm % rows_data_stats.maxalign)
END))::double precision + rows_data_stats.nulldatawidth) + (rows_data_stats.maxalign)::double precision) - (
CASE
WHEN (rows_data_stats.nulldatawidth = (0)::double precision) THEN 0
WHEN (((rows_data_stats.nulldatawidth)::integer % rows_data_stats.maxalign) = 0) THEN rows_data_stats.maxalign
ELSE ((rows_data_stats.nulldatawidth)::integer % rows_data_stats.maxalign)
END)::double precision))::numeric AS nulldatahdrwidth,
rows_data_stats.pagehdr,
rows_data_stats.pageopqdata,
rows_data_stats.is_na
FROM ( SELECT n.nspname,
i.tblname,
i.idxname,
i.reltuples,
i.relpages,
i.idxoid,
i.fillfactor,
(current_setting('block_size'::text))::numeric AS bs,
CASE
WHEN ((version() ~ 'mingw32'::text) OR (version() ~ '64-bit|x86_64|ppc64|ia64|amd64'::text)) THEN 8
ELSE 4
END AS maxalign,
24 AS pagehdr,
16 AS pageopqdata,
CASE
WHEN (max(COALESCE(s.null_frac, (0)::real)) = (0)::double precision) THEN 2
ELSE (2 + (((32 + 8) - 1) / 8))
END AS index_tuple_hdr_bm,
sum((((1)::double precision - COALESCE(s.null_frac, (0)::real)) * (COALESCE(s.avg_width, 1024))::double precision)) AS nulldatawidth,
(max(
CASE
WHEN (i.atttypid = ('name'::regtype)::oid) THEN 1
ELSE 0
END) > 0) AS is_na
FROM ((( SELECT ct.relname AS tblname,
ct.relnamespace,
ic.idxname,
ic.attpos,
ic.indkey,
ic.indkey[ic.attpos] AS indkey,
ic.reltuples,
ic.relpages,
ic.tbloid,
ic.idxoid,
ic.fillfactor,
COALESCE(a1.attnum, a2.attnum) AS attnum,
COALESCE(a1.attname, a2.attname) AS attname,
COALESCE(a1.atttypid, a2.atttypid) AS atttypid,
CASE
WHEN (a1.attnum IS NULL) THEN ic.idxname
ELSE ct.relname
END AS attrelname
FROM (((( SELECT idx_data.idxname,
idx_data.reltuples,
idx_data.relpages,
idx_data.tbloid,
idx_data.idxoid,
idx_data.fillfactor,
idx_data.indkey,
generate_series(1, (idx_data.indnatts)::integer) AS attpos
FROM ( SELECT ci.relname AS idxname,
ci.reltuples,
ci.relpages,
i_1.indrelid AS tbloid,
i_1.indexrelid AS idxoid,
COALESCE((("substring"(array_to_string(ci.reloptions, ' '::text), 'fillfactor=([0-9]+)'::text))::smallint)::integer, 90) AS fillfactor,
i_1.indnatts,
(string_to_array(textin(int2vectorout(i_1.indkey)), ' '::text))::integer[] AS indkey
FROM (pg_index i_1
JOIN pg_class ci ON ((ci.oid = i_1.indexrelid)))
WHERE ((ci.relam = ( SELECT pg_am.oid
FROM pg_am
WHERE (pg_am.amname = 'btree'::name))) AND (ci.relpages > 0))) idx_data) ic
JOIN pg_class ct ON ((ct.oid = ic.tbloid)))
LEFT JOIN pg_attribute a1 ON (((ic.indkey[ic.attpos] <> 0) AND (a1.attrelid = ic.tbloid) AND (a1.attnum = ic.indkey[ic.attpos]))))
LEFT JOIN pg_attribute a2 ON (((ic.indkey[ic.attpos] = 0) AND (a2.attrelid = ic.idxoid) AND (a2.attnum = ic.attpos))))) i(tblname, relnamespace, idxname, attpos, indkey, indkey_1, reltuples, relpages, tbloid, idxoid, fillfactor, attnum, attname, atttypid, attrelname)
JOIN pg_namespace n ON ((n.oid = i.relnamespace)))
JOIN pg_stats s ON (((s.schemaname = n.nspname) AND (s.tablename = i.attrelname) AND (s.attname = i.attname))))
GROUP BY n.nspname, i.tblname, i.idxname, i.reltuples, i.relpages, i.idxoid, i.fillfactor, (current_setting('block_size'::text))::numeric,
CASE
WHEN ((version() ~ 'mingw32'::text) OR (version() ~ '64-bit|x86_64|ppc64|ia64|amd64'::text)) THEN 8
ELSE 4
END, 24::integer, 16::integer) rows_data_stats) rows_hdr_pdg_stats) relation_stats
WHERE ((relation_stats.nspname = ANY (ARRAY["current_schema"(), 'gitlab_partitions_dynamic'::name, 'gitlab_partitions_static'::name])) AND (NOT relation_stats.is_na))
ORDER BY relation_stats.nspname, relation_stats.tblname, relation_stats.idxname;
CREATE VIEW postgres_indexes AS CREATE VIEW postgres_indexes AS
SELECT (((pg_namespace.nspname)::text || '.'::text) || (pg_class.relname)::text) AS identifier, SELECT (((pg_namespace.nspname)::text || '.'::text) || (pg_class.relname)::text) AS identifier,
pg_index.indexrelid, pg_index.indexrelid,
......
...@@ -3,9 +3,14 @@ ...@@ -3,9 +3,14 @@
module Gitlab module Gitlab
module Database module Database
class PostgresIndex < ActiveRecord::Base class PostgresIndex < ActiveRecord::Base
include Gitlab::Utils::StrongMemoize
self.table_name = 'postgres_indexes' self.table_name = 'postgres_indexes'
self.primary_key = 'identifier' self.primary_key = 'identifier'
has_one :bloat_estimate, class_name: 'Gitlab::Database::PostgresIndexBloatEstimate', foreign_key: :identifier
has_many :reindexing_actions, class_name: 'Gitlab::Database::Reindexing::ReindexAction', foreign_key: :index_identifier
scope :by_identifier, ->(identifier) do scope :by_identifier, ->(identifier) do
raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/ raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/
...@@ -17,11 +22,17 @@ module Gitlab ...@@ -17,11 +22,17 @@ module Gitlab
# is defined on a table that is not partitioned. # is defined on a table that is not partitioned.
scope :regular, -> { where(unique: false, partitioned: false, exclusion: false)} scope :regular, -> { where(unique: false, partitioned: false, exclusion: false)}
scope :random_few, ->(how_many) do scope :not_match, ->(regex) { where("name !~ ?", regex)}
limit(how_many).order(Arel.sql('RANDOM()'))
scope :not_recently_reindexed, -> do
recent_actions = Reindexing::ReindexAction.recent.where('index_identifier = identifier')
where('NOT EXISTS (?)', recent_actions)
end end
scope :not_match, ->(regex) { where("name !~ ?", regex)} def bloat_size
strong_memoize(:bloat_size) { bloat_estimate&.bloat_size || 0 }
end
def to_s def to_s
name name
......
# frozen_string_literal: true
module Gitlab
  module Database
    # Model over `postgres_index_bloat_estimates`, keyed by the index
    # identifier.
    #
    # Handle with care: computing bloat statistics for every index at once
    # can be expensive in a large database. Prefer looking estimates up one
    # index at a time.
    class PostgresIndexBloatEstimate < ActiveRecord::Base
      self.table_name = 'postgres_index_bloat_estimates'
      self.primary_key = 'identifier'

      # Shorter name for the `bloat_size_bytes` column.
      alias_attribute :bloat_size, :bloat_size_bytes

      # The index this estimate describes; joined on the shared `identifier`.
      belongs_to :index,
        class_name: 'Gitlab::Database::PostgresIndex',
        foreign_key: :identifier
    end
  end
end
...@@ -3,8 +3,14 @@ ...@@ -3,8 +3,14 @@
module Gitlab module Gitlab
module Database module Database
module Reindexing module Reindexing
def self.perform(index_selector) # Number of indexes to reindex per invocation
Coordinator.new(index_selector).perform DEFAULT_INDEXES_PER_INVOCATION = 2
# candidate_indexes: Array of Gitlab::Database::PostgresIndex
def self.perform(candidate_indexes, how_many: DEFAULT_INDEXES_PER_INVOCATION)
indexes = IndexSelection.new(candidate_indexes).take(how_many)
Coordinator.new(indexes).perform
end end
def self.candidate_indexes def self.candidate_indexes
......
# frozen_string_literal: true
module Gitlab
  module Database
    module Reindexing
      # Enumerable over the indexes worth reindexing: candidates that were
      # not reindexed recently, ordered from most to least estimated bloat.
      class IndexSelection
        include Enumerable

        # candidates - collection of indexes to choose from; it must respond
        #              to `not_recently_reindexed`.
        def initialize(candidates)
          @candidates = candidates
        end

        # Iterates over the selected indexes, most bloated first.
        def each(&block)
          indexes.each(&block)
        end

        private

        attr_reader :candidates

        # Memoized, sorted selection.
        #
        # The per-index `bloat_size` lookup is a deliberate N+1 query: bloat
        # estimates for all indexes are available through a view, but
        # estimating every index in one go is expensive, so the estimate is
        # fetched one index at a time instead.
        def indexes
          @indexes ||= begin
            eligible = candidates.not_recently_reindexed
            eligible.sort_by(&:bloat_size).reverse
          end
        end
      end
    end
  end
end
...@@ -6,8 +6,14 @@ module Gitlab ...@@ -6,8 +6,14 @@ module Gitlab
class ReindexAction < ActiveRecord::Base class ReindexAction < ActiveRecord::Base
self.table_name = 'postgres_reindex_actions' self.table_name = 'postgres_reindex_actions'
belongs_to :index, foreign_key: :index_identifier, class_name: 'Gitlab::Database::PostgresIndex'
enum state: { started: 0, finished: 1, failed: 2 } enum state: { started: 0, finished: 1, failed: 2 }
# Amount of time to consider a previous reindexing *recent*
RECENT_THRESHOLD = 7.days
scope :recent, -> { where(state: :finished).where('action_end > ?', Time.zone.now - RECENT_THRESHOLD) }
def self.keep_track_of(index, &block) def self.keep_track_of(index, &block)
action = create!( action = create!(
index_identifier: index.identifier, index_identifier: index.identifier,
......
...@@ -195,7 +195,7 @@ namespace :gitlab do ...@@ -195,7 +195,7 @@ namespace :gitlab do
indexes = if args[:index_name] indexes = if args[:index_name]
[Gitlab::Database::PostgresIndex.by_identifier(args[:index_name])] [Gitlab::Database::PostgresIndex.by_identifier(args[:index_name])]
else else
Gitlab::Database::Reindexing.candidate_indexes.random_few(2) Gitlab::Database::Reindexing.candidate_indexes
end end
Gitlab::Database::Reindexing.perform(indexes) Gitlab::Database::Reindexing.perform(indexes)
......
# frozen_string_literal: true
FactoryBot.define do
  # Factory for Gitlab::Database::PostgresIndex.
  #
  # By default it describes a plain (non-unique, non-partitioned) index named
  # "some_index_<n>" in the "public" schema on table "foo".
  factory :postgres_index, class: 'Gitlab::Database::PostgresIndex' do
    sequence(:indexrelid) { |n| n }
    schema { 'public' }
    name { "some_index_#{indexrelid}" }
    # The postgres_indexes view builds the identifier as "<schema>.<name>".
    # Deriving it here keeps the three attributes consistent when a caller
    # overrides schema or name; the default value is unchanged.
    identifier { "#{schema}.#{name}" }
    tablename { 'foo' }
    unique { false }
    valid_index { true }
    partitioned { false }
    exclusion { false }
    expression { false }
    partial { false }
    definition { "CREATE INDEX #{identifier} ON #{tablename} (bar)"}
    ondisk_size_bytes { 100.megabytes }
  end
end
# frozen_string_literal: true
FactoryBot.define do
  # Factory for Gitlab::Database::PostgresIndexBloatEstimate.
  #
  # Each estimate is attached to a :postgres_index and shares that index's
  # identifier; the default estimated bloat is 10 megabytes.
  factory :postgres_index_bloat_estimate, class: 'Gitlab::Database::PostgresIndexBloatEstimate' do
    association :index, factory: :postgres_index

    bloat_size_bytes { 10.megabytes }
    identifier { index.identifier }
  end
end
# frozen_string_literal: true
FactoryBot.define do
  # Factory for Gitlab::Database::Reindexing::ReindexAction.
  #
  # The default action is a finished reindexing of a :postgres_index that
  # started ten minutes ago, ended five minutes ago and shrank the index
  # from 2 to 1 megabytes.
  factory :reindex_action, class: 'Gitlab::Database::Reindexing::ReindexAction' do
    association :index, factory: :postgres_index

    # Use the application time zone, in line with the `recent` scope and the
    # specs that compare against these timestamps.
    action_start { Time.zone.now - 10.minutes }
    action_end { Time.zone.now - 5.minutes }
    ondisk_size_bytes_start { 2.megabytes }
    ondisk_size_bytes_end { 1.megabytes }
    state { Gitlab::Database::Reindexing::ReindexAction.states[:finished] }
    index_identifier { index.identifier }
  end
end
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe 'factories' do RSpec.describe 'factories' do
include DatabaseHelpers
shared_examples 'factory' do |factory| shared_examples 'factory' do |factory|
describe "#{factory.name} factory" do describe "#{factory.name} factory" do
it 'does not raise error when built' do it 'does not raise error when built' do
...@@ -32,6 +34,14 @@ RSpec.describe 'factories' do ...@@ -32,6 +34,14 @@ RSpec.describe 'factories' do
fork_network_member fork_network_member
].to_set.freeze ].to_set.freeze
# Some factories and their corresponding models are based on
# database views. In order to use those, we have to swap the
# view out with a table of the same structure.
factories_based_on_view = %i[
postgres_index
postgres_index_bloat_estimate
].to_set.freeze
without_fd, with_fd = FactoryBot.factories without_fd, with_fd = FactoryBot.factories
.partition { |factory| skip_factory_defaults.include?(factory.name) } .partition { |factory| skip_factory_defaults.include?(factory.name) }
...@@ -40,6 +50,13 @@ RSpec.describe 'factories' do ...@@ -40,6 +50,13 @@ RSpec.describe 'factories' do
let_it_be(:project) { create_default(:project, :repository) } let_it_be(:project) { create_default(:project, :repository) }
let_it_be(:user) { create_default(:user) } let_it_be(:user) { create_default(:user) }
before do
factories_based_on_view.each do |factory|
view = build(factory).class.table_name
swapout_view_for_table(view)
end
end
with_fd.each do |factory| with_fd.each do |factory|
it_behaves_like 'factory', factory it_behaves_like 'factory', factory
end end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::PostgresIndexBloatEstimate do
  let(:identifier) { 'public.schema_migrations_pkey' }

  subject { described_class.find(identifier) }

  before do
    # Collect statistics for the table first: the bloat estimate view joins
    # pg_stats, so the index would have no row without them.
    ActiveRecord::Base.connection.execute(<<~SQL)
      ANALYZE schema_migrations
    SQL
  end

  describe '#bloat_size' do
    it 'returns the bloat size in bytes' do
      # We cannot say much more about the bloat size estimate here
      expect(subject.bloat_size).to be >= 0
    end
  end

  describe '#bloat_size_bytes' do
    it 'is an alias of #bloat_size' do
      expect(subject.bloat_size_bytes).to eq(subject.bloat_size)
    end
  end

  describe '#index' do
    it 'belongs to a PostgresIndex' do
      expect(subject.index.identifier).to eq(identifier)
    end
  end
end
...@@ -46,9 +46,24 @@ RSpec.describe Gitlab::Database::PostgresIndex do ...@@ -46,9 +46,24 @@ RSpec.describe Gitlab::Database::PostgresIndex do
end end
end end
describe '.random_few' do describe '#bloat_size' do
it 'limits to two records by default' do subject { build(:postgres_index, bloat_estimate: bloat_estimate) }
expect(described_class.random_few(2).size).to eq(2)
let(:bloat_estimate) { build(:postgres_index_bloat_estimate) }
let(:bloat_size) { double }
it 'returns the bloat size from the estimate' do
expect(bloat_estimate).to receive(:bloat_size).and_return(bloat_size)
expect(subject.bloat_size).to eq(bloat_size)
end
context 'without a bloat estimate available' do
let(:bloat_estimate) { nil }
it 'returns 0' do
expect(subject.bloat_size).to eq(0)
end
end end
end end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::Reindexing::IndexSelection do
  include DatabaseHelpers

  subject { described_class.new(Gitlab::Database::PostgresIndex.all).to_a }

  before do
    # Both models sit on top of database views; replace the views with
    # tables of the same structure so the factories can insert rows.
    swapout_view_for_table(:postgres_index_bloat_estimates)
    swapout_view_for_table(:postgres_indexes)
  end

  it 'orders by highest bloat first' do
    create_list(:postgres_index, 10).each_with_index do |index, i|
      create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 1.megabyte * i)
    end

    expected = Gitlab::Database::PostgresIndexBloatEstimate.order(bloat_size_bytes: :desc).map(&:index)

    expect(subject).to eq(expected)
  end

  context 'with time frozen' do
    around do |example|
      freeze_time { example.run }
    end

    it 'does not return indexes with reindex action in the last 7 days' do
      # Reindexed just over 7 days ago: these remain eligible.
      not_recently_reindexed = create_list(:postgres_index, 2).each_with_index do |index, i|
        create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 1.megabyte * i)
        create(:reindex_action, index: index, action_end: Time.zone.now - 7.days - 1.minute)
      end

      # Reindexed right now: these must be filtered out.
      create_list(:postgres_index, 2).each_with_index do |index, i|
        create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 1.megabyte * i)
        create(:reindex_action, index: index, action_end: Time.zone.now)
      end

      expected = not_recently_reindexed.map(&:identifier).sort

      expect(subject.map(&:identifier).sort).to eq(expected)
    end
  end
end
...@@ -6,12 +6,16 @@ RSpec.describe Gitlab::Database::Reindexing do ...@@ -6,12 +6,16 @@ RSpec.describe Gitlab::Database::Reindexing do
include ExclusiveLeaseHelpers include ExclusiveLeaseHelpers
describe '.perform' do describe '.perform' do
subject { described_class.perform(indexes) } subject { described_class.perform(candidate_indexes) }
let(:coordinator) { instance_double(Gitlab::Database::Reindexing::Coordinator) } let(:coordinator) { instance_double(Gitlab::Database::Reindexing::Coordinator) }
let(:index_selection) { instance_double(Gitlab::Database::Reindexing::IndexSelection) }
let(:candidate_indexes) { double }
let(:indexes) { double } let(:indexes) { double }
it 'delegates to Coordinator' do it 'delegates to Coordinator' do
expect(Gitlab::Database::Reindexing::IndexSelection).to receive(:new).with(candidate_indexes).and_return(index_selection)
expect(index_selection).to receive(:take).with(2).and_return(indexes)
expect(Gitlab::Database::Reindexing::Coordinator).to receive(:new).with(indexes).and_return(coordinator) expect(Gitlab::Database::Reindexing::Coordinator).to receive(:new).with(indexes).and_return(coordinator)
expect(coordinator).to receive(:perform) expect(coordinator).to receive(:perform)
......
# frozen_string_literal: true
module DatabaseHelpers
  # Replaces a database view with an empty table of the same structure, so
  # that factories can work directly with models that sit on top of a view.
  #
  # view - name of the view to replace; the table takes over this name.
  #
  # Returns whatever the connection's `execute` returns.
  def swapout_view_for_table(view)
    placeholder = "#{view}_copy"
    connection = ActiveRecord::Base.connection

    # Create the look-alike table under a temporary name, drop the view,
    # then move the table into the view's place.
    connection.execute(<<~SQL)
      CREATE TABLE #{placeholder} (LIKE #{view});
      DROP VIEW #{view};
      ALTER TABLE #{placeholder} RENAME TO #{view};
    SQL
  end
end
...@@ -235,8 +235,8 @@ RSpec.describe 'gitlab:db namespace rake task' do ...@@ -235,8 +235,8 @@ RSpec.describe 'gitlab:db namespace rake task' do
let(:indexes) { double('indexes') } let(:indexes) { double('indexes') }
context 'when no index_name is given' do context 'when no index_name is given' do
it 'rebuilds a random number of large indexes' do it 'uses all candidate indexes' do
expect(Gitlab::Database::Reindexing).to receive_message_chain('candidate_indexes.random_few').and_return(indexes) expect(Gitlab::Database::Reindexing).to receive(:candidate_indexes).and_return(indexes)
expect(Gitlab::Database::Reindexing).to receive(:perform).with(indexes) expect(Gitlab::Database::Reindexing).to receive(:perform).with(indexes)
run_rake_task('gitlab:db:reindex') run_rake_task('gitlab:db:reindex')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment