Commit 3a44e222 authored by Simon Tomlinson

Sliding list partitioning strategy

A sliding list partitioning strategy creates new list partitions and
redirects all inserts to those list partitions. As old partitions are
no longer needed, they are detached and dropped. This can dramatically
reduce vacuum costs for queue-like tables.
parent 6e8ae813
......@@ -7,7 +7,8 @@ module PartitionedTable
attr_reader :partitioning_strategy
PARTITIONING_STRATEGIES = {
monthly: Gitlab::Database::Partitioning::MonthlyStrategy
monthly: Gitlab::Database::Partitioning::MonthlyStrategy,
sliding_list: Gitlab::Database::Partitioning::SlidingListStrategy
}.freeze
def partitioned_by(partitioning_key, strategy:, **kwargs)
......
......@@ -36,6 +36,10 @@ module Gitlab
partitions
end
# Hook that the partition manager calls on the strategy after it has created
# partitions. This strategy has no follow-up work to do, so the body is
# intentionally empty; the method exists only so the call does not fail.
def after_adding_partitions
# No-op, required by the partition manager
end
private
def desired_partitions
......
......@@ -73,6 +73,7 @@ module Gitlab
partition_name: partition.partition_name,
table_name: partition.table)
end
model.partitioning_strategy.after_adding_partitions
end
end
end
......
# frozen_string_literal: true
module Gitlab
module Database
module Partitioning
# Value object describing one partition of a list-partitioned table whose
# partition bound is a single integer value.
#
# Instances are ordered by +value+ (only within the same parent table) via
# Comparable, and implement ==/eql?/hash so they can be compared and used as
# hash keys.
class SingleNumericListPartition
  include Comparable

  # Builds a partition from the textual form of a partition bound.
  #
  # @param table [String, Symbol] name of the partitioned (parent) table
  # @param partition_name [String] name of the partition itself
  # @param definition [String] text containing +FOR VALUES IN ('<digits>')+
  # @return [SingleNumericListPartition]
  # @raise [ArgumentError] if the definition does not hold exactly one quoted run of digits
  def self.from_sql(table, partition_name, definition)
    # A list partition can support multiple values, but we only support a single number
    matches = definition.match(/FOR VALUES IN \('(?<value>\d+)'\)/)
    raise ArgumentError, 'Unknown partition definition' unless matches

    # Force base 10: without an explicit base, Integer() honours radix prefixes,
    # so '010' would be read as octal 8 and '08' would raise.
    value = Integer(matches[:value], 10)

    new(table, value, partition_name: partition_name)
  end

  attr_reader :table, :value

  # @param table [String, Symbol] name of the partitioned (parent) table
  # @param value [Integer] the single list value this partition accepts
  # @param partition_name [String, nil] explicit partition name; when nil a
  #   name is derived from the table and value (see #partition_name)
  def initialize(table, value, partition_name: nil)
    @table = table
    @value = value
    @partition_name = partition_name
  end

  # @return [String] the explicit name if one was given, otherwise "<table>_<value>"
  def partition_name
    @partition_name || "#{table}_#{value}"
  end

  # @return [String] SQL that creates this partition (if missing) for the single value
  def to_sql
    <<~SQL
      CREATE TABLE IF NOT EXISTS #{fully_qualified_partition}
      PARTITION OF #{conn.quote_table_name(table)}
      FOR VALUES IN (#{conn.quote(value)})
    SQL
  end

  # @return [String] SQL that detaches this partition from its parent table
  def to_detach_sql
    <<~SQL
      ALTER TABLE #{conn.quote_table_name(table)}
      DETACH PARTITION #{fully_qualified_partition}
    SQL
  end

  # Two partitions are equal when table, partition name and value all match.
  # Anything that is not a SingleNumericListPartition (including nil) is
  # simply unequal rather than an error.
  def ==(other)
    other.is_a?(SingleNumericListPartition) &&
      table == other.table &&
      partition_name == other.partition_name &&
      value == other.value
  end
  alias_method :eql?, :==

  # Hash over the same attributes used by ==, keeping eql?/hash consistent.
  def hash
    [table, partition_name, value].hash
  end

  # Orders partitions of the same table by value.
  #
  # @return [Integer, nil] nil when +other+ is not a partition or belongs to
  #   a different table, i.e. the two are not comparable
  def <=>(other)
    return unless other.is_a?(SingleNumericListPartition)
    return if table != other.table

    value <=> other.value
  end

  private

  # Quoted "<schema>.<partition>" identifier, using the dynamic partitions schema.
  def fully_qualified_partition
    schema = conn.quote_table_name(Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA)
    name = conn.quote_table_name(partition_name)

    "#{schema}.#{name}"
  end

  # Connection used only for quoting; looked up lazily and memoized.
  def conn
    @conn ||= Gitlab::Database::SharedModel.connection
  end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Database
module Partitioning
# Partitioning strategy that keeps a "sliding" series of single-value list
# partitions (1, 2, 3, ...) for a table.
#
# The partition with the highest value is the active one. When
# +next_partition_if+ says so, the next partition in the series is requested,
# and after partitions are added the partitioning column's default is moved
# to the active value. Leading partitions for which +detach_partition_if+
# returns true are reported as extra.
class SlidingListStrategy
  attr_reader :model, :partitioning_key, :next_partition_if, :detach_partition_if

  delegate :table_name, to: :model

  # @param model [Class] model for the partitioned table; must list the
  #   partitioning column in +ignored_columns+
  # @param partitioning_key [Symbol, String] name of the partitioning column
  # @param next_partition_if [#call] called with the active partition's value;
  #   a truthy result requests the next partition
  # @param detach_partition_if [#call] called with a partition's value; a
  #   truthy result marks that partition as detachable
  # @raise [RuntimeError] if the partitioning column is not in ignored_columns
  def initialize(model, partitioning_key, next_partition_if:, detach_partition_if:)
    @model = model
    @partitioning_key = partitioning_key
    @next_partition_if = next_partition_if
    @detach_partition_if = detach_partition_if

    ensure_partitioning_column_ignored!
  end

  # Reads the table's existing partitions and returns them sorted by value.
  # Nothing is cached: every call performs a fresh lookup.
  #
  # @return [Array<SingleNumericListPartition>]
  def current_partitions
    Gitlab::Database::PostgresPartition.for_parent_table(table_name).map do |partition|
      SingleNumericListPartition.from_sql(table_name, partition.name, partition.condition)
    end.sort
  end

  # Partitions that should be created now: the initial partition when none
  # exist, the next one in the series when +next_partition_if+ approves, or
  # nothing.
  #
  # The partition list is read exactly once so the whole decision is based on
  # a single snapshot, instead of re-reading it for the emptiness check, the
  # predicate argument and the next value.
  #
  # @return [Array<SingleNumericListPartition>] zero or one partition
  def missing_partitions
    active = active_partition

    return [initial_partition] if active.nil?
    return [] unless next_partition_if.call(active.value)

    [SingleNumericListPartition.new(table_name, active.value + 1)]
  end

  # @return [SingleNumericListPartition] the first partition of the series (value 1)
  def initial_partition
    SingleNumericListPartition.new(table_name, 1)
  end

  # @return [SingleNumericListPartition] partition for the active value plus one
  def next_partition
    SingleNumericListPartition.new(table_name, active_partition.value + 1)
  end

  # Leading run of partitions (lowest values first) that may be detached.
  # take_while stops at the first partition the predicate rejects, so a
  # detachable partition behind a retained one is not returned.
  #
  # @return [Array<SingleNumericListPartition>]
  def extra_partitions
    possibly_extra = current_partitions[0...-1] # Never consider the most recent partition

    possibly_extra.take_while { |p| detach_partition_if.call(p.value) }
  end

  # Moves the partitioning column's default to the active partition's value.
  def after_adding_partitions
    active_value = active_partition.value
    model.connection.change_column_default(model.table_name, partitioning_key, active_value)
  end

  # @return [SingleNumericListPartition, nil] partition with the highest
  #   value, or nil when the table has no partitions
  def active_partition
    # The current partitions list is sorted, so the last partition has the highest value
    # This is the only partition that receives inserts.
    current_partitions.last
  end

  # @return [Boolean] true when the table currently has no partitions
  def no_partitions_exist?
    current_partitions.empty?
  end

  private

  # The strategy refuses to work unless the model lists the partitioning
  # column in +ignored_columns+.
  def ensure_partitioning_column_ignored!
    return if model.ignored_columns.include?(partitioning_key.to_s)

    raise "Add #{partitioning_key} to #{model.name}.ignored_columns to use it with SlidingListStrategy"
  end
end
end
end
end
......@@ -16,7 +16,7 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionManager do
subject(:sync_partitions) { described_class.new(model).sync_partitions }
let(:model) { double(partitioning_strategy: partitioning_strategy, table_name: table, connection: connection) }
let(:partitioning_strategy) { double(missing_partitions: partitions, extra_partitions: []) }
let(:partitioning_strategy) { double(missing_partitions: partitions, extra_partitions: [], after_adding_partitions: nil) }
let(:connection) { ActiveRecord::Base.connection }
let(:table) { "some_table" }
......@@ -83,7 +83,7 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionManager do
let(:manager) { described_class.new(model) }
let(:model) { double(partitioning_strategy: partitioning_strategy, table_name: table, connection: connection) }
let(:partitioning_strategy) { double(extra_partitions: extra_partitions, missing_partitions: []) }
let(:partitioning_strategy) { double(extra_partitions: extra_partitions, missing_partitions: [], after_adding_partitions: nil) }
let(:connection) { ActiveRecord::Base.connection }
let(:table) { "foo" }
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the value object that represents a list partition bound to a
# single integer value: parsing, naming and ordering.
RSpec.describe Gitlab::Database::Partitioning::SingleNumericListPartition do
  describe '.from_sql' do
    let(:partition_value) { 0 }
    let(:table) { 'partitioned_table' }
    let(:partition_name) { "partitioned_table_#{partition_value}" }
    let(:definition) { "FOR VALUES IN ('#{partition_value}')" }

    subject(:result) { described_class.from_sql(table, partition_name, definition) }

    it 'uses specified table name' do
      expect(result).to have_attributes(table: table)
    end

    it 'uses specified partition name' do
      expect(result).to have_attributes(partition_name: partition_name)
    end

    it 'parses the definition' do
      expect(result).to have_attributes(value: partition_value)
    end
  end

  describe '#partition_name' do
    it 'is the explicit name if provided' do
      explicitly_named = described_class.new('table', 1, partition_name: 'some_other_name')

      expect(explicitly_named.partition_name).to eq('some_other_name')
    end

    it 'defaults to the table name followed by the partition value' do
      implicitly_named = described_class.new('table', 1)

      expect(implicitly_named.partition_name).to eq('table_1')
    end
  end

  context 'sorting' do
    it 'is incomparable if the tables do not match' do
      left = described_class.new('table1', 1)
      right = described_class.new('table2', 2)

      expect(left <=> right).to be_nil
    end

    it 'sorts by the value when the tables match' do
      smaller = described_class.new('table1', 1)
      larger = described_class.new('table1', 2)

      # 1 <=> 2 is -1
      expect(smaller <=> larger).to eq(-1)
    end

    it 'sorts by numeric value rather than text value' do
      # As text, "10" would sort before "9"
      nine = described_class.new('table', 9)
      ten = described_class.new('table', 10)

      expect(ten).to be > nine
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the sliding list strategy. They run against a real list-partitioned
# test table that the top-level `before` block creates for every example.
RSpec.describe Gitlab::Database::Partitioning::SlidingListStrategy do
let(:connection) { ActiveRecord::Base.connection }
let(:table_name) { :_test_partitioned_test }
# Minimal stand-in for a model: the `partition` column is listed as ignored so
# the strategy's constructor check passes.
let(:model) { double('model', table_name: table_name, ignored_columns: %w[partition]) }
# Placeholder predicates; contexts that need real behaviour override them.
let(:next_partition_if) { double('next_partition_if') }
let(:detach_partition_if) { double('detach_partition_if') }
subject(:strategy) do
described_class.new(model, :partition,
next_partition_if: next_partition_if,
detach_partition_if: detach_partition_if)
end
# Creates the partitioned parent table plus two partitions (values 1 and 2).
# The column default of 2 means inserts without an explicit value land in the
# second partition.
before do
connection.execute(<<~SQL)
create table #{table_name}
(
id serial not null,
partition bigint not null default 2,
created_at timestamptz not null,
primary key (id, partition)
)
partition by list(partition);
create table #{table_name}_1
partition of #{table_name} for values in (1);
create table #{table_name}_2
partition of #{table_name} for values in (2);
SQL
end
describe '#current_partitions' do
it 'detects both partitions' do
expect(strategy.current_partitions).to eq([
Gitlab::Database::Partitioning::SingleNumericListPartition.new(table_name, 1, partition_name: '_test_partitioned_test_1'),
Gitlab::Database::Partitioning::SingleNumericListPartition.new(table_name, 2, partition_name: '_test_partitioned_test_2')
])
end
end
describe '#active_partition' do
it 'is the partition with the largest value' do
expect(strategy.active_partition.value).to eq(2)
end
end
describe '#missing_partitions' do
context 'when next_partition_if returns true' do
let(:next_partition_if) { proc { true } }
it 'is a partition definition for the next partition in the series' do
# Partitions 1 and 2 exist, so the next one in the series is 3.
extra = strategy.missing_partitions
expect(extra.length).to eq(1)
expect(extra.first.value).to eq(3)
end
end
context 'when next_partition_if returns false' do
let(:next_partition_if) { proc { false } }
it 'is empty' do
expect(strategy.missing_partitions).to be_empty
end
end
context 'when there are no partitions for the table' do
it 'returns a partition for value 1' do
# Drop both partitions created in the outer setup so the parent table has none.
connection.execute("drop table #{table_name}_1; drop table #{table_name}_2;")
missing_partitions = strategy.missing_partitions
expect(missing_partitions.size).to eq(1)
missing_partition = missing_partitions.first
expect(missing_partition.value).to eq(1)
end
end
end
describe '#extra_partitions' do
# Adds partitions 3..10; together with the outer setup, values 1..10 exist.
before do
(3..10).each do |i|
connection.execute("CREATE TABLE #{table_name}_#{i} PARTITION OF #{table_name} FOR VALUES IN (#{i})")
end
end
context 'when some partitions are true for detach_partition_if' do
# Every value except 5 is detachable, so the leading run stops just before 5.
let(:detach_partition_if) { ->(p) { p != 5 } }
it 'is the leading set of partitions before that value' do
expect(strategy.extra_partitions.map(&:value)).to contain_exactly(1, 2, 3, 4)
end
end
context 'when all partitions are true for detach_partition_if' do
let(:detach_partition_if) { proc { true } }
it 'is all but the most recent partition', :aggregate_failures do
# Partition 10 has the highest value and must never be reported as extra.
expect(strategy.extra_partitions.map(&:value)).to contain_exactly(1, 2, 3, 4, 5, 6, 7, 8, 9)
expect(strategy.current_partitions.map(&:value).max).to eq(10)
end
end
end
describe '#initial_partition' do
it 'starts with the value 1', :aggregate_failures do
initial_partition = strategy.initial_partition
expect(initial_partition.value).to eq(1)
expect(initial_partition.table).to eq(strategy.table_name)
expect(initial_partition.partition_name).to eq("#{strategy.table_name}_1")
end
end
describe '#next_partition' do
it 'is one after the active partition', :aggregate_failures do
# Stub the active partition so the expectation does not depend on the tables created above.
expect(strategy).to receive(:active_partition).and_return(double(value: 5))
next_partition = strategy.next_partition
expect(next_partition.value).to eq(6)
expect(next_partition.table).to eq(strategy.table_name)
expect(next_partition.partition_name).to eq("#{strategy.table_name}_6")
end
end
describe '#ensure_partitioning_column_ignored!' do
it 'raises when the column is not ignored' do
# This class never sets ignored_columns, so declaring the strategy is expected to fail.
expect do
Class.new(ApplicationRecord) do
include PartitionedTable
partitioned_by :partition, strategy: :sliding_list,
next_partition_if: proc { false },
detach_partition_if: proc { false }
end
end.to raise_error(/ignored_columns/)
end
it 'does not raise when the column is ignored' do
expect do
Class.new(ApplicationRecord) do
include PartitionedTable
self.ignored_columns = [:partition]
partitioned_by :partition, strategy: :sliding_list,
next_partition_if: proc { false },
detach_partition_if: proc { false }
end
end.not_to raise_error
end
end
context 'redirecting inserts as the active partition changes' do
# A real (anonymous) model backed by the test table, replacing the double
# used elsewhere in this file.
let(:model) do
Class.new(ApplicationRecord) do
include PartitionedTable
self.table_name = '_test_partitioned_test'
self.primary_key = :id
self.ignored_columns = %w[partition]
# method().call cannot be detected by rspec, so we add a layer of indirection here
def self.next_partition_if_wrapper(...)
next_partition?(...)
end
def self.detach_partition_if_wrapper(...)
detach_partition?(...)
end
# The wrappers must already be defined here, because method(:...) looks them up immediately.
partitioned_by :partition, strategy: :sliding_list,
next_partition_if: method(:next_partition_if_wrapper),
detach_partition_if: method(:detach_partition_if_wrapper)
# Empty placeholders; the example below stubs both with allow(...).to receive.
def self.next_partition?(current_partition)
end
def self.detach_partition?(partition)
end
end
end
it 'redirects to the new partition', :aggregate_failures do
partition_2_model = model.create! # Goes in partition 2
# Request a new partition only while the active value is below 3, so exactly one is added.
allow(model).to receive(:next_partition?) do
model.partitioning_strategy.active_partition.value < 3
end
allow(model).to receive(:detach_partition?).and_return(false)
Gitlab::Database::Partitioning::PartitionManager.new(model).sync_partitions
partition_3_model = model.create!
# Rails doesn't pick up on database default changes, so we need to reload
# We also want to grab the partition column to verify what it was set to.
# In normal operation we make rails ignore it so that we can use a changing default
# So we force select * to load it
all_columns = model.select(model.arel_table[Arel.star])
partition_2_model = all_columns.find(partition_2_model.id)
partition_3_model = all_columns.find(partition_3_model.id)
expect(partition_2_model.partition).to eq(2)
expect(partition_3_model.partition).to eq(3)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment