Commit 207b78e1 authored by Adam Hegyi's avatar Adam Hegyi

Merge branch 'in-operator-keyset-order-strategy' into 'master'

Load `ORDER BY` rows only by default in in optimized IN operator queries

See merge request gitlab-org/gitlab!70623
parents e9735782 7bbc4025
......@@ -226,7 +226,12 @@ Gitlab::Pagination::Keyset::InOperatorOptimization::QueryBuilder.new(
- `finder_query` loads the actual record row from the database. It must also be a lambda, where
the order by column expressions is available for locating the record. In this example, the
yielded values are `created_at` and `id` SQL expressions. Finding a record is very fast via the
primary key, so we don't use the `created_at` value.
primary key, so we don't use the `created_at` value. Providing the `finder_query` lambda is optional.
If it's not given, the IN operator optimization will only make the ORDER BY columns available to
the end-user and not the full database row.
If it's not given, the IN operator optimization will only make the ORDER BY columns available to
the end-user and not the full database row.
The following database index on the `issues` table must be present
to make the query execute efficiently:
......@@ -611,6 +616,32 @@ Gitlab::Pagination::Keyset::Iterator.new(scope: scope, **opts).each_batch(of: 10
end
```
NOTE:
The query loads complete database rows from the disk. This may cause increased I/O and slower
database queries. Depending on the use case, the primary key is often only
needed for the batch query to invoke additional statements. For example, `UPDATE` or `DELETE`. The
`id` column is included in the `ORDER BY` columns (`created_at` and `id`) and is already
loaded. In this case, you can omit the `finder_query` parameter.
Example for loading the `ORDER BY` columns only:
```ruby
scope = Issue.order(:created_at, :id)
array_scope = Group.find(9970).all_projects.select(:id)
array_mapping_scope = -> (id_expression) { Issue.where(Issue.arel_table[:project_id].eq(id_expression)) }
opts = {
in_operator_optimization_options: {
array_scope: array_scope,
array_mapping_scope: array_mapping_scope
}
}
Gitlab::Pagination::Keyset::Iterator.new(scope: scope, **opts).each_batch(of: 100) do |records|
puts records.select(:id).map { |r| [r.id] } # only id and created_at are available
end
```
#### Keyset pagination
The optimization works out of the box with GraphQL and the `keyset_paginate` helper method.
......
......@@ -9,7 +9,6 @@ module Gitlab
UnsupportedScopeOrder = Class.new(StandardError)
RECURSIVE_CTE_NAME = 'recursive_keyset_cte'
RECORDS_COLUMN = 'records'
# This class optimizes slow database queries (PostgreSQL specific) where the
# IN SQL operator is used with sorting.
......@@ -42,7 +41,7 @@ module Gitlab
# > array_mapping_scope: array_mapping_scope,
# > finder_query: finder_query
# > ).execute.limit(20)
def initialize(scope:, array_scope:, array_mapping_scope:, finder_query:, values: {})
def initialize(scope:, array_scope:, array_mapping_scope:, finder_query: nil, values: {})
@scope, success = Gitlab::Pagination::Keyset::SimpleOrderBuilder.build(scope)
unless success
......@@ -57,11 +56,11 @@ module Gitlab
@order = Gitlab::Pagination::Keyset::Order.extract_keyset_order_object(scope)
@array_scope = array_scope
@array_mapping_scope = array_mapping_scope
@finder_query = finder_query
@values = values
@model = @scope.model
@table_name = @model.table_name
@arel_table = @model.arel_table
@finder_strategy = finder_query.present? ? Strategies::RecordLoaderStrategy.new(finder_query, model, order_by_columns) : Strategies::OrderValuesLoaderStrategy.new(model, order_by_columns)
end
def execute
......@@ -74,7 +73,7 @@ module Gitlab
q = cte
.apply_to(model.where({})
.with(selector_cte.to_arel))
.select(result_collector_final_projections)
.select(finder_strategy.final_projections)
.where("count <> 0") # filter out the initializer row
model.from(q.arel.as(table_name))
......@@ -82,13 +81,13 @@ module Gitlab
private
attr_reader :array_scope, :scope, :order, :array_mapping_scope, :finder_query, :values, :model, :table_name, :arel_table
attr_reader :array_scope, :scope, :order, :array_mapping_scope, :finder_strategy, :values, :model, :table_name, :arel_table
def initializer_query
array_column_names = array_scope_columns.array_aggregated_column_names + order_by_columns.array_aggregated_column_names
projections = [
*result_collector_initializer_columns,
*finder_strategy.initializer_columns,
*array_column_names,
'0::bigint AS count'
]
......@@ -156,7 +155,7 @@ module Gitlab
order_column_value_arrays = order_by_columns.replace_value_in_array_by_position_expressions
select = [
*result_collector_columns,
*finder_strategy.columns,
*array_column_list,
*order_column_value_arrays,
"#{RECURSIVE_CTE_NAME}.count + 1"
......@@ -254,23 +253,6 @@ module Gitlab
end.join(", ")
end
def result_collector_initializer_columns
["NULL::#{table_name} AS #{RECORDS_COLUMN}"]
end
def result_collector_columns
query = finder_query
.call(*order_by_columns.array_lookup_expressions_by_position(RECURSIVE_CTE_NAME))
.select("#{table_name}")
.limit(1)
["(#{query.to_sql})"]
end
def result_collector_final_projections
["(#{RECORDS_COLUMN}).*"]
end
def array_scope_columns
@array_scope_columns ||= ArrayScopeColumns.new(array_scope.select_values)
end
......
# frozen_string_literal: true
module Gitlab
module Pagination
module Keyset
module InOperatorOptimization
module Strategies
class OrderValuesLoaderStrategy
def initialize(model, order_by_columns)
@model = model
@order_by_columns = order_by_columns
end
def initializer_columns
order_by_columns.map do |column|
column_name = column.original_column_name.to_s
type = model.columns_hash[column_name].sql_type
"NULL::#{type} AS #{column_name}"
end
end
def columns
order_by_columns.array_lookup_expressions_by_position(QueryBuilder::RECURSIVE_CTE_NAME)
end
def final_projections
order_by_columns.map(&:original_column_name)
end
private
attr_reader :model, :order_by_columns
end
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Pagination
module Keyset
module InOperatorOptimization
module Strategies
class RecordLoaderStrategy
RECORDS_COLUMN = 'records'
def initialize(finder_query, model, order_by_columns)
@finder_query = finder_query
@order_by_columns = order_by_columns
@table_name = model.table_name
end
def initializer_columns
["NULL::#{table_name} AS #{RECORDS_COLUMN}"]
end
def columns
query = finder_query
.call(*order_by_columns.array_lookup_expressions_by_position(QueryBuilder::RECURSIVE_CTE_NAME))
.select("#{table_name}")
.limit(1)
["(#{query.to_sql})"]
end
def final_projections
["(#{RECORDS_COLUMN}).*"]
end
private
attr_reader :finder_query, :order_by_columns, :table_name
end
end
end
end
end
end
......@@ -41,14 +41,40 @@ RSpec.describe Gitlab::Pagination::Keyset::InOperatorOptimization::QueryBuilder
)
end
it 'returns records in correct order' do
let(:all_records) do
all_records = []
iterator.each_batch(of: batch_size) do |records|
all_records.concat(records)
end
all_records
end
it 'returns records in correct order' do
expect(all_records).to eq(expected_order)
end
context 'when not passing the finder query' do
before do
in_operator_optimization_options.delete(:finder_query)
end
it 'returns records in correct order' do
expect(all_records).to eq(expected_order)
end
it 'loads only the order by column' do
order_by_attribute_names = iterator
.send(:order)
.column_definitions
.map(&:attribute_name)
.map(&:to_s)
record = all_records.first
loaded_attributes = record.attributes.keys - ['time_estimate'] # time_estimate is always present (has default value)
expect(loaded_attributes).to eq(order_by_attribute_names)
end
end
end
context 'when ordering by issues.id DESC' do
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Pagination::Keyset::InOperatorOptimization::Strategies::OrderValuesLoaderStrategy do
let(:model) { Project }
let(:keyset_scope) do
scope, _ = Gitlab::Pagination::Keyset::SimpleOrderBuilder.build(
Project.order(:created_at, :id)
)
scope
end
let(:keyset_order) do
Gitlab::Pagination::Keyset::Order.extract_keyset_order_object(keyset_scope)
end
let(:order_by_columns) do
Gitlab::Pagination::Keyset::InOperatorOptimization::OrderByColumns.new(keyset_order.column_definitions, model.arel_table)
end
subject(:strategy) { described_class.new(model, order_by_columns) }
describe '#initializer_columns' do
it 'returns NULLs for each ORDER BY columns' do
expect(strategy.initializer_columns).to eq([
'NULL::timestamp without time zone AS created_at',
'NULL::integer AS id'
])
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Pagination::Keyset::InOperatorOptimization::Strategies::RecordLoaderStrategy do
let(:finder_query) { -> (created_at_value, id_value) { Project.where(Project.arel_table[:id].eq(id_value)) } }
let(:model) { Project }
let(:keyset_scope) do
scope, _ = Gitlab::Pagination::Keyset::SimpleOrderBuilder.build(
Project.order(:created_at, :id)
)
scope
end
let(:keyset_order) do
Gitlab::Pagination::Keyset::Order.extract_keyset_order_object(keyset_scope)
end
let(:order_by_columns) do
Gitlab::Pagination::Keyset::InOperatorOptimization::OrderByColumns.new(keyset_order.column_definitions, model.arel_table)
end
subject(:strategy) { described_class.new(finder_query, model, order_by_columns) }
describe '#initializer_columns' do
# Explanation:
# > SELECT NULL::projects AS records
#
# The query returns one row and one column. The column may contain a full project row.
# In this particular case the row is NULL.
it 'returns a NULL table row as the result column' do
expect(strategy.initializer_columns).to eq(["NULL::projects AS records"])
end
end
describe '#columns' do
# Explanation:
# > SELECT (SELECT projects FROM projects limit 1)
#
# Selects one row from the database and collapses it into one column.
#
# Side note: Due to the type casts, columns and initializer_columns can be also UNION-ed:
# SELECT * FROM (
# (
# SELECT NULL::projects AS records
# UNION
# SELECT (SELECT projects FROM projects limit 1)
# )
# ) as records
it 'uses the finder query to load the row in the result column' do
expected_loader_query = <<~SQL
(SELECT projects FROM "projects" WHERE "projects"."id" = recursive_keyset_cte.projects_id_array[position] LIMIT 1)
SQL
expect(strategy.columns).to eq([expected_loader_query.chomp])
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment