Commit a9684310 authored by Alex Kalderimis

Merge branch '335388-keyset-pagination-support-for-sqlliteral-cursors' into 'master'

Support SQL literal keyset values

See merge request gitlab-org/gitlab!66055
parents 5247bd75 2a29b101
# frozen_string_literal: true
module Gitlab
module Pagination
module Keyset
class ColumnConditionBuilder
  # Builds the WHERE conditions used by the keyset pagination library,
  # handling one ORDER BY column per instance.
  #
  # Preconditions on the ordering, as assumed by the library:
  #   1. Only the last column is non-nullable and distinct.
  #   2. Exactly one column is distinct and non-nullable (the tie breaker).
  #
  # Scenario: the query is ordered by columns X, Y and Z, and the WHERE
  # conditions must be derived from a set of cursor values:
  #   X is nullable
  #   Y is non-nullable but not distinct
  #   Z is non-nullable and distinct (the tie breaker)
  #
  # The caller creates one builder per column with that column's cursor
  # value (x, y, z; x may be nil because X is nullable) and then walks the
  # columns from last to first. Each #where_conditions call receives the
  # conditions accumulated so far (initially []) and returns the new list.
  #
  # The returned elements are Arel nodes. They are alternatives: the caller
  # is expected to join them with OR, or to run each one as a branch of a
  # UNION query.
  #
  # Example for ORDER BY X DESC NULLS LAST, Y ASC, Z DESC
  #
  # Step 1, column Z, current_conditions = []
  #   => [(Z < z)]
  #
  # Step 2, column Y, current_conditions = [(Z < z)]
  #   => [(Y > y),
  #       (Y = y AND Z < z)]
  #
  # Step 3, column X, current_conditions = result of step 2
  #   => [(x IS NULL AND X IS NULL AND Y > y),
  #       (x IS NULL AND X IS NULL AND Y = y AND Z < z),
  #       (x IS NOT NULL AND X IS NULL),
  #       (x IS NOT NULL AND X < x),
  #       (x IS NOT NULL AND X = x AND Y > y),
  #       (x IS NOT NULL AND X = x AND Y = y AND Z < z)]
  #
  # Parameters:
  #
  # - column: the order definition of the column; it must respond to
  #           not_nullable?, nulls_first?, descending_order? and column_expression
  # - value: cursor value for the column (may be nil or an Arel::Nodes::SqlLiteral)
  def initialize(column, value)
    @column = column
    @value = value
  end

  # Returns the conditions for this column combined with current_conditions,
  # the conditions already built for the columns that follow it in the ordering.
  def where_conditions(current_conditions)
    if column.not_nullable?
      not_nullable_conditions(current_conditions)
    elsif column.nulls_first?
      nulls_first_conditions(current_conditions)
    else
      nulls_last_conditions(current_conditions)
    end
  end

  private

  attr_reader :column, :value

  # Non-nullable column.
  #
  # Distinct tie breaker (Z, processed first, current_conditions = []):
  #   => [(Z < z)]
  #
  # Not distinct (Y ASC, processed after Z, current_conditions = [(Z < z)]):
  #   the column gets its own comparison, (Y > y), and every previous
  #   condition is kept as a tie breaker for equal values, (Y = y AND Z < z)
  #   => [(Y > y), (Y = y AND Z < z)]
  def not_nullable_conditions(current_conditions)
    [compare_column_with_value, *equality_tie_breakers(current_conditions)]
  end

  # Nullable column ordered with NULLS FIRST.
  #
  # Suppose the ordering is ORDER BY X DESC NULLS FIRST, Y ASC, Z DESC and
  # current_conditions = [(Y > y), (Y = y AND Z < z)].
  #
  # Conditions that can only match when the cursor value x is NULL:
  #   (x IS NULL AND X IS NULL AND Y > y),
  #   (x IS NULL AND X IS NULL AND Y = y AND Z < z),
  #   (x IS NULL AND X IS NOT NULL)
  #
  # Conditions that can only match when x has an actual value:
  #   (x IS NOT NULL AND X < x),
  #   (x IS NOT NULL AND X = x AND Y > y),
  #   (x IS NOT NULL AND X = x AND Y = y AND Z < z)
  #
  # Each group evaluates to FALSE in the other situation, so the combined
  # list is correct without branching on x at the application level.
  def nulls_first_conditions(current_conditions)
    within_nulls = current_conditions.map { |condition| all_of(column_is_null, condition) }
    when_cursor_is_null = [*within_nulls, column_is_not_null].map do |condition|
      all_of(value_is_null, condition)
    end

    ties = equality_tie_breakers(current_conditions)
    when_cursor_has_value = [compare_column_with_value, *ties].map do |condition|
      all_of(value_is_not_null, condition)
    end

    when_cursor_is_null + when_cursor_has_value
  end

  # Nullable column ordered with NULLS LAST.
  #
  # Suppose the ordering is ORDER BY X DESC NULLS LAST, Y ASC, Z DESC and
  # current_conditions = [(Y > y), (Y = y AND Z < z)].
  #
  # Conditions that can only match when the cursor value x is NULL:
  #   (x IS NULL AND X IS NULL AND Y > y),
  #   (x IS NULL AND X IS NULL AND Y = y AND Z < z)
  #
  # Conditions that can only match when x has an actual value:
  #   (x IS NOT NULL AND X IS NULL),
  #   (x IS NOT NULL AND X < x),
  #   (x IS NOT NULL AND X = x AND Y > y),
  #   (x IS NOT NULL AND X = x AND Y = y AND Z < z)
  def nulls_last_conditions(current_conditions)
    when_cursor_is_null = current_conditions.map do |condition|
      all_of(value_is_null, column_is_null, condition)
    end

    ties = equality_tie_breakers(current_conditions)
    when_cursor_has_value = [column_is_null, compare_column_with_value, *ties].map do |condition|
      all_of(value_is_not_null, condition)
    end

    when_cursor_is_null + when_cursor_has_value
  end

  # Prefixes every given condition with (column = value).
  def equality_tie_breakers(conditions)
    conditions.map { |condition| all_of(column_equals_to_value, condition) }
  end

  # Wraps the given nodes in a single AND node.
  def all_of(*nodes)
    Arel::Nodes::And.new(nodes)
  end

  def column_equals_to_value
    @column_equals_to_value ||= column.column_expression.eq(value)
  end

  def column_is_null
    @column_is_null ||= column.column_expression.eq(nil)
  end

  def column_is_not_null
    @column_is_not_null ||= column.column_expression.not_eq(nil)
  end

  def value_is_null
    @value_is_null ||= build_quoted_value.eq(nil)
  end

  def value_is_not_null
    @value_is_not_null ||= build_quoted_value.not_eq(nil)
  end

  # Strict comparison that selects the rows after the cursor value:
  # "less than" for a descending column, "greater than" otherwise.
  def compare_column_with_value
    column.descending_order? ? column.column_expression.lt(value) : column.column_expression.gt(value)
  end

  # Returns the cursor value as a node that can be compared with NULL.
  # SQL literals are used as they are; any other value is quoted via
  # Arel::Nodes.build_quoted, passing the column expression along.
  def build_quoted_value
    if value.instance_of?(Arel::Nodes::SqlLiteral)
      value
    else
      Arel::Nodes.build_quoted(value, column.column_expression)
    end
  end
end
end
end
end
...@@ -141,24 +141,10 @@ module Gitlab ...@@ -141,24 +141,10 @@ module Gitlab
return use_composite_row_comparison(values) if composite_row_comparison_possible? return use_composite_row_comparison(values) if composite_row_comparison_possible?
where_values = [] column_definitions
.map { ColumnConditionBuilder.new(_1, values[_1.attribute_name]) }
reversed_column_definitions = column_definitions.reverse .reverse
reversed_column_definitions.each_with_index do |column_definition, i| .reduce([]) { |where_conditions, column| column.where_conditions(where_conditions) }
value = values[column_definition.attribute_name]
conditions_for_column(column_definition, value).each do |condition|
column_definitions_after_index = reversed_column_definitions.last(column_definitions.reverse.size - i - 1)
equal_conditon_for_rest = column_definitions_after_index.map do |definition|
definition.column_expression.eq(values[definition.attribute_name])
end
where_values << Arel::Nodes::Grouping.new(Arel::Nodes::And.new([condition, *equal_conditon_for_rest].compact))
end
end
where_values
end end
def where_values_with_or_query(values) def where_values_with_or_query(values)
...@@ -222,32 +208,6 @@ module Gitlab ...@@ -222,32 +208,6 @@ module Gitlab
scope scope
end end
# Collects the condition fragments for a single column and its cursor value.
#
# Returns an array holding, in this order:
# - the comparison built by compare_column_with_value, when the column is
#   distinct or the value is present
# - for a nullable column, one extra condition that depends on where the
#   NULLs are placed: "column IS NOT NULL" for nulls-first with a blank
#   value, "column IS NULL" for nulls-last with a present value
def conditions_for_column(column_definition, value)
  [].tap do |fragments|
    if column_definition.distinct? || value.present?
      fragments << compare_column_with_value(column_definition, value)
    end

    if column_definition.nulls_first? && value.blank?
      fragments << column_definition.column_expression.not_eq(nil)
    elsif column_definition.nulls_last? && value.present?
      fragments << column_definition.column_expression.eq(nil)
    end
  end
end
# Builds the strict comparison between the column and the cursor value:
# "less than" when the column is ordered descending, "greater than" otherwise.
def compare_column_with_value(column_definition, value)
  return column_definition.column_expression.lt(value) if column_definition.descending_order?

  column_definition.column_expression.gt(value)
end
def build_or_query(expressions) def build_or_query(expressions)
return [] if expressions.blank? return [] if expressions.blank?
......
...@@ -6,32 +6,67 @@ RSpec.describe Gitlab::Pagination::Keyset::Order do ...@@ -6,32 +6,67 @@ RSpec.describe Gitlab::Pagination::Keyset::Order do
describe 'paginate over items correctly' do describe 'paginate over items correctly' do
let(:table) { Arel::Table.new(:my_table) } let(:table) { Arel::Table.new(:my_table) }
let(:order) { nil } let(:order) { nil }
let(:default_limit) { 999 }
let(:query_building_method) { :build_query }
def run_query(query) def run_query(query)
ApplicationRecord.connection.execute(query).to_a ApplicationRecord.connection.execute(query).to_a
end end
def build_query(order:, where_conditions: nil, limit: nil) def where_conditions_as_sql(where_conditions)
"WHERE #{Array(where_conditions).map(&:to_sql).join(' OR ')}"
end
def build_query(order:, where_conditions: [], limit: nil)
where_string = where_conditions_as_sql(where_conditions)
<<-SQL
SELECT id, year, month
FROM (#{table_data}) my_table (id, year, month)
#{where_string if where_conditions.present?}
ORDER BY #{order}
LIMIT #{limit || default_limit};
SQL
end
def build_union_query(order:, where_conditions: [], limit: nil)
return build_query(order: order, where_conditions: where_conditions, limit: limit) if where_conditions.blank?
union_queries = Array(where_conditions).map do |where_condition|
<<-SQL
(SELECT id, year, month
FROM (#{table_data}) my_table (id, year, month)
WHERE #{where_condition.to_sql}
ORDER BY #{order}
LIMIT #{limit || default_limit})
SQL
end
union_query = union_queries.join(" UNION ALL ")
<<-SQL <<-SQL
SELECT id, year, month SELECT id, year, month
FROM (#{table_data}) my_table (id, year, month) FROM (#{union_query}) as my_table
WHERE #{where_conditions || '1=1'} ORDER BY #{order}
ORDER BY #{order} LIMIT #{limit || default_limit};
LIMIT #{limit || 999};
SQL SQL
end end
def cursor_attributes_for_node(node)
order.cursor_attributes_for_node(node)
end
def iterate_and_collect(order:, page_size:, where_conditions: nil) def iterate_and_collect(order:, page_size:, where_conditions: nil)
all_items = [] all_items = []
loop do loop do
paginated_items = run_query(build_query(order: order, where_conditions: where_conditions, limit: page_size)) paginated_items = run_query(send(query_building_method, order: order, where_conditions: where_conditions, limit: page_size))
break if paginated_items.empty? break if paginated_items.empty?
all_items.concat(paginated_items) all_items.concat(paginated_items)
last_item = paginated_items.last last_item = paginated_items.last
cursor_attributes = order.cursor_attributes_for_node(last_item) cursor_attributes = cursor_attributes_for_node(last_item)
where_conditions = order.where_values_with_or_query(cursor_attributes).to_sql where_conditions = order.build_where_values(cursor_attributes)
end end
all_items all_items
...@@ -54,15 +89,41 @@ RSpec.describe Gitlab::Pagination::Keyset::Order do ...@@ -54,15 +89,41 @@ RSpec.describe Gitlab::Pagination::Keyset::Order do
it { expect(subject).to eq(expected) } it { expect(subject).to eq(expected) }
end end
context 'when using the conditions in an UNION query' do
let(:query_building_method) { :build_union_query }
it { expect(subject).to eq(expected) }
end
context 'when the cursor attributes are SQL literals' do
def cursor_attributes_for_node(node)
# Simulate the scenario where the cursor attributes are SQL literals
order.cursor_attributes_for_node(node).transform_values.each_with_index do |value, i|
index = i + 1
value_sql = value.nil? ? 'NULL::integer' : value
values = [value_sql] * index
Arel.sql("(ARRAY[#{values.join(',')}])[#{index}]") # example: ARRAY[cursor_value][1] will return cursor_value
end
end
it { expect(subject).to eq(expected) }
context 'when using the conditions in an UNION query' do
let(:query_building_method) { :build_union_query }
it { expect(subject).to eq(expected) }
end
end
end end
context 'when paginating backwards' do context 'when paginating backwards' do
subject do subject do
last_item = expected.last last_item = expected.last
cursor_attributes = order.cursor_attributes_for_node(last_item) cursor_attributes = order.cursor_attributes_for_node(last_item)
where_conditions = order.reversed_order.where_values_with_or_query(cursor_attributes) where_conditions = order.reversed_order.build_where_values(cursor_attributes)
iterate_and_collect(order: order.reversed_order, page_size: 2, where_conditions: where_conditions.to_sql) iterate_and_collect(order: order.reversed_order, page_size: 2, where_conditions: where_conditions)
end end
it do it do
...@@ -371,7 +432,7 @@ RSpec.describe Gitlab::Pagination::Keyset::Order do ...@@ -371,7 +432,7 @@ RSpec.describe Gitlab::Pagination::Keyset::Order do
reversed = order.reversed_order reversed = order.reversed_order
before_conditions = reversed.where_values_with_or_query(before_cursor) before_conditions = reversed.where_values_with_or_query(before_cursor)
query = build_query(order: order, where_conditions: "(#{after_conditions.to_sql}) AND (#{before_conditions.to_sql})", limit: 100) query = build_query(order: order, where_conditions: [Arel::Nodes::And.new([after_conditions, before_conditions])], limit: 100)
expect(run_query(query)).to eq([ expect(run_query(query)).to eq([
{ "id" => 2, "year" => 2011, "month" => 0 }, { "id" => 2, "year" => 2011, "month" => 0 },
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment