Commit eb568312 authored by Alex Kalderimis's avatar Alex Kalderimis

Add batching array resolver

This adds a new kind of resolver that is able to batch requests for
collections of items, resolving to an array of those results.

This includes changes to the presenter instrumentation to accept only
known values from the context, and some minor changes to the
graphql_helpers to help constructing resolvers.
parent 7e423209
# frozen_string_literal: true
module Resolvers
# Abstract class that will eliminate N+1 queries for size-constrained
# collections of items.
#
# **note**: The resolver will never load more items than
# `@field.max_page_size` if defined, falling back to
# `context.schema.default_max_page_size`.
#
# provided that:
#
# - the query can be uniquely determined by the object and the arguments
# - the model class includes FromUnion
# - the model class defines a scalar primary key
#
# This comes at the cost of returning arrays, not relations, so we don't get
# any keyset pagination goodness. Consequently, this is only suitable for small-ish
# result sets, as the full result set will be loaded into memory.
#
# To enforce this, the resolver limits the size of result sets to
# `@field.max_page_size || context.schema.default_max_page_size`.
#
# **important**: If the cardinality of your collection is likely to be greater than 100,
# then you will want to pass `max_page_size:` as part of the field definition
# or (ideally) as part of the resolver `field_options`.
#
# How to implement:
# --------------------
#
# Each subclass operates on two generic parameters, A and R:
# - A is any Object that can be used as a Hash key. Instances of A
# are returned by `query_input` and then passed to `query_for`.
# - R is any subclass of ApplicationRecord that includes FromUnion.
# R must have a single scalar primary_key
#
# Subclasses must implement:
# - #model_class -> Class[R]. (Must respond to :primary_key, and :from_union)
# - #query_input(**kwargs) -> A (Must be hashable)
# - #query_for(A) -> ActiveRecord::Relation[R]
#
# Note the relationship between query_input and query_for, one of which
# consumes the input of the other
# (i.e. `resolve(**args).sync == query_for(query_input(**args)).to_a`).
#
# Subclasses may implement:
# - #item_found(A, R) (return value is ignored)
class CachingArrayResolver < BaseResolver
def resolve(**args)
key = query_input(**args)
BatchLoader::GraphQL.for(key).batch(**batch) do |keys, loader|
if keys.size == 1
# We can avoid the union entirely.
k = keys.first
limit(query_for(k)).each { |item| found(loader, k, item) }
else
queries = keys.map { |key| query_for(key) }
by_id = model_class.from_union(tag(queries)).group_by { |r| r[primary_key] }
by_id.values.each do |item_group|
item = item_group.first
item_group.map(&:union_member_idx).each do |i|
found(loader, keys[i], item)
end
end
end
end
end
# Override this to intercept the items once they are found
def item_found(query_input, item)
end
private
def primary_key
@primary_key ||= (model_class.primary_key || raise("No primary key for #{model_class}"))
end
def batch
{ key: self.class, default_value: [] }
end
def found(loader, key, value)
loader.call(key) do |vs|
item_found(key, value)
vs << value
end
end
# Tag each row returned from each query with a the index of which query in
# the union it comes from. This lets us map the results back to the cache key.
def tag(queries)
queries.each_with_index.map do |q, i|
limit(q.select(all_fields, member_idx(i)))
end
end
def limit(query)
query.limit(query_limit) # rubocop: disable CodeReuse/ActiveRecord
end
def all_fields
model_class.arel_table[Arel.star]
end
# rubocop: disable Graphql/Descriptions (false positive!)
def query_limit
field&.max_page_size.presence || context.schema.default_max_page_size
end
# rubocop: enable Graphql/Descriptions
def member_idx(idx)
::Arel::Nodes::SqlLiteral.new(idx.to_s).as('union_member_idx')
end
end
end
......@@ -4,6 +4,8 @@ module Gitlab
module Graphql
module Present
class Instrumentation
SAFE_CONTEXT_KEYS = %i[current_user].freeze
def instrument(type, field)
return field unless field.metadata[:type_class]
......@@ -22,7 +24,8 @@ module Gitlab
next old_resolver.call(presented_type, args, context)
end
presenter = presented_in.presenter_class.new(object, **context.to_h)
attrs = safe_context_values(context)
presenter = presented_in.presenter_class.new(object, **attrs)
# we have to use the new `authorized_new` method, as `new` is protected
wrapped = presented_type.class.authorized_new(presenter, context)
......@@ -34,6 +37,12 @@ module Gitlab
resolve(resolve_with_presenter)
end
end
private
def safe_context_values(context)
context.to_h.slice(*SAFE_CONTEXT_KEYS)
end
end
end
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Resolvers::CachingArrayResolver do
include GraphqlHelpers
let_it_be(:non_admins) { create_list(:user, 4, admin: false) }
let(:query_context) { {} }
let(:max_page_size) { 10 }
let(:schema) { double('Schema', default_max_page_size: 3) }
let_it_be(:caching_resolver) do
Class.new(described_class) do
def query_input(is_admin:)
is_admin
end
def query_for(is_admin)
if is_admin.nil?
model_class.all
else
model_class.where(admin: is_admin)
end
end
def model_class
User # Happens to include FromUnion, and is cheap-ish to create
end
end
end
describe '#resolve' do
context 'all queries return results' do
let_it_be(:admins) { create_list(:admin, 3) }
it 'batches the queries' do
expect do
[resolve_users(true), resolve_users(false)].each(&method(:force))
end.to issue_same_number_of_queries_as { force(resolve_users(nil)) }
end
it 'finds the correct values' do
found_admins = resolve_users(true)
found_others = resolve_users(false)
admins_again = resolve_users(true)
found_all = resolve_users(nil)
expect(force(found_admins)).to match_array(admins)
expect(force(found_others)).to match_array(non_admins)
expect(force(admins_again)).to match_array(admins)
expect(force(found_all)).to match_array(admins + non_admins)
end
end
it 'does not perform a union of a query with itself' do
expect(User).to receive(:where).once.and_call_original
[resolve_users(false), resolve_users(false)].each(&method(:force))
end
context 'one of the queries returns no results' do
it 'finds the correct values' do
found_admins = resolve_users(true)
found_others = resolve_users(false)
found_all = resolve_users(nil)
expect(force(found_admins)).to be_empty
expect(force(found_others)).to match_array(non_admins)
expect(force(found_all)).to match_array(non_admins)
end
end
context 'one of the queries has already been cached' do
before do
force(resolve_users(nil))
end
it 'avoids further queries' do
expect do
repeated_find = resolve_users(nil)
expect(force(repeated_find)).to match_array(non_admins)
end.not_to exceed_query_limit(0)
end
end
context 'the resolver overrides item_found' do
let_it_be(:admins) { create_list(:admin, 2) }
let(:query_context) do
{
found: { true => [], false => [], nil => [] }
}
end
let_it_be(:with_item_found) do
Class.new(caching_resolver) do
def item_found(key, item)
context[:found][key] << item
end
end
end
it 'receives item_found for each key the item mapped to' do
found_admins = resolve_users(true, with_item_found)
found_all = resolve_users(nil, with_item_found)
[found_admins, found_all].each(&method(:force))
expect(query_context[:found]).to match({
false => be_empty,
true => match_array(admins),
nil => match_array(admins + non_admins)
})
end
end
context 'the max_page_size is lower than the total result size' do
let(:max_page_size) { 2 }
it 'respects the max_page_size, on a per subset basis' do
found_all = resolve_users(nil)
found_others = resolve_users(false)
expect(force(found_all).size).to eq(2)
expect(force(found_others).size).to eq(2)
end
end
context 'the field does not declare max_page_size' do
let(:max_page_size) { nil }
it 'takes the page size from schema.default_max_page_size' do
found_all = resolve_users(nil)
found_others = resolve_users(false)
expect(force(found_all).size).to eq(schema.default_max_page_size)
expect(force(found_others).size).to eq(schema.default_max_page_size)
end
end
specify 'force . resolve === to_a . query_for . query_input' do
r = resolver_instance(caching_resolver)
args = { is_admin: false }
naive = r.query_for(r.query_input(**args)).to_a
expect(force(r.resolve(**args))).to eq(naive)
end
end
def resolve_users(is_admin, resolver = caching_resolver)
field = double('Field', max_page_size: max_page_size)
args = { is_admin: is_admin }
resolve(resolver, args: args, field: field, ctx: query_context, schema: schema)
end
def force(lazy)
::Gitlab::Graphql::Lazy.force(lazy)
end
end
......@@ -17,8 +17,8 @@ module GraphqlHelpers
# ready, then the early return is returned instead.
#
# Then the resolve method is called.
def resolve(resolver_class, obj: nil, args: {}, ctx: {}, field: nil)
resolver = resolver_class.new(object: obj, context: ctx, field: field)
def resolve(resolver_class, args: {}, **resolver_args)
resolver = resolver_instance(resolver_class, **resolver_args)
ready, early_return = sync_all { resolver.ready?(**args) }
return early_return unless ready
......@@ -26,6 +26,15 @@ module GraphqlHelpers
resolver.resolve(**args)
end
def resolver_instance(resolver_class, obj: nil, ctx: {}, field: nil, schema: GitlabSchema)
if ctx.is_a?(Hash)
q = double('Query', schema: schema)
ctx = GraphQL::Query::Context.new(query: q, object: obj, values: ctx)
end
resolver_class.new(object: obj, context: ctx, field: field)
end
# Eagerly run a loader's named resolver
# (syncs any lazy values returned by resolve)
def eager_resolve(resolver_class, **opts)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment