Add batching array resolver

This adds a new kind of resolver that is able to batch requests for collections of items, resolving to an array of those results. This includes changes to the presenter instrumentation to accept only known values from the context, and some minor changes to the graphql_helpers to help constructing resolvers.

Add batching array resolver
This adds a new kind of resolver that is able to batch requests for collections of items, resolving to an array of those results. This includes changes to the presenter instrumentation to accept only known values from the context, and some minor changes to the graphql_helpers to help constructing resolvers.
eb568312 · Alex Kalderimis · 7e423209 · eb568312 · eb568312 · eb568312
Commit eb568312 authored Oct 16, 2020 by Alex Kalderimis
4 changed files
--- a/app/graphql/resolvers/caching_array_resolver.rb
+++ b/app/graphql/resolvers/caching_array_resolver.rb
+# frozen_string_literal: true
+
+module Resolvers
+  # Abstract class that will eliminate N+1 queries for size-constrained
+  # collections of items.
+  #
+  # **note**: The resolver will never load more items than
+  # `@field.max_page_size` if defined, falling back to
+  # `context.schema.default_max_page_size`.
+  #
+  # provided that:
+  #
+  # - the query can be uniquely determined by the object and the arguments
+  # - the model class includes FromUnion
+  # - the model class defines a scalar primary key
+  #
+  # This comes at the cost of returning arrays, not relations, so we don't get
+  # any keyset pagination goodness. Consequently, this is only suitable for small-ish
+  # result sets, as the full result set will be loaded into memory.
+  #
+  # To enforce this, the resolver limits the size of result sets to
+  # `@field.max_page_size || context.schema.default_max_page_size`.
+  #
+  # **important**: If the cardinality of your collection is likely to be greater than 100,
+  # then you will want to pass `max_page_size:` as part of the field definition
+  # or (ideally) as part of the resolver `field_options`.
+  #
+  # How to implement:
+  # --------------------
+  #
+  # Each subclass operates on two generic parameters, A and R:
+  #  - A is any Object that can be used as a Hash key. Instances of A
+  #    are returned by `query_input` and then passed to `query_for`.
+  #  - R is any subclass of ApplicationRecord that includes FromUnion.
+  #    R must have a single scalar primary_key
+  #
+  # Subclasses must implement:
+  # - #model_class -> Class[R]. (Must respond to  :primary_key, and :from_union)
+  # - #query_input(**kwargs) -> A (Must be hashable)
+  # - #query_for(A) -> ActiveRecord::Relation[R]
+  #
+  # Note the relationship between query_input and query_for, one of which
+  # consumes the input of the other
+  # (i.e. `resolve(**args).sync == query_for(query_input(**args)).to_a`).
+  #
+  # Subclasses may implement:
+  # - #item_found(A, R) (return value is ignored)
+  class CachingArrayResolver < BaseResolver
+    def resolve(**args)
+      key = query_input(**args)
+
+      BatchLoader::GraphQL.for(key).batch(**batch) do |keys, loader|
+        if keys.size == 1
+          # We can avoid the union entirely.
+          k = keys.first
+          limit(query_for(k)).each { |item| found(loader, k, item) }
+        else
+          queries = keys.map { |key| query_for(key) }
+
+          by_id = model_class.from_union(tag(queries)).group_by { |r| r[primary_key] }
+
+          by_id.values.each do |item_group|
+            item = item_group.first
+            item_group.map(&:union_member_idx).each do |i|
+              found(loader, keys[i], item)
+            end
+          end
+        end
+      end
+    end
+
+    # Override this to intercept the items once they are found
+    def item_found(query_input, item)
+    end
+
+    private
+
+    def primary_key
+      @primary_key ||= (model_class.primary_key || raise("No primary key for #{model_class}"))
+    end
+
+    def batch
+      { key: self.class, default_value: [] }
+    end
+
+    def found(loader, key, value)
+      loader.call(key) do |vs|
+        item_found(key, value)
+        vs << value
+      end
+    end
+
+    # Tag each row returned from each query with a the index of which query in
+    # the union it comes from. This lets us map the results back to the cache key.
+    def tag(queries)
+      queries.each_with_index.map do |q, i|
+        limit(q.select(all_fields, member_idx(i)))
+      end
+    end
+
+    def limit(query)
+      query.limit(query_limit) # rubocop: disable CodeReuse/ActiveRecord
+    end
+
+    def all_fields
+      model_class.arel_table[Arel.star]
+    end
+
+    # rubocop: disable Graphql/Descriptions (false positive!)
+    def query_limit
+      field&.max_page_size.presence || context.schema.default_max_page_size
+    end
+    # rubocop: enable Graphql/Descriptions
+
+    def member_idx(idx)
+      ::Arel::Nodes::SqlLiteral.new(idx.to_s).as('union_member_idx')
+    end
+  end
+end
--- a/lib/gitlab/graphql/present/instrumentation.rb
+++ b/lib/gitlab/graphql/present/instrumentation.rb
@@ -4,6 +4,8 @@ module Gitlab
  module Graphql
    module Present
      class Instrumentation
+        SAFE_CONTEXT_KEYS = %i[current_user].freeze
+
        def instrument(type, field)
          return field unless field.metadata[:type_class]

@@ -22,7 +24,8 @@ module Gitlab
              next old_resolver.call(presented_type, args, context)
            end

-            presenter = presented_in.presenter_class.new(object, **context.to_h)
+            attrs = safe_context_values(context)
+            presenter = presented_in.presenter_class.new(object, **attrs)

            # we have to use the new `authorized_new` method, as `new` is protected
            wrapped = presented_type.class.authorized_new(presenter, context)
@@ -34,6 +37,12 @@ module Gitlab
            resolve(resolve_with_presenter)
          end
        end
+
+        private
+
+        def safe_context_values(context)
+          context.to_h.slice(*SAFE_CONTEXT_KEYS)
+        end
      end
    end
  end

--- a/spec/graphql/resolvers/caching_array_resolver_spec.rb
+++ b/spec/graphql/resolvers/caching_array_resolver_spec.rb
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Resolvers::CachingArrayResolver do
+  include GraphqlHelpers
+
+  let_it_be(:non_admins) { create_list(:user, 4, admin: false) }
+  let(:query_context) { {} }
+  let(:max_page_size) { 10 }
+  let(:schema) { double('Schema', default_max_page_size: 3) }
+
+  let_it_be(:caching_resolver) do
+    Class.new(described_class) do
+      def query_input(is_admin:)
+        is_admin
+      end
+
+      def query_for(is_admin)
+        if is_admin.nil?
+          model_class.all
+        else
+          model_class.where(admin: is_admin)
+        end
+      end
+
+      def model_class
+        User # Happens to include FromUnion, and is cheap-ish to create
+      end
+    end
+  end
+
+  describe '#resolve' do
+    context 'all queries return results' do
+      let_it_be(:admins) { create_list(:admin, 3) }
+
+      it 'batches the queries' do
+        expect do
+          [resolve_users(true), resolve_users(false)].each(&method(:force))
+        end.to issue_same_number_of_queries_as { force(resolve_users(nil)) }
+      end
+
+      it 'finds the correct values' do
+        found_admins = resolve_users(true)
+        found_others = resolve_users(false)
+        admins_again = resolve_users(true)
+        found_all = resolve_users(nil)
+
+        expect(force(found_admins)).to match_array(admins)
+        expect(force(found_others)).to match_array(non_admins)
+        expect(force(admins_again)).to match_array(admins)
+        expect(force(found_all)).to match_array(admins + non_admins)
+      end
+    end
+
+    it 'does not perform a union of a query with itself' do
+      expect(User).to receive(:where).once.and_call_original
+
+      [resolve_users(false), resolve_users(false)].each(&method(:force))
+    end
+
+    context 'one of the queries returns no results' do
+      it 'finds the correct values' do
+        found_admins = resolve_users(true)
+        found_others = resolve_users(false)
+        found_all = resolve_users(nil)
+
+        expect(force(found_admins)).to be_empty
+        expect(force(found_others)).to match_array(non_admins)
+        expect(force(found_all)).to match_array(non_admins)
+      end
+    end
+
+    context 'one of the queries has already been cached' do
+      before do
+        force(resolve_users(nil))
+      end
+
+      it 'avoids further queries' do
+        expect do
+          repeated_find = resolve_users(nil)
+
+          expect(force(repeated_find)).to match_array(non_admins)
+        end.not_to exceed_query_limit(0)
+      end
+    end
+
+    context 'the resolver overrides item_found' do
+      let_it_be(:admins) { create_list(:admin, 2) }
+      let(:query_context) do
+        {
+          found: { true => [], false => [], nil => [] }
+        }
+      end
+
+      let_it_be(:with_item_found) do
+        Class.new(caching_resolver) do
+          def item_found(key, item)
+            context[:found][key] << item
+          end
+        end
+      end
+
+      it 'receives item_found for each key the item mapped to' do
+        found_admins = resolve_users(true, with_item_found)
+        found_all = resolve_users(nil, with_item_found)
+
+        [found_admins, found_all].each(&method(:force))
+
+        expect(query_context[:found]).to match({
+          false => be_empty,
+          true => match_array(admins),
+          nil => match_array(admins + non_admins)
+        })
+      end
+    end
+
+    context 'the max_page_size is lower than the total result size' do
+      let(:max_page_size) { 2 }
+
+      it 'respects the max_page_size, on a per subset basis' do
+        found_all = resolve_users(nil)
+        found_others = resolve_users(false)
+
+        expect(force(found_all).size).to eq(2)
+        expect(force(found_others).size).to eq(2)
+      end
+    end
+
+    context 'the field does not declare max_page_size' do
+      let(:max_page_size) { nil }
+
+      it 'takes the page size from schema.default_max_page_size' do
+        found_all = resolve_users(nil)
+        found_others = resolve_users(false)
+
+        expect(force(found_all).size).to eq(schema.default_max_page_size)
+        expect(force(found_others).size).to eq(schema.default_max_page_size)
+      end
+    end
+
+    specify 'force . resolve === to_a . query_for . query_input' do
+      r = resolver_instance(caching_resolver)
+      args = { is_admin: false }
+
+      naive = r.query_for(r.query_input(**args)).to_a
+
+      expect(force(r.resolve(**args))).to eq(naive)
+    end
+  end
+
+  def resolve_users(is_admin, resolver = caching_resolver)
+    field = double('Field', max_page_size: max_page_size)
+    args = { is_admin: is_admin }
+    resolve(resolver, args: args, field: field, ctx: query_context, schema: schema)
+  end
+
+  def force(lazy)
+    ::Gitlab::Graphql::Lazy.force(lazy)
+  end
+end
--- a/spec/support/helpers/graphql_helpers.rb
+++ b/spec/support/helpers/graphql_helpers.rb
@@ -17,8 +17,8 @@ module GraphqlHelpers
  # ready, then the early return is returned instead.
  #
  # Then the resolve method is called.
-  def resolve(resolver_class, obj: nil, args: {}, ctx: {}, field: nil)
-    resolver = resolver_class.new(object: obj, context: ctx, field: field)
+  def resolve(resolver_class, args: {}, **resolver_args)
+    resolver = resolver_instance(resolver_class, **resolver_args)
    ready, early_return = sync_all { resolver.ready?(**args) }

    return early_return unless ready
@@ -26,6 +26,15 @@ module GraphqlHelpers
    resolver.resolve(**args)
  end

+  def resolver_instance(resolver_class, obj: nil, ctx: {}, field: nil, schema: GitlabSchema)
+    if ctx.is_a?(Hash)
+      q = double('Query', schema: schema)
+      ctx = GraphQL::Query::Context.new(query: q, object: obj, values: ctx)
+    end
+
+    resolver_class.new(object: obj, context: ctx, field: field)
+  end
+
  # Eagerly run a loader's named resolver
  # (syncs any lazy values returned by resolve)
  def eager_resolve(resolver_class, **opts)