Avoid loading objects from DB in ES results

We already have most of the information we want to display in Elasticsearch results inside ES itself, so instead of finding (and returning!) the full ES object and only using the ID to get the database object, we instead use a Lite object that quacks like the original and uses the info from Elasticsearch. We do this for: - Projects - Issues - Merge Requests - Milestones

Avoid loading objects from DB in ES results
We already have most of the information we want to display in Elasticsearch results inside ES itself, so instead of finding (and returning!) the full ES object and only using the ID to get the database object, we instead use a Lite object that quacks like the original and uses the info from Elasticsearch. We do this for: - Projects - Issues - Merge Requests - Milestones
d9cb907c · Mario de la Ossa · ab9cf52e · d9cb907c · d9cb907c · d9cb907c
Commit d9cb907c authored May 14, 2019 by Mario de la Ossa
22 changed files
--- a/app/controllers/search_controller.rb
+++ b/app/controllers/search_controller.rb
@@ -25,6 +25,7 @@ class SearchController < ApplicationController
    @show_snippets = search_service.show_snippets?
    @search_results = search_service.search_results
    @search_objects = search_service.search_objects
+    @display_options = search_service.display_options

    render_commits if @scope == 'commits'
    eager_load_user_status if @scope == 'users'

--- a/app/models/project_feature.rb
+++ b/app/models/project_feature.rb
@@ -72,6 +72,8 @@ class ProjectFeature < ApplicationRecord
  default_value_for :wiki_access_level,           value: ENABLED, allows_nil: false
  default_value_for :repository_access_level,     value: ENABLED, allows_nil: false

+  scope :for_project_id, -> (project) { where(project: project) }
+
  def feature_available?(feature, user)
    # This feature might not be behind a feature flag at all, so default to true
    return false unless ::Feature.enabled?(feature, user, default_enabled: true)

--- a/app/services/search_service.rb
+++ b/app/services/search_service.rb
@@ -52,6 +52,10 @@ class SearchService
    @search_objects ||= search_results.objects(scope, params[:page])
  end

+  def display_options
+    @display_options ||= search_results.display_options(scope)
+  end
+
  private

  def search_service

--- a/app/views/search/_results.html.haml
+++ b/app/views/search/_results.html.haml
@@ -21,7 +21,7 @@
      .search-results
        - if @scope == 'projects'
          .term
-            = render 'shared/projects/list', projects: @search_objects, pipeline_status: false
+            = render 'shared/projects/list', { projects: @search_objects, pipeline_status: false }.merge(@display_options)
        - else
          - locals = { projects: blob_projects(@search_objects) } if %w[blobs wiki_blobs].include?(@scope)
          = render partial: "search/results/#{@scope.singularize}", collection: @search_objects, locals: locals

--- a/app/views/search/results/_issue.html.haml
+++ b/app/views/search/results/_issue.html.haml
 .search-result-row
  %h4
    = confidential_icon(issue)
-    = link_to [issue.project.namespace.becomes(Namespace), issue.project, issue] do
+    = link_to namespace_project_issue_path(issue.project.namespace.becomes(Namespace), issue.project, issue) do
      %span.term.str-truncated= issue.title
    - if issue.closed?
      %span.badge.badge-danger.prepend-left-5= _("Closed")

--- a/app/views/search/results/_merge_request.html.haml
+++ b/app/views/search/results/_merge_request.html.haml
 .search-result-row
  %h4
-    = link_to [merge_request.target_project.namespace.becomes(Namespace), merge_request.target_project, merge_request] do
+    = link_to namespace_project_merge_request_path(merge_request.target_project.namespace.becomes(Namespace), merge_request.target_project, merge_request) do
      %span.term.str-truncated= merge_request.title
    - if merge_request.merged?
      %span.badge.badge-primary.prepend-left-5= _("Merged")

--- a/app/views/search/results/_milestone.html.haml
+++ b/app/views/search/results/_milestone.html.haml
 .search-result-row
  %h4
-    = link_to [milestone.project.namespace.becomes(Namespace), milestone.project, milestone] do
+    = link_to namespace_project_milestone_path(milestone.project.namespace.becomes(Namespace), milestone.project, milestone) do
      %span.term.str-truncated= milestone.title

  - if milestone.description.present?

--- a/ee/app/models/concerns/elastic/application_search.rb
+++ b/ee/app/models/concerns/elastic/application_search.rb
@@ -315,7 +315,7 @@ module Elastic
        self.import(options)
      end

-      def basic_query_hash(fields, query)
+      def basic_query_hash(fields, query, page: nil, per_page: nil)
        query_hash = if query.present?
                       {
                         query: {
@@ -344,6 +344,9 @@ module Elastic
                       }
                     end

+        query_hash[:size] = per_page if per_page
+        query_hash[:from] = per_page * (page - 1) if per_page && page
+
        query_hash[:sort] = [
          { updated_at: { order: :desc } },
          :_score

--- a/ee/app/models/concerns/elastic/issues_search.rb
+++ b/ee/app/models/concerns/elastic/issues_search.rb
@@ -30,7 +30,7 @@ module Elastic
          if query =~ /#(\d+)\z/
            iid_query_hash(Regexp.last_match(1))
          else
-            basic_query_hash(%w(title^2 description), query)
+            basic_query_hash(%w(title^2 description), query, page: options[:page], per_page: options[:per_page])
          end

        options[:features] = 'issues'

--- a/ee/app/models/concerns/elastic/merge_requests_search.rb
+++ b/ee/app/models/concerns/elastic/merge_requests_search.rb
@@ -46,7 +46,7 @@ module Elastic
          if query =~ /\!(\d+)\z/
            iid_query_hash(Regexp.last_match(1))
          else
-            basic_query_hash(%w(title^2 description), query)
+            basic_query_hash(%w(title^2 description), query, page: options[:page], per_page: options[:per_page])
          end

        options[:features] = 'merge_requests'

--- a/ee/app/models/concerns/elastic/milestones_search.rb
+++ b/ee/app/models/concerns/elastic/milestones_search.rb
@@ -12,7 +12,7 @@ module Elastic
        # https://gitlab.com/gitlab-org/gitlab-ee/issues/349
        data = {}

-        [:id, :title, :description, :project_id, :created_at, :updated_at].each do |attr|
+        [:id, :iid, :title, :description, :project_id, :created_at, :updated_at].each do |attr|
          data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
        end

@@ -26,7 +26,7 @@ module Elastic
      def self.elastic_search(query, options: {})
        options[:in] = %w(title^2 description)

-        query_hash = basic_query_hash(options[:in], query)
+        query_hash = basic_query_hash(options[:in], query, page: options[:page], per_page: options[:per_page])

        query_hash = project_ids_filter(query_hash, options)


--- a/ee/app/models/concerns/elastic/projects_search.rb
+++ b/ee/app/models/concerns/elastic/projects_search.rb
@@ -65,7 +65,7 @@ module Elastic
      def self.elastic_search(query, options: {})
        options[:in] = %w(name^10 name_with_namespace^2 path_with_namespace path^9 description)

-        query_hash = basic_query_hash(options[:in], query)
+        query_hash = basic_query_hash(options[:in], query, page: options[:page], per_page: options[:per_page])

        filters = []


--- a/ee/changelogs/unreleased/10854-elasticsearch_avoid_db.yml
+++ b/ee/changelogs/unreleased/10854-elasticsearch_avoid_db.yml
+---
+title: Avoid loading database objects for Elasticsearch results
+merge_request: 12691
+author:
+type: performance
--- a/ee/lib/elasticsearch/lite_project.rb
+++ b/ee/lib/elasticsearch/lite_project.rb
+# frozen_string_literal: true
+
+module Elasticsearch
+  class LiteProject
+    include Gitlab::Utils::StrongMemoize
+
+    attr_accessor :id, :name, :path, :description, :namespace_id, :create_at, :updated_at, :archived,
+                  :visibility_level, :last_activity_at, :name_with_namespace, :path_with_namespace,
+                  :issues_access_level, :merge_requests_access_level, :snippets_access_level, :wiki_access_level,
+                  :repository_access_level
+    attr_accessor :pipeline_status, :commit, :creator
+
+    alias_attribute :last_activity_date, :last_activity_at
+
+    def initialize(raw_project_hash)
+      raw_project_hash.each do |key, value|
+        value = value.to_datetime if key =~ /_at$/
+        self.instance_variable_set(:"@#{key}", value)
+      end
+    end
+
+    # only used for routing and results display, so we trick Rails here
+    def namespace
+      # can't use an OpenStruct because to_param is a defined method on it
+      Struct.new(:to_param, :human_name).new(
+        path_with_namespace.sub(/\/#{path}$/, ''),
+        name_with_namespace.sub(/\s+\/\s+#{name}$/, '')
+      )
+    end
+
+    def route
+      # Creates an object that has a `cache_key` attribute set to nil
+      Struct.new(:cache_key).new(nil)
+    end
+
+    def pending_delete?
+      false
+    end
+
+    def cache_key
+      "lite_projects/#{id}-#{updated_at.utc.to_s(:number)}"
+    end
+
+    def to_param
+      path&.to_s
+    end
+
+    def model_name
+      OpenStruct.new(param_key: 'project')
+    end
+
+    def banzai_render_context(field)
+      return unless field == :description
+
+      { pipeline: :description, project: self }
+    end
+
+    def default_issues_tracker?
+      true
+    end
+
+    # rubocop: disable CodeReuse/ActiveRecord
+    # This method is required by the OpenMergeRequestsCountService
+    def merge_requests
+      strong_memoize(:merge_requests) do
+        MergeRequest.opened.where(project_id: self.id)
+      end
+    end
+    # rubocop: enable CodeReuse/ActiveRecord
+
+    def forks_count
+      Projects::ForksCountService.new(self).count
+    end
+
+    def open_issues_count(current_user = nil)
+      Projects::OpenIssuesCountService.new(self, current_user).count
+    end
+
+    def open_merge_requests_count
+      Projects::OpenMergeRequestsCountService.new(self).count
+    end
+  end
+end
--- a/ee/lib/elasticsearch/result_objects.rb
+++ b/ee/lib/elasticsearch/result_objects.rb
+# frozen_string_literal: true
+
+module Elasticsearch
+  module ResultObjects
+    # Here we refine our models to learn how to load from Elasticsearch without hitting the database
+    # In order to use these methods in another class you first have to `using Elasticsearch::ResultObjects`
+    # and you can then use the model objects directly
+
+    ES_ATTRIBUTES = %i(join_field type).freeze
+
+    refine ::Project.singleton_class do
+      def load_from_elasticsearch(es_response, current_user: nil)
+        es_response.results.response.map(&:_source).then do |projects|
+          projects.map do |project|
+            ::Elasticsearch::LiteProject.new(project)
+          end
+        end
+      end
+    end
+
+    refine ::Issue.singleton_class do
+      def load_from_elasticsearch(es_response, current_user:)
+        es_response.results.response.map(&:_source).then do |issues|
+          projects = ::ProjectsFinder.new(
+            current_user: current_user,
+            project_ids_relation: issues.map(&:project_id)
+          ).execute.index_by(&:id)
+
+          issues.map do |issue|
+            issue[:project] = projects[issue.delete(:project_id)]
+            issue[:assignee_ids] = issue.delete(:assignee_id)
+            issue.except!(*ES_ATTRIBUTES)
+
+            new(issue)
+          end
+        end
+      end
+    end
+
+    refine ::MergeRequest.singleton_class do
+      def load_from_elasticsearch(es_response, current_user:)
+        es_response.results.response.map(&:_source).then do |merge_requests|
+          # rubocop: disable CodeReuse/ActiveRecord
+          projects = ::ProjectsFinder.new(
+            current_user: current_user,
+            project_ids_relation: merge_requests.map(&:target_project_id)
+          ).execute.includes(:route, namespace: [:route]).index_by(&:id)
+          # rubocop: enable CodeReuse/ActiveRecord
+
+          merge_requests.map do |merge_request|
+            merge_request[:target_project] = projects[merge_request.delete(:target_project_id)]
+            merge_request.except!(*ES_ATTRIBUTES)
+
+            new(merge_request)
+          end
+        end
+      end
+    end
+
+    refine ::Milestone.singleton_class do
+      def load_from_elasticsearch(es_response, current_user:)
+        es_response.results.response.map(&:_source).then do |milestones|
+          projects = ::ProjectsFinder.new(
+            current_user: current_user,
+            project_ids_relation: milestones.map(&:project_id)
+          ).execute.index_by(&:id)
+
+          milestones.map do |milestone|
+            milestone[:project] = projects[milestone.delete(:project_id)]
+            milestone.except!(*ES_ATTRIBUTES)
+
+            new(milestone)
+          end
+        end
+      end
+    end
+  end
+end
--- a/ee/lib/gitlab/elastic/search_results.rb
+++ b/ee/lib/gitlab/elastic/search_results.rb
@@ -5,6 +5,8 @@ module Gitlab
    class SearchResults
      include Gitlab::Utils::StrongMemoize

+      using Elasticsearch::ResultObjects
+
      attr_reader :current_user, :query, :public_and_internal_projects

      # Limit search results by passed project ids
@@ -25,13 +27,13 @@ module Gitlab
      def objects(scope, page = nil)
        case scope
        when 'projects'
-          projects.page(page).per(per_page).records
+          projects(page: page, per_page: per_page)
        when 'issues'
-          issues.page(page).per(per_page).records
+          issues(page: page, per_page: per_page)
        when 'merge_requests'
-          merge_requests.page(page).per(per_page).records
+          merge_requests(page: page, per_page: per_page)
        when 'milestones'
-          milestones.page(page).per(per_page).records
+          milestones(page: page, per_page: per_page)
        when 'blobs'
          blobs.page(page).per(per_page)
        when 'wiki_blobs'
@@ -45,6 +47,17 @@ module Gitlab
        end
      end

+      def display_options(scope)
+        case scope
+        when 'projects'
+          {
+            stars: false
+          }
+        else
+          {}
+        end
+      end
+
      def generic_search_results
        @generic_search_results ||= Gitlab::SearchResults.new(current_user, limit_projects, query)
      end
@@ -143,19 +156,40 @@ module Gitlab
        }
      end

-      def projects
+      def paginate_array(collection, total_count, page, per_page)
+        offset = per_page * (page - 1)
+        Kaminari.paginate_array(collection, total_count: total_count, limit: per_page, offset: offset)
+      end
+
+      def search(model, query, options, page: 1, per_page: 20)
+        page = (page || 1).to_i
+
+        response = model.elastic_search(
+          query,
+          options: options.merge(page: page, per_page: per_page)
+        )
+
+        results = model.load_from_elasticsearch(response, current_user: current_user)
+
+        paginate_array(results, response.total_count, page, per_page)
+      end
+
+      # See the comment for #commits for more info on why we memoize this way
+      def projects(page: 1, per_page: 20)
        strong_memoize(:projects) do
-          Project.elastic_search(query, options: base_options)
+          search(Project, query, base_options, page: page, per_page: per_page)
        end
      end

-      def issues
+      # See the comment for #commits for more info on why we memoize this way
+      def issues(page: 1, per_page: 20)
        strong_memoize(:issues) do
-          Issue.elastic_search(query, options: base_options)
+          search(Issue, query, base_options, page: page, per_page: per_page)
        end
      end

-      def milestones
+      # See the comment for #commits for more info on why we memoize this way
+      def milestones(page: 1, per_page: 20)
        strong_memoize(:milestones) do
          # Must pass 'issues' and 'merge_requests' to check
          # if any of the features is available for projects in Elastic::ApplicationSearch#project_ids_query
@@ -164,17 +198,18 @@ module Gitlab
          options = base_options
          options[:features] = [:issues, :merge_requests]

-          Milestone.elastic_search(query, options: options)
+          search(Milestone, query, options, page: page, per_page: per_page)
        end
      end

-      def merge_requests
+      # See the comment for #commits for more info on why we memoize this way
+      def merge_requests(page: 1, per_page: 20)
        strong_memoize(:merge_requests) do
-          options = base_options.merge(project_ids: non_guest_project_ids)
-          MergeRequest.elastic_search(query, options: options)
+          search(MergeRequest, query, base_options.merge(project_ids: non_guest_project_ids), page: page, per_page: per_page)
        end
      end

+      # See the comment for #commits for more info on why we memoize this way
      def blobs
        return Kaminari.paginate_array([]) if query.blank?

@@ -191,6 +226,7 @@ module Gitlab
        end
      end

+      # See the comment for #commits for more info on why we memoize this way
      def wiki_blobs
        return Kaminari.paginate_array([]) if query.blank?


--- a/ee/spec/features/search/elastic/global_search_spec.rb
+++ b/ee/spec/features/search/elastic/global_search_spec.rb
 require 'spec_helper'

-describe 'Global elastic search' do
+describe 'Global elastic search', :elastic do
  let(:user) { create(:user) }
  let(:project) { create(:project, :repository, :wiki_repo, namespace: user.namespace) }

  before do
    stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
-    Gitlab::Elastic::Helper.create_empty_index

    project.add_maintainer(user)
    sign_in(user)
  end

  after do
-    Gitlab::Elastic::Helper.delete_index
    stub_ee_application_setting(elasticsearch_search: false, elasticsearch_indexing: false)
  end

+  shared_examples 'a pure Elasticsearch result' do
+    it 'avoids N+1 database queries' do
+      create(object, creation_args)
+      Gitlab::Elastic::Helper.refresh_index
+
+      control_count = ActiveRecord::QueryRecorder.new { visit path }.count
+
+      create_list(object, 10, creation_args)
+      Gitlab::Elastic::Helper.refresh_index
+
+      control_count = control_count + (10 * query_count_multiplier) + 1
+
+      expect { visit path }.not_to exceed_query_limit(control_count)
+    end
+  end
+
+  describe 'I do not overload the database' do
+    context 'searching issues' do
+      let(:object) { :issue }
+      let(:creation_args) { { project: project, title: 'initial' } }
+      let(:path) { search_path(search: 'initial', scope: 'issues') }
+      let(:query_count_multiplier) { 0 }
+
+      it_behaves_like 'a pure Elasticsearch result'
+    end
+
+    context 'searching projects' do
+      let(:object) { :project }
+      let(:creation_args) { { namespace: user.namespace } }
+      let(:path) { search_path(search: 'project*', scope: 'projects') }
+      # Each Project requires 4 extra queries: one for each "count" (forks, open MRs, open Issues) and one for access level
+      let(:query_count_multiplier) { 4 }
+
+      it_behaves_like 'a pure Elasticsearch result'
+    end
+
+    context 'searching merge requests' do
+      it 'avoids N+1 database queries' do
+        path = search_path(search: 'initial', scope: 'merge_requests')
+
+        create(:merge_request, title: 'initial', source_project: project)
+        Gitlab::Elastic::Helper.refresh_index
+
+        control_count = ActiveRecord::QueryRecorder.new { visit path }.count
+
+        merge_requests = create_list(:merge_request, 10, title: 'initial')
+        merge_requests.each { |mr| mr.target_project.add_maintainer(user) }
+        Gitlab::Elastic::Helper.refresh_index
+
+        # Each MR loaded has a Route load that has been tricky to track down
+        expect { visit path }.not_to exceed_query_limit(control_count + 11)
+      end
+    end
+
+    context 'searching milestones' do
+      let(:object) { :milestone }
+      let(:creation_args) { { project: project } }
+      let(:path) { search_path(search: 'milestone*', scope: 'milestones') }
+      let(:query_count_multiplier) { 0 }
+
+      it_behaves_like 'a pure Elasticsearch result'
+    end
+  end
+
  describe 'I search through the issues and I see pagination' do
    before do
      create_list(:issue, 21, project: project, title: 'initial')

--- a/ee/spec/lib/gitlab/elastic/project_search_results_spec.rb
+++ b/ee/spec/lib/gitlab/elastic/project_search_results_spec.rb
@@ -162,31 +162,31 @@ describe Gitlab::Elastic::ProjectSearchResults do

    it 'does not list project confidential issues for non project members' do
      results = described_class.new(non_member, query, project.id)
-      issues = results.objects('issues')
+      issues = results.objects('issues').map(&:id)

-      expect(issues).to include issue
-      expect(issues).not_to include security_issue_1
-      expect(issues).not_to include security_issue_2
+      expect(issues).to include issue.id
+      expect(issues).not_to include security_issue_1.id
+      expect(issues).not_to include security_issue_2.id
      expect(results.issues_count).to eq 1
    end

    it 'lists project confidential issues for author' do
      results = described_class.new(author, query, project.id)
-      issues = results.objects('issues')
+      issues = results.objects('issues').map(&:id)

-      expect(issues).to include issue
-      expect(issues).to include security_issue_1
-      expect(issues).not_to include security_issue_2
+      expect(issues).to include issue.id
+      expect(issues).to include security_issue_1.id
+      expect(issues).not_to include security_issue_2.id
      expect(results.issues_count).to eq 2
    end

    it 'lists project confidential issues for assignee' do
      results = described_class.new(assignee, query, project.id)
-      issues = results.objects('issues')
+      issues = results.objects('issues').map(&:id)

-      expect(issues).to include issue
-      expect(issues).not_to include security_issue_1
-      expect(issues).to include security_issue_2
+      expect(issues).to include issue.id
+      expect(issues).not_to include security_issue_1.id
+      expect(issues).to include security_issue_2.id
      expect(results.issues_count).to eq 2
    end

@@ -194,11 +194,11 @@ describe Gitlab::Elastic::ProjectSearchResults do
      project.add_developer(member)

      results = described_class.new(member, query, project.id)
-      issues = results.objects('issues')
+      issues = results.objects('issues').map(&:id)

-      expect(issues).to include issue
-      expect(issues).to include security_issue_1
-      expect(issues).to include security_issue_2
+      expect(issues).to include issue.id
+      expect(issues).to include security_issue_1.id
+      expect(issues).to include security_issue_2.id
      expect(results.issues_count).to eq 3
    end

@@ -206,21 +206,21 @@ describe Gitlab::Elastic::ProjectSearchResults do
      project.add_guest(member)

      results = described_class.new(member, query, project.id)
-      issues = results.objects('issues')
+      issues = results.objects('issues').map(&:id)

-      expect(issues).to include issue
-      expect(issues).not_to include security_issue_1
-      expect(issues).not_to include security_issue_2
+      expect(issues).to include issue.id
+      expect(issues).not_to include security_issue_1.id
+      expect(issues).not_to include security_issue_2.id
      expect(results.issues_count).to eq 1
    end

    it 'lists all project issues for admin' do
      results = described_class.new(admin, query, project.id)
-      issues = results.objects('issues')
+      issues = results.objects('issues').map(&:id)

-      expect(issues).to include issue
-      expect(issues).to include security_issue_1
-      expect(issues).to include security_issue_2
+      expect(issues).to include issue.id
+      expect(issues).to include security_issue_1.id
+      expect(issues).to include security_issue_2.id
      expect(results.issues_count).to eq 3
    end
  end

--- a/ee/spec/lib/gitlab/elastic/search_results_spec.rb
+++ b/ee/spec/lib/gitlab/elastic/search_results_spec.rb
--- a/ee/spec/models/concerns/elastic/milestone_spec.rb
+++ b/ee/spec/models/concerns/elastic/milestone_spec.rb
@@ -39,6 +39,7 @@ describe Milestone, :elastic do

    expected_hash = milestone.attributes.extract!(
      'id',
+      'iid',
      'title',
      'description',
      'project_id',

--- a/ee/spec/services/search/group_service_spec.rb
+++ b/ee/spec/services/search/group_service_spec.rb
@@ -40,18 +40,18 @@ describe Search::GroupService, :elastic do
    end

    context 'finding projects by name' do
-      subject { results.objects('projects') }
+      subject { results.objects('projects').map(&:id) }

      context 'in parent group' do
        let(:search_group) { nested_group.parent }

-        it { is_expected.to match_array([project1, project2, project3]) }
+        it { is_expected.to match_array([project1.id, project2.id, project3.id]) }
      end

      context 'in subgroup' do
        let(:search_group) { nested_group }

-        it { is_expected.to match_array([project1, project2]) }
+        it { is_expected.to match_array([project1.id, project2.id]) }
      end
    end
  end

--- a/lib/gitlab/search_results.rb
+++ b/lib/gitlab/search_results.rb
@@ -85,6 +85,10 @@ module Gitlab
      UsersFinder.new(current_user, search: query).execute
    end

+    def display_options(_scope)
+      {}
+    end
+
    private

    def projects