Commit d9cb907c authored by Mario de la Ossa's avatar Mario de la Ossa

Avoid loading objects from DB in ES results

We already have most of the information we want to display in
Elasticsearch results inside ES itself, so instead of finding (and
returning!) the full ES object and only using the ID to get the database
object, we instead use a Lite object that quacks like the original
and uses the info from Elasticsearch.

We do this for:
- Projects
- Issues
- Merge Requests
- Milestones
parent ab9cf52e
......@@ -25,6 +25,7 @@ class SearchController < ApplicationController
@show_snippets = search_service.show_snippets?
@search_results = search_service.search_results
@search_objects = search_service.search_objects
@display_options = search_service.display_options
render_commits if @scope == 'commits'
eager_load_user_status if @scope == 'users'
......
......@@ -72,6 +72,8 @@ class ProjectFeature < ApplicationRecord
default_value_for :wiki_access_level, value: ENABLED, allows_nil: false
default_value_for :repository_access_level, value: ENABLED, allows_nil: false
scope :for_project_id, -> (project) { where(project: project) }
def feature_available?(feature, user)
# This feature might not be behind a feature flag at all, so default to true
return false unless ::Feature.enabled?(feature, user, default_enabled: true)
......
......@@ -52,6 +52,10 @@ class SearchService
@search_objects ||= search_results.objects(scope, params[:page])
end
def display_options
@display_options ||= search_results.display_options(scope)
end
private
def search_service
......
......@@ -21,7 +21,7 @@
.search-results
- if @scope == 'projects'
.term
= render 'shared/projects/list', projects: @search_objects, pipeline_status: false
= render 'shared/projects/list', { projects: @search_objects, pipeline_status: false }.merge(@display_options)
- else
- locals = { projects: blob_projects(@search_objects) } if %w[blobs wiki_blobs].include?(@scope)
= render partial: "search/results/#{@scope.singularize}", collection: @search_objects, locals: locals
......
.search-result-row
%h4
= confidential_icon(issue)
= link_to [issue.project.namespace.becomes(Namespace), issue.project, issue] do
= link_to namespace_project_issue_path(issue.project.namespace.becomes(Namespace), issue.project, issue) do
%span.term.str-truncated= issue.title
- if issue.closed?
%span.badge.badge-danger.prepend-left-5= _("Closed")
......
.search-result-row
%h4
= link_to [merge_request.target_project.namespace.becomes(Namespace), merge_request.target_project, merge_request] do
= link_to namespace_project_merge_request_path(merge_request.target_project.namespace.becomes(Namespace), merge_request.target_project, merge_request) do
%span.term.str-truncated= merge_request.title
- if merge_request.merged?
%span.badge.badge-primary.prepend-left-5= _("Merged")
......
.search-result-row
%h4
= link_to [milestone.project.namespace.becomes(Namespace), milestone.project, milestone] do
= link_to namespace_project_milestone_path(milestone.project.namespace.becomes(Namespace), milestone.project, milestone) do
%span.term.str-truncated= milestone.title
- if milestone.description.present?
......
......@@ -315,7 +315,7 @@ module Elastic
self.import(options)
end
def basic_query_hash(fields, query)
def basic_query_hash(fields, query, page: nil, per_page: nil)
query_hash = if query.present?
{
query: {
......@@ -344,6 +344,9 @@ module Elastic
}
end
query_hash[:size] = per_page if per_page
query_hash[:from] = per_page * (page - 1) if per_page && page
query_hash[:sort] = [
{ updated_at: { order: :desc } },
:_score
......
......@@ -30,7 +30,7 @@ module Elastic
if query =~ /#(\d+)\z/
iid_query_hash(Regexp.last_match(1))
else
basic_query_hash(%w(title^2 description), query)
basic_query_hash(%w(title^2 description), query, page: options[:page], per_page: options[:per_page])
end
options[:features] = 'issues'
......
......@@ -46,7 +46,7 @@ module Elastic
if query =~ /\!(\d+)\z/
iid_query_hash(Regexp.last_match(1))
else
basic_query_hash(%w(title^2 description), query)
basic_query_hash(%w(title^2 description), query, page: options[:page], per_page: options[:per_page])
end
options[:features] = 'merge_requests'
......
......@@ -12,7 +12,7 @@ module Elastic
# https://gitlab.com/gitlab-org/gitlab-ee/issues/349
data = {}
[:id, :title, :description, :project_id, :created_at, :updated_at].each do |attr|
[:id, :iid, :title, :description, :project_id, :created_at, :updated_at].each do |attr|
data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
end
......@@ -26,7 +26,7 @@ module Elastic
def self.elastic_search(query, options: {})
options[:in] = %w(title^2 description)
query_hash = basic_query_hash(options[:in], query)
query_hash = basic_query_hash(options[:in], query, page: options[:page], per_page: options[:per_page])
query_hash = project_ids_filter(query_hash, options)
......
......@@ -65,7 +65,7 @@ module Elastic
def self.elastic_search(query, options: {})
options[:in] = %w(name^10 name_with_namespace^2 path_with_namespace path^9 description)
query_hash = basic_query_hash(options[:in], query)
query_hash = basic_query_hash(options[:in], query, page: options[:page], per_page: options[:per_page])
filters = []
......
---
title: Avoid loading database objects for Elasticsearch results
merge_request: 12691
author:
type: performance
# frozen_string_literal: true
module Elasticsearch
class LiteProject
include Gitlab::Utils::StrongMemoize
attr_accessor :id, :name, :path, :description, :namespace_id, :create_at, :updated_at, :archived,
:visibility_level, :last_activity_at, :name_with_namespace, :path_with_namespace,
:issues_access_level, :merge_requests_access_level, :snippets_access_level, :wiki_access_level,
:repository_access_level
attr_accessor :pipeline_status, :commit, :creator
alias_attribute :last_activity_date, :last_activity_at
def initialize(raw_project_hash)
raw_project_hash.each do |key, value|
value = value.to_datetime if key =~ /_at$/
self.instance_variable_set(:"@#{key}", value)
end
end
# only used for routing and results display, so we trick Rails here
def namespace
# can't use an OpenStruct because to_param is a defined method on it
Struct.new(:to_param, :human_name).new(
path_with_namespace.sub(/\/#{path}$/, ''),
name_with_namespace.sub(/\s+\/\s+#{name}$/, '')
)
end
def route
# Creates an object that has a `cache_key` attribute set to nil
Struct.new(:cache_key).new(nil)
end
def pending_delete?
false
end
def cache_key
"lite_projects/#{id}-#{updated_at.utc.to_s(:number)}"
end
def to_param
path&.to_s
end
def model_name
OpenStruct.new(param_key: 'project')
end
def banzai_render_context(field)
return unless field == :description
{ pipeline: :description, project: self }
end
def default_issues_tracker?
true
end
# rubocop: disable CodeReuse/ActiveRecord
# This method is required by the OpenMergeRequestsCountService
def merge_requests
strong_memoize(:merge_requests) do
MergeRequest.opened.where(project_id: self.id)
end
end
# rubocop: enable CodeReuse/ActiveRecord
def forks_count
Projects::ForksCountService.new(self).count
end
def open_issues_count(current_user = nil)
Projects::OpenIssuesCountService.new(self, current_user).count
end
def open_merge_requests_count
Projects::OpenMergeRequestsCountService.new(self).count
end
end
end
# frozen_string_literal: true
module Elasticsearch
module ResultObjects
# Here we refine our models to learn how to load from Elasticsearch without hitting the database
# In order to use these methods in another class you first have to `using Elasticsearch::ResultObjects`
# and you can then use the model objects directly
ES_ATTRIBUTES = %i(join_field type).freeze
refine ::Project.singleton_class do
def load_from_elasticsearch(es_response, current_user: nil)
es_response.results.response.map(&:_source).then do |projects|
projects.map do |project|
::Elasticsearch::LiteProject.new(project)
end
end
end
end
refine ::Issue.singleton_class do
def load_from_elasticsearch(es_response, current_user:)
es_response.results.response.map(&:_source).then do |issues|
projects = ::ProjectsFinder.new(
current_user: current_user,
project_ids_relation: issues.map(&:project_id)
).execute.index_by(&:id)
issues.map do |issue|
issue[:project] = projects[issue.delete(:project_id)]
issue[:assignee_ids] = issue.delete(:assignee_id)
issue.except!(*ES_ATTRIBUTES)
new(issue)
end
end
end
end
refine ::MergeRequest.singleton_class do
def load_from_elasticsearch(es_response, current_user:)
es_response.results.response.map(&:_source).then do |merge_requests|
# rubocop: disable CodeReuse/ActiveRecord
projects = ::ProjectsFinder.new(
current_user: current_user,
project_ids_relation: merge_requests.map(&:target_project_id)
).execute.includes(:route, namespace: [:route]).index_by(&:id)
# rubocop: enable CodeReuse/ActiveRecord
merge_requests.map do |merge_request|
merge_request[:target_project] = projects[merge_request.delete(:target_project_id)]
merge_request.except!(*ES_ATTRIBUTES)
new(merge_request)
end
end
end
end
refine ::Milestone.singleton_class do
def load_from_elasticsearch(es_response, current_user:)
es_response.results.response.map(&:_source).then do |milestones|
projects = ::ProjectsFinder.new(
current_user: current_user,
project_ids_relation: milestones.map(&:project_id)
).execute.index_by(&:id)
milestones.map do |milestone|
milestone[:project] = projects[milestone.delete(:project_id)]
milestone.except!(*ES_ATTRIBUTES)
new(milestone)
end
end
end
end
end
end
......@@ -5,6 +5,8 @@ module Gitlab
class SearchResults
include Gitlab::Utils::StrongMemoize
using Elasticsearch::ResultObjects
attr_reader :current_user, :query, :public_and_internal_projects
# Limit search results by passed project ids
......@@ -25,13 +27,13 @@ module Gitlab
def objects(scope, page = nil)
case scope
when 'projects'
projects.page(page).per(per_page).records
projects(page: page, per_page: per_page)
when 'issues'
issues.page(page).per(per_page).records
issues(page: page, per_page: per_page)
when 'merge_requests'
merge_requests.page(page).per(per_page).records
merge_requests(page: page, per_page: per_page)
when 'milestones'
milestones.page(page).per(per_page).records
milestones(page: page, per_page: per_page)
when 'blobs'
blobs.page(page).per(per_page)
when 'wiki_blobs'
......@@ -45,6 +47,17 @@ module Gitlab
end
end
def display_options(scope)
case scope
when 'projects'
{
stars: false
}
else
{}
end
end
def generic_search_results
@generic_search_results ||= Gitlab::SearchResults.new(current_user, limit_projects, query)
end
......@@ -143,19 +156,40 @@ module Gitlab
}
end
def projects
def paginate_array(collection, total_count, page, per_page)
offset = per_page * (page - 1)
Kaminari.paginate_array(collection, total_count: total_count, limit: per_page, offset: offset)
end
def search(model, query, options, page: 1, per_page: 20)
page = (page || 1).to_i
response = model.elastic_search(
query,
options: options.merge(page: page, per_page: per_page)
)
results = model.load_from_elasticsearch(response, current_user: current_user)
paginate_array(results, response.total_count, page, per_page)
end
# See the comment for #commits for more info on why we memoize this way
def projects(page: 1, per_page: 20)
strong_memoize(:projects) do
Project.elastic_search(query, options: base_options)
search(Project, query, base_options, page: page, per_page: per_page)
end
end
def issues
# See the comment for #commits for more info on why we memoize this way
def issues(page: 1, per_page: 20)
strong_memoize(:issues) do
Issue.elastic_search(query, options: base_options)
search(Issue, query, base_options, page: page, per_page: per_page)
end
end
def milestones
# See the comment for #commits for more info on why we memoize this way
def milestones(page: 1, per_page: 20)
strong_memoize(:milestones) do
# Must pass 'issues' and 'merge_requests' to check
# if any of the features is available for projects in Elastic::ApplicationSearch#project_ids_query
......@@ -164,17 +198,18 @@ module Gitlab
options = base_options
options[:features] = [:issues, :merge_requests]
Milestone.elastic_search(query, options: options)
search(Milestone, query, options, page: page, per_page: per_page)
end
end
def merge_requests
# See the comment for #commits for more info on why we memoize this way
def merge_requests(page: 1, per_page: 20)
strong_memoize(:merge_requests) do
options = base_options.merge(project_ids: non_guest_project_ids)
MergeRequest.elastic_search(query, options: options)
search(MergeRequest, query, base_options.merge(project_ids: non_guest_project_ids), page: page, per_page: per_page)
end
end
# See the comment for #commits for more info on why we memoize this way
def blobs
return Kaminari.paginate_array([]) if query.blank?
......@@ -191,6 +226,7 @@ module Gitlab
end
end
# See the comment for #commits for more info on why we memoize this way
def wiki_blobs
return Kaminari.paginate_array([]) if query.blank?
......
require 'spec_helper'
describe 'Global elastic search' do
describe 'Global elastic search', :elastic do
let(:user) { create(:user) }
let(:project) { create(:project, :repository, :wiki_repo, namespace: user.namespace) }
before do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
Gitlab::Elastic::Helper.create_empty_index
project.add_maintainer(user)
sign_in(user)
end
after do
Gitlab::Elastic::Helper.delete_index
stub_ee_application_setting(elasticsearch_search: false, elasticsearch_indexing: false)
end
shared_examples 'a pure Elasticsearch result' do
it 'avoids N+1 database queries' do
create(object, creation_args)
Gitlab::Elastic::Helper.refresh_index
control_count = ActiveRecord::QueryRecorder.new { visit path }.count
create_list(object, 10, creation_args)
Gitlab::Elastic::Helper.refresh_index
control_count = control_count + (10 * query_count_multiplier) + 1
expect { visit path }.not_to exceed_query_limit(control_count)
end
end
describe 'I do not overload the database' do
context 'searching issues' do
let(:object) { :issue }
let(:creation_args) { { project: project, title: 'initial' } }
let(:path) { search_path(search: 'initial', scope: 'issues') }
let(:query_count_multiplier) { 0 }
it_behaves_like 'a pure Elasticsearch result'
end
context 'searching projects' do
let(:object) { :project }
let(:creation_args) { { namespace: user.namespace } }
let(:path) { search_path(search: 'project*', scope: 'projects') }
# Each Project requires 4 extra queries: one for each "count" (forks, open MRs, open Issues) and one for access level
let(:query_count_multiplier) { 4 }
it_behaves_like 'a pure Elasticsearch result'
end
context 'searching merge requests' do
it 'avoids N+1 database queries' do
path = search_path(search: 'initial', scope: 'merge_requests')
create(:merge_request, title: 'initial', source_project: project)
Gitlab::Elastic::Helper.refresh_index
control_count = ActiveRecord::QueryRecorder.new { visit path }.count
merge_requests = create_list(:merge_request, 10, title: 'initial')
merge_requests.each { |mr| mr.target_project.add_maintainer(user) }
Gitlab::Elastic::Helper.refresh_index
# Each MR loaded has a Route load that has been tricky to track down
expect { visit path }.not_to exceed_query_limit(control_count + 11)
end
end
context 'searching milestones' do
let(:object) { :milestone }
let(:creation_args) { { project: project } }
let(:path) { search_path(search: 'milestone*', scope: 'milestones') }
let(:query_count_multiplier) { 0 }
it_behaves_like 'a pure Elasticsearch result'
end
end
describe 'I search through the issues and I see pagination' do
before do
create_list(:issue, 21, project: project, title: 'initial')
......
......@@ -162,31 +162,31 @@ describe Gitlab::Elastic::ProjectSearchResults do
it 'does not list project confidential issues for non project members' do
results = described_class.new(non_member, query, project.id)
issues = results.objects('issues')
issues = results.objects('issues').map(&:id)
expect(issues).to include issue
expect(issues).not_to include security_issue_1
expect(issues).not_to include security_issue_2
expect(issues).to include issue.id
expect(issues).not_to include security_issue_1.id
expect(issues).not_to include security_issue_2.id
expect(results.issues_count).to eq 1
end
it 'lists project confidential issues for author' do
results = described_class.new(author, query, project.id)
issues = results.objects('issues')
issues = results.objects('issues').map(&:id)
expect(issues).to include issue
expect(issues).to include security_issue_1
expect(issues).not_to include security_issue_2
expect(issues).to include issue.id
expect(issues).to include security_issue_1.id
expect(issues).not_to include security_issue_2.id
expect(results.issues_count).to eq 2
end
it 'lists project confidential issues for assignee' do
results = described_class.new(assignee, query, project.id)
issues = results.objects('issues')
issues = results.objects('issues').map(&:id)
expect(issues).to include issue
expect(issues).not_to include security_issue_1
expect(issues).to include security_issue_2
expect(issues).to include issue.id
expect(issues).not_to include security_issue_1.id
expect(issues).to include security_issue_2.id
expect(results.issues_count).to eq 2
end
......@@ -194,11 +194,11 @@ describe Gitlab::Elastic::ProjectSearchResults do
project.add_developer(member)
results = described_class.new(member, query, project.id)
issues = results.objects('issues')
issues = results.objects('issues').map(&:id)
expect(issues).to include issue
expect(issues).to include security_issue_1
expect(issues).to include security_issue_2
expect(issues).to include issue.id
expect(issues).to include security_issue_1.id
expect(issues).to include security_issue_2.id
expect(results.issues_count).to eq 3
end
......@@ -206,21 +206,21 @@ describe Gitlab::Elastic::ProjectSearchResults do
project.add_guest(member)
results = described_class.new(member, query, project.id)
issues = results.objects('issues')
issues = results.objects('issues').map(&:id)
expect(issues).to include issue
expect(issues).not_to include security_issue_1
expect(issues).not_to include security_issue_2
expect(issues).to include issue.id
expect(issues).not_to include security_issue_1.id
expect(issues).not_to include security_issue_2.id
expect(results.issues_count).to eq 1
end
it 'lists all project issues for admin' do
results = described_class.new(admin, query, project.id)
issues = results.objects('issues')
issues = results.objects('issues').map(&:id)
expect(issues).to include issue
expect(issues).to include security_issue_1
expect(issues).to include security_issue_2
expect(issues).to include issue.id
expect(issues).to include security_issue_1.id
expect(issues).to include security_issue_2.id
expect(results.issues_count).to eq 3
end
end
......
......@@ -39,6 +39,7 @@ describe Milestone, :elastic do
expected_hash = milestone.attributes.extract!(
'id',
'iid',
'title',
'description',
'project_id',
......
......@@ -40,18 +40,18 @@ describe Search::GroupService, :elastic do
end
context 'finding projects by name' do
subject { results.objects('projects') }
subject { results.objects('projects').map(&:id) }
context 'in parent group' do
let(:search_group) { nested_group.parent }
it { is_expected.to match_array([project1, project2, project3]) }
it { is_expected.to match_array([project1.id, project2.id, project3.id]) }
end
context 'in subgroup' do
let(:search_group) { nested_group }
it { is_expected.to match_array([project1, project2]) }
it { is_expected.to match_array([project1.id, project2.id]) }
end
end
end
......
......@@ -85,6 +85,10 @@ module Gitlab
UsersFinder.new(current_user, search: query).execute
end
def display_options(_scope)
{}
end
private
def projects
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment