Commit 8541d5a9 authored by Dylan Griffith's avatar Dylan Griffith

Prevent 2nd Elasticsearch query for project searches

We memoize the result object so that when it is later evaluated the same
Elasticsearch result can be used for the `total_count`.

We also extract a shared test examples for the 3 different kinds of
Elastic search results to reduce the chance of regression here.

This memoize approach to solving this problem was originally introduced
in https://gitlab.com/gitlab-org/gitlab/-/merge_requests/13120

Note that some of the methods being memoized have arguments and as such
the memoization is not logically consistent from the code perspective.
We must memoize the results without the page argument because
the current page we're on - if we were to memoize with dynamic
parameters we would end up hitting ES twice for any page that's not page
1, and that's something we want to avoid.  It is safe to memoize the
page we get here because the method is always called before
the equivalent `#_count` method.

Read more at https://gitlab.com/gitlab-org/gitlab/-/issues/219209
parent 3ecbbe3a
---
title: Prevent 2nd Elasticsearch query for project searches
merge_request: 36672
author:
type: performance
...@@ -30,28 +30,33 @@ module Gitlab ...@@ -30,28 +30,33 @@ module Gitlab
return Kaminari.paginate_array([]) if project.empty_repo? || query.blank? return Kaminari.paginate_array([]) if project.empty_repo? || query.blank?
return Kaminari.paginate_array([]) unless root_ref? return Kaminari.paginate_array([]) unless root_ref?
strong_memoize(:blobs) do
project.repository.__elasticsearch__.elastic_search_as_found_blob( project.repository.__elasticsearch__.elastic_search_as_found_blob(
query, query,
page: (page || 1).to_i, page: (page || 1).to_i,
per: per_page per: per_page
) )
end end
end
def wiki_blobs(page: 1, per_page: DEFAULT_PER_PAGE) def wiki_blobs(page: 1, per_page: DEFAULT_PER_PAGE)
return Kaminari.paginate_array([]) unless Ability.allowed?(@current_user, :read_wiki, project) return Kaminari.paginate_array([]) unless Ability.allowed?(@current_user, :read_wiki, project)
if project.wiki_enabled? && !project.wiki.empty? && query.present? if project.wiki_enabled? && !project.wiki.empty? && query.present?
strong_memoize(:wiki_blobs) do
project.wiki.__elasticsearch__.elastic_search_as_wiki_page( project.wiki.__elasticsearch__.elastic_search_as_wiki_page(
query, query,
page: (page || 1).to_i, page: (page || 1).to_i,
per: per_page per: per_page
) )
end
else else
Kaminari.paginate_array([]) Kaminari.paginate_array([])
end end
end end
def notes def notes
strong_memoize(:notes) do
opt = { opt = {
project_ids: limit_project_ids, project_ids: limit_project_ids,
current_user: @current_user, current_user: @current_user,
...@@ -60,6 +65,7 @@ module Gitlab ...@@ -60,6 +65,7 @@ module Gitlab
Note.elastic_search(query, options: opt) Note.elastic_search(query, options: opt)
end end
end
def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil) def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil)
return Kaminari.paginate_array([]) unless Ability.allowed?(@current_user, :download_code, project) return Kaminari.paginate_array([]) unless Ability.allowed?(@current_user, :download_code, project)
...@@ -69,12 +75,14 @@ module Gitlab ...@@ -69,12 +75,14 @@ module Gitlab
else else
# We use elastic for default branch only # We use elastic for default branch only
if root_ref? if root_ref?
strong_memoize(:commits) do
project.repository.find_commits_by_message_with_elastic( project.repository.find_commits_by_message_with_elastic(
query, query,
page: (page || 1).to_i, page: (page || 1).to_i,
per_page: per_page, per_page: per_page,
preload_method: preload_method preload_method: preload_method
) )
end
else else
offset = per_page * ((page || 1) - 1) offset = per_page * ((page || 1) - 1)
......
...@@ -38,4 +38,10 @@ RSpec.describe Gitlab::Elastic::GroupSearchResults do ...@@ -38,4 +38,10 @@ RSpec.describe Gitlab::Elastic::GroupSearchResults do
end end
end end
end end
context 'query performance' do
let(:query) { '*' }
include_examples 'does not hit Elasticsearch twice for objects and counts', %w|projects notes blobs wiki_blobs commits issues merge_requests milestones|
end
end end
...@@ -240,4 +240,18 @@ RSpec.describe Gitlab::Elastic::ProjectSearchResults, :elastic do ...@@ -240,4 +240,18 @@ RSpec.describe Gitlab::Elastic::ProjectSearchResults, :elastic do
end end
end end
end end
context 'query performance' do
let(:project) { create(:project, :public, :repository, :wiki_repo) }
before do
# wiki_blobs method checks to see if there is a wiki page before doing
# the search
create(:wiki_page, wiki: project.wiki)
end
let(:results) { described_class.new(user, '*', project) }
include_examples 'does not hit Elasticsearch twice for objects and counts', %w|notes blobs wiki_blobs commits issues merge_requests milestones|
end
end end
...@@ -12,16 +12,6 @@ RSpec.describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need ...@@ -12,16 +12,6 @@ RSpec.describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need
let(:project_2) { create(:project, :public, :repository, :wiki_repo) } let(:project_2) { create(:project, :public, :repository, :wiki_repo) }
let(:limit_project_ids) { [project_1.id] } let(:limit_project_ids) { [project_1.id] }
describe 'counts' do
it 'does not hit Elasticsearch twice for result and counts' do
expect(Repository).to receive(:find_commits_by_message_with_elastic).with('hello world', anything).once.and_call_original
results = described_class.new(user, 'hello world', limit_project_ids)
expect(results.objects('commits', page: 2)).to be_empty
expect(results.commits_count).to eq 0
end
end
describe '#formatted_count' do describe '#formatted_count' do
using RSpec::Parameterized::TableSyntax using RSpec::Parameterized::TableSyntax
...@@ -1134,4 +1124,10 @@ RSpec.describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need ...@@ -1134,4 +1124,10 @@ RSpec.describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need
end end
end end
end end
context 'query performance' do
let(:results) { described_class.new(user, 'hello world', limit_project_ids) }
include_examples 'does not hit Elasticsearch twice for objects and counts', %w|projects notes blobs wiki_blobs commits issues merge_requests milestones|
end
end end
# frozen_string_literal: true
RSpec.shared_examples 'does not hit Elasticsearch twice for objects and counts' do |scopes|
scopes.each do |scope|
context "for scope #{scope}", :elastic, :request_store do
it 'makes 1 Elasticsearch query' do
::Gitlab::SafeRequestStore.clear!
results.objects(scope)
results.public_send("#{scope}_count")
expect(::Gitlab::Instrumentation::ElasticsearchTransport.get_request_count).to eq(1)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment