Commit 453c6832 authored by Valery Sizov's avatar Valery Sizov

ES: More efficient snippet search

parent ad0f8f12
...@@ -3,6 +3,7 @@ Please view this file on the master branch, on stable branches it's out of date. ...@@ -3,6 +3,7 @@ Please view this file on the master branch, on stable branches it's out of date.
v 8.8.0 (unreleased) v 8.8.0 (unreleased)
- [Elastic] Database indexer prints its status - [Elastic] Database indexer prints its status
- [Elastic][Fix] Database indexer skips projects with invalid HEAD reference - [Elastic][Fix] Database indexer skips projects with invalid HEAD reference
- [Elastic] More efficient snippets search
- Set KRB5 as default clone protocol when Kerberos is enabled and user is logged in (Borja Aparicio) - Set KRB5 as default clone protocol when Kerberos is enabled and user is logged in (Borja Aparicio)
v 8.7.2 v 8.7.2
......
...@@ -7,12 +7,11 @@ module Search ...@@ -7,12 +7,11 @@ module Search
end end
def execute def execute
snippets = Snippet.accessible_to(current_user)
if Gitlab.config.elasticsearch.enabled if Gitlab.config.elasticsearch.enabled
Gitlab::Elastic::SnippetSearchResults.new(snippets.pluck(:id), Gitlab::Elastic::SnippetSearchResults.new(current_user,
params[:search]) params[:search])
else else
snippets = Snippet.accessible_to(current_user)
Gitlab::SnippetSearchResults.new(snippets, params[:search]) Gitlab::SnippetSearchResults.new(snippets, params[:search])
end end
end end
......
...@@ -68,22 +68,22 @@ module Elastic ...@@ -68,22 +68,22 @@ module Elastic
query_hash = if query.present? query_hash = if query.present?
{ {
query: { query: {
filtered: { bool: {
query: { must: [{
multi_match: { multi_match: {
fields: fields, fields: fields,
query: query, query: query,
operator: :and operator: :and
} }
}, }]
}, }
} }
} }
else else
{ {
query: { query: {
filtered: { bool: {
query: { match_all: {} } must: { match_all: {} }
} }
}, },
track_scores: true track_scores: true
...@@ -103,8 +103,8 @@ module Elastic ...@@ -103,8 +103,8 @@ module Elastic
def iid_query_hash(query_hash, iid) def iid_query_hash(query_hash, iid)
{ {
query: { query: {
filtered: { bool: {
query: { match: { iid: iid } } must: [{ term: { iid: iid } }]
} }
} }
} }
...@@ -112,7 +112,7 @@ module Elastic ...@@ -112,7 +112,7 @@ module Elastic
def project_ids_filter(query_hash, project_ids) def project_ids_filter(query_hash, project_ids)
if project_ids if project_ids
query_hash[:query][:filtered][:filter] = { query_hash[:query][:bool][:filter] = {
bool: { bool: {
must: [ { terms: { project_id: project_ids } } ] must: [ { terms: { project_id: project_ids } } ]
} }
......
...@@ -21,9 +21,6 @@ module Elastic ...@@ -21,9 +21,6 @@ module Elastic
indexes :author_id, type: :integer indexes :author_id, type: :integer
indexes :assignee_id, type: :integer indexes :assignee_id, type: :integer
indexes :project, type: :nested
indexes :author, type: :nested
indexes :confidential, type: :boolean indexes :confidential, type: :boolean
indexes :updated_at_sort, type: :date, index: :not_analyzed indexes :updated_at_sort, type: :date, index: :not_analyzed
...@@ -58,9 +55,9 @@ module Elastic ...@@ -58,9 +55,9 @@ module Elastic
end end
def self.confidentiality_filter(query_hash, current_user) def self.confidentiality_filter(query_hash, current_user)
return query_hash if current_user.present? && current_user.admin? return query_hash if current_user && current_user.admin?
filter = if current_user.present? filter = if current_user
{ {
bool: { bool: {
should: [ should: [
...@@ -86,7 +83,7 @@ module Elastic ...@@ -86,7 +83,7 @@ module Elastic
{ term: { confidential: false } } { term: { confidential: false } }
end end
query_hash[:query][:filtered][:filter][:bool][:must] << filter query_hash[:query][:bool][:must] << filter
query_hash query_hash
end end
end end
......
...@@ -26,10 +26,6 @@ module Elastic ...@@ -26,10 +26,6 @@ module Elastic
indexes :target_project_id, type: :integer indexes :target_project_id, type: :integer
indexes :author_id, type: :integer indexes :author_id, type: :integer
indexes :source_project, type: :nested
indexes :target_project, type: :nested
indexes :author, type: :nested
indexes :updated_at_sort, type: :string, index: 'not_analyzed' indexes :updated_at_sort, type: :string, index: 'not_analyzed'
end end
...@@ -56,9 +52,6 @@ module Elastic ...@@ -56,9 +52,6 @@ module Elastic
data[attr.to_s] = self.send(attr) data[attr.to_s] = self.send(attr)
end end
data['source_project'] = { 'id' => source_project_id }
data['target_project'] = { 'id' => target_project_id }
data['author'] = { 'id' => author_id }
data['updated_at_sort'] = updated_at data['updated_at_sort'] = updated_at
data data
end end
...@@ -71,15 +64,11 @@ module Elastic ...@@ -71,15 +64,11 @@ module Elastic
end end
if options[:project_ids] if options[:project_ids]
query_hash[:query][:filtered][:filter] = { query_hash[:query][:bool][:filter] = [{
and: [
{
terms: { terms: {
target_project_id: [options[:project_ids]].flatten target_project_id: [options[:project_ids]].flatten
} }
} }]
]
}
end end
self.__elasticsearch__.search(query_hash) self.__elasticsearch__.search(query_hash)
......
...@@ -33,14 +33,14 @@ module Elastic ...@@ -33,14 +33,14 @@ module Elastic
query_hash = { query_hash = {
query: { query: {
filtered: { bool: {
query: { match: { note: query } }, must: { match: { note: query } },
}, },
} }
} }
if query.blank? if query.blank?
query_hash[:query][:filtered][:query] = { match_all: {} } query_hash[:query][:bool][:must] = { match_all: {} }
query_hash[:track_scores] = true query_hash[:track_scores] = true
end end
......
...@@ -83,17 +83,6 @@ module Elastic ...@@ -83,17 +83,6 @@ module Elastic
} }
end end
if !options[:owner_id].blank?
filters << {
nested: {
path: :owner,
filter: {
term: { "owner.id" => options[:owner_id] }
}
}
}
end
if options[:pids] if options[:pids]
filters << { filters << {
ids: { ids: {
...@@ -102,7 +91,7 @@ module Elastic ...@@ -102,7 +91,7 @@ module Elastic
} }
end end
query_hash[:query][:filtered][:filter] = { and: filters } query_hash[:query][:bool][:filter] = { and: filters }
query_hash[:sort] = [:_score] query_hash[:sort] = [:_score]
......
...@@ -20,9 +20,7 @@ module Elastic ...@@ -20,9 +20,7 @@ module Elastic
indexes :project_id, type: :integer indexes :project_id, type: :integer
indexes :author_id, type: :integer indexes :author_id, type: :integer
indexes :visibility_level, type: :integer
indexes :project, type: :nested
indexes :author, type: :nested
indexes :updated_at_sort, type: :date, index: :not_analyzed indexes :updated_at_sort, type: :date, index: :not_analyzed
end end
...@@ -39,18 +37,15 @@ module Elastic ...@@ -39,18 +37,15 @@ module Elastic
:state, :state,
:project_id, :project_id,
:author_id, :author_id,
], :visibility_level
include: { ]
project: { only: :id },
author: { only: :id }
}
}) })
end end
def self.elastic_search(query, options: {}) def self.elastic_search(query, options: {})
query_hash = basic_query_hash(%w(title file_name), query) query_hash = basic_query_hash(%w(title file_name), query)
query_hash = limit_ids(query_hash, options[:ids]) query_hash = filter(query_hash, options[:author_id])
self.__elasticsearch__.search(query_hash) self.__elasticsearch__.search(query_hash)
end end
...@@ -58,13 +53,13 @@ module Elastic ...@@ -58,13 +53,13 @@ module Elastic
def self.elastic_search_code(query, options: {}) def self.elastic_search_code(query, options: {})
query_hash = { query_hash = {
query: { query: {
filtered: { bool: {
query: { match: { content: query } }, must: [{ match: { content: query } }]
}, }
} }
} }
query_hash = limit_ids(query_hash, options[:ids]) query_hash = filter(query_hash, options[:author_id])
query_hash[:sort] = [ query_hash[:sort] = [
{ updated_at_sort: { order: :desc, mode: :min } }, { updated_at_sort: { order: :desc, mode: :min } },
...@@ -76,10 +71,15 @@ module Elastic ...@@ -76,10 +71,15 @@ module Elastic
self.__elasticsearch__.search(query_hash) self.__elasticsearch__.search(query_hash)
end end
def self.limit_ids(query_hash, ids) def self.filter(query_hash, author_id)
if ids if author_id
query_hash[:query][:filtered][:filter] = { query_hash[:query][:bool][:filter] = {
and: [ { terms: { id: ids } } ] bool: {
should: [
{ terms: { visibility_level: [Snippet::PUBLIC, Snippet::INTERNAL] } },
{ term: { author_id: author_id } }
]
}
} }
end end
......
module Gitlab module Gitlab
module Elastic module Elastic
class SnippetSearchResults < ::Gitlab::SnippetSearchResults class SnippetSearchResults < ::Gitlab::SnippetSearchResults
def initialize(user, query)
@user = user
@query = query
end
def objects(scope, page = nil) def objects(scope, page = nil)
case scope case scope
when 'snippet_titles' when 'snippet_titles'
...@@ -16,7 +21,7 @@ module Gitlab ...@@ -16,7 +21,7 @@ module Gitlab
def snippet_titles def snippet_titles
opt = { opt = {
ids: limit_snippets author_id: @user.id
} }
Snippet.elastic_search(query, options: opt) Snippet.elastic_search(query, options: opt)
...@@ -24,7 +29,7 @@ module Gitlab ...@@ -24,7 +29,7 @@ module Gitlab
def snippet_blobs def snippet_blobs
opt = { opt = {
ids: limit_snippets author_id: @user.id
} }
Snippet.elastic_search_code(query, options: opt) Snippet.elastic_search_code(query, options: opt)
......
...@@ -47,9 +47,6 @@ describe "MergeRequest", elastic: true do ...@@ -47,9 +47,6 @@ describe "MergeRequest", elastic: true do
'author_id' 'author_id'
) )
expected_hash['source_project'] = { 'id' => merge_request.source_project_id }
expected_hash['target_project'] = { 'id' => merge_request.target_project_id }
expected_hash['author'] = { 'id' => merge_request.author.id }
expected_hash['updated_at_sort'] = merge_request.updated_at expected_hash['updated_at_sort'] = merge_request.updated_at
expect(merge_request.as_indexed_json).to eq(expected_hash) expect(merge_request.as_indexed_json).to eq(expected_hash)
......
...@@ -12,21 +12,21 @@ describe "Projects", elastic: true do ...@@ -12,21 +12,21 @@ describe "Projects", elastic: true do
end end
it "searches projects" do it "searches projects" do
@project = create :empty_project, name: 'test' project = create :empty_project, name: 'test'
@project1 = create :empty_project, path: 'test1' project1 = create :empty_project, path: 'test1'
@project2 = create :empty_project project2 = create :empty_project
@project3 = create :empty_project, path: 'someone_elses_project' create :empty_project, path: 'someone_elses_project'
@project_ids = [@project.id, @project1.id, @project2.id] project_ids = [project.id, project1.id, project2.id]
Project.__elasticsearch__.refresh_index! Project.__elasticsearch__.refresh_index!
expect(Project.elastic_search('test', options: { pids: @project_ids }).total_count).to eq(1) expect(Project.elastic_search('test', options: { pids: project_ids }).total_count).to eq(1)
expect(Project.elastic_search('test1', options: { pids: @project_ids }).total_count).to eq(1) expect(Project.elastic_search('test1', options: { pids: project_ids }).total_count).to eq(1)
expect(Project.elastic_search('someone_elses_project', options: { pids: @project_ids }).total_count).to eq(0) expect(Project.elastic_search('someone_elses_project', options: { pids: project_ids }).total_count).to eq(0)
end end
it "returns json with all needed elements" do it "returns json with all needed elements" do
project = create :project project = create :empty_project
expected_hash = project.attributes.extract!( expected_hash = project.attributes.extract!(
'id', 'id',
......
...@@ -12,34 +12,35 @@ describe "Snippet", elastic: true do ...@@ -12,34 +12,35 @@ describe "Snippet", elastic: true do
end end
it "searches snippets by code" do it "searches snippets by code" do
@snippet = create :personal_snippet, content: 'genius code' user = create :user
@snippet1 = create :personal_snippet
# the snippet I have no access to snippet = create :personal_snippet, :private, content: 'genius code', author: user
@snippet2 = create :personal_snippet, content: 'genius code' create :personal_snippet, :private, content: 'genius code'
create :personal_snippet, :private
@snippet_ids = [@snippet.id, @snippet1.id] snippet3 = create :personal_snippet, :public, content: 'genius code'
Snippet.__elasticsearch__.refresh_index! Snippet.__elasticsearch__.refresh_index!
options = { ids: @snippet_ids } options = { author_id: user.id }
expect(Snippet.elastic_search_code('genius code', options: options).total_count).to eq(1) result = Snippet.elastic_search_code('genius code', options: options)
expect(result.total_count).to eq(2)
expect(result.records.map(&:id)).to include(snippet.id, snippet3.id)
end end
it "searches snippets by title and file_name" do it "searches snippets by title and file_name" do
@snippet = create :snippet, title: 'home' user = create :user
@snippet1 = create :snippet, file_name: 'index.php'
@snippet2 = create :snippet
# the snippet I have no access to
@snippet3 = create :snippet, title: 'home'
@snippet_ids = [@snippet.id, @snippet1.id, @snippet2.id] create :snippet, :public, title: 'home'
create :snippet, :private, title: 'home 1'
create :snippet, :public, file_name: 'index.php'
create :snippet
Snippet.__elasticsearch__.refresh_index! Snippet.__elasticsearch__.refresh_index!
options = { ids: @snippet_ids } options = { author_id: user.id }
expect(Snippet.elastic_search('home', options: options).total_count).to eq(1) expect(Snippet.elastic_search('home', options: options).total_count).to eq(1)
expect(Snippet.elastic_search('index.php', options: options).total_count).to eq(1) expect(Snippet.elastic_search('index.php', options: options).total_count).to eq(1)
...@@ -58,11 +59,9 @@ describe "Snippet", elastic: true do ...@@ -58,11 +59,9 @@ describe "Snippet", elastic: true do
'state', 'state',
'project_id', 'project_id',
'author_id', 'author_id',
'visibility_level'
) )
expected_hash['project'] = { 'id' => snippet.project.id }
expected_hash['author'] = { 'id' => snippet.author.id }
expect(snippet.as_indexed_json).to eq(expected_hash) expect(snippet.as_indexed_json).to eq(expected_hash)
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment