Commit 453c6832 authored by Valery Sizov

ES: More efficient snippet search

parent ad0f8f12
......@@ -3,6 +3,7 @@ Please view this file on the master branch, on stable branches it's out of date.
v 8.8.0 (unreleased)
- [Elastic] Database indexer prints its status
- [Elastic][Fix] Database indexer skips projects with invalid HEAD reference
- [Elastic] More efficient snippets search
- Set KRB5 as default clone protocol when Kerberos is enabled and user is logged in (Borja Aparicio)
v 8.7.2
......
......@@ -7,12 +7,11 @@ module Search
end
# Builds the snippet search-results object for the current request.
#
# When Gitlab.config.elasticsearch.enabled is truthy an
# Gitlab::Elastic::SnippetSearchResults is constructed; otherwise a
# Gitlab::SnippetSearchResults is built from the snippets returned by
# Snippet.accessible_to(current_user). Both receive params[:search].
#
# NOTE(review): this listing is a diff whose +/- markers were stripped, so
# removed and added lines sit side by side (two `SnippetSearchResults.new(`
# openers, two `snippets = ...` assignments). Presumably only one line of
# each pair exists in the committed file — confirm against the repository
# before treating this as runnable Ruby.
def execute
snippets = Snippet.accessible_to(current_user)
if Gitlab.config.elasticsearch.enabled
Gitlab::Elastic::SnippetSearchResults.new(snippets.pluck(:id),
Gitlab::Elastic::SnippetSearchResults.new(current_user,
params[:search])
else
# Database-backed fallback: the accessible snippets are passed in directly.
snippets = Snippet.accessible_to(current_user)
Gitlab::SnippetSearchResults.new(snippets, params[:search])
end
end
......
......@@ -68,22 +68,22 @@ module Elastic
query_hash = if query.present?
{
query: {
filtered: {
query: {
bool: {
must: [{
multi_match: {
fields: fields,
query: query,
operator: :and
}
},
},
}]
}
}
}
else
{
query: {
filtered: {
query: { match_all: {} }
bool: {
must: { match_all: {} }
}
},
track_scores: true
......@@ -103,8 +103,8 @@ module Elastic
def iid_query_hash(query_hash, iid)
{
query: {
filtered: {
query: { match: { iid: iid } }
bool: {
must: [{ term: { iid: iid } }]
}
}
}
......@@ -112,7 +112,7 @@ module Elastic
def project_ids_filter(query_hash, project_ids)
if project_ids
query_hash[:query][:filtered][:filter] = {
query_hash[:query][:bool][:filter] = {
bool: {
must: [ { terms: { project_id: project_ids } } ]
}
......
......@@ -21,9 +21,6 @@ module Elastic
indexes :author_id, type: :integer
indexes :assignee_id, type: :integer
indexes :project, type: :nested
indexes :author, type: :nested
indexes :confidential, type: :boolean
indexes :updated_at_sort, type: :date, index: :not_analyzed
......@@ -58,9 +55,9 @@ module Elastic
end
def self.confidentiality_filter(query_hash, current_user)
return query_hash if current_user.present? && current_user.admin?
return query_hash if current_user && current_user.admin?
filter = if current_user.present?
filter = if current_user
{
bool: {
should: [
......@@ -86,7 +83,7 @@ module Elastic
{ term: { confidential: false } }
end
query_hash[:query][:filtered][:filter][:bool][:must] << filter
query_hash[:query][:bool][:must] << filter
query_hash
end
end
......
......@@ -26,10 +26,6 @@ module Elastic
indexes :target_project_id, type: :integer
indexes :author_id, type: :integer
indexes :source_project, type: :nested
indexes :target_project, type: :nested
indexes :author, type: :nested
indexes :updated_at_sort, type: :string, index: 'not_analyzed'
end
......@@ -56,9 +52,6 @@ module Elastic
data[attr.to_s] = self.send(attr)
end
data['source_project'] = { 'id' => source_project_id }
data['target_project'] = { 'id' => target_project_id }
data['author'] = { 'id' => author_id }
data['updated_at_sort'] = updated_at
data
end
......@@ -71,15 +64,11 @@ module Elastic
end
if options[:project_ids]
query_hash[:query][:filtered][:filter] = {
and: [
{
terms: {
target_project_id: [options[:project_ids]].flatten
}
}
]
}
query_hash[:query][:bool][:filter] = [{
terms: {
target_project_id: [options[:project_ids]].flatten
}
}]
end
self.__elasticsearch__.search(query_hash)
......
......@@ -33,14 +33,14 @@ module Elastic
query_hash = {
query: {
filtered: {
query: { match: { note: query } },
bool: {
must: { match: { note: query } },
},
}
}
if query.blank?
query_hash[:query][:filtered][:query] = { match_all: {} }
query_hash[:query][:bool][:must] = { match_all: {} }
query_hash[:track_scores] = true
end
......
......@@ -83,17 +83,6 @@ module Elastic
}
end
if !options[:owner_id].blank?
filters << {
nested: {
path: :owner,
filter: {
term: { "owner.id" => options[:owner_id] }
}
}
}
end
if options[:pids]
filters << {
ids: {
......@@ -102,7 +91,7 @@ module Elastic
}
end
query_hash[:query][:filtered][:filter] = { and: filters }
query_hash[:query][:bool][:filter] = { and: filters }
query_hash[:sort] = [:_score]
......
......@@ -6,25 +6,23 @@ module Elastic
include ApplicationSearch
mappings do
indexes :id, type: :integer
indexes :title, type: :string,
index_options: 'offsets'
indexes :file_name, type: :string,
index_options: 'offsets'
indexes :content, type: :string,
index_options: 'offsets'
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :state, type: :string
indexes :project_id, type: :integer
indexes :author_id, type: :integer
indexes :project, type: :nested
indexes :author, type: :nested
indexes :updated_at_sort, type: :date, index: :not_analyzed
indexes :id, type: :integer
indexes :title, type: :string,
index_options: 'offsets'
indexes :file_name, type: :string,
index_options: 'offsets'
indexes :content, type: :string,
index_options: 'offsets'
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :state, type: :string
indexes :project_id, type: :integer
indexes :author_id, type: :integer
indexes :visibility_level, type: :integer
indexes :updated_at_sort, type: :date, index: :not_analyzed
end
def as_indexed_json(options = {})
......@@ -39,18 +37,15 @@ module Elastic
:state,
:project_id,
:author_id,
],
include: {
project: { only: :id },
author: { only: :id }
}
:visibility_level
]
})
end
# Searches snippets on the `title` and `file_name` fields.
#
# query   - search text handed to basic_query_hash
# options - hash; options[:ids] and options[:author_id] are read here
#
# Returns whatever self.__elasticsearch__.search returns for the built hash.
#
# NOTE(review): this listing is a diff with +/- markers stripped; the
# `limit_ids` and `filter` lines are presumably the removed and the added
# variant of the same step — confirm which one the committed file keeps.
# NOTE(review): the visible part of `filter` only sets a visibility filter
# when author_id is truthy, so a nil author_id appears to leave the query
# unrestricted — confirm that anonymous searches cannot reach this path.
def self.elastic_search(query, options: {})
query_hash = basic_query_hash(%w(title file_name), query)
query_hash = limit_ids(query_hash, options[:ids])
query_hash = filter(query_hash, options[:author_id])
self.__elasticsearch__.search(query_hash)
end
......@@ -58,13 +53,13 @@ module Elastic
def self.elastic_search_code(query, options: {})
query_hash = {
query: {
filtered: {
query: { match: { content: query } },
},
bool: {
must: [{ match: { content: query } }]
}
}
}
query_hash = limit_ids(query_hash, options[:ids])
query_hash = filter(query_hash, options[:author_id])
query_hash[:sort] = [
{ updated_at_sort: { order: :desc, mode: :min } },
......@@ -76,10 +71,15 @@ module Elastic
self.__elasticsearch__.search(query_hash)
end
def self.limit_ids(query_hash, ids)
if ids
query_hash[:query][:filtered][:filter] = {
and: [ { terms: { id: ids } } ]
def self.filter(query_hash, author_id)
if author_id
query_hash[:query][:bool][:filter] = {
bool: {
should: [
{ terms: { visibility_level: [Snippet::PUBLIC, Snippet::INTERNAL] } },
{ term: { author_id: author_id } }
]
}
}
end
......
module Gitlab
module Elastic
class SnippetSearchResults < ::Gitlab::SnippetSearchResults
# Captures the searching user and the search text for later use.
#
# user  - stored as @user
# query - stored as @query
#
# Only these two instance variables are set; no superclass initializer is
# invoked from here.
def initialize(user, query)
  @user, @query = user, query
end
def objects(scope, page = nil)
case scope
when 'snippet_titles'
......@@ -16,7 +21,7 @@ module Gitlab
def snippet_titles
opt = {
ids: limit_snippets
author_id: @user.id
}
Snippet.elastic_search(query, options: opt)
......@@ -24,7 +29,7 @@ module Gitlab
def snippet_blobs
opt = {
ids: limit_snippets
author_id: @user.id
}
Snippet.elastic_search_code(query, options: opt)
......
......@@ -47,9 +47,6 @@ describe "MergeRequest", elastic: true do
'author_id'
)
expected_hash['source_project'] = { 'id' => merge_request.source_project_id }
expected_hash['target_project'] = { 'id' => merge_request.target_project_id }
expected_hash['author'] = { 'id' => merge_request.author.id }
expected_hash['updated_at_sort'] = merge_request.updated_at
expect(merge_request.as_indexed_json).to eq(expected_hash)
......
......@@ -12,21 +12,21 @@ describe "Projects", elastic: true do
end
# Creates four projects, refreshes the Project index, then checks that
# searches restricted to the ids of the first three find 'test' and 'test1'
# once each and never find 'someone_elses_project'.
#
# NOTE(review): this listing is a diff with +/- markers stripped; the
# instance-variable lines (@project...) and the local-variable lines
# (project...) are presumably the removed and added versions of the same
# statements — confirm against the repository.
it "searches projects" do
@project = create :empty_project, name: 'test'
@project1 = create :empty_project, path: 'test1'
@project2 = create :empty_project
@project3 = create :empty_project, path: 'someone_elses_project'
@project_ids = [@project.id, @project1.id, @project2.id]
project = create :empty_project, name: 'test'
project1 = create :empty_project, path: 'test1'
project2 = create :empty_project
create :empty_project, path: 'someone_elses_project'
project_ids = [project.id, project1.id, project2.id]
Project.__elasticsearch__.refresh_index!
expect(Project.elastic_search('test', options: { pids: @project_ids }).total_count).to eq(1)
expect(Project.elastic_search('test1', options: { pids: @project_ids }).total_count).to eq(1)
expect(Project.elastic_search('someone_elses_project', options: { pids: @project_ids }).total_count).to eq(0)
expect(Project.elastic_search('test', options: { pids: project_ids }).total_count).to eq(1)
expect(Project.elastic_search('test1', options: { pids: project_ids }).total_count).to eq(1)
expect(Project.elastic_search('someone_elses_project', options: { pids: project_ids }).total_count).to eq(0)
end
it "returns json with all needed elements" do
project = create :project
project = create :empty_project
expected_hash = project.attributes.extract!(
'id',
......
......@@ -12,34 +12,35 @@ describe "Snippet", elastic: true do
end
# Exercises Snippet.elastic_search_code for the content 'genius code'.
#
# NOTE(review): this listing is a diff with +/- markers stripped, so two
# versions of the example are interleaved. The lines using @snippet* and
# `options = { ids: ... }` expect a total_count of 1; the lines using
# `user`, the :private / :public traits and `options = { author_id: ... }`
# expect a total_count of 2 containing snippet and snippet3. Presumably the
# former are the removed lines and the latter the added ones — confirm
# against the repository.
it "searches snippets by code" do
@snippet = create :personal_snippet, content: 'genius code'
@snippet1 = create :personal_snippet
user = create :user
# the snippet I have no access to
@snippet2 = create :personal_snippet, content: 'genius code'
snippet = create :personal_snippet, :private, content: 'genius code', author: user
create :personal_snippet, :private, content: 'genius code'
create :personal_snippet, :private
@snippet_ids = [@snippet.id, @snippet1.id]
snippet3 = create :personal_snippet, :public, content: 'genius code'
Snippet.__elasticsearch__.refresh_index!
options = { ids: @snippet_ids }
options = { author_id: user.id }
expect(Snippet.elastic_search_code('genius code', options: options).total_count).to eq(1)
result = Snippet.elastic_search_code('genius code', options: options)
expect(result.total_count).to eq(2)
expect(result.records.map(&:id)).to include(snippet.id, snippet3.id)
end
it "searches snippets by title and file_name" do
@snippet = create :snippet, title: 'home'
@snippet1 = create :snippet, file_name: 'index.php'
@snippet2 = create :snippet
user = create :user
# the snippet I have no access to
@snippet3 = create :snippet, title: 'home'
create :snippet, :public, title: 'home'
create :snippet, :private, title: 'home 1'
create :snippet, :public, file_name: 'index.php'
create :snippet
@snippet_ids = [@snippet.id, @snippet1.id, @snippet2.id]
Snippet.__elasticsearch__.refresh_index!
options = { ids: @snippet_ids }
options = { author_id: user.id }
expect(Snippet.elastic_search('home', options: options).total_count).to eq(1)
expect(Snippet.elastic_search('index.php', options: options).total_count).to eq(1)
......@@ -58,11 +59,9 @@ describe "Snippet", elastic: true do
'state',
'project_id',
'author_id',
'visibility_level'
)
expected_hash['project'] = { 'id' => snippet.project.id }
expected_hash['author'] = { 'id' => snippet.author.id }
expect(snippet.as_indexed_json).to eq(expected_hash)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment