Commit 5d08aa4c authored by Valery Sizov's avatar Valery Sizov

Merge branch 'es_parent-child' into 'master'

ES: Improve search performance by using parent/child relationships

- [x] Indexing data with relations
- [x] Using parent/child in queries
- [x] Test everything
- [x] Check if delete query should be routed as well. Documentation is not clear about this 
- [x] Resolve routing problem for Snippets. They not always has a parent project
- [x]  Specs

Closes https://gitlab.com/gitlab-org/gitlab-ee/issues/375

See merge request !615
parents e50eb369 d1840e31
......@@ -7,6 +7,7 @@ v 8.11.0 (unreleased)
- Removed unused GitLab GEO database index
- Enable monitoring for ES classes
- [Elastic] Improve code search
- [Elastic] Significant improvement of global search performance
v 8.10.5
- Used cached value of project count in `Elastic::RepositoriesSearch` to reduce DB load. !637
......
......@@ -53,6 +53,10 @@ module Elastic
def searchable?
true
end
def es_parent
project_id
end
end
module ClassMethods
......@@ -65,6 +69,16 @@ module Elastic
{ fields: es_fields }
end
def import_with_parent(options = {})
transform = lambda do |r|
{ index: { _id: r.id, _parent: r.es_parent, data: r.__elasticsearch__.as_indexed_json } }
end
options.merge!(transform: transform)
self.import(options)
end
def basic_query_hash(fields, query)
query_hash = if query.present?
{
......@@ -111,17 +125,42 @@ module Elastic
}
end
def project_ids_filter(query_hash, project_ids)
def project_ids_filter(query_hash, project_ids, public_and_internal_projects = true)
if project_ids
condition = project_ids_condition(project_ids, public_and_internal_projects)
query_hash[:query][:bool][:filter] = {
bool: {
must: [ { terms: { project_id: project_ids } } ]
has_parent: {
parent_type: "project",
query: {
bool: {
should: condition
}
}
}
}
end
query_hash
end
def project_ids_condition(project_ids, public_and_internal_projects)
conditions = [{
terms: { id: project_ids }
}]
if public_and_internal_projects
conditions << {
term: { visibility_level: Project::PUBLIC }
}
conditions << {
term: { visibility_level: Project::INTERNAL }
}
end
conditions
end
end
end
end
......@@ -5,9 +5,8 @@ module Elastic
included do
include ApplicationSearch
mappings do
mappings _parent: { type: 'project' } do
indexes :id, type: :integer
indexes :iid, type: :integer, index: :not_analyzed
indexes :title, type: :string,
index_options: 'offsets'
......@@ -16,11 +15,9 @@ module Elastic
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :state, type: :string
indexes :project_id, type: :integer
indexes :author_id, type: :integer
indexes :assignee_id, type: :integer
indexes :confidential, type: :boolean
end
......@@ -43,7 +40,7 @@ module Elastic
query_hash = basic_query_hash(%w(title^2 description), query)
end
query_hash = project_ids_filter(query_hash, options[:project_ids])
query_hash = project_ids_filter(query_hash, options[:project_ids], options[:public_and_internal_projects])
query_hash = confidentiality_filter(query_hash, options[:current_user])
self.__elasticsearch__.search(query_hash)
......
......@@ -5,23 +5,21 @@ module Elastic
included do
include ApplicationSearch
mappings do
indexes :id, type: :integer
indexes :iid, type: :integer
indexes :target_branch, type: :string,
index_options: 'offsets'
indexes :source_branch, type: :string,
index_options: 'offsets'
indexes :title, type: :string,
index_options: 'offsets'
indexes :description, type: :string,
index_options: 'offsets'
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :state, type: :string
indexes :merge_status, type: :string
mappings _parent: { type: 'project' } do
indexes :id, type: :integer
indexes :iid, type: :integer
indexes :target_branch, type: :string,
index_options: 'offsets'
indexes :source_branch, type: :string,
index_options: 'offsets'
indexes :title, type: :string,
index_options: 'offsets'
indexes :description, type: :string,
index_options: 'offsets'
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :state, type: :string
indexes :merge_status, type: :string
indexes :source_project_id, type: :integer
indexes :target_project_id, type: :integer
indexes :author_id, type: :integer
......@@ -49,10 +47,14 @@ module Elastic
].each do |attr|
data[attr.to_s] = self.send(attr)
end
data
end
def es_parent
target_project_id
end
def self.elastic_search(query, options: {})
if query =~ /#(\d+)\z/
query_hash = iid_query_hash(query_hash, $1)
......@@ -60,13 +62,7 @@ module Elastic
query_hash = basic_query_hash(%w(title^2 description), query)
end
if options[:project_ids]
query_hash[:query][:bool][:filter] = [{
terms: {
target_project_id: [options[:project_ids]].flatten
}
}]
end
query_hash = project_ids_filter(query_hash, options[:project_ids], options[:public_and_internal_projects])
self.__elasticsearch__.search(query_hash)
end
......
......@@ -5,7 +5,7 @@ module Elastic
included do
include ApplicationSearch
mappings do
mappings _parent: { type: 'project' } do
indexes :id, type: :integer
indexes :title, type: :string,
index_options: 'offsets'
......@@ -27,7 +27,7 @@ module Elastic
query_hash = basic_query_hash(options[:in], query)
query_hash = project_ids_filter(query_hash, options[:project_ids])
query_hash = project_ids_filter(query_hash, options[:project_ids], options[:public_and_internal_projects])
self.__elasticsearch__.search(query_hash)
end
......
......@@ -5,7 +5,7 @@ module Elastic
included do
include ApplicationSearch
mappings do
mappings _parent: { type: 'project' } do
indexes :id, type: :integer
indexes :note, type: :string,
index_options: 'offsets'
......@@ -56,7 +56,7 @@ module Elastic
query_hash[:track_scores] = true
end
query_hash = project_ids_filter(query_hash, options[:project_ids])
query_hash = project_ids_filter(query_hash, options[:project_ids], options[:public_and_internal_projects])
query_hash = confidentiality_filter(query_hash, options[:current_user])
query_hash[:sort] = [
......
......@@ -7,7 +7,6 @@ module Elastic
mappings do
indexes :id, type: :integer
indexes :name, type: :string,
index_options: 'offsets'
indexes :path, type: :string,
......@@ -18,9 +17,7 @@ module Elastic
index_options: 'offsets'
indexes :description, type: :string,
index_options: 'offsets'
indexes :namespace_id, type: :integer
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :archived, type: :boolean
......@@ -87,8 +84,8 @@ module Elastic
if options[:pids]
filters << {
ids: {
values: options[:pids]
bool: {
should: project_ids_condition(options[:pids], options[:public_and_internal_projects])
}
}
end
......
......@@ -7,7 +7,6 @@ module Elastic
mappings do
indexes :id, type: :integer
indexes :title, type: :string,
index_options: 'offsets'
indexes :file_name, type: :string,
......@@ -17,7 +16,6 @@ module Elastic
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :state, type: :string
indexes :project_id, type: :integer
indexes :author_id, type: :integer
indexes :visibility_level, type: :integer
......
......@@ -10,12 +10,21 @@ module Search
def execute
group = Group.find_by(id: params[:group_id]) if params[:group_id].present?
projects = ProjectsFinder.new.execute(current_user)
projects = projects.in_namespace(group.id) if group
if current_application_settings.elasticsearch_search?
Gitlab::Elastic::SearchResults.new(current_user, projects.pluck(:id), params[:search])
projects = current_user ? current_user.authorized_projects : Project.none
projects = projects.in_namespace(group.id) if group
Gitlab::Elastic::SearchResults.new(
current_user,
params[:search],
projects.pluck(:id),
!group
)
else
projects = ProjectsFinder.new.execute(current_user)
projects = projects.in_namespace(group.id) if group
Gitlab::SearchResults.new(current_user, projects, params[:search])
end
end
......
......@@ -11,8 +11,8 @@ module Search
def execute
if current_application_settings.elasticsearch_search?
Gitlab::Elastic::ProjectSearchResults.new(current_user,
project.id,
params[:search],
project.id,
params[:repository_ref])
else
Gitlab::ProjectSearchResults.new(current_user,
......
......@@ -13,7 +13,12 @@ class ElasticIndexerWorker
when /index|update/
record = klass.find(record_id)
record.__elasticsearch__.client = client
record.__elasticsearch__.__send__ "#{operation}_document"
if [Project, PersonalSnippet, ProjectSnippet, Snippet].include?(klass)
record.__elasticsearch__.__send__ "#{operation}_document"
else
record.__elasticsearch__.__send__ "#{operation}_document", parent: record.es_parent
end
update_issue_notes(record, options["changed_fields"]) if klass == Issue
when /delete/
......@@ -27,7 +32,7 @@ class ElasticIndexerWorker
def update_issue_notes(record, changed_fields)
if changed_fields && (changed_fields & ISSUE_TRACKED_FIELDS).any?
Note.import query: -> { where(noteable: record) }
Note.import_with_parent query: -> { where(noteable: record) }
end
end
......
......@@ -6,7 +6,7 @@ module Gitlab
class ProjectSearchResults < Gitlab::Elastic::SearchResults
attr_reader :project, :repository_ref
def initialize(current_user, project_id, query, repository_ref = nil)
def initialize(current_user, query, project_id, repository_ref = nil)
@current_user = current_user
@project = Project.find(project_id)
......@@ -16,6 +16,7 @@ module Gitlab
nil
end
@query = query
@public_and_internal_projects = false
end
def objects(scope, page = nil)
......@@ -90,7 +91,8 @@ module Gitlab
def notes
opt = {
project_ids: limit_project_ids,
current_user: @current_user
current_user: @current_user,
public_and_internal_projects: @public_and_internal_projects
}
Note.elastic_search(query, options: opt)
......
......@@ -7,10 +7,11 @@ module Gitlab
# It allows us to search only for projects user has access to
attr_reader :limit_project_ids
def initialize(current_user, limit_project_ids, query)
def initialize(current_user, query, limit_project_ids, public_and_internal_projects = true)
@current_user = current_user
@limit_project_ids = limit_project_ids || Project.all
@limit_project_ids = limit_project_ids
@query = Shellwords.shellescape(query) if query.present?
@public_and_internal_projects = public_and_internal_projects
end
def objects(scope, page = nil)
......@@ -56,7 +57,8 @@ module Gitlab
def projects
opt = {
pids: limit_project_ids
pids: limit_project_ids,
public_and_internal_projects: @public_and_internal_projects
}
@projects = Project.elastic_search(query, options: opt)
......@@ -65,7 +67,8 @@ module Gitlab
def issues
opt = {
project_ids: limit_project_ids,
current_user: current_user
current_user: current_user,
public_and_internal_projects: @public_and_internal_projects
}
Issue.elastic_search(query, options: opt)
......@@ -73,7 +76,8 @@ module Gitlab
def milestones
opt = {
project_ids: limit_project_ids
project_ids: limit_project_ids,
public_and_internal_projects: @public_and_internal_projects
}
Milestone.elastic_search(query, options: opt)
......@@ -81,7 +85,8 @@ module Gitlab
def merge_requests
opt = {
project_ids: limit_project_ids
project_ids: limit_project_ids,
public_and_internal_projects: @public_and_internal_projects
}
MergeRequest.elastic_search(query, options: opt)
......
......@@ -63,6 +63,7 @@ namespace :gitlab do
projects.find_each do |project|
unless project.wiki.empty?
puts "Indexing wiki of #{project.name_with_namespace}..."
begin
project.wiki.index_blobs
puts "Done!".color(:green)
......@@ -78,10 +79,13 @@ namespace :gitlab do
[Project, Issue, MergeRequest, Snippet, Note, Milestone].each do |klass|
print "Indexing #{klass} records... "
if klass == Note
Note.searchable.import
else
case klass
when Note
Note.searchable.import_with_parent
when Project, Snippet
klass.import
else
klass.import_with_parent
end
puts "done".color(:green)
......
......@@ -16,7 +16,7 @@ describe Gitlab::Elastic::ProjectSearchResults, lib: true do
end
describe 'initialize with empty ref' do
subject(:results) { described_class.new(user, project.id, query, '') }
subject(:results) { described_class.new(user, query, project.id, '') }
it { expect(results.project).to eq(project) }
it { expect(results.repository_ref).to be_nil }
......@@ -25,7 +25,7 @@ describe Gitlab::Elastic::ProjectSearchResults, lib: true do
describe 'initialize with ref' do
let(:ref) { 'refs/heads/test' }
subject(:results) { described_class.new(user, project.id, query, ref) }
subject(:results) { described_class.new(user, query, project.id, ref) }
it { expect(results.project).to eq(project) }
it { expect(results.repository_ref).to eq(ref) }
......@@ -53,12 +53,12 @@ describe Gitlab::Elastic::ProjectSearchResults, lib: true do
Gitlab::Elastic::Helper.refresh_index
result = Gitlab::Elastic::ProjectSearchResults.new(user, project.id, "term")
result = Gitlab::Elastic::ProjectSearchResults.new(user, "term", project.id)
expect(result.notes_count).to eq(1)
expect(result.wiki_blobs_count).to eq(1)
expect(result.blobs_count).to eq(1)
result1 = Gitlab::Elastic::ProjectSearchResults.new(user, project.id, "initial")
result1 = Gitlab::Elastic::ProjectSearchResults.new(user, "initial", project.id)
expect(result1.commits_count).to eq(1)
end
end
......@@ -79,7 +79,7 @@ describe Gitlab::Elastic::ProjectSearchResults, lib: true do
end
it 'should not list project confidential issues for non project members' do
results = described_class.new(non_member, project.id, query)
results = described_class.new(non_member, query, project.id)
issues = results.objects('issues')
expect(issues).to include issue
......@@ -89,7 +89,7 @@ describe Gitlab::Elastic::ProjectSearchResults, lib: true do
end
it 'should list project confidential issues for author' do
results = described_class.new(author, project.id, query)
results = described_class.new(author, query, project.id)
issues = results.objects('issues')
expect(issues).to include issue
......@@ -99,7 +99,7 @@ describe Gitlab::Elastic::ProjectSearchResults, lib: true do
end
it 'should list project confidential issues for assignee' do
results = described_class.new(assignee, project.id, query)
results = described_class.new(assignee, query, project.id)
issues = results.objects('issues')
expect(issues).to include issue
......@@ -111,7 +111,7 @@ describe Gitlab::Elastic::ProjectSearchResults, lib: true do
it 'should list project confidential issues for project members' do
project.team << [member, :developer]
results = described_class.new(member, project.id, query)
results = described_class.new(member, query, project.id)
issues = results.objects('issues')
expect(issues).to include issue
......@@ -123,7 +123,7 @@ describe Gitlab::Elastic::ProjectSearchResults, lib: true do
it 'should not list project confidential issues for project members with guest role' do
project.team << [member, :guest]
results = described_class.new(member, project.id, query)
results = described_class.new(member, query, project.id)
issues = results.objects('issues')
expect(issues).to include issue
......@@ -133,7 +133,7 @@ describe Gitlab::Elastic::ProjectSearchResults, lib: true do
end
it 'should list all project issues for admin' do
results = described_class.new(admin, project.id, query)
results = described_class.new(admin, query, project.id)
issues = results.objects('issues')
expect(issues).to include issue
......
......@@ -42,7 +42,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should list issues that title or description contain the query' do
results = described_class.new(user, limit_project_ids, 'hello world')
results = described_class.new(user, 'hello world', limit_project_ids)
issues = results.objects('issues')
expect(issues).to include @issue_1
......@@ -52,14 +52,14 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should return empty list when issues title or description does not contain the query' do
results = described_class.new(user, limit_project_ids, 'security')
results = described_class.new(user, 'security', limit_project_ids)
expect(results.objects('issues')).to be_empty
expect(results.issues_count).to eq 0
end
it 'should list issue when search by a valid iid' do
results = described_class.new(user, limit_project_ids, '#2')
results = described_class.new(user, '#2', limit_project_ids)
issues = results.objects('issues')
expect(issues).not_to include @issue_1
......@@ -69,7 +69,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should return empty list when search by invalid iid' do
results = described_class.new(user, limit_project_ids, '#222')
results = described_class.new(user, '#222', limit_project_ids)
expect(results.objects('issues')).to be_empty
expect(results.issues_count).to eq 0
......@@ -101,7 +101,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
let(:query) { 'issue' }
it 'should not list confidential issues for guests' do
results = described_class.new(nil, limit_project_ids, query)
results = described_class.new(nil, query, limit_project_ids)
issues = results.objects('issues')
expect(issues).to include @issue
......@@ -114,7 +114,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should not list confidential issues for non project members' do
results = described_class.new(non_member, limit_project_ids, query)
results = described_class.new(non_member, query, limit_project_ids)
issues = results.objects('issues')
expect(issues).to include @issue
......@@ -127,7 +127,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should list confidential issues for author' do
results = described_class.new(author, limit_project_ids, query)
results = described_class.new(author, query, limit_project_ids)
issues = results.objects('issues')
expect(issues).to include @issue
......@@ -140,7 +140,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should list confidential issues for assignee' do
results = described_class.new(assignee, limit_project_ids, query)
results = described_class.new(assignee, query, limit_project_ids)
issues = results.objects('issues')
expect(issues).to include @issue
......@@ -156,7 +156,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
project_1.team << [member, :developer]
project_2.team << [member, :developer]
results = described_class.new(member, limit_project_ids, query)
results = described_class.new(member, query, limit_project_ids)
issues = results.objects('issues')
expect(issues).to include @issue
......@@ -169,7 +169,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should list all issues for admin' do
results = described_class.new(admin, limit_project_ids, query)
results = described_class.new(admin, query, limit_project_ids)
issues = results.objects('issues')
expect(issues).to include @issue
......@@ -186,7 +186,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
let(:query) { '#1' }
it 'should not list confidential issues for guests' do
results = described_class.new(nil, limit_project_ids, query)
results = described_class.new(nil, query, limit_project_ids)
issues = results.objects('issues')
expect(issues).to include @issue
......@@ -199,7 +199,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should not list confidential issues for non project members' do
results = described_class.new(non_member, limit_project_ids, query)
results = described_class.new(non_member, query, limit_project_ids)
issues = results.objects('issues')
expect(issues).to include @issue
......@@ -212,7 +212,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should list confidential issues for author' do
results = described_class.new(author, limit_project_ids, query)
results = described_class.new(author, query, limit_project_ids)
issues = results.objects('issues')
expect(issues).to include @issue
......@@ -225,7 +225,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should list confidential issues for assignee' do
results = described_class.new(assignee, limit_project_ids, query)
results = described_class.new(assignee, query, limit_project_ids)
issues = results.objects('issues')
expect(issues).to include @issue
......@@ -241,7 +241,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
project_2.team << [member, :developer]
project_3.team << [member, :developer]
results = described_class.new(member, limit_project_ids, query)
results = described_class.new(member, query, limit_project_ids)
issues = results.objects('issues')
expect(issues).to include @issue
......@@ -254,7 +254,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should list all issues for admin' do
results = described_class.new(admin, limit_project_ids, query)
results = described_class.new(admin, query, limit_project_ids)
issues = results.objects('issues')
expect(issues).to include @issue
......@@ -298,7 +298,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should list merge requests that title or description contain the query' do
results = described_class.new(user, limit_project_ids, 'hello world')
results = described_class.new(user, 'hello world', limit_project_ids)
merge_requests = results.objects('merge_requests')
expect(merge_requests).to include @merge_request_1
......@@ -308,14 +308,14 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should return empty list when merge requests title or description does not contain the query' do
results = described_class.new(user, limit_project_ids, 'security')
results = described_class.new(user, 'security', limit_project_ids)
expect(results.objects('merge_requests')).to be_empty
expect(results.merge_requests_count).to eq 0
end
it 'should list merge request when search by a valid iid' do
results = described_class.new(user, limit_project_ids, '#2')
results = described_class.new(user, '#2', limit_project_ids)
merge_requests = results.objects('merge_requests')
expect(merge_requests).not_to include @merge_request_1
......@@ -325,7 +325,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
end
it 'should return empty list when search by invalid iid' do
results = described_class.new(user, limit_project_ids, '#222')
results = described_class.new(user, '#222', limit_project_ids)
expect(results.objects('merge_requests')).to be_empty
expect(results.merge_requests_count).to eq 0
......@@ -358,7 +358,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
Gitlab::Elastic::Helper.refresh_index
result = Gitlab::Elastic::SearchResults.new(user, [project.id], 'term')
result = Gitlab::Elastic::SearchResults.new(user, 'term', [project.id])
expect(result.issues_count).to eq(2)
expect(result.merge_requests_count).to eq(2)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment