Commit fa0fb8d8 authored by Kerri Miller's avatar Kerri Miller

Merge branch '273262-dont-use-joins-in-issue-global-search-new' into 'master'

Avoid the use of Elasticsearch joins when searching for issues

See merge request gitlab-org/gitlab!48583
parents 27d29a56 c119edb2
---
title: Avoid the use of Elasticsearch joins when searching for issues
merge_request: 48583
author:
type: performance
......@@ -118,19 +118,29 @@ module Elastic
options[:current_user],
options[:project_ids],
options[:public_and_internal_projects],
options[:features]
options[:features],
options[:no_join_project]
)
query_hash[:query][:bool][:filter] ||= []
query_hash[:query][:bool][:filter] << {
has_parent: {
_name: context.name,
parent_type: "project",
query: {
bool: project_query
}
}
}
query_hash[:query][:bool][:filter] << if options[:no_join_project]
# Some models have denormalized project permissions into the
# document so that we do not need to use joins
{
bool: project_query
}
else
{
has_parent: {
_name: context.name,
parent_type: "project",
query: {
bool: project_query
}
}
}
end
end
query_hash
......@@ -163,14 +173,14 @@ module Elastic
# If a project feature(s) is specified, it indicates interest in child
# documents gated by that project feature - e.g., "issues". The feature's
# visibility level must be taken into account.
def project_ids_query(user, project_ids, public_and_internal_projects, features = nil)
def project_ids_query(user, project_ids, public_and_internal_projects, features = nil, no_join_project = false)
scoped_project_ids = scoped_project_ids(user, project_ids)
# At least one condition must be present, so pick no projects for
# anonymous users.
# Pick private, internal and public projects the user is a member of.
# Pick all private projects for admins & auditors.
conditions = pick_projects_by_membership(scoped_project_ids, user, features)
conditions = pick_projects_by_membership(scoped_project_ids, user, no_join_project, features)
if public_and_internal_projects
context.name(:visibility) do
......@@ -198,12 +208,17 @@ module Elastic
# Admins & auditors are given access to all private projects. Access to
# internal or public projects where the project feature is private is not
# granted here.
def pick_projects_by_membership(project_ids, user, features = nil)
def pick_projects_by_membership(project_ids, user, no_join_project, features = nil)
# This method is used to construct a query on the join as well as query
# on top level doc. When querying top level doc the project's ID is
# `project_id` . When joining it is just `id`.
id_field = no_join_project ? :project_id : :id
if features.nil?
if project_ids == :any
return [{ term: { visibility_level: { _name: context.name(:any), value: Project::PRIVATE } } }]
else
return [{ terms: { _name: context.name(:membership, :id), id: project_ids } }]
return [{ terms: { _name: context.name(:membership, :id), id_field => project_ids } }]
end
end
......@@ -212,7 +227,7 @@ module Elastic
if project_ids == :any
{ term: { visibility_level: { _name: context.name(:any), value: Project::PRIVATE } } }
else
{ terms: { _name: context.name(:membership, :id), id: filter_ids_by_feature(project_ids, user, feature) } }
{ terms: { _name: context.name(:membership, :id), id_field => filter_ids_by_feature(project_ids, user, feature) } }
end
limit = {
......
......@@ -21,6 +21,7 @@ module Elastic
end
options[:features] = 'issues'
options[:no_join_project] = Elastic::DataMigrationService.migration_has_finished?(:add_new_data_to_issues_documents)
context.name(:issue) do
query_hash = context.name(:authorized) { project_ids_filter(query_hash, options) }
query_hash = context.name(:confidentiality) { confidentiality_filter(query_hash, options) }
......
......@@ -150,8 +150,11 @@ module Elastic
# Query notes based on the various feature permission of the noteable_type.
# Appends `noteable_type` (which will be removed in project_ids_filter)
# for base model filtering.
# We do not implement `no_join_project` argument for notes class yet
# as this is not supported. This will need to be fixed when we move
# notes to a new index.
override :pick_projects_by_membership
def pick_projects_by_membership(project_ids, user, _ = nil)
def pick_projects_by_membership(project_ids, user, _, _ = nil)
noteable_type_to_feature.map do |noteable_type, feature|
context.name(feature) do
condition =
......
......@@ -52,6 +52,8 @@ RSpec.describe Elastic::MigrationRecord, :elastic do
let(:in_progress_migration) { described_class.new(version: 10, name: 10, filename: nil) }
before do
es_helper.delete_index(index_name: es_helper.migrations_index_name)
es_helper.create_migrations_index
completed_versions.each { |migration| migration.save!(completed: true) }
in_progress_migration.save!(completed: false)
......
......@@ -2646,6 +2646,7 @@ RSpec.describe Project do
context 'on update' do
let(:project) { create(:project, :public) }
let!(:issue) { create(:issue, project: project) }
context 'when updating the visibility_level' do
it 'triggers ElasticAssociationIndexerWorker to update issues' do
......@@ -2653,6 +2654,18 @@ RSpec.describe Project do
project.update!(visibility_level: Gitlab::VisibilityLevel::PRIVATE)
end
it 'ensures all visibility_level updates are correctly applied in issue searches', :sidekiq_inline do
ensure_elasticsearch_index!
results = Issue.elastic_search('*', options: { public_and_internal_projects: true })
expect(results.count).to eq(1)
project.update!(visibility_level: Gitlab::VisibilityLevel::INTERNAL)
ensure_elasticsearch_index!
results = Issue.elastic_search('*', options: { public_and_internal_projects: true })
expect(results.count).to eq(0)
end
end
context 'when changing the title' do
......
......@@ -34,7 +34,7 @@ RSpec.describe Elastic::DataMigrationService, :elastic do
let(:migration_name) { migration.name.underscore }
it 'returns true if migration has finished' do
expect(subject.migration_has_finished_uncached?(migration_name)).to eq(false)
expect(subject.migration_has_finished_uncached?(migration_name)).to eq(true)
migration.save!(completed: false)
refresh_index!
......@@ -51,7 +51,7 @@ RSpec.describe Elastic::DataMigrationService, :elastic do
describe '.migration_has_finished?' do
let(:migration) { subject.migrations.first }
let(:migration_name) { migration.name.underscore }
let(:finished) { false }
let(:finished) { true }
before do
allow(Rails).to receive(:cache).and_return(ActiveSupport::Cache::MemoryStore.new)
......@@ -67,6 +67,13 @@ RSpec.describe Elastic::DataMigrationService, :elastic do
end
describe '.mark_all_as_completed!' do
before do
# Clear out the migrations index since it is setup initially with
# everything finished migrating
es_helper.delete_index(index_name: es_helper.migrations_index_name)
es_helper.create_migrations_index
end
it 'creates all migration versions' do
expect(Elastic::MigrationRecord.persisted_versions(completed: true).count).to eq(0)
......
......@@ -108,6 +108,35 @@ RSpec.describe Search::GlobalService do
end
end
# Since newly created indices automatically have all migrations as
# finished we need a test to verify the old style searches work for
# instances which haven't finished the migration yet
context 'when add_new_data_to_issues_documents migration is not finished' do
let!(:issue) { create :issue, project: project }
before do
allow(Elastic::DataMigrationService).to receive(:migration_has_finished?)
.with(:add_new_data_to_issues_documents)
.and_return(false)
end
where(:project_level, :feature_access_level, :membership, :admin_mode, :expected_count) do
permission_table_for_guest_feature_access
end
with_them do
it "respects visibility" do
enable_admin_mode!(user) if admin_mode
update_feature_access_level(project, feature_access_level)
ensure_elasticsearch_index!
expect_search_results(user, 'issues', expected_count: expected_count) do |user|
described_class.new(user, search: issue.title).execute
end
end
end
end
context 'sort by created_at' do
let!(:project) { create(:project, :public) }
let!(:old_result) { create(:issue, project: project, title: 'sorted old', created_at: 1.month.ago) }
......@@ -122,6 +151,52 @@ RSpec.describe Search::GlobalService do
let(:results) { described_class.new(nil, search: 'sorted', sort: sort).execute }
end
end
context 'using joins for global permission checks' do
let(:results) { described_class.new(nil, search: '*').execute.objects('issues') }
let(:es_host) { Gitlab::CurrentSettings.elasticsearch_url[0] }
let(:search_url) { Addressable::Template.new("#{es_host}/{index}/doc/_search{?params*}") }
before do
ensure_elasticsearch_index!
end
context 'when add_new_data_to_issues_documents migration is finished' do
before do
allow(Elastic::DataMigrationService).to receive(:migration_has_finished?)
.with(:add_new_data_to_issues_documents)
.and_return(true)
end
it 'does not use joins to apply permissions' do
request = a_request(:get, search_url).with do |req|
expect(req.body).not_to include("has_parent")
end
results
expect(request).to have_been_made
end
end
context 'when add_new_data_to_issues_documents migration is not finished' do
before do
allow(Elastic::DataMigrationService).to receive(:migration_has_finished?)
.with(:add_new_data_to_issues_documents)
.and_return(false)
end
it 'uses joins to apply permissions' do
request = a_request(:get, search_url).with do |req|
expect(req.body).to include("has_parent")
end
results
expect(request).to have_been_made
end
end
end
end
context 'merge_request' do
......
......@@ -12,6 +12,8 @@ RSpec.configure do |config|
helper.create_empty_index(options: { settings: { number_of_replicas: 0 } })
helper.create_migrations_index
::Elastic::DataMigrationService.mark_all_as_completed!
refresh_index!
example.run
......
......@@ -39,6 +39,12 @@ module ElasticsearchHelpers
es_helper.refresh_index(index_name: es_helper.migrations_index_name)
end
def warm_elasticsearch_migrations_cache!
::Elastic::DataMigrationService.migrations.each do |migration|
::Elastic::DataMigrationService.migration_has_finished?(migration.name.underscore.to_sym)
end
end
def es_helper
Gitlab::Elastic::Helper.default
end
......
......@@ -4,6 +4,10 @@ RSpec.shared_examples 'does not hit Elasticsearch twice for objects and counts'
scopes.each do |scope|
context "for scope #{scope}", :elastic, :request_store do
it 'makes 1 Elasticsearch query' do
# We want to warm the cache for checking migrations have run since we
# don't want to count these requests as searches
allow(Rails).to receive(:cache).and_return(ActiveSupport::Cache::MemoryStore.new)
warm_elasticsearch_migrations_cache!
::Gitlab::SafeRequestStore.clear!
results.objects(scope)
......
......@@ -12,6 +12,7 @@ RSpec.describe 'gitlab:elastic namespace rake tasks', :elastic do
before do
es_helper.delete_index
es_helper.delete_index(index_name: es_helper.migrations_index_name)
end
it 'creates an index' do
......
......@@ -8,13 +8,18 @@ RSpec.describe ElasticDeleteProjectWorker, :elastic do
# Create admin user and search globally to avoid dealing with permissions in
# these tests
let(:user) { create(:admin) }
let(:search_options) { { options: { current_user: user, project_ids: :any } } }
before do
stub_ee_application_setting(elasticsearch_indexing: true)
end
it 'deletes a project with all nested objects', :aggregate_failures do
# Extracted to a method as the `#elastic_search` methods using it below will
# mutate the hash and mess up the following searches
def search_options
{ options: { current_user: user, project_ids: :any } }
end
it 'deletes a project with all nested objects' do
project = create :project, :repository
issue = create :issue, project: project
milestone = create :milestone, project: project
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment