Commit c3804184 authored by Sean McGivern

Merge branch '323310-remove-old-es-migrations-part2' into '323310-remove-old-es-migrations'

Remove old Advanced Search migrations [part 2]

See merge request gitlab-org/gitlab!63020
parents 42eb2e7f b2fc1fce
......@@ -83,7 +83,7 @@ module EE
# override
def use_separate_indices?
Elastic::DataMigrationService.migration_has_finished?(:migrate_issues_to_separate_index)
true
end
end
......
......@@ -20,11 +20,7 @@ class ElasticDeleteProjectWorker
def indices
helper = Gitlab::Elastic::Helper.default
index_names = [helper.target_name]
if Elastic::DataMigrationService.migration_has_finished?(:migrate_issues_to_separate_index)
index_names << helper.standalone_indices_proxies(target_classes: [Issue]).map(&:index_name)
end
index_names = [helper.target_name] + helper.standalone_indices_proxies(target_classes: [Issue]).map(&:index_name)
if Elastic::DataMigrationService.migration_has_finished?(:migrate_notes_to_separate_index)
index_names << helper.standalone_indices_proxies(target_classes: [Note]).map(&:index_name)
......
# frozen_string_literal: true
class AddNewDataToIssuesDocuments < Elastic::Migration
batched!
throttle_delay 5.minutes
QUERY_BATCH_SIZE = 5000
UPDATE_BATCH_SIZE = 100
# Back-fills `visibility_level` for one batch of issue documents.
#
# Only logs and returns when `completed?` is already truthy. Otherwise it
# searches `helper.target_index_name` for up to QUERY_BATCH_SIZE documents
# matching `issues_missing_visibility_level_filter` and passes a document
# reference for every hit to Elastic::ProcessBookkeepingService.track!,
# UPDATE_BATCH_SIZE references per call.
def migrate
  if completed?
    log "Skipping adding visibility_level field to issues documents migration since it is already applied"
    return
  end

  log "Adding visibility_level field to issues documents for batch of #{QUERY_BATCH_SIZE} documents"

  search_body = {
    size: QUERY_BATCH_SIZE,
    query: { bool: { filter: issues_missing_visibility_level_filter } }
  }
  response = client.search(index: helper.target_index_name, body: search_body)

  document_references = (response.dig('hits', 'hits') || []).map do |hit|
    # ensure that any issues missing from the database will be removed from Elasticsearch
    # as the data is back-filled
    Gitlab::Elastic::DocumentReference.new(
      Issue,
      hit.dig('_source', 'id'),
      hit['_id'],
      hit.dig('_source', 'join_field', 'parent')
    )
  end

  document_references.each_slice(UPDATE_BATCH_SIZE) do |batch|
    Elastic::ProcessBookkeepingService.track!(*batch)
  end

  log "Adding visibility_level field to issues documents is completed for batch of #{document_references.size} documents"
end
# Checks whether any issue document still lacks `visibility_level`.
#
# Runs a zero-size search with a filter aggregation and reads its `doc_count`.
# Returns true when the count is 0, false when it is not, and nil when the
# response carries no such aggregation.
def completed?
  body = {
    size: 0,
    aggs: { issues: { filter: issues_missing_visibility_level_filter } }
  }
  remaining = client
    .search(index: helper.target_index_name, body: body)
    .dig('aggregations', 'issues', 'doc_count')

  remaining && remaining == 0
end
private
# Bool clause selecting issue-type documents that have no `visibility_level`
# field: `must_not` an exists check, `filter` on the issue type.
def issues_missing_visibility_level_filter
  clauses = {}
  clauses[:must_not] = field_exists('visibility_level')
  clauses[:filter] = issue_type_filter

  { bool: clauses }
end
# Term clause matching documents whose `type` is 'issue'.
def issue_type_filter
  { term: { type: { value: 'issue' } } }
end
# Exists clause for the given field name.
def field_exists(field)
  { exists: { field: field } }
end
include Elastic::MigrationObsolete
end
# frozen_string_literal: true
class MigrateIssuesToSeparateIndex < Elastic::Migration
pause_indexing!
batched!
throttle_delay 1.minute
MAX_ATTEMPTS = 30
FIELDS = %w(
type
id
iid
title
description
created_at
updated_at
state
project_id
author_id
confidential
assignee_id
visibility_level
issues_access_level
).freeze
# Runs one batch of the issues reindexing migration.
#
# The persisted migration state drives the work:
# * `retry_attempt >= MAX_ATTEMPTS` - halt via `fail_migration_halt_error!`;
# * no `:slice` yet - (re)create the standalone issues index and store the
#   initial state (`slice: 0`, `retry_attempt: 0`, `max_slices` taken from
#   `get_number_of_shards`);
# * `slice < max_slices` - reindex that slice and advance `:slice` by one.
#
# Any StandardError increments `:retry_attempt` in the state and is re-raised.
def migrate
  # Read the state before doing any work so the rescue handler below always has
  # usable values. Previously a failure while creating the index reached the
  # handler with these locals still nil, and `nil + 1` replaced the real error
  # with a NoMethodError without recording the attempt.
  retry_attempt = migration_state[:retry_attempt].to_i
  slice = migration_state[:slice]
  max_slices = migration_state[:max_slices]

  if retry_attempt >= MAX_ATTEMPTS
    fail_migration_halt_error!(retry_attempt: retry_attempt)
    return
  end

  # On initial batch we only create index
  if slice.blank?
    cleanup # support retries

    log "Create standalone issues index under #{issues_index_name}"
    helper.create_standalone_indices(target_classes: [Issue])

    set_migration_state(
      slice: 0,
      retry_attempt: 0,
      max_slices: get_number_of_shards
    )
    return
  end

  if slice < max_slices
    log "Launching reindexing for slice:#{slice} | max_slices:#{max_slices}"

    response = reindex(slice: slice, max_slices: max_slices)
    process_response(response)

    log "Reindexing for slice:#{slice} | max_slices:#{max_slices} is completed with #{response.to_json}"

    set_migration_state(
      slice: slice + 1,
      retry_attempt: retry_attempt,
      max_slices: max_slices
    )
  end
rescue StandardError => e
  # `to_i` keeps this handler safe even if reading the state itself failed.
  attempts = retry_attempt.to_i
  log "migrate failed, increasing migration_state retry_attempt: #{attempts} error:#{e.message}"

  set_migration_state(
    slice: slice,
    retry_attempt: attempts + 1,
    max_slices: max_slices
  )

  raise e
end
# Refreshes the standalone issues index, then reports whether it holds as many
# documents as there are issue documents in the original index.
def completed?
  log "completed check: Refreshing #{issues_index_name}"
  helper.refresh_index(index_name: issues_index_name)

  source_total = original_issues_documents_count
  target_total = new_issues_documents_count

  log "Checking to see if migration is completed based on index counts: original_count:#{source_total}, new_count:#{target_total}"

  source_total == target_total
end
private
# Deletes the standalone issues index when it already exists, so that the
# initial batch can be retried.
def cleanup
  return unless helper.index_exists?(index_name: issues_index_name)

  helper.delete_index(index_name: issues_index_name)
end
# Issues a reindex request for one slice and waits for it to complete.
# Returns the client's response.
def reindex(slice:, max_slices:)
  client.reindex(
    body: query(slice: slice, max_slices: max_slices),
    wait_for_completion: true
  )
end
# Validates a reindex response.
#
# Calls `log_raise` when the response lists failures, or when `total` differs
# from the sum of `updated`, `created` and `deleted`.
def process_response(response)
  failures = response['failures']
  log_raise "Reindexing failed with #{failures}" if failures.present?

  processed = response['updated'] + response['created'] + response['deleted']
  return if response['total'] == processed

  log_raise "Slice reindexing seems to have failed, total is not equal to updated + created + deleted"
end
# Builds the reindex request body for one slice: copy the FIELDS of every
# document matching type 'issue' from `default_index_name` into
# `issues_index_name`.
def query(slice:, max_slices:)
  source = {
    index: default_index_name,
    _source: FIELDS,
    query: { match: { type: 'issue' } },
    slice: { id: slice, max: max_slices }
  }

  { source: source, dest: { index: issues_index_name } }
end
# Reads, from `default_index_name`, the `doc_count` of a filter aggregation
# over documents whose `type` is 'issue'. Returns nil when the response has
# no such aggregation.
def original_issues_documents_count
  issue_documents = { term: { type: { value: 'issue' } } }
  body = { size: 0, aggs: { issues: { filter: issue_documents } } }

  client
    .search(index: default_index_name, body: body)
    .dig('aggregations', 'issues', 'doc_count')
end
# Returns what `helper.documents_count` reports for the standalone issues
# index (`issues_index_name`); `completed?` compares it with the original count.
def new_issues_documents_count
helper.documents_count(index_name: issues_index_name)
end
# Reads `number_of_shards` from the helper's settings and coerces it to an
# Integer (0 when the setting is absent).
def get_number_of_shards
  settings = helper.get_settings
  settings['number_of_shards'].to_i
end
# Name of the index the issue documents are read from (`helper.target_name`);
# also the prefix used to build `issues_index_name`.
def default_index_name
helper.target_name
end
# Name of the standalone issues index: `default_index_name` with an
# `-issues` suffix.
def issues_index_name
  format('%s-issues', default_index_name)
end
include Elastic::MigrationObsolete
end
# frozen_string_literal: true
class DeleteIssuesFromOriginalIndex < Elastic::Migration
batched!
throttle_delay 1.minute
MAX_ATTEMPTS = 30
QUERY_BODY = {
query: {
term: {
type: 'issue'
}
}
}.freeze
# Deletes every document matching QUERY_BODY from `helper.target_name`.
#
# Halts via `fail_migration_halt_error!` once `:retry_attempt` in the
# migration state reaches MAX_ATTEMPTS, and only logs when `completed?` is
# already truthy. Any StandardError (including reported delete failures)
# increments `:retry_attempt` in the state and is re-raised.
def migrate
  attempts = migration_state[:retry_attempt].to_i

  if attempts >= MAX_ATTEMPTS
    fail_migration_halt_error!(retry_attempt: attempts)
    return
  elsif completed?
    log "Skipping removing issues from the original index since it is already applied"
    return
  end

  failures = client.delete_by_query(index: helper.target_name, body: QUERY_BODY)['failures']
  log_raise "Failed to delete issues: #{failures}" if failures.present?
rescue StandardError => error
  log "migrate failed, increasing migration_state retry_attempt: #{attempts} error:#{error.class}:#{error.message}"

  set_migration_state(retry_attempt: attempts + 1)

  raise error
end
# Refreshes the index, then checks that a zero-size search for QUERY_BODY in
# `helper.target_name` reports a total hit value of 0.
def completed?
  helper.refresh_index

  count_query = QUERY_BODY.merge(size: 0)
  response = client.search(index: helper.target_name, body: count_query)

  response.dig('hits', 'total', 'value') == 0
end
include Elastic::MigrationObsolete
end
# frozen_string_literal: true
class RemovePermissionsDataFromNotesDocuments < Elastic::Migration
batched!
throttle_delay 1.minute
QUERY_BATCH_SIZE = 5000
UPDATE_BATCH_SIZE = 100
# Re-queues one batch of note documents that still carry permission fields.
#
# Only logs and returns when `completed?` is already truthy. Otherwise it
# searches `helper.target_index_name` for up to QUERY_BATCH_SIZE note
# documents having at least one of the permission fields and passes a
# document reference for every hit to
# Elastic::ProcessBookkeepingService.track!, UPDATE_BATCH_SIZE per call.
def migrate
  log "Removing permission data from notes migration starting"

  if completed?
    log "Skipping removing permission data from notes documents migration since it is already applied"
    return
  end

  log "Removing permission data from notes documents for batch of #{QUERY_BATCH_SIZE} documents"

  notes_only = note_type_filter
  permission_fields = [
    field_exists('visibility_level'),
    field_exists_for_type('issues_access_level', 'Issue'),
    field_exists_for_type('repository_access_level', 'Commit'),
    field_exists_for_type('merge_requests_access_level', 'MergeRequest'),
    field_exists_for_type('snippets_access_level', 'Snippet')
  ]

  # use filter query to prevent scores from being calculated
  notes_with_permissions = {
    bool: { must: notes_only, should: permission_fields, minimum_should_match: 1 }
  }
  search_body = {
    size: QUERY_BATCH_SIZE,
    query: { bool: { filter: notes_with_permissions } }
  }
  response = client.search(index: helper.target_index_name, body: search_body)

  document_references = (response.dig('hits', 'hits') || []).map do |hit|
    # ensure that any notes missing from the database will be removed from Elasticsearch
    # as the data is back-filled
    Gitlab::Elastic::DocumentReference.new(
      Note,
      hit.dig('_source', 'id'),
      hit['_id'],
      hit.dig('_source', 'join_field', 'parent')
    )
  end

  document_references.each_slice(UPDATE_BATCH_SIZE) do |batch|
    Elastic::ProcessBookkeepingService.track!(*batch)
  end

  log "Removing permission data from notes documents is completed for batch of #{document_references.size} documents"
end
# Refreshes the index, then counts note documents that still have at least
# one permission field via a filter aggregation.
#
# Returns true when that count is 0, false when it is not, and nil when the
# response carries no such aggregation.
def completed?
  log "completed check: Refreshing #{helper.target_index_name}"
  helper.refresh_index(index_name: helper.target_index_name)

  notes_only = note_type_filter
  permission_fields = [
    field_exists('visibility_level'),
    field_exists_for_type('issues_access_level', 'Issue'),
    field_exists_for_type('repository_access_level', 'Commit'),
    field_exists_for_type('merge_requests_access_level', 'MergeRequest'),
    field_exists_for_type('snippets_access_level', 'Snippet')
  ]
  body = {
    size: 0,
    query: notes_only,
    aggs: {
      notes: {
        filter: { bool: { should: permission_fields, minimum_should_match: 1 } }
      }
    }
  }

  remaining = client
    .search(index: helper.target_index_name, body: body)
    .dig('aggregations', 'notes', 'doc_count')
  log "Migration has #{remaining} documents remaining" if remaining

  remaining && remaining == 0
end
private
# Term clause matching documents whose `type` is 'note'.
def note_type_filter
  { term: { type: { value: 'note' } } }
end
# Bool clause requiring the given field to exist. The `must` list is a fresh
# array on every call, which `field_exists_for_type` appends to.
def field_exists(field)
  { bool: { must: [{ exists: { field: field } }] } }
end
# Same clause as `field_exists(field)`, additionally requiring `noteable_type`
# to equal the given type.
def field_exists_for_type(field, type)
  field_exists(field).tap do |clause|
    clause[:bool][:must] << { term: { noteable_type: { value: type } } }
  end
end
include Elastic::MigrationObsolete
end
# frozen_string_literal: true
class AddPermissionsDataToNotesDocuments < Elastic::Migration
batched!
throttle_delay 3.minutes
QUERY_BATCH_SIZE = 6_000
UPDATE_BATCH_SIZE = 100
# Re-queues one batch of note documents that are missing permission fields.
#
# Only logs and returns when `completed?` is already truthy. Otherwise it runs
# `es_query` limited to QUERY_BATCH_SIZE hits against
# `helper.target_index_name` and passes a document reference for every hit to
# Elastic::ProcessInitialBookkeepingService.track!, UPDATE_BATCH_SIZE per call.
def migrate
  if completed?
    log "Skipping adding permission data to notes documents migration since it is already applied"
    return
  end

  log "Adding permission data to notes documents for batch of #{QUERY_BATCH_SIZE} documents"

  search_body = es_query.merge(size: QUERY_BATCH_SIZE)
  response = client.search(index: helper.target_index_name, body: search_body)

  document_references = (response.dig('hits', 'hits') || []).map do |hit|
    # ensure that any notes missing from the database will be removed from Elasticsearch
    # as the data is back-filled
    Gitlab::Elastic::DocumentReference.new(
      Note,
      hit.dig('_source', 'id'),
      hit['_id'],
      hit.dig('_source', 'join_field', 'parent')
    )
  end

  document_references.each_slice(UPDATE_BATCH_SIZE) do |batch|
    Elastic::ProcessInitialBookkeepingService.track!(*batch)
  end

  log "Adding permission data to notes documents is completed for batch of #{document_references.size} documents"
end
# Refreshes the index, then counts the documents matching `es_query`.
#
# Returns true when the count is 0, false when it is not, and nil when the
# response has no `count`.
def completed?
  log "completed check: Refreshing #{helper.target_index_name}"
  helper.refresh_index(index_name: helper.target_index_name)

  remaining = client.count(index: helper.target_index_name, body: es_query)['count']
  log "Migration has #{remaining} documents remaining" if remaining

  remaining && remaining == 0
end
private
# Query selecting note documents that lack at least one expected permission
# field for their `noteable_type` (`visibility_level` or the type-specific
# access level). Wrapped in a filter clause.
def es_query
  notes_only = note_type_filter
  missing_field_clauses = [
    ['visibility_level', 'Issue'],
    ['visibility_level', 'Commit'],
    ['visibility_level', 'MergeRequest'],
    ['visibility_level', 'Snippet'],
    ['issues_access_level', 'Issue'],
    ['repository_access_level', 'Commit'],
    ['merge_requests_access_level', 'MergeRequest'],
    ['snippets_access_level', 'Snippet']
  ].map { |field, type| field_does_not_exist_for_type(field, type) }

  notes_missing_data = {
    bool: { must: notes_only, should: missing_field_clauses, minimum_should_match: 1 }
  }

  { query: { bool: { filter: notes_missing_data } } }
end
# Term clause matching documents whose `type` is 'note'.
def note_type_filter
  { term: { type: { value: 'note' } } }
end
# Bool clause matching documents whose `noteable_type` equals the given type
# and which do not have the given field.
def field_does_not_exist_for_type(field, type)
  of_type = { term: { noteable_type: { value: type } } }
  has_field = { exists: { field: field } }

  { bool: { must: of_type, must_not: has_field } }
end
include Elastic::MigrationObsolete
end
......@@ -17,7 +17,7 @@ module Elastic
end
options[:features] = 'issues'
options[:no_join_project] = Elastic::DataMigrationService.migration_has_finished?(:add_new_data_to_issues_documents)
options[:no_join_project] = true
context.name(:issue) do
query_hash = context.name(:authorized) { project_ids_filter(query_hash, options) }
query_hash = context.name(:confidentiality) { confidentiality_filter(query_hash, options) }
......
......@@ -26,11 +26,7 @@ module Elastic
private
def generic_attributes
if Elastic::DataMigrationService.migration_has_finished?(:migrate_issues_to_separate_index)
super.except('join_field')
else
super
end
super.except('join_field')
end
end
end
......
......@@ -13,7 +13,7 @@ module Elastic
options[:in] = ['note']
query_hash = basic_query_hash(%w[note], query, count_only: options[:count_only])
options[:no_join_project] = Elastic::DataMigrationService.migration_has_finished?(:add_permissions_data_to_notes_documents)
options[:no_join_project] = true
context.name(:note) do
query_hash = context.name(:authorized) { project_ids_filter(query_hash, options) }
query_hash = context.name(:confidentiality) { confidentiality_filter(query_hash, options) }
......
......@@ -28,12 +28,8 @@ module Elastic
}
end
# do not add the permission fields unless the `remove_permissions_data_from_notes_documents`
# migration has completed otherwise the migration will never finish
if Elastic::DataMigrationService.migration_has_finished?(:remove_permissions_data_from_notes_documents)
data['visibility_level'] = target.project&.visibility_level || Gitlab::VisibilityLevel::PRIVATE
merge_project_feature_access_level(data)
end
data['visibility_level'] = target.project&.visibility_level || Gitlab::VisibilityLevel::PRIVATE
merge_project_feature_access_level(data)
data.merge(generic_attributes)
end
......
......@@ -8,7 +8,7 @@ namespace :gitlab do
helper = Gitlab::Elastic::Helper.default
indices = [helper.target_name]
indices += helper.standalone_indices_proxies.map(&:index_name) if Elastic::DataMigrationService.migration_has_finished?(:migrate_issues_to_separate_index)
indices += helper.standalone_indices_proxies.map(&:index_name)
indices.each do |index_name|
puts "===== Size stats for index: #{index_name} ====="
pp helper.index_size(index_name: index_name).slice(*%w(docs store))
......
# frozen_string_literal: true
require 'spec_helper'
require File.expand_path('ee/elastic/migrate/20201116142400_add_new_data_to_issues_documents.rb')
RSpec.describe AddNewDataToIssuesDocuments, :elastic, :sidekiq_inline do
let(:version) { 20201116142400 }
let(:migration) { described_class.new(version) }
let(:issues) { create_list(:issue, 3) }
before do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
allow(Elastic::DataMigrationService).to receive(:migration_has_finished?)
.with(:migrate_issues_to_separate_index)
.and_return(false)
# ensure issues are indexed
issues
ensure_elasticsearch_index!
end
describe 'migration_options' do
it 'has migration options set', :aggregate_failures do
expect(migration.batched?).to be_truthy
expect(migration.throttle_delay).to eq(5.minutes)
end
end
describe '.migrate' do
subject { migration.migrate }
context 'when migration is already completed' do
before do
add_visibility_level_for_issues(issues)
end
it 'does not modify data', :aggregate_failures do
expect(::Elastic::ProcessBookkeepingService).not_to receive(:track!)
subject
end
end
context 'migration process' do
before do
remove_visibility_level_for_issues(issues)
end
it 'updates all issue documents' do
# track calls are batched in groups of 100
expect(::Elastic::ProcessBookkeepingService).to receive(:track!).once do |*tracked_refs|
expect(tracked_refs.count).to eq(3)
end
subject
end
it 'only updates issue documents missing visibility_level', :aggregate_failures do
issue = issues.first
add_visibility_level_for_issues(issues[1..-1])
expected = [Gitlab::Elastic::DocumentReference.new(Issue, issue.id, issue.es_id, issue.es_parent)]
expect(::Elastic::ProcessBookkeepingService).to receive(:track!).with(*expected).once
subject
end
it 'processes in batches', :aggregate_failures do
stub_const("#{described_class}::QUERY_BATCH_SIZE", 2)
stub_const("#{described_class}::UPDATE_BATCH_SIZE", 1)
expect(::Elastic::ProcessBookkeepingService).to receive(:track!).exactly(3).times.and_call_original
# cannot use subject in spec because it is memoized
migration.migrate
ensure_elasticsearch_index!
migration.migrate
end
end
end
describe '.completed?' do
subject { migration.completed? }
context 'when documents are missing visibility_level' do
before do
remove_visibility_level_for_issues(issues)
end
it { is_expected.to be_falsey }
end
context 'when no documents are missing visibility_level' do
before do
add_visibility_level_for_issues(issues)
end
it { is_expected.to be_truthy }
end
end
private
# Spec helper: sets `visibility_level` to PRIVATE on the indexed documents of
# the given issues through an update-by-query script.
def add_visibility_level_for_issues(issues)
  script_params = { visibility_level: Gitlab::VisibilityLevel::PRIVATE }

  update_by_query(
    issues,
    {
      source: "ctx._source['visibility_level'] = params.visibility_level;",
      lang: "painless",
      params: script_params
    }
  )
end
# Spec helper: strips `visibility_level` from the indexed documents of the
# given issues through an update-by-query script.
def remove_visibility_level_for_issues(issues)
  update_by_query(issues, { source: "ctx._source.remove('visibility_level')" })
end
# Spec helper: runs the given script against the indexed documents of the
# given issues (matched by id and by type 'issue') in the Issue index.
def update_by_query(issues, script)
  matching_issues = {
    bool: {
      must: [
        { terms: { id: issues.map(&:id) } },
        { term: { type: { value: 'issue' } } }
      ]
    }
  }
  request = {
    index: Issue.__elasticsearch__.index_name,
    wait_for_completion: true, # run synchronously
    refresh: true, # make operation visible to search
    body: { script: script, query: matching_issues }
  }

  Issue.__elasticsearch__.client.update_by_query(request)
end
end
# frozen_string_literal: true
require 'spec_helper'
require File.expand_path('ee/elastic/migrate/20201123123400_migrate_issues_to_separate_index.rb')
RSpec.describe MigrateIssuesToSeparateIndex, :elastic, :sidekiq_inline do
let(:version) { 20201123123400 }
let(:migration) { described_class.new(version) }
let(:issues) { create_list(:issue, 3) }
let(:index_name) { "#{es_helper.target_name}-issues" }
before do
allow(Elastic::DataMigrationService).to receive(:migration_has_finished?)
.with(:migrate_issues_to_separate_index)
.and_return(false)
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
issues
ensure_elasticsearch_index!
end
describe 'migration_options' do
it 'has migration options set', :aggregate_failures do
expect(migration.batched?).to be_truthy
expect(migration.throttle_delay).to eq(1.minute)
expect(migration.pause_indexing?).to be_truthy
end
end
describe '.migrate', :clean_gitlab_redis_shared_state do
context 'initial launch' do
before do
es_helper.delete_index(index_name: es_helper.target_index_name(target: index_name))
end
it 'creates an index and sets migration_state' do
expect { migration.migrate }.to change { es_helper.alias_exists?(name: index_name) }.from(false).to(true)
expect(migration.migration_state).to include(slice: 0, max_slices: 5)
end
end
context 'batch run' do
it 'migrates all issues' do
total_shards = es_helper.get_settings.dig('number_of_shards').to_i
migration.set_migration_state(slice: 0, max_slices: total_shards)
total_shards.times do |i|
migration.migrate
end
expect(migration.completed?).to be_truthy
expect(es_helper.documents_count(index_name: "#{es_helper.target_name}-issues")).to eq(issues.count)
end
end
context 'failed run' do
let(:client) { double('Elasticsearch::Transport::Client') }
before do
allow(migration).to receive(:client).and_return(client)
end
context 'exception is raised' do
before do
allow(client).to receive(:reindex).and_raise(StandardError)
end
it 'increases retry_attempt' do
migration.set_migration_state(slice: 0, max_slices: 2, retry_attempt: 1)
expect { migration.migrate }.to raise_error(StandardError)
expect(migration.migration_state).to match(slice: 0, max_slices: 2, retry_attempt: 2)
end
it 'fails the migration after too many attempts' do
  migration.set_migration_state(slice: 0, max_slices: 2, retry_attempt: 30)

  # A `not_to receive` expectation only observes calls made after it is
  # registered, so it has to be set up before the migration runs.
  expect(migration).not_to receive(:process_response)

  migration.migrate

  expect(migration.migration_state).to match(slice: 0, max_slices: 2, retry_attempt: 30, halted: true, halted_indexing_unpaused: false)
end
end
context 'elasticsearch failures' do
context 'total is not equal' do
before do
allow(client).to receive(:reindex).and_return({ "total" => 60, "updated" => 0, "created" => 45, "deleted" => 0, "failures" => [] })
end
it 'raises an error' do
migration.set_migration_state(slice: 0, max_slices: 2)
expect { migration.migrate }.to raise_error(/total is not equal/)
end
end
# The reindex response reports failures; the migration is expected to raise.
context 'reindexing failures' do
  before do
    # String keys throughout, matching the other fake reindex responses in this spec.
    allow(client).to receive(:reindex).and_return({ "total" => 60, "updated" => 0, "created" => 0, "deleted" => 0, "failures" => [{ "type" => "es_rejected_execution_exception" }] })
  end

  it 'raises an error' do
    migration.set_migration_state(slice: 0, max_slices: 2)

    expect { migration.migrate }.to raise_error(/failed with/)
  end
end
end
end
end
describe '.completed?' do
subject { migration.completed? }
before do
2.times do |slice|
migration.set_migration_state(slice: slice, max_slices: 2)
migration.migrate
end
end
context 'counts are equal' do
let(:issues_count) { issues.count }
it 'returns true' do
is_expected.to be_truthy
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
require File.expand_path('ee/elastic/migrate/20210112165500_delete_issues_from_original_index.rb')
RSpec.describe DeleteIssuesFromOriginalIndex, :elastic, :sidekiq_inline do
let(:version) { 20210112165500 }
let(:migration) { described_class.new(version) }
let(:helper) { Gitlab::Elastic::Helper.new }
before do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
allow(migration).to receive(:helper).and_return(helper)
end
describe 'migration_options' do
it 'has migration options set', :aggregate_failures do
expect(migration.batched?).to be_truthy
expect(migration.throttle_delay).to eq(1.minute)
end
end
context 'issues are already deleted' do
it 'does not execute delete_by_query' do
expect(migration.completed?).to be_truthy
expect(helper.client).not_to receive(:delete_by_query)
migration.migrate
end
end
context 'issues are still present in the index' do
let(:issues) { create_list(:issue, 3) }
before do
allow(Elastic::DataMigrationService).to receive(:migration_has_finished?)
.with(:migrate_issues_to_separate_index)
.and_return(false)
# ensure issues are indexed
issues
ensure_elasticsearch_index!
end
it 'removes issues from the index' do
expect { migration.migrate }.to change { migration.completed? }.from(false).to(true)
end
end
context 'migration fails' do
let(:client) { double('Elasticsearch::Transport::Client') }
before do
allow(migration).to receive(:client).and_return(client)
allow(migration).to receive(:completed?).and_return(false)
end
context 'exception is raised' do
before do
allow(client).to receive(:delete_by_query).and_raise(StandardError)
end
it 'increases retry_attempt' do
migration.set_migration_state(retry_attempt: 1)
expect { migration.migrate }.to raise_error(StandardError)
expect(migration.migration_state).to match(retry_attempt: 2)
end
it 'fails the migration after too many attempts' do
  migration.set_migration_state(retry_attempt: 30)

  # A `not_to receive` expectation only observes calls made after it is
  # registered, so it has to be set up before the migration runs.
  expect(client).not_to receive(:delete_by_query)

  migration.migrate

  expect(migration.migration_state).to match(retry_attempt: 30, halted: true, halted_indexing_unpaused: false)
end
end
context 'es responds with errors' do
before do
allow(client).to receive(:delete_by_query).and_return('failures' => ['failed'])
end
it 'raises an error and increases retry attempt' do
expect { migration.migrate }.to raise_error(/Failed to delete issues/)
expect(migration.migration_state).to match(retry_attempt: 1)
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
require File.expand_path('ee/elastic/migrate/20210127154600_remove_permissions_data_from_notes_documents.rb')
RSpec.describe RemovePermissionsDataFromNotesDocuments, :elastic, :sidekiq_inline do
let(:version) { 20210127154600 }
let(:migration) { described_class.new(version) }
let(:helper) { Gitlab::Elastic::Helper.new }
before do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
allow(migration).to receive(:helper).and_return(helper)
set_elasticsearch_migration_to :remove_permissions_data_from_notes_documents, including: false
end
describe 'migration_options' do
it 'has migration options set', :aggregate_failures do
expect(migration.batched?).to be_truthy
expect(migration.throttle_delay).to eq(1.minute)
end
end
describe '#migrate' do
let!(:note_on_commit) { create(:note_on_commit) }
let!(:note_on_issue) { create(:note_on_issue) }
let!(:note_on_merge_request) { create(:note_on_merge_request) }
let!(:note_on_snippet) { create(:note_on_project_snippet) }
before do
ensure_elasticsearch_index!
end
context 'when migration is completed' do
before do
remove_permission_data_for_notes([note_on_commit, note_on_issue, note_on_merge_request, note_on_snippet])
end
it 'does not queue documents for indexing', :aggregate_failures do
expect(migration.completed?).to be_truthy
expect(::Elastic::ProcessBookkeepingService).not_to receive(:track!)
migration.migrate
end
end
context 'migration process' do
before do
add_permission_data_for_notes([note_on_commit, note_on_issue, note_on_merge_request, note_on_snippet])
# migrations are completed by default in test environments
# required to prevent the `as_indexed_json` method from populating the permissions fields
set_elasticsearch_migration_to version, including: false
end
it 'queues documents for indexing' do
# track calls are batched in groups of 100
expect(::Elastic::ProcessBookkeepingService).to receive(:track!).once do |*tracked_refs|
expect(tracked_refs.count).to eq(4)
end
migration.migrate
end
it 'only queues documents for indexing that contain permission data', :aggregate_failures do
remove_permission_data_for_notes([note_on_issue, note_on_snippet, note_on_merge_request])
expected = [Gitlab::Elastic::DocumentReference.new(Note, note_on_commit.id, note_on_commit.es_id, note_on_commit.es_parent)]
expect(::Elastic::ProcessBookkeepingService).to receive(:track!).with(*expected).once
migration.migrate
end
it 'processes in batches until completed' do
stub_const("#{described_class}::QUERY_BATCH_SIZE", 2)
stub_const("#{described_class}::UPDATE_BATCH_SIZE", 1)
allow(::Elastic::ProcessBookkeepingService).to receive(:track!).and_call_original
migration.migrate
expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(2).times
ensure_elasticsearch_index!
migration.migrate
expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(4).times
ensure_elasticsearch_index!
migration.migrate
# The migration should have already finished so there are no more items to process
expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(4).times
expect(migration).to be_completed
end
end
end
describe '#completed?' do
let!(:note_on_commit) { create(:note_on_commit) }
before do
ensure_elasticsearch_index!
end
subject { migration.completed? }
context 'when no documents have permissions data' do
before do
remove_permission_data_for_notes([note_on_commit])
end
it { is_expected.to be_truthy }
end
context 'when documents have permissions data' do
before do
add_permission_data_for_notes([note_on_commit])
end
it { is_expected.to be_falsey }
end
it 'refreshes the index' do
expect(helper).to receive(:refresh_index)
subject
end
end
private
# Spec helper: writes `visibility_level` and the four `*_access_level` fields
# (all set to PRIVATE) onto the indexed documents of the given notes.
def add_permission_data_for_notes(notes)
  script_params = { visibility_level: Gitlab::VisibilityLevel::PRIVATE }

  update_by_query(
    notes,
    {
      source: "ctx._source['visibility_level'] = params.visibility_level; ctx._source['issues_access_level'] = params.visibility_level; ctx._source['merge_requests_access_level'] = params.visibility_level; ctx._source['snippets_access_level'] = params.visibility_level; ctx._source['repository_access_level'] = params.visibility_level;",
      lang: "painless",
      params: script_params
    }
  )
end
# Spec helper: strips `visibility_level` and the four `*_access_level` fields
# from the indexed documents of the given notes.
def remove_permission_data_for_notes(notes)
  update_by_query(
    notes,
    {
      source: "ctx._source.remove('visibility_level'); ctx._source.remove('repository_access_level'); ctx._source.remove('snippets_access_level'); ctx._source.remove('merge_requests_access_level'); ctx._source.remove('issues_access_level');"
    }
  )
end
# Spec helper: runs the given script against the indexed documents of the
# given notes (matched by id and by type 'note') in the Note index.
def update_by_query(notes, script)
  matching_notes = {
    bool: {
      must: [
        { terms: { id: notes.map(&:id) } },
        { term: { type: { value: 'note' } } }
      ]
    }
  }
  request = {
    index: Note.__elasticsearch__.index_name,
    wait_for_completion: true, # run synchronously
    refresh: true, # make operation visible to search
    body: { script: script, query: matching_notes }
  }

  Note.__elasticsearch__.client.update_by_query(request)
end
end
# frozen_string_literal: true
require 'spec_helper'
require File.expand_path('ee/elastic/migrate/20210128163600_add_permissions_data_to_notes_documents.rb')
RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do
let(:version) { 20210128163600 }
let(:migration) { described_class.new(version) }
before do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
describe 'migration_options' do
it 'has migration options set', :aggregate_failures do
expect(migration.batched?).to be_truthy
expect(migration.throttle_delay).to eq(3.minutes)
end
end
describe '#migrate' do
let(:notes) { [note_on_commit, note_on_issue, note_on_merge_request, note_on_snippet] }
let!(:note_on_commit) { create(:note_on_commit) }
let!(:note_on_issue) { create(:note_on_issue) }
let!(:note_on_merge_request) { create(:note_on_merge_request) }
let!(:note_on_snippet) { create(:note_on_project_snippet) }
before do
ensure_elasticsearch_index!
end
context 'when migration is completed' do
it 'does not queue documents for indexing' do
expect(migration.completed?).to be_truthy
expect(::Elastic::ProcessInitialBookkeepingService).not_to receive(:track!)
migration.migrate
end
end
context 'migration process' do
before do
remove_permission_data_for_notes(notes)
end
it 'queues documents for indexing' do
expect(::Elastic::ProcessInitialBookkeepingService).to receive(:track!).once do |*tracked_refs|
expect(tracked_refs.count).to eq(4)
end
migration.migrate
end
it 'only queues documents for indexing that are missing permission data', :aggregate_failures do
add_permission_data_for_notes([note_on_issue, note_on_snippet, note_on_merge_request])
expected = [Gitlab::Elastic::DocumentReference.new(Note, note_on_commit.id, note_on_commit.es_id, note_on_commit.es_parent)]
expect(::Elastic::ProcessInitialBookkeepingService).to receive(:track!).with(*expected).once
migration.migrate
end
it 'processes in batches until completed', :aggregate_failures do
stub_const("#{described_class}::QUERY_BATCH_SIZE", 2)
stub_const("#{described_class}::UPDATE_BATCH_SIZE", 1)
allow(::Elastic::ProcessInitialBookkeepingService).to receive(:track!).and_call_original
migration.migrate
expect(::Elastic::ProcessInitialBookkeepingService).to have_received(:track!).exactly(2).times
ensure_elasticsearch_index!
migration.migrate
expect(::Elastic::ProcessInitialBookkeepingService).to have_received(:track!).exactly(4).times
ensure_elasticsearch_index!
migration.migrate
# The migration should have already finished so there are no more items to process
expect(::Elastic::ProcessInitialBookkeepingService).to have_received(:track!).exactly(4).times
expect(migration).to be_completed
end
end
end
# Verifies #completed? in two ways: it must refresh the index, and, for every
# note factory in the table below, it must report the expected completion state
# both when permission data is missing and when it is present.
describe '#completed?' do
using RSpec::Parameterized::TableSyntax
# A dedicated helper instance, so an expectation can be set on it below
let(:helper) { Gitlab::Elastic::Helper.new }
subject { migration.completed? }
before do
# Make the migration use the helper instance defined above
allow(migration).to receive(:helper).and_return(helper)
end
it 'refreshes the index' do
expect(helper).to receive(:refresh_index)
subject
end
# Only notes on issues, commits, merge requests and project snippets are affected by
# this migration, so completed? should return false for those types while their
# documents are missing permission data. For every other note type completed? is
# expected to be true even when the data is missing.
where(:note_type, :expected_result) do
:diff_note_on_commit | false
:diff_note_on_design | true
:diff_note_on_merge_request | false
:discussion_note_on_commit | false
:discussion_note_on_issue | false
:discussion_note_on_merge_request | false
:discussion_note_on_personal_snippet | true
:discussion_note_on_project_snippet | false
:discussion_note_on_vulnerability | true
:legacy_diff_note_on_commit | false
:legacy_diff_note_on_merge_request | false
:note_on_alert | true
:note_on_commit | false
:note_on_design | true
:note_on_epic | true
:note_on_issue | false
:note_on_merge_request | false
:note_on_personal_snippet | true
:note_on_project_snippet | false
:note_on_vulnerability | true
end
with_them do
# note_type is the factory name taken from the current table row
let!(:note) { create(note_type) } # rubocop:disable Rails/SaveBang
context 'when documents are missing permissions data' do
before do
# Index the note first, then strip the permission fields from its document
ensure_elasticsearch_index!
remove_permission_data_for_notes([note])
end
it { is_expected.to eq(expected_result) }
end
context 'when no documents are missing permissions data' do
before do
ensure_elasticsearch_index!
end
# With the document left untouched, every note type counts as completed
it { is_expected.to be_truthy }
end
end
end
private
# Sets the permission fields on the indexed documents of the given notes.
#
# Runs a painless script through #update_by_query that assigns
# Gitlab::VisibilityLevel::PRIVATE to visibility_level and to each of the
# issues/merge_requests/snippets/repository access level fields.
#
# notes - collection of notes whose indexed documents should be updated
#
# Returns whatever #update_by_query returns.
def add_permission_data_for_notes(notes)
  painless_source = "ctx._source['visibility_level'] = params.visibility_level; ctx._source['issues_access_level'] = params.visibility_level; ctx._source['merge_requests_access_level'] = params.visibility_level; ctx._source['snippets_access_level'] = params.visibility_level; ctx._source['repository_access_level'] = params.visibility_level;"

  update_by_query(
    notes,
    {
      source: painless_source,
      lang: "painless",
      params: { visibility_level: Gitlab::VisibilityLevel::PRIVATE }
    }
  )
end
# Strips the permission fields from the indexed documents of the given notes.
#
# Runs a script through #update_by_query that removes visibility_level and the
# repository/snippets/merge_requests/issues access level fields from each document.
#
# notes - collection of notes whose indexed documents should be updated
#
# Returns whatever #update_by_query returns.
def remove_permission_data_for_notes(notes)
  removal_source = "ctx._source.remove('visibility_level'); ctx._source.remove('repository_access_level'); ctx._source.remove('snippets_access_level'); ctx._source.remove('merge_requests_access_level'); ctx._source.remove('issues_access_level');"

  update_by_query(notes, { source: removal_source })
end
# Applies +script+ to the indexed documents of the given notes via the
# update-by-query call of the Note Elasticsearch client.
#
# Documents are matched when their id is one of the notes' ids AND their
# type is 'note'.
#
# notes  - collection of objects responding to #id
# script - script hash placed under body.script of the request
#
# Returns the client's response to the update-by-query request.
def update_by_query(notes, script)
  ids = notes.map { |note| note.id }
  es_client = Note.__elasticsearch__.client

  matches_ids = { terms: { id: ids } }
  matches_note_type = { term: { type: { value: 'note' } } }

  request = {
    index: Note.__elasticsearch__.index_name,
    wait_for_completion: true, # run synchronously
    refresh: true, # make operation visible to search
    body: {
      script: script,
      query: { bool: { must: [matches_ids, matches_note_type] } }
    }
  }

  es_client.update_by_query(request)
end
end
......@@ -10,7 +10,7 @@ RSpec.describe AddNewDataToMergeRequestsDocuments, :elastic, :sidekiq_inline do
before do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
set_elasticsearch_migration_to :add_new_data_to_merge_requests_documents, including: false
# ensure merge_requests are indexed
merge_requests
......
......@@ -158,18 +158,6 @@ RSpec.describe Note, :elastic do
project.project_feature.update_attribute(access_level.to_sym, project_feature_permission) if access_level.present?
end
it 'does not contain permissions if remove_permissions_data_from_notes_documents is not finished' do
allow(Elastic::DataMigrationService).to receive(:migration_has_finished?)
.with(:remove_permissions_data_from_notes_documents)
.and_return(false)
allow(Elastic::DataMigrationService).to receive(:migration_has_finished?)
.with(:migrate_notes_to_separate_index)
.and_return(false)
expect(note_json).not_to have_key(access_level) if access_level.present?
expect(note_json).not_to have_key('visibility_level')
end
it 'contains the correct permissions', :aggregate_failures do
if access_level
expect(note_json).to have_key(access_level)
......
......@@ -18,18 +18,6 @@ RSpec.describe Search::GlobalService do
let(:service) { described_class.new(user, params) }
end
context 'issue search' do
let(:results) { described_class.new(nil, search: '*').execute.objects('issues') }
it_behaves_like 'search query applies joins based on migrations shared examples', :add_new_data_to_issues_documents
end
context 'notes search' do
let(:results) { described_class.new(nil, search: '*').execute.objects('notes') }
it_behaves_like 'search query applies joins based on migrations shared examples', :add_permissions_data_to_notes_documents
end
context 'merge_requests search' do
let(:results) { described_class.new(nil, search: '*').execute.objects('merge_requests') }
......@@ -128,17 +116,7 @@ RSpec.describe Search::GlobalService do
end
with_them do
context 'when add_permissions_data_to_notes_documents migration is finished' do
it_behaves_like 'search respects visibility'
end
context 'when add_permissions_data_to_notes_documents migration is not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search respects visibility'
end
it_behaves_like 'search respects visibility'
end
end
......@@ -150,17 +128,7 @@ RSpec.describe Search::GlobalService do
end
with_them do
context 'when add_permissions_data_to_notes_documents migration is finished' do
it_behaves_like 'search respects visibility'
end
context 'when add_permissions_data_to_notes_documents migration is not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search respects visibility'
end
it_behaves_like 'search respects visibility'
end
end
......@@ -177,17 +145,7 @@ RSpec.describe Search::GlobalService do
project.repository.index_commits_and_blobs
end
context 'when add_permissions_data_to_notes_documents migration is finished' do
it_behaves_like 'search respects visibility'
end
context 'when add_permissions_data_to_notes_documents migration is not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search respects visibility'
end
it_behaves_like 'search respects visibility'
end
end
......@@ -199,17 +157,7 @@ RSpec.describe Search::GlobalService do
end
with_them do
context 'when add_permissions_data_to_notes_documents migration is finished' do
it_behaves_like 'search respects visibility'
end
context 'when add_permissions_data_to_notes_documents migration is not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search respects visibility'
end
it_behaves_like 'search respects visibility'
end
end
end
......@@ -218,38 +166,14 @@ RSpec.describe Search::GlobalService do
let(:scope) { 'issues' }
let(:search) { issue.title }
context 'when add_new_data_to_issues_documents migration is finished' do
let!(:issue) { create :issue, project: project }
where(:project_level, :feature_access_level, :membership, :admin_mode, :expected_count) do
permission_table_for_guest_feature_access
end
let!(:issue) { create :issue, project: project }
with_them do
it_behaves_like 'search respects visibility'
end
where(:project_level, :feature_access_level, :membership, :admin_mode, :expected_count) do
permission_table_for_guest_feature_access
end
# Since newly created indices automatically have all migrations as
# finished we need a test to verify the old style searches work for
# instances which haven't finished the migration yet
context 'when add_new_data_to_issues_documents migration is not finished' do
before do
set_elasticsearch_migration_to :add_new_data_to_issues_documents, including: false
end
# issue cannot be defined prior to the migration mocks because it
# will cause the incorrect value to be passed to `use_separate_indices` when creating
# the proxy
let!(:issue) { create(:issue, project: project) }
where(:project_level, :feature_access_level, :membership, :admin_mode, :expected_count) do
permission_table_for_guest_feature_access
end
with_them do
it_behaves_like 'search respects visibility'
end
with_them do
it_behaves_like 'search respects visibility'
end
end
......@@ -457,61 +381,19 @@ RSpec.describe Search::GlobalService do
context 'with notes on issues' do
let(:noteable) { create :issue, project: project }
context 'when add_permissions_data_to_notes_documents migration has not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search notes shared examples', :note_on_issue
end
context 'when add_permissions_data_to_notes_documents migration has finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: true
end
it_behaves_like 'search notes shared examples', :note_on_issue
end
it_behaves_like 'search notes shared examples', :note_on_issue
end
context 'with notes on merge requests' do
let(:noteable) { create :merge_request, target_project: project, source_project: project }
context 'when add_permissions_data_to_notes_documents migration has not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search notes shared examples', :note_on_merge_request
end
context 'when add_permissions_data_to_notes_documents migration has finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: true
end
it_behaves_like 'search notes shared examples', :note_on_merge_request
end
it_behaves_like 'search notes shared examples', :note_on_merge_request
end
context 'with notes on commits' do
let(:noteable) { create(:commit, project: project) }
context 'when add_permissions_data_to_notes_documents migration has not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search notes shared examples', :note_on_commit
end
context 'when add_permissions_data_to_notes_documents migration has finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: true
end
it_behaves_like 'search notes shared examples', :note_on_commit
end
it_behaves_like 'search notes shared examples', :note_on_commit
end
end
end
......@@ -67,15 +67,6 @@ RSpec.describe Search::GroupService do
end
end
context 'notes search', :elastic, :clean_gitlab_redis_shared_state do
let_it_be(:group) { create(:group) }
let_it_be(:project) { create(:project, namespace: group) }
let(:results) { described_class.new(nil, group, search: 'test').execute.objects('notes') }
it_behaves_like 'search query applies joins based on migrations shared examples', :add_permissions_data_to_notes_documents
end
context 'visibility', :elastic_delete_by_query, :clean_gitlab_redis_shared_state, :sidekiq_inline do
include_context 'ProjectPolicyTable context'
......@@ -154,17 +145,7 @@ RSpec.describe Search::GroupService do
end
with_them do
context 'when add_permissions_data_to_notes_documents migration is finished' do
it_behaves_like 'search respects visibility'
end
context 'when add_permissions_data_to_notes_documents migration is not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search respects visibility'
end
it_behaves_like 'search respects visibility'
end
end
......@@ -177,17 +158,7 @@ RSpec.describe Search::GroupService do
end
with_them do
context 'when add_permissions_data_to_notes_documents migration is finished' do
it_behaves_like 'search respects visibility'
end
context 'when add_permissions_data_to_notes_documents migration is not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search respects visibility'
end
it_behaves_like 'search respects visibility'
end
end
......@@ -207,17 +178,7 @@ RSpec.describe Search::GroupService do
project2.repository.index_commits_and_blobs
end
context 'when add_permissions_data_to_notes_documents migration is finished' do
it_behaves_like 'search respects visibility'
end
context 'when add_permissions_data_to_notes_documents migration is not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search respects visibility'
end
it_behaves_like 'search respects visibility'
end
end
......@@ -230,17 +191,7 @@ RSpec.describe Search::GroupService do
end
with_them do
context 'when add_permissions_data_to_notes_documents migration is finished' do
it_behaves_like 'search respects visibility'
end
context 'when add_permissions_data_to_notes_documents migration is not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search respects visibility'
end
it_behaves_like 'search respects visibility'
end
end
end
......
......@@ -34,14 +34,6 @@ RSpec.describe Search::ProjectService do
end
end
context 'notes search' do
let_it_be(:project) { create(:project) }
let(:results) { described_class.new(project, nil, search: 'test').execute.objects('notes') }
it_behaves_like 'search query applies joins based on migrations shared examples', :add_permissions_data_to_notes_documents
end
context 'visibility', :elastic_delete_by_query, :sidekiq_inline do
include_context 'ProjectPolicyTable context'
......@@ -121,17 +113,7 @@ RSpec.describe Search::ProjectService do
end
with_them do
context 'when add_permissions_data_to_notes_documents migration is finished' do
it_behaves_like 'search respects visibility'
end
context 'when add_permissions_data_to_notes_documents migration is not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search respects visibility'
end
it_behaves_like 'search respects visibility'
end
end
......@@ -144,17 +126,7 @@ RSpec.describe Search::ProjectService do
end
with_them do
context 'when add_permissions_data_to_notes_documents migration is finished' do
it_behaves_like 'search respects visibility'
end
context 'when add_permissions_data_to_notes_documents migration is not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search respects visibility'
end
it_behaves_like 'search respects visibility'
end
end
......@@ -174,17 +146,7 @@ RSpec.describe Search::ProjectService do
project2.repository.index_commits_and_blobs
end
context 'when add_permissions_data_to_notes_documents migration is finished' do
it_behaves_like 'search respects visibility'
end
context 'when add_permissions_data_to_notes_documents migration is not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search respects visibility'
end
it_behaves_like 'search respects visibility'
end
end
......@@ -197,17 +159,7 @@ RSpec.describe Search::ProjectService do
end
with_them do
context 'when add_permissions_data_to_notes_documents migration is finished' do
it_behaves_like 'search respects visibility'
end
context 'when add_permissions_data_to_notes_documents migration is not finished' do
before do
set_elasticsearch_migration_to :add_permissions_data_to_notes_documents, including: false
end
it_behaves_like 'search respects visibility'
end
it_behaves_like 'search respects visibility'
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment