Commit 9abb6fb5 authored by Dylan Griffith's avatar Dylan Griffith

Improve performance of Elasticsearch notes permissions migration

We are currently running this migration in production to reindex all the
notes in GitLab.com. There happen to be over 30M of them so it will take
quite a while to run. We have found in
https://gitlab.com/gitlab-org/gitlab/-/issues/324745 that the migration
is not performing as well as we'd hoped. Given that we want to keep each
job taking less than 1 minute to run we want to reduce the batch size to
make that easier.

We have found that 9000 per 3 minutes is too much and we are regularly
overlapping with the previous batch and thus we end up often processing
the same batch twice.

In addition it will be more efficient to push the work into the "initial
indexing queue" instead of our main "incremental indexing queue" as the
initial indexing queue has far less traffic (it only covers newly
created projects while incremental is any record update in any project).
parent babc2fe7
---
title: Improve performance of Elasticsearch notes permissions migration
merge_request: 56823
author:
type: performance
...@@ -4,7 +4,7 @@ class AddPermissionsDataToNotesDocuments < Elastic::Migration ...@@ -4,7 +4,7 @@ class AddPermissionsDataToNotesDocuments < Elastic::Migration
batched! batched!
throttle_delay 3.minutes throttle_delay 3.minutes
QUERY_BATCH_SIZE = 9_000 QUERY_BATCH_SIZE = 6_000
UPDATE_BATCH_SIZE = 100 UPDATE_BATCH_SIZE = 100
def migrate def migrate
...@@ -30,7 +30,7 @@ class AddPermissionsDataToNotesDocuments < Elastic::Migration ...@@ -30,7 +30,7 @@ class AddPermissionsDataToNotesDocuments < Elastic::Migration
end end
document_references.each_slice(UPDATE_BATCH_SIZE) do |refs| document_references.each_slice(UPDATE_BATCH_SIZE) do |refs|
Elastic::ProcessBookkeepingService.track!(*refs) Elastic::ProcessInitialBookkeepingService.track!(*refs)
end end
log "Adding permission data to notes documents is completed for batch of #{document_references.size} documents" log "Adding permission data to notes documents is completed for batch of #{document_references.size} documents"
......
...@@ -32,7 +32,7 @@ RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do ...@@ -32,7 +32,7 @@ RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do
context 'when migration is completed' do context 'when migration is completed' do
it 'does not queue documents for indexing' do it 'does not queue documents for indexing' do
expect(migration.completed?).to be_truthy expect(migration.completed?).to be_truthy
expect(::Elastic::ProcessBookkeepingService).not_to receive(:track!) expect(::Elastic::ProcessInitialBookkeepingService).not_to receive(:track!)
migration.migrate migration.migrate
end end
...@@ -44,7 +44,7 @@ RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do ...@@ -44,7 +44,7 @@ RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do
end end
it 'queues documents for indexing' do it 'queues documents for indexing' do
expect(::Elastic::ProcessBookkeepingService).to receive(:track!).once do |*tracked_refs| expect(::Elastic::ProcessInitialBookkeepingService).to receive(:track!).once do |*tracked_refs|
expect(tracked_refs.count).to eq(4) expect(tracked_refs.count).to eq(4)
end end
...@@ -55,7 +55,7 @@ RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do ...@@ -55,7 +55,7 @@ RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do
add_permission_data_for_notes([note_on_issue, note_on_snippet, note_on_merge_request]) add_permission_data_for_notes([note_on_issue, note_on_snippet, note_on_merge_request])
expected = [Gitlab::Elastic::DocumentReference.new(Note, note_on_commit.id, note_on_commit.es_id, note_on_commit.es_parent)] expected = [Gitlab::Elastic::DocumentReference.new(Note, note_on_commit.id, note_on_commit.es_id, note_on_commit.es_parent)]
expect(::Elastic::ProcessBookkeepingService).to receive(:track!).with(*expected).once expect(::Elastic::ProcessInitialBookkeepingService).to receive(:track!).with(*expected).once
migration.migrate migration.migrate
end end
...@@ -64,22 +64,22 @@ RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do ...@@ -64,22 +64,22 @@ RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do
stub_const("#{described_class}::QUERY_BATCH_SIZE", 2) stub_const("#{described_class}::QUERY_BATCH_SIZE", 2)
stub_const("#{described_class}::UPDATE_BATCH_SIZE", 1) stub_const("#{described_class}::UPDATE_BATCH_SIZE", 1)
allow(::Elastic::ProcessBookkeepingService).to receive(:track!).and_call_original allow(::Elastic::ProcessInitialBookkeepingService).to receive(:track!).and_call_original
migration.migrate migration.migrate
expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(2).times expect(::Elastic::ProcessInitialBookkeepingService).to have_received(:track!).exactly(2).times
ensure_elasticsearch_index! ensure_elasticsearch_index!
migration.migrate migration.migrate
expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(4).times expect(::Elastic::ProcessInitialBookkeepingService).to have_received(:track!).exactly(4).times
ensure_elasticsearch_index! ensure_elasticsearch_index!
migration.migrate migration.migrate
# The migration should have already finished so there are no more items to process # The migration should have already finished so there are no more items to process
expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(4).times expect(::Elastic::ProcessInitialBookkeepingService).to have_received(:track!).exactly(4).times
expect(migration).to be_completed expect(migration).to be_completed
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment