Commit 503e29a7 authored by Rémy Coutable's avatar Rémy Coutable

Merge branch '8013_and_8628_ES_indexing_race_condition' into 'master'

Fix repo pushes messing with initial Elasticsearch indexing

Closes #8628

See merge request gitlab-org/gitlab-ee!9478
parents 0155c489 6e415666
......@@ -366,6 +366,12 @@ Here are some common pitfalls and how to overcome them:
You will need to re-run all the rake tasks to re-index the database, repositories, and wikis.
- **No new data is added to the Elasticsearch index when I push code**
When performing the initial indexing of blobs, we lock all projects until the project finishes indexing. It could
happen that an error during the process causes one or multiple projects to remain locked. In order to unlock them,
run the `gitlab:elastic:clear_locked_projects` rake task.
- **"Can't specify parent if no parent field has been configured"**
If you enabled Elasticsearch before GitLab 8.12 and have not rebuilt indexes you will get
......
......@@ -8,7 +8,7 @@ module EE
override :execute_related_hooks
def execute_related_hooks
if ::Gitlab::CurrentSettings.elasticsearch_indexing? && default_branch?
if ::Gitlab::CurrentSettings.elasticsearch_indexing? && default_branch? && should_index_commits?
::ElasticCommitIndexerWorker.perform_async(project.id, params[:oldrev], params[:newrev])
end
......@@ -17,6 +17,10 @@ module EE
private
def should_index_commits?
::Gitlab::Redis::SharedState.with { |redis| !redis.sismember(:elastic_projects_indexing, project.id) }
end
override :pipeline_options
def pipeline_options
{ mirror_update: project.mirror? && project.repository.up_to_date_with_upstream?(branch_name) }
......
......@@ -11,17 +11,23 @@ class ElasticBatchProjectIndexerWorker
sidekiq_options retry: 10
def perform(start, finish, update_index = false)
projects = build_relation(start, finish, update_index)
projects = build_relation(start, finish)
projects.find_each { |project| run_indexer(project) }
projects.find_each { |project| run_indexer(project, update_index) }
end
private
def run_indexer(project)
def run_indexer(project, update_index)
# Ensure we remove the hold on the project, no matter what, so ElasticCommitIndexerWorker can do its thing
# We do this before the indexer starts to avoid the possibility of pushes coming in during this time not
# being indexed.
Gitlab::Redis::SharedState.with { |redis| redis.srem(:elastic_projects_indexing, project.id) }
logger.info "Indexing #{project.full_name} (ID=#{project.id})..."
last_commit = project.index_status.try(:last_commit)
# Get the last commit if we're updating indexed projects - otherwise we'll want to index everything
last_commit = project.index_status.try(:last_commit) if update_index
Gitlab::Elastic::Indexer.new(project).run(last_commit)
logger.info "Indexing #{project.full_name} (ID=#{project.id}) is done!"
......@@ -30,13 +36,9 @@ class ElasticBatchProjectIndexerWorker
end
# rubocop: disable CodeReuse/ActiveRecord
def build_relation(start, finish, update_index)
def build_relation(start, finish)
relation = Project.includes(:index_status)
unless update_index
relation = relation.where('index_statuses.id IS NULL').references(:index_statuses)
end
table = Project.arel_table
relation = relation.where(table[:id].gteq(start)) if start
relation = relation.where(table[:id].lteq(finish)) if finish
......
---
title: Fix repo pushes while initial Elasticsearch indexing not permitting initial
indexing to complete
merge_request: 9478
author:
type: fixed
......@@ -2,6 +2,11 @@ namespace :gitlab do
namespace :elastic do
desc "GitLab | Elasticsearch | Index eveything at once"
task :index do
# UPDATE_INDEX=true can cause some projects not to be indexed properly if someone were to push a commit to the
# project before the rake task could get to it, so we set it to `nil` here to avoid that. It doesn't make sense
# to use this configuration during a full re-index anyways.
ENV['UPDATE_INDEX'] = nil
Rake::Task["gitlab:elastic:create_empty_index"].invoke
Rake::Task["gitlab:elastic:clear_index_status"].invoke
Rake::Task["gitlab:elastic:index_wikis"].invoke
......@@ -40,6 +45,13 @@ namespace :gitlab do
end
end
desc 'GitLab | Elasticsearch | Unlock repositories for indexing in case something gets stuck'
task clear_locked_projects: :environment do
Gitlab::Redis::SharedState.with { |redis| redis.del(:elastic_projects_indexing) }
puts 'Cleared all locked projects. Incremental indexing should work now.'
end
desc "GitLab | Elasticsearch | Index wiki repositories"
task index_wikis: :environment do
projects = apply_project_filters(Project.with_wiki_enabled)
......@@ -180,6 +192,7 @@ namespace :gitlab do
relation.all.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
ids = relation.reorder(:id).pluck(:id)
Gitlab::Redis::SharedState.with { |redis| redis.sadd(:elastic_projects_indexing, ids) }
yield ids[0], ids[-1]
end
end
......
......@@ -16,16 +16,43 @@ describe GitPushService do
described_class.new(project, user, oldrev: oldrev, newrev: newrev, ref: ref)
end
before do
allow(project.repository).to receive(:commit).and_call_original
allow(project.repository).to receive(:commit).with("master").and_return(nil)
end
context 'deleted branch' do
let(:newrev) { blankrev }
it 'handles when remote branch exists' do
allow(project.repository).to receive(:commit).and_call_original
allow(project.repository).to receive(:commit).with("master").and_return(nil)
expect(project.repository).to receive(:commit).with("refs/remotes/upstream/master").and_return(sample_commit)
subject.execute
end
end
context 'ElasticSearch indexing' do
before do
stub_ee_application_setting(elasticsearch_indexing?: true)
end
context 'when the project is locked by elastic.rake', :clean_gitlab_redis_shared_state do
before do
Gitlab::Redis::SharedState.with { |redis| redis.sadd(:elastic_projects_indexing, project.id) }
end
it 'does not run ElasticCommitIndexerWorker' do
expect(ElasticCommitIndexerWorker).not_to receive(:perform_async)
subject.execute
end
end
it 'runs ElasticCommitIndexerWorker' do
expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id, oldrev, newrev)
subject.execute
end
end
end
end
......@@ -6,23 +6,38 @@ describe ElasticBatchProjectIndexerWorker do
describe '#perform' do
it 'runs the indexer for projects in the batch range' do
projects.each {|project| expect_index(project) }
projects.each { |project| expect_index(project, false) }
worker.perform(projects.first.id, projects.last.id)
end
it 'skips projects not in the batch range' do
expect_index(projects.first).never
expect_index(projects.last)
expect_index(projects.first, false).never
expect_index(projects.last, false)
worker.perform(projects.last.id, projects.last.id)
end
it 'clears the "locked" state from redis when the project finishes indexing' do
Gitlab::Redis::SharedState.with { |redis| redis.sadd(:elastic_projects_indexing, projects.first.id) }
expect_index(projects.first, false).and_call_original
expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
expect(indexer).to receive(:run).with(nil)
end
expect { worker.perform(projects.first.id, projects.first.id) }
.to change { project_locked?(projects.first) }.from(true).to(false)
end
context 'update_index = false' do
it 'skips projects that were already indexed' do
projects.first.create_index_status!
it 'indexes all projects it receives even if already indexed' do
projects.first.build_index_status.update!(last_commit: 'foo')
expect_index(projects.first).never
expect_index(projects.first, false).and_call_original
expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
expect(indexer).to receive(:run).with(nil)
end
worker.perform(projects.first.id, projects.first.id)
end
......@@ -32,8 +47,8 @@ describe ElasticBatchProjectIndexerWorker do
it 'reindexes projects that were already indexed' do
projects.first.create_index_status!
expect_index(projects.first)
expect_index(projects.last)
expect_index(projects.first, true)
expect_index(projects.last, true)
worker.perform(projects.first.id, projects.last.id, true)
end
......@@ -41,7 +56,7 @@ describe ElasticBatchProjectIndexerWorker do
it 'starts indexing at the last indexed commit' do
projects.first.create_index_status!(last_commit: 'foo')
expect_index(projects.first).and_call_original
expect_index(projects.first, true).and_call_original
expect_any_instance_of(Gitlab::Elastic::Indexer).to receive(:run).with('foo')
worker.perform(projects.first.id, projects.first.id, true)
......@@ -49,7 +64,11 @@ describe ElasticBatchProjectIndexerWorker do
end
end
def expect_index(project)
expect(worker).to receive(:run_indexer).with(project)
def expect_index(project, update_index)
expect(worker).to receive(:run_indexer).with(project, update_index)
end
def project_locked?(project)
Gitlab::Redis::SharedState.with { |redis| redis.sismember(:elastic_projects_indexing, project.id) }
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment