Commit 5e468ced authored by Markus Koller, committed by Markus Koller

Respect limited indexing settings in rake tasks

Some of the rake tasks were not respecting the new limited indexing
settings for Elasticsearch, so this refactors them to use
IndexRecordService through ElasticIndexerWorker. As part of this change
we're also queuing a Sidekiq job for each individual project now
instead of processing them in batches in each job, and we're always
processing them asynchronously.

- The index_repositories, index_repositories_async, index_database and
  index_$MODEL tasks were replaced with a single index_projects task,
  which indexes projects and all their associated records and their
  repositories

- The BATCH environment variable was removed because it's not useful
  anymore, since everything gets queued in Sidekiq anyway
parent 7e5e9f9c
This diff is collapsed.
......@@ -33,6 +33,8 @@ module Elastic
end
# Tells callers whether this record should be sent to Elasticsearch.
#
# The answer comes straight from
# ::Gitlab::CurrentSettings.elasticsearch_indexing?; nothing about the
# individual record is consulted here (see the FIXME below).
#
# @return whatever ::Gitlab::CurrentSettings.elasticsearch_indexing? returns
def use_elasticsearch?
# FIXME: check project.use_elasticsearch? for ProjectSnippets?
# see https://gitlab.com/gitlab-org/gitlab-ee/issues/11850
::Gitlab::CurrentSettings.elasticsearch_indexing?
end
......
......@@ -8,6 +8,8 @@ module Elastic
# @param indexing [Boolean] determines whether operation is "indexing" or "updating"
def execute(record, indexing, options = {})
return true unless record.use_elasticsearch?
record.__elasticsearch__.client = client
import(record, record.class.nested?, indexing)
......
---
title: Respect limited indexing settings in rake tasks
merge_request: 13437
author:
type: fixed
......@@ -9,25 +9,29 @@ namespace :gitlab do
Rake::Task["gitlab:elastic:create_empty_index"].invoke
Rake::Task["gitlab:elastic:clear_index_status"].invoke
Rake::Task["gitlab:elastic:index_projects"].invoke
Rake::Task["gitlab:elastic:index_wikis"].invoke
Rake::Task["gitlab:elastic:index_database"].invoke
Rake::Task["gitlab:elastic:index_repositories"].invoke
Rake::Task["gitlab:elastic:index_snippets"].invoke
end
desc "GitLab | Elasticsearch | Index project repositories in the background"
task index_repositories_async: :environment do
print "Enqueuing project repositories in batches of #{batch_size}"
desc "GitLab | Elasticsearch | Index projects in the background"
task index_projects: :environment do
print "Enqueuing projects"
project_id_batches do |start, finish|
ElasticBatchProjectIndexerWorker.perform_async(start, finish)
project_id_batches do |ids|
args = ids.collect do |id|
[:index, 'Project', id, nil] # es_id is unused for :index
end
ElasticIndexerWorker.bulk_perform_async(args)
print "."
end
puts "OK"
end
desc "GitLab | ElasticSearch | Check project repository indexing status"
task index_repositories_status: :environment do
desc "GitLab | ElasticSearch | Check project indexing status"
task index_projects_status: :environment do
indexed = IndexStatus.count
projects = Project.count
percent = (indexed / projects.to_f) * 100.0
......@@ -35,16 +39,6 @@ namespace :gitlab do
puts "Indexing is %.2f%% complete (%d/%d projects)" % [percent, indexed, projects]
end
desc "GitLab | Elasticsearch | Index project repositories"
task index_repositories: :environment do
print "Indexing project repositories..."
Sidekiq::Logging.logger = Logger.new(STDOUT)
project_id_batches do |start, finish|
ElasticBatchProjectIndexerWorker.new.perform(start, finish)
end
end
desc 'GitLab | Elasticsearch | Unlock repositories for indexing in case something gets stuck'
task clear_locked_projects: :environment do
Gitlab::Redis::SharedState.with { |redis| redis.del(:elastic_projects_indexing) }
......@@ -70,34 +64,15 @@ namespace :gitlab do
end
end
INDEXABLE_CLASSES = {
"Project" => "index_projects",
"Issue" => "index_issues",
"MergeRequest" => "index_merge_requests",
"Snippet" => "index_snippets",
"Note" => "index_notes",
"Milestone" => "index_milestones"
}.freeze
INDEXABLE_CLASSES.each do |klass_name, task_name|
task task_name => :environment do
logger = Logger.new(STDOUT)
logger.info("Indexing #{klass_name.pluralize}...")
klass = Kernel.const_get(klass_name)
if klass_name == 'Note'
Note.searchable.es_import
else
klass.es_import
end
desc "GitLab | Elasticsearch | Index all snippets"
task index_snippets: :environment do
logger = Logger.new(STDOUT)
logger.info("Indexing snippets...")
logger.info("Indexing #{klass_name.pluralize}... " + "done".color(:green))
end
end
Snippet.es_import
desc "GitLab | Elasticsearch | Index all database objects"
multitask index_database: INDEXABLE_CLASSES.values
logger.info("Indexing snippets... " + "done".color(:green))
end
desc "GitLab | Elasticsearch | Create empty index"
task create_empty_index: :environment do
......@@ -190,10 +165,6 @@ namespace :gitlab do
end
end
def batch_size
ENV.fetch('BATCH', 300).to_i
end
def project_id_batches(&blk)
relation = Project
......@@ -201,10 +172,14 @@ namespace :gitlab do
relation = relation.includes(:index_status).where('index_statuses.id IS NULL').references(:index_statuses)
end
relation.all.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
if ::Gitlab::CurrentSettings.elasticsearch_limit_indexing?
relation = relation.where(id: ::Gitlab::CurrentSettings.elasticsearch_limited_projects.select(:id))
end
relation.all.in_batches(start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
ids = relation.reorder(:id).pluck(:id)
Gitlab::Redis::SharedState.with { |redis| redis.sadd(:elastic_projects_indexing, ids) }
yield ids[0], ids[-1]
yield ids
end
end
......
......@@ -125,4 +125,21 @@ describe Elastic::IndexRecordService, :elastic do
expect(Note.elastic_search('note_2', options: options).present?).to eq(true)
expect(Note.elastic_search('note_3', options: options).present?).to eq(true)
end
# Checks that the service leaves a record out of the index when the record's
# own use_elasticsearch? returns false.
it 'skips records for which indexing is disabled' do
project = nil
# The project is created with Sidekiq testing disabled — presumably so that
# no indexing job triggered by creation runs inline; TODO confirm.
Sidekiq::Testing.disable! do
project = create :project, name: 'project_1'
end
# Make this one record report that it must not be indexed; the expectation
# also asserts that the service actually asks the record.
expect(project).to receive(:use_elasticsearch?).and_return(false)
Sidekiq::Testing.inline! do
subject.execute(project, true)
# Refresh before searching so the assertion below sees the current index state.
Gitlab::Elastic::Helper.refresh_index
end
# Nothing should have been written for the project, so a search by its name finds nothing.
expect(Project.elastic_search('project_1').present?).to eq(false)
end
end
# frozen_string_literal: true
require 'rake_helper'
# Specs for the rake tasks in the gitlab:elastic namespace: the umbrella
# `index` task, `index_projects` (with and without limited indexing) and
# `index_snippets`.
describe 'gitlab:elastic namespace rake tasks', :elastic, :sidekiq do
before do
# Load the task definitions and turn Elasticsearch indexing on for every example.
Rake.application.rake_require 'tasks/gitlab/elastic'
stub_ee_application_setting(elasticsearch_indexing: true)
end
describe 'index' do
# The umbrella task is expected to invoke exactly these sub-tasks, in this order.
it 'calls all indexing tasks in order' do
expect(Rake::Task['gitlab:elastic:create_empty_index']).to receive(:invoke).ordered
expect(Rake::Task['gitlab:elastic:clear_index_status']).to receive(:invoke).ordered
expect(Rake::Task['gitlab:elastic:index_projects']).to receive(:invoke).ordered
expect(Rake::Task['gitlab:elastic:index_wikis']).to receive(:invoke).ordered
expect(Rake::Task['gitlab:elastic:index_snippets']).to receive(:invoke).ordered
run_rake_task 'gitlab:elastic:index'
end
end
describe 'index_projects' do
# `let` is lazy: a project only exists once an example or hook references it.
let(:project1) { create :project }
let(:project2) { create :project }
let(:project3) { create :project }
before do
# Referencing the lets creates project1 and project2 while Sidekiq testing
# is disabled — presumably so creation does not run indexing jobs; TODO confirm.
Sidekiq::Testing.disable! do
project1
project2
end
end
# project3 is never referenced in this example, so only two projects exist
# and both are expected in a single bulk enqueue call.
it 'queues jobs for each project batch' do
expect(ElasticIndexerWorker).to receive(:bulk_perform_async).with([
[:index, 'Project', project1.id, nil],
[:index, 'Project', project2.id, nil]
])
run_rake_task 'gitlab:elastic:index_projects'
end
context 'with limited indexing enabled' do
before do
# project1 is allowed directly, project3 through its namespace;
# project2 has neither record and therefore should be left out.
Sidekiq::Testing.disable! do
project1
project2
project3
create :elasticsearch_indexed_project, project: project1
create :elasticsearch_indexed_namespace, namespace: project3.namespace
end
stub_ee_application_setting(elasticsearch_limit_indexing: true)
end
it 'does not queue jobs for projects that should not be indexed' do
expect(ElasticIndexerWorker).to receive(:bulk_perform_async).with([
[:index, 'Project', project1.id, nil],
[:index, 'Project', project3.id, nil]
])
run_rake_task 'gitlab:elastic:index_projects'
end
end
end
describe 'index_snippets' do
# The task is expected to call Snippet.es_import.
it 'indexes snippets' do
expect(Snippet).to receive(:es_import)
run_rake_task 'gitlab:elastic:index_snippets'
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment