From 6c8901c66f6094b9f89e72a86b57fd70362c4795 Mon Sep 17 00:00:00 2001 From: Nick Thomas <nick@gitlab.com> Date: Mon, 17 Jun 2019 20:06:27 +0000 Subject: [PATCH] Automatically index wikis in elasticsearch --- doc/integration/elasticsearch.md | 14 +++------ .../services/elastic/index_record_service.rb | 8 +++-- .../12116-indexing-wikis-on-main.yml | 5 +++ ee/lib/tasks/gitlab/elastic.rake | 31 ------------------- .../elastic/index_record_service_spec.rb | 2 ++ ee/spec/tasks/gitlab/elastic_rake_spec.rb | 1 - 6 files changed, 17 insertions(+), 44 deletions(-) create mode 100644 ee/changelogs/unreleased/12116-indexing-wikis-on-main.yml diff --git a/doc/integration/elasticsearch.md b/doc/integration/elasticsearch.md index 6064c417900..a2f38a2fcdf 100644 --- a/doc/integration/elasticsearch.md +++ b/doc/integration/elasticsearch.md @@ -275,19 +275,18 @@ You can also use the `gitlab:elastic:clear_index_status` Rake task to force the indexer to "forget" all progress, so retrying the indexing process from the start. -To index all wikis: +The `index_projects` command enqueues jobs to index all project and wiki +repositories, and most database content. However, snippets still need to be +indexed separately. To do so, run one of these commands: ```sh # Omnibus installations -sudo gitlab-rake gitlab:elastic:index_wikis +sudo gitlab-rake gitlab:elastic:index_snippets # Installations from source -bundle exec rake gitlab:elastic:index_wikis RAILS_ENV=production +bundle exec rake gitlab:elastic:index_snippets RAILS_ENV=production ``` -The wiki indexer also supports the `ID_FROM` and `ID_TO` parameters if you want -to limit a project set. - Enable replication and refreshing again after indexing (only if you previously disabled it): ```bash @@ -335,14 +334,11 @@ There are several rake tasks available to you via the command line: - `sudo gitlab-rake gitlab:elastic:create_empty_index` - `sudo gitlab-rake gitlab:elastic:clear_index_status` - `sudo gitlab-rake gitlab:elastic:index_projects` - - `sudo gitlab-rake gitlab:elastic:index_wikis` - `sudo gitlab-rake gitlab:elastic:index_snippets` - [sudo gitlab-rake gitlab:elastic:index_projects](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake) - This iterates over all projects and queues sidekiq jobs to index them in the background. - [sudo gitlab-rake gitlab:elastic:index_projects_status](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake) - This determines the overall status of the indexing. It is done by counting the total number of indexed projects, dividing by a count of the total number of projects, then multiplying by 100. -- [sudo gitlab-rake gitlab:elastic:index_wikis](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake) - - Iterates over every project, determines if said project contains wiki data, and then indexes the blobs (content) of said wiki data. - [sudo gitlab-rake gitlab:elastic:create_empty_index](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake) - This generates an empty index on the Elasticsearch side. - [sudo gitlab-rake gitlab:elastic:clear_index_status](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake) diff --git a/ee/app/services/elastic/index_record_service.rb b/ee/app/services/elastic/index_record_service.rb index e53f641de61..7bc81ead6ab 100644 --- a/ee/app/services/elastic/index_record_service.rb +++ b/ee/app/services/elastic/index_record_service.rb @@ -36,12 +36,14 @@ module Elastic end def initial_index_project(project) + # Enqueue the repository indexing jobs immediately so they run in parallel + # One for the project repository, one for the wiki repository + ElasticCommitIndexerWorker.perform_async(project.id) + ElasticCommitIndexerWorker.perform_async(project.id, nil, nil, true) + project.each_indexed_association do |klass, objects| objects.es_import end - - # Finally, index blobs/commits/wikis - ElasticCommitIndexerWorker.perform_async(project.id) end def import(record, nested, indexing) diff --git a/ee/changelogs/unreleased/12116-indexing-wikis-on-main.yml b/ee/changelogs/unreleased/12116-indexing-wikis-on-main.yml new file mode 100644 index 00000000000..c074ec5efb6 --- /dev/null +++ b/ee/changelogs/unreleased/12116-indexing-wikis-on-main.yml @@ -0,0 +1,5 @@ +--- +title: Automatically index wikis in elasticsearch +merge_request: 14095 +author: +type: changed diff --git a/ee/lib/tasks/gitlab/elastic.rake b/ee/lib/tasks/gitlab/elastic.rake index 0fb2eb54b5e..a861a635bef 100644 --- a/ee/lib/tasks/gitlab/elastic.rake +++ b/ee/lib/tasks/gitlab/elastic.rake @@ -10,7 +10,6 @@ namespace :gitlab do Rake::Task["gitlab:elastic:create_empty_index"].invoke Rake::Task["gitlab:elastic:clear_index_status"].invoke Rake::Task["gitlab:elastic:index_projects"].invoke - Rake::Task["gitlab:elastic:index_wikis"].invoke Rake::Task["gitlab:elastic:index_snippets"].invoke end @@ -46,24 +45,6 @@ namespace :gitlab do puts 'Cleared all locked projects. Incremental indexing should work now.' end - desc "GitLab | Elasticsearch | Index wiki repositories" - task index_wikis: :environment do - projects = apply_project_filters(Project.with_wiki_enabled) - - projects.find_each do |project| - if project.use_elasticsearch? && !project.wiki.empty? - puts "Indexing wiki of #{project.full_name}..." - - begin - project.wiki.index_wiki_blobs - puts "Enqueued!".color(:green) - rescue StandardError => e - puts "#{e.message}, trace - #{e.backtrace}" - end - end - end - end - desc "GitLab | Elasticsearch | Index all snippets" task index_snippets: :environment do logger = Logger.new(STDOUT) @@ -127,18 +108,6 @@ namespace :gitlab do end end - def apply_project_filters(projects) - if ENV['ID_FROM'] - projects = projects.where("projects.id >= ?", ENV['ID_FROM']) - end - - if ENV['ID_TO'] - projects = projects.where("projects.id <= ?", ENV['ID_TO']) - end - - projects - end - def display_unindexed(projects) arr = if projects.count < 500 || ENV['SHOW_ALL'] projects diff --git a/ee/spec/services/elastic/index_record_service_spec.rb b/ee/spec/services/elastic/index_record_service_spec.rb index bcdce7b5d62..b5d93ac242c 100644 --- a/ee/spec/services/elastic/index_record_service_spec.rb +++ b/ee/spec/services/elastic/index_record_service_spec.rb @@ -69,6 +69,7 @@ describe Elastic::IndexRecordService, :elastic do it 'indexes records associated with the project' do expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id).and_call_original + expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id, nil, nil, true).and_call_original Sidekiq::Testing.inline! do subject.execute(project, true) @@ -83,6 +84,7 @@ describe Elastic::IndexRecordService, :elastic do other_project = create :project expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(other_project.id).and_call_original + expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(other_project.id, nil, nil, true).and_call_original Sidekiq::Testing.inline! do subject.execute(other_project, true) diff --git a/ee/spec/tasks/gitlab/elastic_rake_spec.rb b/ee/spec/tasks/gitlab/elastic_rake_spec.rb index 4194e60584d..e11be040105 100644 --- a/ee/spec/tasks/gitlab/elastic_rake_spec.rb +++ b/ee/spec/tasks/gitlab/elastic_rake_spec.rb @@ -13,7 +13,6 @@ describe 'gitlab:elastic namespace rake tasks', :elastic, :sidekiq do expect(Rake::Task['gitlab:elastic:create_empty_index']).to receive(:invoke).ordered expect(Rake::Task['gitlab:elastic:clear_index_status']).to receive(:invoke).ordered expect(Rake::Task['gitlab:elastic:index_projects']).to receive(:invoke).ordered - expect(Rake::Task['gitlab:elastic:index_wikis']).to receive(:invoke).ordered expect(Rake::Task['gitlab:elastic:index_snippets']).to receive(:invoke).ordered run_rake_task 'gitlab:elastic:index' -- 2.30.9