From 6c8901c66f6094b9f89e72a86b57fd70362c4795 Mon Sep 17 00:00:00 2001
From: Nick Thomas <nick@gitlab.com>
Date: Mon, 17 Jun 2019 20:06:27 +0000
Subject: [PATCH] Automatically index wikis in Elasticsearch

---
 doc/integration/elasticsearch.md              | 14 +++------
 .../services/elastic/index_record_service.rb  |  8 +++--
 .../12116-indexing-wikis-on-main.yml          |  5 +++
 ee/lib/tasks/gitlab/elastic.rake              | 31 -------------------
 .../elastic/index_record_service_spec.rb      |  2 ++
 ee/spec/tasks/gitlab/elastic_rake_spec.rb     |  1 -
 6 files changed, 17 insertions(+), 44 deletions(-)
 create mode 100644 ee/changelogs/unreleased/12116-indexing-wikis-on-main.yml

diff --git a/doc/integration/elasticsearch.md b/doc/integration/elasticsearch.md
index 6064c417900..a2f38a2fcdf 100644
--- a/doc/integration/elasticsearch.md
+++ b/doc/integration/elasticsearch.md
@@ -275,19 +275,18 @@ You can also use the `gitlab:elastic:clear_index_status` Rake task to force the
 indexer to "forget" all progress, so retrying the indexing process from the
 start.
 
-To index all wikis:
+The `gitlab:elastic:index_projects` Rake task enqueues jobs to index all project
+and wiki repositories, and most database content. However, snippets must still
+be indexed separately. To do so, run one of these commands:
 
 ```sh
 # Omnibus installations
-sudo gitlab-rake gitlab:elastic:index_wikis
+sudo gitlab-rake gitlab:elastic:index_snippets
 
 # Installations from source
-bundle exec rake gitlab:elastic:index_wikis RAILS_ENV=production
+bundle exec rake gitlab:elastic:index_snippets RAILS_ENV=production
 ```
 
-The wiki indexer also supports the `ID_FROM` and `ID_TO` parameters if you want
-to limit a project set.
-
 Enable replication and refreshing again after indexing (only if you previously disabled it):
 
 ```bash
@@ -335,14 +334,11 @@ There are several rake tasks available to you via the command line:
     - `sudo gitlab-rake gitlab:elastic:create_empty_index`
     - `sudo gitlab-rake gitlab:elastic:clear_index_status`
     - `sudo gitlab-rake gitlab:elastic:index_projects`
-    - `sudo gitlab-rake gitlab:elastic:index_wikis`
     - `sudo gitlab-rake gitlab:elastic:index_snippets`
 - [sudo gitlab-rake gitlab:elastic:index_projects](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake)
   - This iterates over all projects and queues sidekiq jobs to index them in the background.
 - [sudo gitlab-rake gitlab:elastic:index_projects_status](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake)
   - This determines the overall status of the indexing. It is done by counting the total number of indexed projects, dividing by a count of the total number of projects, then multiplying by 100.
-- [sudo gitlab-rake gitlab:elastic:index_wikis](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake)
-  - Iterates over every project, determines if said project contains wiki data, and then indexes the blobs (content) of said wiki data.
 - [sudo gitlab-rake gitlab:elastic:create_empty_index](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake)
   - This generates an empty index on the Elasticsearch side.
 - [sudo gitlab-rake gitlab:elastic:clear_index_status](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake)
diff --git a/ee/app/services/elastic/index_record_service.rb b/ee/app/services/elastic/index_record_service.rb
index e53f641de61..7bc81ead6ab 100644
--- a/ee/app/services/elastic/index_record_service.rb
+++ b/ee/app/services/elastic/index_record_service.rb
@@ -36,12 +36,14 @@ module Elastic
     end
 
     def initial_index_project(project)
+      # Enqueue the repository indexing jobs immediately so they run in parallel:
+      # one for the project repository and one for the wiki repository.
+      ElasticCommitIndexerWorker.perform_async(project.id)
+      ElasticCommitIndexerWorker.perform_async(project.id, nil, nil, true)
+
       project.each_indexed_association do |klass, objects|
         objects.es_import
       end
-
-      # Finally, index blobs/commits/wikis
-      ElasticCommitIndexerWorker.perform_async(project.id)
     end
 
     def import(record, nested, indexing)
diff --git a/ee/changelogs/unreleased/12116-indexing-wikis-on-main.yml b/ee/changelogs/unreleased/12116-indexing-wikis-on-main.yml
new file mode 100644
index 00000000000..c074ec5efb6
--- /dev/null
+++ b/ee/changelogs/unreleased/12116-indexing-wikis-on-main.yml
@@ -0,0 +1,5 @@
+---
+title: Automatically index wikis in Elasticsearch
+merge_request: 14095
+author:
+type: changed
diff --git a/ee/lib/tasks/gitlab/elastic.rake b/ee/lib/tasks/gitlab/elastic.rake
index 0fb2eb54b5e..a861a635bef 100644
--- a/ee/lib/tasks/gitlab/elastic.rake
+++ b/ee/lib/tasks/gitlab/elastic.rake
@@ -10,7 +10,6 @@ namespace :gitlab do
       Rake::Task["gitlab:elastic:create_empty_index"].invoke
       Rake::Task["gitlab:elastic:clear_index_status"].invoke
       Rake::Task["gitlab:elastic:index_projects"].invoke
-      Rake::Task["gitlab:elastic:index_wikis"].invoke
       Rake::Task["gitlab:elastic:index_snippets"].invoke
     end
 
@@ -46,24 +45,6 @@ namespace :gitlab do
       puts 'Cleared all locked projects. Incremental indexing should work now.'
     end
 
-    desc "GitLab | Elasticsearch | Index wiki repositories"
-    task index_wikis: :environment do
-      projects = apply_project_filters(Project.with_wiki_enabled)
-
-      projects.find_each do |project|
-        if project.use_elasticsearch? && !project.wiki.empty?
-          puts "Indexing wiki of #{project.full_name}..."
-
-          begin
-            project.wiki.index_wiki_blobs
-            puts "Enqueued!".color(:green)
-          rescue StandardError => e
-            puts "#{e.message}, trace - #{e.backtrace}"
-          end
-        end
-      end
-    end
-
     desc "GitLab | Elasticsearch | Index all snippets"
     task index_snippets: :environment do
       logger = Logger.new(STDOUT)
@@ -127,18 +108,6 @@ namespace :gitlab do
       end
     end
 
-    def apply_project_filters(projects)
-      if ENV['ID_FROM']
-        projects = projects.where("projects.id >= ?", ENV['ID_FROM'])
-      end
-
-      if ENV['ID_TO']
-        projects = projects.where("projects.id <= ?", ENV['ID_TO'])
-      end
-
-      projects
-    end
-
     def display_unindexed(projects)
       arr = if projects.count < 500 || ENV['SHOW_ALL']
               projects
diff --git a/ee/spec/services/elastic/index_record_service_spec.rb b/ee/spec/services/elastic/index_record_service_spec.rb
index bcdce7b5d62..b5d93ac242c 100644
--- a/ee/spec/services/elastic/index_record_service_spec.rb
+++ b/ee/spec/services/elastic/index_record_service_spec.rb
@@ -69,6 +69,7 @@ describe Elastic::IndexRecordService, :elastic do
 
     it 'indexes records associated with the project' do
       expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id).and_call_original
+      expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id, nil, nil, true).and_call_original
 
       Sidekiq::Testing.inline! do
         subject.execute(project, true)
@@ -83,6 +84,7 @@ describe Elastic::IndexRecordService, :elastic do
       other_project = create :project
 
       expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(other_project.id).and_call_original
+      expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(other_project.id, nil, nil, true).and_call_original
 
       Sidekiq::Testing.inline! do
         subject.execute(other_project, true)
diff --git a/ee/spec/tasks/gitlab/elastic_rake_spec.rb b/ee/spec/tasks/gitlab/elastic_rake_spec.rb
index 4194e60584d..e11be040105 100644
--- a/ee/spec/tasks/gitlab/elastic_rake_spec.rb
+++ b/ee/spec/tasks/gitlab/elastic_rake_spec.rb
@@ -13,7 +13,6 @@ describe 'gitlab:elastic namespace rake tasks', :elastic, :sidekiq do
       expect(Rake::Task['gitlab:elastic:create_empty_index']).to receive(:invoke).ordered
       expect(Rake::Task['gitlab:elastic:clear_index_status']).to receive(:invoke).ordered
       expect(Rake::Task['gitlab:elastic:index_projects']).to receive(:invoke).ordered
-      expect(Rake::Task['gitlab:elastic:index_wikis']).to receive(:invoke).ordered
       expect(Rake::Task['gitlab:elastic:index_snippets']).to receive(:invoke).ordered
 
       run_rake_task 'gitlab:elastic:index'
-- 
2.30.9