Commit 42447fc4 authored by Dylan Griffith

Add rake task for reindexing Elasticsearch

This task can be used by an administrator as a first step in a zero
downtime re-indexing process.
parent fefbb917
......@@ -426,6 +426,15 @@ There are several rake tasks available to you via the command line:
- Performs an Elasticsearch import that indexes the snippets data.
- [`sudo gitlab-rake gitlab:elastic:projects_not_indexed`](https://gitlab.com/gitlab-org/gitlab/blob/master/ee/lib/tasks/gitlab/elastic.rake)
- Displays which projects are not indexed.
- [`sudo gitlab-rake gitlab:elastic:reindex_to_another_cluster[<SOURCE_CLUSTER_URL>,<DESTINATION_CLUSTER_URL>]`](https://gitlab.com/gitlab-org/gitlab/blob/master/ee/lib/tasks/gitlab/elastic.rake)
- Creates a new index in the destination cluster and triggers a [reindex from
remote](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html#reindex-from-remote)
such that the index is fully copied from the source index. This can be
useful when you wish to perform a migration to a new cluster as this
reindexing should be quicker than reindexing via GitLab. Note that remote
reindex requires the source cluster's host to be listed in the
`reindex.remote.whitelist` setting of the destination cluster, as per [the
documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html#reindex-from-remote).
### Environment Variables
......
---
title: Add rake task for reindexing Elasticsearch
merge_request: 27772
author:
type: added
......@@ -4,7 +4,7 @@ module Gitlab
module Elastic
class Helper
# rubocop: disable CodeReuse/ActiveRecord
def self.create_empty_index(version = ::Elastic::MultiVersionUtil::TARGET_VERSION)
def self.create_empty_index(version = ::Elastic::MultiVersionUtil::TARGET_VERSION, client = nil)
settings = {}
mappings = {}
......@@ -23,7 +23,7 @@ module Gitlab
end
proxy = Project.__elasticsearch__.version(version)
client = proxy.client
client ||= proxy.client
index_name = proxy.index_name
create_index_options = {
......@@ -52,6 +52,36 @@ module Gitlab
end
# rubocop: enable CodeReuse/ActiveRecord
# Creates an empty index on the destination cluster and asks that cluster to
# pull every document from the same-named index on the source cluster via
# the Elasticsearch reindex API (remote source).
#
# source_cluster_url      - URL of the cluster to copy from. Any user and
#                           password embedded in the URL are stripped from
#                           the host and sent as separate remote credentials.
# destination_cluster_url - URL of the cluster to copy into.
# version                 - index version used to resolve the index name.
#
# The reindex request is sent with wait_for_completion: false, so this
# returns the value under 'task' in the response without waiting for the
# copy to finish.
def self.reindex_to_another_cluster(source_cluster_url, destination_cluster_url, version = ::Elastic::MultiVersionUtil::TARGET_VERSION)
  target_index = Project.__elasticsearch__.version(version).index_name
  target_client = Gitlab::Elastic::Client.build(url: destination_cluster_url)

  create_empty_index(version, target_client)

  # No replicas and no refresh while the bulk copy is running.
  target_client.indices.put_settings(
    index: target_index,
    body: { index: { number_of_replicas: 0, refresh_interval: "-1" } }
  )

  source_uri = Addressable::URI.parse(source_cluster_url)
  remote_source = {
    host: source_uri.omit(:user, :password).to_s,
    username: source_uri.user,
    password: source_uri.password
  }
  reindex_body = {
    source: { remote: remote_source, index: target_index },
    dest: { index: target_index }
  }

  target_client.reindex(body: reindex_body, wait_for_completion: false)['task']
end
# Deletes the index belonging to the given index version by calling
# delete_index! on the versioned Project Elasticsearch proxy.
def self.delete_index(version = ::Elastic::MultiVersionUtil::TARGET_VERSION)
  versioned_proxy = Project.__elasticsearch__.version(version)
  versioned_proxy.delete_index!
end
......
......@@ -90,6 +90,12 @@ namespace :gitlab do
end
end
desc "GitLab | Elasticsearch | Reindex to another cluster"
task :reindex_to_another_cluster, [:source_cluster_url, :dest_cluster_url] => :environment do |_task, task_args|
  # Hand both URLs to the helper, which returns the id of the reindex task
  # it started; the id is printed so the caller can refer to it later.
  task_id = Gitlab::Elastic::Helper.reindex_to_another_cluster(
    task_args[:source_cluster_url],
    task_args[:dest_cluster_url]
  )

  puts "Reindexing to another cluster started with task id: #{task_id}".color(:green)
end
def project_id_batches(&blk)
relation = Project
......
# frozen_string_literal: true
require 'fast_spec_helper'
require 'webmock/rspec'
describe Gitlab::Elastic::Helper do
describe '.index_exists' do
......@@ -14,4 +15,49 @@ describe Gitlab::Elastic::Helper do
expect(described_class.index_exists?).to eq(false)
end
end
describe 'reindex_to_another_cluster' do
  # Exercises the helper at the HTTP level: every request sent to the
  # destination cluster is stubbed with WebMock, then the three requests
  # that matter (create index, write-optimised settings, reindex) are
  # asserted to have been made.
  it 'creates an empty index and triggers a reindex' do
    source_url = 'http://olduser:oldpass@oldcluster.example.com:9200/'
    dest_url = 'http://newcluster.example.com:9200/'

    # Version check and index-existence check: stubbed so the helper can
    # proceed, but not asserted on.
    stub_request(:get, 'http://newcluster.example.com:9200/')
      .to_return(status: 200, body: { version: { number: '7.5.1' } }.to_json)
    stub_request(:head, 'http://newcluster.example.com:9200/gitlab-test')
      .to_return(status: 404, body: +'')

    create_index_stub = stub_request(:put, 'http://newcluster.example.com:9200/gitlab-test')
      .to_return(status: 200, body: +'')

    write_settings_stub = stub_request(:put, 'http://newcluster.example.com:9200/gitlab-test/_settings')
      .with(body: { index: { number_of_replicas: 0, refresh_interval: "-1" } })
      .to_return(status: 200, body: +'')

    # Credentials from the source URL must arrive as separate fields, with
    # the host stripped of them.
    expected_reindex_body = {
      source: {
        remote: {
          host: 'http://oldcluster.example.com:9200/',
          username: 'olduser',
          password: 'oldpass'
        },
        index: 'gitlab-test'
      },
      dest: {
        index: 'gitlab-test'
      }
    }

    reindex_stub = stub_request(:post, 'http://newcluster.example.com:9200/_reindex?wait_for_completion=false')
      .with(body: expected_reindex_body)
      .to_return(
        status: 200,
        headers: { "Content-Type" => "application/json" },
        body: { task: 'abc123' }.to_json
      )

    returned_task = described_class.reindex_to_another_cluster(source_url, dest_url)

    expect(returned_task).to eq('abc123')
    assert_requested create_index_stub
    assert_requested write_settings_stub
    assert_requested reindex_stub
  end
end
end
......@@ -72,4 +72,12 @@ describe 'gitlab:elastic namespace rake tasks', :elastic do
run_rake_task 'gitlab:elastic:index_snippets'
end
end
describe 'reindex_to_another_cluster' do
  # The rake task should forward its two positional arguments to the
  # helper unchanged.
  it 'calls reindex_to_another_cluster' do
    source_url = 'http://oldcluster.example.com:9300/'
    destination_url = 'http://newcluster.example.com:9300/'

    expect(Gitlab::Elastic::Helper)
      .to receive(:reindex_to_another_cluster)
      .with(source_url, destination_url)

    run_rake_task 'gitlab:elastic:reindex_to_another_cluster', source_url, destination_url
  end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment