Commit 97b90bff authored by Nick Thomas's avatar Nick Thomas

Merge branch 'rake-task-for-reindexing' into 'master'

Add rake task for reindexing

See merge request gitlab-org/gitlab!27772
parents 18fe0434 42447fc4
...@@ -426,6 +426,15 @@ There are several rake tasks available to you via the command line: ...@@ -426,6 +426,15 @@ There are several rake tasks available to you via the command line:
- Performs an Elasticsearch import that indexes the snippets data. - Performs an Elasticsearch import that indexes the snippets data.
- [`sudo gitlab-rake gitlab:elastic:projects_not_indexed`](https://gitlab.com/gitlab-org/gitlab/blob/master/ee/lib/tasks/gitlab/elastic.rake) - [`sudo gitlab-rake gitlab:elastic:projects_not_indexed`](https://gitlab.com/gitlab-org/gitlab/blob/master/ee/lib/tasks/gitlab/elastic.rake)
- Displays which projects are not indexed. - Displays which projects are not indexed.
- [`sudo gitlab-rake gitlab:elastic:reindex_to_another_cluster[<SOURCE_CLUSTER_URL>,<DESTINATION_CLUSTER_URL>]`](https://gitlab.com/gitlab-org/gitlab/blob/master/ee/lib/tasks/gitlab/elastic.rake)
- Creates a new index in the destination cluster and triggers a [reindex from
remote](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html#reindex-from-remote)
so that the index is fully copied from the source cluster. This is useful
when migrating to a new cluster, because a remote reindex should be quicker
than reindexing via GitLab. The task does not wait for the reindex to
finish: it prints the ID of the Elasticsearch task that performs the copy.
The destination index is configured with `number_of_replicas: 0` and
`refresh_interval: -1` before the copy starts, and the task does not restore
these settings afterwards. Note that a remote reindex requires your source
cluster to be whitelisted in the Elasticsearch settings of your destination
cluster, as per [the
documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html#reindex-from-remote).
### Environment Variables ### Environment Variables
......
---
title: Add rake task for reindexing Elasticsearch
merge_request: 27772
author:
type: added
...@@ -4,7 +4,7 @@ module Gitlab ...@@ -4,7 +4,7 @@ module Gitlab
module Elastic module Elastic
class Helper class Helper
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def self.create_empty_index(version = ::Elastic::MultiVersionUtil::TARGET_VERSION) def self.create_empty_index(version = ::Elastic::MultiVersionUtil::TARGET_VERSION, client = nil)
settings = {} settings = {}
mappings = {} mappings = {}
...@@ -23,7 +23,7 @@ module Gitlab ...@@ -23,7 +23,7 @@ module Gitlab
end end
proxy = Project.__elasticsearch__.version(version) proxy = Project.__elasticsearch__.version(version)
client = proxy.client client ||= proxy.client
index_name = proxy.index_name index_name = proxy.index_name
create_index_options = { create_index_options = {
...@@ -52,6 +52,36 @@ module Gitlab ...@@ -52,6 +52,36 @@ module Gitlab
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
# Creates the versioned index on another Elasticsearch cluster and asks that
# cluster to pull the documents from the source cluster with a remote reindex.
#
# source_cluster_url      - URL of the cluster to copy from. A user and
#                           password embedded in the URL are removed from the
#                           host and sent as separate credentials.
# destination_cluster_url - URL of the cluster that receives the new index.
# version                 - index version used to resolve the index name
#                           (defaults to the current target version).
#
# Returns the 'task' entry of the reindex response. The reindex is requested
# with wait_for_completion: false, so this does not wait for it to finish.
def self.reindex_to_another_cluster(source_cluster_url, destination_cluster_url, version = ::Elastic::MultiVersionUtil::TARGET_VERSION)
  target_index = Project.__elasticsearch__.version(version).index_name
  target_client = Gitlab::Elastic::Client.build(url: destination_cluster_url)

  create_empty_index(version, target_client)

  # Write-optimized settings for the fresh index: zero replicas and a
  # refresh_interval of -1. Nothing in this method restores them.
  target_client.indices.put_settings(
    index: target_index,
    body: { index: { number_of_replicas: 0, refresh_interval: "-1" } }
  )

  source_uri = Addressable::URI.parse(source_cluster_url)
  remote_source = {
    host: source_uri.omit(:user, :password).to_s,
    username: source_uri.user,
    password: source_uri.password
  }
  reindex_body = {
    source: { remote: remote_source, index: target_index },
    dest: { index: target_index }
  }

  target_client.reindex(body: reindex_body, wait_for_completion: false)['task']
end
def self.delete_index(version = ::Elastic::MultiVersionUtil::TARGET_VERSION) def self.delete_index(version = ::Elastic::MultiVersionUtil::TARGET_VERSION)
Project.__elasticsearch__.version(version).delete_index! Project.__elasticsearch__.version(version).delete_index!
end end
......
...@@ -90,6 +90,12 @@ namespace :gitlab do ...@@ -90,6 +90,12 @@ namespace :gitlab do
end end
end end
# Starts a reindex of the GitLab index into another Elasticsearch cluster by
# delegating to Gitlab::Elastic::Helper, then prints the value it returns as
# the task id.
desc "GitLab | Elasticsearch | Reindex to another cluster"
task :reindex_to_another_cluster, [:source_cluster_url, :dest_cluster_url] => :environment do |_task, task_args|
  source_url = task_args.source_cluster_url
  destination_url = task_args.dest_cluster_url
  reindex_task_id = Gitlab::Elastic::Helper.reindex_to_another_cluster(source_url, destination_url)

  puts "Reindexing to another cluster started with task id: #{reindex_task_id}".color(:green)
end
def project_id_batches(&blk) def project_id_batches(&blk)
relation = Project relation = Project
......
# frozen_string_literal: true # frozen_string_literal: true
require 'fast_spec_helper' require 'fast_spec_helper'
require 'webmock/rspec'
describe Gitlab::Elastic::Helper do describe Gitlab::Elastic::Helper do
describe '.index_exists' do describe '.index_exists' do
...@@ -14,4 +15,49 @@ describe Gitlab::Elastic::Helper do ...@@ -14,4 +15,49 @@ describe Gitlab::Elastic::Helper do
expect(described_class.index_exists?).to eq(false) expect(described_class.index_exists?).to eq(false)
end end
end end
# Named with a leading dot and exercised through described_class, matching
# the '.index_exists' example group in this spec.
describe '.reindex_to_another_cluster' do
  it 'creates an empty index and triggers a reindex' do
    # Supporting requests against the destination cluster. They are stubbed
    # so the calls succeed, but are not asserted on below.
    _version_check_request = stub_request(:get, 'http://newcluster.example.com:9200/')
      .to_return(status: 200, body: { version: { number: '7.5.1' } }.to_json)

    # The HEAD on the index answers 404, i.e. the index is not there yet.
    _index_exists_check = stub_request(:head, 'http://newcluster.example.com:9200/gitlab-test')
      .to_return(status: 404, body: +'')

    create_cluster_request = stub_request(:put, 'http://newcluster.example.com:9200/gitlab-test')
      .to_return(status: 200, body: +'')

    optimize_settings_for_write_request = stub_request(:put, 'http://newcluster.example.com:9200/gitlab-test/_settings')
      .with(body: { index: { number_of_replicas: 0, refresh_interval: "-1" } })
      .to_return(status: 200, body: +'')

    # The credentials embedded in source_url must arrive as separate
    # username/password fields, with a credential-free host.
    reindex_request = stub_request(:post, 'http://newcluster.example.com:9200/_reindex?wait_for_completion=false')
      .with(
        body: {
          source: {
            remote: {
              host: 'http://oldcluster.example.com:9200/',
              username: 'olduser',
              password: 'oldpass'
            },
            index: 'gitlab-test'
          },
          dest: {
            index: 'gitlab-test'
          }
        }).to_return(status: 200,
                     headers: { "Content-Type" => "application/json" },
                     body: { task: 'abc123' }.to_json)

    source_url = 'http://olduser:oldpass@oldcluster.example.com:9200/'
    dest_url = 'http://newcluster.example.com:9200/'

    task = described_class.reindex_to_another_cluster(source_url, dest_url)

    expect(task).to eq('abc123')

    assert_requested create_cluster_request
    assert_requested optimize_settings_for_write_request
    assert_requested reindex_request
  end
end
end end
...@@ -72,4 +72,12 @@ describe 'gitlab:elastic namespace rake tasks', :elastic do ...@@ -72,4 +72,12 @@ describe 'gitlab:elastic namespace rake tasks', :elastic do
run_rake_task 'gitlab:elastic:index_snippets' run_rake_task 'gitlab:elastic:index_snippets'
end end
end end
describe 'reindex_to_another_cluster' do
  it 'calls reindex_to_another_cluster' do
    # The rake arguments must be forwarded to the helper unchanged and in order.
    source_url = 'http://oldcluster.example.com:9300/'
    destination_url = 'http://newcluster.example.com:9300/'

    expect(Gitlab::Elastic::Helper).to receive(:reindex_to_another_cluster).with(source_url, destination_url)

    run_rake_task 'gitlab:elastic:reindex_to_another_cluster', source_url, destination_url
  end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment