Commit a0db23e6 authored by Valery Sizov

Merge branch 'es_subprocess' into 'master'

ES subprocess

Fixes https://gitlab.com/gitlab-org/gitlab-ee/issues/296

See merge request !232
parents f33db5d2 64fcc163
......@@ -6,6 +6,7 @@ v 8.6.0 (unreleased)
- [Elastic] Added UPDATE_INDEX option to rake task
- [Elastic] Removing repository and wiki index after removing project
- [Elastic] Update index on push to wiki
- [Elastic] Use subprocesses for ElasticSearch index jobs
v 8.5.2
- Update LDAP groups asynchronously
......
......@@ -64,8 +64,13 @@ class GitPushService < BaseService
end
# Sends the revision range of this push to the ElasticSearch indexer.
#
# Indexing goes exclusively through Elastic::Indexer, which receives the
# project id, the repository path and the old/new revisions from params.
# The repository is deliberately not indexed in-process here as well:
# doing both would index every push twice.
def index_commits_blobs
  indexer = Elastic::Indexer.new
  indexer.run(
    @project.id,
    project.repository.path_to_repo,
    params[:oldrev],
    params[:newrev]
  )
end
def process_default_branch
......
#!/usr/bin/env ruby
require 'rubygems'
require 'bundler/setup'
require 'json'
require 'elasticsearch/git'
require 'active_support'
require 'active_support/core_ext'
# Positional arguments: the id of the project to index, then the path of its
# repository.
PROJECT_ID = ARGV.shift
REPO_PATH = ARGV.shift

# Optional settings taken from the environment; each is nil when unset.
FROM_SHA = ENV['FROM_SHA']
TO_SHA = ENV['TO_SHA']
RAILS_ENV = ENV['RAILS_ENV']

# ELASTIC_CONNECTION_INFO carries a JSON object with "host" and "port".
# JSON.parse produces String keys by default, so looking the values up with
# symbols would always give nil; symbolize_names makes the lookups resolve.
# ENV.fetch raises a KeyError naming the variable when it is missing.
elastic_connection_info = JSON.parse(
  ENV.fetch('ELASTIC_CONNECTION_INFO'),
  symbolize_names: true
)
ELASTIC_HOST = elastic_connection_info[:host]
ELASTIC_PORT = elastic_connection_info[:port]
# Stand-alone repository object for this script. It mixes in
# Elasticsearch::Git::Repository and answers with the project id and
# repository path given on the command line.
class Repository
  include Elasticsearch::Git::Repository

  # Index name is "repository-index-<RAILS_ENV>"; the suffix is left out when
  # RAILS_ENV is nil.
  index_name_parts = ['repository', 'index', RAILS_ENV]
  index_name index_name_parts.compact.join('-')

  # Client pointed at the host/port taken from the connection info.
  es_client = Elasticsearch::Client.new(host: ELASTIC_HOST, port: ELASTIC_PORT)
  __elasticsearch__.client = es_client

  # Project id passed as the first command-line argument.
  def repository_id
    PROJECT_ID
  end

  # Repository path passed as the second command-line argument.
  def path_to_repo
    REPO_PATH
  end
end
# Script body: call create_index! on the Elasticsearch proxy, then index
# commits followed by blobs, printing progress for each step.
Repository.__elasticsearch__.create_index!

repository = Repository.new

# compact removes whichever revision bound was not supplied (nil).
revision_range = { from_rev: FROM_SHA, to_rev: TO_SHA }.compact

[
  ["Indexing commits...", :index_commits],
  ["Indexing blobs...", :index_blobs]
].each do |banner, indexing_step|
  print banner
  repository.public_send(indexing_step, revision_range)
  puts "Done"
end
\ No newline at end of file
module Elastic
  # Runs bin/elastic_repo_indexer through Gitlab::Popen to index one
  # repository, passing the ElasticSearch connection settings and the Rails
  # environment name to the command as environment variables.
  class Indexer
    # Raised by #run when the indexer command exits with a non-zero status.
    Error = Class.new(StandardError)

    # Captures the environment variables shared by every run.
    def initialize
      # We accept any form of settings, including string and array
      # This is why JSON is needed
      connection_info = {
        host: Gitlab.config.elasticsearch.host,
        port: Gitlab.config.elasticsearch.port
      }.to_json

      @vars = {
        'ELASTIC_CONNECTION_INFO' => connection_info,
        'RAILS_ENV' => Rails.env
      }
    end

    # Runs the indexer command for one repository.
    #
    # @param project_id [#to_s] id handed to the command as its first argument
    # @param repo_path [String] repository path, the second argument
    # @param from_sha [String, nil] exported as FROM_SHA
    # @param to_sha [String, nil] exported as TO_SHA
    # @return [true] when the command exits with status 0
    # @raise [Error] with the command output when the status is non-zero
    def run(project_id, repo_path, from_sha = nil, to_sha = nil)
      vars = @vars.merge('FROM_SHA' => from_sha, 'TO_SHA' => to_sha)
      command = ['bin/elastic_repo_indexer', project_id.to_s, repo_path]

      output, status = Gitlab::Popen.popen(command, nil, vars)

      # The output may arrive as one String or as an Array of lines; a plain
      # String has no #join, so normalise with Array() before building the
      # error message.
      raise Error, Array(output).join("\n") unless status.zero?

      true
    end
  end
end
......@@ -5,13 +5,13 @@ module Gitlab
module Popen
extend self
def popen(cmd, path=nil)
def popen(cmd, path = nil, vars = {})
unless cmd.is_a?(Array)
raise "System commands must be given as an array of strings"
end
path ||= Dir.pwd
vars = { "PWD" => path }
vars['PWD'] = path
options = { chdir: path }
unless File.directory?(path)
......@@ -20,6 +20,7 @@ module Gitlab
@cmd_output = ""
@cmd_status = 0
Open3.popen3(vars, *cmd, options) do |stdin, stdout, stderr, wait_thr|
# We are not using stdin so we should close it, in case the command we
# are running waits for input.
......
......@@ -14,6 +14,8 @@ namespace :gitlab do
projects = apply_project_filters(projects)
indexer = Elastic::Indexer.new
projects.find_each do |project|
if project.repository.exists? && !project.repository.empty?
puts "Indexing #{project.name_with_namespace} (ID=#{project.id})..."
......@@ -28,8 +30,11 @@ namespace :gitlab do
next
end
project.repository.index_commits(from_rev: project.index_status.last_commit)
project.repository.index_blobs(from_rev: project.index_status.last_commit)
indexer.run(
project.id,
project.repository.path_to_repo,
project.index_status.last_commit
)
# During indexing the new commits can be pushed,
# the last_commit parameter only indicates that at least this commit is in index
......
require 'spec_helper'
# Verifies that Elastic::Indexer#run invokes Gitlab::Popen.popen with the
# indexer command, a nil path and the expected environment variables.
describe "Indexer" do
  it "runs commands" do
    connection_info = {
      host: Gitlab.config.elasticsearch.host,
      port: Gitlab.config.elasticsearch.port
    }.to_json

    expected_command = array_including('bin/elastic_repo_indexer', '1', 'full_repo_path')
    expected_env = hash_including(
      'ELASTIC_CONNECTION_INFO' => connection_info,
      'RAILS_ENV' => Rails.env,
      'FROM_SHA' => '000000',
      'TO_SHA' => '1d1f2d'
    )

    # The stub answers with an output value and a zero exit status.
    expect(Gitlab::Popen).to receive(:popen)
      .with(expected_command, nil, expected_env)
      .and_return([[''], 0])

    Elastic::Indexer.new.run(1, 'full_repo_path', '000000', '1d1f2d')
  end
end
......@@ -134,6 +134,22 @@ describe GitPushService, services: true do
end
end
# With ElasticSearch reported as enabled, executing the push service must
# call Elastic::Indexer#run.
describe "ES indexing" do
  # Stub the setting on for the example ...
  before { allow(Gitlab.config.elasticsearch).to receive(:enabled).and_return(true) }

  # ... and stub it back off afterwards.
  after { allow(Gitlab.config.elasticsearch).to receive(:enabled).and_return(false) }

  it "triggers indexer" do
    expect_any_instance_of(Elastic::Indexer).to receive(:run)

    execute_service(project, user, @oldrev, @newrev, @ref)
  end
end
describe "Push Event" do
before do
service = execute_service(project, user, @oldrev, @newrev, @ref )
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment