Commit e360b930 authored by Dmitriy Zaporozhets's avatar Dmitriy Zaporozhets

Merge branch 'elasticsearch' into 'master'

Elasticsearch integration

https://gitlab.com/gitlab-org/gitlab-ee/issues/61

Steps:

- [x] Base integration
- [x] Implement snippet search (service layer and actual search)
- [x] Implement project search (service layer and actual search)
- [x] Implement global search (service layer and actual search)
- [x] Implement search through the notes
- [x] Implement search through the milestones
- [x] Implement search through the wiki blobs
- [x] Implement search through the commits and code
- [x] Specs and Spinach (fix)
- [ ] Specs and Spinach (new ones)
- [x] Take care of omnibus support for elasticsearch 
- [x] Documentation
- [ ] Add elasticsearch check to GitLab check
- [x] Update and create index means

See merge request !109
parents 97700125 558ddfbb
......@@ -91,6 +91,11 @@ gem "six", '~> 0.2.0'
# Seed data
gem "seed-fu", '~> 2.3.5'
# Search
gem 'elasticsearch-model'
gem 'elasticsearch-rails'
gem 'gitlab-elasticsearch-git', require: "elasticsearch/git"
# Markdown and HTML processing
gem 'html-pipeline', '~> 1.11.0'
gem 'task_list', '~> 1.0.2', require: 'task_list/railtie'
......
......@@ -181,6 +181,19 @@ GEM
railties (>= 3.2)
dropzonejs-rails (0.7.2)
rails (> 3.1)
elasticsearch (1.0.15)
elasticsearch-api (= 1.0.15)
elasticsearch-transport (= 1.0.15)
elasticsearch-api (1.0.15)
multi_json
elasticsearch-model (0.1.8)
activesupport (> 3)
elasticsearch (> 0.4)
hashie
elasticsearch-rails (0.1.8)
elasticsearch-transport (1.0.15)
faraday
multi_json
email_reply_parser (0.5.8)
email_spec (1.6.0)
launchy (~> 2.1)
......@@ -345,6 +358,14 @@ GEM
mime-types (>= 1.19)
rugged (>= 0.23.0b)
github-markup (1.3.3)
gitlab-elasticsearch-git (0.0.7)
activemodel (~> 4.2.0)
activesupport (~> 4.2.0)
charlock_holmes (~> 0.7.3)
elasticsearch-api (~> 1.0.15)
elasticsearch-model
github-linguist (~> 4.7.0)
rugged (~> 0.23.3)
gitlab-flowdock-git-hook (1.0.1)
flowdock (~> 0.7)
gitlab-grit (>= 2.4.1)
......@@ -920,6 +941,8 @@ DEPENDENCIES
diffy (~> 3.0.3)
doorkeeper (~> 2.2.0)
dropzonejs-rails (~> 0.7.1)
elasticsearch-model
elasticsearch-rails
email_reply_parser (~> 0.5.8)
email_spec (~> 1.6.0)
factory_girl_rails (~> 4.3.0)
......@@ -933,6 +956,7 @@ DEPENDENCIES
gemnasium-gitlab-service (~> 0.2)
github-linguist (~> 4.7.0)
github-markup (~> 1.3.1)
gitlab-elasticsearch-git
gitlab-flowdock-git-hook (~> 1.0.1)
gitlab-license (~> 0.0.4)
gitlab_emoji (~> 0.2.0)
......
module ApplicationSearch
extend ActiveSupport::Concern
included do
include Elasticsearch::Model
self.__elasticsearch__.client = Elasticsearch::Client.new(
host: Gitlab.config.elasticsearch.host,
port: Gitlab.config.elasticsearch.port
)
index_name [Rails.application.class.parent_name.downcase, self.name.downcase, Rails.env].join('-')
settings \
index: {
analysis: {
analyzer: {
default:{
tokenizer: "standard",
filter: ["standard", "lowercase", "my_stemmer"]
}
},
filter: {
my_stemmer: {
type: "stemmer",
name: "light_english"
}
}
}
}
if Gitlab.config.elasticsearch.enabled
after_commit on: :create do
ElasticIndexerWorker.perform_async(:index, self.class.to_s, self.id)
end
after_commit on: :update do
ElasticIndexerWorker.perform_async(:update, self.class.to_s, self.id)
end
after_commit on: :destroy do
ElasticIndexerWorker.perform_async(:delete, self.class.to_s, self.id)
end
end
end
module ClassMethods
def highlight_options(fields)
es_fields = fields.map { |field| field.split('^').first }.inject({}) do |memo, field|
memo[field.to_sym] = {}
memo
end
{ fields: es_fields }
end
def basic_query_hash(fields, query)
query_hash = if query.present?
{
query: {
filtered: {
query: {
multi_match: {
fields: fields,
query: query,
operator: :and
}
},
},
}
}
else
{
query: {
filtered: {
query: { match_all: {} }
}
},
track_scores: true
}
end
query_hash[:sort] = [
{ updated_at_sort: { order: :desc, mode: :min } },
:_score
]
query_hash[:highlight] = highlight_options(fields)
query_hash
end
def project_ids_filter(query_hash, project_ids)
if project_ids
query_hash[:query][:filtered][:filter] = {
and: [ { terms: { project_id: project_ids } } ]
}
end
query_hash
end
end
end
# Elasticsearch support for Issue: index mapping, indexed JSON payload
# and the issue search entry point.
module IssuesSearch
extend ActiveSupport::Concern
included do
include ApplicationSearch
mappings do
indexes :id, type: :integer
indexes :iid, type: :integer, index: :not_analyzed
indexes :title, type: :string,
index_options: 'offsets'
indexes :description, type: :string,
index_options: 'offsets'
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :state, type: :string
indexes :project_id, type: :integer
indexes :author_id, type: :integer
indexes :project, type: :nested
indexes :author, type: :nested
indexes :updated_at_sort, type: :date, index: :not_analyzed
end
# Document sent to Elasticsearch: regular attributes plus the ids of
# the associated project/author and the updated_at_sort sort key.
def as_indexed_json(options = {})
as_json(
include: {
project: { only: :id },
author: { only: :id }
}
).merge({ updated_at_sort: updated_at })
end
# Searches issues by title (boosted x2) and description, optionally
# limited to options[:projects_ids].
# NOTE(review): mutates the caller's options hash (options[:in]).
def self.elastic_search(query, options: {})
options[:in] = %w(title^2 description)
query_hash = basic_query_hash(options[:in], query)
query_hash = project_ids_filter(query_hash, options[:projects_ids])
self.__elasticsearch__.search(query_hash)
end
end
end
# Elasticsearch support for MergeRequest: index mapping, indexed JSON
# payload and the merge-request search entry point.
module MergeRequestsSearch
extend ActiveSupport::Concern
included do
include ApplicationSearch
mappings do
indexes :id, type: :integer
indexes :iid, type: :integer
indexes :target_branch, type: :string,
index_options: 'offsets'
indexes :source_branch, type: :string,
index_options: 'offsets'
indexes :title, type: :string,
index_options: 'offsets'
indexes :description, type: :string,
index_options: 'offsets'
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :state, type: :string
indexes :merge_status, type: :string
indexes :source_project_id, type: :integer
indexes :target_project_id, type: :integer
indexes :author_id, type: :integer
indexes :source_project, type: :nested
indexes :target_project, type: :nested
indexes :author, type: :nested
# NOTE(review): mapped as string here, while IssuesSearch/SnippetsSearch
# map updated_at_sort as type: :date — confirm which is intended.
indexes :updated_at_sort, type: :string, index: 'not_analyzed'
end
# Document sent to Elasticsearch: regular attributes plus the ids of
# the related projects/author and the updated_at_sort sort key.
def as_indexed_json(options = {})
as_json(
include: {
source_project: { only: :id },
target_project: { only: :id },
author: { only: :id }
}
).merge({ updated_at_sort: updated_at })
end
# Searches merge requests by title (boosted x2) and description.
# Unlike the other models this filters on target_project_id rather
# than project_id, so it builds the filter inline instead of using
# project_ids_filter.
def self.elastic_search(query, options: {})
query_hash = basic_query_hash(%w(title^2 description), query)
if options[:projects_ids]
query_hash[:query][:filtered][:filter] = {
and: [
{
terms: {
target_project_id: [options[:projects_ids]].flatten
}
}
]
}
end
self.__elasticsearch__.search(query_hash)
end
end
end
# Elasticsearch support for Milestone: index mapping, indexed JSON
# payload and the milestone search entry point.
module MilestonesSearch
extend ActiveSupport::Concern
included do
include ApplicationSearch
mappings do
indexes :id, type: :integer
indexes :title, type: :string,
index_options: 'offsets'
indexes :description, type: :string,
index_options: 'offsets'
indexes :project_id, type: :integer
indexes :created_at, type: :date
indexes :updated_at_sort, type: :string, index: 'not_analyzed'
end
# Document sent to Elasticsearch: attributes plus the sort key.
def as_indexed_json(options = {})
as_json.merge({ updated_at_sort: updated_at })
end
# Searches milestones by title (boosted x2) and description, optionally
# limited to options[:projects_ids].
# NOTE(review): mutates the caller's options hash (options[:in]).
def self.elastic_search(query, options: {})
options[:in] = %w(title^2 description)
query_hash = basic_query_hash(options[:in], query)
query_hash = project_ids_filter(query_hash, options[:projects_ids])
self.__elasticsearch__.search(query_hash)
end
end
end
# Elasticsearch support for Note: index mapping, indexed JSON payload
# and the note (comment) search entry point.
module NotesSearch
extend ActiveSupport::Concern
included do
include ApplicationSearch
mappings do
indexes :id, type: :integer
indexes :note, type: :string,
index_options: 'offsets'
indexes :project_id, type: :integer
indexes :created_at, type: :date
indexes :updated_at_sort, type: :string, index: 'not_analyzed'
end
# Document sent to Elasticsearch: attributes plus the sort key.
def as_indexed_json(options = {})
as_json.merge({ updated_at_sort: updated_at })
end
# Searches the single `note` field. Builds its query inline instead of
# using basic_query_hash: a plain match query, replaced by match_all
# (with track_scores) when the query is blank. Results are restricted
# to options[:projects_ids], sorted by updated_at_sort then score, and
# highlighted.
def self.elastic_search(query, options: {})
options[:in] = ["note"]
query_hash = {
query: {
filtered: {
query: { match: { note: query } },
},
}
}
if query.blank?
query_hash[:query][:filtered][:query] = { match_all: {} }
query_hash[:track_scores] = true
end
query_hash = project_ids_filter(query_hash, options[:projects_ids])
query_hash[:sort] = [
{ updated_at_sort: { order: :desc, mode: :min } },
:_score
]
query_hash[:highlight] = highlight_options(options[:in])
self.__elasticsearch__.search(query_hash)
end
end
end
# Elasticsearch support for Project: index mapping, indexed JSON payload
# and the project search entry point with its many optional filters.
module ProjectsSearch
  extend ActiveSupport::Concern

  included do
    include ApplicationSearch

    mappings do
      indexes :id, type: :integer
      indexes :name, type: :string,
        index_options: 'offsets'
      indexes :path, type: :string,
        index_options: 'offsets'
      indexes :name_with_namespace, type: :string,
        index_options: 'offsets'
      indexes :path_with_namespace, type: :string,
        index_options: 'offsets'
      indexes :description, type: :string,
        index_options: 'offsets'
      indexes :namespace_id, type: :integer
      indexes :created_at, type: :date
      indexes :archived, type: :boolean
      indexes :visibility_level, type: :integer
      indexes :last_activity_at, type: :date
      indexes :last_pushed_at, type: :date
    end

    # Document sent to Elasticsearch. The namespaced name/path are
    # methods, not columns, so they are merged on top of the regular
    # attributes.
    def as_indexed_json(options = {})
      as_json.merge({
        name_with_namespace: name_with_namespace,
        path_with_namespace: path_with_namespace
      })
    end

    # Searches projects by name/path fields (name boosted highest).
    #
    # Recognized options (each adds one filter): :abandoned, :with_push,
    # :namespace_id, :non_archived, :visibility_levels, :owner_id, :pids.
    # NOTE(review): mutates the caller's options hash (options[:in]).
    def self.elastic_search(query, options: {})
      options[:in] = %w(name^10 name_with_namespace^2 path_with_namespace path^9)

      query_hash = basic_query_hash(options[:in], query)

      filters = []

      # No push for roughly six months.
      if options[:abandoned]
        filters << {
          range: {
            last_pushed_at: {
              lte: "now-6M/m"
            }
          }
        }
      end

      # Pushed to at least once (last_pushed_at present and non-null).
      if options[:with_push]
        filters << {
          not: {
            missing: {
              field: :last_pushed_at,
              existence: true,
              null_value: true
            }
          }
        }
      end

      if options[:namespace_id]
        filters << {
          terms: {
            namespace_id: [options[:namespace_id]].flatten
          }
        }
      end

      # non_archived: true keeps only documents with archived: false.
      if options[:non_archived]
        filters << {
          terms: {
            archived: [!options[:non_archived]].flatten
          }
        }
      end

      if options[:visibility_levels]
        filters << {
          terms: {
            visibility_level: [options[:visibility_levels]].flatten
          }
        }
      end

      if !options[:owner_id].blank?
        filters << {
          nested: {
            path: :owner,
            filter: {
              term: { "owner.id" => options[:owner_id] }
            }
          }
        }
      end

      # Restrict to an explicit list of project ids.
      if options[:pids]
        filters << {
          ids: {
            values: options[:pids]
          }
        }
      end

      # Only attach the `and` filter when something was collected:
      # previously an empty `and: []` filter was sent whenever no filter
      # option was given, which is not a meaningful Elasticsearch filter.
      query_hash[:query][:filtered][:filter] = { and: filters } if filters.any?

      query_hash[:sort] = [:_score]

      self.__elasticsearch__.search(query_hash)
    end
  end
end
# Hooks Repository into elasticsearch-git commit/blob indexing.
module RepositoriesSearch
extend ActiveSupport::Concern
included do
include Elasticsearch::Git::Repository
# Use the cluster configured in gitlab.yml.
self.__elasticsearch__.client = Elasticsearch::Client.new(
host: Gitlab.config.elasticsearch.host,
port: Gitlab.config.elasticsearch.port
)
# Id used by Elasticsearch::Git to namespace this repository's documents.
def repository_id
project.id
end
def self.repositories_count
Project.count
end
# Client used by elasticsearch-git when writing index documents.
def client_for_indexing
self.__elasticsearch__.client
end
# Rebuilds the repository index from scratch, indexing commits and
# blobs for every existing, non-empty project repository.
def self.import
Repository.__elasticsearch__.create_index! force: true
Project.find_each do |project|
if project.repository.exists? && !project.repository.empty?
project.repository.index_commits
project.repository.index_blobs
end
end
end
end
end
# Elasticsearch support for Snippet: index mapping, indexed JSON payload
# and the title/code search entry points.
module SnippetsSearch
  extend ActiveSupport::Concern

  included do
    include ApplicationSearch

    mappings do
      indexes :id, type: :integer
      indexes :title, type: :string,
        index_options: 'offsets'
      indexes :file_name, type: :string,
        index_options: 'offsets'
      indexes :content, type: :string,
        index_options: 'offsets'
      indexes :created_at, type: :date
      indexes :updated_at, type: :date
      indexes :state, type: :string
      indexes :project_id, type: :integer
      indexes :author_id, type: :integer
      indexes :project, type: :nested
      indexes :author, type: :nested
      indexes :updated_at_sort, type: :date, index: :not_analyzed
    end

    # Document sent to Elasticsearch: attributes plus the ids of the
    # associated project/author. updated_at_sort is part of the mapping
    # above and is used by elastic_search_code's sort clause, so it must
    # be serialized too — previously it was omitted, leaving the sort
    # field unpopulated (every other searchable model merges it in).
    def as_indexed_json(options = {})
      as_json(
        include: {
          project: { only: :id },
          author: { only: :id }
        }
      ).merge({ updated_at_sort: updated_at })
    end

    # Searches snippets by title and file name, optionally limited to
    # the snippet ids in options[:ids].
    def self.elastic_search(query, options: {})
      query_hash = basic_query_hash(%w(title file_name), query)

      query_hash = limit_ids(query_hash, options[:ids])

      self.__elasticsearch__.search(query_hash)
    end

    # Searches snippet contents with a plain match query, sorted by
    # updated_at_sort (newest first) then score, highlighting `content`.
    def self.elastic_search_code(query, options: {})
      query_hash = {
        query: {
          filtered: {
            query: { match: { content: query } },
          },
        }
      }

      query_hash = limit_ids(query_hash, options[:ids])

      query_hash[:sort] = [
        { updated_at_sort: { order: :desc, mode: :min } },
        :_score
      ]

      query_hash[:highlight] = { fields: { content: {} } }

      self.__elasticsearch__.search(query_hash)
    end

    # Restricts a query to the given snippet ids (used to enforce
    # visibility). Mutates and returns query_hash; no-op when ids is nil.
    def self.limit_ids(query_hash, ids)
      if ids
        query_hash[:query][:filtered][:filter] = {
          and: [ { terms: { id: ids } } ]
        }
      end

      query_hash
    end
  end
end
# Hooks ProjectWiki into elasticsearch-git blob indexing.
module WikiRepositoriesSearch
extend ActiveSupport::Concern
included do
include Elasticsearch::Git::Repository
# Use the cluster configured in gitlab.yml.
self.__elasticsearch__.client = Elasticsearch::Client.new(
host: Gitlab.config.elasticsearch.host,
port: Gitlab.config.elasticsearch.port
)
# Prefixed id keeps wiki documents distinct from the project's
# repository documents (see RepositoriesSearch#repository_id).
def repository_id
"wiki_#{project.id}"
end
def self.repositories_count
Project.where(wiki_enabled: true).count
end
# Client used by elasticsearch-git when writing index documents.
def client_for_indexing
self.__elasticsearch__.client
end
# Rebuilds the wiki index from scratch, indexing the blobs of every
# non-empty wiki of projects that have wikis enabled.
def self.import
ProjectWiki.__elasticsearch__.create_index! force: true
Project.where(wiki_enabled: true).find_each do |project|
unless project.wiki.empty?
project.wiki.index_blobs
end
end
end
end
end
......@@ -8,11 +8,11 @@ module ApplicationSettingsHelper
end
def signup_enabled?
current_application_settings.signup_enabled?
current_application_settings.signup_enabled
end
def signin_enabled?
current_application_settings.signin_enabled?
current_application_settings.signin_enabled
end
def extra_sign_in_text
......
......@@ -17,4 +17,79 @@ module SnippetsHelper
snippet_path(snippet)
end
end
# Line numbers within `surrounding_lines` of `line`, clamped to the
# inclusive [min, max] bounds.
#
# @returns Array of line numbers
def bounded_line_numbers(line, min, max, surrounding_lines)
  first = [line - surrounding_lines, min].max
  last = [line + surrounding_lines, max].min
  (first..last).to_a
end
# Computes which line numbers belong in a snippet preview: every line
# containing the current `query`, plus `surrounding_lines` of context on
# each side, deduplicated and sorted so adjacent matches do not repeat
# their shared context.
#
# @returns Array, unique and sorted.
def matching_lines(lined_content, surrounding_lines)
  relevant = []
  lined_content.each_with_index do |text, number|
    next unless text.include?(query)
    relevant.concat(bounded_line_numbers(number, 0, lined_content.size, surrounding_lines))
  end
  relevant.uniq.sort
end
# 'Chunkify' entire snippet. Splits the snippet data into matching lines +
# surrounding_lines worth of unmatching lines, merging runs of consecutive
# line numbers into a single chunk.
#
# @param snippet the snippet whose content is previewed
# @param surrounding_lines [Integer] context lines around each match
# @returns a hash with {snippet_object, snippet_chunks:{data,start_line}}
#   (start_line is 1-based)
def chunk_snippet(snippet, surrounding_lines = 3)
lined_content = snippet.content.split("\n")
used_lines = matching_lines(lined_content, surrounding_lines)
snippet_chunk = []
snippet_chunks = []
snippet_start_line = 0
last_line = -1
# Go through each used line, and add consecutive lines as a single chunk
# to the snippet chunk array.
used_lines.each do |line_number|
if last_line < 0
# Start a new chunk.
snippet_start_line = line_number
snippet_chunk << lined_content[line_number]
elsif last_line == line_number - 1
# Consecutive line, continue chunk.
snippet_chunk << lined_content[line_number]
else
# Non-consecutive line, add chunk to chunk array.
snippet_chunks << {
data: snippet_chunk.join("\n"),
start_line: snippet_start_line + 1
}
# Start a new chunk.
snippet_chunk = [lined_content[line_number]]
snippet_start_line = line_number
end
last_line = line_number
end
# Add final chunk to chunk array
snippet_chunks << {
data: snippet_chunk.join("\n"),
start_line: snippet_start_line + 1
}
# Return snippet with chunk array
{ snippet_object: snippet, snippet_chunks: snippet_chunks }
end
end
......@@ -27,6 +27,8 @@ class Issue < ActiveRecord::Base
include Referable
include Sortable
include Taskable
include IssuesSearch
WEIGHT_RANGE = 1..9
ActsAsTaggableOn.strict_case_match = true
......@@ -39,6 +41,7 @@ class Issue < ActiveRecord::Base
scope :cared, ->(user) { where(assignee_id: user) }
scope :open_for, ->(user) { opened.assigned_to(user) }
scope :in_projects, ->(project_ids) { where(project_id: project_ids) }
state_machine :state, initial: :opened do
event :close do
......
......@@ -35,6 +35,7 @@ class MergeRequest < ActiveRecord::Base
include Referable
include Sortable
include Taskable
include MergeRequestsSearch
belongs_to :target_project, foreign_key: :target_project_id, class_name: "Project"
belongs_to :source_project, foreign_key: :source_project_id, class_name: "Project"
......
......@@ -24,6 +24,7 @@ class Milestone < ActiveRecord::Base
include Sortable
include Referable
include StripAttribute
include MilestonesSearch
belongs_to :project
has_many :issues
......
......@@ -26,6 +26,7 @@ class Note < ActiveRecord::Base
include Gitlab::CurrentSettings
include Participable
include Mentionable
include NotesSearch
default_value_for :system, false
......
......@@ -51,6 +51,7 @@ class Project < ActiveRecord::Base
include AfterCommitQueue
include CaseSensitivity
include TokenAuthenticatable
include ProjectsSearch
extend Gitlab::ConfigHelper
......@@ -259,6 +260,11 @@ class Project < ActiveRecord::Base
project.mirror_last_successful_update_at = timestamp
project.save
end
if Gitlab.config.elasticsearch.enabled
project.repository.index_blobs
project.repository.index_commits
end
end
after_transition started: :failed do |project, transaction|
......@@ -942,6 +948,10 @@ class Project < ActiveRecord::Base
false
end
def wiki
@wiki ||= ProjectWiki.new(self, self.owner)
end
def reference_issue_tracker?
default_issues_tracker? || jira_tracker_active?
end
......
class ProjectWiki
include Gitlab::ShellAdapter
include WikiRepositoriesSearch
MARKUPS = {
'Markdown' => :md,
......@@ -12,6 +13,8 @@ class ProjectWiki
# Returns a string describing what went wrong after
# an operation fails.
attr_reader :error_message
attr_reader :project
def initialize(project, user = nil)
@project = project
......
require 'securerandom'
class Repository
include RepositoriesSearch
class CommitError < StandardError; end
MIRROR_REMOTE = "upstream"
......@@ -106,6 +108,12 @@ class Repository
commits
end
# Full-text commit-message search backed by Elasticsearch.
# Maps the SHAs of the commit hits back to Commit objects via #commit.
def find_commits_by_message_with_elastic(query)
project.repository.search(query, type: :commit)[:commits][:results].map do |result|
commit result["_source"]["commit"]["sha"]
end
end
# Looks up a branch by exact name; nil when no branch matches.
def find_branch(name)
  raw_repository.branches.find { |b| b.name == name }
end
......@@ -659,6 +667,52 @@ class Repository
end
# Dispatches on the result's shape: git-grep results arrive as raw
# strings, Elasticsearch hits as hashes.
def parse_search_result(result)
  result.is_a?(String) ? parse_search_result_from_grep(result) : parse_search_result_from_elastic(result)
end
# Converts a raw Elasticsearch blob hit into the OpenStruct shape the
# search views consume (filename/ref/startline/data), extracting up to
# two lines of context around the first line containing the first
# highlighted term.
def parse_search_result_from_elastic(result)
  blob = result["_source"]["blob"]
  content = blob["content"]
  total_lines = content.lines.size

  # The highlighter wraps matches in gitlabelasticsearch→…←gitlabelasticsearch
  # markers; pull the first highlighted term out of the first fragment.
  term = result["highlight"]["blob.content"][0].match(/gitlabelasticsearch→(.*?)←gitlabelasticsearch/)[1]

  # Index of the first line containing the term; 0 when none matches.
  found_line_number = content.each_line.find_index { |line| line.include?(term) } || 0

  from = found_line_number >= 2 ? found_line_number - 2 : found_line_number
  to = (total_lines - found_line_number) > 3 ? found_line_number + 2 : found_line_number

  OpenStruct.new(
    filename: blob["path"],
    ref: blob["oid"],
    startline: from + 1,
    data: content.lines[from..to].join
  )
end
def parse_search_result_from_grep(result)
ref = nil
filename = nil
startline = 0
......
......@@ -21,6 +21,7 @@ class Snippet < ActiveRecord::Base
include Participable
include Referable
include Sortable
include SnippetsSearch
default_value_for :visibility_level, Snippet::PRIVATE
......
......@@ -61,6 +61,12 @@ class GitPushService
EventCreateService.new.push(project, user, @push_data)
project.execute_hooks(@push_data.dup, :push_hooks)
project.execute_services(@push_data.dup, :push_hooks)
if Gitlab.config.elasticsearch.enabled
project.repository.index_commits
project.repository.index_blobs
end
CreateCommitBuildsService.new.execute(project, @user, @push_data)
ProjectCacheWorker.perform_async(project.id)
end
......
......@@ -12,7 +12,11 @@ module Search
projects = projects.in_namespace(group.id) if group
project_ids = projects.pluck(:id)
Gitlab::SearchResults.new(project_ids, params[:search])
if Gitlab.config.elasticsearch.enabled
Gitlab::Elastic::SearchResults.new(project_ids, params[:search])
else
Gitlab::SearchResults.new(project_ids, params[:search])
end
end
end
end
......@@ -7,9 +7,15 @@ module Search
end
def execute
Gitlab::ProjectSearchResults.new(project.id,
params[:search],
params[:repository_ref])
if Gitlab.config.elasticsearch.enabled
Gitlab::Elastic::ProjectSearchResults.new(project.id,
params[:search],
params[:repository_ref])
else
Gitlab::ProjectSearchResults.new(project.id,
params[:search],
params[:repository_ref])
end
end
end
end
......@@ -8,7 +8,12 @@ module Search
def execute
snippet_ids = Snippet.accessible_to(current_user).pluck(:id)
Gitlab::SnippetSearchResults.new(snippet_ids, params[:search])
if Gitlab.config.elasticsearch.enabled
Gitlab::Elastic::SnippetSearchResults.new(snippet_ids, params[:search])
else
Gitlab::SnippetSearchResults.new(snippet_ids, params[:search])
end
end
end
end
# Sidekiq worker that keeps Elasticsearch in sync with database records.
# Enqueued from ApplicationSearch's after_commit hooks with an operation
# (:index/:update/:delete), the model class name and the record id.
class ElasticIndexerWorker
  include Sidekiq::Worker

  sidekiq_options queue: :elasticsearch

  # Shared client built from the gitlab.yml elasticsearch settings.
  Client = Elasticsearch::Client.new(host: Gitlab.config.elasticsearch.host,
                                     port: Gitlab.config.elasticsearch.port)

  def perform(operation, klass, record_id, options = {})
    # Snippet subclasses (e.g. "PersonalSnippet") share the Snippet index.
    klass = "Snippet" if klass =~ /Snippet$/
    cklass = klass.constantize

    # Match operations exactly. The previous `when /index|update/` and
    # `when /delete/` regex branches matched any operation string merely
    # containing those words (e.g. "reindex"), and `#{operation}_document`
    # would then be sent with the unvalidated string.
    case operation.to_s
    when 'index', 'update'
      record = cklass.find(record_id)
      record.__elasticsearch__.client = Client
      record.__elasticsearch__.__send__ "#{operation}_document"
    when 'delete'
      Client.delete index: cklass.index_name, type: cklass.document_type, id: record_id
    end
  end
end
......@@ -37,7 +37,7 @@ start_no_deamonize()
start_sidekiq()
{
bundle exec sidekiq -q post_receive -q mailers -q archive_repo -q system_hook -q project_web_hook -q gitlab_shell -q incoming_email -q runner -q common -q pages -q default -e $RAILS_ENV -P $sidekiq_pidfile "$@"
bundle exec sidekiq -q post_receive -q mailers -q archive_repo -q system_hook -q project_web_hook -q gitlab_shell -q incoming_email -q runner -q common -q pages -q default -q elasticsearch -e $RAILS_ENV -P $sidekiq_pidfile "$@"
}
load_ok()
......
......@@ -16,7 +16,8 @@ module Gitlab
#{config.root}/app/models/hooks
#{config.root}/app/models/concerns
#{config.root}/app/models/project_services
#{config.root}/app/models/members))
#{config.root}/app/models/members
#{config.root}/app/elastic))
# Only load the plugins named here, in the order given (default is alphabetical).
# :all can be used as a placeholder for all plugins not explicitly named.
......
......@@ -136,6 +136,14 @@ production: &base
# The location where LFS objects are stored (default: shared/lfs-objects).
# storage_path: shared/lfs-objects
## Elasticsearch (EE only)
# Enable it if you are going to use elasticsearch instead of
# regular database search
elasticsearch:
enabled: false
host: localhost
port: 9200
## GitLab Pages
pages:
enabled: false
......
......@@ -246,6 +246,12 @@ Settings.gitlab['restricted_signup_domains'] ||= []
Settings.gitlab['import_sources'] ||= ['github','bitbucket','gitlab','gitorious','google_code','fogbugz','git']
#
# Elasticsearch
#
Settings['elasticsearch'] ||= Settingslogic.new({})
Settings.elasticsearch['enabled'] = false if Settings.elasticsearch['enabled'].nil?
#
# CI
#
......
......@@ -77,6 +77,7 @@
- [Downgrade back to CE](downgrade_ee_to_ce/README.md) Follow this guide if you need to downgrade from EE to CE.
- [Git LFS configuration](workflow/lfs/lfs_administration.md)
- [GitLab Pages configuration](pages/administration.md)
- [Elasticsearch (EE-only)](integration/elasticsearch.md) Enable Elasticsearch
## Contributor documentation
......
# Elasticsearch integration
_**Note:** This feature was [introduced][ee-109] in GitLab EE 8.4._
---
[Elasticsearch] is a flexible, scalable and powerful search service.
If you want to keep GitLab's search fast when dealing with a huge amount of data,
you should consider [enabling Elasticsearch](#enable-elasticsearch).
GitLab leverages the search capabilities of Elasticsearch and enables it when
searching in:
- GitLab application
- issues
- merge requests
- milestones
- notes
- projects
- repositories
- snippets
- wiki repositories
Once the data is added to the database, search indexes will be updated
automatically. Elasticsearch can be installed on the same machine as GitLab
or on a separate server.
## Install Elasticsearch
Providing detailed information on installing Elasticsearch is out of the scope
of this document.
You can follow the steps as described in the [official web site][install] or
use the packages that are available for your OS.
## Enable Elasticsearch
In order to enable Elasticsearch you need to have access to the server that
GitLab is hosted on.
The following three parameters are needed to enable Elasticsearch:
| Parameter | Description |
| --------- | ----------- |
| `enabled` | Enables/disables the Elasticsearch integration. Can be either `true` or `false` |
| `host` | The host where Elasticsearch is installed on. Can be either an IP or a domain name which correctly resolves to an IP. It can be changed in the [Elasticsearch configuration settings][elastic-settings]. The default value is `localhost` |
| `port` | The TCP port that Elasticsearch listens to. It can be changed in the [Elasticsearch configuration settings][elastic-settings]. The default value is `9200` |
### Enable Elasticsearch in Omnibus installations
If you have used one of the [Omnibus packages][pkg] to install GitLab, all
you have to do is edit `/etc/gitlab/gitlab.rb` and add the following lines:
```ruby
gitlab_rails['elasticsearch'] = [
{
"enabled" => "true",
"host" => "localhost",
"port" => 9200
}
]
```
Replace the values as you see fit according to the
[settings table above](#enable-elasticsearch).
Save the file and reconfigure GitLab for the changes to take effect:
`sudo gitlab-ctl reconfigure`.
As a last step, move on to
[add GitLab's data to the Elasticsearch index](#add-gitlabs-data-to-the-elasticsearch-index).
### Enable Elasticsearch in source installations
If you have installed GitLab from source, edit `/home/git/gitlab/config/gitlab.yml`:
```yaml
elasticsearch:
enabled: true
host: localhost
port: 9200
```
Replace the values as you see fit according to the
[settings table above](#enable-elasticsearch).
Save the file and restart GitLab for the changes to take effect:
`sudo service gitlab restart`.
As a last step, move on to
[add GitLab's data to the Elasticsearch index](#add-gitlabs-data-to-the-elasticsearch-index).
## Add GitLab's data to the Elasticsearch index
After [enabling Elasticsearch](#enable-elasticsearch), you must run the
following rake tasks to add GitLab's data to the Elasticsearch index.
It might take a while depending on how big your Git repositories are.
---
To index all your repositories:
```
# omnibus installations
sudo gitlab-rake gitlab:elastic:index_repositories
# installations from source
bundle exec rake gitlab:elastic:index_repositories RAILS_ENV=production
```
To index all wikis:
```
# omnibus installations
sudo gitlab-rake gitlab:elastic:index_wikis
# installations from source
bundle exec rake gitlab:elastic:index_wikis RAILS_ENV=production
```
To index all database entities:
```
# omnibus installations
sudo gitlab-rake gitlab:elastic:index_database
# installations from source
bundle exec rake gitlab:elastic:index_database RAILS_ENV=production
```
## Disable Elasticsearch
Disabling the Elasticsearch integration is as easy as setting `enabled` to
`false` in your GitLab settings. See [Enable Elasticsearch](#enable-elasticsearch)
to find where those settings are and don't forget to reconfigure/restart GitLab
for the changes to take effect.
[ee-109]: https://gitlab.com/gitlab-org/gitlab-ee/merge_requests/109 "Elasticsearch Merge Request"
[elasticsearch]: https://www.elastic.co/products/elasticsearch "Elasticsearch website"
[install]: https://www.elastic.co/guide/en/elasticsearch/reference/current/_installation.html "Elasticsearch installation documentation"
[pkg]: https://about.gitlab.com/downloads/ "Download Omnibus GitLab"
[elastic-settings]: https://www.elastic.co/guide/en/elasticsearch/reference/current/setup-configuration.html#settings "Elasticsearch configuration settings"
module Gitlab
  module Elastic
    # Elasticsearch-backed search results scoped to a single project.
    # Extends the global SearchResults with the project-only scopes:
    # notes, blobs, wiki blobs and commits.
    class ProjectSearchResults < SearchResults
      attr_reader :project, :repository_ref

      def initialize(project_id, query, repository_ref = nil)
        @project = Project.find(project_id)
        @repository_ref = if repository_ref.present?
                            repository_ref
                          else
                            nil
                          end
        @query = query
      end

      def objects(scope, page = nil)
        case scope
        when 'notes'
          notes.records.page(page).per(per_page)
        when 'blobs'
          blobs.page(page).per(per_page)
        when 'wiki_blobs'
          wiki_blobs.page(page).per(per_page)
        when 'commits'
          Kaminari.paginate_array(commits).page(page).per(per_page)
        else
          super
        end
      end

      def total_count
        @total_count ||= issues_count + merge_requests_count + blobs_count +
          notes_count + wiki_blobs_count + commits_count
      end

      def blobs_count
        @blobs_count ||= blobs.total_count
      end

      def notes_count
        @notes_count ||= notes.total_count
      end

      def wiki_blobs_count
        @wiki_blobs_count ||= wiki_blobs.total_count
      end

      def commits_count
        @commits_count ||= commits.count
      end

      private

      # Code search. Only the default branch is indexed, so searches
      # against any other ref fall back to git grep (search_files).
      def blobs
        if project.empty_repo? || query.blank?
          Kaminari.paginate_array([])
        else
          # We use elastic for default branch only
          if root_ref?
            project.repository.search(
              query,
              type: :blob,
              options: { highlight: true }
            )[:blobs][:results].response
          else
            Kaminari.paginate_array(
              project.repository.search_files(query, repository_ref)
            )
          end
        end
      end

      def wiki_blobs
        if project.wiki_enabled? && !project.wiki.empty? && query.present?
          project.wiki.search(
            query,
            type: :blob,
            options: { highlight: true }
          )[:blobs][:results].response
        else
          Kaminari.paginate_array([])
        end
      end

      def notes
        # NotesSearch.elastic_search reads options[:projects_ids]; the
        # previous :project_ids key was silently ignored, so note search
        # was not scoped to this project at all.
        opt = {
          projects_ids: limit_project_ids
        }

        Note.elastic_search(query, options: opt)
      end

      # Commit-message search: Elasticsearch on the default branch,
      # git-log fallback for any other ref.
      def commits
        if project.empty_repo? || query.blank?
          Kaminari.paginate_array([])
        else
          # We use elastic for default branch only
          if root_ref?
            project.repository.find_commits_by_message_with_elastic(query)
          else
            Kaminari.paginate_array(
              project.repository.find_commits_by_message(query).compact
            )
          end
        end
      end

      def limit_project_ids
        [project.id]
      end

      def root_ref?
        !repository_ref || project.root_ref?(repository_ref)
      end
    end
  end
end
module Gitlab
module Elastic
# Elasticsearch-backed equivalent of Gitlab::SearchResults for global
# search: projects, issues, merge requests and milestones.
class SearchResults
attr_reader :query
# Limit search results by passed project ids
# It allows us to search only for projects user has access to
attr_reader :limit_project_ids
def initialize(limit_project_ids, query)
# NOTE(review): the fallback is Project.all (a relation of records),
# while the filters below treat this as a list of ids — confirm
# callers always pass ids and whether the fallback is ever hit.
@limit_project_ids = limit_project_ids || Project.all
# NOTE(review): Shellwords.shellescape escapes shell metacharacters;
# its relevance to an Elasticsearch query string should be confirmed.
@query = Shellwords.shellescape(query) if query.present?
end
# Paginated records for the given scope; empty page for unknown scopes.
def objects(scope, page = nil)
case scope
when 'projects'
projects.records.page(page).per(per_page)
when 'issues'
issues.records.page(page).per(per_page)
when 'merge_requests'
merge_requests.records.page(page).per(per_page)
when 'milestones'
milestones.records.page(page).per(per_page)
else
Kaminari.paginate_array([])
end
end
def total_count
@total_count ||= projects_count + issues_count + merge_requests_count + milestones_count
end
def projects_count
@projects_count ||= projects.total_count
end
def issues_count
@issues_count ||= issues.total_count
end
def merge_requests_count
@merge_requests_count ||= merge_requests.total_count
end
def milestones_count
@milestones_count ||= milestones.total_count
end
def empty?
total_count.zero?
end
private
def projects
opt = {
pids: limit_project_ids
}
@projects = Project.elastic_search(query, options: opt)
end
def issues
opt = {
projects_ids: limit_project_ids
}
# Queries ending in "#123" are treated as an iid lookup instead of a
# full-text search.
if query =~ /#(\d+)\z/
Issue.in_projects(limit_project_ids).where(iid: $1)
else
Issue.elastic_search(query, options: opt)
end
end
def milestones
opt = {
projects_ids: limit_project_ids
}
Milestone.elastic_search(query, options: opt)
end
def merge_requests
opt = {
projects_ids: limit_project_ids
}
# Queries ending in "#123" or "!123" are treated as an iid lookup.
if query =~ /[#!](\d+)\z/
MergeRequest.in_projects(limit_project_ids).where(iid: $1)
else
MergeRequest.elastic_search(query, options: opt)
end
end
def default_scope
'projects'
end
def per_page
20
end
end
end
end
module Gitlab
module Elastic
# Elasticsearch-backed snippet search. Inherits the non-elastic result
# class and overrides the scopes that hit the index.
class SnippetSearchResults < ::Gitlab::SnippetSearchResults
def objects(scope, page = nil)
case scope
when 'snippet_titles'
snippet_titles.records.page(page).per(per_page)
when 'snippet_blobs'
# We process whole list of items then paginate it. Not too smart
# Should be refactored in the CE side first to prevent conflicts hell
Kaminari.paginate_array(
snippet_blobs.records.map do |snippet|
chunk_snippet(snippet)
end
).page(page).per(per_page)
else
super
end
end
private
# Title/file-name search limited to the accessible snippet ids.
def snippet_titles
opt = {
ids: limit_snippet_ids
}
Snippet.elastic_search(query, options: opt)
end
# Snippet content search limited to the accessible snippet ids.
def snippet_blobs
opt = {
ids: limit_snippet_ids
}
Snippet.elastic_search_code(query, options: opt)
end
end
end
end
module Gitlab
class SnippetSearchResults < SearchResults
include SnippetsHelper
attr_reader :limit_snippet_ids
def initialize(limit_snippet_ids, query)
......@@ -47,85 +49,5 @@ module Gitlab
# Scope shown by default on the snippet search page.
def default_scope
'snippet_blobs'
end
# Line numbers around `line` (context size comes from #surrounding_lines),
# clamped to the inclusive [min, max] bounds.
#
# @returns Array of line numbers
def bounded_line_numbers(line, min, max)
  first = [line - surrounding_lines, min].max
  last = [line + surrounding_lines, max].min
  (first..last).to_a
end
# Returns a sorted set of lines to be included in a snippet preview.
# This ensures matching adjacent lines do not display duplicated
# surrounding code. Depends on #query (the current search term) and
# #bounded_line_numbers.
#
# @returns Array, unique and sorted.
def matching_lines(lined_content)
used_lines = []
lined_content.each_with_index do |line, line_number|
used_lines.concat bounded_line_numbers(
line_number,
0,
lined_content.size
) if line.include?(query)
end
used_lines.uniq.sort
end
# 'Chunkify' entire snippet. Splits the snippet data into matching lines +
# surrounding_lines() worth of unmatching lines, merging runs of
# consecutive matching line numbers into a single chunk.
#
# @returns a hash with {snippet_object, snippet_chunks:{data,start_line}}
#   (start_line is 1-based)
def chunk_snippet(snippet)
lined_content = snippet.content.split("\n")
used_lines = matching_lines(lined_content)
snippet_chunk = []
snippet_chunks = []
snippet_start_line = 0
last_line = -1
# Go through each used line, and add consecutive lines as a single chunk
# to the snippet chunk array.
used_lines.each do |line_number|
if last_line < 0
# Start a new chunk.
snippet_start_line = line_number
snippet_chunk << lined_content[line_number]
elsif last_line == line_number - 1
# Consecutive line, continue chunk.
snippet_chunk << lined_content[line_number]
else
# Non-consecutive line, add chunk to chunk array.
snippet_chunks << {
data: snippet_chunk.join("\n"),
start_line: snippet_start_line + 1
}
# Start a new chunk.
snippet_chunk = [lined_content[line_number]]
snippet_start_line = line_number
end
last_line = line_number
end
# Add final chunk to chunk array
snippet_chunks << {
data: snippet_chunk.join("\n"),
start_line: snippet_start_line + 1
}
# Return snippet with chunk array
{ snippet_object: snippet, snippet_chunks: snippet_chunks }
end
# Defines how many unmatching context lines should be
# included around each matching line in a snippet preview.
def surrounding_lines
3
end
end
end
# Rake tasks that (re)build the Elasticsearch indexes from scratch.
namespace :gitlab do
  namespace :elastic do
    desc "Indexing repositories"
    task index_repositories: :environment do
      Repository.import
    end

    desc "Indexing all wikis"
    task index_wikis: :environment do
      ProjectWiki.import
    end

    desc "Create indexes in the Elasticsearch from database records"
    task index_database: :environment do
      [Project, Issue, MergeRequest, Snippet, Note, Milestone].each do |klass|
        # force: true recreates the index so the task can be re-run,
        # matching what Repository.import/ProjectWiki.import do.
        klass.__elasticsearch__.create_index! force: true
        klass.import
      end
    end
  end
end
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment