Commit 29dbcccb authored by Nick Thomas's avatar Nick Thomas

Merge branch 'elasticsearch_v6' into 'master'

Elasticsearch v6 - migrate from parent/child relationships to joins

Closes #4218

See merge request gitlab-org/gitlab-ee!7618
parents caf0a2e9 8e03c123
......@@ -96,13 +96,13 @@ stages:
services:
- postgres:9.2
- redis:alpine
- docker.elastic.co/elasticsearch/elasticsearch:5.5.2
- docker.elastic.co/elasticsearch/elasticsearch:5.6.12
.use-mysql-with-elasticsearch: &use-mysql-with-elasticsearch
services:
- mysql:5.7
- redis:alpine
- docker.elastic.co/elasticsearch/elasticsearch:5.5.2
- docker.elastic.co/elasticsearch/elasticsearch:5.6.12
# END EE-only service helpers
.rails5-variables: &rails5-variables
......
# frozen_string_literal: true
class IssueAssignee < ActiveRecord::Base
prepend EE::IssueAssignee
belongs_to :issue
belongs_to :assignee, class_name: "User", foreign_key: :user_id
# EE-specific
after_commit :update_elasticsearch_index, on: [:create, :destroy]
# EE-specific
def update_elasticsearch_index
if Gitlab::CurrentSettings.current_application_settings.elasticsearch_indexing?
ElasticIndexerWorker.perform_async(
:update,
'Issue',
issue.id,
changed_fields: ['assignee_ids']
)
end
end
end
......@@ -8,7 +8,7 @@
# A note of this type is never resolvable.
class LegacyDiffNote < Note
# Elastic search configuration (it does not support STI properly)
document_type 'note'
document_type 'doc'
index_name [Rails.application.class.parent_name.downcase, Rails.env].join('-')
include Elastic::NotesSearch
......
......@@ -2,7 +2,7 @@
class PersonalSnippet < Snippet
# Elastic search configuration (it does not support STI)
document_type 'snippet'
document_type 'doc'
index_name [Rails.application.class.parent_name.downcase, Rails.env].join('-')
include Elastic::SnippetsSearch
include WithUploads
......
......@@ -59,7 +59,7 @@ class ProjectFeature < ActiveRecord::Base
after_commit on: :update do
if Gitlab::CurrentSettings.current_application_settings.elasticsearch_indexing?
ElasticIndexerWorker.perform_async(:update, 'Project', project_id)
ElasticIndexerWorker.perform_async(:update, 'Project', project_id, project.es_id)
end
end
......
......@@ -2,7 +2,7 @@
class ProjectSnippet < Snippet
# Elastic search configuration (it does not support STI)
document_type 'snippet'
document_type 'doc'
index_name [Rails.application.class.parent_name.downcase, Rails.env].join('-')
include Elastic::SnippetsSearch
......
......@@ -2,6 +2,10 @@
require 'gitlab/current_settings'
Elasticsearch::Model::Response::Records.prepend GemExtensions::Elasticsearch::Model::Response::Records
Elasticsearch::Model::Adapter::Multiple::Records.prepend GemExtensions::Elasticsearch::Model::Adapter::Multiple::Records
Elasticsearch::Model::Indexing::InstanceMethods.prepend GemExtensions::Elasticsearch::Model::Indexing::InstanceMethods
module Elasticsearch
module Model
module Client
......
......@@ -9,19 +9,19 @@ Information on how to enable ElasticSearch and perform the initial indexing is k
It is recommended to use the Docker image. After installing Docker, you can immediately spin up an instance with:
```
docker run --name elastic55 -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:5.5.3
docker run --name elastic56 -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:5.6.12
```
and use `docker stop elastic55` and `docker start elastic55` to stop/start it.
and use `docker stop elastic56` and `docker start elastic56` to stop/start it.
### Installing on the host
We currently only support Elasticsearch [up to 5.5](https://docs.gitlab.com/ee/integration/elasticsearch.html#requirements), but `brew` only has elasticsearch 6, 5.6, and 2.4 available. While 2.4 would work you probably want to test things out in the latest one we support.
We currently only support Elasticsearch [5.6 to 6.x](https://docs.gitlab.com/ee/integration/elasticsearch.html#requirements).
In order to install 5.5.2, you would usually have to hunt down an old homebrew-core commit that contains the recipe for it. We've already done the work for you. Simply run:
Version 5.6 is available on homebrew and is the recommended version to use in order to test compatibility.
```
brew install https://raw.githubusercontent.com/Homebrew/homebrew-core/f1a767645f61112762f05e68a610d89b161faa99/Formula/elasticsearch.rb
brew install elasticsearch@5.6
```
There is no need to install any plugins.
......
......@@ -17,8 +17,7 @@ special searches:
| -------------- | --------------------- |
| GitLab Enterprise Edition 8.4 - 8.17 | Elasticsearch 2.4 with [Delete By Query Plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/2.4/plugins-delete-by-query.html) installed |
| GitLab Enterprise Edition 9.0+ | Elasticsearch 5.1 - 5.5 |
Elasticsearch 6.0+ is not supported currently. [We will support 6.0+ in the future.](https://gitlab.com/gitlab-org/gitlab-ee/issues/4218)
| GitLab Enterprise Edition 11.4+ | Elasticsearch 5.6 - 6.x |
## Installing Elasticsearch
......
# frozen_string_literal: true
module EE
module SearchHelper
extend ::Gitlab::Utils::Override
......@@ -12,7 +13,7 @@ module EE
override :find_project_for_result_blob
# rubocop: disable CodeReuse/ActiveRecord
def find_project_for_result_blob(result)
super || ::Project.find_by(id: result['_parent'])
super || ::Project.find_by(id: result.dig('_source', 'join_field', 'parent')&.split('_')&.last)
end
# rubocop: enable CodeReuse/ActiveRecord
......
# frozen_string_literal: true
module Elastic
module ApplicationSearch
extend ActiveSupport::Concern
......@@ -7,6 +8,9 @@ module Elastic
index_name [Rails.application.class.parent_name.downcase, Rails.env].join('-')
# ES6 requires a single type per index
document_type 'doc'
settings \
index: {
analysis: {
......@@ -37,9 +41,159 @@ module Elastic
}
}
# Since we can't have multiple types in ES6, but want to be able to use JOINs, we must declare all our
# fields together instead of per model
mappings do
### Shared fields
indexes :id, type: :integer
indexes :created_at, type: :date
indexes :updated_at, type: :date
# ES6-compatible way of having a parent; this join field is shared by all the types stored in this index.
# Please note that if we add a parent to `project` we'll have to use that "grand-parent" as the routing value
# for all children of project - therefore it is not advised.
indexes :join_field, type: :join,
relations: {
project: %i(
issue
merge_request
milestone
note
blob
wiki_blob
commit
)
}
# ES6 requires a single type per index, so we implement our own "type"
indexes :type, type: :keyword
indexes :iid, type: :integer
indexes :title, type: :text,
index_options: 'offsets'
indexes :description, type: :text,
index_options: 'offsets'
indexes :state, type: :text
indexes :project_id, type: :integer
indexes :author_id, type: :integer
## Projects and Snippets
indexes :visibility_level, type: :integer
### ISSUES
indexes :confidential, type: :boolean
# The field assignee_id does not exist in issues table anymore.
# Nevertheless we'll keep this field as is because we don't want users to rebuild index
# + the ES treats arrays transparently so
# to any integer field you can write any array of integers and you don't have to change mapping.
# Moreover, you can query those items just like a single integer value.
indexes :assignee_id, type: :integer
### MERGE REQUESTS
indexes :target_branch, type: :text,
index_options: 'offsets'
indexes :source_branch, type: :text,
index_options: 'offsets'
indexes :merge_status, type: :text
indexes :source_project_id, type: :integer
indexes :target_project_id, type: :integer
### NOTES
indexes :note, type: :text,
index_options: 'offsets'
indexes :issue do
indexes :assignee_id, type: :integer
indexes :author_id, type: :integer
indexes :confidential, type: :boolean
end
# ES6 gets rid of "index: :not_analyzed" option, but a keyword type behaves the same
# as it is not analyzed and is only searchable by its exact value.
indexes :noteable_type, type: :keyword
indexes :noteable_id, type: :keyword
### PROJECTS
indexes :name, type: :text,
index_options: 'offsets'
indexes :path, type: :text,
index_options: 'offsets'
indexes :name_with_namespace, type: :text,
index_options: 'offsets',
analyzer: :my_ngram_analyzer
indexes :path_with_namespace, type: :text,
index_options: 'offsets'
indexes :namespace_id, type: :integer
indexes :archived, type: :boolean
indexes :issues_access_level, type: :integer
indexes :merge_requests_access_level, type: :integer
indexes :snippets_access_level, type: :integer
indexes :wiki_access_level, type: :integer
indexes :repository_access_level, type: :integer
indexes :last_activity_at, type: :date
indexes :last_pushed_at, type: :date
### SNIPPETS
indexes :file_name, type: :text,
index_options: 'offsets'
indexes :content, type: :text,
index_options: 'offsets'
### REPOSITORIES
indexes :blob do
indexes :id, type: :text,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :rid, type: :keyword
indexes :oid, type: :text,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :commit_sha, type: :text,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :path, type: :text,
analyzer: :path_analyzer
indexes :file_name, type: :text,
analyzer: :code_analyzer,
search_analyzer: :code_search_analyzer
indexes :content, type: :text,
index_options: 'offsets',
analyzer: :code_analyzer,
search_analyzer: :code_search_analyzer
indexes :language, type: :keyword
end
indexes :commit do
indexes :id, type: :text,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :rid, type: :keyword
indexes :sha, type: :text,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :author do
indexes :name, type: :text, index_options: 'offsets'
indexes :email, type: :text, index_options: 'offsets'
indexes :time, type: :date, format: :basic_date_time_no_millis
end
indexes :commiter do
indexes :name, type: :text, index_options: 'offsets'
indexes :email, type: :text, index_options: 'offsets'
indexes :time, type: :date, format: :basic_date_time_no_millis
end
indexes :message, type: :text, index_options: 'offsets'
end
end
after_commit on: :create do
if Gitlab::CurrentSettings.elasticsearch_indexing? && self.searchable?
ElasticIndexerWorker.perform_async(:index, self.class.to_s, self.id)
ElasticIndexerWorker.perform_async(:index, self.class.to_s, self.id, self.es_id)
end
end
......@@ -49,6 +203,7 @@ module Elastic
:update,
self.class.to_s,
self.id,
self.es_id,
changed_fields: self.previous_changes.keys
)
end
......@@ -60,7 +215,8 @@ module Elastic
:delete,
self.class.to_s,
self.id,
project_id: self.es_parent
self.es_id,
es_parent: self.es_parent
)
end
end
......@@ -70,8 +226,26 @@ module Elastic
true
end
def generic_attributes
{
'join_field' => {
'name' => es_type,
'parent' => es_parent
},
'type' => es_type
}
end
def es_parent
project_id if respond_to?(:project_id)
"project_#{project_id}" unless is_a?(Project) || self&.project_id.nil?
end
def es_type
self.class.es_type
end
def es_id
"#{es_type}_#{id}"
end
# Some attributes are actually complicated methods. Bad data can cause
......@@ -91,6 +265,10 @@ module Elastic
false
end
def es_type
name.underscore
end
def highlight_options(fields)
es_fields = fields.map { |field| field.split('^').first }.each_with_object({}) do |field, memo|
memo[field.to_sym] = {}
......@@ -99,9 +277,11 @@ module Elastic
{ fields: es_fields }
end
def import_with_parent(options = {})
def es_import(options = {})
transform = lambda do |r|
{ index: { _id: r.id, _parent: r.es_parent, data: r.__elasticsearch__.as_indexed_json } }
{ index: { _id: r.es_id, data: r.__elasticsearch__.as_indexed_json } }.tap do |data|
data[:index][:routing] = r.es_parent if r.es_parent
end
end
options[:transform] = transform
......@@ -120,6 +300,9 @@ module Elastic
query: query,
default_operator: :and
}
}],
filter: [{
term: { type: self.es_type }
}]
}
}
......
......@@ -5,29 +5,6 @@ module Elastic
included do
include ApplicationSearch
mappings _parent: { type: 'project' } do
indexes :id, type: :integer
indexes :iid, type: :integer
indexes :title, type: :text,
index_options: 'offsets'
indexes :description, type: :text,
index_options: 'offsets'
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :state, type: :text
indexes :project_id, type: :integer
indexes :author_id, type: :integer
# The field assignee_id does not exist in issues table anymore.
# Nevertheless we'll keep this field as is because we don't want users to rebuild index
# + the ES treats arrays transparently so
# to any integer field you can write any array of integers and you don't have to change mapping.
# More over you can query those items just like a single integer value.
indexes :assignee_id, type: :integer
indexes :confidential, type: :boolean
end
def as_indexed_json(options = {})
data = {}
......@@ -39,7 +16,7 @@ module Elastic
data['assignee_id'] = safely_read_attribute_for_elasticsearch(:assignee_ids)
data
data.merge(generic_attributes)
end
def self.nested?
......
......@@ -5,26 +5,6 @@ module Elastic
included do
include ApplicationSearch
mappings _parent: { type: 'project' } do
indexes :id, type: :integer
indexes :iid, type: :integer
indexes :target_branch, type: :text,
index_options: 'offsets'
indexes :source_branch, type: :text,
index_options: 'offsets'
indexes :title, type: :text,
index_options: 'offsets'
indexes :description, type: :text,
index_options: 'offsets'
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :state, type: :text
indexes :merge_status, type: :text
indexes :source_project_id, type: :integer
indexes :target_project_id, type: :integer
indexes :author_id, type: :integer
end
def as_indexed_json(options = {})
# We don't use as_json(only: ...) because it calls all virtual and serialized attributes
# https://gitlab.com/gitlab-org/gitlab-ee/issues/349
......@@ -48,11 +28,11 @@ module Elastic
data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
end
data
data.merge(generic_attributes)
end
def es_parent
target_project_id
"project_#{target_project_id}"
end
def self.nested?
......
......@@ -5,17 +5,6 @@ module Elastic
included do
include ApplicationSearch
mappings _parent: { type: 'project' } do
indexes :id, type: :integer
indexes :title, type: :text,
index_options: 'offsets'
indexes :description, type: :text,
index_options: 'offsets'
indexes :project_id, type: :integer
indexes :created_at, type: :date
indexes :updated_at, type: :date
end
def as_indexed_json(options = {})
# We don't use as_json(only: ...) because it calls all virtual and serialized attributes
# https://gitlab.com/gitlab-org/gitlab-ee/issues/349
......@@ -25,7 +14,7 @@ module Elastic
data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
end
data
data.merge(generic_attributes)
end
def self.nested?
......
......@@ -5,24 +5,6 @@ module Elastic
included do
include ApplicationSearch
mappings _parent: { type: 'project' } do
indexes :id, type: :integer
indexes :note, type: :text,
index_options: 'offsets'
indexes :project_id, type: :integer
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :issue do
indexes :assignee_id, type: :integer
indexes :author_id, type: :integer
indexes :confidential, type: :boolean
end
indexes :noteable_type, type: :string, index: :not_analyzed
indexes :noteable_id, type: :integer, index: :not_analyzed
end
def self.inherited(subclass)
super
......@@ -31,6 +13,10 @@ module Elastic
subclass.__elasticsearch__.instance_variable_set(:@mapping, self.mapping.dup)
end
def es_type
'note'
end
def as_indexed_json(options = {})
data = {}
......@@ -48,7 +34,7 @@ module Elastic
}
end
data
data.merge(generic_attributes)
end
def self.nested?
......
......@@ -13,35 +13,6 @@ module Elastic
included do
include ApplicationSearch
mappings do
indexes :id, type: :integer
indexes :name, type: :text,
index_options: 'offsets'
indexes :path, type: :text,
index_options: 'offsets'
indexes :name_with_namespace, type: :text,
index_options: 'offsets',
analyzer: :my_ngram_analyzer
indexes :path_with_namespace, type: :text,
index_options: 'offsets'
indexes :description, type: :text,
index_options: 'offsets'
indexes :namespace_id, type: :integer
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :archived, type: :boolean
indexes :visibility_level, type: :integer
indexes :issues_access_level, type: :integer
indexes :merge_requests_access_level, type: :integer
indexes :snippets_access_level, type: :integer
indexes :wiki_access_level, type: :integer
indexes :repository_access_level, type: :integer
indexes :last_activity_at, type: :date
indexes :last_pushed_at, type: :date
end
def as_indexed_json(options = {})
# We don't use as_json(only: ...) because it calls all virtual and serialized attributes
# https://gitlab.com/gitlab-org/gitlab-ee/issues/349
......@@ -64,6 +35,12 @@ module Elastic
data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
end
# Set it as a parent in our `project => child` JOIN field
data['join_field'] = es_type
# ES6 is now single-type per index, so we implement our own typing
data['type'] = 'project'
TRACKED_FEATURE_SETTINGS.each do |feature|
data[feature] = project_feature.public_send(feature) # rubocop:disable GitlabSecurity/PublicSend
end
......
......@@ -11,6 +11,10 @@ module Elastic
project.id
end
def es_type
'blob'
end
delegate :id, to: :project, prefix: true
def client_for_indexing
......
......@@ -5,22 +5,6 @@ module Elastic
included do
include ApplicationSearch
mappings do
indexes :id, type: :integer
indexes :title, type: :text,
index_options: 'offsets'
indexes :file_name, type: :text,
index_options: 'offsets'
indexes :content, type: :text,
index_options: 'offsets'
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :state, type: :text
indexes :project_id, type: :integer
indexes :author_id, type: :integer
indexes :visibility_level, type: :integer
end
def as_indexed_json(options = {})
# We don't use as_json(only: ...) because it calls all virtual and serialized attributes
# https://gitlab.com/gitlab-org/gitlab-ee/issues/349
......@@ -40,6 +24,9 @@ module Elastic
data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
end
# ES6 is now single-type per index, so we implement our own typing
data['type'] = es_type
data
end
......
......@@ -11,6 +11,10 @@ module Elastic
"wiki_#{project.id}"
end
def es_type
'wiki_blob'
end
delegate :id, to: :project, prefix: true
def client_for_indexing
......
# frozen_string_literal: true
module EE
  # EE-only additions to IssueAssignee: after an assignee row is created or
  # destroyed, ask ElasticIndexerWorker to update the owning issue's document.
  module IssueAssignee
    extend ActiveSupport::Concern

    prepended do
      after_commit :update_elasticsearch_index, on: %i[create destroy]
    end

    # Enqueues an `:update` job for the issue, flagging `assignee_ids` as the
    # changed field. Does nothing when Elasticsearch indexing is disabled in
    # the application settings.
    def update_elasticsearch_index
      settings = ::Gitlab::CurrentSettings.current_application_settings
      return unless settings.elasticsearch_indexing?

      ::ElasticIndexerWorker.perform_async(:update, 'Issue', issue.id, issue.es_id, changed_fields: %w[assignee_ids])
    end
  end
end
# frozen_string_literal: true
class ElasticIndexerWorker
include ApplicationWorker
include Elasticsearch::Model::Client::ClassMethods
......@@ -6,7 +7,7 @@ class ElasticIndexerWorker
ISSUE_TRACKED_FIELDS = %w(assignee_ids author_id confidential).freeze
def perform(operation, class_name, record_id, options = {})
def perform(operation, class_name, record_id, es_id, options = {})
return true unless Gitlab::CurrentSettings.elasticsearch_indexing?
klass = class_name.constantize
......@@ -17,7 +18,7 @@ class ElasticIndexerWorker
record.__elasticsearch__.client = client
if klass.nested?
record.__elasticsearch__.__send__ "#{operation}_document", parent: record.es_parent # rubocop:disable GitlabSecurity/PublicSend
record.__elasticsearch__.__send__ "#{operation}_document", routing: record.es_parent # rubocop:disable GitlabSecurity/PublicSend
else
record.__elasticsearch__.__send__ "#{operation}_document" # rubocop:disable GitlabSecurity/PublicSend
end
......@@ -28,14 +29,13 @@ class ElasticIndexerWorker
client.delete(
index: klass.index_name,
type: klass.document_type,
id: record_id,
parent: options["project_id"]
id: es_id,
routing: options["es_parent"]
)
else
client.delete index: klass.index_name, type: klass.document_type, id: record_id
clear_project_data(record_id, es_id) if klass == Project
client.delete index: klass.index_name, type: klass.document_type, id: es_id
end
clear_project_data(record_id) if klass == Project
end
rescue Elasticsearch::Transport::Transport::Errors::NotFound, ActiveRecord::RecordNotFound
# These errors can happen in several cases, including:
......@@ -51,15 +51,12 @@ class ElasticIndexerWorker
def update_issue_notes(record, changed_fields)
if changed_fields && (changed_fields & ISSUE_TRACKED_FIELDS).any?
Note.import_with_parent query: -> { where(noteable: record) }
Note.es_import query: -> { where(noteable: record) }
end
end
def clear_project_data(record_id)
remove_children_documents(Repository.document_type, record_id)
remove_children_documents(ProjectWiki.document_type, record_id)
remove_children_documents(MergeRequest.document_type, record_id)
remove_documents_by_project_id(record_id)
def clear_project_data(record_id, es_id)
remove_children_documents('project', record_id, es_id)
end
def remove_documents_by_project_id(record_id)
......@@ -73,14 +70,17 @@ class ElasticIndexerWorker
})
end
def remove_children_documents(document_type, parent_record_id)
def remove_children_documents(parent_type, parent_record_id, parent_es_id)
client.delete_by_query({
index: Project.__elasticsearch__.index_name,
routing: parent_es_id,
body: {
query: {
parent_id: {
type: document_type,
id: parent_record_id
has_parent: {
parent_type: parent_type,
query: {
term: { id: parent_record_id }
}
}
}
}
......
---
title: elasticsearch 6 support - migrate from parent/child relationships to join
merge_request: 7618
author:
type: added
......@@ -16,6 +16,8 @@ module Elasticsearch
index_name [self.name.downcase, 'index', env].compact.join('-')
document_type 'doc'
settings \
index: {
analysis: {
......@@ -56,7 +58,7 @@ module Elasticsearch
filter: {
code: {
type: "pattern_capture",
preserve_original: 1,
preserve_original: true,
patterns: [
"(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)",
"(\\d+)",
......
......@@ -12,53 +12,12 @@ module Elasticsearch
include Elasticsearch::Git::Model
include Elasticsearch::Git::EncoderHelper
mapping _parent: { type: 'project' } do
indexes :blob do
indexes :id, type: :text,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :rid, type: :keyword
indexes :oid, type: :text,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :commit_sha, type: :text,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :path, type: :text,
analyzer: :path_analyzer
indexes :file_name, type: :text,
analyzer: :code_analyzer,
search_analyzer: :code_search_analyzer
indexes :content, type: :text,
index_options: 'offsets',
analyzer: :code_analyzer,
search_analyzer: :code_search_analyzer
indexes :language, type: :keyword
end
indexes :commit do
indexes :id, type: :text,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :rid, type: :keyword
indexes :sha, type: :text,
index_options: 'offsets',
analyzer: :sha_analyzer
indexes :author do
indexes :name, type: :text, index_options: 'offsets'
indexes :email, type: :text, index_options: 'offsets'
indexes :time, type: :date, format: :basic_date_time_no_millis
end
indexes :commiter do
indexes :name, type: :text, index_options: 'offsets'
indexes :email, type: :text, index_options: 'offsets'
indexes :time, type: :date, format: :basic_date_time_no_millis
end
indexes :message, type: :text, index_options: 'offsets'
def es_parent
"project_#{project_id}"
end
def es_type
'blob'
end
# Indexing all text-like blobs in repository
......@@ -118,9 +77,9 @@ module Elasticsearch
{
delete: {
_index: "#{self.class.index_name}",
_type: self.class.name.underscore,
_type: 'doc',
_id: "#{repository_id}_#{blob.path}",
_parent: project_id
routing: es_parent
}
}
end
......@@ -131,12 +90,11 @@ module Elasticsearch
{
index: {
_index: "#{self.class.index_name}",
_type: self.class.name.underscore,
_type: 'doc',
_id: "#{repository_id}_#{blob.path}",
_parent: project_id,
routing: es_parent,
data: {
blob: {
type: "blob",
oid: blob.id,
rid: repository_id,
content: blob.data,
......@@ -150,10 +108,15 @@ module Elasticsearch
# install newest versions
# https://github.com/elastic/elasticsearch-mapper-attachments/issues/124
file_name: blob.path,
# Linguist is not available in the Ruby indexer. The Go indexer can
# fill in the right language.
language: nil
},
type: es_type,
join_field: {
'name' => es_type,
'parent' => es_parent
}
}
}
......@@ -217,12 +180,11 @@ module Elasticsearch
{
index: {
_index: "#{self.class.index_name}",
_type: self.class.name.underscore,
_type: 'doc',
_id: "#{repository_id}_#{commit.oid}",
_parent: project_id,
routing: es_parent,
data: {
commit: {
type: "commit",
rid: repository_id,
sha: commit.oid,
author: {
......@@ -236,6 +198,11 @@ module Elasticsearch
time: committer[:time].strftime('%Y%m%dT%H%M%S%z')
},
message: encode!(commit.message)
},
type: 'commit',
join_field: {
'name' => 'commit',
'parent' => es_parent
}
}
}
......@@ -288,7 +255,6 @@ module Elasticsearch
if b.text?
result.push(
{
type: 'blob',
id: "#{target_sha}_#{b.path}",
rid: repository_id,
oid: b.id,
......@@ -312,7 +278,6 @@ module Elasticsearch
if b.text?
result.push(
{
type: 'blob',
id: "#{repository_for_indexing.head.target.oid}_#{path}#{blob[:name]}",
rid: repository_id,
oid: b.id,
......@@ -338,7 +303,6 @@ module Elasticsearch
if obj.type == :commit
res.push(
{
type: 'commit',
sha: obj.oid,
author: obj.author,
committer: obj.committer,
......@@ -410,10 +374,11 @@ module Elasticsearch
when :all
results[:blobs] = search_blob(query, page: page, per: per, options: options)
results[:commits] = search_commit(query, page: page, per: per, options: options)
when :blob
results[:blobs] = search_blob(query, page: page, per: per, options: options)
results[:wiki_blobs] = search_blob(query, type: :wiki_blob, page: page, per: per, options: options)
when :commit
results[:commits] = search_commit(query, page: page, per: per, options: options)
when :blob, :wiki_blob
results[type.to_s.pluralize.to_sym] = search_blob(query, type: type, page: page, per: per, options: options)
end
results
......@@ -434,7 +399,7 @@ module Elasticsearch
default_operator: :and
}
},
filter: [{ term: { 'commit.type' => 'commit' } }]
filter: [{ term: { 'type' => 'commit' } }]
}
},
size: per,
......@@ -481,7 +446,7 @@ module Elasticsearch
}
end
def search_blob(query, type: :all, page: 1, per: 20, options: {})
def search_blob(query, type: :blob, page: 1, per: 20, options: {})
page ||= 1
query = ::Gitlab::Search::Query.new(query) do
......@@ -500,7 +465,9 @@ module Elasticsearch
fields: %w[blob.content blob.file_name]
}
},
filter: [{ term: { 'blob.type' => 'blob' } }]
filter: [
{ term: { type: type } }
]
}
},
size: per,
......
# frozen_string_literal: true
module GemExtensions
  module Elasticsearch
    module Model
      module Adapter
        module Multiple
          # We need to change the ID used to recover items from the database.
          # Originally elasticsearch-model uses `_id`, but we need to use the `id` field
          # stored in each hit's `_source`; likewise the model for a hit is picked by
          # the `type` field of `_source` rather than by the Elasticsearch `_type`.
          module Records
            # Returns the records for the hits, in hit order, dropping hits for
            # which no record was found (nil entries).
            def records
              by_type = __records_by_type
              hits = response.response["hits"]["hits"]

              hits.each_with_object([]) do |hit, found|
                record = by_type[__type_for_hit(hit)][hit[:_source][:id].to_s]
                found << record unless record.nil?
              end
            end

            # Resolves the registered model whose `index_name` and `es_type` match
            # the hit. Lookups are memoized per "index::type" key in a class
            # variable, so the cache is shared by every user of this module.
            def __type_for_hit(hit)
              @@__types ||= {} # rubocop:disable Style/ClassVars

              index = hit[:_index]
              type = hit[:_source][:type]
              cache_key = "#{index}::#{type}"

              @@__types[cache_key] ||= ::Elasticsearch::Model::Registry.all.find do |model|
                model.index_name == index && model.es_type == type
              end
            end

            # Groups the `_source.id` of every hit under the model resolved for
            # that hit, preserving hit order within each group.
            def __ids_by_type
              response.response["hits"]["hits"].each_with_object({}) do |hit, grouped|
                (grouped[__type_for_hit(hit)] ||= []) << hit[:_source][:id]
              end
            end
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
module GemExtensions
  module Elasticsearch
    module Model
      module Indexing
        # We need `_id` to be the model's `#es_id` in all indexing/editing operations.
        # Every method builds its request hash and merges the caller's `options`
        # last, so options may add keys (e.g. `routing:`) or override the defaults.
        module InstanceMethods
          # Indexes the record's full `as_indexed_json` document under `es_id`.
          # Returns whatever `client.index` returns.
          def index_document(options = {})
            document = as_indexed_json

            client.index(
              { index: index_name,
                type: document_type,
                id: es_id,
                body: document }.merge(options)
            )
          end

          # Deletes the document stored under the record's `es_id`.
          # Returns whatever `client.delete` returns.
          def delete_document(options = {})
            client.delete(
              { index: index_name,
                type: document_type,
                id: es_id }.merge(options)
            )
          end

          # Sends a partial update containing only the attributes recorded in
          # `@__changed_model_attributes`. When nothing is recorded there, the
          # whole document is re-indexed via `index_document` instead.
          def update_document(options = {})
            changed = instance_variable_get(:@__changed_model_attributes).presence
            return index_document(options) unless changed

            attributes =
              if respond_to?(:as_indexed_json)
                document = as_indexed_json
                # Build the list of changed keys once instead of once per document key.
                changed_keys = changed.keys.map(&:to_s)
                document.select { |key, _value| changed_keys.include?(key.to_s) }
              else
                changed
              end

            client.update(
              { index: index_name,
                type: document_type,
                id: es_id,
                body: { doc: attributes } }.merge(options)
            )
          end

          # Sends a partial update with exactly the given `attributes`.
          # Returns whatever `client.update` returns.
          def update_document_attributes(attributes, options = {})
            client.update(
              { index: index_name,
                type: document_type,
                id: es_id,
                body: { doc: attributes } }.merge(options)
            )
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
module GemExtensions
  module Elasticsearch
    module Model
      module Response
        # We need to change the ID used to recover items from the database.
        # Originally elasticsearch-model uses `_id`, but we need to use the `id` field
        module Records
          # Returns the `id` stored in the `_source` of every hit, in hit order.
          def ids
            search_hits = response.response['hits']['hits']

            search_hits.map do |hit|
              source = hit['_source']
              source['id']
            end
          end
        end
      end
    end
  end
end
......@@ -18,11 +18,16 @@ module Gitlab
Repository
].each do |klass|
settings.deep_merge!(klass.settings.to_hash)
mappings.merge!(klass.mappings.to_hash)
mappings.deep_merge!(klass.mappings.to_hash)
end
client = Project.__elasticsearch__.client
# ES 5.6 needs the `index.mapping.single_type` setting enabled to support JOIN datatypes; ES 6 does not support that setting, so it is only set for versions below 6.
if Gitlab::VersionInfo.parse(client.info['version']['number']) < Gitlab::VersionInfo.new(6)
settings['index.mapping.single_type'] = true
end
if client.indices.exists? index: index_name
client.indices.delete index: index_name
end
......
......@@ -75,9 +75,9 @@ module Gitlab
if project.wiki_enabled? && !project.wiki.empty? && query.present?
project.wiki.search(
query,
type: :blob,
type: :wiki_blob,
options: { highlight: true }
)[:blobs][:results].response
)[:wiki_blobs][:results].response
else
Kaminari.paginate_array([])
end
......
......@@ -172,9 +172,9 @@ module Gitlab
ProjectWiki.search(
query,
type: :blob,
type: :wiki_blob,
options: opt.merge({ highlight: true })
)[:blobs][:results].response
)[:wiki_blobs][:results].response
end
end
......
......@@ -74,13 +74,10 @@ namespace :gitlab do
klass = Kernel.const_get(klass_name)
case klass_name
when 'Note'
Note.searchable.import_with_parent
when 'Project', 'Snippet'
klass.import
if klass_name == 'Note'
Note.searchable.es_import
else
klass.import_with_parent
klass.es_import
end
logger.info("Indexing #{klass_name.pluralize}... " + "done".color(:green))
......
......@@ -51,7 +51,7 @@ describe SearchHelper do
Gitlab::Elastic::Helper.refresh_index
@project_2.destroy
blob = { _parent: @project_2.id }
blob = { _source: { join_field: { parent: @project_2.es_id } } }.as_json
result = find_project_for_result_blob(blob)
......
......@@ -779,14 +779,14 @@ describe Gitlab::Elastic::SearchResults do
results = described_class.new(user, 'term', limit_project_ids)
blobs = results.objects('wiki_blobs')
expect(blobs.map {|blob| blob._parent.to_i }).to match_array [internal_project.id, private_project2.id, public_project.id]
expect(blobs.map { |blob| blob.join_field.parent }).to match_array [internal_project.es_id, private_project2.es_id, public_project.es_id]
expect(results.wiki_blobs_count).to eq 3
# Unauthenticated search
results = described_class.new(nil, 'term', [])
blobs = results.objects('wiki_blobs')
expect(blobs.first._parent.to_i).to eq public_project.id
expect(blobs.first.join_field.parent).to eq public_project.es_id
expect(results.wiki_blobs_count).to eq 1
end
end
......@@ -843,14 +843,14 @@ describe Gitlab::Elastic::SearchResults do
results = described_class.new(user, 'tesla', limit_project_ids)
blobs = results.objects('blobs')
expect(blobs.map { |blob| blob._parent.to_i }).to match_array [internal_project.id, private_project2.id, public_project.id]
expect(blobs.map { |blob| blob.join_field.parent }).to match_array [internal_project.es_id, private_project2.es_id, public_project.es_id]
expect(results.blobs_count).to eq 3
# Unauthenticated search
results = described_class.new(nil, 'tesla', [])
blobs = results.objects('blobs')
expect(blobs.first._parent.to_i).to eq public_project.id.to_i
expect(blobs.first.join_field.parent).to eq public_project.es_id
expect(results.blobs_count).to eq 1
end
end
......
......@@ -33,6 +33,13 @@ describe Issue, :elastic do
expected_hash = issue.attributes.extract!('id', 'iid', 'title', 'description', 'created_at',
'updated_at', 'state', 'project_id', 'author_id',
'confidential')
.merge({
'join_field' => {
'name' => issue.es_type,
'parent' => issue.es_parent
},
'type' => issue.es_type
})
expected_hash['assignee_id'] = [assignee.id]
......
......@@ -44,7 +44,13 @@ describe MergeRequest, :elastic do
'source_project_id',
'target_project_id',
'author_id'
)
).merge({
'join_field' => {
'name' => merge_request.es_type,
'parent' => merge_request.es_parent
},
'type' => merge_request.es_type
})
expect(merge_request.as_indexed_json).to eq(expected_hash)
end
......
......@@ -35,7 +35,13 @@ describe Milestone, :elastic do
'project_id',
'created_at',
'updated_at'
)
).merge({
'join_field' => {
'name' => milestone.es_type,
'parent' => milestone.es_parent
},
'type' => milestone.es_type
})
expect(milestone.as_indexed_json).to eq(expected_hash)
end
......
......@@ -55,6 +55,8 @@ describe Note, :elastic do
created_at
updated_at
issue
join_field
type
)
expect(note.as_indexed_json.keys).to eq(expected_hash_keys)
......@@ -66,7 +68,7 @@ describe Note, :elastic do
issue = create :issue, project: project, updated_at: 1.minute.ago
# Only issue should be updated
expect(ElasticIndexerWorker).to receive(:perform_async).with(:update, 'Issue', anything, anything)
expect(ElasticIndexerWorker).to receive(:perform_async).with(:update, 'Issue', anything, anything, anything)
create :note, :system, project: project, noteable: issue
end
......
......@@ -57,7 +57,7 @@ describe Project, :elastic do
'updated_at',
'visibility_level',
'last_activity_at'
)
).merge({ 'join_field' => project.es_type, 'type' => project.es_type })
expected_hash.merge!(
project.project_feature.attributes.extract!(
......
......@@ -16,7 +16,7 @@ describe ProjectWiki, :elastic do
Gitlab::Elastic::Helper.refresh_index
end
expect(project.wiki.search('term1', type: :blob)[:blobs][:total_count]).to eq(1)
expect(project.wiki.search('term1 | term2', type: :blob)[:blobs][:total_count]).to eq(2)
expect(project.wiki.search('term1', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(1)
expect(project.wiki.search('term1 | term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(2)
end
end
......@@ -124,7 +124,7 @@ describe Snippet, :elastic do
'project_id',
'author_id',
'visibility_level'
)
).merge({ 'type' => snippet.es_type })
expect(snippet.as_indexed_json).to eq(expected_hash)
end
......
......@@ -15,172 +15,84 @@ describe ElasticIndexerWorker, :elastic do
expect_any_instance_of(Elasticsearch::Model).not_to receive(:__elasticsearch__)
expect(subject.perform("index", "Milestone", 1)).to be_truthy
expect(subject.perform("index", "Milestone", 1, 1)).to be_truthy
end
describe 'Indexing new records' do
it 'indexes a project' do
project = nil
describe 'Indexing, updating, and deleting records' do
using RSpec::Parameterized::TableSyntax
Sidekiq::Testing.disable! do
project = create :project
end
expect do
subject.perform("index", "Project", project.id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('*').records.size }.by(1)
end
it 'indexes an issue' do
issue = nil
Sidekiq::Testing.disable! do
issue = create :issue
end
expect do
subject.perform("index", "Issue", issue.id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('*').records.size }.by(1)
end
it 'indexes a note' do
note = nil
Sidekiq::Testing.disable! do
note = create :note
end
expect do
subject.perform("index", "Note", note.id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('*').records.size }.by(1)
end
it 'indexes a milestone' do
milestone = nil
Sidekiq::Testing.disable! do
milestone = create :milestone
end
expect do
subject.perform("index", "Milestone", milestone.id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('*').records.size }.by(1)
where(:type, :name, :attribute) do
:project | "Project" | :name
:issue | "Issue" | :title
:note | "Note" | :note
:milestone | "Milestone" | :title
:merge_request | "MergeRequest" | :title
end
it 'indexes a merge request' do
merge_request = nil
with_them do
it 'indexes new records' do
object = nil
Sidekiq::Testing.disable! do
merge_request = create :merge_request
object = create(type)
end
expect do
subject.perform("index", "MergeRequest", merge_request.id)
subject.perform("index", name, object.id, object.es_id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('*').records.size }.by(1)
end
end
describe 'Updating index' do
it 'updates a project' do
project = nil
Sidekiq::Testing.disable! do
project = create :project
subject.perform("index", "Project", project.id)
project.update(name: "new")
end
expect do
subject.perform("update", "Project", project.id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('new').records.size }.by(1)
end
it 'updates an issue' do
issue = nil
it 'updates the index when object is changed' do
object = nil
Sidekiq::Testing.disable! do
issue = create :issue
subject.perform("index", "Issue", issue.id)
issue.update(title: "new")
object = create(type)
subject.perform("index", name, object.id, object.es_id)
object.update(attribute => "new")
end
expect do
subject.perform("update", "Issue", issue.id)
subject.perform("update", name, object.id, object.es_id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('new').records.size }.by(1)
end
it 'updates a note' do
note = nil
it 'deletes from index when an object is deleted' do
object = nil
Sidekiq::Testing.disable! do
note = create :note
subject.perform("index", "Note", note.id)
note.update(note: 'new')
end
expect do
subject.perform("update", "Note", note.id)
object = create(type)
subject.perform("index", name, object.id, object.es_id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('new').records.size }.by(1)
end
it 'updates a milestone' do
milestone = nil
Sidekiq::Testing.disable! do
milestone = create :milestone
subject.perform("index", "Milestone", milestone.id)
milestone.update(title: 'new')
object.destroy
end
expect do
subject.perform("update", "Milestone", milestone.id)
subject.perform("delete", name, object.id, object.es_id, { 'es_parent' => object.es_parent })
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('new').records.size }.by(1)
end.to change { Elasticsearch::Model.search('*').total_count }.by(-1)
end
it 'updates a merge request' do
merge_request = nil
Sidekiq::Testing.disable! do
merge_request = create :merge_request
subject.perform("index", "MergeRequest", merge_request.id)
merge_request.update(title: 'new')
end
expect do
subject.perform("index", "MergeRequest", merge_request.id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('new').records.size }.by(1)
end
end
describe 'Delete' do
it 'deletes a project with all nested objects' do
project, issue, milestone, note, merge_request = nil
Sidekiq::Testing.disable! do
project = create :project, :repository
subject.perform("index", "Project", project.id)
subject.perform("index", "Project", project.id, project.es_id)
issue = create :issue, project: project
subject.perform("index", "Issue", issue.id)
subject.perform("index", "Issue", issue.id, issue.es_id)
milestone = create :milestone, project: project
subject.perform("index", "Milestone", milestone.id)
subject.perform("index", "Milestone", milestone.id, milestone.es_id)
note = create :note, project: project
subject.perform("index", "Note", note.id)
subject.perform("index", "Note", note.id, note.es_id)
merge_request = create :merge_request, target_project: project, source_project: project
subject.perform("index", "MergeRequest", merge_request.id)
subject.perform("index", "MergeRequest", merge_request.id, merge_request.es_id)
end
ElasticCommitIndexerWorker.new.perform(project.id)
......@@ -189,78 +101,9 @@ describe ElasticIndexerWorker, :elastic do
## All database objects + data from repository. The absolute value does not matter
expect(Elasticsearch::Model.search('*').total_count).to be > 40
subject.perform("delete", "Project", project.id)
subject.perform("delete", "Project", project.id, project.es_id)
Gitlab::Elastic::Helper.refresh_index
expect(Elasticsearch::Model.search('*').total_count).to be(0)
end
it 'deletes an issue' do
issue, project_id = nil
Sidekiq::Testing.disable! do
issue = create :issue
subject.perform("index", "Issue", issue.id)
Gitlab::Elastic::Helper.refresh_index
project_id = issue.project_id
issue.destroy
end
expect do
subject.perform("delete", "Issue", issue.id, "project_id" => project_id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('*').total_count }.by(-1)
end
it 'deletes a note' do
note, project_id = nil
Sidekiq::Testing.disable! do
note = create :note
subject.perform("index", "Note", note.id)
Gitlab::Elastic::Helper.refresh_index
project_id = note.project_id
note.destroy
end
expect do
subject.perform("delete", "Note", note.id, "project_id" => project_id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('*').total_count }.by(-1)
end
it 'deletes a milestone' do
milestone, project_id = nil
Sidekiq::Testing.disable! do
milestone = create :milestone
subject.perform("index", "Milestone", milestone.id)
Gitlab::Elastic::Helper.refresh_index
project_id = milestone.project_id
milestone.destroy
end
expect do
subject.perform("delete", "Milestone", milestone.id, "project_id" => project_id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('*').total_count }.by(-1)
end
it 'deletes a merge request' do
merge_request, project_id = nil
Sidekiq::Testing.disable! do
merge_request = create :merge_request
subject.perform("index", "MergeRequest", merge_request.id)
Gitlab::Elastic::Helper.refresh_index
project_id = merge_request.target_project_id
merge_request.destroy
end
expect do
subject.perform("delete", "MergeRequest", merge_request.id, "project_id" => project_id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('*').total_count }.by(-1)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment