Commit 37bf7ba2 authored by Nick Thomas

Merge branch 'avoid_es_loading_commits' into 'master'

Avoid Gitaly N+1 in Elasticsearch commit results

Closes #2282, #2016, and #3454

See merge request gitlab-org/gitlab-ee!9760
parents 173f288e 7671d6f3
......@@ -44,7 +44,7 @@ module Elastic
# Since we can't have multiple types in ES6, but want to be able to use JOINs, we must declare all our
# fields together instead of per model
mappings do
mappings dynamic: 'strict' do
### Shared fields
indexes :id, type: :integer
indexes :created_at, type: :date
......@@ -182,7 +182,7 @@ module Elastic
indexes :time, type: :date, format: :basic_date_time_no_millis
end
indexes :commiter do
indexes :committer do
indexes :name, type: :text, index_options: 'offsets'
indexes :email, type: :text, index_options: 'offsets'
indexes :time, type: :date, format: :basic_date_time_no_millis
......
......@@ -57,22 +57,37 @@ module Elastic
# Avoid one SELECT per result by loading all projects into a hash
project_ids = response.map {|result| result["_source"]["commit"]["rid"] }.uniq
projects = Project.where(id: project_ids).index_by(&:id)
projects = Project.includes(:route).where(id: project_ids).index_by(&:id)
# n + 1: https://gitlab.com/gitlab-org/gitlab-ee/issues/3454
commits = Gitlab::GitalyClient.allow_n_plus_1_calls do
response.map do |result|
sha = result["_source"]["commit"]["sha"]
commits = response.map do |result|
project_id = result["_source"]["commit"]["rid"].to_i
projects[project_id].try(:commit, sha)
project = projects[project_id]
raw_commit = Gitlab::Git::Commit.new(
project.repository.raw,
prepare_commit(result['_source']['commit']),
lazy_load_parents: true
)
Commit.new(raw_commit, project)
end
end.compact
# Before "map" we had a paginated array so we need to recover it
offset = per_page * ((page || 1) - 1)
Kaminari.paginate_array(commits, total_count: response.total_count, limit: per_page, offset: offset)
end
# Converts the commit document stored in an Elasticsearch hit into the
# symbol-keyed attribute hash that is handed to Gitlab::Git::Commit.new.
#
# raw_commit_hash - Hash with string keys 'sha' and 'message', plus nested
#                   'author' and 'committer' hashes that each carry 'name',
#                   'email' and 'time' (a timestamp string).
#
# Returns a Hash. :parent_ids is always nil here because the parents are not
# read from the document; the caller builds the commit with
# `lazy_load_parents: true` instead. Both dates are parsed with Time.parse
# and converted to UTC.
def prepare_commit(raw_commit_hash)
  author = raw_commit_hash['author']
  committer = raw_commit_hash['committer']
  to_utc = ->(stamp) { Time.parse(stamp).utc }

  {
    id: raw_commit_hash['sha'],
    message: raw_commit_hash['message'],
    parent_ids: nil,
    author_name: author['name'],
    author_email: author['email'],
    authored_date: to_utc.call(author['time']),
    committer_name: committer['name'],
    committer_email: committer['email'],
    committed_date: to_utc.call(committer['time'])
  }
end
end
end
end
---
title: Avoid a Gitaly N+1 when loading commits for Elasticsearch search results
merge_request: 9760
author:
type: performance
......@@ -184,11 +184,12 @@ module Gitlab
end
end
def initialize(repository, raw_commit, head = nil)
# Builds a commit wrapper around raw commit data.
#
# repository         - stored in @repository; `parent_ids` later calls
#                      `commit(id)` on it when lazy loading is enabled.
# raw_commit         - raw commit data, passed on to init_commit. A nil or
#                      false value is rejected.
# head               - optional, stored in @head (defaults to nil).
# lazy_load_parents: - when true, `parent_ids` resolves the parents through
#                      the repository on first use (defaults to false).
#
# Raises RuntimeError ("Nil as raw commit passed") when raw_commit is falsy.
def initialize(repository, raw_commit, head = nil, lazy_load_parents: false)
  raise "Nil as raw commit passed" unless raw_commit

  # All state is assigned before init_commit runs.
  @lazy_load_parents = lazy_load_parents
  @head = head
  @repository = repository

  init_commit(raw_commit)
end
......@@ -225,6 +226,12 @@ module Gitlab
author_name != committer_name || author_email != committer_email
end
# Returns the parent ids of this commit.
#
# Without lazy loading, the value held in @parent_ids is returned as is.
# With lazy loading, the first call (while @parent_ids is still nil or false)
# looks the commit up through @repository.commit(id) and memoizes that
# commit's parent_ids; later calls reuse the memoized value.
def parent_ids
  if @lazy_load_parents
    @parent_ids ||= @repository.commit(id).parent_ids
  else
    @parent_ids
  end
end
# Returns the first entry of `parent_ids`.
def parent_id
  ids = parent_ids
  ids.first
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment