Commit f8731fa3 authored by Mark Chao's avatar Mark Chao

ES: Wrap blob search results as FoundBlob

Extract common logic between blob and commit search.
Internalize highlight for searches.

Assign project to search results, which can be
passed to FoundBlob, where it is used for
policy checks.
parent 7b84e864
...@@ -20,6 +20,16 @@ module Elastic ...@@ -20,6 +20,16 @@ module Elastic
results results
end end
# Run a blob search and return each hit wrapped as a Gitlab::Search::FoundBlob.
#
# @return [Kaminari::PaginatableArray]
def elastic_search_as_found_blob(query, page: 1, per: 20, options: {})
  # parse_search_result relies on highlight data to locate the matching line,
  # so force it on regardless of what the caller passed.
  search_options = options.merge(highlight: true)

  elastic_search_and_wrap(query, type: es_type, page: page, per: per, options: search_options) do |result, project|
    ::Gitlab::Elastic::SearchResults.parse_search_result(result, project)
  end
end
private private
def extract_repository_ids(options) def extract_repository_ids(options)
...@@ -165,6 +175,56 @@ module Elastic ...@@ -165,6 +175,56 @@ module Elastic
total_count: res.size total_count: res.size
} }
end end
# Wrap returned results into GitLab model objects and paginate
#
# @return [Kaminari::PaginatableArray]
def elastic_search_and_wrap(query, type:, page: 1, per: 20, options: {}, &blk)
  type = type.to_s

  search_results = elastic_search(
    query,
    type: type,
    page: page,
    per: per,
    options: options
  )[type.pluralize.to_sym][:results]

  items, total_count = yield_each_search_result(search_results, type, &blk)

  # elastic_search handed us a paginated set; mapping lost that, so rebuild
  # the pagination metadata around the wrapped items.
  current_page = page || 1
  offset = per * (current_page - 1)

  Kaminari.paginate_array(items, total_count: total_count, limit: per, offset: offset)
end
# Yield each raw result together with its (preloaded) project, skipping
# results whose project is gone or pending deletion. Returns the wrapped
# items plus the adjusted total count.
def yield_each_search_result(response, type)
  # Avoid one SELECT per result by loading all referenced projects at once
  ids = response.map { |result| project_id_for_commit_or_blob(result, type) }.uniq
  projects = Project.with_route.id_in(ids).index_by(&:id)
  total_count = response.total_count

  items = response.map do |result|
    project = projects[project_id_for_commit_or_blob(result, type)]

    if project.nil? || project.pending_delete?
      # Deleted projects still show up in the index; hide them and keep the
      # reported total honest.
      total_count -= 1
      next
    end

    yield(result, project)
  end

  # nil entries correspond to the skipped (deleted) projects above
  [items.compact, total_count]
end
# Indexed commit does not include project_id
def project_id_for_commit_or_blob(result, type)
  # Prefer an explicit project_id on the document; otherwise fall back to the
  # repository id ("rid") nested under the document type, coerced to Integer.
  explicit_id = result.dig('_source', 'project_id')
  return explicit_id if explicit_id

  result.dig('_source', type, 'rid').to_i
end
end end
end end
end end
...@@ -16,10 +16,18 @@ module Elastic ...@@ -16,10 +16,18 @@ module Elastic
end end
def elastic_search(query, type: :all, page: 1, per: 20, options: {}) def elastic_search(query, type: :all, page: 1, per: 20, options: {})
options[:repository_id] = repository_id if options[:repository_id].nil? options = repository_specific_options(options)
self.class.elastic_search(query, type: type, page: page, per: per, options: options) self.class.elastic_search(query, type: type, page: page, per: per, options: options)
end end
# Instance-level blob search: scopes the query to this repository and
# delegates to the class-level implementation.
#
# @return [Kaminari::PaginatableArray]
def elastic_search_as_found_blob(query, page: 1, per: 20, options: {})
  scoped_options = repository_specific_options(options)

  self.class.elastic_search_as_found_blob(query, page: page, per: per, options: scoped_options)
end
def delete_index_for_commits_and_blobs(wiki: false) def delete_index_for_commits_and_blobs(wiki: false)
types = types =
if wiki if wiki
...@@ -62,6 +70,14 @@ module Elastic ...@@ -62,6 +70,14 @@ module Elastic
def repository_id def repository_id
raise NotImplementedError raise NotImplementedError
end end
# Ensure the search options carry a repository_id, defaulting to this
# proxy's own repository. A caller-supplied id is respected as-is, and the
# caller's hash is never mutated (merge returns a copy).
def repository_specific_options(options)
  return options unless options[:repository_id].nil?

  options.merge(repository_id: repository_id)
end
end end
end end
end end
...@@ -9,30 +9,9 @@ module Elastic ...@@ -9,30 +9,9 @@ module Elastic
'blob' 'blob'
end end
# @return [Kaminari::PaginatableArray]
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, options: {}) def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, options: {})
response = elastic_search( elastic_search_and_wrap(query, type: :commit, page: page, per: per_page, options: options) do |result, project|
query,
type: :commit,
page: page,
per: per_page,
options: options
)[:commits][:results]
response_count = response.total_count
# Avoid one SELECT per result by loading all projects into a hash
project_ids = response.map {|result| result["_source"]["commit"]["rid"] }.uniq
projects = Project.with_route.id_in(project_ids).index_by(&:id)
commits = response.map do |result|
project_id = result["_source"]["commit"]["rid"].to_i
project = projects[project_id]
if project.nil? || project.pending_delete?
response_count -= 1
next
end
raw_commit = Gitlab::Git::Commit.new( raw_commit = Gitlab::Git::Commit.new(
project.repository.raw, project.repository.raw,
prepare_commit(result['_source']['commit']), prepare_commit(result['_source']['commit']),
...@@ -40,13 +19,6 @@ module Elastic ...@@ -40,13 +19,6 @@ module Elastic
) )
Commit.new(raw_commit, project) Commit.new(raw_commit, project)
end end
# Remove results for deleted projects
commits.compact!
# Before "map" we had a paginated array so we need to recover it
offset = per_page * ((page || 1) - 1)
Kaminari.paginate_array(commits, total_count: response_count, limit: per_page, offset: offset)
end end
private private
......
...@@ -112,7 +112,7 @@ module Gitlab ...@@ -112,7 +112,7 @@ module Gitlab
false false
end end
def self.parse_search_result(result) def self.parse_search_result(result, project)
ref = result["_source"]["blob"]["commit_sha"] ref = result["_source"]["blob"]["commit_sha"]
path = result["_source"]["blob"]["path"] path = result["_source"]["blob"]["path"]
extname = File.extname(path) extname = File.extname(path)
...@@ -156,6 +156,7 @@ module Gitlab ...@@ -156,6 +156,7 @@ module Gitlab
ref: ref, ref: ref,
startline: from + 1, startline: from + 1,
data: data.join, data: data.join,
project: project,
project_id: project_id project_id: project_id
) )
end end
......
# frozen_string_literal: true
require 'spec_helper'
# Integration spec: indexes a real repository fixture into Elasticsearch and
# verifies blob search hits are wrapped as Gitlab::Search::FoundBlob objects
# carrying the owning project.
describe Elastic::Latest::GitClassProxy do
let_it_be(:project) { create(:project, :repository) }
let(:included_class) { Elastic::Latest::RepositoryClassProxy }
subject { included_class.new(project.repository) }
describe '#elastic_search_as_found_blob', :elastic do
before do
# Enable ES and index the project so the query below has data to hit
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
Gitlab::Elastic::Indexer.new(project).run
Gitlab::Elastic::Helper.refresh_index
end
it 'returns FoundBlob', :sidekiq_inline do
results = subject.elastic_search_as_found_blob('def popen')
expect(results).not_to be_empty
expect(results).to all(be_a(Gitlab::Search::FoundBlob))
# Spot-check one hit: ref/path/startline come from the indexed fixture repo
result = results.first
expect(result.ref).to eq('b83d6e391c22777fca1ed3012fce84f633d7fed0')
expect(result.path).to eq('files/ruby/popen.rb')
expect(result.startline).to eq(2)
expect(result.data).to include('Popen')
# The project is attached so downstream policy checks can use it
expect(result.project).to eq(project)
end
end
end
...@@ -51,6 +51,33 @@ describe Elastic::Latest::GitInstanceProxy do ...@@ -51,6 +51,33 @@ describe Elastic::Latest::GitInstanceProxy do
end end
end end
# Unit spec: the instance proxy must inject its own repository_id into the
# options before delegating to the class-level search.
describe '#elastic_search_as_found_blob' do
let(:params) do
{
page: 2,
per: 30,
options: { foo: :bar }
}
end
it 'provides repository_id if not provided' do
# deep_dup so mutating expected_params[:options] doesn't touch params
expected_params = params.deep_dup
expected_params[:options][:repository_id] = project.id
expect(subject.class).to receive(:elastic_search_as_found_blob).with('foo', expected_params)
subject.elastic_search_as_found_blob('foo', params)
end
it 'uses provided repository_id' do
# An explicit repository_id must be passed through untouched
params[:options][:repository_id] = 42
expect(subject.class).to receive(:elastic_search_as_found_blob).with('foo', params)
subject.elastic_search_as_found_blob('foo', params)
end
end
describe '#delete_index_for_commits_and_blobs' do describe '#delete_index_for_commits_and_blobs' do
let(:write_targets) { [double(:write_target_1), double(:write_target_2)] } let(:write_targets) { [double(:write_target_1), double(:write_target_2)] }
let(:read_target) { double(:read_target) } let(:read_target) { double(:read_target) }
......
...@@ -67,6 +67,7 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin ...@@ -67,6 +67,7 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
end end
describe 'parse_search_result' do describe 'parse_search_result' do
let(:project) { double(:project) }
let(:blob) do let(:blob) do
{ {
'blob' => { 'blob' => {
...@@ -78,11 +79,12 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin ...@@ -78,11 +79,12 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
end end
it 'returns an unhighlighted blob when no highlight data is present' do it 'returns an unhighlighted blob when no highlight data is present' do
parsed = described_class.parse_search_result('_source' => blob) parsed = described_class.parse_search_result({ '_source' => blob }, project)
expect(parsed).to be_kind_of(::Gitlab::Search::FoundBlob) expect(parsed).to be_kind_of(::Gitlab::Search::FoundBlob)
expect(parsed).to have_attributes( expect(parsed).to have_attributes(
startline: 1, startline: 1,
project: project,
data: "foo\n" data: "foo\n"
) )
end end
...@@ -95,7 +97,7 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin ...@@ -95,7 +97,7 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
} }
} }
parsed = described_class.parse_search_result(result) parsed = described_class.parse_search_result(result, project)
expect(parsed).to be_kind_of(::Gitlab::Search::FoundBlob) expect(parsed).to be_kind_of(::Gitlab::Search::FoundBlob)
expect(parsed).to have_attributes( expect(parsed).to have_attributes(
...@@ -104,6 +106,7 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin ...@@ -104,6 +106,7 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
basename: 'path/file', basename: 'path/file',
ref: 'sha', ref: 'sha',
startline: 2, startline: 2,
project: project,
data: "bar\n" data: "bar\n"
) )
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment