Commit 4095e48d authored by Nick Thomas's avatar Nick Thomas

Elasticsearch fixes for advanced search syntax

The introduction of advanced search syntax allowed queries like `-foo`. These
break our repository queries in two distinct ways:

* Blobs would be returned for commit queries, and vice-versa
* Highlighting was assumed to be present for all queries

The first is fixed by adding explicit type terms to the queries:

    { term: { 'commit.type' => 'commit' } }
    { term: { 'blob.type' => 'blob' } }

(The `_type` field is, of course, `repository` for both document kinds).

A query matched by a negative search will not have any highlighting results,
so we also need to make processing that conditional.

Finally, switch commit search to `default_operator: :and` so it behaves more
like the rest of the application.

Switching to `default_operator: :and` should be enough to keep this from
happening.
parent 70a7605c
---
title: Fix global code search when using negation queries
merge_request: 2709
author:
type: fixed
...@@ -407,13 +407,14 @@ module Elasticsearch ...@@ -407,13 +407,14 @@ module Elasticsearch
query_hash = { query_hash = {
query: { query: {
bool: { bool: {
must: [{ must: {
simple_query_string: { simple_query_string: {
fields: fields, fields: fields,
query: query, query: query,
default_operator: :or default_operator: :and
} }
}] },
filter: [{ term: { 'commit.type' => 'commit' } }]
} }
}, },
size: per, size: per,
...@@ -426,7 +427,7 @@ module Elasticsearch ...@@ -426,7 +427,7 @@ module Elasticsearch
end end
if options[:repository_id] if options[:repository_id]
query_hash[:query][:bool][:filter] = { query_hash[:query][:bool][:filter] << {
terms: { terms: {
'commit.rid' => [options[:repository_id]].flatten 'commit.rid' => [options[:repository_id]].flatten
} }
...@@ -434,7 +435,6 @@ module Elasticsearch ...@@ -434,7 +435,6 @@ module Elasticsearch
end end
if options[:additional_filter] if options[:additional_filter]
query_hash[:query][:bool][:filter] ||= []
query_hash[:query][:bool][:filter] << options[:additional_filter] query_hash[:query][:bool][:filter] << options[:additional_filter]
end end
...@@ -474,15 +474,14 @@ module Elasticsearch ...@@ -474,15 +474,14 @@ module Elasticsearch
default_operator: :and, default_operator: :and,
fields: %w[blob.content blob.file_name] fields: %w[blob.content blob.file_name]
} }
} },
filter: [{ term: { 'blob.type' => 'blob' } }]
} }
}, },
size: per, size: per,
from: per * (page - 1) from: per * (page - 1)
} }
query_hash[:query][:bool][:filter] = []
if options[:repository_id] if options[:repository_id]
query_hash[:query][:bool][:filter] << { query_hash[:query][:bool][:filter] << {
terms: { terms: {
...@@ -492,7 +491,6 @@ module Elasticsearch ...@@ -492,7 +491,6 @@ module Elasticsearch
end end
if options[:additional_filter] if options[:additional_filter]
query_hash[:query][:bool][:filter] ||= []
query_hash[:query][:bool][:filter] << options[:additional_filter] query_hash[:query][:bool][:filter] << options[:additional_filter]
end end
......
...@@ -75,8 +75,11 @@ module Gitlab ...@@ -75,8 +75,11 @@ module Gitlab
content = result["_source"]["blob"]["content"] content = result["_source"]["blob"]["content"]
total_lines = content.lines.size total_lines = content.lines.size
highlighted_content = result["highlight"]["blob.content"] term =
term = highlighted_content && highlighted_content[0].match(/gitlabelasticsearch→(.*?)←gitlabelasticsearch/)[1] if result['highlight']
highlighted = result['highlight']['blob.content']
highlighted && highlighted[0].match(/gitlabelasticsearch→(.*?)←gitlabelasticsearch/)[1]
end
found_line_number = 0 found_line_number = 0
......
...@@ -16,6 +16,49 @@ describe Gitlab::Elastic::SearchResults do ...@@ -16,6 +16,49 @@ describe Gitlab::Elastic::SearchResults do
let(:project_2) { create(:project, :repository) } let(:project_2) { create(:project, :repository) }
let(:limit_project_ids) { [project_1.id] } let(:limit_project_ids) { [project_1.id] }
describe 'parse_search_result' do
let(:blob) do
{
'blob' => {
'commit_sha' => 'sha',
'content' => "foo\nbar\nbaz\n",
'path' => 'path/file.ext'
}
}
end
it 'returns an unhighlighted blob when no highlight data is present' do
parsed = described_class.parse_search_result('_source' => blob)
expect(parsed).to be_kind_of(::Gitlab::SearchResults::FoundBlob)
expect(parsed).to have_attributes(
startline: 1,
data: "foo\n"
)
end
it 'parses the blob with highlighting' do
result = {
'_source' => blob,
'highlight' => {
'blob.content' => ["foo\ngitlabelasticsearch→bar←gitlabelasticsearch\nbaz\n"]
}
}
parsed = described_class.parse_search_result(result)
expect(parsed).to be_kind_of(::Gitlab::SearchResults::FoundBlob)
expect(parsed).to have_attributes(
id: nil,
filename: 'path/file.ext',
basename: 'path/file',
ref: 'sha',
startline: 2,
data: "bar\n"
)
end
end
describe 'issues' do describe 'issues' do
before do before do
@issue_1 = create( @issue_1 = create(
......
...@@ -11,21 +11,53 @@ describe Repository, elastic: true do ...@@ -11,21 +11,53 @@ describe Repository, elastic: true do
stub_application_setting(elasticsearch_search: false, elasticsearch_indexing: false) stub_application_setting(elasticsearch_search: false, elasticsearch_indexing: false)
end end
it "searches blobs and commits" do def index!(project)
project = create :project, :repository
Sidekiq::Testing.inline! do Sidekiq::Testing.inline! do
project.repository.index_blobs project.repository.index_blobs
project.repository.index_commits project.repository.index_commits
Gitlab::Elastic::Helper.refresh_index Gitlab::Elastic::Helper.refresh_index
end end
end
it "searches blobs and commits" do
project = create :project, :repository
index!(project)
expect(project.repository.search('def popen')[:blobs][:total_count]).to eq(1) expect(project.repository.search('def popen')[:blobs][:total_count]).to eq(1)
expect(project.repository.search('def | popen')[:blobs][:total_count] > 1).to be_truthy expect(project.repository.search('def | popen')[:blobs][:total_count] > 1).to be_truthy
expect(project.repository.search('initial')[:commits][:total_count]).to eq(1) expect(project.repository.search('initial')[:commits][:total_count]).to eq(1)
end end
def search_and_check!(on, query, type:, per: 1000)
results = on.search(query, type: type, per: per)["#{type}s".to_sym][:results]
blobs, commits = results.partition { |result| result['_source']['blob'].present? }
case type
when :blob
expect(blobs).not_to be_empty
expect(commits).to be_empty
when :commit
expect(blobs).to be_empty
expect(commits).not_to be_empty
else
raise ArgumentError
end
end
# A negation query can match both commits and blobs as they both have _type
# 'repository'. Ensure this doesn't happen, in both global and project search
it 'filters commits from blobs, and vice-versa' do
project = create :project, :repository
index!(project)
search_and_check!(Repository, '-foo', type: :blob)
search_and_check!(Repository, '-foo', type: :commit)
search_and_check!(project.repository, '-foo', type: :blob)
search_and_check!(project.repository, '-foo', type: :commit)
end
describe "class method find_commits_by_message_with_elastic" do describe "class method find_commits_by_message_with_elastic" do
it "returns commits" do it "returns commits" do
project = create :project, :repository project = create :project, :repository
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment