Commit 6c9f28d3 authored by Nick Thomas

Merge branch '27789-sha-prefix-search' into 'master'

Use prefix search instead of ngrams for sha fields

See merge request gitlab-org/gitlab!27597
parents 98e7859f dfe2c6df
---
title: Use prefix search instead of ngrams for sha fields
merge_request: 27597
author:
type: other
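
For context (an illustration, not part of the MR): the old `sha_analyzer` ran SHA values through an `edgeNGram` tokenizer with `min_gram: 5` and `max_gram: 40`, so every indexed SHA was expanded into one term per prefix length at index time. After this change the SHA fields are plain `keyword` fields with a lowercase normalizer, and partial SHAs are matched by a prefix search at query time instead. A rough Ruby sketch of the difference in indexed terms:

```ruby
# Illustration only, using a made-up SHA: roughly what the old edgeNGram-based
# sha_analyzer emitted per SHA versus the single term stored by a keyword
# field with a lowercase normalizer.
sha = 'deadbeef' * 5 # fake 40-character hex SHA

edge_ngram_terms = (5..40).map { |len| sha[0, len] } # min_gram: 5 .. max_gram: 40
keyword_term     = sha.downcase                      # what the lowercase normalizer stores

puts edge_ngram_terms.size # => 36 prefix terms indexed per SHA
puts keyword_term          # the single indexed term; partial SHAs are handled at query time
```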
@@ -32,11 +32,6 @@ module Elastic
         tokenizer: 'path_tokenizer',
         filter: %w(lowercase asciifolding)
       },
-      sha_analyzer: {
-        type: 'custom',
-        tokenizer: 'sha_tokenizer',
-        filter: %w(lowercase asciifolding)
-      },
       code_analyzer: {
         type: 'custom',
         tokenizer: 'whitespace',
@@ -79,16 +74,16 @@ module Elastic
         max_gram: 3,
         token_chars: %w(letter digit)
       },
-      sha_tokenizer: {
-        type: "edgeNGram",
-        min_gram: 5,
-        max_gram: 40,
-        token_chars: %w(letter digit)
-      },
       path_tokenizer: {
         type: 'path_hierarchy',
         reverse: true
       }
+    },
+    normalizer: {
+      sha_normalizer: {
+        type: "custom",
+        filter: ["lowercase"]
+      }
     }
   }
 }
@@ -198,16 +193,16 @@ module Elastic
     indexes :blob do
       indexes :type, type: :keyword
-      indexes :id, type: :text,
+      indexes :id, type: :keyword,
         index_options: 'docs',
-        analyzer: :sha_analyzer
+        normalizer: :sha_normalizer
       indexes :rid, type: :keyword
-      indexes :oid, type: :text,
+      indexes :oid, type: :keyword,
         index_options: 'docs',
-        analyzer: :sha_analyzer
+        normalizer: :sha_normalizer
-      indexes :commit_sha, type: :text,
+      indexes :commit_sha, type: :keyword,
         index_options: 'docs',
-        analyzer: :sha_analyzer
+        normalizer: :sha_normalizer
       indexes :path, type: :text,
         analyzer: :path_analyzer
       indexes :file_name, type: :text,
@@ -223,13 +218,13 @@ module Elastic
     indexes :commit do
       indexes :type, type: :keyword
-      indexes :id, type: :text,
+      indexes :id, type: :keyword,
         index_options: 'docs',
-        analyzer: :sha_analyzer
+        normalizer: :sha_normalizer
       indexes :rid, type: :keyword
-      indexes :sha, type: :text,
+      indexes :sha, type: :keyword,
         index_options: 'docs',
-        analyzer: :sha_analyzer
+        normalizer: :sha_normalizer
       indexes :author do
         indexes :name, type: :text, index_options: 'docs'
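For readers less familiar with the `elasticsearch-model` mapping DSL used above, the `type: :keyword, normalizer: :sha_normalizer` definitions should translate into roughly the following raw Elasticsearch settings and mapping fragments. This is a sketch shown as Ruby hashes, with field paths and surrounding structure abbreviated; it is not taken from the MR.

```ruby
# Sketch of the index pieces relevant to SHA fields (abbreviated).
settings_fragment = {
  analysis: {
    normalizer: {
      # Defined in the settings hunk above: lowercases the whole value.
      sha_normalizer: { type: 'custom', filter: ['lowercase'] }
    }
  }
}

mappings_fragment = {
  properties: {
    commit: {
      properties: {
        sha: {
          type: 'keyword',             # one un-tokenized term per SHA
          index_options: 'docs',
          normalizer: 'sha_normalizer' # applied at index time, so upper-cased input is stored lowercased
        }
      }
    }
  }
}
```

Because the field is now a `keyword`, a full SHA matches as a single lowercased term, and partial SHAs rely on the query-side prefix rewrite in the next file.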
@@ -3,6 +3,8 @@
 module Elastic
   module Latest
     module GitClassProxy
+      SHA_REGEX = /\A[0-9a-f]{5,40}\z/i.freeze
+
       def elastic_search(query, type: :all, page: 1, per: 20, options: {})
         results = { blobs: [], commits: [] }
@@ -41,13 +43,15 @@ module Elastic
         fields = %w(message^10 sha^5 author.name^2 author.email^2 committer.name committer.email).map {|i| "commit.#{i}"}
+        query_with_prefix = query.split(/\s+/).map { |s| s.gsub(SHA_REGEX) { |sha| "#{sha}*" } }.join(' ')
+
         query_hash = {
           query: {
             bool: {
               must: {
                 simple_query_string: {
                   fields: fields,
-                  query: query,
+                  query: query_with_prefix,
                   default_operator: :and
                 }
               },
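The query-side half of the change: any whitespace-separated token of the user's query that looks like a SHA (5 to 40 hex characters, per `SHA_REGEX`) gets a `*` appended, which `simple_query_string` interprets as its prefix operator, so a partial SHA still matches the single keyword term. A standalone sketch of that rewrite (the helper name and sample inputs are made up):

```ruby
# Mirrors the rewrite added above so it can be tried in isolation.
SHA_REGEX = /\A[0-9a-f]{5,40}\z/i.freeze

def with_sha_prefixes(query)
  query.split(/\s+/).map { |s| s.gsub(SHA_REGEX) { |sha| "#{sha}*" } }.join(' ')
end

with_sha_prefixes('6c9f28d3 initial') # => "6c9f28d3* initial" (all-hex token becomes a prefix term)
with_sha_prefixes('popen')            # => "popen"             (not all-hex, left untouched)
with_sha_prefixes('abc1')             # => "abc1"              (shorter than 5 characters, left untouched)
```

Because the regex is anchored with `\A`/`\z` and applied per token, only whole tokens made up entirely of hex characters are rewritten; the five-character minimum mirrors the old tokenizer's `min_gram: 5`. The spec below exercises both a bare partial SHA and an explicit trailing `*`.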
@@ -22,6 +22,13 @@ describe Repository, :elastic do
       expect(project.repository.elastic_search('def popen')[:blobs][:total_count]).to eq(1)
       expect(project.repository.elastic_search('def | popen')[:blobs][:total_count] > 1).to be_truthy
       expect(project.repository.elastic_search('initial')[:commits][:total_count]).to eq(1)
+
+      root_ref = project.repository.root_ref_sha.upcase
+      expect(project.repository.elastic_search(root_ref)[:commits][:total_count]).to eq(1)
+
+      partial_ref = root_ref[0...5]
+      expect(project.repository.elastic_search(partial_ref)[:commits][:total_count]).to eq(1)
+      expect(project.repository.elastic_search(partial_ref + '*')[:commits][:total_count]).to eq(1)
     end

     it 'can filter blobs' do