Commit 30ae308b authored by Dmitry Gruzd, committed by Kerri Miller

Advanced Search: Use reverse filename index

parent ccf33f48
---
name: elastic_file_name_reverse_optimization
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/77226
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/349122
milestone: '14.7'
type: development
group: group::global search
default_enabled: false
...@@ -59,6 +59,10 @@ module Elastic ...@@ -59,6 +59,10 @@ module Elastic
type: 'custom', type: 'custom',
tokenizer: 'whitespace', tokenizer: 'whitespace',
filter: %w(lowercase asciifolding) filter: %w(lowercase asciifolding)
},
whitespace_reverse: {
tokenizer: 'whitespace',
filter: %w(lowercase asciifolding reverse)
} }
}, },
filter: { filter: {
...@@ -207,7 +211,8 @@ module Elastic ...@@ -207,7 +211,8 @@ module Elastic
analyzer: :path_analyzer analyzer: :path_analyzer
indexes :file_name, type: :text, indexes :file_name, type: :text,
analyzer: :code_analyzer, analyzer: :code_analyzer,
search_analyzer: :code_search_analyzer search_analyzer: :code_search_analyzer,
fields: { reverse: { type: :text, analyzer: :whitespace_reverse } }
indexes :content, type: :text, indexes :content, type: :text,
index_options: 'positions', index_options: 'positions',
analyzer: :code_analyzer, analyzer: :code_analyzer,
......
...@@ -148,7 +148,13 @@ module Elastic ...@@ -148,7 +148,13 @@ module Elastic
query = ::Gitlab::Search::Query.new(query) do query = ::Gitlab::Search::Query.new(query) do
filter :filename, field: :file_name filter :filename, field: :file_name
filter :path, parser: ->(input) { "*#{input.downcase}*" } filter :path, parser: ->(input) { "*#{input.downcase}*" }
filter :extension, field: :path, parser: ->(input) { '*.' + input.downcase }
if Feature.enabled?(:elastic_file_name_reverse_optimization, default_enabled: :yaml)
filter :extension, field: 'file_name.reverse', type: :prefix, parser: ->(input) { input.downcase.reverse + '.' }
else
filter :extension, field: :path, parser: ->(input) { '*.' + input.downcase }
end
filter :blob, field: :oid filter :blob, field: :oid
end end
......
...@@ -46,42 +46,59 @@ RSpec.describe Repository, :elastic do ...@@ -46,42 +46,59 @@ RSpec.describe Repository, :elastic do
'commit:match:search_terms') 'commit:match:search_terms')
end end
it 'can filter blobs' do context 'filtering' do
project = create :project, :repository let(:project) { create :project, :repository }
index!(project)
before do
index!(project)
end
# Finds custom-highlighting/test.gitlab-custom it 'can filter blobs' do
expect(project.repository.elastic_search('def | popen filename:test')[:blobs][:total_count]).to eq(1) # Finds custom-highlighting/test.gitlab-custom
expect(project.repository.elastic_search('def | popen filename:test')[:blobs][:total_count]).to eq(1)
# Should not find anything, since filename doesn't match on path # Should not find anything, since filename doesn't match on path
expect(project.repository.elastic_search('def | popen filename:files')[:blobs][:total_count]).to eq(0) expect(project.repository.elastic_search('def | popen filename:files')[:blobs][:total_count]).to eq(0)
# Finds files/ruby/popen.rb, files/markdown/ruby-style-guide.md, files/ruby/regex.rb, files/ruby/version_info.rb # Finds files/ruby/popen.rb, files/markdown/ruby-style-guide.md, files/ruby/regex.rb, files/ruby/version_info.rb
expect(project.repository.elastic_search('def | popen path:ruby')[:blobs][:total_count]).to eq(4) expect(project.repository.elastic_search('def | popen path:ruby')[:blobs][:total_count]).to eq(4)
# Finds files/markdown/ruby-style-guide.md # Finds files/markdown/ruby-style-guide.md
expect(project.repository.elastic_search('def | popen extension:md')[:blobs][:total_count]).to eq(1) expect(project.repository.elastic_search('def | popen extension:md')[:blobs][:total_count]).to eq(1)
# Finds files/ruby/popen.rb # Finds files/ruby/popen.rb
expect(project.repository.elastic_search('* blob:7e3e39ebb9b2bf433b4ad17313770fbe4051649c')[:blobs][:total_count]).to eq(1) expect(project.repository.elastic_search('* blob:7e3e39ebb9b2bf433b4ad17313770fbe4051649c')[:blobs][:total_count]).to eq(1)
# filename filter without search term # filename filter without search term
count = project.repository.ls_files('master').count { |path| path.split('/')[-1].include?('test') } count = project.repository.ls_files('master').count { |path| path.split('/')[-1].include?('test') }
expect(project.repository.elastic_search('filename:test')[:blobs][:total_count]).to eq(count) expect(project.repository.elastic_search('filename:test')[:blobs][:total_count]).to eq(count)
expect(project.repository.elastic_search('filename:test')[:blobs][:total_count]).to be > 0 expect(project.repository.elastic_search('filename:test')[:blobs][:total_count]).to be > 0
# extension filter without search term # extension filter without search term
count = project.repository.ls_files('master').count { |path| path.split('/')[-1].split('.')[-1].include?('md') } count = project.repository.ls_files('master').count { |path| path.split('/')[-1].split('.')[-1].include?('md') }
expect(project.repository.elastic_search('extension:md')[:blobs][:total_count]).to eq(count) expect(project.repository.elastic_search('extension:md')[:blobs][:total_count]).to eq(count)
expect(project.repository.elastic_search('extension:md')[:blobs][:total_count]).to be > 0 expect(project.repository.elastic_search('extension:md')[:blobs][:total_count]).to be > 0
# path filter without search term # path filter without search term
count = project.repository.ls_files('master').count { |path| path.include?('ruby') } count = project.repository.ls_files('master').count { |path| path.include?('ruby') }
expect(project.repository.elastic_search('path:ruby')[:blobs][:total_count]).to eq(count) expect(project.repository.elastic_search('path:ruby')[:blobs][:total_count]).to eq(count)
expect(project.repository.elastic_search('path:ruby')[:blobs][:total_count]).to be > 0 expect(project.repository.elastic_search('path:ruby')[:blobs][:total_count]).to be > 0
# blob filter without search term # blob filter without search term
expect(project.repository.elastic_search('blob:7e3e39ebb9b2bf433b4ad17313770fbe4051649c')[:blobs][:total_count]).to eq(1) expect(project.repository.elastic_search('blob:7e3e39ebb9b2bf433b4ad17313770fbe4051649c')[:blobs][:total_count]).to eq(1)
end
it 'filters by extension when optimization is disabled' do
stub_feature_flags(elastic_file_name_reverse_optimization: false)
# Finds files/markdown/ruby-style-guide.md
expect(project.repository.elastic_search('def | popen extension:md')[:blobs][:total_count]).to eq(1)
# extension filter without search term
count = project.repository.ls_files('master').count { |path| path.split('/')[-1].split('.')[-1].include?('md') }
expect(project.repository.elastic_search('extension:md')[:blobs][:total_count]).to eq(count)
expect(project.repository.elastic_search('extension:md')[:blobs][:total_count]).to be > 0
end
end end
def search_and_check!(on, query, type:, per: 1000) def search_and_check!(on, query, type:, per: 1000)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment