Commit e9fcae37 authored by Sean McGivern

Merge branch 'improve_code_search_for_camel_case' into 'master'

[Elasticsearch] Improve code search for camel case

Closes #2003

See merge request !2054
parents 40588ddd c5aac229
---
title: "[Elasticsearch] Improve code search for camel case"
merge_request:
author:
......@@ -264,6 +264,22 @@ sudo -u git -H bundle exec rake gitlab:check RAILS_ENV=production
If all items are green, then congratulations, the upgrade is complete!
### 13. Elasticsearch index update (if you currently use Elasticsearch)
In the 9.3 release we changed the index mapping to improve partial word matching. Please re-create your index using one of the two methods listed below:
1. Re-create the index. The following command is suitable for smaller GitLab instances (storage size of no more than a few gigabytes).
```
# Omnibus installations
sudo gitlab-rake gitlab:elastic:index
# Installations from source
bundle exec rake gitlab:elastic:index
```
1. For very large GitLab instances, we recommend following [Add GitLab's data to the Elasticsearch index](../integration/elasticsearch.md#add-gitlabs-data-to-the-elasticsearch-index).
## Things went south? Revert to previous version (9.2)
### 1. Revert the code to the previous version
......
......@@ -33,7 +33,7 @@ module Elasticsearch
code_analyzer: {
type: 'custom',
tokenizer: 'standard',
filter: %w(code lowercase asciifolding),
filter: %w(code edgeNGram_filter lowercase asciifolding),
char_filter: ["code_mapping"]
},
code_search_analyzer: {
......@@ -61,8 +61,14 @@ module Elasticsearch
preserve_original: 1,
patterns: [
"(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)",
"(\\d+)"
"(\\d+)",
"(?=([\\p{Lu}]+[\\p{L}]+))"
]
},
edgeNGram_filter: {
type: 'edgeNGram',
min_gram: 2,
max_gram: 40
}
},
char_filter: {
......
......@@ -378,8 +378,8 @@ describe Gitlab::Elastic::SearchResults, lib: true do
results = described_class.new(user, 'def', limit_project_ids)
blobs = results.objects('blobs')
expect(blobs.first["_source"]["blob"]["content"]).to include("def")
expect(results.blobs_count).to eq 5
expect(blobs.first['_source']['blob']['content']).to include('def')
expect(results.blobs_count).to eq 7
end
it 'finds blobs from public projects only' do
......@@ -388,10 +388,11 @@ describe Gitlab::Elastic::SearchResults, lib: true do
Gitlab::Elastic::Helper.refresh_index
results = described_class.new(user, 'def', [project_1.id])
expect(results.blobs_count).to eq 5
expect(results.blobs_count).to eq 7
results = described_class.new(user, 'def', [project_1.id, project_2.id])
expect(results.blobs_count).to eq 10
expect(results.blobs_count).to eq 14
end
it 'returns zero when blobs are not found' do
......@@ -399,6 +400,45 @@ describe Gitlab::Elastic::SearchResults, lib: true do
expect(results.blobs_count).to eq 0
end
context 'Searches CamelCased methods' do
# Fixture for the examples in this context: commit 'test.txt' to project_1 on
# 'master' with content containing the camelCase identifier
# `writeStringToFile`, then index the repository's blobs.
before do
project_1.repository.create_file(
user,
'test.txt',
' function writeStringToFile(){} ',
message: 'added test file',
branch_name: 'master')
project_1.repository.index_blobs
# NOTE(review): presumably makes the freshly indexed blobs visible to search
# before the examples query them — confirm against Gitlab::Elastic::Helper.
Gitlab::Elastic::Helper.refresh_index
end
# Runs a blob search for +term+ scoped to project_1 and returns the path of
# every blob hit, in the order the search returned them.
#
# @param term [String] the query string handed to the search results class
# @return [Array] the value stored under ['_source']['blob']['path'] per hit
def search_for(term)
  search = described_class.new(user, term, [project_1.id])
  hits = search.objects('blobs')
  hits.map { |hit| hit['_source']['blob']['path'] }
end
# Each example below queries a different slice of the fixture identifier
# `writeStringToFile` and expects 'test.txt' among the returned blob paths.

# Leading lowercase word only.
it 'find by first word' do
expect(search_for('write')).to include('test.txt')
end
# Prefix made of the first two camelCase words.
it 'find by first two words' do
expect(search_for('writeString')).to include('test.txt')
end
# Suffix made of the last two camelCase words.
it 'find by last two words' do
expect(search_for('ToFile')).to include('test.txt')
end
# The full identifier.
it 'find by exact match' do
expect(search_for('writeStringToFile')).to include('test.txt')
end
end
end
describe 'Wikis' do
......@@ -415,7 +455,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
it 'finds wiki blobs' do
blobs = results.objects('wiki_blobs')
expect(blobs.first["_source"]["blob"]["content"]).to include("term")
expect(blobs.first['_source']['blob']['content']).to include("term")
expect(results.wiki_blobs_count).to eq 1
end
......@@ -423,7 +463,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
project_1.add_guest(user)
blobs = results.objects('wiki_blobs')
expect(blobs.first["_source"]["blob"]["content"]).to include("term")
expect(blobs.first['_source']['blob']['content']).to include("term")
expect(results.wiki_blobs_count).to eq 1
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment