Commit 2ead279c authored by Nick Thomas's avatar Nick Thomas

Merge branch 'elastic-test-repos' into 'master'

Add elastic test rake task

Closes #7822

See merge request gitlab-org/gitlab-ee!8240
parents b794edfa 7832cd8e
require './spec/support/sidekiq' require './spec/support/sidekiq'
# rubocop:disable Rails/Output
Sidekiq::Testing.inline! do Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
project_urls = [ Gitlab::Seeder.without_gitaly_timeout do
'https://gitlab.com/gitlab-org/gitlab-test.git', project_urls = %w[
'https://gitlab.com/gitlab-org/gitlab-shell.git', https://gitlab.com/gitlab-org/gitlab-test.git
'https://gitlab.com/gnuwget/wget2.git', https://gitlab.com/gitlab-org/gitlab-shell.git
'https://gitlab.com/Commit451/LabCoat.git', https://gitlab.com/gnuwget/wget2.git
'https://github.com/jashkenas/underscore.git', https://gitlab.com/Commit451/LabCoat.git
'https://github.com/flightjs/flight.git', https://github.com/jashkenas/underscore.git
'https://github.com/twitter/typeahead.js.git', https://github.com/flightjs/flight.git
'https://github.com/h5bp/html5-boilerplate.git', https://github.com/twitter/typeahead.js.git
'https://github.com/google/material-design-lite.git', https://github.com/h5bp/html5-boilerplate.git
'https://github.com/jlevy/the-art-of-command-line.git', https://github.com/google/material-design-lite.git
'https://github.com/FreeCodeCamp/freecodecamp.git', https://github.com/jlevy/the-art-of-command-line.git
'https://github.com/google/deepdream.git', https://github.com/FreeCodeCamp/freecodecamp.git
'https://github.com/jtleek/datasharing.git', https://github.com/google/deepdream.git
'https://github.com/WebAssembly/design.git', https://github.com/jtleek/datasharing.git
'https://github.com/airbnb/javascript.git', https://github.com/WebAssembly/design.git
'https://github.com/tessalt/echo-chamber-js.git', https://github.com/airbnb/javascript.git
'https://github.com/atom/atom.git', https://github.com/tessalt/echo-chamber-js.git
'https://github.com/mattermost/mattermost-server.git', https://github.com/atom/atom.git
'https://github.com/purifycss/purifycss.git', https://github.com/mattermost/mattermost-server.git
'https://github.com/facebook/nuclide.git', https://github.com/purifycss/purifycss.git
'https://github.com/wbkd/awesome-d3.git', https://github.com/facebook/nuclide.git
'https://github.com/kilimchoi/engineering-blogs.git', https://github.com/wbkd/awesome-d3.git
'https://github.com/gilbarbara/logos.git', https://github.com/kilimchoi/engineering-blogs.git
'https://github.com/reduxjs/redux.git', https://github.com/gilbarbara/logos.git
'https://github.com/awslabs/s2n.git', https://github.com/reduxjs/redux.git
'https://github.com/arkency/reactjs_koans.git', https://github.com/awslabs/s2n.git
'https://github.com/twbs/bootstrap.git', https://github.com/arkency/reactjs_koans.git
'https://github.com/chjj/ttystudio.git', https://github.com/twbs/bootstrap.git
'https://github.com/MostlyAdequate/mostly-adequate-guide.git', https://github.com/chjj/ttystudio.git
'https://github.com/octocat/Spoon-Knife.git', https://github.com/MostlyAdequate/mostly-adequate-guide.git
'https://github.com/opencontainers/runc.git', https://github.com/octocat/Spoon-Knife.git
'https://github.com/googlesamples/android-topeka.git' https://github.com/opencontainers/runc.git
] https://github.com/googlesamples/android-topeka.git
]
# You can specify how many projects you need during seed execution
size = ENV['SIZE'].present? ? ENV['SIZE'].to_i : 8
project_urls.first(size).each_with_index do |url, i|
group_path, project_path = url.split('/')[-2..-1]
group = Group.find_by(path: group_path)
unless group
group = Group.new(
name: group_path.titleize,
path: group_path
)
group.description = FFaker::Lorem.sentence
group.save
group.add_owner(User.first)
end
project_path.gsub!(".git", "") large_project_urls = %w[
https://github.com/torvalds/linux.git
https://gitlab.gnome.org/GNOME/gimp.git
https://gitlab.gnome.org/GNOME/gnome-mud.git
https://gitlab.com/fdroid/fdroidclient.git
https://gitlab.com/inkscape/inkscape.git
https://github.com/gnachman/iTerm2.git
]
params = { def create_project(url, force_latest_storage: false)
import_url: url, group_path, project_path = url.split('/')[-2..-1]
namespace_id: group.id,
name: project_path.titleize,
description: FFaker::Lorem.sentence,
visibility_level: Gitlab::VisibilityLevel.values.sample,
skip_disk_validation: true
}
if i % 2 == 0 group = Group.find_by(path: group_path)
params[:storage_version] = Project::LATEST_STORAGE_VERSION
end unless group
group = Group.new(
name: group_path.titleize,
path: group_path
)
group.description = FFaker::Lorem.sentence
group.save
group.add_owner(User.first)
end
project_path.gsub!(".git", "")
params = {
import_url: url,
namespace_id: group.id,
name: project_path.titleize,
description: FFaker::Lorem.sentence,
visibility_level: Gitlab::VisibilityLevel.values.sample,
skip_disk_validation: true
}
if force_latest_storage
params[:storage_version] = Project::LATEST_STORAGE_VERSION
end
project = nil
project = nil Sidekiq::Worker.skipping_transaction_check do
project = Projects::CreateService.new(User.first, params).execute
Sidekiq::Worker.skipping_transaction_check do # Seed-Fu runs this entire fixture in a transaction, so the `after_commit`
project = Projects::CreateService.new(User.first, params).execute # hook won't run until after the fixture is loaded. That is too late
# since the Sidekiq::Testing block has already exited. Force clearing
# the `after_commit` queue to ensure the job is run now.
project.send(:_run_after_commit_queue)
project.import_state.send(:_run_after_commit_queue)
end
# Seed-Fu runs this entire fixture in a transaction, so the `after_commit` if project.valid? && project.valid_repo?
# hook won't run until after the fixture is loaded. That is too late print '.'
# since the Sidekiq::Testing block has already exited. Force clearing else
# the `after_commit` queue to ensure the job is run now. puts project.errors.full_messages
project.send(:_run_after_commit_queue) print 'F'
project.import_state.send(:_run_after_commit_queue) end
end end
if project.valid? && project.valid_repo? # You can specify how many projects you need during seed execution
print '.' size = ENV['SIZE'].present? ? ENV['SIZE'].to_i : 8
else
puts project.errors.full_messages project_urls.first(size).each_with_index do |url, i|
print 'F' create_project(url, force_latest_storage: i.even?)
end
if ENV['LARGE_PROJECTS'].present?
large_project_urls.each(&method(:create_project))
if ENV['FORK'].present?
puts "\nGenerating forks"
project_name = ENV['FORK'] == 'true' ? 'torvalds/linux' : ENV['FORK']
project = Project.find_by_full_path(project_name)
User.offset(1).first(5).each do |user|
new_project = Projects::ForkService.new(project, user).execute
if new_project.valid? && (new_project.valid_repo? || new_project.import_state.scheduled?)
print '.'
else
new_project.errors.full_messages.each do |error|
puts "#{new_project.full_path}: #{error}"
end
print 'F'
end
end
end
end end
end end
end end
......
...@@ -38,6 +38,13 @@ this adds `gitlab-elasticsearch-indexer` to `$GOPATH/bin`, please make sure that ...@@ -38,6 +38,13 @@ this adds `gitlab-elasticsearch-indexer` to `$GOPATH/bin`, please make sure that
**note:** `make` will not recompile the executable unless you do `make clean` beforehand **note:** `make` will not recompile the executable unless you do `make clean` beforehand
## Helpful rake tasks
- `gitlab:elastic:test:index_size`: Tells you how much space the current index is using, as well as how many documents are in the index.
- `gitlab:elastic:test:index_size_change`: Outputs index size, reindexes, and outputs index size again. Useful when testing improvements to indexing size.
Additionally, if you need large repos or multiple forks for testing, please consider [following these instructions](https://docs.gitlab.com/ee/development/rake_tasks.html#extra-project-seed-options).
## How does it work? ## How does it work?
The ElasticSearch integration depends on an external indexer. We ship a [ruby indexer](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/bin/elastic_repo_indexer) by default but are also working on an [indexer written in Go](https://gitlab.com/gitlab-org/gitlab-elasticsearch-indexer). The user must trigger the initial indexing via a rake task, but after this is done GitLab itself will trigger reindexing when required via `after_` callbacks on create, update, and destroy that are inherited from [/ee/app/models/concerns/elastic/application_search.rb](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/app/models/concerns/elastic/application_search.rb). The ElasticSearch integration depends on an external indexer. We ship a [ruby indexer](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/bin/elastic_repo_indexer) by default but are also working on an [indexer written in Go](https://gitlab.com/gitlab-org/gitlab-elasticsearch-indexer). The user must trigger the initial indexing via a rake task, but after this is done GitLab itself will trigger reindexing when required via `after_` callbacks on create, update, and destroy that are inherited from [/ee/app/models/concerns/elastic/application_search.rb](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/app/models/concerns/elastic/application_search.rb).
...@@ -156,4 +163,4 @@ cluster.routing.allocation.disk.watermark.high: 10gb ...@@ -156,4 +163,4 @@ cluster.routing.allocation.disk.watermark.high: 10gb
Restart ElasticSearch, and the `read_only_allow_delete` will clear on its own. Restart ElasticSearch, and the `read_only_allow_delete` will clear on its own.
_from "Disk-based Shard Allocation | Elasticsearch Reference" [5.6](https://www.elastic.co/guide/en/elasticsearch/reference/5.6/disk-allocator.html#disk-allocator) and [6.x](https://www.elastic.co/guide/en/elasticsearch/reference/6.x/disk-allocator.html)_ _from "Disk-based Shard Allocation | Elasticsearch Reference" [5.6](https://www.elastic.co/guide/en/elasticsearch/reference/5.6/disk-allocator.html#disk-allocator) and [6.x](https://www.elastic.co/guide/en/elasticsearch/reference/6.x/disk-allocator.html)_
\ No newline at end of file
...@@ -38,6 +38,14 @@ Note that since you can't see the questions from stdout, you might just want ...@@ -38,6 +38,14 @@ Note that since you can't see the questions from stdout, you might just want
to `echo 'yes'` to keep it running. It would still print the errors on stderr to `echo 'yes'` to keep it running. It would still print the errors on stderr
so no worries about missing errors. so no worries about missing errors.
### Extra Project seed options
There are a few environment flags you can pass to change how projects are seeded:
- `SIZE`: defaults to `8`, max: `32`. Number of projects to create.
- `LARGE_PROJECTS`: defaults to false. If set, will clone 6 large projects to help with testing.
- `FORK`: defaults to false. If set to `true`, will fork `torvalds/linux` five times. Can also be set to an existing project full_path and it will fork that instead.
### Notes for MySQL ### Notes for MySQL
Since the seeds would contain various UTF-8 characters, such as emojis or so, Since the seeds would contain various UTF-8 characters, such as emojis or so,
......
...@@ -49,6 +49,10 @@ module Gitlab ...@@ -49,6 +49,10 @@ module Gitlab
def self.refresh_index def self.refresh_index
Project.__elasticsearch__.refresh_index! Project.__elasticsearch__.refresh_index!
end end
# Returns the 'total' entry of the Elasticsearch index statistics for the
# index that Project is stored in.
#
# The statistics are fetched through the Project Elasticsearch proxy's
# client and then narrowed down to the proxy's own index name.
def self.index_size
  stats_by_index = Project.__elasticsearch__.client.indices.stats['indices']
  project_index = Project.__elasticsearch__.index_name

  stats_by_index[project_index]['total']
end
end end
end end
end end
# Rake tasks for inspecting how much space the Elasticsearch index uses.
namespace :gitlab do
  namespace :elastic do
    namespace :test do
      desc 'GitLab | Elasticsearch | Test | Measure space taken by ES indices'
      task index_size: :environment do
        index_name = Project.__elasticsearch__.index_name
        puts "===== Size stats for index: #{index_name} ====="

        # Only the 'docs' and 'store' entries of the stats are printed.
        pp Gitlab::Elastic::Helper.index_size.slice('docs', 'store')
      end

      desc 'GitLab | Elasticsearch | Test | Measure space taken by ES indices, reindex, and measure space taken again'
      task :index_size_change do
        size_task = Rake::Task['gitlab:elastic:test:index_size']

        # First measurement, before reindexing.
        size_task.invoke

        puts '===== Reindexing, please wait ====='
        silence_stdout { Rake::Task['gitlab:elastic:index'].invoke }

        # A rake task only runs once per `#invoke` unless it is re-enabled, and
        # `#execute` is not an option because `index_size` depends on the
        # environment being loaded.
        size_task.reenable
        size_task.invoke

        puts 'Done! Please ensure document count is the expected value, otherwise please check indexing is working properly.'
      end
    end
  end
end
# Runs the given block with $stdout redirected to the null device, then
# points $stdout back at its previous destination, even if the block raises.
#
# Returns the value of the block.
def silence_stdout(&_block)
  # Take the duplicate before entering the protected region: if this fails
  # there is nothing to restore, and the failure should surface as-is rather
  # than being replaced by an error raised from the cleanup code.
  saved_stdout = $stdout.dup

  begin
    $stdout.reopen(File::NULL)
    $stdout.sync = true
    yield
  ensure
    # Re-attach $stdout to the saved stream, then release the duplicate.
    $stdout.reopen(saved_stdout)
    saved_stdout.close
  end
end
...@@ -26,6 +26,19 @@ module Gitlab ...@@ -26,6 +26,19 @@ module Gitlab
puts "\nOK".color(:green) puts "\nOK".color(:green)
end end
# Runs the given block with the `gitaly_timeout_default` application setting
# set to 0, then writes the previous value back, even if the block raises.
#
# Returns the value of the block.
def self.without_gitaly_timeout
  # Read the current value before entering the protected region. If this read
  # fails there is nothing to restore, and the cleanup must not run: it would
  # otherwise write nil into the column.
  old_timeout = Gitlab::CurrentSettings.current_application_settings.gitaly_timeout_default

  begin
    # Remove Gitaly timeout
    Gitlab::CurrentSettings.current_application_settings.update_columns(gitaly_timeout_default: 0)
    # Otherwise we still see the default value when running seed_fu
    ApplicationSetting.expire

    yield
  ensure
    Gitlab::CurrentSettings.current_application_settings.update_columns(gitaly_timeout_default: old_timeout)
    ApplicationSetting.expire
  end
end
def self.mute_notifications def self.mute_notifications
NotificationService.prepend(MuteNotifications) NotificationService.prepend(MuteNotifications)
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment