Commit 2ead279c authored by Nick Thomas's avatar Nick Thomas

Merge branch 'elastic-test-repos' into 'master'

Add elastic test rake task

Closes #7822

See merge request gitlab-org/gitlab-ee!8240
parents b794edfa 7832cd8e
require './spec/support/sidekiq'
# rubocop:disable Rails/Output
Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do
project_urls = [
'https://gitlab.com/gitlab-org/gitlab-test.git',
'https://gitlab.com/gitlab-org/gitlab-shell.git',
'https://gitlab.com/gnuwget/wget2.git',
'https://gitlab.com/Commit451/LabCoat.git',
'https://github.com/jashkenas/underscore.git',
'https://github.com/flightjs/flight.git',
'https://github.com/twitter/typeahead.js.git',
'https://github.com/h5bp/html5-boilerplate.git',
'https://github.com/google/material-design-lite.git',
'https://github.com/jlevy/the-art-of-command-line.git',
'https://github.com/FreeCodeCamp/freecodecamp.git',
'https://github.com/google/deepdream.git',
'https://github.com/jtleek/datasharing.git',
'https://github.com/WebAssembly/design.git',
'https://github.com/airbnb/javascript.git',
'https://github.com/tessalt/echo-chamber-js.git',
'https://github.com/atom/atom.git',
'https://github.com/mattermost/mattermost-server.git',
'https://github.com/purifycss/purifycss.git',
'https://github.com/facebook/nuclide.git',
'https://github.com/wbkd/awesome-d3.git',
'https://github.com/kilimchoi/engineering-blogs.git',
'https://github.com/gilbarbara/logos.git',
'https://github.com/reduxjs/redux.git',
'https://github.com/awslabs/s2n.git',
'https://github.com/arkency/reactjs_koans.git',
'https://github.com/twbs/bootstrap.git',
'https://github.com/chjj/ttystudio.git',
'https://github.com/MostlyAdequate/mostly-adequate-guide.git',
'https://github.com/octocat/Spoon-Knife.git',
'https://github.com/opencontainers/runc.git',
'https://github.com/googlesamples/android-topeka.git'
]
# You can specify how many projects you need during seed execution
size = ENV['SIZE'].present? ? ENV['SIZE'].to_i : 8
project_urls.first(size).each_with_index do |url, i|
group_path, project_path = url.split('/')[-2..-1]
group = Group.find_by(path: group_path)
unless group
group = Group.new(
name: group_path.titleize,
path: group_path
)
group.description = FFaker::Lorem.sentence
group.save
group.add_owner(User.first)
end
Gitlab::Seeder.without_gitaly_timeout do
project_urls = %w[
https://gitlab.com/gitlab-org/gitlab-test.git
https://gitlab.com/gitlab-org/gitlab-shell.git
https://gitlab.com/gnuwget/wget2.git
https://gitlab.com/Commit451/LabCoat.git
https://github.com/jashkenas/underscore.git
https://github.com/flightjs/flight.git
https://github.com/twitter/typeahead.js.git
https://github.com/h5bp/html5-boilerplate.git
https://github.com/google/material-design-lite.git
https://github.com/jlevy/the-art-of-command-line.git
https://github.com/FreeCodeCamp/freecodecamp.git
https://github.com/google/deepdream.git
https://github.com/jtleek/datasharing.git
https://github.com/WebAssembly/design.git
https://github.com/airbnb/javascript.git
https://github.com/tessalt/echo-chamber-js.git
https://github.com/atom/atom.git
https://github.com/mattermost/mattermost-server.git
https://github.com/purifycss/purifycss.git
https://github.com/facebook/nuclide.git
https://github.com/wbkd/awesome-d3.git
https://github.com/kilimchoi/engineering-blogs.git
https://github.com/gilbarbara/logos.git
https://github.com/reduxjs/redux.git
https://github.com/awslabs/s2n.git
https://github.com/arkency/reactjs_koans.git
https://github.com/twbs/bootstrap.git
https://github.com/chjj/ttystudio.git
https://github.com/MostlyAdequate/mostly-adequate-guide.git
https://github.com/octocat/Spoon-Knife.git
https://github.com/opencontainers/runc.git
https://github.com/googlesamples/android-topeka.git
]
project_path.gsub!(".git", "")
large_project_urls = %w[
https://github.com/torvalds/linux.git
https://gitlab.gnome.org/GNOME/gimp.git
https://gitlab.gnome.org/GNOME/gnome-mud.git
https://gitlab.com/fdroid/fdroidclient.git
https://gitlab.com/inkscape/inkscape.git
https://github.com/gnachman/iTerm2.git
]
params = {
import_url: url,
namespace_id: group.id,
name: project_path.titleize,
description: FFaker::Lorem.sentence,
visibility_level: Gitlab::VisibilityLevel.values.sample,
skip_disk_validation: true
}
def create_project(url, force_latest_storage: false)
group_path, project_path = url.split('/')[-2..-1]
if i % 2 == 0
params[:storage_version] = Project::LATEST_STORAGE_VERSION
end
group = Group.find_by(path: group_path)
unless group
group = Group.new(
name: group_path.titleize,
path: group_path
)
group.description = FFaker::Lorem.sentence
group.save
group.add_owner(User.first)
end
project_path.gsub!(".git", "")
params = {
import_url: url,
namespace_id: group.id,
name: project_path.titleize,
description: FFaker::Lorem.sentence,
visibility_level: Gitlab::VisibilityLevel.values.sample,
skip_disk_validation: true
}
if force_latest_storage
params[:storage_version] = Project::LATEST_STORAGE_VERSION
end
project = nil
project = nil
Sidekiq::Worker.skipping_transaction_check do
project = Projects::CreateService.new(User.first, params).execute
Sidekiq::Worker.skipping_transaction_check do
project = Projects::CreateService.new(User.first, params).execute
# Seed-Fu runs this entire fixture in a transaction, so the `after_commit`
# hook won't run until after the fixture is loaded. That is too late
# since the Sidekiq::Testing block has already exited. Force clearing
# the `after_commit` queue to ensure the job is run now.
project.send(:_run_after_commit_queue)
project.import_state.send(:_run_after_commit_queue)
end
# Seed-Fu runs this entire fixture in a transaction, so the `after_commit`
# hook won't run until after the fixture is loaded. That is too late
# since the Sidekiq::Testing block has already exited. Force clearing
# the `after_commit` queue to ensure the job is run now.
project.send(:_run_after_commit_queue)
project.import_state.send(:_run_after_commit_queue)
if project.valid? && project.valid_repo?
print '.'
else
puts project.errors.full_messages
print 'F'
end
end
if project.valid? && project.valid_repo?
print '.'
else
puts project.errors.full_messages
print 'F'
# You can specify how many projects you need during seed execution
size = ENV['SIZE'].present? ? ENV['SIZE'].to_i : 8
project_urls.first(size).each_with_index do |url, i|
create_project(url, force_latest_storage: i.even?)
end
if ENV['LARGE_PROJECTS'].present?
large_project_urls.each(&method(:create_project))
# Optionally fork an existing project for up to five users (the first user is
# skipped). FORK=true targets 'torvalds/linux'; any other value is used as the
# full path of the project to fork.
if ENV['FORK'].present?
  puts "\nGenerating forks"
  project_name = ENV['FORK'] == 'true' ? 'torvalds/linux' : ENV['FORK']
  project = Project.find_by_full_path(project_name)

  if project
    User.offset(1).first(5).each do |user|
      new_project = Projects::ForkService.new(project, user).execute

      # A fork whose import is still scheduled is reported as a success.
      if new_project.valid? && (new_project.valid_repo? || new_project.import_state.scheduled?)
        print '.'
      else
        new_project.errors.full_messages.each do |error|
          puts "#{new_project.full_path}: #{error}"
        end
        print 'F'
      end
    end
  else
    # The lookup returns nothing when the source project was never seeded,
    # e.g. FORK=true without the large projects (which include torvalds/linux).
    # Report it instead of handing a nil project to the fork service.
    puts "#{project_name}: project not found, skipping forks"
    print 'F'
  end
end
end
end
end
......
......@@ -38,6 +38,13 @@ this adds `gitlab-elasticsearch-indexer` to `$GOPATH/bin`, please make sure that
**note:** `make` will not recompile the executable unless you do `make clean` beforehand
## Helpful rake tasks
- `gitlab:elastic:test:index_size`: Tells you how much space the current index is using, as well as how many documents are in the index.
- `gitlab:elastic:test:index_size_change`: Outputs index size, reindexes, and outputs index size again. Useful when testing improvements to indexing size.
Additionally, if you need large repos or multiple forks for testing, please consider [following these instructions](https://docs.gitlab.com/ee/development/rake_tasks.html#extra-project-seed-options).
## How does it work?
The ElasticSearch integration depends on an external indexer. We ship a [ruby indexer](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/bin/elastic_repo_indexer) by default but are also working on an [indexer written in Go](https://gitlab.com/gitlab-org/gitlab-elasticsearch-indexer). The user must trigger the initial indexing via a rake task, but after this is done GitLab itself will trigger reindexing when required via `after_` callbacks on create, update, and destroy that are inherited from [/ee/app/models/concerns/elastic/application_search.rb](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/app/models/concerns/elastic/application_search.rb).
......@@ -156,4 +163,4 @@ cluster.routing.allocation.disk.watermark.high: 10gb
Restart ElasticSearch, and the `read_only_allow_delete` will clear on its own.
_from "Disk-based Shard Allocation | Elasticsearch Reference" [5.6](https://www.elastic.co/guide/en/elasticsearch/reference/5.6/disk-allocator.html#disk-allocator) and [6.x](https://www.elastic.co/guide/en/elasticsearch/reference/6.x/disk-allocator.html)_
\ No newline at end of file
_from "Disk-based Shard Allocation | Elasticsearch Reference" [5.6](https://www.elastic.co/guide/en/elasticsearch/reference/5.6/disk-allocator.html#disk-allocator) and [6.x](https://www.elastic.co/guide/en/elasticsearch/reference/6.x/disk-allocator.html)_
......@@ -38,6 +38,14 @@ Note that since you can't see the questions from stdout, you might just want
to `echo 'yes'` to keep it running. It would still print the errors on stderr
so no worries about missing errors.
### Extra Project seed options
There are a few environment flags you can pass to change how projects are seeded:
- `SIZE`: defaults to `8`, max: `32`. Number of projects to create.
- `LARGE_PROJECTS`: defaults to false. If set, will clone 6 large projects to help with testing.
- `FORK`: defaults to false. If set to `true`, will fork `torvalds/linux` five times. Can also be set to an existing project full_path, and it will fork that instead.
### Notes for MySQL
Since the seeds would contain various UTF-8 characters, such as emojis or so,
......
......@@ -49,6 +49,10 @@ module Gitlab
# Asks the Project model's Elasticsearch proxy to refresh its index.
def self.refresh_index
  project_proxy = Project.__elasticsearch__
  project_proxy.refresh_index!
end
# Returns the 'total' entry of the index stats for the Project index, as
# reported by the Elasticsearch client's indices stats call.
def self.index_size
  proxy = Project.__elasticsearch__
  per_index_stats = proxy.client.indices.stats['indices']

  per_index_stats[proxy.index_name]['total']
end
end
end
end
namespace :gitlab do
  namespace :elastic do
    namespace :test do
      desc 'GitLab | Elasticsearch | Test | Measure space taken by ES indices'
      task index_size: :environment do
        index_name = Project.__elasticsearch__.index_name
        puts "===== Size stats for index: #{index_name} ====="

        # Only the document and storage sections of the stats are printed.
        pp Gitlab::Elastic::Helper.index_size.slice('docs', 'store')
      end

      desc 'GitLab | Elasticsearch | Test | Measure space taken by ES indices, reindex, and measure space taken again'
      task :index_size_change do
        size_task = Rake::Task['gitlab:elastic:test:index_size']

        # First measurement, taken before reindexing.
        size_task.invoke

        puts '===== Reindexing, please wait ====='
        silence_stdout { Rake::Task['gitlab:elastic:index'].invoke }

        # A rake task only runs once per `#invoke` unless it is re-enabled, and
        # `#execute` is not an option because `index_size` needs the
        # environment prerequisite to be loaded.
        size_task.reenable
        size_task.invoke

        puts 'Done! Please ensure document count is the expected value, otherwise please check indexing is working properly.'
      end
    end
  end
end
# Runs the given block while $stdout is redirected to the null device, then
# points $stdout back at its previous destination (also when the block raises).
# Returns the block's value.
def silence_stdout(&_block)
  begin
    saved_stdout = $stdout.dup

    $stdout.reopen(File::NULL)
    $stdout.sync = true

    yield
  ensure
    # Re-attach the real stream first, then release the duplicate handle.
    $stdout.reopen(saved_stdout)
    saved_stdout.close
  end
end
......@@ -26,6 +26,19 @@ module Gitlab
puts "\nOK".color(:green)
end
# Runs the given block with the application setting `gitaly_timeout_default`
# set to 0 (described by the original author as removing the Gitaly timeout),
# then writes the previous value back, even if the block raises.
#
# The previous value is read before the protected section starts, so a failure
# while reading it propagates as-is instead of triggering a cleanup that would
# write `nil` into the setting.
#
# @yield the work to perform while the timeout is zeroed
# @return [Object] the value returned by the block
def self.without_gitaly_timeout
  old_timeout = Gitlab::CurrentSettings.current_application_settings.gitaly_timeout_default

  begin
    Gitlab::CurrentSettings.current_application_settings.update_columns(gitaly_timeout_default: 0)
    # Otherwise we still see the default value when running seed_fu
    ApplicationSetting.expire

    yield
  ensure
    # The settings are looked up again here because the cached copy was
    # expired above.
    Gitlab::CurrentSettings.current_application_settings.update_columns(gitaly_timeout_default: old_timeout)
    ApplicationSetting.expire
  end
end
# Prepends the MuteNotifications module onto NotificationService.
def self.mute_notifications
  service_class = NotificationService
  service_class.prepend(MuteNotifications)
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment