Commit 7ae15546 authored by Jan Provaznik's avatar Jan Provaznik

Merge branch '28149-improve-seed' into 'master'

Seed dev database with massive amount of Users, Projects and its relations

Closes #17211

See merge request gitlab-org/gitlab!16700
parents 4f438c5a e3dc3bfc
# frozen_string_literal: true
# Seeds user records for a development database: a bulk set written with raw
# SQL (see create_mass_users!) followed by a few FFaker-generated accounts.
class Gitlab::Seeder::Users
  include ActionView::Helpers::NumberHelper

  RANDOM_USERS_COUNT = 20
  MASS_USERS_COUNT = ENV['CI'] ? 10 : 1_000_000
  MASS_INSERT_USERNAME_START = 'mass_insert_user_'

  attr_reader :opts

  def initialize(opts = {})
    @opts = opts
  end

  # Runs both seeding steps, mass insert first, inside a
  # Sidekiq::Testing.inline! block.
  def seed!
    Sidekiq::Testing.inline! do
      create_mass_users!
      create_random_users!
    end
  end

  private

  # Bulk-inserts MASS_USERS_COUNT users and then one namespace per non-admin
  # user. Both statements are handed to Gitlab::Seeder.with_mass_insert, which
  # decides whether the block actually runs.
  def create_mass_users!
    # A single digest is computed once and reused for every inserted row.
    password_digest = Devise::Encryptor.digest(User, '12345678')

    Gitlab::Seeder.with_mass_insert(MASS_USERS_COUNT, User) do
      ActiveRecord::Base.connection.execute(mass_users_sql(password_digest))
    end

    non_admin_count = User.where(admin: false).count

    Gitlab::Seeder.with_mass_insert(non_admin_count, Namespace) do
      ActiveRecord::Base.connection.execute(mass_namespaces_sql)
    end
  end

  # INSERT ... SELECT statement that generates one user per value of
  # generate_series(1, MASS_USERS_COUNT); usernames start with
  # MASS_INSERT_USERNAME_START.
  def mass_users_sql(password_digest)
    <<~SQL
      INSERT INTO users (username, name, email, confirmed_at, projects_limit, encrypted_password)
      SELECT
      '#{MASS_INSERT_USERNAME_START}' || seq,
      'Seed user ' || seq,
      'seed_user' || seq || '@example.com',
      to_timestamp(seq),
      #{MASS_USERS_COUNT},
      '#{password_digest}'
      FROM generate_series(1, #{MASS_USERS_COUNT}) AS seq
    SQL
  end

  # INSERT ... SELECT statement that creates a namespace, named after the
  # username, for every user row where admin is false.
  def mass_namespaces_sql
    <<~SQL
      INSERT INTO namespaces (name, path, owner_id)
      SELECT
      username,
      username,
      id
      FROM users WHERE NOT admin
    SQL
  end

  # Creates RANDOM_USERS_COUNT users with FFaker-generated identities.
  def create_random_users!
    RANDOM_USERS_COUNT.times { create_random_user! }
  end

  # Attempts to create one confirmed user. Prints '.' on success and 'F' when
  # the record fails validation, so one bad sample does not abort the seed.
  def create_random_user!
    User.create!(
      username: FFaker::Internet.user_name,
      name: FFaker::Name.name,
      email: FFaker::Internet.email,
      confirmed_at: DateTime.now,
      password: '12345678'
    )
    print '.'
  rescue ActiveRecord::RecordInvalid
    print 'F'
  end
end
# Run the users seeder inside Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet { Gitlab::Seeder::Users.new.seed! }
require './spec/support/sidekiq'
# rubocop:disable Rails/Output
Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do
Gitlab::Seeder.without_gitaly_timeout do
project_urls = %w[
https://gitlab.com/gitlab-org/gitlab-test.git
https://gitlab.com/gitlab-org/gitlab-shell.git
https://gitlab.com/gnuwget/wget2.git
https://gitlab.com/Commit451/LabCoat.git
https://github.com/jashkenas/underscore.git
https://github.com/flightjs/flight.git
https://github.com/twitter/typeahead.js.git
https://github.com/h5bp/html5-boilerplate.git
https://github.com/google/material-design-lite.git
https://github.com/jlevy/the-art-of-command-line.git
https://github.com/FreeCodeCamp/freecodecamp.git
https://github.com/google/deepdream.git
https://github.com/jtleek/datasharing.git
https://github.com/WebAssembly/design.git
https://github.com/airbnb/javascript.git
https://github.com/tessalt/echo-chamber-js.git
https://github.com/atom/atom.git
https://github.com/mattermost/mattermost-server.git
https://github.com/purifycss/purifycss.git
https://github.com/facebook/nuclide.git
https://github.com/wbkd/awesome-d3.git
https://github.com/kilimchoi/engineering-blogs.git
https://github.com/gilbarbara/logos.git
https://github.com/reduxjs/redux.git
https://github.com/awslabs/s2n.git
https://github.com/arkency/reactjs_koans.git
https://github.com/twbs/bootstrap.git
https://github.com/chjj/ttystudio.git
https://github.com/MostlyAdequate/mostly-adequate-guide.git
https://github.com/octocat/Spoon-Knife.git
https://github.com/opencontainers/runc.git
https://github.com/googlesamples/android-topeka.git
]
large_project_urls = %w[
https://github.com/torvalds/linux.git
https://gitlab.gnome.org/GNOME/gimp.git
https://gitlab.gnome.org/GNOME/gnome-mud.git
https://gitlab.com/fdroid/fdroidclient.git
https://gitlab.com/inkscape/inkscape.git
https://github.com/gnachman/iTerm2.git
]
def create_project(url, force_latest_storage: false)
group_path, project_path = url.split('/')[-2..-1]
group = Group.find_by(path: group_path)
unless group
group = Group.new(
name: group_path.titleize,
path: group_path
)
group.description = FFaker::Lorem.sentence
group.save!
group.add_owner(User.first)
end
class Gitlab::Seeder::Projects
include ActionView::Helpers::NumberHelper
PROJECT_URLS = %w[
https://gitlab.com/gitlab-org/gitlab-test.git
https://gitlab.com/gitlab-org/gitlab-shell.git
https://gitlab.com/gnuwget/wget2.git
https://gitlab.com/Commit451/LabCoat.git
https://github.com/jashkenas/underscore.git
https://github.com/flightjs/flight.git
https://github.com/twitter/typeahead.js.git
https://github.com/h5bp/html5-boilerplate.git
https://github.com/google/material-design-lite.git
https://github.com/jlevy/the-art-of-command-line.git
https://github.com/FreeCodeCamp/freecodecamp.git
https://github.com/google/deepdream.git
https://github.com/jtleek/datasharing.git
https://github.com/WebAssembly/design.git
https://github.com/airbnb/javascript.git
https://github.com/tessalt/echo-chamber-js.git
https://github.com/atom/atom.git
https://github.com/mattermost/mattermost-server.git
https://github.com/purifycss/purifycss.git
https://github.com/facebook/nuclide.git
https://github.com/wbkd/awesome-d3.git
https://github.com/kilimchoi/engineering-blogs.git
https://github.com/gilbarbara/logos.git
https://github.com/reduxjs/redux.git
https://github.com/awslabs/s2n.git
https://github.com/arkency/reactjs_koans.git
https://github.com/twbs/bootstrap.git
https://github.com/chjj/ttystudio.git
https://github.com/MostlyAdequate/mostly-adequate-guide.git
https://github.com/octocat/Spoon-Knife.git
https://github.com/opencontainers/runc.git
https://github.com/googlesamples/android-topeka.git
]
LARGE_PROJECT_URLS = %w[
https://github.com/torvalds/linux.git
https://gitlab.gnome.org/GNOME/gimp.git
https://gitlab.gnome.org/GNOME/gnome-mud.git
https://gitlab.com/fdroid/fdroidclient.git
https://gitlab.com/inkscape/inkscape.git
https://github.com/gnachman/iTerm2.git
]
# Consider altering MASS_USERS_COUNT for less
# users with projects.
MASS_PROJECTS_COUNT_PER_USER = {
private: 3, # 3m projects +
internal: 1, # 1m projects +
public: 1 # 1m projects = 5m total
}
MASS_INSERT_NAME_START = 'mass_insert_project_'
# Runs the three project seeding steps, in order, inside a
# Sidekiq::Testing.inline! block: real projects, large projects, mass projects.
def seed!
Sidekiq::Testing.inline! do
create_real_projects!
create_large_projects!
create_mass_projects!
end
end
project_path.gsub!(".git", "")
private
params = {
import_url: url,
namespace_id: group.id,
name: project_path.titleize,
description: FFaker::Lorem.sentence,
visibility_level: Gitlab::VisibilityLevel.values.sample,
skip_disk_validation: true
}
def create_real_projects!
# You can specify how many projects you need during seed execution
size = ENV['SIZE'].present? ? ENV['SIZE'].to_i : 8
if force_latest_storage
params[:storage_version] = Project::LATEST_STORAGE_VERSION
end
PROJECT_URLS.first(size).each_with_index do |url, i|
create_real_project!(url, force_latest_storage: i.even?)
end
end
project = nil
def create_large_projects!
return unless ENV['LARGE_PROJECTS'].present?
Sidekiq::Worker.skipping_transaction_check do
project = Projects::CreateService.new(User.first, params).execute
LARGE_PROJECT_URLS.each(&method(:create_real_project!))
# Seed-Fu runs this entire fixture in a transaction, so the `after_commit`
# hook won't run until after the fixture is loaded. That is too late
# since the Sidekiq::Testing block has already exited. Force clearing
# the `after_commit` queue to ensure the job is run now.
project.send(:_run_after_commit_queue)
project.import_state.send(:_run_after_commit_queue)
end
if ENV['FORK'].present?
puts "\nGenerating forks"
if project.valid? && project.valid_repo?
project_name = ENV['FORK'] == 'true' ? 'torvalds/linux' : ENV['FORK']
project = Project.find_by_full_path(project_name)
User.offset(1).first(5).each do |user|
new_project = ::Projects::ForkService.new(project, user).execute
if new_project.valid? && (new_project.valid_repo? || new_project.import_state.scheduled?)
print '.'
else
puts project.errors.full_messages
new_project.errors.full_messages.each do |error|
puts "#{new_project.full_path}: #{error}"
end
print 'F'
end
end
end
end
# You can specify how many projects you need during seed execution
size = ENV['SIZE'].present? ? ENV['SIZE'].to_i : 8
def create_real_project!(url, force_latest_storage: false)
group_path, project_path = url.split('/')[-2..-1]
project_urls.first(size).each_with_index do |url, i|
create_project(url, force_latest_storage: i.even?)
end
group = Group.find_by(path: group_path)
if ENV['LARGE_PROJECTS'].present?
large_project_urls.each(&method(:create_project))
unless group
group = Group.new(
name: group_path.titleize,
path: group_path
)
group.description = FFaker::Lorem.sentence
group.save!
if ENV['FORK'].present?
puts "\nGenerating forks"
group.add_owner(User.first)
end
project_name = ENV['FORK'] == 'true' ? 'torvalds/linux' : ENV['FORK']
project_path.gsub!(".git", "")
project = Project.find_by_full_path(project_name)
params = {
import_url: url,
namespace_id: group.id,
name: project_path.titleize,
description: FFaker::Lorem.sentence,
visibility_level: Gitlab::VisibilityLevel.values.sample,
skip_disk_validation: true
}
User.offset(1).first(5).each do |user|
new_project = Projects::ForkService.new(project, user).execute
if force_latest_storage
params[:storage_version] = Project::LATEST_STORAGE_VERSION
end
if new_project.valid? && (new_project.valid_repo? || new_project.import_state.scheduled?)
print '.'
else
new_project.errors.full_messages.each do |error|
puts "#{new_project.full_path}: #{error}"
end
print 'F'
end
end
end
end
project = nil
Sidekiq::Worker.skipping_transaction_check do
project = ::Projects::CreateService.new(User.first, params).execute
# Seed-Fu runs this entire fixture in a transaction, so the `after_commit`
# hook won't run until after the fixture is loaded. That is too late
# since the Sidekiq::Testing block has already exited. Force clearing
# the `after_commit` queue to ensure the job is run now.
project.send(:_run_after_commit_queue)
project.import_state.send(:_run_after_commit_queue)
end
if project.valid? && project.valid_repo?
print '.'
else
puts project.errors.full_messages
print 'F'
end
end
# Bulk-inserts projects for every user that owns a namespace, then fills in
# the project_features and routes rows for projects that lack them.
#
# Each user receives MASS_PROJECTS_COUNT_PER_USER.values.sum projects. The
# visibility of the n-th project of a user is looked up by position in an
# array literal built from MASS_PROJECTS_COUNT_PER_USER. All statements are
# handed to Gitlab::Seeder.with_mass_insert, which decides whether they run.
def create_mass_projects!
  projects_per_user_count = MASS_PROJECTS_COUNT_PER_USER.values.sum

  # One entry per project a user receives, in the hash's declaration order,
  # e.g. private,private,private,internal,public.
  visibilities = MASS_PROJECTS_COUNT_PER_USER.flat_map do |visibility, count|
    [visibility.to_s] * count
  end
  visibility_levels = visibilities.map { |visibility| Gitlab::VisibilityLevel.level_value(visibility) }

  # Comma-separated bodies for the SQL array literals indexed by `seq` below.
  visibility_per_user = visibilities.join(',')
  visibility_level_per_user = visibility_levels.join(',')

  Gitlab::Seeder.with_mass_insert(User.count * projects_per_user_count, "Projects and relations") do
    # The path prefix comes from MASS_INSERT_NAME_START so that it always
    # matches the prefix the `not_mass_generated` scope filters on.
    ActiveRecord::Base.connection.execute <<~SQL
      INSERT INTO projects (name, path, creator_id, namespace_id, visibility_level, created_at, updated_at)
      SELECT
      'Seed project ' || seq || ' ' || ('{#{visibility_per_user}}'::text[])[seq] AS project_name,
      '#{MASS_INSERT_NAME_START}' || ('{#{visibility_per_user}}'::text[])[seq] || '_' || seq AS project_path,
      u.id AS user_id,
      n.id AS namespace_id,
      ('{#{visibility_level_per_user}}'::int[])[seq] AS visibility_level,
      NOW() AS created_at,
      NOW() AS updated_at
      FROM users u
      CROSS JOIN generate_series(1, #{projects_per_user_count}) AS seq
      JOIN namespaces n ON n.owner_id=u.id
    SQL

    # Rows that already exist for a project are left untouched (ON CONFLICT DO NOTHING).
    ActiveRecord::Base.connection.execute <<~SQL
      INSERT INTO project_features (project_id, merge_requests_access_level, issues_access_level, wiki_access_level,
      pages_access_level)
      SELECT
      id,
      #{ProjectFeature::ENABLED} AS merge_requests_access_level,
      #{ProjectFeature::ENABLED} AS issues_access_level,
      #{ProjectFeature::ENABLED} AS wiki_access_level,
      #{ProjectFeature::ENABLED} AS pages_access_level
      FROM projects ON CONFLICT (project_id) DO NOTHING;
    SQL

    # Route name/path are derived from the creating user; existing routes are kept.
    ActiveRecord::Base.connection.execute <<~SQL
      INSERT INTO routes (source_id, source_type, name, path)
      SELECT
      p.id,
      'Project',
      u.name || ' / ' || p.name,
      u.username || '/' || p.path
      FROM projects p JOIN users u ON u.id=p.creator_id
      ON CONFLICT (source_type, source_id) DO NOTHING;
    SQL
  end
end
end
# Run the projects seeder inside Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet { Gitlab::Seeder::Projects.new.seed! }
......@@ -43,7 +43,7 @@ Gitlab::Seeder.quiet do
end
puts "\nGenerating project labels"
Project.all.find_each do |project|
Project.not_mass_generated.find_each do |project|
Gitlab::Seeder::ProjectLabels.new(project).seed!
end
end
require './spec/support/sidekiq'
Gitlab::Seeder.quiet do
  # Attempts to create one confirmed user from the given identity attributes.
  # Prints '.' on success and 'F' when the record fails validation.
  create_user = lambda do |identity|
    begin
      User.create!(identity.merge(confirmed_at: DateTime.now, password: '12345678'))
      print '.'
    rescue ActiveRecord::RecordInvalid
      print 'F'
    end
  end

  # 20 users with FFaker-generated identities.
  20.times do
    create_user.call(
      username: FFaker::Internet.user_name,
      name: FFaker::Name.name,
      email: FFaker::Internet.email
    )
  end

  # 5 users with predictable identities: user0 .. user4.
  5.times do |index|
    create_user.call(
      username: "user#{index}",
      name: "User #{index}",
      email: "user#{index}@example.com"
    )
  end
end
......@@ -3,7 +3,7 @@ require './spec/support/sidekiq'
Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do
Group.all.each do |group|
User.all.sample(4).each do |user|
User.not_mass_generated.sample(4).each do |user|
if group.add_user(user, Gitlab::Access.values.sample).persisted?
print '.'
else
......@@ -12,8 +12,8 @@ Sidekiq::Testing.inline! do
end
end
Project.all.each do |project|
User.all.sample(4).each do |user|
Project.not_mass_generated.each do |project|
User.not_mass_generated.sample(4).each do |user|
if project.add_role(user, Gitlab::Access.sym_options.keys.sample)
print '.'
else
......
require './spec/support/sidekiq'
Gitlab::Seeder.quiet do
Project.all.each do |project|
Project.not_mass_generated.each do |project|
5.times do |i|
milestone_params = {
title: "v#{i}.0",
......
......@@ -4,7 +4,13 @@ Gitlab::Seeder.quiet do
# Limit the number of merge requests per project to avoid long seeds
MAX_NUM_MERGE_REQUESTS = 10
Project.non_archived.with_merge_requests_enabled.reject(&:empty_repo?).each do |project|
projects = Project
.non_archived
.with_merge_requests_enabled
.not_mass_generated
.reject(&:empty_repo?)
projects.each do |project|
branches = project.repository.branch_names.sample(MAX_NUM_MERGE_REQUESTS * 2)
branches.each do |branch_name|
......
......@@ -9,7 +9,7 @@ Sidekiq::Testing.disable! do
# that it falls under `Sidekiq::Testing.disable!`.
Key.skip_callback(:commit, :after, :add_to_shell)
User.first(10).each do |user|
User.not_mass_generated.first(10).each do |user|
key = "ssh-rsa AAAAB3NzaC1yc2EAAAABJQAAAIEAiPWx6WM4lhHNedGfBpPJNPpZ7yKu+dnn1SJejgt#{user.id + 100}6k6YjzGGphH2TUxwKzxcKDKKezwkpfnxPkSMkuEspGRt/aZZ9wa++Oi7Qkr8prgHc4soW6NUlfDzpvZK2H5E7eQaSeP3SAwGmQKUFHCddNaP0L+hM7zhFNzjFvpaMgJw0="
key = user.keys.create(
......
......@@ -25,7 +25,7 @@ end
eos
50.times do |i|
user = User.all.sample
user = User.not_mass_generated.sample
PersonalSnippet.seed(:id, [{
id: i,
......
......@@ -214,7 +214,7 @@ class Gitlab::Seeder::Pipelines
end
Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project|
Project.not_mass_generated.sample(5).each do |project|
project_builds = Gitlab::Seeder::Pipelines.new(project)
project_builds.seed!
end
......
......@@ -3,7 +3,7 @@ require './spec/support/sidekiq'
Gitlab::Seeder.quiet do
admin_user = User.find(1)
Project.all.each do |project|
Project.not_mass_generated.each do |project|
params = {
name: 'master'
}
......
......@@ -217,7 +217,7 @@ Gitlab::Seeder.quiet do
flag = 'SEED_CYCLE_ANALYTICS'
if ENV[flag]
Project.find_each do |project|
Project.not_mass_generated.find_each do |project|
# This seed naively assumes that every project has a repository, and every
# repository has a `master` branch, which may be the case for a pristine
# GDK seed, but is almost never true for a GDK that's actually had
......
......@@ -67,7 +67,7 @@ class Gitlab::Seeder::Environments
end
Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project|
Project.not_mass_generated.sample(5).each do |project|
project_environments = Gitlab::Seeder::Environments.new(project)
project_environments.seed!
end
......
......@@ -22,7 +22,7 @@ module Db
end
def self.random_user
User.find(User.pluck(:id).sample)
User.find(User.not_mass_generated.pluck(:id).sample)
end
end
end
......
......@@ -2,8 +2,8 @@ require './spec/support/sidekiq'
Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do
User.all.sample(10).each do |user|
source_project = Project.public_only.sample
User.not_mass_generated.sample(10).each do |user|
source_project = Project.not_mass_generated.public_only.sample
##
# 03_project.rb might not have created a public project because
......
......@@ -12,6 +12,14 @@ The `setup` task is an alias for `gitlab:setup`.
This task calls `db:reset` to create the database, and calls `db:seed_fu` to seed the database.
Note: `db:setup` calls `db:seed` but this does nothing.
### Env variables
**MASS_INSERT**: Create millions of users (1m) and projects (5m), together with
their relations. Running the seed with it is highly recommended, as it helps catch
slow queries while developing. Expect the process to take up to 20 extra minutes.
**LARGE_PROJECTS**: Create large projects (through import) from a predefined set of urls.
### Seeding issues for all or a given project
You can seed issues for all or a given project with the `gitlab:seed:issues`
......
......@@ -88,7 +88,7 @@ Gitlab::Seeder.quiet do
seeder = Gitlab::Seeder::Burndown.new(project)
seeder.seed!
else
Project.all.each do |project|
Project.not_mass_generated.each do |project|
seeder = Gitlab::Seeder::Burndown.new(project)
seeder.seed!
end
......
......@@ -128,7 +128,7 @@ class Gitlab::Seeder::Vulnerabilities
end
Gitlab::Seeder.quiet do
Project.joins(:ci_pipelines).distinct.all.sample(5).each do |project|
Project.joins(:ci_pipelines).not_mass_generated.distinct.all.sample(5).each do |project|
seeder = Gitlab::Seeder::Vulnerabilities.new(project)
seeder.seed!
end
......
# frozen_string_literal: true
# EE fixture
Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project|
Project.not_mass_generated.sample(5).each do |project|
project.ci_pipelines.all.sample(2).each do |pipeline|
next if pipeline.source_pipeline
......
......@@ -32,7 +32,7 @@ class Gitlab::Seeder::Packages
end
Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project|
Project.not_mass_generated.sample(5).each do |project|
Gitlab::Seeder::Packages.new(project.owner, project).seed
end
end
......@@ -14,7 +14,71 @@ end
module Gitlab
class Seeder
extend ActionView::Helpers::NumberHelper
ESTIMATED_INSERT_PER_MINUTE = 2_000_000
MASS_INSERT_ENV = 'MASS_INSERT'
# Seed-only extension for Project: adds a scope that excludes the projects
# whose path starts with the mass-insert prefix.
module ProjectSeed
  extend ActiveSupport::Concern

  included do
    scope :not_mass_generated, -> {
      where.not("path LIKE '#{Gitlab::Seeder::Projects::MASS_INSERT_NAME_START}%'")
    }
  end
end
# Seed-only extension for User: adds a scope that excludes the users whose
# username starts with the mass-insert prefix.
module UserSeed
  extend ActiveSupport::Concern

  included do
    scope :not_mass_generated, -> {
      where.not("username LIKE '#{Gitlab::Seeder::Users::MASS_INSERT_USERNAME_START}%'")
    }
  end
end
# Wraps a bulk-insert block with progress output.
#
# size  - number of records the block is expected to create (used for the
#         messages and the time estimate only).
# model - either a String label, or a model class whose humanized, pluralized
#         name is used as the label.
#
# The block is skipped, with a hint printed instead, unless the MASS_INSERT
# or CI environment variable is set.
def self.with_mass_insert(size, model)
  humanized_model_name =
    if model.is_a?(String)
      model
    else
      model.model_name.human.pluralize(size)
    end

  unless ENV[MASS_INSERT_ENV] || ENV['CI']
    puts "\nSkipping mass insertion for #{humanized_model_name}."
    puts "Consider running the seed with #{MASS_INSERT_ENV}=1"
    return
  end

  puts "\nCreating #{number_with_delimiter(size)} #{humanized_model_name}."
  puts estimated_time_message(size)

  yield

  puts "\n#{number_with_delimiter(size)} #{humanized_model_name} created!"
end
# Builds the "Rough estimated time" line for inserting `size` records, based
# on ESTIMATED_INSERT_PER_MINUTE and rounded to whole minutes.
def self.estimated_time_message(size)
  minutes = (size.to_f / ESTIMATED_INSERT_PER_MINUTE).round
  return "Rough estimated time: less than a minute ⏰" if minutes.zero?

  "Rough estimated time: #{minutes} #{'minute'.pluralize(minutes)} ⏰"
end
def self.quiet
# Disable database insertion logs so speed isn't limited by ability to print to console
old_logger = ActiveRecord::Base.logger
ActiveRecord::Base.logger = nil
# Additional seed logic for models.
Project.include(ProjectSeed)
User.include(UserSeed)
mute_notifications
mute_mailer
......@@ -23,6 +87,7 @@ module Gitlab
yield
SeedFu.quiet = false
ActiveRecord::Base.logger = old_logger
puts "\nOK".color(:green)
end
......
......@@ -5,6 +5,10 @@ namespace :dev do
# Development setup: invokes gitlab:setup, refreshes the database statistics,
# then invokes gitlab:shell:setup.
task setup: :environment do
# NOTE(review): presumably lets gitlab:setup proceed without an interactive
# confirmation — confirm against the gitlab:setup task.
ENV['force'] = 'yes'
Rake::Task["gitlab:setup"].invoke
# Make sure DB statistics are up to date.
ActiveRecord::Base.connection.execute('ANALYZE')
Rake::Task["gitlab:shell:setup"].invoke
end
......
......@@ -22,7 +22,7 @@ namespace :gitlab do
[project]
else
Project.find_each
Project.not_mass_generated.find_each
end
projects.each do |project|
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment