Commit 7ae15546 authored by Jan Provaznik's avatar Jan Provaznik

Merge branch '28149-improve-seed' into 'master'

Seed dev database with massive amount of Users, Projects and its relations

Closes #17211

See merge request gitlab-org/gitlab!16700
parents 4f438c5a e3dc3bfc
# frozen_string_literal: true
# Seeds development users in two steps:
#
# 1. A bulk INSERT of MASS_USERS_COUNT synthetic users (plus one namespace per
#    non-admin user), wrapped in Gitlab::Seeder.with_mass_insert.
# 2. RANDOM_USERS_COUNT users with FFaker-generated attributes, created
#    through the User model.
class Gitlab::Seeder::Users
  include ActionView::Helpers::NumberHelper

  RANDOM_USERS_COUNT = 20
  # Kept tiny when the CI env variable is set.
  MASS_USERS_COUNT = ENV['CI'] ? 10 : 1_000_000
  # Username prefix shared by every bulk-inserted user.
  MASS_INSERT_USERNAME_START = 'mass_insert_user_'

  attr_reader :opts

  # opts - stored and exposed through #opts; not read anywhere in this class.
  def initialize(opts = {})
    @opts = opts
  end

  # Runs both seeding steps inside Sidekiq::Testing.inline!.
  def seed!
    Sidekiq::Testing.inline! do
      create_mass_users!
      create_random_users!
    end
  end

  private

  # Bulk-inserts the synthetic users and then a namespace for every non-admin
  # user, using raw SQL.
  #
  # Both statements end with ON CONFLICT DO NOTHING so that seeding a database
  # that already contains these rows skips them instead of aborting on a
  # unique-constraint violation.
  def create_mass_users!
    # Digest computed once and reused for every inserted row.
    encrypted_password = Devise::Encryptor.digest(User, '12345678')

    # NOTE(review): the projects_limit column is filled with MASS_USERS_COUNT;
    # confirm that is the intended limit.
    Gitlab::Seeder.with_mass_insert(MASS_USERS_COUNT, User) do
      ActiveRecord::Base.connection.execute <<~SQL
        INSERT INTO users (username, name, email, confirmed_at, projects_limit, encrypted_password)
        SELECT
        '#{MASS_INSERT_USERNAME_START}' || seq,
        'Seed user ' || seq,
        'seed_user' || seq || '@example.com',
        to_timestamp(seq),
        #{MASS_USERS_COUNT},
        '#{encrypted_password}'
        FROM generate_series(1, #{MASS_USERS_COUNT}) AS seq
        ON CONFLICT DO NOTHING
      SQL
    end

    relation = User.where(admin: false)

    Gitlab::Seeder.with_mass_insert(relation.count, Namespace) do
      ActiveRecord::Base.connection.execute <<~SQL
        INSERT INTO namespaces (name, path, owner_id)
        SELECT
        username,
        username,
        id
        FROM users WHERE NOT admin
        ON CONFLICT DO NOTHING
      SQL
    end
  end

  # Creates RANDOM_USERS_COUNT users with FFaker data. Prints '.' for each
  # created user and 'F' when the record fails validation; a failure does not
  # stop the loop.
  def create_random_users!
    RANDOM_USERS_COUNT.times do
      begin
        User.create!(
          username: FFaker::Internet.user_name,
          name: FFaker::Name.name,
          email: FFaker::Internet.email,
          confirmed_at: DateTime.now,
          password: '12345678'
        )

        print '.'
      rescue ActiveRecord::RecordInvalid
        print 'F'
      end
    end
  end
end
# Build the user seeder and run it inside Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet { Gitlab::Seeder::Users.new.seed! }
require './spec/support/sidekiq' require './spec/support/sidekiq'
# rubocop:disable Rails/Output class Gitlab::Seeder::Projects
include ActionView::Helpers::NumberHelper
Sidekiq::Testing.inline! do PROJECT_URLS = %w[
Gitlab::Seeder.quiet do
Gitlab::Seeder.without_gitaly_timeout do
project_urls = %w[
https://gitlab.com/gitlab-org/gitlab-test.git https://gitlab.com/gitlab-org/gitlab-test.git
https://gitlab.com/gitlab-org/gitlab-shell.git https://gitlab.com/gitlab-org/gitlab-shell.git
https://gitlab.com/gnuwget/wget2.git https://gitlab.com/gnuwget/wget2.git
...@@ -39,8 +37,7 @@ Sidekiq::Testing.inline! do ...@@ -39,8 +37,7 @@ Sidekiq::Testing.inline! do
https://github.com/opencontainers/runc.git https://github.com/opencontainers/runc.git
https://github.com/googlesamples/android-topeka.git https://github.com/googlesamples/android-topeka.git
] ]
LARGE_PROJECT_URLS = %w[
large_project_urls = %w[
https://github.com/torvalds/linux.git https://github.com/torvalds/linux.git
https://gitlab.gnome.org/GNOME/gimp.git https://gitlab.gnome.org/GNOME/gimp.git
https://gitlab.gnome.org/GNOME/gnome-mud.git https://gitlab.gnome.org/GNOME/gnome-mud.git
...@@ -48,8 +45,62 @@ Sidekiq::Testing.inline! do ...@@ -48,8 +45,62 @@ Sidekiq::Testing.inline! do
https://gitlab.com/inkscape/inkscape.git https://gitlab.com/inkscape/inkscape.git
https://github.com/gnachman/iTerm2.git https://github.com/gnachman/iTerm2.git
] ]
# Number of projects to mass-insert for every user, keyed by visibility.
# The totals in the trailing comments correspond to one million users;
# consider lowering MASS_USERS_COUNT for fewer users with projects.
MASS_PROJECTS_COUNT_PER_USER = {
  private: 3, # 3m projects +
  internal: 1, # 1m projects +
  public: 1 # 1m projects = 5m total
}.freeze
# Prefix used to recognise mass-inserted projects.
MASS_INSERT_NAME_START = 'mass_insert_project_'.freeze
# Entry point: runs the three project seeding steps in order (real projects,
# large projects, mass-inserted projects) inside Sidekiq::Testing.inline!.
def seed!
Sidekiq::Testing.inline! do
create_real_projects!
create_large_projects!
create_mass_projects!
end
end
private
# Creates a real project for each of the first SIZE entries of PROJECT_URLS.
# SIZE comes from the environment and falls back to 8 when unset or blank.
# Entries at even positions are created with force_latest_storage: true.
def create_real_projects!
  requested = ENV['SIZE']
  limit = requested.present? ? requested.to_i : 8

  PROJECT_URLS.first(limit).each.with_index do |url, index|
    create_real_project!(url, force_latest_storage: index.even?)
  end
end
# Creates a real project for every entry of LARGE_PROJECT_URLS, but only when
# the LARGE_PROJECTS env variable is set.
#
# When FORK is also set, one project is forked for up to five users (the first
# user is skipped): 'torvalds/linux' if FORK is 'true', otherwise the project
# whose full path equals the value of FORK. Prints '.' for each usable fork,
# and the validation errors followed by 'F' otherwise.
def create_large_projects!
  return unless ENV['LARGE_PROJECTS'].present?

  LARGE_PROJECT_URLS.each(&method(:create_real_project!))

  return unless ENV['FORK'].present?

  puts "\nGenerating forks"

  project_name = ENV['FORK'] == 'true' ? 'torvalds/linux' : ENV['FORK']
  project = Project.find_by_full_path(project_name)

  # find_by_full_path returns nil for an unknown path; without this guard the
  # fork service would be handed nil as its source project.
  unless project
    puts "Project #{project_name} not found, skipping forks"
    return
  end

  User.offset(1).first(5).each do |user|
    new_project = ::Projects::ForkService.new(project, user).execute

    if new_project.valid? && (new_project.valid_repo? || new_project.import_state.scheduled?)
      print '.'
    else
      new_project.errors.full_messages.each do |error|
        puts "#{new_project.full_path}: #{error}"
      end

      print 'F'
    end
  end
end
def create_real_project!(url, force_latest_storage: false)
group_path, project_path = url.split('/')[-2..-1] group_path, project_path = url.split('/')[-2..-1]
group = Group.find_by(path: group_path) group = Group.find_by(path: group_path)
...@@ -83,7 +134,7 @@ Sidekiq::Testing.inline! do ...@@ -83,7 +134,7 @@ Sidekiq::Testing.inline! do
project = nil project = nil
Sidekiq::Worker.skipping_transaction_check do Sidekiq::Worker.skipping_transaction_check do
project = Projects::CreateService.new(User.first, params).execute project = ::Projects::CreateService.new(User.first, params).execute
# Seed-Fu runs this entire fixture in a transaction, so the `after_commit` # Seed-Fu runs this entire fixture in a transaction, so the `after_commit`
# hook won't run until after the fixture is loaded. That is too late # hook won't run until after the fixture is loaded. That is too late
...@@ -101,37 +152,59 @@ Sidekiq::Testing.inline! do ...@@ -101,37 +152,59 @@ Sidekiq::Testing.inline! do
end end
end end
# You can specify how many projects you need during seed execution def create_mass_projects!
size = ENV['SIZE'].present? ? ENV['SIZE'].to_i : 8 projects_per_user_count = MASS_PROJECTS_COUNT_PER_USER.values.sum
visibility_per_user = ['private'] * MASS_PROJECTS_COUNT_PER_USER.fetch(:private) +
project_urls.first(size).each_with_index do |url, i| ['internal'] * MASS_PROJECTS_COUNT_PER_USER.fetch(:internal) +
create_project(url, force_latest_storage: i.even?) ['public'] * MASS_PROJECTS_COUNT_PER_USER.fetch(:public)
end visibility_level_per_user = visibility_per_user.map { |visibility| Gitlab::VisibilityLevel.level_value(visibility) }
if ENV['LARGE_PROJECTS'].present? visibility_per_user = visibility_per_user.join(',')
large_project_urls.each(&method(:create_project)) visibility_level_per_user = visibility_level_per_user.join(',')
if ENV['FORK'].present? Gitlab::Seeder.with_mass_insert(User.count * projects_per_user_count, "Projects and relations") do
puts "\nGenerating forks" ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO projects (name, path, creator_id, namespace_id, visibility_level, created_at, updated_at)
project_name = ENV['FORK'] == 'true' ? 'torvalds/linux' : ENV['FORK'] SELECT
'Seed project ' || seq || ' ' || ('{#{visibility_per_user}}'::text[])[seq] AS project_name,
project = Project.find_by_full_path(project_name) 'mass_insert_project_' || ('{#{visibility_per_user}}'::text[])[seq] || '_' || seq AS project_path,
u.id AS user_id,
User.offset(1).first(5).each do |user| n.id AS namespace_id,
new_project = Projects::ForkService.new(project, user).execute ('{#{visibility_level_per_user}}'::int[])[seq] AS visibility_level,
NOW() AS created_at,
if new_project.valid? && (new_project.valid_repo? || new_project.import_state.scheduled?) NOW() AS updated_at
print '.' FROM users u
else CROSS JOIN generate_series(1, #{projects_per_user_count}) AS seq
new_project.errors.full_messages.each do |error| JOIN namespaces n ON n.owner_id=u.id
puts "#{new_project.full_path}: #{error}" SQL
end
print 'F' ActiveRecord::Base.connection.execute <<~SQL
end INSERT INTO project_features (project_id, merge_requests_access_level, issues_access_level, wiki_access_level,
end pages_access_level)
end SELECT
end id,
#{ProjectFeature::ENABLED} AS merge_requests_access_level,
#{ProjectFeature::ENABLED} AS issues_access_level,
#{ProjectFeature::ENABLED} AS wiki_access_level,
#{ProjectFeature::ENABLED} AS pages_access_level
FROM projects ON CONFLICT (project_id) DO NOTHING;
SQL
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO routes (source_id, source_type, name, path)
SELECT
p.id,
'Project',
u.name || ' / ' || p.name,
u.username || '/' || p.path
FROM projects p JOIN users u ON u.id=p.creator_id
ON CONFLICT (source_type, source_id) DO NOTHING;
SQL
end end
end end
end end
# Build the project seeder and run it inside Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet { Gitlab::Seeder::Projects.new.seed! }
...@@ -43,7 +43,7 @@ Gitlab::Seeder.quiet do ...@@ -43,7 +43,7 @@ Gitlab::Seeder.quiet do
end end
puts "\nGenerating project labels" puts "\nGenerating project labels"
Project.all.find_each do |project| Project.not_mass_generated.find_each do |project|
Gitlab::Seeder::ProjectLabels.new(project).seed! Gitlab::Seeder::ProjectLabels.new(project).seed!
end end
end end
require './spec/support/sidekiq'
Gitlab::Seeder.quiet do
  # Creates one confirmed user with the shared seed password. Prints '.' on
  # success and 'F' when the record is invalid.
  create_seed_user = lambda do |identity|
    begin
      User.create!(identity.merge(confirmed_at: DateTime.now, password: '12345678'))

      print '.'
    rescue ActiveRecord::RecordInvalid
      print 'F'
    end
  end

  # Twenty users with FFaker-generated identities.
  20.times do
    create_seed_user.call(
      username: FFaker::Internet.user_name,
      name: FFaker::Name.name,
      email: FFaker::Internet.email
    )
  end

  # Five users with predictable identities: user0 .. user4.
  5.times do |number|
    create_seed_user.call(
      username: "user#{number}",
      name: "User #{number}",
      email: "user#{number}@example.com"
    )
  end
end
...@@ -3,7 +3,7 @@ require './spec/support/sidekiq' ...@@ -3,7 +3,7 @@ require './spec/support/sidekiq'
Sidekiq::Testing.inline! do Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Group.all.each do |group| Group.all.each do |group|
User.all.sample(4).each do |user| User.not_mass_generated.sample(4).each do |user|
if group.add_user(user, Gitlab::Access.values.sample).persisted? if group.add_user(user, Gitlab::Access.values.sample).persisted?
print '.' print '.'
else else
...@@ -12,8 +12,8 @@ Sidekiq::Testing.inline! do ...@@ -12,8 +12,8 @@ Sidekiq::Testing.inline! do
end end
end end
Project.all.each do |project| Project.not_mass_generated.each do |project|
User.all.sample(4).each do |user| User.not_mass_generated.sample(4).each do |user|
if project.add_role(user, Gitlab::Access.sym_options.keys.sample) if project.add_role(user, Gitlab::Access.sym_options.keys.sample)
print '.' print '.'
else else
......
require './spec/support/sidekiq' require './spec/support/sidekiq'
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Project.all.each do |project| Project.not_mass_generated.each do |project|
5.times do |i| 5.times do |i|
milestone_params = { milestone_params = {
title: "v#{i}.0", title: "v#{i}.0",
......
...@@ -4,7 +4,13 @@ Gitlab::Seeder.quiet do ...@@ -4,7 +4,13 @@ Gitlab::Seeder.quiet do
# Limit the number of merge requests per project to avoid long seeds # Limit the number of merge requests per project to avoid long seeds
MAX_NUM_MERGE_REQUESTS = 10 MAX_NUM_MERGE_REQUESTS = 10
Project.non_archived.with_merge_requests_enabled.reject(&:empty_repo?).each do |project| projects = Project
.non_archived
.with_merge_requests_enabled
.not_mass_generated
.reject(&:empty_repo?)
projects.each do |project|
branches = project.repository.branch_names.sample(MAX_NUM_MERGE_REQUESTS * 2) branches = project.repository.branch_names.sample(MAX_NUM_MERGE_REQUESTS * 2)
branches.each do |branch_name| branches.each do |branch_name|
......
...@@ -9,7 +9,7 @@ Sidekiq::Testing.disable! do ...@@ -9,7 +9,7 @@ Sidekiq::Testing.disable! do
# that it falls under `Sidekiq::Testing.disable!`. # that it falls under `Sidekiq::Testing.disable!`.
Key.skip_callback(:commit, :after, :add_to_shell) Key.skip_callback(:commit, :after, :add_to_shell)
User.first(10).each do |user| User.not_mass_generated.first(10).each do |user|
key = "ssh-rsa AAAAB3NzaC1yc2EAAAABJQAAAIEAiPWx6WM4lhHNedGfBpPJNPpZ7yKu+dnn1SJejgt#{user.id + 100}6k6YjzGGphH2TUxwKzxcKDKKezwkpfnxPkSMkuEspGRt/aZZ9wa++Oi7Qkr8prgHc4soW6NUlfDzpvZK2H5E7eQaSeP3SAwGmQKUFHCddNaP0L+hM7zhFNzjFvpaMgJw0=" key = "ssh-rsa AAAAB3NzaC1yc2EAAAABJQAAAIEAiPWx6WM4lhHNedGfBpPJNPpZ7yKu+dnn1SJejgt#{user.id + 100}6k6YjzGGphH2TUxwKzxcKDKKezwkpfnxPkSMkuEspGRt/aZZ9wa++Oi7Qkr8prgHc4soW6NUlfDzpvZK2H5E7eQaSeP3SAwGmQKUFHCddNaP0L+hM7zhFNzjFvpaMgJw0="
key = user.keys.create( key = user.keys.create(
......
...@@ -25,7 +25,7 @@ end ...@@ -25,7 +25,7 @@ end
eos eos
50.times do |i| 50.times do |i|
user = User.all.sample user = User.not_mass_generated.sample
PersonalSnippet.seed(:id, [{ PersonalSnippet.seed(:id, [{
id: i, id: i,
......
...@@ -214,7 +214,7 @@ class Gitlab::Seeder::Pipelines ...@@ -214,7 +214,7 @@ class Gitlab::Seeder::Pipelines
end end
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project| Project.not_mass_generated.sample(5).each do |project|
project_builds = Gitlab::Seeder::Pipelines.new(project) project_builds = Gitlab::Seeder::Pipelines.new(project)
project_builds.seed! project_builds.seed!
end end
......
...@@ -3,7 +3,7 @@ require './spec/support/sidekiq' ...@@ -3,7 +3,7 @@ require './spec/support/sidekiq'
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
admin_user = User.find(1) admin_user = User.find(1)
Project.all.each do |project| Project.not_mass_generated.each do |project|
params = { params = {
name: 'master' name: 'master'
} }
......
...@@ -217,7 +217,7 @@ Gitlab::Seeder.quiet do ...@@ -217,7 +217,7 @@ Gitlab::Seeder.quiet do
flag = 'SEED_CYCLE_ANALYTICS' flag = 'SEED_CYCLE_ANALYTICS'
if ENV[flag] if ENV[flag]
Project.find_each do |project| Project.not_mass_generated.find_each do |project|
# This seed naively assumes that every project has a repository, and every # This seed naively assumes that every project has a repository, and every
# repository has a `master` branch, which may be the case for a pristine # repository has a `master` branch, which may be the case for a pristine
# GDK seed, but is almost never true for a GDK that's actually had # GDK seed, but is almost never true for a GDK that's actually had
......
...@@ -67,7 +67,7 @@ class Gitlab::Seeder::Environments ...@@ -67,7 +67,7 @@ class Gitlab::Seeder::Environments
end end
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project| Project.not_mass_generated.sample(5).each do |project|
project_environments = Gitlab::Seeder::Environments.new(project) project_environments = Gitlab::Seeder::Environments.new(project)
project_environments.seed! project_environments.seed!
end end
......
...@@ -22,7 +22,7 @@ module Db ...@@ -22,7 +22,7 @@ module Db
end end
def self.random_user def self.random_user
User.find(User.pluck(:id).sample) User.find(User.not_mass_generated.pluck(:id).sample)
end end
end end
end end
......
...@@ -2,8 +2,8 @@ require './spec/support/sidekiq' ...@@ -2,8 +2,8 @@ require './spec/support/sidekiq'
Sidekiq::Testing.inline! do Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
User.all.sample(10).each do |user| User.not_mass_generated.sample(10).each do |user|
source_project = Project.public_only.sample source_project = Project.not_mass_generated.public_only.sample
## ##
# 03_project.rb might not have created a public project because # 03_project.rb might not have created a public project because
......
...@@ -12,6 +12,14 @@ The `setup` task is an alias for `gitlab:setup`. ...@@ -12,6 +12,14 @@ The `setup` task is an alias for `gitlab:setup`.
This tasks calls `db:reset` to create the database, and calls `db:seed_fu` to seed the database. This tasks calls `db:reset` to create the database, and calls `db:seed_fu` to seed the database.
Note: `db:setup` calls `db:seed` but this does nothing. Note: `db:setup` calls `db:seed` but this does nothing.
### Env variables
**MASS_INSERT**: Create millions of users (1m), projects (5m) and their
relations. Running the seed with it is highly recommended, to catch slow queries
while developing. Expect the process to take up to 20 extra minutes.
**LARGE_PROJECTS**: Create large projects (through import) from a predefined set of urls.
### Seeding issues for all or a given project ### Seeding issues for all or a given project
You can seed issues for all or a given project with the `gitlab:seed:issues` You can seed issues for all or a given project with the `gitlab:seed:issues`
......
...@@ -88,7 +88,7 @@ Gitlab::Seeder.quiet do ...@@ -88,7 +88,7 @@ Gitlab::Seeder.quiet do
seeder = Gitlab::Seeder::Burndown.new(project) seeder = Gitlab::Seeder::Burndown.new(project)
seeder.seed! seeder.seed!
else else
Project.all.each do |project| Project.not_mass_generated.each do |project|
seeder = Gitlab::Seeder::Burndown.new(project) seeder = Gitlab::Seeder::Burndown.new(project)
seeder.seed! seeder.seed!
end end
......
...@@ -128,7 +128,7 @@ class Gitlab::Seeder::Vulnerabilities ...@@ -128,7 +128,7 @@ class Gitlab::Seeder::Vulnerabilities
end end
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Project.joins(:ci_pipelines).distinct.all.sample(5).each do |project| Project.joins(:ci_pipelines).not_mass_generated.distinct.all.sample(5).each do |project|
seeder = Gitlab::Seeder::Vulnerabilities.new(project) seeder = Gitlab::Seeder::Vulnerabilities.new(project)
seeder.seed! seeder.seed!
end end
......
# frozen_string_literal: true # frozen_string_literal: true
# EE fixture # EE fixture
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project| Project.not_mass_generated.sample(5).each do |project|
project.ci_pipelines.all.sample(2).each do |pipeline| project.ci_pipelines.all.sample(2).each do |pipeline|
next if pipeline.source_pipeline next if pipeline.source_pipeline
......
...@@ -32,7 +32,7 @@ class Gitlab::Seeder::Packages ...@@ -32,7 +32,7 @@ class Gitlab::Seeder::Packages
end end
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project| Project.not_mass_generated.sample(5).each do |project|
Gitlab::Seeder::Packages.new(project.owner, project).seed Gitlab::Seeder::Packages.new(project.owner, project).seed
end end
end end
...@@ -14,7 +14,71 @@ end ...@@ -14,7 +14,71 @@ end
module Gitlab module Gitlab
class Seeder class Seeder
extend ActionView::Helpers::NumberHelper
ESTIMATED_INSERT_PER_MINUTE = 2_000_000
MASS_INSERT_ENV = 'MASS_INSERT'
# Seed-only mixin for Project: adds a scope that leaves out projects whose
# path matches the mass-insert prefix.
module ProjectSeed
  extend ActiveSupport::Concern

  included do
    # The prefix constant is looked up when the scope is called, not when the
    # module is included.
    # NOTE(review): `_` is a single-character wildcard in LIKE, so the match
    # is slightly looser than a literal prefix comparison.
    scope :not_mass_generated, -> do
      prefix = Gitlab::Seeder::Projects::MASS_INSERT_NAME_START
      where.not("path LIKE '#{prefix}%'")
    end
  end
end
# Seed-only mixin for User: adds a scope that leaves out users whose username
# matches the mass-insert prefix.
module UserSeed
  extend ActiveSupport::Concern

  included do
    # The prefix constant is looked up when the scope is called, not when the
    # module is included.
    # NOTE(review): `_` is a single-character wildcard in LIKE, so the match
    # is slightly looser than a literal prefix comparison.
    scope :not_mass_generated, -> do
      prefix = Gitlab::Seeder::Users::MASS_INSERT_USERNAME_START
      where.not("username LIKE '#{prefix}%'")
    end
  end
end
# Wraps a bulk insert with console progress messages.
#
# size  - number of rows about to be inserted; only used for the messages.
# model - a model class (its humanized, pluralized name is printed) or a
#         String that is printed as given.
#
# The block is run only when the env variable named by MASS_INSERT_ENV or the
# CI env variable is set. Otherwise a hint is printed and the method returns
# without yielding.
def self.with_mass_insert(size, model)
  humanized_model_name =
    if model.is_a?(String)
      model
    else
      model.model_name.human.pluralize(size)
    end

  unless ENV[MASS_INSERT_ENV] || ENV['CI']
    puts "\nSkipping mass insertion for #{humanized_model_name}."
    puts "Consider running the seed with #{MASS_INSERT_ENV}=1"
    return
  end

  humanized_size = number_with_delimiter(size)

  puts "\nCreating #{humanized_size} #{humanized_model_name}."
  puts estimated_time_message(size)

  yield

  puts "\n#{humanized_size} #{humanized_model_name} created!"
end
# Builds the "Rough estimated time" line for inserting `size` rows, based on
# ESTIMATED_INSERT_PER_MINUTE and rounded to whole minutes.
def self.estimated_time_message(size)
  estimated_minutes = (size.to_f / ESTIMATED_INSERT_PER_MINUTE).round
  return "Rough estimated time: less than a minute ⏰" if estimated_minutes.zero?

  "Rough estimated time: #{estimated_minutes} #{'minute'.pluralize(estimated_minutes)} ⏰"
end
def self.quiet def self.quiet
# Disable database insertion logs so speed isn't limited by ability to print to console
old_logger = ActiveRecord::Base.logger
ActiveRecord::Base.logger = nil
# Additional seed logic for models.
Project.include(ProjectSeed)
User.include(UserSeed)
mute_notifications mute_notifications
mute_mailer mute_mailer
...@@ -23,6 +87,7 @@ module Gitlab ...@@ -23,6 +87,7 @@ module Gitlab
yield yield
SeedFu.quiet = false SeedFu.quiet = false
ActiveRecord::Base.logger = old_logger
puts "\nOK".color(:green) puts "\nOK".color(:green)
end end
......
...@@ -5,6 +5,10 @@ namespace :dev do ...@@ -5,6 +5,10 @@ namespace :dev do
task setup: :environment do task setup: :environment do
ENV['force'] = 'yes' ENV['force'] = 'yes'
Rake::Task["gitlab:setup"].invoke Rake::Task["gitlab:setup"].invoke
# Make sure DB statistics are up to date.
ActiveRecord::Base.connection.execute('ANALYZE')
Rake::Task["gitlab:shell:setup"].invoke Rake::Task["gitlab:shell:setup"].invoke
end end
......
...@@ -22,7 +22,7 @@ namespace :gitlab do ...@@ -22,7 +22,7 @@ namespace :gitlab do
[project] [project]
else else
Project.find_each Project.not_mass_generated.find_each
end end
projects.each do |project| projects.each do |project|
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment