Commit fb207ede authored by charlie ablett

Merge branch 'fix-max-inserts' into 'master'

Fix fixture max inserts

See merge request gitlab-org/gitlab!82307
parents f25176fe 46d6c0b0
......@@ -4,8 +4,8 @@ class Gitlab::Seeder::Users
include ActionView::Helpers::NumberHelper
RANDOM_USERS_COUNT = 20
MASS_NAMESPACES_COUNT = 100
MASS_USERS_COUNT = ENV['CI'] ? 10 : 1_000_000
attr_reader :opts
def initialize(opts = {})
......@@ -15,6 +15,7 @@ class Gitlab::Seeder::Users
def seed!
Sidekiq::Testing.inline! do
create_mass_users!
create_mass_namespaces!
create_random_users!
end
end
......@@ -26,20 +27,22 @@ class Gitlab::Seeder::Users
Gitlab::Seeder.with_mass_insert(MASS_USERS_COUNT, User) do
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO users (username, name, email, confirmed_at, projects_limit, encrypted_password)
INSERT INTO users (username, name, email, state, confirmed_at, projects_limit, encrypted_password)
SELECT
'#{Gitlab::Seeder::MASS_INSERT_USER_START}' || seq,
'Seed user ' || seq,
'seed_user' || seq || '@example.com',
'active',
to_timestamp(seq),
#{MASS_USERS_COUNT},
'#{encrypted_password}'
FROM generate_series(1, #{MASS_USERS_COUNT}) AS seq
ON CONFLICT DO NOTHING;
SQL
end
relation = User.where(admin: false)
Gitlab::Seeder.with_mass_insert(relation.count, Namespace) do
Gitlab::Seeder.with_mass_insert(relation.count, 'user namespaces') do
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO namespaces (name, path, owner_id, type)
SELECT
......@@ -48,6 +51,16 @@ class Gitlab::Seeder::Users
id,
'User'
FROM users WHERE NOT admin
ON CONFLICT DO NOTHING;
SQL
end
Gitlab::Seeder.with_mass_insert(relation.count, "User namespaces routes") do
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO routes (namespace_id, source_id, source_type, path, name)
SELECT id as namespace_id, id as source_id, 'Namespace', path, name
FROM namespaces WHERE type IS NULL OR type = 'User'
ON CONFLICT DO NOTHING;
SQL
end
......@@ -74,6 +87,97 @@ class Gitlab::Seeder::Users
end
end
# Bulk-creates the mass-insert group hierarchy with raw SQL.
#
# Steps, all run inside a single Gitlab::Seeder.with_mass_insert block:
#   1. insert MASS_NAMESPACES_COUNT root groups (level 0);
#   2. for levels 1..9, insert two subgroups under every group of the previous level;
#   3. insert a route for every Group namespace, with the full path built by a recursive CTE;
#   4. fill traversal_ids (ids from the root down to the namespace itself) for mass-insert namespaces;
#   5. insert a namespace_settings row for every namespace.
#
# Every INSERT uses ON CONFLICT DO NOTHING.
def create_mass_namespaces!
  Gitlab::Seeder.with_mass_insert(MASS_NAMESPACES_COUNT, "root namespaces and subgroups 9 levels deep") do
    ActiveRecord::Base.connection.execute <<~SQL
      INSERT INTO namespaces (name, path, type)
      SELECT
      'mass insert group level 0 - ' || seq,
      '#{Gitlab::Seeder::MASS_INSERT_GROUP_START}_0_' || seq,
      'Group'
      FROM generate_series(1, #{MASS_NAMESPACES_COUNT}) AS seq
      ON CONFLICT DO NOTHING;
    SQL

    (1..9).each do |idx|
      # Only groups on the previous level receive children (two each, see the CROSS JOIN below),
      # so the number reported must be based on that level alone, not on every mass-insert group.
      parent_level_pattern = "#{Gitlab::Seeder::MASS_INSERT_GROUP_START}_#{idx - 1}_%"
      count = Namespace.where(type: 'Group').where("path LIKE ?", parent_level_pattern).count * 2

      Gitlab::Seeder.log_message("Creating subgroups at level #{idx}: #{count}")

      ActiveRecord::Base.connection.execute <<~SQL
        INSERT INTO namespaces (name, path, type, parent_id)
        SELECT
        'mass insert group level #{idx} - ' || seq,
        '#{Gitlab::Seeder::MASS_INSERT_GROUP_START}_#{idx}_' || seq,
        'Group',
        namespaces.id
        FROM namespaces
        CROSS JOIN generate_series(1, 2) AS seq
        WHERE namespaces.type='Group' AND namespaces.path like '#{parent_level_pattern}'
        ON CONFLICT DO NOTHING;
      SQL
    end

    Gitlab::Seeder.log_message("creating routes.")

    # The recursive CTE walks from root groups down, joining parent and child paths with '/'.
    # The resulting full path is stored as both the route path and the route name.
    ActiveRecord::Base.connection.execute <<~SQL
      WITH RECURSIVE cte(source_id, namespace_id, parent_id, path, height) AS (
      (
      SELECT ARRAY[batch.id], batch.id, batch.parent_id, batch.path, 1
      FROM
      "namespaces" as batch
      WHERE
      "batch"."type" = 'Group' AND "batch"."parent_id" is null
      )
      UNION
      (
      SELECT array_append(cte.source_id, n.id), n.id, n.parent_id, cte.path || '/' || n.path, cte.height+1
      FROM
      "namespaces" as n,
      "cte"
      WHERE
      "n"."type" = 'Group'
      AND "n"."parent_id" = "cte"."namespace_id"
      )
      )
      INSERT INTO routes (namespace_id, source_id, source_type, path, name)
      SELECT cte.namespace_id as namespace_id, cte.namespace_id as source_id, 'Namespace', cte.path, cte.path FROM cte
      ON CONFLICT DO NOTHING;
    SQL

    Gitlab::Seeder.log_message("filling traversal ids.")

    # Same top-down walk as above; source_id accumulates the ids from the root to the namespace.
    # Only namespaces whose path starts with the mass-insert prefix are updated.
    ActiveRecord::Base.connection.execute <<~SQL
      WITH RECURSIVE cte(source_id, namespace_id, parent_id) AS (
      (
      SELECT ARRAY[batch.id], batch.id, batch.parent_id
      FROM
      "namespaces" as batch
      WHERE
      "batch"."type" = 'Group' AND "batch"."parent_id" is null
      )
      UNION
      (
      SELECT array_append(cte.source_id, n.id), n.id, n.parent_id
      FROM
      "namespaces" as n,
      "cte"
      WHERE
      "n"."type" = 'Group'
      AND "n"."parent_id" = "cte"."namespace_id"
      )
      )
      UPDATE namespaces
      SET traversal_ids = computed.source_id FROM (SELECT namespace_id, source_id FROM cte) AS computed
      where computed.namespace_id = namespaces.id AND namespaces.path LIKE '#{Gitlab::Seeder::MASS_INSERT_PREFIX}%'
    SQL

    Gitlab::Seeder.log_message("creating namespace settings.")

    ActiveRecord::Base.connection.execute <<~SQL
      INSERT INTO namespace_settings(namespace_id, created_at, updated_at)
      SELECT id, now(), now() FROM namespaces
      ON CONFLICT DO NOTHING;
    SQL
  end
end
# Returns a 16-character random hexadecimal password, generated once and then memoized.
def random_password
  return @random_password if @random_password

  # 8 random bytes render as 16 hex characters.
  @random_password = SecureRandom.hex(8)
end
......
......@@ -53,14 +53,56 @@ class Gitlab::Seeder::Projects
public: 1 # 1m projects = 5m total
}
BATCH_SIZE = 100_000
def seed!
Sidekiq::Testing.inline! do
create_real_projects!
create_large_projects!
create_mass_projects!
end
end
# Builds the SQL that inserts 'Project' namespaces under mass-insert parent namespaces.
#
# type  - value compared against namespaces.type to select the parent namespaces.
# range - object responding to #first and #last; parents are limited to ids BETWEEN those values.
#
# Each selected parent is cross-joined with generate_series(1, projects_per_user_count); the
# series value indexes into the visibility name and visibility level arrays.
# Returns the SQL statement as a String.
def self.insert_project_namespaces_sql(type:, range:)
  settings = Gitlab::Seeder::Projects
  visibility_names = settings.visibility_per_user
  visibility_levels = settings.visibility_level_per_user
  per_parent = settings.projects_per_user_count
  path_start = Gitlab::Seeder::MASS_INSERT_PROJECT_START
  parent_prefix = Gitlab::Seeder::MASS_INSERT_PREFIX
  first_id = range.first
  last_id = range.last

  <<~SQL
    INSERT INTO namespaces (name, path, parent_id, owner_id, type, visibility_level, created_at, updated_at)
    SELECT
    'Seed project ' || seq || ' ' || ('{#{visibility_names}}'::text[])[seq] AS project_name,
    '#{path_start}' || ('{#{visibility_names}}'::text[])[seq] || '_' || seq AS namespace_path,
    n.id AS parent_id,
    n.owner_id AS owner_id,
    'Project' AS type,
    ('{#{visibility_levels}}'::int[])[seq] AS visibility_level,
    NOW() AS created_at,
    NOW() AS updated_at
    FROM namespaces n
    CROSS JOIN generate_series(1, #{per_parent}) AS seq
    WHERE type='#{type}' AND path LIKE '#{parent_prefix}%'
    AND n.id BETWEEN #{first_id} AND #{last_id}
    ON CONFLICT DO NOTHING;
  SQL
end
# Builds the SQL that inserts one projects row per 'Project' namespace whose parent is a
# mass-insert namespace of the given type.
#
# type  - value compared against namespaces.type for the parent namespaces.
# range - object responding to #first and #last; parents are limited to ids BETWEEN those values.
#
# Name, path, owner, parent and visibility level are copied from the project namespace row.
# Returns the SQL statement as a String.
def self.insert_projects_sql(type:, range:)
  parent_prefix = Gitlab::Seeder::MASS_INSERT_PREFIX
  first_id = range.first
  last_id = range.last

  <<~SQL
    INSERT INTO projects (name, path, creator_id, namespace_id, project_namespace_id, visibility_level, created_at, updated_at)
    SELECT
    n.name AS project_name,
    n.path AS project_path,
    n.owner_id AS creator_id,
    n.parent_id AS namespace_id,
    n.id AS project_namespace_id,
    n.visibility_level AS visibility_level,
    NOW() AS created_at,
    NOW() AS updated_at
    FROM namespaces n
    WHERE type = 'Project' AND n.parent_id IN (
    SELECT id FROM namespaces n1 WHERE type='#{type}'
    AND path LIKE '#{parent_prefix}%' AND n1.id BETWEEN #{first_id} AND #{last_id}
    )
    ON CONFLICT DO NOTHING;
  SQL
end
private
def create_real_projects!
......@@ -156,55 +198,26 @@ class Gitlab::Seeder::Projects
end
end
def create_mass_projects!
projects_per_user_count = MASS_PROJECTS_COUNT_PER_USER.values.sum
visibility_per_user = ['private'] * MASS_PROJECTS_COUNT_PER_USER.fetch(:private) +
['internal'] * MASS_PROJECTS_COUNT_PER_USER.fetch(:internal) +
['public'] * MASS_PROJECTS_COUNT_PER_USER.fetch(:public)
visibility_level_per_user = visibility_per_user.map { |visibility| Gitlab::VisibilityLevel.level_value(visibility) }
visibility_per_user = visibility_per_user.join(',')
visibility_level_per_user = visibility_level_per_user.join(',')
Gitlab::Seeder.with_mass_insert(User.count * projects_per_user_count, "Projects and relations") do
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO projects (name, path, creator_id, namespace_id, visibility_level, created_at, updated_at)
SELECT
'Seed project ' || seq || ' ' || ('{#{visibility_per_user}}'::text[])[seq] AS project_name,
'#{Gitlab::Seeder::MASS_INSERT_PROJECT_START}' || ('{#{visibility_per_user}}'::text[])[seq] || '_' || seq AS project_path,
u.id AS user_id,
n.id AS namespace_id,
('{#{visibility_level_per_user}}'::int[])[seq] AS visibility_level,
NOW() AS created_at,
NOW() AS updated_at
FROM users u
CROSS JOIN generate_series(1, #{projects_per_user_count}) AS seq
JOIN namespaces n ON n.owner_id=u.id
SQL
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO project_features (project_id, merge_requests_access_level, issues_access_level, wiki_access_level,
pages_access_level)
SELECT
id,
#{ProjectFeature::ENABLED} AS merge_requests_access_level,
#{ProjectFeature::ENABLED} AS issues_access_level,
#{ProjectFeature::ENABLED} AS wiki_access_level,
#{ProjectFeature::ENABLED} AS pages_access_level
FROM projects ON CONFLICT (project_id) DO NOTHING;
SQL
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO routes (source_id, source_type, name, path)
SELECT
p.id,
'Project',
u.name || ' / ' || p.name,
u.username || '/' || p.path
FROM projects p JOIN users u ON u.id=p.creator_id
ON CONFLICT (source_type, source_id) DO NOTHING;
SQL
end
# Number of projects seeded per owner: the sum of all MASS_PROJECTS_COUNT_PER_USER values.
def self.projects_per_user_count
  MASS_PROJECTS_COUNT_PER_USER.each_value.sum
end

# Visibility names, each repeated as many times as configured, in the fixed order
# private, internal, public.
def self.visibility_per_user_array
  %w[private internal public].flat_map do |visibility|
    [visibility] * MASS_PROJECTS_COUNT_PER_USER.fetch(visibility.to_sym)
  end
end

# Same sequence as visibility_per_user_array, with each name translated through
# Gitlab::VisibilityLevel.level_value.
def self.visibility_level_per_user_map
  visibility_per_user_array.map do |visibility_name|
    Gitlab::VisibilityLevel.level_value(visibility_name)
  end
end

# Comma-separated visibility names.
def self.visibility_per_user
  names = visibility_per_user_array
  names.join(',')
end

# Comma-separated visibility level values.
def self.visibility_level_per_user
  levels = visibility_level_per_user_map
  levels.join(',')
end
end
......
# frozen_string_literal: true
# Seeds projects (and their project namespaces) under mass-inserted user namespaces.
class Gitlab::Seeder::UserProjects
  # Entry point used by the fixture runner.
  def seed!
    create_user_projects!
  end

  private

  # Walks the mass-insert user namespaces in batches of Gitlab::Seeder::Projects::BATCH_SIZE
  # and, for the id range covered by each batch, first inserts the project namespaces and then
  # the projects that point at them.
  def create_user_projects!
    user_namespaces = Namespace.where("path LIKE ?", "#{Gitlab::Seeder::MASS_INSERT_PREFIX}%").where(type: 'User')
    projects_per_namespace = Gitlab::Seeder::Projects.projects_per_user_count

    Gitlab::Seeder.with_mass_insert(user_namespaces.count * projects_per_namespace, "User projects and corresponding project namespaces") do
      # Running total for the progress log. `index * batch.size` is wrong whenever a batch is
      # shorter than the ones before it (typically the last one), so accumulate instead.
      count = 0

      user_namespaces.each_batch(of: Gitlab::Seeder::Projects::BATCH_SIZE) do |batch, _index|
        # [min_id, max_id] of the current batch; the SQL filters parents with BETWEEN.
        range = batch.pluck(Arel.sql('MIN(id)'), Arel.sql('MAX(id)')).first
        count += batch.size * projects_per_namespace

        Gitlab::Seeder.log_message("Creating project namespaces: #{count}.")
        ActiveRecord::Base.connection.execute(Gitlab::Seeder::Projects.insert_project_namespaces_sql(type: 'User', range: range))

        Gitlab::Seeder.log_message("Creating projects: #{count}.")
        ActiveRecord::Base.connection.execute(Gitlab::Seeder::Projects.insert_projects_sql(type: 'User', range: range))
      end
    end
  end
end
# Run the user projects seeder inside Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet do
  Gitlab::Seeder::UserProjects.new.seed!
end
# frozen_string_literal: true
# Seeds projects (and their project namespaces) under mass-inserted group namespaces.
class Gitlab::Seeder::GroupProjects
  # Entry point used by the fixture runner.
  def seed!
    create_projects!
  end

  private

  # Walks the mass-insert groups in batches of Gitlab::Seeder::Projects::BATCH_SIZE and, for the
  # id range covered by each batch, first inserts the project namespaces and then the projects
  # that point at them.
  def create_projects!
    groups = Namespace.where("path LIKE ?", "#{Gitlab::Seeder::MASS_INSERT_PREFIX}%").where(type: 'Group')
    projects_per_group = Gitlab::Seeder::Projects.projects_per_user_count

    Gitlab::Seeder.with_mass_insert(groups.count * projects_per_group, "Projects and corresponding project namespaces") do
      # Running total for the progress log. `index * batch.size` is wrong whenever a batch is
      # shorter than the ones before it (typically the last one), so accumulate instead.
      count = 0

      groups.each_batch(of: Gitlab::Seeder::Projects::BATCH_SIZE) do |batch, _index|
        # [min_id, max_id] of the current batch; the SQL filters parents with BETWEEN.
        range = batch.pluck(Arel.sql('MIN(id)'), Arel.sql('MAX(id)')).first
        count += batch.size * projects_per_group

        Gitlab::Seeder.log_message("Creating projects namespaces: #{count}.")
        ActiveRecord::Base.connection.execute(Gitlab::Seeder::Projects.insert_project_namespaces_sql(type: 'Group', range: range))

        Gitlab::Seeder.log_message("Creating projects: #{count}.")
        ActiveRecord::Base.connection.execute(Gitlab::Seeder::Projects.insert_projects_sql(type: 'Group', range: range))
      end
    end
  end
end
# Run the group projects seeder inside Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet do
  Gitlab::Seeder::GroupProjects.new.seed!
end
# frozen_string_literal: true
# Seeds a project_features row for every project, one id range at a time.
class Gitlab::Seeder::ProjectFeatures
  include ActionView::Helpers::NumberHelper

  BATCH_SIZE = 100_000

  # Entry point used by the fixture runner.
  def seed!
    create_project_features!
  end

  # Iterates over all projects in batches of BATCH_SIZE and inserts project_features rows with
  # merge requests, issues, wiki and pages access levels set to ProjectFeature::ENABLED.
  # Conflicting rows are left untouched (ON CONFLICT DO NOTHING).
  def create_project_features!
    enabled = ProjectFeature::ENABLED

    Gitlab::Seeder.with_mass_insert(Project.count, "Project features") do
      Project.each_batch(of: BATCH_SIZE) do |batch, index|
        # [min_id, max_id] of the current batch.
        id_bounds = batch.pluck(Arel.sql('MIN(id)'), Arel.sql('MAX(id)')).first

        Gitlab::Seeder.log_message("Creating project features: #{index * BATCH_SIZE}.")

        ActiveRecord::Base.connection.execute <<~SQL
          INSERT INTO project_features (project_id, merge_requests_access_level, issues_access_level, wiki_access_level, pages_access_level)
          SELECT
          id,
          #{enabled} AS merge_requests_access_level,
          #{enabled} AS issues_access_level,
          #{enabled} AS wiki_access_level,
          #{enabled} AS pages_access_level
          FROM projects
          WHERE projects.id BETWEEN #{id_bounds.first} AND #{id_bounds.last}
          ON CONFLICT DO NOTHING;
        SQL
      end
    end
  end
end
# Run the project features seeder inside Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet do
  Gitlab::Seeder::ProjectFeatures.new.seed!
end
# frozen_string_literal: true
# Seeds a routes row for every project, one id range at a time.
class Gitlab::Seeder::ProjectRoutes
  include ActionView::Helpers::NumberHelper

  BATCH_SIZE = 100_000

  # Entry point used by the fixture runner.
  def seed!
    create_project_routes!
  end

  # Iterates over all projects in batches of half BATCH_SIZE. Each project's route name and path
  # are derived from the 'Namespace' route of its parent namespace, joined with ' / ' and '/'.
  # Conflicting rows are left untouched (ON CONFLICT DO NOTHING).
  def create_project_routes!
    routes_batch_size = BATCH_SIZE / 2

    Gitlab::Seeder.with_mass_insert(Project.count, "Project routes") do
      Project.each_batch(of: routes_batch_size) do |batch, index|
        # [min_id, max_id] of the current batch.
        id_bounds = batch.pluck(Arel.sql('MIN(id)'), Arel.sql('MAX(id)')).first

        Gitlab::Seeder.log_message("Creating project routes: #{index * routes_batch_size}.")

        ActiveRecord::Base.connection.execute <<~SQL
          INSERT INTO routes (namespace_id, source_id, source_type, name, path)
          SELECT
          p.project_namespace_id as namespace_id,
          p.id as source_id,
          'Project',
          routes.name || ' / ' || p.name,
          routes.path || '/' || p.path
          FROM projects p
          INNER JOIN routes ON routes.source_id = p.namespace_id and source_type = 'Namespace'
          WHERE p.id BETWEEN #{id_bounds.first} AND #{id_bounds.last}
          ON CONFLICT DO NOTHING;
        SQL
      end
    end
  end
end
# Run the project routes seeder inside Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet do
  Gitlab::Seeder::ProjectRoutes.new.seed!
end
......@@ -37,13 +37,15 @@ class Gitlab::Seeder::ProjectLabels
end
Gitlab::Seeder.quiet do
puts "\nGenerating group labels"
Group.all.find_each do |group|
Gitlab::Seeder::GroupLabels.new(group).seed!
label_per_group = 10
puts "\nGenerating group labels: #{Group.not_mass_generated.count * label_per_group}"
Group.not_mass_generated.find_each do |group|
Gitlab::Seeder::GroupLabels.new(group, label_per_group: label_per_group).seed!
end
puts "\nGenerating project labels"
label_per_project = 5
puts "\nGenerating project labels: #{Project.not_mass_generated.count * label_per_project}"
Project.not_mass_generated.find_each do |project|
Gitlab::Seeder::ProjectLabels.new(project).seed!
Gitlab::Seeder::ProjectLabels.new(project, label_per_project: label_per_project).seed!
end
end
......@@ -2,7 +2,7 @@ require './spec/support/sidekiq_middleware'
Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do
Group.all.each do |group|
Group.not_mass_generated.each do |group|
User.not_mass_generated.sample(4).each do |user|
if group.add_user(user, Gitlab::Access.values.sample).persisted?
print '.'
......
......@@ -41,7 +41,7 @@ end
Gitlab::Seeder.quiet do
puts "\nGenerating group crm organizations and contacts"
Group.where('parent_id IS NULL').first(10).each do |group|
Group.not_mass_generated.where('parent_id IS NULL').first(10).each do |group|
Gitlab::Seeder::Crm.new(group).seed!
end
end
# frozen_string_literal: true
Gitlab::Seeder.quiet do
Group.all.each do |group|
Group.not_mass_generated.each do |group|
5.times do
epic_params = {
title: FFaker::Lorem.sentence(6),
......
# frozen_string_literal: true
Gitlab::Seeder.quiet do
groups = Group.take(5)
groups = Group.not_mass_generated.take(5)
next if groups.empty?
......
......@@ -9,7 +9,7 @@ module Iterations
end
Gitlab::Seeder.quiet do
Group.all.each do |group|
Group.not_mass_generated.each do |group|
cadences = []
1000.times do
random_number = rand(5)
......
......@@ -4,12 +4,24 @@ module Gitlab
class Seeder
extend ActionView::Helpers::NumberHelper
MASS_INSERT_PROJECT_START = 'mass_insert_project_'
MASS_INSERT_USER_START = 'mass_insert_user_'
MASS_INSERT_PREFIX = 'mass_insert'
MASS_INSERT_PROJECT_START = "#{MASS_INSERT_PREFIX}_project_"
MASS_INSERT_GROUP_START = "#{MASS_INSERT_PREFIX}_group_"
MASS_INSERT_USER_START = "#{MASS_INSERT_PREFIX}_user_"
REPORTED_USER_START = 'reported_user_'
ESTIMATED_INSERT_PER_MINUTE = 2_000_000
ESTIMATED_INSERT_PER_MINUTE = 250_000
MASS_INSERT_ENV = 'MASS_INSERT'
# Seed-only extension; adds a scope for skipping mass-inserted groups to the including model.
module NamespaceSeed
  extend ActiveSupport::Concern

  included do
    # Excludes rows whose path starts with MASS_INSERT_GROUP_START.
    scope :not_mass_generated, -> { where.not("path LIKE '#{MASS_INSERT_GROUP_START}%'") }
  end
end
module ProjectSeed
extend ActiveSupport::Concern
......@@ -30,6 +42,10 @@ module Gitlab
end
end
# Prints +message+ to standard output, prefixed with the current time and ": ".
def self.log_message(message)
  puts format('%s: %s', Time.current, message)
end
def self.with_mass_insert(size, model)
humanized_model_name = model.is_a?(String) ? model : model.model_name.human.pluralize(size)
......@@ -63,6 +79,7 @@ module Gitlab
def self.quiet
# Additional seed logic for models.
Namespace.include(NamespaceSeed)
Project.include(ProjectSeed)
User.include(UserSeed)
......
......@@ -10,7 +10,12 @@ namespace :dev do
Gitlab::Database::EachDatabase.each_database_connection do |connection|
# Make sure DB statistics are up to date.
# gitlab:setup task can insert quite a bit of data, especially with MASS_INSERT=1
# so ANALYZE can take more than default 15s statement timeout. This being a dev task,
# we disable the statement timeout for ANALYZE to run and enable it back afterwards.
connection.execute('SET statement_timeout TO 0')
connection.execute('ANALYZE')
connection.execute('RESET statement_timeout')
end
Rake::Task["gitlab:shell:setup"].invoke
......
......@@ -3,6 +3,24 @@
require 'spec_helper'
RSpec.describe Gitlab::Seeder do
# Verifies that Gitlab::Seeder.quiet extends Namespace with the seed-only NamespaceSeed module.
describe Namespace do
subject { described_class }
# NOTE(review): the first expectation requires that Namespace has not been extended yet.
# Presumably the include performed by Gitlab::Seeder.quiet is not undone afterwards, which
# would make this example depend on execution order - confirm.
it 'has not_mass_generated scope' do
expect { Namespace.not_mass_generated }.to raise_error(NoMethodError)
Gitlab::Seeder.quiet do
expect { Namespace.not_mass_generated }.not_to raise_error
end
end
it 'includes NamespaceSeed module' do
Gitlab::Seeder.quiet do
is_expected.to include_module(Gitlab::Seeder::NamespaceSeed)
end
end
end
describe '.quiet' do
let(:database_base_models) do
{
......@@ -50,4 +68,13 @@ RSpec.describe Gitlab::Seeder do
notification_service.new_note(note)
end
end
describe '.log_message' do
  it 'prepends timestamp to the logged message' do
    freeze_time do
      message = "some message."

      # Match the exact output instead of interpolating into a regex: the message and the
      # timestamp contain characters that are regex metacharacters ('.', possibly '+' in a UTC
      # offset), and an unanchored regex would also accept extra output around the line.
      expect { described_class.log_message(message) }.to output("#{Time.current}: #{message}\n").to_stdout
    end
  end
end
end
......@@ -17,7 +17,9 @@ RSpec.describe 'dev rake tasks' do
it 'sets up the development environment', :aggregate_failures do
expect(Rake::Task['gitlab:setup']).to receive(:invoke)
expect(connections).to all(receive(:execute).with('SET statement_timeout TO 0'))
expect(connections).to all(receive(:execute).with('ANALYZE'))
expect(connections).to all(receive(:execute).with('RESET statement_timeout'))
expect(Rake::Task['gitlab:shell:setup']).to receive(:invoke)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment