Commit 1139db32 authored by Corinna Wiesner

Populate user_highest_roles table

The user_highest_roles table is populated with a background migration
which schedules ~530 jobs with a 5-minute gap to create an entry for
each active User.
parent 27309598
---
title: Populate user_highest_roles table
merge_request: 27127
author:
type: added
# frozen_string_literal: true
# See https://docs.gitlab.com/ee/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
# Adds a partial index on users.id restricted to rows where
# state = 'active', user_type and bot_type are NULL and ghost is not TRUE.
# The predicate mirrors the `active` scope used by the migration that
# schedules the user_highest_roles population.
class AddMigratingUserHighestRolesTableIndexToUsers < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  INDEX_NAME = 'index_for_migrating_user_highest_roles_table'

  # The concurrent index helpers used below must run outside a DDL transaction.
  disable_ddl_transaction!

  # Creates the partial index without blocking writes to the users table.
  def up
    add_concurrent_index(
      :users,
      :id,
      where: "state = 'active' AND user_type IS NULL AND bot_type IS NULL AND ghost IS NOT TRUE",
      name: INDEX_NAME
    )
  end

  # Drops the index again, identified by its explicit name.
  def down
    remove_concurrent_index(:users, :id, name: INDEX_NAME)
  end
end
# frozen_string_literal: true
# Post-deployment migration that schedules the PopulateUserHighestRolesTable
# background migration for all active users. Users are walked in batches of
# BATCH_SIZE; each batch becomes one delayed job that receives the smallest
# and largest user ID of that batch.
class SchedulePopulateUserHighestRolesTable < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  BATCH_SIZE = 10_000
  DELAY = 5.minutes.to_i
  DOWNTIME = false
  MIGRATION = 'PopulateUserHighestRolesTable'

  disable_ddl_transaction!

  # Migration-local stand-in for the application's User model, so this
  # migration does not depend on the current state of app/models.
  class User < ActiveRecord::Base
    include EachBatch

    # Pin the table explicitly (as the background migration's own User model
    # does) instead of relying on name inference for a nested class.
    self.table_name = 'users'

    # Users that should receive a user_highest_roles row: active state,
    # no user_type, no bot_type, and not flagged as ghost.
    scope :active, -> {
      where(state: 'active', user_type: nil, bot_type: nil)
        .where('ghost IS NOT TRUE')
    }
  end

  def up
    # We currently have ~5_300_000 users with the state active on GitLab.com.
    # This means it'll schedule ~530 jobs (10k Users each) with a 5 minutes gap,
    # so this should take ~44 hours for all background migrations to complete.
    User.active.each_batch(of: BATCH_SIZE) do |batch, index|
      # A single aggregate query yields [min_id, max_id] for the batch.
      range = batch.pluck(Arel.sql('MIN(id)'), Arel.sql('MAX(id)')).first
      # Each batch is scheduled `index` delay intervals from now.
      delay = index * DELAY

      migrate_in(delay.seconds, MIGRATION, [*range])
    end
  end

  def down
    # nothing
  end
end
......@@ -9030,6 +9030,8 @@ CREATE UNIQUE INDEX index_feature_gates_on_feature_key_and_key_and_value ON publ
CREATE UNIQUE INDEX index_features_on_key ON public.features USING btree (key);
CREATE INDEX index_for_migrating_user_highest_roles_table ON public.users USING btree (id) WHERE (((state)::text = 'active'::text) AND (user_type IS NULL) AND (bot_type IS NULL) AND (ghost IS NOT TRUE));
CREATE INDEX index_for_resource_group ON public.ci_builds USING btree (resource_group_id, id) WHERE (resource_group_id IS NOT NULL);
CREATE INDEX index_for_status_per_branch_per_project ON public.merge_trains USING btree (target_project_id, target_branch, status);
......@@ -12811,6 +12813,7 @@ COPY "schema_migrations" (version) FROM STDIN;
20200311084025
20200311093210
20200311094020
20200311130802
20200311141053
20200311141943
20200311154110
......@@ -12825,6 +12828,7 @@ COPY "schema_migrations" (version) FROM STDIN;
20200316111759
20200316162648
20200316173312
20200317110602
20200317142110
20200318140400
20200318152134
......
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # This background migration creates records on user_highest_roles according to
    # the given user IDs range. Users are loaded with a left outer join on
    # members so that users without any (non-requested) membership still get a
    # record, with a NULL highest_access_level. The range is processed in
    # slices of BATCH_SIZE IDs and one INSERT ... ON CONFLICT statement is
    # issued per slice.
    class PopulateUserHighestRolesTable
      BATCH_SIZE = 100

      # rubocop:disable Style/Documentation
      class User < ActiveRecord::Base
        self.table_name = 'users'

        scope :active, -> {
          where(state: 'active', user_type: nil, bot_type: nil)
            .where('ghost IS NOT TRUE')
        }
      end
      # rubocop:enable Style/Documentation

      # Upserts a user_highest_roles row for every active user whose ID lies
      # in from_id..to_id (both inclusive).
      #
      # @param from_id [Integer] first user ID of the range
      # @param to_id [Integer] last user ID of the range
      def perform(from_id, to_id)
        (from_id..to_id).each_slice(BATCH_SIZE) do |ids|
          # Restrict each statement to the current slice. Passing the full
          # from_id/to_id here would re-run the identical full-range upsert
          # once per slice.
          execute(
            <<-EOF
              INSERT INTO user_highest_roles (updated_at, user_id, highest_access_level)
              #{select_sql(ids.first, ids.last)}
              ON CONFLICT (user_id) DO
              UPDATE SET highest_access_level = EXCLUDED.highest_access_level
            EOF
          )
        end
      end

      private

      # Builds the SELECT that feeds the INSERT: one row per active user in
      # the ID range with the maximum access_level over that user's members
      # rows (rows with a requested_at value are excluded by the join).
      def select_sql(from_id, to_id)
        User
          .select('NOW() as updated_at, users.id, MAX(access_level) AS highest_access_level')
          .joins('LEFT OUTER JOIN members ON members.user_id = users.id AND members.requested_at IS NULL')
          .where(users: { id: active_user_ids(from_id, to_id) })
          .group('users.id')
          .to_sql
      end

      # IDs of the active users inside the given inclusive ID range.
      def active_user_ids(from_id, to_id)
        User.active.where(users: { id: from_id..to_id }).pluck(:id)
      end

      # Runs raw SQL on a memoized ActiveRecord connection.
      def execute(sql)
        @connection ||= ActiveRecord::Base.connection
        @connection.execute(sql)
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Spec for the background migration that fills user_highest_roles. All
# fixtures go through schema-pinned `table` helpers rather than application
# models, so the spec keeps working against the schema version given below.
describe Gitlab::BackgroundMigration::PopulateUserHighestRolesTable, schema: 20200311130802 do
  let(:members) { table(:members) }
  let(:users) { table(:users) }
  let(:user_highest_roles) { table(:user_highest_roles) }

  # Inserts a users row that is "active" by default; `params` overrides
  # individual columns. Uses create! so a failed insert aborts the example
  # instead of silently leaving the fixture missing.
  def create_user(id, params = {})
    user_params = {
      id: id,
      state: 'active',
      user_type: nil,
      bot_type: nil,
      ghost: nil,
      email: "user#{id}@example.com",
      projects_limit: 0
    }.merge(params)

    users.create!(user_params)
  end

  # Inserts a group membership with the given access level for user `id`.
  def create_member(id, access_level, params = {})
    params = {
      user_id: id,
      access_level: access_level,
      source_id: 1,
      source_type: 'Group',
      notification_level: 0
    }.merge(params)

    members.create!(params)
  end

  before do
    create_user(1)
    create_user(2, state: 'blocked')
    create_user(3, user_type: 2)
    create_user(4)
    create_user(5, bot_type: 1)
    create_user(6, ghost: true)
    create_user(7, ghost: false)
    create_user(8)

    create_member(1, 40)
    create_member(7, 30)
    # Membership with requested_at set: expected to be ignored (see user 8 below).
    create_member(8, 20, requested_at: Time.current)

    # Pre-existing row with a different level, to exercise the ON CONFLICT update.
    user_highest_roles.create!(user_id: 1, highest_access_level: 50)
  end

  describe '#perform' do
    it 'creates user_highest_roles rows according to users', :aggregate_failures do
      # Count through the table helper rather than the UserHighestRole model.
      expect { subject.perform(1, 8) }.to change { user_highest_roles.count }.from(1).to(4)

      created_or_updated_rows = [
        { 'user_id' => 1, 'highest_access_level' => 40 },
        { 'user_id' => 4, 'highest_access_level' => nil },
        { 'user_id' => 7, 'highest_access_level' => 30 },
        { 'user_id' => 8, 'highest_access_level' => nil }
      ]

      rows = user_highest_roles.order(:user_id).map do |row|
        row.attributes.slice('user_id', 'highest_access_level')
      end

      expect(rows).to match_array(created_or_updated_rows)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20200311130802_schedule_populate_user_highest_roles_table.rb')
# Verifies that the scheduling migration enqueues one delayed background
# migration per batch of active users, skipping blocked, typed, bot and
# ghost users.
describe SchedulePopulateUserHighestRolesTable do
  let(:users) { table(:users) }

  # Inserts a users row; columns default to an "active" user and can be
  # overridden through `params`.
  def create_user(id, params = {})
    attributes = {
      id: id,
      state: 'active',
      user_type: nil,
      bot_type: nil,
      ghost: nil,
      email: "user#{id}@example.com",
      projects_limit: 0
    }

    users.create!(attributes.merge(params))
  end

  it 'correctly schedules background migrations' do
    # Users 1, 4 and 7 match the active scope; the others are filtered out.
    fixtures = [
      [1, {}],
      [2, { state: 'blocked' }],
      [3, { user_type: 2 }],
      [4, {}],
      [5, { bot_type: 1 }],
      [6, { ghost: true }],
      [7, { ghost: false }]
    ]
    fixtures.each { |id, overrides| create_user(id, overrides) }

    # Shrink the batch size so three active users span two batches.
    stub_const("#{described_class.name}::BATCH_SIZE", 2)

    Sidekiq::Testing.fake! do
      Timecop.freeze do
        migrate!

        migration_name = described_class::MIGRATION

        expect(migration_name).to be_scheduled_delayed_migration(5.minutes, 1, 4)
        expect(migration_name).to be_scheduled_delayed_migration(10.minutes, 7, 7)
        expect(BackgroundMigrationWorker.jobs.size).to eq(2)
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment