Commit f73193c3 authored by Yorick Peterse's avatar Yorick Peterse

Smarter refreshing of authorized projects

Prior to this commit the refreshing of authorized projects was done in
two steps:

1. Remove existing authorizations
2. Insert a new list of all authorizations

This can lead to a high amount of dead tuples as every time all rows are
being replaced. For example, if a user with 100 authorizations is given
access to a new project this would lead to:

* 100 rows being removed
* 101 new rows being inserted

This commit changes the way this system works so it only removes/inserts
what is necessary. Using the above example this would lead to only 1 new
row being inserted, with the initial 100 being left untouched.

Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/25257
parent a50cd9eb
......@@ -5,4 +5,17 @@ class ProjectAuthorization < ActiveRecord::Base
validates :project, presence: true
validates :access_level, inclusion: { in: Gitlab::Access.all_values }, presence: true
validates :user, uniqueness: { scope: [:project, :access_level] }, presence: true
def self.insert_authorizations(rows, per_batch = 1000)
rows.each_slice(per_batch) do |slice|
tuples = slice.map do |tuple|
tuple.map { |value| connection.quote(value) }
end
connection.execute <<-EOF.strip_heredoc
INSERT INTO project_authorizations (user_id, project_id, access_level)
VALUES #{tuples.map { |tuple| "(#{tuple.join(', ')})" }.join(', ')}
EOF
end
end
end
......@@ -443,24 +443,18 @@ class User < ActiveRecord::Base
end
def refresh_authorized_projects
transaction do
project_authorizations.delete_all
# project_authorizations_union can return multiple records for the same
# project/user with different access_level so we take row with the maximum
# access_level
project_authorizations.connection.execute <<-SQL
INSERT INTO project_authorizations (user_id, project_id, access_level)
SELECT user_id, project_id, MAX(access_level) AS access_level
FROM (#{project_authorizations_union.to_sql}) sub
GROUP BY user_id, project_id
SQL
Users::RefreshAuthorizedProjectsService.new(self).execute
end
def remove_project_authorizations(project_ids)
project_authorizations.where(id: project_ids).delete_all
end
def set_authorized_projects_column
unless authorized_projects_populated
update_column(:authorized_projects_populated, true)
end
end
end
def authorized_projects(min_access_level = nil)
refresh_authorized_projects unless authorized_projects_populated
......@@ -905,18 +899,6 @@ class User < ActiveRecord::Base
private
# Returns a union query of projects that the user is authorized to access
def project_authorizations_union
relations = [
personal_projects.select("#{id} AS user_id, projects.id AS project_id, #{Gitlab::Access::MASTER} AS access_level"),
groups_projects.select_for_project_authorization,
projects.select_for_project_authorization,
groups.joins(:shared_projects).select_for_project_authorization
]
Gitlab::SQL::Union.new(relations)
end
def ci_projects_union
scope = { access_level: [Gitlab::Access::MASTER, Gitlab::Access::OWNER] }
groups = groups_projects.where(members: scope)
......
module Users
# Service for refreshing the authorized projects of a user.
#
# This particular service class can not be used to update data for the same
# user concurrently. Doing so could lead to an incorrect state. To ensure this
# doesn't happen a caller must synchronize access (e.g. using
# `Gitlab::ExclusiveLease`).
#
# Usage:
#
# user = User.find_by(username: 'alice')
# service = Users::RefreshAuthorizedProjectsService.new(some_user)
# service.execute
class RefreshAuthorizedProjectsService
attr_reader :user
LEASE_TIMEOUT = 1.minute.to_i
# user - The User for which to refresh the authorized projects.
def initialize(user)
@user = user
# We need an up to date User object that has access to all relations that
# may have been created earlier. The only way to ensure this is to reload
# the User object.
user.reload
end
# This method returns the updated User object.
def execute
current = current_authorizations_per_project
fresh = fresh_access_levels_per_project
remove = current.each_with_object([]) do |(project_id, row), array|
# rows not in the new list or with a different access level should be
# removed.
if !fresh[project_id] || fresh[project_id] != row.access_level
array << row.id
end
end
add = fresh.each_with_object([]) do |(project_id, level), array|
# rows not in the old list or with a different access level should be
# added.
if !current[project_id] || current[project_id].access_level != level
array << [user.id, project_id, level]
end
end
update_with_lease(remove, add)
end
# Updates the list of authorizations using an exclusive lease.
def update_with_lease(remove = [], add = [])
lease_key = "refresh_authorized_projects:#{user.id}"
lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT)
until uuid = lease.try_obtain
# Keep trying until we obtain the lease. If we don't do so we may end up
# not updating the list of authorized projects properly. To prevent
# hammering Redis too much we'll wait for a bit between retries.
sleep(1)
end
begin
update_authorizations(remove, add)
ensure
Gitlab::ExclusiveLease.cancel(lease_key, uuid)
end
end
# Updates the list of authorizations for the current user.
#
# remove - The IDs of the authorization rows to remove.
# add - Rows to insert in the form `[user id, project id, access level]`
def update_authorizations(remove = [], add = [])
return if remove.empty? && add.empty?
User.transaction do
user.remove_project_authorizations(remove) unless remove.empty?
ProjectAuthorization.insert_authorizations(add) unless add.empty?
user.set_authorized_projects_column
end
# Since we batch insert authorization rows, Rails' associations may get
# out of sync. As such we force a reload of the User object.
user.reload
end
def fresh_access_levels_per_project
fresh_authorizations.each_with_object({}) do |row, hash|
hash[row.project_id] = row.access_level
end
end
def current_authorizations_per_project
current_authorizations.each_with_object({}) do |row, hash|
hash[row.project_id] = row
end
end
def current_authorizations
user.project_authorizations.select(:id, :project_id, :access_level)
end
def fresh_authorizations
ProjectAuthorization.
unscoped.
select('project_id, MAX(access_level) AS access_level').
from("(#{project_authorizations_union.to_sql}) #{ProjectAuthorization.table_name}").
group(:project_id)
end
private
# Returns a union query of projects that the user is authorized to access
def project_authorizations_union
relations = [
user.personal_projects.select("#{user.id} AS user_id, projects.id AS project_id, #{Gitlab::Access::MASTER} AS access_level"),
user.groups_projects.select_for_project_authorization,
user.projects.select_for_project_authorization,
user.groups.joins(:shared_projects).select_for_project_authorization
]
Gitlab::SQL::Union.new(relations)
end
end
end
......@@ -2,8 +2,6 @@ class AuthorizedProjectsWorker
include Sidekiq::Worker
include DedicatedSidekiqQueue
LEASE_TIMEOUT = 1.minute.to_i
def self.bulk_perform_async(args_list)
Sidekiq::Client.push_bulk('class' => self, 'args' => args_list)
end
......@@ -11,24 +9,6 @@ class AuthorizedProjectsWorker
def perform(user_id)
user = User.find_by(id: user_id)
refresh(user) if user
end
def refresh(user)
lease_key = "refresh_authorized_projects:#{user.id}"
lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT)
until uuid = lease.try_obtain
# Keep trying until we obtain the lease. If we don't do so we may end up
# not updating the list of authorized projects properly. To prevent
# hammering Redis too much we'll wait for a bit between retries.
sleep(1)
end
begin
user.refresh_authorized_projects
ensure
Gitlab::ExclusiveLease.cancel(lease_key, uuid)
end
user.refresh_authorized_projects if user
end
end
require 'spec_helper'
describe ProjectAuthorization do
let(:user) { create(:user) }
let(:project1) { create(:empty_project) }
let(:project2) { create(:empty_project) }
describe '.insert_authorizations' do
it 'inserts the authorizations' do
described_class.
insert_authorizations([[user.id, project1.id, Gitlab::Access::MASTER]])
expect(user.project_authorizations.count).to eq(1)
end
it 'inserts rows in batches' do
described_class.insert_authorizations([
[user.id, project1.id, Gitlab::Access::MASTER],
[user.id, project2.id, Gitlab::Access::MASTER],
], 1)
expect(user.project_authorizations.count).to eq(2)
end
end
end
require 'spec_helper'
describe Users::RefreshAuthorizedProjectsService do
let(:project) { create(:empty_project) }
let(:user) { project.namespace.owner }
let(:service) { described_class.new(user) }
def create_authorization(project, user, access_level = Gitlab::Access::MASTER)
ProjectAuthorization.
create!(project: project, user: user, access_level: access_level)
end
describe '#execute' do
before do
user.project_authorizations.delete_all
end
it 'updates the authorized projects of the user' do
project2 = create(:empty_project)
to_remove = create_authorization(project2, user)
expect(service).to receive(:update_with_lease).
with([to_remove.id], [[user.id, project.id, Gitlab::Access::MASTER]])
service.execute
end
it 'sets the access level of a project to the highest available level' do
to_remove = create_authorization(project, user, Gitlab::Access::DEVELOPER)
expect(service).to receive(:update_with_lease).
with([to_remove.id], [[user.id, project.id, Gitlab::Access::MASTER]])
service.execute
end
it 'returns a User' do
expect(service.execute).to be_an_instance_of(User)
end
end
describe '#update_with_lease', :redis do
it 'refreshes the authorizations using a lease' do
expect_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).
and_return('foo')
expect(Gitlab::ExclusiveLease).to receive(:cancel).
with(an_instance_of(String), 'foo')
expect(service).to receive(:update_authorizations).with([1], [])
service.update_with_lease([1])
end
end
describe '#update_authorizations' do
it 'does nothing when there are no rows to add and remove' do
expect(user).not_to receive(:remove_project_authorizations)
expect(ProjectAuthorization).not_to receive(:insert_authorizations)
expect(user).not_to receive(:set_authorized_projects_column)
service.update_authorizations([], [])
end
it 'removes authorizations that should be removed' do
authorization = create_authorization(project, user)
service.update_authorizations([authorization.id])
expect(user.project_authorizations).to be_empty
end
it 'inserts authorizations that should be added' do
service.update_authorizations([], [[user.id, project.id, Gitlab::Access::MASTER]])
authorizations = user.project_authorizations
expect(authorizations.length).to eq(1)
expect(authorizations[0].user_id).to eq(user.id)
expect(authorizations[0].project_id).to eq(project.id)
expect(authorizations[0].access_level).to eq(Gitlab::Access::MASTER)
end
it 'populates the authorized projects column' do
# make sure we start with a nil value no matter what the default in the
# factory may be.
user.update(authorized_projects_populated: nil)
service.update_authorizations([], [[user.id, project.id, Gitlab::Access::MASTER]])
expect(user.authorized_projects_populated).to eq(true)
end
end
describe '#fresh_access_levels_per_project' do
let(:hash) { service.fresh_access_levels_per_project }
it 'returns a Hash' do
expect(hash).to be_an_instance_of(Hash)
end
it 'sets the keys to the project IDs' do
expect(hash.keys).to eq([project.id])
end
it 'sets the values to the access levels' do
expect(hash.values).to eq([Gitlab::Access::MASTER])
end
end
describe '#current_authorizations_per_project' do
before { create_authorization(project, user) }
let(:hash) { service.current_authorizations_per_project }
it 'returns a Hash' do
expect(hash).to be_an_instance_of(Hash)
end
it 'sets the keys to the project IDs' do
expect(hash.keys).to eq([project.id])
end
it 'sets the values to the project authorization rows' do
expect(hash.values).to eq([ProjectAuthorization.first])
end
end
describe '#current_authorizations' do
context 'without authorizations' do
it 'returns an empty list' do
expect(service.current_authorizations.empty?).to eq(true)
end
end
context 'with an authorization' do
before { create_authorization(project, user) }
let(:row) { service.current_authorizations.take }
it 'returns the currently authorized projects' do
expect(service.current_authorizations.length).to eq(1)
end
it 'includes the row ID for every row' do
expect(row.id).to be_a_kind_of(Numeric)
end
it 'includes the project ID for every row' do
expect(row.project_id).to eq(project.id)
end
it 'includes the access level for every row' do
expect(row.access_level).to eq(Gitlab::Access::MASTER)
end
end
end
describe '#fresh_authorizations' do
it 'returns the new authorized projects' do
expect(service.fresh_authorizations.length).to eq(1)
end
it 'returns the highest access level' do
project.team.add_guest(user)
rows = service.fresh_authorizations.to_a
expect(rows.length).to eq(1)
expect(rows.first.access_level).to eq(Gitlab::Access::MASTER)
end
context 'every returned row' do
let(:row) { service.fresh_authorizations.take }
it 'includes the project ID' do
expect(row.project_id).to eq(project.id)
end
it 'includes the access level' do
expect(row.access_level).to eq(Gitlab::Access::MASTER)
end
end
end
end
......@@ -7,27 +7,17 @@ describe AuthorizedProjectsWorker do
it "refreshes user's authorized projects" do
user = create(:user)
expect(worker).to receive(:refresh).with(an_instance_of(User))
expect_any_instance_of(User).to receive(:refresh_authorized_projects)
worker.perform(user.id)
end
context "when the user is not found" do
it "does nothing" do
expect(worker).not_to receive(:refresh)
expect_any_instance_of(User).not_to receive(:refresh_authorized_projects)
described_class.new.perform(-1)
end
end
end
describe '#refresh', redis: true do
it 'refreshes the authorized projects of the user' do
user = create(:user)
expect(user).to receive(:refresh_authorized_projects)
worker.refresh(user)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment