Commit 92b2c74c authored by Yorick Peterse's avatar Yorick Peterse

Refresh project authorizations using a Redis lease

When I proposed using serializable transactions I was hoping we would be
able to refresh data of individual users concurrently. Unfortunately
upon closer inspection it was revealed this was not the case. This could
result in a lot of queries failing due to serialization errors,
overloading the database in the process (given enough workers trying to
update the target table).

To work around this we're now using a Redis lease that is cancelled upon
completion. This ensures we can update the data of different users
concurrently without overloading the database.

The code will try to obtain the lease until it succeeds, waiting at
least 1 second between retries. This is necessary as we may otherwise
end up _not_ updating the data which is not an option.
parent 3943e632
...@@ -445,27 +445,21 @@ class User < ActiveRecord::Base ...@@ -445,27 +445,21 @@ class User < ActiveRecord::Base
end end
def refresh_authorized_projects def refresh_authorized_projects
loop do transaction do
begin project_authorizations.delete_all
Gitlab::Database.serialized_transaction do
project_authorizations.delete_all # project_authorizations_union can return multiple records for the same
# project/user with different access_level so we take row with the maximum
# project_authorizations_union can return multiple records for the same project/user with # access_level
# different access_level so we take row with the maximum access_level project_authorizations.connection.execute <<-SQL
project_authorizations.connection.execute <<-SQL INSERT INTO project_authorizations (user_id, project_id, access_level)
INSERT INTO project_authorizations (user_id, project_id, access_level) SELECT user_id, project_id, MAX(access_level) AS access_level
SELECT user_id, project_id, MAX(access_level) AS access_level FROM (#{project_authorizations_union.to_sql}) sub
FROM (#{project_authorizations_union.to_sql}) sub GROUP BY user_id, project_id
GROUP BY user_id, project_id SQL
SQL
unless authorized_projects_populated
update_column(:authorized_projects_populated, true) unless authorized_projects_populated update_column(:authorized_projects_populated, true)
end
break
# In the event of a concurrent modification Rails raises StatementInvalid.
# In this case we want to keep retrying until the transaction succeeds
rescue ActiveRecord::StatementInvalid
end end
end end
end end
......
...@@ -2,14 +2,33 @@ class AuthorizedProjectsWorker ...@@ -2,14 +2,33 @@ class AuthorizedProjectsWorker
include Sidekiq::Worker include Sidekiq::Worker
include DedicatedSidekiqQueue include DedicatedSidekiqQueue
LEASE_TIMEOUT = 1.minute.to_i
def self.bulk_perform_async(args_list) def self.bulk_perform_async(args_list)
Sidekiq::Client.push_bulk('class' => self, 'args' => args_list) Sidekiq::Client.push_bulk('class' => self, 'args' => args_list)
end end
def perform(user_id) def perform(user_id)
user = User.find_by(id: user_id) user = User.find_by(id: user_id)
return unless user
user.refresh_authorized_projects refresh(user) if user
end
def refresh(user)
lease_key = "refresh_authorized_projects:#{user.id}"
lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT)
until uuid = lease.try_obtain
# Keep trying until we obtain the lease. If we don't do so we may end up
# not updating the list of authorized projects properly. To prevent
# hammering Redis too much we'll wait for a bit between retries.
sleep(1)
end
begin
user.refresh_authorized_projects
ensure
Gitlab::ExclusiveLease.cancel(lease_key, uuid)
end
end end
end end
---
title: Use a Redis lease for updating authorized projects
merge_request: 7733
author:
require 'sidekiq/testing' require 'sidekiq/testing'
require './db/fixtures/support/serialized_transaction'
Sidekiq::Testing.inline! do Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
......
require 'sidekiq/testing' require 'sidekiq/testing'
require './db/fixtures/support/serialized_transaction'
Sidekiq::Testing.inline! do Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
......
require 'sidekiq/testing' require 'sidekiq/testing'
require './spec/support/test_env' require './spec/support/test_env'
require './db/fixtures/support/serialized_transaction'
class Gitlab::Seeder::CycleAnalytics class Gitlab::Seeder::CycleAnalytics
def initialize(project, perf: false) def initialize(project, perf: false)
......
require 'gitlab/database'
module Gitlab
module Database
def self.serialized_transaction
connection.transaction { yield }
end
end
end
...@@ -35,13 +35,6 @@ module Gitlab ...@@ -35,13 +35,6 @@ module Gitlab
order order
end end
def self.serialized_transaction
opts = {}
opts[:isolation] = :serializable unless Rails.env.test? && connection.transaction_open?
connection.transaction(opts) { yield }
end
def self.random def self.random
Gitlab::Database.postgresql? ? "RANDOM()" : "RAND()" Gitlab::Database.postgresql? ? "RANDOM()" : "RAND()"
end end
......
...@@ -1349,4 +1349,31 @@ describe User, models: true do ...@@ -1349,4 +1349,31 @@ describe User, models: true do
expect(projects).to be_empty expect(projects).to be_empty
end end
end end
describe '#refresh_authorized_projects', redis: true do
let(:project1) { create(:empty_project) }
let(:project2) { create(:empty_project) }
let(:user) { create(:user) }
before do
project1.team << [user, :reporter]
project2.team << [user, :guest]
user.project_authorizations.delete_all
user.refresh_authorized_projects
end
it 'refreshes the list of authorized projects' do
expect(user.project_authorizations.count).to eq(2)
end
it 'sets the authorized_projects_populated column' do
expect(user.authorized_projects_populated).to eq(true)
end
it 'stores the correct access levels' do
expect(user.project_authorizations.where(access_level: Gitlab::Access::GUEST).exists?).to eq(true)
expect(user.project_authorizations.where(access_level: Gitlab::Access::REPORTER).exists?).to eq(true)
end
end
end end
require 'spec_helper' require 'spec_helper'
describe AuthorizedProjectsWorker do describe AuthorizedProjectsWorker do
let(:worker) { described_class.new }
describe '#perform' do describe '#perform' do
it "refreshes user's authorized projects" do it "refreshes user's authorized projects" do
user = create(:user) user = create(:user)
expect(User).to receive(:find_by).with(id: user.id).and_return(user) expect(worker).to receive(:refresh).with(an_instance_of(User))
expect(user).to receive(:refresh_authorized_projects)
described_class.new.perform(user.id) worker.perform(user.id)
end end
context "when user is not found" do context "when the user is not found" do
it "does nothing" do it "does nothing" do
expect_any_instance_of(User).not_to receive(:refresh_authorized_projects) expect(worker).not_to receive(:refresh)
described_class.new.perform(999_999) described_class.new.perform(-1)
end end
end end
end end
describe '#refresh', redis: true do
it 'refreshes the authorized projects of the user' do
user = create(:user)
expect(user).to receive(:refresh_authorized_projects)
worker.refresh(user)
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment