Commit 960b6409 authored by Thong Kuah's avatar Thong Kuah

Merge branch '300377-traversal-ids-column-sync' into 'master'

Resolve "Traversal IDs column sync" [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!52854
parents 32b86686 7fa6bcd8
......@@ -13,6 +13,7 @@ class Namespace < ApplicationRecord
include Gitlab::Utils::StrongMemoize
include IgnorableColumns
include Namespaces::Traversal::Recursive
include Namespaces::Traversal::Linear
# Prevent users from creating unreasonably deep level of nesting.
# The number 20 was taken based on maximum nesting level of
......
......@@ -34,17 +34,20 @@ class Namespace
sql = """
UPDATE namespaces
SET traversal_ids = cte.traversal_ids
FROM (#{recursive_traversal_ids}) as cte
FROM (#{recursive_traversal_ids(lock: true)}) as cte
WHERE namespaces.id = cte.id
AND namespaces.traversal_ids <> cte.traversal_ids
"""
Namespace.connection.exec_query(sql)
rescue ActiveRecord::Deadlocked
db_deadlock_counter.increment(source: 'Namespace#sync_traversal_ids!')
raise
end
# Identify all incorrect traversal_ids in the current namespace hierarchy.
def incorrect_traversal_ids
def incorrect_traversal_ids(lock: false)
Namespace
.joins("INNER JOIN (#{recursive_traversal_ids}) as cte ON namespaces.id = cte.id")
.joins("INNER JOIN (#{recursive_traversal_ids(lock: lock)}) as cte ON namespaces.id = cte.id")
.where('namespaces.traversal_ids <> cte.traversal_ids')
end
......@@ -55,10 +58,13 @@ class Namespace
#
# Note that the traversal_ids represent a calculated traversal path for the
# namespace and not the value stored within the traversal_ids attribute.
def recursive_traversal_ids
#
# Optionally locked with FOR UPDATE to ensure isolation between concurrent
# updates of the heirarchy.
def recursive_traversal_ids(lock: false)
root_id = Integer(@root.id)
"""
sql = <<~SQL
WITH RECURSIVE cte(id, traversal_ids, cycle) AS (
VALUES(#{root_id}, ARRAY[#{root_id}], false)
UNION ALL
......@@ -67,7 +73,11 @@ class Namespace
WHERE n.parent_id = cte.id AND NOT cycle
)
SELECT id, traversal_ids FROM cte
"""
SQL
sql += ' FOR UPDATE' if lock
sql
end
# This is essentially Namespace#root_ancestor which will soon be rewritten
......@@ -80,5 +90,9 @@ class Namespace
.reorder(nil)
.find_by(parent_id: nil)
end
def db_deadlock_counter
Gitlab::Metrics.counter(:db_deadlock, 'Counts the times we have deadlocked in the database')
end
end
end
# frozen_string_literal: true
#
# Query a recursively defined namespace hierarchy using linear methods through
# the traversal_ids attribute.
#
# Namespace is a nested hierarchy of one parent to many children. A search
# using only the parent-child relationships is a slow operation. This process
# was previously optimized using Postgresql recursive common table expressions
# (CTE) with acceptable performance. However, it lead to slower than possible
# performance, and resulted in complicated queries that were difficult to make
# performant.
#
# Instead of searching the hierarchy recursively, we store a `traversal_ids`
# attribute on each node. The `traversal_ids` is an ordered array of Namespace
# IDs that define the traversal path from the root Namespace to the current
# Namespace.
#
# For example, suppose we have the following Namespaces:
#
# GitLab (id: 1) > Engineering (id: 2) > Manage (id: 3) > Access (id: 4)
#
# Then `traversal_ids` for group "Access" is [1, 2, 3, 4]
#
# And we can match against other Namespace `traversal_ids` such that:
#
# - Ancestors are [1], [1, 2], [1, 2, 3]
# - Descendants are [1, 2, 3, 4, *]
# - Root is [1]
# - Hierarchy is [1, *]
#
# Note that this search method works so long as the IDs are unique and the
# traversal path is ordered from root to leaf nodes.
#
# We implement this in the database using Postgresql arrays, indexed by a
# generalized inverted index (gin).
module Namespaces
module Traversal
module Linear
extend ActiveSupport::Concern
included do
after_create :sync_traversal_ids, if: -> { sync_traversal_ids? }
after_update :sync_traversal_ids, if: -> { sync_traversal_ids? && saved_change_to_parent_id? }
end
def sync_traversal_ids?
Feature.enabled?(:sync_traversal_ids, root_ancestor, default_enabled: :yaml)
end
private
# Update the traversal_ids for the full hierarchy.
#
# NOTE: self.traversal_ids will be stale. Reload for a fresh record.
def sync_traversal_ids
# Clear any previously memoized root_ancestor as our ancestors have changed.
clear_memoization(:root_ancestor)
Namespace::TraversalHierarchy.for_namespace(root_ancestor).sync_traversal_ids!
end
end
end
end
......@@ -6,11 +6,15 @@ module Namespaces
extend ActiveSupport::Concern
def root_ancestor
return self if persisted? && parent_id.nil?
return self if parent.nil?
if persisted?
strong_memoize(:root_ancestor) do
self_and_ancestors.reorder(nil).find_by(parent_id: nil)
end
else
parent.root_ancestor
end
end
# Returns all ancestors, self, and descendants of the current namespace.
......
---
title: Cache namespace traversal path
merge_request: 52854
author:
type: performance
---
name: sync_traversal_ids
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/52854
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/300377
group: group::access
type: development
default_enabled: false
......@@ -546,8 +546,13 @@ RSpec.describe EpicsFinder do
expect(subject).to match_array([base_epic1, base_epic2, private_epic1, private_epic2, public_epic1, public_epic2])
end
it 'does not execute more than 5 SQL queries' do
expect { subject }.not_to exceed_all_query_limit(5)
it 'does not execute more than 6 SQL queries' do
normal_query_count = 5
# sync_traversal_ids feature flag has to query for root_ancestor.
ff_query_count = 1
total_count = normal_query_count + ff_query_count
expect { subject }.not_to exceed_all_query_limit(total_count)
end
it 'does not check permission for subgroups because user inherits permission' do
......
......@@ -123,22 +123,34 @@ RSpec.describe API::Namespaces do
create(:gitlab_subscription, namespace: group1, max_seats_used: 1, seats_in_use: 1)
end
it "avoids additional N+1 database queries" do
control = ActiveRecord::QueryRecorder.new(skip_cached: false) { get api("/namespaces", user) }
# We seem to have some N+1 queries.
# The saml_provider association adds one for each group (saml_provider is
# an association on group, not namespace).
# The route adds one for each namespace.
# And more...
context "avoids additional N+1 database queries" do
let(:control) { ActiveRecord::QueryRecorder.new(skip_cached: false) { get api("/namespaces", user) } }
before do
create(:gitlab_subscription, namespace: group2, max_seats_used: 2)
group2.add_guest(user)
group3 = create(:group)
create(:gitlab_subscription, namespace: group3, max_seats_used: 3)
group3.add_guest(user)
end
# We seem to have some N+1 queries.
# The saml_provider association adds one for each group (saml_provider is
# an association on group, not namespace).
# The route adds one for each namespace.
# And more...
expect { get api("/namespaces", user) }.not_to exceed_all_query_limit(control).with_threshold(7)
context 'traversal_sync_ids feature flag is false' do
before do
stub_feature_flags(sync_traversal_ids: false)
end
it { expect { get api("/namespaces", user) }.not_to exceed_all_query_limit(control).with_threshold(7) }
end
context 'traversal_sync_ids feature flag is true' do
it { expect { get api("/namespaces", user) }.not_to exceed_all_query_limit(control).with_threshold(8) }
end
end
it 'includes max_seats_used' do
......
......@@ -14,10 +14,10 @@ RSpec.describe 'view audit events' do
end
it 'avoids N+1 DB queries', :request_store do
control = ActiveRecord::QueryRecorder.new(skip_cached: false) { send_request }
create_list(:group, 3, parent: group)
control = ActiveRecord::QueryRecorder.new(skip_cached: false) { send_request }
expect { send_request }.not_to exceed_all_query_limit(control)
end
......
......@@ -3,6 +3,8 @@
require 'spec_helper'
RSpec.describe Group do
include ReloadHelpers
let!(:group) { create(:group) }
describe 'associations' do
......@@ -281,7 +283,7 @@ RSpec.describe Group do
end
describe '#two_factor_authentication_allowed' do
let_it_be(:group) { create(:group) }
let_it_be_with_reload(:group) { create(:group) }
context 'for a parent group' do
it 'is valid' do
......@@ -311,6 +313,120 @@ RSpec.describe Group do
end
end
context 'traversal_ids on create' do
context 'default traversal_ids' do
let(:group) { build(:group) }
before do
group.save!
group.reload
end
it { expect(group.traversal_ids).to eq [group.id] }
end
context 'has a parent' do
let(:parent) { create(:group) }
let(:group) { build(:group, parent: parent) }
before do
group.save!
reload_models(parent, group)
end
it { expect(parent.traversal_ids).to eq [parent.id] }
it { expect(group.traversal_ids).to eq [parent.id, group.id] }
end
context 'has a parent update before save' do
let(:parent) { create(:group) }
let(:group) { build(:group, parent: parent) }
let!(:new_grandparent) { create(:group) }
before do
parent.update!(parent: new_grandparent)
group.save!
reload_models(parent, group)
end
it 'avoid traversal_ids race condition' do
expect(parent.traversal_ids).to eq [new_grandparent.id, parent.id]
expect(group.traversal_ids).to eq [new_grandparent.id, parent.id, group.id]
end
end
end
context 'traversal_ids on update' do
context 'parent is updated' do
let(:new_parent) { create(:group) }
subject {group.update!(parent: new_parent, name: 'new name') }
it_behaves_like 'update on column', :traversal_ids
end
context 'parent is not updated' do
subject { group.update!(name: 'new name') }
it_behaves_like 'no update on column', :traversal_ids
end
end
context 'traversal_ids on ancestral update' do
context 'update multiple ancestors before save' do
let(:parent) { create(:group) }
let(:group) { create(:group, parent: parent) }
let!(:new_grandparent) { create(:group) }
let!(:new_parent) { create(:group) }
before do
group.parent = new_parent
new_parent.update!(parent: new_grandparent)
group.save!
reload_models(parent, group, new_grandparent, new_parent)
end
it 'avoids traversal_ids race condition' do
expect(parent.traversal_ids).to eq [parent.id]
expect(group.traversal_ids).to eq [new_grandparent.id, new_parent.id, group.id]
expect(new_grandparent.traversal_ids).to eq [new_grandparent.id]
expect(new_parent.traversal_ids).to eq [new_grandparent.id, new_parent.id]
end
end
context 'assigning a new parent' do
let!(:old_parent) { create(:group) }
let!(:new_parent) { create(:group) }
let!(:group) { create(:group, parent: old_parent) }
before do
group.update(parent: new_parent)
reload_models(old_parent, new_parent, group)
end
it 'updates traversal_ids' do
expect(group.traversal_ids).to eq [new_parent.id, group.id]
end
end
context 'assigning a new grandparent' do
let!(:old_grandparent) { create(:group) }
let!(:new_grandparent) { create(:group) }
let!(:parent_group) { create(:group, parent: old_grandparent) }
let!(:group) { create(:group, parent: parent_group) }
before do
parent_group.update(parent: new_grandparent)
end
it 'updates traversal_ids for all descendants' do
expect(parent_group.reload.traversal_ids).to eq [new_grandparent.id, parent_group.id]
expect(group.reload.traversal_ids).to eq [new_grandparent.id, parent_group.id, group.id]
end
end
end
describe '.without_integration' do
let(:another_group) { create(:group) }
let(:instance_integration) { build(:jira_service, :instance) }
......@@ -1838,24 +1954,28 @@ RSpec.describe Group do
end
end
def subject_and_reload(*models)
subject
models.map(&:reload)
end
describe '#update_shared_runners_setting!' do
context 'enabled' do
subject { group.update_shared_runners_setting!('enabled') }
context 'group that its ancestors have shared runners disabled' do
let_it_be(:parent) { create(:group, :shared_runners_disabled) }
let_it_be(:group) { create(:group, :shared_runners_disabled, parent: parent) }
let_it_be(:project) { create(:project, shared_runners_enabled: false, group: group) }
let_it_be(:parent, reload: true) { create(:group, :shared_runners_disabled) }
let_it_be(:group, reload: true) { create(:group, :shared_runners_disabled, parent: parent) }
let_it_be(:project, reload: true) { create(:project, shared_runners_enabled: false, group: group) }
it 'raises error and does not enable shared Runners' do
expect { subject_and_reload(parent, group, project) }
it 'raises exception' do
expect { subject }
.to raise_error(ActiveRecord::RecordInvalid, 'Validation failed: Shared runners enabled cannot be enabled because parent group has shared Runners disabled')
.and not_change { parent.shared_runners_enabled }
end
it 'does not enable shared runners' do
expect do
subject rescue nil
parent.reload
group.reload
project.reload
end.to not_change { parent.shared_runners_enabled }
.and not_change { group.shared_runners_enabled }
.and not_change { project.shared_runners_enabled }
end
......@@ -1941,13 +2061,21 @@ RSpec.describe Group do
end
context 'when parent does not allow' do
let_it_be(:parent) { create(:group, :shared_runners_disabled, allow_descendants_override_disabled_shared_runners: false ) }
let_it_be(:group) { create(:group, :shared_runners_disabled, allow_descendants_override_disabled_shared_runners: false, parent: parent) }
let_it_be(:parent, reload: true) { create(:group, :shared_runners_disabled, allow_descendants_override_disabled_shared_runners: false ) }
let_it_be(:group, reload: true) { create(:group, :shared_runners_disabled, allow_descendants_override_disabled_shared_runners: false, parent: parent) }
it 'raises error and does not allow descendants to override' do
expect { subject_and_reload(parent, group) }
it 'raises exception' do
expect { subject }
.to raise_error(ActiveRecord::RecordInvalid, 'Validation failed: Allow descendants override disabled shared runners cannot be enabled because parent group does not allow it')
.and not_change { parent.allow_descendants_override_disabled_shared_runners }
end
it 'does not allow descendants to override' do
expect do
subject rescue nil
parent.reload
group.reload
end.to not_change { parent.allow_descendants_override_disabled_shared_runners }
.and not_change { parent.shared_runners_enabled }
.and not_change { group.allow_descendants_override_disabled_shared_runners }
.and not_change { group.shared_runners_enabled }
......
......@@ -43,21 +43,63 @@ RSpec.describe Namespace::TraversalHierarchy, type: :model do
end
end
shared_examples 'locked update query' do
it 'locks query with FOR UPDATE' do
qr = ActiveRecord::QueryRecorder.new do
subject
end
expect(qr.count).to eq 1
expect(qr.log.first).to match /FOR UPDATE/
end
end
describe '#incorrect_traversal_ids' do
subject { described_class.new(root).incorrect_traversal_ids }
let!(:hierarchy) { described_class.new(root) }
subject { hierarchy.incorrect_traversal_ids }
before do
Namespace.update_all(traversal_ids: [])
end
it { is_expected.to match_array Namespace.all }
context 'when lock is true' do
subject { hierarchy.incorrect_traversal_ids(lock: true).load }
it_behaves_like 'locked update query'
end
end
describe '#sync_traversal_ids!' do
let(:hierarchy) { described_class.new(root) }
let!(:hierarchy) { described_class.new(root) }
subject { hierarchy.sync_traversal_ids! }
it { expect(hierarchy.incorrect_traversal_ids).to be_empty }
it_behaves_like 'hierarchy with traversal_ids'
it_behaves_like 'locked update query'
context 'when deadlocked' do
before do
hierarchy.sync_traversal_ids!
root.reload
connection_double = double(:connection)
allow(Namespace).to receive(:connection).and_return(connection_double)
allow(connection_double).to receive(:exec_query) { raise ActiveRecord::Deadlocked.new }
end
it_behaves_like 'hierarchy with traversal_ids'
it { expect(hierarchy.incorrect_traversal_ids).to be_empty }
it { expect { subject }.to raise_error(ActiveRecord::Deadlocked) }
it 'increment db_deadlock counter' do
expect { subject rescue nil }.to change { db_deadlock_total('Namespace#sync_traversal_ids!') }.by(1)
end
end
end
def db_deadlock_total(source)
Gitlab::Metrics
.counter(:db_deadlock, 'Counts the times we have deadlocked in the database')
.get(source: source)
end
end
......@@ -168,6 +168,20 @@ RSpec.describe Namespace do
describe 'inclusions' do
it { is_expected.to include_module(Gitlab::VisibilityLevel) }
it { is_expected.to include_module(Namespaces::Traversal::Recursive) }
it { is_expected.to include_module(Namespaces::Traversal::Linear) }
end
context 'traversal_ids on create' do
context 'default traversal_ids' do
let(:namespace) { build(:namespace) }
before do
namespace.save!
namespace.reload
end
it { expect(namespace.traversal_ids).to eq [namespace.id] }
end
end
describe 'callbacks' do
......@@ -1085,6 +1099,7 @@ RSpec.describe Namespace do
end
describe '#root_ancestor' do
context 'with persisted root group' do
let!(:root_group) { create(:group) }
it 'returns root_ancestor for root group without a query' do
......@@ -1103,6 +1118,26 @@ RSpec.describe Namespace do
end
end
context 'with not persisted root group' do
let!(:root_group) { build(:group) }
it 'returns root_ancestor for root group without a query' do
expect { root_group.root_ancestor }.not_to exceed_query_limit(0)
end
it 'returns the top most ancestor' do
nested_group = build(:group, parent: root_group)
deep_nested_group = build(:group, parent: nested_group)
very_deep_nested_group = build(:group, parent: deep_nested_group)
expect(root_group.root_ancestor).to eq(root_group)
expect(nested_group.root_ancestor).to eq(root_group)
expect(deep_nested_group.root_ancestor).to eq(root_group)
expect(very_deep_nested_group.root_ancestor).to eq(root_group)
end
end
end
describe '#full_path_before_last_save' do
context 'when the group has no parent' do
it 'returns the path before last save' do
......
# frozen_string_literal: true
module ReloadHelpers
def reload_models(*models)
models.map(&:reload)
end
def subject_and_reload(*models)
subject
reload_models(*models)
end
end
# frozen_string_literal: true
def update_column_regex(column)
/UPDATE.+SET.+#{column}[^=*]=.+FROM.*/m
end
RSpec.shared_examples 'update on column' do |column|
it "#{column} column updated" do
qr = ActiveRecord::QueryRecorder.new do
subject
end
expect(qr.log).to include a_string_matching update_column_regex(column)
end
end
RSpec.shared_examples 'no update on column' do |column|
it "#{column} column is not updated" do
qr = ActiveRecord::QueryRecorder.new do
subject
end
expect(qr.log).not_to include a_string_matching update_column_regex(column)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment