Commit 42661f03 authored by Alex Pooley's avatar Alex Pooley

Define a namespace traversal cache

1. Add a traversal_ids array column to the namespaces table.
2. Define Traversal::Hierarchy to sync traversal_ids using recursive
traversal methods.
parent 4f7ccca0
# frozen_string_literal: true
#
# A Namespace::TraversalHierarchy is the collection of namespaces that descend
# from a root Namespace as defined by the Namespace#traversal_ids attributes.
#
# This class provides operations to be performed on the hierarchy itself,
# rather than individual namespaces.
#
# This includes methods for synchronizing traversal_ids attributes to a correct
# state. We use recursive methods to determine the correct state so we don't
# have to depend on the integrity of the traversal_ids attribute values
# themselves.
#
class Namespace
class TraversalHierarchy
attr_accessor :root
def self.for_namespace(namespace)
new(recursive_root_ancestor(namespace))
end
def initialize(root)
raise StandardError.new('Must specify a root node') if root.parent_id
@root = root
end
# Update all traversal_ids in the current namespace hierarchy.
def sync_traversal_ids!
# An issue in Rails since 2013 prevents this kind of join based update in
# ActiveRecord. https://github.com/rails/rails/issues/13496
# Ideally it would be:
# `incorrect_traversal_ids.update_all('traversal_ids = cte.traversal_ids')`
sql = """
UPDATE namespaces
SET traversal_ids = cte.traversal_ids
FROM (#{recursive_traversal_ids}) as cte
WHERE namespaces.id = cte.id
AND namespaces.traversal_ids <> cte.traversal_ids
"""
Namespace.connection.exec_query(sql)
end
# Identify all incorrect traversal_ids in the current namespace hierarchy.
def incorrect_traversal_ids
Namespace
.joins("INNER JOIN (#{recursive_traversal_ids}) as cte ON namespaces.id = cte.id")
.where('namespaces.traversal_ids <> cte.traversal_ids')
end
private
# Determine traversal_ids for the namespace hierarchy using recursive methods.
# Generate a collection of [id, traversal_ids] rows.
#
# Note that the traversal_ids represent a calculated traversal path for the
# namespace and not the value stored within the traversal_ids attribute.
def recursive_traversal_ids
root_id = Integer(@root.id)
"""
WITH RECURSIVE cte(id, traversal_ids, cycle) AS (
VALUES(#{root_id}, ARRAY[#{root_id}], false)
UNION ALL
SELECT n.id, cte.traversal_ids || n.id, n.id = ANY(cte.traversal_ids)
FROM namespaces n, cte
WHERE n.parent_id = cte.id AND NOT cycle
)
SELECT id, traversal_ids FROM cte
"""
end
# This is essentially Namespace#root_ancestor which will soon be rewritten
# to use traversal_ids. We replicate here as a reliable way to find the
# root using recursive methods.
def self.recursive_root_ancestor(namespace)
Gitlab::ObjectHierarchy
.new(Namespace.where(id: namespace))
.base_and_ancestors
.reorder(nil)
.find_by(parent_id: nil)
end
end
end
---
title: Define a namespace traversal cache
merge_request: 35713
author:
type: performance
# frozen_string_literal: true
class AddTraversalIdsToNamespaces < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def up
with_lock_retries do
add_column :namespaces, :traversal_ids, :integer, array: true, default: [], null: false
end
end
def down
with_lock_retries do
remove_column :namespaces, :traversal_ids
end
end
end
......@@ -13099,7 +13099,8 @@ CREATE TABLE public.namespaces (
max_personal_access_token_lifetime integer,
push_rule_id bigint,
shared_runners_enabled boolean DEFAULT true NOT NULL,
allow_descendants_override_disabled_shared_runners boolean DEFAULT false NOT NULL
allow_descendants_override_disabled_shared_runners boolean DEFAULT false NOT NULL,
traversal_ids integer[] DEFAULT '{}'::integer[] NOT NULL
);
CREATE SEQUENCE public.namespaces_id_seq
......@@ -23531,6 +23532,7 @@ COPY "schema_migrations" (version) FROM STDIN;
20200608075553
20200608214008
20200609002841
20200609012539
20200609142506
20200609142507
20200609142508
......
......@@ -29,5 +29,35 @@ FactoryBot.define do
trait :with_root_storage_statistics do
association :root_storage_statistics, factory: :namespace_root_storage_statistics
end
# Construct a hierarchy underneath the namespace.
# Each namespace will have `children` amount of children,
# and `depth` levels of descendants.
trait :with_hierarchy do
transient do
children { 4 }
depth { 4 }
end
after(:create) do |namespace, evaluator|
def create_graph(parent: nil, children: 4, depth: 4)
return unless depth > 1
children.times do
factory_name = parent.model_name.singular
child = FactoryBot.create(factory_name, parent: parent)
create_graph(parent: child, children: children, depth: depth - 1)
end
parent
end
create_graph(
parent: namespace,
children: evaluator.children,
depth: evaluator.depth
)
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Namespace::TraversalHierarchy, type: :model do
let_it_be(:root, reload: true) { create(:namespace, :with_hierarchy) }
describe '.for_namespace' do
let(:hierarchy) { described_class.for_namespace(namespace) }
context 'with root group' do
let(:namespace) { root }
it { expect(hierarchy.root).to eq root }
end
context 'with child group' do
let(:namespace) { root.children.first.children.first }
it { expect(hierarchy.root).to eq root }
end
context 'with group outside of hierarchy' do
let(:namespace) { create(:namespace) }
it { expect(hierarchy.root).not_to eq root }
end
end
describe '.new' do
let(:hierarchy) { described_class.new(namespace) }
context 'with root group' do
let(:namespace) { root }
it { expect(hierarchy.root).to eq root }
end
context 'with child group' do
let(:namespace) { root.children.first }
it { expect { hierarchy }.to raise_error(StandardError, 'Must specify a root node') }
end
end
describe '#incorrect_traversal_ids' do
subject { described_class.new(root).incorrect_traversal_ids }
it { is_expected.to match_array Namespace.all }
end
describe '#sync_traversal_ids!' do
let(:hierarchy) { described_class.new(root) }
before do
hierarchy.sync_traversal_ids!
root.reload
end
it_behaves_like 'hierarchy with traversal_ids'
it { expect(hierarchy.incorrect_traversal_ids).to be_empty }
end
end
# frozen_string_literal: true
RSpec.shared_examples 'hierarchy with traversal_ids' do
# A convenient null node to represent the parent of root.
let(:null_node) { double(traversal_ids: []) }
# Walk the tree to assert that the current_node's traversal_id is always
# present and equal to it's parent's traversal_ids plus it's own ID.
def validate_traversal_ids(current_node, parent = null_node)
expect(current_node.traversal_ids).to be_present
expect(current_node.traversal_ids).to eq parent.traversal_ids + [current_node.id]
current_node.children.each do |child|
validate_traversal_ids(child, current_node)
end
end
it 'will be valid' do
validate_traversal_ids(root)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment