Commit 0ac6ffdd authored by Andreas Brandl's avatar Andreas Brandl

Merge branch 'ahegyi-make-hierarchcy-cte-distinct' into 'master'

Add DISTINCT to the CTE queries for hierarchies [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!56509
parents 57aa7792 a7038df0
...@@ -15,8 +15,7 @@ module Namespaces ...@@ -15,8 +15,7 @@ module Namespaces
# Returns all ancestors, self, and descendants of the current namespace. # Returns all ancestors, self, and descendants of the current namespace.
def self_and_hierarchy def self_and_hierarchy
Gitlab::ObjectHierarchy object_hierarchy(self.class.where(id: id))
.new(self.class.where(id: id))
.all_objects .all_objects
end end
...@@ -24,38 +23,38 @@ module Namespaces ...@@ -24,38 +23,38 @@ module Namespaces
def ancestors def ancestors
return self.class.none unless parent_id return self.class.none unless parent_id
Gitlab::ObjectHierarchy object_hierarchy(self.class.where(id: parent_id))
.new(self.class.where(id: parent_id))
.base_and_ancestors .base_and_ancestors
end end
# returns all ancestors upto but excluding the given namespace # returns all ancestors upto but excluding the given namespace
# when no namespace is given, all ancestors upto the top are returned # when no namespace is given, all ancestors upto the top are returned
def ancestors_upto(top = nil, hierarchy_order: nil) def ancestors_upto(top = nil, hierarchy_order: nil)
Gitlab::ObjectHierarchy.new(self.class.where(id: id)) object_hierarchy(self.class.where(id: id))
.ancestors(upto: top, hierarchy_order: hierarchy_order) .ancestors(upto: top, hierarchy_order: hierarchy_order)
end end
def self_and_ancestors(hierarchy_order: nil) def self_and_ancestors(hierarchy_order: nil)
return self.class.where(id: id) unless parent_id return self.class.where(id: id) unless parent_id
Gitlab::ObjectHierarchy object_hierarchy(self.class.where(id: id))
.new(self.class.where(id: id))
.base_and_ancestors(hierarchy_order: hierarchy_order) .base_and_ancestors(hierarchy_order: hierarchy_order)
end end
# Returns all the descendants of the current namespace. # Returns all the descendants of the current namespace.
def descendants def descendants
Gitlab::ObjectHierarchy object_hierarchy(self.class.where(parent_id: id))
.new(self.class.where(parent_id: id))
.base_and_descendants .base_and_descendants
end end
def self_and_descendants def self_and_descendants
Gitlab::ObjectHierarchy object_hierarchy(self.class.where(id: id))
.new(self.class.where(id: id))
.base_and_descendants .base_and_descendants
end end
def object_hierarchy(ancestors_base)
Gitlab::ObjectHierarchy.new(ancestors_base, options: { use_distinct: Feature.enabled?(:use_distinct_in_object_hierarchy, self) })
end
end end
end end
end end
---
name: use_distinct_in_object_hierarchy
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/56509
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/324644
milestone: '13.10'
type: development
group: group::optimize
default_enabled: false
...@@ -60,12 +60,27 @@ module Gitlab ...@@ -60,12 +60,27 @@ module Gitlab
# ancestor to most nested object respectively. This uses a `depth` column # ancestor to most nested object respectively. This uses a `depth` column
# where `1` is defined as the depth for the base and increment as we go up # where `1` is defined as the depth for the base and increment as we go up
# each parent. # each parent.
#
# Note: By default the order is breadth-first
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def base_and_ancestors(upto: nil, hierarchy_order: nil) def base_and_ancestors(upto: nil, hierarchy_order: nil)
recursive_query = base_and_ancestors_cte(upto, hierarchy_order).apply_to(model.all) if use_distinct?
recursive_query = recursive_query.order(depth: hierarchy_order) if hierarchy_order expose_depth = hierarchy_order.present?
hierarchy_order ||= :asc
read_only(recursive_query)
recursive_query = base_and_ancestors_cte(upto, hierarchy_order).apply_to(model.all).distinct
# if hierarchy_order is given, the calculated `depth` should be present in SELECT
if expose_depth
read_only(model.from(Arel::Nodes::As.new(recursive_query.arel, objects_table)).order(depth: hierarchy_order))
else
read_only(remove_depth_and_maintain_order(recursive_query, hierarchy_order: hierarchy_order))
end
else
recursive_query = base_and_ancestors_cte(upto, hierarchy_order).apply_to(model.all)
recursive_query = recursive_query.order(depth: hierarchy_order) if hierarchy_order
read_only(recursive_query)
end
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
...@@ -74,9 +89,22 @@ module Gitlab ...@@ -74,9 +89,22 @@ module Gitlab
# #
# When `with_depth` is `true`, a `depth` column is included where it starts with `1` for the base objects # When `with_depth` is `true`, a `depth` column is included where it starts with `1` for the base objects
# and incremented as we go down the descendant tree # and incremented as we go down the descendant tree
# rubocop: disable CodeReuse/ActiveRecord
def base_and_descendants(with_depth: false) def base_and_descendants(with_depth: false)
read_only(base_and_descendants_cte(with_depth: with_depth).apply_to(model.all)) if use_distinct?
# Always calculate `depth`, remove it later if with_depth is false
base_cte = base_and_descendants_cte(with_depth: true).apply_to(model.all).distinct
if with_depth
read_only(model.from(Arel::Nodes::As.new(recursive_query.arel, objects_table)).order(depth: :asc))
else
read_only(remove_depth_and_maintain_order(base_cte, hierarchy_order: :asc))
end
else
read_only(base_and_descendants_cte(with_depth: with_depth).apply_to(model.all))
end
end end
# rubocop: enable CodeReuse/ActiveRecord
# Returns a relation that includes the base objects, their ancestors, # Returns a relation that includes the base objects, their ancestors,
# and the descendants of the base objects. # and the descendants of the base objects.
...@@ -108,13 +136,21 @@ module Gitlab ...@@ -108,13 +136,21 @@ module Gitlab
ancestors_table = ancestors.alias_to(objects_table) ancestors_table = ancestors.alias_to(objects_table)
descendants_table = descendants.alias_to(objects_table) descendants_table = descendants.alias_to(objects_table)
ancestors_scope = model.unscoped.from(ancestors_table)
descendants_scope = model.unscoped.from(descendants_table)
if use_distinct?
ancestors_scope = ancestors_scope.distinct
descendants_scope = descendants_scope.distinct
end
relation = model relation = model
.unscoped .unscoped
.with .with
.recursive(ancestors.to_arel, descendants.to_arel) .recursive(ancestors.to_arel, descendants.to_arel)
.from_union([ .from_union([
model.unscoped.from(ancestors_table), ancestors_scope,
model.unscoped.from(descendants_table) descendants_scope
]) ])
read_only(relation) read_only(relation)
...@@ -123,12 +159,28 @@ module Gitlab ...@@ -123,12 +159,28 @@ module Gitlab
private private
# Use distinct on the Namespace queries to avoid bad planner behavior in PG11.
def use_distinct?
(model <= Namespace) && options[:use_distinct]
end
# Remove the extra `depth` field using an INNER JOIN to avoid breaking UNION queries
# and ordering the rows based on the `depth` column to maintain the row order.
#
# rubocop: disable CodeReuse/ActiveRecord
def remove_depth_and_maintain_order(relation, hierarchy_order: :asc)
joined_relation = model.joins("INNER JOIN (#{relation.select(:id, :depth).to_sql}) namespaces_join_table on namespaces_join_table.id = #{model.table_name}.id").order("namespaces_join_table.depth" => hierarchy_order)
model.from(Arel::Nodes::As.new(joined_relation.arel, objects_table))
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def base_and_ancestors_cte(stop_id = nil, hierarchy_order = nil) def base_and_ancestors_cte(stop_id = nil, hierarchy_order = nil)
cte = SQL::RecursiveCTE.new(:base_and_ancestors) cte = SQL::RecursiveCTE.new(:base_and_ancestors)
base_query = ancestors_base.except(:order) base_query = ancestors_base.except(:order)
base_query = base_query.select("1 as #{DEPTH_COLUMN}", "ARRAY[id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if hierarchy_order base_query = base_query.select("1 as #{DEPTH_COLUMN}", "ARRAY[#{objects_table.name}.id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if hierarchy_order
cte << base_query cte << base_query
...@@ -161,7 +213,7 @@ module Gitlab ...@@ -161,7 +213,7 @@ module Gitlab
cte = SQL::RecursiveCTE.new(:base_and_descendants) cte = SQL::RecursiveCTE.new(:base_and_descendants)
base_query = descendants_base.except(:order) base_query = descendants_base.except(:order)
base_query = base_query.select("1 AS #{DEPTH_COLUMN}", "ARRAY[id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if with_depth base_query = base_query.select("1 AS #{DEPTH_COLUMN}", "ARRAY[#{objects_table.name}.id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if with_depth
cte << base_query cte << base_query
......
...@@ -7,178 +7,206 @@ RSpec.describe Gitlab::ObjectHierarchy do ...@@ -7,178 +7,206 @@ RSpec.describe Gitlab::ObjectHierarchy do
let!(:child1) { create(:group, parent: parent) } let!(:child1) { create(:group, parent: parent) }
let!(:child2) { create(:group, parent: child1) } let!(:child2) { create(:group, parent: child1) }
describe '#base_and_ancestors' do shared_context 'Gitlab::ObjectHierarchy test cases' do
let(:relation) do describe '#base_and_ancestors' do
described_class.new(Group.where(id: child2.id)).base_and_ancestors let(:relation) do
end described_class.new(Group.where(id: child2.id)).base_and_ancestors
end
it 'includes the base rows' do
expect(relation).to include(child2)
end
it 'includes all of the ancestors' do it 'includes the base rows' do
expect(relation).to include(parent, child1) expect(relation).to include(child2)
end end
it 'can find ancestors upto a certain level' do it 'includes all of the ancestors' do
relation = described_class.new(Group.where(id: child2)).base_and_ancestors(upto: child1) expect(relation).to include(parent, child1)
end
expect(relation).to contain_exactly(child2) it 'can find ancestors upto a certain level' do
end relation = described_class.new(Group.where(id: child2)).base_and_ancestors(upto: child1)
it 'uses ancestors_base #initialize argument' do expect(relation).to contain_exactly(child2)
relation = described_class.new(Group.where(id: child2.id), Group.none).base_and_ancestors end
expect(relation).to include(parent, child1, child2) it 'uses ancestors_base #initialize argument' do
end relation = described_class.new(Group.where(id: child2.id), Group.none).base_and_ancestors
it 'does not allow the use of #update_all' do expect(relation).to include(parent, child1, child2)
expect { relation.update_all(share_with_group_lock: false) } end
.to raise_error(ActiveRecord::ReadOnlyRecord)
end
describe 'hierarchy_order option' do it 'does not allow the use of #update_all' do
let(:relation) do expect { relation.update_all(share_with_group_lock: false) }
described_class.new(Group.where(id: child2.id)).base_and_ancestors(hierarchy_order: hierarchy_order) .to raise_error(ActiveRecord::ReadOnlyRecord)
end end
context ':asc' do describe 'hierarchy_order option' do
let(:hierarchy_order) { :asc } let(:relation) do
described_class.new(Group.where(id: child2.id)).base_and_ancestors(hierarchy_order: hierarchy_order)
end
context ':asc' do
let(:hierarchy_order) { :asc }
it 'orders by child to parent' do it 'orders by child to parent' do
expect(relation).to eq([child2, child1, parent]) expect(relation).to eq([child2, child1, parent])
end
end end
end
context ':desc' do context ':desc' do
let(:hierarchy_order) { :desc } let(:hierarchy_order) { :desc }
it 'orders by parent to child' do it 'orders by parent to child' do
expect(relation).to eq([parent, child1, child2]) expect(relation).to eq([parent, child1, child2])
end
end end
end end
end end
end
describe '#base_and_descendants' do
let(:relation) do
described_class.new(Group.where(id: parent.id)).base_and_descendants
end
it 'includes the base rows' do describe '#base_and_descendants' do
expect(relation).to include(parent) let(:relation) do
end described_class.new(Group.where(id: parent.id)).base_and_descendants
end
it 'includes all the descendants' do it 'includes the base rows' do
expect(relation).to include(child1, child2) expect(relation).to include(parent)
end end
it 'uses descendants_base #initialize argument' do it 'includes all the descendants' do
relation = described_class.new(Group.none, Group.where(id: parent.id)).base_and_descendants expect(relation).to include(child1, child2)
end
expect(relation).to include(parent, child1, child2) it 'uses descendants_base #initialize argument' do
end relation = described_class.new(Group.none, Group.where(id: parent.id)).base_and_descendants
it 'does not allow the use of #update_all' do expect(relation).to include(parent, child1, child2)
expect { relation.update_all(share_with_group_lock: false) } end
.to raise_error(ActiveRecord::ReadOnlyRecord)
end
context 'when with_depth is true' do it 'does not allow the use of #update_all' do
let(:relation) do expect { relation.update_all(share_with_group_lock: false) }
described_class.new(Group.where(id: parent.id)).base_and_descendants(with_depth: true) .to raise_error(ActiveRecord::ReadOnlyRecord)
end end
it 'includes depth in the results' do context 'when with_depth is true' do
object_depths = { let(:relation) do
parent.id => 1, described_class.new(Group.where(id: parent.id)).base_and_descendants(with_depth: true)
child1.id => 2, end
child2.id => 3
} it 'includes depth in the results' do
object_depths = {
parent.id => 1,
child1.id => 2,
child2.id => 3
}
relation.each do |object| relation.each do |object|
expect(object.depth).to eq(object_depths[object.id]) expect(object.depth).to eq(object_depths[object.id])
end
end end
end end
end end
end
describe '#descendants' do describe '#descendants' do
it 'includes only the descendants' do it 'includes only the descendants' do
relation = described_class.new(Group.where(id: parent)).descendants relation = described_class.new(Group.where(id: parent)).descendants
expect(relation).to contain_exactly(child1, child2) expect(relation).to contain_exactly(child1, child2)
end
end end
end
describe '#max_descendants_depth' do describe '#max_descendants_depth' do
subject { described_class.new(base_relation).max_descendants_depth } subject { described_class.new(base_relation).max_descendants_depth }
context 'when base relation is empty' do context 'when base relation is empty' do
let(:base_relation) { Group.where(id: nil) } let(:base_relation) { Group.where(id: nil) }
it { expect(subject).to be_nil } it { expect(subject).to be_nil }
end end
context 'when base has no children' do context 'when base has no children' do
let(:base_relation) { Group.where(id: child2) } let(:base_relation) { Group.where(id: child2) }
it { expect(subject).to eq(1) } it { expect(subject).to eq(1) }
end end
context 'when base has grandchildren' do context 'when base has grandchildren' do
let(:base_relation) { Group.where(id: parent) } let(:base_relation) { Group.where(id: parent) }
it { expect(subject).to eq(3) } it { expect(subject).to eq(3) }
end
end end
end
describe '#ancestors' do describe '#ancestors' do
it 'includes only the ancestors' do it 'includes only the ancestors' do
relation = described_class.new(Group.where(id: child2)).ancestors relation = described_class.new(Group.where(id: child2)).ancestors
expect(relation).to contain_exactly(child1, parent) expect(relation).to contain_exactly(child1, parent)
end end
it 'can find ancestors upto a certain level' do it 'can find ancestors upto a certain level' do
relation = described_class.new(Group.where(id: child2)).ancestors(upto: child1) relation = described_class.new(Group.where(id: child2)).ancestors(upto: child1)
expect(relation).to be_empty expect(relation).to be_empty
end
end end
end
describe '#all_objects' do describe '#all_objects' do
let(:relation) do let(:relation) do
described_class.new(Group.where(id: child1.id)).all_objects described_class.new(Group.where(id: child1.id)).all_objects
end end
it 'includes the base rows' do it 'includes the base rows' do
expect(relation).to include(child1) expect(relation).to include(child1)
end end
it 'includes the ancestors' do
expect(relation).to include(parent)
end
it 'includes the descendants' do
expect(relation).to include(child2)
end
it 'uses ancestors_base #initialize argument for ancestors' do
relation = described_class.new(Group.where(id: child1.id), Group.where(id: non_existing_record_id)).all_objects
expect(relation).to include(parent)
end
it 'includes the ancestors' do it 'uses descendants_base #initialize argument for descendants' do
expect(relation).to include(parent) relation = described_class.new(Group.where(id: non_existing_record_id), Group.where(id: child1.id)).all_objects
expect(relation).to include(child2)
end
it 'does not allow the use of #update_all' do
expect { relation.update_all(share_with_group_lock: false) }
.to raise_error(ActiveRecord::ReadOnlyRecord)
end
end end
end
it 'includes the descendants' do context 'when the use_distinct_in_object_hierarchy feature flag is enabled' do
expect(relation).to include(child2) before do
stub_feature_flags(use_distinct_in_object_hierarchy: true)
end end
it 'uses ancestors_base #initialize argument for ancestors' do it_behaves_like 'Gitlab::ObjectHierarchy test cases'
relation = described_class.new(Group.where(id: child1.id), Group.where(id: non_existing_record_id)).all_objects
expect(relation).to include(parent) it 'calls DISTINCT' do
expect(parent.self_and_descendants.to_sql).to include("DISTINCT")
expect(child2.self_and_ancestors.to_sql).to include("DISTINCT")
end end
end
it 'uses descendants_base #initialize argument for descendants' do context 'when the use_distinct_in_object_hierarchy feature flag is disabled' do
relation = described_class.new(Group.where(id: non_existing_record_id), Group.where(id: child1.id)).all_objects before do
stub_feature_flags(use_distinct_in_object_hierarchy: false)
expect(relation).to include(child2)
end end
it 'does not allow the use of #update_all' do it_behaves_like 'Gitlab::ObjectHierarchy test cases'
expect { relation.update_all(share_with_group_lock: false) }
.to raise_error(ActiveRecord::ReadOnlyRecord) it 'does not call DISTINCT' do
expect(parent.self_and_descendants.to_sql).not_to include("DISTINCT")
expect(child2.self_and_ancestors.to_sql).not_to include("DISTINCT")
end end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment