Commit 87ad78ae authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre Committed by Nick Thomas

Add a method to retrieve registries when selective sync is enabled

This method allows us to enable selective sync support for the
FDW queries that count synced registries.
parent 015318ab
# frozen_string_literal: true
# Finder for retrieving project registries that have been synced
# scoped to a type (repository or wiki) using cross-database joins
# for selective sync.
#
# Basic usage:
#
# Geo::LegacyProjectRegistrySyncedFinder.new(current_node: Gitlab::Geo.current_node, type: :repository).execute
#
# Valid `type` values are:
#
# * `:repository`
# * `:wiki`
#
# Any other value yields no registries.
module Geo
  class LegacyProjectRegistrySyncedFinder < RegistryFinder
    # @param current_node the Geo node, forwarded unchanged to RegistryFinder
    # @param type [#to_sym] which kind of registry to look up
    def initialize(current_node:, type:)
      super(current_node: current_node)
      @type = type.to_sym
    end

    # Returns the synced registries for the configured type. When the node
    # uses selective sync the result is narrowed to the node's projects.
    def execute
      return synced_registries unless selective_sync?

      synced_registries_for_selective_sync
    end

    private

    attr_reader :type

    # Synced registries for `type`, without any selective sync filtering.
    def synced_registries
      Geo::ProjectRegistry.synced(type)
    end

    # rubocop: disable CodeReuse/ActiveRecord
    # Restricts the synced registries to the projects of the current node.
    # The project ids are plucked first and handed to
    # `legacy_inner_join_registry_ids` (defined outside this file section,
    # presumably on RegistryFinder) together with the registry relation.
    def synced_registries_for_selective_sync
      registries = synced_registries
      selected_project_ids = current_node.projects.pluck(:id)

      legacy_inner_join_registry_ids(
        registries,
        selected_project_ids,
        Geo::ProjectRegistry,
        foreign_key: :project_id
      )
    end
    # rubocop: enable CodeReuse/ActiveRecord
  end
end
......@@ -7,25 +7,11 @@ module Geo
end
def count_synced_repositories
relation =
if selective_sync?
legacy_find_synced_repositories
else
find_synced_repositories
end
relation.count
registries_for_synced_projects(:repository).count
end
def count_synced_wikis
relation =
if use_legacy_queries?
legacy_find_synced_wikis
else
fdw_find_synced_wikis
end
relation.count
registries_for_synced_projects(:wiki).count
end
def count_failed_repositories
......@@ -143,8 +129,18 @@ module Geo
protected
def find_synced_repositories
Geo::ProjectRegistry.synced_repos
# Picks the finder class used to look up synced registries: the FDW-based
# finder when FDW is enabled for selective sync, the legacy one otherwise.
def finder_klass_for_synced_registries
  return Geo::ProjectRegistrySyncedFinder if Gitlab::Geo::Fdw.enabled_for_selective_sync?

  Geo::LegacyProjectRegistrySyncedFinder
end
# Builds the appropriate synced-registries finder for the current node and
# the given type, and returns whatever its #execute returns.
def registries_for_synced_projects(type)
  finder = finder_klass_for_synced_registries.new(current_node: current_node, type: type)
  finder.execute
end
def find_verified_repositories
......@@ -196,11 +192,6 @@ module Geo
end
# rubocop: enable CodeReuse/ActiveRecord
# @return [ActiveRecord::Relation<Geo::ProjectRegistry>]
def fdw_find_synced_wikis
Geo::ProjectRegistry.synced_wikis
end
# @return [ActiveRecord::Relation<Geo::Fdw::Project>]
# rubocop: disable CodeReuse/ActiveRecord
def fdw_find_projects_updated_recently
......@@ -290,37 +281,15 @@ module Geo
::Gitlab::SQL::Glob.q(value)
end
# @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of synced projects
def legacy_find_synced_repositories
legacy_find_project_registries(Geo::ProjectRegistry.synced_repos)
end
# @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of synced projects
# rubocop: disable CodeReuse/ActiveRecord
def legacy_find_synced_wikis
legacy_inner_join_registry_ids(
current_node.projects,
Geo::ProjectRegistry.synced_wikis.pluck(:project_id),
Project
)
end
# rubocop: enable CodeReuse/ActiveRecord
# @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of verified projects
def legacy_find_verified_repositories
  # Start from the verified-repositories scope and let the legacy helper
  # narrow it down.
  verified_scope = Geo::ProjectRegistry.verified_repos
  legacy_find_project_registries(verified_scope)
end
# @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of verified wikis
# rubocop: disable CodeReuse/ActiveRecord
def legacy_find_verified_wikis
legacy_inner_join_registry_ids(
current_node.projects,
Geo::ProjectRegistry.verified_wikis.pluck(:project_id),
Project
)
legacy_find_project_registries(Geo::ProjectRegistry.verified_wikis)
end
# rubocop: enable CodeReuse/ActiveRecord
# @return [ActiveRecord::Relation<Project>] list of synced projects
# rubocop: disable CodeReuse/ActiveRecord
......
# frozen_string_literal: true
# Finder for retrieving project registries that have been synced
# scoped to a type (repository or wiki) using FDW queries.
#
# Basic usage:
#
# Geo::ProjectRegistrySyncedFinder.new(current_node: Gitlab::Geo.current_node, type: :repository).execute
#
# Valid `type` values are:
#
# * `:repository`
# * `:wiki`
#
# Any other value yields no registries.
module Geo
  class ProjectRegistrySyncedFinder
    # @param current_node [#id] node whose FDW counterpart is looked up by id
    # @param type [#to_sym] which kind of registry to look up
    def initialize(current_node:, type:)
      # Re-load the node through the FDW model so that #project_registries
      # (defined on Geo::Fdw::GeoNode) is available.
      @current_node = Geo::Fdw::GeoNode.find(current_node.id)
      @type = type.to_sym
    end

    # Returns the node's project registries narrowed to those synced for
    # the configured type.
    def execute
      node_registries = current_node.project_registries
      node_registries.synced(type)
    end

    private

    attr_reader :current_node, :type
  end
end
# frozen_string_literal: true
# Predicates describing how selective sync is configured. The including
# class must respond to `selective_sync_type`.
module Geo::SelectiveSync
  extend ActiveSupport::Concern

  # @return [Boolean] true when a selective sync type is set
  def selective_sync?
    selective_sync_type.present?
  end

  # @return [Boolean] true when the selective sync type is 'namespaces'
  def selective_sync_by_namespaces?
    selective_sync_type == 'namespaces'
  end

  # @return [Boolean] true when the selective sync type is 'shards'
  def selective_sync_by_shards?
    selective_sync_type == 'shards'
  end
end
# frozen_string_literal: true
module Geo
  module Fdw
    # Model mapped to the foreign table for `geo_nodes`.
    class GeoNode < ::Geo::BaseFdw
      include ::Geo::SelectiveSync

      self.primary_key = :id
      self.inheritance_column = nil
      self.table_name = Gitlab::Geo::Fdw.foreign_table_name('geo_nodes')

      serialize :selective_sync_shards, Array # rubocop:disable Cop/ActiveRecordSerialize

      has_many :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNodeNamespaceLink'
      has_many :namespaces, class_name: 'Geo::Fdw::Namespace', through: :geo_node_namespace_links

      # Project registries this node is responsible for:
      #
      # * all registries when selective sync is off,
      # * registries of projects in the selected namespaces (and their
      #   descendants) when syncing by namespaces,
      # * registries of projects stored on the selected shards when syncing
      #   by shards,
      # * nothing for any other selective sync type.
      def project_registries
        return Geo::ProjectRegistry.all unless selective_sync?
        return registries_for_selected_namespaces if selective_sync_by_namespaces?
        return registries_for_selected_shards if selective_sync_by_shards?

        Geo::ProjectRegistry.none
      end

      private

      # Registries joined to FDW projects whose namespace is one of the
      # selected namespaces or a descendant of one.
      def registries_for_selected_namespaces
        namespaces_scope = selected_namespaces_and_descendants

        Geo::ProjectRegistry
          .joins(fdw_inner_join_projects)
          .where(fdw_projects_table.name => { namespace_id: namespaces_scope.select(:id) })
      end

      # FDW namespaces relation built from the recursive CTE, extended with
      # Gitlab::Database::ReadOnlyRelation.
      def selected_namespaces_and_descendants
        selected_namespaces_and_descendants_cte
          .apply_to(Geo::Fdw::Namespace.all)
          .tap { |namespaces| namespaces.extend(Gitlab::Database::ReadOnlyRelation) }
      end

      # Recursive CTE: the base set is the namespace ids linked to this node,
      # the recursive part walks down through `parent_id`.
      def selected_namespaces_and_descendants_cte
        Gitlab::SQL::RecursiveCTE.new(:base_and_descendants).tap do |recursive_cte|
          recursive_cte << geo_node_namespace_links
            .select(fdw_geo_node_namespace_links_table[:namespace_id].as('id'))
            .except(:order)

          # Recursively get all the descendants of the base set.
          recursive_cte << Geo::Fdw::Namespace
            .select(fdw_namespaces_table[:id])
            .from([fdw_namespaces_table, recursive_cte.table])
            .where(fdw_namespaces_table[:parent_id].eq(recursive_cte.table[:id]))
            .except(:order)
        end
      end

      # Registries joined to FDW projects whose repository storage is one of
      # the selected shards.
      def registries_for_selected_shards
        Geo::ProjectRegistry
          .joins(fdw_inner_join_projects)
          .where(fdw_projects_table.name => { repository_storage: selective_sync_shards })
      end

      def project_registries_table
        Geo::ProjectRegistry.arel_table
      end

      def fdw_projects_table
        Geo::Fdw::Project.arel_table
      end

      def fdw_namespaces_table
        Geo::Fdw::Namespace.arel_table
      end

      def fdw_geo_node_namespace_links_table
        Geo::Fdw::GeoNodeNamespaceLink.arel_table
      end

      # Arel join sources for an INNER JOIN between the project registry table
      # and the FDW projects table on registry.project_id = project.id.
      def fdw_inner_join_projects
        registries = project_registries_table
        projects = fdw_projects_table

        registries
          .join(projects, Arel::Nodes::InnerJoin)
          .on(registries[:project_id].eq(projects[:id]))
          .join_sources
      end
    end
  end
end
# frozen_string_literal: true
module Geo
  module Fdw
    # Join model mapped to the foreign table for `geo_node_namespace_links`,
    # connecting an FDW Geo node to an FDW namespace.
    class GeoNodeNamespaceLink < ::Geo::BaseFdw
      self.table_name = Gitlab::Geo::Fdw.foreign_table_name('geo_node_namespace_links')

      # NOTE(review): both inverses name the has_many :through association on
      # the other model rather than its :geo_node_namespace_links association;
      # confirm this is intended before relying on inverse caching.
      belongs_to :geo_node, inverse_of: :namespaces, class_name: 'Geo::Fdw::GeoNode'
      belongs_to :namespace, inverse_of: :geo_nodes, class_name: 'Geo::Fdw::Namespace'
    end
  end
end
# frozen_string_literal: true
module Geo
  module Fdw
    # Model mapped to the foreign table for `namespaces`.
    class Namespace < ::Geo::BaseFdw
      self.primary_key = :id
      self.inheritance_column = nil
      self.table_name = Gitlab::Geo::Fdw.foreign_table_name('namespaces')

      # Geo nodes are reached through the link rows.
      has_many :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNodeNamespaceLink'
      has_many :geo_nodes, through: :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNode'
    end
  end
end
......@@ -88,6 +88,17 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
where(project: Geo::Fdw::Project.search(query))
end
# Dispatches to the synced scope matching `type`.
#
# @param type [Symbol] :repository or :wiki
# @return the `synced_repos` scope for :repository, the `synced_wikis` scope
#   for :wiki, and `none` for anything else
def self.synced(type)
  case type
  when :repository then synced_repos
  when :wiki then synced_wikis
  else none
  end
end
def self.flag_repositories_for_resync!
update_all(
resync_repository: true,
......
......@@ -2,6 +2,7 @@
class GeoNode < ActiveRecord::Base
include Presentable
include Geo::SelectiveSync
SELECTIVE_SYNC_TYPES = %w[namespaces shards].freeze
......@@ -213,24 +214,12 @@ class GeoNode < ActiveRecord::Base
end
end
def selective_sync_by_namespaces?
selective_sync_type == 'namespaces'
end
def selective_sync_by_shards?
selective_sync_type == 'shards'
end
# Whether the given project belongs to this node. Without selective sync
# every project does; otherwise the node's projects are queried by id.
def projects_include?(project_id)
  if selective_sync?
    projects.where(id: project_id).exists?
  else
    true
  end
end
def selective_sync?
selective_sync_type.present?
end
def replication_slots_count
return unless Gitlab::Database.replication_slots_supported? && primary?
......
---
title: Geo - Add selective sync support for the FDW queries to count synced registries
merge_request: 9445
author:
type: changed
......@@ -19,6 +19,10 @@ module Gitlab
value.nil? ? true : value
end
# FDW queries may be used for selective sync only when FDW itself is enabled
# and the :use_fdw_queries_for_selective_sync feature flag is on. The feature
# flag is not consulted when FDW is disabled.
def enabled_for_selective_sync?
  fdw_enabled = enabled?
  return fdw_enabled unless fdw_enabled

  Feature.enabled?(:use_fdw_queries_for_selective_sync)
end
# Return full table name with foreign schema
#
# @param [String] table_name
......
# frozen_string_literal: true
require 'spec_helper'
describe Geo::LegacyProjectRegistrySyncedFinder, :geo do
  include EE::GeoHelpers

  describe '#execute' do
    let(:node) { create(:geo_node) }

    # group_1 contains nested_group_1; group_2 is unrelated to both.
    let(:group_1) { create(:group) }
    let(:group_2) { create(:group) }
    let(:nested_group_1) { create(:group, parent: group_1) }

    # Projects 1-3 live under group_1; projects 4-5 use the :broken_storage
    # trait and live under group_2.
    let(:project_1) { create(:project, group: group_1) }
    let(:project_2) { create(:project, group: nested_group_1) }
    let(:project_3) { create(:project, group: nested_group_1) }
    let(:project_4) { create(:project, :broken_storage, group: group_2) }
    let(:project_5) { create(:project, :broken_storage, group: group_2) }

    let!(:registry_synced) { create(:geo_project_registry, :synced, project: project_1) }
    let!(:registry_repository_dirty) { create(:geo_project_registry, :synced, :repository_dirty, project: project_2) }
    let!(:registry_wiki_dirty) { create(:geo_project_registry, :synced, :wiki_dirty, project: project_3) }
    let!(:registry_wiki_dirty_broken_shard) { create(:geo_project_registry, :synced, :wiki_dirty, project: project_4) }
    let!(:registry_repository_dirty_broken_shard) { create(:geo_project_registry, :synced, :repository_dirty, project: project_5) }
    let!(:registry_sync_failed) { create(:geo_project_registry, :sync_failed) }

    shared_examples 'finds synced registries' do
      context 'with repository type' do
        subject(:finder) { described_class.new(current_node: node, type: :repository) }

        context 'without selective sync' do
          it 'returns all synced registries' do
            expect(finder.execute)
              .to contain_exactly(registry_synced, registry_wiki_dirty, registry_wiki_dirty_broken_shard)
          end
        end

        context 'with selective sync by namespace' do
          before do
            node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1])
          end

          it 'returns synced registries where projects belongs to the namespaces' do
            expect(finder.execute).to contain_exactly(registry_synced, registry_wiki_dirty)
          end
        end

        context 'with selective sync by shard' do
          before do
            node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken'])
          end

          it 'returns synced registries where projects belongs to the shards' do
            expect(finder.execute).to contain_exactly(registry_wiki_dirty_broken_shard)
          end
        end
      end

      context 'with wiki type' do
        subject(:finder) { described_class.new(current_node: node, type: :wiki) }

        context 'without selective sync' do
          it 'returns all synced registries' do
            expect(finder.execute)
              .to contain_exactly(registry_synced, registry_repository_dirty, registry_repository_dirty_broken_shard)
          end
        end

        context 'with selective sync by namespace' do
          before do
            node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1])
          end

          it 'returns synced registries where projects belongs to the namespaces' do
            expect(finder.execute).to contain_exactly(registry_synced, registry_repository_dirty)
          end
        end

        context 'with selective sync by shard' do
          before do
            node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken'])
          end

          it 'returns synced registries where projects belongs to the shards' do
            expect(finder.execute).to contain_exactly(registry_repository_dirty_broken_shard)
          end
        end
      end

      context 'with invalid type' do
        subject(:finder) { described_class.new(current_node: node, type: :invalid) }

        it 'returns nothing' do
          expect(finder.execute).to be_empty
        end
      end
    end

    # Disable transactions via :delete method because a foreign table
    # can't see changes inside a transaction of a different connection.
    context 'FDW', :delete do
      before do
        skip('FDW is not configured') unless Gitlab::Geo::Fdw.enabled?
      end

      include_examples 'finds synced registries'
    end

    context 'Legacy' do
      before do
        stub_fdw_disabled
      end

      include_examples 'finds synced registries'
    end
  end
end
......@@ -26,12 +26,6 @@ describe Geo::ProjectRegistryFinder, :geo do
shared_examples 'counts all the things' do
describe '#count_synced_repositories' do
it 'delegates to #find_synced_repositories' do
expect(subject).to receive(:find_synced_repositories).and_call_original
subject.count_synced_repositories
end
it 'counts repositories that have been synced' do
create(:geo_project_registry, :sync_failed)
create(:geo_project_registry, :synced, project: project_synced)
......@@ -41,25 +35,11 @@ describe Geo::ProjectRegistryFinder, :geo do
expect(subject.count_synced_repositories).to eq 2
end
it 'counts synced wikis with nil wiki_access_level (which means enabled wiki)' do
project_synced.project_feature.update!(wiki_access_level: nil)
create(:geo_project_registry, :synced, project: project_synced)
expect(subject.count_synced_wikis).to eq 1
end
context 'with selective sync' do
before do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
end
it 'delegates to #legacy_find_synced_repositories' do
expect(subject).to receive(:legacy_find_synced_repositories).and_call_original
subject.count_synced_repositories
end
it 'counts projects that has been synced' do
project_1_in_synced_group = create(:project, group: synced_group)
project_2_in_synced_group = create(:project, group: synced_group)
......@@ -74,12 +54,6 @@ describe Geo::ProjectRegistryFinder, :geo do
end
describe '#count_synced_wikis' do
it 'delegates to the correct method' do
expect(subject).to receive("#{method_prefix}_find_synced_wikis".to_sym).and_call_original
subject.count_synced_wikis
end
it 'counts wiki that have been synced' do
create(:geo_project_registry, :sync_failed)
create(:geo_project_registry, :synced, project: project_synced)
......@@ -102,12 +76,6 @@ describe Geo::ProjectRegistryFinder, :geo do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
end
it 'delegates to #legacy_find_synced_wiki' do
expect(subject).to receive(:legacy_find_synced_wikis).and_call_original
subject.count_synced_wikis
end
it 'counts projects that has been synced' do
project_1_in_synced_group = create(:project, group: synced_group)
project_2_in_synced_group = create(:project, group: synced_group)
......@@ -639,7 +607,21 @@ describe Geo::ProjectRegistryFinder, :geo do
skip('FDW is not configured') if Gitlab::Database.postgresql? && !Gitlab::Geo::Fdw.enabled?
end
include_examples 'counts all the things'
context 'with use_fdw_queries_for_selective_sync disabled' do
before do
stub_feature_flags(use_fdw_queries_for_selective_sync: false)
end
include_examples 'counts all the things'
end
context 'with use_fdw_queries_for_selective_sync enabled' do
before do
stub_feature_flags(use_fdw_queries_for_selective_sync: true)
end
include_examples 'counts all the things'
end
include_examples 'finds all the things' do
let(:method_prefix) { 'fdw' }
......@@ -648,7 +630,7 @@ describe Geo::ProjectRegistryFinder, :geo do
context 'Legacy' do
before do
allow(Gitlab::Geo::Fdw).to receive(:enabled?).and_return(false)
stub_fdw_disabled
end
include_examples 'counts all the things'
......
# frozen_string_literal: true
require 'spec_helper'
describe Geo::ProjectRegistrySyncedFinder, :geo do
  # Disable transactions via :delete method because a foreign table
  # can't see changes inside a transaction of a different connection.
  describe '#execute', :delete do
    let(:node) { create(:geo_node) }

    # group_1 contains nested_group_1; group_2 is unrelated to both.
    let(:group_1) { create(:group) }
    let(:group_2) { create(:group) }
    let(:nested_group_1) { create(:group, parent: group_1) }

    # Projects 1-3 live under group_1; projects 4-5 use the :broken_storage
    # trait and live under group_2.
    let(:project_1) { create(:project, group: group_1) }
    let(:project_2) { create(:project, group: nested_group_1) }
    let(:project_3) { create(:project, group: nested_group_1) }
    let(:project_4) { create(:project, :broken_storage, group: group_2) }
    let(:project_5) { create(:project, :broken_storage, group: group_2) }

    let!(:registry_synced) { create(:geo_project_registry, :synced, project: project_1) }
    let!(:registry_repository_dirty) { create(:geo_project_registry, :synced, :repository_dirty, project: project_2) }
    let!(:registry_wiki_dirty) { create(:geo_project_registry, :synced, :wiki_dirty, project: project_3) }
    let!(:registry_wiki_dirty_broken_shard) { create(:geo_project_registry, :synced, :wiki_dirty, project: project_4) }
    let!(:registry_repository_dirty_broken_shard) { create(:geo_project_registry, :synced, :repository_dirty, project: project_5) }
    let!(:registry_sync_failed) { create(:geo_project_registry, :sync_failed) }

    before do
      skip('FDW is not configured') unless Gitlab::Geo::Fdw.enabled?
    end

    context 'with repository type' do
      # Built lazily, so the finder loads the node after any update below.
      subject(:finder) { described_class.new(current_node: node, type: :repository) }

      context 'without selective sync' do
        it 'returns all synced registries' do
          expect(finder.execute)
            .to contain_exactly(registry_synced, registry_wiki_dirty, registry_wiki_dirty_broken_shard)
        end
      end

      context 'with selective sync by namespace' do
        before do
          node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1])
        end

        it 'returns synced registries where projects belongs to the namespaces' do
          expect(finder.execute).to contain_exactly(registry_synced, registry_wiki_dirty)
        end
      end

      context 'with selective sync by shard' do
        before do
          node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken'])
        end

        it 'returns synced registries where projects belongs to the shards' do
          expect(finder.execute).to contain_exactly(registry_wiki_dirty_broken_shard)
        end
      end
    end

    context 'with wiki type' do
      subject(:finder) { described_class.new(current_node: node, type: :wiki) }

      context 'without selective sync' do
        it 'returns all synced registries' do
          expect(finder.execute)
            .to contain_exactly(registry_synced, registry_repository_dirty, registry_repository_dirty_broken_shard)
        end
      end

      context 'with selective sync by namespace' do
        before do
          node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1])
        end

        it 'returns synced registries where projects belongs to the namespaces' do
          expect(finder.execute).to contain_exactly(registry_synced, registry_repository_dirty)
        end
      end

      context 'with selective sync by shard' do
        before do
          node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken'])
        end

        it 'returns synced registries where projects belongs to the shards' do
          expect(finder.execute).to contain_exactly(registry_repository_dirty_broken_shard)
        end
      end
    end

    context 'with invalid type' do
      subject(:finder) { described_class.new(current_node: node, type: :invalid) }

      it 'returns nothing' do
        expect(finder.execute).to be_empty
      end
    end
  end
end
......@@ -44,6 +44,40 @@ describe Gitlab::Geo::Fdw, :geo do
end
end
# Covers every combination of "FDW enabled?" and the
# use_fdw_queries_for_selective_sync feature flag.
describe '.enabled_for_selective_sync?' do
  context 'when the feature flag is enabled' do
    before do
      stub_feature_flags(use_fdw_queries_for_selective_sync: true)
    end

    it 'returns false when FDW is disabled' do
      allow(described_class).to receive(:enabled?) { false }

      expect(described_class.enabled_for_selective_sync?).to be(false)
    end

    it 'returns true when FDW is enabled' do
      expect(described_class.enabled_for_selective_sync?).to be(true)
    end
  end

  context 'when the feature flag is disabled' do
    before do
      stub_feature_flags(use_fdw_queries_for_selective_sync: false)
    end

    it 'returns false when FDW is disabled' do
      allow(described_class).to receive(:enabled?) { false }

      expect(described_class.enabled_for_selective_sync?).to be(false)
    end

    it 'returns false when FDW is enabled' do
      expect(described_class.enabled_for_selective_sync?).to be(false)
    end
  end
end
describe '.foreign_tables_up_to_date?' do
it 'returns false when foreign schema does not exist' do
drop_foreign_schema
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Geo::Fdw::GeoNodeNamespaceLink, :geo, type: :model do
  context 'relationships' do
    # Pins the association options declared on the FDW link model.
    it do
      is_expected.to belong_to(:geo_node).class_name('Geo::Fdw::GeoNode').inverse_of(:namespaces)
    end

    it do
      is_expected.to belong_to(:namespace).class_name('Geo::Fdw::Namespace').inverse_of(:geo_nodes)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
  context 'relationships' do
    it do
      is_expected.to have_many(:geo_node_namespace_links).class_name('Geo::Fdw::GeoNodeNamespaceLink')
    end

    it do
      is_expected.to have_many(:namespaces).class_name('Geo::Fdw::Namespace').through(:geo_node_namespace_links)
    end
  end

  # Disable transactions via :delete method because a foreign table
  # can't see changes inside a transaction of a different connection.
  describe '#project_registries', :delete do
    # Loaded lazily through the FDW model, i.e. after any update made to
    # `node` inside the example.
    subject(:fdw_node) { described_class.find(node.id) }

    let(:node) { create(:geo_node) }

    # group_1 contains nested_group_1; group_2 is unrelated to both.
    let(:group_1) { create(:group) }
    let(:group_2) { create(:group) }
    let(:nested_group_1) { create(:group, parent: group_1) }

    let(:project_1) { create(:project, group: group_1) }
    let(:project_2) { create(:project, group: nested_group_1) }
    let(:project_3) { create(:project, :broken_storage, group: group_2) }

    let!(:registry_1) { create(:geo_project_registry, project: project_1) }
    let!(:registry_2) { create(:geo_project_registry, project: project_2) }
    let!(:registry_3) { create(:geo_project_registry, project: project_3) }

    before do
      skip('FDW is not configured') unless Gitlab::Geo::Fdw.enabled?
    end

    it 'returns all registries without selective sync' do
      expect(fdw_node.project_registries).to contain_exactly(registry_1, registry_2, registry_3)
    end

    it 'returns registries where projects belong to the namespaces with selective sync by namespace' do
      node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1])

      expect(fdw_node.project_registries).to contain_exactly(registry_1, registry_2)
    end

    it 'returns registries where projects belong to the shards with selective sync by shard' do
      node.update!(selective_sync_type: 'shards', selective_sync_shards: %w[default bar])

      expect(fdw_node.project_registries).to contain_exactly(registry_1, registry_2)
    end

    it 'returns nothing if an unrecognised selective sync type is used' do
      node.update_attribute(:selective_sync_type, 'unknown')

      expect(fdw_node.project_registries).to be_empty
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Geo::Fdw::Namespace, :geo, type: :model do
  context 'relationships' do
    # Pins the association options declared on the FDW namespace model.
    it do
      is_expected.to have_many(:geo_node_namespace_links).class_name('Geo::Fdw::GeoNodeNamespaceLink')
    end

    it do
      is_expected.to have_many(:geo_nodes).class_name('Geo::Fdw::GeoNode').through(:geo_node_namespace_links)
    end
  end
end
require 'spec_helper'
describe GeoNodeStatus, :geo do
# Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
describe GeoNodeStatus, :geo, :delete do
include ::EE::GeoHelpers
let!(:primary) { create(:geo_node, :primary) }
......
......@@ -58,7 +58,9 @@ describe 'geo rake tasks', :geo do
end
end
describe 'status task' do
# Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
describe 'status task', :delete do
let!(:current_node) { create(:geo_node) }
let!(:primary_node) { create(:geo_node, :primary) }
let!(:geo_event_log) { create(:geo_event_log) }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment