Remove feature flag from project repository scheduler worker

Since GitLab 13.2, we no longer rely on FDW queries to sync these repositories.
parent 74b09efc
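For context, the branch deleted below was gated on a single predicate. A minimal sketch of that gate, assuming the predicate simply wraps a feature-flag check (the real definition lives in Geo::ProjectRegistry and is not part of this diff):

  # Hypothetical sketch, not the literal GitLab implementation.
  module Geo
    class ProjectRegistry
      # Assumed: the scheduler's query path was toggled by this flag check.
      def self.registry_consistency_worker_enabled?
        Feature.enabled?(:geo_project_registry_ssot_sync)
      end
    end
  end

With the flag removed, only the registry-backed path survives, so the predicate call and the legacy branch it guarded can both go.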
@@ -76,66 +76,32 @@ module Geo
     # rubocop: disable CodeReuse/ActiveRecord
     def find_project_ids_not_synced(except_ids:, batch_size:)
-      if Geo::ProjectRegistry.registry_consistency_worker_enabled?
-        project_ids =
-          find_never_synced_project_ids(batch_size: batch_size, except_ids: except_ids)
+      project_ids =
+        registry_finder
+          .find_never_synced_registries(batch_size: batch_size, except_ids: except_ids)
+          .pluck_model_foreign_key
 
-        find_project_ids_within_shard(project_ids, direction: :desc)
-      else
-        find_unsynced_projects(batch_size: batch_size)
-          .id_not_in(except_ids)
-          .reorder(last_repository_updated_at: :desc)
-          .pluck_primary_key
-      end
+      find_project_ids_within_shard(project_ids, direction: :desc)
     end
     # rubocop: enable CodeReuse/ActiveRecord
 
-    def find_unsynced_projects(batch_size:)
-      Geo::ProjectUnsyncedFinder
-        .new(current_node: current_node, shard_name: shard_name, batch_size: batch_size)
-        .execute
-    end
-
     # rubocop: disable CodeReuse/ActiveRecord
     def find_project_ids_updated_recently(except_ids:, batch_size:)
-      if Geo::ProjectRegistry.registry_consistency_worker_enabled?
-        project_ids =
-          find_retryable_dirty_project_ids(batch_size: batch_size, except_ids: except_ids)
+      project_ids =
+        registry_finder
+          .find_retryable_dirty_registries(batch_size: batch_size, except_ids: except_ids)
+          .pluck_model_foreign_key
 
-        find_project_ids_within_shard(project_ids, direction: :asc)
-      else
-        find_projects_updated_recently(batch_size: batch_size)
-          .id_not_in(except_ids)
-          .order('project_registry.last_repository_synced_at ASC NULLS FIRST, projects.last_repository_updated_at ASC')
-          .pluck_primary_key
-      end
+      find_project_ids_within_shard(project_ids, direction: :asc)
     end
     # rubocop: enable CodeReuse/ActiveRecord
 
-    def find_projects_updated_recently(batch_size:)
-      Geo::ProjectUpdatedRecentlyFinder
-        .new(current_node: current_node, shard_name: shard_name, batch_size: batch_size)
-        .execute
-    end
-
     def valid_shard?
       return true unless current_node.selective_sync_by_shards?
 
       current_node.selective_sync_shards.include?(shard_name)
     end
 
-    def find_never_synced_project_ids(batch_size:, except_ids:)
-      registry_finder
-        .find_never_synced_registries(batch_size: batch_size, except_ids: except_ids)
-        .pluck_model_foreign_key
-    end
-
-    def find_retryable_dirty_project_ids(batch_size:, except_ids:)
-      registry_finder
-        .find_retryable_dirty_registries(batch_size: batch_size, except_ids: except_ids)
-        .pluck_model_foreign_key
-    end
-
     # rubocop:disable CodeReuse/ActiveRecord
     def find_project_ids_within_shard(project_ids, direction:)
       Project
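Assembled from the added lines above (surrounding class and rubocop annotations omitted), the two scheduling queries now read as a single registry lookup each; both pull project IDs from the Geo tracking registry rather than the legacy FDW-backed finder classes this commit deletes:

  def find_project_ids_not_synced(except_ids:, batch_size:)
    project_ids =
      registry_finder
        .find_never_synced_registries(batch_size: batch_size, except_ids: except_ids)
        .pluck_model_foreign_key

    find_project_ids_within_shard(project_ids, direction: :desc)
  end

  def find_project_ids_updated_recently(except_ids:, batch_size:)
    project_ids =
      registry_finder
        .find_retryable_dirty_registries(batch_size: batch_size, except_ids: except_ids)
        .pluck_model_foreign_key

    find_project_ids_within_shard(project_ids, direction: :asc)
  end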
@@ -2,24 +2,20 @@
 require 'spec_helper'
 
-RSpec.describe Geo::RepositoryShardSyncWorker, :geo, :geo_fdw, :clean_gitlab_redis_cache, :use_sql_query_cache_for_tracking_db do
+RSpec.describe Geo::RepositoryShardSyncWorker, :geo, :clean_gitlab_redis_cache, :use_sql_query_cache_for_tracking_db do
   include ::EE::GeoHelpers
   include ExclusiveLeaseHelpers
 
-  let!(:primary) { create(:geo_node, :primary) }
-  let!(:secondary) { create(:geo_node) }
-
-  let(:shard_name) { Gitlab.config.repositories.storages.each_key.first }
-
-  before do
-    stub_current_geo_node(secondary)
-  end
-
   describe '#perform' do
-    let!(:restricted_group) { create(:group) }
-
-    let!(:unsynced_project_in_restricted_group) { create(:project, group: restricted_group) }
-    let!(:unsynced_project) { create(:project) }
+    let_it_be(:primary) { create(:geo_node, :primary) }
+    let_it_be(:secondary) { create(:geo_node) }
+
+    let!(:project_1) { create(:project) }
+    let!(:project_2) { create(:project) }
+
+    let(:shard_name) { Gitlab.config.repositories.storages.each_key.first }
 
     before do
+      stub_current_geo_node(secondary)
       stub_exclusive_lease(renew: true)
 
       Gitlab::ShardHealthCache.update([shard_name])
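One note on the switch from let! to let_it_be above: let_it_be (provided by the test-prof gem already used across GitLab's suite) creates the record once for the whole example group, while let! rebuilds it before every example. A minimal illustration, reusing the names from this spec:

  let_it_be(:primary) { create(:geo_node, :primary) }  # one INSERT shared by all examples
  let!(:project_1)    { create(:project) }              # fresh record before each example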
@@ -70,479 +66,220 @@ RSpec.describe Geo::RepositoryShardSyncWorker, :geo, :geo_fdw, :clean_gitlab_redis_cache, :use_sql_query_cache_for_tracking_db do
end
end
context 'when geo_project_registry_ssot_sync is enabled' do
before do
stub_feature_flags(geo_project_registry_ssot_sync: true)
end
it 'performs Geo::ProjectSyncWorker for each registry' do
create(:geo_project_registry, project: unsynced_project)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).once.and_return(spy)
subject.perform(shard_name)
end
it 'performs Geo::ProjectSyncWorker for each registry' do
create(:geo_project_registry, project: project_2)
it 'performs Geo::ProjectSyncWorker for projects where last attempt to sync failed' do
create(:geo_project_registry, :sync_failed, project: unsynced_project_in_restricted_group)
create(:geo_project_registry, :synced, project: unsynced_project)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).once.and_return(spy)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).once.and_return(spy)
subject.perform(shard_name)
end
it 'performs Geo::ProjectSyncWorker for synced projects updated recently' do
create(:geo_project_registry, :synced, :repository_dirty, project: unsynced_project_in_restricted_group)
create(:geo_project_registry, :synced, project: unsynced_project)
create(:geo_project_registry, :synced, :wiki_dirty)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).twice.and_return(spy)
subject.perform(shard_name)
end
subject.perform(shard_name)
end
it 'does not schedule a job twice for the same project' do
create(:geo_project_registry, project: unsynced_project)
create(:geo_project_registry, project: unsynced_project_in_restricted_group)
it 'performs Geo::ProjectSyncWorker for projects where last attempt to sync failed' do
create(:geo_project_registry, :sync_failed, project: project_1)
create(:geo_project_registry, :synced, project: project_2)
scheduled_jobs = [
{ job_id: 1, project_id: unsynced_project.id },
{ job_id: 2, project_id: unsynced_project_in_restricted_group.id }
]
expect(Geo::ProjectSyncWorker).to receive(:perform_async).once.and_return(spy)
is_expected.to receive(:scheduled_jobs).and_return(scheduled_jobs).at_least(:once)
is_expected.not_to receive(:schedule_job)
subject.perform(shard_name)
end
Sidekiq::Testing.inline! { subject.perform(shard_name) }
end
it 'performs Geo::ProjectSyncWorker for synced projects updated recently' do
create(:geo_project_registry, :synced, :repository_dirty, project: project_1)
create(:geo_project_registry, :synced, project: project_2)
create(:geo_project_registry, :synced, :wiki_dirty)
context 'backoff time' do
let(:cache_key) { "#{described_class.name.underscore}:shard:#{shard_name}:skip" }
expect(Geo::ProjectSyncWorker).to receive(:perform_async).twice.and_return(spy)
before do
allow(Rails.cache).to receive(:read).and_call_original
allow(Rails.cache).to receive(:write).and_call_original
end
subject.perform(shard_name)
end
it 'sets the back off time when there are no pending items' do
create(:geo_project_registry, :synced, project: unsynced_project_in_restricted_group)
create(:geo_project_registry, :synced, project: unsynced_project)
it 'does not schedule a job twice for the same project' do
create(:geo_project_registry, project: project_2)
create(:geo_project_registry, project: project_1)
expect(Rails.cache).to receive(:write).with(cache_key, true, expires_in: 300.seconds).once
scheduled_jobs = [
{ job_id: 1, project_id: project_2.id },
{ job_id: 2, project_id: project_1.id }
]
subject.perform(shard_name)
end
is_expected.to receive(:scheduled_jobs).and_return(scheduled_jobs).at_least(:once)
is_expected.not_to receive(:schedule_job)
it 'does not perform Geo::ProjectSyncWorker when the backoff time is set' do
expect(Rails.cache).to receive(:read).with(cache_key).and_return(true)
Sidekiq::Testing.inline! { subject.perform(shard_name) }
end
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async)
context 'backoff time' do
let(:cache_key) { "#{described_class.name.underscore}:shard:#{shard_name}:skip" }
subject.perform(shard_name)
end
before do
allow(Rails.cache).to receive(:read).and_call_original
allow(Rails.cache).to receive(:write).and_call_original
end
context 'repositories that have never been updated' do
let!(:project_list) { create_list(:project, 4, last_repository_updated_at: 2.hours.ago) }
let!(:abandoned_project) { create(:project) }
before do
# Project sync failed but never received an update
create(:geo_project_registry, :repository_sync_failed, project: abandoned_project)
abandoned_project.update_column(:last_repository_updated_at, 1.year.ago)
# Neither of these are needed for this spec
unsynced_project.destroy
unsynced_project_in_restricted_group.destroy
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:db_retrieve_batch_size).and_return(2) # Must be >1 because of the Geo::BaseSchedulerWorker#interleave
end
secondary.update!(repos_max_capacity: 3) # Must be more than db_retrieve_batch_size
project_list.each do |project|
create(:geo_project_registry, project: project)
it 'sets the back off time when there are no pending items' do
create(:geo_project_registry, :synced, project: project_1)
create(:geo_project_registry, :synced, project: project_2)
allow(Geo::ProjectSyncWorker)
.to receive(:perform_async)
.with(project.id, anything)
.and_call_original
end
allow_next_instance_of(Geo::ProjectRegistry) do |instance|
allow(instance).to receive(:wiki_sync_due?).and_return(false)
end
expect(Rails.cache).to receive(:write).with(cache_key, true, expires_in: 300.seconds).once
allow_next_instance_of(Geo::RepositorySyncService) do |instance|
allow(instance).to receive(:expire_repository_caches)
end
end
it 'tries to sync project where last attempt to sync failed' do
expect(Geo::ProjectSyncWorker)
.to receive(:perform_async)
.with(abandoned_project.id, anything)
.at_least(:once)
.and_return(spy)
3.times do
Sidekiq::Testing.inline! { subject.perform(shard_name) }
end
end
subject.perform(shard_name)
end
context 'multiple shards' do
it 'uses two loops to schedule jobs', :sidekiq_might_not_need_inline do
create(:geo_project_registry, project: unsynced_project)
create(:geo_project_registry, project: unsynced_project_in_restricted_group)
Gitlab::ShardHealthCache.update([shard_name, 'shard2', 'shard3', 'shard4', 'shard5'])
secondary.update!(repos_max_capacity: 5)
it 'does not perform Geo::ProjectSyncWorker when the backoff time is set' do
expect(Rails.cache).to receive(:read).with(cache_key).and_return(true)
expect(subject).to receive(:schedule_jobs).twice.and_call_original
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async)
subject.perform(shard_name)
end
subject.perform(shard_name)
end
end
context 'all repositories fail' do
let!(:project_list) { create_list(:project, 4, :random_last_repository_updated_at) }
before do
# Neither of these are needed for this spec
unsynced_project.destroy
unsynced_project_in_restricted_group.destroy
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:db_retrieve_batch_size).and_return(2) # Must be >1 because of the Geo::BaseSchedulerWorker#interleave
end
secondary.update!(repos_max_capacity: 3) # Must be more than db_retrieve_batch_size
context 'repositories that have never been updated' do
let!(:project_list) { create_list(:project, 4, last_repository_updated_at: 2.hours.ago) }
let!(:abandoned_project) { create(:project) }
allow_next_instance_of(Project) do |instance|
allow(instance).to receive(:ensure_repository).and_raise(Gitlab::Shell::Error.new('foo'))
end
before do
# Project sync failed but never received an update
create(:geo_project_registry, :repository_sync_failed, project: abandoned_project)
abandoned_project.update_column(:last_repository_updated_at, 1.year.ago)
allow_next_instance_of(Geo::ProjectRegistry) do |instance|
allow(instance).to receive(:wiki_sync_due?).and_return(false)
end
# Neither of these are needed for this spec
project_2.destroy
project_1.destroy
allow_next_instance_of(Geo::RepositorySyncService) do |instance|
allow(instance).to receive(:expire_repository_caches)
end
allow_next_instance_of(Geo::ProjectHousekeepingService) do |instance|
allow(instance).to receive(:do_housekeeping)
end
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:db_retrieve_batch_size).and_return(2) # Must be >1 because of the Geo::BaseSchedulerWorker#interleave
end
it 'tries to sync every project' do
project_list.each do |project|
create(:geo_project_registry, project: project)
secondary.update!(repos_max_capacity: 3) # Must be more than db_retrieve_batch_size
expect(Geo::ProjectSyncWorker)
.to receive(:perform_async)
.with(project.id, anything)
.at_least(:once)
.and_call_original
end
project_list.each do |project|
create(:geo_project_registry, project: project)
3.times do
Sidekiq::Testing.inline! { subject.perform(shard_name) }
end
allow(Geo::ProjectSyncWorker)
.to receive(:perform_async)
.with(project.id, anything)
.and_call_original
end
context 'projects that require resync' do
context 'when project repository is dirty' do
it 'does not sync repositories' do
create(:geo_project_registry, :synced, :repository_dirty, project: unsynced_project)
create(:geo_project_registry, :synced, :repository_dirty, project: unsynced_project_in_restricted_group)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(unsynced_project.id, sync_repository: true, sync_wiki: false)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(unsynced_project_in_restricted_group.id, sync_repository: true, sync_wiki: false)
subject.perform(shard_name)
end
end
context 'when project wiki is dirty' do
it 'does not sync wikis' do
create(:geo_project_registry, :synced, :wiki_dirty, project: unsynced_project)
create(:geo_project_registry, :synced, :wiki_dirty, project: unsynced_project_in_restricted_group)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(unsynced_project.id, sync_repository: false, sync_wiki: true)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(unsynced_project_in_restricted_group.id, sync_repository: false, sync_wiki: true)
allow_next_instance_of(Geo::ProjectRegistry) do |instance|
allow(instance).to receive(:wiki_sync_due?).and_return(false)
end
subject.perform(shard_name)
end
end
allow_next_instance_of(Geo::RepositorySyncService) do |instance|
allow(instance).to receive(:expire_repository_caches)
end
end
context 'additional shards' do
it 'skips backfill for projects on unhealthy shards' do
missing_not_synced = create(:project, group: restricted_group)
missing_not_synced.update_column(:repository_storage, 'unknown')
missing_dirty = create(:project, group: restricted_group)
missing_dirty.update_column(:repository_storage, 'unknown')
create(:geo_project_registry, project: unsynced_project_in_restricted_group)
create(:geo_project_registry, project: missing_dirty)
create(:geo_project_registry, project: missing_not_synced)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).with(unsynced_project_in_restricted_group.id, anything)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(missing_not_synced.id, anything)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(missing_dirty.id, anything)
it 'tries to sync project where last attempt to sync failed' do
expect(Geo::ProjectSyncWorker)
.to receive(:perform_async)
.with(abandoned_project.id, anything)
.at_least(:once)
.and_return(spy)
3.times do
Sidekiq::Testing.inline! { subject.perform(shard_name) }
end
end
end
context 'when geo_project_registry_ssot_sync is disabled' do
before do
stub_feature_flags(geo_project_registry_ssot_sync: false)
end
it 'performs Geo::ProjectSyncWorker for each project' do
expect(Geo::ProjectSyncWorker).to receive(:perform_async).twice.and_return(spy)
subject.perform(shard_name)
end
it 'performs Geo::ProjectSyncWorker for projects where last attempt to sync failed' do
create(:geo_project_registry, :sync_failed, project: unsynced_project_in_restricted_group)
create(:geo_project_registry, :synced, project: unsynced_project)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).once.and_return(spy)
subject.perform(shard_name)
end
it 'performs Geo::ProjectSyncWorker for synced projects updated recently' do
create(:geo_project_registry, :synced, :repository_dirty, project: unsynced_project_in_restricted_group)
create(:geo_project_registry, :synced, project: unsynced_project)
create(:geo_project_registry, :synced, :wiki_dirty)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).twice.and_return(spy)
subject.perform(shard_name)
end
context 'when all repositories fail' do
let!(:project_list) { create_list(:project, 4, :random_last_repository_updated_at) }
it 'does not schedule a job twice for the same project' do
scheduled_jobs = [
{ job_id: 1, project_id: unsynced_project.id },
{ job_id: 2, project_id: unsynced_project_in_restricted_group.id }
]
is_expected.to receive(:scheduled_jobs).and_return(scheduled_jobs).at_least(:once)
is_expected.not_to receive(:schedule_job)
Sidekiq::Testing.inline! { subject.perform(shard_name) }
end
context 'backoff time' do
let(:cache_key) { "#{described_class.name.underscore}:shard:#{shard_name}:skip" }
before do
# Neither of these are needed for this spec
project_2.destroy
project_1.destroy
before do
allow(Rails.cache).to receive(:read).and_call_original
allow(Rails.cache).to receive(:write).and_call_original
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:db_retrieve_batch_size).and_return(2) # Must be >1 because of the Geo::BaseSchedulerWorker#interleave
end
secondary.update!(repos_max_capacity: 3) # Must be more than db_retrieve_batch_size
it 'sets the back off time when there are no pending items' do
create(:geo_project_registry, :synced, project: unsynced_project_in_restricted_group)
create(:geo_project_registry, :synced, project: unsynced_project)
expect(Rails.cache).to receive(:write).with(cache_key, true, expires_in: 300.seconds).once
subject.perform(shard_name)
allow_next_instance_of(Project) do |instance|
allow(instance).to receive(:ensure_repository).and_raise(Gitlab::Shell::Error.new('foo'))
end
it 'does not perform Geo::ProjectSyncWorker when the backoff time is set' do
expect(Rails.cache).to receive(:read).with(cache_key).and_return(true)
allow_next_instance_of(Geo::ProjectRegistry) do |instance|
allow(instance).to receive(:wiki_sync_due?).and_return(false)
end
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async)
allow_next_instance_of(Geo::RepositorySyncService) do |instance|
allow(instance).to receive(:expire_repository_caches)
end
subject.perform(shard_name)
allow_next_instance_of(Geo::ProjectHousekeepingService) do |instance|
allow(instance).to receive(:do_housekeeping)
end
end
context 'repositories that have never been updated' do
let!(:project_list) { create_list(:project, 4, last_repository_updated_at: 2.hours.ago) }
let!(:abandoned_project) { create(:project) }
before do
# Project sync failed but never received an update
create(:geo_project_registry, :repository_sync_failed, project: abandoned_project)
abandoned_project.update_column(:last_repository_updated_at, 1.year.ago)
# Neither of these are needed for this spec
unsynced_project.destroy
unsynced_project_in_restricted_group.destroy
it 'tries to sync every project' do
project_list.each do |project|
create(:geo_project_registry, project: project)
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:db_retrieve_batch_size).and_return(2) # Must be >1 because of the Geo::BaseSchedulerWorker#interleave
end
secondary.update!(repos_max_capacity: 3) # Must be more than db_retrieve_batch_size
project_list.each do |project|
allow(Geo::ProjectSyncWorker)
.to receive(:perform_async)
.with(project.id, anything)
.and_call_original
end
allow_next_instance_of(Geo::ProjectRegistry) do |instance|
allow(instance).to receive(:wiki_sync_due?).and_return(false)
end
allow_next_instance_of(Geo::RepositorySyncService) do |instance|
allow(instance).to receive(:expire_repository_caches)
end
end
it 'tries to sync project where last attempt to sync failed' do
expect(Geo::ProjectSyncWorker)
.to receive(:perform_async)
.with(abandoned_project.id, anything)
.with(project.id, anything)
.at_least(:once)
.and_return(spy)
3.times do
Sidekiq::Testing.inline! { subject.perform(shard_name) }
end
end
end
context 'multiple shards' do
it 'uses two loops to schedule jobs', :sidekiq_might_not_need_inline do
expect(subject).to receive(:schedule_jobs).twice.and_call_original
Gitlab::ShardHealthCache.update([shard_name, 'shard2', 'shard3', 'shard4', 'shard5'])
secondary.update!(repos_max_capacity: 5)
subject.perform(shard_name)
.and_call_original
end
end
context 'when node has namespace restrictions', :request_store do
before do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [restricted_group])
allow(::Gitlab::Geo).to receive(:current_node).and_call_original
Rails.cache.write(:current_node, secondary.to_json)
allow(::GeoNode).to receive(:current_node).and_return(secondary)
end
it 'does not perform Geo::ProjectSyncWorker for projects that do not belong to selected namespaces to replicate' do
expect(Geo::ProjectSyncWorker).to receive(:perform_async)
.with(unsynced_project_in_restricted_group.id, sync_repository: true, sync_wiki: true)
.once
.and_return(spy)
subject.perform(shard_name)
end
it 'does not perform Geo::ProjectSyncWorker for synced projects updated recently that do not belong to selected namespaces to replicate' do
create(:geo_project_registry, :synced, :repository_dirty, project: unsynced_project_in_restricted_group)
create(:geo_project_registry, :synced, :repository_dirty, project: unsynced_project)
expect(Geo::ProjectSyncWorker).to receive(:perform_async)
.with(unsynced_project_in_restricted_group.id, sync_repository: true, sync_wiki: false)
.once
.and_return(spy)
subject.perform(shard_name)
3.times do
Sidekiq::Testing.inline! { subject.perform(shard_name) }
end
end
context 'all repositories fail' do
let!(:project_list) { create_list(:project, 4, :random_last_repository_updated_at) }
before do
# Neither of these are needed for this spec
unsynced_project.destroy
unsynced_project_in_restricted_group.destroy
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:db_retrieve_batch_size).and_return(2) # Must be >1 because of the Geo::BaseSchedulerWorker#interleave
end
secondary.update!(repos_max_capacity: 3) # Must be more than db_retrieve_batch_size
allow_next_instance_of(Project) do |instance|
allow(instance).to receive(:ensure_repository).and_raise(Gitlab::Shell::Error.new('foo'))
end
allow_next_instance_of(Geo::ProjectRegistry) do |instance|
allow(instance).to receive(:wiki_sync_due?).and_return(false)
end
context 'projects that require resync' do
context 'when project repository is dirty' do
it 'does not sync repositories' do
create(:geo_project_registry, :synced, :repository_dirty, project: project_2)
create(:geo_project_registry, :synced, :repository_dirty, project: project_1)
allow_next_instance_of(Geo::RepositorySyncService) do |instance|
allow(instance).to receive(:expire_repository_caches)
end
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(project_2.id, sync_repository: true, sync_wiki: false)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(project_1.id, sync_repository: true, sync_wiki: false)
allow_next_instance_of(Geo::ProjectHousekeepingService) do |instance|
allow(instance).to receive(:do_housekeeping)
subject.perform(shard_name)
end
end
it 'tries to sync every project' do
project_list.each do |project|
expect(Geo::ProjectSyncWorker)
.to receive(:perform_async)
.with(project.id, anything)
.at_least(:once)
.and_call_original
end
context 'when project wiki is dirty' do
it 'does not sync wikis' do
create(:geo_project_registry, :synced, :wiki_dirty, project: project_2)
create(:geo_project_registry, :synced, :wiki_dirty, project: project_1)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(project_2.id, sync_repository: false, sync_wiki: true)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(project_1.id, sync_repository: false, sync_wiki: true)
3.times do
Sidekiq::Testing.inline! { subject.perform(shard_name) }
subject.perform(shard_name)
end
end
end
end
context 'additional shards' do
it 'skips backfill for projects on unhealthy shards' do
missing_not_synced = create(:project, group: restricted_group)
missing_not_synced.update_column(:repository_storage, 'unknown')
missing_dirty = create(:project, group: restricted_group)
missing_dirty.update_column(:repository_storage, 'unknown')
context 'with multiple shards' do
it 'uses two loops to schedule jobs', :sidekiq_might_not_need_inline do
create(:geo_project_registry, project: project_2)
create(:geo_project_registry, project: project_1)
create(:geo_project_registry, :synced, :repository_dirty, project: missing_dirty)
Gitlab::ShardHealthCache.update([shard_name, 'shard2', 'shard3', 'shard4', 'shard5'])
secondary.update!(repos_max_capacity: 5)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).with(unsynced_project_in_restricted_group.id, anything)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(missing_not_synced.id, anything)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(missing_dirty.id, anything)
expect(subject).to receive(:schedule_jobs).twice.and_call_original
Sidekiq::Testing.inline! { subject.perform(shard_name) }
end
subject.perform(shard_name)
end
context 'projects that require resync' do
context 'when project repository is dirty' do
it 'syncs repository only' do
create(:geo_project_registry, :synced, :repository_dirty, project: unsynced_project)
create(:geo_project_registry, :synced, :repository_dirty, project: unsynced_project_in_restricted_group)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).with(unsynced_project.id, sync_repository: true, sync_wiki: false)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).with(unsynced_project_in_restricted_group.id, sync_repository: true, sync_wiki: false)
subject.perform(shard_name)
end
end
it 'skips backfill for projects on unhealthy shards' do
project_unhealthy_shard = create_project_on_shard('unknown')
context 'when project wiki is dirty' do
it 'syncs wiki only' do
create(:geo_project_registry, :synced, :wiki_dirty, project: unsynced_project)
create(:geo_project_registry, :synced, :wiki_dirty, project: unsynced_project_in_restricted_group)
create(:geo_project_registry, project: project_1)
create(:geo_project_registry, project: project_unhealthy_shard)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).with(unsynced_project.id, sync_repository: false, sync_wiki: true)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).with(unsynced_project_in_restricted_group.id, sync_repository: false, sync_wiki: true)
expect(Geo::ProjectSyncWorker).to receive(:perform_async).with(project_1.id, anything)
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async).with(project_unhealthy_shard.id, anything)
subject.perform(shard_name)
end
end
Sidekiq::Testing.inline! { subject.perform(shard_name) }
end
end
end
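The reworked 'skips backfill' example above calls a create_project_on_shard helper that is not part of this diff. A plausible sketch of what such a helper does, based on the pattern the old example used inline (update_column on repository_storage); the name and exact location are assumptions:

  # Hypothetical helper; the real definition lives elsewhere in the spec support code.
  def create_project_on_shard(shard_name)
    project = create(:project)
    # Bypass validations so the project can point at a shard that is not configured.
    project.update_column(:repository_storage, shard_name)
    project
  end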