diff --git a/app/services/geo/repository_backfill_service.rb b/app/services/geo/repository_backfill_service.rb index 67cbf998c3e76b99ae694a27e155bb1a1a5ef73b..3c909140fa6edd00bf489b2e72873bf11251483b 100644 --- a/app/services/geo/repository_backfill_service.rb +++ b/app/services/geo/repository_backfill_service.rb @@ -10,6 +10,8 @@ module Geo end def execute + return if backfilled?(project) + try_obtain_lease do log('Started repository sync') started_at, finished_at = fetch_repositories @@ -81,6 +83,29 @@ module Geo Gitlab::ExclusiveLease.cancel(lease_key, repository_lease) end + def backfilled?(project) + return false unless project.repository.exists? + return false if project.repository.exists? && project.repository.empty? + return true if registry_exists?(project) + + # When Geo customers upgrade to 9.0, the secondaries nodes that are + # enabled will start the backfilling process automatically. We need + # to populate the tracking database correctly for projects synced + # before the process being started or projects created during the + # backfilling. Otherwise, the query to retrieve the projects will + # always return the same projects because they don't have entries + # in the tracking database + update_registry(DateTime.now, DateTime.now) + + true + end + + def registry_exists?(project) + Geo::ProjectRegistry.where(project_id: project.id) + .where.not(last_repository_synced_at: nil) + .any? + end + def update_registry(started_at, finished_at) log('Updating registry information') registry = Geo::ProjectRegistry.find_or_initialize_by(project_id: project.id) diff --git a/app/workers/geo_backfill_worker.rb b/app/workers/geo_backfill_worker.rb index d45015530e4a288135e02be9657ab1854141c9fb..f1e58639582e7a20bb432580b28bdd6a4df28272 100644 --- a/app/workers/geo_backfill_worker.rb +++ b/app/workers/geo_backfill_worker.rb @@ -19,9 +19,6 @@ class GeoBackfillWorker break if over_time?(start_time) break unless Gitlab::Geo.current_node_enabled? - project = Project.find(project_id) - next if synced?(project) - # We try to obtain a lease here for the entire backfilling process # because backfill the repositories continuously at a controlled rate # instead of hammering the primary node. Initially, we are backfilling @@ -51,16 +48,6 @@ class GeoBackfillWorker Time.now - start_time >= RUN_TIME end - def synced?(project) - project.repository_exists? || registry_exists?(project) - end - - def registry_exists?(project) - Geo::ProjectRegistry.where(project_id: project.id) - .where.not(last_repository_synced_at: nil) - .any? - end - def try_obtain_lease lease = Gitlab::ExclusiveLease.new(lease_key, timeout: lease_timeout).try_obtain diff --git a/spec/services/geo/repository_backfill_service_spec.rb b/spec/services/geo/repository_backfill_service_spec.rb index 882132c35a18b7e3c4dde960720ebb4854e73b68..e33ba8c6764bef2a937423f8419e70701bf17ca5 100644 --- a/spec/services/geo/repository_backfill_service_spec.rb +++ b/spec/services/geo/repository_backfill_service_spec.rb @@ -2,62 +2,117 @@ require 'spec_helper' describe Geo::RepositoryBackfillService, services: true do let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') } - let(:project) { create(:empty_project) } subject { described_class.new(project.id) } describe '#execute' do - it 'fetches project repositories' do - fetch_count = 0 + context 'when repository is empty' do + let(:project) { create(:project_empty_repo) } - allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do - fetch_count += 1 + it 'fetches project repositories' do + fetch_count = 0 + + allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do + fetch_count += 1 + end + + subject.execute + + expect(fetch_count).to eq 2 end - subject.execute + it 'expires repository caches' do + allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { true } - expect(fetch_count).to eq 2 - end + expect_any_instance_of(Repository).to receive(:expire_all_method_caches).once + expect_any_instance_of(Repository).to receive(:expire_branch_cache).once + expect_any_instance_of(Repository).to receive(:expire_content_cache).once - it 'expires repository caches' do - allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { true } + subject.execute + end - expect_any_instance_of(Repository).to receive(:expire_all_method_caches).once - expect_any_instance_of(Repository).to receive(:expire_branch_cache).once - expect_any_instance_of(Repository).to receive(:expire_content_cache).once + it 'releases lease' do + expect(Gitlab::ExclusiveLease).to receive(:cancel).once.and_call_original - subject.execute - end + subject.execute + end - it 'releases lease' do - expect(Gitlab::ExclusiveLease).to receive(:cancel).once.and_call_original + context 'tracking database' do + it 'tracks repository sync' do + expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1) + end - subject.execute - end + it 'stores last_repository_successful_sync_at when succeed' do + allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { true } + + subject.execute + + registry = Geo::ProjectRegistry.find_by(project_id: project.id) + + expect(registry.last_repository_successful_sync_at).not_to be_nil + end - context 'tracking database' do - it 'tracks repository sync' do - expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1) + it 'reset last_repository_successful_sync_at when fail' do + allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { raise Gitlab::Shell::Error } + + subject.execute + + registry = Geo::ProjectRegistry.find_by(project_id: project.id) + + expect(registry.last_repository_successful_sync_at).to be_nil + end end + end - it 'stores last_repository_successful_sync_at when succeed' do - allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { true } + context 'when repository exists and is not empty' do + let(:project) { create(:project) } + + it 'does not fetch the project repositories' do + fetch_count = 0 + + allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do + fetch_count += 1 + end subject.execute - registry = Geo::ProjectRegistry.find_by(project_id: project.id) + expect(fetch_count).to eq 0 + end - expect(registry.last_repository_successful_sync_at).not_to be_nil + context 'tracking database' do + it 'tracks missing repository sync' do + expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1) + end end + end - it 'reset last_repository_successful_sync_at when fail' do - allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { raise Gitlab::Shell::Error } + context 'when repository was backfilled' do + let(:project) { create(:project) } + + let!(:registry) do + Geo::ProjectRegistry.create( + project: project, + last_repository_synced_at: DateTime.now, + last_repository_successful_sync_at: DateTime.now + ) + end + + it 'does not fetch the project repositories' do + fetch_count = 0 + + allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do + fetch_count += 1 + end subject.execute - registry = Geo::ProjectRegistry.find_by(project_id: project.id) + expect(fetch_count).to eq 0 + end - expect(registry.last_repository_successful_sync_at).to be_nil + context 'tracking database' do + it 'does not track repository sync' do + expect { subject.execute }.not_to change(Geo::ProjectRegistry, :count) + end end end end diff --git a/spec/workers/geo_backfill_worker_spec.rb b/spec/workers/geo_backfill_worker_spec.rb index 791d6139261640bb30b3b7dacdb4b1d437923e56..9c93bcb0d5153241c8e7f28f1f6d39ee40ddd78d 100644 --- a/spec/workers/geo_backfill_worker_spec.rb +++ b/spec/workers/geo_backfill_worker_spec.rb @@ -42,14 +42,6 @@ describe Geo::GeoBackfillWorker, services: true do subject.perform end - it 'does not perform Geo::RepositoryBackfillService for projects that repository exists' do - create_list(:project, 2) - - expect(Geo::RepositoryBackfillService).to receive(:new).twice.and_return(spy) - - subject.perform - end - it 'does not perform Geo::RepositoryBackfillService when can not obtain a lease' do allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain) { false }