From 9b09c3d71aa3ccc42644b7fee6b8357e29acf473 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Javier=20L=C3=B3pez?= <fjlopez@gitlab.com> Date: Tue, 19 Jan 2021 18:34:57 +0100 Subject: [PATCH] Create different GitGarbageCollectWorkers depending on the resource In this commit we extract the existing logic of the `GitGarbageCollectWorker` into a concern, in order to reuse it later with the new wiki garbage collector that we're introding as well in this commit. --- .../repositories/can_housekeep_repository.rb | 4 + app/models/project.rb | 5 + app/models/wiki.rb | 9 + .../repositories/housekeeping_service.rb | 2 +- app/workers/all_queues.yml | 8 + .../concerns/git_garbage_collect_methods.rb | 135 +++++++ .../projects/git_garbage_collect_worker.rb | 126 +----- .../wikis/git_garbage_collect_worker.rb | 20 + config/sidekiq_queues.yml | 4 + ee/app/models/group_wiki.rb | 5 + ee/app/workers/all_queues.yml | 8 + .../group_wikis/git_garbage_collect_worker.rb | 15 + ee/spec/models/group_wiki_spec.rb | 1 + .../git_garbage_collect_worker_spec.rb | 12 + spec/models/project_spec.rb | 1 + spec/models/project_wiki_spec.rb | 1 + ...an_housekeep_repository_shared_examples.rb | 6 + .../housekeeping_shared_examples.rb | 18 +- ...garbage_collect_methods_shared_examples.rb | 320 +++++++++++++++ .../git_garbage_collect_worker_spec.rb | 370 ++---------------- .../wikis/git_garbage_collect_worker_spec.rb | 14 + 21 files changed, 633 insertions(+), 451 deletions(-) create mode 100644 app/workers/concerns/git_garbage_collect_methods.rb create mode 100644 app/workers/wikis/git_garbage_collect_worker.rb create mode 100644 ee/app/workers/group_wikis/git_garbage_collect_worker.rb create mode 100644 ee/spec/workers/group_wikis/git_garbage_collect_worker_spec.rb create mode 100644 spec/support/shared_examples/workers/concerns/git_garbage_collect_methods_shared_examples.rb create mode 100644 spec/workers/wikis/git_garbage_collect_worker_spec.rb diff --git a/app/models/concerns/repositories/can_housekeep_repository.rb b/app/models/concerns/repositories/can_housekeep_repository.rb index 2b79851a07c..946f82c5f36 100644 --- a/app/models/concerns/repositories/can_housekeep_repository.rb +++ b/app/models/concerns/repositories/can_housekeep_repository.rb @@ -16,6 +16,10 @@ module Repositories Gitlab::Redis::SharedState.with { |redis| redis.del(pushes_since_gc_redis_shared_state_key) } end + def git_garbage_collect_worker_klass + raise NotImplementedError + end + private def pushes_since_gc_redis_shared_state_key diff --git a/app/models/project.rb b/app/models/project.rb index bfa85fc9d99..b004d907da1 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -2522,6 +2522,11 @@ class Project < ApplicationRecord tracing_setting&.external_url end + override :git_garbage_collect_worker_klass + def git_garbage_collect_worker_klass + Projects::GitGarbageCollectWorker + end + private def find_service(services, name) diff --git a/app/models/wiki.rb b/app/models/wiki.rb index 11c10a61d18..ab53515ec48 100644 --- a/app/models/wiki.rb +++ b/app/models/wiki.rb @@ -256,6 +256,15 @@ class Wiki def after_post_receive end + override :git_garbage_collect_worker_klass + def git_garbage_collect_worker_klass + Wikis::GitGarbageCollectWorker + end + + def cleanup + @repository = nil + end + private def commit_details(action, message = nil, title = nil) diff --git a/app/services/repositories/housekeeping_service.rb b/app/services/repositories/housekeeping_service.rb index e97c295e18e..de80390e60b 100644 --- a/app/services/repositories/housekeeping_service.rb +++ b/app/services/repositories/housekeeping_service.rb @@ -45,7 +45,7 @@ module Repositories private def execute_gitlab_shell_gc(lease_uuid) - Projects::GitGarbageCollectWorker.perform_async(@resource.id, task, lease_key, lease_uuid) + @resource.git_garbage_collect_worker_klass.perform_async(@resource.id, task, lease_key, lease_uuid) ensure if pushes_since_gc >= gc_period Gitlab::Metrics.measure(:reset_pushes_since_gc) do diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 6179265149e..9b6c9625e28 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -2239,6 +2239,14 @@ :weight: 1 :idempotent: true :tags: [] +- :name: wikis_git_garbage_collect + :feature_category: :gitaly + :has_external_dependencies: + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: + :tags: [] - :name: x509_certificate_revoke :feature_category: :source_code_management :has_external_dependencies: diff --git a/app/workers/concerns/git_garbage_collect_methods.rb b/app/workers/concerns/git_garbage_collect_methods.rb new file mode 100644 index 00000000000..17a80d1ddb3 --- /dev/null +++ b/app/workers/concerns/git_garbage_collect_methods.rb @@ -0,0 +1,135 @@ +# frozen_string_literal: true + +module GitGarbageCollectMethods + extend ActiveSupport::Concern + + included do + include ApplicationWorker + + sidekiq_options retry: false + feature_category :gitaly + loggable_arguments 1, 2, 3 + end + + # Timeout set to 24h + LEASE_TIMEOUT = 86400 + + def perform(resource_id, task = :gc, lease_key = nil, lease_uuid = nil) + resource = find_resource(resource_id) + lease_key ||= default_lease_key(task, resource) + active_uuid = get_lease_uuid(lease_key) + + if active_uuid + return unless active_uuid == lease_uuid + + renew_lease(lease_key, active_uuid) + else + lease_uuid = try_obtain_lease(lease_key) + + return unless lease_uuid + end + + task = task.to_sym + + before_gitaly_call(task, resource) + gitaly_call(task, resource) + + # Refresh the branch cache in case garbage collection caused a ref lookup to fail + flush_ref_caches(resource) if gc?(task) + + update_repository_statistics(resource) if task != :pack_refs + + # In case pack files are deleted, release libgit2 cache and open file + # descriptors ASAP instead of waiting for Ruby garbage collection + resource.cleanup + ensure + cancel_lease(lease_key, lease_uuid) if lease_key.present? && lease_uuid.present? + end + + private + + def default_lease_key(task, resource) + "git_gc:#{task}:#{resource.class.name.underscore.pluralize}:#{resource.id}" + end + + def find_resource(id) + raise NotImplementedError + end + + def gc?(task) + task == :gc || task == :prune + end + + def try_obtain_lease(key) + ::Gitlab::ExclusiveLease.new(key, timeout: LEASE_TIMEOUT).try_obtain + end + + def renew_lease(key, uuid) + ::Gitlab::ExclusiveLease.new(key, uuid: uuid, timeout: LEASE_TIMEOUT).renew + end + + def cancel_lease(key, uuid) + ::Gitlab::ExclusiveLease.cancel(key, uuid) + end + + def get_lease_uuid(key) + ::Gitlab::ExclusiveLease.get_uuid(key) + end + + def before_gitaly_call(task, resource) + # no-op + end + + def gitaly_call(task, resource) + repository = resource.repository.raw_repository + + client = get_gitaly_client(task, repository) + + case task + when :prune, :gc + client.garbage_collect(bitmaps_enabled?, prune: task == :prune) + when :full_repack + client.repack_full(bitmaps_enabled?) + when :incremental_repack + client.repack_incremental + when :pack_refs + client.pack_refs + end + rescue GRPC::NotFound => e + Gitlab::GitLogger.error("#{__method__} failed:\nRepository not found") + raise Gitlab::Git::Repository::NoRepository.new(e) + rescue GRPC::BadStatus => e + Gitlab::GitLogger.error("#{__method__} failed:\n#{e}") + raise Gitlab::Git::CommandError.new(e) + end + + def get_gitaly_client(task, repository) + if task == :pack_refs + Gitlab::GitalyClient::RefService + else + Gitlab::GitalyClient::RepositoryService + end.new(repository) + end + + def bitmaps_enabled? + Gitlab::CurrentSettings.housekeeping_bitmaps_enabled + end + + def flush_ref_caches(resource) + resource.repository.expire_branches_cache + resource.repository.branch_names + resource.repository.has_visible_content? + end + + def update_repository_statistics(resource) + resource.repository.expire_statistics_caches + + return if Gitlab::Database.read_only? # GitGarbageCollectWorker may be run on a Geo secondary + + update_db_repository_statistics(resource) + end + + def update_db_repository_statistics(resource) + # no-op + end +end diff --git a/app/workers/projects/git_garbage_collect_worker.rb b/app/workers/projects/git_garbage_collect_worker.rb index aba99ce35d0..7c7c7f27a6b 100644 --- a/app/workers/projects/git_garbage_collect_worker.rb +++ b/app/workers/projects/git_garbage_collect_worker.rb @@ -2,131 +2,41 @@ module Projects class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker - include ApplicationWorker - - sidekiq_options retry: false - feature_category :gitaly - loggable_arguments 1, 2, 3 - - # Timeout set to 24h - LEASE_TIMEOUT = 86400 - - def perform(project_id, task = :gc, lease_key = nil, lease_uuid = nil) - lease_key ||= "git_gc:#{task}:#{project_id}" - project = find_project(project_id) - active_uuid = get_lease_uuid(lease_key) - - if active_uuid - return unless active_uuid == lease_uuid - - renew_lease(lease_key, active_uuid) - else - lease_uuid = try_obtain_lease(lease_key) - - return unless lease_uuid - end - - task = task.to_sym - - if gc?(task) - ::Projects::GitDeduplicationService.new(project).execute - cleanup_orphan_lfs_file_references(project) - end - - gitaly_call(task, project) - - # Refresh the branch cache in case garbage collection caused a ref lookup to fail - flush_ref_caches(project) if gc?(task) - - update_repository_statistics(project) if task != :pack_refs - - # In case pack files are deleted, release libgit2 cache and open file - # descriptors ASAP instead of waiting for Ruby garbage collection - project.cleanup - ensure - cancel_lease(lease_key, lease_uuid) if lease_key.present? && lease_uuid.present? - end + extend ::Gitlab::Utils::Override + include GitGarbageCollectMethods private - def find_project(project_id) - Project.find(project_id) + override :default_lease_key + def default_lease_key(task, resource) + "git_gc:#{task}:#{resource.id}" end - def gc?(task) - task == :gc || task == :prune + override :find_resource + def find_resource(id) + Project.find(id) end - def try_obtain_lease(key) - ::Gitlab::ExclusiveLease.new(key, timeout: LEASE_TIMEOUT).try_obtain - end + override :before_gitaly_call + def before_gitaly_call(task, resource) + return unless gc?(task) - def renew_lease(key, uuid) - ::Gitlab::ExclusiveLease.new(key, uuid: uuid, timeout: LEASE_TIMEOUT).renew + ::Projects::GitDeduplicationService.new(resource).execute + cleanup_orphan_lfs_file_references(resource) end - def cancel_lease(key, uuid) - ::Gitlab::ExclusiveLease.cancel(key, uuid) - end - - def get_lease_uuid(key) - ::Gitlab::ExclusiveLease.get_uuid(key) - end - - def gitaly_call(task, project) - repository = project.repository.raw_repository - client = get_gitaly_client(task, repository) - - case task - when :prune, :gc - client.garbage_collect(bitmaps_enabled?, prune: task == :prune) - when :full_repack - client.repack_full(bitmaps_enabled?) - when :incremental_repack - client.repack_incremental - when :pack_refs - client.pack_refs - end - rescue GRPC::NotFound => e - Gitlab::GitLogger.error("#{__method__} failed:\nRepository not found") - raise Gitlab::Git::Repository::NoRepository.new(e) - rescue GRPC::BadStatus => e - Gitlab::GitLogger.error("#{__method__} failed:\n#{e}") - raise Gitlab::Git::CommandError.new(e) - end - - def get_gitaly_client(task, repository) - if task == :pack_refs - Gitlab::GitalyClient::RefService - else - Gitlab::GitalyClient::RepositoryService - end.new(repository) - end - - def cleanup_orphan_lfs_file_references(project) + def cleanup_orphan_lfs_file_references(resource) return if Gitlab::Database.read_only? # GitGarbageCollectWorker may be run on a Geo secondary - ::Gitlab::Cleanup::OrphanLfsFileReferences.new(project, dry_run: false, logger: logger).run! + ::Gitlab::Cleanup::OrphanLfsFileReferences.new(resource, dry_run: false, logger: logger).run! rescue => err Gitlab::GitLogger.warn(message: "Cleaning up orphan LFS objects files failed", error: err.message) Gitlab::ErrorTracking.track_and_raise_for_dev_exception(err) end - def flush_ref_caches(project) - project.repository.expire_branches_cache - project.repository.branch_names - project.repository.has_visible_content? - end - - def update_repository_statistics(project) - project.repository.expire_statistics_caches - return if Gitlab::Database.read_only? # GitGarbageCollectWorker may be run on a Geo secondary - - Projects::UpdateStatisticsService.new(project, nil, statistics: [:repository_size, :lfs_objects_size]).execute - end - - def bitmaps_enabled? - Gitlab::CurrentSettings.housekeeping_bitmaps_enabled + override :update_db_repository_statistics + def update_db_repository_statistics(resource) + Projects::UpdateStatisticsService.new(resource, nil, statistics: [:repository_size, :lfs_objects_size]).execute end end end diff --git a/app/workers/wikis/git_garbage_collect_worker.rb b/app/workers/wikis/git_garbage_collect_worker.rb new file mode 100644 index 00000000000..1b455c50618 --- /dev/null +++ b/app/workers/wikis/git_garbage_collect_worker.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +module Wikis + class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker + extend ::Gitlab::Utils::Override + include GitGarbageCollectMethods + + private + + override :find_resource + def find_resource(id) + Project.find(id).wiki + end + + override :update_db_repository_statistics + def update_db_repository_statistics(resource) + Projects::UpdateStatisticsService.new(resource.container, nil, statistics: [:wiki_size]).execute + end + end +end diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index b01239d6aaf..18398cfcbe9 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -158,6 +158,8 @@ - 1 - - group_saml_group_sync - 1 +- - group_wikis_git_garbage_collect + - 1 - - hashed_storage - 1 - - import_issues_csv @@ -362,5 +364,7 @@ - 1 - - web_hooks_destroy - 1 +- - wikis_git_garbage_collect + - 1 - - x509_certificate_revoke - 1 diff --git a/ee/app/models/group_wiki.rb b/ee/app/models/group_wiki.rb index b1d8dcec12a..8cd449d9568 100644 --- a/ee/app/models/group_wiki.rb +++ b/ee/app/models/group_wiki.rb @@ -50,4 +50,9 @@ class GroupWiki < Wiki # TODO: Update group wiki storage # https://gitlab.com/gitlab-org/gitlab/-/issues/230465 end + + override :git_garbage_collect_worker_klass + def git_garbage_collect_worker_klass + GroupWikis::GitGarbageCollectWorker + end end diff --git a/ee/app/workers/all_queues.yml b/ee/app/workers/all_queues.yml index 3af7050cc9d..03190d8a3f8 100644 --- a/ee/app/workers/all_queues.yml +++ b/ee/app/workers/all_queues.yml @@ -757,6 +757,14 @@ :weight: 1 :idempotent: true :tags: [] +- :name: group_wikis_git_garbage_collect + :feature_category: :gitaly + :has_external_dependencies: + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: + :tags: [] - :name: incident_management_apply_incident_sla_exceeded_label :feature_category: :incident_management :has_external_dependencies: diff --git a/ee/app/workers/group_wikis/git_garbage_collect_worker.rb b/ee/app/workers/group_wikis/git_garbage_collect_worker.rb new file mode 100644 index 00000000000..be71848399f --- /dev/null +++ b/ee/app/workers/group_wikis/git_garbage_collect_worker.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module GroupWikis + class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker + extend ::Gitlab::Utils::Override + include GitGarbageCollectMethods + + private + + override :find_resource + def find_resource(id) + Group.find(id).wiki + end + end +end diff --git a/ee/spec/models/group_wiki_spec.rb b/ee/spec/models/group_wiki_spec.rb index aa241d354a6..c6820f42759 100644 --- a/ee/spec/models/group_wiki_spec.rb +++ b/ee/spec/models/group_wiki_spec.rb @@ -133,5 +133,6 @@ RSpec.describe GroupWiki do let_it_be(:resource) { create(:group_wiki) } let(:resource_key) { 'group_wikis' } + let(:expected_worker_class) { ::GroupWikis::GitGarbageCollectWorker } end end diff --git a/ee/spec/workers/group_wikis/git_garbage_collect_worker_spec.rb b/ee/spec/workers/group_wikis/git_garbage_collect_worker_spec.rb new file mode 100644 index 00000000000..cc1c0c50f3e --- /dev/null +++ b/ee/spec/workers/group_wikis/git_garbage_collect_worker_spec.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe GroupWikis::GitGarbageCollectWorker do + it_behaves_like 'can collect git garbage', update_statistics: false do + let_it_be(:resource) { create(:group_wiki) } + let_it_be(:page) { create(:wiki_page, wiki: resource) } + + let(:expected_default_lease) { "group_wikis:#{resource.id}" } + end +end diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index 8d0b7336955..011c0dfe438 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -3002,6 +3002,7 @@ RSpec.describe Project, factory_default: :keep do it_behaves_like 'can housekeep repository' do let(:resource) { build_stubbed(:project) } let(:resource_key) { 'projects' } + let(:expected_worker_class) { Projects::GitGarbageCollectWorker } end describe '#deployment_variables' do diff --git a/spec/models/project_wiki_spec.rb b/spec/models/project_wiki_spec.rb index 8001d009901..c04fc70deca 100644 --- a/spec/models/project_wiki_spec.rb +++ b/spec/models/project_wiki_spec.rb @@ -47,5 +47,6 @@ RSpec.describe ProjectWiki do let_it_be(:resource) { create(:project_wiki) } let(:resource_key) { 'project_wikis' } + let(:expected_worker_class) { Wikis::GitGarbageCollectWorker } end end diff --git a/spec/support/shared_examples/models/concerns/repositories/can_housekeep_repository_shared_examples.rb b/spec/support/shared_examples/models/concerns/repositories/can_housekeep_repository_shared_examples.rb index 2f0b95427d2..4006b8226ce 100644 --- a/spec/support/shared_examples/models/concerns/repositories/can_housekeep_repository_shared_examples.rb +++ b/spec/support/shared_examples/models/concerns/repositories/can_housekeep_repository_shared_examples.rb @@ -41,5 +41,11 @@ RSpec.shared_examples 'can housekeep repository' do expect(resource.send(:pushes_since_gc_redis_shared_state_key)).to eq("#{resource_key}/#{resource.id}/pushes_since_gc") end end + + describe '#git_garbage_collect_worker_klass' do + it 'defines a git gargabe collect worker' do + expect(resource.git_garbage_collect_worker_klass).to eq(expected_worker_class) + end + end end end diff --git a/spec/support/shared_examples/services/repositories/housekeeping_shared_examples.rb b/spec/support/shared_examples/services/repositories/housekeeping_shared_examples.rb index ad0dc389aeb..4c00faee56b 100644 --- a/spec/support/shared_examples/services/repositories/housekeeping_shared_examples.rb +++ b/spec/support/shared_examples/services/repositories/housekeeping_shared_examples.rb @@ -3,16 +3,16 @@ RSpec.shared_examples 'housekeeps repository' do subject { described_class.new(resource) } - context 'with a clean redis state', :clean_gitlab_redis_shared_state do + context 'with a clean redis state', :clean_gitlab_redis_shared_state, :aggregate_failures do describe '#execute' do it 'enqueues a sidekiq job' do expect(subject).to receive(:try_obtain_lease).and_return(:the_uuid) expect(subject).to receive(:lease_key).and_return(:the_lease_key) expect(subject).to receive(:task).and_return(:incremental_repack) - expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :incremental_repack, :the_lease_key, :the_uuid).and_call_original + expect(resource.git_garbage_collect_worker_klass).to receive(:perform_async).with(resource.id, :incremental_repack, :the_lease_key, :the_uuid).and_call_original Sidekiq::Testing.fake! do - expect { subject.execute }.to change(Projects::GitGarbageCollectWorker.jobs, :size).by(1) + expect { subject.execute }.to change(resource.git_garbage_collect_worker_klass.jobs, :size).by(1) end end @@ -38,7 +38,7 @@ RSpec.shared_examples 'housekeeps repository' do end it 'does not enqueue a job' do - expect(Projects::GitGarbageCollectWorker).not_to receive(:perform_async) + expect(resource.git_garbage_collect_worker_klass).not_to receive(:perform_async) expect { subject.execute }.to raise_error(Repositories::HousekeepingService::LeaseTaken) end @@ -63,16 +63,16 @@ RSpec.shared_examples 'housekeeps repository' do allow(subject).to receive(:lease_key).and_return(:the_lease_key) # At push 200 - expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :gc, :the_lease_key, :the_uuid) + expect(resource.git_garbage_collect_worker_klass).to receive(:perform_async).with(resource.id, :gc, :the_lease_key, :the_uuid) .once # At push 50, 100, 150 - expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :full_repack, :the_lease_key, :the_uuid) + expect(resource.git_garbage_collect_worker_klass).to receive(:perform_async).with(resource.id, :full_repack, :the_lease_key, :the_uuid) .exactly(3).times # At push 10, 20, ... (except those above) - expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :incremental_repack, :the_lease_key, :the_uuid) + expect(resource.git_garbage_collect_worker_klass).to receive(:perform_async).with(resource.id, :incremental_repack, :the_lease_key, :the_uuid) .exactly(16).times # At push 6, 12, 18, ... (except those above) - expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :pack_refs, :the_lease_key, :the_uuid) + expect(resource.git_garbage_collect_worker_klass).to receive(:perform_async).with(resource.id, :pack_refs, :the_lease_key, :the_uuid) .exactly(27).times 201.times do @@ -90,7 +90,7 @@ RSpec.shared_examples 'housekeeps repository' do allow(housekeeping).to receive(:try_obtain_lease).and_return(:gc_uuid) allow(housekeeping).to receive(:lease_key).and_return(:gc_lease_key) - expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :gc, :gc_lease_key, :gc_uuid).twice + expect(resource.git_garbage_collect_worker_klass).to receive(:perform_async).with(resource.id, :gc, :gc_lease_key, :gc_uuid).twice 2.times do housekeeping.execute diff --git a/spec/support/shared_examples/workers/concerns/git_garbage_collect_methods_shared_examples.rb b/spec/support/shared_examples/workers/concerns/git_garbage_collect_methods_shared_examples.rb new file mode 100644 index 00000000000..f2314793cb4 --- /dev/null +++ b/spec/support/shared_examples/workers/concerns/git_garbage_collect_methods_shared_examples.rb @@ -0,0 +1,320 @@ +# frozen_string_literal: true + +require 'fileutils' + +RSpec.shared_examples 'can collect git garbage' do |update_statistics: true| + include GitHelpers + + let!(:lease_uuid) { SecureRandom.uuid } + let!(:lease_key) { "resource_housekeeping:#{resource.id}" } + let(:params) { [resource.id, task, lease_key, lease_uuid] } + let(:shell) { Gitlab::Shell.new } + let(:repository) { resource.repository } + let(:statistics_service_klass) { nil } + + subject { described_class.new } + + before do + allow(subject).to receive(:find_resource).and_return(resource) + end + + shared_examples 'it calls Gitaly' do + specify do + repository_service = instance_double(Gitlab::GitalyClient::RepositoryService) + + expect(subject).to receive(:get_gitaly_client).with(task, repository.raw_repository).and_return(repository_service) + expect(repository_service).to receive(gitaly_task) + + subject.perform(*params) + end + end + + shared_examples 'it updates the resource statistics' do + it 'updates the resource statistics' do + expect_next_instance_of(statistics_service_klass, anything, nil, statistics: statistics_keys) do |service| + expect(service).to receive(:execute) + end + + subject.perform(*params) + end + + it 'does nothing if the database is read-only' do + allow(Gitlab::Database).to receive(:read_only?) { true } + + expect(statistics_service_klass).not_to receive(:new) + + subject.perform(*params) + end + end + + describe '#perform', :aggregate_failures do + let(:gitaly_task) { :garbage_collect } + let(:task) { :gc } + + context 'with active lease_uuid' do + before do + allow(subject).to receive(:get_lease_uuid).and_return(lease_uuid) + end + + it_behaves_like 'it calls Gitaly' + it_behaves_like 'it updates the resource statistics' if update_statistics + + it "flushes ref caches when the task if 'gc'" do + expect(subject).to receive(:renew_lease).with(lease_key, lease_uuid).and_call_original + expect(repository).to receive(:expire_branches_cache).and_call_original + expect(repository).to receive(:branch_names).and_call_original + expect(repository).to receive(:has_visible_content?).and_call_original + expect(repository.raw_repository).to receive(:has_visible_content?).and_call_original + + subject.perform(*params) + end + + it 'handles gRPC errors' do + allow_next_instance_of(Gitlab::GitalyClient::RepositoryService, repository.raw_repository) do |instance| + allow(instance).to receive(:garbage_collect).and_raise(GRPC::NotFound) + end + + expect { subject.perform(*params) }.to raise_exception(Gitlab::Git::Repository::NoRepository) + end + end + + context 'with different lease than the active one' do + before do + allow(subject).to receive(:get_lease_uuid).and_return(SecureRandom.uuid) + end + + it 'returns silently' do + expect(repository).not_to receive(:expire_branches_cache).and_call_original + expect(repository).not_to receive(:branch_names).and_call_original + expect(repository).not_to receive(:has_visible_content?).and_call_original + + subject.perform(*params) + end + end + + context 'with no active lease' do + let(:params) { [resource.id] } + + before do + allow(subject).to receive(:get_lease_uuid).and_return(false) + end + + context 'when is able to get the lease' do + before do + allow(subject).to receive(:try_obtain_lease).and_return(SecureRandom.uuid) + end + + it_behaves_like 'it calls Gitaly' + it_behaves_like 'it updates the resource statistics' if update_statistics + + it "flushes ref caches when the task if 'gc'" do + expect(subject).to receive(:get_lease_uuid).with("git_gc:#{task}:#{expected_default_lease}").and_return(false) + expect(repository).to receive(:expire_branches_cache).and_call_original + expect(repository).to receive(:branch_names).and_call_original + expect(repository).to receive(:has_visible_content?).and_call_original + expect(repository.raw_repository).to receive(:has_visible_content?).and_call_original + + subject.perform(*params) + end + end + + context 'when no lease can be obtained' do + it 'returns silently' do + expect(subject).to receive(:try_obtain_lease).and_return(false) + + expect(subject).not_to receive(:command) + expect(repository).not_to receive(:expire_branches_cache).and_call_original + expect(repository).not_to receive(:branch_names).and_call_original + expect(repository).not_to receive(:has_visible_content?).and_call_original + + subject.perform(*params) + end + end + end + + context 'repack_full' do + let(:task) { :full_repack } + let(:gitaly_task) { :repack_full } + + before do + expect(subject).to receive(:get_lease_uuid).and_return(lease_uuid) + end + + it_behaves_like 'it calls Gitaly' + it_behaves_like 'it updates the resource statistics' if update_statistics + end + + context 'pack_refs' do + let(:task) { :pack_refs } + let(:gitaly_task) { :pack_refs } + + before do + expect(subject).to receive(:get_lease_uuid).and_return(lease_uuid) + end + + it 'calls Gitaly' do + repository_service = instance_double(Gitlab::GitalyClient::RefService) + + expect(subject).to receive(:get_gitaly_client).with(task, repository.raw_repository).and_return(repository_service) + expect(repository_service).to receive(gitaly_task) + + subject.perform(*params) + end + + it 'does not update the resource statistics' do + expect(statistics_service_klass).not_to receive(:new) + + subject.perform(*params) + end + end + + context 'repack_incremental' do + let(:task) { :incremental_repack } + let(:gitaly_task) { :repack_incremental } + + before do + expect(subject).to receive(:get_lease_uuid).and_return(lease_uuid) + end + + it_behaves_like 'it calls Gitaly' + it_behaves_like 'it updates the resource statistics' if update_statistics + end + + shared_examples 'gc tasks' do + before do + allow(subject).to receive(:get_lease_uuid).and_return(lease_uuid) + allow(subject).to receive(:bitmaps_enabled?).and_return(bitmaps_enabled) + end + + it 'incremental repack adds a new packfile' do + create_objects(resource) + before_packs = packs(resource) + + expect(before_packs.count).to be >= 1 + + subject.perform(resource.id, 'incremental_repack', lease_key, lease_uuid) + after_packs = packs(resource) + + # Exactly one new pack should have been created + expect(after_packs.count).to eq(before_packs.count + 1) + + # Previously existing packs are still around + expect(before_packs & after_packs).to eq(before_packs) + end + + it 'full repack consolidates into 1 packfile' do + create_objects(resource) + subject.perform(resource.id, 'incremental_repack', lease_key, lease_uuid) + before_packs = packs(resource) + + expect(before_packs.count).to be >= 2 + + subject.perform(resource.id, 'full_repack', lease_key, lease_uuid) + after_packs = packs(resource) + + expect(after_packs.count).to eq(1) + + # Previously existing packs should be gone now + expect(after_packs - before_packs).to eq(after_packs) + + expect(File.exist?(bitmap_path(after_packs.first))).to eq(bitmaps_enabled) + end + + it 'gc consolidates into 1 packfile and updates packed-refs' do + create_objects(resource) + before_packs = packs(resource) + before_packed_refs = packed_refs(resource) + + expect(before_packs.count).to be >= 1 + + # It's quite difficult to use `expect_next_instance_of` in this place + # because the RepositoryService is instantiated several times to do + # some repository calls like `exists?`, `create_repository`, ... . + # Therefore, since we're instantiating the object several times, + # RSpec has troubles figuring out which instance is the next and which + # one we want to mock. + # Besides, at this point, we actually want to perform the call to Gitaly, + # otherwise we would just use `instance_double` like in other parts of the + # spec file. + expect_any_instance_of(Gitlab::GitalyClient::RepositoryService) # rubocop:disable RSpec/AnyInstanceOf + .to receive(:garbage_collect) + .with(bitmaps_enabled, prune: false) + .and_call_original + + subject.perform(resource.id, 'gc', lease_key, lease_uuid) + after_packed_refs = packed_refs(resource) + after_packs = packs(resource) + + expect(after_packs.count).to eq(1) + + # Previously existing packs should be gone now + expect(after_packs - before_packs).to eq(after_packs) + + # The packed-refs file should have been updated during 'git gc' + expect(before_packed_refs).not_to eq(after_packed_refs) + + expect(File.exist?(bitmap_path(after_packs.first))).to eq(bitmaps_enabled) + end + + it 'cleans up repository after finishing' do + expect(resource).to receive(:cleanup).and_call_original + + subject.perform(resource.id, 'gc', lease_key, lease_uuid) + end + + it 'prune calls garbage_collect with the option prune: true' do + repository_service = instance_double(Gitlab::GitalyClient::RepositoryService) + + expect(subject).to receive(:get_gitaly_client).with(:prune, repository.raw_repository).and_return(repository_service) + expect(repository_service).to receive(:garbage_collect).with(bitmaps_enabled, prune: true) + + subject.perform(resource.id, 'prune', lease_key, lease_uuid) + end + + # Create a new commit on a random new branch + def create_objects(resource) + rugged = rugged_repo(resource.repository) + old_commit = rugged.branches.first.target + new_commit_sha = Rugged::Commit.create( + rugged, + message: "hello world #{SecureRandom.hex(6)}", + author: { email: 'foo@bar', name: 'baz' }, + committer: { email: 'foo@bar', name: 'baz' }, + tree: old_commit.tree, + parents: [old_commit] + ) + rugged.references.create("refs/heads/#{SecureRandom.hex(6)}", new_commit_sha) + end + + def packs(resource) + Dir["#{path_to_repo}/objects/pack/*.pack"] + end + + def packed_refs(resource) + path = File.join(path_to_repo, 'packed-refs') + FileUtils.touch(path) + File.read(path) + end + + def path_to_repo + @path_to_repo ||= File.join(TestEnv.repos_path, resource.repository.relative_path) + end + + def bitmap_path(pack) + pack.sub(/\.pack\z/, '.bitmap') + end + end + + context 'with bitmaps enabled' do + let(:bitmaps_enabled) { true } + + include_examples 'gc tasks' + end + + context 'with bitmaps disabled' do + let(:bitmaps_enabled) { false } + + include_examples 'gc tasks' + end + end +end diff --git a/spec/workers/projects/git_garbage_collect_worker_spec.rb b/spec/workers/projects/git_garbage_collect_worker_spec.rb index 9ef1cce117e..73a70ba436b 100644 --- a/spec/workers/projects/git_garbage_collect_worker_spec.rb +++ b/spec/workers/projects/git_garbage_collect_worker_spec.rb @@ -1,374 +1,78 @@ # frozen_string_literal: true -require 'fileutils' require 'spec_helper' RSpec.describe Projects::GitGarbageCollectWorker do - include GitHelpers - let_it_be(:project) { create(:project, :repository) } - let!(:lease_uuid) { SecureRandom.uuid } - let!(:lease_key) { "project_housekeeping:#{project.id}" } - let(:params) { [project.id, task, lease_key, lease_uuid] } - let(:shell) { Gitlab::Shell.new } - let(:repository) { project.repository } - - subject { described_class.new } - - before do - allow(subject).to receive(:find_project).and_return(project) - end - - shared_examples 'it calls Gitaly' do - specify do - repository_service = instance_double(Gitlab::GitalyClient::RepositoryService) - - expect(subject).to receive(:get_gitaly_client).with(task, repository.raw_repository).and_return(repository_service) - expect(repository_service).to receive(gitaly_task) - - subject.perform(*params) - end - end - - shared_examples 'it updates the project statistics' do - it 'updates the project statistics' do - expect_next_instance_of(Projects::UpdateStatisticsService, project, nil, statistics: [:repository_size, :lfs_objects_size]) do |service| - expect(service).to receive(:execute) - end - - subject.perform(*params) - end - - it 'does nothing if the database is read-only' do - allow(Gitlab::Database).to receive(:read_only?) { true } - - expect(Projects::UpdateStatisticsService).not_to receive(:new) - - subject.perform(*params) - end + it_behaves_like 'can collect git garbage' do + let(:resource) { project } + let(:statistics_service_klass) { Projects::UpdateStatisticsService } + let(:statistics_keys) { [:repository_size, :lfs_objects_size] } + let(:expected_default_lease) { "#{resource.id}" } end - describe '#perform', :aggregate_failures do - let(:gitaly_task) { :garbage_collect } - let(:task) { :gc } - - context 'with active lease_uuid' do - before do - allow(subject).to receive(:get_lease_uuid).and_return(lease_uuid) - end - - it_behaves_like 'it calls Gitaly' - it_behaves_like 'it updates the project statistics' + context 'when is able to get the lease' do + let(:params) { [project.id] } - it "flushes ref caches when the task if 'gc'" do - expect(subject).to receive(:renew_lease).with(lease_key, lease_uuid).and_call_original - expect(repository).to receive(:expire_branches_cache).and_call_original - expect(repository).to receive(:branch_names).and_call_original - expect(repository).to receive(:has_visible_content?).and_call_original - expect(repository.raw_repository).to receive(:has_visible_content?).and_call_original + subject { described_class.new } - subject.perform(*params) - end - - it 'handles gRPC errors' do - allow_next_instance_of(Gitlab::GitalyClient::RepositoryService, repository.raw_repository) do |instance| - allow(instance).to receive(:garbage_collect).and_raise(GRPC::NotFound) - end - - expect { subject.perform(*params) }.to raise_exception(Gitlab::Git::Repository::NoRepository) - end + before do + allow(subject).to receive(:get_lease_uuid).and_return(false) + allow(subject).to receive(:find_resource).and_return(project) + allow(subject).to receive(:try_obtain_lease).and_return(SecureRandom.uuid) end - context 'with different lease than the active one' do - before do - allow(subject).to receive(:get_lease_uuid).and_return(SecureRandom.uuid) - end + context 'when the repository has joined a pool' do + let!(:pool) { create(:pool_repository, :ready) } + let(:project) { pool.source_project } - it 'returns silently' do - expect(repository).not_to receive(:expire_branches_cache).and_call_original - expect(repository).not_to receive(:branch_names).and_call_original - expect(repository).not_to receive(:has_visible_content?).and_call_original + it 'ensures the repositories are linked' do + expect(project.pool_repository).to receive(:link_repository).once subject.perform(*params) end end - context 'with no active lease' do - let(:params) { [project.id] } + context 'LFS object garbage collection' do + let_it_be(:lfs_reference) { create(:lfs_objects_project, project: project) } + let(:lfs_object) { lfs_reference.lfs_object } before do - allow(subject).to receive(:get_lease_uuid).and_return(false) + stub_lfs_setting(enabled: true) end - context 'when is able to get the lease' do - before do - allow(subject).to receive(:try_obtain_lease).and_return(SecureRandom.uuid) + it 'cleans up unreferenced LFS objects' do + expect_next_instance_of(Gitlab::Cleanup::OrphanLfsFileReferences) do |svc| + expect(svc.project).to eq(project) + expect(svc.dry_run).to be_falsy + expect(svc).to receive(:run!).and_call_original end - it_behaves_like 'it calls Gitaly' - it_behaves_like 'it updates the project statistics' - - it "flushes ref caches when the task if 'gc'" do - expect(subject).to receive(:get_lease_uuid).with("git_gc:#{task}:#{project.id}").and_return(false) - expect(repository).to receive(:expire_branches_cache).and_call_original - expect(repository).to receive(:branch_names).and_call_original - expect(repository).to receive(:has_visible_content?).and_call_original - expect(repository.raw_repository).to receive(:has_visible_content?).and_call_original - - subject.perform(*params) - end - - context 'when the repository has joined a pool' do - let!(:pool) { create(:pool_repository, :ready) } - let(:project) { pool.source_project } - - it 'ensures the repositories are linked' do - expect(project.pool_repository).to receive(:link_repository).once - - subject.perform(*params) - end - end - - context 'LFS object garbage collection' do - before do - stub_lfs_setting(enabled: true) - end - - let_it_be(:lfs_reference) { create(:lfs_objects_project, project: project) } - let(:lfs_object) { lfs_reference.lfs_object } - - it 'cleans up unreferenced LFS objects' do - expect_next_instance_of(Gitlab::Cleanup::OrphanLfsFileReferences) do |svc| - expect(svc.project).to eq(project) - expect(svc.dry_run).to be_falsy - expect(svc).to receive(:run!).and_call_original - end - - subject.perform(*params) - - expect(project.lfs_objects.reload).not_to include(lfs_object) - end - - it 'catches and logs exceptions' do - allow_next_instance_of(Gitlab::Cleanup::OrphanLfsFileReferences) do |svc| - allow(svg).to receive(:run!).and_raise(/Failed/) - end - - expect(Gitlab::GitLogger).to receive(:warn) - expect(Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception) - - subject.perform(*params) - end - - it 'does nothing if the database is read-only' do - allow(Gitlab::Database).to receive(:read_only?) { true } - expect(Gitlab::Cleanup::OrphanLfsFileReferences).not_to receive(:new) - - subject.perform(*params) + subject.perform(*params) - expect(project.lfs_objects.reload).to include(lfs_object) - end - end + expect(project.lfs_objects.reload).not_to include(lfs_object) end - context 'when no lease can be obtained' do - it 'returns silently' do - expect(subject).to receive(:try_obtain_lease).and_return(false) - - expect(subject).not_to receive(:command) - expect(repository).not_to receive(:expire_branches_cache).and_call_original - expect(repository).not_to receive(:branch_names).and_call_original - expect(repository).not_to receive(:has_visible_content?).and_call_original - - subject.perform(*params) + it 'catches and logs exceptions' do + allow_next_instance_of(Gitlab::Cleanup::OrphanLfsFileReferences) do |svc| + allow(svg).to receive(:run!).and_raise(/Failed/) end - end - end - context 'repack_full' do - let(:task) { :full_repack } - let(:gitaly_task) { :repack_full } - - before do - expect(subject).to receive(:get_lease_uuid).and_return(lease_uuid) - end - - it_behaves_like 'it calls Gitaly' - it_behaves_like 'it updates the project statistics' - end - - context 'pack_refs' do - let(:task) { :pack_refs } - let(:gitaly_task) { :pack_refs } - - before do - expect(subject).to receive(:get_lease_uuid).and_return(lease_uuid) - end - - it 'calls Gitaly' do - repository_service = instance_double(Gitlab::GitalyClient::RefService) - - expect(subject).to receive(:get_gitaly_client).with(task, repository.raw_repository).and_return(repository_service) - expect(repository_service).to receive(gitaly_task) + expect(Gitlab::GitLogger).to receive(:warn) + expect(Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception) subject.perform(*params) end - it 'does not update the project statistics' do - expect(Projects::UpdateStatisticsService).not_to receive(:new) + it 'does nothing if the database is read-only' do + allow(Gitlab::Database).to receive(:read_only?) { true } + expect(Gitlab::Cleanup::OrphanLfsFileReferences).not_to receive(:new) subject.perform(*params) - end - end - - context 'repack_incremental' do - let(:task) { :incremental_repack } - let(:gitaly_task) { :repack_incremental } - - before do - expect(subject).to receive(:get_lease_uuid).and_return(lease_uuid) - end - - it_behaves_like 'it calls Gitaly' - it_behaves_like 'it updates the project statistics' - end - - shared_examples 'gc tasks' do - before do - allow(subject).to receive(:get_lease_uuid).and_return(lease_uuid) - allow(subject).to receive(:bitmaps_enabled?).and_return(bitmaps_enabled) - end - - it 'incremental repack adds a new packfile' do - create_objects(project) - before_packs = packs(project) - - expect(before_packs.count).to be >= 1 - - subject.perform(project.id, 'incremental_repack', lease_key, lease_uuid) - after_packs = packs(project) - - # Exactly one new pack should have been created - expect(after_packs.count).to eq(before_packs.count + 1) - - # Previously existing packs are still around - expect(before_packs & after_packs).to eq(before_packs) - end - - it 'full repack consolidates into 1 packfile' do - create_objects(project) - subject.perform(project.id, 'incremental_repack', lease_key, lease_uuid) - before_packs = packs(project) - - expect(before_packs.count).to be >= 2 - - subject.perform(project.id, 'full_repack', lease_key, lease_uuid) - after_packs = packs(project) - - expect(after_packs.count).to eq(1) - - # Previously existing packs should be gone now - expect(after_packs - before_packs).to eq(after_packs) - - expect(File.exist?(bitmap_path(after_packs.first))).to eq(bitmaps_enabled) - end - - it 'gc consolidates into 1 packfile and updates packed-refs' do - create_objects(project) - before_packs = packs(project) - before_packed_refs = packed_refs(project) - expect(before_packs.count).to be >= 1 - - # It's quite difficult to use `expect_next_instance_of` in this place - # because the RepositoryService is instantiated several times to do - # some repository calls like `exists?`, `create_repository`, ... . - # Therefore, since we're instantiating the object several times, - # RSpec has troubles figuring out which instance is the next and which - # one we want to mock. - # Besides, at this point, we actually want to perform the call to Gitaly, - # otherwise we would just use `instance_double` like in other parts of the - # spec file. - expect_any_instance_of(Gitlab::GitalyClient::RepositoryService) # rubocop:disable RSpec/AnyInstanceOf - .to receive(:garbage_collect) - .with(bitmaps_enabled, prune: false) - .and_call_original - - subject.perform(project.id, 'gc', lease_key, lease_uuid) - after_packed_refs = packed_refs(project) - after_packs = packs(project) - - expect(after_packs.count).to eq(1) - - # Previously existing packs should be gone now - expect(after_packs - before_packs).to eq(after_packs) - - # The packed-refs file should have been updated during 'git gc' - expect(before_packed_refs).not_to eq(after_packed_refs) - - expect(File.exist?(bitmap_path(after_packs.first))).to eq(bitmaps_enabled) - end - - it 'cleans up repository after finishing' do - expect(project).to receive(:cleanup).and_call_original - - subject.perform(project.id, 'gc', lease_key, lease_uuid) - end - - it 'prune calls garbage_collect with the option prune: true' do - repository_service = instance_double(Gitlab::GitalyClient::RepositoryService) - - expect(subject).to receive(:get_gitaly_client).with(:prune, repository.raw_repository).and_return(repository_service) - expect(repository_service).to receive(:garbage_collect).with(bitmaps_enabled, prune: true) - - subject.perform(project.id, 'prune', lease_key, lease_uuid) + expect(project.lfs_objects.reload).to include(lfs_object) end end - - context 'with bitmaps enabled' do - let(:bitmaps_enabled) { true } - - include_examples 'gc tasks' - end - - context 'with bitmaps disabled' do - let(:bitmaps_enabled) { false } - - include_examples 'gc tasks' - end - end - - # Create a new commit on a random new branch - def create_objects(project) - rugged = rugged_repo(project.repository) - old_commit = rugged.branches.first.target - new_commit_sha = Rugged::Commit.create( - rugged, - message: "hello world #{SecureRandom.hex(6)}", - author: { email: 'foo@bar', name: 'baz' }, - committer: { email: 'foo@bar', name: 'baz' }, - tree: old_commit.tree, - parents: [old_commit] - ) - rugged.references.create("refs/heads/#{SecureRandom.hex(6)}", new_commit_sha) - end - - def packs(project) - Dir["#{path_to_repo}/objects/pack/*.pack"] - end - - def packed_refs(project) - path = File.join(path_to_repo, 'packed-refs') - FileUtils.touch(path) - File.read(path) - end - - def path_to_repo - @path_to_repo ||= File.join(TestEnv.repos_path, project.repository.relative_path) - end - - def bitmap_path(pack) - pack.sub(/\.pack\z/, '.bitmap') end end diff --git a/spec/workers/wikis/git_garbage_collect_worker_spec.rb b/spec/workers/wikis/git_garbage_collect_worker_spec.rb new file mode 100644 index 00000000000..77c2e49a83a --- /dev/null +++ b/spec/workers/wikis/git_garbage_collect_worker_spec.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Wikis::GitGarbageCollectWorker do + it_behaves_like 'can collect git garbage' do + let_it_be(:resource) { create(:project_wiki) } + let_it_be(:page) { create(:wiki_page, wiki: resource) } + + let(:statistics_service_klass) { Projects::UpdateStatisticsService } + let(:statistics_keys) { [:wiki_size] } + let(:expected_default_lease) { "project_wikis:#{resource.id}" } + end +end -- 2.30.9