Commit 6de4fb41 authored by Kamil Trzciński's avatar Kamil Trzciński

Merge branch 'revert-a0d3be0c' into 'master'

Revert "Merge branch 'remove_ff_gitaly_backup' into 'master'"

See merge request gitlab-org/gitlab!70224
parents 6235833f f18609fe
---
name: gitaly_backup
introduced_by_url: https://gitlab.com/gitlab-org/gitaly/-/merge_requests/3554
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/333034
milestone: '14.0'
type: development
group: group::gitaly
default_enabled: true
...@@ -1480,8 +1480,16 @@ If this happens, examine the following: ...@@ -1480,8 +1480,16 @@ If this happens, examine the following:
### `gitaly-backup` for repository backup and restore **(FREE SELF)** ### `gitaly-backup` for repository backup and restore **(FREE SELF)**
> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/333034) in GitLab 14.2. > - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/333034) in GitLab 14.2.
> - [Deployed behind a feature flag](../user/feature_flags.md), enabled by default.
> - Recommended for production use.
> - For GitLab self-managed instances, GitLab administrators can opt to [disable it](#disable-or-enable-gitaly-backup).
There can be
[risks when disabling released features](../administration/feature_flags.md#risks-when-disabling-released-features).
Refer to this feature's version history for more details.
`gitaly-backup` is used by the backup Rake task to create and restore repository backups from Gitaly. `gitaly-backup` is used by the backup Rake task to create and restore repository backups from Gitaly.
`gitaly-backup` replaces the previous backup method that directly calls RPCs on Gitaly from GitLab.
The backup Rake task must be able to find this executable. It can be configured in Omnibus GitLab packages: The backup Rake task must be able to find this executable. It can be configured in Omnibus GitLab packages:
...@@ -1493,3 +1501,22 @@ The backup Rake task must be able to find this executable. It can be configured ...@@ -1493,3 +1501,22 @@ The backup Rake task must be able to find this executable. It can be configured
1. [Reconfigure GitLab](../administration/restart_gitlab.md#omnibus-gitlab-reconfigure) 1. [Reconfigure GitLab](../administration/restart_gitlab.md#omnibus-gitlab-reconfigure)
for the changes to take effect for the changes to take effect
#### Disable or enable `gitaly-backup`
`gitaly-backup` is under development but ready for production use.
It is deployed behind a feature flag that is **enabled by default**.
[GitLab administrators with access to the GitLab Rails console](../administration/feature_flags.md)
can opt to disable it.
To disable it:
```ruby
Feature.disable(:gitaly_backup)
```
To enable it:
```ruby
Feature.enable(:gitaly_backup)
```
...@@ -7,6 +7,11 @@ module EE ...@@ -7,6 +7,11 @@ module EE
private private
override :repository_storage_klasses
def repository_storage_klasses
super << GroupWikiRepository
end
def group_relation def group_relation
::Group.includes(:route, :owners, group_wiki_repository: :shard) # rubocop: disable CodeReuse/ActiveRecord ::Group.includes(:route, :owners, group_wiki_repository: :shard) # rubocop: disable CodeReuse/ActiveRecord
end end
...@@ -17,6 +22,15 @@ module EE ...@@ -17,6 +22,15 @@ module EE
end end
end end
override :enqueue_container
def enqueue_container(container)
if container.is_a?(Group)
enqueue_group(container)
else
super
end
end
def enqueue_group(group) def enqueue_group(group)
strategy.enqueue(group, ::Gitlab::GlRepository::WIKI) strategy.enqueue(group, ::Gitlab::GlRepository::WIKI)
end end
...@@ -33,6 +47,15 @@ module EE ...@@ -33,6 +47,15 @@ module EE
enqueue_group(group) enqueue_group(group)
end end
end end
override :records_to_enqueue
def records_to_enqueue(storage)
super << groups_in_storage(storage)
end
def groups_in_storage(storage)
group_relation.id_in(GroupWikiRepository.for_repository_storage(storage).select(:group_id))
end
end end
end end
end end
...@@ -9,8 +9,6 @@ RSpec.describe Backup::Repositories do ...@@ -9,8 +9,6 @@ RSpec.describe Backup::Repositories do
subject { described_class.new(progress, strategy: strategy) } subject { described_class.new(progress, strategy: strategy) }
describe '#dump' do describe '#dump' do
let_it_be(:groups) { create_list(:group, 5, :wiki_repo) }
context 'hashed storage' do context 'hashed storage' do
let_it_be(:project) { create(:project, :repository) } let_it_be(:project) { create(:project, :repository) }
let_it_be(:group) { create(:group, :wiki_repo) } let_it_be(:group) { create(:group, :wiki_repo) }
...@@ -18,7 +16,7 @@ RSpec.describe Backup::Repositories do ...@@ -18,7 +16,7 @@ RSpec.describe Backup::Repositories do
it 'calls enqueue for each repository type', :aggregate_failures do it 'calls enqueue for each repository type', :aggregate_failures do
create(:wiki_page, container: group) create(:wiki_page, container: group)
subject.dump subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
expect(strategy).to have_received(:start).with(:create) expect(strategy).to have_received(:start).with(:create)
expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT) expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT)
...@@ -27,30 +25,46 @@ RSpec.describe Backup::Repositories do ...@@ -27,30 +25,46 @@ RSpec.describe Backup::Repositories do
end end
end end
context 'command failure' do context 'no concurrency' do
it 'enqueue_group raises an error' do let_it_be(:groups) { create_list(:group, 5, :wiki_repo) }
allow(strategy).to receive(:enqueue).with(anything, Gitlab::GlRepository::WIKI).and_raise(IOError)
it 'creates the expected number of threads' do
expect(Thread).not_to receive(:new)
expect { subject.dump }.to raise_error(IOError) expect(strategy).to receive(:start).with(:create)
groups.each do |group|
expect(strategy).to receive(:enqueue).with(group, Gitlab::GlRepository::WIKI)
end
expect(strategy).to receive(:wait)
subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
end end
it 'group query raises an error' do describe 'command failure' do
allow(Group).to receive_message_chain(:includes, :find_each).and_raise(ActiveRecord::StatementTimeout) it 'enqueue_group raises an error' do
allow(strategy).to receive(:enqueue).with(anything, Gitlab::GlRepository::WIKI).and_raise(IOError)
expect { subject.dump(max_concurrency: 1, max_storage_concurrency: 1) }.to raise_error(IOError)
end
expect { subject.dump }.to raise_error(ActiveRecord::StatementTimeout) it 'group query raises an error' do
allow(Group).to receive_message_chain(:includes, :find_each).and_raise(ActiveRecord::StatementTimeout)
expect { subject.dump(max_concurrency: 1, max_storage_concurrency: 1) }.to raise_error(ActiveRecord::StatementTimeout)
end
end end
end
it 'avoids N+1 database queries' do it 'avoids N+1 database queries' do
control_count = ActiveRecord::QueryRecorder.new do control_count = ActiveRecord::QueryRecorder.new do
subject.dump subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
end.count end.count
create_list(:group, 2, :wiki_repo) create_list(:group, 2, :wiki_repo)
expect do expect do
subject.dump subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
end.not_to exceed_query_limit(control_count) end.not_to exceed_query_limit(control_count)
end
end end
end end
......
...@@ -57,6 +57,10 @@ module Backup ...@@ -57,6 +57,10 @@ module Backup
}.merge(Gitlab::GitalyClient.connection_data(repository.storage)).to_json) }.merge(Gitlab::GitalyClient.connection_data(repository.storage)).to_json)
end end
def parallel_enqueue?
false
end
private private
def started? def started?
......
# frozen_string_literal: true
module Backup
# Backup and restores repositories using the gitaly RPC
class GitalyRpcBackup
def initialize(progress)
@progress = progress
end
def start(type)
raise Error, 'already started' if @type
@type = type
case type
when :create
FileUtils.rm_rf(backup_repos_path)
FileUtils.mkdir_p(Gitlab.config.backup.path)
FileUtils.mkdir(backup_repos_path, mode: 0700)
when :restore
# no op
else
raise Error, "unknown backup type: #{type}"
end
end
def wait
@type = nil
end
def enqueue(container, repository_type)
backup_restore = BackupRestore.new(
progress,
repository_type.repository_for(container),
backup_repos_path
)
case @type
when :create
backup_restore.backup
when :restore
backup_restore.restore(always_create: repository_type.project?)
else
raise Error, 'not started'
end
end
def parallel_enqueue?
true
end
private
attr_reader :progress
def backup_repos_path
@backup_repos_path ||= File.join(Gitlab.config.backup.path, 'repositories')
end
class BackupRestore
attr_accessor :progress, :repository, :backup_repos_path
def initialize(progress, repository, backup_repos_path)
@progress = progress
@repository = repository
@backup_repos_path = backup_repos_path
end
def backup
progress.puts " * #{display_repo_path} ... "
if repository.empty?
progress.puts " * #{display_repo_path} ... " + "[EMPTY] [SKIPPED]".color(:cyan)
return
end
FileUtils.mkdir_p(repository_backup_path)
repository.bundle_to_disk(path_to_bundle)
repository.gitaly_repository_client.backup_custom_hooks(custom_hooks_tar)
progress.puts " * #{display_repo_path} ... " + "[DONE]".color(:green)
rescue StandardError => e
progress.puts "[Failed] backing up #{display_repo_path}".color(:red)
progress.puts "Error #{e}".color(:red)
end
def restore(always_create: false)
progress.puts " * #{display_repo_path} ... "
repository.remove rescue nil
if File.exist?(path_to_bundle)
repository.create_from_bundle(path_to_bundle)
restore_custom_hooks
elsif always_create
repository.create_repository
end
progress.puts " * #{display_repo_path} ... " + "[DONE]".color(:green)
rescue StandardError => e
progress.puts "[Failed] restoring #{display_repo_path}".color(:red)
progress.puts "Error #{e}".color(:red)
end
private
def display_repo_path
"#{repository.full_path} (#{repository.disk_path})"
end
def repository_backup_path
@repository_backup_path ||= File.join(backup_repos_path, repository.disk_path)
end
def path_to_bundle
@path_to_bundle ||= File.join(backup_repos_path, repository.disk_path + '.bundle')
end
def restore_custom_hooks
return unless File.exist?(custom_hooks_tar)
repository.gitaly_repository_client.restore_custom_hooks(custom_hooks_tar)
end
def custom_hooks_tar
File.join(repository_backup_path, "custom_hooks.tar")
end
end
end
end
...@@ -9,10 +9,36 @@ module Backup ...@@ -9,10 +9,36 @@ module Backup
@strategy = strategy @strategy = strategy
end end
def dump def dump(max_concurrency:, max_storage_concurrency:)
strategy.start(:create) strategy.start(:create)
enqueue_consecutive
# gitaly-backup is designed to handle concurrency on its own. So we want
# to avoid entering the buggy concurrency code here when gitaly-backup
# is enabled.
if (max_concurrency <= 1 && max_storage_concurrency <= 1) || !strategy.parallel_enqueue?
return enqueue_consecutive
end
check_valid_storages!
semaphore = Concurrent::Semaphore.new(max_concurrency)
errors = Queue.new
threads = Gitlab.config.repositories.storages.keys.map do |storage|
Thread.new do
Rails.application.executor.wrap do
enqueue_storage(storage, semaphore, max_storage_concurrency: max_storage_concurrency)
rescue StandardError => e
errors << e
end
end
end
ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
threads.each(&:join)
end
raise errors.pop unless errors.empty?
ensure ensure
strategy.wait strategy.wait
end end
...@@ -32,6 +58,18 @@ module Backup ...@@ -32,6 +58,18 @@ module Backup
attr_reader :progress, :strategy attr_reader :progress, :strategy
def check_valid_storages!
repository_storage_klasses.each do |klass|
if klass.excluding_repository_storage(Gitlab.config.repositories.storages.keys).exists?
raise Error, "repositories.storages in gitlab.yml does not include all storages used by #{klass}"
end
end
end
def repository_storage_klasses
[ProjectRepository, SnippetRepository]
end
def enqueue_consecutive def enqueue_consecutive
enqueue_consecutive_projects enqueue_consecutive_projects
enqueue_consecutive_snippets enqueue_consecutive_snippets
...@@ -47,6 +85,50 @@ module Backup ...@@ -47,6 +85,50 @@ module Backup
Snippet.find_each(batch_size: 1000) { |snippet| enqueue_snippet(snippet) } Snippet.find_each(batch_size: 1000) { |snippet| enqueue_snippet(snippet) }
end end
def enqueue_storage(storage, semaphore, max_storage_concurrency:)
errors = Queue.new
queue = InterlockSizedQueue.new(1)
threads = Array.new(max_storage_concurrency) do
Thread.new do
Rails.application.executor.wrap do
while container = queue.pop
ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
semaphore.acquire
end
begin
enqueue_container(container)
rescue StandardError => e
errors << e
break
ensure
semaphore.release
end
end
end
end
end
enqueue_records_for_storage(storage, queue, errors)
raise errors.pop unless errors.empty?
ensure
queue.close
ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
threads.each(&:join)
end
end
def enqueue_container(container)
case container
when Project
enqueue_project(container)
when Snippet
enqueue_snippet(container)
end
end
def enqueue_project(project) def enqueue_project(project)
strategy.enqueue(project, Gitlab::GlRepository::PROJECT) strategy.enqueue(project, Gitlab::GlRepository::PROJECT)
strategy.enqueue(project, Gitlab::GlRepository::WIKI) strategy.enqueue(project, Gitlab::GlRepository::WIKI)
...@@ -57,10 +139,32 @@ module Backup ...@@ -57,10 +139,32 @@ module Backup
strategy.enqueue(snippet, Gitlab::GlRepository::SNIPPET) strategy.enqueue(snippet, Gitlab::GlRepository::SNIPPET)
end end
def enqueue_records_for_storage(storage, queue, errors)
records_to_enqueue(storage).each do |relation|
relation.find_each(batch_size: 100) do |project|
break unless errors.empty?
queue.push(project)
end
end
end
def records_to_enqueue(storage)
[projects_in_storage(storage), snippets_in_storage(storage)]
end
def projects_in_storage(storage)
project_relation.id_in(ProjectRepository.for_repository_storage(storage).select(:project_id))
end
def project_relation def project_relation
Project.includes(:route, :group, namespace: :owner) Project.includes(:route, :group, namespace: :owner)
end end
def snippets_in_storage(storage)
Snippet.id_in(SnippetRepository.for_repository_storage(storage).select(:snippet_id))
end
def restore_object_pools def restore_object_pools
PoolRepository.includes(:source_project).find_each do |pool| PoolRepository.includes(:source_project).find_each do |pool|
progress.puts " - Object pool #{pool.disk_path}..." progress.puts " - Object pool #{pool.disk_path}..."
...@@ -95,6 +199,24 @@ module Backup ...@@ -95,6 +199,24 @@ module Backup
Snippet.id_in(invalid_snippets).delete_all Snippet.id_in(invalid_snippets).delete_all
end end
class InterlockSizedQueue < SizedQueue
extend ::Gitlab::Utils::Override
override :pop
def pop(*)
ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
super
end
end
override :push
def push(*)
ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
super
end
end
end
end end
end end
......
...@@ -102,10 +102,19 @@ namespace :gitlab do ...@@ -102,10 +102,19 @@ namespace :gitlab do
task create: :gitlab_environment do task create: :gitlab_environment do
puts_time "Dumping repositories ...".color(:blue) puts_time "Dumping repositories ...".color(:blue)
max_concurrency = ENV.fetch('GITLAB_BACKUP_MAX_CONCURRENCY', 1).to_i
max_storage_concurrency = ENV.fetch('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 1).to_i
if ENV["SKIP"] && ENV["SKIP"].include?("repositories") if ENV["SKIP"] && ENV["SKIP"].include?("repositories")
puts_time "[SKIPPED]".color(:cyan) puts_time "[SKIPPED]".color(:cyan)
elsif max_concurrency < 1 || max_storage_concurrency < 1
puts "GITLAB_BACKUP_MAX_CONCURRENCY and GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY must have a value of at least 1".color(:red)
exit 1
else else
Backup::Repositories.new(progress, strategy: repository_backup_strategy).dump Backup::Repositories.new(progress, strategy: repository_backup_strategy).dump(
max_concurrency: max_concurrency,
max_storage_concurrency: max_storage_concurrency
)
puts_time "done".color(:green) puts_time "done".color(:green)
end end
end end
...@@ -290,9 +299,13 @@ namespace :gitlab do ...@@ -290,9 +299,13 @@ namespace :gitlab do
end end
def repository_backup_strategy def repository_backup_strategy
max_concurrency = ENV['GITLAB_BACKUP_MAX_CONCURRENCY'].presence if Feature.enabled?(:gitaly_backup, default_enabled: :yaml)
max_storage_concurrency = ENV['GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY'].presence max_concurrency = ENV['GITLAB_BACKUP_MAX_CONCURRENCY'].presence
Backup::GitalyBackup.new(progress, parallel: max_concurrency, parallel_storage: max_storage_concurrency) max_storage_concurrency = ENV['GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY'].presence
Backup::GitalyBackup.new(progress, parallel: max_concurrency, parallel_storage: max_storage_concurrency)
else
Backup::GitalyRpcBackup.new(progress)
end
end end
end end
# namespace end: backup # namespace end: backup
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Backup::GitalyRpcBackup do
let(:progress) { spy(:stdout) }
subject { described_class.new(progress) }
after do
# make sure we do not leave behind any backup files
FileUtils.rm_rf(File.join(Gitlab.config.backup.path, 'repositories'))
end
context 'unknown' do
it 'fails to start unknown' do
expect { subject.start(:unknown) }.to raise_error(::Backup::Error, 'unknown backup type: unknown')
end
end
context 'create' do
RSpec.shared_examples 'creates a repository backup' do
it 'creates repository bundles', :aggregate_failures do
# Add data to the wiki, design repositories, and snippets, so they will be included in the dump.
create(:wiki_page, container: project)
create(:design, :with_file, issue: create(:issue, project: project))
project_snippet = create(:project_snippet, :repository, project: project)
personal_snippet = create(:personal_snippet, :repository, author: project.owner)
subject.start(:create)
subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.enqueue(project, Gitlab::GlRepository::WIKI)
subject.enqueue(project, Gitlab::GlRepository::DESIGN)
subject.enqueue(personal_snippet, Gitlab::GlRepository::SNIPPET)
subject.enqueue(project_snippet, Gitlab::GlRepository::SNIPPET)
subject.wait
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.wiki.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.design.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', personal_snippet.disk_path + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project_snippet.disk_path + '.bundle'))
end
context 'failure' do
before do
allow_next_instance_of(Repository) do |repository|
allow(repository).to receive(:bundle_to_disk) { raise 'Fail in tests' }
end
end
it 'logs an appropriate message', :aggregate_failures do
subject.start(:create)
subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.wait
expect(progress).to have_received(:puts).with("[Failed] backing up #{project.full_path} (#{project.disk_path})")
expect(progress).to have_received(:puts).with("Error Fail in tests")
end
end
end
context 'hashed storage' do
let_it_be(:project) { create(:project, :repository) }
it_behaves_like 'creates a repository backup'
end
context 'legacy storage' do
let_it_be(:project) { create(:project, :repository, :legacy_storage) }
it_behaves_like 'creates a repository backup'
end
end
context 'restore' do
let_it_be(:project) { create(:project, :repository) }
let_it_be(:personal_snippet) { create(:personal_snippet, author: project.owner) }
let_it_be(:project_snippet) { create(:project_snippet, project: project, author: project.owner) }
def copy_bundle_to_backup_path(bundle_name, destination)
FileUtils.mkdir_p(File.join(Gitlab.config.backup.path, 'repositories', File.dirname(destination)))
FileUtils.cp(Rails.root.join('spec/fixtures/lib/backup', bundle_name), File.join(Gitlab.config.backup.path, 'repositories', destination))
end
it 'restores from repository bundles', :aggregate_failures do
copy_bundle_to_backup_path('project_repo.bundle', project.disk_path + '.bundle')
copy_bundle_to_backup_path('wiki_repo.bundle', project.disk_path + '.wiki.bundle')
copy_bundle_to_backup_path('design_repo.bundle', project.disk_path + '.design.bundle')
copy_bundle_to_backup_path('personal_snippet_repo.bundle', personal_snippet.disk_path + '.bundle')
copy_bundle_to_backup_path('project_snippet_repo.bundle', project_snippet.disk_path + '.bundle')
subject.start(:restore)
subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.enqueue(project, Gitlab::GlRepository::WIKI)
subject.enqueue(project, Gitlab::GlRepository::DESIGN)
subject.enqueue(personal_snippet, Gitlab::GlRepository::SNIPPET)
subject.enqueue(project_snippet, Gitlab::GlRepository::SNIPPET)
subject.wait
collect_commit_shas = -> (repo) { repo.commits('master', limit: 10).map(&:sha) }
expect(collect_commit_shas.call(project.repository)).to eq(['393a7d860a5a4c3cc736d7eb00604e3472bb95ec'])
expect(collect_commit_shas.call(project.wiki.repository)).to eq(['c74b9948d0088d703ee1fafeddd9ed9add2901ea'])
expect(collect_commit_shas.call(project.design_repository)).to eq(['c3cd4d7bd73a51a0f22045c3a4c871c435dc959d'])
expect(collect_commit_shas.call(personal_snippet.repository)).to eq(['3b3c067a3bc1d1b695b51e2be30c0f8cf698a06e'])
expect(collect_commit_shas.call(project_snippet.repository)).to eq(['6e44ba56a4748be361a841e759c20e421a1651a1'])
end
it 'cleans existing repositories', :aggregate_failures do
expect_next_instance_of(DesignManagement::Repository) do |repository|
expect(repository).to receive(:remove)
end
# 4 times = project repo + wiki repo + project_snippet repo + personal_snippet repo
expect(Repository).to receive(:new).exactly(4).times.and_wrap_original do |method, *original_args|
full_path, container, kwargs = original_args
repository = method.call(full_path, container, **kwargs)
expect(repository).to receive(:remove)
repository
end
subject.start(:restore)
subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.enqueue(project, Gitlab::GlRepository::WIKI)
subject.enqueue(project, Gitlab::GlRepository::DESIGN)
subject.enqueue(personal_snippet, Gitlab::GlRepository::SNIPPET)
subject.enqueue(project_snippet, Gitlab::GlRepository::SNIPPET)
subject.wait
end
context 'failure' do
before do
allow_next_instance_of(Repository) do |repository|
allow(repository).to receive(:create_repository) { raise 'Fail in tests' }
allow(repository).to receive(:create_from_bundle) { raise 'Fail in tests' }
end
end
it 'logs an appropriate message', :aggregate_failures do
subject.start(:restore)
subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.wait
expect(progress).to have_received(:puts).with("[Failed] restoring #{project.full_path} (#{project.disk_path})")
expect(progress).to have_received(:puts).with("Error Fail in tests")
end
end
end
end
...@@ -4,7 +4,8 @@ require 'spec_helper' ...@@ -4,7 +4,8 @@ require 'spec_helper'
RSpec.describe Backup::Repositories do RSpec.describe Backup::Repositories do
let(:progress) { spy(:stdout) } let(:progress) { spy(:stdout) }
let(:strategy) { spy(:strategy) } let(:parallel_enqueue) { true }
let(:strategy) { spy(:strategy, parallel_enqueue?: parallel_enqueue) }
subject { described_class.new(progress, strategy: strategy) } subject { described_class.new(progress, strategy: strategy) }
...@@ -16,7 +17,7 @@ RSpec.describe Backup::Repositories do ...@@ -16,7 +17,7 @@ RSpec.describe Backup::Repositories do
project_snippet = create(:project_snippet, :repository, project: project) project_snippet = create(:project_snippet, :repository, project: project)
personal_snippet = create(:personal_snippet, :repository, author: project.owner) personal_snippet = create(:personal_snippet, :repository, author: project.owner)
subject.dump subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
expect(strategy).to have_received(:start).with(:create) expect(strategy).to have_received(:start).with(:create)
expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT) expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT)
...@@ -40,30 +41,132 @@ RSpec.describe Backup::Repositories do ...@@ -40,30 +41,132 @@ RSpec.describe Backup::Repositories do
it_behaves_like 'creates repository bundles' it_behaves_like 'creates repository bundles'
end end
context 'command failure' do context 'no concurrency' do
it 'enqueue_project raises an error' do it 'creates the expected number of threads' do
allow(strategy).to receive(:enqueue).with(anything, Gitlab::GlRepository::PROJECT).and_raise(IOError) expect(Thread).not_to receive(:new)
expect { subject.dump }.to raise_error(IOError) expect(strategy).to receive(:start).with(:create)
projects.each do |project|
expect(strategy).to receive(:enqueue).with(project, Gitlab::GlRepository::PROJECT)
end
expect(strategy).to receive(:wait)
subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
end
describe 'command failure' do
it 'enqueue_project raises an error' do
allow(strategy).to receive(:enqueue).with(anything, Gitlab::GlRepository::PROJECT).and_raise(IOError)
expect { subject.dump(max_concurrency: 1, max_storage_concurrency: 1) }.to raise_error(IOError)
end
it 'project query raises an error' do
allow(Project).to receive_message_chain(:includes, :find_each).and_raise(ActiveRecord::StatementTimeout)
expect { subject.dump(max_concurrency: 1, max_storage_concurrency: 1) }.to raise_error(ActiveRecord::StatementTimeout)
end
end
it 'avoids N+1 database queries' do
control_count = ActiveRecord::QueryRecorder.new do
subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
end.count
create_list(:project, 2, :repository)
expect do
subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
end.not_to exceed_query_limit(control_count)
end end
end
it 'project query raises an error' do context 'concurrency with a strategy without parallel enqueueing support' do
allow(Project).to receive_message_chain(:includes, :find_each).and_raise(ActiveRecord::StatementTimeout) let(:parallel_enqueue) { false }
expect { subject.dump }.to raise_error(ActiveRecord::StatementTimeout) it 'enqueues all projects sequentially' do
expect(Thread).not_to receive(:new)
expect(strategy).to receive(:start).with(:create)
projects.each do |project|
expect(strategy).to receive(:enqueue).with(project, Gitlab::GlRepository::PROJECT)
end
expect(strategy).to receive(:wait)
subject.dump(max_concurrency: 2, max_storage_concurrency: 2)
end end
end end
it 'avoids N+1 database queries' do [4, 10].each do |max_storage_concurrency|
control_count = ActiveRecord::QueryRecorder.new do context "max_storage_concurrency #{max_storage_concurrency}", quarantine: 'https://gitlab.com/gitlab-org/gitlab/-/issues/241701' do
subject.dump let(:storage_keys) { %w[default test_second_storage] }
end.count
before do
allow(Gitlab.config.repositories.storages).to receive(:keys).and_return(storage_keys)
end
create_list(:project, 2, :repository) it 'creates the expected number of threads' do
expect(Thread).to receive(:new)
.exactly(storage_keys.length * (max_storage_concurrency + 1)).times
.and_call_original
expect do expect(strategy).to receive(:start).with(:create)
subject.dump projects.each do |project|
end.not_to exceed_query_limit(control_count) expect(strategy).to receive(:enqueue).with(project, Gitlab::GlRepository::PROJECT)
end
expect(strategy).to receive(:wait)
subject.dump(max_concurrency: 1, max_storage_concurrency: max_storage_concurrency)
end
it 'creates the expected number of threads with extra max concurrency' do
expect(Thread).to receive(:new)
.exactly(storage_keys.length * (max_storage_concurrency + 1)).times
.and_call_original
expect(strategy).to receive(:start).with(:create)
projects.each do |project|
expect(strategy).to receive(:enqueue).with(project, Gitlab::GlRepository::PROJECT)
end
expect(strategy).to receive(:wait)
subject.dump(max_concurrency: 3, max_storage_concurrency: max_storage_concurrency)
end
describe 'command failure' do
it 'enqueue_project raises an error' do
allow(strategy).to receive(:enqueue).and_raise(IOError)
expect { subject.dump(max_concurrency: 1, max_storage_concurrency: max_storage_concurrency) }.to raise_error(IOError)
end
it 'project query raises an error' do
allow(Project).to receive_message_chain(:for_repository_storage, :includes, :find_each).and_raise(ActiveRecord::StatementTimeout)
expect { subject.dump(max_concurrency: 1, max_storage_concurrency: max_storage_concurrency) }.to raise_error(ActiveRecord::StatementTimeout)
end
context 'misconfigured storages' do
let(:storage_keys) { %w[test_second_storage] }
it 'raises an error' do
expect { subject.dump(max_concurrency: 1, max_storage_concurrency: max_storage_concurrency) }.to raise_error(Backup::Error, 'repositories.storages in gitlab.yml is misconfigured')
end
end
end
it 'avoids N+1 database queries' do
control_count = ActiveRecord::QueryRecorder.new do
subject.dump(max_concurrency: 1, max_storage_concurrency: max_storage_concurrency)
end.count
create_list(:project, 2, :repository)
expect do
subject.dump(max_concurrency: 1, max_storage_concurrency: max_storage_concurrency)
end.not_to exceed_query_limit(control_count)
end
end
end end
end end
......
...@@ -383,10 +383,30 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do ...@@ -383,10 +383,30 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
create(:project, :repository) create(:project, :repository)
end end
it 'has defaults' do
expect_next_instance_of(::Backup::Repositories) do |instance|
expect(instance).to receive(:dump)
.with(max_concurrency: 1, max_storage_concurrency: 1)
.and_call_original
end
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
end
it 'passes through concurrency environment variables' do it 'passes through concurrency environment variables' do
# The way concurrency is handled will change with the `gitaly_backup`
# feature flag. For now we need to check that both ways continue to
# work. This will be cleaned up in the rollout issue.
# See https://gitlab.com/gitlab-org/gitlab/-/issues/333034
stub_env('GITLAB_BACKUP_MAX_CONCURRENCY', 5) stub_env('GITLAB_BACKUP_MAX_CONCURRENCY', 5)
stub_env('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 2) stub_env('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 2)
expect_next_instance_of(::Backup::Repositories) do |instance|
expect(instance).to receive(:dump)
.with(max_concurrency: 5, max_storage_concurrency: 2)
.and_call_original
end
expect(::Backup::GitalyBackup).to receive(:new).with(anything, parallel: 5, parallel_storage: 2).and_call_original expect(::Backup::GitalyBackup).to receive(:new).with(anything, parallel: 5, parallel_storage: 2).and_call_original
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment