Commit c8eb8e78 authored by Luke Duncalfe's avatar Luke Duncalfe

Merge branch 'jf_extract_repo_backup' into 'master'

Extract gitaly RPC backup strategy

See merge request gitlab-org/gitlab!61544
parents 1dca5cf8 17f7bb50
......@@ -5,13 +5,6 @@ module EE
module Repositories
extend ::Gitlab::Utils::Override
override :restore
def restore
restore_group_repositories
super
end
private
override :repository_storage_klasses
......@@ -19,12 +12,6 @@ module EE
super << GroupWikiRepository
end
def restore_group_repositories
find_groups_in_batches do |group|
restore_repository(group, ::Gitlab::GlRepository::WIKI)
end
end
def group_relation
::Group.includes(:route, :owners, group_wiki_repository: :shard) # rubocop: disable CodeReuse/ActiveRecord
end
......@@ -35,29 +22,29 @@ module EE
end
end
override :dump_container
def dump_container(container)
override :enqueue_container
def enqueue_container(container)
if container.is_a?(Group)
dump_group(container)
enqueue_group(container)
else
super
end
end
def dump_group(group)
backup_repository(group, ::Gitlab::GlRepository::WIKI)
def enqueue_group(group)
strategy.enqueue(group, ::Gitlab::GlRepository::WIKI)
end
override :dump_consecutive
def dump_consecutive
dump_consecutive_groups
override :enqueue_consecutive
def enqueue_consecutive
enqueue_consecutive_groups
super
end
def dump_consecutive_groups
def enqueue_consecutive_groups
find_groups_in_batches do |group|
dump_group(group)
enqueue_group(group)
end
end
......
......@@ -3,31 +3,25 @@
require 'spec_helper'
RSpec.describe Backup::Repositories do
let(:progress) { StringIO.new }
let(:progress) { spy(:stdout) }
let(:strategy) { spy(:strategy) }
subject { described_class.new(progress) }
before do
allow(progress).to receive(:puts)
allow(progress).to receive(:print)
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:progress).and_return(progress)
end
end
subject { described_class.new(progress, strategy: strategy) }
describe '#dump' do
context 'hashed storage' do
let_it_be(:project) { create(:project, :repository) }
let_it_be(:group) { create(:group, :wiki_repo) }
it 'creates repository bundles', :aggregate_failures do
it 'calls enqueue for each repository type', :aggregate_failures do
create(:wiki_page, container: group)
subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', group.wiki.disk_path + '.bundle'))
expect(strategy).to have_received(:start).with(:create)
expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT)
expect(strategy).to have_received(:enqueue).with(group, Gitlab::GlRepository::WIKI)
expect(strategy).to have_received(:wait)
end
end
......@@ -37,16 +31,18 @@ RSpec.describe Backup::Repositories do
it 'creates the expected number of threads' do
expect(Thread).not_to receive(:new)
expect(strategy).to receive(:start).with(:create)
groups.each do |group|
expect(subject).to receive(:dump_group).with(group).and_call_original
expect(strategy).to receive(:enqueue).with(group, Gitlab::GlRepository::WIKI)
end
expect(strategy).to receive(:wait)
subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
end
describe 'command failure' do
it 'dump_group raises an error' do
allow(subject).to receive(:dump_group).and_raise(IOError)
it 'enqueue_group raises an error' do
allow(strategy).to receive(:enqueue).with(anything, Gitlab::GlRepository::WIKI).and_raise(IOError)
expect { subject.dump(max_concurrency: 1, max_storage_concurrency: 1) }.to raise_error(IOError)
end
......@@ -76,24 +72,13 @@ RSpec.describe Backup::Repositories do
let_it_be(:project) { create(:project) }
let_it_be(:group) { create(:group) }
let(:next_path_to_bundle) do
[
Rails.root.join('spec/fixtures/lib/backup/wiki_repo.bundle'),
Rails.root.join('spec/fixtures/lib/backup/project_repo.bundle')
].to_enum
end
it 'restores repositories from bundles', :aggregate_failures do
allow_next_instance_of(described_class::BackupRestore) do |backup_restore|
allow(backup_restore).to receive(:path_to_bundle).and_return(next_path_to_bundle.next)
end
it 'calls enqueue for each repository type', :aggregate_failures do
subject.restore
collect_commit_shas = -> (repo) { repo.commits('master', limit: 10).map(&:sha) }
expect(collect_commit_shas.call(project.repository)).to eq(['393a7d860a5a4c3cc736d7eb00604e3472bb95ec'])
expect(collect_commit_shas.call(group.wiki.repository)).to eq(['c74b9948d0088d703ee1fafeddd9ed9add2901ea'])
expect(strategy).to have_received(:start).with(:restore)
expect(strategy).to have_received(:enqueue).with(project, Gitlab::GlRepository::PROJECT)
expect(strategy).to have_received(:enqueue).with(group, Gitlab::GlRepository::WIKI)
expect(strategy).to have_received(:wait)
end
end
end
# frozen_string_literal: true
module Backup
# Backup and restores repositories using the gitaly RPC
class GitalyRpcBackup
def initialize(progress)
@progress = progress
end
def start(type)
raise Error, 'already started' if @type
@type = type
case type
when :create
FileUtils.rm_rf(backup_repos_path)
FileUtils.mkdir_p(Gitlab.config.backup.path)
FileUtils.mkdir(backup_repos_path, mode: 0700)
when :restore
# no op
else
raise Error, "unknown backup type: #{type}"
end
end
def wait
@type = nil
end
def enqueue(container, repository_type)
backup_restore = BackupRestore.new(
progress,
repository_type.repository_for(container),
backup_repos_path
)
case @type
when :create
backup_restore.backup
when :restore
backup_restore.restore(always_create: repository_type.project?)
else
raise Error, 'not started'
end
end
private
attr_reader :progress
def backup_repos_path
@backup_repos_path ||= File.join(Gitlab.config.backup.path, 'repositories')
end
class BackupRestore
attr_accessor :progress, :repository, :backup_repos_path
def initialize(progress, repository, backup_repos_path)
@progress = progress
@repository = repository
@backup_repos_path = backup_repos_path
end
def backup
progress.puts " * #{display_repo_path} ... "
if repository.empty?
progress.puts " * #{display_repo_path} ... " + "[EMPTY] [SKIPPED]".color(:cyan)
return
end
FileUtils.mkdir_p(repository_backup_path)
repository.bundle_to_disk(path_to_bundle)
repository.gitaly_repository_client.backup_custom_hooks(custom_hooks_tar)
progress.puts " * #{display_repo_path} ... " + "[DONE]".color(:green)
rescue StandardError => e
progress.puts "[Failed] backing up #{display_repo_path}".color(:red)
progress.puts "Error #{e}".color(:red)
end
def restore(always_create: false)
progress.puts " * #{display_repo_path} ... "
repository.remove rescue nil
if File.exist?(path_to_bundle)
repository.create_from_bundle(path_to_bundle)
restore_custom_hooks
elsif always_create
repository.create_repository
end
progress.puts " * #{display_repo_path} ... " + "[DONE]".color(:green)
rescue StandardError => e
progress.puts "[Failed] restoring #{display_repo_path}".color(:red)
progress.puts "Error #{e}".color(:red)
end
private
def display_repo_path
"#{repository.full_path} (#{repository.disk_path})"
end
def repository_backup_path
@repository_backup_path ||= File.join(backup_repos_path, repository.disk_path)
end
def path_to_bundle
@path_to_bundle ||= File.join(backup_repos_path, repository.disk_path + '.bundle')
end
def restore_custom_hooks
return unless File.exist?(custom_hooks_tar)
repository.gitaly_repository_client.restore_custom_hooks(custom_hooks_tar)
end
def custom_hooks_tar
File.join(repository_backup_path, "custom_hooks.tar")
end
end
end
end
......@@ -4,17 +4,16 @@ require 'yaml'
module Backup
class Repositories
attr_reader :progress
def initialize(progress)
def initialize(progress, strategy: GitalyRpcBackup.new(progress))
@progress = progress
@strategy = strategy
end
def dump(max_concurrency:, max_storage_concurrency:)
prepare
strategy.start(:create)
if max_concurrency <= 1 && max_storage_concurrency <= 1
return dump_consecutive
return enqueue_consecutive
end
check_valid_storages!
......@@ -25,7 +24,7 @@ module Backup
threads = Gitlab.config.repositories.storages.keys.map do |storage|
Thread.new do
Rails.application.executor.wrap do
dump_storage(storage, semaphore, max_storage_concurrency: max_storage_concurrency)
enqueue_storage(storage, semaphore, max_storage_concurrency: max_storage_concurrency)
rescue StandardError => e
errors << e
end
......@@ -37,32 +36,24 @@ module Backup
end
raise errors.pop unless errors.empty?
ensure
strategy.wait
end
def restore
restore_project_repositories
restore_snippets
strategy.start(:restore)
enqueue_consecutive
ensure
strategy.wait
cleanup_snippets_without_repositories
restore_object_pools
end
private
def restore_project_repositories
Project.find_each(batch_size: 1000) do |project|
restore_repository(project, Gitlab::GlRepository::PROJECT)
restore_repository(project, Gitlab::GlRepository::WIKI)
restore_repository(project, Gitlab::GlRepository::DESIGN)
end
end
def restore_snippets
invalid_ids = Snippet.find_each(batch_size: 1000)
.map { |snippet| restore_snippet_repository(snippet) }
.compact
cleanup_snippets_without_repositories(invalid_ids)
end
attr_reader :progress, :strategy
def check_valid_storages!
repository_storage_klasses.each do |klass|
......@@ -76,32 +67,22 @@ module Backup
[ProjectRepository, SnippetRepository]
end
def backup_repos_path
@backup_repos_path ||= File.join(Gitlab.config.backup.path, 'repositories')
end
def prepare
FileUtils.rm_rf(backup_repos_path)
FileUtils.mkdir_p(Gitlab.config.backup.path)
FileUtils.mkdir(backup_repos_path, mode: 0700)
def enqueue_consecutive
enqueue_consecutive_projects
enqueue_consecutive_snippets
end
def dump_consecutive
dump_consecutive_projects
dump_consecutive_snippets
end
def dump_consecutive_projects
def enqueue_consecutive_projects
project_relation.find_each(batch_size: 1000) do |project|
dump_project(project)
enqueue_project(project)
end
end
def dump_consecutive_snippets
Snippet.find_each(batch_size: 1000) { |snippet| dump_snippet(snippet) }
def enqueue_consecutive_snippets
Snippet.find_each(batch_size: 1000) { |snippet| enqueue_snippet(snippet) }
end
def dump_storage(storage, semaphore, max_storage_concurrency:)
def enqueue_storage(storage, semaphore, max_storage_concurrency:)
errors = Queue.new
queue = InterlockSizedQueue.new(1)
......@@ -114,7 +95,7 @@ module Backup
end
begin
dump_container(container)
enqueue_container(container)
rescue StandardError => e
errors << e
break
......@@ -136,23 +117,23 @@ module Backup
end
end
def dump_container(container)
def enqueue_container(container)
case container
when Project
dump_project(container)
enqueue_project(container)
when Snippet
dump_snippet(container)
enqueue_snippet(container)
end
end
def dump_project(project)
backup_repository(project, Gitlab::GlRepository::PROJECT)
backup_repository(project, Gitlab::GlRepository::WIKI)
backup_repository(project, Gitlab::GlRepository::DESIGN)
def enqueue_project(project)
strategy.enqueue(project, Gitlab::GlRepository::PROJECT)
strategy.enqueue(project, Gitlab::GlRepository::WIKI)
strategy.enqueue(project, Gitlab::GlRepository::DESIGN)
end
def dump_snippet(snippet)
backup_repository(snippet, Gitlab::GlRepository::SNIPPET)
def enqueue_snippet(snippet)
strategy.enqueue(snippet, Gitlab::GlRepository::SNIPPET)
end
def enqueue_records_for_storage(storage, queue, errors)
......@@ -181,22 +162,6 @@ module Backup
Snippet.id_in(SnippetRepository.for_repository_storage(storage).select(:snippet_id))
end
def backup_repository(container, type)
BackupRestore.new(
progress,
type.repository_for(container),
backup_repos_path
).backup
end
def restore_repository(container, type)
BackupRestore.new(
progress,
type.repository_for(container),
backup_repos_path
).restore(always_create: type.project?)
end
def restore_object_pools
PoolRepository.includes(:source_project).find_each do |pool|
progress.puts " - Object pool #{pool.disk_path}..."
......@@ -214,99 +179,22 @@ module Backup
end
end
def restore_snippet_repository(snippet)
restore_repository(snippet, Gitlab::GlRepository::SNIPPET)
# Snippets without a repository should be removed because they failed to import
# due to having invalid repositories
def cleanup_snippets_without_repositories
invalid_snippets = []
Snippet.find_each(batch_size: 1000).each do |snippet|
response = Snippets::RepositoryValidationService.new(nil, snippet).execute
next if response.success?
if response.error?
snippet.repository.remove
progress.puts("Snippet #{snippet.full_path} can't be restored: #{response.message}")
snippet.id
else
nil
end
end
# Snippets without a repository should be removed because they failed to import
# due to having invalid repositories
def cleanup_snippets_without_repositories(ids)
Snippet.id_in(ids).delete_all
end
class BackupRestore
attr_accessor :progress, :repository, :backup_repos_path
def initialize(progress, repository, backup_repos_path)
@progress = progress
@repository = repository
@backup_repos_path = backup_repos_path
end
def backup
progress.puts " * #{display_repo_path} ... "
if repository.empty?
progress.puts " * #{display_repo_path} ... " + "[EMPTY] [SKIPPED]".color(:cyan)
return
invalid_snippets << snippet.id
end
FileUtils.mkdir_p(repository_backup_path)
repository.bundle_to_disk(path_to_bundle)
repository.gitaly_repository_client.backup_custom_hooks(custom_hooks_tar)
progress.puts " * #{display_repo_path} ... " + "[DONE]".color(:green)
rescue StandardError => e
progress.puts "[Failed] backing up #{display_repo_path}".color(:red)
progress.puts "Error #{e}".color(:red)
end
def restore(always_create: false)
progress.puts " * #{display_repo_path} ... "
repository.remove rescue nil
if File.exist?(path_to_bundle)
repository.create_from_bundle(path_to_bundle)
restore_custom_hooks
elsif always_create
repository.create_repository
end
progress.puts " * #{display_repo_path} ... " + "[DONE]".color(:green)
rescue StandardError => e
progress.puts "[Failed] restoring #{display_repo_path}".color(:red)
progress.puts "Error #{e}".color(:red)
end
private
def display_repo_path
"#{repository.full_path} (#{repository.disk_path})"
end
def repository_backup_path
@repository_backup_path ||= File.join(backup_repos_path, repository.disk_path)
end
def path_to_bundle
@path_to_bundle ||= File.join(backup_repos_path, repository.disk_path + '.bundle')
end
def restore_custom_hooks
return unless File.exist?(custom_hooks_tar)
repository.gitaly_repository_client.restore_custom_hooks(custom_hooks_tar)
end
def custom_hooks_tar
File.join(repository_backup_path, "custom_hooks.tar")
end
Snippet.id_in(invalid_snippets).delete_all
end
class InterlockSizedQueue < SizedQueue
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Backup::GitalyRpcBackup do
let(:progress) { spy(:stdout) }
subject { described_class.new(progress) }
after do
# make sure we do not leave behind any backup files
FileUtils.rm_rf(File.join(Gitlab.config.backup.path, 'repositories'))
end
context 'unknown' do
it 'fails to start unknown' do
expect { subject.start(:unknown) }.to raise_error(::Backup::Error, 'unknown backup type: unknown')
end
end
context 'create' do
RSpec.shared_examples 'creates a repository backup' do
it 'creates repository bundles', :aggregate_failures do
# Add data to the wiki, design repositories, and snippets, so they will be included in the dump.
create(:wiki_page, container: project)
create(:design, :with_file, issue: create(:issue, project: project))
project_snippet = create(:project_snippet, :repository, project: project)
personal_snippet = create(:personal_snippet, :repository, author: project.owner)
subject.start(:create)
subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.enqueue(project, Gitlab::GlRepository::WIKI)
subject.enqueue(project, Gitlab::GlRepository::DESIGN)
subject.enqueue(personal_snippet, Gitlab::GlRepository::SNIPPET)
subject.enqueue(project_snippet, Gitlab::GlRepository::SNIPPET)
subject.wait
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.wiki.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.design.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', personal_snippet.disk_path + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project_snippet.disk_path + '.bundle'))
end
context 'failure' do
before do
allow_next_instance_of(Repository) do |repository|
allow(repository).to receive(:bundle_to_disk) { raise 'Fail in tests' }
end
end
it 'logs an appropriate message', :aggregate_failures do
subject.start(:create)
subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.wait
expect(progress).to have_received(:puts).with("[Failed] backing up #{project.full_path} (#{project.disk_path})")
expect(progress).to have_received(:puts).with("Error Fail in tests")
end
end
end
context 'hashed storage' do
let_it_be(:project) { create(:project, :repository) }
it_behaves_like 'creates a repository backup'
end
context 'legacy storage' do
let_it_be(:project) { create(:project, :repository, :legacy_storage) }
it_behaves_like 'creates a repository backup'
end
end
context 'restore' do
let_it_be(:project) { create(:project, :repository) }
let_it_be(:personal_snippet) { create(:personal_snippet, author: project.owner) }
let_it_be(:project_snippet) { create(:project_snippet, project: project, author: project.owner) }
def copy_bundle_to_backup_path(bundle_name, destination)
FileUtils.mkdir_p(File.join(Gitlab.config.backup.path, 'repositories', File.dirname(destination)))
FileUtils.cp(Rails.root.join('spec/fixtures/lib/backup', bundle_name), File.join(Gitlab.config.backup.path, 'repositories', destination))
end
it 'restores from repository bundles', :aggregate_failures do
copy_bundle_to_backup_path('project_repo.bundle', project.disk_path + '.bundle')
copy_bundle_to_backup_path('wiki_repo.bundle', project.disk_path + '.wiki.bundle')
copy_bundle_to_backup_path('design_repo.bundle', project.disk_path + '.design.bundle')
copy_bundle_to_backup_path('personal_snippet_repo.bundle', personal_snippet.disk_path + '.bundle')
copy_bundle_to_backup_path('project_snippet_repo.bundle', project_snippet.disk_path + '.bundle')
subject.start(:restore)
subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.enqueue(project, Gitlab::GlRepository::WIKI)
subject.enqueue(project, Gitlab::GlRepository::DESIGN)
subject.enqueue(personal_snippet, Gitlab::GlRepository::SNIPPET)
subject.enqueue(project_snippet, Gitlab::GlRepository::SNIPPET)
subject.wait
collect_commit_shas = -> (repo) { repo.commits('master', limit: 10).map(&:sha) }
expect(collect_commit_shas.call(project.repository)).to eq(['393a7d860a5a4c3cc736d7eb00604e3472bb95ec'])
expect(collect_commit_shas.call(project.wiki.repository)).to eq(['c74b9948d0088d703ee1fafeddd9ed9add2901ea'])
expect(collect_commit_shas.call(project.design_repository)).to eq(['c3cd4d7bd73a51a0f22045c3a4c871c435dc959d'])
expect(collect_commit_shas.call(personal_snippet.repository)).to eq(['3b3c067a3bc1d1b695b51e2be30c0f8cf698a06e'])
expect(collect_commit_shas.call(project_snippet.repository)).to eq(['6e44ba56a4748be361a841e759c20e421a1651a1'])
end
it 'cleans existing repositories', :aggregate_failures do
expect_next_instance_of(DesignManagement::Repository) do |repository|
expect(repository).to receive(:remove)
end
# 4 times = project repo + wiki repo + project_snippet repo + personal_snippet repo
expect(Repository).to receive(:new).exactly(4).times.and_wrap_original do |method, *original_args|
full_path, container, kwargs = original_args
repository = method.call(full_path, container, **kwargs)
expect(repository).to receive(:remove)
repository
end
subject.start(:restore)
subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.enqueue(project, Gitlab::GlRepository::WIKI)
subject.enqueue(project, Gitlab::GlRepository::DESIGN)
subject.enqueue(personal_snippet, Gitlab::GlRepository::SNIPPET)
subject.enqueue(project_snippet, Gitlab::GlRepository::SNIPPET)
subject.wait
end
context 'failure' do
before do
allow_next_instance_of(Repository) do |repository|
allow(repository).to receive(:create_repository) { raise 'Fail in tests' }
allow(repository).to receive(:create_from_bundle) { raise 'Fail in tests' }
end
end
it 'logs an appropriate message', :aggregate_failures do
subject.start(:restore)
subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.wait
expect(progress).to have_received(:puts).with("[Failed] restoring #{project.full_path} (#{project.disk_path})")
expect(progress).to have_received(:puts).with("Error Fail in tests")
end
end
end
end
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment