Commit fb790bf6 authored by James Fargher

Set gitaly-backup to create incremental backups

Incremental backups require a different filesystem layout. In
`gitaly-backup` this layout is selected with `-layout pointer`, which
writes `LATEST` files that point to the latest full backup and the
latest increment.

Note that we will likely need to use `-id` more broadly in the future,
but for now we only pass it to make tests repeatable.
parent e53b3f95
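
For orientation, a minimal sketch of the paths the pointer layout produces, pieced together from the spec expectations further down and the `LATEST` description above. The backups directory, repository `disk_path`, and backup ID below are made up, and the exact placement of the `LATEST` files is an assumption rather than something shown in this diff.

```ruby
# Print the paths a single repository backup is expected to occupy under the
# pointer layout. Only the "<disk_path>/<backup_id>/001.bundle" shape comes
# from the specs below; everything else is illustrative.
destination = '/var/opt/gitlab/backups/repositories' # hypothetical backup destination
disk_path   = '@hashed/aa/bb/aabb1234'               # hypothetical repository disk path
backup_id   = '20220101'                             # backup id, as used in the specs

puts File.join(destination, disk_path, backup_id, '001.bundle')           # bundle for this backup
puts File.join(destination, disk_path, backup_id, '001.custom_hooks.tar') # custom hooks archive (rake spec)
puts File.join(destination, disk_path, 'LATEST')                          # assumed: names the latest full backup
puts File.join(destination, disk_path, backup_id, 'LATEST')               # assumed: names the latest increment
```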
@@ -9,10 +9,13 @@ module Backup
     # @param [StringIO] progress IO interface to output progress
     # @param [Integer] max_parallelism max parallelism when running backups
     # @param [Integer] storage_parallelism max parallelism per storage (is affected by max_parallelism)
-    def initialize(progress, max_parallelism: nil, storage_parallelism: nil)
+    # @param [String] backup_id unique identifier for the backup
+    def initialize(progress, max_parallelism: nil, storage_parallelism: nil, incremental: false, backup_id: nil)
       @progress = progress
       @max_parallelism = max_parallelism
       @storage_parallelism = storage_parallelism
+      @incremental = incremental
+      @backup_id = backup_id
     end

     def start(type, backup_repos_path)
@@ -30,6 +33,13 @@ module Backup
       args = []
       args += ['-parallel', @max_parallelism.to_s] if @max_parallelism
       args += ['-parallel-storage', @storage_parallelism.to_s] if @storage_parallelism
+      if Feature.enabled?(:incremental_repository_backup, default_enabled: :yaml)
+        args += ['-layout', 'pointer']
+        if type == :create
+          args += ['-incremental'] if @incremental
+          args += ['-id', @backup_id] if @backup_id
+        end
+      end

       @input_stream, stdout, @thread = Open3.popen2(build_env, bin_path, command, '-path', backup_repos_path, *args)
...
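
Putting the argument handling above together, the resulting command lines look roughly as follows. This is a sketch only: the paths and backup ID are placeholders, the parallelism flags are omitted, and it assumes the `:incremental_repository_backup` feature flag is enabled.

```ruby
# Build the argv the same way the code above does, with made-up values.
backup_repos_path = '/var/opt/gitlab/backups/repositories'
backup_id         = '20220101'
incremental       = true

create_args = ['create', '-path', backup_repos_path, '-layout', 'pointer']
create_args += ['-incremental'] if incremental # only passed when an incremental backup is requested
create_args += ['-id', backup_id] if backup_id

restore_args = ['restore', '-path', backup_repos_path, '-layout', 'pointer']

puts ['gitaly-backup', *create_args].join(' ')
# => gitaly-backup create -path /var/opt/gitlab/backups/repositories -layout pointer -incremental -id 20220101
puts ['gitaly-backup', *restore_args].join(' ')
# => gitaly-backup restore -path /var/opt/gitlab/backups/repositories -layout pointer
```

Restore passes neither `-id` nor `-incremental`, so `gitaly-backup` is presumably left to follow the `LATEST` pointer files to the most recent full backup and increment.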
@@ -21,6 +21,7 @@ module Backup
       max_concurrency = ENV.fetch('GITLAB_BACKUP_MAX_CONCURRENCY', 1).to_i
       max_storage_concurrency = ENV.fetch('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 1).to_i
       force = ENV['force'] == 'yes'
+      incremental = Gitlab::Utils.to_boolean(ENV['INCREMENTAL'], default: false)

       @definitions = definitions || {
         'db' => TaskDefinition.new(
@@ -32,7 +33,7 @@ module Backup
           destination_path: 'repositories',
           destination_optional: true,
           task: Repositories.new(progress,
-                                 strategy: repository_backup_strategy,
+                                 strategy: repository_backup_strategy(incremental),
                                  max_concurrency: max_concurrency,
                                  max_storage_concurrency: max_storage_concurrency)
         ),
@@ -481,11 +482,11 @@ module Backup
       Gitlab.config.backup.upload.connection&.provider&.downcase == 'google'
     end

-    def repository_backup_strategy
+    def repository_backup_strategy(incremental)
       if Feature.enabled?(:gitaly_backup, default_enabled: :yaml)
         max_concurrency = ENV['GITLAB_BACKUP_MAX_CONCURRENCY'].presence
         max_storage_concurrency = ENV['GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY'].presence
-        Backup::GitalyBackup.new(progress, max_parallelism: max_concurrency, storage_parallelism: max_storage_concurrency)
+        Backup::GitalyBackup.new(progress, incremental: incremental, max_parallelism: max_concurrency, storage_parallelism: max_storage_concurrency)
       else
         Backup::GitalyRpcBackup.new(progress)
       end
...
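
For context, a hedged sketch of how the new `INCREMENTAL` knob reaches the strategy. The rake invocation in the comment is an assumption about operator usage rather than something added here, and the stand-in parser only models the `'yes'`/`'true'` handling that `Gitlab::Utils.to_boolean` is assumed to provide.

```ruby
# Stand-in for Gitlab::Utils.to_boolean; the real helper lives in the GitLab
# codebase, this stub only models the assumed truthy values.
def to_boolean(value, default:)
  return default if value.nil?

  %w[yes true 1].include?(value.to_s.downcase)
end

# Assumed operator usage (not part of this diff):
#   sudo gitlab-rake gitlab:backup:create INCREMENTAL=yes
incremental = to_boolean(ENV['INCREMENTAL'], default: false)

# Backup::Manager then builds the repository strategy with that flag, e.g.
#   Backup::GitalyBackup.new(progress, incremental: incremental, ...)
# when the :gitaly_backup feature flag is enabled.
puts "incremental backup requested: #{incremental}"
```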
@@ -6,6 +6,7 @@ RSpec.describe Backup::GitalyBackup do
   let(:max_parallelism) { nil }
   let(:storage_parallelism) { nil }
   let(:destination) { File.join(Gitlab.config.backup.path, 'repositories') }
+  let(:backup_id) { '20220101' }

   let(:progress) do
     Tempfile.new('progress').tap do |progress|
@@ -24,7 +25,7 @@ RSpec.describe Backup::GitalyBackup do
     progress.close
   end

-  subject { described_class.new(progress, max_parallelism: max_parallelism, storage_parallelism: storage_parallelism) }
+  subject { described_class.new(progress, max_parallelism: max_parallelism, storage_parallelism: storage_parallelism, backup_id: backup_id) }

   context 'unknown' do
     it 'fails to start unknown' do
@@ -41,7 +42,7 @@ RSpec.describe Backup::GitalyBackup do
       project_snippet = create(:project_snippet, :repository, project: project)
       personal_snippet = create(:personal_snippet, :repository, author: project.first_owner)

-      expect(Open3).to receive(:popen2).with(expected_env, anything, 'create', '-path', anything).and_call_original
+      expect(Open3).to receive(:popen2).with(expected_env, anything, 'create', '-path', anything, '-layout', 'pointer', '-id', backup_id).and_call_original

       subject.start(:create, destination)
       subject.enqueue(project, Gitlab::GlRepository::PROJECT)
@@ -51,18 +52,18 @@ RSpec.describe Backup::GitalyBackup do
       subject.enqueue(project_snippet, Gitlab::GlRepository::SNIPPET)
       subject.finish!

-      expect(File).to exist(File.join(destination, project.disk_path + '.bundle'))
-      expect(File).to exist(File.join(destination, project.disk_path + '.wiki.bundle'))
-      expect(File).to exist(File.join(destination, project.disk_path + '.design.bundle'))
-      expect(File).to exist(File.join(destination, personal_snippet.disk_path + '.bundle'))
-      expect(File).to exist(File.join(destination, project_snippet.disk_path + '.bundle'))
+      expect(File).to exist(File.join(destination, project.disk_path, backup_id, '001.bundle'))
+      expect(File).to exist(File.join(destination, project.disk_path + '.wiki', backup_id, '001.bundle'))
+      expect(File).to exist(File.join(destination, project.disk_path + '.design', backup_id, '001.bundle'))
+      expect(File).to exist(File.join(destination, personal_snippet.disk_path, backup_id, '001.bundle'))
+      expect(File).to exist(File.join(destination, project_snippet.disk_path, backup_id, '001.bundle'))
     end

     context 'parallel option set' do
       let(:max_parallelism) { 3 }

       it 'passes parallel option through' do
-        expect(Open3).to receive(:popen2).with(expected_env, anything, 'create', '-path', anything, '-parallel', '3').and_call_original
+        expect(Open3).to receive(:popen2).with(expected_env, anything, 'create', '-path', anything, '-parallel', '3', '-layout', 'pointer', '-id', backup_id).and_call_original

         subject.start(:create, destination)
         subject.finish!
@@ -73,7 +74,7 @@ RSpec.describe Backup::GitalyBackup do
       let(:storage_parallelism) { 3 }

       it 'passes parallel option through' do
-        expect(Open3).to receive(:popen2).with(expected_env, anything, 'create', '-path', anything, '-parallel-storage', '3').and_call_original
+        expect(Open3).to receive(:popen2).with(expected_env, anything, 'create', '-path', anything, '-parallel-storage', '3', '-layout', 'pointer', '-id', backup_id).and_call_original

         subject.start(:create, destination)
         subject.finish!
@@ -86,6 +87,36 @@ RSpec.describe Backup::GitalyBackup do
       subject.start(:create, destination)
       expect { subject.finish! }.to raise_error(::Backup::Error, 'gitaly-backup exit status 1')
     end
+
+    context 'feature flag incremental_repository_backup disabled' do
+      before do
+        stub_feature_flags(incremental_repository_backup: false)
+      end
+
+      it 'creates repository bundles', :aggregate_failures do
+        # Add data to the wiki, design repositories, and snippets, so they will be included in the dump.
+        create(:wiki_page, container: project)
+        create(:design, :with_file, issue: create(:issue, project: project))
+        project_snippet = create(:project_snippet, :repository, project: project)
+        personal_snippet = create(:personal_snippet, :repository, author: project.first_owner)
+
+        expect(Open3).to receive(:popen2).with(expected_env, anything, 'create', '-path', anything).and_call_original
+
+        subject.start(:create, destination)
+        subject.enqueue(project, Gitlab::GlRepository::PROJECT)
+        subject.enqueue(project, Gitlab::GlRepository::WIKI)
+        subject.enqueue(project, Gitlab::GlRepository::DESIGN)
+        subject.enqueue(personal_snippet, Gitlab::GlRepository::SNIPPET)
+        subject.enqueue(project_snippet, Gitlab::GlRepository::SNIPPET)
+        subject.finish!
+
+        expect(File).to exist(File.join(destination, project.disk_path + '.bundle'))
+        expect(File).to exist(File.join(destination, project.disk_path + '.wiki.bundle'))
+        expect(File).to exist(File.join(destination, project.disk_path + '.design.bundle'))
+        expect(File).to exist(File.join(destination, personal_snippet.disk_path + '.bundle'))
+        expect(File).to exist(File.join(destination, project_snippet.disk_path + '.bundle'))
+      end
+    end
   end

   context 'hashed storage' do
@@ -113,7 +144,7 @@ RSpec.describe Backup::GitalyBackup do
     end

     it 'passes through SSL envs' do
-      expect(Open3).to receive(:popen2).with(ssl_env, anything, 'create', '-path', anything).and_call_original
+      expect(Open3).to receive(:popen2).with(ssl_env, anything, 'create', '-path', anything, '-layout', 'pointer', '-id', backup_id).and_call_original

       subject.start(:create, destination)
       subject.finish!
@@ -138,7 +169,7 @@ RSpec.describe Backup::GitalyBackup do
       copy_bundle_to_backup_path('personal_snippet_repo.bundle', personal_snippet.disk_path + '.bundle')
       copy_bundle_to_backup_path('project_snippet_repo.bundle', project_snippet.disk_path + '.bundle')

-      expect(Open3).to receive(:popen2).with(expected_env, anything, 'restore', '-path', anything).and_call_original
+      expect(Open3).to receive(:popen2).with(expected_env, anything, 'restore', '-path', anything, '-layout', 'pointer').and_call_original

       subject.start(:restore, destination)
       subject.enqueue(project, Gitlab::GlRepository::PROJECT)
@@ -150,18 +181,18 @@ RSpec.describe Backup::GitalyBackup do
       collect_commit_shas = -> (repo) { repo.commits('master', limit: 10).map(&:sha) }

-      expect(collect_commit_shas.call(project.repository)).to eq(['393a7d860a5a4c3cc736d7eb00604e3472bb95ec'])
-      expect(collect_commit_shas.call(project.wiki.repository)).to eq(['c74b9948d0088d703ee1fafeddd9ed9add2901ea'])
-      expect(collect_commit_shas.call(project.design_repository)).to eq(['c3cd4d7bd73a51a0f22045c3a4c871c435dc959d'])
-      expect(collect_commit_shas.call(personal_snippet.repository)).to eq(['3b3c067a3bc1d1b695b51e2be30c0f8cf698a06e'])
-      expect(collect_commit_shas.call(project_snippet.repository)).to eq(['6e44ba56a4748be361a841e759c20e421a1651a1'])
+      expect(collect_commit_shas.call(project.repository)).to match_array(['393a7d860a5a4c3cc736d7eb00604e3472bb95ec'])
+      expect(collect_commit_shas.call(project.wiki.repository)).to match_array(['c74b9948d0088d703ee1fafeddd9ed9add2901ea'])
+      expect(collect_commit_shas.call(project.design_repository)).to match_array(['c3cd4d7bd73a51a0f22045c3a4c871c435dc959d'])
+      expect(collect_commit_shas.call(personal_snippet.repository)).to match_array(['3b3c067a3bc1d1b695b51e2be30c0f8cf698a06e'])
+      expect(collect_commit_shas.call(project_snippet.repository)).to match_array(['6e44ba56a4748be361a841e759c20e421a1651a1'])
     end

     context 'parallel option set' do
       let(:max_parallelism) { 3 }

       it 'passes parallel option through' do
-        expect(Open3).to receive(:popen2).with(expected_env, anything, 'restore', '-path', anything, '-parallel', '3').and_call_original
+        expect(Open3).to receive(:popen2).with(expected_env, anything, 'restore', '-path', anything, '-parallel', '3', '-layout', 'pointer').and_call_original

         subject.start(:restore, destination)
         subject.finish!
@@ -172,13 +203,45 @@ RSpec.describe Backup::GitalyBackup do
       let(:storage_parallelism) { 3 }

       it 'passes parallel option through' do
-        expect(Open3).to receive(:popen2).with(expected_env, anything, 'restore', '-path', anything, '-parallel-storage', '3').and_call_original
+        expect(Open3).to receive(:popen2).with(expected_env, anything, 'restore', '-path', anything, '-parallel-storage', '3', '-layout', 'pointer').and_call_original

         subject.start(:restore, destination)
         subject.finish!
       end
     end

+    context 'feature flag incremental_repository_backup disabled' do
+      before do
+        stub_feature_flags(incremental_repository_backup: false)
+      end
+
+      it 'restores from repository bundles', :aggregate_failures do
+        copy_bundle_to_backup_path('project_repo.bundle', project.disk_path + '.bundle')
+        copy_bundle_to_backup_path('wiki_repo.bundle', project.disk_path + '.wiki.bundle')
+        copy_bundle_to_backup_path('design_repo.bundle', project.disk_path + '.design.bundle')
+        copy_bundle_to_backup_path('personal_snippet_repo.bundle', personal_snippet.disk_path + '.bundle')
+        copy_bundle_to_backup_path('project_snippet_repo.bundle', project_snippet.disk_path + '.bundle')
+
+        expect(Open3).to receive(:popen2).with(expected_env, anything, 'restore', '-path', anything).and_call_original
+
+        subject.start(:restore, destination)
+        subject.enqueue(project, Gitlab::GlRepository::PROJECT)
+        subject.enqueue(project, Gitlab::GlRepository::WIKI)
+        subject.enqueue(project, Gitlab::GlRepository::DESIGN)
+        subject.enqueue(personal_snippet, Gitlab::GlRepository::SNIPPET)
+        subject.enqueue(project_snippet, Gitlab::GlRepository::SNIPPET)
+        subject.finish!
+
+        collect_commit_shas = -> (repo) { repo.commits('master', limit: 10).map(&:sha) }
+
+        expect(collect_commit_shas.call(project.repository)).to match_array(['393a7d860a5a4c3cc736d7eb00604e3472bb95ec'])
+        expect(collect_commit_shas.call(project.wiki.repository)).to match_array(['c74b9948d0088d703ee1fafeddd9ed9add2901ea'])
+        expect(collect_commit_shas.call(project.design_repository)).to match_array(['c3cd4d7bd73a51a0f22045c3a4c871c435dc959d'])
+        expect(collect_commit_shas.call(personal_snippet.repository)).to match_array(['3b3c067a3bc1d1b695b51e2be30c0f8cf698a06e'])
+        expect(collect_commit_shas.call(project_snippet.repository)).to match_array(['6e44ba56a4748be361a841e759c20e421a1651a1'])
+      end
+    end
+
     it 'raises when the exit code not zero' do
       expect(subject).to receive(:bin_path).and_return(Gitlab::Utils.which('false'))
...
@@ -176,8 +176,8 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
       expect(exit_status).to eq(0)
       expect(tar_contents).to match(user_backup_path)
-      expect(tar_contents).to match("#{user_backup_path}/custom_hooks.tar")
-      expect(tar_contents).to match("#{user_backup_path}.bundle")
+      expect(tar_contents).to match("#{user_backup_path}/.+/001.custom_hooks.tar")
+      expect(tar_contents).to match("#{user_backup_path}/.+/001.bundle")
     end

     it 'restores files correctly' do
@@ -360,14 +360,14 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
       expect(exit_status).to eq(0)

       [
-        "#{project_a.disk_path}.bundle",
-        "#{project_a.disk_path}.wiki.bundle",
-        "#{project_a.disk_path}.design.bundle",
-        "#{project_b.disk_path}.bundle",
-        "#{project_snippet_a.disk_path}.bundle",
-        "#{project_snippet_b.disk_path}.bundle"
+        "#{project_a.disk_path}/.+/001.bundle",
+        "#{project_a.disk_path}.wiki/.+/001.bundle",
+        "#{project_a.disk_path}.design/.+/001.bundle",
+        "#{project_b.disk_path}/.+/001.bundle",
+        "#{project_snippet_a.disk_path}/.+/001.bundle",
+        "#{project_snippet_b.disk_path}/.+/001.bundle"
       ].each do |repo_name|
-        expect(tar_lines.grep(/#{repo_name}/).size).to eq 1
+        expect(tar_lines).to include(a_string_matching(repo_name))
       end
     end
@@ -428,7 +428,7 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
       expect(::Backup::Repositories).to receive(:new)
         .with(anything, strategy: anything, max_concurrency: 5, max_storage_concurrency: 2)
         .and_call_original
-      expect(::Backup::GitalyBackup).to receive(:new).with(anything, max_parallelism: 5, storage_parallelism: 2).and_call_original
+      expect(::Backup::GitalyBackup).to receive(:new).with(anything, max_parallelism: 5, storage_parallelism: 2, incremental: false).and_call_original

       expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
     end
...