Commit ee7ce05c authored by Will Chandler's avatar Will Chandler Committed by Markus Koller

Exclude tmp dirs from backups

Add '--exclude' for tmp directories in located in paths included in
backups.  Currently a backup job may fail due to a file or directory
within a tmp dir being removed during the backup.

This change adds a new 'exclude' optional parameter to Backup::Files
to indicate directories that should not be included in backups. This
should prevent changes in temporary files from breaking backups and
also stop unneeded files from being included in the backup archive.
parent 33dc1a9c
---
title: Exclude tmp dirs from backups
merge_request: 41706
author:
type: fixed
......@@ -9,7 +9,7 @@ module Backup
def initialize(progress)
@progress = progress
super('artifacts', JobArtifactUploader.root)
super('artifacts', JobArtifactUploader.root, excludes: ['tmp'])
end
end
end
......@@ -7,14 +7,17 @@ module Backup
class Files
include Backup::Helper
attr_reader :name, :app_files_dir, :backup_tarball, :files_parent_dir
DEFAULT_EXCLUDE = 'lost+found'
def initialize(name, app_files_dir)
attr_reader :name, :app_files_dir, :backup_tarball, :excludes, :files_parent_dir
def initialize(name, app_files_dir, excludes: [])
@name = name
@app_files_dir = File.realpath(app_files_dir)
@files_parent_dir = File.realpath(File.join(@app_files_dir, '..'))
@backup_files_dir = File.join(Gitlab.config.backup.path, File.basename(@app_files_dir) )
@backup_tarball = File.join(Gitlab.config.backup.path, name + '.tar.gz')
@excludes = [DEFAULT_EXCLUDE].concat(excludes)
end
# Copy files from public/files to backup/files
......@@ -23,7 +26,7 @@ module Backup
FileUtils.rm_f(backup_tarball)
if ENV['STRATEGY'] == 'copy'
cmd = %W(rsync -a --exclude=lost+found #{app_files_dir} #{Gitlab.config.backup.path})
cmd = [%w(rsync -a), exclude_dirs(:rsync), %W(#{app_files_dir} #{Gitlab.config.backup.path})].flatten
output, status = Gitlab::Popen.popen(cmd)
unless status == 0
......@@ -31,10 +34,12 @@ module Backup
raise Backup::Error, 'Backup failed'
end
run_pipeline!([%W(#{tar} --exclude=lost+found -C #{@backup_files_dir} -cf - .), gzip_cmd], out: [backup_tarball, 'w', 0600])
tar_cmd = [tar, exclude_dirs(:tar), %W(-C #{@backup_files_dir} -cf - .)].flatten
run_pipeline!([tar_cmd, gzip_cmd], out: [backup_tarball, 'w', 0600])
FileUtils.rm_rf(@backup_files_dir)
else
run_pipeline!([%W(#{tar} --exclude=lost+found -C #{app_files_dir} -cf - .), gzip_cmd], out: [backup_tarball, 'w', 0600])
tar_cmd = [tar, exclude_dirs(:tar), %W(-C #{app_files_dir} -cf - .)].flatten
run_pipeline!([tar_cmd, gzip_cmd], out: [backup_tarball, 'w', 0600])
end
end
......@@ -81,5 +86,17 @@ module Backup
error = err_r.read
raise Backup::Error, "Backup failed. #{error}" unless error =~ regex
end
def exclude_dirs(fmt)
excludes.map do |s|
if s == DEFAULT_EXCLUDE
'--exclude=' + s
elsif fmt == :rsync
'--exclude=/' + s
elsif fmt == :tar
'--exclude=./' + s
end
end
end
end
end
......@@ -9,7 +9,7 @@ module Backup
def initialize(progress)
@progress = progress
super('pages', Gitlab.config.pages.path)
super('pages', Gitlab.config.pages.path, excludes: [::Projects::UpdatePagesService::TMP_EXTRACT_PATH])
end
end
end
......@@ -9,7 +9,7 @@ module Backup
def initialize(progress)
@progress = progress
super('uploads', File.join(Gitlab.config.uploads.storage_path, "uploads"))
super('uploads', File.join(Gitlab.config.uploads.storage_path, "uploads"), excludes: ['tmp'])
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Backup::Artifacts do
let(:progress) { StringIO.new }
subject(:backup) { described_class.new(progress) }
describe '#initialize' do
it 'uses the correct upload dir' do
Dir.mktmpdir do |tmpdir|
allow(JobArtifactUploader).to receive(:root) { "#{tmpdir}" }
expect(backup.app_files_dir).to eq("#{tmpdir}")
end
end
end
describe '#dump' do
before do
allow(File).to receive(:realpath).with('/var/gitlab-artifacts').and_return('/var/gitlab-artifacts')
allow(File).to receive(:realpath).with('/var/gitlab-artifacts/..').and_return('/var')
allow(JobArtifactUploader).to receive(:root) { '/var/gitlab-artifacts' }
end
it 'uses the correct artifact dir' do
expect(backup.app_files_dir).to eq('/var/gitlab-artifacts')
end
it 'excludes tmp from backup tar' do
expect(backup).to receive(:tar).and_return('blabla-tar')
expect(backup).to receive(:run_pipeline!).with([%w(blabla-tar --exclude=lost+found --exclude=./tmp -C /var/gitlab-artifacts -cf - .), 'gzip -c -1'], any_args)
backup.dump
end
end
end
......@@ -14,6 +14,8 @@ RSpec.describe Backup::Files do
allow(File).to receive(:exist?).and_return(true)
allow(File).to receive(:realpath).with("/var/gitlab-registry").and_return("/var/gitlab-registry")
allow(File).to receive(:realpath).with("/var/gitlab-registry/..").and_return("/var")
allow(File).to receive(:realpath).with("/var/gitlab-pages").and_return("/var/gitlab-pages")
allow(File).to receive(:realpath).with("/var/gitlab-pages/..").and_return("/var")
allow_any_instance_of(String).to receive(:color) do |string, _color|
string
......@@ -82,4 +84,48 @@ RSpec.describe Backup::Files do
end
end
end
describe '#dump' do
subject { described_class.new('pages', '/var/gitlab-pages', excludes: ['@pages.tmp']) }
before do
allow(subject).to receive(:run_pipeline!).and_return(true)
end
it 'raises no errors' do
expect { subject.dump }.not_to raise_error
end
it 'excludes tmp dirs from archive' do
expect(subject).to receive(:tar).and_return('blabla-tar')
expect(subject).to receive(:run_pipeline!).with([%w(blabla-tar --exclude=lost+found --exclude=./@pages.tmp -C /var/gitlab-pages -cf - .), 'gzip -c -1'], any_args)
subject.dump
end
describe 'with STRATEGY=copy' do
before do
stub_env('STRATEGY', 'copy')
end
it 'excludes tmp dirs from rsync' do
allow(Gitlab.config.backup).to receive(:path) { '/var/gitlab-backup' }
allow(File).to receive(:realpath).with("/var/gitlab-backup").and_return("/var/gitlab-backup")
expect(Gitlab::Popen).to receive(:popen).with(%w(rsync -a --exclude=lost+found --exclude=/@pages.tmp /var/gitlab-pages /var/gitlab-backup)).and_return(['', 0])
subject.dump
end
end
describe '#exclude_dirs' do
it 'prepends a leading dot slash to tar excludes' do
expect(subject.exclude_dirs(:tar)).to eq(['--exclude=lost+found', '--exclude=./@pages.tmp'])
end
it 'prepends a leading slash to rsync excludes' do
expect(subject.exclude_dirs(:rsync)).to eq(['--exclude=lost+found', '--exclude=/@pages.tmp'])
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Backup::Pages do
let(:progress) { StringIO.new }
subject { described_class.new(progress) }
before do
allow(File).to receive(:realpath).with("/var/gitlab-pages").and_return("/var/gitlab-pages")
allow(File).to receive(:realpath).with("/var/gitlab-pages/..").and_return("/var")
end
describe '#dump' do
it 'uses the correct pages dir' do
allow(Gitlab.config.pages).to receive(:path) { '/var/gitlab-pages' }
expect(subject.app_files_dir).to eq('/var/gitlab-pages')
end
it 'excludes tmp from backup tar' do
allow(Gitlab.config.pages).to receive(:path) { '/var/gitlab-pages' }
expect(subject).to receive(:tar).and_return('blabla-tar')
expect(subject).to receive(:run_pipeline!).with([%w(blabla-tar --exclude=lost+found --exclude=./@pages.tmp -C /var/gitlab-pages -cf - .), 'gzip -c -1'], any_args)
subject.dump
end
end
end
......@@ -18,4 +18,22 @@ RSpec.describe Backup::Uploads do
end
end
end
describe '#dump' do
before do
allow(File).to receive(:realpath).with('/var/uploads').and_return('/var/uploads')
allow(File).to receive(:realpath).with('/var/uploads/..').and_return('/var')
allow(Gitlab.config.uploads).to receive(:storage_path) { '/var' }
end
it 'uses the correct upload dir' do
expect(backup.app_files_dir).to eq('/var/uploads')
end
it 'excludes tmp from backup tar' do
expect(backup).to receive(:tar).and_return('blabla-tar')
expect(backup).to receive(:run_pipeline!).with([%w(blabla-tar --exclude=lost+found --exclude=./tmp -C /var/uploads -cf - .), 'gzip -c -1'], any_args)
backup.dump
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment