Commit 45f991a1 authored by George Koltsov's avatar George Koltsov

Fix unintentional cleanup of Import/Export tmp files

  - ImportExportCleanUpService's primary purpose is to delete
    export archive tar.gz file from object storage after 24 hours
    as well as any not cleaned up export files from tmp
    storage location
  - There is, however, a bug that can remove other projects'
    import files from tmp storage location unintentionally
  - This is happening due to the fact that an import request
    can have a tar.gz file that is more than 24 hours old,
    and because we use `find` system call that includes all
    files that are older than 24 hours, not being scoped to a
    specific project, it will pick up and remove other projects
    import files that happen to be old, regardless if the actual
    import attempt is recent or in progress
  - Such behaviour can be disruptive to project imports
  - Instead of deleting all files that are 24 hours old, scope
    `find` command to locate old files more accurately, not taking
    files from tar.gz into consideration
  - Also add logging for additional observability

Changelog: fixed
parent d6cc7c98
......@@ -11,6 +11,9 @@ class ImportExportUpload < ApplicationRecord
mount_uploader :import_file, ImportExportUploader
mount_uploader :export_file, ImportExportUploader
scope :updated_before, ->(date) { where('updated_at < ?', date) }
scope :with_export_file, -> { where.not(export_file: nil) }
def retrieve_upload(_identifier, paths)
Upload.find_by(model: self, path: paths)
end
......
......@@ -2,6 +2,7 @@
class ImportExportCleanUpService
LAST_MODIFIED_TIME_IN_MINUTES = 1440
DIR_DEPTH = 5
attr_reader :mmin, :path
......@@ -27,15 +28,42 @@ class ImportExportCleanUpService
end
def clean_up_export_files
Gitlab::Popen.popen(%W(find #{path} -not -path #{path} -mmin +#{mmin} -delete))
old_directories do |dir|
FileUtils.remove_entry(dir)
logger.info(
message: 'Removed Import/Export tmp directory',
dir_path: dir
)
end
end
# rubocop: disable CodeReuse/ActiveRecord
def clean_up_export_object_files
ImportExportUpload.where('updated_at < ?', mmin.minutes.ago).each do |upload|
ImportExportUpload.with_export_file.updated_before(mmin.minutes.ago).each do |upload|
upload.remove_export_file!
upload.save!
logger.info(
message: 'Removed Import/Export export_file',
project_id: upload.project_id,
group_id: upload.group_id
)
end
end
def old_directories
IO.popen(directories_cmd) do |find|
find.each_line(chomp: true) do |directory|
yield directory
end
end
end
# rubocop: enable CodeReuse/ActiveRecord
def directories_cmd
%W(find #{path} -mindepth #{DIR_DEPTH} -maxdepth #{DIR_DEPTH} -type d -not -path #{path} -mmin +#{mmin})
end
def logger
@logger ||= Gitlab::Import::Logger.build
end
end
......@@ -88,7 +88,7 @@ module Gitlab
when 'Project'
@exportable.disk_path
when 'Group'
@exportable.full_path
Storage::Hashed.new(@exportable, prefix: Storage::Hashed::GROUP_REPOSITORY_PATH_PREFIX).disk_path
else
raise Gitlab::ImportExport::Error, "Unsupported Exportable Type #{@exportable&.class}"
end
......
......@@ -37,6 +37,28 @@ RSpec.describe Gitlab::ImportExport::Shared do
end
end
context 'with a group on disk' do
describe '#base_path' do
it 'uses hashed storage path' do
group = create(:group)
subject = described_class.new(group)
base_path = %(/tmp/gitlab_exports/@groups/)
expect(subject.base_path).to match(/#{base_path}\h{2}\/\h{2}\/\h{64}/)
end
end
end
context 'when exportable type is unsupported' do
describe '#base_path' do
it 'raises' do
subject = described_class.new('test')
expect { subject.base_path }.to raise_error(Gitlab::ImportExport::Error, 'Unsupported Exportable Type String')
end
end
end
describe '#error' do
let(:error) { StandardError.new('Error importing into /my/folder Permission denied @ unlink_internal - /var/opt/gitlab/gitlab-rails/shared/a/b/c/uploads/file') }
......
......@@ -24,4 +24,23 @@ RSpec.describe ImportExportUpload do
context 'export' do
it_behaves_like 'stores the Import/Export file', :export_file
end
describe 'scopes' do
let_it_be(:upload1) { create(:import_export_upload, export_file: fixture_file_upload('spec/fixtures/project_export.tar.gz')) }
let_it_be(:upload2) { create(:import_export_upload) }
let_it_be(:upload3) { create(:import_export_upload, export_file: fixture_file_upload('spec/fixtures/project_export.tar.gz'), updated_at: 25.hours.ago) }
let_it_be(:upload4) { create(:import_export_upload, updated_at: 2.days.ago) }
describe '.with_export_file' do
it 'returns uploads with export file' do
expect(described_class.with_export_file).to contain_exactly(upload1, upload3)
end
end
describe '.updated_before' do
it 'returns uploads for a specified date' do
expect(described_class.updated_before(24.hours.ago)).to contain_exactly(upload3, upload4)
end
end
end
end
......@@ -8,7 +8,13 @@ RSpec.describe ImportExportCleanUpService do
let(:tmp_import_export_folder) { 'tmp/gitlab_exports' }
context 'when the import/export directory does not exist' do
before do
allow_next_instance_of(Gitlab::Import::Logger) do |logger|
allow(logger).to receive(:info)
end
end
context 'when the import/export tmp storage directory does not exist' do
it 'does not remove any archives' do
path = '/invalid/path/'
stub_repository_downloads_path(path)
......@@ -19,49 +25,84 @@ RSpec.describe ImportExportCleanUpService do
end
end
context 'when the import/export directory exists' do
it 'removes old files' do
in_directory_with_files(mtime: 2.days.ago) do |dir, files|
service.execute
files.each { |file| expect(File.exist?(file)).to eq false }
expect(File.directory?(dir)).to eq false
context 'when the import/export tmp storage directory exists' do
shared_examples 'removes old tmp files' do |subdir|
it 'removes old files and logs' do
expect_next_instance_of(Gitlab::Import::Logger) do |logger|
expect(logger)
.to receive(:info)
.with(
message: 'Removed Import/Export tmp directory',
dir_path: anything
)
end
validate_cleanup(subdir: subdir, mtime: 2.days.ago, expected: false)
end
end
it 'does not remove new files' do
in_directory_with_files(mtime: 2.hours.ago) do |dir, files|
service.execute
it 'does not remove new files or logs' do
expect(Gitlab::Import::Logger).not_to receive(:new)
files.each { |file| expect(File.exist?(file)).to eq true }
expect(File.directory?(dir)).to eq true
validate_cleanup(subdir: subdir, mtime: 2.hours.ago, expected: true)
end
end
include_examples 'removes old tmp files', '@hashed'
include_examples 'removes old tmp files', '@groups'
end
context 'with uploader exports' do
it 'removes old files' do
it 'removes old files and logs' do
upload = create(:import_export_upload,
updated_at: 2.days.ago,
export_file: fixture_file_upload('spec/fixtures/project_export.tar.gz'))
expect_next_instance_of(Gitlab::Import::Logger) do |logger|
expect(logger)
.to receive(:info)
.with(
message: 'Removed Import/Export export_file',
project_id: upload.project_id,
group_id: upload.group_id
)
end
expect { service.execute }.to change { upload.reload.export_file.file.nil? }.to(true)
expect(ImportExportUpload.where(export_file: nil)).to include(upload)
end
it 'does not remove new files' do
it 'does not remove new files or logs' do
upload = create(:import_export_upload,
updated_at: 1.hour.ago,
export_file: fixture_file_upload('spec/fixtures/project_export.tar.gz'))
expect(Gitlab::Import::Logger).not_to receive(:new)
expect { service.execute }.not_to change { upload.reload.export_file.file.nil? }
expect(ImportExportUpload.where.not(export_file: nil)).to include(upload)
end
end
def validate_cleanup(subdir:, mtime:, expected:)
in_directory_with_files(mtime: mtime, subdir: subdir) do |dir, files|
service.execute
files.each { |file| expect(File.exist?(file)).to eq(expected) }
expect(File.directory?(dir)).to eq(expected)
end
end
def in_directory_with_files(mtime:)
def in_directory_with_files(mtime:, subdir:)
Dir.mktmpdir do |tmpdir|
stub_repository_downloads_path(tmpdir)
dir = File.join(tmpdir, tmp_import_export_folder, 'subfolder')
hashed = Digest::SHA2.hexdigest(subdir)
subdir_path = [subdir, hashed[0..1], hashed[2..3], hashed, hashed[4..10]]
dir = File.join(tmpdir, tmp_import_export_folder, *[subdir_path])
FileUtils.mkdir_p(dir)
File.utime(mtime.to_i, mtime.to_i, dir)
files = FileUtils.touch(file_list(dir) + [dir], mtime: mtime.to_time)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment