Commit 6afc14d6 authored by Gabriel Mazetto's avatar Gabriel Mazetto

Ignore some empty folders during HashedStorage migration

We are also doing some extra work to prevent data-loss by instead of
removing a folder, doing a rename appending a timestamp
parent 0fcf0f0e
......@@ -16,6 +16,12 @@ module Projects
# Returns the logger currently in use
attr_reader :logger
def initialize(project:, old_disk_path:, logger: nil)
@project = project
@old_disk_path = old_disk_path
@logger = logger || Rails.logger # rubocop:disable Gitlab/RailsLogger
end
# Return whether this operation was skipped or not
#
# @return [Boolean] true if skipped of false otherwise
......@@ -23,6 +29,14 @@ module Projects
@skipped
end
# Check if target path has discardable content
#
# @param [String] new_path
# @return [Boolean] whether we can discard the target path or not
def target_path_discardable?(new_path)
false
end
protected
def move_folder!(old_path, new_path)
......@@ -34,8 +48,13 @@ module Projects
end
if File.exist?(new_path)
logger.error("Cannot move attachments from '#{old_path}' to '#{new_path}', target path already exist (PROJECT_ID=#{project.id})")
raise AttachmentCannotMoveError, "Target path '#{new_path}' already exists"
if target_path_discardable?(new_path)
discard_path!(new_path)
else
logger.error("Cannot move attachments from '#{old_path}' to '#{new_path}', target path already exist (PROJECT_ID=#{project.id})")
raise AttachmentCannotMoveError, "Target path '#{new_path}' already exists"
end
end
# Create base path folder on the new storage layout
......@@ -46,6 +65,16 @@ module Projects
true
end
# Rename a path adding a suffix in order to prevent data-loss.
#
# @param [String] new_path
def discard_path!(new_path)
discarded_path = "#{new_path}-#{Time.now.utc.to_i}"
logger.info("Moving existing empty attachments folder from '#{new_path}' to '#{discarded_path}', (PROJECT_ID=#{project.id})")
FileUtils.mv(new_path, discarded_path)
end
end
end
end
......@@ -10,7 +10,7 @@ module Projects
attr_reader :old_disk_path, :new_disk_path, :old_wiki_disk_path, :old_storage_version, :logger, :move_wiki
def initialize(project, old_disk_path, logger: nil)
def initialize(project:, old_disk_path:, logger: nil)
@project = project
@logger = logger || Gitlab::AppLogger
@old_disk_path = old_disk_path
......
......@@ -3,18 +3,19 @@
module Projects
module HashedStorage
class MigrateAttachmentsService < BaseAttachmentService
def initialize(project, old_disk_path, logger: nil)
@project = project
@logger = logger || Rails.logger # rubocop:disable Gitlab/RailsLogger
@old_disk_path = old_disk_path
extend ::Gitlab::Utils::Override
# List of paths that can be excluded while evaluation if a target can be discarded
DISCARDABLE_PATHS = %w(tmp tmp/cache tmp/work).freeze
def initialize(project:, old_disk_path:, logger: nil)
super
@skipped = false
end
def execute
origin = FileUploader.absolute_base_dir(project)
# It's possible that old_disk_path does not match project.disk_path.
# For example, that happens when we rename a project
origin.sub!(/#{Regexp.escape(project.full_path)}\z/, old_disk_path)
origin = find_old_attachments_path(project)
project.storage_version = ::Project::HASHED_STORAGE_FEATURES[:attachments]
target = FileUploader.absolute_base_dir(project)
......@@ -27,13 +28,38 @@ module Projects
project.save!(validate: false)
yield if block_given?
else
# Rollback changes
project.rollback!
end
result
end
override :target_path_discardable?
# Check if target path has discardable content
#
# @param [String] new_path
# @return [Boolean] whether we can discard the target path or not
def target_path_discardable?(new_path)
return false unless File.directory?(new_path)
found = Dir.glob(File.join(new_path, '**', '**'))
(found - discardable_paths(new_path)).empty?
end
private
def discardable_paths(new_path)
DISCARDABLE_PATHS.collect { |path| File.join(new_path, path) }
end
def find_old_attachments_path(project)
origin = FileUploader.absolute_base_dir(project)
# It's possible that old_disk_path does not match project.disk_path.
# For example, that happens when we rename a project
#
origin.sub(/#{Regexp.escape(project.full_path)}\z/, old_disk_path)
end
end
end
end
......
......@@ -14,12 +14,12 @@ module Projects
def execute
# Migrate repository from Legacy to Hashed Storage
unless project.hashed_storage?(:repository)
return false unless migrate_repository
return false unless migrate_repository_service.execute
end
# Migrate attachments from Legacy to Hashed Storage
unless project.hashed_storage?(:attachments)
return false unless migrate_attachments
return false unless migrate_attachments_service.execute
end
true
......@@ -27,12 +27,12 @@ module Projects
private
def migrate_repository
HashedStorage::MigrateRepositoryService.new(project, old_disk_path, logger: logger).execute
def migrate_repository_service
HashedStorage::MigrateRepositoryService.new(project: project, old_disk_path: old_disk_path, logger: logger)
end
def migrate_attachments
HashedStorage::MigrateAttachmentsService.new(project, old_disk_path, logger: logger).execute
def migrate_attachments_service
HashedStorage::MigrateAttachmentsService.new(project: project, old_disk_path: old_disk_path, logger: logger)
end
end
end
......
......@@ -3,14 +3,9 @@
module Projects
module HashedStorage
class RollbackAttachmentsService < BaseAttachmentService
def initialize(project, logger: nil)
@project = project
@logger = logger || Rails.logger # rubocop:disable Gitlab/RailsLogger
@old_disk_path = project.disk_path
end
def execute
origin = FileUploader.absolute_base_dir(project)
project.storage_version = ::Project::HASHED_STORAGE_FEATURES[:repository]
target = FileUploader.absolute_base_dir(project)
......
......@@ -5,32 +5,26 @@ module Projects
class RollbackService < BaseService
attr_reader :logger, :old_disk_path
def initialize(project, old_disk_path, logger: nil)
@project = project
@old_disk_path = old_disk_path
@logger = logger || Rails.logger # rubocop:disable Gitlab/RailsLogger
end
def execute
# Rollback attachments from Hashed Storage to Legacy
if project.hashed_storage?(:attachments)
return false unless rollback_attachments
return false unless rollback_attachments_service.execute
end
# Rollback repository from Hashed Storage to Legacy
if project.hashed_storage?(:repository)
rollback_repository
rollback_repository_service.execute
end
end
private
def rollback_attachments
HashedStorage::RollbackAttachmentsService.new(project, logger: logger).execute
def rollback_attachments_service
HashedStorage::RollbackAttachmentsService.new(project: project, old_disk_path: old_disk_path, logger: logger)
end
def rollback_repository
HashedStorage::RollbackRepositoryService.new(project, old_disk_path, logger: logger).execute
def rollback_repository_service
HashedStorage::RollbackRepositoryService.new(project: project, old_disk_path: old_disk_path, logger: logger)
end
end
end
......
---
title: 'Hashed Storage Migration: Handle failed attachment migrations with existing
target path'
merge_request: 19061
author:
type: fixed
......@@ -10,10 +10,12 @@ describe Projects::HashedStorage::MigrateAttachmentsService do
let(:hashed_storage) { Storage::HashedProject.new(project) }
let(:old_attachments_path) { legacy_storage.disk_path }
let(:new_attachments_path) { hashed_storage.disk_path }
let(:service) { described_class.new(project, old_attachments_path) }
set(:primary) { create(:geo_node, :primary) }
set(:secondary) { create(:geo_node) }
subject { described_class.new(project: project, old_disk_path: old_attachments_path) }
before do
stub_current_geo_node(primary)
end
......@@ -26,11 +28,11 @@ describe Projects::HashedStorage::MigrateAttachmentsService do
end
it 'returns true' do
expect(service.execute).to be_truthy
expect(subject.execute).to be_truthy
end
it 'creates a Geo::HashedStorageAttachmentsEvent' do
expect { service.execute }.to change(Geo::EventLog, :count).by(1)
expect { subject.execute }.to change(Geo::EventLog, :count).by(1)
event = Geo::EventLog.first.event
......@@ -44,12 +46,12 @@ describe Projects::HashedStorage::MigrateAttachmentsService do
context 'on failure' do
it 'does not create a Geo event when skipped' do
expect { service.execute }.not_to change { Geo::EventLog.count }
expect { subject.execute }.not_to change { Geo::EventLog.count }
end
it 'does not create a Geo event on failure' do
expect(service).to receive(:move_folder!).and_raise(::Projects::HashedStorage::AttachmentMigrationError)
expect { service.execute }.to raise_error(::Projects::HashedStorage::AttachmentMigrationError)
expect(subject).to receive(:move_folder!).and_raise(::Projects::HashedStorage::AttachmentMigrationError)
expect { subject.execute }.to raise_error(::Projects::HashedStorage::AttachmentMigrationError)
expect(Geo::EventLog.count).to eq(0)
end
end
......
......@@ -8,7 +8,8 @@ describe Projects::HashedStorage::MigrateRepositoryService do
let(:project) { create(:project, :empty_repo, :wiki_repo, :legacy_storage) }
let(:legacy_storage) { Storage::LegacyProject.new(project) }
let(:hashed_storage) { Storage::HashedProject.new(project) }
let(:service) { described_class.new(project, legacy_storage.disk_path) }
subject { described_class.new(project: project, old_disk_path: legacy_storage.disk_path) }
describe '#execute' do
set(:primary) { create(:geo_node, :primary) }
......@@ -20,7 +21,7 @@ describe Projects::HashedStorage::MigrateRepositoryService do
end
it 'creates a Geo::HashedStorageMigratedEvent on success' do
expect { service.execute }.to change(Geo::EventLog, :count).by(1)
expect { subject.execute }.to change(Geo::EventLog, :count).by(1)
event = Geo::EventLog.first.event
......@@ -39,10 +40,10 @@ describe Projects::HashedStorage::MigrateRepositoryService do
from_name = project.disk_path
to_name = hashed_storage.disk_path
allow(service).to receive(:move_repository).and_call_original
allow(service).to receive(:move_repository).with(from_name, to_name).once { false } # will disable first move only
allow(subject).to receive(:move_repository).and_call_original
allow(subject).to receive(:move_repository).with(from_name, to_name).once { false } # will disable first move only
expect { service.execute }.not_to change { Geo::EventLog.count }
expect { subject.execute }.not_to change { Geo::EventLog.count }
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Projects::HashedStorage::BaseAttachmentService do
let(:project) { create(:project, :repository, storage_version: 0, skip_disk_validation: true) }
subject(:service) { described_class.new(project: project, old_disk_path: project.full_path, logger: nil) }
describe '#old_disk_path' do
it { is_expected.to respond_to :old_disk_path }
end
describe '#new_disk_path' do
it { is_expected.to respond_to :new_disk_path }
end
describe '#skipped?' do
it { is_expected.to respond_to :skipped? }
end
describe '#target_path_discardable?' do
it 'returns false' do
expect(subject.target_path_discardable?('something/something')).to be_falsey
end
end
describe '#discard_path!' do
it 'renames target path adding a timestamp at the end' do
target_path = Dir.mktmpdir
expect(Dir.exist?(target_path)).to be_truthy
Timecop.freeze do
suffix = Time.now.utc.to_i
subject.send(:discard_path!, target_path)
expected_renamed_path = "#{target_path}-#{suffix}"
expect(Dir.exist?(target_path)).to be_falsey
expect(Dir.exist?(expected_renamed_path)).to be_truthy
end
end
end
describe '#move_folder!' do
context 'when old_path is not a directory' do
it 'adds information to the logger and returns true' do
Tempfile.create do |old_path|
new_path = "#{old_path}-new"
expect(subject.send(:move_folder!, old_path, new_path)).to be_truthy
end
end
end
end
end
......@@ -3,7 +3,7 @@
require 'spec_helper'
describe Projects::HashedStorage::MigrateAttachmentsService do
subject(:service) { described_class.new(project, project.full_path, logger: nil) }
subject(:service) { described_class.new(project: project, old_disk_path: project.full_path, logger: nil) }
let(:project) { create(:project, :repository, storage_version: 1, skip_disk_validation: true) }
let(:legacy_storage) { Storage::LegacyProject.new(project) }
......@@ -72,7 +72,23 @@ describe Projects::HashedStorage::MigrateAttachmentsService do
FileUtils.mkdir_p(base_path(hashed_storage))
end
it 'raises AttachmentCannotMoveError' do
it 'succeed when target is empty' do
expect { service.execute }.not_to raise_error
end
it 'succeed when target include only discardable items' do
Projects::HashedStorage::MigrateAttachmentsService::DISCARDABLE_PATHS.each do |path_fragment|
discardable_path = File.join(base_path(hashed_storage), path_fragment)
FileUtils.mkdir_p(discardable_path)
end
expect { service.execute }.not_to raise_error
end
it 'raises AttachmentCannotMoveError when there are non discardable items on target path' do
not_discardable_path = File.join(base_path(hashed_storage), 'something')
FileUtils.mkdir_p(not_discardable_path)
expect(FileUtils).not_to receive(:mv).with(base_path(legacy_storage), base_path(hashed_storage))
expect { service.execute }.to raise_error(Projects::HashedStorage::AttachmentCannotMoveError)
......@@ -100,6 +116,18 @@ describe Projects::HashedStorage::MigrateAttachmentsService do
end
end
context '#target_path_discardable?' do
it 'returns true when it include only items on the discardable list' do
hashed_attachments_path = File.join(base_path(hashed_storage))
Projects::HashedStorage::MigrateAttachmentsService::DISCARDABLE_PATHS.each do |path_fragment|
discardable_path = File.join(hashed_attachments_path, path_fragment)
FileUtils.mkdir_p(discardable_path)
end
expect(service.target_path_discardable?(hashed_attachments_path)).to be_truthy
end
end
def base_path(storage)
File.join(FileUploader.root, storage.disk_path)
end
......
......@@ -10,7 +10,7 @@ describe Projects::HashedStorage::MigrateRepositoryService do
let(:legacy_storage) { Storage::LegacyProject.new(project) }
let(:hashed_storage) { Storage::HashedProject.new(project) }
subject(:service) { described_class.new(project, project.disk_path) }
subject(:service) { described_class.new(project: project, old_disk_path: project.disk_path) }
describe '#execute' do
let(:old_disk_path) { legacy_storage.disk_path }
......
......@@ -10,13 +10,14 @@ describe Projects::HashedStorage::MigrationService do
describe '#execute' do
context 'repository migration' do
let(:repository_service) { Projects::HashedStorage::MigrateRepositoryService.new(project, project.full_path, logger: logger) }
let(:repository_service) do
Projects::HashedStorage::MigrateRepositoryService.new(project: project,
old_disk_path: project.full_path,
logger: logger)
end
it 'delegates migration to Projects::HashedStorage::MigrateRepositoryService' do
expect(Projects::HashedStorage::MigrateRepositoryService)
.to receive(:new)
.with(project, project.full_path, logger: logger)
.and_return(repository_service)
expect(service).to receive(:migrate_repository_service).and_return(repository_service)
expect(repository_service).to receive(:execute)
service.execute
......@@ -31,13 +32,14 @@ describe Projects::HashedStorage::MigrationService do
end
context 'attachments migration' do
let(:attachments_service) { Projects::HashedStorage::MigrateAttachmentsService.new(project, project.full_path, logger: logger) }
let(:attachments_service) do
Projects::HashedStorage::MigrateAttachmentsService.new(project: project,
old_disk_path: project.full_path,
logger: logger)
end
it 'delegates migration to Projects::HashedStorage::MigrateRepositoryService' do
expect(Projects::HashedStorage::MigrateAttachmentsService)
.to receive(:new)
.with(project, project.full_path, logger: logger)
.and_return(attachments_service)
expect(service).to receive(:migrate_attachments_service).and_return(attachments_service)
expect(attachments_service).to receive(:execute)
service.execute
......
......@@ -3,7 +3,7 @@
require 'spec_helper'
describe Projects::HashedStorage::RollbackAttachmentsService do
subject(:service) { described_class.new(project, logger: nil) }
subject(:service) { described_class.new(project: project, old_disk_path: project.disk_path, logger: nil) }
let(:project) { create(:project, :repository, skip_disk_validation: true) }
let(:legacy_storage) { Storage::LegacyProject.new(project) }
......
......@@ -10,7 +10,7 @@ describe Projects::HashedStorage::RollbackRepositoryService, :clean_gitlab_redis
let(:legacy_storage) { Storage::LegacyProject.new(project) }
let(:hashed_storage) { Storage::HashedProject.new(project) }
subject(:service) { described_class.new(project, project.disk_path) }
subject(:service) { described_class.new(project: project, old_disk_path: project.disk_path) }
describe '#execute' do
let(:old_disk_path) { hashed_storage.disk_path }
......
......@@ -6,17 +6,15 @@ describe Projects::HashedStorage::RollbackService do
let(:project) { create(:project, :empty_repo, :wiki_repo) }
let(:logger) { double }
subject(:service) { described_class.new(project, project.full_path, logger: logger) }
subject(:service) { described_class.new(project, project.disk_path, logger: logger) }
describe '#execute' do
context 'attachments rollback' do
let(:attachments_service_class) { Projects::HashedStorage::RollbackAttachmentsService }
let(:attachments_service) { attachments_service_class.new(project, logger: logger) }
let(:attachments_service) { attachments_service_class.new(project: project, old_disk_path: project.disk_path, logger: logger) }
it 'delegates rollback to Projects::HashedStorage::RollbackAttachmentsService' do
expect(attachments_service_class).to receive(:new)
.with(project, logger: logger)
.and_return(attachments_service)
expect(service).to receive(:rollback_attachments_service).and_return(attachments_service)
expect(attachments_service).to receive(:execute)
service.execute
......@@ -31,15 +29,12 @@ describe Projects::HashedStorage::RollbackService do
end
context 'repository rollback' do
let(:project) { create(:project, :empty_repo, :wiki_repo, storage_version: ::Project::HASHED_STORAGE_FEATURES[:repository]) }
let(:repository_service_class) { Projects::HashedStorage::RollbackRepositoryService }
let(:repository_service) { repository_service_class.new(project, project.full_path, logger: logger) }
let(:repository_service) { repository_service_class.new(project: project, old_disk_path: project.disk_path, logger: logger) }
it 'delegates rollback to RollbackRepositoryService' do
project.storage_version = ::Project::HASHED_STORAGE_FEATURES[:repository]
expect(repository_service_class).to receive(:new)
.with(project, project.full_path, logger: logger)
.and_return(repository_service)
expect(service).to receive(:rollback_repository_service).and_return(repository_service)
expect(repository_service).to receive(:execute)
service.execute
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment