Commit 131c34d1 authored by Michael Kozono's avatar Michael Kozono

Merge branch '10586-geo-file-replication-refactor' into 'master'

Geo: Refactor File Replication to allow for Object Storage

Closes #10586

See merge request gitlab-org/gitlab!15606
parents 821c7f63 183b4233
......@@ -91,7 +91,7 @@ module UploadsActions
upload_paths = uploader.upload_paths(params[:filename])
upload = Upload.find_by(model: model, uploader: uploader_class.to_s, path: upload_paths)
upload&.build_uploader
upload&.retrieve_uploader
end
# rubocop: enable CodeReuse/ActiveRecord
......
......@@ -15,7 +15,7 @@ class Upload < ApplicationRecord
scope :with_files_stored_remotely, -> { where(store: ObjectStorage::Store::REMOTE) }
before_save :calculate_checksum!, if: :foreground_checksummable?
after_commit :schedule_checksum, if: :checksummable?
after_commit :schedule_checksum, if: :needs_checksum?
# as the FileUploader is not mounted, the default CarrierWave ActiveRecord
# hooks are not executed and the file will not be deleted
......@@ -53,20 +53,41 @@ class Upload < ApplicationRecord
def calculate_checksum!
self.checksum = nil
return unless checksummable?
return unless needs_checksum?
self.checksum = Digest::SHA256.file(absolute_path).hexdigest
end
# Initialize the associated Uploader class with current model
#
# @param [String] mounted_as
# @return [GitlabUploader] one of the subclasses, defined at the model's uploader attribute
def build_uploader(mounted_as = nil)
uploader_class.new(model, mounted_as || mount_point).tap do |uploader|
uploader.upload = self
end
end
# Initialize the associated Uploader class with current model and
# retrieve existing file from the store to a local cache
#
# @param [String] mounted_as
# @return [GitlabUploader] one of the subclasses, defined at the model's uploader attribute
def retrieve_uploader(mounted_as = nil)
build_uploader(mounted_as).tap do |uploader|
uploader.retrieve_from_store!(identifier)
end
end
# This checks for existence of the upload on storage
#
# @return [Boolean] whether upload exists on storage
def exist?
exist = File.exist?(absolute_path)
exist = if local?
File.exist?(absolute_path)
else
retrieve_uploader.exists?
end
# Help sysadmins find missing upload files
if persisted? && !exist
......@@ -91,18 +112,24 @@ class Upload < ApplicationRecord
store == ObjectStorage::Store::LOCAL
end
# Returns whether generating checksum is needed
#
# This takes into account whether file exists, if any checksum exists
# or if the storage has checksum generation code implemented
#
# @return [Boolean] whether generating a checksum is needed
def needs_checksum?
checksum.nil? && local? && exist?
end
private
def delete_file!
build_uploader.remove!
end
def checksummable?
checksum.nil? && local? && exist?
retrieve_uploader.remove!
end
def foreground_checksummable?
checksummable? && size <= CHECKSUM_THRESHOLD
needs_checksum? && size <= CHECKSUM_THRESHOLD
end
def schedule_checksum
......
......@@ -19,7 +19,7 @@ class AvatarUploader < GitlabUploader
end
def absolute_path
self.class.absolute_path(model.avatar.upload)
self.class.absolute_path(upload)
end
private
......
......@@ -99,6 +99,17 @@ class GitlabUploader < CarrierWave::Uploader::Base
end
end
# Used to replace an existing upload with another +file+ without modifying stored metadata
# Use this method only to repair/replace an existing upload, or to upload to a Geo secondary node
#
# @param [CarrierWave::SanitizedFile] file that will replace existing upload
# @return CarrierWave::SanitizedFile
def replace_file_without_saving!(file)
raise ArgumentError, 'should be a CarrierWave::SanitizedFile' unless file.is_a? CarrierWave::SanitizedFile
storage.store!(file)
end
private
# Designed to be overridden by child uploaders that have a dynamic path
......
......@@ -12,7 +12,7 @@ class ImportIssuesCsvWorker
@project = Project.find(project_id)
@upload = Upload.find(upload_id)
importer = Issues::ImportCsvService.new(@user, @project, @upload.build_uploader)
importer = Issues::ImportCsvService.new(@user, @project, @upload.retrieve_uploader)
importer.execute
@upload.destroy
......
......@@ -22,7 +22,7 @@ module ObjectStorage
def build_uploader(subject, mount_point)
case subject
when Upload then subject.build_uploader(mount_point)
when Upload then subject.retrieve_uploader(mount_point)
else
subject.send(mount_point) # rubocop:disable GitlabSecurity/PublicSend
end
......
......@@ -119,7 +119,7 @@ module ObjectStorage
end
def build_uploaders(uploads)
uploads.map { |upload| upload.build_uploader(@mounted_as) }
uploads.map { |upload| upload.retrieve_uploader(@mounted_as) }
end
def migrate(uploads)
......
......@@ -79,7 +79,7 @@ module Geo
when :job_artifact
Ci::JobArtifact.find(object_db_id).file
when *Gitlab::Geo::Replication::USER_UPLOADS_OBJECT_TYPES
Upload.find(object_db_id).build_uploader
Upload.find(object_db_id).retrieve_uploader
else
raise NameError, "Unrecognized type: #{object_type}"
end
......
......@@ -35,7 +35,7 @@ module API
if response[:code] == :ok
file = response[:file]
present_disk_file!(file.path, file.filename)
present_carrierwave_file!(file)
else
error! response, response.delete(:code)
end
......
......@@ -6,52 +6,97 @@ module Gitlab
class BaseTransfer
include LogHelpers
attr_reader :file_type, :file_id, :filename, :expected_checksum, :request_data
attr_reader :file_type, :file_id, :filename, :uploader, :expected_checksum, :request_data
TEMP_PREFIX = 'tmp_'.freeze
def initialize(file_type, file_id, filename, expected_checksum, request_data)
def initialize(file_type:, file_id:, request_data:, expected_checksum: nil, filename: nil, uploader: nil)
@file_type = file_type
@file_id = file_id
@filename = filename
@uploader = uploader
@expected_checksum = expected_checksum
@request_data = request_data
end
# Return whether the transfer will be attempted or not
#
# @return [Boolean] whether preconditions for a transfer are fulfilled
def can_transfer?
unless Gitlab::Geo.secondary?
log_error('Skipping transfer as this is not a Secondary node')
return false
end
unless Gitlab::Geo.primary_node
log_error 'Skipping transfer as there is no Primary node to download from'
return false
end
if filename && File.directory?(filename)
log_error 'Skipping transfer as destination exist and is a directory', filename: filename
return false
end
true
end
# @return [String] URL to download the resource from
def resource_url
Gitlab::Geo.primary_node.geo_transfers_url(file_type, file_id.to_s)
end
# Returns Result object with success boolean and number of bytes downloaded.
def download_from_primary
return failure unless Gitlab::Geo.secondary?
return failure if File.directory?(filename)
return skipped_result unless can_transfer?
primary = Gitlab::Geo.primary_node
unless ensure_destination_path_exists
log_error 'Skipping transfer as we cannot create the destination directory'
return failure unless primary
return skipped_result
end
url = primary.geo_transfers_url(file_type, file_id.to_s)
req_headers = TransferRequest.new(request_data).headers
return failure unless ensure_path_exists
download_file(resource_url, req_headers)
end
def stream_from_primary_to_object_storage
return skipped_result unless can_transfer?
download_file(url, req_headers)
req_headers = TransferRequest.new(request_data).headers
transfer_file_to_object_storage(resource_url, req_headers)
end
class Result
attr_reader :success, :bytes_downloaded, :primary_missing_file
attr_reader :success, :bytes_downloaded, :primary_missing_file, :skipped
def initialize(success:, bytes_downloaded:, primary_missing_file: false)
def initialize(success:, bytes_downloaded:, primary_missing_file: false, skipped: false)
@success = success
@bytes_downloaded = bytes_downloaded
@primary_missing_file = primary_missing_file
@skipped = skipped
end
end
private
def failure(bytes_downloaded: 0, primary_missing_file: false)
def skipped_result
Result.new(success: false, bytes_downloaded: 0, skipped: true)
end
def failure_result(bytes_downloaded: 0, primary_missing_file: false)
Result.new(success: false, bytes_downloaded: bytes_downloaded, primary_missing_file: primary_missing_file)
end
def ensure_path_exists
# Ensure entire destination path exist or try to create when not available
#
# @return [Boolean] whether destination path exists or could be created
def ensure_destination_path_exists
path = Pathname.new(filename)
dir = path.dirname
......@@ -60,50 +105,60 @@ module Gitlab
begin
FileUtils.mkdir_p(dir)
rescue => e
log_error("unable to create directory #{dir}: #{e}")
log_error("Unable to create directory #{dir}: #{e}")
return false
end
true
end
# Use Gitlab::HTTP for now but switch to curb if performance becomes
# an issue
# Download file from informed URL using HTTP.rb
#
# @return [Result] Object with transfer status and information
def download_file(url, req_headers)
file_size = -1
temp_file = open_temp_file(filename)
return failure unless temp_file
return failure_result unless temp_file
begin
response = Gitlab::HTTP.get(url, allow_local_requests: true, headers: req_headers, stream_body: true) do |fragment|
temp_file.write(fragment)
end
# Make the request
response = ::HTTP.get(url, headers: req_headers)
temp_file.flush
# Check for failures
unless response.status.success?
log_error("Unsuccessful download", filename: filename, status_code: response.status.code, reason: response.status.reason, url: url)
unless response.success?
log_error("Unsuccessful download", filename: filename, response_code: response.code, response_msg: response.try(:msg), url: url)
return failure(primary_missing_file: primary_missing_file?(response, temp_file))
return failure_result(primary_missing_file: primary_missing_file?(response))
end
if File.directory?(filename)
log_error("Destination file is a directory", filename: filename)
return failure
# Stream to temporary file on disk
response.body.each do |chunk|
temp_file.write(chunk)
end
# Make sure file is written to the disk
# This is required to get correct file size.
temp_file.flush
file_size = File.stat(temp_file.path).size
# Check for checksum mismatch
if checksum_mismatch?(temp_file.path)
log_error("Downloaded file checksum mismatch", expected_checksum: expected_checksum, actual_checksum: @actual_checksum, file_size_bytes: file_size)
return failure(bytes_downloaded: file_size)
return failure_result(bytes_downloaded: file_size)
end
# Move transferred file to the target location
FileUtils.mv(temp_file.path, filename)
log_info("Successful downloaded", filename: filename, file_size_bytes: file_size)
rescue StandardError, Gitlab::HTTP::Error => e
log_info("Successfully downloaded", filename: filename, file_size_bytes: file_size)
rescue StandardError, ::HTTP::Error => e
log_error("Error downloading file", error: e, filename: filename, url: url)
return failure_result
ensure
temp_file.close
temp_file.unlink
......@@ -112,12 +167,57 @@ module Gitlab
Result.new(success: file_size > -1, bytes_downloaded: [file_size, 0].max)
end
def primary_missing_file?(response, temp_file)
body = File.read(temp_file.path) if File.exist?(temp_file.path)
def transfer_file_to_object_storage(url, req_headers)
file_size = -1
# Create a temporary file for Object Storage transfers
temp_file = Tempfile.new("#{TEMP_PREFIX}-#{file_type}-#{file_id}")
temp_file.chmod(default_permissions)
temp_file.binmode
return failure_result unless temp_file
begin
# Make the request
response = ::HTTP.follow.get(url, headers: req_headers)
# Check for failures
unless response.status.success?
log_error("Unsuccessful download", file_type: file_type, file_id: file_id,
status_code: response.status.code, reason: response.status.reason, url: url)
return failure_result(primary_missing_file: primary_missing_file?(response))
end
# Stream to temporary file on disk
response.body.each do |chunk|
temp_file.write(chunk)
end
if response.code == 404 && body.present?
file_size = temp_file.size
# Upload file to Object Storage
uploader.replace_file_without_saving!(CarrierWave::SanitizedFile.new(temp_file))
log_info("Successfully transferred", file_type: file_type, file_id: file_id,
file_size_bytes: file_size)
rescue => e
log_error("Error transferring file", error: e, file_type: file_type, file_id: file_id, url: url)
return failure_result
ensure
temp_file.close
temp_file.unlink
end
Result.new(success: file_size > -1, bytes_downloaded: [file_size, 0].max)
end
def primary_missing_file?(response)
if response.status.not_found?
begin
json_response = JSON.parse(body)
json_response = response.parse
return code_file_not_found?(json_response['geo_code'])
rescue JSON::ParserError
end
......@@ -141,8 +241,8 @@ module Gitlab
temp.chmod(default_permissions)
temp.binmode
temp
rescue StandardError => e
log_error("Error creating temporary file", error: e)
rescue => e
log_error("Error creating temporary file", error: e, filename: target_filename)
nil
end
......
......@@ -19,7 +19,14 @@ module Gitlab
return missing_on_primary if upload.model.nil?
transfer = ::Gitlab::Geo::Replication::FileTransfer.new(object_type.to_sym, upload)
Result.from_transfer_result(transfer.download_from_primary)
result = if upload.local?
transfer.download_from_primary
else
transfer.stream_from_primary_to_object_storage
end
Result.from_transfer_result(result)
end
private
......
......@@ -13,7 +13,7 @@ module Gitlab
return file_not_found(recorded_file) unless recorded_file.exist?
return error('Upload not found') unless valid?
success(CarrierWave::SanitizedFile.new(recorded_file.absolute_path))
success(recorded_file.retrieve_uploader)
end
private
......
......@@ -8,20 +8,43 @@ module Gitlab
# * Saving it in the right place on successful download
# * Returning a detailed Result object
class FileTransfer < BaseTransfer
# Initialize a transfer service for a specified Upload
#
# @param [Symbol] file_type
# @param [Upload] upload
def initialize(file_type, upload)
super(
file_type,
upload.id,
upload.absolute_path,
upload.checksum,
build_request_data(file_type, upload)
)
if upload.local?
super(local_file_attributes(file_type, upload))
else
super(remote_file_attributes(file_type, upload))
end
rescue ObjectStorage::RemoteStoreError
::Gitlab::Geo::Logger.warn "Cannot transfer a remote object."
::Gitlab::Geo::Logger.warn "Error trying to transfer a remote object as a local object."
end
private
def local_file_attributes(file_type, upload)
{
file_type: file_type,
file_id: upload.id,
filename: upload.absolute_path,
uploader: upload.retrieve_uploader,
expected_checksum: upload.checksum,
request_data: build_request_data(file_type, upload)
}
end
def remote_file_attributes(file_type, upload)
{
file_type: file_type,
file_id: upload.id,
uploader: upload.retrieve_uploader,
request_data: build_request_data(file_type, upload)
}
end
def build_request_data(file_type, upload)
{
id: upload.model_id,
......
......@@ -14,7 +14,14 @@ module Gitlab
return fail_before_transfer unless job_artifact.present?
transfer = ::Gitlab::Geo::Replication::JobArtifactTransfer.new(job_artifact)
Result.from_transfer_result(transfer.download_from_primary)
result = if job_artifact.local_store?
transfer.download_from_primary
else
transfer.stream_from_primary_to_object_storage
end
Result.from_transfer_result(result)
end
private
......
......@@ -8,18 +8,39 @@ module Gitlab
# * Saving it in the right place on successful download
# * Returning a detailed Result object
class JobArtifactTransfer < BaseTransfer
# Initialize a transfer service for a specified Ci::JobArtifact
#
# @param [Ci::JobArtifact] job_artifact
def initialize(job_artifact)
super(
:job_artifact,
job_artifact.id,
job_artifact.file.path,
job_artifact.file_sha256,
job_artifact_request_data(job_artifact)
)
if job_artifact.local_store?
super(local_job_artifact_attributes(job_artifact))
else
super(remote_job_artifact_attributes(job_artifact))
end
end
private
def local_job_artifact_attributes(job_artifact)
{
file_type: :job_artifact,
file_id: job_artifact.id,
filename: job_artifact.file.path,
uploader: job_artifact.file,
expected_checksum: job_artifact.file_sha256,
request_data: job_artifact_request_data(job_artifact)
}
end
def remote_job_artifact_attributes(job_artifact)
{
file_type: :job_artifact,
file_id: job_artifact.id,
uploader: job_artifact.file,
request_data: job_artifact_request_data(job_artifact)
}
end
def job_artifact_request_data(job_artifact)
{
id: job_artifact.id,
......
......@@ -14,7 +14,14 @@ module Gitlab
return fail_before_transfer unless lfs_object.present?
transfer = ::Gitlab::Geo::Replication::LfsTransfer.new(lfs_object)
Result.from_transfer_result(transfer.download_from_primary)
result = if lfs_object.local_store?
transfer.download_from_primary
else
transfer.stream_from_primary_to_object_storage
end
Result.from_transfer_result(result)
end
private
......
......@@ -8,18 +8,40 @@ module Gitlab
# * Saving it in the right place on successful download
# * Returning a detailed Result object
class LfsTransfer < BaseTransfer
# Initialize a transfer service for a specified LfsObject
#
# @param [LfsObject] lfs_object
def initialize(lfs_object)
super(
:lfs,
lfs_object.id,
lfs_object.file.path,
lfs_object.oid,
lfs_request_data(lfs_object)
)
if lfs_object.local_store?
super(local_lfs_object_attributes(lfs_object))
else
super(remote_lfs_object_attributes(lfs_object))
end
end
private
def local_lfs_object_attributes(lfs_object)
{
file_type: :lfs,
file_id: lfs_object.id,
filename: lfs_object.file.path,
uploader: lfs_object.file,
expected_checksum: lfs_object.oid,
request_data: lfs_request_data(lfs_object)
}
end
def remote_lfs_object_attributes(lfs_object)
{
file_type: :lfs,
file_id: lfs_object.id,
uploader: lfs_object.file,
expected_checksum: lfs_object.oid,
request_data: lfs_request_data(lfs_object)
}
end
def lfs_request_data(lfs_object)
{
checksum: lfs_object.oid,
......
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::Geo::Replication::BaseTransfer do
include ::EE::GeoHelpers
set(:primary_node) { create(:geo_node, :primary) }
set(:secondary_node) { create(:geo_node) }
describe '#can_transfer?' do
subject do
described_class.new(file_type: :avatar, file_id: 1, filename: Tempfile.new,
expected_checksum: nil, request_data: nil)
end
before do
stub_current_geo_node(secondary_node)
end
context 'when not a primary node' do
it 'returns false when not a secondary node' do
expect(Gitlab::Geo).to receive(:secondary?) { false }
expect(subject.can_transfer?).to be_falsey
end
end
context 'when no primary node exists' do
it 'returns false' do
expect(Gitlab::Geo).to receive(:primary_node) { nil }
expect(subject.can_transfer?).to be_falsey
end
end
context 'when destination filename is a directory' do
it 'returns false' do
subject = described_class.new(file_type: :avatar, file_id: 1, filename: Dir::Tmpname.tmpdir,
expected_checksum: nil, request_data: nil)
expect(subject.can_transfer?).to be_falsey
end
end
context 'when no filename is informed' do
it 'returns true' do
subject = described_class.new(file_type: :avatar, file_id: 1,
expected_checksum: nil, request_data: nil)
expect(subject.can_transfer?).to be_truthy
end
end
it 'returns true when is a secondary, a primary exists and filename doesnt point to an existing directory' do
expect(subject.can_transfer?).to be_truthy
end
end
end
......@@ -43,12 +43,12 @@ describe Gitlab::Geo::Replication::FileDownloader, :geo do
def stub_geo_file_transfer(file_type, upload)
url = primary_node.geo_transfers_url(file_type, upload.id.to_s)
stub_request(:get, url).to_return(status: 200, body: upload.build_uploader.file.read, headers: {})
stub_request(:get, url).to_return(status: 200, body: upload.retrieve_uploader.file.read, headers: {})
end
def stub_geo_file_transfer_object_storage(file_type, upload)
url = primary_node.geo_transfers_url(file_type, upload.id.to_s)
stub_request(:get, url).to_return(status: 307, body: upload.build_uploader.url, headers: {})
stub_request(:get, url).to_return(status: 307, body: upload.retrieve_uploader.url, headers: {})
end
end
......@@ -9,10 +9,6 @@ describe Gitlab::Geo::Replication::FileRetriever, :geo do
context 'when the upload exists' do
let(:retriever) { described_class.new(upload.id, message) }
before do
expect(Upload).to receive(:find_by).with(id: upload.id).and_return(upload)
end
context 'when the upload has a file' do
before do
FileUtils.mkdir_p(File.dirname(upload.absolute_path))
......@@ -24,7 +20,7 @@ describe Gitlab::Geo::Replication::FileRetriever, :geo do
it 'returns the file in a success hash' do
expect(subject).to include(code: :ok, message: 'Success')
expect(subject[:file].file).to eq(upload.absolute_path)
expect(subject[:file].file.path).to eq(upload.absolute_path)
end
end
......@@ -74,19 +70,19 @@ describe Gitlab::Geo::Replication::FileRetriever, :geo do
describe '#execute' do
context 'user avatar' do
it_behaves_like "returns necessary params for sending a file from an API endpoint" do
let(:upload) { create(:upload, model: build(:user)) }
let(:upload) { create(:upload, model: create(:user)) }
end
end
context 'group avatar' do
it_behaves_like "returns necessary params for sending a file from an API endpoint" do
let(:upload) { create(:upload, model: build(:group)) }
let(:upload) { create(:upload, model: create(:group)) }
end
end
context 'project avatar' do
it_behaves_like "returns necessary params for sending a file from an API endpoint" do
let(:upload) { create(:upload, model: build(:project)) }
let(:upload) { create(:upload, model: create(:project)) }
end
end
......
......@@ -33,29 +33,27 @@ describe Gitlab::Geo::Replication::FileTransfer do
stub_current_geo_node(secondary_node)
end
context 'when the destination filename is a directory' do
it 'returns a failed result' do
expect(upload).to receive(:absolute_path).and_return('/tmp')
context 'when pre-conditions are not satisfied' do
it 'returns a skipped result' do
allow(subject).to receive(:can_transfer?) { false }
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: false)
expect_result(result, success: false, skipped: true, bytes_downloaded: 0, primary_missing_file: false)
end
end
context 'when the HTTP response is successful' do
it 'returns a successful result' do
content = upload.build_uploader.file.read
content = upload.retrieve_uploader.file.read
size = content.bytesize
expect(FileUtils).to receive(:mv).with(anything, upload.absolute_path).and_call_original
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content.to_s).and_return(response)
stub_request(:get, subject.resource_url).to_return(status: 200, body: content)
result = subject.download_from_primary
expect_result(result, success: true, bytes_downloaded: size, primary_missing_file: false)
stat = File.stat(upload.absolute_path)
expect(stat.size).to eq(size)
expect(stat.mode & 0777).to eq(0666 - File.umask)
expect(File.binread(upload.absolute_path)).to eq(content)
......@@ -65,11 +63,10 @@ describe Gitlab::Geo::Replication::FileTransfer do
context 'when the HTTP response is unsuccessful' do
context 'when the HTTP response indicates a missing file on the primary' do
it 'returns a failed result indicating primary_missing_file' do
expect(FileUtils).not_to receive(:mv).with(anything, upload.absolute_path).and_call_original
response = double(:response, success?: false, code: 404, msg: "No such file")
expect(File).to receive(:read).and_return("{\"geo_code\":\"#{Gitlab::Geo::Replication::FILE_NOT_FOUND_GEO_CODE}\"}")
expect(Gitlab::HTTP).to receive(:get).and_return(response)
stub_request(:get, subject.resource_url)
.to_return(status: 404,
headers: { content_type: 'application/json' },
body: { geo_code: Gitlab::Geo::Replication::FILE_NOT_FOUND_GEO_CODE }.to_json)
result = subject.download_from_primary
......@@ -79,10 +76,7 @@ describe Gitlab::Geo::Replication::FileTransfer do
context 'when the HTTP response does not indicate a missing file on the primary' do
it 'returns a failed result' do
expect(FileUtils).not_to receive(:mv).with(anything, upload.absolute_path).and_call_original
response = double(:response, success?: false, code: 404, msg: 'No such file')
expect(Gitlab::HTTP).to receive(:get).and_return(response)
stub_request(:get, subject.resource_url).to_return(status: 404, body: 'Not found')
result = subject.download_from_primary
......@@ -110,7 +104,8 @@ describe Gitlab::Geo::Replication::FileTransfer do
allow(FileUtils).to receive(:mkdir_p) { raise Errno::EEXIST }
expect(subject).to receive(:log_error).with("unable to create directory /foo: File exists")
expect(subject).to receive(:log_error).with("Unable to create directory /foo: File exists").once
expect(subject).to receive(:log_error).with("Skipping transfer as we cannot create the destination directory").once
result = subject.download_from_primary
expect(result.success).to eq(false)
......@@ -121,8 +116,8 @@ describe Gitlab::Geo::Replication::FileTransfer do
context 'when the checksum of the downloaded file does not match' do
it 'returns a failed result' do
bad_content = 'corrupted!!!'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(bad_content).and_return(response)
stub_request(:get, subject.resource_url)
.to_return(status: 200, body: bad_content)
result = subject.download_from_primary
......@@ -134,8 +129,8 @@ describe Gitlab::Geo::Replication::FileTransfer do
it 'returns a successful result' do
upload.update_column(:checksum, nil)
content = 'foo'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content).and_return(response)
stub_request(:get, subject.resource_url)
.to_return(status: 200, body: content)
result = subject.download_from_primary
......@@ -144,7 +139,7 @@ describe Gitlab::Geo::Replication::FileTransfer do
end
end
def expect_result(result, success:, bytes_downloaded:, primary_missing_file:)
def expect_result(result, success:, bytes_downloaded:, primary_missing_file:, skipped: false)
expect(result.success).to eq(success)
expect(result.bytes_downloaded).to eq(bytes_downloaded)
expect(result.primary_missing_file).to eq(primary_missing_file)
......
......@@ -42,7 +42,7 @@ describe Gitlab::Geo::Replication::JobArtifactTransfer, :geo do
context 'when the destination filename is a directory' do
it 'returns a failed result' do
expect(job_artifact).to receive(:file).and_return(double(path: '/tmp'))
allow(job_artifact).to receive(:file).and_return(double(path: '/tmp'))
result = subject.download_from_primary
......@@ -54,14 +54,14 @@ describe Gitlab::Geo::Replication::JobArtifactTransfer, :geo do
it 'returns a successful result' do
content = job_artifact.file.read
size = content.bytesize
expect(FileUtils).to receive(:mv).with(anything, job_artifact.file.path).and_call_original
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content.to_s).and_return(response)
stub_request(:get, subject.resource_url).to_return(status: 200, body: content)
result = subject.download_from_primary
expect_result(result, success: true, bytes_downloaded: size, primary_missing_file: false)
stat = File.stat(job_artifact.file.path)
expect(stat.size).to eq(size)
expect(stat.mode & 0777).to eq(0666 - File.umask)
expect(File.binread(job_artifact.file.path)).to eq(content)
......@@ -71,10 +71,10 @@ describe Gitlab::Geo::Replication::JobArtifactTransfer, :geo do
context 'when the HTTP response is unsuccessful' do
context 'when the HTTP response indicates a missing file on the primary' do
it 'returns a failed result indicating primary_missing_file' do
expect(FileUtils).not_to receive(:mv).with(anything, job_artifact.file.path).and_call_original
response = double(:response, success?: false, code: 404, msg: "No such file")
expect(File).to receive(:read).and_return("{\"geo_code\":\"#{Gitlab::Geo::Replication::FILE_NOT_FOUND_GEO_CODE}\"}")
expect(Gitlab::HTTP).to receive(:get).and_return(response)
stub_request(:get, subject.resource_url)
.to_return(status: 404,
headers: { content_type: 'application/json' },
body: { geo_code: Gitlab::Geo::Replication::FILE_NOT_FOUND_GEO_CODE }.to_json)
result = subject.download_from_primary
......@@ -84,9 +84,7 @@ describe Gitlab::Geo::Replication::JobArtifactTransfer, :geo do
context 'when the HTTP response does not indicate a missing file on the primary' do
it 'returns a failed result' do
expect(FileUtils).not_to receive(:mv).with(anything, job_artifact.file.path).and_call_original
response = double(:response, success?: false, code: 404, msg: 'No such file')
expect(Gitlab::HTTP).to receive(:get).and_return(response)
stub_request(:get, subject.resource_url).to_return(status: 404, body: 'Not found')
result = subject.download_from_primary
......@@ -108,11 +106,12 @@ describe Gitlab::Geo::Replication::JobArtifactTransfer, :geo do
context "invalid path" do
it 'logs an error if the destination directory could not be created' do
expect(job_artifact).to receive(:file).and_return(double(path: '/foo/bar'))
allow(job_artifact).to receive(:file).and_return(double(path: '/foo/bar'))
allow(FileUtils).to receive(:mkdir_p) { raise Errno::EEXIST }
expect(subject).to receive(:log_error).with("unable to create directory /foo: File exists")
expect(subject).to receive(:log_error).with("Unable to create directory /foo: File exists").once
expect(subject).to receive(:log_error).with("Skipping transfer as we cannot create the destination directory").once
result = subject.download_from_primary
expect(result.success).to eq(false)
......@@ -123,8 +122,8 @@ describe Gitlab::Geo::Replication::JobArtifactTransfer, :geo do
context 'when the checksum of the downloaded file does not match' do
it 'returns a failed result' do
bad_content = 'corrupted!!!'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(bad_content).and_return(response)
stub_request(:get, subject.resource_url)
.to_return(status: 200, body: bad_content)
result = subject.download_from_primary
......@@ -136,10 +135,11 @@ describe Gitlab::Geo::Replication::JobArtifactTransfer, :geo do
it 'returns a successful result' do
artifact = create(:ci_job_artifact, :archive)
content = 'foo'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content).and_return(response)
transfer = described_class.new(artifact)
stub_request(:get, transfer.resource_url)
.to_return(status: 200, body: content)
result = transfer.download_from_primary
expect_result(result, success: true, bytes_downloaded: content.bytesize, primary_missing_file: false)
......
......@@ -20,7 +20,7 @@ describe Gitlab::Geo::Replication::LfsTransfer do
context 'when the destination filename is a directory' do
it 'returns a failed result' do
expect(lfs_object).to receive(:file).and_return(double(path: '/tmp'))
allow(lfs_object).to receive(:file).and_return(double(path: '/tmp'))
result = subject.download_from_primary
......@@ -32,14 +32,14 @@ describe Gitlab::Geo::Replication::LfsTransfer do
it 'returns a successful result' do
content = lfs_object.file.read
size = content.bytesize
expect(FileUtils).to receive(:mv).with(anything, lfs_object.file.path).and_call_original
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content.to_s).and_return(response)
stub_request(:get, subject.resource_url).to_return(status: 200, body: content)
result = subject.download_from_primary
expect_result(result, success: true, bytes_downloaded: size, primary_missing_file: false)
stat = File.stat(lfs_object.file.path)
expect(stat.size).to eq(size)
expect(stat.mode & 0777).to eq(0666 - File.umask)
expect(File.binread(lfs_object.file.path)).to eq(content)
......@@ -49,10 +49,10 @@ describe Gitlab::Geo::Replication::LfsTransfer do
context 'when the HTTP response is unsuccessful' do
context 'when the HTTP response indicates a missing file on the primary' do
it 'returns a failed result indicating primary_missing_file' do
expect(FileUtils).not_to receive(:mv).with(anything, lfs_object.file.path).and_call_original
response = double(:response, success?: false, code: 404, msg: "No such file")
expect(File).to receive(:read).and_return("{\"geo_code\":\"#{Gitlab::Geo::Replication::FILE_NOT_FOUND_GEO_CODE}\"}")
expect(Gitlab::HTTP).to receive(:get).and_return(response)
stub_request(:get, subject.resource_url)
.to_return(status: 404,
headers: { content_type: 'application/json' },
body: { geo_code: Gitlab::Geo::Replication::FILE_NOT_FOUND_GEO_CODE }.to_json)
result = subject.download_from_primary
......@@ -62,9 +62,7 @@ describe Gitlab::Geo::Replication::LfsTransfer do
context 'when the HTTP response does not indicate a missing file on the primary' do
it 'returns a failed result' do
expect(FileUtils).not_to receive(:mv).with(anything, lfs_object.file.path).and_call_original
response = double(:response, success?: false, code: 404, msg: 'No such file')
expect(Gitlab::HTTP).to receive(:get).and_return(response)
stub_request(:get, subject.resource_url).to_return(status: 404, body: 'Not found')
result = subject.download_from_primary
......@@ -86,11 +84,12 @@ describe Gitlab::Geo::Replication::LfsTransfer do
context "invalid path" do
it 'logs an error if the destination directory could not be created' do
expect(lfs_object).to receive(:file).and_return(double(path: '/foo/bar'))
allow(lfs_object).to receive(:file).and_return(double(path: '/foo/bar'))
allow(FileUtils).to receive(:mkdir_p) { raise Errno::EEXIST }
expect(subject).to receive(:log_error).with("unable to create directory /foo: File exists")
expect(subject).to receive(:log_error).with("Unable to create directory /foo: File exists").once
expect(subject).to receive(:log_error).with("Skipping transfer as we cannot create the destination directory").once
result = subject.download_from_primary
expect(result.success).to eq(false)
......@@ -101,8 +100,8 @@ describe Gitlab::Geo::Replication::LfsTransfer do
context 'when the checksum of the downloaded file does not match' do
it 'returns a failed result' do
bad_content = 'corrupted!!!'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(bad_content).and_return(response)
stub_request(:get, subject.resource_url)
.to_return(status: 200, body: bad_content)
result = subject.download_from_primary
......
......@@ -39,9 +39,10 @@ describe Geo::UploadRegistry, :geo, :geo_fdw do
describe '#file' do
it 'returns the path of the upload of a registry' do
registry = create(:geo_upload_registry, :file, :with_file)
upload = create(:upload, :with_file)
registry = create(:geo_upload_registry, :file, file_id: upload.id)
expect(registry.file).to eq('uploads/-/system/project/avatar/avatar.jpg')
expect(registry.file).to eq(upload.path)
end
it 'return "removed" message when the upload no longer exists' do
......
......@@ -120,7 +120,7 @@ describe Geo::FileRegistryRemovalService do
context 'with avatar' do
let!(:upload) { create(:user, :with_avatar).avatar.upload }
let!(:file_registry) { create(:geo_file_registry, :avatar, file_id: upload.id) }
let!(:file_path) { upload.build_uploader.file.path }
let!(:file_path) { upload.retrieve_uploader.file.path }
it_behaves_like 'removes'
......@@ -137,7 +137,7 @@ describe Geo::FileRegistryRemovalService do
context 'with attachment' do
let!(:upload) { create(:note, :with_attachment).attachment.upload }
let!(:file_registry) { create(:geo_file_registry, :attachment, file_id: upload.id) }
let!(:file_path) { upload.build_uploader.file.path }
let!(:file_path) { upload.retrieve_uploader.file.path }
it_behaves_like 'removes'
......@@ -154,7 +154,7 @@ describe Geo::FileRegistryRemovalService do
context 'with file' do
let!(:upload) { create(:user, :with_avatar).avatar.upload }
let!(:file_registry) { create(:geo_file_registry, :avatar, file_id: upload.id) }
let!(:file_path) { upload.build_uploader.file.path }
let!(:file_path) { upload.retrieve_uploader.file.path }
it_behaves_like 'removes'
......@@ -177,7 +177,7 @@ describe Geo::FileRegistryRemovalService do
end
let!(:file_registry) { create(:geo_file_registry, :namespace_file, file_id: upload.id) }
let!(:file_path) { upload.build_uploader.file.path }
let!(:file_path) { upload.retrieve_uploader.file.path }
it_behaves_like 'removes'
......@@ -199,7 +199,7 @@ describe Geo::FileRegistryRemovalService do
Upload.find_by(model: snippet, uploader: PersonalFileUploader.name)
end
let!(:file_registry) { create(:geo_file_registry, :personal_file, file_id: upload.id) }
let!(:file_path) { upload.build_uploader.file.path }
let!(:file_path) { upload.retrieve_uploader.file.path }
it_behaves_like 'removes'
......@@ -221,7 +221,7 @@ describe Geo::FileRegistryRemovalService do
Upload.find_by(model: appearance, uploader: FaviconUploader.name)
end
let!(:file_registry) { create(:geo_file_registry, :favicon, file_id: upload.id) }
let!(:file_path) { upload.build_uploader.file.path }
let!(:file_path) { upload.retrieve_uploader.file.path }
it_behaves_like 'removes'
......
......@@ -92,7 +92,7 @@ module Gitlab
def legacy_file_uploader
strong_memoize(:legacy_file_uploader) do
uploader = upload.build_uploader
uploader = upload.retrieve_uploader
uploader.retrieve_from_store!(File.basename(upload.path))
uploader
end
......
......@@ -68,7 +68,7 @@ module Gitlab
yield(@project.avatar)
else
project_uploads_except_avatar(avatar_path).find_each(batch_size: UPLOADS_BATCH_SIZE) do |upload|
yield(upload.build_uploader)
yield(upload.retrieve_uploader)
end
end
end
......
......@@ -68,7 +68,7 @@ module Gitlab
}
relation.find_each(find_params) do |upload|
clean(upload.build_uploader, dry_run: dry_run)
clean(upload.retrieve_uploader, dry_run: dry_run)
sleep sleep_time if sleep_time
rescue => err
logger.error "failed to sanitize #{upload_ref(upload)}: #{err.message}"
......
......@@ -32,7 +32,7 @@ module Gitlab
end
def remote_object_exists?(upload)
upload.build_uploader.file.exists?
upload.retrieve_uploader.file.exists?
end
end
end
......
......@@ -2,7 +2,7 @@
FactoryBot.define do
factory :upload do
model { build(:project) }
model { create(:project) }
size { 100.kilobytes }
uploader { "AvatarUploader" }
mount_point { :avatar }
......@@ -11,23 +11,27 @@ FactoryBot.define do
# we should build a mount agnostic upload by default
transient do
filename { 'myfile.jpg' }
filename { 'avatar.jpg' }
end
# this needs to comply with RecordsUpload::Concern#upload_path
path { File.join("uploads/-/system", model.class.underscore, mount_point.to_s, 'avatar.jpg') }
path do
uploader_instance = Object.const_get(uploader.to_s).new(model, mount_point)
File.join(uploader_instance.store_dir, filename)
end
trait :personal_snippet_upload do
uploader { "PersonalFileUploader" }
model { create(:personal_snippet) }
path { File.join(secret, filename) }
model { build(:personal_snippet) }
uploader { "PersonalFileUploader" }
secret { SecureRandom.hex }
mount_point { nil }
end
trait :issuable_upload do
uploader { "FileUploader" }
path { File.join(secret, filename) }
secret { SecureRandom.hex }
mount_point { nil }
end
trait :with_file do
......@@ -42,22 +46,23 @@ FactoryBot.define do
end
trait :namespace_upload do
model { build(:group) }
model { create(:group) }
path { File.join(secret, filename) }
uploader { "NamespaceFileUploader" }
secret { SecureRandom.hex }
mount_point { nil }
end
trait :favicon_upload do
model { build(:appearance) }
path { File.join(secret, filename) }
model { create(:appearance) }
uploader { "FaviconUploader" }
secret { SecureRandom.hex }
mount_point { :favicon }
end
trait :attachment_upload do
mount_point { :attachment }
model { build(:note) }
model { create(:note) }
uploader { "AttachmentUploader" }
end
end
......
......@@ -32,7 +32,7 @@ describe Gitlab::BackgroundMigration::LegacyUploadMover do
if with_file
upload = create(:upload, :with_file, :attachment_upload, params)
model.update(attachment: upload.build_uploader)
model.update(attachment: upload.retrieve_uploader)
model.attachment.upload
else
create(:upload, :attachment_upload, params)
......
......@@ -24,7 +24,7 @@ describe Gitlab::BackgroundMigration::LegacyUploadsMigrator do
if with_file
upload = create(:upload, :with_file, :attachment_upload, params)
model.update(attachment: upload.build_uploader)
model.update(attachment: upload.retrieve_uploader)
model.attachment.upload
else
create(:upload, :attachment_upload, params)
......
......@@ -83,7 +83,7 @@ describe Gitlab::ImportExport::UploadsManager do
it 'restores the file' do
manager.restore
expect(project.uploads.map { |u| u.build_uploader.filename }).to include('dummy.txt')
expect(project.uploads.map { |u| u.retrieve_uploader.filename }).to include('dummy.txt')
end
end
end
......@@ -27,7 +27,7 @@ describe Gitlab::ImportExport::UploadsRestorer do
it 'copies the uploads to the project path' do
subject.restore
expect(project.uploads.map { |u| u.build_uploader.filename }).to include('dummy.txt')
expect(project.uploads.map { |u| u.retrieve_uploader.filename }).to include('dummy.txt')
end
end
......@@ -43,7 +43,7 @@ describe Gitlab::ImportExport::UploadsRestorer do
it 'copies the uploads to the project path' do
subject.restore
expect(project.uploads.map { |u| u.build_uploader.filename }).to include('dummy.txt')
expect(project.uploads.map { |u| u.retrieve_uploader.filename }).to include('dummy.txt')
end
end
end
......
......@@ -58,7 +58,7 @@ describe Gitlab::Sanitizers::Exif do
end
describe '#clean' do
let(:uploader) { create(:upload, :with_file, :issuable_upload).build_uploader }
let(:uploader) { create(:upload, :with_file, :issuable_upload).retrieve_uploader }
context "no dry run" do
it "removes exif from the image" do
......
......@@ -3,7 +3,7 @@
require 'spec_helper'
describe Upload do
describe 'assocations' do
describe 'associations' do
it { is_expected.to belong_to(:model) }
end
......@@ -107,6 +107,52 @@ describe Upload do
end
end
describe '#build_uploader' do
it 'returns a uploader object with current upload associated with it' do
subject = build(:upload)
uploader = subject.build_uploader
expect(uploader.upload).to eq(subject)
expect(uploader.mounted_as).to eq(subject.send(:mount_point))
expect(uploader.file).to be_nil
end
end
describe '#retrieve_uploader' do
it 'returns a uploader object with current uploader associated with and cache retrieved' do
subject = build(:upload)
uploader = subject.retrieve_uploader
expect(uploader.upload).to eq(subject)
expect(uploader.mounted_as).to eq(subject.send(:mount_point))
expect(uploader.file).not_to be_nil
end
end
describe '#needs_checksum?' do
context 'with local storage' do
it 'returns true when no checksum exists' do
subject = create(:upload, :with_file, checksum: nil)
expect(subject.needs_checksum?).to be_truthy
end
it 'returns false when checksum is already present' do
subject = create(:upload, :with_file, checksum: 'something')
expect(subject.needs_checksum?).to be_falsey
end
end
context 'with remote storage' do
subject { build(:upload, :object_storage) }
it 'returns false' do
expect(subject.needs_checksum?).to be_falsey
end
end
end
describe '#exist?' do
it 'returns true when the file exists' do
upload = described_class.new(path: __FILE__, store: ObjectStorage::Store::LOCAL)
......
......@@ -44,7 +44,7 @@ describe Uploads::Fog do
subject { data_store.delete_keys(keys) }
before do
uploads.each { |upload| upload.build_uploader.migrate!(2) }
uploads.each { |upload| upload.retrieve_uploader.migrate!(2) }
end
it 'deletes multiple data' do
......
......@@ -104,7 +104,7 @@ shared_examples 'handle uploads' do
context "when neither the uploader nor the model exists" do
before do
allow_any_instance_of(Upload).to receive(:build_uploader).and_return(nil)
allow_any_instance_of(Upload).to receive(:retrieve_uploader).and_return(nil)
allow(controller).to receive(:find_model).and_return(nil)
end
......
......@@ -41,7 +41,8 @@ shared_examples_for 'model with uploads' do |supports_fileuploads|
end
it 'deletes remote files' do
expect_any_instance_of(Uploads::Fog).to receive(:delete_keys).with(uploads.map(&:path))
expected_array = array_including(*uploads.map(&:path))
expect_any_instance_of(Uploads::Fog).to receive(:delete_keys).with(expected_array)
model_object.destroy
end
......
......@@ -3,7 +3,7 @@ require 'spec_helper'
describe FileUploader do
let(:group) { create(:group, name: 'awesome') }
let(:project) { create(:project, :legacy_storage, namespace: group, name: 'project') }
let(:uploader) { described_class.new(project) }
let(:uploader) { described_class.new(project, :avatar) }
let(:upload) { double(model: project, path: 'secret/foo.jpg') }
subject { uploader }
......@@ -184,6 +184,14 @@ describe FileUploader do
end
end
describe '#replace_file_without_saving!' do
let(:replacement) { Tempfile.create('replacement.jpg') }
it 'replaces an existing file without changing its metadata' do
expect { subject.replace_file_without_saving! CarrierWave::SanitizedFile.new(replacement) }.not_to change { subject.upload }
end
end
context 'when remote file is used' do
let(:temp_file) { Tempfile.new("test") }
......
......@@ -69,6 +69,16 @@ describe GitlabUploader do
end
end
describe '#replace_file_without_saving!' do
it 'allows file to be replaced without triggering any callbacks' do
new_file = CarrierWave::SanitizedFile.new(Tempfile.new)
expect(subject).not_to receive(:with_callbacks)
subject.replace_file_without_saving!(new_file)
end
end
describe '#open' do
context 'when trace is stored in File storage' do
context 'when file exists' do
......
......@@ -42,33 +42,23 @@ describe ObjectStorage::MigrateUploadsWorker, :sidekiq do
end
describe '.sanity_check!' do
shared_examples 'raises a SanityCheckError' do
shared_examples 'raises a SanityCheckError' do |expected_message|
let(:mount_point) { nil }
it do
expect { described_class.sanity_check!(uploads, model_class, mount_point) }
.to raise_error(described_class::SanityCheckError)
.to raise_error(described_class::SanityCheckError).with_message(expected_message)
end
end
before do
stub_const("WrongModel", Class.new)
end
context 'uploader types mismatch' do
let!(:outlier) { create(:upload, uploader: 'GitlabUploader') }
include_examples 'raises a SanityCheckError'
end
context 'model types mismatch' do
let!(:outlier) { create(:upload, model_type: 'WrongModel') }
include_examples 'raises a SanityCheckError'
include_examples 'raises a SanityCheckError', /Multiple uploaders found/
end
context 'mount point not found' do
include_examples 'raises a SanityCheckError' do
include_examples 'raises a SanityCheckError', /Mount point [a-z:]+ not found in/ do
let(:mount_point) { :potato }
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment