Commit af1cedeb authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch 'mk/validate-file-transfers-geo' into 'master'

Geo: Validate file transfers (attachments, LFS objects, artifacts)

See merge request gitlab-org/gitlab-ee!14477
parents 90a83394 4af2517d
---
title: 'Geo: Validate file transfers (attachments, LFS objects, artifacts)'
merge_request: 14477
author:
type: added
......@@ -8,23 +8,26 @@ module Gitlab
# * Returning a detailed Result object
class FileTransfer < Transfer
def initialize(file_type, upload)
@file_type = file_type
@file_id = upload.id
@filename = upload.absolute_path
@request_data = build_request_data(upload)
super(
file_type,
upload.id,
upload.absolute_path,
upload.checksum,
build_request_data(file_type, upload)
)
rescue ObjectStorage::RemoteStoreError
::Gitlab::Geo::Logger.warn "Cannot transfer a remote object."
end
private
def build_request_data(upload)
def build_request_data(file_type, upload)
{
id: upload.model_id,
type: upload.model_type,
checksum: upload.checksum,
file_type: @file_type,
file_id: @file_id
file_type: file_type,
file_id: upload.id
}
end
end
......
......@@ -8,19 +8,22 @@ module Gitlab
# * Returning a detailed Result object
class JobArtifactTransfer < Transfer
def initialize(job_artifact)
@file_type = :job_artifact
@file_id = job_artifact.id
@filename = job_artifact.file.path
@request_data = job_artifact_request_data(job_artifact)
super(
:job_artifact,
job_artifact.id,
job_artifact.file.path,
job_artifact.file_sha256,
job_artifact_request_data(job_artifact)
)
end
private
def job_artifact_request_data(job_artifact)
{
id: @file_id,
file_type: @file_type,
file_id: @file_id
id: job_artifact.id,
file_type: :job_artifact,
file_id: job_artifact.id
}
end
end
......
......@@ -8,10 +8,13 @@ module Gitlab
# * Returning a detailed Result object
class LfsTransfer < Transfer
def initialize(lfs_object)
@file_type = :lfs
@file_id = lfs_object.id
@filename = lfs_object.file.path
@request_data = lfs_request_data(lfs_object)
super(
:lfs,
lfs_object.id,
lfs_object.file.path,
lfs_object.oid,
lfs_request_data(lfs_object)
)
end
private
......@@ -19,8 +22,8 @@ module Gitlab
def lfs_request_data(lfs_object)
{
checksum: lfs_object.oid,
file_type: @file_type,
file_id: @file_id
file_type: :lfs,
file_id: lfs_object.id
}
end
end
......
......@@ -5,14 +5,15 @@ module Gitlab
class Transfer
include LogHelpers
attr_reader :file_type, :file_id, :filename, :request_data
attr_reader :file_type, :file_id, :filename, :expected_checksum, :request_data
TEMP_PREFIX = 'tmp_'.freeze
def initialize(file_type, file_id, filename, request_data)
def initialize(file_type, file_id, filename, expected_checksum, request_data)
@file_type = file_type
@file_id = file_id
@filename = filename
@expected_checksum = expected_checksum
@request_data = request_data
end
......@@ -45,8 +46,8 @@ module Gitlab
private
def failure(primary_missing_file: false)
Result.new(success: false, bytes_downloaded: 0, primary_missing_file: primary_missing_file)
def failure(bytes_downloaded: 0, primary_missing_file: false)
Result.new(success: false, bytes_downloaded: bytes_downloaded, primary_missing_file: primary_missing_file)
end
def ensure_path_exists
......@@ -90,9 +91,15 @@ module Gitlab
return failure
end
file_size = File.stat(temp_file.path).size
if checksum_mismatch?(temp_file.path)
log_error("Downloaded file checksum mismatch", expected_checksum: expected_checksum, actual_checksum: @actual_checksum, file_size_bytes: file_size)
return failure(bytes_downloaded: file_size)
end
FileUtils.mv(temp_file.path, filename)
file_size = File.stat(filename).size
log_info("Successful downloaded", filename: filename, file_size_bytes: file_size)
rescue StandardError, Gitlab::HTTP::Error => e
log_error("Error downloading file", error: e, filename: filename, url: url)
......@@ -133,6 +140,21 @@ module Gitlab
log_error("Error creating temporary file", error: e)
nil
end
def checksum_mismatch?(file_path)
# Skip checksum check if primary didn't generate one because, for
# example, large attachments are checksummed asynchronously, and most
# types of artifacts are not checksummed at all at the moment.
return false if expected_checksum.blank?
return false unless Feature.enabled?(:geo_file_transfer_validation, default_enabled: true)
expected_checksum != actual_checksum(file_path)
end
def actual_checksum(file_path)
@actual_checksum = Digest::SHA256.file(file_path).hexdigest
end
end
end
end
require 'spec_helper'
describe Gitlab::Geo::FileTransfer do
let(:user) { create(:user, avatar: fixture_file_upload('spec/fixtures/dk.png', 'image/png')) }
include ::EE::GeoHelpers
set(:primary_node) { create(:geo_node, :primary) }
set(:secondary_node) { create(:geo_node) }
let(:user) { create(:user, :with_avatar) }
let(:upload) { Upload.find_by(model: user, uploader: 'AvatarUploader') }
subject { described_class.new(:file, upload) }
......@@ -21,4 +25,128 @@ describe Gitlab::Geo::FileTransfer do
end
end
end
context '#download_from_primary' do
before do
stub_current_geo_node(secondary_node)
end
context 'when the destination filename is a directory' do
it 'returns a failed result' do
expect(upload).to receive(:absolute_path).and_return('/tmp')
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: false)
end
end
context 'when the HTTP response is successful' do
it 'returns a successful result' do
content = upload.build_uploader.file.read
size = content.bytesize
expect(FileUtils).to receive(:mv).with(anything, upload.absolute_path).and_call_original
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content.to_s).and_return(response)
result = subject.download_from_primary
expect_result(result, success: true, bytes_downloaded: size, primary_missing_file: false)
stat = File.stat(upload.absolute_path)
expect(stat.size).to eq(size)
expect(stat.mode & 0777).to eq(0666 - File.umask)
expect(File.binread(upload.absolute_path)).to eq(content)
end
end
context 'when the HTTP response is unsuccessful' do
context 'when the HTTP response indicates a missing file on the primary' do
it 'returns a failed result indicating primary_missing_file' do
expect(FileUtils).not_to receive(:mv).with(anything, upload.absolute_path).and_call_original
response = double(:response, success?: false, code: 404, msg: "No such file")
expect(File).to receive(:read).and_return("{\"geo_code\":\"#{Gitlab::Geo::FileUploader::FILE_NOT_FOUND_GEO_CODE}\"}")
expect(Gitlab::HTTP).to receive(:get).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: true)
end
end
context 'when the HTTP response does not indicate a missing file on the primary' do
it 'returns a failed result' do
expect(FileUtils).not_to receive(:mv).with(anything, upload.absolute_path).and_call_original
response = double(:response, success?: false, code: 404, msg: 'No such file')
expect(Gitlab::HTTP).to receive(:get).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: false)
end
end
end
context 'when Tempfile fails' do
it 'returns a failed result' do
upload # load this eagerly, since this triggers Tempfile.new
expect(Tempfile).to receive(:new).and_raise(Errno::ENAMETOOLONG)
result = subject.download_from_primary
expect(result.success).to eq(false)
expect(result.bytes_downloaded).to eq(0)
end
end
context "invalid path" do
it 'logs an error if the destination directory could not be created' do
expect(upload).to receive(:absolute_path).and_return('/foo/bar')
allow(FileUtils).to receive(:mkdir_p) { raise Errno::EEXIST }
expect(subject).to receive(:log_error).with("unable to create directory /foo: File exists")
result = subject.download_from_primary
expect(result.success).to eq(false)
expect(result.bytes_downloaded).to eq(0)
end
end
context 'when the checksum of the downloaded file does not match' do
it 'returns a failed result' do
bad_content = 'corrupted!!!'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(bad_content).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: bad_content.bytesize, primary_missing_file: false)
end
end
context 'when the primary has not stored a checksum for the file' do
it 'returns a successful result' do
upload.update_column(:checksum, nil)
content = 'foo'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content).and_return(response)
result = subject.download_from_primary
expect_result(result, success: true, bytes_downloaded: content.bytesize, primary_missing_file: false)
end
end
end
def expect_result(result, success:, bytes_downloaded:, primary_missing_file:)
expect(result.success).to eq(success)
expect(result.bytes_downloaded).to eq(bytes_downloaded)
expect(result.primary_missing_file).to eq(primary_missing_file)
# Sanity check to help ensure a valid test
expect(success).not_to be_nil
expect(primary_missing_file).not_to be_nil
end
end
require 'spec_helper'
describe Gitlab::Geo::JobArtifactTransfer, :geo do
set(:job_artifact) { create(:ci_job_artifact, :archive) }
include ::EE::GeoHelpers
set(:primary_node) { create(:geo_node, :primary) }
set(:secondary_node) { create(:geo_node) }
set(:job_artifact) { create(:ci_job_artifact, :archive, :correct_checksum) }
subject do
described_class.new(job_artifact)
end
context '#initialize' do
it 'sets file_type to :ci_trace' do
expect(described_class.new(job_artifact).file_type).to eq(:job_artifact)
expect(subject.file_type).to eq(:job_artifact)
end
it 'sets file_id to the job artifact ID' do
expect(described_class.new(job_artifact).file_id).to eq(job_artifact.id)
expect(subject.file_id).to eq(job_artifact.id)
end
it 'sets filename to job artifact default_path' do
expect(described_class.new(job_artifact).filename).to eq(job_artifact.file.path)
expect(subject.filename).to eq(job_artifact.file.path)
expect(job_artifact.file.path).to be_present
end
it 'sets request_data with file_id and file_type' do
expect(described_class.new(job_artifact).request_data).to eq(
expect(subject.request_data).to eq(
id: job_artifact.id,
file_id: job_artifact.id,
file_type: :job_artifact)
end
end
context '#download_from_primary' do
before do
stub_current_geo_node(secondary_node)
end
context 'when the destination filename is a directory' do
it 'returns a failed result' do
expect(job_artifact).to receive(:file).and_return(double(path: '/tmp'))
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: false)
end
end
context 'when the HTTP response is successful' do
it 'returns a successful result' do
content = job_artifact.file.read
size = content.bytesize
expect(FileUtils).to receive(:mv).with(anything, job_artifact.file.path).and_call_original
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content.to_s).and_return(response)
result = subject.download_from_primary
expect_result(result, success: true, bytes_downloaded: size, primary_missing_file: false)
stat = File.stat(job_artifact.file.path)
expect(stat.size).to eq(size)
expect(stat.mode & 0777).to eq(0666 - File.umask)
expect(File.binread(job_artifact.file.path)).to eq(content)
end
end
context 'when the HTTP response is unsuccessful' do
context 'when the HTTP response indicates a missing file on the primary' do
it 'returns a failed result indicating primary_missing_file' do
expect(FileUtils).not_to receive(:mv).with(anything, job_artifact.file.path).and_call_original
response = double(:response, success?: false, code: 404, msg: "No such file")
expect(File).to receive(:read).and_return("{\"geo_code\":\"#{Gitlab::Geo::FileUploader::FILE_NOT_FOUND_GEO_CODE}\"}")
expect(Gitlab::HTTP).to receive(:get).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: true)
end
end
context 'when the HTTP response does not indicate a missing file on the primary' do
it 'returns a failed result' do
expect(FileUtils).not_to receive(:mv).with(anything, job_artifact.file.path).and_call_original
response = double(:response, success?: false, code: 404, msg: 'No such file')
expect(Gitlab::HTTP).to receive(:get).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: false)
end
end
end
context 'when Tempfile fails' do
it 'returns a failed result' do
expect(Tempfile).to receive(:new).and_raise(Errno::ENAMETOOLONG)
result = subject.download_from_primary
expect(result.success).to eq(false)
expect(result.bytes_downloaded).to eq(0)
end
end
context "invalid path" do
it 'logs an error if the destination directory could not be created' do
expect(job_artifact).to receive(:file).and_return(double(path: '/foo/bar'))
allow(FileUtils).to receive(:mkdir_p) { raise Errno::EEXIST }
expect(subject).to receive(:log_error).with("unable to create directory /foo: File exists")
result = subject.download_from_primary
expect(result.success).to eq(false)
expect(result.bytes_downloaded).to eq(0)
end
end
context 'when the checksum of the downloaded file does not match' do
it 'returns a failed result' do
bad_content = 'corrupted!!!'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(bad_content).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: bad_content.bytesize, primary_missing_file: false)
end
end
context 'when the primary has not stored a checksum for the file' do
it 'returns a successful result' do
artifact = create(:ci_job_artifact, :archive)
content = 'foo'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content).and_return(response)
transfer = described_class.new(artifact)
result = transfer.download_from_primary
expect_result(result, success: true, bytes_downloaded: content.bytesize, primary_missing_file: false)
end
end
end
def expect_result(result, success:, bytes_downloaded:, primary_missing_file:)
expect(result.success).to eq(success)
expect(result.bytes_downloaded).to eq(bytes_downloaded)
expect(result.primary_missing_file).to eq(primary_missing_file)
# Sanity check to help ensure a valid test
expect(success).not_to be_nil
expect(primary_missing_file).not_to be_nil
end
end
require 'spec_helper'
describe Gitlab::Geo::Transfer do
describe Gitlab::Geo::LfsTransfer do
include ::EE::GeoHelpers
set(:primary_node) { create(:geo_node, :primary) }
set(:secondary_node) { create(:geo_node) }
set(:lfs_object) { create(:lfs_object, :with_file) }
let(:lfs_object_file_path) { lfs_object.file.path }
let(:url) { primary_node.geo_transfers_url(:lfs, lfs_object.id.to_s) }
let(:content) { SecureRandom.random_bytes(10) }
let(:size) { File.stat(lfs_object.file.path).size }
set(:lfs_object) { create(:lfs_object, :with_file, :correct_oid) }
subject do
described_class.new(:lfs,
lfs_object.id,
lfs_object_file_path,
{ sha256: lfs_object.oid })
described_class.new(lfs_object)
end
context '#download_from_primary' do
......@@ -25,9 +18,9 @@ describe Gitlab::Geo::Transfer do
context 'when the destination filename is a directory' do
it 'returns a failed result' do
transfer = described_class.new(:lfs, lfs_object.id, '/tmp', { sha256: lfs_object.id })
expect(lfs_object).to receive(:file).and_return(double(path: '/tmp'))
result = transfer.download_from_primary
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: false)
end
......@@ -35,6 +28,8 @@ describe Gitlab::Geo::Transfer do
context 'when the HTTP response is successful' do
it 'returns a successful result' do
content = lfs_object.file.read
size = content.bytesize
expect(FileUtils).to receive(:mv).with(anything, lfs_object.file.path).and_call_original
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content.to_s).and_return(response)
......@@ -88,9 +83,9 @@ describe Gitlab::Geo::Transfer do
end
context "invalid path" do
let(:lfs_object_file_path) { '/foo/bar' }
it 'logs an error if the destination directory could not be created' do
expect(lfs_object).to receive(:file).and_return(double(path: '/foo/bar'))
allow(FileUtils).to receive(:mkdir_p) { raise Errno::EEXIST }
expect(subject).to receive(:log_error).with("unable to create directory /foo: File exists")
......@@ -100,6 +95,18 @@ describe Gitlab::Geo::Transfer do
expect(result.bytes_downloaded).to eq(0)
end
end
context 'when the checksum of the downloaded file does not match' do
it 'returns a failed result' do
bad_content = 'corrupted!!!'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(bad_content).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: bad_content.bytesize, primary_missing_file: false)
end
end
end
def expect_result(result, success:, bytes_downloaded:, primary_missing_file:)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment