Commit af1cedeb authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch 'mk/validate-file-transfers-geo' into 'master'

Geo: Validate file transfers (attachments, LFS objects, artifacts)

See merge request gitlab-org/gitlab-ee!14477
parents 90a83394 4af2517d
---
title: 'Geo: Validate file transfers (attachments, LFS objects, artifacts)'
merge_request: 14477
author:
type: added
...@@ -8,23 +8,26 @@ module Gitlab ...@@ -8,23 +8,26 @@ module Gitlab
# * Returning a detailed Result object # * Returning a detailed Result object
class FileTransfer < Transfer class FileTransfer < Transfer
def initialize(file_type, upload) def initialize(file_type, upload)
@file_type = file_type super(
@file_id = upload.id file_type,
@filename = upload.absolute_path upload.id,
@request_data = build_request_data(upload) upload.absolute_path,
upload.checksum,
build_request_data(file_type, upload)
)
rescue ObjectStorage::RemoteStoreError rescue ObjectStorage::RemoteStoreError
::Gitlab::Geo::Logger.warn "Cannot transfer a remote object." ::Gitlab::Geo::Logger.warn "Cannot transfer a remote object."
end end
private private
def build_request_data(upload) def build_request_data(file_type, upload)
{ {
id: upload.model_id, id: upload.model_id,
type: upload.model_type, type: upload.model_type,
checksum: upload.checksum, checksum: upload.checksum,
file_type: @file_type, file_type: file_type,
file_id: @file_id file_id: upload.id
} }
end end
end end
......
...@@ -8,19 +8,22 @@ module Gitlab ...@@ -8,19 +8,22 @@ module Gitlab
# * Returning a detailed Result object # * Returning a detailed Result object
class JobArtifactTransfer < Transfer class JobArtifactTransfer < Transfer
def initialize(job_artifact) def initialize(job_artifact)
@file_type = :job_artifact super(
@file_id = job_artifact.id :job_artifact,
@filename = job_artifact.file.path job_artifact.id,
@request_data = job_artifact_request_data(job_artifact) job_artifact.file.path,
job_artifact.file_sha256,
job_artifact_request_data(job_artifact)
)
end end
private private
def job_artifact_request_data(job_artifact) def job_artifact_request_data(job_artifact)
{ {
id: @file_id, id: job_artifact.id,
file_type: @file_type, file_type: :job_artifact,
file_id: @file_id file_id: job_artifact.id
} }
end end
end end
......
...@@ -8,10 +8,13 @@ module Gitlab ...@@ -8,10 +8,13 @@ module Gitlab
# * Returning a detailed Result object # * Returning a detailed Result object
class LfsTransfer < Transfer class LfsTransfer < Transfer
def initialize(lfs_object) def initialize(lfs_object)
@file_type = :lfs super(
@file_id = lfs_object.id :lfs,
@filename = lfs_object.file.path lfs_object.id,
@request_data = lfs_request_data(lfs_object) lfs_object.file.path,
lfs_object.oid,
lfs_request_data(lfs_object)
)
end end
private private
...@@ -19,8 +22,8 @@ module Gitlab ...@@ -19,8 +22,8 @@ module Gitlab
def lfs_request_data(lfs_object) def lfs_request_data(lfs_object)
{ {
checksum: lfs_object.oid, checksum: lfs_object.oid,
file_type: @file_type, file_type: :lfs,
file_id: @file_id file_id: lfs_object.id
} }
end end
end end
......
...@@ -5,14 +5,15 @@ module Gitlab ...@@ -5,14 +5,15 @@ module Gitlab
class Transfer class Transfer
include LogHelpers include LogHelpers
attr_reader :file_type, :file_id, :filename, :request_data attr_reader :file_type, :file_id, :filename, :expected_checksum, :request_data
TEMP_PREFIX = 'tmp_'.freeze TEMP_PREFIX = 'tmp_'.freeze
def initialize(file_type, file_id, filename, request_data) def initialize(file_type, file_id, filename, expected_checksum, request_data)
@file_type = file_type @file_type = file_type
@file_id = file_id @file_id = file_id
@filename = filename @filename = filename
@expected_checksum = expected_checksum
@request_data = request_data @request_data = request_data
end end
...@@ -45,8 +46,8 @@ module Gitlab ...@@ -45,8 +46,8 @@ module Gitlab
private private
def failure(primary_missing_file: false) def failure(bytes_downloaded: 0, primary_missing_file: false)
Result.new(success: false, bytes_downloaded: 0, primary_missing_file: primary_missing_file) Result.new(success: false, bytes_downloaded: bytes_downloaded, primary_missing_file: primary_missing_file)
end end
def ensure_path_exists def ensure_path_exists
...@@ -90,9 +91,15 @@ module Gitlab ...@@ -90,9 +91,15 @@ module Gitlab
return failure return failure
end end
file_size = File.stat(temp_file.path).size
if checksum_mismatch?(temp_file.path)
log_error("Downloaded file checksum mismatch", expected_checksum: expected_checksum, actual_checksum: @actual_checksum, file_size_bytes: file_size)
return failure(bytes_downloaded: file_size)
end
FileUtils.mv(temp_file.path, filename) FileUtils.mv(temp_file.path, filename)
file_size = File.stat(filename).size
log_info("Successful downloaded", filename: filename, file_size_bytes: file_size) log_info("Successful downloaded", filename: filename, file_size_bytes: file_size)
rescue StandardError, Gitlab::HTTP::Error => e rescue StandardError, Gitlab::HTTP::Error => e
log_error("Error downloading file", error: e, filename: filename, url: url) log_error("Error downloading file", error: e, filename: filename, url: url)
...@@ -133,6 +140,21 @@ module Gitlab ...@@ -133,6 +140,21 @@ module Gitlab
log_error("Error creating temporary file", error: e) log_error("Error creating temporary file", error: e)
nil nil
end end
def checksum_mismatch?(file_path)
# Skip checksum check if primary didn't generate one because, for
# example, large attachments are checksummed asynchronously, and most
# types of artifacts are not checksummed at all at the moment.
return false if expected_checksum.blank?
return false unless Feature.enabled?(:geo_file_transfer_validation, default_enabled: true)
expected_checksum != actual_checksum(file_path)
end
def actual_checksum(file_path)
@actual_checksum = Digest::SHA256.file(file_path).hexdigest
end
end end
end end
end end
require 'spec_helper' require 'spec_helper'
describe Gitlab::Geo::FileTransfer do describe Gitlab::Geo::FileTransfer do
let(:user) { create(:user, avatar: fixture_file_upload('spec/fixtures/dk.png', 'image/png')) } include ::EE::GeoHelpers
set(:primary_node) { create(:geo_node, :primary) }
set(:secondary_node) { create(:geo_node) }
let(:user) { create(:user, :with_avatar) }
let(:upload) { Upload.find_by(model: user, uploader: 'AvatarUploader') } let(:upload) { Upload.find_by(model: user, uploader: 'AvatarUploader') }
subject { described_class.new(:file, upload) } subject { described_class.new(:file, upload) }
...@@ -21,4 +25,128 @@ describe Gitlab::Geo::FileTransfer do ...@@ -21,4 +25,128 @@ describe Gitlab::Geo::FileTransfer do
end end
end end
end end
context '#download_from_primary' do
before do
stub_current_geo_node(secondary_node)
end
context 'when the destination filename is a directory' do
it 'returns a failed result' do
expect(upload).to receive(:absolute_path).and_return('/tmp')
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: false)
end
end
context 'when the HTTP response is successful' do
it 'returns a successful result' do
content = upload.build_uploader.file.read
size = content.bytesize
expect(FileUtils).to receive(:mv).with(anything, upload.absolute_path).and_call_original
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content.to_s).and_return(response)
result = subject.download_from_primary
expect_result(result, success: true, bytes_downloaded: size, primary_missing_file: false)
stat = File.stat(upload.absolute_path)
expect(stat.size).to eq(size)
expect(stat.mode & 0777).to eq(0666 - File.umask)
expect(File.binread(upload.absolute_path)).to eq(content)
end
end
context 'when the HTTP response is unsuccessful' do
context 'when the HTTP response indicates a missing file on the primary' do
it 'returns a failed result indicating primary_missing_file' do
expect(FileUtils).not_to receive(:mv).with(anything, upload.absolute_path).and_call_original
response = double(:response, success?: false, code: 404, msg: "No such file")
expect(File).to receive(:read).and_return("{\"geo_code\":\"#{Gitlab::Geo::FileUploader::FILE_NOT_FOUND_GEO_CODE}\"}")
expect(Gitlab::HTTP).to receive(:get).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: true)
end
end
context 'when the HTTP response does not indicate a missing file on the primary' do
it 'returns a failed result' do
expect(FileUtils).not_to receive(:mv).with(anything, upload.absolute_path).and_call_original
response = double(:response, success?: false, code: 404, msg: 'No such file')
expect(Gitlab::HTTP).to receive(:get).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: false)
end
end
end
context 'when Tempfile fails' do
it 'returns a failed result' do
upload # load this eagerly, since this triggers Tempfile.new
expect(Tempfile).to receive(:new).and_raise(Errno::ENAMETOOLONG)
result = subject.download_from_primary
expect(result.success).to eq(false)
expect(result.bytes_downloaded).to eq(0)
end
end
context "invalid path" do
it 'logs an error if the destination directory could not be created' do
expect(upload).to receive(:absolute_path).and_return('/foo/bar')
allow(FileUtils).to receive(:mkdir_p) { raise Errno::EEXIST }
expect(subject).to receive(:log_error).with("unable to create directory /foo: File exists")
result = subject.download_from_primary
expect(result.success).to eq(false)
expect(result.bytes_downloaded).to eq(0)
end
end
context 'when the checksum of the downloaded file does not match' do
it 'returns a failed result' do
bad_content = 'corrupted!!!'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(bad_content).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: bad_content.bytesize, primary_missing_file: false)
end
end
context 'when the primary has not stored a checksum for the file' do
it 'returns a successful result' do
upload.update_column(:checksum, nil)
content = 'foo'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content).and_return(response)
result = subject.download_from_primary
expect_result(result, success: true, bytes_downloaded: content.bytesize, primary_missing_file: false)
end
end
end
def expect_result(result, success:, bytes_downloaded:, primary_missing_file:)
expect(result.success).to eq(success)
expect(result.bytes_downloaded).to eq(bytes_downloaded)
expect(result.primary_missing_file).to eq(primary_missing_file)
# Sanity check to help ensure a valid test
expect(success).not_to be_nil
expect(primary_missing_file).not_to be_nil
end
end end
require 'spec_helper' require 'spec_helper'
describe Gitlab::Geo::JobArtifactTransfer, :geo do describe Gitlab::Geo::JobArtifactTransfer, :geo do
set(:job_artifact) { create(:ci_job_artifact, :archive) } include ::EE::GeoHelpers
set(:primary_node) { create(:geo_node, :primary) }
set(:secondary_node) { create(:geo_node) }
set(:job_artifact) { create(:ci_job_artifact, :archive, :correct_checksum) }
subject do
described_class.new(job_artifact)
end
context '#initialize' do context '#initialize' do
it 'sets file_type to :ci_trace' do it 'sets file_type to :ci_trace' do
expect(described_class.new(job_artifact).file_type).to eq(:job_artifact) expect(subject.file_type).to eq(:job_artifact)
end end
it 'sets file_id to the job artifact ID' do it 'sets file_id to the job artifact ID' do
expect(described_class.new(job_artifact).file_id).to eq(job_artifact.id) expect(subject.file_id).to eq(job_artifact.id)
end end
it 'sets filename to job artifact default_path' do it 'sets filename to job artifact default_path' do
expect(described_class.new(job_artifact).filename).to eq(job_artifact.file.path) expect(subject.filename).to eq(job_artifact.file.path)
expect(job_artifact.file.path).to be_present expect(job_artifact.file.path).to be_present
end end
it 'sets request_data with file_id and file_type' do it 'sets request_data with file_id and file_type' do
expect(described_class.new(job_artifact).request_data).to eq( expect(subject.request_data).to eq(
id: job_artifact.id, id: job_artifact.id,
file_id: job_artifact.id, file_id: job_artifact.id,
file_type: :job_artifact) file_type: :job_artifact)
end end
end end
context '#download_from_primary' do
before do
stub_current_geo_node(secondary_node)
end
context 'when the destination filename is a directory' do
it 'returns a failed result' do
expect(job_artifact).to receive(:file).and_return(double(path: '/tmp'))
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: false)
end
end
context 'when the HTTP response is successful' do
it 'returns a successful result' do
content = job_artifact.file.read
size = content.bytesize
expect(FileUtils).to receive(:mv).with(anything, job_artifact.file.path).and_call_original
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content.to_s).and_return(response)
result = subject.download_from_primary
expect_result(result, success: true, bytes_downloaded: size, primary_missing_file: false)
stat = File.stat(job_artifact.file.path)
expect(stat.size).to eq(size)
expect(stat.mode & 0777).to eq(0666 - File.umask)
expect(File.binread(job_artifact.file.path)).to eq(content)
end
end
context 'when the HTTP response is unsuccessful' do
context 'when the HTTP response indicates a missing file on the primary' do
it 'returns a failed result indicating primary_missing_file' do
expect(FileUtils).not_to receive(:mv).with(anything, job_artifact.file.path).and_call_original
response = double(:response, success?: false, code: 404, msg: "No such file")
expect(File).to receive(:read).and_return("{\"geo_code\":\"#{Gitlab::Geo::FileUploader::FILE_NOT_FOUND_GEO_CODE}\"}")
expect(Gitlab::HTTP).to receive(:get).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: true)
end
end
context 'when the HTTP response does not indicate a missing file on the primary' do
it 'returns a failed result' do
expect(FileUtils).not_to receive(:mv).with(anything, job_artifact.file.path).and_call_original
response = double(:response, success?: false, code: 404, msg: 'No such file')
expect(Gitlab::HTTP).to receive(:get).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: false)
end
end
end
context 'when Tempfile fails' do
it 'returns a failed result' do
expect(Tempfile).to receive(:new).and_raise(Errno::ENAMETOOLONG)
result = subject.download_from_primary
expect(result.success).to eq(false)
expect(result.bytes_downloaded).to eq(0)
end
end
context "invalid path" do
it 'logs an error if the destination directory could not be created' do
expect(job_artifact).to receive(:file).and_return(double(path: '/foo/bar'))
allow(FileUtils).to receive(:mkdir_p) { raise Errno::EEXIST }
expect(subject).to receive(:log_error).with("unable to create directory /foo: File exists")
result = subject.download_from_primary
expect(result.success).to eq(false)
expect(result.bytes_downloaded).to eq(0)
end
end
context 'when the checksum of the downloaded file does not match' do
it 'returns a failed result' do
bad_content = 'corrupted!!!'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(bad_content).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: bad_content.bytesize, primary_missing_file: false)
end
end
context 'when the primary has not stored a checksum for the file' do
it 'returns a successful result' do
artifact = create(:ci_job_artifact, :archive)
content = 'foo'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content).and_return(response)
transfer = described_class.new(artifact)
result = transfer.download_from_primary
expect_result(result, success: true, bytes_downloaded: content.bytesize, primary_missing_file: false)
end
end
end
def expect_result(result, success:, bytes_downloaded:, primary_missing_file:)
expect(result.success).to eq(success)
expect(result.bytes_downloaded).to eq(bytes_downloaded)
expect(result.primary_missing_file).to eq(primary_missing_file)
# Sanity check to help ensure a valid test
expect(success).not_to be_nil
expect(primary_missing_file).not_to be_nil
end
end end
require 'spec_helper' require 'spec_helper'
describe Gitlab::Geo::Transfer do describe Gitlab::Geo::LfsTransfer do
include ::EE::GeoHelpers include ::EE::GeoHelpers
set(:primary_node) { create(:geo_node, :primary) } set(:primary_node) { create(:geo_node, :primary) }
set(:secondary_node) { create(:geo_node) } set(:secondary_node) { create(:geo_node) }
set(:lfs_object) { create(:lfs_object, :with_file) } set(:lfs_object) { create(:lfs_object, :with_file, :correct_oid) }
let(:lfs_object_file_path) { lfs_object.file.path }
let(:url) { primary_node.geo_transfers_url(:lfs, lfs_object.id.to_s) }
let(:content) { SecureRandom.random_bytes(10) }
let(:size) { File.stat(lfs_object.file.path).size }
subject do subject do
described_class.new(:lfs, described_class.new(lfs_object)
lfs_object.id,
lfs_object_file_path,
{ sha256: lfs_object.oid })
end end
context '#download_from_primary' do context '#download_from_primary' do
...@@ -25,9 +18,9 @@ describe Gitlab::Geo::Transfer do ...@@ -25,9 +18,9 @@ describe Gitlab::Geo::Transfer do
context 'when the destination filename is a directory' do context 'when the destination filename is a directory' do
it 'returns a failed result' do it 'returns a failed result' do
transfer = described_class.new(:lfs, lfs_object.id, '/tmp', { sha256: lfs_object.id }) expect(lfs_object).to receive(:file).and_return(double(path: '/tmp'))
result = transfer.download_from_primary result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: false) expect_result(result, success: false, bytes_downloaded: 0, primary_missing_file: false)
end end
...@@ -35,6 +28,8 @@ describe Gitlab::Geo::Transfer do ...@@ -35,6 +28,8 @@ describe Gitlab::Geo::Transfer do
context 'when the HTTP response is successful' do context 'when the HTTP response is successful' do
it 'returns a successful result' do it 'returns a successful result' do
content = lfs_object.file.read
size = content.bytesize
expect(FileUtils).to receive(:mv).with(anything, lfs_object.file.path).and_call_original expect(FileUtils).to receive(:mv).with(anything, lfs_object.file.path).and_call_original
response = double(:response, success?: true) response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(content.to_s).and_return(response) expect(Gitlab::HTTP).to receive(:get).and_yield(content.to_s).and_return(response)
...@@ -88,9 +83,9 @@ describe Gitlab::Geo::Transfer do ...@@ -88,9 +83,9 @@ describe Gitlab::Geo::Transfer do
end end
context "invalid path" do context "invalid path" do
let(:lfs_object_file_path) { '/foo/bar' }
it 'logs an error if the destination directory could not be created' do it 'logs an error if the destination directory could not be created' do
expect(lfs_object).to receive(:file).and_return(double(path: '/foo/bar'))
allow(FileUtils).to receive(:mkdir_p) { raise Errno::EEXIST } allow(FileUtils).to receive(:mkdir_p) { raise Errno::EEXIST }
expect(subject).to receive(:log_error).with("unable to create directory /foo: File exists") expect(subject).to receive(:log_error).with("unable to create directory /foo: File exists")
...@@ -100,6 +95,18 @@ describe Gitlab::Geo::Transfer do ...@@ -100,6 +95,18 @@ describe Gitlab::Geo::Transfer do
expect(result.bytes_downloaded).to eq(0) expect(result.bytes_downloaded).to eq(0)
end end
end end
context 'when the checksum of the downloaded file does not match' do
it 'returns a failed result' do
bad_content = 'corrupted!!!'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(bad_content).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: bad_content.bytesize, primary_missing_file: false)
end
end
end end
def expect_result(result, success:, bytes_downloaded:, primary_missing_file:) def expect_result(result, success:, bytes_downloaded:, primary_missing_file:)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment