Commit 3f8a817d authored by Michael Kozono's avatar Michael Kozono

Check LFS object file integrity after download

parent 8f8f2d2a
...@@ -8,10 +8,13 @@ module Gitlab ...@@ -8,10 +8,13 @@ module Gitlab
# * Returning a detailed Result object # * Returning a detailed Result object
class LfsTransfer < Transfer class LfsTransfer < Transfer
def initialize(lfs_object) def initialize(lfs_object)
@file_type = :lfs super(
@file_id = lfs_object.id :lfs,
@filename = lfs_object.file.path lfs_object.id,
@request_data = lfs_request_data(lfs_object) lfs_object.file.path,
lfs_object.oid,
lfs_request_data(lfs_object)
)
end end
private private
...@@ -19,8 +22,8 @@ module Gitlab ...@@ -19,8 +22,8 @@ module Gitlab
def lfs_request_data(lfs_object) def lfs_request_data(lfs_object)
{ {
checksum: lfs_object.oid, checksum: lfs_object.oid,
file_type: @file_type, file_type: :lfs,
file_id: @file_id file_id: lfs_object.id
} }
end end
end end
......
...@@ -5,14 +5,15 @@ module Gitlab ...@@ -5,14 +5,15 @@ module Gitlab
class Transfer class Transfer
include LogHelpers include LogHelpers
attr_reader :file_type, :file_id, :filename, :request_data attr_reader :file_type, :file_id, :filename, :expected_checksum, :request_data
TEMP_PREFIX = 'tmp_'.freeze TEMP_PREFIX = 'tmp_'.freeze
def initialize(file_type, file_id, filename, request_data) def initialize(file_type, file_id, filename, expected_checksum, request_data)
@file_type = file_type @file_type = file_type
@file_id = file_id @file_id = file_id
@filename = filename @filename = filename
@expected_checksum = expected_checksum
@request_data = request_data @request_data = request_data
end end
...@@ -45,8 +46,8 @@ module Gitlab ...@@ -45,8 +46,8 @@ module Gitlab
private private
def failure(primary_missing_file: false) def failure(bytes_downloaded: 0, primary_missing_file: false)
Result.new(success: false, bytes_downloaded: 0, primary_missing_file: primary_missing_file) Result.new(success: false, bytes_downloaded: bytes_downloaded, primary_missing_file: primary_missing_file)
end end
def ensure_path_exists def ensure_path_exists
...@@ -90,9 +91,15 @@ module Gitlab ...@@ -90,9 +91,15 @@ module Gitlab
return failure return failure
end end
file_size = File.stat(temp_file.path).size
if checksum_mismatch?(temp_file.path)
log_error("Downloaded file checksum mismatch", expected_checksum: expected_checksum, actual_checksum: @actual_checksum, file_size_bytes: file_size)
return failure(bytes_downloaded: file_size)
end
FileUtils.mv(temp_file.path, filename) FileUtils.mv(temp_file.path, filename)
file_size = File.stat(filename).size
log_info("Successful downloaded", filename: filename, file_size_bytes: file_size) log_info("Successful downloaded", filename: filename, file_size_bytes: file_size)
rescue StandardError, Gitlab::HTTP::Error => e rescue StandardError, Gitlab::HTTP::Error => e
log_error("Error downloading file", error: e, filename: filename, url: url) log_error("Error downloading file", error: e, filename: filename, url: url)
...@@ -133,6 +140,19 @@ module Gitlab ...@@ -133,6 +140,19 @@ module Gitlab
log_error("Error creating temporary file", error: e) log_error("Error creating temporary file", error: e)
nil nil
end end
def checksum_mismatch?(file_path)
# Skip checksum check if primary didn't generate one because, for
# example, large attachments are checksummed asynchronously, and most
# types of artifacts are not checksummed at all at the moment.
return false if expected_checksum.blank?
expected_checksum != actual_checksum(file_path)
end
def actual_checksum(file_path)
@actual_checksum = Digest::SHA256.file(file_path).hexdigest
end
end end
end end
end end
...@@ -5,7 +5,7 @@ describe Gitlab::Geo::LfsTransfer do ...@@ -5,7 +5,7 @@ describe Gitlab::Geo::LfsTransfer do
set(:primary_node) { create(:geo_node, :primary) } set(:primary_node) { create(:geo_node, :primary) }
set(:secondary_node) { create(:geo_node) } set(:secondary_node) { create(:geo_node) }
set(:lfs_object) { create(:lfs_object, :with_file) } set(:lfs_object) { create(:lfs_object, :with_file, :correct_oid) }
subject do subject do
described_class.new(lfs_object) described_class.new(lfs_object)
...@@ -28,7 +28,7 @@ describe Gitlab::Geo::LfsTransfer do ...@@ -28,7 +28,7 @@ describe Gitlab::Geo::LfsTransfer do
context 'when the HTTP response is successful' do context 'when the HTTP response is successful' do
it 'returns a successful result' do it 'returns a successful result' do
content = SecureRandom.random_bytes(10) content = lfs_object.file.read
size = content.bytesize size = content.bytesize
expect(FileUtils).to receive(:mv).with(anything, lfs_object.file.path).and_call_original expect(FileUtils).to receive(:mv).with(anything, lfs_object.file.path).and_call_original
response = double(:response, success?: true) response = double(:response, success?: true)
...@@ -95,6 +95,18 @@ describe Gitlab::Geo::LfsTransfer do ...@@ -95,6 +95,18 @@ describe Gitlab::Geo::LfsTransfer do
expect(result.bytes_downloaded).to eq(0) expect(result.bytes_downloaded).to eq(0)
end end
end end
context 'when the checksum of the downloaded file does not match' do
it 'returns a failed result' do
bad_content = 'corrupted!!!'
response = double(:response, success?: true)
expect(Gitlab::HTTP).to receive(:get).and_yield(bad_content).and_return(response)
result = subject.download_from_primary
expect_result(result, success: false, bytes_downloaded: bad_content.bytesize, primary_missing_file: false)
end
end
end end
def expect_result(result, success:, bytes_downloaded:, primary_missing_file:) def expect_result(result, success:, bytes_downloaded:, primary_missing_file:)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment