Commit 7a543084 authored by Michael Kozono's avatar Michael Kozono

Mark files/artifacts synced if missing on primary

parent b575378a
module Geo
# This class is responsible for:
# * Finding the appropriate Downloader class for a FileRegistry record
# * Executing the Downloader
# * Marking the FileRegistry record as synced or needing retry
class FileDownloadService < FileService
LEASE_TIMEOUT = 8.hours.freeze
......@@ -8,13 +12,15 @@ module Geo
def execute
try_obtain_lease do
start_time = Time.now
bytes_downloaded = downloader.execute
success = (bytes_downloaded.present? && bytes_downloaded >= 0)
log_info("File download",
success: success,
bytes_downloaded: bytes_downloaded,
download_time_s: (Time.now - start_time).to_f.round(3))
update_registry(bytes_downloaded, success: success)
download_result = downloader.execute
mark_as_synced = download_result.success || download_result.primary_missing_file
log_file_download(mark_as_synced, download_result, start_time)
update_registry(download_result.bytes_downloaded,
mark_as_synced: mark_as_synced,
missing_on_primary: download_result.primary_missing_file)
end
end
......@@ -28,8 +34,21 @@ module Geo
raise
end
def update_registry(bytes_downloaded, success:)
transfer =
def log_file_download(mark_as_synced, download_result, start_time)
metadata = {
mark_as_synced: mark_as_synced,
download_success: download_result.success,
bytes_downloaded: download_result.bytes_downloaded,
failed_before_transfer: download_result.failed_before_transfer,
primary_missing_file: download_result.primary_missing_file,
download_time_s: (Time.now - start_time).to_f.round(3)
}
log_info("File download", metadata)
end
def update_registry(bytes_downloaded, mark_as_synced:, missing_on_primary: false)
registry =
if object_type.to_sym == :job_artifact
Geo::JobArtifactRegistry.find_or_initialize_by(artifact_id: object_db_id)
else
......@@ -39,16 +58,17 @@ module Geo
)
end
transfer.bytes = bytes_downloaded
transfer.success = success
registry.bytes = bytes_downloaded
registry.success = mark_as_synced
registry.missing_on_primary = missing_on_primary
unless success
unless mark_as_synced
# We don't limit the amount of retries
transfer.retry_count = (transfer.retry_count || 0) + 1
transfer.retry_at = Time.now + delay(transfer.retry_count).seconds
registry.retry_count = (registry.retry_count || 0) + 1
registry.retry_at = Time.now + delay(registry.retry_count).seconds
end
transfer.save
registry.save
end
def lease_key
......
class AddMissingOnPrimaryToFileRegistry < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_column_with_default :file_registry, :missing_on_primary, :boolean, default: false, allow_null: false
end
def down
remove_column :file_registry, :missing_on_primary
end
end
class AddMissingOnPrimaryToJobArtifactRegistry < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_column_with_default :job_artifact_registry, :missing_on_primary, :boolean, default: false, allow_null: false
end
def down
remove_column :job_artifact_registry, :missing_on_primary
end
end
......@@ -28,6 +28,7 @@ ActiveRecord::Schema.define(version: 20180405074130) do
t.boolean "success", default: false, null: false
t.integer "retry_count"
t.datetime "retry_at"
t.boolean "missing_on_primary", default: false, null: false
end
add_index "file_registry", ["file_type", "file_id"], name: "index_file_registry_on_file_type_and_file_id", unique: true, using: :btree
......@@ -43,6 +44,7 @@ ActiveRecord::Schema.define(version: 20180405074130) do
t.integer "retry_count"
t.boolean "success"
t.string "sha256"
t.boolean "missing_on_primary", default: false, null: false
end
add_index "job_artifact_registry", ["retry_at"], name: "index_job_artifact_registry_on_retry_at", using: :btree
......
module Gitlab
module Geo
# This class is responsible for:
# * Finding an Upload record
# * Requesting and downloading the Upload's file from the primary
# * Returning a detailed Result
#
# TODO: Rearrange things so this class not inherited by JobArtifactDownloader and LfsDownloader
# Maybe rename it so it doesn't seem generic. It only works with Upload records.
class FileDownloader
attr_reader :object_type, :object_db_id
......@@ -14,10 +21,33 @@ module Gitlab
# or nil or -1 if a failure occurred.
def execute
upload = Upload.find_by(id: object_db_id)
return unless upload.present?
return fail_before_transfer unless upload.present?
transfer = ::Gitlab::Geo::FileTransfer.new(object_type.to_sym, upload)
transfer.download_from_primary
Result.from_transfer_result(transfer.download_from_primary)
end
class Result
attr_reader :success, :bytes_downloaded, :primary_missing_file, :failed_before_transfer
def self.from_transfer_result(transfer_result)
Result.new(success: transfer_result.success,
primary_missing_file: transfer_result.primary_missing_file,
bytes_downloaded: transfer_result.bytes_downloaded)
end
def initialize(success:, bytes_downloaded:, primary_missing_file: false, failed_before_transfer: false)
@success = success
@bytes_downloaded = bytes_downloaded
@primary_missing_file = primary_missing_file
@failed_before_transfer = failed_before_transfer
end
end
private
def fail_before_transfer
Result.new(success: false, bytes_downloaded: 0, failed_before_transfer: true)
end
end
end
......
module Gitlab
module Geo
# This class is responsible for:
# * Finding a ::Ci::JobArtifact record
# * Requesting and downloading the JobArtifact's file from the primary
# * Returning a detailed Result
#
# TODO: Rearrange things so this class does not inherit FileDownloader
class JobArtifactDownloader < FileDownloader
def execute
job_artifact = ::Ci::JobArtifact.find_by(id: object_db_id)
return unless job_artifact.present?
return fail_before_transfer unless job_artifact.present?
transfer = ::Gitlab::Geo::JobArtifactTransfer.new(job_artifact)
transfer.download_from_primary
Result.from_transfer_result(transfer.download_from_primary)
end
end
end
......
module Gitlab
module Geo
# This class is responsible for:
# * Finding a LfsObject record
# * Requesting and downloading the LfsObject's file from the primary
# * Returning a detailed Result
#
# TODO: Rearrange things so this class does not inherit FileDownloader
class LfsDownloader < FileDownloader
def execute
lfs_object = LfsObject.find_by(id: object_db_id)
return unless lfs_object.present?
return fail_before_transfer unless lfs_object.present?
transfer = ::Gitlab::Geo::LfsTransfer.new(lfs_object)
transfer.download_from_primary
Result.from_transfer_result(transfer.download_from_primary)
end
end
end
......
......@@ -3,22 +3,29 @@ require 'spec_helper'
describe Gitlab::Geo::JobArtifactDownloader, :geo do
let(:job_artifact) { create(:ci_job_artifact) }
subject do
described_class.new(:job_artifact, job_artifact.id)
end
context '#execute' do
context 'with job artifact' do
it 'returns a FileDownloader::Result object' do
downloader = described_class.new(:job_artifact, job_artifact.id)
result = Gitlab::Geo::Transfer::Result.new(success: true, bytes_downloaded: 1)
context '#download_from_primary' do
it 'with a job artifact' do
allow_any_instance_of(Gitlab::Geo::JobArtifactTransfer)
.to receive(:download_from_primary).and_return(100)
.to receive(:download_from_primary).and_return(result)
expect(subject.execute).to eq(100)
expect(downloader.execute).to be_a(Gitlab::Geo::FileDownloader::Result)
end
end
it 'with an unknown job artifact' do
expect(described_class.new(:job_artifact, 10000)).not_to receive(:download_from_primary)
context 'with unknown job artifact' do
let(:downloader) { described_class.new(:job_artifact, 10000) }
expect(subject.execute).to be_nil
it 'returns a FileDownloader::Result object' do
expect(downloader.execute).to be_a(Gitlab::Geo::FileDownloader::Result)
end
it 'returns a result indicating a failure before a transfer was attempted' do
expect(downloader.execute.failed_before_transfer).to be_truthy
end
end
end
end
require 'spec_helper'
describe Gitlab::Geo::LfsDownloader do
describe Gitlab::Geo::LfsDownloader, :geo do
let(:lfs_object) { create(:lfs_object) }
subject do
described_class.new(:lfs, lfs_object.id)
end
context '#execute' do
context 'with LFS object' do
it 'returns a FileDownloader::Result object' do
downloader = described_class.new(:lfs, lfs_object.id)
result = Gitlab::Geo::Transfer::Result.new(success: true, bytes_downloaded: 1)
context '#download_from_primary' do
it 'with LFS object' do
allow_any_instance_of(Gitlab::Geo::LfsTransfer)
.to receive(:download_from_primary).and_return(100)
.to receive(:download_from_primary).and_return(result)
expect(subject.execute).to eq(100)
expect(downloader.execute).to be_a(Gitlab::Geo::FileDownloader::Result)
end
end
it 'with unknown LFS object' do
expect(described_class.new(:lfs, 10000)).not_to receive(:download_from_primary)
context 'with unknown job artifact' do
let(:downloader) { described_class.new(:lfs, 10000) }
expect(subject.execute).to be_nil
it 'returns a FileDownloader::Result object' do
expect(downloader.execute).to be_a(Gitlab::Geo::FileDownloader::Result)
end
it 'returns a result indicating a failure before a transfer was attempted' do
expect(downloader.execute.failed_before_transfer).to be_truthy
end
end
end
end
......@@ -12,204 +12,260 @@ describe Geo::FileDownloadService do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).and_return(true)
end
describe '#execute' do
context 'user avatar' do
let(:user) { create(:user, avatar: fixture_file_upload(Rails.root + 'spec/fixtures/dk.png', 'image/png')) }
let(:upload) { Upload.find_by(model: user, uploader: 'AvatarUploader') }
subject(:execute!) { described_class.new(:avatar, upload.id).execute }
shared_examples_for 'a service that downloads the file and registers the sync result' do |file_type|
let(:download_service) { described_class.new(file_type, file.id) }
let(:registry) { file_type == 'job_artifact' ? Geo::JobArtifactRegistry : Geo::FileRegistry }
subject(:execute!) { download_service.execute }
it 'downloads a user avatar' do
stub_transfer(Gitlab::Geo::FileTransfer, 100)
context 'for a new file' do
context 'when the downloader fails before attempting a transfer' do
it 'logs that the download failed before attempting a transfer' do
result = double(:result, success: false, bytes_downloaded: 0, primary_missing_file: false, failed_before_transfer: true)
downloader = double(:downloader, execute: result)
expect(download_service).to receive(:downloader).and_return(downloader)
expect(Gitlab::Geo::Logger).to receive(:info).with(hash_including(:message, :download_time_s, download_success: false, bytes_downloaded: 0, failed_before_transfer: true)).and_call_original
expect { execute! }.to change { Geo::FileRegistry.synced.count }.by(1)
execute!
end
it 'registers when the download fails' do
stub_transfer(Gitlab::Geo::FileTransfer, -1)
expect { execute! }.to change { Geo::FileRegistry.failed.count }.by(1)
expect(Geo::FileRegistry.last.retry_count).to eq(1)
expect(Geo::FileRegistry.last.retry_at).to be_present
end
it 'registers when the download fails with some other error' do
stub_transfer(Gitlab::Geo::FileTransfer, nil)
expect { execute! }.to change { Geo::FileRegistry.failed.count }.by(1)
end
context 'when the downloader attempts a transfer' do
context 'when the file is successfully downloaded' do
before do
stub_transfer_result(bytes_downloaded: 100, success: true)
end
context 'group avatar' do
let(:group) { create(:group, avatar: fixture_file_upload(Rails.root + 'spec/fixtures/dk.png', 'image/png')) }
let(:upload) { Upload.find_by(model: group, uploader: 'AvatarUploader') }
it 'registers the file' do
expect { execute! }.to change { registry.count }.by(1)
end
subject(:execute!) { described_class.new(:avatar, upload.id).execute }
it 'marks the file as synced' do
expect { execute! }.to change { registry.synced.count }.by(1)
end
it 'downloads a group avatar' do
stub_transfer(Gitlab::Geo::FileTransfer, 100)
it 'does not mark the file as missing on the primary' do
execute!
expect { execute! }.to change { Geo::FileRegistry.synced.count }.by(1)
expect(registry.last.missing_on_primary).to be_falsey
end
it 'registers when the download fails' do
stub_transfer(Gitlab::Geo::FileTransfer, -1)
it 'logs the result' do
expect(Gitlab::Geo::Logger).to receive(:info).with(hash_including(:message, :download_time_s, download_success: true, bytes_downloaded: 100)).and_call_original
expect { execute! }.to change { Geo::FileRegistry.failed.count }.by(1)
execute!
end
end
context 'project avatar' do
let(:project) { create(:project, avatar: fixture_file_upload(Rails.root + 'spec/fixtures/dk.png', 'image/png')) }
let(:upload) { Upload.find_by(model: project, uploader: 'AvatarUploader') }
subject(:execute!) { described_class.new(:avatar, upload.id).execute }
it 'downloads a project avatar' do
stub_transfer(Gitlab::Geo::FileTransfer, 100)
expect { execute! }.to change { Geo::FileRegistry.synced.count }.by(1)
context 'when the file fails to download' do
context 'when the file is missing on the primary' do
before do
stub_transfer_result(bytes_downloaded: 100, success: true, primary_missing_file: true)
end
it 'registers when the download fails' do
stub_transfer(Gitlab::Geo::FileTransfer, -1)
expect { execute! }.to change { Geo::FileRegistry.failed.count }.by(1)
it 'registers the file' do
expect { execute! }.to change { registry.count }.by(1)
end
it 'marks the file as synced' do
expect { execute! }.to change { registry.synced.count }.by(1)
end
context 'with an attachment' do
let(:note) { create(:note, :with_attachment) }
let(:upload) { Upload.find_by(model: note, uploader: 'AttachmentUploader') }
it 'marks the file as missing on the primary' do
execute!
subject(:execute!) { described_class.new(:attachment, upload.id).execute }
expect(registry.last.missing_on_primary).to be_truthy
end
it 'downloads the attachment' do
stub_transfer(Gitlab::Geo::FileTransfer, 100)
it 'logs the result' do
expect(Gitlab::Geo::Logger).to receive(:info).with(hash_including(:message, :download_time_s, download_success: true, bytes_downloaded: 100, primary_missing_file: true)).and_call_original
expect { execute! }.to change { Geo::FileRegistry.synced.count }.by(1)
execute!
end
end
it 'registers when the download fails' do
stub_transfer(Gitlab::Geo::FileTransfer, -1)
context 'when the file is not missing on the primary' do
before do
stub_transfer_result(bytes_downloaded: 0, success: false)
end
expect { execute! }.to change { Geo::FileRegistry.failed.count }.by(1)
it 'registers the file' do
expect { execute! }.to change { registry.count }.by(1)
end
it 'marks the file as failed to sync' do
expect { execute! }.to change { registry.failed.count }.by(1)
end
context 'with a snippet' do
let(:upload) { create(:upload, :personal_snippet_upload) }
it 'does not mark the file as missing on the primary' do
execute!
subject(:execute!) { described_class.new(:personal_file, upload.id).execute }
expect(registry.last.missing_on_primary).to be_falsey
end
it 'downloads the file' do
stub_transfer(Gitlab::Geo::FileTransfer, 100)
it 'sets a retry date and increments the retry count' do
execute!
expect { execute! }.to change { Geo::FileRegistry.synced.count }.by(1)
expect(registry.last.retry_count).to eq(1)
expect(registry.last.retry_at).to be_present
end
end
end
end
end
it 'registers when the download fails' do
stub_transfer(Gitlab::Geo::FileTransfer, -1)
expect { execute! }.to change { Geo::FileRegistry.failed.count }.by(1)
context 'for a registered file that failed to sync' do
let!(:registry_entry) do
if file_type == 'job_artifact'
create(:geo_job_artifact_registry, success: false, artifact_id: file.id)
else
create(:geo_file_registry, file_type.to_sym, success: false, file_id: file.id)
end
end
context 'with file upload' do
let(:project) { create(:project) }
let(:upload) { Upload.find_by(model: project, uploader: 'FileUploader') }
context 'when the file is successfully downloaded' do
before do
stub_transfer_result(bytes_downloaded: 100, success: true)
end
it 'does not register a new file' do
expect { execute! }.not_to change { registry.count }
end
subject { described_class.new(:file, upload.id) }
it 'marks the file as synced' do
expect { execute! }.to change { registry.synced.count }.by(1)
end
context 'when the file was marked as missing on the primary' do
before do
FileUploader.new(project).store!(fixture_file_upload(Rails.root + 'spec/fixtures/dk.png', 'image/png'))
registry_entry.update_column(:missing_on_primary, true)
end
it 'downloads the file' do
stub_transfer(Gitlab::Geo::FileTransfer, 100)
it 'marks the file as no longer missing on the primary' do
execute!
expect { subject.execute }.to change { Geo::FileRegistry.synced.count }.by(1)
expect(registry_entry.reload.missing_on_primary).to be_falsey
end
end
it 'registers when the download fails' do
stub_transfer(Gitlab::Geo::FileTransfer, -1)
context 'when the file was not marked as missing on the primary' do
it 'does not mark the file as missing on the primary' do
execute!
expect { subject.execute }.to change { Geo::FileRegistry.failed.count }.by(1)
expect(registry_entry.reload.missing_on_primary).to be_falsey
end
end
end
context 'with namespace file upload' do
let(:group) { create(:group) }
let(:upload) { Upload.find_by(model: group, uploader: 'NamespaceFileUploader') }
context 'when the file fails to download' do
context 'when the file is missing on the primary' do
before do
stub_transfer_result(bytes_downloaded: 100, success: true, primary_missing_file: true)
end
subject { described_class.new(:file, upload.id) }
it 'does not register a new file' do
expect { execute! }.not_to change { registry.count }
end
before do
NamespaceFileUploader.new(group).store!(fixture_file_upload(Rails.root + 'spec/fixtures/dk.png', 'image/png'))
it 'marks the file as synced' do
expect { execute! }.to change { registry.synced.count }.by(1)
end
it 'downloads the file' do
stub_transfer(Gitlab::Geo::FileTransfer, 100)
it 'marks the file as missing on the primary' do
execute!
expect { subject.execute }.to change { Geo::FileRegistry.synced.count }.by(1)
expect(registry_entry.reload.missing_on_primary).to be_truthy
end
it 'registers when the download fails' do
stub_transfer(Gitlab::Geo::FileTransfer, -1)
it 'logs the result' do
expect(Gitlab::Geo::Logger).to receive(:info).with(hash_including(:message, :download_time_s, download_success: true, bytes_downloaded: 100, primary_missing_file: true)).and_call_original
expect { subject.execute }.to change { Geo::FileRegistry.failed.count }.by(1)
execute!
end
end
context 'LFS object' do
let(:lfs_object) { create(:lfs_object) }
subject { described_class.new(:lfs, lfs_object.id) }
context 'when the file is not missing on the primary' do
before do
stub_transfer_result(bytes_downloaded: 0, success: false)
end
it 'downloads an LFS object' do
stub_transfer(Gitlab::Geo::LfsTransfer, 100)
it 'does not register a new file' do
expect { execute! }.not_to change { registry.count }
end
expect { subject.execute }.to change { Geo::FileRegistry.synced.count }.by(1)
it 'does not change the success flag' do
expect { execute! }.not_to change { registry.failed.count }
end
it 'registers when the download fails' do
stub_transfer(Gitlab::Geo::LfsTransfer, -1)
it 'does not mark the file as missing on the primary' do
execute!
expect { subject.execute }.to change { Geo::FileRegistry.failed.count }.by(1)
expect(registry_entry.reload.missing_on_primary).to be_falsey
end
it 'logs a message' do
stub_transfer(Gitlab::Geo::LfsTransfer, 100)
expect(Gitlab::Geo::Logger).to receive(:info).with(hash_including(:message, :download_time_s, success: true, bytes_downloaded: 100)).and_call_original
it 'changes the retry date and increments the retry count' do
expect { execute! }.to change { registry_entry.reload.retry_count }.from(nil).to(1)
end
subject.execute
it 'changes the retry date and increments the retry count' do
expect { execute! }.to change { registry_entry.reload.retry_at }
end
end
end
end
end
context 'job artifacts' do
let(:job_artifact) { create(:ci_job_artifact) }
describe '#execute' do
context 'user avatar' do
it_behaves_like "a service that downloads the file and registers the sync result", 'avatar' do
let(:file) { create(:upload, model: build(:user)) }
end
end
subject { described_class.new(:job_artifact, job_artifact.id) }
context 'group avatar' do
it_behaves_like "a service that downloads the file and registers the sync result", 'avatar' do
let(:file) { create(:upload, model: build(:group)) }
end
end
it 'downloads a job artifact' do
stub_transfer(Gitlab::Geo::JobArtifactTransfer, 100)
context 'project avatar' do
it_behaves_like "a service that downloads the file and registers the sync result", 'avatar' do
let(:file) { create(:upload, model: build(:project)) }
end
end
expect { subject.execute }.to change { Geo::JobArtifactRegistry.synced.count }.by(1)
context 'with an attachment' do
it_behaves_like "a service that downloads the file and registers the sync result", 'attachment' do
let(:file) { create(:upload, :attachment_upload) }
end
end
it 'registers when the download fails' do
stub_transfer(Gitlab::Geo::JobArtifactTransfer, -1)
context 'with a snippet' do
it_behaves_like "a service that downloads the file and registers the sync result", 'personal_file' do
let(:file) { create(:upload, :personal_snippet_upload) }
end
end
expect { subject.execute }.to change { Geo::JobArtifactRegistry.failed.count }.by(1)
context 'with file upload' do
it_behaves_like "a service that downloads the file and registers the sync result", 'file' do
let(:file) { create(:upload, :issuable_upload) }
end
end
it 'logs a message' do
stub_transfer(Gitlab::Geo::JobArtifactTransfer, 100)
context 'with namespace file upload' do
it_behaves_like "a service that downloads the file and registers the sync result", 'namespace_file' do
let(:file) { create(:upload, :namespace_upload) }
end
end
expect(Gitlab::Geo::Logger).to receive(:info).with(hash_including(:message, :download_time_s, success: true, bytes_downloaded: 100)).and_call_original
context 'LFS object' do
it_behaves_like "a service that downloads the file and registers the sync result", 'lfs' do
let(:file) { create(:lfs_object) }
end
end
subject.execute
context 'job artifacts' do
it_behaves_like "a service that downloads the file and registers the sync result", 'job_artifact' do
let(:file) { create(:ci_job_artifact) }
end
end
......@@ -219,9 +275,13 @@ describe Geo::FileDownloadService do
end
end
def stub_transfer(kls, result)
instance = double("(instance of #{kls})", download_from_primary: result)
allow(kls).to receive(:new).and_return(instance)
def stub_transfer_result(bytes_downloaded:, success: false, primary_missing_file: false)
result = double(:transfer_result,
bytes_downloaded: bytes_downloaded,
success: success,
primary_missing_file: primary_missing_file)
instance = double("(instance of Gitlab::Geo::Transfer)", download_from_primary: result)
allow(Gitlab::Geo::Transfer).to receive(:new).and_return(instance)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment