Commit c9af24c2 authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch '28725-paginate-lfs-object-import' into 'master'

Batching LFS objects downloads

See merge request gitlab-org/gitlab!26434
parents 88d9713d 99972505
...@@ -7,8 +7,13 @@ module Projects ...@@ -7,8 +7,13 @@ module Projects
class LfsDownloadLinkListService < BaseService class LfsDownloadLinkListService < BaseService
DOWNLOAD_ACTION = 'download' DOWNLOAD_ACTION = 'download'
# This could be different per server, but it seems like a reasonable value to start with.
# https://github.com/git-lfs/git-lfs/issues/419
REQUEST_BATCH_SIZE = 100
DownloadLinksError = Class.new(StandardError) DownloadLinksError = Class.new(StandardError)
DownloadLinkNotFound = Class.new(StandardError) DownloadLinkNotFound = Class.new(StandardError)
DownloadLinksRequestEntityTooLargeError = Class.new(StandardError)
attr_reader :remote_uri attr_reader :remote_uri
...@@ -25,16 +30,39 @@ module Projects ...@@ -25,16 +30,39 @@ module Projects
def execute(oids) def execute(oids)
return [] unless project&.lfs_enabled? && remote_uri && oids.present? return [] unless project&.lfs_enabled? && remote_uri && oids.present?
get_download_links(oids) get_download_links_in_batches(oids)
end end
private private
def get_download_links_in_batches(oids, batch_size = REQUEST_BATCH_SIZE)
download_links = []
oids.each_slice(batch_size) do |batch|
download_links += get_download_links(batch)
end
download_links
rescue DownloadLinksRequestEntityTooLargeError => e
# Log this exceptions to see how open it happens
Gitlab::ErrorTracking
.track_exception(e, project_id: project&.id, batch_size: batch_size, oids_count: oids.count)
# Try again with a smaller batch
batch_size /= 2
retry if batch_size > REQUEST_BATCH_SIZE / 3
raise DownloadLinksError, 'Unable to download due to RequestEntityTooLarge errors'
end
def get_download_links(oids) def get_download_links(oids)
response = Gitlab::HTTP.post(remote_uri, response = Gitlab::HTTP.post(remote_uri,
body: request_body(oids), body: request_body(oids),
headers: headers) headers: headers)
raise DownloadLinksRequestEntityTooLargeError if response.request_entity_too_large?
raise DownloadLinksError, response.message unless response.success? raise DownloadLinksError, response.message unless response.success?
# Since the LFS Batch API may return a Content-Ttpe of # Since the LFS Batch API may return a Content-Ttpe of
......
---
title: Batch processing LFS objects downloads
merge_request: 26434
author:
type: changed
...@@ -8,18 +8,27 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do ...@@ -8,18 +8,27 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do
let(:new_oids) { { 'oid1' => 123, 'oid2' => 125 } } let(:new_oids) { { 'oid1' => 123, 'oid2' => 125 } }
let(:remote_uri) { URI.parse(lfs_endpoint) } let(:remote_uri) { URI.parse(lfs_endpoint) }
let(:objects_response) do let(:request_object) { HTTParty::Request.new(Net::HTTP::Post, '/') }
body = new_oids.map do |oid, size| let(:parsed_block) { lambda {} }
let(:success_net_response) { Net::HTTPOK.new('', '', '') }
let(:response) { Gitlab::HTTP::Response.new(request_object, net_response, parsed_block) }
def objects_response(oids)
body = oids.map do |oid, size|
{ {
'oid' => oid, 'oid' => oid, 'size' => size,
'size' => size,
'actions' => { 'actions' => {
'download' => { 'href' => "#{import_url}/gitlab-lfs/objects/#{oid}" } 'download' => { 'href' => "#{import_url}/gitlab-lfs/objects/#{oid}" }
} }
} }
end end
Struct.new(:success?, :objects).new(true, body) Struct.new(:success?, :objects).new(true, body).to_json
end
def custom_response(net_response, body = nil)
allow(net_response).to receive(:body).and_return(body)
Gitlab::HTTP::Response.new(request_object, net_response, parsed_block)
end end
let(:invalid_object_response) do let(:invalid_object_response) do
...@@ -33,9 +42,8 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do ...@@ -33,9 +42,8 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do
before do before do
allow(project).to receive(:lfs_enabled?).and_return(true) allow(project).to receive(:lfs_enabled?).and_return(true)
response = instance_double(Gitlab::HTTP::Response)
allow(response).to receive(:body).and_return(objects_response.to_json) response = custom_response(success_net_response, objects_response(new_oids))
allow(response).to receive(:success?).and_return(true)
allow(Gitlab::HTTP).to receive(:post).and_return(response) allow(Gitlab::HTTP).to receive(:post).and_return(response)
end end
...@@ -46,6 +54,102 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do ...@@ -46,6 +54,102 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do
end end
end end
context 'when lfs objects size is larger than the batch size' do
def stub_successful_request(batch)
response = custom_response(success_net_response, objects_response(batch))
stub_request(batch, response)
end
def stub_entity_too_large_error_request(batch)
entity_too_large_net_response = Net::HTTPRequestEntityTooLarge.new('', '', '')
response = custom_response(entity_too_large_net_response)
stub_request(batch, response)
end
def stub_request(batch, response)
expect(Gitlab::HTTP).to receive(:post).with(
remote_uri,
{
body: { operation: 'download', objects: batch.map { |k, v| { oid: k, size: v } } }.to_json,
headers: subject.send(:headers)
}
).and_return(response)
end
let(:new_oids) { { 'oid1' => 123, 'oid2' => 125, 'oid3' => 126, 'oid4' => 127, 'oid5' => 128 } }
context 'when batch size' do
before do
stub_const("#{described_class.name}::REQUEST_BATCH_SIZE", 2)
data = new_oids.to_a
stub_successful_request([data[0], data[1]])
stub_successful_request([data[2], data[3]])
stub_successful_request([data[4]])
end
it 'retreives them in batches' do
subject.execute(new_oids).each do |lfs_download_object|
expect(lfs_download_object.link).to eq "#{import_url}/gitlab-lfs/objects/#{lfs_download_object.oid}"
end
end
end
context 'when request fails with PayloadTooLarge error' do
let(:error_class) { described_class::DownloadLinksRequestEntityTooLargeError }
context 'when the smaller batch eventually works' do
before do
stub_const("#{described_class.name}::REQUEST_BATCH_SIZE", 5)
data = new_oids.to_a
# with the batch size of 5
stub_entity_too_large_error_request(data)
# with the batch size of 2
stub_successful_request([data[0], data[1]])
stub_successful_request([data[2], data[3]])
stub_successful_request([data[4]])
end
it 'retreives them eventually and logs exceptions' do
expect(Gitlab::ErrorTracking).to receive(:track_exception).with(
an_instance_of(error_class), project_id: project.id, batch_size: 5, oids_count: 5
)
subject.execute(new_oids).each do |lfs_download_object|
expect(lfs_download_object.link).to eq "#{import_url}/gitlab-lfs/objects/#{lfs_download_object.oid}"
end
end
end
context 'when batch size cannot be any smaller' do
before do
stub_const("#{described_class.name}::REQUEST_BATCH_SIZE", 5)
data = new_oids.to_a
# with the batch size of 5
stub_entity_too_large_error_request(data)
# with the batch size of 2
stub_entity_too_large_error_request([data[0], data[1]])
end
it 'raises an error and logs exceptions' do
expect(Gitlab::ErrorTracking).to receive(:track_exception).with(
an_instance_of(error_class), project_id: project.id, batch_size: 5, oids_count: 5
)
expect(Gitlab::ErrorTracking).to receive(:track_exception).with(
an_instance_of(error_class), project_id: project.id, batch_size: 2, oids_count: 5
)
expect { subject.execute(new_oids) }.to raise_error(described_class::DownloadLinksError)
end
end
end
end
context 'credentials' do context 'credentials' do
context 'when the download link and the lfs_endpoint have the same host' do context 'when the download link and the lfs_endpoint have the same host' do
context 'when lfs_endpoint has credentials' do context 'when lfs_endpoint has credentials' do
...@@ -87,17 +191,22 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do ...@@ -87,17 +191,22 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do
end end
describe '#get_download_links' do describe '#get_download_links' do
it 'raise error if request fails' do context 'if request fails' do
allow(Gitlab::HTTP).to receive(:post).and_return(Struct.new(:success?, :message).new(false, 'Failed request')) before do
request_timeout_net_response = Net::HTTPRequestTimeout.new('', '', '')
response = custom_response(request_timeout_net_response)
allow(Gitlab::HTTP).to receive(:post).and_return(response)
end
it 'raises an error' do
expect { subject.send(:get_download_links, new_oids) }.to raise_error(described_class::DownloadLinksError) expect { subject.send(:get_download_links, new_oids) }.to raise_error(described_class::DownloadLinksError)
end end
end
shared_examples 'JSON parse errors' do |body| shared_examples 'JSON parse errors' do |body|
it 'raises error' do it 'raises an error' do
response = instance_double(Gitlab::HTTP::Response) response = custom_response(success_net_response)
allow(response).to receive(:body).and_return(body) allow(response).to receive(:body).and_return(body)
allow(response).to receive(:success?).and_return(true)
allow(Gitlab::HTTP).to receive(:post).and_return(response) allow(Gitlab::HTTP).to receive(:post).and_return(response)
expect { subject.send(:get_download_links, new_oids) }.to raise_error(described_class::DownloadLinksError) expect { subject.send(:get_download_links, new_oids) }.to raise_error(described_class::DownloadLinksError)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment