Commit d08e978b authored by Igor Drozdov

Limit number of blobs fetched in a single call

When a large number of blobs is requested, we want
to fetch them in multiple Gitaly calls.
parent ad9bec95
...@@ -13,6 +13,11 @@ module Gitlab ...@@ -13,6 +13,11 @@ module Gitlab
# use load_all_data!. # use load_all_data!.
MAX_DATA_DISPLAY_SIZE = 10.megabytes MAX_DATA_DISPLAY_SIZE = 10.megabytes
# The number of blobs loaded in a single Gitaly call.
# When a large number of blobs is requested, we want to fetch them in
# multiple Gitaly calls.
BATCH_SIZE = 250
# These limits are used as a heuristic to ignore files which can't be LFS # These limits are used as a heuristic to ignore files which can't be LFS
# pointers. The format of these is described in # pointers. The format of these is described in
# https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md#the-pointer # https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md#the-pointer
...@@ -67,7 +72,13 @@ module Gitlab ...@@ -67,7 +72,13 @@ module Gitlab
# to the caller to limit the number of blobs and blob_size_limit. # to the caller to limit the number of blobs and blob_size_limit.
# #
def batch(repository, blob_references, blob_size_limit: MAX_DATA_DISPLAY_SIZE)
  # Single Gitaly round-trip: fetch the given references and materialise the
  # result as an Array. The client is looked up on every call, not hoisted.
  fetch = ->(refs) { repository.gitaly_blob_client.get_blobs(refs, blob_size_limit).to_a }

  # With the :blobs_fetch_in_batches flag off, everything goes out in one call.
  return fetch.call(blob_references) unless Feature.enabled?(:blobs_fetch_in_batches, default_enabled: true)

  # Otherwise issue one call per slice of at most BATCH_SIZE references and
  # concatenate the per-slice results in request order.
  blob_references.each_slice(BATCH_SIZE).flat_map { |chunk| fetch.call(chunk) }
end
# Returns an array of Blob instances just with the metadata, that means # Returns an array of Blob instances just with the metadata, that means
......
...@@ -244,6 +244,61 @@ describe Gitlab::Git::Blob, :seed_helper do ...@@ -244,6 +244,61 @@ describe Gitlab::Git::Blob, :seed_helper do
end end
end end
end end
context 'when large number of blobs requested' do
  # Five references in total; with BATCH_SIZE stubbed to 2 below they are
  # expected to be split into slices of 2, 2 and 1.
  let(:first_batch) { %w[files/ruby/popen.rb six].map { |path| [SeedRepo::Commit::ID, path] } }
  let(:second_batch) { %w[some other].map { |path| [SeedRepo::Commit::ID, path] } }
  let(:third_batch) { %w[files].map { |path| [SeedRepo::Commit::ID, path] } }
  let(:blob_references) { [*first_batch, *second_batch, *third_batch] }

  let(:client) do
    repository.gitaly_blob_client
  end

  let(:limit) do
    10.megabytes
  end

  before do
    stub_const('Gitlab::Git::Blob::BATCH_SIZE', 2)
  end

  context 'blobs_fetch_in_batches is enabled' do
    it 'fetches the blobs in batches' do
      # One ordered get_blobs expectation per slice, in request order.
      [first_batch, second_batch, third_batch].each do |expected_refs|
        expect(client).to receive(:get_blobs).with(expected_refs, limit).ordered
      end

      subject
    end
  end

  context 'blobs_fetch_in_batches is disabled' do
    before do
      stub_feature_flags(blobs_fetch_in_batches: false)
    end

    it 'fetches the blobs in a single batch' do
      # The whole reference list is passed through in one call.
      expect(client).to receive(:get_blobs).with(blob_references, limit)

      subject
    end
  end
end
end end
describe '.batch_metadata' do describe '.batch_metadata' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment