Commit 5e20e448 authored by Jacob Vosmaer

Add Gitlab::Git::Blob.batch method

parent 0d52e59d
...@@ -20,66 +20,7 @@ module Gitlab ...@@ -20,66 +20,7 @@ module Gitlab
if is_enabled if is_enabled
find_by_gitaly(repository, sha, path) find_by_gitaly(repository, sha, path)
else else
find_by_rugged(repository, sha, path) find_by_rugged(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE)
end
end
end
# Fetches the tree entry at +path+ for commit +sha+ through Gitaly's
# CommitService and wraps it in a new instance.
#
# repository - passed straight to Gitlab::GitalyClient::CommitService.new
# sha        - commit id the path is resolved against
# path       - path inside the commit; leading slashes are ignored
#
# Returns a new instance for :COMMIT and :BLOB entries. Returns nil when
# Gitaly yields no entry or the entry has any other type.
def find_by_gitaly(repository, sha, path)
  # Strip leading slashes; an empty remainder is replaced by '/'.
  relative_path = path.sub(/\A\/*/, '')
  relative_path = '/' if relative_path.empty?

  # Attributes that are the same for every entry type.
  location = { name: File.basename(relative_path), path: relative_path, commit_id: sha }

  commit_service = Gitlab::GitalyClient::CommitService.new(repository)
  tree_entry = commit_service.tree_entry(sha, relative_path, MAX_DATA_DISPLAY_SIZE)
  return unless tree_entry

  case tree_entry.type
  when :COMMIT
    # :COMMIT entries carry no content of their own: size 0, empty data.
    new(id: tree_entry.oid, size: 0, data: '', **location)
  when :BLOB
    contents = tree_entry.data
    new(
      id: tree_entry.oid,
      size: tree_entry.size,
      data: contents.dup,
      mode: tree_entry.mode.to_s(8),
      binary: binary?(contents),
      **location
    )
  end
end
# Looks up the entry at +path+ in the root tree of commit +sha+ and wraps it
# in a new instance (this is the pre-change version without a limit argument).
#
# Returns nil when find_entry_by_path finds nothing for +path+, or when the
# lookup of the entry's oid yields nothing.
def find_by_rugged(repository, sha, path)
commit = repository.lookup(sha)
root_tree = commit.tree
blob_entry = find_entry_by_path(repository, root_tree.oid, path)
return nil unless blob_entry
# Entries of type :commit are handed to submodule_blob instead of being
# read as a blob (presumably these are submodule references — confirm).
if blob_entry[:type] == :commit
submodule_blob(blob_entry, path, sha)
else
blob = repository.lookup(blob_entry[:oid])
if blob
new(
id: blob.oid,
name: blob_entry[:name],
size: blob.size,
# Content is requested with MAX_DATA_DISPLAY_SIZE as the size argument.
data: blob.content(MAX_DATA_DISPLAY_SIZE),
mode: blob_entry[:filemode].to_s(8),
path: path,
commit_id: sha,
binary: blob.binary?
)
# NOTE(review): the doubled `end end` lines below look like side-by-side
# diff context (old column + new column), not real code — confirm against
# the actual file before relying on them.
end end
end end
end end
...@@ -109,6 +50,22 @@ module Gitlab ...@@ -109,6 +50,22 @@ module Gitlab
detect && detect[:type] == :binary detect && detect[:type] == :binary
end end
# Fetches several blobs in one call.
#
# blob_references is a list of [commit_sha, path] pairs. The result is an
# array with one element per pair, in the same order; each element is
# whatever find_by_rugged returns for that pair.
#
# limit controls how much content each blob carries in its data attribute:
#   nil (default) - use MAX_DATA_DISPLAY_SIZE
#   limit >= 0    - data holds no more than limit bytes
#   limit < 0     - the full blob contents are returned
#
# Memory use grows with both the number of references and the limit, so
# the caller is responsible for keeping one or both of them bounded.
#
def batch(repository, blob_references, limit: nil)
  byte_limit = limit || MAX_DATA_DISPLAY_SIZE

  blob_references.map { |(sha, path)| find_by_rugged(repository, sha, path, limit: byte_limit) }
end
private private
# Recursive search of blob id by path # Recursive search of blob id by path
...@@ -153,6 +110,66 @@ module Gitlab ...@@ -153,6 +110,66 @@ module Gitlab
commit_id: sha commit_id: sha
) )
end end
# Fetches the tree entry at +path+ for commit +sha+ through Gitaly's
# CommitService and wraps it in a new instance.
#
# repository - passed straight to Gitlab::GitalyClient::CommitService.new
# sha        - commit id the path is resolved against
# path       - path inside the commit; leading slashes are ignored
#
# Returns a new instance for :COMMIT and :BLOB entries. Returns nil when
# Gitaly yields no entry or the entry has any other type.
def find_by_gitaly(repository, sha, path)
  # Strip leading slashes; an empty remainder is replaced by '/'.
  relative_path = path.sub(/\A\/*/, '')
  relative_path = '/' if relative_path.empty?

  # Attributes that are the same for every entry type.
  location = { name: File.basename(relative_path), path: relative_path, commit_id: sha }

  commit_service = Gitlab::GitalyClient::CommitService.new(repository)
  tree_entry = commit_service.tree_entry(sha, relative_path, MAX_DATA_DISPLAY_SIZE)
  return unless tree_entry

  case tree_entry.type
  when :COMMIT
    # :COMMIT entries carry no content of their own: size 0, empty data.
    new(id: tree_entry.oid, size: 0, data: '', **location)
  when :BLOB
    contents = tree_entry.data
    new(
      id: tree_entry.oid,
      size: tree_entry.size,
      data: contents.dup,
      mode: tree_entry.mode.to_s(8),
      binary: binary?(contents),
      **location
    )
  end
end
# Resolves +path+ inside the root tree of commit +sha+ and wraps the entry
# found there in a new instance.
#
# limit is the size argument handed to the blob's content call; when it is
# zero the content call is skipped and data is the empty string.
#
# Returns the result of submodule_blob for entries of type :commit, a new
# instance for any other entry whose oid can be looked up, and nil when the
# path has no entry or the oid lookup yields nothing.
def find_by_rugged(repository, sha, path, limit:)
  tree_oid = repository.lookup(sha).tree.oid
  entry = find_entry_by_path(repository, tree_oid, path)

  return nil unless entry
  return submodule_blob(entry, path, sha) if entry[:type] == :commit

  rugged_blob = repository.lookup(entry[:oid])
  return unless rugged_blob

  new(
    id: rugged_blob.oid,
    name: entry[:name],
    size: rugged_blob.size,
    # Rugged::Blob#content is expensive; don't call it if we don't have to.
    data: limit.zero? ? '' : rugged_blob.content(limit),
    mode: entry[:filemode].to_s(8),
    path: path,
    commit_id: sha,
    binary: rugged_blob.binary?
  )
end
end end
def initialize(options) def initialize(options)
......
...@@ -146,6 +146,77 @@ describe Gitlab::Git::Blob, seed_helper: true do ...@@ -146,6 +146,77 @@ describe Gitlab::Git::Blob, seed_helper: true do
end end
end end
# Specs for Blob.batch: the returned array lines up with the requested
# references, each blob exposes the expected attributes, and the limit:
# keyword controls how much data every blob carries.
describe '.batch' do
# Two [commit_sha, path] pairs, both resolved against SeedRepo::Commit::ID.
let(:blob_references) do
[
[SeedRepo::Commit::ID, "files/ruby/popen.rb"],
[SeedRepo::Commit::ID, 'six']
]
end
# Called without limit:, so the default limit applies.
subject { described_class.batch(repository, blob_references) }
# One result per reference.
it { expect(subject.size).to eq(blob_references.size) }
context 'first blob' do
let(:blob) { subject[0] }
it { expect(blob.id).to eq(SeedRepo::RubyBlob::ID) }
it { expect(blob.name).to eq(SeedRepo::RubyBlob::NAME) }
it { expect(blob.path).to eq("files/ruby/popen.rb") }
it { expect(blob.commit_id).to eq(SeedRepo::Commit::ID) }
# Only the first 11 characters of the content are compared.
it { expect(blob.data[0..10]).to eq(SeedRepo::RubyBlob::CONTENT[0..10]) }
it { expect(blob.size).to eq(669) }
it { expect(blob.mode).to eq("100644") }
end
# NOTE(review): 'six' presumably is a submodule entry in the seed repo,
# which would explain the empty data expected below — confirm against SeedRepo.
context 'second blob' do
let(:blob) { subject[1] }
it { expect(blob.id).to eq('409f37c4f05865e4fb208c771485f211a22c4c2d') }
it { expect(blob.data).to eq('') }
it 'does not mark the blob as binary' do
expect(blob).not_to be_binary
end
end
# Each nested context supplies its own `limit` for the subject below.
context 'limiting' do
subject { described_class.batch(repository, blob_references, limit: limit) }
context 'default' do
# nil means "no limit given": the (stubbed) MAX_DATA_DISPLAY_SIZE applies.
let(:limit) { nil }
it 'limits to MAX_DATA_DISPLAY_SIZE' do
stub_const('Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE', 100)
expect(subject.first.data.size).to eq(100)
end
end
context 'positive' do
let(:limit) { 10 }
it { expect(subject.first.data.size).to eq(10) }
end
context 'zero' do
let(:limit) { 0 }
it { expect(subject.first.data).to eq('') }
end
context 'negative' do
let(:limit) { -1 }
# A negative limit yields the whole content: 669 equals the size asserted
# for the first blob above, even though the constant is stubbed to 100.
it 'ignores MAX_DATA_DISPLAY_SIZE' do
stub_const('Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE', 100)
expect(subject.first.data.size).to eq(669)
end
end
end
end
describe 'encoding' do describe 'encoding' do
context 'file with russian text' do context 'file with russian text' do
let(:blob) { Gitlab::Git::Blob.find(repository, SeedRepo::Commit::ID, "encoding/russian.rb") } let(:blob) { Gitlab::Git::Blob.find(repository, SeedRepo::Commit::ID, "encoding/russian.rb") }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment