Commit 0d5df6f0 authored by Patrick Steinhardt's avatar Patrick Steinhardt

repository: Implement `#new_blobs` via `#list_blobs`

In order to determine which blobs are new based on a given revision, we
call Gitaly's `ListNewBlobs` RPC. This RPC is rather inflexible though:
it only allows passing a single new revision. As a result, we cannot
batch computation of new blobs across a set changes in our access
checks.

As a first step towards fixing this limitation, we now migrate
`#new_blobs()` to use the `ListBlobs` RPC: this is a much more flexible
variant of `ListNewBlobs`, and most importantly it allows us to pass a
set of revisions. When the old implementation is removed, we can thus
easily allow `#new_blobs()` to receive multiple revisions.

Note that this changes the return type of `#new_blobs()`: instead of
returning a set of `Gitaly::NewBlobObject` classes, we now return a set
of `Gitlab::Git::Blobs`. While both share the same `size` and `path`
attributes, the former tracks the blob ID via an `id` attribute while
the latter uses an `oid` attribute. There is only a single callsite of
`#new_blobs()` though, which is the FileSizeCheck push rule, and this
callsite only uses the `size` and `path` attributes. So in the end, this
change in behaviour is fine.
parent 1994c891
---
name: new_blobs_via_list_blobs
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/68935
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/339637
milestone: '14.3'
type: development
group: group::gitaly
default_enabled: false
...@@ -364,8 +364,12 @@ module Gitlab ...@@ -364,8 +364,12 @@ module Gitlab
return [] if newrev.blank? || newrev == ::Gitlab::Git::BLANK_SHA return [] if newrev.blank? || newrev == ::Gitlab::Git::BLANK_SHA
strong_memoize("new_blobs_#{newrev}") do strong_memoize("new_blobs_#{newrev}") do
wrapped_gitaly_errors do if Feature.enabled?(:new_blobs_via_list_blobs)
gitaly_ref_client.list_new_blobs(newrev, REV_LIST_COMMIT_LIMIT, dynamic_timeout: dynamic_timeout) blobs(['--not', '--all', '--not', newrev], with_paths: true, dynamic_timeout: dynamic_timeout)
else
wrapped_gitaly_errors do
gitaly_ref_client.list_new_blobs(newrev, REV_LIST_COMMIT_LIMIT, dynamic_timeout: dynamic_timeout)
end
end end
end end
end end
......
...@@ -936,6 +936,75 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do ...@@ -936,6 +936,75 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do
end end
end end
describe '#new_blobs' do
let(:repository) { mutable_repository }
let(:repository_rugged) { mutable_repository_rugged }
let(:new_blob) do
repository_rugged.write('This is a new blob', :blob)
end
let(:new_commit) do
author = { name: 'Test User', email: 'mail@example.com', time: Time.now }
index = repository_rugged.index
index.add(path: 'nested/new-blob.txt', oid: new_blob, mode: 0100644)
Rugged::Commit.create(repository_rugged,
author: author,
committer: author,
message: "Message",
parents: [],
tree: index.write_tree(repository_rugged))
end
subject { repository.new_blobs(new_commit).to_a }
context 'with :new_blobs_via_list_blobs enabled' do
before do
stub_feature_flags(new_blobs_via_list_blobs: true)
expect_next_instance_of(Gitlab::GitalyClient::BlobService) do |service|
expect(service)
.to receive(:list_blobs)
.with(['--not', '--all', '--not', new_commit],
limit: Gitlab::Git::Repository::REV_LIST_COMMIT_LIMIT,
with_paths: true,
dynamic_timeout: nil)
.and_call_original
end
end
it 'enumerates new blobs' do
expect(subject).to match_array(
[have_attributes(class: Gitlab::Git::Blob, id: new_blob, path: 'nested/new-blob.txt', size: 18)]
)
end
end
context 'with :new_blobs_via_list_blobs disabled' do
before do
stub_feature_flags(new_blobs_via_list_blobs: false)
expect_next_instance_of(Gitlab::GitalyClient::RefService) do |service|
expect(service)
.to receive(:list_new_blobs)
.with(new_commit,
Gitlab::Git::Repository::REV_LIST_COMMIT_LIMIT,
dynamic_timeout: nil)
.and_call_original
end
end
it 'enumerates new blobs' do
expect(subject).to match_array([Gitaly::NewBlobObject.new(
size: 18,
oid: new_blob,
path: "nested/new-blob.txt"
)])
end
end
end
describe '#new_commits' do describe '#new_commits' do
let(:repository) { mutable_repository } let(:repository) { mutable_repository }
let(:new_commit) do let(:new_commit) do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment