Commit 0a374212 authored by Stan Hu's avatar Stan Hu Committed by Douglas Barbosa Alexandre

Link existing LFS objects from parent fork during uploads

Previously LFS uploads would always have to be reuploaded to a fork even
if the parent already had received the LFS file, but this is
unnecessary, wasting time and bandwidth. Consider this sequence of
events:

1. Push LFS file `test.bin` to project A.
2. Fork project A to project B.
3. Push LFS file `test2.bin` to project A.
4. Push to project B.

When 4 happens, GitLab should be smart enough to realize that if the
user has access to the parent project, then we should be able to link
the LFS files in A without requesting a reupload of the file.

Relates to https://gitlab.com/gitlab-org/gitlab/-/issues/297022

Changelog: fixed
parent f5e9dc63
...@@ -76,7 +76,10 @@ module Repositories ...@@ -76,7 +76,10 @@ module Repositories
existing_oids = project.lfs_objects_oids(oids: objects_oids) existing_oids = project.lfs_objects_oids(oids: objects_oids)
objects.each do |object| objects.each do |object|
object[:actions] = upload_actions(object) unless existing_oids.include?(object[:oid]) next if existing_oids.include?(object[:oid])
next if should_auto_link? && oids_from_fork.include?(object[:oid]) && link_to_project!(object)
object[:actions] = upload_actions(object)
end end
objects objects
...@@ -150,6 +153,26 @@ module Repositories ...@@ -150,6 +153,26 @@ module Repositories
Gitlab::LfsToken.new(user).basic_encoding Gitlab::LfsToken.new(user).basic_encoding
end end
def should_auto_link?
return false unless Feature.enabled?(:lfs_auto_link_fork_source, project)
return false unless project.forked?
# Sanity check in case for some reason the user doesn't have access to the parent
can?(user, :download_code, project.fork_source)
end
def oids_from_fork
@oids_from_fork ||= project.lfs_objects_oids_from_fork_source(oids: objects_oids)
end
def link_to_project!(object)
lfs_object = LfsObject.for_oid_and_size(object[:oid], object[:size])
return unless lfs_object
LfsObjectsProject.link_to_project!(lfs_object, project)
end
end end
end end
......
...@@ -13,6 +13,7 @@ class LfsObject < ApplicationRecord ...@@ -13,6 +13,7 @@ class LfsObject < ApplicationRecord
scope :with_files_stored_locally, -> { where(file_store: LfsObjectUploader::Store::LOCAL) } scope :with_files_stored_locally, -> { where(file_store: LfsObjectUploader::Store::LOCAL) }
scope :with_files_stored_remotely, -> { where(file_store: LfsObjectUploader::Store::REMOTE) } scope :with_files_stored_remotely, -> { where(file_store: LfsObjectUploader::Store::REMOTE) }
scope :for_oids, -> (oids) { where(oid: oids) } scope :for_oids, -> (oids) { where(oid: oids) }
scope :for_oid_and_size, -> (oid, size) { find_by(oid: oid, size: size) }
validates :oid, presence: true, uniqueness: true validates :oid, presence: true, uniqueness: true
......
...@@ -21,9 +21,19 @@ class LfsObjectsProject < ApplicationRecord ...@@ -21,9 +21,19 @@ class LfsObjectsProject < ApplicationRecord
scope :project_id_in, ->(ids) { where(project_id: ids) } scope :project_id_in, ->(ids) { where(project_id: ids) }
scope :lfs_object_in, -> (lfs_objects) { where(lfs_object: lfs_objects) } scope :lfs_object_in, -> (lfs_objects) { where(lfs_object: lfs_objects) }
def self.link_to_project!(lfs_object, project)
# We can't use an upsert here because there is no uniqueness constraint:
# https://gitlab.com/gitlab-org/gitlab/-/issues/347466
self.safe_find_or_create_by!(lfs_object_id: lfs_object.id, project_id: project.id) # rubocop:disable Performance/ActiveRecordSubtransactionMethods
end
def self.update_statistics_for_project_id(project_id)
ProjectCacheWorker.perform_async(project_id, [], [:lfs_objects_size]) # rubocop:disable CodeReuse/Worker
end
private private
def update_project_statistics def update_project_statistics
ProjectCacheWorker.perform_async(project_id, [], [:lfs_objects_size]) self.class.update_statistics_for_project_id(project_id)
end end
end end
...@@ -1569,6 +1569,12 @@ class Project < ApplicationRecord ...@@ -1569,6 +1569,12 @@ class Project < ApplicationRecord
oids(lfs_objects, oids: oids) oids(lfs_objects, oids: oids)
end end
def lfs_objects_oids_from_fork_source(oids: [])
return [] unless forked?
oids(fork_source.lfs_objects, oids: oids)
end
def personal? def personal?
!group !group
end end
......
---
name: lfs_auto_link_fork_source
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/75972
rollout_issue_url:
milestone: '14.6'
type: development
group: group::source code
default_enabled: false
...@@ -25,6 +25,28 @@ RSpec.describe LfsObjectsProject do ...@@ -25,6 +25,28 @@ RSpec.describe LfsObjectsProject do
end end
end end
describe '#link_to_project!' do
it 'does not throw error when duplicate exists' do
subject
expect do
result = described_class.link_to_project!(subject.lfs_object, subject.project)
expect(result).to be_a(LfsObjectsProject)
end.not_to change { described_class.count }
end
it 'upserts a new entry and updates the project cache' do
new_project = create(:project)
allow(ProjectCacheWorker).to receive(:perform_async).and_call_original
expect(ProjectCacheWorker).to receive(:perform_async).with(new_project.id, [], [:lfs_objects_size])
expect { described_class.link_to_project!(subject.lfs_object, new_project) }
.to change { described_class.count }
expect(described_class.find_by(lfs_object_id: subject.lfs_object.id, project_id: new_project.id)).to be_present
end
end
describe '#update_project_statistics' do describe '#update_project_statistics' do
it 'updates project statistics when the object is added' do it 'updates project statistics when the object is added' do
expect(ProjectCacheWorker).to receive(:perform_async) expect(ProjectCacheWorker).to receive(:perform_async)
......
...@@ -3581,6 +3581,29 @@ RSpec.describe Project, factory_default: :keep do ...@@ -3581,6 +3581,29 @@ RSpec.describe Project, factory_default: :keep do
expect(project.forks).to contain_exactly(forked_project) expect(project.forks).to contain_exactly(forked_project)
end end
end end
describe '#lfs_object_oids_from_fork_source' do
let_it_be(:lfs_object) { create(:lfs_object) }
let_it_be(:another_lfs_object) { create(:lfs_object) }
let(:oids) { [lfs_object.oid, another_lfs_object.oid] }
context 'when fork has one of two LFS objects' do
before do
create(:lfs_objects_project, lfs_object: lfs_object, project: project)
create(:lfs_objects_project, lfs_object: another_lfs_object, project: forked_project)
end
it 'returns OIDs of owned LFS objects', :aggregate_failures do
expect(forked_project.lfs_objects_oids_from_fork_source(oids: oids)).to eq([lfs_object.oid])
expect(forked_project.lfs_objects_oids(oids: oids)).to eq([another_lfs_object.oid])
end
it 'returns empty when project is not a fork' do
expect(project.lfs_objects_oids_from_fork_source(oids: oids)).to eq([])
end
end
end
end end
it_behaves_like 'can housekeep repository' do it_behaves_like 'can housekeep repository' do
......
...@@ -518,16 +518,46 @@ RSpec.describe 'Git LFS API and storage' do ...@@ -518,16 +518,46 @@ RSpec.describe 'Git LFS API and storage' do
end end
context 'in source of fork project' do context 'in source of fork project' do
let(:other_project) { create(:project, :empty_repo) }
let(:project) { fork_project(other_project) } let(:project) { fork_project(other_project) }
before do before do
lfs_object.update!(projects: [other_project]) lfs_object.update!(projects: [other_project])
end end
context 'when user has access to both the parent and fork' do
before do
project.add_developer(user)
other_project.add_developer(user)
end
it 'links existing LFS objects to other project' do
expect(json_response['objects']).to be_kind_of(Array)
expect(json_response['objects'].first).to include(sample_object)
expect(json_response['objects'].first).not_to have_key('actions')
expect(lfs_object.reload.projects.pluck(:id)).to match_array([other_project.id, project.id])
end
context 'when feature flag is disabled' do
before do
stub_feature_flags(lfs_auto_link_fork_source: false)
end
it_behaves_like 'batch upload with existing LFS object' it_behaves_like 'batch upload with existing LFS object'
end end
end end
context 'when user does not have access to parent' do
before do
project.add_developer(user)
end
it_behaves_like 'batch upload with existing LFS object'
end
end
end
context 'when pushing a LFS object that does not exist' do context 'when pushing a LFS object that does not exist' do
it_behaves_like 'pushes new LFS objects', renew_authorization: true it_behaves_like 'pushes new LFS objects', renew_authorization: true
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment