Commit 98714d0e authored by Stan Hu's avatar Stan Hu

Include LFS blobs in archives

This commit adds the `include_lfs_blobs_in_archive` feature flag. When
switched on, `include_lfs_blobs_in_archive` will enable the inclusion of
LFS blobs in archive files by enabling a flag in the `GetArchiveRequest`
RPC message.

Workhorse will decode the `GetArchiveRequest` and send it to Gitaly,
which will run a custom LFS smudge filter (added in
https://gitlab.com/gitlab-org/gitaly/-/merge_requests/2577, activated
via https://gitlab.com/gitlab-org/gitaly/-/merge_requests/2621). Support
for this flag has already been merged via
https://gitlab.com/gitlab-org/gitlab/-/merge_requests/44159.

To prevent stale archive files from being served from the cache, we
introduce a new version prefix in the archive path. When the feature
flag is enabled, the archive path will be in the format that looks like:

```
/path/to/project-1/:sha/@v2/archive.zip
```

When the feature flag is disabled, the archive path will revert back to
the original form:

```
/path/to/project-1/:sha/archive.zip
```

Relates to https://gitlab.com/gitlab-org/gitlab/-/issues/15079.
parent 33670bf0
......@@ -466,7 +466,7 @@ group :ed25519 do
end
# Gitaly GRPC protocol definitions
gem 'gitaly', '~> 13.3.0-rc1'
gem 'gitaly', '~> 13.5.0-rc1'
gem 'grpc', '~> 1.30.2'
......
......@@ -416,7 +416,7 @@ GEM
rails (>= 3.2.0)
git (1.7.0)
rchardet (~> 1.8)
gitaly (13.3.0.pre.rc2)
gitaly (13.5.0.pre.rc1)
grpc (~> 1.0)
github-markup (1.7.0)
gitlab-chronic (0.10.5)
......@@ -1320,7 +1320,7 @@ DEPENDENCIES
gettext (~> 3.3)
gettext_i18n_rails (~> 1.8.0)
gettext_i18n_rails_js (~> 1.3)
gitaly (~> 13.3.0.pre.rc1)
gitaly (~> 13.5.0.pre.rc1)
github-markup (~> 1.7.0)
gitlab-chronic (~> 0.10.5)
gitlab-fog-azure-rm (~> 1.0)
......
---
title: Include LFS blobs in archives
merge_request: 44116
author:
type: added
---
name: include_lfs_blobs_in_archive
introduced_by_url: '44116'
rollout_issue_url:
type: development
group: group::source code
default_enabled: false
......@@ -297,10 +297,17 @@ module Gitlab
end
file_name = "#{name}.#{extension}"
File.join(storage_path, self.gl_repository, sha, file_name)
File.join(storage_path, self.gl_repository, sha, archive_version_path, file_name)
end
private :archive_file_path
def archive_version_path
return '' unless Feature.enabled?(:include_lfs_blobs_in_archive)
'@v2'
end
private :archive_version_path
# Return repo size in megabytes
def size
size = gitaly_repository_client.repository_size
......
......@@ -269,7 +269,8 @@ module Gitlab
commit_id: metadata['CommitId'],
prefix: metadata['ArchivePrefix'],
format: format,
path: path.presence || ""
path: path.presence || "",
include_lfs_blobs: Feature.enabled?(:include_lfs_blobs_in_archive)
).to_proto
)
}
......
......@@ -120,7 +120,7 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do
let(:expected_extension) { 'tar.gz' }
let(:expected_filename) { "#{expected_prefix}.#{expected_extension}" }
let(:expected_path) { File.join(storage_path, cache_key, expected_filename) }
let(:expected_path) { File.join(storage_path, cache_key, "@v2", expected_filename) }
let(:expected_prefix) { "gitlab-git-test-#{ref}-#{SeedRepo::LastCommit::ID}" }
subject(:metadata) { repository.archive_metadata(ref, storage_path, 'gitlab-git-test', format, append_sha: append_sha, path: path) }
......@@ -133,6 +133,13 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do
expect(metadata['ArchivePrefix']).to eq(expected_prefix)
end
context 'when :include_lfs_blobs_in_archive feature flag is disabled' do
let(:expected_path) { File.join(storage_path, cache_key, expected_filename) }
before do
stub_feature_flags(include_lfs_blobs_in_archive: false)
end
it 'sets ArchivePath to the expected globally-unique path' do
# This is really important from a security perspective. Think carefully
# before changing it: https://gitlab.com/gitlab-org/gitlab-foss/issues/45689
......@@ -140,6 +147,19 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do
expect(metadata['ArchivePath']).to eq(expected_path)
end
end
context 'when :include_lfs_blobs_in_archive feature flag is enabled' do
before do
stub_feature_flags(include_lfs_blobs_in_archive: true)
end
it 'sets ArchivePath to the expected globally-unique path' do
expect(expected_path).to include(File.join(repository.gl_repository, SeedRepo::LastCommit::ID))
expect(metadata['ArchivePath']).to eq(expected_path)
end
end
context 'path is set' do
let(:path) { 'foo/bar' }
......
......@@ -54,12 +54,44 @@ RSpec.describe Gitlab::Workhorse do
commit_id: metadata['CommitId'],
prefix: metadata['ArchivePrefix'],
format: Gitaly::GetArchiveRequest::Format::ZIP,
path: path
path: path,
include_lfs_blobs: true
).to_proto
)
}.deep_stringify_keys)
end
context 'when include_lfs_blobs_in_archive is disabled' do
before do
stub_feature_flags(include_lfs_blobs_in_archive: false)
end
it 'sets include_lfs_blobs to false' do
key, command, params = decode_workhorse_header(subject)
expect(key).to eq('Gitlab-Workhorse-Send-Data')
expect(command).to eq('git-archive')
expect(params).to eq({
'GitalyServer' => {
features: { 'gitaly-feature-foobar' => 'true' },
address: Gitlab::GitalyClient.address(project.repository_storage),
token: Gitlab::GitalyClient.token(project.repository_storage)
},
'ArchivePath' => metadata['ArchivePath'],
'GetArchiveRequest' => Base64.encode64(
Gitaly::GetArchiveRequest.new(
repository: repository.gitaly_repository,
commit_id: metadata['CommitId'],
prefix: metadata['ArchivePrefix'],
format: Gitaly::GetArchiveRequest::Format::ZIP,
path: path,
include_lfs_blobs: false
).to_proto
)
}.deep_stringify_keys)
end
end
context 'when archive caching is disabled' do
let(:cache_disabled) { true }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment