Commit e7bbaa30 authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch 'sh-active-include-lfs-in-archives' into 'master'

Include LFS blobs in archives

See merge request gitlab-org/gitlab!44116
parents bf2a91f6 6df47023
...@@ -466,7 +466,7 @@ group :ed25519 do ...@@ -466,7 +466,7 @@ group :ed25519 do
end end
# Gitaly GRPC protocol definitions # Gitaly GRPC protocol definitions
gem 'gitaly', '~> 13.3.0-rc1' gem 'gitaly', '~> 13.5.0-rc1'
gem 'grpc', '~> 1.30.2' gem 'grpc', '~> 1.30.2'
......
...@@ -416,7 +416,7 @@ GEM ...@@ -416,7 +416,7 @@ GEM
rails (>= 3.2.0) rails (>= 3.2.0)
git (1.7.0) git (1.7.0)
rchardet (~> 1.8) rchardet (~> 1.8)
gitaly (13.3.0.pre.rc2) gitaly (13.5.0.pre.rc1)
grpc (~> 1.0) grpc (~> 1.0)
github-markup (1.7.0) github-markup (1.7.0)
gitlab-chronic (0.10.5) gitlab-chronic (0.10.5)
...@@ -1327,7 +1327,7 @@ DEPENDENCIES ...@@ -1327,7 +1327,7 @@ DEPENDENCIES
gettext (~> 3.3) gettext (~> 3.3)
gettext_i18n_rails (~> 1.8.0) gettext_i18n_rails (~> 1.8.0)
gettext_i18n_rails_js (~> 1.3) gettext_i18n_rails_js (~> 1.3)
gitaly (~> 13.3.0.pre.rc1) gitaly (~> 13.5.0.pre.rc1)
github-markup (~> 1.7.0) github-markup (~> 1.7.0)
gitlab-chronic (~> 0.10.5) gitlab-chronic (~> 0.10.5)
gitlab-fog-azure-rm (~> 1.0) gitlab-fog-azure-rm (~> 1.0)
......
# frozen_string_literal: true # frozen_string_literal: true
# RepositoryArchiveCleanUpService removes cached repository archives
# that are generated on-the-fly by Gitaly. These files are stored in the
# following form (as defined in lib/gitlab/git/repository.rb) and served
# by GitLab Workhorse:
#
# /path/to/repository/downloads/project-N/sha/@v2/archive.format
#
# Legacy paths omit the @v2 prefix.
#
# For example:
#
# /var/opt/gitlab/gitlab-rails/shared/cache/archive/project-1/master/@v2/archive.zip
class RepositoryArchiveCleanUpService class RepositoryArchiveCleanUpService
LAST_MODIFIED_TIME_IN_MINUTES = 120 LAST_MODIFIED_TIME_IN_MINUTES = 120
# For `/path/project-N/sha/@v2/archive.zip`, `find /path -maxdepth 4` will find this file
MAX_ARCHIVE_DEPTH = 4
attr_reader :mmin, :path attr_reader :mmin, :path
def initialize(mmin = LAST_MODIFIED_TIME_IN_MINUTES) def initialize(mmin = LAST_MODIFIED_TIME_IN_MINUTES)
...@@ -22,12 +37,15 @@ class RepositoryArchiveCleanUpService ...@@ -22,12 +37,15 @@ class RepositoryArchiveCleanUpService
private private
def clean_up_old_archives def clean_up_old_archives
run(%W(find #{path} -mindepth 1 -maxdepth 3 -type f \( -name \*.tar -o -name \*.bz2 -o -name \*.tar.gz -o -name \*.zip \) -mmin +#{mmin} -delete)) run(%W(find #{path} -mindepth 1 -maxdepth #{MAX_ARCHIVE_DEPTH} -type f \( -name \*.tar -o -name \*.bz2 -o -name \*.tar.gz -o -name \*.zip \) -mmin +#{mmin} -delete))
end end
def clean_up_empty_directories def clean_up_empty_directories
run(%W(find #{path} -mindepth 2 -maxdepth 2 -type d -empty -delete)) (1...MAX_ARCHIVE_DEPTH).reverse_each { |depth| clean_up_empty_directories_with_depth(depth) }
run(%W(find #{path} -mindepth 1 -maxdepth 1 -type d -empty -delete)) end
def clean_up_empty_directories_with_depth(depth)
run(%W(find #{path} -mindepth #{depth} -maxdepth #{depth} -type d -empty -delete))
end end
def run(cmd) def run(cmd)
......
---
title: Include LFS blobs in archives
merge_request: 44116
author:
type: added
---
name: include_lfs_blobs_in_archive
introduced_by_url: '44116'
rollout_issue_url:
type: development
group: group::source code
default_enabled: false
...@@ -297,10 +297,17 @@ module Gitlab ...@@ -297,10 +297,17 @@ module Gitlab
end end
file_name = "#{name}.#{extension}" file_name = "#{name}.#{extension}"
File.join(storage_path, self.gl_repository, sha, file_name) File.join(storage_path, self.gl_repository, sha, archive_version_path, file_name)
end end
private :archive_file_path private :archive_file_path
def archive_version_path
return '' unless Feature.enabled?(:include_lfs_blobs_in_archive)
'@v2'
end
private :archive_version_path
# Return repo size in megabytes # Return repo size in megabytes
def size def size
size = gitaly_repository_client.repository_size size = gitaly_repository_client.repository_size
......
...@@ -269,7 +269,8 @@ module Gitlab ...@@ -269,7 +269,8 @@ module Gitlab
commit_id: metadata['CommitId'], commit_id: metadata['CommitId'],
prefix: metadata['ArchivePrefix'], prefix: metadata['ArchivePrefix'],
format: format, format: format,
path: path.presence || "" path: path.presence || "",
include_lfs_blobs: Feature.enabled?(:include_lfs_blobs_in_archive)
).to_proto ).to_proto
) )
} }
......
...@@ -120,7 +120,7 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do ...@@ -120,7 +120,7 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do
let(:expected_extension) { 'tar.gz' } let(:expected_extension) { 'tar.gz' }
let(:expected_filename) { "#{expected_prefix}.#{expected_extension}" } let(:expected_filename) { "#{expected_prefix}.#{expected_extension}" }
let(:expected_path) { File.join(storage_path, cache_key, expected_filename) } let(:expected_path) { File.join(storage_path, cache_key, "@v2", expected_filename) }
let(:expected_prefix) { "gitlab-git-test-#{ref}-#{SeedRepo::LastCommit::ID}" } let(:expected_prefix) { "gitlab-git-test-#{ref}-#{SeedRepo::LastCommit::ID}" }
subject(:metadata) { repository.archive_metadata(ref, storage_path, 'gitlab-git-test', format, append_sha: append_sha, path: path) } subject(:metadata) { repository.archive_metadata(ref, storage_path, 'gitlab-git-test', format, append_sha: append_sha, path: path) }
...@@ -133,12 +133,32 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do ...@@ -133,12 +133,32 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do
expect(metadata['ArchivePrefix']).to eq(expected_prefix) expect(metadata['ArchivePrefix']).to eq(expected_prefix)
end end
it 'sets ArchivePath to the expected globally-unique path' do context 'when :include_lfs_blobs_in_archive feature flag is disabled' do
# This is really important from a security perspective. Think carefully let(:expected_path) { File.join(storage_path, cache_key, expected_filename) }
# before changing it: https://gitlab.com/gitlab-org/gitlab-foss/issues/45689
expect(expected_path).to include(File.join(repository.gl_repository, SeedRepo::LastCommit::ID))
expect(metadata['ArchivePath']).to eq(expected_path) before do
stub_feature_flags(include_lfs_blobs_in_archive: false)
end
it 'sets ArchivePath to the expected globally-unique path' do
# This is really important from a security perspective. Think carefully
# before changing it: https://gitlab.com/gitlab-org/gitlab-foss/issues/45689
expect(expected_path).to include(File.join(repository.gl_repository, SeedRepo::LastCommit::ID))
expect(metadata['ArchivePath']).to eq(expected_path)
end
end
context 'when :include_lfs_blobs_in_archive feature flag is enabled' do
before do
stub_feature_flags(include_lfs_blobs_in_archive: true)
end
it 'sets ArchivePath to the expected globally-unique path' do
expect(expected_path).to include(File.join(repository.gl_repository, SeedRepo::LastCommit::ID))
expect(metadata['ArchivePath']).to eq(expected_path)
end
end end
context 'path is set' do context 'path is set' do
......
...@@ -54,12 +54,44 @@ RSpec.describe Gitlab::Workhorse do ...@@ -54,12 +54,44 @@ RSpec.describe Gitlab::Workhorse do
commit_id: metadata['CommitId'], commit_id: metadata['CommitId'],
prefix: metadata['ArchivePrefix'], prefix: metadata['ArchivePrefix'],
format: Gitaly::GetArchiveRequest::Format::ZIP, format: Gitaly::GetArchiveRequest::Format::ZIP,
path: path path: path,
include_lfs_blobs: true
).to_proto ).to_proto
) )
}.deep_stringify_keys) }.deep_stringify_keys)
end end
context 'when include_lfs_blobs_in_archive is disabled' do
before do
stub_feature_flags(include_lfs_blobs_in_archive: false)
end
it 'sets include_lfs_blobs to false' do
key, command, params = decode_workhorse_header(subject)
expect(key).to eq('Gitlab-Workhorse-Send-Data')
expect(command).to eq('git-archive')
expect(params).to eq({
'GitalyServer' => {
features: { 'gitaly-feature-foobar' => 'true' },
address: Gitlab::GitalyClient.address(project.repository_storage),
token: Gitlab::GitalyClient.token(project.repository_storage)
},
'ArchivePath' => metadata['ArchivePath'],
'GetArchiveRequest' => Base64.encode64(
Gitaly::GetArchiveRequest.new(
repository: repository.gitaly_repository,
commit_id: metadata['CommitId'],
prefix: metadata['ArchivePrefix'],
format: Gitaly::GetArchiveRequest::Format::ZIP,
path: path,
include_lfs_blobs: false
).to_proto
)
}.deep_stringify_keys)
end
end
context 'when archive caching is disabled' do context 'when archive caching is disabled' do
let(:cache_disabled) { true } let(:cache_disabled) { true }
......
...@@ -18,6 +18,16 @@ RSpec.describe RepositoryArchiveCleanUpService do ...@@ -18,6 +18,16 @@ RSpec.describe RepositoryArchiveCleanUpService do
end end
end end
it 'removes outdated archives and directories in a versioned path' do
in_directory_with_files("project-#{non_existing_record_id}/#{sha}/@v2", %w[tar tar.bz2 tar.gz zip], 3.hours) do |dirname, files|
service.execute
files.each { |filename| expect(File.exist?(filename)).to be_falsy }
expect(File.directory?(dirname)).to be_falsy
expect(File.directory?(File.dirname(dirname))).to be_falsy
end
end
it 'does not remove directories when they contain outdated non-archives' do it 'does not remove directories when they contain outdated non-archives' do
in_directory_with_files("project-#{non_existing_record_id}/#{sha}", %w[tar conf rb], 2.hours) do |dirname, files| in_directory_with_files("project-#{non_existing_record_id}/#{sha}", %w[tar conf rb], 2.hours) do |dirname, files|
service.execute service.execute
...@@ -64,7 +74,9 @@ RSpec.describe RepositoryArchiveCleanUpService do ...@@ -64,7 +74,9 @@ RSpec.describe RepositoryArchiveCleanUpService do
end end
it 'removes files older than 2 hours that matches valid archive extensions' do it 'removes files older than 2 hours that matches valid archive extensions' do
in_directory_with_files('sample.git', %w[tar tar.bz2 tar.gz zip], 2.hours) do |dir, files| # In macOS, the the `mmin` parameter for `find` rounds up, so add a full
# minute to ensure these files are deemed old.
in_directory_with_files('sample.git', %w[tar tar.bz2 tar.gz zip], 121.minutes) do |dir, files|
service.execute service.execute
files.each { |file| expect(File.exist?(file)).to eq false } files.each { |file| expect(File.exist?(file)).to eq false }
...@@ -73,11 +85,11 @@ RSpec.describe RepositoryArchiveCleanUpService do ...@@ -73,11 +85,11 @@ RSpec.describe RepositoryArchiveCleanUpService do
end end
context 'with files older than 2 hours that does not matches valid archive extensions' do context 'with files older than 2 hours that does not matches valid archive extensions' do
it_behaves_like 'invalid archive files', 'sample.git', %w[conf rb], 2.hours it_behaves_like 'invalid archive files', 'sample.git', %w[conf rb], 121.minutes
end end
context 'with files older than 2 hours inside invalid directories' do context 'with files older than 2 hours inside invalid directories' do
it_behaves_like 'invalid archive files', 'john/doe/sample.git', %w[conf rb tar tar.gz], 2.hours it_behaves_like 'invalid archive files', 'john/t/doe/sample.git', %w[conf rb tar tar.gz], 121.minutes
end end
context 'with files newer than 2 hours that matches valid archive extensions' do context 'with files newer than 2 hours that matches valid archive extensions' do
...@@ -110,8 +122,6 @@ RSpec.describe RepositoryArchiveCleanUpService do ...@@ -110,8 +122,6 @@ RSpec.describe RepositoryArchiveCleanUpService do
def create_temporary_files(dir, extensions, mtime) def create_temporary_files(dir, extensions, mtime)
FileUtils.mkdir_p(dir) FileUtils.mkdir_p(dir)
# rubocop: disable Rails/TimeZone FileUtils.touch(extensions.map { |ext| File.join(dir, "sample.#{ext}") }, mtime: Time.now.utc - mtime)
FileUtils.touch(extensions.map { |ext| File.join(dir, "sample.#{ext}") }, mtime: Time.now - mtime)
# rubocop: enable Rails/TimeZone
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment