Commit 9cf4e473 authored by Douwe Maan's avatar Douwe Maan Committed by Mayra Cabrera

Merge branch 'security-45689-fix-archive-cache-bug' into 'security-10-7'

Serve archive requests with the correct file in all cases (10.7)

See merge request gitlab/gitlabhq!2376
parent e71351d4
...@@ -20,11 +20,12 @@ class RepositoryArchiveCleanUpService ...@@ -20,11 +20,12 @@ class RepositoryArchiveCleanUpService
private private
def clean_up_old_archives def clean_up_old_archives
run(%W(find #{path} -not -path #{path} -type f \( -name \*.tar -o -name \*.bz2 -o -name \*.tar.gz -o -name \*.zip \) -maxdepth 2 -mmin +#{mmin} -delete)) run(%W(find #{path} -mindepth 1 -maxdepth 3 -type f \( -name \*.tar -o -name \*.bz2 -o -name \*.tar.gz -o -name \*.zip \) -mmin +#{mmin} -delete))
end end
def clean_up_empty_directories def clean_up_empty_directories
run(%W(find #{path} -not -path #{path} -type d -empty -name \*.git -maxdepth 1 -delete)) run(%W(find #{path} -mindepth 2 -maxdepth 2 -type d -empty -delete))
run(%W(find #{path} -mindepth 1 -maxdepth 1 -type d -empty -delete))
end end
def run(cmd) def run(cmd)
......
---
title: Serve archive requests with the correct file in all cases
merge_request:
author:
type: security
...@@ -391,18 +391,6 @@ module Gitlab ...@@ -391,18 +391,6 @@ module Gitlab
nil nil
end end
def archive_prefix(ref, sha, append_sha:)
append_sha = (ref != sha) if append_sha.nil?
project_name = self.name.chomp('.git')
formatted_ref = ref.tr('/', '-')
prefix_segments = [project_name, formatted_ref]
prefix_segments << sha if append_sha
prefix_segments.join('-')
end
def archive_metadata(ref, storage_path, format = "tar.gz", append_sha:) def archive_metadata(ref, storage_path, format = "tar.gz", append_sha:)
ref ||= root_ref ref ||= root_ref
commit = Gitlab::Git::Commit.find(self, ref) commit = Gitlab::Git::Commit.find(self, ref)
...@@ -413,12 +401,44 @@ module Gitlab ...@@ -413,12 +401,44 @@ module Gitlab
{ {
'RepoPath' => path, 'RepoPath' => path,
'ArchivePrefix' => prefix, 'ArchivePrefix' => prefix,
'ArchivePath' => archive_file_path(prefix, storage_path, format), 'ArchivePath' => archive_file_path(storage_path, commit.id, prefix, format),
'CommitId' => commit.id 'CommitId' => commit.id
} }
end end
def archive_file_path(name, storage_path, format = "tar.gz") # This is both the filename of the archive (missing the extension) and the
# name of the top-level member of the archive under which all files go
#
# FIXME: The generated prefix is incorrect for projects with hashed
# storage enabled
def archive_prefix(ref, sha, append_sha:)
append_sha = (ref != sha) if append_sha.nil?
project_name = self.name.chomp('.git')
formatted_ref = ref.tr('/', '-')
prefix_segments = [project_name, formatted_ref]
prefix_segments << sha if append_sha
prefix_segments.join('-')
end
private :archive_prefix
# The full path on disk where the archive should be stored. This is used
# to cache the archive between requests.
#
# The path is a global namespace, so needs to be globally unique. This is
# achieved by including `gl_repository` in the path.
#
# Archives relating to a particular ref when the SHA is not present in the
# filename must be invalidated when the ref is updated to point to a new
# SHA. This is achieved by including the SHA in the path.
#
# As this is a full path on disk, it is not "cloud native". This should
# be resolved by either removing the cache, or moving the implementation
# into Gitaly and removing the ArchivePath parameter from the git-archive
# senddata response.
def archive_file_path(storage_path, sha, name, format = "tar.gz")
# Build file path # Build file path
return nil unless name return nil unless name
...@@ -436,8 +456,9 @@ module Gitlab ...@@ -436,8 +456,9 @@ module Gitlab
end end
file_name = "#{name}.#{extension}" file_name = "#{name}.#{extension}"
File.join(storage_path, self.name, file_name) File.join(storage_path, self.gl_repository, sha, file_name)
end end
private :archive_file_path
# Return repo size in megabytes # Return repo size in megabytes
def size def size
......
...@@ -234,59 +234,72 @@ describe Gitlab::Git::Repository, seed_helper: true do ...@@ -234,59 +234,72 @@ describe Gitlab::Git::Repository, seed_helper: true do
it_behaves_like 'wrapping gRPC errors', Gitlab::GitalyClient::RefService, :tag_names it_behaves_like 'wrapping gRPC errors', Gitlab::GitalyClient::RefService, :tag_names
end end
shared_examples 'archive check' do |extenstion| describe '#archive_metadata' do
it { expect(metadata['ArchivePath']).to match(%r{tmp/gitlab-git-test.git/gitlab-git-test-master-#{SeedRepo::LastCommit::ID}}) } let(:storage_path) { '/tmp' }
it { expect(metadata['ArchivePath']).to end_with extenstion } let(:cache_key) { File.join(repository.gl_repository, SeedRepo::LastCommit::ID) }
end
describe '#archive_prefix' do let(:append_sha) { true }
let(:project_name) { 'project-name'} let(:ref) { 'master' }
let(:format) { nil }
before do let(:expected_extension) { 'tar.gz' }
expect(repository).to receive(:name).once.and_return(project_name) let(:expected_filename) { "#{expected_prefix}.#{expected_extension}" }
end let(:expected_path) { File.join(storage_path, cache_key, expected_filename) }
let(:expected_prefix) { "gitlab-git-test-#{ref}-#{SeedRepo::LastCommit::ID}" }
it 'returns parameterised string for a ref containing slashes' do subject(:metadata) { repository.archive_metadata(ref, storage_path, format, append_sha: append_sha) }
prefix = repository.archive_prefix('test/branch', 'SHA', append_sha: nil)
expect(prefix).to eq("#{project_name}-test-branch-SHA") it 'sets RepoPath to the repository path' do
expect(metadata['RepoPath']).to eq(repository.path)
end end
it 'returns correct string for a ref containing dots' do it 'sets CommitId to the commit SHA' do
prefix = repository.archive_prefix('test.branch', 'SHA', append_sha: nil) expect(metadata['CommitId']).to eq(SeedRepo::LastCommit::ID)
expect(prefix).to eq("#{project_name}-test.branch-SHA")
end end
it 'returns string with sha when append_sha is false' do it 'sets ArchivePrefix to the expected prefix' do
prefix = repository.archive_prefix('test.branch', 'SHA', append_sha: false) expect(metadata['ArchivePrefix']).to eq(expected_prefix)
expect(prefix).to eq("#{project_name}-test.branch")
end end
end
describe '#archive' do it 'sets ArchivePath to the expected globally-unique path' do
let(:metadata) { repository.archive_metadata('master', '/tmp', append_sha: true) } # This is really important from a security perspective. Think carefully
# before changing it: https://gitlab.com/gitlab-org/gitlab-ce/issues/45689
expect(expected_path).to include(File.join(repository.gl_repository, SeedRepo::LastCommit::ID))
it_should_behave_like 'archive check', '.tar.gz' expect(metadata['ArchivePath']).to eq(expected_path)
end end
describe '#archive_zip' do
let(:metadata) { repository.archive_metadata('master', '/tmp', 'zip', append_sha: true) }
it_should_behave_like 'archive check', '.zip' context 'append_sha varies archive path and filename' do
end where(:append_sha, :ref, :expected_prefix) do
sha = SeedRepo::LastCommit::ID
describe '#archive_bz2' do true | 'master' | "gitlab-git-test-master-#{sha}"
let(:metadata) { repository.archive_metadata('master', '/tmp', 'tbz2', append_sha: true) } true | sha | "gitlab-git-test-#{sha}-#{sha}"
false | 'master' | "gitlab-git-test-master"
false | sha | "gitlab-git-test-#{sha}"
nil | 'master' | "gitlab-git-test-master-#{sha}"
nil | sha | "gitlab-git-test-#{sha}"
end
it_should_behave_like 'archive check', '.tar.bz2' with_them do
end it { expect(metadata['ArchivePrefix']).to eq(expected_prefix) }
it { expect(metadata['ArchivePath']).to eq(expected_path) }
end
end
describe '#archive_fallback' do context 'format varies archive path and filename' do
let(:metadata) { repository.archive_metadata('master', '/tmp', 'madeup', append_sha: true) } where(:format, :expected_extension) do
nil | 'tar.gz'
'madeup' | 'tar.gz'
'tbz2' | 'tar.bz2'
'zip' | 'zip'
end
it_should_behave_like 'archive check', '.tar.gz' with_them do
it { expect(metadata['ArchivePrefix']).to eq(expected_prefix) }
it { expect(metadata['ArchivePath']).to eq(expected_path) }
end
end
end end
describe '#size' do describe '#size' do
......
require 'spec_helper' require 'spec_helper'
describe RepositoryArchiveCleanUpService do describe RepositoryArchiveCleanUpService do
describe '#execute' do subject(:service) { described_class.new }
subject(:service) { described_class.new }
describe '#execute (new archive locations)' do
let(:sha) { "0" * 40 }
it 'removes outdated archives and directories in a new-style path' do
in_directory_with_files("project-999/#{sha}", %w[tar tar.bz2 tar.gz zip], 3.hours) do |dirname, files|
service.execute
files.each { |filename| expect(File.exist?(filename)).to be_falsy }
expect(File.directory?(dirname)).to be_falsy
expect(File.directory?(File.dirname(dirname))).to be_falsy
end
end
it 'does not remove directories when they contain outdated non-archives' do
in_directory_with_files("project-999/#{sha}", %w[tar conf rb], 2.hours) do |dirname, files|
service.execute
expect(File.directory?(dirname)).to be_truthy
end
end
it 'does not remove in-date archives in a new-style path' do
in_directory_with_files("project-999/#{sha}", %w[tar tar.bz2 tar.gz zip], 1.hour) do |dirname, files|
service.execute
files.each { |filename| expect(File.exist?(filename)).to be_truthy }
end
end
end
describe '#execute (legacy archive locations)' do
context 'when the downloads directory does not exist' do context 'when the downloads directory does not exist' do
it 'does not remove any archives' do it 'does not remove any archives' do
path = '/invalid/path/' path = '/invalid/path/'
stub_repository_downloads_path(path) stub_repository_downloads_path(path)
allow(File).to receive(:directory?).and_call_original
expect(File).to receive(:directory?).with(path).and_return(false) expect(File).to receive(:directory?).with(path).and_return(false)
expect(service).not_to receive(:clean_up_old_archives) expect(service).not_to receive(:clean_up_old_archives)
expect(service).not_to receive(:clean_up_empty_directories) expect(service).not_to receive(:clean_up_empty_directories)
...@@ -19,7 +51,7 @@ describe RepositoryArchiveCleanUpService do ...@@ -19,7 +51,7 @@ describe RepositoryArchiveCleanUpService do
context 'when the downloads directory exists' do context 'when the downloads directory exists' do
shared_examples 'invalid archive files' do |dirname, extensions, mtime| shared_examples 'invalid archive files' do |dirname, extensions, mtime|
it 'does not remove files and directoy' do it 'does not remove files and directory' do
in_directory_with_files(dirname, extensions, mtime) do |dir, files| in_directory_with_files(dirname, extensions, mtime) do |dir, files|
service.execute service.execute
...@@ -43,7 +75,7 @@ describe RepositoryArchiveCleanUpService do ...@@ -43,7 +75,7 @@ describe RepositoryArchiveCleanUpService do
end end
context 'with files older than 2 hours inside invalid directories' do context 'with files older than 2 hours inside invalid directories' do
it_behaves_like 'invalid archive files', 'john_doe/sample.git', %w[conf rb tar tar.gz], 2.hours it_behaves_like 'invalid archive files', 'john/doe/sample.git', %w[conf rb tar tar.gz], 2.hours
end end
context 'with files newer than 2 hours that matches valid archive extensions' do context 'with files newer than 2 hours that matches valid archive extensions' do
...@@ -58,24 +90,24 @@ describe RepositoryArchiveCleanUpService do ...@@ -58,24 +90,24 @@ describe RepositoryArchiveCleanUpService do
it_behaves_like 'invalid archive files', 'sample.git', %w[conf rb tar tar.gz], 1.hour it_behaves_like 'invalid archive files', 'sample.git', %w[conf rb tar tar.gz], 1.hour
end end
end end
end
def in_directory_with_files(dirname, extensions, mtime) def in_directory_with_files(dirname, extensions, mtime)
Dir.mktmpdir do |tmpdir| Dir.mktmpdir do |tmpdir|
stub_repository_downloads_path(tmpdir) stub_repository_downloads_path(tmpdir)
dir = File.join(tmpdir, dirname) dir = File.join(tmpdir, dirname)
files = create_temporary_files(dir, extensions, mtime) files = create_temporary_files(dir, extensions, mtime)
yield(dir, files) yield(dir, files)
end
end end
end
def stub_repository_downloads_path(path) def stub_repository_downloads_path(path)
allow(Gitlab.config.gitlab).to receive(:repository_downloads_path).and_return(path) allow(Gitlab.config.gitlab).to receive(:repository_downloads_path).and_return(path)
end end
def create_temporary_files(dir, extensions, mtime) def create_temporary_files(dir, extensions, mtime)
FileUtils.mkdir_p(dir) FileUtils.mkdir_p(dir)
FileUtils.touch(extensions.map { |ext| File.join(dir, "sample.#{ext}") }, mtime: Time.now - mtime) FileUtils.touch(extensions.map { |ext| File.join(dir, "sample.#{ext}") }, mtime: Time.now - mtime)
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment