Commit 3511b2bf authored by Rubén Dávila's avatar Rubén Dávila

Improve performance of repository size check

Instead of iterating over each delta we use the same mechanism from the
Max File Size push rule.
parent 12d57caf
---
title: Improve performance of repository size limit check
merge_request: 5476
author:
type: performance
......@@ -34,8 +34,9 @@ module EE
return if push_rule.nil? || push_rule.max_file_size.zero?
max_file_size = push_rule.max_file_size
raw_changes = project.repository.raw_changes_between(oldrev, newrev)
large_file = changes.find do |c|
large_file = raw_changes.find do |c|
size_in_mb = ::Gitlab::Utils.bytes_to_megabytes(c.blob_size)
c.operation != :deleted && size_in_mb > max_file_size
......@@ -46,14 +47,6 @@ module EE
end
end
def changes
strong_memoize(:changes) do
next [] unless newrev
project.repository.raw_changes_between(oldrev, newrev)
end
end
def push_rule
project.push_rule
end
......
module EE
module Gitlab
module Deltas
def self.delta_size_check(change, repo)
size_of_deltas = 0
begin
tree_a = repo.lookup(change[:oldrev])
tree_b = repo.lookup(change[:newrev])
return size_of_deltas unless diffable?(tree_a) && diffable?(tree_b)
diff = tree_a.diff(tree_b)
diff.each_delta do |d|
next if d.deleted?
blob = ::Gitlab::Git::Blob.raw(repo, d.new_file[:oid])
# It's possible the OID points to a commit or empty object
next unless blob
size_of_deltas += blob.size
end
size_of_deltas
rescue Rugged::OdbError, Rugged::ReferenceError, Rugged::InvalidError
size_of_deltas
end
end
def self.diffable?(object)
[Rugged::Commit, Rugged::Tree].include?(object.class)
end
end
end
end
require 'spec_helper'
describe EE::Gitlab::Deltas do
let(:project) { create(:project, :repository) }
describe '.delta_size_check' do
it 'returns a non-zero file size' do
change = {
oldrev: TestEnv::BRANCH_SHA['feature'],
newrev: TestEnv::BRANCH_SHA['master']
}
expect(described_class.delta_size_check(change, project.repository)).to be > 0
end
it 'handles annotated tags on an object' do
object_id = 'faaf198af3a36dbf41961466703cc1d47c61d051'
options = { message: 'test tag message\n',
tagger: { name: 'John Smith', email: 'john@gmail.com' } }
result = project.repository.rugged.tags.create('annotated-tag', object_id, options)
change = {
oldrev: result.annotation.oid,
newrev: TestEnv::BRANCH_SHA['master']
}
expect(described_class.delta_size_check(change, project.repository)).to eq(0)
end
end
end
......@@ -39,6 +39,240 @@ For more information: #{EE::Gitlab::GeoGitAccess::GEO_SERVER_DOCS_URL}"
end
end
describe "push_rule_check" do
let(:start_sha) { '6f6d7e7ed97bb5f0054f2b1df789b39ca89b6ff9' }
let(:end_sha) { '570e7b2abdd848b95f2f578043fc23bd6f6fd24d' }
before do
project.add_developer(user)
allow(project.repository).to receive(:new_commits)
.and_return(project.repository.commits_between(start_sha, end_sha))
end
describe "author email check" do
it 'returns true' do
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.not_to raise_error
end
it 'returns false' do
project.create_push_rule(commit_message_regex: "@only.com")
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.to raise_error(described_class::UnauthorizedError)
end
it 'returns true for tags' do
project.create_push_rule(commit_message_regex: "@only.com")
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/tags/v1") }.not_to raise_error
end
it 'allows githook for new branch with an old bad commit' do
bad_commit = double("Commit", safe_message: 'Some change').as_null_object
ref_object = double(name: 'heads/master')
allow(bad_commit).to receive(:refs).and_return([ref_object])
allow_any_instance_of(Repository).to receive(:commits_between).and_return([bad_commit])
project.create_push_rule(commit_message_regex: "Change some files")
# push to new branch, so use a blank old rev and new ref
expect { access.send(:check_push_access!, "#{Gitlab::Git::BLANK_SHA} #{end_sha} refs/heads/new-branch") }.not_to raise_error
end
it 'allows githook for any change with an old bad commit' do
bad_commit = double("Commit", safe_message: 'Some change').as_null_object
ref_object = double(name: 'heads/master')
allow(bad_commit).to receive(:refs).and_return([ref_object])
allow(project.repository).to receive(:commits_between).and_return([bad_commit])
project.create_push_rule(commit_message_regex: "Change some files")
# push to new branch, so use a blank old rev and new ref
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.not_to raise_error
end
it 'does not allow any change from Web UI with bad commit' do
bad_commit = double("Commit", safe_message: 'Some change').as_null_object
# We use tmp ref a a temporary for Web UI commiting
ref_object = double(name: 'refs/tmp')
allow(bad_commit).to receive(:refs).and_return([ref_object])
allow(project.repository).to receive(:commits_between).and_return([bad_commit])
allow(project.repository).to receive(:new_commits).and_return([bad_commit])
project.create_push_rule(commit_message_regex: "Change some files")
# push to new branch, so use a blank old rev and new ref
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.to raise_error(described_class::UnauthorizedError)
end
end
describe "member_check" do
before do
project.create_push_rule(member_check: true)
end
it 'returns false for non-member user' do
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.to raise_error(described_class::UnauthorizedError)
end
it 'returns true if committer is a gitlab member' do
create(:user, email: 'dmitriy.zaporozhets@gmail.com')
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.not_to raise_error
end
end
describe "file names check" do
let(:start_sha) { '913c66a37b4a45b9769037c55c2d238bd0942d2e' }
let(:end_sha) { '33f3729a45c02fc67d00adb1b8bca394b0e761d9' }
before do
allow(project.repository).to receive(:new_commits)
.and_return(project.repository.commits_between(start_sha, end_sha))
end
it 'returns false when filename is prohibited' do
project.create_push_rule(file_name_regex: "jpg$")
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.to raise_error(described_class::UnauthorizedError)
end
it 'returns true if file name is allowed' do
project.create_push_rule(file_name_regex: "exe$")
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.not_to raise_error
end
end
describe "max file size check" do
let(:start_sha) { 'cfe32cf61b73a0d5e9f13e774abde7ff789b1660' }
let(:end_sha) { 'c84ff944ff4529a70788a5e9003c2b7feae29047' }
before do
project.add_developer(user)
end
it "returns false when size is too large" do
project.create_push_rule(max_file_size: 1)
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.to raise_error(described_class::UnauthorizedError)
end
it "returns true when size is allowed" do
project.create_push_rule(max_file_size: 2)
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.not_to raise_error
end
it "returns true when size is nil" do
allow_any_instance_of(Gitlab::Git::Blob).to receive(:size).and_return(nil)
project.create_push_rule(max_file_size: 2)
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.not_to raise_error
end
end
end
describe 'repository size restrictions' do
let(:start_sha) { 'cfe32cf61b73a0d5e9f13e774abde7ff789b1660' }
let(:sha_with_2_mb_file) { 'c84ff944ff4529a70788a5e9003c2b7feae29047' }
let(:sha_with_smallest_changes) { '33f3729a45c02fc67d00adb1b8bca394b0e761d9' }
before do
project.add_developer(user)
end
context 'when repository size is over limit' do
before do
allow(project).to receive(:repository_and_lfs_size).and_return(2.megabytes)
project.update_attribute(:repository_size_limit, 1.megabytes)
end
it 'rejects the push' do
expect do
access.send(:check_push_access!, "#{start_sha} #{sha_with_smallest_changes} refs/heads/master")
end.to raise_error(described_class::UnauthorizedError, /Your push has been rejected/)
end
end
context 'when repository size is below the limit' do
before do
allow(project).to receive(:repository_and_lfs_size).and_return(1.megabyte)
project.update_attribute(:repository_size_limit, 2.megabytes)
end
context 'when new change exceeds the limit' do
it 'rejects the push' do
expect do
access.send(:check_push_access!, "#{start_sha} #{sha_with_2_mb_file} refs/heads/master")
end.to raise_error(described_class::UnauthorizedError, /Your push to this repository would cause it to exceed the size limit/)
end
end
context 'when new change does not exceeds the limit' do
it 'accepts the push' do
expect do
access.send(:check_push_access!, "#{start_sha} #{sha_with_smallest_changes} refs/heads/master")
end.not_to raise_error
end
end
context 'when a file is modified' do
# file created
let(:old) { 'd2d430676773caa88cdaf7c55944073b2fd5561a' }
# file modified
let(:new) { '5f923865dde3436854e9ceb9cdb7815618d4e849' }
before do
# Substract 10_000 bytes in order to demostrate that the 23 KB are not added to the total
allow(project).to receive(:repository_and_lfs_size).and_return(2.megabytes - 10000)
end
it 'just add the difference between the two versions to the total size' do
expect do
access.send(:check_push_access!, "#{old} #{new} refs/heads/master")
end.not_to raise_error
end
end
context 'when a file is renamed' do
# file deleted
let(:old) { '281d3a76f31c812dbf48abce82ccf6860adedd81' }
# file added with different name
let(:new) { 'c347ca2e140aa667b968e51ed0ffe055501fe4f4' }
before do
allow(project).to receive(:repository_and_lfs_size).and_return(2.megabytes)
end
it 'does not modify the total size given the content is the same' do
expect do
access.send(:check_push_access!, "#{old} #{new} refs/heads/master")
end.not_to raise_error
end
end
context 'when a file is deleted' do
# file deleted
let(:old) { 'c1acaa58bbcbc3eafe538cb8274ba387047b69f8' }
# New changes introduced
let(:new) { '5937ac0a7beb003549fc5fd26fc247adbce4a52e' }
before do
allow(project).to receive(:repository_and_lfs_size).and_return(2.megabytes)
end
it 'subtracts the size of the deleted file before calculate the new total' do
expect do
access.send(:check_push_access!, "#{old} #{new} refs/heads/master")
end.not_to raise_error
end
end
end
end
private
def raise_unauthorized(message)
......
......@@ -62,6 +62,12 @@ module Gitlab
end
end
# Returns an array of Blob instances just with the metadata, that means
# the data attribute has no content.
def batch_metadata(repository, blob_references)
batch(repository, blob_references, blob_size_limit: 0)
end
# Find LFS blobs given an array of sha ids
# Returns array of Gitlab::Git::Blob
# Does not guarantee blob data will be set
......
......@@ -28,13 +28,14 @@ module Gitlab
# 85bc2f9753afd5f4fc5d7c75f74f8d526f26b4f3 107 R060\tfiles/js/commit.js.coffee\tfiles/js/commit.coffee
def parse(raw_change)
@blob_id, @blob_size, @raw_operation, raw_paths = raw_change.split(' ', 4)
@blob_size = @blob_size.to_i
@operation = extract_operation
@old_path, @new_path = extract_paths(raw_paths)
end
def extract_paths(file_path)
case operation
when :renamed
when :copied, :renamed
file_path.split(/\t/)
when :deleted
[file_path, nil]
......
......@@ -582,26 +582,32 @@ module Gitlab
# old_rev and new_rev are commit ID's
# the result of this method is an array of Gitlab::Git::RawDiffChange
def raw_changes_between(old_rev, new_rev)
gitaly_migrate(:raw_changes_between) do |is_enabled|
if is_enabled
gitaly_repository_client.raw_changes_between(old_rev, new_rev)
.each_with_object([]) do |msg, arr|
msg.raw_changes.each { |change| arr << ::Gitlab::Git::RawDiffChange.new(change) }
end
else
result = []
@raw_changes_between ||= {}
circuit_breaker.perform do
Open3.pipeline_r(git_diff_cmd(old_rev, new_rev), format_git_cat_file_script, git_cat_file_cmd) do |last_stdout, wait_threads|
last_stdout.each_line { |line| result << ::Gitlab::Git::RawDiffChange.new(line.chomp!) }
@raw_changes_between[[old_rev, new_rev]] ||= begin
return [] if new_rev.blank? || new_rev == Gitlab::Git::BLANK_SHA
if wait_threads.any? { |waiter| !waiter.value&.success? }
raise ::Gitlab::Git::Repository::GitError, "Unabled to obtain changes between #{old_rev} and #{new_rev}"
gitaly_migrate(:raw_changes_between) do |is_enabled|
if is_enabled
gitaly_repository_client.raw_changes_between(old_rev, new_rev)
.each_with_object([]) do |msg, arr|
msg.raw_changes.each { |change| arr << ::Gitlab::Git::RawDiffChange.new(change) }
end
else
result = []
circuit_breaker.perform do
Open3.pipeline_r(git_diff_cmd(old_rev, new_rev), format_git_cat_file_script, git_cat_file_cmd) do |last_stdout, wait_threads|
last_stdout.each_line { |line| result << ::Gitlab::Git::RawDiffChange.new(line.chomp!) }
if wait_threads.any? { |waiter| !waiter.value&.success? }
raise ::Gitlab::Git::Repository::GitError, "Unabled to obtain changes between #{old_rev} and #{new_rev}"
end
end
end
end
result
result
end
end
end
rescue ArgumentError => e
......
......@@ -270,7 +270,7 @@ module Gitlab
check_single_change_access(change, skip_lfs_integrity_check: !first_change)
if project.size_limit_enabled?
push_size_in_bytes += EE::Gitlab::Deltas.delta_size_check(change, project.repository)
push_size_in_bytes += push_size(change)
end
end
......@@ -279,6 +279,29 @@ module Gitlab
end
end
def push_size(change)
old_rev, new_rev = change.values_at(:oldrev, :newrev)
current_changes_size = 0
old_paths = []
repository.raw_changes_between(old_rev, new_rev).each do |c|
case c.operation
when :deleted
current_changes_size -= c.blob_size
when :added
current_changes_size += c.blob_size
when :copied, :modified, :renamed, :type_changed
current_changes_size += c.blob_size
old_paths << [old_rev, c.old_path]
end
end
old_changes_size = Gitlab::Git::Blob.batch_metadata(repository, old_paths).sum(&:size)
current_changes_size - old_changes_size
end
def check_single_change_access(change, skip_lfs_integrity_check: false)
Checks::ChangeAccess.new(
change,
......
......@@ -251,6 +251,26 @@ describe Gitlab::Git::Blob, seed_helper: true do
end
end
describe '.batch_metadata' do
let(:blob_references) do
[
[SeedRepo::Commit::ID, "files/ruby/popen.rb"],
[SeedRepo::Commit::ID, 'six']
]
end
subject { described_class.batch_metadata(repository, blob_references) }
it 'returns an empty data attribute' do
first_blob, last_blob = subject
expect(first_blob.data).to be_blank
expect(first_blob.path).to eq("files/ruby/popen.rb")
expect(last_blob.data).to be_blank
expect(last_blob.path).to eq("six")
end
end
describe '.batch_lfs_pointers' do
let(:tree_object) { repository.rugged.rev_parse('master^{tree}') }
......
......@@ -12,7 +12,7 @@ describe Gitlab::Git::RawDiffChange do
expect(change.operation).to eq(:unknown)
expect(change.old_path).to be_blank
expect(change.new_path).to be_blank
expect(change.blob_size).to be_blank
expect(change.blob_size).to eq(0)
end
end
......
......@@ -1039,154 +1039,6 @@ describe Gitlab::GitAccess do
end
end
describe "push_rule_check" do
let(:start_sha) { '6f6d7e7ed97bb5f0054f2b1df789b39ca89b6ff9' }
let(:end_sha) { '570e7b2abdd848b95f2f578043fc23bd6f6fd24d' }
before do
project.add_developer(user)
allow(project.repository).to receive(:new_commits)
.and_return(project.repository.commits_between(start_sha, end_sha))
end
describe "author email check" do
it 'returns true' do
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.not_to raise_error
end
it 'returns false' do
project.create_push_rule(commit_message_regex: "@only.com")
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.to raise_error(described_class::UnauthorizedError)
end
it 'returns true for tags' do
project.create_push_rule(commit_message_regex: "@only.com")
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/tags/v1") }.not_to raise_error
end
it 'allows githook for new branch with an old bad commit' do
bad_commit = double("Commit", safe_message: 'Some change').as_null_object
ref_object = double(name: 'heads/master')
allow(bad_commit).to receive(:refs).and_return([ref_object])
allow_any_instance_of(Repository).to receive(:commits_between).and_return([bad_commit])
project.create_push_rule(commit_message_regex: "Change some files")
# push to new branch, so use a blank old rev and new ref
expect { access.send(:check_push_access!, "#{Gitlab::Git::BLANK_SHA} #{end_sha} refs/heads/new-branch") }.not_to raise_error
end
it 'allows githook for any change with an old bad commit' do
bad_commit = double("Commit", safe_message: 'Some change').as_null_object
ref_object = double(name: 'heads/master')
allow(bad_commit).to receive(:refs).and_return([ref_object])
allow(project.repository).to receive(:commits_between).and_return([bad_commit])
project.create_push_rule(commit_message_regex: "Change some files")
# push to new branch, so use a blank old rev and new ref
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.not_to raise_error
end
it 'does not allow any change from Web UI with bad commit' do
bad_commit = double("Commit", safe_message: 'Some change').as_null_object
# We use tmp ref a a temporary for Web UI commiting
ref_object = double(name: 'refs/tmp')
allow(bad_commit).to receive(:refs).and_return([ref_object])
allow(project.repository).to receive(:commits_between).and_return([bad_commit])
allow(project.repository).to receive(:new_commits).and_return([bad_commit])
project.create_push_rule(commit_message_regex: "Change some files")
# push to new branch, so use a blank old rev and new ref
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.to raise_error(described_class::UnauthorizedError)
end
end
describe "member_check" do
before do
project.create_push_rule(member_check: true)
end
it 'returns false for non-member user' do
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.to raise_error(described_class::UnauthorizedError)
end
it 'returns true if committer is a gitlab member' do
create(:user, email: 'dmitriy.zaporozhets@gmail.com')
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.not_to raise_error
end
end
describe "file names check" do
let(:start_sha) { '913c66a37b4a45b9769037c55c2d238bd0942d2e' }
let(:end_sha) { '33f3729a45c02fc67d00adb1b8bca394b0e761d9' }
before do
allow(project.repository).to receive(:new_commits)
.and_return(project.repository.commits_between(start_sha, end_sha))
end
it 'returns false when filename is prohibited' do
project.create_push_rule(file_name_regex: "jpg$")
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.to raise_error(described_class::UnauthorizedError)
end
it 'returns true if file name is allowed' do
project.create_push_rule(file_name_regex: "exe$")
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.not_to raise_error
end
end
describe "max file size check" do
let(:start_sha) { 'cfe32cf61b73a0d5e9f13e774abde7ff789b1660' }
let(:end_sha) { 'c84ff944ff4529a70788a5e9003c2b7feae29047' }
it "returns false when size is too large" do
project.create_push_rule(max_file_size: 1)
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.to raise_error(described_class::UnauthorizedError)
end
it "returns true when size is allowed" do
project.create_push_rule(max_file_size: 2)
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.not_to raise_error
end
it "returns true when size is nil" do
allow_any_instance_of(Gitlab::Git::Blob).to receive(:size).and_return(nil)
project.create_push_rule(max_file_size: 2)
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.not_to raise_error
end
end
describe 'repository size restrictions' do
before do
project.update_attribute(:repository_size_limit, 50.megabytes)
end
it 'returns false when blob is too big' do
allow_any_instance_of(Gitlab::Git::Blob).to receive(:size).and_return(100.megabytes.to_i)
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.to raise_error(described_class::UnauthorizedError)
end
it 'returns true when blob is just right' do
allow_any_instance_of(Gitlab::Git::Blob).to receive(:size).and_return(2.megabytes.to_i)
expect { access.send(:check_push_access!, "#{start_sha} #{end_sha} refs/heads/master") }.not_to raise_error
end
end
end
context 'when pushing to a project' do
let(:project) { create(:project, :public, :repository) }
let(:changes) { "#{Gitlab::Git::BLANK_SHA} 570e7b2ab refs/heads/wow" }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment