Commit c13d28c9 authored by Sean McGivern's avatar Sean McGivern

Use sparse checkout for squashes

For large repos, the checkout as part of creating a worktree can be very
expensive. This can be so expensive that processes get 'stuck' on
GitLab.com.

To make this cheaper, configure sparse checkout for squash worktrees so
that we only check out the files that were changed in the target
branch (new files don't need to be included).

Also add specs to cover the case where only new files are added, and the
case where the diff isn't shown in the merge request because it's too
large.
parent 4fa66ffb
...@@ -23,13 +23,15 @@ module MergeRequests ...@@ -23,13 +23,15 @@ module MergeRequests
end end
run_git_command( run_git_command(
%W(worktree add #{tree_path} #{merge_request.target_branch} --detach), %W(worktree add --no-checkout --detach #{tree_path}),
repository.path_to_repo, repository.path_to_repo,
git_env, git_env,
'add worktree for squash' 'add worktree for squash'
) )
diff = git_command(%W(diff --binary #{merge_request.diff_start_sha}...#{merge_request.diff_head_sha})) configure_sparse_checkout
diff = git_command(%W(diff --binary #{diff_range}))
apply = git_command(%w(apply --index)) apply = git_command(%w(apply --index))
run_command( run_command(
...@@ -65,10 +67,64 @@ module MergeRequests ...@@ -65,10 +67,64 @@ module MergeRequests
false false
ensure ensure
clean_dir clean_dir
clean_worktree
end end
def tree_path def tree_path
@tree_path ||= merge_request.squash_dir_path @tree_path ||= merge_request.squash_dir_path
end end
def diff_range
@diff_range ||= "#{merge_request.diff_start_sha}...#{merge_request.diff_head_sha}"
end
def worktree_path
@worktree_path ||= File.join(repository.path_to_repo, 'worktrees', merge_request.id.to_s)
end
def clean_worktree
FileUtils.rm_rf(worktree_path) if File.exist?(worktree_path)
end
# Adding a worktree means checking out the repository. For large repos, this
# can be very expensive, so set up sparse checkout for the worktree to only
# check out the files we're interested in.
#
def configure_sparse_checkout
run_git_command(
%w(config core.sparseCheckout true),
repository.path_to_repo,
git_env,
'configure sparse checkout'
)
# Get the same diff we'll apply, excluding added files. (We can't check
# out files on the target branch if they don't exist yet!)
#
diff_files = run_git_command(
%W(diff --name-only --diff-filter=a --binary #{diff_range}),
repository.path_to_repo,
git_env,
'get files in diff'
)
# If only new files are introduced by this MR, then our sparse checkout
# doesn't need to have any files at all.
#
unless diff_files.empty?
worktree_info = File.join(worktree_path, 'info')
FileUtils.mkdir_p(worktree_info) unless File.directory?(worktree_info)
File.write(File.join(worktree_info, 'sparse-checkout'), diff_files)
end
run_git_command(
%W(checkout --detach #{merge_request.target_branch}),
tree_path,
git_env,
'check out target branch'
)
end
end end
end end
...@@ -5,34 +5,25 @@ describe MergeRequests::SquashService do ...@@ -5,34 +5,25 @@ describe MergeRequests::SquashService do
let(:user) { project.owner } let(:user) { project.owner }
let(:project) { create(:project) } let(:project) { create(:project) }
let(:merge_request) do
create(:merge_request,
source_branch: 'video', source_project: project,
target_branch: 'master', target_project: project)
end
let(:merge_request_with_one_commit) do let(:merge_request_with_one_commit) do
create(:merge_request, create(:merge_request,
source_branch: 'feature.custom-highlighting', source_project: project, source_branch: 'feature.custom-highlighting', source_project: project,
target_branch: 'master', target_project: project) target_branch: 'master', target_project: project)
end end
describe '#execute' do let(:merge_request_with_only_new_files) do
context 'when there is only one commit in the merge request' do create(:merge_request,
it 'returns that commit SHA' do source_branch: 'video', source_project: project,
result = service.execute(merge_request_with_one_commit) target_branch: 'master', target_project: project)
expect(result).to match(status: :success, squash_sha: merge_request_with_one_commit.diff_head_sha)
end end
it 'does not perform any git actions' do let(:merge_request_with_large_files) do
expect(service).not_to receive(:run_git_command) create(:merge_request,
source_branch: 'squash-large-files', source_project: project,
service.execute(merge_request_with_one_commit) target_branch: 'master', target_project: project)
end
end end
context 'when the squash succeeds' do shared_examples 'the squash succeeds' do
it 'returns the squashed commit SHA' do it 'returns the squashed commit SHA' do
result = service.execute(merge_request) result = service.execute(merge_request)
...@@ -72,14 +63,44 @@ describe MergeRequests::SquashService do ...@@ -72,14 +63,44 @@ describe MergeRequests::SquashService do
mr_diff = rugged.diff(merge_request.diff_base_sha, merge_request.diff_head_sha) mr_diff = rugged.diff(merge_request.diff_base_sha, merge_request.diff_head_sha)
squash_diff = rugged.diff(merge_request.diff_start_sha, squash_sha) squash_diff = rugged.diff(merge_request.diff_start_sha, squash_sha)
expect(squash_diff.patch).to eq(mr_diff.patch) expect(squash_diff.patch.length).to eq(mr_diff.patch.length)
expect(squash_commit.sha).not_to eq(merge_request.diff_head_sha) expect(squash_commit.sha).not_to eq(merge_request.diff_head_sha)
end end
end end
end end
describe '#execute' do
context 'when there is only one commit in the merge request' do
it 'returns that commit SHA' do
result = service.execute(merge_request_with_one_commit)
expect(result).to match(status: :success, squash_sha: merge_request_with_one_commit.diff_head_sha)
end
it 'does not perform any git actions' do
expect(service).not_to receive(:run_git_command)
service.execute(merge_request_with_one_commit)
end
end
context 'when squashing only new files' do
let(:merge_request) { merge_request_with_only_new_files }
include_examples 'the squash succeeds'
end
context 'when squashing with files too large to display' do
let(:merge_request) { merge_request_with_large_files }
include_examples 'the squash succeeds'
end
stages = { stages = {
'add worktree for squash' => 'worktree', 'add worktree for squash' => 'worktree',
'configure sparse checkout' => 'config',
'get files in diff' => 'diff --name-only',
'check out target branch' => 'checkout',
'apply patch' => 'diff --binary', 'apply patch' => 'diff --binary',
'commit squashed changes' => 'commit', 'commit squashed changes' => 'commit',
'get SHA of squashed commit' => 'rev-parse' 'get SHA of squashed commit' => 'rev-parse'
...@@ -87,13 +108,14 @@ describe MergeRequests::SquashService do ...@@ -87,13 +108,14 @@ describe MergeRequests::SquashService do
stages.each do |stage, command| stages.each do |stage, command|
context "when the #{stage} stage fails" do context "when the #{stage} stage fails" do
let(:merge_request) { merge_request_with_only_new_files }
let(:error) { 'A test error' } let(:error) { 'A test error' }
before do before do
git_command = a_collection_containing_exactly( git_command = a_collection_containing_exactly(
a_string_starting_with("#{Gitlab.config.git.bin_path} #{command}") a_string_starting_with("#{Gitlab.config.git.bin_path} #{command}")
).or( ).or(
a_collection_starting_with([Gitlab.config.git.bin_path, command]) a_collection_starting_with([Gitlab.config.git.bin_path] + command.split)
) )
allow(service).to receive(:popen).and_return(['', 0]) allow(service).to receive(:popen).and_return(['', 0])
...@@ -121,6 +143,7 @@ describe MergeRequests::SquashService do ...@@ -121,6 +143,7 @@ describe MergeRequests::SquashService do
end end
context 'when any other exception is thrown' do context 'when any other exception is thrown' do
let(:merge_request) { merge_request_with_only_new_files }
let(:error) { 'A test error' } let(:error) { 'A test error' }
before do before do
......
...@@ -38,7 +38,8 @@ module TestEnv ...@@ -38,7 +38,8 @@ module TestEnv
'conflict-too-large' => '39fa04f', 'conflict-too-large' => '39fa04f',
'deleted-image-test' => '6c17798', 'deleted-image-test' => '6c17798',
'wip' => 'b9238ee', 'wip' => 'b9238ee',
'csv' => '3dd0896' 'csv' => '3dd0896',
'squash-large-files' => '54cec52'
} }
# gitlab-test-fork is a fork of gitlab-fork, but we don't necessarily # gitlab-test-fork is a fork of gitlab-fork, but we don't necessarily
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment