Commit 71ed7987 authored by Yorick Peterse's avatar Yorick Peterse

Perform pull request IO work outside a transaction

When importing a GitHub pull request we would perform all work in a
single database transaction. This is less than ideal, because we perform
various slow Git operations when creating a merge request. This in turn
can lead to many DB connections being used, while just waiting for an IO
operation to complete.

To work around this, we now move most of the heavy lifting out of the
database transaction. Some extra error handling is added to ensure we
can resume importing a partially imported pull request, instead of just
throwing an error.

This commit also changes the specs for IssueImporter so they don't rely
on deprecated RSpec methods.
parent 9c296194
---
title: Move PR IO operations out of a transaction
merge_request:
author:
type: performance
...@@ -22,15 +22,22 @@ module Gitlab ...@@ -22,15 +22,22 @@ module Gitlab
end end
def execute def execute
if (mr_id = create_merge_request) mr, already_exists = create_merge_request
issuable_finder.cache_database_id(mr_id)
if mr
insert_git_data(mr, already_exists)
issuable_finder.cache_database_id(mr.id)
end end
end end
# Creates the merge request and returns its ID. # Creates the merge request and returns its ID.
# #
# This method will return `nil` if the merge request could not be # This method will return `nil` if the merge request could not be
# created. # created, otherwise it will return an Array containing the following
# values:
#
# 1. A MergeRequest instance.
# 2. A boolean indicating if the MR already exists.
def create_merge_request def create_merge_request
author_id, author_found = user_finder.author_id_for(pull_request) author_id, author_found = user_finder.author_id_for(pull_request)
...@@ -69,21 +76,42 @@ module Gitlab ...@@ -69,21 +76,42 @@ module Gitlab
merge_request_id = GithubImport merge_request_id = GithubImport
.insert_and_return_id(attributes, project.merge_requests) .insert_and_return_id(attributes, project.merge_requests)
merge_request = project.merge_requests.find(merge_request_id) [project.merge_requests.find(merge_request_id), false]
end
rescue ActiveRecord::InvalidForeignKey
# It's possible the project has been deleted since scheduling this
# job. In this case we'll just skip creating the merge request.
[]
rescue ActiveRecord::RecordNotUnique
# It's possible we previously created the MR, but failed when updating
# the Git data. In this case we'll just continue working on the
# existing row.
[project.merge_requests.find_by(iid: pull_request.iid), true]
end
def insert_git_data(merge_request, already_exists = false)
# These fields are set so we can create the correct merge request # These fields are set so we can create the correct merge request
# diffs. # diffs.
merge_request.source_branch_sha = pull_request.source_branch_sha merge_request.source_branch_sha = pull_request.source_branch_sha
merge_request.target_branch_sha = pull_request.target_branch_sha merge_request.target_branch_sha = pull_request.target_branch_sha
merge_request.keep_around_commit merge_request.keep_around_commit
merge_request.merge_request_diffs.create
merge_request.id # MR diffs normally use an "after_save" hook to pull data from Git.
# All of this happens in the transaction started by calling
# create/save/etc. This in turn can lead to these transactions being
# held open for much longer than necessary. To work around this we
# first save the diff, then populate it.
diff =
if already_exists
merge_request.merge_request_diffs.take
else
merge_request.merge_request_diffs.build
end end
rescue ActiveRecord::InvalidForeignKey
# It's possible the project has been deleted since scheduling this diff.importing = true
# job. In this case we'll just skip creating the merge request. diff.save
diff.save_git_content
end end
end end
end end
......
...@@ -180,12 +180,12 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach ...@@ -180,12 +180,12 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
allow(importer.user_finder) allow(importer.user_finder)
.to receive(:user_id_for) .to receive(:user_id_for)
.ordered.with(issue.assignees[0]) .with(issue.assignees[0])
.and_return(4) .and_return(4)
allow(importer.user_finder) allow(importer.user_finder)
.to receive(:user_id_for) .to receive(:user_id_for)
.ordered.with(issue.assignees[1]) .with(issue.assignees[1])
.and_return(5) .and_return(5)
expect(Gitlab::Database) expect(Gitlab::Database)
......
...@@ -40,13 +40,19 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi ...@@ -40,13 +40,19 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
describe '#execute' do describe '#execute' do
it 'imports the pull request' do it 'imports the pull request' do
mr = double(:merge_request, id: 10)
expect(importer) expect(importer)
.to receive(:create_merge_request) .to receive(:create_merge_request)
.and_return(10) .and_return([mr, false])
expect(importer)
.to receive(:insert_git_data)
.with(mr, false)
expect_any_instance_of(Gitlab::GithubImport::IssuableFinder) expect_any_instance_of(Gitlab::GithubImport::IssuableFinder)
.to receive(:cache_database_id) .to receive(:cache_database_id)
.with(10) .with(mr.id)
importer.execute importer.execute
end end
...@@ -99,18 +105,11 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi ...@@ -99,18 +105,11 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
importer.create_merge_request importer.create_merge_request
end end
it 'returns the ID of the created merge request' do it 'returns the created merge request' do
id = importer.create_merge_request mr, exists = importer.create_merge_request
expect(id).to be_a_kind_of(Numeric)
end
it 'creates the merge request diffs' do
importer.create_merge_request
mr = project.merge_requests.take
expect(mr.merge_request_diffs.exists?).to eq(true) expect(mr).to be_instance_of(MergeRequest)
expect(exists).to eq(false)
end end
end end
...@@ -217,5 +216,65 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi ...@@ -217,5 +216,65 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
expect { importer.create_merge_request }.not_to raise_error expect { importer.create_merge_request }.not_to raise_error
end end
end end
context 'when the merge request already exists' do
before do
allow(importer.user_finder)
.to receive(:author_id_for)
.with(pull_request)
.and_return([user.id, true])
allow(importer.user_finder)
.to receive(:assignee_id_for)
.with(pull_request)
.and_return(user.id)
end
it 'returns the existing merge request' do
mr1, exists1 = importer.create_merge_request
mr2, exists2 = importer.create_merge_request
expect(mr2).to eq(mr1)
expect(exists1).to eq(false)
expect(exists2).to eq(true)
end
end
end
describe '#insert_git_data' do
before do
allow(importer.milestone_finder)
.to receive(:id_for)
.with(pull_request)
.and_return(milestone.id)
allow(importer.user_finder)
.to receive(:author_id_for)
.with(pull_request)
.and_return([user.id, true])
allow(importer.user_finder)
.to receive(:assignee_id_for)
.with(pull_request)
.and_return(user.id)
end
it 'creates the merge request diffs' do
mr, exists = importer.create_merge_request
importer.insert_git_data(mr, exists)
expect(mr.merge_request_diffs.exists?).to eq(true)
end
it 'creates the merge request diff commits' do
mr, exists = importer.create_merge_request
importer.insert_git_data(mr, exists)
diff = mr.merge_request_diffs.take
expect(diff.merge_request_diff_commits.exists?).to eq(true)
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment