Commit ace29a81 authored by James Lopez's avatar James Lopez

Merge branch 'Add-Caching-To-BitBucket-Server-Importer' into 'master'

Add caching to BitBucket Server importer for pull requests

See merge request gitlab-org/gitlab!45790
parents bce90a6d f66f98f6
---
title: Add Caching to BitBucket Server Import for pull requests
merge_request: 45790
author: Simon Schrottner
type: performance
...@@ -4,11 +4,14 @@ module Gitlab ...@@ -4,11 +4,14 @@ module Gitlab
module BitbucketServerImport module BitbucketServerImport
class Importer class Importer
attr_reader :recover_missing_commits attr_reader :recover_missing_commits
attr_reader :project, :project_key, :repository_slug, :client, :errors, :users attr_reader :project, :project_key, :repository_slug, :client, :errors, :users, :already_imported_cache_key
attr_accessor :logger attr_accessor :logger
REMOTE_NAME = 'bitbucket_server' REMOTE_NAME = 'bitbucket_server'
BATCH_SIZE = 100 BATCH_SIZE = 100
# The base cache key to use for tracking already imported objects.
ALREADY_IMPORTED_CACHE_KEY =
'bitbucket_server-importer/already-imported/%{project}/%{collection}'
TempBranch = Struct.new(:name, :sha) TempBranch = Struct.new(:name, :sha)
...@@ -36,6 +39,12 @@ module Gitlab ...@@ -36,6 +39,12 @@ module Gitlab
@users = {} @users = {}
@temp_branches = [] @temp_branches = []
@logger = Gitlab::Import::Logger.build @logger = Gitlab::Import::Logger.build
@already_imported_cache_key = ALREADY_IMPORTED_CACHE_KEY %
{ project: project.id, collection: collection_method }
end
def collection_method
:pull_requests
end end
def execute def execute
...@@ -48,6 +57,7 @@ module Gitlab ...@@ -48,6 +57,7 @@ module Gitlab
log_info(stage: "complete") log_info(stage: "complete")
Gitlab::Cache::Import::Caching.expire(already_imported_cache_key, 15.minutes.to_i)
true true
end end
...@@ -167,6 +177,7 @@ module Gitlab ...@@ -167,6 +177,7 @@ module Gitlab
# on the remote server. Then we have to issue a `git fetch` to download these # on the remote server. Then we have to issue a `git fetch` to download these
# branches. # branches.
def import_pull_requests def import_pull_requests
log_info(stage: 'import_pull_requests', message: 'starting')
pull_requests = client.pull_requests(project_key, repository_slug).to_a pull_requests = client.pull_requests(project_key, repository_slug).to_a
# Creating branches on the server and fetching the newly-created branches # Creating branches on the server and fetching the newly-created branches
...@@ -176,7 +187,11 @@ module Gitlab ...@@ -176,7 +187,11 @@ module Gitlab
restore_branches(batch) if recover_missing_commits restore_branches(batch) if recover_missing_commits
batch.each do |pull_request| batch.each do |pull_request|
import_bitbucket_pull_request(pull_request) if already_imported?(pull_request)
log_info(stage: 'import_pull_requests', message: 'already imported', iid: pull_request.iid)
else
import_bitbucket_pull_request(pull_request)
end
rescue StandardError => e rescue StandardError => e
Gitlab::ErrorTracking.log_exception( Gitlab::ErrorTracking.log_exception(
e, e,
...@@ -189,6 +204,19 @@ module Gitlab ...@@ -189,6 +204,19 @@ module Gitlab
end end
end end
# Returns true if the given object has already been imported, false
# otherwise.
#
# object - The object to check.
def already_imported?(pull_request)
Gitlab::Cache::Import::Caching.set_includes?(already_imported_cache_key, pull_request.iid)
end
# Marks the given object as "already imported".
def mark_as_imported(pull_request)
Gitlab::Cache::Import::Caching.set_add(already_imported_cache_key, pull_request.iid)
end
def delete_temp_branches def delete_temp_branches
@temp_branches.each do |branch| @temp_branches.each do |branch|
client.delete_branch(project_key, repository_slug, branch.name, branch.sha) client.delete_branch(project_key, repository_slug, branch.name, branch.sha)
...@@ -236,6 +264,7 @@ module Gitlab ...@@ -236,6 +264,7 @@ module Gitlab
end end
log_info(stage: 'import_bitbucket_pull_requests', message: 'finished', iid: pull_request.iid) log_info(stage: 'import_bitbucket_pull_requests', message: 'finished', iid: pull_request.iid)
mark_as_imported(pull_request)
end end
def import_pull_request_comments(pull_request, merge_request) def import_pull_request_comments(pull_request, merge_request)
......
...@@ -115,6 +115,12 @@ RSpec.describe Gitlab::BitbucketServerImport::Importer do ...@@ -115,6 +115,12 @@ RSpec.describe Gitlab::BitbucketServerImport::Importer do
allow(subject.client).to receive(:pull_requests).and_return([pull_request]) allow(subject.client).to receive(:pull_requests).and_return([pull_request])
end end
# As we are using Caching with redis, it is best to clean the cache after each test run, else we need to wait for
# the expiration by the importer
after do
Gitlab::Cache::Import::Caching.expire(subject.already_imported_cache_key, 0)
end
it 'imports merge event' do it 'imports merge event' do
expect(subject.client).to receive(:activities).and_return([merge_event]) expect(subject.client).to receive(:activities).and_return([merge_event])
...@@ -463,6 +469,47 @@ RSpec.describe Gitlab::BitbucketServerImport::Importer do ...@@ -463,6 +469,47 @@ RSpec.describe Gitlab::BitbucketServerImport::Importer do
subject.execute subject.execute
end end
describe 'import pull requests with caching' do
let(:pull_request_already_imported) do
instance_double(
BitbucketServer::Representation::PullRequest,
iid: 11)
end
let(:pull_request_to_be_imported) do
instance_double(
BitbucketServer::Representation::PullRequest,
iid: 12,
source_branch_sha: sample.commits.last,
source_branch_name: Gitlab::Git::BRANCH_REF_PREFIX + sample.source_branch,
target_branch_sha: sample.commits.first,
target_branch_name: Gitlab::Git::BRANCH_REF_PREFIX + sample.target_branch,
title: 'This is a title',
description: 'This is a test pull request',
state: 'merged',
author: 'Test Author',
author_email: pull_request_author.email,
author_username: pull_request_author.username,
created_at: Time.now,
updated_at: Time.now,
raw: {},
merged?: true)
end
before do
Gitlab::Cache::Import::Caching.set_add(subject.already_imported_cache_key, pull_request_already_imported.iid)
allow(subject.client).to receive(:pull_requests).and_return([pull_request_to_be_imported, pull_request_already_imported])
end
it 'only imports one Merge Request, as the other on is in the cache' do
expect(subject.client).to receive(:activities).and_return([merge_event])
expect { subject.execute }.to change { MergeRequest.count }.by(1)
expect(Gitlab::Cache::Import::Caching.set_includes?(subject.already_imported_cache_key, pull_request_already_imported.iid)).to eq(true)
expect(Gitlab::Cache::Import::Caching.set_includes?(subject.already_imported_cache_key, pull_request_to_be_imported.iid)).to eq(true)
end
end
end end
describe 'inaccessible branches' do describe 'inaccessible branches' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment