Commit 26ae1a72 authored by Kassio Borges's avatar Kassio Borges

Github Importer: Add Cache to Pull Request Reviews importer

Github importer have a mechanism to re-schedule an Importer job if it
gets throttled. But, repositories with large number of pull requests,
were in a _throttling loop_ for the Pull Request Reviews Importer.

In other words, when re-scheduled, the job was re-starting importing
reviews to all imported Pull Requests, regardless if already imported
or not, causing another throttling.

To fix this, already imported Pull Requests ids are being cached and
skipped, this way, when the job re-start, we don't do requests related
to already imported Pull Requests.

The same is applied to the Pull Request MergedBy importer.

Related to: https://gitlab.com/gitlab-org/gitlab/-/issues/329552

Changelog: changed
MR: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/60668
parent 2c4abf28
---
title: 'Github Importer: Add Cache to Pull Request Reviews importer'
merge_request: 60668
author:
type: changed
......@@ -70,7 +70,7 @@ module Gitlab
end
def pull_request_reviews(repo_name, iid)
with_rate_limit { octokit.pull_request_reviews(repo_name, iid) }
each_object(:pull_request_reviews, repo_name, iid)
end
# Returns the details of a GitHub repository.
......
......@@ -22,14 +22,18 @@ module Gitlab
:pull_requests_merged_by
end
def id_for_already_imported_cache(pr)
pr.number
def id_for_already_imported_cache(merge_request)
merge_request.id
end
def each_object_to_import
project.merge_requests.with_state(:merged).find_each do |merge_request|
next if already_imported?(merge_request)
pull_request = client.pull_request(project.import_source, merge_request.iid)
yield(pull_request)
mark_as_imported(merge_request)
end
end
end
......
......@@ -22,17 +22,22 @@ module Gitlab
:pull_request_reviews
end
def id_for_already_imported_cache(review)
review.github_id
def id_for_already_imported_cache(merge_request)
merge_request.id
end
def each_object_to_import
project.merge_requests.find_each do |merge_request|
reviews = client.pull_request_reviews(project.import_source, merge_request.iid)
reviews.each do |review|
review.merge_request_id = merge_request.id
yield(review)
end
next if already_imported?(merge_request)
client
.pull_request_reviews(project.import_source, merge_request.iid)
.each do |review|
review.merge_request_id = merge_request.id
yield(review)
end
mark_as_imported(merge_request)
end
end
end
......
......@@ -32,8 +32,9 @@ RSpec.describe Gitlab::GithubImport::Client do
it 'returns the pull request reviews' do
client = described_class.new('foo')
expect(client.octokit).to receive(:pull_request_reviews).with('foo/bar', 999)
expect(client).to receive(:with_rate_limit).and_yield
expect(client)
.to receive(:each_object)
.with(:pull_request_reviews, 'foo/bar', 999)
client.pull_request_reviews('foo/bar', 999)
end
......
......@@ -23,12 +23,11 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsMergedByImporter do
end
describe '#id_for_already_imported_cache' do
it { expect(subject.id_for_already_imported_cache(double(number: 1))).to eq(1) }
it { expect(subject.id_for_already_imported_cache(double(id: 1))).to eq(1) }
end
describe '#each_object_to_import' do
describe '#each_object_to_import', :clean_gitlab_redis_cache do
it 'fetchs the merged pull requests data' do
pull_request = double
create(
:merged_merge_request,
iid: 999,
......@@ -36,12 +35,18 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsMergedByImporter do
target_project: project
)
pull_request = double
allow(client)
.to receive(:pull_request)
.exactly(:once) # ensure to be cached on the second call
.with('http://somegithub.com', 999)
.and_return(pull_request)
expect { |b| subject.each_object_to_import(&b) }.to yield_with_args(pull_request)
expect { |b| subject.each_object_to_import(&b) }
.to yield_with_args(pull_request)
subject.each_object_to_import {}
end
end
end
......@@ -23,12 +23,18 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsReviewsImporter do
end
describe '#id_for_already_imported_cache' do
it { expect(subject.id_for_already_imported_cache(double(github_id: 1))).to eq(1) }
it { expect(subject.id_for_already_imported_cache(double(id: 1))).to eq(1) }
end
describe '#each_object_to_import' do
describe '#each_object_to_import', :clean_gitlab_redis_cache do
it 'fetchs the merged pull requests data' do
merge_request = create(:merge_request, source_project: project)
merge_request = create(
:merged_merge_request,
iid: 999,
source_project: project,
target_project: project
)
review = double
expect(review)
......@@ -37,10 +43,14 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsReviewsImporter do
allow(client)
.to receive(:pull_request_reviews)
.exactly(:once) # ensure to be cached on the second call
.with('github/repo', merge_request.iid)
.and_return([review])
expect { |b| subject.each_object_to_import(&b) }.to yield_with_args(review)
expect { |b| subject.each_object_to_import(&b) }
.to yield_with_args(review)
subject.each_object_to_import {}
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment