Commit d9fb0278 authored by Stan Hu's avatar Stan Hu

Merge branch 'kassio/github-importer-cache-reviews-importer-in-two-levels' into 'master'

GithubImporter: Optimize Pull Request Review Importer [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!62036
parents ef1ced22 f60fd4e3
---
name: github_review_importer_query_only_unimported_merge_requests
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/62036
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/332982
milestone: '14.0'
type: development
group: group::import
default_enabled: true
......@@ -113,6 +113,17 @@ module Gitlab
end
end
# Fetches every member of the Redis set stored under the given key.
#
# raw_key - The key of the set to read.
#
# Returns an Array of String values (empty when the set does not exist).
def self.values_from_set(raw_key)
  full_key = cache_key_for(raw_key)

  Redis::Cache.with { |redis| redis.smembers(full_key) }
end
# Sets multiple keys to given values.
#
# mapping - A Hash mapping the cache keys to their values.
......
......@@ -6,6 +6,13 @@ module Gitlab
class PullRequestsReviewsImporter
include ParallelScheduling
# Builds the importer and precomputes the Redis cache key that tracks
# which merge requests already had all of their reviews imported.
def initialize(...)
super
@merge_requests_already_imported_cache_key =
"github-importer/merge_request/already-imported/#{project.id}"
end
# The class used to import a single pull request review.
def importer_class
PullRequestReviewImporter
end
......@@ -22,11 +29,31 @@ module Gitlab
:pull_request_reviews
end
def id_for_already_imported_cache(merge_request)
merge_request.id
# ID used to mark an individual review as already imported.
#
# review - The GitHub review object.
def id_for_already_imported_cache(review)
review.id
end
# Yields each pull request review that should be imported.
#
# Depending on the feature flag this walks either only the merge
# requests that still have reviews pending, or the legacy path that
# walks every merge request and skips the already imported ones.
def each_object_to_import(&block)
  return each_merge_request_to_import(&block) if use_github_review_importer_query_only_unimported_merge_requests?

  each_merge_request_skipping_imported(&block)
end
def each_object_to_import
private
attr_reader :merge_requests_already_imported_cache_key
# Feature flag guarding the optimized review import path.
# https://gitlab.com/gitlab-org/gitlab/-/merge_requests/62036#note_587181108
def use_github_review_importer_query_only_unimported_merge_requests?
  flag = :github_review_importer_query_only_unimported_merge_requests

  Feature.enabled?(flag, default_enabled: :yaml)
end
def each_merge_request_skipping_imported
project.merge_requests.find_each do |merge_request|
next if already_imported?(merge_request)
......@@ -40,6 +67,67 @@ module Gitlab
mark_as_imported(merge_request)
end
end
# The worker may be interrupted (by rate limiting, for instance) at
# different points. To avoid requesting already imported data when that
# happens:
# - interrupted before all reviews of one merge request are imported:
#   the reviews page is cached per merge request via `PageCounter`;
# - interrupted before all merge requests are processed: fully processed
#   merge requests are cached via `mark_merge_request_reviews_imported`.
def each_merge_request_to_import
  each_review_page do |page, merge_request|
    page.objects.each do |pr_review|
      next if already_imported?(pr_review)

      pr_review.merge_request_id = merge_request.id
      yield(pr_review)
      mark_as_imported(pr_review)
    end
  end
end
# Iterates over every page of reviews for each merge request that still
# has reviews to import, yielding `(page, merge_request)` per page.
#
# Progress is tracked per merge request with a PageCounter so an
# interrupted worker resumes from the last fetched page instead of
# starting over at page 1.
def each_review_page
merge_requests_to_import.find_each do |merge_request|
# The page counter needs to be scoped by merge request to avoid skipping
# pages of reviews from already imported merge requests.
page_counter = PageCounter.new(project, page_counter_id(merge_request))
repo = project.import_source
options = collection_options.merge(page: page_counter.current)
client.each_page(collection_method, repo, merge_request.iid, options) do |page|
# NOTE(review): presumably `set` returns false for a page number that
# was already recorded, so previously handled pages are not re-yielded
# — confirm against PageCounter#set.
next unless page_counter.set(page.number)
yield(page, merge_request)
end
# Avoid unnecessary Redis cache keys after the work is done.
page_counter.expire!
mark_merge_request_reviews_imported(merge_request)
end
end
# Returns only the merge requests that still have reviews to be imported,
# by excluding the IDs stored in the "already imported" Redis set.
def merge_requests_to_import
project.merge_requests.where.not(id: already_imported_merge_requests) # rubocop: disable CodeReuse/ActiveRecord
end
# IDs of merge requests whose reviews were all imported, as read from the
# Redis set (values come back as Strings).
def already_imported_merge_requests
Gitlab::Cache::Import::Caching.values_from_set(merge_requests_already_imported_cache_key)
end
# Cache key fragment that scopes the review page counter to a single
# merge request.
def page_counter_id(merge_request)
  ['merge_request', merge_request.id, collection_method].join('/')
end
# Remembers that every review of the given merge request was imported,
# so later runs skip the merge request entirely.
def mark_merge_request_reviews_imported(merge_request)
  Gitlab::Cache::Import::Caching
    .set_add(merge_requests_already_imported_cache_key, merge_request.id)
end
end
end
end
......
......@@ -26,6 +26,10 @@ module Gitlab
# The page number to resume fetching from; falls back to the first page
# when no value was recorded yet.
def current
  stored = Gitlab::Cache::Import::Caching.read_integer(cache_key)
  stored || 1
end
# Expires the counter's Redis key with a timeout of 0, i.e. removes it
# immediately once the import work is done (after which #current falls
# back to 1).
def expire!
Gitlab::Cache::Import::Caching.expire(cache_key, 0)
end
end
end
end
......@@ -88,6 +88,18 @@ RSpec.describe Gitlab::Cache::Import::Caching, :clean_gitlab_redis_cache do
end
end
describe '.values_from_set' do
it 'returns empty list when the set is empty' do
expect(described_class.values_from_set('foo')).to eq([])
end
it 'returns the set list of values' do
# Redis stores set members as strings, hence the String expectation.
described_class.set_add('foo', 10)
expect(described_class.values_from_set('foo')).to eq(['10'])
end
end
describe '.write_multiple' do
it 'sets multiple keys when key_prefix not set' do
mapping = { 'foo' => 10, 'bar' => 20 }
......
......@@ -27,30 +27,100 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsReviewsImporter do
end
describe '#each_object_to_import', :clean_gitlab_redis_cache do
it 'fetchs the merged pull requests data' do
merge_request = create(
:merged_merge_request,
iid: 999,
source_project: project,
target_project: project
)
review = double
expect(review)
.to receive(:merge_request_id=)
.with(merge_request.id)
allow(client)
.to receive(:pull_request_reviews)
.exactly(:once) # ensure to be cached on the second call
.with('github/repo', merge_request.iid)
.and_return([review])
expect { |b| subject.each_object_to_import(&b) }
.to yield_with_args(review)
subject.each_object_to_import {}
context 'when github_review_importer_query_only_unimported_merge_requests is enabled' do
before do
stub_feature_flags(github_review_importer_query_only_unimported_merge_requests: true)
end
let(:merge_request) do
create(
:merged_merge_request,
iid: 999,
source_project: project,
target_project: project
)
end
let(:review) { double(id: 1) }
it 'fetches the pull requests reviews data' do
page = double(objects: [review], number: 1)
expect(review)
.to receive(:merge_request_id=)
.with(merge_request.id)
expect(client)
.to receive(:each_page)
.exactly(:once) # ensure to be cached on the second call
.with(:pull_request_reviews, 'github/repo', merge_request.iid, page: 1)
.and_yield(page)
expect { |b| subject.each_object_to_import(&b) }
.to yield_with_args(review)
subject.each_object_to_import {}
end
it 'skips cached pages' do
# Pre-seed the per-merge-request page counter so the importer resumes
# from page 2 instead of page 1.
Gitlab::GithubImport::PageCounter
.new(project, "merge_request/#{merge_request.id}/pull_request_reviews")
.set(2)
expect(review).not_to receive(:merge_request_id=)
expect(client)
.to receive(:each_page)
.exactly(:once) # ensure to be cached on the second call
.with(:pull_request_reviews, 'github/repo', merge_request.iid, page: 2)
subject.each_object_to_import {}
end
it 'skips cached merge requests' do
# Pre-seed the "already imported" set so the merge request is excluded
# from the query entirely and no API call happens.
Gitlab::Cache::Import::Caching.set_add(
"github-importer/merge_request/already-imported/#{project.id}",
merge_request.id
)
expect(review).not_to receive(:merge_request_id=)
expect(client).not_to receive(:each_page)
subject.each_object_to_import {}
end
end
context 'when github_review_importer_query_only_unimported_merge_requests is disabled' do
before do
stub_feature_flags(github_review_importer_query_only_unimported_merge_requests: false)
end
# Legacy path: every merge request is walked and reviews are fetched via
# the non-paginated client call.
it 'fetches the merged pull requests data' do
merge_request = create(
:merged_merge_request,
iid: 999,
source_project: project,
target_project: project
)
review = double
expect(review)
.to receive(:merge_request_id=)
.with(merge_request.id)
allow(client)
.to receive(:pull_request_reviews)
.exactly(:once) # ensure to be cached on the second call
.with('github/repo', merge_request.iid)
.and_return([review])
expect { |b| subject.each_object_to_import(&b) }
.to yield_with_args(review)
subject.each_object_to_import {}
end
end
end
end
......@@ -31,4 +31,15 @@ RSpec.describe Gitlab::GithubImport::PageCounter, :clean_gitlab_redis_cache do
expect(counter.current).to eq(2)
end
end
describe '#expire!' do
it 'expires the current page counter' do
counter.set(2)
counter.expire!
# The raw Redis value is gone and #current falls back to its default.
expect(Gitlab::Cache::Import::Caching.read_integer(counter.cache_key)).to be_nil
expect(counter.current).to eq(1)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment