Commit 460480b7 authored by George Koltsov, committed by Robert May

Add alternative way of importing notes to GitHub Importer

parent 0d365ec3
@@ -12,17 +12,10 @@ module Gitlab
        include GithubImport::Queue
        include StageMethods

-       # The importers to run in this stage. Issues can't be imported earlier
-       # on as we also use these to enrich pull requests with assigned labels.
-       IMPORTERS = [
-         Importer::IssuesImporter,
-         Importer::DiffNotesImporter
-       ].freeze
-
        # client - An instance of Gitlab::GithubImport::Client.
        # project - An instance of Project.
        def import(client, project)
-         waiters = IMPORTERS.each_with_object({}) do |klass, hash|
+         waiters = importers(project).each_with_object({}) do |klass, hash|
            info(project.id, message: "starting importer", importer: klass.name)
            waiter = klass.new(project, client).execute
            hash[waiter.key] = waiter.jobs_remaining
@@ -30,6 +23,25 @@ module Gitlab
          AdvanceStageWorker.perform_async(project.id, waiters, :notes)
        end

        # The importers to run in this stage. Issues can't be imported earlier
        # on as we also use these to enrich pull requests with assigned labels.
        def importers(project)
          [
            Importer::IssuesImporter,
            diff_notes_importer(project)
          ]
        end

        private

        def diff_notes_importer(project)
          if project.group.present? && Feature.enabled?(:github_importer_single_endpoint_notes_import, project.group, type: :ops, default_enabled: :yaml)
            Importer::SingleEndpointDiffNotesImporter
          else
            Importer::DiffNotesImporter
          end
        end
      end
    end
  end
......
@@ -15,17 +15,31 @@ module Gitlab
        # client - An instance of Gitlab::GithubImport::Client.
        # project - An instance of Project.
        def import(client, project)
-         info(project.id, message: "starting importer", importer: 'Importer::NotesImporter')
-
-         waiter = Importer::NotesImporter
-           .new(project, client)
-           .execute
+         waiters = importers(project).each_with_object({}) do |klass, hash|
+           info(project.id, message: "starting importer", importer: klass.name)
+           waiter = klass.new(project, client).execute
+           hash[waiter.key] = waiter.jobs_remaining
+         end

          AdvanceStageWorker.perform_async(
            project.id,
-           { waiter.key => waiter.jobs_remaining },
+           waiters,
            :lfs_objects
          )
        end

        def importers(project)
          if project.group.present? && Feature.enabled?(:github_importer_single_endpoint_notes_import, project.group, type: :ops, default_enabled: :yaml)
            [
              Importer::SingleEndpointMergeRequestNotesImporter,
              Importer::SingleEndpointIssueNotesImporter
            ]
          else
            [
              Importer::NotesImporter
            ]
          end
        end
      end
    end
  end
......
---
name: github_importer_lower_per_page_limit
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/67150
rollout_issue_url:
milestone: '14.2'
type: ops
group: group::import
default_enabled: false
---
name: github_importer_single_endpoint_notes_import
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/67150
rollout_issue_url:
milestone: '14.2'
type: ops
group: group::import
default_enabled: false
@@ -272,3 +272,11 @@ The last log entry reports the number of objects fetched and imported:
  "import_stage": "Gitlab::GithubImport::Stage::FinishImportWorker"
}
```

## Errors when importing large projects

The GitHub importer may encounter errors when importing large projects. For help with this, see the
documentation for the following use cases:

- [Alternative way to import notes and diff notes](../user/project/import/github.md#alternative-way-to-import-notes-and-diff-notes)
- [Reduce GitHub API request objects per page](../user/project/import/github.md#reduce-github-api-request-objects-per-page)
@@ -184,3 +184,51 @@ servers. For 4 servers with 8 cores this means you can import up to 32 objects (
Reducing the time spent in cloning a repository can be done by increasing network throughput, CPU capacity, and disk
performance (by using high performance SSDs, for example) of the disks that store the Git repositories (for your GitLab instance).
Increasing the number of Sidekiq workers will *not* reduce the time spent cloning repositories.

## Alternative way to import notes and diff notes

When the GitHub importer runs on extremely large projects, not all notes and diff notes can be imported because of a
pagination limit on the GitHub API `issues_comments` and `pull_requests_comments` endpoints. Fetching later pages fails
with the following GitHub API error: `In order to keep the API fast for everyone, pagination is limited for this resource. Check the rel=last link relation in the Link response header to see how far back you can traverse.`

Because of that, an alternative approach for importing notes and diff notes is available behind a feature flag.
Instead of the `issues_comments` and `pull_requests_comments` list endpoints, it uses the per-object `issue_comments`
and `pull_request_comments` endpoints to pull notes from one issue or pull request at a time.

This picks up any comments the default approach misses. However, it increases the number of network requests required
to perform the import, so the import takes significantly longer.

To use this alternative import method, the `github_importer_single_endpoint_notes_import` feature flag must be enabled
for the group the project is being imported into.

Start a [Rails console](../../../administration/operations/rails_console.md#starting-a-rails-console-session) and run:
```ruby
group = Group.find_by_full_path('my/group/fullpath')
# Enable
Feature.enable(:github_importer_single_endpoint_notes_import, group)
# Disable
Feature.disable(:github_importer_single_endpoint_notes_import, group)
```
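
To confirm whether the alternative import will be used for a given group before starting the import, you can check the
flag from the same console. A minimal sketch, using the same `Feature.enabled?` call the importer itself makes (the
`type: :ops` and `default_enabled: :yaml` arguments mirror the importer code):

```ruby
group = Group.find_by_full_path('my/group/fullpath')

# Returns true when projects in this group will use the single-endpoint notes import.
Feature.enabled?(:github_importer_single_endpoint_notes_import, group, type: :ops, default_enabled: :yaml)
```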

## Reduce GitHub API request objects per page

Some GitHub API endpoints may return a 500 or 502 error for project imports from large repositories.
To reduce the chance of such errors, enable the `github_importer_lower_per_page_limit` feature flag for the group the
project is being imported into. This reduces the page size from 100 to 50 objects per request.
To enable this feature flag, start a [Rails console](../../../administration/operations/rails_console.md#starting-a-rails-console-session)
and run the following `enable` command:
```ruby
group = Group.find_by_full_path('my/group/fullpath')
# Enable
Feature.enable(:github_importer_lower_per_page_limit, group)
```
To disable the feature, run this command:
```ruby
# Disable
Feature.disable(:github_importer_lower_per_page_limit, group)
```
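
To confirm which page size the importer will use for a given project, you can call the selection helper added in this
commit from the console. A minimal sketch (assuming the project already exists in GitLab under the path shown):

```ruby
project = Project.find_by_full_path('my/group/fullpath/my-project')

# Returns 50 when the flag is enabled for the project's group, and 100 otherwise.
Gitlab::GithubImport.per_page(project)
```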
@@ -7,6 +7,8 @@ module Gitlab
      # The default timeout of the cache keys.
      TIMEOUT = 24.hours.to_i

      LONGER_TIMEOUT = 72.hours.to_i

      WRITE_IF_GREATER_SCRIPT = <<-EOF.strip_heredoc.freeze
        local key, value, ttl = KEYS[1], tonumber(ARGV[1]), ARGV[2]
        local existing = tonumber(redis.call("get", key))
......
@@ -11,6 +11,7 @@ module Gitlab
      Client.new(
        token_to_use,
        host: host.presence || self.formatted_import_url(project),
        per_page: self.per_page(project),
        parallel: parallel
      )
    end
@@ -33,5 +34,13 @@ module Gitlab
        url.to_s
      end
    end

    def self.per_page(project)
      if project.group.present? && Feature.enabled?(:github_importer_lower_per_page_limit, project.group, type: :ops, default_enabled: :yaml)
        Gitlab::GithubImport::Client::LOWER_PER_PAGE
      else
        Gitlab::GithubImport::Client::DEFAULT_PER_PAGE
      end
    end
  end
end
@@ -19,6 +19,8 @@ module Gitlab
      attr_reader :octokit

      SEARCH_MAX_REQUESTS_PER_MINUTE = 30
      DEFAULT_PER_PAGE = 100
      LOWER_PER_PAGE = 50

      # A single page of data and the corresponding page number.
      Page = Struct.new(:objects, :number)
@@ -44,7 +46,7 @@ module Gitlab
      # this value to `true` for parallel importing is crucial as
      # otherwise hitting the rate limit will result in a thread
      # being blocked in a `sleep()` call for up to an hour.
-     def initialize(token, host: nil, per_page: 100, parallel: true)
+     def initialize(token, host: nil, per_page: DEFAULT_PER_PAGE, parallel: true)
        @host = host
        @octokit = ::Octokit::Client.new(
          access_token: token,
......
# frozen_string_literal: true

# This importer is used when the `github_importer_single_endpoint_notes_import`
# feature flag is on and replaces `DiffNotesImporter`.
#
# It fetches one pull request's diff notes at a time using the `pull_request_comments`
# endpoint. This is slower than `DiffNotesImporter`, but it ensures all diff notes are
# imported, which is not always the case for `DiffNotesImporter`, because the
# `pull_requests_comments` endpoint it uses can be limited by the GitHub API
# to not return all available pages.
module Gitlab
module GithubImport
module Importer
class SingleEndpointDiffNotesImporter
include ParallelScheduling
include SingleEndpointNotesImporting
def importer_class
DiffNoteImporter
end
def representation_class
Representation::DiffNote
end
def sidekiq_worker_class
ImportDiffNoteWorker
end
def object_type
:diff_note
end
def collection_method
:pull_request_comments
end
private
def noteables
project.merge_requests.where.not(iid: already_imported_noteables) # rubocop: disable CodeReuse/ActiveRecord
end
def page_counter_id(merge_request)
"merge_request/#{merge_request.id}/#{collection_method}"
end
def notes_imported_cache_key
"github-importer/merge_request/diff_notes/already-imported/#{project.id}"
end
end
end
end
end
# frozen_string_literal: true

# This importer is used when the `github_importer_single_endpoint_notes_import`
# feature flag is on and replaces the issue notes import done by `NotesImporter`.
#
# It fetches one issue's comments at a time using the `issue_comments` endpoint.
# This is slower than `NotesImporter`, but it ensures all notes are imported,
# which is not always the case for `NotesImporter`, because the `issues_comments`
# endpoint it uses can be limited by the GitHub API to not return all available pages.
module Gitlab
module GithubImport
module Importer
class SingleEndpointIssueNotesImporter
include ParallelScheduling
include SingleEndpointNotesImporting
def importer_class
NoteImporter
end
def representation_class
Representation::Note
end
def sidekiq_worker_class
ImportNoteWorker
end
def object_type
:note
end
def collection_method
:issue_comments
end
private
def noteables
project.issues.where.not(iid: already_imported_noteables) # rubocop: disable CodeReuse/ActiveRecord
end
def page_counter_id(issue)
"issue/#{issue.id}/#{collection_method}"
end
def notes_imported_cache_key
"github-importer/issue/notes/already-imported/#{project.id}"
end
end
end
end
end
# frozen_string_literal: true

# This importer is used when the `github_importer_single_endpoint_notes_import`
# feature flag is on and replaces the merge request notes import done by `NotesImporter`.
#
# It fetches one pull request's comments at a time using the `issue_comments` endpoint.
# This is slower than `NotesImporter`, but it ensures all notes are imported,
# which is not always the case for `NotesImporter`, because the `issues_comments`
# endpoint it uses can be limited by the GitHub API to not return all available pages.
module Gitlab
module GithubImport
module Importer
class SingleEndpointMergeRequestNotesImporter
include ParallelScheduling
include SingleEndpointNotesImporting
def importer_class
NoteImporter
end
def representation_class
Representation::Note
end
def sidekiq_worker_class
ImportNoteWorker
end
def object_type
:note
end
def collection_method
:issue_comments
end
private
def noteables
project.merge_requests.where.not(iid: already_imported_noteables) # rubocop: disable CodeReuse/ActiveRecord
end
def page_counter_id(merge_request)
"merge_request/#{merge_request.id}/#{collection_method}"
end
def notes_imported_cache_key
"github-importer/merge_request/notes/already-imported/#{project.id}"
end
end
end
end
end
@@ -23,7 +23,7 @@ module Gitlab
      #
      # This method will return `nil` if no ID could be found.
      def database_id
-       val = Gitlab::Cache::Import::Caching.read(cache_key)
+       val = Gitlab::Cache::Import::Caching.read(cache_key, timeout: timeout)

        val.to_i if val.present?
      end
@@ -32,7 +32,7 @@ module Gitlab
      #
      # database_id - The ID of the corresponding database row.
      def cache_database_id(database_id)
-       Gitlab::Cache::Import::Caching.write(cache_key, database_id)
+       Gitlab::Cache::Import::Caching.write(cache_key, database_id, timeout: timeout)
      end

      private
@@ -76,6 +76,14 @@ module Gitlab
          )
        end
      end

      def timeout
        if project.group.present? && ::Feature.enabled?(:github_importer_single_endpoint_notes_import, project.group, type: :ops, default_enabled: :yaml)
          Gitlab::Cache::Import::Caching::LONGER_TIMEOUT
        else
          Gitlab::Cache::Import::Caching::TIMEOUT
        end
      end
    end
  end
end
# frozen_string_literal: true

# This module is used in:
# - SingleEndpointDiffNotesImporter
# - SingleEndpointIssueNotesImporter
# - SingleEndpointMergeRequestNotesImporter
#
# These importers are only used when the
# `github_importer_single_endpoint_notes_import`
# feature flag is on.
#
# The module fetches one noteable's associated comments at a time using the
# `issue_comments` or `pull_request_comments` endpoint. This is slower than the
# default importers, but it ensures all notes are imported, which is not always
# the case for the default importers, because the list endpoints they use can be
# limited by the GitHub API to not return all available pages.
module Gitlab
module GithubImport
module SingleEndpointNotesImporting
BATCH_SIZE = 100
def each_object_to_import
each_notes_page do |page|
page.objects.each do |note|
next if already_imported?(note)
Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched)
yield(note)
mark_as_imported(note)
end
end
end
def id_for_already_imported_cache(note)
note.id
end
private
def each_notes_page
noteables.each_batch(of: BATCH_SIZE, column: :iid) do |batch|
batch.each do |noteable|
# The page counter needs to be scoped by noteable to avoid skipping
# pages of notes from already imported noteables.
page_counter = PageCounter.new(project, page_counter_id(noteable))
repo = project.import_source
options = collection_options.merge(page: page_counter.current)
client.each_page(collection_method, repo, noteable.iid, options) do |page|
next unless page_counter.set(page.number)
yield page
end
mark_notes_imported(noteable)
end
end
end
def mark_notes_imported(noteable)
Gitlab::Cache::Import::Caching.set_add(
notes_imported_cache_key,
noteable.iid
)
end
def already_imported_noteables
Gitlab::Cache::Import::Caching.values_from_set(notes_imported_cache_key)
end
def noteables
NotImplementedError
end
def notes_imported_cache_key
NotImplementedError
end
def page_counter_id(noteable)
NotImplementedError
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::GithubImport::Importer::SingleEndpointDiffNotesImporter do
let(:client) { double }
let(:project) { create(:project, import_source: 'github/repo') }
subject { described_class.new(project, client) }
it { is_expected.to include_module(Gitlab::GithubImport::ParallelScheduling) }
it { is_expected.to include_module(Gitlab::GithubImport::SingleEndpointNotesImporting) }
it { expect(subject.representation_class).to eq(Gitlab::GithubImport::Representation::DiffNote) }
it { expect(subject.importer_class).to eq(Gitlab::GithubImport::Importer::DiffNoteImporter) }
it { expect(subject.collection_method).to eq(:pull_request_comments) }
it { expect(subject.object_type).to eq(:diff_note) }
it { expect(subject.id_for_already_imported_cache(double(id: 1))).to eq(1) }
describe '#each_object_to_import', :clean_gitlab_redis_cache do
let(:merge_request) do
create(
:merged_merge_request,
iid: 999,
source_project: project,
target_project: project
)
end
let(:note) { double(id: 1) }
let(:page) { double(objects: [note], number: 1) }
it 'fetches data' do
expect(client)
.to receive(:each_page)
.exactly(:once) # ensure to be cached on the second call
.with(:pull_request_comments, 'github/repo', merge_request.iid, page: 1)
.and_yield(page)
expect { |b| subject.each_object_to_import(&b) }.to yield_with_args(note)
subject.each_object_to_import {}
expect(
Gitlab::Cache::Import::Caching.set_includes?(
"github-importer/merge_request/diff_notes/already-imported/#{project.id}",
merge_request.iid
)
).to eq(true)
end
it 'skips cached pages' do
Gitlab::GithubImport::PageCounter
.new(project, "merge_request/#{merge_request.id}/pull_request_comments")
.set(2)
expect(client)
.to receive(:each_page)
.exactly(:once) # ensure to be cached on the second call
.with(:pull_request_comments, 'github/repo', merge_request.iid, page: 2)
subject.each_object_to_import {}
end
it 'skips cached merge requests' do
Gitlab::Cache::Import::Caching.set_add(
"github-importer/merge_request/diff_notes/already-imported/#{project.id}",
merge_request.iid
)
expect(client).not_to receive(:each_page)
subject.each_object_to_import {}
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::GithubImport::Importer::SingleEndpointIssueNotesImporter do
let(:client) { double }
let(:project) { create(:project, import_source: 'github/repo') }
subject { described_class.new(project, client) }
it { is_expected.to include_module(Gitlab::GithubImport::ParallelScheduling) }
it { is_expected.to include_module(Gitlab::GithubImport::SingleEndpointNotesImporting) }
it { expect(subject.representation_class).to eq(Gitlab::GithubImport::Representation::Note) }
it { expect(subject.importer_class).to eq(Gitlab::GithubImport::Importer::NoteImporter) }
it { expect(subject.collection_method).to eq(:issue_comments) }
it { expect(subject.object_type).to eq(:note) }
it { expect(subject.id_for_already_imported_cache(double(id: 1))).to eq(1) }
describe '#each_object_to_import', :clean_gitlab_redis_cache do
let(:issue) do
create(
:issue,
iid: 999,
project: project
)
end
let(:note) { double(id: 1) }
let(:page) { double(objects: [note], number: 1) }
it 'fetches data' do
expect(client)
.to receive(:each_page)
.exactly(:once) # ensure to be cached on the second call
.with(:issue_comments, 'github/repo', issue.iid, page: 1)
.and_yield(page)
expect { |b| subject.each_object_to_import(&b) }.to yield_with_args(note)
subject.each_object_to_import {}
expect(
Gitlab::Cache::Import::Caching.set_includes?(
"github-importer/issue/notes/already-imported/#{project.id}",
issue.iid
)
).to eq(true)
end
it 'skips cached pages' do
Gitlab::GithubImport::PageCounter
.new(project, "issue/#{issue.id}/issue_comments")
.set(2)
expect(client)
.to receive(:each_page)
.exactly(:once) # ensure to be cached on the second call
.with(:issue_comments, 'github/repo', issue.iid, page: 2)
subject.each_object_to_import {}
end
it 'skips cached merge requests' do
Gitlab::Cache::Import::Caching.set_add(
"github-importer/issue/notes/already-imported/#{project.id}",
issue.iid
)
expect(client).not_to receive(:each_page)
subject.each_object_to_import {}
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::GithubImport::Importer::SingleEndpointMergeRequestNotesImporter do
let(:client) { double }
let(:project) { create(:project, import_source: 'github/repo') }
subject { described_class.new(project, client) }
it { is_expected.to include_module(Gitlab::GithubImport::ParallelScheduling) }
it { is_expected.to include_module(Gitlab::GithubImport::SingleEndpointNotesImporting) }
it { expect(subject.representation_class).to eq(Gitlab::GithubImport::Representation::Note) }
it { expect(subject.importer_class).to eq(Gitlab::GithubImport::Importer::NoteImporter) }
it { expect(subject.collection_method).to eq(:issue_comments) }
it { expect(subject.object_type).to eq(:note) }
it { expect(subject.id_for_already_imported_cache(double(id: 1))).to eq(1) }
describe '#each_object_to_import', :clean_gitlab_redis_cache do
let(:merge_request) do
create(
:merge_request,
iid: 999,
source_project: project,
target_project: project
)
end
let(:note) { double(id: 1) }
let(:page) { double(objects: [note], number: 1) }
it 'fetches data' do
expect(client)
.to receive(:each_page)
.exactly(:once) # ensure to be cached on the second call
.with(:issue_comments, 'github/repo', merge_request.iid, page: 1)
.and_yield(page)
expect { |b| subject.each_object_to_import(&b) }.to yield_with_args(note)
subject.each_object_to_import {}
expect(
Gitlab::Cache::Import::Caching.set_includes?(
"github-importer/merge_request/notes/already-imported/#{project.id}",
merge_request.iid
)
).to eq(true)
end
it 'skips cached pages' do
Gitlab::GithubImport::PageCounter
.new(project, "merge_request/#{merge_request.id}/issue_comments")
.set(2)
expect(client)
.to receive(:each_page)
.exactly(:once) # ensure to be cached on the second call
.with(:issue_comments, 'github/repo', merge_request.iid, page: 2)
subject.each_object_to_import {}
end
it 'skips cached merge requests' do
Gitlab::Cache::Import::Caching.set_add(
"github-importer/merge_request/notes/already-imported/#{project.id}",
merge_request.iid
)
expect(client).not_to receive(:each_page)
subject.each_object_to_import {}
end
end
end
@@ -3,7 +3,7 @@
require 'spec_helper'

RSpec.describe Gitlab::GithubImport::IssuableFinder, :clean_gitlab_redis_cache do
- let(:project) { double(:project, id: 4) }
+ let(:project) { double(:project, id: 4, group: nil) }
  let(:issue) do
    double(:issue, issuable_type: MergeRequest, iid: 1)
  end
@@ -26,15 +26,77 @@ RSpec.describe Gitlab::GithubImport::IssuableFinder, :clean_gitlab_redis_cache do
      expect { finder.database_id }.to raise_error(TypeError)
    end
context 'when group is present' do
context 'when github_importer_single_endpoint_notes_import feature flag is enabled' do
it 'reads cache value with longer timeout' do
project = create(:project, import_url: 'http://t0ken@github.com/user/repo.git')
group = create(:group, projects: [project])
stub_feature_flags(github_importer_single_endpoint_notes_import: group)
expect(Gitlab::Cache::Import::Caching)
.to receive(:read)
.with(anything, timeout: Gitlab::Cache::Import::Caching::LONGER_TIMEOUT)
described_class.new(project, issue).database_id
end
end
context 'when github_importer_single_endpoint_notes_import feature flag is disabled' do
it 'reads cache value with default timeout' do
project = double(:project, id: 4, group: create(:group))
stub_feature_flags(github_importer_single_endpoint_notes_import: false)
expect(Gitlab::Cache::Import::Caching)
.to receive(:read)
.with(anything, timeout: Gitlab::Cache::Import::Caching::TIMEOUT)
described_class.new(project, issue).database_id
end
end
end
  end

  describe '#cache_database_id' do
    it 'caches the ID of a database row' do
      expect(Gitlab::Cache::Import::Caching)
        .to receive(:write)
-       .with('github-import/issuable-finder/4/MergeRequest/1', 10)
+       .with('github-import/issuable-finder/4/MergeRequest/1', 10, timeout: 86400)

      finder.cache_database_id(10)
    end
context 'when group is present' do
context 'when github_importer_single_endpoint_notes_import feature flag is enabled' do
it 'caches value with longer timeout' do
project = create(:project, import_url: 'http://t0ken@github.com/user/repo.git')
group = create(:group, projects: [project])
stub_feature_flags(github_importer_single_endpoint_notes_import: group)
expect(Gitlab::Cache::Import::Caching)
.to receive(:write)
.with(anything, anything, timeout: Gitlab::Cache::Import::Caching::LONGER_TIMEOUT)
described_class.new(project, issue).cache_database_id(10)
end
end
context 'when github_importer_single_endpoint_notes_import feature flag is disabled' do
it 'caches value with default timeout' do
project = double(:project, id: 4, group: create(:group))
stub_feature_flags(github_importer_single_endpoint_notes_import: false)
expect(Gitlab::Cache::Import::Caching)
.to receive(:write)
.with(anything, anything, timeout: Gitlab::Cache::Import::Caching::TIMEOUT)
described_class.new(project, issue).cache_database_id(10)
end
end
end
  end
end
@@ -6,7 +6,7 @@ RSpec.describe Gitlab::GithubImport::SequentialImporter do
  describe '#execute' do
    it 'imports a project in sequence' do
      repository = double(:repository)
-     project = double(:project, id: 1, repository: repository, import_url: 'http://t0ken@github.another-domain.com/repo-org/repo.git')
+     project = double(:project, id: 1, repository: repository, import_url: 'http://t0ken@github.another-domain.com/repo-org/repo.git', group: nil)
      importer = described_class.new(project, token: 'foo')

      expect_next_instance_of(Gitlab::GithubImport::Importer::RepositoryImporter) do |instance|
......
@@ -3,13 +3,17 @@
require 'spec_helper'

RSpec.describe Gitlab::GithubImport do
  before do
    stub_feature_flags(github_importer_lower_per_page_limit: false)
  end

  context 'github.com' do
-   let(:project) { double(:project, import_url: 'http://t0ken@github.com/user/repo.git', id: 1) }
+   let(:project) { double(:project, import_url: 'http://t0ken@github.com/user/repo.git', id: 1, group: nil) }

    it 'returns a new Client with a custom token' do
      expect(described_class::Client)
        .to receive(:new)
-       .with('123', host: nil, parallel: true)
+       .with('123', host: nil, parallel: true, per_page: 100)

      described_class.new_client_for(project, token: '123')
    end
@@ -23,7 +27,7 @@ RSpec.describe Gitlab::GithubImport do
      expect(described_class::Client)
        .to receive(:new)
-       .with('123', host: nil, parallel: true)
+       .with('123', host: nil, parallel: true, per_page: 100)

      described_class.new_client_for(project)
    end
@@ -45,12 +49,12 @@ RSpec.describe Gitlab::GithubImport do
  end

  context 'GitHub Enterprise' do
-   let(:project) { double(:project, import_url: 'http://t0ken@github.another-domain.com/repo-org/repo.git') }
+   let(:project) { double(:project, import_url: 'http://t0ken@github.another-domain.com/repo-org/repo.git', group: nil) }

    it 'returns a new Client with a custom token' do
      expect(described_class::Client)
        .to receive(:new)
-       .with('123', host: 'http://github.another-domain.com/api/v3', parallel: true)
+       .with('123', host: 'http://github.another-domain.com/api/v3', parallel: true, per_page: 100)

      described_class.new_client_for(project, token: '123')
    end
@@ -64,7 +68,7 @@ RSpec.describe Gitlab::GithubImport do
      expect(described_class::Client)
        .to receive(:new)
-       .with('123', host: 'http://github.another-domain.com/api/v3', parallel: true)
+       .with('123', host: 'http://github.another-domain.com/api/v3', parallel: true, per_page: 100)

      described_class.new_client_for(project)
    end
@@ -88,4 +92,37 @@ RSpec.describe Gitlab::GithubImport do
      expect(described_class.formatted_import_url(project)).to eq('http://github.another-domain.com/api/v3')
    end
  end
describe '.per_page' do
context 'when project group is present' do
context 'when github_importer_lower_per_page_limit is enabled' do
it 'returns lower per page value' do
project = create(:project, import_url: 'http://t0ken@github.com/user/repo.git')
group = create(:group, projects: [project])
stub_feature_flags(github_importer_lower_per_page_limit: group)
expect(described_class.per_page(project)).to eq(Gitlab::GithubImport::Client::LOWER_PER_PAGE)
end
end
context 'when github_importer_lower_per_page_limit is disabled' do
it 'returns default per page value' do
project = double(:project, import_url: 'http://t0ken@github.com/user/repo.git', id: 1, group: create(:group))
stub_feature_flags(github_importer_lower_per_page_limit: false)
expect(described_class.per_page(project)).to eq(Gitlab::GithubImport::Client::DEFAULT_PER_PAGE)
end
end
end
context 'when project group is missing' do
it 'returns default per page value' do
project = double(:project, import_url: 'http://t0ken@github.com/user/repo.git', id: 1, group: nil)
expect(described_class.per_page(project)).to eq(Gitlab::GithubImport::Client::DEFAULT_PER_PAGE)
end
end
end
end
@@ -10,7 +10,7 @@ RSpec.describe Gitlab::GithubImport::Stage::ImportIssuesAndDiffNotesWorker do
    it 'imports the issues and diff notes' do
      client = double(:client)

-     described_class::IMPORTERS.each do |klass|
+     worker.importers(project).each do |klass|
        importer = double(:importer)
        waiter = Gitlab::JobWaiter.new(2, '123')
@@ -31,4 +31,45 @@ RSpec.describe Gitlab::GithubImport::Stage::ImportIssuesAndDiffNotesWorker do
      worker.import(client, project)
    end
  end
describe '#importers' do
context 'when project group is present' do
let_it_be(:project) { create(:project) }
let_it_be(:group) { create(:group, projects: [project]) }
context 'when feature flag github_importer_single_endpoint_notes_import is enabled' do
it 'includes single endpoint diff notes importer' do
project = create(:project)
group = create(:group, projects: [project])
stub_feature_flags(github_importer_single_endpoint_notes_import: group)
expect(worker.importers(project)).to contain_exactly(
Gitlab::GithubImport::Importer::IssuesImporter,
Gitlab::GithubImport::Importer::SingleEndpointDiffNotesImporter
)
end
end
context 'when feature flag github_importer_single_endpoint_notes_import is disabled' do
it 'includes default diff notes importer' do
stub_feature_flags(github_importer_single_endpoint_notes_import: false)
expect(worker.importers(project)).to contain_exactly(
Gitlab::GithubImport::Importer::IssuesImporter,
Gitlab::GithubImport::Importer::DiffNotesImporter
)
end
end
end
context 'when project group is missing' do
it 'includes default diff notes importer' do
expect(worker.importers(project)).to contain_exactly(
Gitlab::GithubImport::Importer::IssuesImporter,
Gitlab::GithubImport::Importer::DiffNotesImporter
)
end
end
end
end
@@ -8,18 +8,21 @@ RSpec.describe Gitlab::GithubImport::Stage::ImportNotesWorker do
  describe '#import' do
    it 'imports all the notes' do
-     importer = double(:importer)
      client = double(:client)
-     waiter = Gitlab::JobWaiter.new(2, '123')

-     expect(Gitlab::GithubImport::Importer::NotesImporter)
-       .to receive(:new)
-       .with(project, client)
-       .and_return(importer)
-
-     expect(importer)
-       .to receive(:execute)
-       .and_return(waiter)
+     worker.importers(project).each do |klass|
+       importer = double(:importer)
+       waiter = Gitlab::JobWaiter.new(2, '123')
+
+       expect(klass)
+         .to receive(:new)
+         .with(project, client)
+         .and_return(importer)
+
+       expect(importer)
+         .to receive(:execute)
+         .and_return(waiter)
+     end

      expect(Gitlab::GithubImport::AdvanceStageWorker)
        .to receive(:perform_async)
@@ -28,4 +31,43 @@ RSpec.describe Gitlab::GithubImport::Stage::ImportNotesWorker do
      worker.import(client, project)
    end
  end
describe '#importers' do
context 'when project group is present' do
let_it_be(:project) { create(:project) }
let_it_be(:group) { create(:group, projects: [project]) }
context 'when feature flag github_importer_single_endpoint_notes_import is enabled' do
it 'includes single endpoint mr and issue notes importers' do
project = create(:project)
group = create(:group, projects: [project])
stub_feature_flags(github_importer_single_endpoint_notes_import: group)
expect(worker.importers(project)).to contain_exactly(
Gitlab::GithubImport::Importer::SingleEndpointMergeRequestNotesImporter,
Gitlab::GithubImport::Importer::SingleEndpointIssueNotesImporter
)
end
end
context 'when feature flag github_importer_single_endpoint_notes_import is disabled' do
it 'includes default notes importer' do
stub_feature_flags(github_importer_single_endpoint_notes_import: false)
expect(worker.importers(project)).to contain_exactly(
Gitlab::GithubImport::Importer::NotesImporter
)
end
end
end
context 'when project group is missing' do
it 'includes default notes importer' do
expect(worker.importers(project)).to contain_exactly(
Gitlab::GithubImport::Importer::NotesImporter
)
end
end
end
end