Commit 4fd01518 authored by Sean McGivern's avatar Sean McGivern

Merge branch 'fix/make-github-import-retryable' into 'master'

Modify GitHub importer to be retryable

Closes #23533

See merge request !7003
parents 04c7c8d8 f68d9261
...@@ -33,6 +33,7 @@ Please view this file on the master branch, on stable branches it's out of date. ...@@ -33,6 +33,7 @@ Please view this file on the master branch, on stable branches it's out of date.
- Fix documents and comments on Build API `scope` - Fix documents and comments on Build API `scope`
- Fix applying labels for GitHub-imported MRs - Fix applying labels for GitHub-imported MRs
- Fix importing MR comments from GitHub - Fix importing MR comments from GitHub
- Modify GitHub importer to be retryable
- Refactor email, use setter method instead AR callbacks for email attribute (Semyon Pupkov) - Refactor email, use setter method instead AR callbacks for email attribute (Semyon Pupkov)
- Shortened merge request modal to let clipboard button not overlap - Shortened merge request modal to let clipboard button not overlap
......
...@@ -29,7 +29,7 @@ module Projects ...@@ -29,7 +29,7 @@ module Projects
if unknown_url? if unknown_url?
# In this case, we only want to import issues, not a repository. # In this case, we only want to import issues, not a repository.
create_repository create_repository
else elsif !project.repository_exists?
import_repository import_repository
end end
end end
......
...@@ -10,7 +10,9 @@ module Gitlab ...@@ -10,7 +10,9 @@ module Gitlab
end end
def create! def create!
self.klass.create!(self.attributes) project.public_send(project_association).find_or_create_by!(find_condition) do |record|
record.attributes = attributes
end
end end
private private
......
...@@ -110,7 +110,7 @@ module Gitlab ...@@ -110,7 +110,7 @@ module Gitlab
if block_given? if block_given?
yield data yield data
# api.last_response could change while we're yielding (e.g. fetching labels for each PR) # api.last_response could change while we're yielding (e.g. fetching labels for each PR)
# so we cache our own last request # so we cache our own last response
each_response_page(last_response, &block) each_response_page(last_response, &block)
else else
each_response_page(last_response) { |page| data.concat(page) } each_response_page(last_response) { |page| data.concat(page) }
......
...@@ -24,7 +24,8 @@ module Gitlab ...@@ -24,7 +24,8 @@ module Gitlab
import_milestones import_milestones
import_issues import_issues
import_pull_requests import_pull_requests
import_comments import_comments(:issues)
import_comments(:pull_requests)
import_wiki import_wiki
import_releases import_releases
handle_errors handle_errors
...@@ -48,7 +49,7 @@ module Gitlab ...@@ -48,7 +49,7 @@ module Gitlab
end end
def import_labels def import_labels
client.labels(repo, per_page: 100) do |labels| fetch_resources(:labels, repo, per_page: 100) do |labels|
labels.each do |raw| labels.each do |raw|
begin begin
label = LabelFormatter.new(project, raw).create! label = LabelFormatter.new(project, raw).create!
...@@ -61,7 +62,7 @@ module Gitlab ...@@ -61,7 +62,7 @@ module Gitlab
end end
def import_milestones def import_milestones
client.milestones(repo, state: :all, per_page: 100) do |milestones| fetch_resources(:milestones, repo, state: :all, per_page: 100) do |milestones|
milestones.each do |raw| milestones.each do |raw|
begin begin
MilestoneFormatter.new(project, raw).create! MilestoneFormatter.new(project, raw).create!
...@@ -73,7 +74,7 @@ module Gitlab ...@@ -73,7 +74,7 @@ module Gitlab
end end
def import_issues def import_issues
client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues| fetch_resources(:issues, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
issues.each do |raw| issues.each do |raw|
gh_issue = IssueFormatter.new(project, raw) gh_issue = IssueFormatter.new(project, raw)
...@@ -90,7 +91,7 @@ module Gitlab ...@@ -90,7 +91,7 @@ module Gitlab
end end
def import_pull_requests def import_pull_requests
client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests| fetch_resources(:pull_requests, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
pull_requests.each do |raw| pull_requests.each do |raw|
pull_request = PullRequestFormatter.new(project, raw) pull_request = PullRequestFormatter.new(project, raw)
next unless pull_request.valid? next unless pull_request.valid?
...@@ -148,12 +149,23 @@ module Gitlab ...@@ -148,12 +149,23 @@ module Gitlab
end end
end end
def import_comments def import_comments(issuable_type)
client.issues_comments(repo, per_page: 100) do |comments| resource_type = "#{issuable_type}_comments".to_sym
create_comments(comments)
end # Two notes here:
# 1. We don't have a distinctive attribute for comments (unlike issues iid), so we fetch the last inserted note,
# compare it against every comment in the current imported page until we find match, and that's where start importing
# 2. GH returns comments for _both_ issues and PRs through issues_comments API, while pull_requests_comments returns
# only comments on diffs, so select last note not based on noteable_type but on line_code
line_code_is = issuable_type == :pull_requests ? 'NOT NULL' : 'NULL'
last_note = project.notes.where("line_code IS #{line_code_is}").last
fetch_resources(resource_type, repo, per_page: 100) do |comments|
if last_note
discard_inserted_comments(comments, last_note)
last_note = nil
end
client.pull_requests_comments(repo, per_page: 100) do |comments|
create_comments(comments) create_comments(comments)
end end
end end
...@@ -177,6 +189,24 @@ module Gitlab ...@@ -177,6 +189,24 @@ module Gitlab
end end
end end
def discard_inserted_comments(comments, last_note)
last_note_attrs = nil
cut_off_index = comments.find_index do |raw|
comment = CommentFormatter.new(project, raw)
comment_attrs = comment.attributes
last_note_attrs ||= last_note.slice(*comment_attrs.keys)
comment_attrs.with_indifferent_access == last_note_attrs
end
# No matching resource in the collection, which means we got halted right on the end of the last page, so all good
return unless cut_off_index
# Otherwise, remove the resources we've already inserted
comments.shift(cut_off_index + 1)
end
def import_wiki def import_wiki
unless project.wiki.repository_exists? unless project.wiki.repository_exists?
wiki = WikiFormatter.new(project) wiki = WikiFormatter.new(project)
...@@ -192,7 +222,7 @@ module Gitlab ...@@ -192,7 +222,7 @@ module Gitlab
end end
def import_releases def import_releases
client.releases(repo, per_page: 100) do |releases| fetch_resources(:releases, repo, per_page: 100) do |releases|
releases.each do |raw| releases.each do |raw|
begin begin
gh_release = ReleaseFormatter.new(project, raw) gh_release = ReleaseFormatter.new(project, raw)
...@@ -203,6 +233,47 @@ module Gitlab ...@@ -203,6 +233,47 @@ module Gitlab
end end
end end
end end
def fetch_resources(resource_type, *opts)
return if imported?(resource_type)
opts.last.merge!(page: current_page(resource_type))
client.public_send(resource_type, *opts) do |resources|
yield resources
increment_page(resource_type)
end
imported!(resource_type)
end
def imported?(resource_type)
Rails.cache.read("#{cache_key_prefix}:#{resource_type}:imported")
end
def imported!(resource_type)
Rails.cache.write("#{cache_key_prefix}:#{resource_type}:imported", true, ex: 1.day)
end
def increment_page(resource_type)
key = "#{cache_key_prefix}:#{resource_type}:current-page"
# Rails.cache.increment calls INCRBY directly on the value stored under the key, which is
# a serialized ActiveSupport::Cache::Entry, so it will return an error by Redis, hence this ugly work-around
page = Rails.cache.read(key)
page += 1
Rails.cache.write(key, page)
page
end
def current_page(resource_type)
Rails.cache.fetch("#{cache_key_prefix}:#{resource_type}:current-page", ex: 1.day) { 1 }
end
def cache_key_prefix
@cache_key_prefix ||= "github-import:#{project.id}"
end
end end
end end
end end
...@@ -20,8 +20,12 @@ module Gitlab ...@@ -20,8 +20,12 @@ module Gitlab
raw_data.comments > 0 raw_data.comments > 0
end end
def klass def project_association
Issue :issues
end
def find_condition
{ iid: number }
end end
def number def number
......
...@@ -9,8 +9,8 @@ module Gitlab ...@@ -9,8 +9,8 @@ module Gitlab
} }
end end
def klass def project_association
Label :labels
end end
def create! def create!
......
...@@ -14,8 +14,12 @@ module Gitlab ...@@ -14,8 +14,12 @@ module Gitlab
} }
end end
def klass def project_association
Milestone :milestones
end
def find_condition
{ iid: raw_data.number }
end end
private private
......
...@@ -24,8 +24,12 @@ module Gitlab ...@@ -24,8 +24,12 @@ module Gitlab
} }
end end
def klass def project_association
MergeRequest :merge_requests
end
def find_condition
{ iid: number }
end end
def number def number
......
...@@ -11,8 +11,12 @@ module Gitlab ...@@ -11,8 +11,12 @@ module Gitlab
} }
end end
def klass def project_association
Release :releases
end
def find_condition
{ tag: raw_data.tag_name }
end end
def valid? def valid?
......
...@@ -2,6 +2,10 @@ require 'spec_helper' ...@@ -2,6 +2,10 @@ require 'spec_helper'
describe Gitlab::GithubImport::Importer, lib: true do describe Gitlab::GithubImport::Importer, lib: true do
describe '#execute' do describe '#execute' do
before do
allow(Rails).to receive(:cache).and_return(ActiveSupport::Cache::MemoryStore.new)
end
context 'when an error occurs' do context 'when an error occurs' do
let(:project) { create(:project, import_url: 'https://github.com/octocat/Hello-World.git', wiki_access_level: ProjectFeature::DISABLED) } let(:project) { create(:project, import_url: 'https://github.com/octocat/Hello-World.git', wiki_access_level: ProjectFeature::DISABLED) }
let(:octocat) { double(id: 123456, login: 'octocat') } let(:octocat) { double(id: 123456, login: 'octocat') }
...@@ -152,10 +156,9 @@ describe Gitlab::GithubImport::Importer, lib: true do ...@@ -152,10 +156,9 @@ describe Gitlab::GithubImport::Importer, lib: true do
message: 'The remote data could not be fully imported.', message: 'The remote data could not be fully imported.',
errors: [ errors: [
{ type: :label, url: "https://api.github.com/repos/octocat/Hello-World/labels/bug", errors: "Validation failed: Title can't be blank, Title is invalid" }, { type: :label, url: "https://api.github.com/repos/octocat/Hello-World/labels/bug", errors: "Validation failed: Title can't be blank, Title is invalid" },
{ type: :milestone, url: "https://api.github.com/repos/octocat/Hello-World/milestones/1", errors: "Validation failed: Title has already been taken" },
{ type: :issue, url: "https://api.github.com/repos/octocat/Hello-World/issues/1348", errors: "Validation failed: Title can't be blank, Title is too short (minimum is 0 characters)" }, { type: :issue, url: "https://api.github.com/repos/octocat/Hello-World/issues/1348", errors: "Validation failed: Title can't be blank, Title is too short (minimum is 0 characters)" },
{ type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." }, { type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." },
{ type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Validation failed: Validate branches Cannot Create: This merge request already exists: [\"New feature\"]" }, { type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." },
{ type: :wiki, errors: "Gitlab::Shell::Error" }, { type: :wiki, errors: "Gitlab::Shell::Error" },
{ type: :release, url: 'https://api.github.com/repos/octocat/Hello-World/releases/2', errors: "Validation failed: Description can't be blank" } { type: :release, url: 'https://api.github.com/repos/octocat/Hello-World/releases/2', errors: "Validation failed: Description can't be blank" }
] ]
......
...@@ -110,7 +110,7 @@ describe Projects::ImportService, services: true do ...@@ -110,7 +110,7 @@ describe Projects::ImportService, services: true do
end end
it 'expires existence cache after error' do it 'expires existence cache after error' do
allow_any_instance_of(Project).to receive(:repository_exists?).and_return(true) allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false, true)
expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_raise(Gitlab::Shell::Error.new('Failed to import the repository')) expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_raise(Gitlab::Shell::Error.new('Failed to import the repository'))
expect_any_instance_of(Repository).to receive(:expire_emptiness_caches).and_call_original expect_any_instance_of(Repository).to receive(:expire_emptiness_caches).and_call_original
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment