Commit 0d421d16 authored by Yorick Peterse's avatar Yorick Peterse

Merge branch 'fix/optimize-github-importer-for-speed-and-memory' into 'master'

Optimize GitHub importer for speed and memory

See merge request !6552
parents 0ba06916 dca1acd6
...@@ -7,6 +7,7 @@ v 8.13.0 (unreleased) ...@@ -7,6 +7,7 @@ v 8.13.0 (unreleased)
- Fix robots.txt disallowing access to groups starting with "s" (Matt Harrison) - Fix robots.txt disallowing access to groups starting with "s" (Matt Harrison)
- Revoke button in Applications Settings underlines on hover. - Revoke button in Applications Settings underlines on hover.
- Add organization field to user profile - Add organization field to user profile
- Optimize GitHub importing for speed and memory
v 8.12.2 (unreleased) v 8.12.2 (unreleased)
- Fix Import/Export not recognising correctly the imported services. - Fix Import/Export not recognising correctly the imported services.
......
...@@ -52,7 +52,7 @@ module Gitlab ...@@ -52,7 +52,7 @@ module Gitlab
def method_missing(method, *args, &block) def method_missing(method, *args, &block)
if api.respond_to?(method) if api.respond_to?(method)
request { api.send(method, *args, &block) } request(method, *args, &block)
else else
super(method, *args, &block) super(method, *args, &block)
end end
...@@ -99,20 +99,19 @@ module Gitlab ...@@ -99,20 +99,19 @@ module Gitlab
rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME
end end
def request def request(method, *args, &block)
sleep rate_limit_sleep_time if rate_limit_exceed? sleep rate_limit_sleep_time if rate_limit_exceed?
data = yield data = api.send(method, *args, &block)
yield data
last_response = api.last_response last_response = api.last_response
while last_response.rels[:next] while last_response.rels[:next]
sleep rate_limit_sleep_time if rate_limit_exceed? sleep rate_limit_sleep_time if rate_limit_exceed?
last_response = last_response.rels[:next].get last_response = last_response.rels[:next].get
data.concat(last_response.data) if last_response.data.is_a?(Array) yield last_response.data if last_response.data.is_a?(Array)
end end
data
end end
end end
end end
......
...@@ -10,6 +10,7 @@ module Gitlab ...@@ -10,6 +10,7 @@ module Gitlab
@repo = project.import_source @repo = project.import_source
@repo_url = project.import_url @repo_url = project.import_url
@errors = [] @errors = []
@labels = {}
if credentials if credentials
@client = Client.new(credentials[:user]) @client = Client.new(credentials[:user])
...@@ -23,6 +24,7 @@ module Gitlab ...@@ -23,6 +24,7 @@ module Gitlab
import_milestones import_milestones
import_issues import_issues
import_pull_requests import_pull_requests
import_comments
import_wiki import_wiki
import_releases import_releases
handle_errors handle_errors
...@@ -46,66 +48,68 @@ module Gitlab ...@@ -46,66 +48,68 @@ module Gitlab
end end
def import_labels def import_labels
labels = client.labels(repo, per_page: 100) client.labels(repo, per_page: 100) do |labels|
labels.each do |raw|
labels.each do |raw| begin
begin label = LabelFormatter.new(project, raw).create!
LabelFormatter.new(project, raw).create! @labels[label.title] = label.id
rescue => e rescue => e
errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
end
end end
end end
end end
def import_milestones def import_milestones
milestones = client.milestones(repo, state: :all, per_page: 100) client.milestones(repo, state: :all, per_page: 100) do |milestones|
milestones.each do |raw|
milestones.each do |raw| begin
begin MilestoneFormatter.new(project, raw).create!
MilestoneFormatter.new(project, raw).create! rescue => e
rescue => e errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } end
end end
end end
end end
def import_issues def import_issues
issues = client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
issues.each do |raw|
issues.each do |raw| gh_issue = IssueFormatter.new(project, raw)
gh_issue = IssueFormatter.new(project, raw)
if gh_issue.valid?
if gh_issue.valid? begin
begin issue = gh_issue.create!
issue = gh_issue.create! apply_labels(issue, raw)
apply_labels(issue) rescue => e
import_comments(issue) if gh_issue.has_comments? errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
rescue => e end
errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
end end
end end
end end
end end
def import_pull_requests def import_pull_requests
pull_requests = client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
pull_requests = pull_requests.map { |raw| PullRequestFormatter.new(project, raw) }.select(&:valid?) pull_requests.each do |raw|
pull_request = PullRequestFormatter.new(project, raw)
pull_requests.each do |pull_request| next unless pull_request.valid?
begin
restore_source_branch(pull_request) unless pull_request.source_branch_exists? begin
restore_target_branch(pull_request) unless pull_request.target_branch_exists? restore_source_branch(pull_request) unless pull_request.source_branch_exists?
restore_target_branch(pull_request) unless pull_request.target_branch_exists?
merge_request = pull_request.create!
apply_labels(merge_request) merge_request = pull_request.create!
import_comments(merge_request) apply_labels(merge_request, raw)
import_comments_on_diff(merge_request) rescue => e
rescue => e errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message }
errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message } ensure
ensure clean_up_restored_branches(pull_request)
clean_up_restored_branches(pull_request) end
end end
end end
project.repository.after_remove_branch
end end
def restore_source_branch(pull_request) def restore_source_branch(pull_request)
...@@ -125,37 +129,38 @@ module Gitlab ...@@ -125,37 +129,38 @@ module Gitlab
def clean_up_restored_branches(pull_request) def clean_up_restored_branches(pull_request)
remove_branch(pull_request.source_branch_name) unless pull_request.source_branch_exists? remove_branch(pull_request.source_branch_name) unless pull_request.source_branch_exists?
remove_branch(pull_request.target_branch_name) unless pull_request.target_branch_exists? remove_branch(pull_request.target_branch_name) unless pull_request.target_branch_exists?
project.repository.after_remove_branch
end end
def apply_labels(issuable) def apply_labels(issuable, raw_issuable)
issue = client.issue(repo, issuable.iid) if raw_issuable.labels.count > 0
label_ids = raw_issuable.labels
if issue.labels.count > 0 .map { |attrs| @labels[attrs.name] }
label_ids = issue.labels
.map { |attrs| project.labels.find_by(title: attrs.name).try(:id) }
.compact .compact
issuable.update_attribute(:label_ids, label_ids) issuable.update_attribute(:label_ids, label_ids)
end end
end end
def import_comments(issuable) def import_comments
comments = client.issue_comments(repo, issuable.iid, per_page: 100) client.issues_comments(repo, per_page: 100) do |comments|
create_comments(issuable, comments) create_comments(comments, :issue)
end end
def import_comments_on_diff(merge_request) client.pull_requests_comments(repo, per_page: 100) do |comments|
comments = client.pull_request_comments(repo, merge_request.iid, per_page: 100) create_comments(comments, :pull_request)
create_comments(merge_request, comments) end
end end
def create_comments(issuable, comments) def create_comments(comments, issuable_type)
ActiveRecord::Base.no_touching do ActiveRecord::Base.no_touching do
comments.each do |raw| comments.each do |raw|
begin begin
comment = CommentFormatter.new(project, raw) comment = CommentFormatter.new(project, raw)
issuable_class = issuable_type == :issue ? Issue : MergeRequest
iid = raw.send("#{issuable_type}_url").split('/').last # GH doesn't return parent ID directly
issuable = issuable_class.find_by_iid(iid)
next unless issuable
issuable.notes.create!(comment.attributes) issuable.notes.create!(comment.attributes)
rescue => e rescue => e
errors << { type: :comment, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } errors << { type: :comment, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
...@@ -180,13 +185,14 @@ module Gitlab ...@@ -180,13 +185,14 @@ module Gitlab
end end
def import_releases def import_releases
releases = client.releases(repo, per_page: 100) client.releases(repo, per_page: 100) do |releases|
releases.each do |raw| releases.each do |raw|
begin begin
gh_release = ReleaseFormatter.new(project, raw) gh_release = ReleaseFormatter.new(project, raw)
gh_release.create! if gh_release.valid? gh_release.create! if gh_release.valid?
rescue => e rescue => e
errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
end
end end
end end
end end
......
...@@ -66,6 +66,6 @@ describe Gitlab::GithubImport::Client, lib: true do ...@@ -66,6 +66,6 @@ describe Gitlab::GithubImport::Client, lib: true do
stub_request(:get, /api.github.com/) stub_request(:get, /api.github.com/)
allow(client.api).to receive(:rate_limit!).and_raise(Octokit::NotFound) allow(client.api).to receive(:rate_limit!).and_raise(Octokit::NotFound)
expect { client.issues }.not_to raise_error expect { client.issues {} }.not_to raise_error
end end
end end
...@@ -57,7 +57,8 @@ describe Gitlab::GithubImport::Importer, lib: true do ...@@ -57,7 +57,8 @@ describe Gitlab::GithubImport::Importer, lib: true do
created_at: created_at, created_at: created_at,
updated_at: updated_at, updated_at: updated_at,
closed_at: nil, closed_at: nil,
url: 'https://api.github.com/repos/octocat/Hello-World/issues/1347' url: 'https://api.github.com/repos/octocat/Hello-World/issues/1347',
labels: [double(name: 'Label #1')],
) )
end end
...@@ -75,7 +76,8 @@ describe Gitlab::GithubImport::Importer, lib: true do ...@@ -75,7 +76,8 @@ describe Gitlab::GithubImport::Importer, lib: true do
created_at: created_at, created_at: created_at,
updated_at: updated_at, updated_at: updated_at,
closed_at: nil, closed_at: nil,
url: 'https://api.github.com/repos/octocat/Hello-World/issues/1348' url: 'https://api.github.com/repos/octocat/Hello-World/issues/1348',
labels: [double(name: 'Label #2')],
) )
end end
...@@ -94,7 +96,8 @@ describe Gitlab::GithubImport::Importer, lib: true do ...@@ -94,7 +96,8 @@ describe Gitlab::GithubImport::Importer, lib: true do
updated_at: updated_at, updated_at: updated_at,
closed_at: nil, closed_at: nil,
merged_at: nil, merged_at: nil,
url: 'https://api.github.com/repos/octocat/Hello-World/pulls/1347' url: 'https://api.github.com/repos/octocat/Hello-World/pulls/1347',
labels: [double(name: 'Label #3')],
) )
end end
...@@ -129,6 +132,8 @@ describe Gitlab::GithubImport::Importer, lib: true do ...@@ -129,6 +132,8 @@ describe Gitlab::GithubImport::Importer, lib: true do
allow_any_instance_of(Octokit::Client).to receive(:milestones).and_return([milestone, milestone]) allow_any_instance_of(Octokit::Client).to receive(:milestones).and_return([milestone, milestone])
allow_any_instance_of(Octokit::Client).to receive(:issues).and_return([issue1, issue2]) allow_any_instance_of(Octokit::Client).to receive(:issues).and_return([issue1, issue2])
allow_any_instance_of(Octokit::Client).to receive(:pull_requests).and_return([pull_request, pull_request]) allow_any_instance_of(Octokit::Client).to receive(:pull_requests).and_return([pull_request, pull_request])
allow_any_instance_of(Octokit::Client).to receive(:issues_comments).and_return([])
allow_any_instance_of(Octokit::Client).to receive(:pull_requests_comments).and_return([])
allow_any_instance_of(Octokit::Client).to receive(:last_response).and_return(double(rels: { next: nil })) allow_any_instance_of(Octokit::Client).to receive(:last_response).and_return(double(rels: { next: nil }))
allow_any_instance_of(Octokit::Client).to receive(:releases).and_return([release1, release2]) allow_any_instance_of(Octokit::Client).to receive(:releases).and_return([release1, release2])
allow_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_raise(Gitlab::Shell::Error) allow_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_raise(Gitlab::Shell::Error)
...@@ -148,9 +153,7 @@ describe Gitlab::GithubImport::Importer, lib: true do ...@@ -148,9 +153,7 @@ describe Gitlab::GithubImport::Importer, lib: true do
errors: [ errors: [
{ type: :label, url: "https://api.github.com/repos/octocat/Hello-World/labels/bug", errors: "Validation failed: Title can't be blank, Title is invalid" }, { type: :label, url: "https://api.github.com/repos/octocat/Hello-World/labels/bug", errors: "Validation failed: Title can't be blank, Title is invalid" },
{ type: :milestone, url: "https://api.github.com/repos/octocat/Hello-World/milestones/1", errors: "Validation failed: Title has already been taken" }, { type: :milestone, url: "https://api.github.com/repos/octocat/Hello-World/milestones/1", errors: "Validation failed: Title has already been taken" },
{ type: :issue, url: "https://api.github.com/repos/octocat/Hello-World/issues/1347", errors: "Invalid Repository. Use user/repo format." },
{ type: :issue, url: "https://api.github.com/repos/octocat/Hello-World/issues/1348", errors: "Validation failed: Title can't be blank, Title is too short (minimum is 0 characters)" }, { type: :issue, url: "https://api.github.com/repos/octocat/Hello-World/issues/1348", errors: "Validation failed: Title can't be blank, Title is too short (minimum is 0 characters)" },
{ type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." },
{ type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Validation failed: Validate branches Cannot Create: This merge request already exists: [\"New feature\"]" }, { type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Validation failed: Validate branches Cannot Create: This merge request already exists: [\"New feature\"]" },
{ type: :wiki, errors: "Gitlab::Shell::Error" }, { type: :wiki, errors: "Gitlab::Shell::Error" },
{ type: :release, url: 'https://api.github.com/repos/octocat/Hello-World/releases/2', errors: "Validation failed: Description can't be blank" } { type: :release, url: 'https://api.github.com/repos/octocat/Hello-World/releases/2', errors: "Validation failed: Description can't be blank" }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment