Commit 6ddd1275 authored by Sean McGivern's avatar Sean McGivern

Merge branch 'rc/fix/gh-import-branches-performance' into 'master'

Improve GitHub import performance

Closes #36288, #36292, and #38467

See merge request gitlab-org/gitlab-ce!14445
parents 5725e347 6e48cae2
module RepositoryMirroring module RepositoryMirroring
def set_remote_as_mirror(name) IMPORT_HEAD_REFS = '+refs/heads/*:refs/heads/*'.freeze
config = raw_repository.rugged.config IMPORT_TAG_REFS = '+refs/tags/*:refs/tags/*'.freeze
def set_remote_as_mirror(name)
# This is used to define repository as equivalent as "git clone --mirror" # This is used to define repository as equivalent as "git clone --mirror"
config["remote.#{name}.fetch"] = 'refs/*:refs/*' raw_repository.rugged.config["remote.#{name}.fetch"] = 'refs/*:refs/*'
config["remote.#{name}.mirror"] = true raw_repository.rugged.config["remote.#{name}.mirror"] = true
config["remote.#{name}.prune"] = true raw_repository.rugged.config["remote.#{name}.prune"] = true
end
def set_import_remote_as_mirror(remote_name)
# Add first fetch with Rugged so it does not create its own.
raw_repository.rugged.config["remote.#{remote_name}.fetch"] = IMPORT_HEAD_REFS
add_remote_fetch_config(remote_name, IMPORT_TAG_REFS)
raw_repository.rugged.config["remote.#{remote_name}.mirror"] = true
raw_repository.rugged.config["remote.#{remote_name}.prune"] = true
end
def add_remote_fetch_config(remote_name, refspec)
run_git(%W[config --add remote.#{remote_name}.fetch #{refspec}])
end end
def fetch_mirror(remote, url) def fetch_mirror(remote, url)
......
...@@ -415,6 +415,8 @@ class MergeRequest < ActiveRecord::Base ...@@ -415,6 +415,8 @@ class MergeRequest < ActiveRecord::Base
end end
def create_merge_request_diff def create_merge_request_diff
fetch_ref
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37435 # n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37435
Gitlab::GitalyClient.allow_n_plus_1_calls do Gitlab::GitalyClient.allow_n_plus_1_calls do
merge_request_diffs.create merge_request_diffs.create
...@@ -462,6 +464,7 @@ class MergeRequest < ActiveRecord::Base ...@@ -462,6 +464,7 @@ class MergeRequest < ActiveRecord::Base
return unless open? return unless open?
old_diff_refs = self.diff_refs old_diff_refs = self.diff_refs
create_merge_request_diff create_merge_request_diff
MergeRequests::MergeRequestDiffCacheService.new.execute(self) MergeRequests::MergeRequestDiffCacheService.new.execute(self)
new_diff_refs = self.diff_refs new_diff_refs = self.diff_refs
......
...@@ -55,7 +55,6 @@ class MergeRequestDiff < ActiveRecord::Base ...@@ -55,7 +55,6 @@ class MergeRequestDiff < ActiveRecord::Base
end end
def ensure_commit_shas def ensure_commit_shas
merge_request.fetch_ref
self.start_commit_sha ||= merge_request.target_branch_sha self.start_commit_sha ||= merge_request.target_branch_sha
self.head_commit_sha ||= merge_request.source_branch_sha self.head_commit_sha ||= merge_request.source_branch_sha
self.base_commit_sha ||= find_base_sha self.base_commit_sha ||= find_base_sha
......
---
title: Improve GitHub import performance
merge_request: 14445
author:
type: other
...@@ -9,7 +9,7 @@ module Github ...@@ -9,7 +9,7 @@ module Github
include Gitlab::ShellAdapter include Gitlab::ShellAdapter
attr_reader :project, :repository, :repo, :repo_url, :wiki_url, attr_reader :project, :repository, :repo, :repo_url, :wiki_url,
:options, :errors, :cached, :verbose :options, :errors, :cached, :verbose, :last_fetched_at
def initialize(project, options = {}) def initialize(project, options = {})
@project = project @project = project
...@@ -21,12 +21,13 @@ module Github ...@@ -21,12 +21,13 @@ module Github
@verbose = options.fetch(:verbose, false) @verbose = options.fetch(:verbose, false)
@cached = Hash.new { |hash, key| hash[key] = Hash.new } @cached = Hash.new { |hash, key| hash[key] = Hash.new }
@errors = [] @errors = []
@last_fetched_at = nil
end end
# rubocop: disable Rails/Output # rubocop: disable Rails/Output
def execute def execute
puts 'Fetching repository...'.color(:aqua) if verbose puts 'Fetching repository...'.color(:aqua) if verbose
fetch_repository setup_and_fetch_repository
puts 'Fetching labels...'.color(:aqua) if verbose puts 'Fetching labels...'.color(:aqua) if verbose
fetch_labels fetch_labels
puts 'Fetching milestones...'.color(:aqua) if verbose puts 'Fetching milestones...'.color(:aqua) if verbose
...@@ -42,7 +43,7 @@ module Github ...@@ -42,7 +43,7 @@ module Github
puts 'Expiring repository cache...'.color(:aqua) if verbose puts 'Expiring repository cache...'.color(:aqua) if verbose
expire_repository_cache expire_repository_cache
true errors.empty?
rescue Github::RepositoryFetchError rescue Github::RepositoryFetchError
expire_repository_cache expire_repository_cache
false false
...@@ -52,18 +53,24 @@ module Github ...@@ -52,18 +53,24 @@ module Github
private private
def fetch_repository def setup_and_fetch_repository
begin begin
project.ensure_repository project.ensure_repository
project.repository.add_remote('github', repo_url) project.repository.add_remote('github', repo_url)
project.repository.set_remote_as_mirror('github') project.repository.set_import_remote_as_mirror('github')
project.repository.fetch_remote('github', forced: true) project.repository.add_remote_fetch_config('github', '+refs/pull/*/head:refs/merge-requests/*/head')
fetch_remote(forced: true)
rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => e rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => e
error(:project, repo_url, e.message) error(:project, repo_url, e.message)
raise Github::RepositoryFetchError raise Github::RepositoryFetchError
end end
end end
def fetch_remote(forced: false)
@last_fetched_at = Time.now
project.repository.fetch_remote('github', forced: forced)
end
def fetch_wiki_repository def fetch_wiki_repository
return if project.wiki.repository_exists? return if project.wiki.repository_exists?
...@@ -92,7 +99,7 @@ module Github ...@@ -92,7 +99,7 @@ module Github
label.color = representation.color label.color = representation.color
end end
cached[:label_ids][label.title] = label.id cached[:label_ids][representation.title] = label.id
rescue => e rescue => e
error(:label, representation.url, e.message) error(:label, representation.url, e.message)
end end
...@@ -143,7 +150,9 @@ module Github ...@@ -143,7 +150,9 @@ module Github
next unless merge_request.new_record? && pull_request.valid? next unless merge_request.new_record? && pull_request.valid?
begin begin
pull_request.restore_branches! # If the PR has been created/updated after we last fetched the
# remote, we fetch again to get the up-to-date refs.
fetch_remote if pull_request.updated_at > last_fetched_at
author_id = user_id(pull_request.author, project.creator_id) author_id = user_id(pull_request.author, project.creator_id)
description = format_description(pull_request.description, pull_request.author) description = format_description(pull_request.description, pull_request.author)
...@@ -152,6 +161,7 @@ module Github ...@@ -152,6 +161,7 @@ module Github
iid: pull_request.iid, iid: pull_request.iid,
title: pull_request.title, title: pull_request.title,
description: description, description: description,
ref_fetched: true,
source_project: pull_request.source_project, source_project: pull_request.source_project,
source_branch: pull_request.source_branch_name, source_branch: pull_request.source_branch_name,
source_branch_sha: pull_request.source_branch_sha, source_branch_sha: pull_request.source_branch_sha,
...@@ -173,8 +183,6 @@ module Github ...@@ -173,8 +183,6 @@ module Github
fetch_comments(merge_request, :review_comment, review_comments_url, LegacyDiffNote) fetch_comments(merge_request, :review_comment, review_comments_url, LegacyDiffNote)
rescue => e rescue => e
error(:pull_request, pull_request.url, e.message) error(:pull_request, pull_request.url, e.message)
ensure
pull_request.remove_restored_branches!
end end
end end
...@@ -203,11 +211,11 @@ module Github ...@@ -203,11 +211,11 @@ module Github
# for both features, like manipulating assignees, labels # for both features, like manipulating assignees, labels
# and milestones, are provided within the Issues API. # and milestones, are provided within the Issues API.
if representation.pull_request? if representation.pull_request?
return unless representation.has_labels? || representation.has_comments? return unless representation.labels? || representation.comments?
merge_request = MergeRequest.find_by!(target_project_id: project.id, iid: representation.iid) merge_request = MergeRequest.find_by!(target_project_id: project.id, iid: representation.iid)
if representation.has_labels? if representation.labels?
merge_request.update_attribute(:label_ids, label_ids(representation.labels)) merge_request.update_attribute(:label_ids, label_ids(representation.labels))
end end
...@@ -222,14 +230,16 @@ module Github ...@@ -222,14 +230,16 @@ module Github
issue.title = representation.title issue.title = representation.title
issue.description = format_description(representation.description, representation.author) issue.description = format_description(representation.description, representation.author)
issue.state = representation.state issue.state = representation.state
issue.label_ids = label_ids(representation.labels)
issue.milestone_id = milestone_id(representation.milestone) issue.milestone_id = milestone_id(representation.milestone)
issue.author_id = author_id issue.author_id = author_id
issue.assignee_ids = [user_id(representation.assignee)]
issue.created_at = representation.created_at issue.created_at = representation.created_at
issue.updated_at = representation.updated_at issue.updated_at = representation.updated_at
issue.save!(validate: false) issue.save!(validate: false)
issue.update(
label_ids: label_ids(representation.labels),
assignee_ids: assignee_ids(representation.assignees))
fetch_comments_conditionally(issue, representation) fetch_comments_conditionally(issue, representation)
end end
rescue => e rescue => e
...@@ -238,7 +248,7 @@ module Github ...@@ -238,7 +248,7 @@ module Github
end end
def fetch_comments_conditionally(issuable, representation) def fetch_comments_conditionally(issuable, representation)
if representation.has_comments? if representation.comments?
comments_url = "/repos/#{repo}/issues/#{issuable.iid}/comments" comments_url = "/repos/#{repo}/issues/#{issuable.iid}/comments"
fetch_comments(issuable, :comment, comments_url) fetch_comments(issuable, :comment, comments_url)
end end
...@@ -302,7 +312,11 @@ module Github ...@@ -302,7 +312,11 @@ module Github
end end
def label_ids(labels) def label_ids(labels)
labels.map { |attrs| cached[:label_ids][attrs.fetch('name')] }.compact labels.map { |label| cached[:label_ids][label.title] }.compact
end
def assignee_ids(assignees)
assignees.map { |assignee| user_id(assignee) }.compact
end end
def milestone_id(milestone) def milestone_id(milestone)
......
...@@ -7,10 +7,14 @@ module Github ...@@ -7,10 +7,14 @@ module Github
raw.dig('user', 'login') || 'unknown' raw.dig('user', 'login') || 'unknown'
end end
def repo?
raw['repo'].present?
end
def repo def repo
return @repo if defined?(@repo) return unless repo?
@repo = Github::Representation::Repo.new(raw['repo']) if raw['repo'].present? @repo ||= Github::Representation::Repo.new(raw['repo'])
end end
def ref def ref
...@@ -25,10 +29,6 @@ module Github ...@@ -25,10 +29,6 @@ module Github
Commit.truncate_sha(sha) Commit.truncate_sha(sha)
end end
def exists?
@exists ||= branch_exists? && commit_exists?
end
def valid? def valid?
sha.present? && ref.present? sha.present? && ref.present?
end end
...@@ -47,14 +47,6 @@ module Github ...@@ -47,14 +47,6 @@ module Github
private private
def branch_exists?
repository.branch_exists?(ref)
end
def commit_exists?
repository.branch_names_contains(sha).include?(ref)
end
def repository def repository
@repository ||= options.fetch(:repository) @repository ||= options.fetch(:repository)
end end
......
...@@ -23,14 +23,14 @@ module Github ...@@ -23,14 +23,14 @@ module Github
@author ||= Github::Representation::User.new(raw['user'], options) @author ||= Github::Representation::User.new(raw['user'], options)
end end
def assignee def labels?
return unless assigned? raw['labels'].any?
@assignee ||= Github::Representation::User.new(raw['assignee'], options)
end end
def assigned? def labels
raw['assignee'].present? @labels ||= Array(raw['labels']).map do |label|
Github::Representation::Label.new(label, options)
end
end end
end end
end end
......
module Github module Github
module Representation module Representation
class Issue < Representation::Issuable class Issue < Representation::Issuable
def labels
raw['labels']
end
def state def state
raw['state'] == 'closed' ? 'closed' : 'opened' raw['state'] == 'closed' ? 'closed' : 'opened'
end end
def has_comments? def comments?
raw['comments'] > 0 raw['comments'] > 0
end end
def has_labels?
labels.count > 0
end
def pull_request? def pull_request?
raw['pull_request'].present? raw['pull_request'].present?
end end
def assigned?
raw['assignees'].present?
end
def assignees
@assignees ||= Array(raw['assignees']).map do |user|
Github::Representation::User.new(user, options)
end
end
end end
end end
end end
module Github module Github
module Representation module Representation
class PullRequest < Representation::Issuable class PullRequest < Representation::Issuable
delegate :user, :repo, :ref, :sha, to: :source_branch, prefix: true delegate :sha, to: :source_branch, prefix: true
delegate :user, :exists?, :repo, :ref, :sha, :short_sha, to: :target_branch, prefix: true delegate :sha, to: :target_branch, prefix: true
def source_project def source_project
project project
end end
def source_branch_name def source_branch_name
@source_branch_name ||= # Mimic the "user:branch" displayed in the MR widget,
if cross_project? || !source_branch_exists? # i.e. "Request to merge rymai:add-external-mounts into master"
source_branch_name_prefixed cross_project? ? "#{source_branch.user}:#{source_branch.ref}" : source_branch.ref
else
source_branch_ref
end
end
def source_branch_exists?
return @source_branch_exists if defined?(@source_branch_exists)
@source_branch_exists = !cross_project? && source_branch.exists?
end end
def target_project def target_project
...@@ -28,11 +19,7 @@ module Github ...@@ -28,11 +19,7 @@ module Github
end end
def target_branch_name def target_branch_name
@target_branch_name ||= target_branch_exists? ? target_branch_ref : target_branch_name_prefixed target_branch.ref
end
def target_branch_exists?
@target_branch_exists ||= target_branch.exists?
end end
def state def state
...@@ -50,16 +37,14 @@ module Github ...@@ -50,16 +37,14 @@ module Github
source_branch.valid? && target_branch.valid? source_branch.valid? && target_branch.valid?
end end
def restore_branches! def assigned?
restore_source_branch! raw['assignee'].present?
restore_target_branch!
end end
def remove_restored_branches! def assignee
return if opened? return unless assigned?
remove_source_branch! @assignee ||= Github::Representation::User.new(raw['assignee'], options)
remove_target_branch!
end end
private private
...@@ -72,48 +57,14 @@ module Github ...@@ -72,48 +57,14 @@ module Github
@source_branch ||= Representation::Branch.new(raw['head'], repository: project.repository) @source_branch ||= Representation::Branch.new(raw['head'], repository: project.repository)
end end
def source_branch_name_prefixed
"gh-#{target_branch_short_sha}/#{iid}/#{source_branch_user}/#{source_branch_ref}"
end
def target_branch def target_branch
@target_branch ||= Representation::Branch.new(raw['base'], repository: project.repository) @target_branch ||= Representation::Branch.new(raw['base'], repository: project.repository)
end end
def target_branch_name_prefixed
"gl-#{target_branch_short_sha}/#{iid}/#{target_branch_user}/#{target_branch_ref}"
end
def cross_project? def cross_project?
return true if source_branch_repo.nil? return true unless source_branch.repo?
source_branch_repo.id != target_branch_repo.id
end
def restore_source_branch!
return if source_branch_exists?
source_branch.restore!(source_branch_name)
end
def restore_target_branch!
return if target_branch_exists?
target_branch.restore!(target_branch_name)
end
def remove_source_branch!
# We should remove the source/target branches only if they were
# restored. Otherwise, we'll remove branches like 'master' that
# target_branch_exists? returns true. In other words, we need
# to clean up only the restored branches that (source|target)_branch_exists?
# returns false for the first time it has been called, because of
# this that is important to memoize these values.
source_branch.remove!(source_branch_name) unless source_branch_exists?
end
def remove_target_branch! source_branch.repo.id != target_branch.repo.id
target_branch.remove!(target_branch_name) unless target_branch_exists?
end end
end end
end end
......
...@@ -39,13 +39,19 @@ class GithubImport ...@@ -39,13 +39,19 @@ class GithubImport
def import! def import!
@project.force_import_start @project.force_import_start
import_success = false
timings = Benchmark.measure do timings = Benchmark.measure do
Github::Import.new(@project, @options).execute import_success = Github::Import.new(@project, @options).execute
end end
puts "Import finished. Timings: #{timings}".color(:green) if import_success
@project.import_finish @project.import_finish
puts "Import finished. Timings: #{timings}".color(:green)
else
puts "Import was not successful. Errors were as follows:"
puts @project.import_error
end
end end
def new_project def new_project
...@@ -53,18 +59,23 @@ class GithubImport ...@@ -53,18 +59,23 @@ class GithubImport
namespace_path, _sep, name = @project_path.rpartition('/') namespace_path, _sep, name = @project_path.rpartition('/')
namespace = find_or_create_namespace(namespace_path) namespace = find_or_create_namespace(namespace_path)
Projects::CreateService.new( project = Projects::CreateService.new(
@current_user, @current_user,
name: name, name: name,
path: name, path: name,
description: @repo['description'], description: @repo['description'],
namespace_id: namespace.id, namespace_id: namespace.id,
visibility_level: visibility_level, visibility_level: visibility_level,
import_type: 'github',
import_source: @repo['full_name'],
import_url: @repo['clone_url'].sub('://', "://#{@options[:token]}@"),
skip_wiki: @repo['has_wiki'] skip_wiki: @repo['has_wiki']
).execute ).execute
project.update!(
import_type: 'github',
import_source: @repo['full_name'],
import_url: @repo['clone_url'].sub('://', "://#{@options[:token]}@")
)
project
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment