Commit 6ddd1275 authored by Sean McGivern's avatar Sean McGivern

Merge branch 'rc/fix/gh-import-branches-performance' into 'master'

Improve GitHub import performance

Closes #36288, #36292, and #38467

See merge request gitlab-org/gitlab-ce!14445
parents 5725e347 6e48cae2
module RepositoryMirroring
def set_remote_as_mirror(name)
config = raw_repository.rugged.config
IMPORT_HEAD_REFS = '+refs/heads/*:refs/heads/*'.freeze
IMPORT_TAG_REFS = '+refs/tags/*:refs/tags/*'.freeze
def set_remote_as_mirror(name)
# This is used to define repository as equivalent as "git clone --mirror"
config["remote.#{name}.fetch"] = 'refs/*:refs/*'
config["remote.#{name}.mirror"] = true
config["remote.#{name}.prune"] = true
raw_repository.rugged.config["remote.#{name}.fetch"] = 'refs/*:refs/*'
raw_repository.rugged.config["remote.#{name}.mirror"] = true
raw_repository.rugged.config["remote.#{name}.prune"] = true
end
def set_import_remote_as_mirror(remote_name)
# Add first fetch with Rugged so it does not create its own.
raw_repository.rugged.config["remote.#{remote_name}.fetch"] = IMPORT_HEAD_REFS
add_remote_fetch_config(remote_name, IMPORT_TAG_REFS)
raw_repository.rugged.config["remote.#{remote_name}.mirror"] = true
raw_repository.rugged.config["remote.#{remote_name}.prune"] = true
end
def add_remote_fetch_config(remote_name, refspec)
run_git(%W[config --add remote.#{remote_name}.fetch #{refspec}])
end
def fetch_mirror(remote, url)
......
......@@ -415,6 +415,8 @@ class MergeRequest < ActiveRecord::Base
end
def create_merge_request_diff
fetch_ref
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37435
Gitlab::GitalyClient.allow_n_plus_1_calls do
merge_request_diffs.create
......@@ -462,6 +464,7 @@ class MergeRequest < ActiveRecord::Base
return unless open?
old_diff_refs = self.diff_refs
create_merge_request_diff
MergeRequests::MergeRequestDiffCacheService.new.execute(self)
new_diff_refs = self.diff_refs
......
......@@ -55,7 +55,6 @@ class MergeRequestDiff < ActiveRecord::Base
end
def ensure_commit_shas
merge_request.fetch_ref
self.start_commit_sha ||= merge_request.target_branch_sha
self.head_commit_sha ||= merge_request.source_branch_sha
self.base_commit_sha ||= find_base_sha
......
---
title: Improve GitHub import performance
merge_request: 14445
author:
type: other
......@@ -9,7 +9,7 @@ module Github
include Gitlab::ShellAdapter
attr_reader :project, :repository, :repo, :repo_url, :wiki_url,
:options, :errors, :cached, :verbose
:options, :errors, :cached, :verbose, :last_fetched_at
def initialize(project, options = {})
@project = project
......@@ -21,12 +21,13 @@ module Github
@verbose = options.fetch(:verbose, false)
@cached = Hash.new { |hash, key| hash[key] = Hash.new }
@errors = []
@last_fetched_at = nil
end
# rubocop: disable Rails/Output
def execute
puts 'Fetching repository...'.color(:aqua) if verbose
fetch_repository
setup_and_fetch_repository
puts 'Fetching labels...'.color(:aqua) if verbose
fetch_labels
puts 'Fetching milestones...'.color(:aqua) if verbose
......@@ -42,7 +43,7 @@ module Github
puts 'Expiring repository cache...'.color(:aqua) if verbose
expire_repository_cache
true
errors.empty?
rescue Github::RepositoryFetchError
expire_repository_cache
false
......@@ -52,18 +53,24 @@ module Github
private
def fetch_repository
def setup_and_fetch_repository
begin
project.ensure_repository
project.repository.add_remote('github', repo_url)
project.repository.set_remote_as_mirror('github')
project.repository.fetch_remote('github', forced: true)
project.repository.set_import_remote_as_mirror('github')
project.repository.add_remote_fetch_config('github', '+refs/pull/*/head:refs/merge-requests/*/head')
fetch_remote(forced: true)
rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => e
error(:project, repo_url, e.message)
raise Github::RepositoryFetchError
end
end
def fetch_remote(forced: false)
@last_fetched_at = Time.now
project.repository.fetch_remote('github', forced: forced)
end
def fetch_wiki_repository
return if project.wiki.repository_exists?
......@@ -92,7 +99,7 @@ module Github
label.color = representation.color
end
cached[:label_ids][label.title] = label.id
cached[:label_ids][representation.title] = label.id
rescue => e
error(:label, representation.url, e.message)
end
......@@ -143,7 +150,9 @@ module Github
next unless merge_request.new_record? && pull_request.valid?
begin
pull_request.restore_branches!
# If the PR has been created/updated after we last fetched the
# remote, we fetch again to get the up-to-date refs.
fetch_remote if pull_request.updated_at > last_fetched_at
author_id = user_id(pull_request.author, project.creator_id)
description = format_description(pull_request.description, pull_request.author)
......@@ -152,6 +161,7 @@ module Github
iid: pull_request.iid,
title: pull_request.title,
description: description,
ref_fetched: true,
source_project: pull_request.source_project,
source_branch: pull_request.source_branch_name,
source_branch_sha: pull_request.source_branch_sha,
......@@ -173,8 +183,6 @@ module Github
fetch_comments(merge_request, :review_comment, review_comments_url, LegacyDiffNote)
rescue => e
error(:pull_request, pull_request.url, e.message)
ensure
pull_request.remove_restored_branches!
end
end
......@@ -203,11 +211,11 @@ module Github
# for both features, like manipulating assignees, labels
# and milestones, are provided within the Issues API.
if representation.pull_request?
return unless representation.has_labels? || representation.has_comments?
return unless representation.labels? || representation.comments?
merge_request = MergeRequest.find_by!(target_project_id: project.id, iid: representation.iid)
if representation.has_labels?
if representation.labels?
merge_request.update_attribute(:label_ids, label_ids(representation.labels))
end
......@@ -222,14 +230,16 @@ module Github
issue.title = representation.title
issue.description = format_description(representation.description, representation.author)
issue.state = representation.state
issue.label_ids = label_ids(representation.labels)
issue.milestone_id = milestone_id(representation.milestone)
issue.author_id = author_id
issue.assignee_ids = [user_id(representation.assignee)]
issue.created_at = representation.created_at
issue.updated_at = representation.updated_at
issue.save!(validate: false)
issue.update(
label_ids: label_ids(representation.labels),
assignee_ids: assignee_ids(representation.assignees))
fetch_comments_conditionally(issue, representation)
end
rescue => e
......@@ -238,7 +248,7 @@ module Github
end
def fetch_comments_conditionally(issuable, representation)
if representation.has_comments?
if representation.comments?
comments_url = "/repos/#{repo}/issues/#{issuable.iid}/comments"
fetch_comments(issuable, :comment, comments_url)
end
......@@ -302,7 +312,11 @@ module Github
end
def label_ids(labels)
labels.map { |attrs| cached[:label_ids][attrs.fetch('name')] }.compact
labels.map { |label| cached[:label_ids][label.title] }.compact
end
def assignee_ids(assignees)
assignees.map { |assignee| user_id(assignee) }.compact
end
def milestone_id(milestone)
......
......@@ -7,10 +7,14 @@ module Github
raw.dig('user', 'login') || 'unknown'
end
def repo?
raw['repo'].present?
end
def repo
return @repo if defined?(@repo)
return unless repo?
@repo = Github::Representation::Repo.new(raw['repo']) if raw['repo'].present?
@repo ||= Github::Representation::Repo.new(raw['repo'])
end
def ref
......@@ -25,10 +29,6 @@ module Github
Commit.truncate_sha(sha)
end
def exists?
@exists ||= branch_exists? && commit_exists?
end
def valid?
sha.present? && ref.present?
end
......@@ -47,14 +47,6 @@ module Github
private
def branch_exists?
repository.branch_exists?(ref)
end
def commit_exists?
repository.branch_names_contains(sha).include?(ref)
end
def repository
@repository ||= options.fetch(:repository)
end
......
......@@ -23,14 +23,14 @@ module Github
@author ||= Github::Representation::User.new(raw['user'], options)
end
def assignee
return unless assigned?
@assignee ||= Github::Representation::User.new(raw['assignee'], options)
def labels?
raw['labels'].any?
end
def assigned?
raw['assignee'].present?
def labels
@labels ||= Array(raw['labels']).map do |label|
Github::Representation::Label.new(label, options)
end
end
end
end
......
module Github
module Representation
class Issue < Representation::Issuable
def labels
raw['labels']
end
def state
raw['state'] == 'closed' ? 'closed' : 'opened'
end
def has_comments?
def comments?
raw['comments'] > 0
end
def has_labels?
labels.count > 0
end
def pull_request?
raw['pull_request'].present?
end
def assigned?
raw['assignees'].present?
end
def assignees
@assignees ||= Array(raw['assignees']).map do |user|
Github::Representation::User.new(user, options)
end
end
end
end
end
module Github
module Representation
class PullRequest < Representation::Issuable
delegate :user, :repo, :ref, :sha, to: :source_branch, prefix: true
delegate :user, :exists?, :repo, :ref, :sha, :short_sha, to: :target_branch, prefix: true
delegate :sha, to: :source_branch, prefix: true
delegate :sha, to: :target_branch, prefix: true
def source_project
project
end
def source_branch_name
@source_branch_name ||=
if cross_project? || !source_branch_exists?
source_branch_name_prefixed
else
source_branch_ref
end
end
def source_branch_exists?
return @source_branch_exists if defined?(@source_branch_exists)
@source_branch_exists = !cross_project? && source_branch.exists?
# Mimic the "user:branch" displayed in the MR widget,
# i.e. "Request to merge rymai:add-external-mounts into master"
cross_project? ? "#{source_branch.user}:#{source_branch.ref}" : source_branch.ref
end
def target_project
......@@ -28,11 +19,7 @@ module Github
end
def target_branch_name
@target_branch_name ||= target_branch_exists? ? target_branch_ref : target_branch_name_prefixed
end
def target_branch_exists?
@target_branch_exists ||= target_branch.exists?
target_branch.ref
end
def state
......@@ -50,16 +37,14 @@ module Github
source_branch.valid? && target_branch.valid?
end
def restore_branches!
restore_source_branch!
restore_target_branch!
def assigned?
raw['assignee'].present?
end
def remove_restored_branches!
return if opened?
def assignee
return unless assigned?
remove_source_branch!
remove_target_branch!
@assignee ||= Github::Representation::User.new(raw['assignee'], options)
end
private
......@@ -72,48 +57,14 @@ module Github
@source_branch ||= Representation::Branch.new(raw['head'], repository: project.repository)
end
def source_branch_name_prefixed
"gh-#{target_branch_short_sha}/#{iid}/#{source_branch_user}/#{source_branch_ref}"
end
def target_branch
@target_branch ||= Representation::Branch.new(raw['base'], repository: project.repository)
end
def target_branch_name_prefixed
"gl-#{target_branch_short_sha}/#{iid}/#{target_branch_user}/#{target_branch_ref}"
end
def cross_project?
return true if source_branch_repo.nil?
source_branch_repo.id != target_branch_repo.id
end
def restore_source_branch!
return if source_branch_exists?
source_branch.restore!(source_branch_name)
end
def restore_target_branch!
return if target_branch_exists?
target_branch.restore!(target_branch_name)
end
def remove_source_branch!
# We should remove the source/target branches only if they were
# restored. Otherwise, we'll remove branches like 'master' that
# target_branch_exists? returns true. In other words, we need
# to clean up only the restored branches that (source|target)_branch_exists?
# returns false for the first time it has been called, because of
# this that is important to memoize these values.
source_branch.remove!(source_branch_name) unless source_branch_exists?
end
return true unless source_branch.repo?
def remove_target_branch!
target_branch.remove!(target_branch_name) unless target_branch_exists?
source_branch.repo.id != target_branch.repo.id
end
end
end
......
......@@ -39,13 +39,19 @@ class GithubImport
def import!
@project.force_import_start
import_success = false
timings = Benchmark.measure do
Github::Import.new(@project, @options).execute
import_success = Github::Import.new(@project, @options).execute
end
puts "Import finished. Timings: #{timings}".color(:green)
@project.import_finish
if import_success
@project.import_finish
puts "Import finished. Timings: #{timings}".color(:green)
else
puts "Import was not successful. Errors were as follows:"
puts @project.import_error
end
end
def new_project
......@@ -53,18 +59,23 @@ class GithubImport
namespace_path, _sep, name = @project_path.rpartition('/')
namespace = find_or_create_namespace(namespace_path)
Projects::CreateService.new(
project = Projects::CreateService.new(
@current_user,
name: name,
path: name,
description: @repo['description'],
namespace_id: namespace.id,
visibility_level: visibility_level,
import_type: 'github',
import_source: @repo['full_name'],
import_url: @repo['clone_url'].sub('://', "://#{@options[:token]}@"),
skip_wiki: @repo['has_wiki']
).execute
project.update!(
import_type: 'github',
import_source: @repo['full_name'],
import_url: @repo['clone_url'].sub('://', "://#{@options[:token]}@")
)
project
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment