Commit dd3d4221 authored by Robert Speicher's avatar Robert Speicher

Merge branch 'github' into 'master'

Improve GitHub importer

Closes #27429

See merge request !12886
parents 4c8b6668 38704e42
......@@ -34,8 +34,12 @@ module Projects
def import_repository
raise Error, 'Blocked import URL.' if Gitlab::UrlBlocker.blocked_url?(project.import_url)
# We should return early for a GitHub import because the new GitHub
# importer fetch the project repositories for us.
return if project.github_import?
begin
if project.github_import? || project.gitea_import?
if project.gitea_import?
fetch_repository
else
clone_repository
......@@ -55,7 +59,7 @@ module Projects
end
def fetch_repository
project.create_repository
project.ensure_repository
project.repository.add_remote(project.import_type, project.import_url)
project.repository.set_remote_as_mirror(project.import_type)
project.repository.fetch_remote(project.import_type, forced: true)
......
---
title: Reduce memory usage of the GitHub importer
merge_request: 12886
author:
module Github
class Client
TIMEOUT = 60
attr_reader :connection, :rate_limit
def initialize(options)
@connection = Faraday.new(url: options.fetch(:url)) do |faraday|
faraday.options.open_timeout = options.fetch(:timeout, 60)
faraday.options.timeout = options.fetch(:timeout, 60)
@connection = Faraday.new(url: options.fetch(:url, root_endpoint)) do |faraday|
faraday.options.open_timeout = options.fetch(:timeout, TIMEOUT)
faraday.options.timeout = options.fetch(:timeout, TIMEOUT)
faraday.authorization 'token', options.fetch(:token)
faraday.adapter :net_http
faraday.ssl.verify = verify_ssl
end
@rate_limit = RateLimit.new(connection)
......@@ -19,5 +22,32 @@ module Github
Github::Response.new(connection.get(url, query))
end
private
def root_endpoint
custom_endpoint || github_endpoint
end
def custom_endpoint
github_omniauth_provider.dig('args', 'client_options', 'site')
end
def verify_ssl
# If there is no config, we're connecting to github.com
# and we should verify ssl.
github_omniauth_provider.fetch('verify_ssl', true)
end
def github_endpoint
OmniAuth::Strategies::GitHub.default_options[:client_options][:site]
end
def github_omniauth_provider
@github_omniauth_provider ||=
Gitlab.config.omniauth.providers
.find { |provider| provider.name == 'github' }
.to_h
end
end
end
......@@ -41,13 +41,16 @@ module Github
self.reset_callbacks :validate
end
attr_reader :project, :repository, :repo, :options, :errors, :cached, :verbose
attr_reader :project, :repository, :repo, :repo_url, :wiki_url,
:options, :errors, :cached, :verbose
def initialize(project, options)
def initialize(project, options = {})
@project = project
@repository = project.repository
@repo = project.import_source
@options = options
@repo_url = project.import_url
@wiki_url = project.import_url.sub(/\.git\z/, '.wiki.git')
@options = options.reverse_merge(token: project.import_data&.credentials&.fetch(:user))
@verbose = options.fetch(:verbose, false)
@cached = Hash.new { |hash, key| hash[key] = Hash.new }
@errors = []
......@@ -65,6 +68,8 @@ module Github
fetch_pull_requests
puts 'Fetching issues...'.color(:aqua) if verbose
fetch_issues
puts 'Fetching releases...'.color(:aqua) if verbose
fetch_releases
puts 'Cloning wiki repository...'.color(:aqua) if verbose
fetch_wiki_repository
puts 'Expiring repository cache...'.color(:aqua) if verbose
......@@ -72,6 +77,7 @@ module Github
true
rescue Github::RepositoryFetchError
expire_repository_cache
false
ensure
keep_track_of_errors
......@@ -81,23 +87,21 @@ module Github
def fetch_repository
begin
project.create_repository unless project.repository.exists?
project.repository.add_remote('github', "https://#{options.fetch(:token)}@github.com/#{repo}.git")
project.ensure_repository
project.repository.add_remote('github', repo_url)
project.repository.set_remote_as_mirror('github')
project.repository.fetch_remote('github', forced: true)
rescue Gitlab::Shell::Error => e
error(:project, "https://github.com/#{repo}.git", e.message)
rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => e
error(:project, repo_url, e.message)
raise Github::RepositoryFetchError
end
end
def fetch_wiki_repository
wiki_url = "https://#{options.fetch(:token)}@github.com/#{repo}.wiki.git"
wiki_path = "#{project.full_path}.wiki"
return if project.wiki.repository_exists?
unless project.wiki.repository_exists?
gitlab_shell.import_repository(project.repository_storage_path, wiki_path, wiki_url)
end
wiki_path = "#{project.disk_path}.wiki"
gitlab_shell.import_repository(project.repository_storage_path, wiki_path, wiki_url)
rescue Gitlab::Shell::Error => e
# GitHub error message when the wiki repo has not been created,
# this means that repo has wiki enabled, but have no pages. So,
......@@ -309,7 +313,7 @@ module Github
next unless representation.valid?
release = ::Release.find_or_initialize_by(project_id: project.id, tag: representation.tag)
next unless relese.new_record?
next unless release.new_record?
begin
release.description = representation.description
......@@ -337,7 +341,7 @@ module Github
def user_id(user, fallback_id = nil)
return unless user.present?
return cached[:user_ids][user.id] if cached[:user_ids].key?(user.id)
return cached[:user_ids][user.id] if cached[:user_ids][user.id].present?
gitlab_user_id = user_id_by_external_uid(user.id) || user_id_by_email(user.email)
......@@ -367,7 +371,7 @@ module Github
end
def expire_repository_cache
repository.expire_content_cache
repository.expire_content_cache if project.repository_exists?
end
def keep_track_of_errors
......
......@@ -8,7 +8,7 @@ module Gitlab
ImportSource = Struct.new(:name, :title, :importer)
ImportTable = [
ImportSource.new('github', 'GitHub', Gitlab::GithubImport::Importer),
ImportSource.new('github', 'GitHub', Github::Import),
ImportSource.new('bitbucket', 'Bitbucket', Gitlab::BitbucketImport::Importer),
ImportSource.new('gitlab', 'GitLab.com', Gitlab::GitlabImport::Importer),
ImportSource.new('google_code', 'Google Code', Gitlab::GoogleCodeImport::Importer),
......
......@@ -7,7 +7,7 @@ class GithubImport
end
def initialize(token, gitlab_username, project_path, extras)
@options = { url: 'https://api.github.com', token: token, verbose: true }
@options = { token: token, verbose: true }
@project_path = project_path
@current_user = User.find_by_username(gitlab_username)
@github_repo = extras.empty? ? nil : extras.first
......@@ -62,6 +62,7 @@ class GithubImport
visibility_level: visibility_level,
import_type: 'github',
import_source: @repo['full_name'],
import_url: @repo['clone_url'].sub('://', "://#{@options[:token]}@"),
skip_wiki: @repo['has_wiki']
).execute
end
......
......@@ -56,7 +56,7 @@ describe Gitlab::ImportSources do
describe '.importer' do
import_sources = {
'github' => Gitlab::GithubImport::Importer,
'github' => Github::Import,
'bitbucket' => Gitlab::BitbucketImport::Importer,
'gitlab' => Gitlab::GitlabImport::Importer,
'google_code' => Gitlab::GoogleCodeImport::Importer,
......
......@@ -38,8 +38,7 @@ describe Projects::ImportService do
context 'with a Github repository' do
it 'succeeds if repository import is successfully' do
expect_any_instance_of(Repository).to receive(:fetch_remote).and_return(true)
expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true)
expect_any_instance_of(Github::Import).to receive(:execute).and_return(true)
result = subject.execute
......@@ -52,16 +51,7 @@ describe Projects::ImportService do
result = subject.execute
expect(result[:status]).to eq :error
expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.full_path} - Failed to import the repository"
end
it 'does not remove the GitHub remote' do
expect_any_instance_of(Repository).to receive(:fetch_remote).and_return(true)
expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true)
subject.execute
expect(project.repository.raw_repository.remote_names).to include('github')
expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - The remote data could not be imported."
end
end
......@@ -102,8 +92,7 @@ describe Projects::ImportService do
end
it 'succeeds if importer succeeds' do
allow_any_instance_of(Repository).to receive(:fetch_remote).and_return(true)
allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true)
allow_any_instance_of(Github::Import).to receive(:execute).and_return(true)
result = subject.execute
......@@ -111,10 +100,7 @@ describe Projects::ImportService do
end
it 'flushes various caches' do
allow_any_instance_of(Repository).to receive(:fetch_remote)
.and_return(true)
allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute)
allow_any_instance_of(Github::Import).to receive(:execute)
.and_return(true)
expect_any_instance_of(Repository).to receive(:expire_content_cache)
......@@ -123,8 +109,7 @@ describe Projects::ImportService do
end
it 'fails if importer fails' do
allow_any_instance_of(Repository).to receive(:fetch_remote).and_return(true)
allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(false)
allow_any_instance_of(Github::Import).to receive(:execute).and_return(false)
result = subject.execute
......@@ -133,8 +118,7 @@ describe Projects::ImportService do
end
it 'fails if importer raise an error' do
allow_any_instance_of(Gitlab::Shell).to receive(:fetch_remote).and_return(true)
allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_raise(Projects::ImportService::Error.new('Github: failed to connect API'))
allow_any_instance_of(Github::Import).to receive(:execute).and_raise(Projects::ImportService::Error.new('Github: failed to connect API'))
result = subject.execute
......@@ -143,9 +127,9 @@ describe Projects::ImportService do
end
it 'expires content cache after error' do
allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false, true)
allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false)
expect_any_instance_of(Gitlab::Shell).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new('Failed to import the repository'))
expect_any_instance_of(Repository).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new)
expect_any_instance_of(Repository).to receive(:expire_content_cache)
subject.execute
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment