Port of fix-github-importer-slowness to EE

parent bee9a921
...@@ -3,4 +3,7 @@ module Importable ...@@ -3,4 +3,7 @@ module Importable
attr_accessor :importing attr_accessor :importing
alias_method :importing?, :importing alias_method :importing?, :importing
attr_accessor :imported
alias_method :imported?, :imported
end end
...@@ -14,6 +14,7 @@ module Issuable ...@@ -14,6 +14,7 @@ module Issuable
include Awardable include Awardable
include Taskable include Taskable
include TimeTrackable include TimeTrackable
include Importable
# This object is used to gather issuable meta data for displaying # This object is used to gather issuable meta data for displaying
# upvotes, downvotes, notes and closing merge requests count for issues and merge requests # upvotes, downvotes, notes and closing merge requests count for issues and merge requests
...@@ -102,7 +103,7 @@ module Issuable ...@@ -102,7 +103,7 @@ module Issuable
acts_as_paranoid acts_as_paranoid
after_save :update_assignee_cache_counts, if: :assignee_id_changed? after_save :update_assignee_cache_counts, if: :assignee_id_changed?
after_save :record_metrics after_save :record_metrics, unless: :imported?
def update_assignee_cache_counts def update_assignee_cache_counts
# make sure we flush the cache for both the old *and* new assignees(if they exist) # make sure we flush the cache for both the old *and* new assignees(if they exist)
......
...@@ -11,30 +11,20 @@ module RepositoryMirroring ...@@ -11,30 +11,20 @@ module RepositoryMirroring
gitlab_shell.delete_remote_branches(storage_path, path_with_namespace, remote, branches) gitlab_shell.delete_remote_branches(storage_path, path_with_namespace, remote, branches)
end end
def add_remote(name, url)
raw_repository.remote_add(name, url)
rescue Rugged::ConfigError
raw_repository.remote_update(name, url: url)
end
def remove_remote(name)
raw_repository.remote_delete(name)
true
rescue Rugged::ConfigError
false
end
def set_remote_as_mirror(name) def set_remote_as_mirror(name)
config = raw_repository.rugged.config config = raw_repository.rugged.config
# This is used by Gitlab Geo to define repository as equivalent as "git clone --mirror" # This is used to define repository as equivalent as "git clone --mirror"
config["remote.#{name}.fetch"] = 'refs/*:refs/*' config["remote.#{name}.fetch"] = 'refs/*:refs/*'
config["remote.#{name}.mirror"] = true config["remote.#{name}.mirror"] = true
config["remote.#{name}.prune"] = true config["remote.#{name}.prune"] = true
end end
def fetch_remote(remote, forced: false, no_tags: false) def fetch_mirror(remote, url)
gitlab_shell.fetch_remote(storage_path, path_with_namespace, remote, forced: forced, no_tags: no_tags) add_remote(remote, url)
set_remote_as_mirror(remote)
fetch_remote(remote, forced: true)
remove_remote(remote)
end end
def remote_tags(remote) def remote_tags(remote)
......
...@@ -4,7 +4,6 @@ class MergeRequest < ActiveRecord::Base ...@@ -4,7 +4,6 @@ class MergeRequest < ActiveRecord::Base
include Referable include Referable
include Sortable include Sortable
include Elastic::MergeRequestsSearch include Elastic::MergeRequestsSearch
include Importable
include Approvable include Approvable
belongs_to :target_project, class_name: "Project" belongs_to :target_project, class_name: "Project"
......
...@@ -655,6 +655,10 @@ class Project < ActiveRecord::Base ...@@ -655,6 +655,10 @@ class Project < ActiveRecord::Base
import_type == 'gitea' import_type == 'gitea'
end end
def github_import?
import_type == 'github'
end
def check_limit def check_limit
unless creator.can_create_project? || namespace.kind == 'group' unless creator.can_create_project? || namespace.kind == 'group'
projects_limit = creator.projects_limit projects_limit = creator.projects_limit
......
...@@ -72,7 +72,7 @@ class Repository ...@@ -72,7 +72,7 @@ class Repository
# Return absolute path to repository # Return absolute path to repository
def path_to_repo def path_to_repo
@path_to_repo ||= File.expand_path( @path_to_repo ||= File.expand_path(
File.join(@project.repository_storage_path, path_with_namespace + ".git") File.join(repository_storage_path, path_with_namespace + ".git")
) )
end end
...@@ -409,10 +409,6 @@ class Repository ...@@ -409,10 +409,6 @@ class Repository
expire_tags_cache expire_tags_cache
end end
def before_import
expire_content_cache
end
# Runs code after the HEAD of a repository is changed. # Runs code after the HEAD of a repository is changed.
def after_change_head def after_change_head
expire_method_caches(METHOD_CACHES_FOR_FILE_TYPES.keys) expire_method_caches(METHOD_CACHES_FOR_FILE_TYPES.keys)
...@@ -1111,6 +1107,23 @@ class Repository ...@@ -1111,6 +1107,23 @@ class Repository
rugged.references.delete(tmp_ref) if tmp_ref rugged.references.delete(tmp_ref) if tmp_ref
end end
def add_remote(name, url)
raw_repository.remote_add(name, url)
rescue Rugged::ConfigError
raw_repository.remote_update(name, url: url)
end
def remove_remote(name)
raw_repository.remote_delete(name)
true
rescue Rugged::ConfigError
false
end
def fetch_remote(remote, forced: false, no_tags: false)
gitlab_shell.fetch_remote(repository_storage_path, path_with_namespace, remote, forced: forced, no_tags: no_tags)
end
def fetch_ref(source_path, source_ref, target_ref) def fetch_ref(source_path, source_ref, target_ref)
args = %W(#{Gitlab.config.git.bin_path} fetch --no-tags -f #{source_path} #{source_ref}:#{target_ref}) args = %W(#{Gitlab.config.git.bin_path} fetch --no-tags -f #{source_path} #{source_ref}:#{target_ref})
Gitlab::Popen.popen(args, path_to_repo) Gitlab::Popen.popen(args, path_to_repo)
...@@ -1234,4 +1247,8 @@ class Repository ...@@ -1234,4 +1247,8 @@ class Repository
def repository_event(event, tags = {}) def repository_event(event, tags = {})
Gitlab::Metrics.add_event(event, { path: path_with_namespace }.merge(tags)) Gitlab::Metrics.add_event(event, { path: path_with_namespace }.merge(tags))
end end
def repository_storage_path
@project.repository_storage_path
end
end end
...@@ -11,7 +11,7 @@ module Projects ...@@ -11,7 +11,7 @@ module Projects
success success
rescue => e rescue => e
error(e.message) error("Error importing repository #{project.import_url} into #{project.path_with_namespace} - #{e.message}")
end end
private private
...@@ -32,23 +32,40 @@ module Projects ...@@ -32,23 +32,40 @@ module Projects
end end
def import_repository def import_repository
raise Error, 'Blocked import URL.' if Gitlab::UrlBlocker.blocked_url?(project.import_url)
begin begin
raise Error, "Blocked import URL." if Gitlab::UrlBlocker.blocked_url?(project.import_url) if project.github_import? || project.gitea_import?
gitlab_shell.import_repository(project.repository_storage_path, project.path_with_namespace, project.import_url) fetch_repository
rescue => e else
clone_repository
end
rescue Gitlab::Shell::Error => e
# Expire cache to prevent scenarios such as: # Expire cache to prevent scenarios such as:
# 1. First import failed, but the repo was imported successfully, so +exists?+ returns true # 1. First import failed, but the repo was imported successfully, so +exists?+ returns true
# 2. Retried import, repo is broken or not imported but +exists?+ still returns true # 2. Retried import, repo is broken or not imported but +exists?+ still returns true
project.repository.before_import if project.repository_exists? project.repository.expire_content_cache if project.repository_exists?
raise Error, "Error importing repository #{project.import_url} into #{project.path_with_namespace} - #{e.message}" raise Error, e.message
end
end end
def clone_repository
gitlab_shell.import_repository(project.repository_storage_path, project.path_with_namespace, project.import_url)
end
def fetch_repository
project.create_repository
project.repository.add_remote(project.import_type, project.import_url)
project.repository.set_remote_as_mirror(project.import_type)
project.repository.fetch_remote(project.import_type, forced: true)
project.repository.remove_remote(project.import_type)
end end
def import_data def import_data
return unless has_importer? return unless has_importer?
project.repository.before_import unless project.gitlab_project_import? project.repository.expire_content_cache unless project.gitlab_project_import?
unless importer.execute unless importer.execute
raise Error, 'The remote data could not be imported.' raise Error, 'The remote data could not be imported.'
......
class RepositoryImportWorker class RepositoryImportWorker
include Sidekiq::Worker include Sidekiq::Worker
include Gitlab::ShellAdapter
include DedicatedSidekiqQueue include DedicatedSidekiqQueue
attr_accessor :project, :current_user attr_accessor :project, :current_user
......
---
title: Improve performance of GitHub importer for large repositories.
merge_request: 10273
author:
...@@ -157,7 +157,7 @@ module Gitlab ...@@ -157,7 +157,7 @@ module Gitlab
end end
def restore_source_branch(pull_request) def restore_source_branch(pull_request)
project.repository.fetch_ref(repo_url, "pull/#{pull_request.number}/head", pull_request.source_branch_name) project.repository.create_branch(pull_request.source_branch_name, pull_request.source_branch_sha)
end end
def restore_target_branch(pull_request) def restore_target_branch(pull_request)
......
...@@ -20,7 +20,8 @@ module Gitlab ...@@ -20,7 +20,8 @@ module Gitlab
author_id: author_id, author_id: author_id,
assignee_id: assignee_id, assignee_id: assignee_id,
created_at: raw_data.created_at, created_at: raw_data.created_at,
updated_at: raw_data.updated_at updated_at: raw_data.updated_at,
imported: true
} }
end end
......
...@@ -121,12 +121,13 @@ module Gitlab ...@@ -121,12 +121,13 @@ module Gitlab
# name - project path with namespace # name - project path with namespace
# remote - remote name # remote - remote name
# forced - should we use --force flag? # forced - should we use --force flag?
# no_tags - should we use --no-tags flag?
# #
# Ex. # Ex.
# fetch_remote("gitlab/gitlab-ci", "upstream") # fetch_remote("gitlab/gitlab-ci", "upstream")
# #
def fetch_remote(storage, name, remote, forced: false, no_tags: false) def fetch_remote(storage, name, remote, forced: false, no_tags: false)
args = [gitlab_shell_projects_path, 'fetch-remote', storage, "#{name}.git", remote, '600'] args = [gitlab_shell_projects_path, 'fetch-remote', storage, "#{name}.git", remote, '800']
args << '--force' if forced args << '--force' if forced
args << '--no-tags' if no_tags args << '--no-tags' if no_tags
......
...@@ -73,14 +73,14 @@ describe Gitlab::Shell, lib: true do ...@@ -73,14 +73,14 @@ describe Gitlab::Shell, lib: true do
describe '#fetch_remote' do describe '#fetch_remote' do
it 'executes the command' do it 'executes the command' do
expect(Gitlab::Popen).to receive(:popen) expect(Gitlab::Popen).to receive(:popen)
.with([projects_path, 'fetch-remote', 'current/storage', 'project/path.git', 'new/storage', '600']).and_return([nil, 0]) .with([projects_path, 'fetch-remote', 'current/storage', 'project/path.git', 'new/storage', '800']).and_return([nil, 0])
expect(gitlab_shell.fetch_remote('current/storage', 'project/path', 'new/storage')).to be true expect(gitlab_shell.fetch_remote('current/storage', 'project/path', 'new/storage')).to be true
end end
it 'fails to execute the command' do it 'fails to execute the command' do
expect(Gitlab::Popen).to receive(:popen) expect(Gitlab::Popen).to receive(:popen)
.with([projects_path, 'fetch-remote', 'current/storage', 'project/path.git', 'new/storage', '600']).and_return(["error", 1]) .with([projects_path, 'fetch-remote', 'current/storage', 'project/path.git', 'new/storage', '800']).and_return(["error", 1])
expect { gitlab_shell.fetch_remote('current/storage', 'project/path', 'new/storage') }.to raise_error(Gitlab::Shell::Error, "error") expect { gitlab_shell.fetch_remote('current/storage', 'project/path', 'new/storage') }.to raise_error(Gitlab::Shell::Error, "error")
end end
......
...@@ -61,7 +61,8 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do ...@@ -61,7 +61,8 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do
author_id: project.creator_id, author_id: project.creator_id,
assignee_id: nil, assignee_id: nil,
created_at: created_at, created_at: created_at,
updated_at: updated_at updated_at: updated_at,
imported: true
} }
expect(pull_request.attributes).to eq(expected) expect(pull_request.attributes).to eq(expected)
...@@ -87,7 +88,8 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do ...@@ -87,7 +88,8 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do
author_id: project.creator_id, author_id: project.creator_id,
assignee_id: nil, assignee_id: nil,
created_at: created_at, created_at: created_at,
updated_at: updated_at updated_at: updated_at,
imported: true
} }
expect(pull_request.attributes).to eq(expected) expect(pull_request.attributes).to eq(expected)
...@@ -114,7 +116,8 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do ...@@ -114,7 +116,8 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do
author_id: project.creator_id, author_id: project.creator_id,
assignee_id: nil, assignee_id: nil,
created_at: created_at, created_at: created_at,
updated_at: updated_at updated_at: updated_at,
imported: true
} }
expect(pull_request.attributes).to eq(expected) expect(pull_request.attributes).to eq(expected)
......
...@@ -1319,14 +1319,6 @@ describe Repository, models: true do ...@@ -1319,14 +1319,6 @@ describe Repository, models: true do
end end
end end
describe '#before_import' do
it 'flushes the repository caches' do
expect(repository).to receive(:expire_content_cache)
repository.before_import
end
end
describe '#after_import' do describe '#after_import' do
it 'flushes and builds the cache' do it 'flushes and builds the cache' do
expect(repository).to receive(:expire_content_cache) expect(repository).to receive(:expire_content_cache)
......
...@@ -26,17 +26,45 @@ describe Projects::ImportService, services: true do ...@@ -26,17 +26,45 @@ describe Projects::ImportService, services: true do
result = subject.execute result = subject.execute
expect(result[:status]).to eq :error expect(result[:status]).to eq :error
expect(result[:message]).to eq 'The repository could not be created.' expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - The repository could not be created."
end end
end end
context 'with known url' do context 'with known url' do
before do before do
project.import_url = 'https://github.com/vim/vim.git' project.import_url = 'https://github.com/vim/vim.git'
project.import_type = 'github'
end
context 'with a Github repository' do
it 'succeeds if repository import is successfully' do
expect_any_instance_of(Repository).to receive(:fetch_remote).and_return(true)
expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true)
result = subject.execute
expect(result[:status]).to eq :success
end
it 'fails if repository import fails' do
expect_any_instance_of(Repository).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new('Failed to import the repository'))
result = subject.execute
expect(result[:status]).to eq :error
expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - Failed to import the repository"
end
end
context 'with a non Github repository' do
before do
project.import_url = 'https://bitbucket.org/vim/vim.git'
project.import_type = 'bitbucket'
end end
it 'succeeds if repository import is successfully' do it 'succeeds if repository import is successfully' do
expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_return(true) expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_return(true)
expect_any_instance_of(Gitlab::BitbucketImport::Importer).to receive(:execute).and_return(true)
result = subject.execute result = subject.execute
...@@ -44,7 +72,7 @@ describe Projects::ImportService, services: true do ...@@ -44,7 +72,7 @@ describe Projects::ImportService, services: true do
end end
it 'fails if repository import fails' do it 'fails if repository import fails' do
expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_raise(Gitlab::Shell::Error.new('Failed to import the repository')) expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_raise(Gitlab::Shell::Error.new('Failed to import the repository'))
result = subject.execute result = subject.execute
...@@ -52,6 +80,7 @@ describe Projects::ImportService, services: true do ...@@ -52,6 +80,7 @@ describe Projects::ImportService, services: true do
expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - Failed to import the repository" expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - Failed to import the repository"
end end
end end
end
context 'with valid importer' do context 'with valid importer' do
before do before do
...@@ -64,8 +93,8 @@ describe Projects::ImportService, services: true do ...@@ -64,8 +93,8 @@ describe Projects::ImportService, services: true do
end end
it 'succeeds if importer succeeds' do it 'succeeds if importer succeeds' do
expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_return(true) allow_any_instance_of(Repository).to receive(:fetch_remote).and_return(true)
expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true) allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true)
result = subject.execute result = subject.execute
...@@ -73,48 +102,42 @@ describe Projects::ImportService, services: true do ...@@ -73,48 +102,42 @@ describe Projects::ImportService, services: true do
end end
it 'flushes various caches' do it 'flushes various caches' do
expect_any_instance_of(Gitlab::Shell).to receive(:import_repository). allow_any_instance_of(Repository).to receive(:fetch_remote).
with(project.repository_storage_path, project.path_with_namespace, project.import_url).
and_return(true) and_return(true)
expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute). allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).
and_return(true) and_return(true)
expect_any_instance_of(Repository).to receive(:expire_emptiness_caches). expect_any_instance_of(Repository).to receive(:expire_content_cache)
and_call_original
expect_any_instance_of(Repository).to receive(:expire_exists_cache).
and_call_original
subject.execute subject.execute
end end
it 'fails if importer fails' do it 'fails if importer fails' do
expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_return(true) allow_any_instance_of(Repository).to receive(:fetch_remote).and_return(true)
expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(false) allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(false)
result = subject.execute result = subject.execute
expect(result[:status]).to eq :error expect(result[:status]).to eq :error
expect(result[:message]).to eq 'The remote data could not be imported.' expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - The remote data could not be imported."
end end
it 'fails if importer raise an error' do it 'fails if importer raise an error' do
expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_return(true) allow_any_instance_of(Gitlab::Shell).to receive(:fetch_remote).and_return(true)
expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_raise(Projects::ImportService::Error.new('Github: failed to connect API')) allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_raise(Projects::ImportService::Error.new('Github: failed to connect API'))
result = subject.execute result = subject.execute
expect(result[:status]).to eq :error expect(result[:status]).to eq :error
expect(result[:message]).to eq 'Github: failed to connect API' expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - Github: failed to connect API"
end end
it 'expires existence cache after error' do it 'expires content cache after error' do
allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false, true) allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false, true)
expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_raise(Gitlab::Shell::Error.new('Failed to import the repository')) expect_any_instance_of(Gitlab::Shell).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new('Failed to import the repository'))
expect_any_instance_of(Repository).to receive(:expire_emptiness_caches).and_call_original expect_any_instance_of(Repository).to receive(:expire_content_cache)
expect_any_instance_of(Repository).to receive(:expire_exists_cache).and_call_original
subject.execute subject.execute
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment