Commit 73e95ecf authored by Sean McGivern's avatar Sean McGivern

Merge branch 'large_imports_rake_task' into 'master'

Import large gitlab_project exports via rake task

See merge request gitlab-org/gitlab!20724
parents 7fa60df9 f0d907b2
---
title: Import large gitlab_project exports via rake task
merge_request: 20724
author:
type: added
# frozen_string_literal: true
# Import large project archives
#
# This task:
# 1. Disables ObjectStorage for archive upload
# 2. Performs Sidekiq job synchronously
#
# @example
# bundle exec rake "gitlab:import_export:import[root, root, imported_project, /path/to/file.tar.gz]"
#
require 'sidekiq/testing'
namespace :gitlab do
namespace :import_export do
desc 'EXPERIMENTAL | Import large project archives'
task :import, [:username, :namespace_path, :project_path, :archive_path] => :gitlab_environment do |_t, args|
warn_user_is_not_gitlab
GitlabProjectImport.new(
namespace_path: args.namespace_path,
project_path: args.project_path,
username: args.username,
file_path: args.archive_path
).import
end
end
end
class GitlabProjectImport
def initialize(opts)
@project_path = opts.fetch(:project_path)
@file_path = opts.fetch(:file_path)
@namespace = Namespace.find_by_full_path(opts.fetch(:namespace_path))
@current_user = User.find_by_username(opts.fetch(:username))
end
def import
show_import_start_message
run_isolated_sidekiq_job
show_import_failures_count
if @project&.import_state&.last_error
puts "ERROR: #{@project.import_state.last_error}"
exit 1
elsif @project.errors.any?
puts "ERROR: #{@project.errors.full_messages.join(', ')}"
exit 1
else
puts 'Done!'
end
rescue StandardError => e
puts "Exception: #{e.message}"
puts e.backtrace
exit 1
end
private
# We want to ensure that all Sidekiq jobs are executed
# synchronously as part of that process.
# This ensures that all expensive operations do not escape
# to general Sidekiq clusters/nodes.
def run_isolated_sidekiq_job
Sidekiq::Testing.fake! do
@project = create_project
execute_sidekiq_job
true
end
end
def create_project
# We are disabling ObjectStorage for `import`
# as it is too slow to handle big archives:
# 1. DB transaction timeouts on upload
# 2. Download of archive before unpacking
disable_upload_object_storage do
service = Projects::GitlabProjectsImportService.new(
@current_user,
{
namespace_id: @namespace.id,
path: @project_path,
file: File.open(@file_path)
}
)
service.execute
end
end
def execute_sidekiq_job
Sidekiq::Worker.drain_all
end
def disable_upload_object_storage
overwrite_uploads_setting('background_upload', false) do
overwrite_uploads_setting('direct_upload', false) do
yield
end
end
end
def overwrite_uploads_setting(key, value)
old_value = Settings.uploads.object_store[key]
Settings.uploads.object_store[key] = value
yield
ensure
Settings.uploads.object_store[key] = old_value
end
def full_path
"#{@namespace.full_path}/#{@project_path}"
end
def show_import_start_message
puts "Importing GitLab export: #{@file_path} into GitLab" \
" #{full_path}" \
" as #{@current_user.name}"
end
def show_import_failures_count
return unless @project.import_failures.exists?
puts "Total number of not imported relations: #{@project.import_failures.count}"
end
end
# frozen_string_literal: true
require 'rake_helper'
describe 'gitlab:import_export:import rake task' do
let(:username) { 'root' }
let(:namespace_path) { username }
let!(:user) { create(:user, username: username) }
let(:task_params) { [username, namespace_path, project_name, archive_path] }
let(:project) { Project.find_by_full_path("#{namespace_path}/#{project_name}") }
before do
Rake.application.rake_require('tasks/gitlab/import_export/import')
allow(Settings.uploads.object_store).to receive(:[]=).and_call_original
end
around do |example|
old_direct_upload_setting = Settings.uploads.object_store['direct_upload']
old_background_upload_setting = Settings.uploads.object_store['background_upload']
Settings.uploads.object_store['direct_upload'] = true
Settings.uploads.object_store['background_upload'] = true
example.run
Settings.uploads.object_store['direct_upload'] = old_direct_upload_setting
Settings.uploads.object_store['background_upload'] = old_background_upload_setting
end
subject { run_rake_task('gitlab:import_export:import', task_params) }
context 'when project import is valid' do
let(:project_name) { 'import_rake_test_project' }
let(:archive_path) { 'spec/fixtures/gitlab/import_export/lightweight_project_export.tar.gz' }
it 'performs project import successfully' do
expect { subject }.to output(/Done!/).to_stdout
expect { subject }.not_to raise_error
expect(project.merge_requests.count).to be > 0
expect(project.issues.count).to be > 0
expect(project.milestones.count).to be > 0
expect(project.import_state.status).to eq('finished')
end
it 'disables direct & background upload only during project creation' do
expect_next_instance_of(Projects::GitlabProjectsImportService) do |service|
expect(service).to receive(:execute).and_wrap_original do |m|
expect(Settings.uploads.object_store['background_upload']).to eq(false)
expect(Settings.uploads.object_store['direct_upload']).to eq(false)
m.call
end
end
expect_next_instance_of(GitlabProjectImport) do |importer|
expect(importer).to receive(:execute_sidekiq_job).and_wrap_original do |m|
expect(Settings.uploads.object_store['background_upload']).to eq(true)
expect(Settings.uploads.object_store['direct_upload']).to eq(true)
expect(Settings.uploads.object_store).not_to receive(:[]=).with('backgroud_upload', false)
expect(Settings.uploads.object_store).not_to receive(:[]=).with('direct_upload', false)
m.call
end
end
subject
end
end
context 'when project import is invalid' do
let(:project_name) { 'import_rake_invalid_test_project' }
let(:archive_path) { 'spec/fixtures/gitlab/import_export/corrupted_project_export.tar.gz' }
let(:not_imported_message) { /Total number of not imported relations: 1/ }
let(:error) { /Validation failed: Notes is invalid/ }
context 'when import_graceful_failures feature flag is enabled' do
before do
stub_feature_flags(import_graceful_failures: true)
end
it 'performs project import successfully' do
expect { subject }.to output(not_imported_message).to_stdout
expect { subject }.not_to raise_error
expect(project.merge_requests).to be_empty
expect(project.import_state.last_error).to be_nil
expect(project.import_state.status).to eq('finished')
end
end
context 'when import_graceful_failures feature flag is disabled' do
before do
stub_feature_flags(import_graceful_failures: false)
end
it 'fails project import with an error' do
expect { subject }.to raise_error(SystemExit).and output(error).to_stdout
expect(project.merge_requests).to be_empty
expect(project.import_state.last_error).to match(error)
expect(project.import_state.status).to eq('failed')
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment