Commit 2b53d520 authored by Qingyu Zhao's avatar Qingyu Zhao

Introduce ndjson reader for project import

- two relation readers coexist: the ndjson reader and the legacy reader
- adds ndjson fixtures for the project import spec
- the ndjson reader is guarded by the feature flag `project_import_ndjson`
   - by default, the ndjson reader is disabled
- when the ndjson reader is enabled (via `project_import_ndjson`):
   - the export archive format is checked to choose the right relation reader
   - the ndjson reader is preferred over the legacy reader when the ndjson tree exists (a sketch of the selection logic follows the commit header)
parent 54fdfdea
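In outline, the reader selection added in this commit behaves like the following sketch. This is a simplified stand-in, not the production classes: `SketchNdjsonReader` and `SketchLegacyReader` only implement `exist?`, and the `Feature.enabled?(:project_import_ndjson, ...)` check is reduced to a plain boolean.

# Hedged sketch only; the real readers live under Gitlab::ImportExport::JSON
# and are shown in the diff below.
class SketchNdjsonReader
  def initialize(dir_path)
    @dir_path = dir_path
  end

  def exist?
    Dir.exist?(@dir_path)
  end
end

class SketchLegacyReader
  def initialize(json_path)
    @json_path = json_path
  end

  def exist?
    File.exist?(@json_path)
  end
end

def pick_relation_reader(export_path, ndjson_enabled:)
  readers = []
  readers << SketchNdjsonReader.new(File.join(export_path, 'tree')) if ndjson_enabled
  readers << SketchLegacyReader.new(File.join(export_path, 'project.json'))
  # the ndjson reader comes first, so it wins whenever the ndjson tree exists
  readers.find(&:exist?)
end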
......@@ -20,6 +20,7 @@ module Gitlab
def restore
@group_attributes = relation_reader.consume_attributes(nil)
@group_members = relation_reader.consume_relation(nil, 'members')
.map(&:first)
# We need to remove `name` and `path` as we consumed them in the previous pass
@group_attributes.delete('name')
......
......@@ -53,6 +53,7 @@ module Gitlab
def initialize(relation_names:, allowed_path:)
@relation_names = relation_names.map(&:to_s)
@consumed_relations = Set.new
# This is the legacy reader, to be used in the transition
# period before `.ndjson`,
......@@ -81,17 +82,19 @@ module Gitlab
raise ArgumentError, "Invalid #{importable_name} passed to `consume_relation`. Use #{@allowed_path} instead."
end
value = relations.delete(key)
Enumerator.new do |documents|
next unless @consumed_relations.add?("#{importable_path}/#{key}")
return value unless block_given?
return if value.nil?
value = relations.delete(key)
next if value.nil?
if value.is_a?(Array)
value.each.with_index do |item, idx|
yield(item, idx)
documents << [item, idx]
end
else
yield(value, 0)
documents << [value, 0]
end
end
end
......
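The hunk above changes the contract of `consume_relation`: instead of yielding to a caller-supplied block, it now returns a lazy Enumerator of `[value, index]` pairs, and a given key is streamed at most once per reader instance. A caller-side sketch under that assumption (`reader` stands in for either relation reader; the `labels` key mirrors the updated spec):

reader.consume_relation('project', 'labels').each do |label_hash, index|
  # each relation item arrives with its zero-based index
end

# A repeated call for the same key returns an empty Enumerator,
# because the key was recorded in @consumed_relations on first use.
reader.consume_relation('project', 'labels').to_a # => []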
# frozen_string_literal: true
module Gitlab
module ImportExport
module JSON
class NdjsonReader
MAX_JSON_DOCUMENT_SIZE = 50.megabytes
attr_reader :dir_path
def initialize(dir_path)
@dir_path = dir_path
@consumed_relations = Set.new
end
def exist?
Dir.exist?(@dir_path)
end
# This can be removed once legacy_reader is deprecated.
def legacy?
false
end
def consume_attributes(importable_path)
# This reads from `tree/project.json`
path = file_path("#{importable_path}.json")
data = File.read(path, MAX_JSON_DOCUMENT_SIZE)
json_decode(data)
end
def consume_relation(importable_path, key)
Enumerator.new do |documents|
next unless @consumed_relations.add?("#{importable_path}/#{key}")
# This reads from `tree/project/merge_requests.ndjson`
path = file_path(importable_path, "#{key}.ndjson")
next unless File.exist?(path)
File.foreach(path, MAX_JSON_DOCUMENT_SIZE).with_index do |line, line_num|
documents << [json_decode(line), line_num]
end
end
end
private
def json_decode(string)
ActiveSupport::JSON.decode(string)
rescue ActiveSupport::JSON.parse_error => e
Gitlab::ErrorTracking.log_exception(e)
raise Gitlab::ImportExport::Error, 'Incorrect JSON format'
end
def file_path(*path)
File.join(dir_path, *path)
end
end
end
end
end
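To orient readers of the new class: it expects the extracted export tree on disk, with root attributes in `<importable>.json` and each relation stored as one JSON document per line in `<importable>/<relation>.ndjson`. A hedged usage sketch follows; the `tree` directory and the `issues` relation are illustrative, mirroring the fixtures added in this commit.

# Illustrative layout, assumed to already exist on disk:
#   tree/project.json               root project attributes
#   tree/project/issues.ndjson      one JSON document per line
reader = Gitlab::ImportExport::JSON::NdjsonReader.new('tree')

reader.exist?                                        # => true when the tree directory is present
attributes = reader.consume_attributes('project')    # decoded Hash from tree/project.json

reader.consume_relation('project', 'issues').each do |issue_hash, line_num|
  # lines are decoded one at a time via File.foreach, capped at MAX_JSON_DOCUMENT_SIZE
end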
......@@ -17,8 +17,13 @@ module Gitlab
end
def restore
unless relation_reader
raise Gitlab::ImportExport::Error, 'invalid import format'
end
@project_attributes = relation_reader.consume_attributes(importable_path)
@project_members = relation_reader.consume_relation(importable_path, 'project_members')
.map(&:first)
if relation_tree_restorer.restore
import_failure_service.with_retry(action: 'set_latest_merge_request_diff_ids!') do
......@@ -38,13 +43,26 @@ module Gitlab
def relation_reader
strong_memoize(:relation_reader) do
[ndjson_relation_reader, legacy_relation_reader]
.compact.find(&:exist?)
end
end
def ndjson_relation_reader
return unless Feature.enabled?(:project_import_ndjson, project.namespace)
ImportExport::JSON::NdjsonReader.new(
File.join(shared.export_path, 'tree')
)
end
def legacy_relation_reader
ImportExport::JSON::LegacyReader::File.new(
File.join(shared.export_path, 'project.json'),
relation_names: reader.project_relation_names,
allowed_path: importable_path
)
end
end
def relation_tree_restorer
@relation_tree_restorer ||= RelationTreeRestorer.new(
......
......@@ -67,7 +67,7 @@ module Gitlab
end
def process_relation!(relation_key, relation_definition)
@relation_reader.consume_relation(@importable_path, relation_key) do |data_hash, relation_index|
@relation_reader.consume_relation(@importable_path, relation_key).each do |data_hash, relation_index|
process_relation_item!(relation_key, relation_definition, relation_index, data_hash)
end
end
......
......@@ -186,5 +186,23 @@
}
],
"snippets": [],
"hooks": []
"hooks": [],
"custom_attributes": [
{
"id": 201,
"project_id": 5,
"created_at": "2016-06-14T15:01:51.315Z",
"updated_at": "2016-06-14T15:01:51.315Z",
"key": "color",
"value": "red"
},
{
"id": 202,
"project_id": 5,
"created_at": "2016-06-14T15:01:51.315Z",
"updated_at": "2016-06-14T15:01:51.315Z",
"key": "size",
"value": "small"
}
]
}
......@@ -15,7 +15,6 @@ RSpec.shared_examples 'import/export json legacy reader' do
subject { legacy_reader.consume_attributes("project") }
context 'no excluded attributes' do
let(:excluded_attributes) { [] }
let(:relation_names) { [] }
it 'returns the whole tree from parsed JSON' do
......@@ -42,60 +41,53 @@ RSpec.shared_examples 'import/export json legacy reader' do
describe '#consume_relation' do
context 'when valid path is passed' do
let(:key) { 'description' }
let(:key) { 'labels' }
context 'block not given' do
it 'returns value of the key' do
expect(legacy_reader).to receive(:relations).and_return({ key => 'test value' })
expect(legacy_reader.consume_relation("project", key)).to eq('test value')
end
subject { legacy_reader.consume_relation("project", key) }
context 'key has not been consumed' do
it 'returns an Enumerator' do
expect(subject).to be_an_instance_of(Enumerator)
end
context 'key has been consumed' do
context 'value is nil' do
before do
legacy_reader.consume_relation("project", key)
expect(legacy_reader).to receive(:relations).and_return({ key => nil })
end
it 'does not yield' do
expect do |blk|
legacy_reader.consume_relation("project", key, &blk)
end.not_to yield_control
it 'yields nothing to the Enumerator' do
expect(subject.to_a).to eq([])
end
end
context 'value is nil' do
context 'value is an array' do
before do
expect(legacy_reader).to receive(:relations).and_return({ key => nil })
expect(legacy_reader).to receive(:relations).and_return({ key => %w[label1 label2] })
end
it 'does not yield' do
expect do |blk|
legacy_reader.consume_relation("project", key, &blk)
end.not_to yield_control
it 'yields every relation value to the Enumerator' do
expect(subject.to_a).to eq([['label1', 0], ['label2', 1]])
end
end
context 'value is not array' do
before do
expect(legacy_reader).to receive(:relations).and_return({ key => 'value' })
expect(legacy_reader).to receive(:relations).and_return({ key => 'non-array value' })
end
it 'yield the value with index 0' do
expect do |blk|
legacy_reader.consume_relation("project", key, &blk)
end.to yield_with_args('value', 0)
it 'yields the value with index 0 to the Enumerator' do
expect(subject.to_a).to eq([['non-array value', 0]])
end
end
end
context 'value is an array' do
context 'key has been consumed' do
before do
expect(legacy_reader).to receive(:relations).and_return({ key => %w[item1 item2 item3] })
legacy_reader.consume_relation("project", key).first
end
it 'yield each array element with index' do
expect do |blk|
legacy_reader.consume_relation("project", key, &blk)
end.to yield_successive_args(['item1', 0], ['item2', 1], ['item3', 2])
it 'yields nothing to the Enumerator' do
expect(subject.to_a).to eq([])
end
end
end
......
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::ImportExport::JSON::NdjsonReader do
include ImportExport::CommonUtil
let(:fixture) { 'spec/fixtures/lib/gitlab/import_export/light/tree' }
let(:root_tree) { JSON.parse(File.read(File.join(fixture, 'project.json'))) }
let(:ndjson_reader) { described_class.new(dir_path) }
let(:importable_path) { 'project' }
before :all do
extract_archive('spec/fixtures/lib/gitlab/import_export/light', 'tree.tar.gz')
end
after :all do
cleanup_artifacts_from_extract_archive('light')
end
describe '#exist?' do
subject { ndjson_reader.exist? }
context 'given valid dir_path' do
let(:dir_path) { fixture }
it { is_expected.to be true }
end
context 'given invalid dir_path' do
let(:dir_path) { 'invalid-dir-path' }
it { is_expected.to be false }
end
end
describe '#legacy?' do
let(:dir_path) { fixture }
subject { ndjson_reader.legacy? }
it { is_expected.to be false }
end
describe '#consume_attributes' do
let(:dir_path) { fixture }
subject { ndjson_reader.consume_attributes(importable_path) }
it 'returns the whole root tree from parsed JSON' do
expect(subject).to eq(root_tree)
end
end
describe '#consume_relation' do
let(:dir_path) { fixture }
subject { ndjson_reader.consume_relation(importable_path, key) }
context 'given any key' do
let(:key) { 'any-key' }
it 'returns an Enumerator' do
expect(subject).to be_an_instance_of(Enumerator)
end
end
context 'key has been consumed' do
let(:key) { 'issues' }
before do
ndjson_reader.consume_relation(importable_path, key).first
end
it 'yields nothing to the Enumerator' do
expect(subject.to_a).to eq([])
end
end
context 'key has not been consumed' do
context 'relation file does not exist' do
let(:key) { 'non-exist-relation-file-name' }
before do
relation_file_path = File.join(dir_path, importable_path, "#{key}.ndjson")
expect(File).to receive(:exist?).with(relation_file_path).and_return(false)
end
it 'yields nothing to the Enumerator' do
expect(subject.to_a).to eq([])
end
end
context 'relation file is empty' do
let(:key) { 'empty' }
it 'yields nothing to the Enumerator' do
expect(subject.to_a).to eq([])
end
end
context 'relation file contains multiple lines' do
let(:key) { 'custom_attributes' }
let(:attr_1) { JSON.parse('{"id":201,"project_id":5,"created_at":"2016-06-14T15:01:51.315Z","updated_at":"2016-06-14T15:01:51.315Z","key":"color","value":"red"}') }
let(:attr_2) { JSON.parse('{"id":202,"project_id":5,"created_at":"2016-06-14T15:01:51.315Z","updated_at":"2016-06-14T15:01:51.315Z","key":"size","value":"small"}') }
it 'yields every relation value to the Enumerator' do
expect(subject.to_a).to eq([[attr_1, 0], [attr_2, 1]])
end
end
end
end
end
......@@ -11,6 +11,7 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
let(:shared) { project.import_export_shared }
RSpec.shared_examples 'project tree restorer work properly' do |reader|
describe 'restore project tree' do
before_all do
# Using an admin for import, so we can check assignment of existing members
......@@ -25,6 +26,7 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
@shared = @project.import_export_shared
setup_import_export_config('complex')
setup_reader(reader)
allow_any_instance_of(Repository).to receive(:fetch_source_branch!).and_return(true)
allow_any_instance_of(Gitlab::Git::Repository).to receive(:branch_exists?).and_return(false)
......@@ -38,6 +40,10 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
end
end
after(:context) do
cleanup_artifacts_from_extract_archive('complex')
end
context 'JSON' do
it 'restores models based on JSON' do
expect(@restored_project_json).to be_truthy
......@@ -82,6 +88,7 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
expect(merge_request_note.note_html).to match(/#{note_content}/)
end
end
context 'merge request system note metadata' do
it 'restores title action for unmark wip' do
......@@ -103,7 +110,6 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
end
end
end
end
it 'creates a valid pipeline note' do
expect(Ci::Pipeline.find_by_sha('sha-notes').notes).not_to be_empty
......@@ -507,7 +513,7 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
allow(shared).to receive(:export_path).and_call_original
expect(project_tree_restorer.restore).to eq(false)
expect(shared.errors).to include('Incorrect JSON format')
expect(shared.errors).to include('invalid import format')
end
end
end
......@@ -521,16 +527,14 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
context 'with a simple project' do
before do
setup_import_export_config('light')
setup_reader(reader)
expect(restored_project_json).to eq(true)
end
it_behaves_like 'restores project successfully',
issues: 1,
labels: 2,
label_with_priorities: 'A project label',
milestones: 1,
first_issue_labels: 1,
services: 1
after do
cleanup_artifacts_from_extract_archive('light')
end
it 'issue system note metadata restored successfully' do
note_content = 'created merge request !1 to address this issue'
......@@ -542,6 +546,20 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
expect(note.system_note_metadata.commit_count).to be_nil
end
context 'when there is an existing build with build token' do
before do
create(:ci_build, token: 'abcd')
end
it_behaves_like 'restores project successfully',
issues: 1,
labels: 2,
label_with_priorities: 'A project label',
milestones: 1,
first_issue_labels: 1,
services: 1
end
context 'when there is an existing build with build token' do
before do
create(:ci_build, token: 'abcd')
......@@ -559,9 +577,15 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
context 'multiple pipelines reference the same external pull request' do
before do
setup_import_export_config('multi_pipeline_ref_one_external_pr')
setup_reader(reader)
expect(restored_project_json).to eq(true)
end
after do
cleanup_artifacts_from_extract_archive('multi_pipeline_ref_one_external_pr')
end
it_behaves_like 'restores project successfully',
issues: 0,
labels: 0,
......@@ -585,11 +609,17 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
before do
setup_import_export_config('light')
setup_reader(reader)
expect(project)
.to receive(:merge_requests)
.and_raise(exception)
end
after do
cleanup_artifacts_from_extract_archive('light')
end
it 'report post import error' do
expect(restored_project_json).to eq(false)
expect(shared.errors).to include('post_import_error')
......@@ -601,6 +631,8 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
before do
setup_import_export_config('light')
setup_reader(reader)
expect(project)
.to receive(:merge_requests)
.and_raise(exception)
......@@ -610,6 +642,10 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
expect(restored_project_json).to eq(true)
end
after do
cleanup_artifacts_from_extract_archive('light')
end
it_behaves_like 'restores project successfully',
issues: 1,
labels: 2,
......@@ -635,6 +671,11 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
context 'when the project has overridden params in import data' do
before do
setup_import_export_config('light')
setup_reader(reader)
end
after do
cleanup_artifacts_from_extract_archive('light')
end
it 'handles string versions of visibility_level' do
......@@ -697,9 +738,15 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
before do
setup_import_export_config('group')
setup_reader(reader)
expect(restored_project_json).to eq(true)
end
after do
cleanup_artifacts_from_extract_archive('group')
end
it_behaves_like 'restores project successfully',
issues: 3,
labels: 2,
......@@ -730,6 +777,11 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
before do
setup_import_export_config('light')
setup_reader(reader)
end
after do
cleanup_artifacts_from_extract_archive('light')
end
it 'does not import any templated services' do
......@@ -776,6 +828,11 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
before do
setup_import_export_config('milestone-iid')
setup_reader(reader)
end
after do
cleanup_artifacts_from_extract_archive('milestone-iid')
end
it 'preserves the project milestone IID' do
......@@ -791,6 +848,11 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
context 'with external authorization classification labels' do
before do
setup_import_export_config('light')
setup_reader(reader)
end
after do
cleanup_artifacts_from_extract_archive('light')
end
it 'converts empty external classification authorization labels to nil' do
......@@ -818,7 +880,8 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
end
before do
allow_any_instance_of(Gitlab::ImportExport::JSON::LegacyReader::File).to receive(:valid?).and_return(true)
allow_any_instance_of(Gitlab::ImportExport::JSON::LegacyReader::File).to receive(:exist?).and_return(true)
allow_any_instance_of(Gitlab::ImportExport::JSON::NdjsonReader).to receive(:exist?).and_return(false)
allow_any_instance_of(Gitlab::ImportExport::JSON::LegacyReader::File).to receive(:tree_hash) { tree_hash }
end
......@@ -901,10 +964,15 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
before do
setup_import_export_config('with_invalid_records')
setup_reader(reader)
subject
end
after do
cleanup_artifacts_from_extract_archive('with_invalid_records')
end
context 'when failures occur because a relation fails to be processed' do
it_behaves_like 'restores project successfully',
issues: 0,
......@@ -928,4 +996,26 @@ describe Gitlab::ImportExport::Project::TreeRestorer do
end
end
end
end
context 'enable ndjson import' do
before_all do
# Test suite `restore project tree` runs `project_tree_restorer.restore` in `before_all`.
# `Enable all features by default for testing` happens in `before(:each)`,
# so the feature flag has to be enabled manually here to allow the ndjson reader.
Feature.enable(:project_import_ndjson)
end
it_behaves_like 'project tree restorer work properly', :legacy_reader
it_behaves_like 'project tree restorer work properly', :ndjson_reader
end
context 'disable ndjson import' do
before do
stub_feature_flags(project_import_ndjson: false)
end
it_behaves_like 'project tree restorer work properly', :legacy_reader
end
end
......@@ -14,7 +14,7 @@ describe Gitlab::ImportExport::RelationTreeRestorer do
let(:user) { create(:user) }
let(:shared) { Gitlab::ImportExport::Shared.new(importable) }
let(:attributes) { {} }
let(:attributes) { relation_reader.consume_attributes(importable_name) }
let(:members_mapper) do
Gitlab::ImportExport::MembersMapper.new(exported_members: {}, user: user, importable: importable)
......@@ -30,7 +30,7 @@ describe Gitlab::ImportExport::RelationTreeRestorer do
relation_factory: relation_factory,
reader: reader,
importable: importable,
importable_path: nil,
importable_path: importable_path,
importable_attributes: attributes
)
end
......@@ -94,21 +94,24 @@ describe Gitlab::ImportExport::RelationTreeRestorer do
end
context 'when restoring a project' do
let(:path) { 'spec/fixtures/lib/gitlab/import_export/complex/project.json' }
let(:importable) { create(:project, :builds_enabled, :issues_disabled, name: 'project', path: 'project') }
let(:importable_name) { 'project' }
let(:importable_path) { 'project' }
let(:object_builder) { Gitlab::ImportExport::Project::ObjectBuilder }
let(:relation_factory) { Gitlab::ImportExport::Project::RelationFactory }
let(:reader) { Gitlab::ImportExport::Reader.new(shared: shared) }
context 'using legacy reader' do
let(:path) { 'spec/fixtures/lib/gitlab/import_export/complex/project.json' }
let(:relation_reader) do
Gitlab::ImportExport::JSON::LegacyReader::File.new(
path,
relation_names: reader.project_relation_names
relation_names: reader.project_relation_names,
allowed_path: 'project'
)
end
let(:attributes) { relation_reader.consume_attributes(nil) }
let(:attributes) { relation_reader.consume_attributes('project') }
it_behaves_like 'import project successfully'
......@@ -118,6 +121,21 @@ describe Gitlab::ImportExport::RelationTreeRestorer do
include_examples 'logging of relations creation'
end
context 'using ndjson reader' do
let(:path) { 'spec/fixtures/lib/gitlab/import_export/complex/tree' }
let(:relation_reader) { Gitlab::ImportExport::JSON::NdjsonReader.new(path) }
before :all do
extract_archive('spec/fixtures/lib/gitlab/import_export/complex', 'tree.tar.gz')
end
after :all do
cleanup_artifacts_from_extract_archive('complex')
end
it_behaves_like 'import project successfully'
end
end
end
......@@ -125,9 +143,16 @@ describe Gitlab::ImportExport::RelationTreeRestorer do
let(:path) { 'spec/fixtures/lib/gitlab/import_export/group_exports/no_children/group.json' }
let(:group) { create(:group) }
let(:importable) { create(:group, parent: group) }
let(:importable_name) { nil }
let(:importable_path) { nil }
let(:object_builder) { Gitlab::ImportExport::Group::ObjectBuilder }
let(:relation_factory) { Gitlab::ImportExport::Group::RelationFactory }
let(:relation_reader) { Gitlab::ImportExport::JSON::LegacyReader::File.new(path, relation_names: reader.group_relation_names) }
let(:relation_reader) do
Gitlab::ImportExport::JSON::LegacyReader::File.new(
path,
relation_names: reader.group_relation_names)
end
let(:reader) do
Gitlab::ImportExport::Reader.new(
shared: shared,
......@@ -135,6 +160,10 @@ describe Gitlab::ImportExport::RelationTreeRestorer do
)
end
it 'restores group tree' do
expect(subject).to eq(true)
end
include_examples 'logging of relations creation'
end
end
......@@ -15,9 +15,39 @@ module ImportExport
export_path = [prefix, 'spec', 'fixtures', 'lib', 'gitlab', 'import_export', name].compact
export_path = File.join(*export_path)
extract_archive(export_path, 'tree.tar.gz')
allow_any_instance_of(Gitlab::ImportExport).to receive(:export_path) { export_path }
end
def extract_archive(path, archive)
if File.exist?(File.join(path, archive))
system("cd #{path}; tar xzvf #{archive} &> /dev/null")
end
end
def cleanup_artifacts_from_extract_archive(name, prefix = nil)
export_path = [prefix, 'spec', 'fixtures', 'lib', 'gitlab', 'import_export', name].compact
export_path = File.join(*export_path)
if File.exist?(File.join(export_path, 'tree.tar.gz'))
system("cd #{export_path}; rm -fr tree &> /dev/null")
end
end
def setup_reader(reader)
case reader
when :legacy_reader
allow_any_instance_of(Gitlab::ImportExport::JSON::LegacyReader::File).to receive(:exist?).and_return(true)
allow_any_instance_of(Gitlab::ImportExport::JSON::NdjsonReader).to receive(:exist?).and_return(false)
when :ndjson_reader
allow_any_instance_of(Gitlab::ImportExport::JSON::LegacyReader::File).to receive(:exist?).and_return(false)
allow_any_instance_of(Gitlab::ImportExport::JSON::NdjsonReader).to receive(:exist?).and_return(true)
else
raise "invalid reader #{reader}. Supported readers: :legacy_reader, :ndjson_reader"
end
end
def fixtures_path
"spec/fixtures/lib/gitlab/import_export"
end
......