Commit 2ad867b5 authored by Heinrich Lee Yu

Merge branch '31101-project-export-with-ndjson' into 'master'

Introduce ndjson writer

See merge request gitlab-org/gitlab!26995
parents a7c20785 02b1b8b8
......@@ -3,50 +3,55 @@
require 'spec_helper'
describe Gitlab::ImportExport::Project::TreeSaver do
describe 'saves the project tree into a json object' do
let_it_be(:user) { create(:user) }
let_it_be(:group) { create(:group) }
let_it_be(:project) { create(:project, group: group) }
let_it_be(:issue) { create(:issue, project: project) }
let_it_be(:design) { create(:design, :with_file, versions_count: 2, issue: issue) }
let_it_be(:note) { create(:diff_note_on_design, noteable: design, project: project, author: user) }
let_it_be(:note2) { create(:note, noteable: issue, project: project, author: user) }
let_it_be(:epic) { create(:epic, group: group) }
let_it_be(:epic_issue) { create(:epic_issue, issue: issue, epic: epic) }
let(:shared) { project.import_export_shared }
let(:export_path) { "#{Dir.tmpdir}/project_tree_saver_spec_ee" }
let(:project_tree_saver) { described_class.new(project: project, current_user: user, shared: shared) }
let(:saved_project_json) do
project_tree_saver.save
project_json(project_tree_saver.full_path)
end
let_it_be(:user) { create(:user) }
let_it_be(:group) { create(:group) }
let_it_be(:project) { create(:project, group: group) }
let_it_be(:issue) { create(:issue, project: project) }
let_it_be(:shared) { project.import_export_shared }
before do
project.add_maintainer(user)
end
let_it_be(:design) { create(:design, :with_file, versions_count: 2, issue: issue) }
let_it_be(:note) { create(:diff_note_on_design, noteable: design, project: project, author: user) }
let_it_be(:note2) { create(:note, noteable: issue, project: project, author: user) }
let_it_be(:epic) { create(:epic, group: group) }
let_it_be(:epic_issue) { create(:epic_issue, issue: issue, epic: epic) }
let_it_be(:export_path) { "#{Dir.tmpdir}/project_tree_saver_spec_ee" }
after :all do
FileUtils.rm_rf(export_path)
end
shared_examples 'EE saves project tree successfully' do |ndjson_enabled|
include ::ImportExport::CommonUtil
after do
FileUtils.rm_rf(export_path)
let_it_be(:project_tree_saver) { described_class.new(project: project, current_user: user, shared: shared) }
let_it_be(:full_path) do
if ndjson_enabled
File.join(shared.export_path, 'tree')
else
File.join(shared.export_path, Gitlab::ImportExport.project_filename)
end
end
it 'saves successfully' do
let_it_be(:exportable_path) { 'project' }
before_all do
Feature.enable(:project_export_as_ndjson) if ndjson_enabled
project.add_maintainer(user)
expect(project_tree_saver.save).to be true
end
describe 'the designs json' do
let(:issue_json) { saved_project_json['issues'].first }
let_it_be(:issue_json) { get_json(full_path, exportable_path, :issues, ndjson_enabled).first }
describe 'the designs json' do
it 'saves issue.designs correctly' do
expect(issue_json['designs'].size).to eq(1)
end
it 'saves issue.design_versions correctly' do
actions = issue_json['design_versions'].map do |v|
v['actions']
end.flatten
actions = issue_json['design_versions'].flat_map { |v| v['actions'] }
expect(issue_json['design_versions'].size).to eq(2)
issue_json['design_versions'].each do |version|
......@@ -61,25 +66,29 @@ describe Gitlab::ImportExport::Project::TreeSaver do
context 'epics' do
it 'has epic_issue' do
expect(saved_project_json['issues'].first['epic_issue']).not_to be_empty
expect(saved_project_json['issues'].first['epic_issue']['id']).to eql(epic_issue.id)
expect(issue_json['epic_issue']).not_to be_empty
expect(issue_json['epic_issue']['id']).to eql(epic_issue.id)
end
it 'has epic' do
expect(saved_project_json['issues'].first['epic_issue']['epic']['title']).to eql(epic.title)
expect(issue_json['epic_issue']['epic']['title']).to eql(epic.title)
end
it 'does not have epic_id' do
expect(saved_project_json['issues'].first['epic_issue']['epic_id']).to be_nil
expect(issue_json['epic_issue']['epic_id']).to be_nil
end
it 'does not have issue_id' do
expect(saved_project_json['issues'].first['epic_issue']['issue_id']).to be_nil
expect(issue_json['epic_issue']['issue_id']).to be_nil
end
end
end
def project_json(filename)
::JSON.parse(IO.read(filename))
context 'with JSON' do
it_behaves_like "EE saves project tree successfully", false
end
context 'with NDJSON' do
it_behaves_like "EE saves project tree successfully", true
end
end
# frozen_string_literal: true
module Gitlab
  module ImportExport
    module JSON
      # Writes export data as individual files below a root directory:
      # one `<exportable_path>.json` file for the root attributes and one
      # `<exportable_path>/<relation>.ndjson` file per relation.
      #
      # Every target file may be written only once; a second write to the
      # same path raises ArgumentError.
      class NdjsonWriter
        include Gitlab::ImportExport::CommandLineUtil

        # dir_path - root directory all files are created under.
        def initialize(dir_path)
          @dir_path = dir_path
        end

        # Intentionally a no-op: each write opens and closes its own file.
        def close
        end

        # Serializes +hash+ as a single JSON document, e.g.:
        #   tree/project.json
        def write_attributes(exportable_path, hash)
          with_file("#{exportable_path}.json") { |io| io.write(hash.to_json) }
        end

        # Serializes a single +value+ into the relation file, e.g.:
        #   tree/project/ci_cd_setting.ndjson
        # No trailing newline is written after the document.
        def write_relation(exportable_path, relation, value)
          with_file(exportable_path, "#{relation}.ndjson") { |io| io.write(value.to_json) }
        end

        # Serializes every element of +items+ on its own line, e.g.:
        #   tree/project/merge_requests.ndjson
        def write_relation_array(exportable_path, relation, items)
          with_file(exportable_path, "#{relation}.ndjson") do |io|
            items.each do |entry|
              io.write(entry.to_json)
              io.write("\n")
            end
          end
        end

        private

        # Joins +path+ onto the root directory, refuses to overwrite an
        # existing file, creates the parent directory, and hands the file
        # (opened in binary write mode) to the given block.
        def with_file(*path, &block)
          target = File.join(@dir_path, *path)

          if File.exist?(target)
            raise ArgumentError, "The #{target} already exist"
          end

          # The parent directory has to exist before the file can be opened.
          mkdir_p(File.dirname(target))

          File.open(target, "wb", &block)
        end
      end
    end
  end
end
......@@ -11,15 +11,9 @@ module Gitlab
@project = project
@current_user = current_user
@shared = shared
@full_path = File.join(@shared.export_path, ImportExport.project_filename)
end
def save
json_writer = ImportExport::JSON::LegacyWriter.new(
@full_path,
allowed_path: "project"
)
ImportExport::JSON::StreamingSerializer.new(
exportable,
reader.project_tree,
......@@ -57,6 +51,18 @@ module Gitlab
# Returns the presenter class constant Projects::ImportExport::ProjectExportPresenter.
# NOTE(review): presumably used to wrap the project before it is serialized —
# the caller is outside this hunk, confirm there.
def presenter_class
Projects::ImportExport::ProjectExportPresenter
end
# Returns the writer used for this export, memoized in @json_writer.
#
# When the :project_export_as_ndjson feature is enabled for the project's
# namespace, an NdjsonWriter rooted at `<export_path>/tree` is built;
# otherwise a LegacyWriter targeting `<export_path>/<project_filename>`
# restricted to the 'project' path.
def json_writer
  @json_writer ||=
    if ::Feature.enabled?(:project_export_as_ndjson, @project.namespace)
      tree_dir = File.join(@shared.export_path, 'tree')
      Gitlab::ImportExport::JSON::NdjsonWriter.new(tree_dir)
    else
      legacy_file = File.join(@shared.export_path, ImportExport.project_filename)
      Gitlab::ImportExport::JSON::LegacyWriter.new(legacy_file, allowed_path: 'project')
    end
end
end
end
end
......
......@@ -38,51 +38,109 @@ describe 'Import/Export - project export integration test', :js do
sign_in(user)
end
it 'exports a project successfully', :sidekiq_might_not_need_inline do
visit edit_project_path(project)
shared_examples 'export file without sensitive words' do
it 'exports a project successfully', :sidekiq_inline do
export_project_and_download_file(page, project)
expect(page).to have_content('Export project')
in_directory_with_expanded_export(project) do |exit_status, tmpdir|
expect(exit_status).to eq(0)
find(:link, 'Export project').send_keys(:return)
project_json_path = File.join(tmpdir, 'project.json')
expect(File).to exist(project_json_path)
visit edit_project_path(project)
project_hash = JSON.parse(IO.read(project_json_path))
expect(page).to have_content('Download export')
sensitive_words.each do |sensitive_word|
found = find_sensitive_attributes(sensitive_word, project_hash)
expect(project.export_status).to eq(:finished)
expect(project.export_file.path).to include('tar.gz')
expect(found).to be_nil, failure_message(found.try(:key_found), found.try(:parent), sensitive_word)
end
end
end
end
context "with legacy export" do
before do
stub_feature_flags(streaming_serializer: false)
stub_feature_flags(project_export_as_ndjson: false)
end
it_behaves_like "export file without sensitive words"
end
context "with streaming serializer" do
before do
stub_feature_flags(streaming_serializer: true)
stub_feature_flags(project_export_as_ndjson: false)
end
it_behaves_like "export file without sensitive words"
end
in_directory_with_expanded_export(project) do |exit_status, tmpdir|
expect(exit_status).to eq(0)
context "with ndjson" do
before do
stub_feature_flags(streaming_serializer: true)
stub_feature_flags(project_export_as_ndjson: true)
end
it 'exports a project successfully', :sidekiq_inline do
export_project_and_download_file(page, project)
in_directory_with_expanded_export(project) do |exit_status, tmpdir|
expect(exit_status).to eq(0)
project_json_path = File.join(tmpdir, 'project.json')
expect(File).to exist(project_json_path)
project_json_path = File.join(tmpdir, 'tree', 'project.json')
expect(File).to exist(project_json_path)
project_hash = JSON.parse(IO.read(project_json_path))
relations = []
relations << JSON.parse(IO.read(project_json_path))
Dir.glob(File.join(tmpdir, 'tree/project', '*.ndjson')) do |rb_filename|
File.foreach(rb_filename) do |line|
json = ActiveSupport::JSON.decode(line)
relations << json
end
end
sensitive_words.each do |sensitive_word|
found = find_sensitive_attributes(sensitive_word, project_hash)
relations.each do |relation_hash|
sensitive_words.each do |sensitive_word|
found = find_sensitive_attributes(sensitive_word, relation_hash)
expect(found).to be_nil, failure_message(found.try(:key_found), found.try(:parent), sensitive_word)
expect(found).to be_nil, failure_message(found.try(:key_found), found.try(:parent), sensitive_word)
end
end
end
end
end
end
def failure_message(key_found, parent, sensitive_word)
<<-MSG
Found a new sensitive word <#{key_found}>, which is part of the hash #{parent.inspect}
def export_project_and_download_file(page, project)
visit edit_project_path(project)
If you think this information shouldn't get exported, please exclude the model or attribute in IMPORT_EXPORT_CONFIG.
expect(page).to have_content('Export project')
Otherwise, please add the exception to +safe_list+ in CURRENT_SPEC using #{sensitive_word} as the key and the
correspondent hash or model as the value.
find(:link, 'Export project').send_keys(:return)
Also, if the attribute is a generated unique token, please add it to RelationFactory::TOKEN_RESET_MODELS if it needs to be
reset (to prevent duplicate column problems while importing to the same instance).
visit edit_project_path(project)
IMPORT_EXPORT_CONFIG: #{Gitlab::ImportExport.config_file}
CURRENT_SPEC: #{__FILE__}
MSG
end
expect(page).to have_content('Download export')
expect(project.export_status).to eq(:finished)
expect(project.export_file.path).to include('tar.gz')
end
# Builds the multi-line explanation attached to a failed "no sensitive words"
# expectation.
#
# key_found      - interpolated as the offending key in the first line.
# parent         - the hash containing that key; rendered with #inspect.
# sensitive_word - the word being searched for; suggested as the safe_list key.
#
# The message also interpolates Gitlab::ImportExport.config_file and this
# spec's own path (__FILE__) so the reader knows which files to edit.
def failure_message(key_found, parent, sensitive_word)
<<-MSG
Found a new sensitive word <#{key_found}>, which is part of the hash #{parent.inspect}
If you think this information shouldn't get exported, please exclude the model or attribute in IMPORT_EXPORT_CONFIG.
Otherwise, please add the exception to +safe_list+ in CURRENT_SPEC using #{sensitive_word} as the key and the
correspondent hash or model as the value.
Also, if the attribute is a generated unique token, please add it to RelationFactory::TOKEN_RESET_MODELS if it needs to be
reset (to prevent duplicate column problems while importing to the same instance).
IMPORT_EXPORT_CONFIG: #{Gitlab::ImportExport.config_file}
CURRENT_SPEC: #{__FILE__}
MSG
end
end
......@@ -32,6 +32,8 @@ describe 'forked project import' do
end
before do
stub_feature_flags(project_export_as_ndjson: false)
allow_next_instance_of(Gitlab::ImportExport) do |instance|
allow(instance).to receive(:storage_path).and_return(export_path)
end
......
......@@ -20,6 +20,10 @@ describe Gitlab::ImportExport do
let(:json_fixture) { 'complex' }
before do
stub_feature_flags(project_export_as_ndjson: false)
end
it 'yields the initial tree when importing and exporting it again' do
project = create(:project, creator: create(:user, :admin))
......
# frozen_string_literal: true
require "spec_helper"
# Unit spec for the NDJSON export writer. Each example writes below a scratch
# `tree` directory in the system tmpdir and reads the files back with
# consume_attributes / consume_relations.
describe Gitlab::ImportExport::JSON::NdjsonWriter do
  include ImportExport::CommonUtil

  let(:path) { "#{Dir.tmpdir}/ndjson_writer_spec/tree" }
  let(:exportable_path) { 'projects' }

  subject { described_class.new(path) }

  after do
    # Remove the whole scratch directory (the parent of `tree`), so no empty
    # `ndjson_writer_spec` directory is left behind in tmpdir.
    FileUtils.rm_rf(File.dirname(path))
  end

  describe "#write_attributes" do
    it "writes correct json to root" do
      expected_hash = { "key" => "value_1", "key_1" => "value_2" }

      subject.write_attributes(exportable_path, expected_hash)

      expect(consume_attributes(path, exportable_path)).to eq(expected_hash)
    end
  end

  describe "#write_relation" do
    context "when single relation is serialized" do
      it "appends json in correct file" do
        relation = "relation"
        value = { "key" => "value_1", "key_1" => "value_1" }

        subject.write_relation(exportable_path, relation, value)

        expect(consume_relations(path, exportable_path, relation)).to eq([value])
      end
    end

    context "when single relation is already serialized" do
      it "raises exception" do
        values = [{ "key" => "value_1", "key_1" => "value_1" }, { "key" => "value_2", "key_1" => "value_2" }]
        relation = "relation"
        file_path = File.join(path, exportable_path, "#{relation}.ndjson")

        subject.write_relation(exportable_path, relation, values[0])

        # Pin the exception class as well as the message, so an unrelated
        # error that happens to carry the same text cannot satisfy the matcher.
        expect { subject.write_relation(exportable_path, relation, values[1]) }
          .to raise_exception(ArgumentError, "The #{file_path} already exist")
      end
    end
  end

  describe "#write_relation_array" do
    it "writes json in correct files" do
      values = [{ "key" => "value_1", "key_1" => "value_1" }, { "key" => "value_2", "key_1" => "value_2" }]
      relations = %w(relation1 relation2)

      # The writer is handed an enumerator rather than a plain array.
      relations.each do |relation|
        subject.write_relation_array(exportable_path, relation, values.to_enum)
      end
      subject.close

      relations.each do |relation|
        expect(consume_relations(path, exportable_path, relation)).to eq(values)
      end
    end
  end
end
......@@ -26,6 +26,21 @@ module ImportExport
"tmp/tests/gitlab-test/import_export"
end
# Reads exported data back for assertions, hiding the on-disk format.
#
# path            - export location passed through to the reader helpers.
# exportable_path - name of the exported root (used only for NDJSON).
# key             - :projects for the root attributes, otherwise a relation name.
# ndjson_enabled  - truthy to read the NDJSON layout, falsy for the single JSON file.
#
# For NDJSON the result comes from consume_attributes (root) or
# consume_relations (relation). For the legacy layout the whole document is
# returned for :projects, otherwise the entry stored under key.to_s.
def get_json(path, exportable_path, key, ndjson_enabled)
  wants_root = key == :projects

  if ndjson_enabled
    return consume_attributes(path, exportable_path) if wants_root

    return consume_relations(path, exportable_path, key)
  end

  legacy_tree = project_json(path)
  wants_root ? legacy_tree : legacy_tree[key.to_s]
end
def restore_then_save_project(project, import_path:, export_path:)
project_restorer = get_project_restorer(project, import_path)
project_saver = get_project_saver(project, export_path)
......@@ -50,5 +65,30 @@ module ImportExport
allow(shared).to receive(:export_path).and_return(path)
end
end
# Decodes `<dir_path>/<exportable_path>.json` and returns the result,
# or nil when that file does not exist.
def consume_attributes(dir_path, exportable_path)
  attributes_file = File.join(dir_path, "#{exportable_path}.json")

  if File.exist?(attributes_file)
    ActiveSupport::JSON.decode(IO.read(attributes_file))
  end
end
# Decodes every line of `<dir_path>/<exportable_path>/<key>.ndjson`.
#
# Returns nil when the file does not exist. For key == :project_feature the
# first decoded line is returned on its own; for any other key the decoded
# lines are returned as one flattened array.
def consume_relations(dir_path, exportable_path, key)
  ndjson_file = File.join(dir_path, exportable_path, "#{key}.ndjson")
  return unless File.exist?(ndjson_file)

  decoded_rows = File.foreach(ndjson_file).map { |row| ActiveSupport::JSON.decode(row) }
  return decoded_rows.first if key == :project_feature

  decoded_rows.flatten
end
# Reads the whole file at +filename+ and returns its decoded JSON content.
def project_json(filename)
  raw_document = IO.read(filename)
  ActiveSupport::JSON.decode(raw_document)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment