Commit 773ba745 authored by Robert Speicher

Merge branch '208803-streamin-serializer-and-writer' into 'master'

Resolve "Export via streaming serializer, introduce "Writer" abstraction"

Closes #208803

See merge request gitlab-org/gitlab!26501
parents f8585f97 38f289cf
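
In short: instead of serializing the whole project into one in-memory hash and dumping it at the end, the export now streams relations into the JSON file as they are read. A condensed, illustrative sketch of how the pieces added in this MR fit together, mirroring the Project::TreeSaver change further down (variable names and the file path are placeholders, not code from the diff):

# Illustrative only - condensed from Project::TreeSaver#save below.
# `reader` is a Gitlab::ImportExport::Reader; `project_tree` returns the include/preload schema.
exportable  = project.present(presenter_class: Projects::ImportExport::ProjectExportPresenter,
                              current_user: current_user)
json_writer = Gitlab::ImportExport::JSON::LegacyWriter.new(full_path) # full_path = export file path

Gitlab::ImportExport::JSON::StreamingSerializer
  .new(exportable, reader.project_tree, json_writer)
  .execute
json_writer.close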
# frozen_string_literal: true

module Projects
  module ImportExport
    class ProjectExportPresenter < Gitlab::View::Presenter::Delegated
      include ActiveModel::Serializers::JSON

      presents :project

      def project_members
        super + converted_group_members
      end

      def description
        self.respond_to?(:override_description) ? override_description : super
      end

      private

      def converted_group_members
        group_members.each do |group_member|
          group_member.source_type = 'Project' # Make group members project members of the future import
        end
      end

      # rubocop: disable CodeReuse/ActiveRecord
      def group_members
        return [] unless current_user.can?(:admin_group, project.group)

        # We need `.where.not(user_id: nil)` here otherwise when a group has an
        # invitee, it would make the following query return 0 rows since a NULL
        # user_id would be present in the subquery
        # See http://stackoverflow.com/questions/129077/not-in-clause-and-null-values
        non_null_user_ids = project.project_members.where.not(user_id: nil).select(:user_id)

        GroupMembersFinder.new(project.group).execute.where.not(user_id: non_null_user_ids)
      end
      # rubocop: enable CodeReuse/ActiveRecord
    end
  end
end
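
The presenter is not used on its own; the updated Project::TreeSaver further down in this diff presents the project with it before serializing. Roughly, assuming a project and a user are in scope (override_description is only set when an export description override was requested; the literal value here is illustrative):

exportable = project.present(
  presenter_class: Projects::ImportExport::ProjectExportPresenter,
  current_user: user,
  override_description: 'optional override' # only passed when params[:description] is present
)

exportable.description      # => 'optional override' here; falls back to project.description otherwise
exportable.project_members  # => project members plus permitted group members with source_type 'Project'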
@@ -54,7 +54,16 @@ module Projects
       end

       def project_tree_saver
-        Gitlab::ImportExport::Project::TreeSaver.new(project: project, current_user: current_user, shared: shared, params: params)
+        tree_saver_class.new(project: project, current_user: current_user, shared: shared, params: params)
+      end
+
+      def tree_saver_class
+        if ::Feature.enabled?(:streaming_serializer, project)
+          Gitlab::ImportExport::Project::TreeSaver
+        else
+          # Once we remove :streaming_serializer feature flag, Project::LegacyTreeSaver should be removed as well
+          Gitlab::ImportExport::Project::LegacyTreeSaver
+        end
       end

       def uploads_saver
@@ -185,6 +185,6 @@ describe Gitlab::ImportExport::Group::TreeSaver do
   end

   def group_json(filename)
-    JSON.parse(IO.read(filename))
+    ::JSON.parse(IO.read(filename))
   end
 end
@@ -80,6 +80,6 @@ describe Gitlab::ImportExport::Project::TreeSaver do
   end

   def project_json(filename)
-    JSON.parse(IO.read(filename))
+    ::JSON.parse(IO.read(filename))
   end
 end
@@ -49,7 +49,7 @@ module Gitlab
         end

         def tree_saver
-          @tree_saver ||= RelationTreeSaver.new
+          @tree_saver ||= LegacyRelationTreeSaver.new
         end
       end
     end
# frozen_string_literal: true

module Gitlab
  module ImportExport
    module JSON
      class LegacyWriter
        include Gitlab::ImportExport::CommandLineUtil

        attr_reader :path

        def initialize(path)
          @path = path
          @last_array = nil
          @keys = Set.new

          mkdir_p(File.dirname(@path))
          file.write('{}')
        end

        def close
          @file&.close
          @file = nil
        end

        def set(hash)
          hash.each do |key, value|
            write(key, value)
          end
        end

        def write(key, value)
          raise ArgumentError, "key '#{key}' already written" if @keys.include?(key)

          # rewind by one byte, to overwrite '}'
          file.pos = file.size - 1

          file.write(',') if @keys.any?
          file.write(key.to_json)
          file.write(':')
          file.write(value.to_json)
          file.write('}')

          @keys.add(key)
          @last_array = nil
          @last_array_count = nil
        end

        def append(key, value)
          unless @last_array == key
            write(key, [])

            @last_array = key
            @last_array_count = 0
          end

          # rewind by two bytes, to overwrite ']}'
          file.pos = file.size - 2

          file.write(',') if @last_array_count > 0
          file.write(value.to_json)
          file.write(']}')
          @last_array_count += 1
        end

        private

        def file
          @file ||= File.open(@path, "wb")
        end
      end
    end
  end
end
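
The writer keeps the file valid JSON after every call by seeking back over the trailing '}' (or ']}' when appending to the current array) and rewriting it. A minimal usage sketch with an illustrative path; set/write store whole values, while append accumulates an array under one key:

writer = Gitlab::ImportExport::JSON::LegacyWriter.new('/tmp/export/project.json')

writer.set('description' => 'demo')     # file now: {"description":"demo"}
writer.append('issues', { 'iid' => 1 }) # file now: {"description":"demo","issues":[{"iid":1}]}
writer.append('issues', { 'iid' => 2 }) # file now: {"description":"demo","issues":[{"iid":1},{"iid":2}]}
writer.close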
# frozen_string_literal: true

module Gitlab
  module ImportExport
    module JSON
      class StreamingSerializer
        include Gitlab::ImportExport::CommandLineUtil

        BATCH_SIZE = 100

        class Raw < String
          def to_json(*_args)
            to_s
          end
        end

        def initialize(exportable, relations_schema, json_writer)
          @exportable = exportable
          @relations_schema = relations_schema
          @json_writer = json_writer
        end

        def execute
          serialize_root

          includes.each do |relation_definition|
            serialize_relation(relation_definition)
          end
        end

        private

        attr_reader :json_writer, :relations_schema, :exportable

        def serialize_root
          attributes = exportable.as_json(
            relations_schema.merge(include: nil, preloads: nil))
          json_writer.set(attributes)
        end

        def serialize_relation(definition)
          raise ArgumentError, 'definition needs to be Hash' unless definition.is_a?(Hash)
          raise ArgumentError, 'definition needs to have exactly one Hash element' unless definition.one?

          key, options = definition.first

          record = exportable.public_send(key) # rubocop: disable GitlabSecurity/PublicSend
          if record.is_a?(ActiveRecord::Relation)
            serialize_many_relations(key, record, options)
          else
            serialize_single_relation(key, record, options)
          end
        end

        def serialize_many_relations(key, records, options)
          key_preloads = preloads&.dig(key)
          records = records.preload(key_preloads) if key_preloads

          records.find_each(batch_size: BATCH_SIZE) do |record|
            json = Raw.new(record.to_json(options))
            json_writer.append(key, json)
          end
        end

        def serialize_single_relation(key, record, options)
          json = Raw.new(record.to_json(options))

          json_writer.write(key, json)
        end

        def includes
          relations_schema[:include]
        end

        def preloads
          relations_schema[:preload]
        end
      end
    end
  end
end
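
The serializer writes the root attributes with json_writer.set, then walks each entry of relations_schema[:include]: ActiveRecord relations are preloaded per relations_schema[:preload] and streamed in batches of BATCH_SIZE via find_each, appending one record at a time, while anything else is written as a single value. A sketch with an illustrative schema and path; the include/preload shape matches the one used in the specs in this MR:

relations_schema = {
  include: [{ issues: { include: [] } }], # relations to serialize, in order
  preload: { issues: nil }                # per-relation preloads for the batched queries
}

json_writer = Gitlab::ImportExport::JSON::LegacyWriter.new('/tmp/export/project.json')

Gitlab::ImportExport::JSON::StreamingSerializer
  .new(project, relations_schema, json_writer)
  .execute
json_writer.close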
@@ -2,7 +2,7 @@
 module Gitlab
   module ImportExport
-    class RelationTreeSaver
+    class LegacyRelationTreeSaver
       include Gitlab::ImportExport::CommandLineUtil

       def serialize(exportable, relations_tree)
# frozen_string_literal: true

module Gitlab
  module ImportExport
    module Project
      class LegacyTreeSaver
        attr_reader :full_path

        def initialize(project:, current_user:, shared:, params: {})
          @params = params
          @project = project
          @current_user = current_user
          @shared = shared
          @full_path = File.join(@shared.export_path, ImportExport.project_filename)
        end

        def save
          project_tree = tree_saver.serialize(@project, reader.project_tree)
          fix_project_tree(project_tree)
          tree_saver.save(project_tree, @shared.export_path, ImportExport.project_filename)

          true
        rescue => e
          @shared.error(e)
          false
        end

        private

        # Aware that the resulting hash needs to be pure-hash and
        # does not include any AR objects anymore, only objects that run `.to_json`
        def fix_project_tree(project_tree)
          if @params[:description].present?
            project_tree['description'] = @params[:description]
          end

          project_tree['project_members'] += group_members_array
        end

        def reader
          @reader ||= Gitlab::ImportExport::Reader.new(shared: @shared)
        end

        def group_members_array
          group_members.as_json(reader.group_members_tree).each do |group_member|
            group_member['source_type'] = 'Project' # Make group members project members of the future import
          end
        end

        def group_members
          return [] unless @current_user.can?(:admin_group, @project.group)

          # We need `.where.not(user_id: nil)` here otherwise when a group has an
          # invitee, it would make the following query return 0 rows since a NULL
          # user_id would be present in the subquery
          # See http://stackoverflow.com/questions/129077/not-in-clause-and-null-values
          non_null_user_ids = @project.project_members.where.not(user_id: nil).select(:user_id)

          GroupMembersFinder.new(@project.group).execute.where.not(user_id: non_null_user_ids)
        end

        def tree_saver
          @tree_saver ||= Gitlab::ImportExport::LegacyRelationTreeSaver.new
        end
      end
    end
  end
end
@@ -15,52 +15,40 @@ module Gitlab
         end

         def save
-          project_tree = tree_saver.serialize(@project, reader.project_tree)
-          fix_project_tree(project_tree)
-          tree_saver.save(project_tree, @shared.export_path, ImportExport.project_filename)
+          json_writer = ImportExport::JSON::LegacyWriter.new(@full_path)
+
+          serializer = ImportExport::JSON::StreamingSerializer.new(exportable, reader.project_tree, json_writer)
+          serializer.execute

           true
         rescue => e
           @shared.error(e)
           false
+        ensure
+          json_writer&.close
         end

         private

-        # Aware that the resulting hash needs to be pure-hash and
-        # does not include any AR objects anymore, only objects that run `.to_json`
-        def fix_project_tree(project_tree)
-          if @params[:description].present?
-            project_tree['description'] = @params[:description]
-          end
-
-          project_tree['project_members'] += group_members_array
-        end
-
         def reader
           @reader ||= Gitlab::ImportExport::Reader.new(shared: @shared)
         end

-        def group_members_array
-          group_members.as_json(reader.group_members_tree).each do |group_member|
-            group_member['source_type'] = 'Project' # Make group members project members of the future import
-          end
+        def exportable
+          @project.present(exportable_params)
         end

-        def group_members
-          return [] unless @current_user.can?(:admin_group, @project.group)
-
-          # We need `.where.not(user_id: nil)` here otherwise when a group has an
-          # invitee, it would make the following query return 0 rows since a NULL
-          # user_id would be present in the subquery
-          # See http://stackoverflow.com/questions/129077/not-in-clause-and-null-values
-          non_null_user_ids = @project.project_members.where.not(user_id: nil).select(:user_id)
-
-          GroupMembersFinder.new(@project.group).execute.where.not(user_id: non_null_user_ids)
+        def exportable_params
+          params = {
+            presenter_class: presenter_class,
+            current_user: @current_user
+          }
+          params[:override_description] = @params[:description] if @params[:description].present?
+          params
         end

-        def tree_saver
-          @tree_saver ||= RelationTreeSaver.new
+        def presenter_class
+          Projects::ImportExport::ProjectExportPresenter
         end
       end
     end
@@ -19,7 +19,6 @@ namespace :gitlab do
       if ENV['EXPORT_DEBUG'].present?
         ActiveRecord::Base.logger = logger
-        Gitlab::Metrics::Exporter::SidekiqExporter.instance.start
         logger.level = Logger::DEBUG
       else
         logger.level = Logger::INFO
@@ -23,7 +23,6 @@ namespace :gitlab do
       if ENV['IMPORT_DEBUG'].present?
         ActiveRecord::Base.logger = logger
-        Gitlab::Metrics::Exporter::SidekiqExporter.instance.start
         logger.level = Logger::DEBUG
       else
         logger.level = Logger::INFO
@@ -197,6 +197,6 @@ describe Gitlab::ImportExport::Group::TreeSaver do
   end

   def group_json(filename)
-    JSON.parse(IO.read(filename))
+    ::JSON.parse(IO.read(filename))
   end
 end
# frozen_string_literal: true

require 'spec_helper'

describe Gitlab::ImportExport::JSON::LegacyWriter do
  let(:path) { "#{Dir.tmpdir}/legacy_writer_spec/test.json" }

  subject { described_class.new(path) }

  after do
    FileUtils.rm_rf(path)
  end

  describe "#write" do
    context "when key is already written" do
      it "raises exception" do
        key = "key"
        value = "value"
        subject.write(key, value)

        expect { subject.write(key, "new value") }.to raise_exception("key '#{key}' already written")
      end
    end

    context "when key is not already written" do
      context "when multiple key value pairs are stored" do
        it "writes correct json" do
          expected_hash = { "key" => "value_1", "key_1" => "value_2" }
          expected_hash.each do |key, value|
            subject.write(key, value)
          end
          subject.close

          expect(saved_json(path)).to eq(expected_hash)
        end
      end
    end
  end

  describe "#append" do
    context "when key is already written" do
      it "appends values under a given key" do
        key = "key"
        values = %w(value_1 value_2)
        expected_hash = { key => values }
        values.each do |value|
          subject.append(key, value)
        end
        subject.close

        expect(saved_json(path)).to eq(expected_hash)
      end
    end

    context "when key is not already written" do
      it "writes correct json" do
        expected_hash = { "key" => ["value"] }
        subject.append("key", "value")
        subject.close

        expect(saved_json(path)).to eq(expected_hash)
      end
    end
  end

  describe "#set" do
    it "writes correct json" do
      expected_hash = { "key" => "value_1", "key_1" => "value_2" }
      subject.set(expected_hash)
      subject.close

      expect(saved_json(path)).to eq(expected_hash)
    end
  end

  def saved_json(filename)
    ::JSON.parse(IO.read(filename))
  end
end
@@ -2,7 +2,7 @@
 require 'spec_helper'

-describe Gitlab::ImportExport::RelationTreeSaver do
+describe Gitlab::ImportExport::LegacyRelationTreeSaver do
   let(:exportable) { create(:group) }
   let(:relation_tree_saver) { described_class.new }
   let(:tree) { {} }
@@ -25,57 +25,6 @@ describe Gitlab::ImportExport::Project::TreeSaver do
       expect(project_tree_saver.save).to be true
     end

-    context ':export_fast_serialize feature flag checks' do
-      before do
-        expect(Gitlab::ImportExport::Reader).to receive(:new).with(shared: shared).and_return(reader)
-        expect(reader).to receive(:project_tree).and_return(project_tree)
-      end
-
-      let(:serializer) { instance_double('Gitlab::ImportExport::FastHashSerializer') }
-      let(:reader) { instance_double('Gitlab::ImportExport::Reader') }
-      let(:project_tree) do
-        {
-          include: [{ issues: { include: [] } }],
-          preload: { issues: nil }
-        }
-      end
-
-      context 'when :export_fast_serialize feature is enabled' do
-        before do
-          stub_feature_flags(export_fast_serialize: true)
-        end
-
-        it 'uses FastHashSerializer' do
-          expect(Gitlab::ImportExport::FastHashSerializer)
-            .to receive(:new)
-            .with(project, project_tree)
-            .and_return(serializer)
-
-          expect(serializer).to receive(:execute)
-
-          project_tree_saver.save
-        end
-      end
-
-      context 'when :export_fast_serialize feature is disabled' do
-        before do
-          stub_feature_flags(export_fast_serialize: false)
-        end
-
-        it 'is serialized via built-in `as_json`' do
-          expect(project).to receive(:as_json).with(project_tree)
-
-          project_tree_saver.save
-        end
-      end
-    end
-
-    # It is mostly duplicated in
-    # `spec/lib/gitlab/import_export/fast_hash_serializer_spec.rb`
-    # except:
-    # context 'with description override' do
-    # context 'group members' do
-    # ^ These are specific for the Project::TreeSaver
     context 'JSON' do
       let(:saved_project_json) do
         project_tree_saver.save
@@ -392,6 +341,6 @@ describe Gitlab::ImportExport::Project::TreeSaver do
   end

   def project_json(filename)
-    JSON.parse(IO.read(filename))
+    ::JSON.parse(IO.read(filename))
   end
 end
# frozen_string_literal: true

require 'spec_helper'

describe Projects::ImportExport::ProjectExportPresenter do
  let_it_be(:group) { create(:group) }
  let_it_be(:project) { create(:project, group: group) }
  let_it_be(:user) { create(:user) }

  subject { described_class.new(project, current_user: user) }

  describe '#description' do
    context "override_description not provided" do
      it "keeps original description" do
        expect(subject.description).to eq(project.description)
      end
    end

    context "override_description provided" do
      let(:description) { "overridden description" }

      subject { described_class.new(project, current_user: user, override_description: description) }

      it "overrides description" do
        expect(subject.description).to eq(description)
      end
    end
  end

  describe '#as_json' do
    context "override_description not provided" do
      it "keeps original description" do
        expect(subject.as_json["description"]).to eq(project.description)
      end
    end

    context "override_description provided" do
      let(:description) { "overridden description" }

      subject { described_class.new(project, current_user: user, override_description: description) }

      it "overrides description" do
        expect(subject.as_json["description"]).to eq(description)
      end
    end
  end

  describe '#project_members' do
    let(:user2) { create(:user, email: 'group@member.com') }
    let(:member_emails) do
      subject.project_members.map do |pm|
        pm.user.email
      end
    end

    before do
      group.add_developer(user2)
    end

    it 'does not export group members if it has no permission' do
      group.add_developer(user)

      expect(member_emails).not_to include('group@member.com')
    end

    it 'does not export group members as maintainer' do
      group.add_maintainer(user)

      expect(member_emails).not_to include('group@member.com')
    end

    it 'exports group members as group owner' do
      group.add_owner(user)

      expect(member_emails).to include('group@member.com')
    end

    context 'as admin' do
      let(:user) { create(:admin) }

      it 'exports group members as admin' do
        expect(member_emails).to include('group@member.com')
      end

      it 'exports group members as project members' do
        member_types = subject.project_members.map { |pm| pm.source_type }

        expect(member_types).to all(eq('Project'))
      end
    end
  end
end
@@ -26,11 +26,29 @@ describe Projects::ImportExport::ExportService do
       service.execute
     end

+    context 'when :streaming_serializer feature is enabled' do
+      before do
+        stub_feature_flags(streaming_serializer: true)
+      end
+
       it 'saves the models' do
         expect(Gitlab::ImportExport::Project::TreeSaver).to receive(:new).and_call_original

         service.execute
       end
+    end
+
+    context 'when :streaming_serializer feature is disabled' do
+      before do
+        stub_feature_flags(streaming_serializer: false)
+      end
+
+      it 'saves the models' do
+        expect(Gitlab::ImportExport::Project::LegacyTreeSaver).to receive(:new).and_call_original
+
+        service.execute
+      end
+    end

     it 'saves the uploads' do
       expect(Gitlab::ImportExport::UploadsSaver).to receive(:new).and_call_original