Commit 499be639 authored by Matthias Kaeppler's avatar Matthias Kaeppler

Move JSON parsing + processing into same class

This moves responsibilities closer to each other.

Also fix the problem with the group being hard-coded.
parent 197eb789
...@@ -2,42 +2,17 @@ ...@@ -2,42 +2,17 @@
module Gitlab module Gitlab
module ImportExport module ImportExport
# this is mostly useful for testing and comparing results between class ProjectTreeLoader
# processed and unprocessed project trees without changing the def load(path, dedup_entries: false)
# structure of the caller tree_hash = ActiveSupport::JSON.decode(IO.read(path))
class IdentityProjectTreeProcessor
def process(tree_hash)
tree_hash
end
end
# optimizes the project tree for memory efficiency by deduplicating entries
class ProjectTreeProcessor
LARGE_PROJECT_FILE_SIZE_BYTES = 500.megabyte
class << self
# some optimizations only yield amortized gains above a certain
# project size, see https://gitlab.com/gitlab-org/gitlab/issues/27070
def new_for_file(project_json_path)
if Feature.enabled?(:dedup_project_import_metadata, Group.find_by_path('gitlab-org')) &&
large_project?(project_json_path)
ProjectTreeProcessor.new
else
IdentityProjectTreeProcessor.new
end
end
private
def large_project?(project_json_path) if dedup_entries
File.size(project_json_path) >= LARGE_PROJECT_FILE_SIZE_BYTES dedup_tree(tree_hash)
else
tree_hash
end end
end end
def process(tree_hash)
dedup_tree(tree_hash)
end
private private
# This function removes duplicate entries from the given tree recursively # This function removes duplicate entries from the given tree recursively
......
...@@ -3,20 +3,21 @@ ...@@ -3,20 +3,21 @@
module Gitlab module Gitlab
module ImportExport module ImportExport
class ProjectTreeRestorer class ProjectTreeRestorer
LARGE_PROJECT_FILE_SIZE_BYTES = 500.megabyte
attr_reader :user attr_reader :user
attr_reader :shared attr_reader :shared
attr_reader :project attr_reader :project
def initialize(user:, shared:, project:, project_tree_processor: nil) def initialize(user:, shared:, project:)
@path = File.join(shared.export_path, 'project.json')
@user = user @user = user
@shared = shared @shared = shared
@project = project @project = project
@project_tree_processor = project_tree_processor || ProjectTreeProcessor.new_for_file(@path) @tree_loader = ProjectTreeLoader.new
end end
def restore def restore
@tree_hash = @project_tree_processor.process(read_tree_hash) @tree_hash = read_tree_hash
@project_members = @tree_hash.delete('project_members') @project_members = @tree_hash.delete('project_members')
RelationRenameService.rename(@tree_hash) RelationRenameService.rename(@tree_hash)
...@@ -35,9 +36,16 @@ module Gitlab ...@@ -35,9 +36,16 @@ module Gitlab
private private
def large_project?(path)
File.size(path) >= LARGE_PROJECT_FILE_SIZE_BYTES
end
def read_tree_hash def read_tree_hash
json = IO.read(@path) path = File.join(@shared.export_path, 'project.json')
ActiveSupport::JSON.decode(json) dedup_entries = large_project?(path) &&
Feature.enabled?(:dedup_project_import_metadata, project.group)
@tree_loader.load(path, dedup_entries: dedup_entries)
rescue => e rescue => e
Rails.logger.error("Import/Export error: #{e.message}") # rubocop:disable Gitlab/RailsLogger Rails.logger.error("Import/Export error: #{e.message}") # rubocop:disable Gitlab/RailsLogger
raise Gitlab::ImportExport::Error.new('Incorrect JSON format') raise Gitlab::ImportExport::Error.new('Incorrect JSON format')
......
{
"simple": 42,
"duped_hash_with_id": {
"id": 0,
"v1": 1
},
"duped_hash_no_id": {
"v1": 1
},
"duped_array": [
"v2"
],
"array": [
{
"duped_hash_with_id": {
"id": 0,
"v1": 1
}
},
{
"duped_array": [
"v2"
]
},
{
"duped_hash_no_id": {
"v1": 1
}
}
],
"nested": {
"duped_hash_with_id": {
"id": 0,
"v1": 1
},
"duped_array": [
"v2"
],
"array": [
"don't touch"
]
}
}
\ No newline at end of file
# frozen_string_literal: true

require 'spec_helper'

describe Gitlab::ImportExport::ProjectTreeLoader do
  # Fixture with hashes and arrays duplicated at several nesting levels,
  # used to exercise the optional de-duplication pass.
  let(:fixture) { 'spec/fixtures/lib/gitlab/import_export/with_duplicates.json' }
  let(:project_tree) { JSON.parse(File.read(fixture)) }

  context 'without de-duplicating entries' do
    let(:parsed_tree) { subject.load(fixture) }

    it 'parses the JSON into the expected tree' do
      expect(parsed_tree).to eq(project_tree)
    end

    it 'does not de-duplicate entries' do
      expect(parsed_tree['duped_hash_with_id'])
        .not_to be(parsed_tree['array'][0]['duped_hash_with_id'])
    end
  end

  context 'with de-duplicating entries' do
    let(:parsed_tree) { subject.load(fixture, dedup_entries: true) }

    it 'parses the JSON into the expected tree' do
      expect(parsed_tree).to eq(project_tree)
    end

    # Equal subtrees should collapse to the very same object (identity,
    # hence `be` rather than `eq`).
    it 'de-duplicates equal values' do
      expect(parsed_tree['duped_hash_with_id'])
        .to be(parsed_tree['array'][0]['duped_hash_with_id'])
      expect(parsed_tree['duped_hash_with_id'])
        .to be(parsed_tree['nested']['duped_hash_with_id'])
      expect(parsed_tree['duped_array'])
        .to be(parsed_tree['array'][1]['duped_array'])
      expect(parsed_tree['duped_array'])
        .to be(parsed_tree['nested']['duped_array'])
    end

    # Hashes lacking an `id` key stay equal but remain distinct objects.
    it 'does not de-duplicate hashes without IDs' do
      expect(parsed_tree['duped_hash_no_id'])
        .to eq(parsed_tree['array'][2]['duped_hash_no_id'])
      expect(parsed_tree['duped_hash_no_id'])
        .not_to be(parsed_tree['array'][2]['duped_hash_no_id'])
    end

    it 'keeps single entries intact' do
      expect(parsed_tree['simple']).to eq(42)
      expect(parsed_tree['nested']['array']).to eq(["don't touch"])
    end
  end
end
# frozen_string_literal: true

require 'spec_helper'

describe Gitlab::ImportExport::ProjectTreeProcessor do
  # In-memory tree with hashes and arrays duplicated at several nesting
  # levels, mirroring the structures found in exported project JSON.
  let(:tree) do
    {
      simple: 42,
      duped_hash_with_id: { id: 0, v1: 1 },
      duped_hash_no_id: { v1: 1 },
      duped_array: ["v2"],
      array: [
        { duped_hash_with_id: { id: 0, v1: 1 } },
        { duped_array: ["v2"] },
        { duped_hash_no_id: { v1: 1 } }
      ],
      nested: {
        duped_hash_with_id: { id: 0, v1: 1 },
        duped_array: ["v2"],
        array: ["don't touch"]
      }
    }.with_indifferent_access
  end

  let(:processed_tree) { subject.process(tree) }

  # Processing happens in place: the same tree object comes back.
  it 'returns the processed tree' do
    expect(processed_tree).to be(tree)
  end

  # Equal subtrees should collapse to the very same object (identity,
  # hence `be` rather than `eq`).
  it 'de-duplicates equal values' do
    expect(processed_tree[:duped_hash_with_id])
      .to be(processed_tree[:array][0][:duped_hash_with_id])
    expect(processed_tree[:duped_hash_with_id])
      .to be(processed_tree[:nested][:duped_hash_with_id])
    expect(processed_tree[:duped_array])
      .to be(processed_tree[:array][1][:duped_array])
    expect(processed_tree[:duped_array])
      .to be(processed_tree[:nested][:duped_array])
  end

  # Hashes lacking an `id` key stay equal but remain distinct objects.
  it 'does not de-duplicate hashes without IDs' do
    expect(processed_tree[:duped_hash_no_id])
      .to eq(processed_tree[:array][2][:duped_hash_no_id])
    expect(processed_tree[:duped_hash_no_id])
      .not_to be(processed_tree[:array][2][:duped_hash_no_id])
  end

  it 'keeps single entries intact' do
    expect(processed_tree[:simple]).to eq(42)
    expect(processed_tree[:nested][:array]).to eq(["don't touch"])
  end

  it 'maintains object equality' do
    expect { processed_tree }.not_to change { tree }
  end

  context 'obtaining a suitable processor' do
    context 'when the project file is above the size threshold' do
      it 'returns an optimizing processor' do
        stub_project_file_size(subject.class::LARGE_PROJECT_FILE_SIZE_BYTES)

        expect(subject.class.new_for_file('/path/to/project.json')).to(
          be_an_instance_of(Gitlab::ImportExport::ProjectTreeProcessor)
        )
      end
    end

    context 'when the file is below the size threshold' do
      it 'returns a no-op processor' do
        stub_project_file_size(subject.class::LARGE_PROJECT_FILE_SIZE_BYTES - 1)

        expect(subject.class.new_for_file('/path/to/project.json')).to(
          be_an_instance_of(Gitlab::ImportExport::IdentityProjectTreeProcessor)
        )
      end
    end

    # Force File.size to report the given value so the size threshold
    # can be exercised without a real file on disk.
    def stub_project_file_size(size)
      allow(File).to receive(:size).and_return(size)
    end
  end
end
...@@ -29,8 +29,7 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do ...@@ -29,8 +29,7 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do
expect_any_instance_of(Gitlab::Git::Repository).to receive(:create_branch).with('feature', 'DCBA') expect_any_instance_of(Gitlab::Git::Repository).to receive(:create_branch).with('feature', 'DCBA')
allow_any_instance_of(Gitlab::Git::Repository).to receive(:create_branch) allow_any_instance_of(Gitlab::Git::Repository).to receive(:create_branch)
project_tree_restorer = described_class.new(user: @user, shared: @shared, project_tree_restorer = described_class.new(user: @user, shared: @shared, project: @project)
project: @project, project_tree_processor: Gitlab::ImportExport::ProjectTreeProcessor.new)
@restored_project_json = project_tree_restorer.restore @restored_project_json = project_tree_restorer.restore
end end
...@@ -452,8 +451,7 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do ...@@ -452,8 +451,7 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do
let(:user) { create(:user) } let(:user) { create(:user) }
let!(:project) { create(:project, :builds_disabled, :issues_disabled, name: 'project', path: 'project') } let!(:project) { create(:project, :builds_disabled, :issues_disabled, name: 'project', path: 'project') }
let(:project_tree_restorer) do let(:project_tree_restorer) do
described_class.new(user: user, shared: shared, project: project, described_class.new(user: user, shared: shared, project: project)
project_tree_processor: Gitlab::ImportExport::ProjectTreeProcessor.new)
end end
let(:restored_project_json) { project_tree_restorer.restore } let(:restored_project_json) { project_tree_restorer.restore }
...@@ -678,8 +676,7 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do ...@@ -678,8 +676,7 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do
let(:user) { create(:user) } let(:user) { create(:user) }
let(:tree_hash) { { 'visibility_level' => visibility } } let(:tree_hash) { { 'visibility_level' => visibility } }
let(:restorer) do let(:restorer) do
described_class.new(user: user, shared: shared, project: project, described_class.new(user: user, shared: shared, project: project)
project_tree_processor: Gitlab::ImportExport::ProjectTreeProcessor.new)
end end
before do before do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment