Commit b83971dd authored by Stan Hu's avatar Stan Hu

Merge branch 'georgekoltsov/poc-import-update-object-persistance' into 'master'

Update Import process to persist object subrelations in separate db transactions

See merge request gitlab-org/gitlab!79963
parents 9ec1872a 489555c9
---
name: import_relation_object_persistence
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/79963
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/354497
milestone: '14.9'
type: development
group: group::import
default_enabled: false
# frozen_string_literal: true
# RelationObjectSaver allows for an alternative approach to persisting
# objects during Project/Group Import which persists object's
# nested collection subrelations separately, in batches.
#
# Instead of the regular `relation_object.save!` that opens one db
# transaction for the object itself and all of its subrelations we
# separate collection subrelations from the object and save them
# in batches in smaller more frequent db transactions.
module Gitlab
module ImportExport
module Base
class RelationObjectSaver
include Gitlab::Utils::StrongMemoize
BATCH_SIZE = 100
MIN_RECORDS_SIZE = 5
# @param relation_object [Object] Object of a project/group, e.g. an issue
# @param relation_key [String] Name of the object association to group/project, e.g. :issues
# @param relation_definition [Hash] Object subrelations as defined in import_export.yml
# @param importable [Project|Group] Project or group where relation object is getting saved to
#
# @example
# Gitlab::ImportExport::Base::RelationObjectSaver.new(
# relation_key: 'merge_requests',
# relation_object: #<MergeRequest id: root/mrs!1, notes: [#<Note id: nil, note: 'test', ...>, #<Note id: nil, noteL 'another note'>]>,
# relation_definition: {"metrics"=>{}, "award_emoji"=>{}, "notes"=>{"author"=>{}, ... }}
# importable: @importable
# ).execute
def initialize(relation_object:, relation_key:, relation_definition:, importable:)
@relation_object = relation_object
@relation_key = relation_key
@relation_definition = relation_definition
@importable = importable
@invalid_subrelations = []
end
def execute
move_subrelations
relation_object.save!
save_subrelations
ensure
log_invalid_subrelations
end
private
attr_reader :relation_object, :relation_key, :relation_definition,
:importable, :collection_subrelations, :invalid_subrelations
# rubocop:disable GitlabSecurity/PublicSend
def save_subrelations
collection_subrelations.each_pair do |relation_name, records|
records.each_slice(BATCH_SIZE) do |batch|
valid_records, invalid_records = batch.partition { |record| record.valid? }
invalid_subrelations << invalid_records
relation_object.public_send(relation_name) << valid_records
end
end
end
def move_subrelations
strong_memoize(:collection_subrelations) do
relation_definition.each_key.each_with_object({}) do |definition, collection_subrelations|
subrelation = relation_object.public_send(definition)
association = relation_object.class.reflect_on_association(definition)
if association&.collection? && subrelation.size > MIN_RECORDS_SIZE
collection_subrelations[definition] = subrelation.records
subrelation.clear
end
end
end
end
# rubocop:enable GitlabSecurity/PublicSend
def log_invalid_subrelations
invalid_subrelations.flatten.each do |record|
Gitlab::Import::Logger.info(
message: '[Project/Group Import] Invalid subrelation',
importable_column_name => importable.id,
relation_key: relation_key,
error_messages: record.errors.full_messages.to_sentence
)
ImportFailure.create(
source: 'RelationObjectSaver#save!',
relation_key: relation_key,
exception_class: 'RecordInvalid',
exception_message: record.errors.full_messages.to_sentence,
correlation_id_value: Labkit::Correlation::CorrelationId.current_or_new_id,
importable_column_name => importable.id
)
end
end
def importable_column_name
@column_name ||= importable.class.reflect_on_association(:import_failures).foreign_key.to_sym
end
end
end
end
end
......@@ -79,10 +79,7 @@ module Gitlab
relation_object.assign_attributes(importable_class_sym => @importable)
import_failure_service.with_retry(action: 'relation_object.save!', relation_key: relation_key, relation_index: relation_index) do
relation_object.save!
log_relation_creation(@importable, relation_key, relation_object)
end
save_relation_object(relation_object, relation_key, relation_definition, relation_index)
rescue StandardError => e
import_failure_service.log_import_failure(
source: 'process_relation_item!',
......@@ -91,6 +88,23 @@ module Gitlab
exception: e)
end
def save_relation_object(relation_object, relation_key, relation_definition, relation_index)
if Feature.enabled?(:import_relation_object_persistence, default_enabled: :yaml) && relation_object.new_record?
Gitlab::ImportExport::Base::RelationObjectSaver.new(
relation_object: relation_object,
relation_key: relation_key,
relation_definition: relation_definition,
importable: @importable
).execute
else
import_failure_service.with_retry(action: 'relation_object.save!', relation_key: relation_key, relation_index: relation_index) do
relation_object.save!
end
end
log_relation_creation(@importable, relation_key, relation_object)
end
def import_failure_service
@import_failure_service ||= ImportFailureService.new(@importable)
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::ImportExport::Base::RelationObjectSaver do
let(:project) { create(:project) }
let(:relation_object) { build(:issue, project: project) }
let(:relation_definition) { {} }
let(:importable) { project }
let(:relation_key) { 'issues' }
subject(:saver) do
described_class.new(
relation_object: relation_object,
relation_key: relation_key,
relation_definition: relation_definition,
importable: importable
)
end
describe '#save' do
before do
expect(relation_object).to receive(:save!).and_call_original
end
it 'saves relation object' do
expect { saver.execute }.to change(project.issues, :count).by(1)
end
context 'when subrelation is present' do
let(:notes) { build_list(:note, 6, project: project, importing: true) }
let(:relation_object) { build(:issue, project: project, notes: notes) }
let(:relation_definition) { { 'notes' => {} } }
it 'saves relation object with subrelations' do
expect(relation_object.notes).to receive(:<<).and_call_original
saver.execute
issue = project.issues.last
expect(issue.notes.count).to eq(6)
end
end
context 'when subrelation is not a collection' do
let(:sentry_issue) { build(:sentry_issue, importing: true) }
let(:relation_object) { build(:issue, project: project, sentry_issue: sentry_issue) }
let(:relation_definition) { { 'sentry_issue' => {} } }
it 'saves subrelation as part of the relation object itself' do
expect(relation_object.notes).not_to receive(:<<)
saver.execute
issue = project.issues.last
expect(issue.sentry_issue.persisted?).to eq(true)
end
end
context 'when subrelation collection count is small' do
let(:notes) { build_list(:note, 2, project: project, importing: true) }
let(:relation_object) { build(:issue, project: project, notes: notes) }
let(:relation_definition) { { 'notes' => {} } }
it 'saves subrelation as part of the relation object itself' do
expect(relation_object.notes).not_to receive(:<<)
saver.execute
issue = project.issues.last
expect(issue.notes.count).to eq(2)
end
end
context 'when some subrelations are invalid' do
let(:notes) { build_list(:note, 5, project: project, importing: true) }
let(:invalid_note) { build(:note) }
let(:relation_object) { build(:issue, project: project, notes: notes + [invalid_note]) }
let(:relation_definition) { { 'notes' => {} } }
it 'saves valid subrelations and logs invalid subrelation' do
expect(relation_object.notes).to receive(:<<).and_call_original
expect(Gitlab::Import::Logger)
.to receive(:info)
.with(
message: '[Project/Group Import] Invalid subrelation',
project_id: project.id,
relation_key: 'issues',
error_messages: "Noteable can't be blank and Project does not match noteable project"
)
saver.execute
issue = project.issues.last
import_failure = project.import_failures.last
expect(issue.notes.count).to eq(5)
expect(import_failure.source).to eq('RelationObjectSaver#save!')
expect(import_failure.exception_message).to eq("Noteable can't be blank and Project does not match noteable project")
end
context 'when importable is group' do
let(:relation_key) { 'labels' }
let(:relation_definition) { { 'priorities' => {} } }
let(:importable) { create(:group) }
let(:valid_priorities) { build_list(:label_priority, 5, importing: true) }
let(:invalid_priority) { build(:label_priority, priority: -1) }
let(:relation_object) { build(:group_label, group: importable, title: 'test', priorities: valid_priorities + [invalid_priority]) }
it 'logs invalid subrelation for a group' do
expect(Gitlab::Import::Logger)
.to receive(:info)
.with(
message: '[Project/Group Import] Invalid subrelation',
group_id: importable.id,
relation_key: 'labels',
error_messages: 'Priority must be greater than or equal to 0'
)
saver.execute
label = importable.labels.last
import_failure = importable.import_failures.last
expect(label.priorities.count).to eq(5)
expect(import_failure.source).to eq('RelationObjectSaver#save!')
expect(import_failure.exception_message).to eq('Priority must be greater than or equal to 0')
end
end
end
end
end
......@@ -5,116 +5,117 @@ require 'spec_helper'
RSpec.describe Gitlab::ImportExport::Group::TreeRestorer do
include ImportExport::CommonUtil
describe 'restore group tree' do
before_all do
# Using an admin for import, so we can check assignment of existing members
user = create(:admin, email: 'root@gitlabexample.com')
create(:user, email: 'adriene.mcclure@gitlabexample.com')
create(:user, email: 'gwendolyn_robel@gitlabexample.com')
shared_examples 'group restoration' do
describe 'restore group tree' do
before_all do
# Using an admin for import, so we can check assignment of existing members
user = create(:admin, email: 'root@gitlabexample.com')
create(:user, email: 'adriene.mcclure@gitlabexample.com')
create(:user, email: 'gwendolyn_robel@gitlabexample.com')
RSpec::Mocks.with_temporary_scope do
@group = create(:group, name: 'group', path: 'group')
@shared = Gitlab::ImportExport::Shared.new(@group)
RSpec::Mocks.with_temporary_scope do
@group = create(:group, name: 'group', path: 'group')
@shared = Gitlab::ImportExport::Shared.new(@group)
setup_import_export_config('group_exports/complex')
setup_import_export_config('group_exports/complex')
group_tree_restorer = described_class.new(user: user, shared: @shared, group: @group)
group_tree_restorer = described_class.new(user: user, shared: @shared, group: @group)
expect(group_tree_restorer.restore).to be_truthy
expect(group_tree_restorer.groups_mapping).not_to be_empty
expect(group_tree_restorer.restore).to be_truthy
expect(group_tree_restorer.groups_mapping).not_to be_empty
end
end
end
it 'has the group description' do
expect(Group.find_by_path('group').description).to eq('Group Description')
end
it 'has group labels' do
expect(@group.labels.count).to eq(10)
end
it 'has the group description' do
expect(Group.find_by_path('group').description).to eq('Group Description')
end
context 'issue boards' do
it 'has issue boards' do
expect(@group.boards.count).to eq(1)
it 'has group labels' do
expect(@group.labels.count).to eq(10)
end
it 'has board label lists' do
lists = @group.boards.find_by(name: 'first board').lists
context 'issue boards' do
it 'has issue boards' do
expect(@group.boards.count).to eq(1)
end
it 'has board label lists' do
lists = @group.boards.find_by(name: 'first board').lists
expect(lists.count).to eq(3)
expect(lists.first.label.title).to eq('TSL')
expect(lists.second.label.title).to eq('Sosync')
expect(lists.count).to eq(3)
expect(lists.first.label.title).to eq('TSL')
expect(lists.second.label.title).to eq('Sosync')
end
end
end
it 'has badges' do
expect(@group.badges.count).to eq(1)
end
it 'has badges' do
expect(@group.badges.count).to eq(1)
end
it 'has milestones' do
expect(@group.milestones.count).to eq(5)
end
it 'has milestones' do
expect(@group.milestones.count).to eq(5)
end
it 'has group children' do
expect(@group.children.count).to eq(2)
end
it 'has group children' do
expect(@group.children.count).to eq(2)
end
it 'has group members' do
expect(@group.members.map(&:user).map(&:email)).to contain_exactly(
'root@gitlabexample.com',
'adriene.mcclure@gitlabexample.com',
'gwendolyn_robel@gitlabexample.com'
)
it 'has group members' do
expect(@group.members.map(&:user).map(&:email)).to contain_exactly(
'root@gitlabexample.com',
'adriene.mcclure@gitlabexample.com',
'gwendolyn_robel@gitlabexample.com'
)
end
end
end
context 'child with no parent' do
let(:user) { create(:user) }
let(:group) { create(:group) }
let(:shared) { Gitlab::ImportExport::Shared.new(group) }
let(:group_tree_restorer) { described_class.new(user: user, shared: shared, group: group) }
context 'child with no parent' do
let(:user) { create(:user) }
let(:group) { create(:group) }
let(:shared) { Gitlab::ImportExport::Shared.new(group) }
let(:group_tree_restorer) { described_class.new(user: user, shared: shared, group: group) }
before do
setup_import_export_config('group_exports/child_with_no_parent')
end
before do
setup_import_export_config('group_exports/child_with_no_parent')
end
it 'captures import failures when a child group does not have a valid parent_id' do
group_tree_restorer.restore
it 'captures import failures when a child group does not have a valid parent_id' do
group_tree_restorer.restore
expect(group.import_failures.first.exception_message).to eq('Parent group not found')
expect(group.import_failures.first.exception_message).to eq('Parent group not found')
end
end
end
context 'when child group creation fails' do
let(:user) { create(:user) }
let(:group) { create(:group) }
let(:shared) { Gitlab::ImportExport::Shared.new(group) }
let(:group_tree_restorer) { described_class.new(user: user, shared: shared, group: group) }
context 'when child group creation fails' do
let(:user) { create(:user) }
let(:group) { create(:group) }
let(:shared) { Gitlab::ImportExport::Shared.new(group) }
let(:group_tree_restorer) { described_class.new(user: user, shared: shared, group: group) }
before do
setup_import_export_config('group_exports/child_short_name')
end
before do
setup_import_export_config('group_exports/child_short_name')
end
it 'captures import failure' do
exception_message = 'Validation failed: Group URL is too short (minimum is 2 characters)'
it 'captures import failure' do
exception_message = 'Validation failed: Group URL is too short (minimum is 2 characters)'
group_tree_restorer.restore
group_tree_restorer.restore
expect(group.import_failures.first.exception_message).to eq(exception_message)
expect(group.import_failures.first.exception_message).to eq(exception_message)
end
end
end
context 'excluded attributes' do
let!(:source_user) { create(:user, id: 123) }
let!(:importer_user) { create(:user) }
let(:group) { create(:group, name: 'user-inputed-name', path: 'user-inputed-path') }
let(:shared) { Gitlab::ImportExport::Shared.new(group) }
let(:group_tree_restorer) { described_class.new(user: importer_user, shared: shared, group: group) }
let(:exported_file) { File.join(shared.export_path, 'tree/groups/4352.json') }
let(:group_json) { Gitlab::Json.parse(IO.read(exported_file)) }
shared_examples 'excluded attributes' do
excluded_attributes = %w[
context 'excluded attributes' do
let!(:source_user) { create(:user, id: 123) }
let!(:importer_user) { create(:user) }
let(:group) { create(:group, name: 'user-inputed-name', path: 'user-inputed-path') }
let(:shared) { Gitlab::ImportExport::Shared.new(group) }
let(:group_tree_restorer) { described_class.new(user: importer_user, shared: shared, group: group) }
let(:exported_file) { File.join(shared.export_path, 'tree/groups/4352.json') }
let(:group_json) { Gitlab::Json.parse(IO.read(exported_file)) }
shared_examples 'excluded attributes' do
excluded_attributes = %w[
id
parent_id
owner_id
......@@ -125,80 +126,97 @@ RSpec.describe Gitlab::ImportExport::Group::TreeRestorer do
saml_discovery_token
]
before do
group.add_owner(importer_user)
before do
group.add_owner(importer_user)
setup_import_export_config('group_exports/complex')
setup_import_export_config('group_exports/complex')
expect(File.exist?(exported_file)).to be_truthy
expect(File.exist?(exported_file)).to be_truthy
group_tree_restorer.restore
group.reload
end
group_tree_restorer.restore
group.reload
end
it 'does not import root group name' do
expect(group.name).to eq('user-inputed-name')
end
it 'does not import root group name' do
expect(group.name).to eq('user-inputed-name')
end
it 'does not import root group path' do
expect(group.path).to eq('user-inputed-path')
end
it 'does not import root group path' do
expect(group.path).to eq('user-inputed-path')
end
excluded_attributes.each do |excluded_attribute|
it 'does not allow override of excluded attributes' do
unless group.public_send(excluded_attribute).nil?
expect(group_json[excluded_attribute]).not_to eq(group.public_send(excluded_attribute))
excluded_attributes.each do |excluded_attribute|
it 'does not allow override of excluded attributes' do
unless group.public_send(excluded_attribute).nil?
expect(group_json[excluded_attribute]).not_to eq(group.public_send(excluded_attribute))
end
end
end
end
end
include_examples 'excluded attributes'
end
include_examples 'excluded attributes'
end
context 'group.json file access check' do
let(:user) { create(:user) }
let!(:group) { create(:group, name: 'group2', path: 'group2') }
let(:shared) { Gitlab::ImportExport::Shared.new(group) }
let(:group_tree_restorer) { described_class.new(user: user, shared: shared, group: group) }
context 'group.json file access check' do
let(:user) { create(:user) }
let!(:group) { create(:group, name: 'group2', path: 'group2') }
let(:shared) { Gitlab::ImportExport::Shared.new(group) }
let(:group_tree_restorer) { described_class.new(user: user, shared: shared, group: group) }
it 'does not read a symlink' do
Dir.mktmpdir do |tmpdir|
FileUtils.mkdir_p(File.join(tmpdir, 'tree', 'groups'))
setup_symlink(tmpdir, 'tree/groups/_all.ndjson')
it 'does not read a symlink' do
Dir.mktmpdir do |tmpdir|
FileUtils.mkdir_p(File.join(tmpdir, 'tree', 'groups'))
setup_symlink(tmpdir, 'tree/groups/_all.ndjson')
allow(shared).to receive(:export_path).and_return(tmpdir)
allow(shared).to receive(:export_path).and_return(tmpdir)
expect(group_tree_restorer.restore).to eq(false)
expect(shared.errors).to include('Incorrect JSON format')
expect(group_tree_restorer.restore).to eq(false)
expect(shared.errors).to include('Incorrect JSON format')
end
end
end
end
context 'group visibility levels' do
let(:user) { create(:user) }
let(:shared) { Gitlab::ImportExport::Shared.new(group) }
let(:group_tree_restorer) { described_class.new(user: user, shared: shared, group: group) }
context 'group visibility levels' do
let(:user) { create(:user) }
let(:shared) { Gitlab::ImportExport::Shared.new(group) }
let(:group_tree_restorer) { described_class.new(user: user, shared: shared, group: group) }
before do
setup_import_export_config(filepath)
before do
setup_import_export_config(filepath)
group_tree_restorer.restore
end
group_tree_restorer.restore
end
shared_examples 'with visibility level' do |visibility_level, expected_visibilities|
context "when visibility level is #{visibility_level}" do
let(:group) { create(:group, visibility_level) }
let(:filepath) { "group_exports/visibility_levels/#{visibility_level}" }
shared_examples 'with visibility level' do |visibility_level, expected_visibilities|
context "when visibility level is #{visibility_level}" do
let(:group) { create(:group, visibility_level) }
let(:filepath) { "group_exports/visibility_levels/#{visibility_level}" }
it "imports all subgroups as #{visibility_level}" do
expect(group.children.map(&:visibility_level)).to match_array(expected_visibilities)
it "imports all subgroups as #{visibility_level}" do
expect(group.children.map(&:visibility_level)).to match_array(expected_visibilities)
end
end
end
include_examples 'with visibility level', :public, [20, 10, 0]
include_examples 'with visibility level', :private, [0, 0, 0]
include_examples 'with visibility level', :internal, [10, 10, 0]
end
end
context 'when import_relation_object_persistence feature flag is enabled' do
before do
stub_feature_flags(import_relation_object_persistence: true)
end
include_examples 'group restoration'
end
context 'when import_relation_object_persistence feature flag is disabled' do
before do
stub_feature_flags(import_relation_object_persistence: false)
end
include_examples 'with visibility level', :public, [20, 10, 0]
include_examples 'with visibility level', :private, [0, 0, 0]
include_examples 'with visibility level', :internal, [10, 10, 0]
include_examples 'group restoration'
end
end
......@@ -1058,13 +1058,35 @@ RSpec.describe Gitlab::ImportExport::Project::TreeRestorer do
end
end
context 'enable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, true
context 'when import_relation_object_persistence feature flag is enabled' do
before do
stub_feature_flags(import_relation_object_persistence: true)
end
context 'enable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, true
it_behaves_like 'project tree restorer work properly', :ndjson_reader, true
end
it_behaves_like 'project tree restorer work properly', :ndjson_reader, true
context 'disable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, false
end
end
context 'disable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, false
context 'when import_relation_object_persistence feature flag is disabled' do
before do
stub_feature_flags(import_relation_object_persistence: false)
end
context 'enable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, true
it_behaves_like 'project tree restorer work properly', :ndjson_reader, true
end
context 'disable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, false
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment