Commit 489555c9 authored by George Koltsov's avatar George Koltsov

Update Import object persistence approach

  Add an alternative way of persisting objects during
  Project/Group Import. Instead of saving an object with
  all of its subrelations all at once within one potentially
  long database transaction, separate collection subrelations
  from the object and persist them separately, in batches and
  in smaller more frequent db transactions.

Changelog: changed
parent 27be56f2
---
name: import_relation_object_persistence
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/79963
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/354497
milestone: '14.9'
type: development
group: group::import
default_enabled: false
# frozen_string_literal: true
# RelationObjectSaver allows for an alternative approach to persisting
# objects during Project/Group Import which persists object's
# nested collection subrelations separately, in batches.
#
# Instead of the regular `relation_object.save!` that opens one db
# transaction for the object itself and all of its subrelations we
# separate collection subrelations from the object and save them
# in batches in smaller more frequent db transactions.
module Gitlab
module ImportExport
module Base
class RelationObjectSaver
include Gitlab::Utils::StrongMemoize
BATCH_SIZE = 100
MIN_RECORDS_SIZE = 5
# @param relation_object [Object] Object of a project/group, e.g. an issue
# @param relation_key [String] Name of the object association to group/project, e.g. :issues
# @param relation_definition [Hash] Object subrelations as defined in import_export.yml
# @param importable [Project|Group] Project or group where relation object is getting saved to
#
# @example
# Gitlab::ImportExport::Base::RelationObjectSaver.new(
# relation_key: 'merge_requests',
# relation_object: #<MergeRequest id: root/mrs!1, notes: [#<Note id: nil, note: 'test', ...>, #<Note id: nil, noteL 'another note'>]>,
# relation_definition: {"metrics"=>{}, "award_emoji"=>{}, "notes"=>{"author"=>{}, ... }}
# importable: @importable
# ).execute
def initialize(relation_object:, relation_key:, relation_definition:, importable:)
@relation_object = relation_object
@relation_key = relation_key
@relation_definition = relation_definition
@importable = importable
@invalid_subrelations = []
end
def execute
move_subrelations
relation_object.save!
save_subrelations
ensure
log_invalid_subrelations
end
private
attr_reader :relation_object, :relation_key, :relation_definition,
:importable, :collection_subrelations, :invalid_subrelations
# rubocop:disable GitlabSecurity/PublicSend
def save_subrelations
collection_subrelations.each_pair do |relation_name, records|
records.each_slice(BATCH_SIZE) do |batch|
valid_records, invalid_records = batch.partition { |record| record.valid? }
invalid_subrelations << invalid_records
relation_object.public_send(relation_name) << valid_records
end
end
end
def move_subrelations
strong_memoize(:collection_subrelations) do
relation_definition.each_key.each_with_object({}) do |definition, collection_subrelations|
subrelation = relation_object.public_send(definition)
association = relation_object.class.reflect_on_association(definition)
if association&.collection? && subrelation.size > MIN_RECORDS_SIZE
collection_subrelations[definition] = subrelation.records
subrelation.clear
end
end
end
end
# rubocop:enable GitlabSecurity/PublicSend
def log_invalid_subrelations
invalid_subrelations.flatten.each do |record|
Gitlab::Import::Logger.info(
message: '[Project/Group Import] Invalid subrelation',
importable_column_name => importable.id,
relation_key: relation_key,
error_messages: record.errors.full_messages.to_sentence
)
ImportFailure.create(
source: 'RelationObjectSaver#save!',
relation_key: relation_key,
exception_class: 'RecordInvalid',
exception_message: record.errors.full_messages.to_sentence,
correlation_id_value: Labkit::Correlation::CorrelationId.current_or_new_id,
importable_column_name => importable.id
)
end
end
def importable_column_name
@column_name ||= importable.class.reflect_on_association(:import_failures).foreign_key.to_sym
end
end
end
end
end
......@@ -79,10 +79,7 @@ module Gitlab
relation_object.assign_attributes(importable_class_sym => @importable)
import_failure_service.with_retry(action: 'relation_object.save!', relation_key: relation_key, relation_index: relation_index) do
relation_object.save!
log_relation_creation(@importable, relation_key, relation_object)
end
save_relation_object(relation_object, relation_key, relation_definition, relation_index)
rescue StandardError => e
import_failure_service.log_import_failure(
source: 'process_relation_item!',
......@@ -91,6 +88,23 @@ module Gitlab
exception: e)
end
def save_relation_object(relation_object, relation_key, relation_definition, relation_index)
if Feature.enabled?(:import_relation_object_persistence, default_enabled: :yaml) && relation_object.new_record?
Gitlab::ImportExport::Base::RelationObjectSaver.new(
relation_object: relation_object,
relation_key: relation_key,
relation_definition: relation_definition,
importable: @importable
).execute
else
import_failure_service.with_retry(action: 'relation_object.save!', relation_key: relation_key, relation_index: relation_index) do
relation_object.save!
end
end
log_relation_creation(@importable, relation_key, relation_object)
end
def import_failure_service
@import_failure_service ||= ImportFailureService.new(@importable)
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::ImportExport::Base::RelationObjectSaver do
let(:project) { create(:project) }
let(:relation_object) { build(:issue, project: project) }
let(:relation_definition) { {} }
let(:importable) { project }
let(:relation_key) { 'issues' }
subject(:saver) do
described_class.new(
relation_object: relation_object,
relation_key: relation_key,
relation_definition: relation_definition,
importable: importable
)
end
describe '#save' do
before do
expect(relation_object).to receive(:save!).and_call_original
end
it 'saves relation object' do
expect { saver.execute }.to change(project.issues, :count).by(1)
end
context 'when subrelation is present' do
let(:notes) { build_list(:note, 6, project: project, importing: true) }
let(:relation_object) { build(:issue, project: project, notes: notes) }
let(:relation_definition) { { 'notes' => {} } }
it 'saves relation object with subrelations' do
expect(relation_object.notes).to receive(:<<).and_call_original
saver.execute
issue = project.issues.last
expect(issue.notes.count).to eq(6)
end
end
context 'when subrelation is not a collection' do
let(:sentry_issue) { build(:sentry_issue, importing: true) }
let(:relation_object) { build(:issue, project: project, sentry_issue: sentry_issue) }
let(:relation_definition) { { 'sentry_issue' => {} } }
it 'saves subrelation as part of the relation object itself' do
expect(relation_object.notes).not_to receive(:<<)
saver.execute
issue = project.issues.last
expect(issue.sentry_issue.persisted?).to eq(true)
end
end
context 'when subrelation collection count is small' do
let(:notes) { build_list(:note, 2, project: project, importing: true) }
let(:relation_object) { build(:issue, project: project, notes: notes) }
let(:relation_definition) { { 'notes' => {} } }
it 'saves subrelation as part of the relation object itself' do
expect(relation_object.notes).not_to receive(:<<)
saver.execute
issue = project.issues.last
expect(issue.notes.count).to eq(2)
end
end
context 'when some subrelations are invalid' do
let(:notes) { build_list(:note, 5, project: project, importing: true) }
let(:invalid_note) { build(:note) }
let(:relation_object) { build(:issue, project: project, notes: notes + [invalid_note]) }
let(:relation_definition) { { 'notes' => {} } }
it 'saves valid subrelations and logs invalid subrelation' do
expect(relation_object.notes).to receive(:<<).and_call_original
expect(Gitlab::Import::Logger)
.to receive(:info)
.with(
message: '[Project/Group Import] Invalid subrelation',
project_id: project.id,
relation_key: 'issues',
error_messages: "Noteable can't be blank and Project does not match noteable project"
)
saver.execute
issue = project.issues.last
import_failure = project.import_failures.last
expect(issue.notes.count).to eq(5)
expect(import_failure.source).to eq('RelationObjectSaver#save!')
expect(import_failure.exception_message).to eq("Noteable can't be blank and Project does not match noteable project")
end
context 'when importable is group' do
let(:relation_key) { 'labels' }
let(:relation_definition) { { 'priorities' => {} } }
let(:importable) { create(:group) }
let(:valid_priorities) { build_list(:label_priority, 5, importing: true) }
let(:invalid_priority) { build(:label_priority, priority: -1) }
let(:relation_object) { build(:group_label, group: importable, title: 'test', priorities: valid_priorities + [invalid_priority]) }
it 'logs invalid subrelation for a group' do
expect(Gitlab::Import::Logger)
.to receive(:info)
.with(
message: '[Project/Group Import] Invalid subrelation',
group_id: importable.id,
relation_key: 'labels',
error_messages: 'Priority must be greater than or equal to 0'
)
saver.execute
label = importable.labels.last
import_failure = importable.import_failures.last
expect(label.priorities.count).to eq(5)
expect(import_failure.source).to eq('RelationObjectSaver#save!')
expect(import_failure.exception_message).to eq('Priority must be greater than or equal to 0')
end
end
end
end
end
......@@ -1058,13 +1058,35 @@ RSpec.describe Gitlab::ImportExport::Project::TreeRestorer do
end
end
context 'enable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, true
context 'when import_relation_object_persistence feature flag is enabled' do
before do
stub_feature_flags(import_relation_object_persistence: true)
end
context 'enable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, true
it_behaves_like 'project tree restorer work properly', :ndjson_reader, true
end
it_behaves_like 'project tree restorer work properly', :ndjson_reader, true
context 'disable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, false
end
end
context 'disable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, false
context 'when import_relation_object_persistence feature flag is disabled' do
before do
stub_feature_flags(import_relation_object_persistence: false)
end
context 'enable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, true
it_behaves_like 'project tree restorer work properly', :ndjson_reader, true
end
context 'disable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, false
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment