Commit b83971dd authored by Stan Hu's avatar Stan Hu

Merge branch 'georgekoltsov/poc-import-update-object-persistance' into 'master'

Update Import process to persist object subrelations in separate db transactions

See merge request gitlab-org/gitlab!79963
parents 9ec1872a 489555c9
---
name: import_relation_object_persistence
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/79963
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/354497
milestone: '14.9'
type: development
group: group::import
default_enabled: false
# frozen_string_literal: true
# RelationObjectSaver allows for an alternative approach to persisting
# objects during Project/Group Import which persists object's
# nested collection subrelations separately, in batches.
#
# Instead of the regular `relation_object.save!` that opens one db
# transaction for the object itself and all of its subrelations we
# separate collection subrelations from the object and save them
# in batches in smaller more frequent db transactions.
module Gitlab
module ImportExport
module Base
class RelationObjectSaver
include Gitlab::Utils::StrongMemoize
BATCH_SIZE = 100
MIN_RECORDS_SIZE = 5
# @param relation_object [Object] Object of a project/group, e.g. an issue
# @param relation_key [String] Name of the object association to group/project, e.g. :issues
# @param relation_definition [Hash] Object subrelations as defined in import_export.yml
# @param importable [Project|Group] Project or group where relation object is getting saved to
#
# @example
# Gitlab::ImportExport::Base::RelationObjectSaver.new(
# relation_key: 'merge_requests',
# relation_object: #<MergeRequest id: root/mrs!1, notes: [#<Note id: nil, note: 'test', ...>, #<Note id: nil, noteL 'another note'>]>,
# relation_definition: {"metrics"=>{}, "award_emoji"=>{}, "notes"=>{"author"=>{}, ... }}
# importable: @importable
# ).execute
def initialize(relation_object:, relation_key:, relation_definition:, importable:)
@relation_object = relation_object
@relation_key = relation_key
@relation_definition = relation_definition
@importable = importable
@invalid_subrelations = []
end
def execute
move_subrelations
relation_object.save!
save_subrelations
ensure
log_invalid_subrelations
end
private
attr_reader :relation_object, :relation_key, :relation_definition,
:importable, :collection_subrelations, :invalid_subrelations
# rubocop:disable GitlabSecurity/PublicSend
def save_subrelations
collection_subrelations.each_pair do |relation_name, records|
records.each_slice(BATCH_SIZE) do |batch|
valid_records, invalid_records = batch.partition { |record| record.valid? }
invalid_subrelations << invalid_records
relation_object.public_send(relation_name) << valid_records
end
end
end
def move_subrelations
strong_memoize(:collection_subrelations) do
relation_definition.each_key.each_with_object({}) do |definition, collection_subrelations|
subrelation = relation_object.public_send(definition)
association = relation_object.class.reflect_on_association(definition)
if association&.collection? && subrelation.size > MIN_RECORDS_SIZE
collection_subrelations[definition] = subrelation.records
subrelation.clear
end
end
end
end
# rubocop:enable GitlabSecurity/PublicSend
def log_invalid_subrelations
invalid_subrelations.flatten.each do |record|
Gitlab::Import::Logger.info(
message: '[Project/Group Import] Invalid subrelation',
importable_column_name => importable.id,
relation_key: relation_key,
error_messages: record.errors.full_messages.to_sentence
)
ImportFailure.create(
source: 'RelationObjectSaver#save!',
relation_key: relation_key,
exception_class: 'RecordInvalid',
exception_message: record.errors.full_messages.to_sentence,
correlation_id_value: Labkit::Correlation::CorrelationId.current_or_new_id,
importable_column_name => importable.id
)
end
end
def importable_column_name
@column_name ||= importable.class.reflect_on_association(:import_failures).foreign_key.to_sym
end
end
end
end
end
......@@ -79,10 +79,7 @@ module Gitlab
relation_object.assign_attributes(importable_class_sym => @importable)
import_failure_service.with_retry(action: 'relation_object.save!', relation_key: relation_key, relation_index: relation_index) do
relation_object.save!
log_relation_creation(@importable, relation_key, relation_object)
end
save_relation_object(relation_object, relation_key, relation_definition, relation_index)
rescue StandardError => e
import_failure_service.log_import_failure(
source: 'process_relation_item!',
......@@ -91,6 +88,23 @@ module Gitlab
exception: e)
end
def save_relation_object(relation_object, relation_key, relation_definition, relation_index)
if Feature.enabled?(:import_relation_object_persistence, default_enabled: :yaml) && relation_object.new_record?
Gitlab::ImportExport::Base::RelationObjectSaver.new(
relation_object: relation_object,
relation_key: relation_key,
relation_definition: relation_definition,
importable: @importable
).execute
else
import_failure_service.with_retry(action: 'relation_object.save!', relation_key: relation_key, relation_index: relation_index) do
relation_object.save!
end
end
log_relation_creation(@importable, relation_key, relation_object)
end
def import_failure_service
@import_failure_service ||= ImportFailureService.new(@importable)
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::ImportExport::Base::RelationObjectSaver do
let(:project) { create(:project) }
let(:relation_object) { build(:issue, project: project) }
let(:relation_definition) { {} }
let(:importable) { project }
let(:relation_key) { 'issues' }
subject(:saver) do
described_class.new(
relation_object: relation_object,
relation_key: relation_key,
relation_definition: relation_definition,
importable: importable
)
end
describe '#save' do
before do
expect(relation_object).to receive(:save!).and_call_original
end
it 'saves relation object' do
expect { saver.execute }.to change(project.issues, :count).by(1)
end
context 'when subrelation is present' do
let(:notes) { build_list(:note, 6, project: project, importing: true) }
let(:relation_object) { build(:issue, project: project, notes: notes) }
let(:relation_definition) { { 'notes' => {} } }
it 'saves relation object with subrelations' do
expect(relation_object.notes).to receive(:<<).and_call_original
saver.execute
issue = project.issues.last
expect(issue.notes.count).to eq(6)
end
end
context 'when subrelation is not a collection' do
let(:sentry_issue) { build(:sentry_issue, importing: true) }
let(:relation_object) { build(:issue, project: project, sentry_issue: sentry_issue) }
let(:relation_definition) { { 'sentry_issue' => {} } }
it 'saves subrelation as part of the relation object itself' do
expect(relation_object.notes).not_to receive(:<<)
saver.execute
issue = project.issues.last
expect(issue.sentry_issue.persisted?).to eq(true)
end
end
context 'when subrelation collection count is small' do
let(:notes) { build_list(:note, 2, project: project, importing: true) }
let(:relation_object) { build(:issue, project: project, notes: notes) }
let(:relation_definition) { { 'notes' => {} } }
it 'saves subrelation as part of the relation object itself' do
expect(relation_object.notes).not_to receive(:<<)
saver.execute
issue = project.issues.last
expect(issue.notes.count).to eq(2)
end
end
context 'when some subrelations are invalid' do
let(:notes) { build_list(:note, 5, project: project, importing: true) }
let(:invalid_note) { build(:note) }
let(:relation_object) { build(:issue, project: project, notes: notes + [invalid_note]) }
let(:relation_definition) { { 'notes' => {} } }
it 'saves valid subrelations and logs invalid subrelation' do
expect(relation_object.notes).to receive(:<<).and_call_original
expect(Gitlab::Import::Logger)
.to receive(:info)
.with(
message: '[Project/Group Import] Invalid subrelation',
project_id: project.id,
relation_key: 'issues',
error_messages: "Noteable can't be blank and Project does not match noteable project"
)
saver.execute
issue = project.issues.last
import_failure = project.import_failures.last
expect(issue.notes.count).to eq(5)
expect(import_failure.source).to eq('RelationObjectSaver#save!')
expect(import_failure.exception_message).to eq("Noteable can't be blank and Project does not match noteable project")
end
context 'when importable is group' do
let(:relation_key) { 'labels' }
let(:relation_definition) { { 'priorities' => {} } }
let(:importable) { create(:group) }
let(:valid_priorities) { build_list(:label_priority, 5, importing: true) }
let(:invalid_priority) { build(:label_priority, priority: -1) }
let(:relation_object) { build(:group_label, group: importable, title: 'test', priorities: valid_priorities + [invalid_priority]) }
it 'logs invalid subrelation for a group' do
expect(Gitlab::Import::Logger)
.to receive(:info)
.with(
message: '[Project/Group Import] Invalid subrelation',
group_id: importable.id,
relation_key: 'labels',
error_messages: 'Priority must be greater than or equal to 0'
)
saver.execute
label = importable.labels.last
import_failure = importable.import_failures.last
expect(label.priorities.count).to eq(5)
expect(import_failure.source).to eq('RelationObjectSaver#save!')
expect(import_failure.exception_message).to eq('Priority must be greater than or equal to 0')
end
end
end
end
end
......@@ -1058,13 +1058,35 @@ RSpec.describe Gitlab::ImportExport::Project::TreeRestorer do
end
end
context 'enable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, true
context 'when import_relation_object_persistence feature flag is enabled' do
before do
stub_feature_flags(import_relation_object_persistence: true)
end
context 'enable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, true
it_behaves_like 'project tree restorer work properly', :ndjson_reader, true
end
it_behaves_like 'project tree restorer work properly', :ndjson_reader, true
context 'disable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, false
end
end
context 'disable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, false
context 'when import_relation_object_persistence feature flag is disabled' do
before do
stub_feature_flags(import_relation_object_persistence: false)
end
context 'enable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, true
it_behaves_like 'project tree restorer work properly', :ndjson_reader, true
end
context 'disable ndjson import' do
it_behaves_like 'project tree restorer work properly', :legacy_reader, false
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment