Commit 54f2ab9b authored by Kamil Trzciński's avatar Kamil Trzciński

Merge branch '196844-safe-bulk-inserts' into 'master'

Introduce simple ActiveRecord-based bulk-insert functionality

See merge request gitlab-org/gitlab!25590
parents cafb3722 1b1eb221
# frozen_string_literal: true # frozen_string_literal: true
##
# A mixin for ActiveRecord models that enables callers to insert instances of the
# target class into the database en-bloc via the [bulk_insert] method.
#
# Upon inclusion in the target class, the mixin will perform a number of checks to
# ensure that the target is eligible for bulk insertions. For instance, it must not
# use ActiveRecord callbacks that fire between [save]s, since these would not run
# properly when instances are inserted in bulk.
#
# The mixin uses ActiveRecord 6's [InsertAll] type internally for bulk insertions.
# Unlike [InsertAll], however, it requires you to pass instances of the target type
# rather than row hashes, since it will run validations prior to insertion.
#
# @example
#
# class MyRecord < ApplicationRecord
# include BulkInsertSafe # must be included _last_ i.e. after any other concerns
# end
#
# # simple
# MyRecord.bulk_insert!(items)
#
# # with custom batch size
# MyRecord.bulk_insert!(items, batch_size: 100)
#
# # without validations
# MyRecord.bulk_insert!(items, validate: false)
#
# # with attribute hash modification
# MyRecord.bulk_insert!(items) { |item_attrs| item_attrs['col'] = 42 }
#
#
module BulkInsertSafe module BulkInsertSafe
extend ActiveSupport::Concern extend ActiveSupport::Concern
...@@ -13,7 +45,10 @@ module BulkInsertSafe ...@@ -13,7 +45,10 @@ module BulkInsertSafe
:destroy :destroy
].freeze ].freeze
DEFAULT_BATCH_SIZE = 500
MethodNotAllowedError = Class.new(StandardError) MethodNotAllowedError = Class.new(StandardError)
PrimaryKeySetError = Class.new(StandardError)
class_methods do class_methods do
def set_callback(name, *args) def set_callback(name, *args)
...@@ -26,8 +61,62 @@ module BulkInsertSafe ...@@ -26,8 +61,62 @@ module BulkInsertSafe
super super
end end
# Inserts the given ActiveRecord [items] to the table mapped to this class via [InsertAll].
# Items will be inserted in batches of a given size, where insertion semantics are
# "atomic across all batches", i.e. either all items will be inserted or none.
#
# @param [Boolean] validate Whether validations should run on [items]
# @param [Integer] batch_size How many items should at most be inserted at once
# @param [Proc] handle_attributes Block that will receive each item attribute hash
# prior to insertion for further processing
#
# Note that this method will throw on the following occasions:
# - [PrimaryKeySetError] when primary keys are set on entities prior to insertion
# - [ActiveRecord::RecordInvalid] on entity validation failures
# - [ActiveRecord::RecordNotUnique] on duplicate key errors
#
# @return true if all items succeeded to be inserted, throws otherwise.
#
def bulk_insert!(items, validate: true, batch_size: DEFAULT_BATCH_SIZE, &handle_attributes)
return true if items.empty?
_bulk_insert_in_batches(items, batch_size, validate, &handle_attributes)
true
end
private private
def _bulk_insert_in_batches(items, batch_size, validate_items, &handle_attributes)
transaction do
items.each_slice(batch_size) do |item_batch|
attributes = _bulk_insert_item_attributes(item_batch, validate_items, &handle_attributes)
insert_all!(attributes)
end
end
end
def _bulk_insert_item_attributes(items, validate_items)
items.map do |item|
item.validate! if validate_items
attributes = item.attributes
_bulk_insert_reject_primary_key!(attributes, item.class.primary_key)
yield attributes if block_given?
attributes
end
end
def _bulk_insert_reject_primary_key!(attributes, primary_key)
if attributes.delete(primary_key)
raise PrimaryKeySetError, "Primary key set: #{primary_key}:#{attributes[primary_key]}\n" \
"Bulk-inserts are only supported for rows that don't already have PK set"
end
end
def _bulk_insert_callback_allowed?(name, args) def _bulk_insert_callback_allowed?(name, args)
_bulk_insert_whitelisted?(name) || _bulk_insert_saved_from_belongs_to?(name, args) _bulk_insert_whitelisted?(name) || _bulk_insert_saved_from_belongs_to?(name, args)
end end
......
...@@ -11,6 +11,7 @@ class MergeRequestDiffCommit < ApplicationRecord ...@@ -11,6 +11,7 @@ class MergeRequestDiffCommit < ApplicationRecord
alias_attribute :id, :sha alias_attribute :id, :sha
def self.create_bulk(merge_request_diff_id, commits) def self.create_bulk(merge_request_diff_id, commits)
warn 'Deprecated; use `bulk_insert` from `BulkInsertSafe` mixin instead'
rows = commits.map.with_index do |commit, index| rows = commits.map.with_index do |commit, index|
# See #parent_ids. # See #parent_ids.
commit_hash = commit.to_hash.except(:parent_ids) commit_hash = commit.to_hash.except(:parent_ids)
......
...@@ -5,6 +5,8 @@ require 'spec_helper' ...@@ -5,6 +5,8 @@ require 'spec_helper'
describe BulkInsertSafe do describe BulkInsertSafe do
class BulkInsertItem < ApplicationRecord class BulkInsertItem < ApplicationRecord
include BulkInsertSafe include BulkInsertSafe
validates :name, presence: true
end end
module InheritedUnsafeMethods module InheritedUnsafeMethods
...@@ -23,7 +25,36 @@ describe BulkInsertSafe do ...@@ -23,7 +25,36 @@ describe BulkInsertSafe do
end end
end end
it_behaves_like 'a BulkInsertSafe model', BulkInsertItem before(:all) do
ActiveRecord::Schema.define do
create_table :bulk_insert_items, force: true do |t|
t.string :name, null: true
end
end
end
after(:all) do
ActiveRecord::Schema.define do
drop_table :bulk_insert_items, force: true
end
end
def build_valid_items_for_bulk_insertion
Array.new(10) do |n|
BulkInsertItem.new(name: "item-#{n}")
end
end
def build_invalid_items_for_bulk_insertion
Array.new(10) do
BulkInsertItem.new # requires `name` to be set
end
end
it_behaves_like 'a BulkInsertSafe model', BulkInsertItem do
let(:valid_items_for_bulk_insertion) { build_valid_items_for_bulk_insertion }
let(:invalid_items_for_bulk_insertion) { build_invalid_items_for_bulk_insertion }
end
context 'when inheriting class methods' do context 'when inheriting class methods' do
it 'raises an error when method is not bulk-insert safe' do it 'raises an error when method is not bulk-insert safe' do
...@@ -35,4 +66,40 @@ describe BulkInsertSafe do ...@@ -35,4 +66,40 @@ describe BulkInsertSafe do
expect { BulkInsertItem.include(InheritedSafeMethods) }.not_to raise_error expect { BulkInsertItem.include(InheritedSafeMethods) }.not_to raise_error
end end
end end
context 'primary keys' do
it 'raises error if primary keys are set prior to insertion' do
items = build_valid_items_for_bulk_insertion
items.each_with_index do |item, n|
item.id = n
end
expect { BulkInsertItem.bulk_insert!(items) }.to raise_error(subject::PrimaryKeySetError)
end
end
describe '.bulk_insert!' do
it 'inserts items in the given number of batches' do
items = build_valid_items_for_bulk_insertion
expect(items.size).to eq(10)
expect(BulkInsertItem).to receive(:insert_all!).twice
BulkInsertItem.bulk_insert!(items, batch_size: 5)
end
it 'rolls back the transaction when any item is invalid' do
# second batch is bad
all_items = build_valid_items_for_bulk_insertion + build_invalid_items_for_bulk_insertion
batch_size = all_items.size / 2
expect do
BulkInsertItem.bulk_insert!(all_items, batch_size: batch_size) rescue nil
end.not_to change { BulkInsertItem.count }
end
it 'does nothing and returns true when items are empty' do
expect(BulkInsertItem.bulk_insert!([])).to be(true)
expect(BulkInsertItem.count).to eq(0)
end
end
end end
...@@ -8,5 +8,8 @@ describe LabelLink do ...@@ -8,5 +8,8 @@ describe LabelLink do
it { is_expected.to belong_to(:label) } it { is_expected.to belong_to(:label) }
it { is_expected.to belong_to(:target) } it { is_expected.to belong_to(:target) }
it_behaves_like 'a BulkInsertSafe model', LabelLink it_behaves_like 'a BulkInsertSafe model', LabelLink do
let(:valid_items_for_bulk_insertion) { build_list(:label_link, 10) }
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
end
end end
...@@ -6,7 +6,10 @@ describe MergeRequestDiffCommit do ...@@ -6,7 +6,10 @@ describe MergeRequestDiffCommit do
let(:merge_request) { create(:merge_request) } let(:merge_request) { create(:merge_request) }
let(:project) { merge_request.project } let(:project) { merge_request.project }
it_behaves_like 'a BulkInsertSafe model', MergeRequestDiffCommit it_behaves_like 'a BulkInsertSafe model', MergeRequestDiffCommit do
let(:valid_items_for_bulk_insertion) { build_list(:merge_request_diff_commit, 10) }
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
end
describe '#to_hash' do describe '#to_hash' do
subject { merge_request.commits.first } subject { merge_request.commits.first }
......
...@@ -3,7 +3,10 @@ ...@@ -3,7 +3,10 @@
require 'spec_helper' require 'spec_helper'
describe MergeRequestDiffFile do describe MergeRequestDiffFile do
it_behaves_like 'a BulkInsertSafe model', MergeRequestDiffFile it_behaves_like 'a BulkInsertSafe model', MergeRequestDiffFile do
let(:valid_items_for_bulk_insertion) { build_list(:merge_request_diff_file, 10) }
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
end
describe '#diff' do describe '#diff' do
context 'when diff is not stored' do context 'when diff is not stored' do
......
...@@ -35,4 +35,44 @@ RSpec.shared_examples 'a BulkInsertSafe model' do |klass| ...@@ -35,4 +35,44 @@ RSpec.shared_examples 'a BulkInsertSafe model' do |klass|
expect { target_class.belongs_to(:other_record) }.not_to raise_error expect { target_class.belongs_to(:other_record) }.not_to raise_error
end end
end end
describe '.bulk_insert!' do
context 'when all items are valid' do
it 'inserts them all' do
items = valid_items_for_bulk_insertion
expect(items).not_to be_empty
expect { target_class.bulk_insert!(items) }.to change { target_class.count }.by(items.size)
end
it 'returns true' do
items = valid_items_for_bulk_insertion
expect(items).not_to be_empty
expect(target_class.bulk_insert!(items)).to be true
end
end
context 'when some items are invalid' do
it 'does not insert any of them and raises an error' do
items = invalid_items_for_bulk_insertion
# it is not always possible to create invalid items
if items.any?
expect { target_class.bulk_insert!(items) }.to raise_error(ActiveRecord::RecordInvalid)
expect(target_class.count).to eq(0)
end
end
it 'inserts them anyway when bypassing validations' do
items = invalid_items_for_bulk_insertion
# it is not always possible to create invalid items
if items.any?
expect(target_class.bulk_insert!(items, validate: false)).to be(true)
expect(target_class.count).to eq(items.size)
end
end
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment