Commit e9da2b0c authored by Sean McGivern's avatar Sean McGivern

Merge branch '31114-internal-ids-are-not-atomic' into 'master'

Atomic generation of internal ids for issues.

Closes #31114

See merge request gitlab-org/gitlab-ce!17580
parents f8171595 c3b489bd
# Include atomic internal id generation scheme for a model
#
# This allows us to atomically generate internal ids that are
# unique within a given scope.
#
# For example, let's generate internal ids for Issue per Project:
# ```
# class Issue < ActiveRecord::Base
# has_internal_id :iid, scope: :project, init: ->(s) { s.project.issues.maximum(:iid) }
# end
# ```
#
# This generates unique internal ids per project for newly created issues.
# The generated internal id is saved in the `iid` attribute of `Issue`.
#
# This concern uses InternalId records to facilitate atomicity.
# In the absence of a record for the given scope, one will be created automatically.
# In this situation, the `init` block is called to calculate the initial value.
# In the example above, we calculate the maximum `iid` of all issues
# within the given project.
#
# Note that a model may have more than one internal id associated with possibly
# different scopes.
module AtomicInternalId
extend ActiveSupport::Concern
module ClassMethods
def has_internal_id(column, scope:, init:) # rubocop:disable Naming/PredicateName
before_validation(on: :create) do
if read_attribute(column).blank?
scope_attrs = { scope => association(scope).reader }
usage = self.class.table_name.to_sym
new_iid = InternalId.generate_next(self, scope_attrs, usage, init)
write_attribute(column, new_iid)
end
end
validates column, presence: true, numericality: true
end
end
def to_param
iid.to_s
end
end
module InternalId
module NonatomicInternalId
extend ActiveSupport::Concern
included do
......
class Deployment < ActiveRecord::Base
include InternalId
include NonatomicInternalId
belongs_to :project, required: true
belongs_to :environment, required: true
......
# An InternalId is a strictly monotone sequence of integers
# generated for a given scope and usage.
#
# For example, issues use their project to scope internal ids:
# In that sense, scope is "project" and usage is "issues".
# Generated internal ids for an issue are unique per project.
#
# See InternalId#usage enum for available usages.
#
# In order to leverage InternalId for other usages, the idea is to
# * Add `usage` value to enum
# * (Optionally) add columns to `internal_ids` if needed for scope.
class InternalId < ActiveRecord::Base
belongs_to :project
enum usage: { issues: 0 }
validates :usage, presence: true
REQUIRED_SCHEMA_VERSION = 20180305095250
# Increments #last_value and saves the record
#
# The operation locks the record and gathers a `ROW SHARE` lock (in PostgreSQL).
# As such, the increment is atomic and safe to be called concurrently.
def increment_and_save!
lock!
self.last_value = (last_value || 0) + 1
save!
last_value
end
class << self
def generate_next(subject, scope, usage, init)
# Shortcut if `internal_ids` table is not available (yet)
# This can be the case in other (unrelated) migration specs
return (init.call(subject) || 0) + 1 unless available?
InternalIdGenerator.new(subject, scope, usage, init).generate
end
def available?
@available_flag ||= ActiveRecord::Migrator.current_version >= REQUIRED_SCHEMA_VERSION # rubocop:disable Gitlab/PredicateMemoization
end
# Flushes cached information about schema
def reset_column_information
@available_flag = nil
super
end
end
class InternalIdGenerator
# Generate next internal id for a given scope and usage.
#
# For currently supported usages, see #usage enum.
#
# The method implements a locking scheme that has the following properties:
# 1) Generated sequence of internal ids is unique per (scope and usage)
# 2) The method is thread-safe and may be used in concurrent threads/processes.
# 3) The generated sequence is gapless.
# 4) In the absence of a record in the internal_ids table, one will be created
# and last_value will be calculated on the fly.
#
# subject: The instance we're generating an internal id for. Gets passed to init if called.
# scope: Attributes that define the scope for id generation.
# usage: Symbol to define the usage of the internal id, see InternalId.usages
# init: Block that gets called to initialize InternalId record if not present
# Make sure to not throw exceptions in the absence of records (if this is expected).
attr_reader :subject, :scope, :init, :scope_attrs, :usage
def initialize(subject, scope, usage, init)
@subject = subject
@scope = scope
@init = init
@usage = usage
raise ArgumentError, 'Scope is not well-defined, need at least one column for scope (given: 0)' if scope.empty?
unless InternalId.usages.has_key?(usage.to_s)
raise ArgumentError, "Usage '#{usage}' is unknown. Supported values are #{InternalId.usages.keys} from InternalId.usages"
end
end
# Generates next internal id and returns it
def generate
subject.transaction do
# Create a record in internal_ids if one does not yet exist
# and increment its last value
#
# Note this will acquire a ROW SHARE lock on the InternalId record
(lookup || create_record).increment_and_save!
end
end
private
# Retrieve InternalId record for (project, usage) combination, if it exists
def lookup
InternalId.find_by(**scope, usage: usage_value)
end
def usage_value
@usage_value ||= InternalId.usages[usage.to_s]
end
# Create InternalId record for (scope, usage) combination, if it doesn't exist
#
# We blindly insert without synchronization. If another process
# was faster in doing this, we'll realize once we hit the unique key constraint
# violation. We can safely roll-back the nested transaction and perform
# a lookup instead to retrieve the record.
def create_record
subject.transaction(requires_new: true) do
InternalId.create!(
**scope,
usage: usage_value,
last_value: init.call(subject) || 0
)
end
rescue ActiveRecord::RecordNotUnique
lookup
end
end
end
require 'carrierwave/orm/activerecord'
class Issue < ActiveRecord::Base
include InternalId
include AtomicInternalId
include Issuable
include Noteable
include Referable
......@@ -24,6 +24,8 @@ class Issue < ActiveRecord::Base
belongs_to :project
belongs_to :moved_to, class_name: 'Issue'
has_internal_id :iid, scope: :project, init: ->(s) { s&.project&.issues&.maximum(:iid) }
has_many :events, as: :target, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent
has_many :merge_requests_closing_issues,
......
class MergeRequest < ActiveRecord::Base
include InternalId
include NonatomicInternalId
include Issuable
include Noteable
include Referable
......
......@@ -8,7 +8,7 @@ class Milestone < ActiveRecord::Base
Started = MilestoneStruct.new('Started', '#started', -3)
include CacheMarkdownField
include InternalId
include NonatomicInternalId
include Sortable
include Referable
include StripAttribute
......
......@@ -188,6 +188,8 @@ class Project < ActiveRecord::Base
has_many :todos
has_many :notification_settings, as: :source, dependent: :delete_all # rubocop:disable Cop/ActiveRecordDependent
has_many :internal_ids
has_one :import_data, class_name: 'ProjectImportData', inverse_of: :project, autosave: true
has_one :project_feature, inverse_of: :project
has_one :statistics, class_name: 'ProjectStatistics'
......
---
title: Atomic generation of internal ids for issues.
merge_request: 17580
author:
type: other
require 'active_record/connection_adapters/abstract_mysql_adapter'
module ActiveRecord
module ConnectionAdapters
class AbstractMysqlAdapter
NATIVE_DATABASE_TYPES.merge!(
bigserial: { name: 'bigint(20) auto_increment PRIMARY KEY' }
)
end
end
end
class CreateInternalIdsTable < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
create_table :internal_ids, id: :bigserial do |t|
t.references :project, null: false, foreign_key: { on_delete: :cascade }
t.integer :usage, null: false
t.integer :last_value, null: false
t.index [:usage, :project_id], unique: true
end
end
end
......@@ -866,6 +866,14 @@ ActiveRecord::Schema.define(version: 20180309160427) do
add_index "identities", ["user_id"], name: "index_identities_on_user_id", using: :btree
create_table "internal_ids", id: :bigserial, force: :cascade do |t|
t.integer "project_id", null: false
t.integer "usage", null: false
t.integer "last_value", null: false
end
add_index "internal_ids", ["usage", "project_id"], name: "index_internal_ids_on_usage_and_project_id", unique: true, using: :btree
create_table "issue_assignees", id: false, force: :cascade do |t|
t.integer "user_id", null: false
t.integer "issue_id", null: false
......@@ -2058,6 +2066,7 @@ ActiveRecord::Schema.define(version: 20180309160427) do
add_foreign_key "gpg_signatures", "gpg_keys", on_delete: :nullify
add_foreign_key "gpg_signatures", "projects", on_delete: :cascade
add_foreign_key "group_custom_attributes", "namespaces", column: "group_id", on_delete: :cascade
add_foreign_key "internal_ids", "projects", on_delete: :cascade
add_foreign_key "issue_assignees", "issues", name: "fk_b7d881734a", on_delete: :cascade
add_foreign_key "issue_assignees", "users", name: "fk_5e0c8d9154", on_delete: :cascade
add_foreign_key "issue_metrics", "issues", on_delete: :cascade
......
FactoryBot.define do
factory :internal_id do
project
usage :issues
last_value { project.issues.maximum(:iid) || 0 }
end
end
......@@ -279,6 +279,7 @@ project:
- lfs_file_locks
- project_badges
- source_of_merge_requests
- internal_ids
award_emoji:
- awardable
- user
......
......@@ -34,7 +34,7 @@ describe Issuable do
subject { build(:issue) }
before do
allow(subject).to receive(:set_iid).and_return(false)
allow(InternalId).to receive(:generate_next).and_return(nil)
end
it { is_expected.to validate_presence_of(:project) }
......
require 'spec_helper'
describe InternalId do
let(:project) { create(:project) }
let(:usage) { :issues }
let(:issue) { build(:issue, project: project) }
let(:scope) { { project: project } }
let(:init) { ->(s) { s.project.issues.size } }
context 'validations' do
it { is_expected.to validate_presence_of(:usage) }
end
describe '.generate_next' do
subject { described_class.generate_next(issue, scope, usage, init) }
context 'in the absence of a record' do
it 'creates a record if not yet present' do
expect { subject }.to change { described_class.count }.from(0).to(1)
end
it 'stores record attributes' do
subject
described_class.first.tap do |record|
expect(record.project).to eq(project)
expect(record.usage).to eq(usage.to_s)
end
end
context 'with existing issues' do
before do
rand(1..10).times { create(:issue, project: project) }
described_class.delete_all
end
it 'calculates last_value values automatically' do
expect(subject).to eq(project.issues.size + 1)
end
end
context 'with concurrent inserts on table' do
it 'looks up the record if it was created concurrently' do
args = { **scope, usage: described_class.usages[usage.to_s] }
record = double
expect(described_class).to receive(:find_by).with(args).and_return(nil) # first call, record not present
expect(described_class).to receive(:find_by).with(args).and_return(record) # second call, record was created by another process
expect(described_class).to receive(:create!).and_raise(ActiveRecord::RecordNotUnique, 'record not unique')
expect(record).to receive(:increment_and_save!)
subject
end
end
end
it 'generates a strictly monotone, gapless sequence' do
seq = (0..rand(100)).map do
described_class.generate_next(issue, scope, usage, init)
end
normalized = seq.map { |i| i - seq.min }
expect(normalized).to eq((0..seq.size - 1).to_a)
end
context 'with an insufficient schema version' do
before do
described_class.reset_column_information
expect(ActiveRecord::Migrator).to receive(:current_version).and_return(InternalId::REQUIRED_SCHEMA_VERSION - 1)
end
let(:init) { double('block') }
it 'calculates next internal ids on the fly' do
val = rand(1..100)
expect(init).to receive(:call).with(issue).and_return(val)
expect(subject).to eq(val + 1)
end
end
end
describe '#increment_and_save!' do
let(:id) { create(:internal_id) }
subject { id.increment_and_save! }
it 'returns incremented iid' do
value = id.last_value
expect(subject).to eq(value + 1)
end
it 'saves the record' do
subject
expect(id.changed?).to be_falsey
end
context 'with last_value=nil' do
let(:id) { build(:internal_id, last_value: nil) }
it 'returns 1' do
expect(subject).to eq(1)
end
end
end
end
......@@ -9,11 +9,17 @@ describe Issue do
describe 'modules' do
subject { described_class }
it { is_expected.to include_module(InternalId) }
it { is_expected.to include_module(Issuable) }
it { is_expected.to include_module(Referable) }
it { is_expected.to include_module(Sortable) }
it { is_expected.to include_module(Taskable) }
it_behaves_like 'AtomicInternalId' do
let(:internal_id_attribute) { :iid }
let(:instance) { build(:issue) }
let(:scope_attrs) { { project: instance.project } }
let(:usage) { :issues }
end
end
subject { create(:issue) }
......
......@@ -17,7 +17,7 @@ describe MergeRequest do
describe 'modules' do
subject { described_class }
it { is_expected.to include_module(InternalId) }
it { is_expected.to include_module(NonatomicInternalId) }
it { is_expected.to include_module(Issuable) }
it { is_expected.to include_module(Referable) }
it { is_expected.to include_module(Sortable) }
......
require 'spec_helper'
shared_examples_for 'AtomicInternalId' do
describe '.has_internal_id' do
describe 'Module inclusion' do
subject { described_class }
it { is_expected.to include_module(AtomicInternalId) }
end
describe 'Validation' do
subject { instance }
before do
allow(InternalId).to receive(:generate_next).and_return(nil)
end
it { is_expected.to validate_presence_of(internal_id_attribute) }
it { is_expected.to validate_numericality_of(internal_id_attribute) }
end
describe 'internal id generation' do
subject { instance.save! }
it 'calls InternalId.generate_next and sets internal id attribute' do
iid = rand(1..1000)
expect(InternalId).to receive(:generate_next).with(instance, scope_attrs, usage, any_args).and_return(iid)
subject
expect(instance.public_send(internal_id_attribute)).to eq(iid)
end
it 'does not overwrite an existing internal id' do
instance.public_send("#{internal_id_attribute}=", 4711)
expect { subject }.not_to change { instance.public_send(internal_id_attribute) }
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment