Commit b845cfe8 authored by Rémy Coutable's avatar Rémy Coutable

Merge branch 'document-polymorphing-columns' into 'master'

Document using separate tables instead of polymorphic associations and STI

See merge request !11168
parents fc1f828b 4ff1aaed
No related merge requests found
...@@ -5,7 +5,7 @@ class AwardEmoji < ActiveRecord::Base ...@@ -5,7 +5,7 @@ class AwardEmoji < ActiveRecord::Base
include Participable include Participable
include GhostUser include GhostUser
belongs_to :awardable, polymorphic: true belongs_to :awardable, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
belongs_to :user belongs_to :user
validates :awardable, :user, presence: true validates :awardable, :user, presence: true
......
...@@ -4,7 +4,7 @@ class Deployment < ActiveRecord::Base ...@@ -4,7 +4,7 @@ class Deployment < ActiveRecord::Base
belongs_to :project, required: true, validate: true belongs_to :project, required: true, validate: true
belongs_to :environment, required: true, validate: true belongs_to :environment, required: true, validate: true
belongs_to :user belongs_to :user
belongs_to :deployable, polymorphic: true belongs_to :deployable, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
validates :sha, presence: true validates :sha, presence: true
validates :ref, presence: true validates :ref, presence: true
......
...@@ -47,7 +47,7 @@ class Event < ActiveRecord::Base ...@@ -47,7 +47,7 @@ class Event < ActiveRecord::Base
belongs_to :author, class_name: "User" belongs_to :author, class_name: "User"
belongs_to :project belongs_to :project
belongs_to :target, polymorphic: true belongs_to :target, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
# For Hash only # For Hash only
serialize :data # rubocop:disable Cop/ActiverecordSerialize serialize :data # rubocop:disable Cop/ActiverecordSerialize
......
class LabelLink < ActiveRecord::Base class LabelLink < ActiveRecord::Base
include Importable include Importable
belongs_to :target, polymorphic: true belongs_to :target, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
belongs_to :label belongs_to :label
validates :target, presence: true, unless: :importing? validates :target, presence: true, unless: :importing?
......
...@@ -8,7 +8,7 @@ class Member < ActiveRecord::Base ...@@ -8,7 +8,7 @@ class Member < ActiveRecord::Base
belongs_to :created_by, class_name: "User" belongs_to :created_by, class_name: "User"
belongs_to :user belongs_to :user
belongs_to :source, polymorphic: true belongs_to :source, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
delegate :name, :username, :email, to: :user, prefix: true delegate :name, :username, :email, to: :user, prefix: true
......
...@@ -41,7 +41,7 @@ class Note < ActiveRecord::Base ...@@ -41,7 +41,7 @@ class Note < ActiveRecord::Base
participant :author participant :author
belongs_to :project belongs_to :project
belongs_to :noteable, polymorphic: true, touch: true belongs_to :noteable, polymorphic: true, touch: true # rubocop:disable Cop/PolymorphicAssociations
belongs_to :author, class_name: "User" belongs_to :author, class_name: "User"
belongs_to :updated_by, class_name: "User" belongs_to :updated_by, class_name: "User"
belongs_to :last_edited_by, class_name: 'User' belongs_to :last_edited_by, class_name: 'User'
......
...@@ -4,7 +4,7 @@ class NotificationSetting < ActiveRecord::Base ...@@ -4,7 +4,7 @@ class NotificationSetting < ActiveRecord::Base
default_value_for :level, NotificationSetting.levels[:global] default_value_for :level, NotificationSetting.levels[:global]
belongs_to :user belongs_to :user
belongs_to :source, polymorphic: true belongs_to :source, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
belongs_to :project, foreign_key: 'source_id' belongs_to :project, foreign_key: 'source_id'
validates :user, presence: true validates :user, presence: true
......
class RedirectRoute < ActiveRecord::Base class RedirectRoute < ActiveRecord::Base
belongs_to :source, polymorphic: true belongs_to :source, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
validates :source, presence: true validates :source, presence: true
......
class Route < ActiveRecord::Base class Route < ActiveRecord::Base
belongs_to :source, polymorphic: true belongs_to :source, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
validates :source, presence: true validates :source, presence: true
......
...@@ -2,7 +2,7 @@ class SentNotification < ActiveRecord::Base ...@@ -2,7 +2,7 @@ class SentNotification < ActiveRecord::Base
serialize :position, Gitlab::Diff::Position # rubocop:disable Cop/ActiverecordSerialize serialize :position, Gitlab::Diff::Position # rubocop:disable Cop/ActiverecordSerialize
belongs_to :project belongs_to :project
belongs_to :noteable, polymorphic: true belongs_to :noteable, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
belongs_to :recipient, class_name: "User" belongs_to :recipient, class_name: "User"
validates :project, :recipient, presence: true validates :project, :recipient, presence: true
......
class Subscription < ActiveRecord::Base class Subscription < ActiveRecord::Base
belongs_to :user belongs_to :user
belongs_to :project belongs_to :project
belongs_to :subscribable, polymorphic: true belongs_to :subscribable, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
validates :user, :subscribable, presence: true validates :user, :subscribable, presence: true
......
...@@ -22,7 +22,7 @@ class Todo < ActiveRecord::Base ...@@ -22,7 +22,7 @@ class Todo < ActiveRecord::Base
belongs_to :author, class_name: "User" belongs_to :author, class_name: "User"
belongs_to :note belongs_to :note
belongs_to :project belongs_to :project
belongs_to :target, polymorphic: true, touch: true belongs_to :target, polymorphic: true, touch: true # rubocop:disable Cop/PolymorphicAssociations
belongs_to :user belongs_to :user
delegate :name, :email, to: :author, prefix: true, allow_nil: true delegate :name, :email, to: :author, prefix: true, allow_nil: true
......
...@@ -2,7 +2,7 @@ class Upload < ActiveRecord::Base ...@@ -2,7 +2,7 @@ class Upload < ActiveRecord::Base
# Upper limit for foreground checksum processing # Upper limit for foreground checksum processing
CHECKSUM_THRESHOLD = 100.megabytes CHECKSUM_THRESHOLD = 100.megabytes
belongs_to :model, polymorphic: true belongs_to :model, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
validates :size, presence: true validates :size, presence: true
validates :path, presence: true validates :path, presence: true
......
class UserAgentDetail < ActiveRecord::Base class UserAgentDetail < ActiveRecord::Base
belongs_to :subject, polymorphic: true belongs_to :subject, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
validates :user_agent, :ip_address, :subject_id, :subject_type, presence: true validates :user_agent, :ip_address, :subject_id, :subject_type, presence: true
......
...@@ -51,6 +51,8 @@ ...@@ -51,6 +51,8 @@
- [Post Deployment Migrations](post_deployment_migrations.md) - [Post Deployment Migrations](post_deployment_migrations.md)
- [Foreign Keys & Associations](foreign_keys.md) - [Foreign Keys & Associations](foreign_keys.md)
- [Serializing Data](serializing_data.md) - [Serializing Data](serializing_data.md)
- [Polymorphic Associations](polymorphic_associations.md)
- [Single Table Inheritance](single_table_inheritance.md)
## i18n ## i18n
......
# Polymorphic Associations
**Summary:** always use separate tables instead of polymorphic associations.
Rails makes it possible to define so called "polymorphic associations". This
usually works by adding two columns to a table: a target type column, and a
target id. For example, at the time of writing we have such a setup for
`members` with the following columns:
* `source_type`: a string defining the model to use, can be either `Project` or
`Namespace`.
* `source_id`: the ID of the row to retrieve based on `source_type`. For
example, when `source_type` is `Project` then `source_id` will contain a
project ID.
While such a setup may appear to be useful, it comes with many drawbacks; enough
that you should avoid this at all costs.
## Space Wasted
Because this setup relies on string values to determine the model to use it will
end up wasting a lot of space. For example, for `Project` and `Namespace` the
maximum size is 9 bytes, plus 1 extra byte for every string when using
PostgreSQL. While this may only be 10 bytes per row, given enough tables and
rows using such a setup we can end up wasting quite a bit of disk space and
memory (for any indexes).
## Indexes
Because our associations are broken up into two columns this may result in
requiring composite indexes for queries to be performed efficiently. While
composite indexes are not wrong at all, they can be tricky to set up as the
ordering of columns in these indexes is important to ensure optimal performance.
## Consistency
One really big problem with polymorphic associations is being unable to enforce
data consistency on the database level using foreign keys. For consistency to be
enforced on the database level one would have to write their own foreign key
logic to support polymorphic associations.
Enforcing consistency on the database level is absolutely crucial for
maintaining a healthy environment, and thus is another reason to avoid
polymorphic associations.
## Query Overhead
When using polymorphic associations you always need to filter using both
columns. For example, you may end up writing a query like this:
```sql
SELECT *
FROM members
WHERE source_type = 'Project'
AND source_id = 13083;
```
Here PostgreSQL can perform the query quite efficiently if both columns are
indexed, but as the query gets more complex it may not be able to use these
indexes efficiently.
## Mixed Responsibilities
Similar to functions and classes a table should have a single responsibility:
storing data with a certain set of pre-defined columns. When using polymorphic
associations you are instead storing different types of data (possibly with
different columns set) in the same table.
## The Solution
Fortunately there is a very simple solution to these problems: simply use a
separate table for every type you would otherwise store in the same table. Using
a separate table allows you to use everything a database may provide to ensure
consistency and query data efficiently, without any additional application logic
being necessary.
Let's say you have a `members` table storing both approved and pending members,
for both projects and groups, and the pending state is determined by the column
`requested_at` being set or not. Schema wise such a setup can lead to various
columns only being set for certain rows, wasting space. It's also possible that
certain indexes will only be set for certain rows, again wasting space. Finally,
querying such a table requires less than ideal queries. For example:
```sql
SELECT *
FROM members
WHERE requested_at IS NULL
AND source_type = 'GroupMember'
AND source_id = 4
```
Instead such a table should be broken up into separate tables. For example, you
may end up with 4 tables in this case:
* project_members
* group_members
* pending_project_members
* pending_group_members
This makes querying data trivial. For example, to get the members of a group
you'd run:
```sql
SELECT *
FROM group_members
WHERE group_id = 4
```
To get all the pending members of a group in turn you'd run:
```sql
SELECT *
FROM pending_group_members
WHERE group_id = 4
```
If you want to get both you can use a UNION, though you need to be explicit
about what columns you want to SELECT as otherwise the result set will use the
columns of the first query. For example:
```sql
SELECT id, 'Group' AS target_type, group_id AS target_id
FROM group_members
UNION ALL
SELECT id, 'Project' AS target_type, project_id AS target_id
FROM project_members
```
The above example is perhaps a bit silly, but it shows that there's nothing
stopping you from merging the data together and presenting it on the same page.
Selecting columns explicitly can also speed up queries as the database has to do
less work to get the data (compared to selecting all columns, even ones you're
not using).
Our schema also becomes easier. No longer do we need to both store and index the
`source_type` column, we can define foreign keys easily, and we don't need to
filter rows using the `IS NULL` condition.
To summarize: using separate tables allows us to use foreign keys effectively,
create indexes only where necessary, conserve space, query data more
efficiently, and scale these tables more easily (e.g. by storing them on
separate disks). A nice side effect of this is that code can also become easier
as you won't end up with a single model having to handle different kinds of
data.
# Single Table Inheritance
**Summary:** don't use Single Table Inheritance (STI), use separate tables
instead.
Rails makes it possible to have multiple models stored in the same table and map
these rows to the correct models using a `type` column. This can be used to for
example store two different types of SSH keys in the same table.
While tempting to use one should avoid this at all costs for the same reasons as
outlined in the document ["Polymorphic Associations"](polymorphic_associations.md).
## Solution
The solution is very simple: just use a separate table for every type you'd
otherwise store in the same table. For example, instead of having a `keys` table
with `type` set to either `Key` or `DeployKey` you'd have two separate tables:
`keys` and `deploy_keys`.
require_relative '../model_helpers'
module RuboCop module RuboCop
module Cop module Cop
# Cop that prevents the use of `serialize` in ActiveRecord models. # Cop that prevents the use of `serialize` in ActiveRecord models.
class ActiverecordSerialize < RuboCop::Cop::Cop class ActiverecordSerialize < RuboCop::Cop::Cop
include ModelHelpers
MSG = 'Do not store serialized data in the database, use separate columns and/or tables instead'.freeze MSG = 'Do not store serialized data in the database, use separate columns and/or tables instead'.freeze
def on_send(node) def on_send(node)
return unless in_models?(node) return unless in_model?(node)
add_offense(node, :selector) if node.children[1] == :serialize add_offense(node, :selector) if node.children[1] == :serialize
end end
def models_path
File.join(Dir.pwd, 'app', 'models')
end
def in_models?(node)
path = node.location.expression.source_buffer.name
path.start_with?(models_path)
end
end end
end end
end end
require_relative '../model_helpers'
module RuboCop
module Cop
# Cop that prevents the use of polymorphic associations
class PolymorphicAssociations < RuboCop::Cop::Cop
include ModelHelpers
MSG = 'Do not use polymorphic associations, use separate tables instead'.freeze
def on_send(node)
return unless in_model?(node)
return unless node.children[1] == :belongs_to
node.children.last.each_node(:pair) do |pair|
key_name = pair.children[0].children[0]
add_offense(pair, :expression) if key_name == :polymorphic
end
end
end
end
end
module RuboCop
module ModelHelpers
# Returns true if the given node originated from the models directory.
def in_model?(node)
path = node.location.expression.source_buffer.name
models_path = File.join(Dir.pwd, 'app', 'models')
path.start_with?(models_path)
end
end
end
...@@ -2,6 +2,7 @@ require_relative 'cop/custom_error_class' ...@@ -2,6 +2,7 @@ require_relative 'cop/custom_error_class'
require_relative 'cop/gem_fetcher' require_relative 'cop/gem_fetcher'
require_relative 'cop/activerecord_serialize' require_relative 'cop/activerecord_serialize'
require_relative 'cop/redirect_with_status' require_relative 'cop/redirect_with_status'
require_relative 'cop/polymorphic_associations'
require_relative 'cop/migration/add_column' require_relative 'cop/migration/add_column'
require_relative 'cop/migration/add_column_with_default_to_large_table' require_relative 'cop/migration/add_column_with_default_to_large_table'
require_relative 'cop/migration/add_concurrent_foreign_key' require_relative 'cop/migration/add_concurrent_foreign_key'
......
...@@ -10,7 +10,7 @@ describe RuboCop::Cop::ActiverecordSerialize do ...@@ -10,7 +10,7 @@ describe RuboCop::Cop::ActiverecordSerialize do
context 'inside the app/models directory' do context 'inside the app/models directory' do
it 'registers an offense when serialize is used' do it 'registers an offense when serialize is used' do
allow(cop).to receive(:in_models?).and_return(true) allow(cop).to receive(:in_model?).and_return(true)
inspect_source(cop, 'serialize :foo') inspect_source(cop, 'serialize :foo')
...@@ -23,7 +23,7 @@ describe RuboCop::Cop::ActiverecordSerialize do ...@@ -23,7 +23,7 @@ describe RuboCop::Cop::ActiverecordSerialize do
context 'outside the app/models directory' do context 'outside the app/models directory' do
it 'does nothing' do it 'does nothing' do
allow(cop).to receive(:in_models?).and_return(false) allow(cop).to receive(:in_model?).and_return(false)
inspect_source(cop, 'serialize :foo') inspect_source(cop, 'serialize :foo')
......
require 'spec_helper'
require 'rubocop'
require 'rubocop/rspec/support'
require_relative '../../../rubocop/cop/polymorphic_associations'
describe RuboCop::Cop::PolymorphicAssociations do
include CopHelper
subject(:cop) { described_class.new }
context 'inside the app/models directory' do
it 'registers an offense when polymorphic: true is used' do
allow(cop).to receive(:in_model?).and_return(true)
inspect_source(cop, 'belongs_to :foo, polymorphic: true')
aggregate_failures do
expect(cop.offenses.size).to eq(1)
expect(cop.offenses.map(&:line)).to eq([1])
end
end
end
context 'outside the app/models directory' do
it 'does nothing' do
allow(cop).to receive(:in_model?).and_return(false)
inspect_source(cop, 'belongs_to :foo, polymorphic: true')
expect(cop.offenses).to be_empty
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment