Commit 2a7cd9cd authored by Adam Hegyi's avatar Adam Hegyi Committed by Stan Hu

Ensure even select values in UNION query [RUN AS-IF-FOSS]

parent 440e4556
...@@ -99,4 +99,8 @@ class ApplicationRecord < ActiveRecord::Base ...@@ -99,4 +99,8 @@ class ApplicationRecord < ActiveRecord::Base
::Feature.enabled?(:active_record_subtransactions_counter, type: :ops, default_enabled: :yaml) && ::Feature.enabled?(:active_record_subtransactions_counter, type: :ops, default_enabled: :yaml) &&
connection.transaction_open? connection.transaction_open?
end end
def self.cached_column_list
self.column_names.map { |column_name| self.arel_table[column_name] }
end
end end
...@@ -5,7 +5,7 @@ module SelectForProjectAuthorization ...@@ -5,7 +5,7 @@ module SelectForProjectAuthorization
class_methods do class_methods do
def select_for_project_authorization def select_for_project_authorization
select("projects.id AS project_id, members.access_level") select("projects.id AS project_id", "members.access_level")
end end
def select_as_maintainer_for_project_authorization def select_as_maintainer_for_project_authorization
......
...@@ -158,7 +158,7 @@ class Group < Namespace ...@@ -158,7 +158,7 @@ class Group < Namespace
if current_scope.joins_values.include?(:shared_projects) if current_scope.joins_values.include?(:shared_projects)
joins('INNER JOIN namespaces project_namespace ON project_namespace.id = projects.namespace_id') joins('INNER JOIN namespaces project_namespace ON project_namespace.id = projects.namespace_id')
.where(project_namespace: { share_with_group_lock: false }) .where(project_namespace: { share_with_group_lock: false })
.select("projects.id AS project_id, LEAST(project_group_links.group_access, members.access_level) AS access_level") .select("projects.id AS project_id", "LEAST(project_group_links.group_access, members.access_level) AS access_level")
else else
super super
end end
...@@ -463,7 +463,7 @@ class Group < Namespace ...@@ -463,7 +463,7 @@ class Group < Namespace
id id
end end
group_hierarchy_members = GroupMember.where(source_id: source_ids) group_hierarchy_members = GroupMember.where(source_id: source_ids).select(*GroupMember.cached_column_list)
GroupMember.from_union([group_hierarchy_members, GroupMember.from_union([group_hierarchy_members,
members_from_self_and_ancestor_group_shares]).authorizable members_from_self_and_ancestor_group_shares]).authorizable
...@@ -481,6 +481,7 @@ class Group < Namespace ...@@ -481,6 +481,7 @@ class Group < Namespace
group_hierarchy_members = GroupMember.active_without_invites_and_requests group_hierarchy_members = GroupMember.active_without_invites_and_requests
.non_minimal_access .non_minimal_access
.where(source_id: source_ids) .where(source_id: source_ids)
.select(*GroupMember.cached_column_list)
GroupMember.from_union([group_hierarchy_members, GroupMember.from_union([group_hierarchy_members,
members_from_self_and_ancestor_group_shares]) members_from_self_and_ancestor_group_shares])
......
...@@ -2008,8 +2008,8 @@ class User < ApplicationRecord ...@@ -2008,8 +2008,8 @@ class User < ApplicationRecord
def authorized_groups_without_shared_membership def authorized_groups_without_shared_membership
Group.from_union([ Group.from_union([
groups, groups.select(Namespace.arel_table[Arel.star]),
authorized_projects.joins(:namespace).select('namespaces.*') authorized_projects.joins(:namespace).select(Namespace.arel_table[Arel.star])
]) ])
end end
......
...@@ -311,6 +311,75 @@ union = Gitlab::SQL::Union.new([projects, more_projects, ...]) ...@@ -311,6 +311,75 @@ union = Gitlab::SQL::Union.new([projects, more_projects, ...])
Project.from("(#{union.to_sql}) projects") Project.from("(#{union.to_sql}) projects")
``` ```
### Uneven columns in the UNION sub-queries
When the UNION query has uneven columns in the SELECT clauses, the database returns an error.
Consider the following UNION query:
```sql
SELECT id FROM users WHERE id = 1
UNION
SELECT id, name FROM users WHERE id = 2
end
```
The query results in the following error message:
```plaintext
each UNION query must have the same number of columns
```
This problem is apparent and it can be easily fixed during development. One edge-case is when
UNION queries are combined with explicit column listing where the list comes from the
`ActiveRecord` schema cache.
Example (bad, avoid it):
```ruby
scope1 = User.select(User.column_names).where(id: [1, 2, 3]) # selects the columns explicitly
scope2 = User.where(id: [10, 11, 12]) # uses SELECT users.*
User.connection.execute(Gitlab::SQL::Union.new([scope1, scope2]).to_sql)
```
When this code is deployed, it doesn't cause problems immediately. When another
developer adds a new database column to the `users` table, this query breaks in
production and can cause downtime. The second query (`SELECT users.*`) includes the
newly added column; however, the first query does not. The `column_names` method returns stale
values (the new column is missing), because the values are cached within the `ActiveRecord` schema
cache. These values are usually populated when the application boots up.
At this point, the only fix would be a full application restart so that the schema cache gets
updated.
The problem can be avoided if we always use `SELECT users.*` or we always explicitly define the
columns.
Using `SELECT users.*`:
```ruby
# Bad, avoid it
scope1 = User.select(User.column_names).where(id: [1, 2, 3])
scope2 = User.where(id: [10, 11, 12])
# Good, both queries generate SELECT users.*
scope1 = User.where(id: [1, 2, 3])
scope2 = User.where(id: [10, 11, 12])
User.connection.execute(Gitlab::SQL::Union.new([scope1, scope2]).to_sql)
```
Explicit column list definition:
```ruby
# Good, the SELECT columns are consistent
columns = User.cached_column_names # The helper returns fully qualified (table.column) column names (Arel)
scope1 = User.select(*columns).where(id: [1, 2, 3]) # selects the columns explicitly
scope2 = User.select(*columns).where(id: [10, 11, 12]) # uses SELECT users.*
User.connection.execute(Gitlab::SQL::Union.new([scope1, scope2]).to_sql)
```
## Ordering by Creation Date ## Ordering by Creation Date
When ordering records based on the time they were created, you can order When ordering records based on the time they were created, you can order
......
...@@ -20,6 +20,11 @@ module EE ...@@ -20,6 +20,11 @@ module EE
self.inheritance_column = :_type_disabled self.inheritance_column = :_type_disabled
self.enumerate_columns_in_select_statements = true self.enumerate_columns_in_select_statements = true
# backported from ApplicationRecord
def self.cached_column_list
self.column_names.map { |column_name| self.arel_table[column_name] }
end
end end
class GroupGroupLink < ActiveRecord::Base class GroupGroupLink < ActiveRecord::Base
...@@ -47,7 +52,9 @@ module EE ...@@ -47,7 +52,9 @@ module EE
end end
def members_with_parents def members_with_parents
group_hierarchy_members = Member.where(source_type: 'Namespace', source_id: source_ids) group_hierarchy_members = Member
.where(source_type: 'Namespace', source_id: source_ids)
.select(*Member.cached_column_list)
Member.from_union([group_hierarchy_members, Member.from_union([group_hierarchy_members,
members_from_self_and_ancestor_group_shares]) members_from_self_and_ancestor_group_shares])
......
...@@ -19,6 +19,7 @@ module Gitlab ...@@ -19,6 +19,7 @@ module Gitlab
# Project.where("id IN (#{sql})") # Project.where("id IN (#{sql})")
class SetOperator class SetOperator
def initialize(relations, remove_duplicates: true, remove_order: true) def initialize(relations, remove_duplicates: true, remove_order: true)
verify_select_values!(relations) if Rails.env.test? || Rails.env.development?
@relations = relations @relations = relations
@remove_duplicates = remove_duplicates @remove_duplicates = remove_duplicates
@remove_order = remove_order @remove_order = remove_order
...@@ -54,6 +55,34 @@ module Gitlab ...@@ -54,6 +55,34 @@ module Gitlab
private private
attr_reader :relations, :remove_duplicates, :remove_order attr_reader :relations, :remove_duplicates, :remove_order
def verify_select_values!(relations)
all_select_values = relations.map do |relation|
if relation.respond_to?(:select_values)
relation.select_values
else
relation.projections # Handle Arel based subqueries
end
end
unless all_select_values.map(&:size).uniq.one?
relation_select_sizes = all_select_values.map.with_index do |select_values, index|
if select_values.empty?
"Relation ##{index}: *, all columns"
else
"Relation ##{index}: #{select_values.size} select values"
end
end
exception_text = <<~EOF
Relations with uneven select values were passed. The UNION query could break when the underlying table changes (add or remove columns).
#{relation_select_sizes.join("\n")}
EOF
raise(exception_text)
end
end
end end
end end
end end
...@@ -22,7 +22,7 @@ RSpec.shared_examples 'SQL set operator' do |operator_keyword| ...@@ -22,7 +22,7 @@ RSpec.shared_examples 'SQL set operator' do |operator_keyword|
end end
it 'skips Model.none segments' do it 'skips Model.none segments' do
empty_relation = User.none empty_relation = User.none.select(:id)
set_operator = described_class.new([empty_relation, relation_1, relation_2]) set_operator = described_class.new([empty_relation, relation_1, relation_2])
expect {User.where("users.id IN (#{set_operator.to_sql})").to_a}.not_to raise_error expect {User.where("users.id IN (#{set_operator.to_sql})").to_a}.not_to raise_error
...@@ -44,6 +44,17 @@ RSpec.shared_examples 'SQL set operator' do |operator_keyword| ...@@ -44,6 +44,17 @@ RSpec.shared_examples 'SQL set operator' do |operator_keyword|
end end
end end
context 'when uneven select values are used' do
let(:relation_1) { User.where(email: 'alice@example.com').select(*User.column_names) }
let(:relation_2) { User.where(email: 'bob@example.com') }
it 'raises error' do
expect do
described_class.new([relation_1, relation_2])
end.to raise_error /Relations with uneven select values were passed/
end
end
describe 'remove_order parameter' do describe 'remove_order parameter' do
let(:scopes) do let(:scopes) do
[ [
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment