Commit 6bdac411 authored by Kamil Trzciński's avatar Kamil Trzciński

Add `RestrictGitlabSchema` that enforces `restrict_gitlab_migration`

This allows to limit migrations to run only in a specific
context. The migration being executed will be validated against
specification defined and will be interrupted if not configured
properly. Migrations that do are executed in different context
will be ignored.

Changelog: added
parent c33c5d65
......@@ -231,12 +231,28 @@ module Gitlab
::ActiveRecord::Base.configurations.configs_for(env_name: Rails.env).map(&:name)
end
# This returns all matching schemas that a given connection can use
# Since the `ActiveRecord::Base` might change the connection (from main to ci)
# This does not look at literal connection names, but rather compares
# models that are holders for a given db_config_name
def self.gitlab_schemas_for_connection(connection)
connection_name = self.db_config_name(connection)
primary_model = self.database_base_models.fetch(connection_name)
self.schemas_to_base_models
.select { |_, models| models.include?(primary_model) }
.keys
.map!(&:to_sym)
end
def self.db_config_for_connection(connection)
return unless connection
# The LB connection proxy does not have a direct db_config
# that can be referenced
return if connection.is_a?(::Gitlab::Database::LoadBalancing::ConnectionProxy)
if connection.is_a?(::Gitlab::Database::LoadBalancing::ConnectionProxy)
return connection.load_balancer.configuration.primary_db_config
end
# During application init we might receive `NullPool`
return unless connection.respond_to?(:pool) &&
......
......@@ -95,6 +95,10 @@ module Gitlab
def self.tables_to_schema
@tables_to_schema ||= YAML.load_file(Rails.root.join('lib/gitlab/database/gitlab_schemas.yml'))
end
def self.schema_names
@schema_names ||= self.tables_to_schema.values.to_set
end
end
end
end
......@@ -47,6 +47,8 @@ module Gitlab
# Returns the role (primary/replica) of the database the connection is
# connecting to.
def self.db_role_for_connection(connection)
return ROLE_UNKNOWN if connection.is_a?(::Gitlab::Database::LoadBalancing::ConnectionProxy)
db_config = Database.db_config_for_connection(connection)
return ROLE_UNKNOWN unless db_config
......
......@@ -94,6 +94,10 @@ module Gitlab
end
end
def primary_db_config
primary_model_or_model_if_enabled.connection_db_config
end
def replica_db_config
@model.connection_db_config
end
......
# frozen_string_literal: true
module Gitlab
module Database
module MigrationHelpers
module RestrictGitlabSchema
extend ActiveSupport::Concern
MigrationSkippedError = Class.new(StandardError)
included do
class_attribute :allowed_gitlab_schemas
end
class_methods do
def restrict_gitlab_migration(gitlab_schema:)
unless Gitlab::Database::GitlabSchema.schema_names.include?(gitlab_schema)
raise "Unknown 'gitlab_schema: #{gitlab_schema}' specified. It needs to be one of: " \
"#{Gitlab::Database::GitlabSchema.schema_names.to_a}"
end
self.allowed_gitlab_schemas = [gitlab_schema]
end
end
def migrate(direction)
if unmatched_schemas.any?
# TODO: This will fail running migration if we have two connections
# This will prevent future migrations from being run. Likely the best would
# be to ignore this type of migration.
# This is yet to be solved: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73756/diffs#note_866211093
raise MigrationSkippedError, "Current migration is skipped since it modifies "\
"'#{self.class.allowed_gitlab_schemas}' which is outside of '#{allowed_schemas_for_connection}'"
end
Gitlab::Database::QueryAnalyzer.instance.within([validator_class]) do
validator_class.allowed_gitlab_schemas = self.allowed_gitlab_schemas
super
end
end
private
def validator_class
Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas
end
def unmatched_schemas
(self.allowed_gitlab_schemas || []) - allowed_schemas_for_connection
end
def allowed_schemas_for_connection
Gitlab::Database.gitlab_schemas_for_connection(connection)
end
end
end
end
end
......@@ -30,13 +30,17 @@ module Gitlab
end
end
def within
def within(user_analyzers = nil)
# Due to singleton nature of analyzers
# only an outer invocation of the `.within`
# is allowed to initialize them
return yield if already_within?
if already_within?
raise 'Query analyzers are already defined, cannot re-define them.' if user_analyzers
begin!
return yield
end
begin!(user_analyzers || all_analyzers)
begin
yield
......@@ -61,21 +65,21 @@ module Gitlab
next if analyzer.suppressed? && !analyzer.requires_tracking?(parsed)
analyzer.analyze(parsed)
rescue StandardError, QueryAnalyzers::Base::QueryAnalyzerError => e
rescue StandardError, ::Gitlab::Database::QueryAnalyzers::Base::QueryAnalyzerError => e
# We catch all standard errors to prevent validation errors to introduce fatal errors in production
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end
end
# Enable query analyzers
def begin!
analyzers = all_analyzers.select do |analyzer|
def begin!(analyzers = all_analyzers)
analyzers = analyzers.select do |analyzer|
if analyzer.enabled?
analyzer.begin!
true
end
rescue StandardError, QueryAnalyzers::Base::QueryAnalyzerError => e
rescue StandardError, ::Gitlab::Database::QueryAnalyzers::Base::QueryAnalyzerError => e
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
false
......@@ -88,7 +92,7 @@ module Gitlab
def end!
enabled_analyzers.select do |analyzer|
analyzer.end!
rescue StandardError, QueryAnalyzers::Base::QueryAnalyzerError => e
rescue StandardError, ::Gitlab::Database::QueryAnalyzers::Base::QueryAnalyzerError => e
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end
......
# frozen_string_literal: true
module Gitlab
module Database
module QueryAnalyzers
class RestrictAllowedSchemas < Base
UnsupportedSchemaError = Class.new(QueryAnalyzerError)
DDLNotAllowedError = Class.new(UnsupportedSchemaError)
DMLNotAllowedError = Class.new(UnsupportedSchemaError)
DMLAccessDeniedError = Class.new(UnsupportedSchemaError)
IGNORED_SCHEMAS = %i[gitlab_shared].freeze
def self.enabled?
true
end
def self.allowed_gitlab_schemas
self.context[:allowed_gitlab_schemas]
end
def self.allowed_gitlab_schemas=(value)
self.context[:allowed_gitlab_schemas] = value
end
def self.analyze(parsed)
# If list of schemas is empty, we allow only DDL changes
if self.allowed_gitlab_schemas
self.restrict_to_dml_only(parsed)
else
self.restrict_to_ddl_only(parsed)
end
end
def self.restrict_to_ddl_only(parsed)
tables = self.dml_tables(parsed)
schemas = self.dml_schemas(tables)
if schemas.any?
raise DMLNotAllowedError, "Select/DML queries (SELECT/UPDATE/DELETE) are disallowed in the DDL (structure) mode. " \
"Modifying of '#{tables}' (#{schemas.to_a}) with '#{parsed.sql}'"
end
end
def self.restrict_to_dml_only(parsed)
if parsed.pg.ddl_tables.any?
raise DDLNotAllowedError, "DDL queries (structure) are disallowed in the Select/DML (SELECT/UPDATE/DELETE) mode. " \
"Modifying of '#{parsed.pg.ddl_tables}' with '#{parsed.sql}'"
end
tables = self.dml_tables(parsed)
schemas = self.dml_schemas(tables)
if (schemas - self.allowed_gitlab_schemas).any?
raise DMLAccessDeniedError, "Select/DML queries (SELECT/UPDATE/DELETE) do access '#{tables}' (#{schemas.to_a}) " \
"which is outside of list of allowed schemas: '#{self.allowed_gitlab_schemas}'."
end
end
def self.dml_tables(parsed)
parsed.pg.select_tables + parsed.pg.dml_tables
end
def self.dml_schemas(tables)
extra_schemas = ::Gitlab::Database::GitlabSchema.table_schemas(tables)
extra_schemas.subtract(IGNORED_SCHEMAS)
extra_schemas
end
end
end
end
end
......@@ -92,6 +92,18 @@ RSpec.describe Gitlab::Database::LoadBalancing do
end
end
context 'when an invalid connection is used' do
it 'returns :unknown' do
expect(described_class.db_role_for_connection(:invalid)).to eq(:unknown)
end
end
context 'when a null connection is used' do
it 'returns :unknown' do
expect(described_class.db_role_for_connection(nil)).to eq(:unknown)
end
end
context 'when a read connection is used' do
it 'returns :replica' do
load_balancer.read do |connection|
......
......@@ -4,6 +4,7 @@ require 'spec_helper'
RSpec.describe Gitlab::Database::QueryAnalyzer, query_analyzers: false do
let(:analyzer) { double(:query_analyzer) }
let(:user_analyzer) { double(:query_analyzer) }
let(:disabled_analyzer) { double(:disabled_query_analyzer) }
before do
......@@ -53,6 +54,10 @@ RSpec.describe Gitlab::Database::QueryAnalyzer, query_analyzers: false do
expect { |b| described_class.instance.within(&b) }.to yield_control
end
it 'raises exception when trying to re-define analyzers' do
expect { |b| described_class.instance.within([user_analyzer], &b) }.to raise_error /Query analyzers are already defined, cannot re-define them/
end
end
context 'when initializer is enabled' do
......@@ -75,6 +80,18 @@ RSpec.describe Gitlab::Database::QueryAnalyzer, query_analyzers: false do
expect { |b| described_class.instance.within(&b) }.to yield_control
end
end
context 'when user analyzers are used' do
it 'calls begin! and end!' do
expect(analyzer).not_to receive(:begin!)
allow(user_analyzer).to receive(:enabled?).and_return(true)
allow(user_analyzer).to receive(:suppressed?).and_return(false)
expect(user_analyzer).to receive(:begin!)
expect(user_analyzer).to receive(:end!)
expect { |b| described_class.instance.within([user_analyzer], &b) }.to yield_control
end
end
end
describe '#process_sql' do
......
......@@ -205,12 +205,12 @@ RSpec.describe Gitlab::Database do
end
context 'when the connection is LoadBalancing::ConnectionProxy' do
it 'returns nil' do
it 'returns primary_db_config' do
lb_config = ::Gitlab::Database::LoadBalancing::Configuration.new(ActiveRecord::Base)
lb = ::Gitlab::Database::LoadBalancing::LoadBalancer.new(lb_config)
proxy = ::Gitlab::Database::LoadBalancing::ConnectionProxy.new(lb)
expect(described_class.db_config_for_connection(proxy)).to be_nil
expect(described_class.db_config_for_connection(proxy)).to eq(lb_config.primary_db_config)
end
end
......@@ -229,7 +229,7 @@ RSpec.describe Gitlab::Database do
# This is a ConnectionProxy
expect(described_class.db_config_name(model.connection))
.to eq('unknown')
.to eq('main')
# This is an actual connection
expect(described_class.db_config_name(model.retrieve_connection))
......@@ -245,6 +245,31 @@ RSpec.describe Gitlab::Database do
end
end
describe '.gitlab_schemas_for_connection' do
it 'does raise exception for invalid connection' do
expect { described_class.gitlab_schemas_for_connection(:invalid) }.to raise_error /key not found: "unknown"/
end
it 'does return a valid schema depending on a base model used', :request_store do
# This is currently required as otherwise the `Ci::Build.connection` == `Project.connection`
# ENV due to lib/gitlab/database/load_balancing/setup.rb:93
stub_env('GITLAB_USE_MODEL_LOAD_BALANCING', '1')
# FF due to lib/gitlab/database/load_balancing/configuration.rb:92
stub_feature_flags(force_no_sharing_primary_model: true)
expect(described_class.gitlab_schemas_for_connection(Project.connection)).to include(:gitlab_main, :gitlab_shared)
expect(described_class.gitlab_schemas_for_connection(Ci::Build.connection)).to include(:gitlab_ci, :gitlab_shared)
end
it 'does return gitlab_ci when a ActiveRecord::Base is using CI connection' do
with_reestablished_active_record_base do
reconfigure_db_connection(model: ActiveRecord::Base, config_model: Ci::Build)
expect(described_class.gitlab_schemas_for_connection(ActiveRecord::Base.connection)).to include(:gitlab_ci, :gitlab_shared)
end
end
end
describe '#true_value' do
it 'returns correct value' do
expect(described_class.true_value).to eq "'t'"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment