Commit 40adefa9 authored by Dylan Griffith's avatar Dylan Griffith

Merge branch '341783-log-cross-db-modifications-v2' into 'master'

Detect and log cross-database modifications in production (v2)

See merge request gitlab-org/gitlab!74177
parents 7d7180cb 7d680ceb
......@@ -12,7 +12,9 @@ module Ci
# Ci::Pipeline#destroy triggers `use_fast_destroy :job_artifacts` and
# ci_builds has ON DELETE CASCADE to ci_pipelines. The pipeline, the builds,
# job and pipeline artifacts all get destroyed here.
pipeline.reset.destroy!
::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/345664') do
pipeline.reset.destroy!
end
ServiceResponse.success(message: 'Pipeline not found')
rescue ActiveRecord::RecordNotFound
......
......@@ -65,7 +65,10 @@ module Users
user.destroy_dependent_associations_in_batches(exclude: [:snippets])
# Destroy the namespace after destroying the user since certain methods may depend on the namespace existing
user_data = user.destroy
user_data = nil
::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/340260') do
user_data = user.destroy
end
namespace.destroy
user_data
......
---
name: detect_cross_database_modification
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73316
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/344620
milestone: '14.5'
type: development
group: group::sharding
default_enabled: false
......@@ -5,6 +5,10 @@ if Gitlab.dev_or_test_env? || Gitlab::Utils.to_boolean(ENV['GITLAB_ENABLE_QUERY_
Gitlab::Database::QueryAnalyzer.instance.hook!
Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::QueryAnalyzers::GitlabSchemasMetrics)
if Rails.env.test? || Gitlab::Utils.to_boolean(ENV['ENABLE_CROSS_DATABASE_MODIFICATION_DETECTION'], default: false)
Gitlab::Database::QueryAnalyzer.instance.all_analyzers.append(::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification)
end
Gitlab::Application.configure do |config|
config.middleware.use(Gitlab::Middleware::QueryAnalyzer)
end
......
......@@ -168,18 +168,6 @@ module Gitlab
yield
end
# This method will allow cross database modifications within the block
# Example:
#
# allow_cross_database_modification_within_transaction(url: 'url-to-an-issue') do
# create(:build) # inserts ci_build and project record in one transaction
# end
def self.allow_cross_database_modification_within_transaction(url:)
# this method will be overridden in:
# spec/support/database/cross_database_modification_check.rb
yield
end
def self.add_post_migrate_path_to_rails(force: false)
return if ENV['SKIP_POST_DEPLOYMENT_MIGRATIONS'] && !force
......
......@@ -58,6 +58,8 @@ module Gitlab
return unless parsed
analyzers.each do |analyzer|
next if analyzer.suppressed?
analyzer.analyze(parsed)
rescue StandardError => e
# We catch all standard errors to prevent validation errors to introduce fatal errors in production
......
......@@ -4,16 +4,32 @@ module Gitlab
module Database
module QueryAnalyzers
class Base
def self.suppressed?
Thread.current[self.suppress_key]
end
def self.suppress=(value)
Thread.current[self.suppress_key] = value
end
def self.with_suppressed(value = true, &blk)
previous = self.suppressed?
self.suppress = value
yield
ensure
self.suppress = previous
end
def self.begin!
Thread.current[self.class.name] = {}
Thread.current[self.context_key] = {}
end
def self.end!
Thread.current[self.class.name] = nil
Thread.current[self.context_key] = nil
end
def self.context
Thread.current[self.class.name]
Thread.current[self.context_key]
end
def self.enabled?
......@@ -23,6 +39,14 @@ module Gitlab
def self.analyze(parsed)
raise NotImplementedError
end
def self.context_key
"#{self.class.name}_context"
end
def self.suppress_key
"#{self.class.name}_suppressed"
end
end
end
end
......
# frozen_string_literal: true
module Gitlab
module Database
module QueryAnalyzers
class PreventCrossDatabaseModification < Database::QueryAnalyzers::Base
CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(StandardError)
# This method will allow cross database modifications within the block
# Example:
#
# allow_cross_database_modification_within_transaction(url: 'url-to-an-issue') do
# create(:build) # inserts ci_build and project record in one transaction
# end
def self.allow_cross_database_modification_within_transaction(url:, &blk)
self.with_suppressed(true, &blk)
end
# This method will prevent cross database modifications within the block
# if it was allowed previously
def self.with_cross_database_modification_prevented(&blk)
self.with_suppressed(false, &blk)
end
def self.begin!
super
context.merge!({
transaction_depth_by_db: Hash.new { |h, k| h[k] = 0 },
modified_tables_by_db: Hash.new { |h, k| h[k] = Set.new }
})
end
def self.enabled?
::Feature::FlipperFeature.table_exists? &&
Feature.enabled?(:detect_cross_database_modification, default_enabled: :yaml)
end
# rubocop:disable Metrics/AbcSize
def self.analyze(parsed)
return if in_factory_bot_create?
database = ::Gitlab::Database.db_config_name(parsed.connection)
sql = parsed.sql
# We ignore BEGIN in tests as this is the outer transaction for
# DatabaseCleaner
if sql.start_with?('SAVEPOINT') || (!Rails.env.test? && sql.start_with?('BEGIN'))
context[:transaction_depth_by_db][database] += 1
return
elsif sql.start_with?('RELEASE SAVEPOINT', 'ROLLBACK TO SAVEPOINT') || (!Rails.env.test? && sql.start_with?('ROLLBACK', 'COMMIT'))
context[:transaction_depth_by_db][database] -= 1
if context[:transaction_depth_by_db][database] <= 0
context[:modified_tables_by_db][database].clear
end
return
end
return if context[:transaction_depth_by_db].values.all?(&:zero?)
# PgQuery might fail in some cases due to limited nesting:
# https://github.com/pganalyze/pg_query/issues/209
tables = sql.downcase.include?(' for update') ? parsed.pg.tables : parsed.pg.dml_tables
# We have some code where plans and gitlab_subscriptions are lazily
# created and this causes lots of spec failures
# https://gitlab.com/gitlab-org/gitlab/-/issues/343394
tables -= %w[plans gitlab_subscriptions]
return if tables.empty?
# All migrations will write to schema_migrations in the same transaction.
# It's safe to ignore this since schema_migrations exists in all
# databases
return if tables == ['schema_migrations']
context[:modified_tables_by_db][database].merge(tables)
all_tables = context[:modified_tables_by_db].values.map(&:to_a).flatten
schemas = ::Gitlab::Database::GitlabSchema.table_schemas(all_tables)
if schemas.many?
message = "Cross-database data modification of '#{schemas.to_a.join(", ")}' were detected within " \
"a transaction modifying the '#{all_tables.to_a.join(", ")}' tables." \
"Please refer to https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions for details on how to resolve this exception."
if schemas.any? { |s| s.to_s.start_with?("undefined") }
message += " The gitlab_schema was undefined for one or more of the tables in this transaction. Any new tables must be added to lib/gitlab/database/gitlab_schemas.yml ."
end
raise CrossDatabaseModificationAcrossUnsupportedTablesError, message
end
rescue CrossDatabaseModificationAcrossUnsupportedTablesError => e
::Gitlab::ErrorTracking.track_exception(e, { gitlab_schemas: schemas, tables: all_tables, query: parsed.sql })
raise if raise_exception?
end
# rubocop:enable Metrics/AbcSize
# We only raise in tests for now otherwise some features will be broken
# in development. For now we've mostly only added allowlist based on
# spec names. Until we have allowed all the violations inline we don't
# want to raise in development.
def self.raise_exception?
Rails.env.test?
end
# We ignore execution in the #create method from FactoryBot
# because it is not representative of real code we run in
# production. There are far too many false positives caused
# by instantiating objects in different `gitlab_schema` in a
# FactoryBot `create`.
def self.in_factory_bot_create?
Rails.env.test? && caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' }
end
end
end
end
end
......@@ -9,6 +9,7 @@ RSpec.describe Gitlab::Database::QueryAnalyzer, query_analyzers: false do
before do
allow(described_class.instance).to receive(:all_analyzers).and_return([analyzer, disabled_analyzer])
allow(analyzer).to receive(:enabled?).and_return(true)
allow(analyzer).to receive(:suppressed?).and_return(false)
allow(analyzer).to receive(:begin!)
allow(analyzer).to receive(:end!)
allow(disabled_analyzer).to receive(:enabled?).and_return(false)
......@@ -125,6 +126,13 @@ RSpec.describe Gitlab::Database::QueryAnalyzer, query_analyzers: false do
expect { process_sql("SELECT 1 FROM projects") }.not_to raise_error
end
it 'does not call analyze on suppressed analyzers' do
expect(analyzer).to receive(:suppressed?).and_return(true)
expect(analyzer).not_to receive(:analyze)
expect { process_sql("SELECT 1 FROM projects") }.not_to raise_error
end
def process_sql(sql)
described_class.instance.within do
ApplicationRecord.load_balancer.read_write do |connection|
......
......@@ -2,10 +2,18 @@
require 'spec_helper'
RSpec.describe 'Database::PreventCrossDatabaseModification' do
RSpec.describe Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification, query_analyzers: false do
let_it_be(:pipeline, refind: true) { create(:ci_pipeline) }
let_it_be(:project, refind: true) { create(:project) }
before do
allow(Gitlab::Database::QueryAnalyzer.instance).to receive(:all_analyzers).and_return([described_class])
end
around do |example|
Gitlab::Database::QueryAnalyzer.instance.within { example.run }
end
shared_examples 'successful examples' do
context 'outside transaction' do
it { expect { run_queries }.not_to raise_error }
......@@ -122,10 +130,10 @@ RSpec.describe 'Database::PreventCrossDatabaseModification' do
include_examples 'successful examples'
end
describe '#allow_cross_database_modification_within_transaction' do
describe '.allow_cross_database_modification_within_transaction' do
it 'skips raising error' do
expect do
Gitlab::Database.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do
described_class.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do
Project.transaction do
pipeline.touch
project.touch
......@@ -136,7 +144,7 @@ RSpec.describe 'Database::PreventCrossDatabaseModification' do
it 'skips raising error on factory creation' do
expect do
Gitlab::Database.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do
described_class.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do
ApplicationRecord.transaction do
create(:ci_pipeline)
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Middleware::QueryAnalyzer, query_analyzers: false do
describe 'the PreventCrossDatabaseModification' do
describe '#call' do
let(:app) { double(:app) }
let(:middleware) { described_class.new(app) }
let(:env) { {} }
subject { middleware.call(env) }
context 'when there is a cross modification' do
before do
allow(app).to receive(:call) do
Project.transaction do
Project.where(id: -1).update_all(id: -1)
::Ci::Pipeline.where(id: -1).update_all(id: -1)
end
end
end
it 'detects cross modifications and tracks exception' do
expect(::Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception)
expect { subject }.not_to raise_error
end
context 'when the detect_cross_database_modification is disabled' do
before do
stub_feature_flags(detect_cross_database_modification: false)
end
it 'does not detect cross modifications' do
expect(::Gitlab::ErrorTracking).not_to receive(:track_and_raise_for_dev_exception)
subject
end
end
end
context 'when there is no cross modification' do
before do
allow(app).to receive(:call) do
Project.transaction do
Project.where(id: -1).update_all(id: -1)
Namespace.where(id: -1).update_all(id: -1)
end
end
end
it 'does not log anything' do
expect(::Gitlab::ErrorTracking).not_to receive(:track_and_raise_for_dev_exception)
subject
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::SidekiqMiddleware::QueryAnalyzer, query_analyzers: false do
describe 'the PreventCrossDatabaseModification' do
describe '#call' do
let(:worker) { double(:worker) }
let(:job) { { 'jid' => 'job123' } }
let(:queue) { 'some-queue' }
let(:middleware) { described_class.new }
def do_queries
end
subject { middleware.call(worker, job, queue) { do_queries } }
context 'when there is a cross modification' do
def do_queries
Project.transaction do
Project.where(id: -1).update_all(id: -1)
::Ci::Pipeline.where(id: -1).update_all(id: -1)
end
end
it 'detects cross modifications and tracks exception' do
expect(::Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception)
subject
end
context 'when the detect_cross_database_modification is disabled' do
before do
stub_feature_flags(detect_cross_database_modification: false)
end
it 'does not detect cross modifications' do
expect(::Gitlab::ErrorTracking).not_to receive(:track_and_raise_for_dev_exception)
subject
end
end
end
context 'when there is no cross modification' do
def do_queries
Project.transaction do
Project.where(id: -1).update_all(id: -1)
Namespace.where(id: -1).update_all(id: -1)
end
end
it 'does not log anything' do
expect(::Gitlab::ErrorTracking).not_to receive(:track_and_raise_for_dev_exception)
subject
end
end
end
end
end
# frozen_string_literal: true
module Database
module PreventCrossDatabaseModification
CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(StandardError)
module GitlabDatabaseMixin
def allow_cross_database_modification_within_transaction(url:)
cross_database_context = Database::PreventCrossDatabaseModification.cross_database_context
return yield unless cross_database_context && cross_database_context[:enabled]
transaction_tracker_enabled_was = cross_database_context[:enabled]
cross_database_context[:enabled] = false
yield
ensure
cross_database_context[:enabled] = transaction_tracker_enabled_was if cross_database_context
end
end
module SpecHelpers
def with_cross_database_modification_prevented
subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |name, start, finish, id, payload|
PreventCrossDatabaseModification.prevent_cross_database_modification!(payload[:connection], payload[:sql])
end
PreventCrossDatabaseModification.reset_cross_database_context!
PreventCrossDatabaseModification.cross_database_context.merge!(enabled: true, subscriber: subscriber)
yield if block_given?
ensure
cleanup_with_cross_database_modification_prevented if block_given?
end
def cleanup_with_cross_database_modification_prevented
if PreventCrossDatabaseModification.cross_database_context
ActiveSupport::Notifications.unsubscribe(PreventCrossDatabaseModification.cross_database_context[:subscriber])
PreventCrossDatabaseModification.cross_database_context[:enabled] = false
end
end
end
def self.cross_database_context
Thread.current[:transaction_tracker]
end
def self.reset_cross_database_context!
Thread.current[:transaction_tracker] = initial_data
end
def self.initial_data
{
enabled: false,
transaction_depth_by_db: Hash.new { |h, k| h[k] = 0 },
modified_tables_by_db: Hash.new { |h, k| h[k] = Set.new }
}
end
def self.prevent_cross_database_modification!(connection, sql)
return unless cross_database_context
return unless cross_database_context[:enabled]
return if connection.pool.instance_of?(ActiveRecord::ConnectionAdapters::NullPool)
return if in_factory_bot_create?
database = connection.pool.db_config.name
if sql.start_with?('SAVEPOINT')
cross_database_context[:transaction_depth_by_db][database] += 1
return
elsif sql.start_with?('RELEASE SAVEPOINT', 'ROLLBACK TO SAVEPOINT')
cross_database_context[:transaction_depth_by_db][database] -= 1
if cross_database_context[:transaction_depth_by_db][database] <= 0
cross_database_context[:modified_tables_by_db][database].clear
end
return
end
return if cross_database_context[:transaction_depth_by_db].values.all?(&:zero?)
# PgQuery might fail in some cases due to limited nesting:
# https://github.com/pganalyze/pg_query/issues/209
parsed_query = PgQuery.parse(sql)
tables = sql.downcase.include?(' for update') ? parsed_query.tables : parsed_query.dml_tables
# We have some code where plans and gitlab_subscriptions are lazily
# created and this causes lots of spec failures
# https://gitlab.com/gitlab-org/gitlab/-/issues/343394
tables -= %w[plans gitlab_subscriptions]
return if tables.empty?
# All migrations will write to schema_migrations in the same transaction.
# It's safe to ignore this since schema_migrations exists in all
# databases
return if tables == ['schema_migrations']
cross_database_context[:modified_tables_by_db][database].merge(tables)
all_tables = cross_database_context[:modified_tables_by_db].values.map(&:to_a).flatten
schemas = ::Gitlab::Database::GitlabSchema.table_schemas(all_tables)
if schemas.many?
message = "Cross-database data modification of '#{schemas.to_a.join(", ")}' were detected within " \
"a transaction modifying the '#{all_tables.to_a.join(", ")}' tables." \
"Please refer to https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions for details on how to resolve this exception."
if schemas.any? { |s| s.to_s.start_with?("undefined") }
message += " The gitlab_schema was undefined for one or more of the tables in this transaction. Any new tables must be added to spec/support/database/gitlab_schemas.yml ."
end
raise Database::PreventCrossDatabaseModification::CrossDatabaseModificationAcrossUnsupportedTablesError, message
end
end
# We ignore execution in the #create method from FactoryBot
# because it is not representative of real code we run in
# production. There are far too many false positives caused
# by instantiating objects in different `gitlab_schema` in a
# FactoryBot `create`.
def self.in_factory_bot_create?
caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' }
end
end
module PreventCrossDatabaseModificationSpecHelpers
delegate :with_cross_database_modification_prevented,
:allow_cross_database_modification_within_transaction,
to: :'::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification'
end
Gitlab::Database.singleton_class.prepend(
Database::PreventCrossDatabaseModification::GitlabDatabaseMixin)
CROSS_DB_MODIFICATION_ALLOW_LIST = Set.new(YAML.load_file(File.join(__dir__, 'cross-database-modification-allowlist.yml'))).freeze
RSpec.configure do |config|
config.include(::Database::PreventCrossDatabaseModification::SpecHelpers)
config.include(PreventCrossDatabaseModificationSpecHelpers)
# By default allow cross-modifications as we want to observe only transactions
# within a specific block of execution which is defined be `before(:each)` and `after(:each)`
config.before(:all) do
::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.suppress = true
end
# Using before and after blocks because the around block causes problems with the let_it_be
# record creations. It makes an extra savepoint which breaks the transaction count logic.
config.before do |example_file|
if CROSS_DB_MODIFICATION_ALLOW_LIST.exclude?(example_file.file_path_rerun_argument)
with_cross_database_modification_prevented
end
::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.suppress =
CROSS_DB_MODIFICATION_ALLOW_LIST.include?(example_file.file_path_rerun_argument)
end
# Reset after execution to preferred state
config.after do |example_file|
cleanup_with_cross_database_modification_prevented
::Gitlab::Database::QueryAnalyzers::PreventCrossDatabaseModification.suppress = true
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment