Commit 2e6280a6 authored by Mayra Cabrera's avatar Mayra Cabrera

Merge branch '213335-disallow-id-field-for-batch-counting' into 'master'

Disallow id field for distinct batch counting

See merge request gitlab-org/gitlab!29310
parents 99fef514 fc8a26c8
...@@ -37,6 +37,7 @@ module Gitlab ...@@ -37,6 +37,7 @@ module Gitlab
MIN_REQUIRED_BATCH_SIZE = 1_250 MIN_REQUIRED_BATCH_SIZE = 1_250
MAX_ALLOWED_LOOPS = 10_000 MAX_ALLOWED_LOOPS = 10_000
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
ALLOWED_MODES = [:itself, :distinct].freeze
# Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705 # Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_DISTINCT_BATCH_SIZE = 10_000 DEFAULT_DISTINCT_BATCH_SIZE = 10_000
...@@ -55,8 +56,8 @@ module Gitlab ...@@ -55,8 +56,8 @@ module Gitlab
def count(batch_size: nil, mode: :itself, start: nil, finish: nil) def count(batch_size: nil, mode: :itself, start: nil, finish: nil)
raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open? raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
raise "The mode #{mode.inspect} is not supported" unless [:itself, :distinct].include?(mode)
raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct check_mode!(mode)
# non-distinct have better performance # non-distinct have better performance
batch_size ||= mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE batch_size ||= mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
...@@ -102,6 +103,12 @@ module Gitlab ...@@ -102,6 +103,12 @@ module Gitlab
def actual_finish(finish) def actual_finish(finish)
finish || @relation.maximum(@column) || 0 finish || @relation.maximum(@column) || 0
end end
def check_mode!(mode)
raise "The mode #{mode.inspect} is not supported" unless ALLOWED_MODES.include?(mode)
raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct
raise 'Use distinct count only with non id fields' if @column == :id && mode == :distinct
end
end end
end end
end end
...@@ -86,10 +86,6 @@ describe Gitlab::Database::BatchCount do ...@@ -86,10 +86,6 @@ describe Gitlab::Database::BatchCount do
end end
describe '#batch_distinct_count' do describe '#batch_distinct_count' do
it 'counts with :id field' do
expect(described_class.batch_distinct_count(model, :id)).to eq(5)
end
it 'counts with column field' do it 'counts with column field' do
expect(described_class.batch_distinct_count(model, column)).to eq(2) expect(described_class.batch_distinct_count(model, column)).to eq(2)
end end
...@@ -137,6 +133,12 @@ describe Gitlab::Database::BatchCount do ...@@ -137,6 +133,12 @@ describe Gitlab::Database::BatchCount do
it 'returns fallback if batch size is less than min required' do it 'returns fallback if batch size is less than min required' do
expect(described_class.batch_distinct_count(model, column, batch_size: small_batch_size)).to eq(fallback) expect(described_class.batch_distinct_count(model, column, batch_size: small_batch_size)).to eq(fallback)
end end
it 'will raise an error if distinct count with the :id column is requested' do
expect do
described_class.batch_count(described_class.batch_distinct_count(model, :id))
end.to raise_error 'Use distinct count only with non id fields'
end
end end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment