Commit c111cb39 authored by Mikołaj Wawrzyniak's avatar Mikołaj Wawrzyniak Committed by Alper Akgun

Extract Buckets out of BatchDistinctCounter

To enable more flexible operations over HLL buckets we need to extract
a dedicated class to model their behaviour.
parent fd79577a
...@@ -16,9 +16,9 @@ module Gitlab ...@@ -16,9 +16,9 @@ module Gitlab
# Grouped relations are NOT supported yet. # Grouped relations are NOT supported yet.
# #
# @example Usage # @example Usage
# ::Gitlab::Database::PostgresHllBatchDistinctCount.new(::Project, :creator_id).estimate_distinct_count # ::Gitlab::Database::PostgresHllBatchDistinctCount.new(::Project, :creator_id).execute
# ::Gitlab::Database::PostgresHllBatchDistinctCount.new(::Project.with_active_services.service_desk_enabled.where(time_period)) # ::Gitlab::Database::PostgresHllBatchDistinctCount.new(::Project.with_active_services.service_desk_enabled.where(time_period))
# .estimate_distinct_count( # .execute(
# batch_size: 1_000, # batch_size: 1_000,
# start: ::Project.with_active_services.service_desk_enabled.where(time_period).minimum(:id), # start: ::Project.with_active_services.service_desk_enabled.where(time_period).minimum(:id),
# finish: ::Project.with_active_services.service_desk_enabled.where(time_period).maximum(:id) # finish: ::Project.with_active_services.service_desk_enabled.where(time_period).maximum(:id)
...@@ -30,7 +30,6 @@ module Gitlab ...@@ -30,7 +30,6 @@ module Gitlab
# for the most of a cases this value is lower. However, if the exact value is necessary other tools has to be used. # for the most of a cases this value is lower. However, if the exact value is necessary other tools has to be used.
class BatchDistinctCounter class BatchDistinctCounter
ERROR_RATE = 4.9 # max encountered empirical error rate, used in tests ERROR_RATE = 4.9 # max encountered empirical error rate, used in tests
FALLBACK = -1
MIN_REQUIRED_BATCH_SIZE = 750 MIN_REQUIRED_BATCH_SIZE = 750
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
MAX_DATA_VOLUME = 4_000_000_000 MAX_DATA_VOLUME = 4_000_000_000
...@@ -38,8 +37,10 @@ module Gitlab ...@@ -38,8 +37,10 @@ module Gitlab
# Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705 # Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_BATCH_SIZE = 10_000 DEFAULT_BATCH_SIZE = 10_000
ZERO_OFFSET = 1
BUCKET_ID_MASK = (Buckets::TOTAL_BUCKETS - ZERO_OFFSET).to_s(2)
BIT_31_MASK = "B'0#{'1' * 31}'" BIT_31_MASK = "B'0#{'1' * 31}'"
BIT_9_MASK = "B'#{'0' * 23}#{'1' * 9}'" BIT_32_NORMALIZED_BUCKET_ID_MASK = "B'#{'0' * (32 - BUCKET_ID_MASK.size)}#{BUCKET_ID_MASK}'"
# @example source_query # @example source_query
# SELECT CAST(('X' || md5(CAST(%{column} as text))) as bit(32)) attr_hash_32_bits # SELECT CAST(('X' || md5(CAST(%{column} as text))) as bit(32)) attr_hash_32_bits
# FROM %{relation} # FROM %{relation}
...@@ -48,73 +49,58 @@ module Gitlab ...@@ -48,73 +49,58 @@ module Gitlab
# AND %{column} IS NOT NULL # AND %{column} IS NOT NULL
BUCKETED_DATA_SQL = <<~SQL BUCKETED_DATA_SQL = <<~SQL
WITH hashed_attributes AS (%{source_query}) WITH hashed_attributes AS (%{source_query})
SELECT (attr_hash_32_bits & #{BIT_9_MASK})::int AS bucket_num, SELECT (attr_hash_32_bits & #{BIT_32_NORMALIZED_BUCKET_ID_MASK})::int AS bucket_num,
(31 - floor(log(2, min((attr_hash_32_bits & #{BIT_31_MASK})::int))))::int as bucket_hash (31 - floor(log(2, min((attr_hash_32_bits & #{BIT_31_MASK})::int))))::int as bucket_hash
FROM hashed_attributes FROM hashed_attributes
GROUP BY 1 GROUP BY 1
SQL SQL
TOTAL_BUCKETS_NUMBER = 512 WRONG_CONFIGURATION_ERROR = Class.new(ActiveRecord::StatementInvalid)
def initialize(relation, column = nil) def initialize(relation, column = nil)
@relation = relation @relation = relation
@column = column || relation.primary_key @column = column || relation.primary_key
end end
def unwanted_configuration?(finish, batch_size, start) # Executes counter that iterates over database source and return Gitlab::Database::PostgresHll::Buckets
batch_size <= MIN_REQUIRED_BATCH_SIZE || # that can be used to estimation of number of uniq elements in analysed set
(finish - start) >= MAX_DATA_VOLUME || #
start > finish # @param batch_size maximal number of rows that will be analysed by single database query
end # @param start initial pkey range
# @param finish final pkey range
def estimate_distinct_count(batch_size: nil, start: nil, finish: nil) # @return [Gitlab::Database::PostgresHll::Buckets] HyperLogLog data structure instance that can estimate number of unique elements
def execute(batch_size: nil, start: nil, finish: nil)
raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open? raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
batch_size ||= DEFAULT_BATCH_SIZE batch_size ||= DEFAULT_BATCH_SIZE
start = actual_start(start) start = actual_start(start)
finish = actual_finish(finish) finish = actual_finish(finish)
raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0 raise WRONG_CONFIGURATION_ERROR if unwanted_configuration?(start, finish, batch_size)
return FALLBACK if unwanted_configuration?(finish, batch_size, start)
batch_start = start batch_start = start
hll_blob = {} hll_buckets = Buckets.new
while batch_start <= finish while batch_start <= finish
begin begin
hll_blob.merge!(hll_blob_for_batch(batch_start, batch_start + batch_size)) {|_key, old, new| new > old ? new : old } hll_buckets.merge_hash!(hll_buckets_for_batch(batch_start, batch_start + batch_size))
batch_start += batch_size batch_start += batch_size
end end
sleep(SLEEP_TIME_IN_SECONDS) sleep(SLEEP_TIME_IN_SECONDS)
end end
estimate_cardinality(hll_blob) hll_buckets
end end
private private
# arbitrary values that are present in #estimate_cardinality def unwanted_configuration?(start, finish, batch_size)
# are sourced from https://www.sisense.com/blog/hyperloglog-in-pure-sql/ batch_size <= MIN_REQUIRED_BATCH_SIZE ||
# article, they are not representing any entity and serves as tune value (finish - start) >= MAX_DATA_VOLUME ||
# for the whole equation start > finish || start < 0 || finish < 0
def estimate_cardinality(hll_blob)
num_zero_buckets = TOTAL_BUCKETS_NUMBER - hll_blob.size
num_uniques = (
((TOTAL_BUCKETS_NUMBER**2) * (0.7213 / (1 + 1.079 / TOTAL_BUCKETS_NUMBER))) /
(num_zero_buckets + hll_blob.values.sum { |bucket_hash| 2**(-1 * bucket_hash)} )
).to_i
if num_zero_buckets > 0 && num_uniques < 2.5 * TOTAL_BUCKETS_NUMBER
((0.7213 / (1 + 1.079 / TOTAL_BUCKETS_NUMBER)) * (TOTAL_BUCKETS_NUMBER *
Math.log2(TOTAL_BUCKETS_NUMBER.to_f / num_zero_buckets)))
else
num_uniques
end
end end
def hll_blob_for_batch(start, finish) def hll_buckets_for_batch(start, finish)
@relation @relation
.connection .connection
.execute(BUCKETED_DATA_SQL % { source_query: source_query(start, finish) }) .execute(BUCKETED_DATA_SQL % { source_query: source_query(start, finish) })
......
# frozen_string_literal: true

module Gitlab
  module Database
    module PostgresHll
      # Buckets models the data structure built by the HyperLogLog algorithm
      # that represents the value distribution of an analysed set. This
      # representation can then be used to:
      # 1. Estimate the number of unique elements that this structure represents
      # 2. Merge with another Buckets structure to later estimate the number of
      #    unique elements in the union of the two represented data sets
      # 3. Serialize the Buckets structure to JSON, so it can be stored in
      #    various persistence layers
      #
      # @example Usage
      #   ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1).estimated_distinct_count
      #   ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1).merge_hash!(141 => 1, 56 => 5).estimated_distinct_count
      #   ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1).to_json
      #
      # @note HyperLogLog is a PROBABILISTIC algorithm that ESTIMATES the distinct count of a given attribute value for the supplied relation
      #   Like all probabilistic algorithms it has an ERROR RATE margin that can affect values.
      #   For this implementation no error higher (https://gitlab.com/gitlab-org/gitlab/-/merge_requests/45673#accuracy-estimation) than 5.3% was reported;
      #   in most cases the error is lower. However, if the exact value is necessary, other tools have to be used.
      class Buckets
        TOTAL_BUCKETS = 512

        # @param buckets [Hash] bucket_num => bucket_hash pairs produced by the HLL bucketing query
        def initialize(buckets = {})
          @buckets = buckets
        end

        # Based on the HyperLogLog structure, estimates the number of unique elements in the analysed set.
        #
        # @return [Float] estimated number of unique elements
        def estimated_distinct_count
          # memoized: invalidated by #merge_hash! when the underlying buckets change
          @estimated_distinct_count ||= estimate_cardinality
        end

        # Updates the instance's underlying HyperLogLog structure by merging it with another HyperLogLog structure
        #
        # @param other_buckets_hash [Hash] hash with HyperLogLog structure representation
        def merge_hash!(other_buckets_hash)
          # drop the memoized estimate — the merged buckets represent a different set
          @estimated_distinct_count = nil
          buckets.merge!(other_buckets_hash) { |_key, old, new| new > old ? new : old }
        end

        # Serializes the instance's underlying HyperLogLog structure to JSON format, so it can be stored in various persistence layers
        #
        # @return [String] HyperLogLog data structure serialized to JSON
        def to_json(_ = nil)
          buckets.to_json
        end

        private

        attr_accessor :buckets

        # arbitrary values that are present in #estimate_cardinality
        # are sourced from https://www.sisense.com/blog/hyperloglog-in-pure-sql/
        # article, they are not representing any entity and serve as tuning values
        # for the whole equation
        def estimate_cardinality
          num_zero_buckets = TOTAL_BUCKETS - buckets.size

          num_uniques = (
            ((TOTAL_BUCKETS**2) * (0.7213 / (1 + 1.079 / TOTAL_BUCKETS))) /
            (num_zero_buckets + buckets.values.sum { |bucket_hash| 2**(-1 * bucket_hash) })
          ).to_i

          # small-range correction: linear counting is more accurate for low cardinalities
          if num_zero_buckets > 0 && num_uniques < 2.5 * TOTAL_BUCKETS
            ((0.7213 / (1 + 1.079 / TOTAL_BUCKETS)) * (TOTAL_BUCKETS *
              Math.log2(TOTAL_BUCKETS.to_f / num_zero_buckets)))
          else
            num_uniques
          end
        end
      end
    end
  end
end
...@@ -61,7 +61,10 @@ module Gitlab ...@@ -61,7 +61,10 @@ module Gitlab
end end
def estimate_batch_distinct_count(relation, column = nil, batch_size: nil, start: nil, finish: nil) def estimate_batch_distinct_count(relation, column = nil, batch_size: nil, start: nil, finish: nil)
Gitlab::Database::PostgresHll::BatchDistinctCounter.new(relation, column).estimate_distinct_count(batch_size: batch_size, start: start, finish: finish) Gitlab::Database::PostgresHll::BatchDistinctCounter
.new(relation, column)
.execute(batch_size: batch_size, start: start, finish: finish)
.estimated_distinct_count
rescue ActiveRecord::StatementInvalid rescue ActiveRecord::StatementInvalid
FALLBACK FALLBACK
# catch all rescue should be removed as a part of feature flag rollout issue # catch all rescue should be removed as a part of feature flag rollout issue
......
...@@ -24,107 +24,48 @@ RSpec.describe Gitlab::Database::PostgresHll::BatchDistinctCounter do ...@@ -24,107 +24,48 @@ RSpec.describe Gitlab::Database::PostgresHll::BatchDistinctCounter do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(in_transaction) allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(in_transaction)
end end
context 'different distribution of relation records' do
[10, 100, 100_000].each do |spread|
context "records are spread within #{spread}" do
before do
ids = (1..spread).to_a.sample(10)
create_list(:issue, 10).each_with_index do |issue, i|
issue.id = ids[i]
end
end
it 'counts table' do
expect(described_class.new(model).estimate_distinct_count).to be_within(error_rate).percent_of(10)
end
end
end
end
context 'unit test for different counting parameters' do context 'unit test for different counting parameters' do
before_all do before_all do
create_list(:issue, 3, author: user) create_list(:issue, 3, author: user)
create_list(:issue, 2, author: another_user) create_list(:issue, 2, author: another_user)
end end
describe '#estimate_distinct_count' do describe '#execute' do
it 'counts table' do it 'builds hll buckets' do
expect(described_class.new(model).estimate_distinct_count).to be_within(error_rate).percent_of(5) expect(described_class.new(model).execute).to be_an_instance_of(Gitlab::Database::PostgresHll::Buckets)
end
it 'counts with column field' do
expect(described_class.new(model, column).estimate_distinct_count).to be_within(error_rate).percent_of(2)
end
it 'counts with :id field' do
expect(described_class.new(model, :id).estimate_distinct_count).to be_within(error_rate).percent_of(5)
end
it 'counts with "id" field' do
expect(described_class.new(model, "id").estimate_distinct_count).to be_within(error_rate).percent_of(5)
end
it 'counts with table.column field' do
expect(described_class.new(model, "#{model.table_name}.#{column}").estimate_distinct_count).to be_within(error_rate).percent_of(2)
end
it 'counts with Arel column' do
expect(described_class.new(model, model.arel_table[column]).estimate_distinct_count).to be_within(error_rate).percent_of(2)
end
it 'counts over joined relations' do
expect(described_class.new(model.joins(:author), "users.email").estimate_distinct_count).to be_within(error_rate).percent_of(2)
end
it 'counts with :column field with batch_size of 50K' do
expect(described_class.new(model, column).estimate_distinct_count(batch_size: 50_000)).to be_within(error_rate).percent_of(2)
end
it 'will not count table with a batch size less than allowed' do
expect(described_class.new(model, column).estimate_distinct_count(batch_size: small_batch_size)).to eq(fallback)
end
it 'counts with different number of batches and aggregates total result' do
stub_const('Gitlab::Database::PostgresHll::BatchDistinctCounter::MIN_REQUIRED_BATCH_SIZE', 0)
[1, 2, 4, 5, 6].each { |i| expect(described_class.new(model).estimate_distinct_count(batch_size: i)).to be_within(error_rate).percent_of(5) }
end
it 'counts with a start and finish' do
expect(described_class.new(model, column).estimate_distinct_count(start: model.minimum(:id), finish: model.maximum(:id))).to be_within(error_rate).percent_of(2)
end end
it "defaults the batch size to #{Gitlab::Database::PostgresHll::BatchDistinctCounter::DEFAULT_BATCH_SIZE}" do it "defaults batch size to #{Gitlab::Database::PostgresHll::BatchDistinctCounter::DEFAULT_BATCH_SIZE}" do
min_id = model.minimum(:id) min_id = model.minimum(:id)
batch_end_id = min_id + calculate_batch_size(Gitlab::Database::PostgresHll::BatchDistinctCounter::DEFAULT_BATCH_SIZE) batch_end_id = min_id + calculate_batch_size(Gitlab::Database::PostgresHll::BatchDistinctCounter::DEFAULT_BATCH_SIZE)
expect(model).to receive(:where).with("id" => min_id..batch_end_id).and_call_original expect(model).to receive(:where).with("id" => min_id..batch_end_id).and_call_original
described_class.new(model).estimate_distinct_count described_class.new(model).execute
end end
context 'when a transaction is open' do context 'when a transaction is open' do
let(:in_transaction) { true } let(:in_transaction) { true }
it 'raises an error' do it 'raises an error' do
expect { described_class.new(model, column).estimate_distinct_count }.to raise_error('BatchCount can not be run inside a transaction') expect { described_class.new(model, column).execute }.to raise_error('BatchCount can not be run inside a transaction')
end end
end end
context 'disallowed configurations' do context 'disallowed configurations' do
let(:default_batch_size) { Gitlab::Database::PostgresHll::BatchDistinctCounter::DEFAULT_BATCH_SIZE } let(:default_batch_size) { Gitlab::Database::PostgresHll::BatchDistinctCounter::DEFAULT_BATCH_SIZE }
it 'returns fallback if start is bigger than finish' do it 'raises WRONG_CONFIGURATION_ERROR if start is bigger than finish' do
expect(described_class.new(model, column).estimate_distinct_count(start: 1, finish: 0)).to eq(fallback) expect { described_class.new(model, column).execute(start: 1, finish: 0) }.to raise_error(described_class::WRONG_CONFIGURATION_ERROR)
end end
it 'returns fallback if data volume exceeds upper limit' do it 'raises WRONG_CONFIGURATION_ERROR if data volume exceeds upper limit' do
large_finish = Gitlab::Database::PostgresHll::BatchDistinctCounter::MAX_DATA_VOLUME + 1 large_finish = Gitlab::Database::PostgresHll::BatchDistinctCounter::MAX_DATA_VOLUME + 1
expect(described_class.new(model, column).estimate_distinct_count(start: 1, finish: large_finish)).to eq(fallback) expect { described_class.new(model, column).execute(start: 1, finish: large_finish) }.to raise_error(described_class::WRONG_CONFIGURATION_ERROR)
end end
it 'returns fallback if batch size is less than min required' do it 'raises WRONG_CONFIGURATION_ERROR if batch size is less than min required' do
expect(described_class.new(model, column).estimate_distinct_count(batch_size: small_batch_size)).to eq(fallback) expect { described_class.new(model, column).execute(batch_size: small_batch_size) }.to raise_error(described_class::WRONG_CONFIGURATION_ERROR)
end end
end end
end end
......
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe Gitlab::Database::PostgresHll::Buckets do
  # HyperLogLog is a probabilistic algorithm, so cardinality checks allow the documented error margin
  let(:error_rate) { Gitlab::Database::PostgresHll::BatchDistinctCounter::ERROR_RATE }
  let(:buckets_hash_5) { { 121 => 2, 126 => 1, 141 => 1, 383 => 1, 56 => 1 } }
  let(:buckets_hash_2) { { 141 => 1, 56 => 1 } }

  describe '#estimated_distinct_count' do
    it 'provides estimated cardinality', :aggregate_failures do
      expect(described_class.new(buckets_hash_5).estimated_distinct_count).to be_within(error_rate).percent_of(5)
      expect(described_class.new(buckets_hash_2).estimated_distinct_count).to be_within(error_rate).percent_of(2)

      # an empty structure (explicit or default) estimates zero uniques
      expect(described_class.new({}).estimated_distinct_count).to eq 0
      expect(described_class.new.estimated_distinct_count).to eq 0
    end
  end

  describe '#merge_hash!' do
    let(:hash_a) { { 1 => 1, 2 => 3 } }
    let(:hash_b) { { 1 => 2, 2 => 1 } }

    it 'merges two hashes together into union of two sets' do
      # per-bucket maximum wins, matching HLL union semantics
      merged = described_class.new(hash_a).merge_hash!(hash_b)
      expected = described_class.new(1 => 2, 2 => 3)

      expect(merged.to_json).to eq expected.to_json
    end
  end

  describe '#to_json' do
    it 'serialize HyperLogLog buckets as hash' do
      expect(described_class.new(1 => 5).to_json).to eq '{"1":5}'
    end
  end
end
...@@ -38,32 +38,123 @@ RSpec.describe Gitlab::Utils::UsageData do ...@@ -38,32 +38,123 @@ RSpec.describe Gitlab::Utils::UsageData do
end end
describe '#estimate_batch_distinct_count' do describe '#estimate_batch_distinct_count' do
let(:error_rate) { Gitlab::Database::PostgresHll::BatchDistinctCounter::ERROR_RATE } # HyperLogLog is a probabilistic algorithm, which provides estimated data, with given error margin
let(:relation) { double(:relation) } let(:relation) { double(:relation) }
before do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false)
end
it 'delegates counting to counter class instance' do it 'delegates counting to counter class instance' do
buckets = instance_double(Gitlab::Database::PostgresHll::Buckets)
expect_next_instance_of(Gitlab::Database::PostgresHll::BatchDistinctCounter, relation, 'column') do |instance| expect_next_instance_of(Gitlab::Database::PostgresHll::BatchDistinctCounter, relation, 'column') do |instance|
expect(instance).to receive(:estimate_distinct_count) expect(instance).to receive(:execute)
.with(batch_size: nil, start: nil, finish: nil) .with(batch_size: nil, start: nil, finish: nil)
.and_return(5) .and_return(buckets)
end end
expect(buckets).to receive(:estimated_distinct_count).and_return(5)
expect(described_class.estimate_batch_distinct_count(relation, 'column')).to eq(5) expect(described_class.estimate_batch_distinct_count(relation, 'column')).to eq(5)
end end
it 'returns default fallback value when counting fails due to database error' do context 'quasi integration test for different counting parameters' do
stub_const("Gitlab::Utils::UsageData::FALLBACK", 15) let_it_be(:user) { create(:user, email: 'email1@domain.com') }
allow(Gitlab::Database::PostgresHll::BatchDistinctCounter).to receive(:new).and_raise(ActiveRecord::StatementInvalid.new('')) let_it_be(:another_user) { create(:user, email: 'email2@domain.com') }
let(:model) { Issue }
let(:column) { :author_id }
context 'different distribution of relation records' do
[10, 100, 100_000].each do |spread|
context "records are spread within #{spread}" do
before do
ids = (1..spread).to_a.sample(10)
create_list(:issue, 10).each_with_index do |issue, i|
issue.id = ids[i]
end
end
it 'counts table' do
expect(described_class.estimate_batch_distinct_count(model)).to be_within(error_rate).percent_of(10)
end
end
end
end
context 'different counting parameters' do
before_all do
create_list(:issue, 3, author: user)
create_list(:issue, 2, author: another_user)
end
it 'counts table' do
expect(described_class.estimate_batch_distinct_count(model)).to be_within(error_rate).percent_of(5)
end
it 'counts with column field' do
expect(described_class.estimate_batch_distinct_count(model, column)).to be_within(error_rate).percent_of(2)
end
it 'counts with :id field' do
expect(described_class.estimate_batch_distinct_count(model, :id)).to be_within(error_rate).percent_of(5)
end
it 'counts with "id" field' do
expect(described_class.estimate_batch_distinct_count(model, "id")).to be_within(error_rate).percent_of(5)
end
it 'counts with table.column field' do
expect(described_class.estimate_batch_distinct_count(model, "#{model.table_name}.#{column}")).to be_within(error_rate).percent_of(2)
end
it 'counts with Arel column' do
expect(described_class.estimate_batch_distinct_count(model, model.arel_table[column])).to be_within(error_rate).percent_of(2)
end
it 'counts over joined relations' do
expect(described_class.estimate_batch_distinct_count(model.joins(:author), "users.email")).to be_within(error_rate).percent_of(2)
end
it 'counts with :column field with batch_size of 50K' do
expect(described_class.estimate_batch_distinct_count(model, column, batch_size: 50_000)).to be_within(error_rate).percent_of(2)
end
it 'counts with different number of batches and aggregates total result' do
stub_const('Gitlab::Database::PostgresHll::BatchDistinctCounter::MIN_REQUIRED_BATCH_SIZE', 0)
[1, 2, 4, 5, 6].each { |i| expect(described_class.estimate_batch_distinct_count(model, batch_size: i)).to be_within(error_rate).percent_of(5) }
end
expect(described_class.estimate_batch_distinct_count(relation)).to eq(15) it 'counts with a start and finish' do
expect(described_class.estimate_batch_distinct_count(model, column, start: model.minimum(:id), finish: model.maximum(:id))).to be_within(error_rate).percent_of(2)
end
end
end end
it 'logs error and returns DISTRIBUTED_HLL_FALLBACK value when counting raises any error', :aggregate_failures do describe 'error handling' do
error = StandardError.new('') before do
stub_const("Gitlab::Utils::UsageData::DISTRIBUTED_HLL_FALLBACK", 15) stub_const("Gitlab::Utils::UsageData::FALLBACK", 3)
allow(Gitlab::Database::PostgresHll::BatchDistinctCounter).to receive(:new).and_raise(error) stub_const("Gitlab::Utils::UsageData::DISTRIBUTED_HLL_FALLBACK", 4)
end
it 'returns fallback if counter raises WRONG_CONFIGURATION_ERROR' do
expect(described_class.estimate_batch_distinct_count(relation, 'id', start: 1, finish: 0)).to eq 3
end
expect(Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception).with(error) it 'returns default fallback value when counting fails due to database error' do
expect(described_class.estimate_batch_distinct_count(relation)).to eq(15) allow(Gitlab::Database::PostgresHll::BatchDistinctCounter).to receive(:new).and_raise(ActiveRecord::StatementInvalid.new(''))
expect(described_class.estimate_batch_distinct_count(relation)).to eq(3)
end
it 'logs error and returns DISTRIBUTED_HLL_FALLBACK value when counting raises any error', :aggregate_failures do
error = StandardError.new('')
allow(Gitlab::Database::PostgresHll::BatchDistinctCounter).to receive(:new).and_raise(error)
expect(Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception).with(error)
expect(described_class.estimate_batch_distinct_count(relation)).to eq(4)
end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment