Commit f048036e authored by Bob Van Landuyt's avatar Bob Van Landuyt

Merge branch 'mwaw/use_postgres_hll_batch_counting_in_usage_ping' into 'master'

Apply Postgres HLL distributed batch counter to Usage Ping secure pipelines metrics

See merge request gitlab-org/gitlab!48233
parents 91fd7b72 7ead5a08
---
name: postgres_hll_batch_counting
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/48233
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/285485
milestone: '13.7'
type: development
group: group::product analytics
default_enabled: false
......@@ -396,9 +396,36 @@ module EE
def count_secure_pipelines(time_period)
return {} if time_period.blank?
pipelines_with_secure_jobs = {}
# HLL batch counting always iterate over pkey of
# given relation, while ordinary batch count
# iterated over counted attribute, one-to-many joins
# can break batch size limitation, and lead to
# time outing batch queries, to avoid that
# different join strategy is used for HLL counter
if ::Feature.enabled?(:postgres_hll_batch_counting)
relation = ::Security::Scan.where(time_period).group(:created_at)
start = relation.select('MIN(id) as min_id').order('min_id ASC').first&.min_id
finish = relation.select('MAX(id) as max_id').order('max_id DESC').first&.max_id
::Security::Scan.scan_types.each do |name, scan_type|
relation = ::Security::Scan.joins(:build)
.where(ci_builds: { status: 'success', retried: [nil, false] })
.where('security_scans.scan_type = ?', scan_type)
.where(security_scans: time_period)
pipelines_with_secure_jobs["#{name}_pipeline".to_sym] =
if start && finish
estimate_batch_distinct_count(relation, :commit_id, batch_size: 1000, start: start, finish: finish)
else
0
end
end
else
start = ::Ci::Pipeline.minimum(:id)
finish = ::Ci::Pipeline.maximum(:id)
pipelines_with_secure_jobs = {}
::Security::Scan.scan_types.each do |name, scan_type|
relation = ::Ci::Build.joins(:security_scans)
......@@ -407,6 +434,7 @@ module EE
.where(time_period)
pipelines_with_secure_jobs["#{name}_pipeline".to_sym] = distinct_count(relation, :commit_id, start: start, finish: finish, batch: false)
end
end
pipelines_with_secure_jobs
end
......
......@@ -504,6 +504,7 @@ RSpec.describe Gitlab::UsageData do
end
describe 'usage_activity_by_stage_secure' do
let_it_be(:error_rate) { Gitlab::Database::PostgresHllBatchDistinctCounter::ERROR_RATE }
let_it_be(:days_ago_within_monthly_time_period) { 3.days.ago }
let_it_be(:user) { create(:user, group_view: :security_dashboard, created_at: days_ago_within_monthly_time_period) }
let_it_be(:user2) { create(:user, group_view: :security_dashboard, created_at: days_ago_within_monthly_time_period) }
......@@ -523,6 +524,212 @@ RSpec.describe Gitlab::UsageData do
end
end
it 'includes accurate usage_activity_by_stage data' do
expect(described_class.usage_activity_by_stage_secure(described_class.last_28_days_time_period)).to include(
user_preferences_group_overview_security_dashboard: 3,
user_container_scanning_jobs: 1,
user_api_fuzzing_jobs: 1,
user_api_fuzzing_dnd_jobs: 1,
user_coverage_fuzzing_jobs: 1,
user_dast_jobs: 1,
user_dependency_scanning_jobs: 1,
user_license_management_jobs: 1,
user_sast_jobs: 1,
user_secret_detection_jobs: 1,
sast_pipeline: be_within(error_rate).percent_of(0),
sast_scans: 0,
dependency_scanning_pipeline: be_within(error_rate).percent_of(0),
dependency_scanning_scans: 0,
container_scanning_pipeline: be_within(error_rate).percent_of(0),
container_scanning_scans: 0,
dast_pipeline: be_within(error_rate).percent_of(0),
dast_scans: 0,
secret_detection_pipeline: be_within(error_rate).percent_of(0),
secret_detection_scans: 0,
coverage_fuzzing_pipeline: be_within(error_rate).percent_of(0),
coverage_fuzzing_scans: 0,
api_fuzzing_pipeline: be_within(error_rate).percent_of(0),
api_fuzzing_scans: 0,
user_unique_users_all_secure_scanners: 1
)
end
it 'counts pipelines that have security jobs' do
for_defined_days_back do
ds_build = create(:ci_build, name: 'retirejs', user: user, status: 'success')
ds_bundler_audit_build = create(:ci_build, :failed, user: user, name: 'retirejs')
ds_bundler_build = create(:ci_build, name: 'bundler-audit', user: user, commit_id: ds_build.pipeline.id, status: 'success')
secret_detection_build = create(:ci_build, name: 'secret', user: user, commit_id: ds_build.pipeline.id, status: 'success')
cs_build = create(:ci_build, name: 'klar', user: user, status: 'success')
sast_build = create(:ci_build, name: 'sast', user: user, status: 'success', retried: true)
create(:security_scan, build: ds_build, scan_type: 'dependency_scanning' )
create(:security_scan, build: ds_bundler_build, scan_type: 'dependency_scanning')
create(:security_scan, build: secret_detection_build, scan_type: 'secret_detection')
create(:security_scan, build: cs_build, scan_type: 'container_scanning')
create(:security_scan, build: sast_build, scan_type: 'sast')
create(:security_scan, build: ds_bundler_audit_build, scan_type: 'dependency_scanning')
end
expect(described_class.usage_activity_by_stage_secure({})).to include(
user_preferences_group_overview_security_dashboard: 3,
user_container_scanning_jobs: 1,
user_dast_jobs: 1,
user_dependency_scanning_jobs: 1,
user_license_management_jobs: 1,
user_sast_jobs: 1,
user_secret_detection_jobs: 1,
user_unique_users_all_secure_scanners: 1,
sast_scans: 0,
dependency_scanning_scans: 4,
container_scanning_scans: 2,
dast_scans: 0,
secret_detection_scans: 2,
coverage_fuzzing_scans: 0
)
expect(described_class.usage_activity_by_stage_secure(described_class.last_28_days_time_period)).to include(
user_preferences_group_overview_security_dashboard: 3,
user_api_fuzzing_jobs: 1,
user_api_fuzzing_dnd_jobs: 1,
user_container_scanning_jobs: 1,
user_coverage_fuzzing_jobs: 1,
user_dast_jobs: 1,
user_dependency_scanning_jobs: 1,
user_license_management_jobs: 1,
user_sast_jobs: 1,
user_secret_detection_jobs: 1,
sast_pipeline: be_within(error_rate).percent_of(0),
dependency_scanning_pipeline: be_within(error_rate).percent_of(1),
container_scanning_pipeline: be_within(error_rate).percent_of(1),
dast_pipeline: be_within(error_rate).percent_of(0),
secret_detection_pipeline: be_within(error_rate).percent_of(1),
coverage_fuzzing_pipeline: be_within(error_rate).percent_of(0),
api_fuzzing_pipeline: be_within(error_rate).percent_of(0),
user_unique_users_all_secure_scanners: 1,
sast_scans: 0,
dependency_scanning_scans: 2,
container_scanning_scans: 1,
dast_scans: 0,
secret_detection_scans: 1,
coverage_fuzzing_scans: 0,
api_fuzzing_scans: 0
)
end
it 'counts unique users correctly across multiple scanners' do
for_defined_days_back do
create(:ci_build, name: 'sast', user: user2)
create(:ci_build, name: 'dast', user: user2)
create(:ci_build, name: 'dast', user: user3)
end
expect(described_class.usage_activity_by_stage_secure(described_class.last_28_days_time_period)).to include(
user_preferences_group_overview_security_dashboard: 3,
user_api_fuzzing_jobs: 1,
user_api_fuzzing_dnd_jobs: 1,
user_container_scanning_jobs: 1,
user_coverage_fuzzing_jobs: 1,
user_dast_jobs: 3,
user_dependency_scanning_jobs: 1,
user_license_management_jobs: 1,
user_sast_jobs: 2,
user_secret_detection_jobs: 1,
sast_pipeline: be_within(error_rate).percent_of(0),
sast_scans: 0,
dependency_scanning_pipeline: be_within(error_rate).percent_of(0),
dependency_scanning_scans: 0,
container_scanning_pipeline: be_within(error_rate).percent_of(0),
container_scanning_scans: 0,
dast_pipeline: be_within(error_rate).percent_of(0),
dast_scans: 0,
secret_detection_pipeline: be_within(error_rate).percent_of(0),
secret_detection_scans: 0,
coverage_fuzzing_pipeline: be_within(error_rate).percent_of(0),
coverage_fuzzing_scans: 0,
api_fuzzing_pipeline: be_within(error_rate).percent_of(0),
api_fuzzing_scans: 0,
user_unique_users_all_secure_scanners: 3
)
end
it 'combines license_scanning into license_management' do
for_defined_days_back do
create(:ci_build, name: 'license_scanning', user: user)
end
expect(described_class.usage_activity_by_stage_secure(described_class.last_28_days_time_period)).to include(
user_preferences_group_overview_security_dashboard: 3,
user_api_fuzzing_jobs: 1,
user_api_fuzzing_dnd_jobs: 1,
user_container_scanning_jobs: 1,
user_coverage_fuzzing_jobs: 1,
user_dast_jobs: 1,
user_dependency_scanning_jobs: 1,
user_license_management_jobs: 2,
user_sast_jobs: 1,
user_secret_detection_jobs: 1,
sast_pipeline: be_within(error_rate).percent_of(0),
sast_scans: 0,
dependency_scanning_pipeline: be_within(error_rate).percent_of(0),
dependency_scanning_scans: 0,
container_scanning_pipeline: be_within(error_rate).percent_of(0),
container_scanning_scans: 0,
dast_pipeline: be_within(error_rate).percent_of(0),
dast_scans: 0,
secret_detection_pipeline: be_within(error_rate).percent_of(0),
secret_detection_scans: 0,
coverage_fuzzing_pipeline: be_within(error_rate).percent_of(0),
coverage_fuzzing_scans: 0,
api_fuzzing_pipeline: be_within(error_rate).percent_of(0),
api_fuzzing_scans: 0,
user_unique_users_all_secure_scanners: 1
)
end
it 'has to resort to 0 for counting license scan' do
for_defined_days_back do
create(:security_scan)
end
allow(Gitlab::Database::BatchCount).to receive(:batch_distinct_count).and_raise(ActiveRecord::StatementInvalid)
allow(Gitlab::Database::BatchCount).to receive(:batch_count).and_raise(ActiveRecord::StatementInvalid)
allow(Gitlab::Database::PostgresHllBatchDistinctCounter).to receive(:new).and_raise(ActiveRecord::StatementInvalid)
allow(::Ci::Build).to receive(:distinct_count_by).and_raise(ActiveRecord::StatementInvalid)
expect(described_class.usage_activity_by_stage_secure(described_class.last_28_days_time_period)).to include(
user_preferences_group_overview_security_dashboard: -1,
user_api_fuzzing_jobs: -1,
user_api_fuzzing_dnd_jobs: -1,
user_container_scanning_jobs: -1,
user_coverage_fuzzing_jobs: -1,
user_dast_jobs: -1,
user_dependency_scanning_jobs: -1,
user_license_management_jobs: -1,
user_sast_jobs: -1,
user_secret_detection_jobs: -1,
sast_pipeline: -1,
sast_scans: -1,
dependency_scanning_pipeline: -1,
dependency_scanning_scans: -1,
container_scanning_pipeline: -1,
container_scanning_scans: -1,
dast_pipeline: -1,
dast_scans: -1,
secret_detection_pipeline: -1,
secret_detection_scans: -1,
coverage_fuzzing_pipeline: -1,
coverage_fuzzing_scans: -1,
api_fuzzing_pipeline: -1,
api_fuzzing_scans: -1,
user_unique_users_all_secure_scanners: -1
)
end
context 'with feature flag: postgres_hll_batch_counting is disabled' do
before do
stub_feature_flags(postgres_hll_batch_counting: false)
end
it 'includes accurate usage_activity_by_stage data' do
expect(described_class.usage_activity_by_stage_secure(described_class.last_28_days_time_period)).to eq(
user_preferences_group_overview_security_dashboard: 3,
......@@ -719,6 +926,7 @@ RSpec.describe Gitlab::UsageData do
)
end
end
end
describe 'usage_activity_by_stage_verify' do
it 'includes accurate usage_activity_by_stage data' do
......
......@@ -28,13 +28,14 @@ module Gitlab
# for given implementation no higher value was reported (https://gitlab.com/gitlab-org/gitlab/-/merge_requests/45673#accuracy-estimation) than 5.3%
# for the most of a cases this value is lower. However, if the exact value is necessary other tools has to be used.
class PostgresHllBatchDistinctCounter
ERROR_RATE = 4.9 # max encountered empirical error rate, used in tests
FALLBACK = -1
MIN_REQUIRED_BATCH_SIZE = 1_250
MAX_ALLOWED_LOOPS = 10_000
MIN_REQUIRED_BATCH_SIZE = 750
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
MAX_DATA_VOLUME = 4_000_000_000
# Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_BATCH_SIZE = 100_000
DEFAULT_BATCH_SIZE = 10_000
BIT_31_MASK = "B'0#{'1' * 31}'"
BIT_9_MASK = "B'#{'0' * 23}#{'1' * 9}'"
......@@ -49,7 +50,7 @@ module Gitlab
SELECT (attr_hash_32_bits & #{BIT_9_MASK})::int AS bucket_num,
(31 - floor(log(2, min((attr_hash_32_bits & #{BIT_31_MASK})::int))))::int as bucket_hash
FROM hashed_attributes
GROUP BY 1 ORDER BY 1
GROUP BY 1
SQL
TOTAL_BUCKETS_NUMBER = 512
......@@ -61,7 +62,7 @@ module Gitlab
def unwanted_configuration?(finish, batch_size, start)
batch_size <= MIN_REQUIRED_BATCH_SIZE ||
(finish - start) / batch_size >= MAX_ALLOWED_LOOPS ||
(finish - start) >= MAX_DATA_VOLUME ||
start > finish
end
......@@ -101,7 +102,7 @@ module Gitlab
num_uniques = (
((TOTAL_BUCKETS_NUMBER**2) * (0.7213 / (1 + 1.079 / TOTAL_BUCKETS_NUMBER))) /
(num_zero_buckets + hll_blob.values.sum { |bucket_hash, _| 2**(-1 * bucket_hash)} )
(num_zero_buckets + hll_blob.values.sum { |bucket_hash| 2**(-1 * bucket_hash)} )
).to_i
if num_zero_buckets > 0 && num_uniques < 2.5 * TOTAL_BUCKETS_NUMBER
......
......@@ -25,6 +25,13 @@ module Gitlab
relation.select(relation.all.table[column].sum).to_sql
end
# For estimated distinct count use exact query instead of hll
# buckets query, because it can't be used to obtain estimations without
# supplementary ruby code present in Gitlab::Database::PostgresHllBatchDistinctCounter
def estimate_batch_distinct_count(relation, column = nil, *rest)
raw_sql(relation, column, :distinct)
end
private
def raw_sql(relation, column, distinct = nil)
......
......@@ -38,6 +38,7 @@ module Gitlab
extend self
FALLBACK = -1
DISTRIBUTED_HLL_FALLBACK = -2
def count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil)
if batch
......@@ -59,6 +60,17 @@ module Gitlab
FALLBACK
end
def estimate_batch_distinct_count(relation, column = nil, batch_size: nil, start: nil, finish: nil)
Gitlab::Database::PostgresHllBatchDistinctCounter.new(relation, column).estimate_distinct_count(batch_size: batch_size, start: start, finish: finish)
rescue ActiveRecord::StatementInvalid
FALLBACK
# catch all rescue should be removed as a part of feature flag rollout issue
# https://gitlab.com/gitlab-org/gitlab/-/issues/285485
rescue StandardError => error
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
DISTRIBUTED_HLL_FALLBACK
end
def sum(relation, column, batch_size: nil, start: nil, finish: nil)
Gitlab::Database::BatchCount.batch_sum(relation, column, batch_size: batch_size, start: start, finish: finish)
rescue ActiveRecord::StatementInvalid
......
......@@ -3,9 +3,9 @@
require 'spec_helper'
RSpec.describe Gitlab::Database::PostgresHllBatchDistinctCounter do
let_it_be(:error_rate) { 4.9 } # HyperLogLog is a probabilistic algorithm, which provides estimated data, with given error margin
let_it_be(:error_rate) { described_class::ERROR_RATE } # HyperLogLog is a probabilistic algorithm, which provides estimated data, with given error margin
let_it_be(:fallback) { ::Gitlab::Database::BatchCounter::FALLBACK }
let_it_be(:small_batch_size) { calculate_batch_size(::Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE) }
let_it_be(:small_batch_size) { calculate_batch_size(described_class::MIN_REQUIRED_BATCH_SIZE) }
let(:model) { Issue }
let(:column) { :author_id }
......@@ -118,8 +118,8 @@ RSpec.describe Gitlab::Database::PostgresHllBatchDistinctCounter do
expect(described_class.new(model, column).estimate_distinct_count(start: 1, finish: 0)).to eq(fallback)
end
it 'returns fallback if loops more than allowed' do
large_finish = Gitlab::Database::PostgresHllBatchDistinctCounter::MAX_ALLOWED_LOOPS * default_batch_size + 1
it 'returns fallback if data volume exceeds upper limit' do
large_finish = Gitlab::Database::PostgresHllBatchDistinctCounter::MAX_DATA_VOLUME + 1
expect(described_class.new(model, column).estimate_distinct_count(start: 1, finish: large_finish)).to eq(fallback)
end
......
......@@ -37,6 +37,36 @@ RSpec.describe Gitlab::Utils::UsageData do
end
end
describe '#estimate_batch_distinct_count' do
let(:relation) { double(:relation) }
it 'delegates counting to counter class instance' do
expect_next_instance_of(Gitlab::Database::PostgresHllBatchDistinctCounter, relation, 'column') do |instance|
expect(instance).to receive(:estimate_distinct_count)
.with(batch_size: nil, start: nil, finish: nil)
.and_return(5)
end
expect(described_class.estimate_batch_distinct_count(relation, 'column')).to eq(5)
end
it 'returns default fallback value when counting fails due to database error' do
stub_const("Gitlab::Utils::UsageData::FALLBACK", 15)
allow(Gitlab::Database::PostgresHllBatchDistinctCounter).to receive(:new).and_raise(ActiveRecord::StatementInvalid.new(''))
expect(described_class.estimate_batch_distinct_count(relation)).to eq(15)
end
it 'logs error and returns DISTRIBUTED_HLL_FALLBACK value when counting raises any error', :aggregate_failures do
error = StandardError.new('')
stub_const("Gitlab::Utils::UsageData::DISTRIBUTED_HLL_FALLBACK", 15)
allow(Gitlab::Database::PostgresHllBatchDistinctCounter).to receive(:new).and_raise(error)
expect(Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception).with(error)
expect(described_class.estimate_batch_distinct_count(relation)).to eq(15)
end
end
describe '#sum' do
let(:relation) { double(:relation) }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment