Commit 9564bbb5 authored by Alper Akgun's avatar Alper Akgun

Merge branch...

Merge branch '299325-extend-aggregated-metrics-definition-to-include-database-metrics' into 'master'

Extend aggregated metrics definition to include database metrics [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!52784
parents 02acc3f6 5e319f52
---
# Development feature flag that gates aggregation of database-sourced
# (PostgresHll-backed) aggregated metrics in usage-ping generation.
# Disabled by default; rollout tracked in the issue below.
name: database_sourced_aggregated_metrics
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/52784
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/300411
milestone: '13.9'
type: development
group: group::product intelligence
default_enabled: false
...@@ -451,10 +451,12 @@ module EE ...@@ -451,10 +451,12 @@ module EE
pipelines_with_secure_jobs[metric_name.to_sym] = pipelines_with_secure_jobs[metric_name.to_sym] =
if start_id && finish_id if start_id && finish_id
estimate_batch_distinct_count(relation, :commit_id, batch_size: 1000, start: start_id, finish: finish_id) do |result| estimate_batch_distinct_count(relation, :commit_id, batch_size: 1000, start: start_id, finish: finish_id) do |result|
save_aggregated_metrics(**aggregated_metrics_params.merge({ data: result })) ::Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll
.save_aggregated_metrics(**aggregated_metrics_params.merge({ data: result }))
end end
else else
save_aggregated_metrics(**aggregated_metrics_params.merge({ data: ::Gitlab::Database::PostgresHll::Buckets.new })) ::Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll
.save_aggregated_metrics(**aggregated_metrics_params.merge({ data: ::Gitlab::Database::PostgresHll::Buckets.new }))
0 0
end end
end end
......
...@@ -8,16 +8,26 @@ module Gitlab ...@@ -8,16 +8,26 @@ module Gitlab
INTERSECTION_OF_AGGREGATED_METRICS = 'AND' INTERSECTION_OF_AGGREGATED_METRICS = 'AND'
ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze
AGGREGATED_METRICS_PATH = Rails.root.join('lib/gitlab/usage_data_counters/aggregated_metrics/*.yml') AGGREGATED_METRICS_PATH = Rails.root.join('lib/gitlab/usage_data_counters/aggregated_metrics/*.yml')
UnknownAggregationOperator = Class.new(StandardError) AggregatedMetricError = Class.new(StandardError)
UnknownAggregationOperator = Class.new(AggregatedMetricError)
UnknownAggregationSource = Class.new(AggregatedMetricError)
DATABASE_SOURCE = 'database'
REDIS_SOURCE = 'redis'
SOURCES = {
DATABASE_SOURCE => Sources::PostgresHll,
REDIS_SOURCE => Sources::RedisHll
}.freeze
class Aggregate class Aggregate
delegate :calculate_events_union, delegate :weekly_time_range,
:weekly_time_range,
:monthly_time_range, :monthly_time_range,
to: Gitlab::UsageDataCounters::HLLRedisCounter to: Gitlab::UsageDataCounters::HLLRedisCounter
def initialize def initialize(recorded_at)
@aggregated_metrics = load_events(AGGREGATED_METRICS_PATH) @aggregated_metrics = load_metrics(AGGREGATED_METRICS_PATH)
@recorded_at = recorded_at
end end
def monthly_data def monthly_data
...@@ -30,35 +40,49 @@ module Gitlab ...@@ -30,35 +40,49 @@ module Gitlab
private private
attr_accessor :aggregated_metrics attr_accessor :aggregated_metrics, :recorded_at
def aggregated_metrics_data(start_date:, end_date:) def aggregated_metrics_data(start_date:, end_date:)
aggregated_metrics.each_with_object({}) do |aggregation, weekly_data| aggregated_metrics.each_with_object({}) do |aggregation, data|
next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: false, type: :development) next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: false, type: :development)
weekly_data[aggregation[:name]] = calculate_count_for_aggregation(aggregation, start_date: start_date, end_date: end_date) case aggregation[:source]
when REDIS_SOURCE
data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date)
when DATABASE_SOURCE
next unless Feature.enabled?('database_sourced_aggregated_metrics', default_enabled: false, type: :development)
data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date)
else
Gitlab::ErrorTracking
.track_and_raise_for_dev_exception(UnknownAggregationSource.new("Aggregation source: '#{aggregation[:source]}' must be included in #{SOURCES.keys}"))
data[aggregation[:name]] = Gitlab::Utils::UsageData::FALLBACK
end
end end
end end
def calculate_count_for_aggregation(aggregation, start_date:, end_date:) def calculate_count_for_aggregation(aggregation:, start_date:, end_date:)
source = SOURCES[aggregation[:source]]
case aggregation[:operator] case aggregation[:operator]
when UNION_OF_AGGREGATED_METRICS when UNION_OF_AGGREGATED_METRICS
calculate_events_union(event_names: aggregation[:events], start_date: start_date, end_date: end_date) source.calculate_metrics_union(metric_names: aggregation[:events], start_date: start_date, end_date: end_date, recorded_at: recorded_at)
when INTERSECTION_OF_AGGREGATED_METRICS when INTERSECTION_OF_AGGREGATED_METRICS
calculate_events_intersections(event_names: aggregation[:events], start_date: start_date, end_date: end_date) calculate_metrics_intersections(source: source, metric_names: aggregation[:events], start_date: start_date, end_date: end_date)
else else
Gitlab::ErrorTracking Gitlab::ErrorTracking
.track_and_raise_for_dev_exception(UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}")) .track_and_raise_for_dev_exception(UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}"))
Gitlab::Utils::UsageData::FALLBACK Gitlab::Utils::UsageData::FALLBACK
end end
rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError => error rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError, AggregatedMetricError => error
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error) Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
Gitlab::Utils::UsageData::FALLBACK Gitlab::Utils::UsageData::FALLBACK
end end
# calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle # calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
# this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391 # this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391
def calculate_events_intersections(event_names:, start_date:, end_date:, subset_powers_cache: Hash.new({})) def calculate_metrics_intersections(source:, metric_names:, start_date:, end_date:, subset_powers_cache: Hash.new({}))
# calculate power of intersection of all given metrics from inclusion exclusion principle # calculate power of intersection of all given metrics from inclusion exclusion principle
# |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) => # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) =>
# |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C| # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
...@@ -66,12 +90,12 @@ module Gitlab ...@@ -66,12 +90,12 @@ module Gitlab
# |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D| # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
# calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... # calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ...
subset_powers_data = subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache) subset_powers_data = subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache)
# calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D| # calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D|
power_of_union_of_all_events = begin power_of_union_of_all_metrics = begin
subset_powers_cache[event_names.size][event_names.join('_+_')] ||= \ subset_powers_cache[metric_names.size][metric_names.join('_+_')] ||= \
calculate_events_union(event_names: event_names, start_date: start_date, end_date: end_date) source.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
end end
# in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate, # in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate,
...@@ -86,7 +110,7 @@ module Gitlab ...@@ -86,7 +110,7 @@ module Gitlab
sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even) sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)
# add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D| # add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D|
sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_events : -power_of_union_of_all_events) sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_metrics : -power_of_union_of_all_metrics)
end end
def sum_subset_powers(subset_powers_data, subset_powers_size_even) def sum_subset_powers(subset_powers_data, subset_powers_size_even)
...@@ -97,29 +121,29 @@ module Gitlab ...@@ -97,29 +121,29 @@ module Gitlab
(subset_powers_size_even ? -1 : 1) * sum_without_sign (subset_powers_size_even ? -1 : 1) * sum_without_sign
end end
def subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache) def subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache)
subset_sizes = (1..(event_names.size - 1)) subset_sizes = (1...metric_names.size)
subset_sizes.map do |subset_size| subset_sizes.map do |subset_size|
if subset_size > 1 if subset_size > 1
# calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|) # calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
event_names.combination(subset_size).sum do |events_subset| metric_names.combination(subset_size).sum do |metrics_subset|
subset_powers_cache[subset_size][events_subset.join('_&_')] ||= \ subset_powers_cache[subset_size][metrics_subset.join('_&_')] ||=
calculate_events_intersections(event_names: events_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache) calculate_metrics_intersections(source: source, metric_names: metrics_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
end end
else else
# calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ... # calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
event_names.sum do |event| metric_names.sum do |metric|
subset_powers_cache[subset_size][event] ||= \ subset_powers_cache[subset_size][metric] ||= \
calculate_events_union(event_names: event, start_date: start_date, end_date: end_date) source.calculate_metrics_union(metric_names: metric, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
end end
end end
end end
end end
def load_events(wildcard) def load_metrics(wildcard)
Dir[wildcard].each_with_object([]) do |path, events| Dir[wildcard].each_with_object([]) do |path, metrics|
events.push(*load_yaml_from_path(path)) metrics.push(*load_yaml_from_path(path))
end end
end end
......
# frozen_string_literal: true

module Gitlab
  module Usage
    module Metrics
      module Aggregates
        module Sources
          # Aggregated-metrics source backed by PostgreSQL HLL buckets.
          # Buckets are persisted (serialized as JSON) into Redis shared state by
          # `save_aggregated_metrics` and later merged by `calculate_metrics_union`.
          class PostgresHll
            class << self
              # Estimates the distinct count of the union of the given metrics by
              # merging the HLL buckets previously saved for each metric.
              #
              # @param metric_names [String, Array<String>] metric name(s) to merge
              # @param start_date [Date, nil] start of the reporting time period
              # @param end_date [Date, nil] end of the reporting time period
              # @param recorded_at [Time] timestamp the buckets were saved under
              # @return [Float] estimated distinct count of the union
              # @raise [UnionNotAvailable] when any metric has no persisted buckets
              def calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at:)
                # A nil time period means "all time"; see time_period_to_human_name.
                time_period = (start_date..end_date) if start_date && end_date

                merged = Array(metric_names).each_with_object(Gitlab::Database::PostgresHll::Buckets.new) do |metric, buckets|
                  serialized = read_aggregated_metric(metric_name: metric, time_period: time_period, recorded_at: recorded_at)
                  raise UnionNotAvailable, "Union data not available for #{metric_names}" unless serialized

                  buckets.merge_hash!(Gitlab::Json.parse(serialized))
                end

                merged.estimated_distinct_count
              end

              # Persists the HLL buckets for a single metric into Redis shared state
              # so they can be merged later by `calculate_metrics_union`.
              #
              # @param metric_name [String] metric the buckets belong to
              # @param time_period [Hash, nil] e.g. { created_at: range }; nil means all time
              # @param recorded_at_timestamp [Time] report generation timestamp
              # @param data [::Gitlab::Database::PostgresHll::Buckets] buckets to store
              def save_aggregated_metrics(metric_name:, time_period:, recorded_at_timestamp:, data:)
                unless data.is_a?(::Gitlab::Database::PostgresHll::Buckets)
                  Gitlab::ErrorTracking.track_and_raise_for_dev_exception(StandardError.new("Unsupported data type: #{data.class}"))
                  return
                end

                # Usage Ping report generation for gitlab.com is very long running process
                # to make sure that saved keys are available at the end of report generation process
                # lets use triple max generation time
                keys_expiration = ::Gitlab::UsageData::MAX_GENERATION_TIME_FOR_SAAS * 3

                Gitlab::Redis::SharedState.with do |redis|
                  key = redis_key(metric_name: metric_name, time_period: time_period&.values&.first, recorded_at: recorded_at_timestamp)
                  redis.set(key, data.to_json, ex: keys_expiration)
                end
              rescue ::Redis::CommandError => error
                Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
              end

              private

              # Reads the serialized buckets for a metric from Redis shared state.
              def read_aggregated_metric(metric_name:, time_period:, recorded_at:)
                Gitlab::Redis::SharedState.with do |redis|
                  redis.get(redis_key(metric_name: metric_name, time_period: time_period, recorded_at: recorded_at))
                end
              end

              # Builds the Redis key for a metric/time-period pair.
              # The timestamp suffix avoids stale keys if the usage ping job is retried.
              def redis_key(metric_name:, time_period:, recorded_at:)
                "#{metric_name}_#{time_period_to_human_name(time_period)}-#{recorded_at.to_i}"
              end

              # Maps a date range to a human-readable period name:
              # blank -> all time, <= 7 days -> weekly, otherwise -> monthly.
              def time_period_to_human_name(time_period)
                return Gitlab::Utils::UsageData::ALL_TIME_PERIOD_HUMAN_NAME if time_period.blank?

                duration_in_days = (time_period.last.to_date - time_period.first.to_date).to_i

                duration_in_days > 7 ? Gitlab::Utils::UsageData::MONTHLY_PERIOD_HUMAN_NAME : Gitlab::Utils::UsageData::WEEKLY_PERIOD_HUMAN_NAME
              end
            end
          end
        end
      end
    end
  end
end
# frozen_string_literal: true

module Gitlab
  module Usage
    module Metrics
      module Aggregates
        module Sources
          # Raised when a union estimate cannot be produced for the given metrics.
          UnionNotAvailable = Class.new(AggregatedMetricError)

          # Aggregated-metrics source backed by Redis HLL counters.
          class RedisHll
            # Estimates the distinct count of the union of the given events via
            # Gitlab::UsageDataCounters::HLLRedisCounter.
            #
            # `recorded_at` is accepted only for interface parity with the
            # PostgresHll source and is ignored here.
            #
            # @return [Integer] non-negative union estimate
            # @raise [UnionNotAvailable] when the counter returns a negative
            #   (fallback) value, which must not be reported as a valid result
            def self.calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at: nil)
              estimate = Gitlab::UsageDataCounters::HLLRedisCounter
                .calculate_events_union(event_names: metric_names, start_date: start_date, end_date: end_date)

              raise UnionNotAvailable, "Union data not available for #{metric_names}" if estimate < 0

              estimate
            end
          end
        end
      end
    end
  end
end
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
module Gitlab module Gitlab
class UsageData class UsageData
DEPRECATED_VALUE = -1000 DEPRECATED_VALUE = -1000
MAX_GENERATION_TIME_FOR_SAAS = 40.hours
CE_MEMOIZED_VALUES = %i( CE_MEMOIZED_VALUES = %i(
issue_minimum_id issue_minimum_id
...@@ -754,7 +755,7 @@ module Gitlab ...@@ -754,7 +755,7 @@ module Gitlab
private private
def aggregated_metrics def aggregated_metrics
@aggregated_metrics ||= ::Gitlab::Usage::Metrics::Aggregates::Aggregate.new @aggregated_metrics ||= ::Gitlab::Usage::Metrics::Aggregates::Aggregate.new(recorded_at)
end end
def event_monthly_active_users(date_range) def event_monthly_active_users(date_range)
......
...@@ -4,21 +4,28 @@ ...@@ -4,21 +4,28 @@
# - "AND": counts unique elements that were observed triggering all of following events # - "AND": counts unique elements that were observed triggering all of following events
# events: list of events names to aggregate into metric. All events in this list must have the same 'redis_slot' and 'aggregation' attributes # events: list of events names to aggregate into metric. All events in this list must have the same 'redis_slot' and 'aggregation' attributes
# see from lib/gitlab/usage_data_counters/known_events/ for the list of valid events. # see from lib/gitlab/usage_data_counters/known_events/ for the list of valid events.
# source: defines which datasource will be used to locate events that should be included in aggregated metric. Valid values are:
# - database
# - redis
# feature_flag: name of development feature flag that will be checked before metrics aggregation is performed. # feature_flag: name of development feature flag that will be checked before metrics aggregation is performed.
# Corresponding feature flag should have `default_enabled` attribute set to `false`. # Corresponding feature flag should have `default_enabled` attribute set to `false`.
# This attribute is OPTIONAL and can be omitted, when `feature_flag` is missing no feature flag will be checked. # This attribute is OPTIONAL and can be omitted, when `feature_flag` is missing no feature flag will be checked.
--- ---
- name: compliance_features_track_unique_visits_union - name: compliance_features_track_unique_visits_union
operator: OR operator: OR
source: redis
events: ['g_compliance_audit_events', 'g_compliance_dashboard', 'i_compliance_audit_events', 'a_compliance_audit_events_api', 'i_compliance_credential_inventory'] events: ['g_compliance_audit_events', 'g_compliance_dashboard', 'i_compliance_audit_events', 'a_compliance_audit_events_api', 'i_compliance_credential_inventory']
- name: product_analytics_test_metrics_union - name: product_analytics_test_metrics_union
operator: OR operator: OR
source: redis
events: ['i_search_total', 'i_search_advanced', 'i_search_paid'] events: ['i_search_total', 'i_search_advanced', 'i_search_paid']
- name: product_analytics_test_metrics_intersection - name: product_analytics_test_metrics_intersection
operator: AND operator: AND
source: redis
events: ['i_search_total', 'i_search_advanced', 'i_search_paid'] events: ['i_search_total', 'i_search_advanced', 'i_search_paid']
- name: incident_management_alerts_total_unique_counts - name: incident_management_alerts_total_unique_counts
operator: OR operator: OR
source: redis
events: [ events: [
'incident_management_alert_status_changed', 'incident_management_alert_status_changed',
'incident_management_alert_assigned', 'incident_management_alert_assigned',
...@@ -27,6 +34,7 @@ ...@@ -27,6 +34,7 @@
] ]
- name: incident_management_incidents_total_unique_counts - name: incident_management_incidents_total_unique_counts
operator: OR operator: OR
source: redis
events: [ events: [
'incident_management_incident_created', 'incident_management_incident_created',
'incident_management_incident_reopened', 'incident_management_incident_reopened',
......
...@@ -80,27 +80,6 @@ module Gitlab ...@@ -80,27 +80,6 @@ module Gitlab
DISTRIBUTED_HLL_FALLBACK DISTRIBUTED_HLL_FALLBACK
end end
def save_aggregated_metrics(metric_name:, time_period:, recorded_at_timestamp:, data:)
unless data.is_a? ::Gitlab::Database::PostgresHll::Buckets
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(StandardError.new("Unsupported data type: #{data.class}"))
return
end
# the longest recorded usage ping generation time for gitlab.com
# was below 40 hours, there is added error margin of 20 h
usage_ping_generation_period = 80.hours
# add timestamp at the end of the key to avoid stale keys if
# usage ping job is retried
redis_key = "#{metric_name}_#{time_period_to_human_name(time_period)}-#{recorded_at_timestamp}"
Gitlab::Redis::SharedState.with do |redis|
redis.set(redis_key, data.to_json, ex: usage_ping_generation_period)
end
rescue ::Redis::CommandError => e
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end
def sum(relation, column, batch_size: nil, start: nil, finish: nil) def sum(relation, column, batch_size: nil, start: nil, finish: nil)
Gitlab::Database::BatchCount.batch_sum(relation, column, batch_size: batch_size, start: start, finish: finish) Gitlab::Database::BatchCount.batch_sum(relation, column, batch_size: batch_size, start: start, finish: finish)
rescue ActiveRecord::StatementInvalid rescue ActiveRecord::StatementInvalid
...@@ -152,20 +131,6 @@ module Gitlab ...@@ -152,20 +131,6 @@ module Gitlab
Gitlab::UsageDataCounters::HLLRedisCounter.track_event(event_name.to_s, values: values) Gitlab::UsageDataCounters::HLLRedisCounter.track_event(event_name.to_s, values: values)
end end
def time_period_to_human_name(time_period)
return ALL_TIME_PERIOD_HUMAN_NAME if time_period.blank?
date_range = time_period.values[0]
start_date = date_range.first.to_date
end_date = date_range.last.to_date
if (end_date - start_date).to_i > 7
MONTHLY_PERIOD_HUMAN_NAME
else
WEEKLY_PERIOD_HUMAN_NAME
end
end
private private
def prometheus_client(verify:) def prometheus_client(verify:)
......
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll, :clean_gitlab_redis_shared_state do
  let_it_be(:start_date) { 7.days.ago }
  let_it_be(:end_date) { Date.current }
  let_it_be(:recorded_at) { Time.current }
  let_it_be(:time_period) { { created_at: (start_date..end_date) } }

  let(:metric_1) { 'metric_1' }
  let(:metric_2) { 'metric_2' }
  let(:metric_names) { [metric_1, metric_2] }

  # Fixed describe label: the method under test is `.calculate_metrics_union`,
  # not `.calculate_events_union` (that name does not exist on this class).
  describe '.calculate_metrics_union' do
    subject(:calculate_metrics_union) do
      described_class.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
    end

    before do
      # Persist HLL buckets for both metrics so the union can be calculated.
      [
        {
          metric_name: metric_1,
          time_period: time_period,
          recorded_at_timestamp: recorded_at,
          data: ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1)
        },
        {
          metric_name: metric_2,
          time_period: time_period,
          recorded_at_timestamp: recorded_at,
          data: ::Gitlab::Database::PostgresHll::Buckets.new(10 => 1, 56 => 1)
        }
      ].each do |params|
        described_class.save_aggregated_metrics(**params)
      end
    end

    it 'returns the number of unique events in the union of all metrics' do
      expect(calculate_metrics_union.round(2)).to eq(3.12)
    end

    context 'when there is no aggregated data saved' do
      let(:metric_names) { [metric_1, 'i do not have any records'] }

      it 'raises error when union data is missing' do
        expect { calculate_metrics_union }.to raise_error Gitlab::Usage::Metrics::Aggregates::Sources::UnionNotAvailable
      end
    end

    context 'when there is only one metric defined as aggregated' do
      let(:metric_names) { [metric_1] }

      it 'returns the number of unique events for that metric' do
        expect(calculate_metrics_union.round(2)).to eq(2.08)
      end
    end
  end

  describe '.save_aggregated_metrics' do
    subject(:save_aggregated_metrics) do
      described_class.save_aggregated_metrics(metric_name: metric_1,
                                              time_period: time_period,
                                              recorded_at_timestamp: recorded_at,
                                              data: data)
    end

    context 'with compatible data argument' do
      let(:data) { ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1) }

      # Expiration of 120.hours == MAX_GENERATION_TIME_FOR_SAAS (40.hours) * 3.
      it 'persists serialized data in Redis' do
        Gitlab::Redis::SharedState.with do |redis|
          expect(redis).to receive(:set).with("#{metric_1}_weekly-#{recorded_at.to_i}", '{"141":1,"56":1}', ex: 120.hours)
        end

        save_aggregated_metrics
      end

      context 'with monthly key' do
        let_it_be(:start_date) { 4.weeks.ago }
        let_it_be(:time_period) { { created_at: (start_date..end_date) } }

        it 'persists serialized data in Redis' do
          Gitlab::Redis::SharedState.with do |redis|
            expect(redis).to receive(:set).with("#{metric_1}_monthly-#{recorded_at.to_i}", '{"141":1,"56":1}', ex: 120.hours)
          end

          save_aggregated_metrics
        end
      end

      context 'with all_time key' do
        let_it_be(:time_period) { nil }

        it 'persists serialized data in Redis' do
          Gitlab::Redis::SharedState.with do |redis|
            expect(redis).to receive(:set).with("#{metric_1}_all_time-#{recorded_at.to_i}", '{"141":1,"56":1}', ex: 120.hours)
          end

          save_aggregated_metrics
        end
      end

      context 'error handling' do
        before do
          allow(Gitlab::Redis::SharedState).to receive(:with).and_raise(::Redis::CommandError)
        end

        it 'rescues and reraise ::Redis::CommandError for development and test environments' do
          expect { save_aggregated_metrics }.to raise_error ::Redis::CommandError
        end

        context 'for environment different than development' do
          before do
            stub_rails_env('production')
          end

          it 'rescues ::Redis::CommandError' do
            expect { save_aggregated_metrics }.not_to raise_error
          end
        end
      end
    end

    context 'with incompatible data argument' do
      let(:data) { 1 }

      context 'for environment different than development' do
        before do
          stub_rails_env('production')
        end

        it 'does not persist data in Redis' do
          Gitlab::Redis::SharedState.with do |redis|
            expect(redis).not_to receive(:set)
          end

          save_aggregated_metrics
        end
      end

      it 'raises error for development environment' do
        expect { save_aggregated_metrics }.to raise_error /Unsupported data type/
      end
    end
  end
end
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe Gitlab::Usage::Metrics::Aggregates::Sources::RedisHll do
  # Fixed describe label: the method under test is `.calculate_metrics_union`,
  # not `.calculate_events_union` (that name lives on HLLRedisCounter).
  describe '.calculate_metrics_union' do
    let(:event_names) { %w[event_a event_b] }
    let(:start_date) { 7.days.ago }
    let(:end_date) { Date.current }

    subject(:calculate_metrics_union) do
      described_class.calculate_metrics_union(metric_names: event_names, start_date: start_date, end_date: end_date, recorded_at: nil)
    end

    it 'calls Gitlab::UsageDataCounters::HLLRedisCounter.calculate_events_union' do
      expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
        .with(event_names: event_names, start_date: start_date, end_date: end_date)
        .and_return(5)

      calculate_metrics_union
    end

    # A negative value is HLLRedisCounter's fallback sentinel and must surface
    # as UnionNotAvailable instead of being reported as a valid union.
    it 'prevents from using fallback value as valid union result' do
      allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union).and_return(-1)

      expect { calculate_metrics_union }.to raise_error Gitlab::Usage::Metrics::Aggregates::Sources::UnionNotAvailable
    end
  end
end
...@@ -13,18 +13,32 @@ RSpec.describe 'aggregated metrics' do ...@@ -13,18 +13,32 @@ RSpec.describe 'aggregated metrics' do
end end
end end
RSpec::Matchers.define :has_known_source do
match do |aggregate|
Gitlab::Usage::Metrics::Aggregates::SOURCES.include?(aggregate[:source])
end
failure_message do |aggregate|
"Aggregate with name: `#{aggregate[:name]}` uses not allowed source `#{aggregate[:source]}`"
end
end
let_it_be(:known_events) do let_it_be(:known_events) do
Gitlab::UsageDataCounters::HLLRedisCounter.known_events Gitlab::UsageDataCounters::HLLRedisCounter.known_events
end end
Gitlab::Usage::Metrics::Aggregates::Aggregate.new.send(:aggregated_metrics).tap do |aggregated_metrics| Gitlab::Usage::Metrics::Aggregates::Aggregate.new(Time.current).send(:aggregated_metrics).tap do |aggregated_metrics|
it 'all events has unique name' do it 'all events has unique name' do
event_names = aggregated_metrics&.map { |event| event[:name] } event_names = aggregated_metrics&.map { |event| event[:name] }
expect(event_names).to eq(event_names&.uniq) expect(event_names).to eq(event_names&.uniq)
end end
aggregated_metrics&.each do |aggregate| it 'all aggregated metrics has known source' do
expect(aggregated_metrics).to all has_known_source
end
aggregated_metrics&.select { |agg| agg[:source] == Gitlab::Usage::Metrics::Aggregates::REDIS_SOURCE }&.each do |aggregate|
context "for #{aggregate[:name]} aggregate of #{aggregate[:events].join(' ')}" do context "for #{aggregate[:name]} aggregate of #{aggregate[:events].join(' ')}" do
let_it_be(:events_records) { known_events.select { |event| aggregate[:events].include?(event[:name]) } } let_it_be(:events_records) { known_events.select { |event| aggregate[:events].include?(event[:name]) } }
......
...@@ -372,97 +372,4 @@ RSpec.describe Gitlab::Utils::UsageData do ...@@ -372,97 +372,4 @@ RSpec.describe Gitlab::Utils::UsageData do
end end
end end
end end
describe '#save_aggregated_metrics', :clean_gitlab_redis_shared_state do
let(:timestamp) { Time.current.to_i }
let(:time_period) { { created_at: 7.days.ago..Date.current } }
let(:metric_name) { 'test_metric' }
let(:method_params) do
{
metric_name: metric_name,
time_period: time_period,
recorded_at_timestamp: timestamp,
data: data
}
end
context 'with compatible data argument' do
let(:data) { ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1) }
it 'persists serialized data in Redis' do
time_period_name = 'weekly'
expect(described_class).to receive(:time_period_to_human_name).with(time_period).and_return(time_period_name)
Gitlab::Redis::SharedState.with do |redis|
expect(redis).to receive(:set).with("#{metric_name}_#{time_period_name}-#{timestamp}", '{"141":1,"56":1}', ex: 80.hours)
end
described_class.save_aggregated_metrics(**method_params)
end
context 'error handling' do
before do
allow(Gitlab::Redis::SharedState).to receive(:with).and_raise(::Redis::CommandError)
end
it 'rescues and reraise ::Redis::CommandError for development and test environments' do
expect { described_class.save_aggregated_metrics(**method_params) }.to raise_error ::Redis::CommandError
end
context 'for environment different than development' do
before do
stub_rails_env('production')
end
it 'rescues ::Redis::CommandError' do
expect { described_class.save_aggregated_metrics(**method_params) }.not_to raise_error
end
end
end
end
context 'with incompatible data argument' do
let(:data) { 1 }
context 'for environment different than development' do
before do
stub_rails_env('production')
end
it 'does not persist data in Redis' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).not_to receive(:set)
end
described_class.save_aggregated_metrics(**method_params)
end
end
it 'raises error for development environment' do
expect { described_class.save_aggregated_metrics(**method_params) }.to raise_error /Unsupported data type/
end
end
end
describe '#time_period_to_human_name' do
it 'translates empty time period as all_time' do
expect(described_class.time_period_to_human_name({})).to eql 'all_time'
end
it 'translates time period not longer than 7 days as weekly', :aggregate_failures do
days_6_time_period = 6.days.ago..Date.current
days_7_time_period = 7.days.ago..Date.current
expect(described_class.time_period_to_human_name(column_name: days_6_time_period)).to eql 'weekly'
expect(described_class.time_period_to_human_name(column_name: days_7_time_period)).to eql 'weekly'
end
it 'translates time period longer than 7 days as monthly', :aggregate_failures do
days_8_time_period = 8.days.ago..Date.current
days_31_time_period = 31.days.ago..Date.current
expect(described_class.time_period_to_human_name(column_name: days_8_time_period)).to eql 'monthly'
expect(described_class.time_period_to_human_name(column_name: days_31_time_period)).to eql 'monthly'
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment