Commit 5e319f52 authored by Mikolaj Wawrzyniak's avatar Mikolaj Wawrzyniak

Create Agggregated Metrics Sources

To house all source related operations for Aggregated Metrics
feautre new Sources class has to be added.
parent 7bf372c1
---
name: database_sourced_aggregated_metrics
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/52784
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/300411
milestone: '13.9'
type: development
group: group::product intelligence
default_enabled: false
......@@ -451,10 +451,12 @@ module EE
pipelines_with_secure_jobs[metric_name.to_sym] =
if start_id && finish_id
estimate_batch_distinct_count(relation, :commit_id, batch_size: 1000, start: start_id, finish: finish_id) do |result|
save_aggregated_metrics(**aggregated_metrics_params.merge({ data: result }))
::Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll
.save_aggregated_metrics(**aggregated_metrics_params.merge({ data: result }))
end
else
save_aggregated_metrics(**aggregated_metrics_params.merge({ data: ::Gitlab::Database::PostgresHll::Buckets.new }))
::Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll
.save_aggregated_metrics(**aggregated_metrics_params.merge({ data: ::Gitlab::Database::PostgresHll::Buckets.new }))
0
end
end
......
......@@ -8,16 +8,26 @@ module Gitlab
INTERSECTION_OF_AGGREGATED_METRICS = 'AND'
ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze
AGGREGATED_METRICS_PATH = Rails.root.join('lib/gitlab/usage_data_counters/aggregated_metrics/*.yml')
UnknownAggregationOperator = Class.new(StandardError)
AggregatedMetricError = Class.new(StandardError)
UnknownAggregationOperator = Class.new(AggregatedMetricError)
UnknownAggregationSource = Class.new(AggregatedMetricError)
DATABASE_SOURCE = 'database'
REDIS_SOURCE = 'redis'
SOURCES = {
DATABASE_SOURCE => Sources::PostgresHll,
REDIS_SOURCE => Sources::RedisHll
}.freeze
class Aggregate
delegate :calculate_events_union,
:weekly_time_range,
delegate :weekly_time_range,
:monthly_time_range,
to: Gitlab::UsageDataCounters::HLLRedisCounter
def initialize
@aggregated_metrics = load_events(AGGREGATED_METRICS_PATH)
def initialize(recorded_at)
@aggregated_metrics = load_metrics(AGGREGATED_METRICS_PATH)
@recorded_at = recorded_at
end
def monthly_data
......@@ -30,35 +40,49 @@ module Gitlab
private
attr_accessor :aggregated_metrics
attr_accessor :aggregated_metrics, :recorded_at
def aggregated_metrics_data(start_date:, end_date:)
aggregated_metrics.each_with_object({}) do |aggregation, weekly_data|
aggregated_metrics.each_with_object({}) do |aggregation, data|
next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: false, type: :development)
weekly_data[aggregation[:name]] = calculate_count_for_aggregation(aggregation, start_date: start_date, end_date: end_date)
case aggregation[:source]
when REDIS_SOURCE
data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date)
when DATABASE_SOURCE
next unless Feature.enabled?('database_sourced_aggregated_metrics', default_enabled: false, type: :development)
data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date)
else
Gitlab::ErrorTracking
.track_and_raise_for_dev_exception(UnknownAggregationSource.new("Aggregation source: '#{aggregation[:source]}' must be included in #{SOURCES.keys}"))
data[aggregation[:name]] = Gitlab::Utils::UsageData::FALLBACK
end
end
end
def calculate_count_for_aggregation(aggregation, start_date:, end_date:)
def calculate_count_for_aggregation(aggregation:, start_date:, end_date:)
source = SOURCES[aggregation[:source]]
case aggregation[:operator]
when UNION_OF_AGGREGATED_METRICS
calculate_events_union(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
source.calculate_metrics_union(metric_names: aggregation[:events], start_date: start_date, end_date: end_date, recorded_at: recorded_at)
when INTERSECTION_OF_AGGREGATED_METRICS
calculate_events_intersections(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
calculate_metrics_intersections(source: source, metric_names: aggregation[:events], start_date: start_date, end_date: end_date)
else
Gitlab::ErrorTracking
.track_and_raise_for_dev_exception(UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}"))
Gitlab::Utils::UsageData::FALLBACK
end
rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError => error
rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError, AggregatedMetricError => error
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
Gitlab::Utils::UsageData::FALLBACK
end
# calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
# this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391
def calculate_events_intersections(event_names:, start_date:, end_date:, subset_powers_cache: Hash.new({}))
def calculate_metrics_intersections(source:, metric_names:, start_date:, end_date:, subset_powers_cache: Hash.new({}))
# calculate power of intersection of all given metrics from inclusion exclusion principle
# |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) =>
# |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
......@@ -66,12 +90,12 @@ module Gitlab
# |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
# calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ...
subset_powers_data = subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
subset_powers_data = subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache)
# calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D|
power_of_union_of_all_events = begin
subset_powers_cache[event_names.size][event_names.join('_+_')] ||= \
calculate_events_union(event_names: event_names, start_date: start_date, end_date: end_date)
power_of_union_of_all_metrics = begin
subset_powers_cache[metric_names.size][metric_names.join('_+_')] ||= \
source.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
end
# in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate,
......@@ -86,7 +110,7 @@ module Gitlab
sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)
# add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D|
sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_events : -power_of_union_of_all_events)
sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_metrics : -power_of_union_of_all_metrics)
end
def sum_subset_powers(subset_powers_data, subset_powers_size_even)
......@@ -97,29 +121,29 @@ module Gitlab
(subset_powers_size_even ? -1 : 1) * sum_without_sign
end
def subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
subset_sizes = (1..(event_names.size - 1))
def subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache)
subset_sizes = (1...metric_names.size)
subset_sizes.map do |subset_size|
if subset_size > 1
# calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
event_names.combination(subset_size).sum do |events_subset|
subset_powers_cache[subset_size][events_subset.join('_&_')] ||= \
calculate_events_intersections(event_names: events_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
metric_names.combination(subset_size).sum do |metrics_subset|
subset_powers_cache[subset_size][metrics_subset.join('_&_')] ||=
calculate_metrics_intersections(source: source, metric_names: metrics_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
end
else
# calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
event_names.sum do |event|
subset_powers_cache[subset_size][event] ||= \
calculate_events_union(event_names: event, start_date: start_date, end_date: end_date)
metric_names.sum do |metric|
subset_powers_cache[subset_size][metric] ||= \
source.calculate_metrics_union(metric_names: metric, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
end
end
end
end
def load_events(wildcard)
Dir[wildcard].each_with_object([]) do |path, events|
events.push(*load_yaml_from_path(path))
def load_metrics(wildcard)
Dir[wildcard].each_with_object([]) do |path, metrics|
metrics.push(*load_yaml_from_path(path))
end
end
......
# frozen_string_literal: true
module Gitlab
module Usage
module Metrics
module Aggregates
module Sources
class PostgresHll
class << self
def calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at:)
time_period = start_date && end_date ? (start_date..end_date) : nil
Array(metric_names).each_with_object(Gitlab::Database::PostgresHll::Buckets.new) do |event, buckets|
json = read_aggregated_metric(metric_name: event, time_period: time_period, recorded_at: recorded_at)
raise UnionNotAvailable, "Union data not available for #{metric_names}" unless json
buckets.merge_hash!(Gitlab::Json.parse(json))
end.estimated_distinct_count
end
def save_aggregated_metrics(metric_name:, time_period:, recorded_at_timestamp:, data:)
unless data.is_a? ::Gitlab::Database::PostgresHll::Buckets
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(StandardError.new("Unsupported data type: #{data.class}"))
return
end
# Usage Ping report generation for gitlab.com is very long running process
# to make sure that saved keys are available at the end of report generation process
# lets use triple max generation time
keys_expiration = ::Gitlab::UsageData::MAX_GENERATION_TIME_FOR_SAAS * 3
Gitlab::Redis::SharedState.with do |redis|
redis.set(
redis_key(metric_name: metric_name, time_period: time_period&.values&.first, recorded_at: recorded_at_timestamp),
data.to_json,
ex: keys_expiration
)
end
rescue ::Redis::CommandError => e
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end
private
def read_aggregated_metric(metric_name:, time_period:, recorded_at:)
Gitlab::Redis::SharedState.with do |redis|
redis.get(redis_key(metric_name: metric_name, time_period: time_period, recorded_at: recorded_at))
end
end
def redis_key(metric_name:, time_period:, recorded_at:)
# add timestamp at the end of the key to avoid stale keys if
# usage ping job is retried
"#{metric_name}_#{time_period_to_human_name(time_period)}-#{recorded_at.to_i}"
end
def time_period_to_human_name(time_period)
return Gitlab::Utils::UsageData::ALL_TIME_PERIOD_HUMAN_NAME if time_period.blank?
start_date = time_period.first.to_date
end_date = time_period.last.to_date
if (end_date - start_date).to_i > 7
Gitlab::Utils::UsageData::MONTHLY_PERIOD_HUMAN_NAME
else
Gitlab::Utils::UsageData::WEEKLY_PERIOD_HUMAN_NAME
end
end
end
end
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Usage
module Metrics
module Aggregates
module Sources
UnionNotAvailable = Class.new(AggregatedMetricError)
class RedisHll
def self.calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at: nil)
union = Gitlab::UsageDataCounters::HLLRedisCounter
.calculate_events_union(event_names: metric_names, start_date: start_date, end_date: end_date)
return union if union >= 0
raise UnionNotAvailable, "Union data not available for #{metric_names}"
end
end
end
end
end
end
end
......@@ -13,6 +13,7 @@
module Gitlab
class UsageData
DEPRECATED_VALUE = -1000
MAX_GENERATION_TIME_FOR_SAAS = 40.hours
CE_MEMOIZED_VALUES = %i(
issue_minimum_id
......@@ -754,7 +755,7 @@ module Gitlab
private
def aggregated_metrics
@aggregated_metrics ||= ::Gitlab::Usage::Metrics::Aggregates::Aggregate.new
@aggregated_metrics ||= ::Gitlab::Usage::Metrics::Aggregates::Aggregate.new(recorded_at)
end
def event_monthly_active_users(date_range)
......
......@@ -4,21 +4,28 @@
# - "AND": counts unique elements that were observed triggering all of following events
# events: list of events names to aggregate into metric. All events in this list must have the same 'redis_slot' and 'aggregation' attributes
# see from lib/gitlab/usage_data_counters/known_events/ for the list of valid events.
# source: defines which datasource will be used to locate events that should be included in aggregated metric. Valid values are:
# - database
# - redis
# feature_flag: name of development feature flag that will be checked before metrics aggregation is performed.
# Corresponding feature flag should have `default_enabled` attribute set to `false`.
# This attribute is OPTIONAL and can be omitted, when `feature_flag` is missing no feature flag will be checked.
---
- name: compliance_features_track_unique_visits_union
operator: OR
source: redis
events: ['g_compliance_audit_events', 'g_compliance_dashboard', 'i_compliance_audit_events', 'a_compliance_audit_events_api', 'i_compliance_credential_inventory']
- name: product_analytics_test_metrics_union
operator: OR
source: redis
events: ['i_search_total', 'i_search_advanced', 'i_search_paid']
- name: product_analytics_test_metrics_intersection
operator: AND
source: redis
events: ['i_search_total', 'i_search_advanced', 'i_search_paid']
- name: incident_management_alerts_total_unique_counts
operator: OR
source: redis
events: [
'incident_management_alert_status_changed',
'incident_management_alert_assigned',
......@@ -27,6 +34,7 @@
]
- name: incident_management_incidents_total_unique_counts
operator: OR
source: redis
events: [
'incident_management_incident_created',
'incident_management_incident_reopened',
......
......@@ -80,27 +80,6 @@ module Gitlab
DISTRIBUTED_HLL_FALLBACK
end
def save_aggregated_metrics(metric_name:, time_period:, recorded_at_timestamp:, data:)
unless data.is_a? ::Gitlab::Database::PostgresHll::Buckets
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(StandardError.new("Unsupported data type: #{data.class}"))
return
end
# the longest recorded usage ping generation time for gitlab.com
# was below 40 hours, there is added error margin of 20 h
usage_ping_generation_period = 80.hours
# add timestamp at the end of the key to avoid stale keys if
# usage ping job is retried
redis_key = "#{metric_name}_#{time_period_to_human_name(time_period)}-#{recorded_at_timestamp}"
Gitlab::Redis::SharedState.with do |redis|
redis.set(redis_key, data.to_json, ex: usage_ping_generation_period)
end
rescue ::Redis::CommandError => e
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end
def sum(relation, column, batch_size: nil, start: nil, finish: nil)
Gitlab::Database::BatchCount.batch_sum(relation, column, batch_size: batch_size, start: start, finish: finish)
rescue ActiveRecord::StatementInvalid
......@@ -152,20 +131,6 @@ module Gitlab
Gitlab::UsageDataCounters::HLLRedisCounter.track_event(event_name.to_s, values: values)
end
def time_period_to_human_name(time_period)
return ALL_TIME_PERIOD_HUMAN_NAME if time_period.blank?
date_range = time_period.values[0]
start_date = date_range.first.to_date
end_date = date_range.last.to_date
if (end_date - start_date).to_i > 7
MONTHLY_PERIOD_HUMAN_NAME
else
WEEKLY_PERIOD_HUMAN_NAME
end
end
private
def prometheus_client(verify:)
......
......@@ -7,34 +7,14 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
let(:entity2) { '1dd9afb2-a3ee-4de1-8ae3-a405579c8584' }
let(:entity3) { '34rfjuuy-ce56-sa35-ds34-dfer567dfrf2' }
let(:entity4) { '8b9a2671-2abf-4bec-a682-22f6a8f7bf31' }
let(:end_date) { Date.current }
let(:sources) { Gitlab::Usage::Metrics::Aggregates::Sources }
around do |example|
# We need to freeze to a reference time
# because visits are grouped by the week number in the year
# Without freezing the time, the test may behave inconsistently
# depending on which day of the week test is run.
# Monday 6th of June
reference_time = Time.utc(2020, 6, 1)
travel_to(reference_time) { example.run }
end
let_it_be(:recorded_at) { Time.current.to_i }
context 'aggregated_metrics_data' do
let(:known_events) do
[
{ name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" },
{ name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" },
{ name: 'event3_slot', redis_slot: "slot", category: 'category3', aggregation: "weekly" },
{ name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "weekly" },
{ name: 'event4', category: 'category2', aggregation: "weekly" }
].map(&:with_indifferent_access)
end
before do
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:known_events).and_return(known_events)
end
shared_examples 'aggregated_metrics_data' do
context 'no aggregated metrics is defined' do
context 'no aggregated metric is defined' do
it 'returns empty hash' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics).and_return([])
......@@ -51,23 +31,75 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
end
end
context 'with disabled database_sourced_aggregated_metrics feature flag' do
before do
stub_feature_flags(database_sourced_aggregated_metrics: false)
end
let(:aggregated_metrics) do
[
{ name: 'gmau_1', source: 'redis', events: %w[event3 event5], operator: "OR" },
{ name: 'gmau_2', source: 'database', events: %w[event1 event2 event3], operator: "OR" }
].map(&:with_indifferent_access)
end
it 'skips database sourced metrics', :aggregate_failures do
results = {
'gmau_1' => 5
}
params = { start_date: start_date, end_date: end_date, recorded_at: recorded_at }
expect(sources::RedisHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event3 event5])).and_return(5)
expect(sources::PostgresHll).not_to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event2 event3]))
expect(aggregated_metrics_data).to eq(results)
end
end
context 'with AND operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event1_slot event2_slot], operator: "AND" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot], operator: "AND" },
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" },
{ name: 'gmau_4', events: %w[event4], operator: "AND" }
{ name: 'gmau_1', source: 'redis', events: %w[event3 event5], operator: "AND" },
{ name: 'gmau_2', source: 'database', events: %w[event1 event2 event3], operator: "AND" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
it 'returns the number of unique events recorded for every metric in aggregate', :aggregate_failures do
results = {
'gmau_1' => 3,
'gmau_2' => 2,
'gmau_3' => 1,
'gmau_4' => 3
'gmau_1' => 2,
'gmau_2' => 1
}
params = { start_date: start_date, end_date: end_date, recorded_at: recorded_at }
# gmau_1 data is as follow
# |A| => 4
expect(sources::RedisHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event3')).and_return(4)
# |B| => 6
expect(sources::RedisHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event5')).and_return(6)
# |A + B| => 8
expect(sources::RedisHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event3 event5])).and_return(8)
# Exclusion inclusion principle formula to calculate intersection of 2 sets
# |A & B| = (|A| + |B|) - |A + B| => (4 + 6) - 8 => 2
# gmau_2 data is as follow:
# |A| => 2
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event1')).and_return(2)
# |B| => 3
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event2')).and_return(3)
# |C| => 5
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event3')).and_return(5)
# |A + B| => 4 therefore |A & B| = (|A| + |B|) - |A + B| => 2 + 3 - 4 => 1
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event2])).and_return(4)
# |A + C| => 6 therefore |A & C| = (|A| + |C|) - |A + C| => 2 + 5 - 6 => 1
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event3])).and_return(6)
# |B + C| => 7 therefore |B & C| = (|B| + |C|) - |B + C| => 3 + 5 - 7 => 1
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event2 event3])).and_return(7)
# |A + B + C| => 8
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event2 event3])).and_return(8)
# Exclusion inclusion principle formula to calculate intersection of 3 sets
# |A & B & C| = (|A & B| + |A & C| + |B & C|) - (|A| + |B| + |C|) + |A + B + C|
# (1 + 1 + 1) - (2 + 3 + 5) + 8 => 1
expect(aggregated_metrics_data).to eq(results)
end
......@@ -76,19 +108,20 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
context 'with OR operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_3', events: %w[event4], operator: "OR" }
{ name: 'gmau_1', source: 'redis', events: %w[event3 event5], operator: "OR" },
{ name: 'gmau_2', source: 'database', events: %w[event1 event2 event3], operator: "OR" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
it 'returns the number of unique events occurred for any metric in aggregate', :aggregate_failures do
results = {
'gmau_1' => 2,
'gmau_2' => 3,
'gmau_3' => 3
'gmau_1' => 5,
'gmau_2' => 3
}
params = { start_date: start_date, end_date: end_date, recorded_at: recorded_at }
expect(sources::RedisHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event3 event5])).and_return(5)
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event2 event3])).and_return(3)
expect(aggregated_metrics_data).to eq(results)
end
end
......@@ -99,19 +132,20 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
let(:aggregated_metrics) do
[
# represents stable aggregated metrics that has been fully released
{ name: 'gmau_without_ff', events: %w[event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_without_ff', source: 'redis', events: %w[event3_slot event5_slot], operator: "OR" },
# represents new aggregated metric that is under performance testing on gitlab.com
{ name: 'gmau_enabled', events: %w[event4], operator: "AND", feature_flag: enabled_feature_flag },
{ name: 'gmau_enabled', source: 'redis', events: %w[event4], operator: "OR", feature_flag: enabled_feature_flag },
# represents aggregated metric that is under development and shouldn't be yet collected even on gitlab.com
{ name: 'gmau_disabled', events: %w[event4], operator: "AND", feature_flag: disabled_feature_flag }
{ name: 'gmau_disabled', source: 'redis', events: %w[event4], operator: "OR", feature_flag: disabled_feature_flag }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
it 'does not calculate data for aggregates with ff turned off' do
skip_feature_flags_yaml_validation
stub_feature_flags(enabled_feature_flag => true, disabled_feature_flag => false)
allow(sources::RedisHll).to receive(:calculate_metrics_union).and_return(6)
expect(aggregated_metrics_data).to eq('gmau_without_ff' => 2, 'gmau_enabled' => 3)
expect(aggregated_metrics_data).to eq('gmau_without_ff' => 6, 'gmau_enabled' => 6)
end
end
end
......@@ -121,19 +155,28 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
it 'raises error when unknown aggregation operator is used' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "SUM" }])
.and_return([{ name: 'gmau_1', source: 'redis', events: %w[event1_slot], operator: "SUM" }])
end
expect { aggregated_metrics_data }.to raise_error Gitlab::Usage::Metrics::Aggregates::UnknownAggregationOperator
end
it 'raises error when unknown aggregation source is used' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', source: 'whoami', events: %w[event1_slot], operator: "AND" }])
end
expect { aggregated_metrics_data }.to raise_error Gitlab::Usage::Metrics::Aggregates::UnknownAggregationSource
end
it 're raises Gitlab::UsageDataCounters::HLLRedisCounter::EventError' do
error = Gitlab::UsageDataCounters::HLLRedisCounter::EventError
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union).and_raise(error)
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "OR" }])
.and_return([{ name: 'gmau_1', source: 'redis', events: %w[event1_slot], operator: "OR" }])
end
expect { aggregated_metrics_data }.to raise_error error
......@@ -148,7 +191,16 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
it 'rescues unknown aggregation operator error' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "SUM" }])
.and_return([{ name: 'gmau_1', source: 'redis', events: %w[event1_slot], operator: "SUM" }])
end
expect(aggregated_metrics_data).to eq('gmau_1' => -1)
end
it 'rescues unknown aggregation source error' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', source: 'whoami', events: %w[event1_slot], operator: "AND" }])
end
expect(aggregated_metrics_data).to eq('gmau_1' => -1)
......@@ -160,7 +212,7 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "OR" }])
.and_return([{ name: 'gmau_1', source: 'redis', events: %w[event1_slot], operator: "OR" }])
end
expect(aggregated_metrics_data).to eq('gmau_1' => -1)
......@@ -170,81 +222,47 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
end
describe '.aggregated_metrics_weekly_data' do
subject(:aggregated_metrics_data) { described_class.new.weekly_data }
before do
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity3, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity1, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity2, time: 3.days.ago)
# events out of time scope
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 8.days.ago)
# events in different slots
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity4, time: 2.days.ago)
end
subject(:aggregated_metrics_data) { described_class.new(recorded_at).weekly_data }
let(:start_date) { 7.days.ago.to_date }
it_behaves_like 'aggregated_metrics_data'
end
describe '.aggregated_metrics_monthly_data' do
subject(:aggregated_metrics_data) { described_class.new.monthly_data }
subject(:aggregated_metrics_data) { described_class.new(recorded_at).monthly_data }
it_behaves_like 'aggregated_metrics_data' do
before do
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity3, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity1, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity2, time: 10.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity2, time: 4.weeks.ago.advance(days: 1))
# events out of time scope
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity1, time: 4.weeks.ago.advance(days: -1))
# events in different slots
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity4, time: 2.days.ago)
end
end
let(:start_date) { 4.weeks.ago.to_date }
context 'Redis calls' do
it_behaves_like 'aggregated_metrics_data'
context 'metrics union calls' do
let(:aggregated_metrics) do
[
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" }
{ name: 'gmau_3', source: 'redis', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" }
].map(&:with_indifferent_access)
end
it 'caches intermediate operations' do
it 'caches intermediate operations', :aggregate_failures do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics).and_return(aggregated_metrics)
end
params = { start_date: start_date, end_date: end_date, recorded_at: recorded_at }
aggregated_metrics[0][:events].each do |event|
expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
.with(event_names: event, start_date: 4.weeks.ago.to_date, end_date: Date.current)
.once
.and_return(0)
expect(sources::RedisHll).to receive(:calculate_metrics_union)
.with(params.merge(metric_names: event))
.once
.and_return(0)
end
2.upto(4) do |subset_size|
aggregated_metrics[0][:events].combination(subset_size).each do |events|
expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
.with(event_names: events, start_date: 4.weeks.ago.to_date, end_date: Date.current)
.once
.and_return(0)
expect(sources::RedisHll).to receive(:calculate_metrics_union)
.with(params.merge(metric_names: events))
.once
.and_return(0)
end
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll, :clean_gitlab_redis_shared_state do
let_it_be(:start_date) { 7.days.ago }
let_it_be(:end_date) { Date.current }
let_it_be(:recorded_at) { Time.current }
let_it_be(:time_period) { { created_at: (start_date..end_date) } }
let(:metric_1) { 'metric_1' }
let(:metric_2) { 'metric_2' }
let(:metric_names) { [metric_1, metric_2] }
describe '.calculate_events_union' do
subject(:calculate_metrics_union) do
described_class.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
end
before do
[
{
metric_name: metric_1,
time_period: time_period,
recorded_at_timestamp: recorded_at,
data: ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1)
},
{
metric_name: metric_2,
time_period: time_period,
recorded_at_timestamp: recorded_at,
data: ::Gitlab::Database::PostgresHll::Buckets.new(10 => 1, 56 => 1)
}
].each do |params|
described_class.save_aggregated_metrics(**params)
end
end
it 'returns the number of unique events in the union of all metrics' do
expect(calculate_metrics_union.round(2)).to eq(3.12)
end
context 'when there is no aggregated data saved' do
let(:metric_names) { [metric_1, 'i do not have any records'] }
it 'raises error when union data is missing' do
expect { calculate_metrics_union }.to raise_error Gitlab::Usage::Metrics::Aggregates::Sources::UnionNotAvailable
end
end
context 'when there is only one metric defined as aggregated' do
let(:metric_names) { [metric_1] }
it 'returns the number of unique events for that metric' do
expect(calculate_metrics_union.round(2)).to eq(2.08)
end
end
end
describe '.save_aggregated_metrics' do
subject(:save_aggregated_metrics) do
described_class.save_aggregated_metrics(metric_name: metric_1,
time_period: time_period,
recorded_at_timestamp: recorded_at,
data: data)
end
context 'with compatible data argument' do
let(:data) { ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1) }
it 'persists serialized data in Redis' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).to receive(:set).with("#{metric_1}_weekly-#{recorded_at.to_i}", '{"141":1,"56":1}', ex: 120.hours)
end
save_aggregated_metrics
end
context 'with monthly key' do
let_it_be(:start_date) { 4.weeks.ago }
let_it_be(:time_period) { { created_at: (start_date..end_date) } }
it 'persists serialized data in Redis' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).to receive(:set).with("#{metric_1}_monthly-#{recorded_at.to_i}", '{"141":1,"56":1}', ex: 120.hours)
end
save_aggregated_metrics
end
end
context 'with all_time key' do
let_it_be(:time_period) { nil }
it 'persists serialized data in Redis' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).to receive(:set).with("#{metric_1}_all_time-#{recorded_at.to_i}", '{"141":1,"56":1}', ex: 120.hours)
end
save_aggregated_metrics
end
end
context 'error handling' do
before do
allow(Gitlab::Redis::SharedState).to receive(:with).and_raise(::Redis::CommandError)
end
it 'rescues and reraise ::Redis::CommandError for development and test environments' do
expect { save_aggregated_metrics }.to raise_error ::Redis::CommandError
end
context 'for environment different than development' do
before do
stub_rails_env('production')
end
it 'rescues ::Redis::CommandError' do
expect { save_aggregated_metrics }.not_to raise_error
end
end
end
end
context 'with incompatible data argument' do
let(:data) { 1 }
context 'for environment different than development' do
before do
stub_rails_env('production')
end
it 'does not persist data in Redis' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).not_to receive(:set)
end
save_aggregated_metrics
end
end
it 'raises error for development environment' do
expect { save_aggregated_metrics }.to raise_error /Unsupported data type/
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Usage::Metrics::Aggregates::Sources::RedisHll do
describe '.calculate_events_union' do
let(:event_names) { %w[event_a event_b] }
let(:start_date) { 7.days.ago }
let(:end_date) { Date.current }
subject(:calculate_metrics_union) do
described_class.calculate_metrics_union(metric_names: event_names, start_date: start_date, end_date: end_date, recorded_at: nil)
end
it 'calls Gitlab::UsageDataCounters::HLLRedisCounter.calculate_events_union' do
expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
.with(event_names: event_names, start_date: start_date, end_date: end_date)
.and_return(5)
calculate_metrics_union
end
it 'prevents from using fallback value as valid union result' do
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union).and_return(-1)
expect { calculate_metrics_union }.to raise_error Gitlab::Usage::Metrics::Aggregates::Sources::UnionNotAvailable
end
end
end
......@@ -13,18 +13,32 @@ RSpec.describe 'aggregated metrics' do
end
end
RSpec::Matchers.define :has_known_source do
match do |aggregate|
Gitlab::Usage::Metrics::Aggregates::SOURCES.include?(aggregate[:source])
end
failure_message do |aggregate|
"Aggregate with name: `#{aggregate[:name]}` uses not allowed source `#{aggregate[:source]}`"
end
end
let_it_be(:known_events) do
Gitlab::UsageDataCounters::HLLRedisCounter.known_events
end
Gitlab::Usage::Metrics::Aggregates::Aggregate.new.send(:aggregated_metrics).tap do |aggregated_metrics|
Gitlab::Usage::Metrics::Aggregates::Aggregate.new(Time.current).send(:aggregated_metrics).tap do |aggregated_metrics|
it 'all events has unique name' do
event_names = aggregated_metrics&.map { |event| event[:name] }
expect(event_names).to eq(event_names&.uniq)
end
aggregated_metrics&.each do |aggregate|
it 'all aggregated metrics has known source' do
expect(aggregated_metrics).to all has_known_source
end
aggregated_metrics&.select { |agg| agg[:source] == Gitlab::Usage::Metrics::Aggregates::REDIS_SOURCE }&.each do |aggregate|
context "for #{aggregate[:name]} aggregate of #{aggregate[:events].join(' ')}" do
let_it_be(:events_records) { known_events.select { |event| aggregate[:events].include?(event[:name]) } }
......
......@@ -372,97 +372,4 @@ RSpec.describe Gitlab::Utils::UsageData do
end
end
end
describe '#save_aggregated_metrics', :clean_gitlab_redis_shared_state do
let(:timestamp) { Time.current.to_i }
let(:time_period) { { created_at: 7.days.ago..Date.current } }
let(:metric_name) { 'test_metric' }
let(:method_params) do
{
metric_name: metric_name,
time_period: time_period,
recorded_at_timestamp: timestamp,
data: data
}
end
context 'with compatible data argument' do
let(:data) { ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1) }
it 'persists serialized data in Redis' do
time_period_name = 'weekly'
expect(described_class).to receive(:time_period_to_human_name).with(time_period).and_return(time_period_name)
Gitlab::Redis::SharedState.with do |redis|
expect(redis).to receive(:set).with("#{metric_name}_#{time_period_name}-#{timestamp}", '{"141":1,"56":1}', ex: 80.hours)
end
described_class.save_aggregated_metrics(**method_params)
end
context 'error handling' do
before do
allow(Gitlab::Redis::SharedState).to receive(:with).and_raise(::Redis::CommandError)
end
it 'rescues and reraise ::Redis::CommandError for development and test environments' do
expect { described_class.save_aggregated_metrics(**method_params) }.to raise_error ::Redis::CommandError
end
context 'for environment different than development' do
before do
stub_rails_env('production')
end
it 'rescues ::Redis::CommandError' do
expect { described_class.save_aggregated_metrics(**method_params) }.not_to raise_error
end
end
end
end
context 'with incompatible data argument' do
let(:data) { 1 }
context 'for environment different than development' do
before do
stub_rails_env('production')
end
it 'does not persist data in Redis' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).not_to receive(:set)
end
described_class.save_aggregated_metrics(**method_params)
end
end
it 'raises error for development environment' do
expect { described_class.save_aggregated_metrics(**method_params) }.to raise_error /Unsupported data type/
end
end
end
describe '#time_period_to_human_name' do
it 'translates empty time period as all_time' do
expect(described_class.time_period_to_human_name({})).to eql 'all_time'
end
it 'translates time period not longer than 7 days as weekly', :aggregate_failures do
days_6_time_period = 6.days.ago..Date.current
days_7_time_period = 7.days.ago..Date.current
expect(described_class.time_period_to_human_name(column_name: days_6_time_period)).to eql 'weekly'
expect(described_class.time_period_to_human_name(column_name: days_7_time_period)).to eql 'weekly'
end
it 'translates time period longer than 7 days as monthly', :aggregate_failures do
days_8_time_period = 8.days.ago..Date.current
days_31_time_period = 31.days.ago..Date.current
expect(described_class.time_period_to_human_name(column_name: days_8_time_period)).to eql 'monthly'
expect(described_class.time_period_to_human_name(column_name: days_31_time_period)).to eql 'monthly'
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment