Commit 9564bbb5 authored by Alper Akgun's avatar Alper Akgun

Merge branch...

Merge branch '299325-extend-aggregated-metrics-definition-to-include-database-metrics' into 'master'

Extend aggregated metrics definition to include database metrics [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!52784
parents 02acc3f6 5e319f52
---
name: database_sourced_aggregated_metrics
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/52784
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/300411
milestone: '13.9'
type: development
group: group::product intelligence
default_enabled: false
......@@ -451,10 +451,12 @@ module EE
pipelines_with_secure_jobs[metric_name.to_sym] =
if start_id && finish_id
estimate_batch_distinct_count(relation, :commit_id, batch_size: 1000, start: start_id, finish: finish_id) do |result|
save_aggregated_metrics(**aggregated_metrics_params.merge({ data: result }))
::Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll
.save_aggregated_metrics(**aggregated_metrics_params.merge({ data: result }))
end
else
save_aggregated_metrics(**aggregated_metrics_params.merge({ data: ::Gitlab::Database::PostgresHll::Buckets.new }))
::Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll
.save_aggregated_metrics(**aggregated_metrics_params.merge({ data: ::Gitlab::Database::PostgresHll::Buckets.new }))
0
end
end
......
......@@ -8,16 +8,26 @@ module Gitlab
INTERSECTION_OF_AGGREGATED_METRICS = 'AND'
ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze
AGGREGATED_METRICS_PATH = Rails.root.join('lib/gitlab/usage_data_counters/aggregated_metrics/*.yml')
UnknownAggregationOperator = Class.new(StandardError)
AggregatedMetricError = Class.new(StandardError)
UnknownAggregationOperator = Class.new(AggregatedMetricError)
UnknownAggregationSource = Class.new(AggregatedMetricError)
DATABASE_SOURCE = 'database'
REDIS_SOURCE = 'redis'
SOURCES = {
DATABASE_SOURCE => Sources::PostgresHll,
REDIS_SOURCE => Sources::RedisHll
}.freeze
class Aggregate
delegate :calculate_events_union,
:weekly_time_range,
delegate :weekly_time_range,
:monthly_time_range,
to: Gitlab::UsageDataCounters::HLLRedisCounter
def initialize
@aggregated_metrics = load_events(AGGREGATED_METRICS_PATH)
def initialize(recorded_at)
@aggregated_metrics = load_metrics(AGGREGATED_METRICS_PATH)
@recorded_at = recorded_at
end
def monthly_data
......@@ -30,35 +40,49 @@ module Gitlab
private
attr_accessor :aggregated_metrics
attr_accessor :aggregated_metrics, :recorded_at
def aggregated_metrics_data(start_date:, end_date:)
aggregated_metrics.each_with_object({}) do |aggregation, weekly_data|
aggregated_metrics.each_with_object({}) do |aggregation, data|
next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: false, type: :development)
weekly_data[aggregation[:name]] = calculate_count_for_aggregation(aggregation, start_date: start_date, end_date: end_date)
case aggregation[:source]
when REDIS_SOURCE
data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date)
when DATABASE_SOURCE
next unless Feature.enabled?('database_sourced_aggregated_metrics', default_enabled: false, type: :development)
data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date)
else
Gitlab::ErrorTracking
.track_and_raise_for_dev_exception(UnknownAggregationSource.new("Aggregation source: '#{aggregation[:source]}' must be included in #{SOURCES.keys}"))
data[aggregation[:name]] = Gitlab::Utils::UsageData::FALLBACK
end
end
end
def calculate_count_for_aggregation(aggregation, start_date:, end_date:)
def calculate_count_for_aggregation(aggregation:, start_date:, end_date:)
source = SOURCES[aggregation[:source]]
case aggregation[:operator]
when UNION_OF_AGGREGATED_METRICS
calculate_events_union(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
source.calculate_metrics_union(metric_names: aggregation[:events], start_date: start_date, end_date: end_date, recorded_at: recorded_at)
when INTERSECTION_OF_AGGREGATED_METRICS
calculate_events_intersections(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
calculate_metrics_intersections(source: source, metric_names: aggregation[:events], start_date: start_date, end_date: end_date)
else
Gitlab::ErrorTracking
.track_and_raise_for_dev_exception(UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}"))
Gitlab::Utils::UsageData::FALLBACK
end
rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError => error
rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError, AggregatedMetricError => error
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
Gitlab::Utils::UsageData::FALLBACK
end
# calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
# this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391
def calculate_events_intersections(event_names:, start_date:, end_date:, subset_powers_cache: Hash.new({}))
def calculate_metrics_intersections(source:, metric_names:, start_date:, end_date:, subset_powers_cache: Hash.new({}))
# calculate power of intersection of all given metrics from inclusion exclusion principle
# |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) =>
# |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
......@@ -66,12 +90,12 @@ module Gitlab
# |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
# calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ...
subset_powers_data = subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
subset_powers_data = subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache)
# calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D|
power_of_union_of_all_events = begin
subset_powers_cache[event_names.size][event_names.join('_+_')] ||= \
calculate_events_union(event_names: event_names, start_date: start_date, end_date: end_date)
power_of_union_of_all_metrics = begin
subset_powers_cache[metric_names.size][metric_names.join('_+_')] ||= \
source.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
end
# in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate,
......@@ -86,7 +110,7 @@ module Gitlab
sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)
# add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D|
sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_events : -power_of_union_of_all_events)
sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_metrics : -power_of_union_of_all_metrics)
end
def sum_subset_powers(subset_powers_data, subset_powers_size_even)
......@@ -97,29 +121,29 @@ module Gitlab
(subset_powers_size_even ? -1 : 1) * sum_without_sign
end
def subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
subset_sizes = (1..(event_names.size - 1))
def subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache)
subset_sizes = (1...metric_names.size)
subset_sizes.map do |subset_size|
if subset_size > 1
# calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
event_names.combination(subset_size).sum do |events_subset|
subset_powers_cache[subset_size][events_subset.join('_&_')] ||= \
calculate_events_intersections(event_names: events_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
metric_names.combination(subset_size).sum do |metrics_subset|
subset_powers_cache[subset_size][metrics_subset.join('_&_')] ||=
calculate_metrics_intersections(source: source, metric_names: metrics_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
end
else
# calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
event_names.sum do |event|
subset_powers_cache[subset_size][event] ||= \
calculate_events_union(event_names: event, start_date: start_date, end_date: end_date)
metric_names.sum do |metric|
subset_powers_cache[subset_size][metric] ||= \
source.calculate_metrics_union(metric_names: metric, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
end
end
end
end
def load_events(wildcard)
Dir[wildcard].each_with_object([]) do |path, events|
events.push(*load_yaml_from_path(path))
def load_metrics(wildcard)
Dir[wildcard].each_with_object([]) do |path, metrics|
metrics.push(*load_yaml_from_path(path))
end
end
......
# frozen_string_literal: true
module Gitlab
module Usage
module Metrics
module Aggregates
module Sources
class PostgresHll
class << self
def calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at:)
time_period = start_date && end_date ? (start_date..end_date) : nil
Array(metric_names).each_with_object(Gitlab::Database::PostgresHll::Buckets.new) do |event, buckets|
json = read_aggregated_metric(metric_name: event, time_period: time_period, recorded_at: recorded_at)
raise UnionNotAvailable, "Union data not available for #{metric_names}" unless json
buckets.merge_hash!(Gitlab::Json.parse(json))
end.estimated_distinct_count
end
def save_aggregated_metrics(metric_name:, time_period:, recorded_at_timestamp:, data:)
unless data.is_a? ::Gitlab::Database::PostgresHll::Buckets
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(StandardError.new("Unsupported data type: #{data.class}"))
return
end
# Usage Ping report generation for gitlab.com is very long running process
# to make sure that saved keys are available at the end of report generation process
# lets use triple max generation time
keys_expiration = ::Gitlab::UsageData::MAX_GENERATION_TIME_FOR_SAAS * 3
Gitlab::Redis::SharedState.with do |redis|
redis.set(
redis_key(metric_name: metric_name, time_period: time_period&.values&.first, recorded_at: recorded_at_timestamp),
data.to_json,
ex: keys_expiration
)
end
rescue ::Redis::CommandError => e
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end
private
def read_aggregated_metric(metric_name:, time_period:, recorded_at:)
Gitlab::Redis::SharedState.with do |redis|
redis.get(redis_key(metric_name: metric_name, time_period: time_period, recorded_at: recorded_at))
end
end
def redis_key(metric_name:, time_period:, recorded_at:)
# add timestamp at the end of the key to avoid stale keys if
# usage ping job is retried
"#{metric_name}_#{time_period_to_human_name(time_period)}-#{recorded_at.to_i}"
end
def time_period_to_human_name(time_period)
return Gitlab::Utils::UsageData::ALL_TIME_PERIOD_HUMAN_NAME if time_period.blank?
start_date = time_period.first.to_date
end_date = time_period.last.to_date
if (end_date - start_date).to_i > 7
Gitlab::Utils::UsageData::MONTHLY_PERIOD_HUMAN_NAME
else
Gitlab::Utils::UsageData::WEEKLY_PERIOD_HUMAN_NAME
end
end
end
end
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Usage
module Metrics
module Aggregates
module Sources
UnionNotAvailable = Class.new(AggregatedMetricError)
class RedisHll
def self.calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at: nil)
union = Gitlab::UsageDataCounters::HLLRedisCounter
.calculate_events_union(event_names: metric_names, start_date: start_date, end_date: end_date)
return union if union >= 0
raise UnionNotAvailable, "Union data not available for #{metric_names}"
end
end
end
end
end
end
end
......@@ -13,6 +13,7 @@
module Gitlab
class UsageData
DEPRECATED_VALUE = -1000
MAX_GENERATION_TIME_FOR_SAAS = 40.hours
CE_MEMOIZED_VALUES = %i(
issue_minimum_id
......@@ -754,7 +755,7 @@ module Gitlab
private
def aggregated_metrics
@aggregated_metrics ||= ::Gitlab::Usage::Metrics::Aggregates::Aggregate.new
@aggregated_metrics ||= ::Gitlab::Usage::Metrics::Aggregates::Aggregate.new(recorded_at)
end
def event_monthly_active_users(date_range)
......
......@@ -4,21 +4,28 @@
# - "AND": counts unique elements that were observed triggering all of following events
# events: list of events names to aggregate into metric. All events in this list must have the same 'redis_slot' and 'aggregation' attributes
# see from lib/gitlab/usage_data_counters/known_events/ for the list of valid events.
# source: defines which datasource will be used to locate events that should be included in aggregated metric. Valid values are:
# - database
# - redis
# feature_flag: name of development feature flag that will be checked before metrics aggregation is performed.
# Corresponding feature flag should have `default_enabled` attribute set to `false`.
# This attribute is OPTIONAL and can be omitted, when `feature_flag` is missing no feature flag will be checked.
---
- name: compliance_features_track_unique_visits_union
operator: OR
source: redis
events: ['g_compliance_audit_events', 'g_compliance_dashboard', 'i_compliance_audit_events', 'a_compliance_audit_events_api', 'i_compliance_credential_inventory']
- name: product_analytics_test_metrics_union
operator: OR
source: redis
events: ['i_search_total', 'i_search_advanced', 'i_search_paid']
- name: product_analytics_test_metrics_intersection
operator: AND
source: redis
events: ['i_search_total', 'i_search_advanced', 'i_search_paid']
- name: incident_management_alerts_total_unique_counts
operator: OR
source: redis
events: [
'incident_management_alert_status_changed',
'incident_management_alert_assigned',
......@@ -27,6 +34,7 @@
]
- name: incident_management_incidents_total_unique_counts
operator: OR
source: redis
events: [
'incident_management_incident_created',
'incident_management_incident_reopened',
......
......@@ -80,27 +80,6 @@ module Gitlab
DISTRIBUTED_HLL_FALLBACK
end
def save_aggregated_metrics(metric_name:, time_period:, recorded_at_timestamp:, data:)
unless data.is_a? ::Gitlab::Database::PostgresHll::Buckets
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(StandardError.new("Unsupported data type: #{data.class}"))
return
end
# the longest recorded usage ping generation time for gitlab.com
# was below 40 hours, there is added error margin of 20 h
usage_ping_generation_period = 80.hours
# add timestamp at the end of the key to avoid stale keys if
# usage ping job is retried
redis_key = "#{metric_name}_#{time_period_to_human_name(time_period)}-#{recorded_at_timestamp}"
Gitlab::Redis::SharedState.with do |redis|
redis.set(redis_key, data.to_json, ex: usage_ping_generation_period)
end
rescue ::Redis::CommandError => e
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end
def sum(relation, column, batch_size: nil, start: nil, finish: nil)
Gitlab::Database::BatchCount.batch_sum(relation, column, batch_size: batch_size, start: start, finish: finish)
rescue ActiveRecord::StatementInvalid
......@@ -152,20 +131,6 @@ module Gitlab
Gitlab::UsageDataCounters::HLLRedisCounter.track_event(event_name.to_s, values: values)
end
def time_period_to_human_name(time_period)
return ALL_TIME_PERIOD_HUMAN_NAME if time_period.blank?
date_range = time_period.values[0]
start_date = date_range.first.to_date
end_date = date_range.last.to_date
if (end_date - start_date).to_i > 7
MONTHLY_PERIOD_HUMAN_NAME
else
WEEKLY_PERIOD_HUMAN_NAME
end
end
private
def prometheus_client(verify:)
......
......@@ -7,34 +7,14 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
let(:entity2) { '1dd9afb2-a3ee-4de1-8ae3-a405579c8584' }
let(:entity3) { '34rfjuuy-ce56-sa35-ds34-dfer567dfrf2' }
let(:entity4) { '8b9a2671-2abf-4bec-a682-22f6a8f7bf31' }
let(:end_date) { Date.current }
let(:sources) { Gitlab::Usage::Metrics::Aggregates::Sources }
around do |example|
# We need to freeze to a reference time
# because visits are grouped by the week number in the year
# Without freezing the time, the test may behave inconsistently
# depending on which day of the week test is run.
# Monday 6th of June
reference_time = Time.utc(2020, 6, 1)
travel_to(reference_time) { example.run }
end
let_it_be(:recorded_at) { Time.current.to_i }
context 'aggregated_metrics_data' do
let(:known_events) do
[
{ name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" },
{ name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" },
{ name: 'event3_slot', redis_slot: "slot", category: 'category3', aggregation: "weekly" },
{ name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "weekly" },
{ name: 'event4', category: 'category2', aggregation: "weekly" }
].map(&:with_indifferent_access)
end
before do
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:known_events).and_return(known_events)
end
shared_examples 'aggregated_metrics_data' do
context 'no aggregated metrics is defined' do
context 'no aggregated metric is defined' do
it 'returns empty hash' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics).and_return([])
......@@ -51,23 +31,75 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
end
end
context 'with disabled database_sourced_aggregated_metrics feature flag' do
before do
stub_feature_flags(database_sourced_aggregated_metrics: false)
end
let(:aggregated_metrics) do
[
{ name: 'gmau_1', source: 'redis', events: %w[event3 event5], operator: "OR" },
{ name: 'gmau_2', source: 'database', events: %w[event1 event2 event3], operator: "OR" }
].map(&:with_indifferent_access)
end
it 'skips database sourced metrics', :aggregate_failures do
results = {
'gmau_1' => 5
}
params = { start_date: start_date, end_date: end_date, recorded_at: recorded_at }
expect(sources::RedisHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event3 event5])).and_return(5)
expect(sources::PostgresHll).not_to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event2 event3]))
expect(aggregated_metrics_data).to eq(results)
end
end
context 'with AND operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event1_slot event2_slot], operator: "AND" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot], operator: "AND" },
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" },
{ name: 'gmau_4', events: %w[event4], operator: "AND" }
{ name: 'gmau_1', source: 'redis', events: %w[event3 event5], operator: "AND" },
{ name: 'gmau_2', source: 'database', events: %w[event1 event2 event3], operator: "AND" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
it 'returns the number of unique events recorded for every metric in aggregate', :aggregate_failures do
results = {
'gmau_1' => 3,
'gmau_2' => 2,
'gmau_3' => 1,
'gmau_4' => 3
'gmau_1' => 2,
'gmau_2' => 1
}
params = { start_date: start_date, end_date: end_date, recorded_at: recorded_at }
# gmau_1 data is as follow
# |A| => 4
expect(sources::RedisHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event3')).and_return(4)
# |B| => 6
expect(sources::RedisHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event5')).and_return(6)
# |A + B| => 8
expect(sources::RedisHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event3 event5])).and_return(8)
# Exclusion inclusion principle formula to calculate intersection of 2 sets
# |A & B| = (|A| + |B|) - |A + B| => (4 + 6) - 8 => 2
# gmau_2 data is as follow:
# |A| => 2
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event1')).and_return(2)
# |B| => 3
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event2')).and_return(3)
# |C| => 5
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: 'event3')).and_return(5)
# |A + B| => 4 therefore |A & B| = (|A| + |B|) - |A + B| => 2 + 3 - 4 => 1
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event2])).and_return(4)
# |A + C| => 6 therefore |A & C| = (|A| + |C|) - |A + C| => 2 + 5 - 6 => 1
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event3])).and_return(6)
# |B + C| => 7 therefore |B & C| = (|B| + |C|) - |B + C| => 3 + 5 - 7 => 1
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event2 event3])).and_return(7)
# |A + B + C| => 8
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event2 event3])).and_return(8)
# Exclusion inclusion principle formula to calculate intersection of 3 sets
# |A & B & C| = (|A & B| + |A & C| + |B & C|) - (|A| + |B| + |C|) + |A + B + C|
# (1 + 1 + 1) - (2 + 3 + 5) + 8 => 1
expect(aggregated_metrics_data).to eq(results)
end
......@@ -76,19 +108,20 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
context 'with OR operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_3', events: %w[event4], operator: "OR" }
{ name: 'gmau_1', source: 'redis', events: %w[event3 event5], operator: "OR" },
{ name: 'gmau_2', source: 'database', events: %w[event1 event2 event3], operator: "OR" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
it 'returns the number of unique events occurred for any metric in aggregate', :aggregate_failures do
results = {
'gmau_1' => 2,
'gmau_2' => 3,
'gmau_3' => 3
'gmau_1' => 5,
'gmau_2' => 3
}
params = { start_date: start_date, end_date: end_date, recorded_at: recorded_at }
expect(sources::RedisHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event3 event5])).and_return(5)
expect(sources::PostgresHll).to receive(:calculate_metrics_union).with(params.merge(metric_names: %w[event1 event2 event3])).and_return(3)
expect(aggregated_metrics_data).to eq(results)
end
end
......@@ -99,19 +132,20 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
let(:aggregated_metrics) do
[
# represents stable aggregated metrics that has been fully released
{ name: 'gmau_without_ff', events: %w[event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_without_ff', source: 'redis', events: %w[event3_slot event5_slot], operator: "OR" },
# represents new aggregated metric that is under performance testing on gitlab.com
{ name: 'gmau_enabled', events: %w[event4], operator: "AND", feature_flag: enabled_feature_flag },
{ name: 'gmau_enabled', source: 'redis', events: %w[event4], operator: "OR", feature_flag: enabled_feature_flag },
# represents aggregated metric that is under development and shouldn't be yet collected even on gitlab.com
{ name: 'gmau_disabled', events: %w[event4], operator: "AND", feature_flag: disabled_feature_flag }
{ name: 'gmau_disabled', source: 'redis', events: %w[event4], operator: "OR", feature_flag: disabled_feature_flag }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
it 'does not calculate data for aggregates with ff turned off' do
skip_feature_flags_yaml_validation
stub_feature_flags(enabled_feature_flag => true, disabled_feature_flag => false)
allow(sources::RedisHll).to receive(:calculate_metrics_union).and_return(6)
expect(aggregated_metrics_data).to eq('gmau_without_ff' => 2, 'gmau_enabled' => 3)
expect(aggregated_metrics_data).to eq('gmau_without_ff' => 6, 'gmau_enabled' => 6)
end
end
end
......@@ -121,19 +155,28 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
it 'raises error when unknown aggregation operator is used' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "SUM" }])
.and_return([{ name: 'gmau_1', source: 'redis', events: %w[event1_slot], operator: "SUM" }])
end
expect { aggregated_metrics_data }.to raise_error Gitlab::Usage::Metrics::Aggregates::UnknownAggregationOperator
end
it 'raises error when unknown aggregation source is used' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', source: 'whoami', events: %w[event1_slot], operator: "AND" }])
end
expect { aggregated_metrics_data }.to raise_error Gitlab::Usage::Metrics::Aggregates::UnknownAggregationSource
end
it 're raises Gitlab::UsageDataCounters::HLLRedisCounter::EventError' do
error = Gitlab::UsageDataCounters::HLLRedisCounter::EventError
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union).and_raise(error)
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "OR" }])
.and_return([{ name: 'gmau_1', source: 'redis', events: %w[event1_slot], operator: "OR" }])
end
expect { aggregated_metrics_data }.to raise_error error
......@@ -148,7 +191,16 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
it 'rescues unknown aggregation operator error' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "SUM" }])
.and_return([{ name: 'gmau_1', source: 'redis', events: %w[event1_slot], operator: "SUM" }])
end
expect(aggregated_metrics_data).to eq('gmau_1' => -1)
end
it 'rescues unknown aggregation source error' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', source: 'whoami', events: %w[event1_slot], operator: "AND" }])
end
expect(aggregated_metrics_data).to eq('gmau_1' => -1)
......@@ -160,7 +212,7 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "OR" }])
.and_return([{ name: 'gmau_1', source: 'redis', events: %w[event1_slot], operator: "OR" }])
end
expect(aggregated_metrics_data).to eq('gmau_1' => -1)
......@@ -170,81 +222,47 @@ RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redi
end
describe '.aggregated_metrics_weekly_data' do
subject(:aggregated_metrics_data) { described_class.new.weekly_data }
before do
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity3, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity1, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity2, time: 3.days.ago)
# events out of time scope
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 8.days.ago)
# events in different slots
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity4, time: 2.days.ago)
end
subject(:aggregated_metrics_data) { described_class.new(recorded_at).weekly_data }
let(:start_date) { 7.days.ago.to_date }
it_behaves_like 'aggregated_metrics_data'
end
describe '.aggregated_metrics_monthly_data' do
subject(:aggregated_metrics_data) { described_class.new.monthly_data }
subject(:aggregated_metrics_data) { described_class.new(recorded_at).monthly_data }
it_behaves_like 'aggregated_metrics_data' do
before do
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity3, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity1, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity2, time: 10.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity2, time: 4.weeks.ago.advance(days: 1))
# events out of time scope
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity1, time: 4.weeks.ago.advance(days: -1))
# events in different slots
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity4, time: 2.days.ago)
end
end
let(:start_date) { 4.weeks.ago.to_date }
context 'Redis calls' do
it_behaves_like 'aggregated_metrics_data'
context 'metrics union calls' do
let(:aggregated_metrics) do
[
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" }
{ name: 'gmau_3', source: 'redis', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" }
].map(&:with_indifferent_access)
end
it 'caches intermediate operations' do
it 'caches intermediate operations', :aggregate_failures do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics).and_return(aggregated_metrics)
end
params = { start_date: start_date, end_date: end_date, recorded_at: recorded_at }
aggregated_metrics[0][:events].each do |event|
expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
.with(event_names: event, start_date: 4.weeks.ago.to_date, end_date: Date.current)
.once
.and_return(0)
expect(sources::RedisHll).to receive(:calculate_metrics_union)
.with(params.merge(metric_names: event))
.once
.and_return(0)
end
2.upto(4) do |subset_size|
aggregated_metrics[0][:events].combination(subset_size).each do |events|
expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
.with(event_names: events, start_date: 4.weeks.ago.to_date, end_date: Date.current)
.once
.and_return(0)
expect(sources::RedisHll).to receive(:calculate_metrics_union)
.with(params.merge(metric_names: events))
.once
.and_return(0)
end
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll, :clean_gitlab_redis_shared_state do
let_it_be(:start_date) { 7.days.ago }
let_it_be(:end_date) { Date.current }
let_it_be(:recorded_at) { Time.current }
let_it_be(:time_period) { { created_at: (start_date..end_date) } }
let(:metric_1) { 'metric_1' }
let(:metric_2) { 'metric_2' }
let(:metric_names) { [metric_1, metric_2] }
describe '.calculate_events_union' do
subject(:calculate_metrics_union) do
described_class.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
end
before do
[
{
metric_name: metric_1,
time_period: time_period,
recorded_at_timestamp: recorded_at,
data: ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1)
},
{
metric_name: metric_2,
time_period: time_period,
recorded_at_timestamp: recorded_at,
data: ::Gitlab::Database::PostgresHll::Buckets.new(10 => 1, 56 => 1)
}
].each do |params|
described_class.save_aggregated_metrics(**params)
end
end
it 'returns the number of unique events in the union of all metrics' do
expect(calculate_metrics_union.round(2)).to eq(3.12)
end
context 'when there is no aggregated data saved' do
let(:metric_names) { [metric_1, 'i do not have any records'] }
it 'raises error when union data is missing' do
expect { calculate_metrics_union }.to raise_error Gitlab::Usage::Metrics::Aggregates::Sources::UnionNotAvailable
end
end
context 'when there is only one metric defined as aggregated' do
let(:metric_names) { [metric_1] }
it 'returns the number of unique events for that metric' do
expect(calculate_metrics_union.round(2)).to eq(2.08)
end
end
end
describe '.save_aggregated_metrics' do
subject(:save_aggregated_metrics) do
described_class.save_aggregated_metrics(metric_name: metric_1,
time_period: time_period,
recorded_at_timestamp: recorded_at,
data: data)
end
context 'with compatible data argument' do
let(:data) { ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1) }
it 'persists serialized data in Redis' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).to receive(:set).with("#{metric_1}_weekly-#{recorded_at.to_i}", '{"141":1,"56":1}', ex: 120.hours)
end
save_aggregated_metrics
end
context 'with monthly key' do
let_it_be(:start_date) { 4.weeks.ago }
let_it_be(:time_period) { { created_at: (start_date..end_date) } }
it 'persists serialized data in Redis' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).to receive(:set).with("#{metric_1}_monthly-#{recorded_at.to_i}", '{"141":1,"56":1}', ex: 120.hours)
end
save_aggregated_metrics
end
end
context 'with all_time key' do
let_it_be(:time_period) { nil }
it 'persists serialized data in Redis' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).to receive(:set).with("#{metric_1}_all_time-#{recorded_at.to_i}", '{"141":1,"56":1}', ex: 120.hours)
end
save_aggregated_metrics
end
end
context 'error handling' do
before do
allow(Gitlab::Redis::SharedState).to receive(:with).and_raise(::Redis::CommandError)
end
it 'rescues and reraise ::Redis::CommandError for development and test environments' do
expect { save_aggregated_metrics }.to raise_error ::Redis::CommandError
end
context 'for environment different than development' do
before do
stub_rails_env('production')
end
it 'rescues ::Redis::CommandError' do
expect { save_aggregated_metrics }.not_to raise_error
end
end
end
end
context 'with incompatible data argument' do
let(:data) { 1 }
context 'for environment different than development' do
before do
stub_rails_env('production')
end
it 'does not persist data in Redis' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).not_to receive(:set)
end
save_aggregated_metrics
end
end
it 'raises error for development environment' do
expect { save_aggregated_metrics }.to raise_error /Unsupported data type/
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Usage::Metrics::Aggregates::Sources::RedisHll do
describe '.calculate_events_union' do
let(:event_names) { %w[event_a event_b] }
let(:start_date) { 7.days.ago }
let(:end_date) { Date.current }
subject(:calculate_metrics_union) do
described_class.calculate_metrics_union(metric_names: event_names, start_date: start_date, end_date: end_date, recorded_at: nil)
end
it 'calls Gitlab::UsageDataCounters::HLLRedisCounter.calculate_events_union' do
expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
.with(event_names: event_names, start_date: start_date, end_date: end_date)
.and_return(5)
calculate_metrics_union
end
it 'prevents from using fallback value as valid union result' do
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union).and_return(-1)
expect { calculate_metrics_union }.to raise_error Gitlab::Usage::Metrics::Aggregates::Sources::UnionNotAvailable
end
end
end
......@@ -13,18 +13,32 @@ RSpec.describe 'aggregated metrics' do
end
end
RSpec::Matchers.define :has_known_source do
match do |aggregate|
Gitlab::Usage::Metrics::Aggregates::SOURCES.include?(aggregate[:source])
end
failure_message do |aggregate|
"Aggregate with name: `#{aggregate[:name]}` uses not allowed source `#{aggregate[:source]}`"
end
end
let_it_be(:known_events) do
Gitlab::UsageDataCounters::HLLRedisCounter.known_events
end
Gitlab::Usage::Metrics::Aggregates::Aggregate.new.send(:aggregated_metrics).tap do |aggregated_metrics|
Gitlab::Usage::Metrics::Aggregates::Aggregate.new(Time.current).send(:aggregated_metrics).tap do |aggregated_metrics|
it 'all events has unique name' do
event_names = aggregated_metrics&.map { |event| event[:name] }
expect(event_names).to eq(event_names&.uniq)
end
aggregated_metrics&.each do |aggregate|
it 'all aggregated metrics has known source' do
expect(aggregated_metrics).to all has_known_source
end
aggregated_metrics&.select { |agg| agg[:source] == Gitlab::Usage::Metrics::Aggregates::REDIS_SOURCE }&.each do |aggregate|
context "for #{aggregate[:name]} aggregate of #{aggregate[:events].join(' ')}" do
let_it_be(:events_records) { known_events.select { |event| aggregate[:events].include?(event[:name]) } }
......
......@@ -372,97 +372,4 @@ RSpec.describe Gitlab::Utils::UsageData do
end
end
end
describe '#save_aggregated_metrics', :clean_gitlab_redis_shared_state do
let(:timestamp) { Time.current.to_i }
let(:time_period) { { created_at: 7.days.ago..Date.current } }
let(:metric_name) { 'test_metric' }
let(:method_params) do
{
metric_name: metric_name,
time_period: time_period,
recorded_at_timestamp: timestamp,
data: data
}
end
context 'with compatible data argument' do
let(:data) { ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1) }
it 'persists serialized data in Redis' do
time_period_name = 'weekly'
expect(described_class).to receive(:time_period_to_human_name).with(time_period).and_return(time_period_name)
Gitlab::Redis::SharedState.with do |redis|
expect(redis).to receive(:set).with("#{metric_name}_#{time_period_name}-#{timestamp}", '{"141":1,"56":1}', ex: 80.hours)
end
described_class.save_aggregated_metrics(**method_params)
end
context 'error handling' do
before do
allow(Gitlab::Redis::SharedState).to receive(:with).and_raise(::Redis::CommandError)
end
it 'rescues and reraise ::Redis::CommandError for development and test environments' do
expect { described_class.save_aggregated_metrics(**method_params) }.to raise_error ::Redis::CommandError
end
context 'for environment different than development' do
before do
stub_rails_env('production')
end
it 'rescues ::Redis::CommandError' do
expect { described_class.save_aggregated_metrics(**method_params) }.not_to raise_error
end
end
end
end
context 'with incompatible data argument' do
let(:data) { 1 }
context 'for environment different than development' do
before do
stub_rails_env('production')
end
it 'does not persist data in Redis' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).not_to receive(:set)
end
described_class.save_aggregated_metrics(**method_params)
end
end
it 'raises error for development environment' do
expect { described_class.save_aggregated_metrics(**method_params) }.to raise_error /Unsupported data type/
end
end
end
describe '#time_period_to_human_name' do
it 'translates empty time period as all_time' do
expect(described_class.time_period_to_human_name({})).to eql 'all_time'
end
it 'translates time period not longer than 7 days as weekly', :aggregate_failures do
days_6_time_period = 6.days.ago..Date.current
days_7_time_period = 7.days.ago..Date.current
expect(described_class.time_period_to_human_name(column_name: days_6_time_period)).to eql 'weekly'
expect(described_class.time_period_to_human_name(column_name: days_7_time_period)).to eql 'weekly'
end
it 'translates time period longer than 7 days as monthly', :aggregate_failures do
days_8_time_period = 8.days.ago..Date.current
days_31_time_period = 31.days.ago..Date.current
expect(described_class.time_period_to_human_name(column_name: days_8_time_period)).to eql 'monthly'
expect(described_class.time_period_to_human_name(column_name: days_31_time_period)).to eql 'monthly'
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment