Commit 08ecfdc4 authored by Mikolaj Wawrzyniak's avatar Mikolaj Wawrzyniak

Extract Aggregated Metrics out of RedisHll

In order to reuse Aggregated Metrics logic
with other Hll implementations, it has to be
extracted into separate module
parent 1efd4771
# frozen_string_literal: true
module Gitlab
module Usage
module Metrics
module Aggregates
# Operator values accepted in aggregated metric YAML definitions:
# 'OR' counts the union of events, 'AND' the intersection.
UNION_OF_AGGREGATED_METRICS = 'OR'
INTERSECTION_OF_AGGREGATED_METRICS = 'AND'
ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze
# Glob matching all YAML files that define aggregated metrics.
AGGREGATED_METRICS_PATH = Rails.root.join('lib/gitlab/usage_data_counters/aggregated_metrics/*.yml')
# Raised (re-raised only in dev/test) when a definition uses an operator
# outside ALLOWED_METRICS_AGGREGATIONS.
UnknownAggregationOperator = Class.new(StandardError)
# Computes aggregated usage metrics (unions and intersections of
# HLL-counted events) from the YAML definitions under
# AGGREGATED_METRICS_PATH.
#
# Public API: #monthly_data and #weekly_data, each returning
# { metric_name => count }.
class Aggregate
  delegate :calculate_events_union,
           :weekly_time_range,
           :monthly_time_range,
           to: Gitlab::UsageDataCounters::HLLRedisCounter

  def initialize
    # Load all aggregated metric definitions once per instance.
    @aggregated_metrics = load_events(AGGREGATED_METRICS_PATH)
  end

  # @return [Hash{String=>Integer}] metric name => count for the monthly window
  def monthly_data
    # Double-splat the time-range hash: passing it positionally to a
    # keyword-arguments method is deprecated in Ruby 2.7 and an
    # ArgumentError from Ruby 3.0 on.
    aggregated_metrics_data(**monthly_time_range)
  end

  # @return [Hash{String=>Integer}] metric name => count for the weekly window
  def weekly_data
    aggregated_metrics_data(**weekly_time_range)
  end

  private

  # Reader only: nothing writes @aggregated_metrics after initialize.
  attr_reader :aggregated_metrics

  def aggregated_metrics_data(start_date:, end_date:)
    aggregated_metrics.each_with_object({}) do |aggregation, data|
      # Metrics still under development can be hidden behind a :development feature flag.
      next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: false, type: :development)

      data[aggregation[:name]] = calculate_count_for_aggregation(aggregation, start_date: start_date, end_date: end_date)
    end
  end

  # Dispatches on the metric's operator: 'OR' -> union, 'AND' -> intersection.
  # Unknown operators and HLL counter errors are tracked (raising only in
  # dev/test) and fall back to Gitlab::Utils::UsageData::FALLBACK.
  def calculate_count_for_aggregation(aggregation, start_date:, end_date:)
    case aggregation[:operator]
    when UNION_OF_AGGREGATED_METRICS
      calculate_events_union(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
    when INTERSECTION_OF_AGGREGATED_METRICS
      calculate_events_intersections(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
    else
      Gitlab::ErrorTracking
        .track_and_raise_for_dev_exception(UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}"))
      Gitlab::Utils::UsageData::FALLBACK
    end
  rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError => error
    Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
    Gitlab::Utils::UsageData::FALLBACK
  end

  # Calculates the intersection of 'n' sets via the inclusion-exclusion
  # principle: https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
  #   |A & B & C|     = -(|A| + |B| + |C|) + (|A & B| + |A & C| + |B & C|) + |A + B + C|
  #   |A & B & C & D| =  (|A| + |B| + |C| + |D|) - (|A & B| + .. + |C & D|) + (|A & B & C| + ..) - |A + B + C + D|
  # This method will be extracted to a dedicated module with
  # https://gitlab.com/gitlab-org/gitlab/-/issues/273391
  #
  # subset_powers_cache memoizes intermediate results, keyed first by subset
  # size. The block-form default gives every size its own stored hash; the
  # previous Hash.new({}) default made every size mutate one shared default
  # hash, which only worked because the inner keys never collided.
  def calculate_events_intersections(event_names:, start_date:, end_date:, subset_powers_cache: Hash.new { |cache, size| cache[size] = {} })
    # Every component of the equation except the union-of-all-events term.
    subset_powers_data = subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)

    # The last component of the equation: |A + B + ...| (union of all events).
    power_of_union_of_all_events =
      subset_powers_cache[event_names.size][event_names.join('_+_')] ||=
        calculate_events_union(event_names: event_names, start_date: start_date, end_date: end_date)

    # Whether the union term is added or subtracted flips with the parity of
    # the number of subset-size components (see the expanded equations above).
    subset_powers_size_even = subset_powers_data.size.even?
    sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)

    sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_events : -power_of_union_of_all_events)
  end

  # Alternating sum of the per-subset-size powers; the overall sign is
  # determined by whether the number of components is even.
  def sum_subset_powers(subset_powers_data, subset_powers_size_even)
    sum_without_sign = subset_powers_data.to_enum.with_index.sum do |value, index|
      (index + 1).odd? ? value : -value
    end

    (subset_powers_size_even ? -1 : 1) * sum_without_sign
  end

  # For every subset size from 1 to n-1, sums the intersection powers of all
  # event subsets of that size (size 1 degenerates to plain single-event unions).
  def subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
    subset_sizes = (1..(event_names.size - 1))

    subset_sizes.map do |subset_size|
      if subset_size > 1
        # Sum of intersection powers of every subset with the given size:
        # |A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
        event_names.combination(subset_size).sum do |events_subset|
          subset_powers_cache[subset_size][events_subset.join('_&_')] ||= \
            calculate_events_intersections(event_names: events_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
        end
      else
        # Sum of powers of each event alone:
        # |A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
        event_names.sum do |event|
          subset_powers_cache[subset_size][event] ||= \
            calculate_events_union(event_names: event, start_date: start_date, end_date: end_date)
        end
      end
    end
  end

  # Loads and concatenates every metric definition matching the glob.
  def load_events(wildcard)
    Dir[wildcard].flat_map { |path| load_yaml_from_path(path) || [] }
  end

  # @return [Array<HashWithIndifferentAccess>, nil] nil when the file is empty
  def load_yaml_from_path(path)
    YAML.safe_load(File.read(path))&.map(&:with_indifferent_access)
  end
end
end
end
end
end
......@@ -23,6 +23,7 @@ module Gitlab
deployment_minimum_id
deployment_maximum_id
auth_providers
aggregated_metrics
recorded_at
).freeze
......@@ -691,13 +692,13 @@ module Gitlab
def aggregated_metrics_monthly
{
aggregated_metrics: ::Gitlab::UsageDataCounters::HLLRedisCounter.aggregated_metrics_monthly_data
aggregated_metrics: aggregated_metrics.monthly_data
}
end
def aggregated_metrics_weekly
{
aggregated_metrics: ::Gitlab::UsageDataCounters::HLLRedisCounter.aggregated_metrics_weekly_data
aggregated_metrics: aggregated_metrics.weekly_data
}
end
......@@ -742,6 +743,10 @@ module Gitlab
private
def aggregated_metrics
@aggregated_metrics ||= ::Gitlab::Usage::Metrics::Aggregates::Aggregate.new
end
def event_monthly_active_users(date_range)
data = {
action_monthly_active_users_project_repo: Gitlab::UsageDataCounters::TrackUniqueEvents::PUSH_ACTION,
......
......@@ -13,15 +13,10 @@ module Gitlab
AggregationMismatch = Class.new(EventError)
SlotMismatch = Class.new(EventError)
CategoryMismatch = Class.new(EventError)
UnknownAggregationOperator = Class.new(EventError)
InvalidContext = Class.new(EventError)
KNOWN_EVENTS_PATH = File.expand_path('known_events/*.yml', __dir__)
ALLOWED_AGGREGATIONS = %i(daily weekly).freeze
UNION_OF_AGGREGATED_METRICS = 'OR'
INTERSECTION_OF_AGGREGATED_METRICS = 'AND'
ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze
AGGREGATED_METRICS_PATH = File.expand_path('aggregated_metrics/*.yml', __dir__)
# Track event on entity_id
# Increment a Redis HLL counter for unique event_name and entity_id
......@@ -90,37 +85,40 @@ module Gitlab
events_names = events_for_category(category)
event_results = events_names.each_with_object({}) do |event, hash|
hash["#{event}_weekly"] = unique_events(event_names: [event], start_date: 7.days.ago.to_date, end_date: Date.current)
hash["#{event}_monthly"] = unique_events(event_names: [event], start_date: 4.weeks.ago.to_date, end_date: Date.current)
hash["#{event}_weekly"] = unique_events(**weekly_time_range.merge(event_names: [event]))
hash["#{event}_monthly"] = unique_events(**monthly_time_range.merge(event_names: [event]))
end
if eligible_for_totals?(events_names)
event_results["#{category}_total_unique_counts_weekly"] = unique_events(event_names: events_names, start_date: 7.days.ago.to_date, end_date: Date.current)
event_results["#{category}_total_unique_counts_monthly"] = unique_events(event_names: events_names, start_date: 4.weeks.ago.to_date, end_date: Date.current)
event_results["#{category}_total_unique_counts_weekly"] = unique_events(**weekly_time_range.merge(event_names: events_names))
event_results["#{category}_total_unique_counts_monthly"] = unique_events(**monthly_time_range.merge(event_names: events_names))
end
category_results["#{category}"] = event_results
end
end
def known_event?(event_name)
event_for(event_name).present?
def weekly_time_range
{ start_date: 7.days.ago.to_date, end_date: Date.current }
end
def aggregated_metrics_monthly_data
aggregated_metrics_data(4.weeks.ago.to_date)
def monthly_time_range
{ start_date: 4.weeks.ago.to_date, end_date: Date.current }
end
def aggregated_metrics_weekly_data
aggregated_metrics_data(7.days.ago.to_date)
def known_event?(event_name)
event_for(event_name).present?
end
def known_events
@known_events ||= load_events(KNOWN_EVENTS_PATH)
end
def aggregated_metrics
@aggregated_metrics ||= load_events(AGGREGATED_METRICS_PATH)
def calculate_events_union(event_names:, start_date:, end_date:)
count_unique_events(event_names: event_names, start_date: start_date, end_date: end_date) do |events|
raise SlotMismatch, events unless events_in_same_slot?(events)
raise AggregationMismatch, events unless events_same_aggregation?(events)
end
end
private
......@@ -139,93 +137,6 @@ module Gitlab
Plan.all_plans
end
def aggregated_metrics_data(start_date)
aggregated_metrics.each_with_object({}) do |aggregation, weekly_data|
next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: false, type: :development)
weekly_data[aggregation[:name]] = calculate_count_for_aggregation(aggregation, start_date: start_date, end_date: Date.current)
end
end
def calculate_count_for_aggregation(aggregation, start_date:, end_date:)
case aggregation[:operator]
when UNION_OF_AGGREGATED_METRICS
calculate_events_union(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
when INTERSECTION_OF_AGGREGATED_METRICS
calculate_events_intersections(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
else
raise UnknownAggregationOperator, "Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}"
end
end
# calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
# this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391
def calculate_events_intersections(event_names:, start_date:, end_date:, subset_powers_cache: Hash.new({}))
# calculate power of intersection of all given metrics from inclusion exclusion principle
# |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) =>
# |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
# |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
# |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
# calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ...
subset_powers_data = subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
# calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D|
power_of_union_of_all_events = begin
subset_powers_cache[event_names.size][event_names.join('_+_')] ||= \
calculate_events_union(event_names: event_names, start_date: start_date, end_date: end_date)
end
# in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate,
# is positive or negative in particular equation we need to determine if number of subsets is even or odd. Please take a look at two examples below
# |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + |A & B & C| =>
# |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
# |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
# |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
subset_powers_size_even = subset_powers_data.size.even?
# sum all components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... =>
sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)
# add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D|
sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_events : -power_of_union_of_all_events)
end
def sum_subset_powers(subset_powers_data, subset_powers_size_even)
sum_without_sign = subset_powers_data.to_enum.with_index.sum do |value, index|
(index + 1).odd? ? value : -value
end
(subset_powers_size_even ? -1 : 1) * sum_without_sign
end
def subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
subset_sizes = (1..(event_names.size - 1))
subset_sizes.map do |subset_size|
if subset_size > 1
# calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
event_names.combination(subset_size).sum do |events_subset|
subset_powers_cache[subset_size][events_subset.join('_&_')] ||= \
calculate_events_intersections(event_names: events_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
end
else
# calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
event_names.sum do |event|
subset_powers_cache[subset_size][event] ||= \
unique_events(event_names: event, start_date: start_date, end_date: end_date)
end
end
end
end
def calculate_events_union(event_names:, start_date:, end_date:)
count_unique_events(event_names: event_names, start_date: start_date, end_date: end_date) do |events|
raise SlotMismatch, events unless events_in_same_slot?(events)
raise AggregationMismatch, events unless events_same_aggregation?(events)
end
end
def count_unique_events(event_names:, start_date:, end_date:, context: '')
events = events_for(Array(event_names).map(&:to_s))
......@@ -340,12 +251,6 @@ module Gitlab
end.flatten
end
def validate_aggregation_operator!(operator)
return true if ALLOWED_METRICS_AGGREGATIONS.include?(operator)
raise UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}")
end
def weekly_redis_keys(events:, start_date:, end_date:, context: '')
end_date = end_date.end_of_week - 1.week
(start_date.to_date..end_date.to_date).map do |date|
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for Gitlab::Usage::Metrics::Aggregates::Aggregate, which computes
# weekly/monthly aggregated metrics (unions/intersections of HLL events).
RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redis_shared_state do
# Distinct entity ids used as HLL values throughout the examples.
let(:entity1) { 'dfb9d2d2-f56c-4c77-8aeb-6cddc4a1f857' }
let(:entity2) { '1dd9afb2-a3ee-4de1-8ae3-a405579c8584' }
let(:entity3) { '34rfjuuy-ce56-sa35-ds34-dfer567dfrf2' }
let(:entity4) { '8b9a2671-2abf-4bec-a682-22f6a8f7bf31' }
around do |example|
# We need to freeze to a reference time
# because visits are grouped by the week number in the year
# Without freezing the time, the test may behave inconsistently
# depending on which day of the week test is run.
# Monday 1st of June 2020 (Time.utc(2020, 6, 1) is a Monday).
reference_time = Time.utc(2020, 6, 1)
travel_to(reference_time) { example.run }
end
context 'aggregated_metrics_data' do
let(:known_events) do
[
{ name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" },
{ name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" },
{ name: 'event3_slot', redis_slot: "slot", category: 'category3', aggregation: "weekly" },
{ name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "weekly" },
{ name: 'event4', category: 'category2', aggregation: "weekly" }
].map(&:with_indifferent_access)
end
before do
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:known_events).and_return(known_events)
end
# Shared by the weekly and monthly describes below; each caller defines
# `aggregated_metrics_data` as the subject under test.
shared_examples 'aggregated_metrics_data' do
context 'no aggregated metrics is defined' do
it 'returns empty hash' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics).and_return([])
end
expect(aggregated_metrics_data).to eq({})
end
end
context 'there are aggregated metrics defined' do
before do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics).and_return(aggregated_metrics)
end
end
context 'with AND operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event1_slot event2_slot], operator: "AND" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot], operator: "AND" },
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" },
{ name: 'gmau_4', events: %w[event4], operator: "AND" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 3,
'gmau_2' => 2,
'gmau_3' => 1,
'gmau_4' => 3
}
expect(aggregated_metrics_data).to eq(results)
end
end
context 'with OR operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_3', events: %w[event4], operator: "OR" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 2,
'gmau_2' => 3,
'gmau_3' => 3
}
expect(aggregated_metrics_data).to eq(results)
end
end
context 'hidden behind feature flag' do
let(:enabled_feature_flag) { 'test_ff_enabled' }
let(:disabled_feature_flag) { 'test_ff_disabled' }
let(:aggregated_metrics) do
[
# represents stable aggregated metrics that has been fully released
{ name: 'gmau_without_ff', events: %w[event3_slot event5_slot], operator: "OR" },
# represents new aggregated metric that is under performance testing on gitlab.com
{ name: 'gmau_enabled', events: %w[event4], operator: "AND", feature_flag: enabled_feature_flag },
# represents aggregated metric that is under development and shouldn't be yet collected even on gitlab.com
{ name: 'gmau_disabled', events: %w[event4], operator: "AND", feature_flag: disabled_feature_flag }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
skip_feature_flags_yaml_validation
stub_feature_flags(enabled_feature_flag => true, disabled_feature_flag => false)
expect(aggregated_metrics_data).to eq('gmau_without_ff' => 2, 'gmau_enabled' => 3)
end
end
end
context 'error handling' do
# In dev/test, track_and_raise_for_dev_exception re-raises the error.
context 'development and test environment' do
it 'raises error when unknown aggregation operator is used' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "SUM" }])
end
expect { aggregated_metrics_data }.to raise_error Gitlab::Usage::Metrics::Aggregates::UnknownAggregationOperator
end
it 're raises Gitlab::UsageDataCounters::HLLRedisCounter::EventError' do
error = Gitlab::UsageDataCounters::HLLRedisCounter::EventError
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union).and_raise(error)
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "OR" }])
end
expect { aggregated_metrics_data }.to raise_error error
end
end
# In production errors are only tracked and the metric falls back to -1.
context 'production' do
before do
stub_rails_env('production')
end
it 'rescues unknown aggregation operator error' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "SUM" }])
end
expect(aggregated_metrics_data).to eq('gmau_1' => -1)
end
it 'rescues Gitlab::UsageDataCounters::HLLRedisCounter::EventError' do
error = Gitlab::UsageDataCounters::HLLRedisCounter::EventError
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union).and_raise(error)
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "OR" }])
end
expect(aggregated_metrics_data).to eq('gmau_1' => -1)
end
end
end
end
describe '.aggregated_metrics_weekly_data' do
subject(:aggregated_metrics_data) { described_class.new.weekly_data }
before do
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity3, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity1, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity2, time: 3.days.ago)
# events out of time scope
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 8.days.ago)
# events in different slots
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity4, time: 2.days.ago)
end
it_behaves_like 'aggregated_metrics_data'
end
describe '.aggregated_metrics_monthly_data' do
subject(:aggregated_metrics_data) { described_class.new.monthly_data }
it_behaves_like 'aggregated_metrics_data' do
before do
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity3, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity1, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity2, time: 10.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity2, time: 4.weeks.ago.advance(days: 1))
# events out of time scope
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity1, time: 4.weeks.ago.advance(days: -1))
# events in different slots
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity4, time: 2.days.ago)
end
end
# Verifies the subset_powers_cache memoization: each union must be
# computed exactly once per distinct event combination.
context 'Redis calls' do
let(:aggregated_metrics) do
[
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" }
].map(&:with_indifferent_access)
end
it 'caches intermediate operations' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics).and_return(aggregated_metrics)
end
aggregated_metrics[0][:events].each do |event|
expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
.with(event_names: event, start_date: 4.weeks.ago.to_date, end_date: Date.current)
.once
.and_return(0)
end
2.upto(4) do |subset_size|
aggregated_metrics[0][:events].combination(subset_size).each do |events|
expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
.with(event_names: events, start_date: 4.weeks.ago.to_date, end_date: Date.current)
.once
.and_return(0)
end
end
aggregated_metrics_data
end
end
end
end
end
......@@ -17,7 +17,7 @@ RSpec.describe 'aggregated metrics' do
Gitlab::UsageDataCounters::HLLRedisCounter.known_events
end
Gitlab::UsageDataCounters::HLLRedisCounter.aggregated_metrics.tap do |aggregated_metrics|
Gitlab::Usage::Metrics::Aggregates::Aggregate.new.send(:aggregated_metrics).tap do |aggregated_metrics|
it 'all events has unique name' do
event_names = aggregated_metrics&.map { |event| event[:name] }
......@@ -37,7 +37,7 @@ RSpec.describe 'aggregated metrics' do
end
it "uses allowed aggregation operators" do
expect(Gitlab::UsageDataCounters::HLLRedisCounter::ALLOWED_METRICS_AGGREGATIONS).to include aggregate[:operator]
expect(Gitlab::Usage::Metrics::Aggregates::ALLOWED_METRICS_AGGREGATIONS).to include aggregate[:operator]
end
it "uses events from the same Redis slot" do
......
......@@ -426,182 +426,59 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s
end
end
context 'aggregated_metrics_data' do
describe '.calculate_events_union' do
let(:time_range) { { start_date: 7.days.ago, end_date: DateTime.current } }
let(:known_events) do
[
{ name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" },
{ name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" },
{ name: 'event3_slot', redis_slot: "slot", category: 'category3', aggregation: "weekly" },
{ name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "weekly" },
{ name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "daily" },
{ name: 'event4', category: 'category2', aggregation: "weekly" }
].map(&:with_indifferent_access)
end
before do
allow(described_class).to receive(:known_events).and_return(known_events)
end
shared_examples 'aggregated_metrics_data' do
context 'no aggregated metrics is defined' do
it 'returns empty hash' do
allow(described_class).to receive(:aggregated_metrics).and_return([])
expect(aggregated_metrics_data).to eq({})
end
end
context 'there are aggregated metrics defined' do
before do
allow(described_class).to receive(:aggregated_metrics).and_return(aggregated_metrics)
end
context 'with AND operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event1_slot event2_slot], operator: "AND" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot], operator: "AND" },
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" },
{ name: 'gmau_4', events: %w[event4], operator: "AND" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 3,
'gmau_2' => 2,
'gmau_3' => 1,
'gmau_4' => 3
}
expect(aggregated_metrics_data).to eq(results)
end
end
context 'with OR operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_3', events: %w[event4], operator: "OR" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 2,
'gmau_2' => 3,
'gmau_3' => 3
}
expect(aggregated_metrics_data).to eq(results)
end
end
context 'hidden behind feature flag' do
let(:enabled_feature_flag) { 'test_ff_enabled' }
let(:disabled_feature_flag) { 'test_ff_disabled' }
let(:aggregated_metrics) do
[
# represents stable aggregated metrics that has been fully released
{ name: 'gmau_without_ff', events: %w[event3_slot event5_slot], operator: "OR" },
# represents new aggregated metric that is under performance testing on gitlab.com
{ name: 'gmau_enabled', events: %w[event4], operator: "AND", feature_flag: enabled_feature_flag },
# represents aggregated metric that is under development and shouldn't be yet collected even on gitlab.com
{ name: 'gmau_disabled', events: %w[event4], operator: "AND", feature_flag: disabled_feature_flag }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
skip_feature_flags_yaml_validation
stub_feature_flags(enabled_feature_flag => true, disabled_feature_flag => false)
described_class.track_event('event1_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity2, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity3, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity2, time: 3.days.ago)
described_class.track_event('event2_slot', values: entity3, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity1, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity2, time: 3.days.ago)
described_class.track_event('event5_slot', values: entity2, time: 3.days.ago)
# events out of time scope
described_class.track_event('event2_slot', values: entity4, time: 8.days.ago)
expect(aggregated_metrics_data).to eq('gmau_without_ff' => 2, 'gmau_enabled' => 3)
end
end
end
# events in different slots
described_class.track_event('event4', values: entity1, time: 2.days.ago)
described_class.track_event('event4', values: entity2, time: 2.days.ago)
end
# NOTE(review): this describe block is a scrambled diff extraction — indentation
# was lost and several nested blocks are interleaved out of order. The comments
# below flag spots that cannot be valid RSpec as written; verify each against
# the original spec file before relying on the structure shown here.
describe '.aggregated_metrics_weekly_data' do
subject(:aggregated_metrics_data) { described_class.aggregated_metrics_weekly_data }
# Seeds Redis HLL events inside the weekly window (2-3 days ago) plus one
# event outside it (8 days ago) to prove the window boundary is honoured.
before do
described_class.track_event('event1_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity2, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity3, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity2, time: 3.days.ago)
described_class.track_event('event2_slot', values: entity3, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity1, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity2, time: 3.days.ago)
described_class.track_event('event5_slot', values: entity2, time: 3.days.ago)
# events out of time scope
described_class.track_event('event2_slot', values: entity3, time: 8.days.ago)
# events in different slots
described_class.track_event('event4', values: entity1, time: 2.days.ago)
described_class.track_event('event4', values: entity2, time: 2.days.ago)
described_class.track_event('event4', values: entity4, time: 2.days.ago)
end
it_behaves_like 'aggregated_metrics_data'
# NOTE(review): tag below is `:aggregate_failure` (singular) while the rest of
# the file uses `:aggregate_failures` — likely a typo in the original; RSpec
# would treat the singular form as an unknown metadata key.
it 'calculates union of given events', :aggregate_failure do
expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[event4]))).to eq 2
expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[event1_slot event2_slot event3_slot]))).to eq 3
end
# NOTE(review): a `.aggregated_metrics_monthly_data` describe appearing nested
# inside the weekly describe — presumably a sibling block in the original file,
# spliced here by the extraction. TODO: confirm nesting against the source.
describe '.aggregated_metrics_monthly_data' do
subject(:aggregated_metrics_data) { described_class.aggregated_metrics_monthly_data }
it_behaves_like 'aggregated_metrics_data' do
# Monthly window: events up to 4 weeks ago (advance(days: 1)) count,
# one event just past the boundary (advance(days: -1)) must not.
before do
described_class.track_event('event1_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity2, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity3, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity2, time: 3.days.ago)
described_class.track_event('event2_slot', values: entity3, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity1, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity2, time: 10.days.ago)
described_class.track_event('event5_slot', values: entity2, time: 4.weeks.ago.advance(days: 1))
# events out of time scope
described_class.track_event('event5_slot', values: entity1, time: 4.weeks.ago.advance(days: -1))
# events in different slots
described_class.track_event('event4', values: entity1, time: 2.days.ago)
described_class.track_event('event4', values: entity2, time: 2.days.ago)
described_class.track_event('event4', values: entity4, time: 2.days.ago)
end
end
context 'Redis calls' do
# A single AND-aggregated metric over four events, all sharing one redis
# slot and weekly aggregation, so intersection math is exercised below.
let(:aggregated_metrics) do
[
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" }
].map(&:with_indifferent_access)
end
let(:known_events) do
[
{ name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" },
{ name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" },
{ name: 'event3_slot', redis_slot: "slot", category: 'category3', aggregation: "weekly" },
{ name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "weekly" }
].map(&:with_indifferent_access)
end
it 'caches intermediate operations' do
allow(described_class).to receive(:known_events).and_return(known_events)
allow(described_class).to receive(:aggregated_metrics).and_return(aggregated_metrics)
# NOTE(review): an `it` block opens here inside the `it` above without the
# previous example being closed — these lines belong to a different example
# ('validates and raise exception ...') spliced in by the extraction.
it 'validates and raise exception if events has mismatched slot or aggregation', :aggregate_failure do
expect { described_class.calculate_events_union(**time_range.merge(event_names: %w[event1_slot event4])) }.to raise_error described_class::SlotMismatch
expect { described_class.calculate_events_union(**time_range.merge(event_names: %w[event5_slot event3_slot])) }.to raise_error described_class::AggregationMismatch
end
end
# NOTE(review): this loop sets one `Gitlab::Redis::HLL.count` expectation per
# event subset (sizes 4 down to 1) — presumably the body of the
# 'caches intermediate operations' example above; confirm placement.
4.downto(1) do |subset_size|
known_events.combination(subset_size).each do |events|
keys = described_class.send(:weekly_redis_keys, events: events, start_date: 4.weeks.ago.to_date, end_date: Date.current)
expect(Gitlab::Redis::HLL).to receive(:count).with(keys: keys).once.and_return(0)
end
end
# NOTE(review): '.weekly_time_range' describe spliced mid-example here; in the
# original it is almost certainly a sibling of '.monthly_time_range' below.
describe '.weekly_time_range' do
it 'return hash with weekly time range boundaries' do
expect(described_class.weekly_time_range).to eq(start_date: 7.days.ago.to_date, end_date: Date.current)
end
end
# NOTE(review): stranded `subject` — likely the trigger line closing the
# 'caches intermediate operations' example in the original layout.
subject
end
end
describe '.monthly_time_range' do
  it 'return hash with monthly time range boundaries' do
    # Boundaries are dates, not times: 4 weeks back through today.
    time_range = described_class.monthly_time_range

    expect(time_range).to eq(start_date: 4.weeks.ago.to_date, end_date: Date.current)
  end
end
end
# diff hunk @@ -1286,8 +1286,10 @@ in RSpec.describe Gitlab::UsageData, :aggregate_failures
describe '.aggregated_metrics_weekly' do
  subject(:aggregated_metrics_payload) { described_class.aggregated_metrics_weekly }

  it 'uses ::Gitlab::Usage::Metrics::Aggregates::Aggregate#weekly_data', :aggregate_failures do
    # Stub the next Aggregate instance: this spec only verifies that
    # UsageData delegates to Aggregate#weekly_data and wraps the result,
    # not the aggregation logic itself.
    expect_next_instance_of(::Gitlab::Usage::Metrics::Aggregates::Aggregate) do |instance|
      expect(instance).to receive(:weekly_data).and_return(global_search_gmau: 123)
    end

    expect(aggregated_metrics_payload).to eq(aggregated_metrics: { global_search_gmau: 123 })
  end
end
# diff hunk @@ -1295,8 +1297,10 @@ in RSpec.describe Gitlab::UsageData, :aggregate_failures
describe '.aggregated_metrics_monthly' do
  subject(:aggregated_metrics_payload) { described_class.aggregated_metrics_monthly }

  it 'uses ::Gitlab::Usage::Metrics::Aggregates::Aggregate#monthly_data', :aggregate_failures do
    # Stub the next Aggregate instance: this spec only verifies that
    # UsageData delegates to Aggregate#monthly_data and wraps the result,
    # not the aggregation logic itself.
    expect_next_instance_of(::Gitlab::Usage::Metrics::Aggregates::Aggregate) do |instance|
      expect(instance).to receive(:monthly_data).and_return(global_search_gmau: 123)
    end

    expect(aggregated_metrics_payload).to eq(aggregated_metrics: { global_search_gmau: 123 })
  end
end
# (end of diff excerpt)
# NOTE: trailing GitLab merge-request page chrome ("Markdown is supported",
# comment-form boilerplate, etc.) removed — it was web-page residue, not code.