Commit 44c184e9 authored by Heinrich Lee Yu's avatar Heinrich Lee Yu

Merge branch...

Merge branch 'mwaw/273391-extract-product-analytics-metrics-aggregation-out-of-hllrediscounter-module' into 'master'

Extract Product Analytics metrics aggregation out of HLLRedisCounter module

See merge request gitlab-org/gitlab!52334
parents 0224e857 5ce4b4d2
# frozen_string_literal: true
module Gitlab
module Usage
module Metrics
module Aggregates
UNION_OF_AGGREGATED_METRICS = 'OR'
INTERSECTION_OF_AGGREGATED_METRICS = 'AND'
ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze
AGGREGATED_METRICS_PATH = Rails.root.join('lib/gitlab/usage_data_counters/aggregated_metrics/*.yml')
UnknownAggregationOperator = Class.new(StandardError)
class Aggregate
delegate :calculate_events_union,
:weekly_time_range,
:monthly_time_range,
to: Gitlab::UsageDataCounters::HLLRedisCounter
def initialize
@aggregated_metrics = load_events(AGGREGATED_METRICS_PATH)
end
def monthly_data
aggregated_metrics_data(**monthly_time_range)
end
def weekly_data
aggregated_metrics_data(**weekly_time_range)
end
private
attr_accessor :aggregated_metrics
def aggregated_metrics_data(start_date:, end_date:)
aggregated_metrics.each_with_object({}) do |aggregation, weekly_data|
next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: false, type: :development)
weekly_data[aggregation[:name]] = calculate_count_for_aggregation(aggregation, start_date: start_date, end_date: end_date)
end
end
def calculate_count_for_aggregation(aggregation, start_date:, end_date:)
case aggregation[:operator]
when UNION_OF_AGGREGATED_METRICS
calculate_events_union(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
when INTERSECTION_OF_AGGREGATED_METRICS
calculate_events_intersections(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
else
Gitlab::ErrorTracking
.track_and_raise_for_dev_exception(UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}"))
Gitlab::Utils::UsageData::FALLBACK
end
rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError => error
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
Gitlab::Utils::UsageData::FALLBACK
end
# calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
# this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391
def calculate_events_intersections(event_names:, start_date:, end_date:, subset_powers_cache: Hash.new({}))
# calculate power of intersection of all given metrics from inclusion exclusion principle
# |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) =>
# |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
# |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
# |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
# calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ...
subset_powers_data = subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
# calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D|
power_of_union_of_all_events = begin
subset_powers_cache[event_names.size][event_names.join('_+_')] ||= \
calculate_events_union(event_names: event_names, start_date: start_date, end_date: end_date)
end
# in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate,
# is positive or negative in particular equation we need to determine if number of subsets is even or odd. Please take a look at two examples below
# |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + |A & B & C| =>
# |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
# |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
# |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
subset_powers_size_even = subset_powers_data.size.even?
# sum all components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... =>
sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)
# add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D|
sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_events : -power_of_union_of_all_events)
end
def sum_subset_powers(subset_powers_data, subset_powers_size_even)
sum_without_sign = subset_powers_data.to_enum.with_index.sum do |value, index|
(index + 1).odd? ? value : -value
end
(subset_powers_size_even ? -1 : 1) * sum_without_sign
end
def subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
subset_sizes = (1..(event_names.size - 1))
subset_sizes.map do |subset_size|
if subset_size > 1
# calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
event_names.combination(subset_size).sum do |events_subset|
subset_powers_cache[subset_size][events_subset.join('_&_')] ||= \
calculate_events_intersections(event_names: events_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
end
else
# calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
event_names.sum do |event|
subset_powers_cache[subset_size][event] ||= \
calculate_events_union(event_names: event, start_date: start_date, end_date: end_date)
end
end
end
end
def load_events(wildcard)
Dir[wildcard].each_with_object([]) do |path, events|
events.push(*load_yaml_from_path(path))
end
end
def load_yaml_from_path(path)
YAML.safe_load(File.read(path))&.map(&:with_indifferent_access)
end
end
end
end
end
end
...@@ -23,6 +23,7 @@ module Gitlab ...@@ -23,6 +23,7 @@ module Gitlab
deployment_minimum_id deployment_minimum_id
deployment_maximum_id deployment_maximum_id
auth_providers auth_providers
aggregated_metrics
recorded_at recorded_at
).freeze ).freeze
...@@ -691,13 +692,13 @@ module Gitlab ...@@ -691,13 +692,13 @@ module Gitlab
def aggregated_metrics_monthly def aggregated_metrics_monthly
{ {
aggregated_metrics: ::Gitlab::UsageDataCounters::HLLRedisCounter.aggregated_metrics_monthly_data aggregated_metrics: aggregated_metrics.monthly_data
} }
end end
def aggregated_metrics_weekly def aggregated_metrics_weekly
{ {
aggregated_metrics: ::Gitlab::UsageDataCounters::HLLRedisCounter.aggregated_metrics_weekly_data aggregated_metrics: aggregated_metrics.weekly_data
} }
end end
...@@ -742,6 +743,10 @@ module Gitlab ...@@ -742,6 +743,10 @@ module Gitlab
private private
def aggregated_metrics
@aggregated_metrics ||= ::Gitlab::Usage::Metrics::Aggregates::Aggregate.new
end
def event_monthly_active_users(date_range) def event_monthly_active_users(date_range)
data = { data = {
action_monthly_active_users_project_repo: Gitlab::UsageDataCounters::TrackUniqueEvents::PUSH_ACTION, action_monthly_active_users_project_repo: Gitlab::UsageDataCounters::TrackUniqueEvents::PUSH_ACTION,
......
...@@ -13,15 +13,10 @@ module Gitlab ...@@ -13,15 +13,10 @@ module Gitlab
AggregationMismatch = Class.new(EventError) AggregationMismatch = Class.new(EventError)
SlotMismatch = Class.new(EventError) SlotMismatch = Class.new(EventError)
CategoryMismatch = Class.new(EventError) CategoryMismatch = Class.new(EventError)
UnknownAggregationOperator = Class.new(EventError)
InvalidContext = Class.new(EventError) InvalidContext = Class.new(EventError)
KNOWN_EVENTS_PATH = File.expand_path('known_events/*.yml', __dir__) KNOWN_EVENTS_PATH = File.expand_path('known_events/*.yml', __dir__)
ALLOWED_AGGREGATIONS = %i(daily weekly).freeze ALLOWED_AGGREGATIONS = %i(daily weekly).freeze
UNION_OF_AGGREGATED_METRICS = 'OR'
INTERSECTION_OF_AGGREGATED_METRICS = 'AND'
ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze
AGGREGATED_METRICS_PATH = File.expand_path('aggregated_metrics/*.yml', __dir__)
# Track event on entity_id # Track event on entity_id
# Increment a Redis HLL counter for unique event_name and entity_id # Increment a Redis HLL counter for unique event_name and entity_id
...@@ -90,37 +85,40 @@ module Gitlab ...@@ -90,37 +85,40 @@ module Gitlab
events_names = events_for_category(category) events_names = events_for_category(category)
event_results = events_names.each_with_object({}) do |event, hash| event_results = events_names.each_with_object({}) do |event, hash|
hash["#{event}_weekly"] = unique_events(event_names: [event], start_date: 7.days.ago.to_date, end_date: Date.current) hash["#{event}_weekly"] = unique_events(**weekly_time_range.merge(event_names: [event]))
hash["#{event}_monthly"] = unique_events(event_names: [event], start_date: 4.weeks.ago.to_date, end_date: Date.current) hash["#{event}_monthly"] = unique_events(**monthly_time_range.merge(event_names: [event]))
end end
if eligible_for_totals?(events_names) if eligible_for_totals?(events_names)
event_results["#{category}_total_unique_counts_weekly"] = unique_events(event_names: events_names, start_date: 7.days.ago.to_date, end_date: Date.current) event_results["#{category}_total_unique_counts_weekly"] = unique_events(**weekly_time_range.merge(event_names: events_names))
event_results["#{category}_total_unique_counts_monthly"] = unique_events(event_names: events_names, start_date: 4.weeks.ago.to_date, end_date: Date.current) event_results["#{category}_total_unique_counts_monthly"] = unique_events(**monthly_time_range.merge(event_names: events_names))
end end
category_results["#{category}"] = event_results category_results["#{category}"] = event_results
end end
end end
def known_event?(event_name) def weekly_time_range
event_for(event_name).present? { start_date: 7.days.ago.to_date, end_date: Date.current }
end end
def aggregated_metrics_monthly_data def monthly_time_range
aggregated_metrics_data(4.weeks.ago.to_date) { start_date: 4.weeks.ago.to_date, end_date: Date.current }
end end
def aggregated_metrics_weekly_data def known_event?(event_name)
aggregated_metrics_data(7.days.ago.to_date) event_for(event_name).present?
end end
def known_events def known_events
@known_events ||= load_events(KNOWN_EVENTS_PATH) @known_events ||= load_events(KNOWN_EVENTS_PATH)
end end
def aggregated_metrics def calculate_events_union(event_names:, start_date:, end_date:)
@aggregated_metrics ||= load_events(AGGREGATED_METRICS_PATH) count_unique_events(event_names: event_names, start_date: start_date, end_date: end_date) do |events|
raise SlotMismatch, events unless events_in_same_slot?(events)
raise AggregationMismatch, events unless events_same_aggregation?(events)
end
end end
private private
...@@ -139,93 +137,6 @@ module Gitlab ...@@ -139,93 +137,6 @@ module Gitlab
Plan.all_plans Plan.all_plans
end end
def aggregated_metrics_data(start_date)
aggregated_metrics.each_with_object({}) do |aggregation, weekly_data|
next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: false, type: :development)
weekly_data[aggregation[:name]] = calculate_count_for_aggregation(aggregation, start_date: start_date, end_date: Date.current)
end
end
def calculate_count_for_aggregation(aggregation, start_date:, end_date:)
case aggregation[:operator]
when UNION_OF_AGGREGATED_METRICS
calculate_events_union(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
when INTERSECTION_OF_AGGREGATED_METRICS
calculate_events_intersections(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
else
raise UnknownAggregationOperator, "Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}"
end
end
# calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
# this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391
def calculate_events_intersections(event_names:, start_date:, end_date:, subset_powers_cache: Hash.new({}))
# calculate power of intersection of all given metrics from inclusion exclusion principle
# |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) =>
# |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
# |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
# |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
# calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ...
subset_powers_data = subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
# calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D|
power_of_union_of_all_events = begin
subset_powers_cache[event_names.size][event_names.join('_+_')] ||= \
calculate_events_union(event_names: event_names, start_date: start_date, end_date: end_date)
end
# in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate,
# is positive or negative in particular equation we need to determine if number of subsets is even or odd. Please take a look at two examples below
# |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + |A & B & C| =>
# |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
# |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
# |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
subset_powers_size_even = subset_powers_data.size.even?
# sum all components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... =>
sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)
# add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D|
sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_events : -power_of_union_of_all_events)
end
def sum_subset_powers(subset_powers_data, subset_powers_size_even)
sum_without_sign = subset_powers_data.to_enum.with_index.sum do |value, index|
(index + 1).odd? ? value : -value
end
(subset_powers_size_even ? -1 : 1) * sum_without_sign
end
def subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
subset_sizes = (1..(event_names.size - 1))
subset_sizes.map do |subset_size|
if subset_size > 1
# calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
event_names.combination(subset_size).sum do |events_subset|
subset_powers_cache[subset_size][events_subset.join('_&_')] ||= \
calculate_events_intersections(event_names: events_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
end
else
# calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
event_names.sum do |event|
subset_powers_cache[subset_size][event] ||= \
unique_events(event_names: event, start_date: start_date, end_date: end_date)
end
end
end
end
def calculate_events_union(event_names:, start_date:, end_date:)
count_unique_events(event_names: event_names, start_date: start_date, end_date: end_date) do |events|
raise SlotMismatch, events unless events_in_same_slot?(events)
raise AggregationMismatch, events unless events_same_aggregation?(events)
end
end
def count_unique_events(event_names:, start_date:, end_date:, context: '') def count_unique_events(event_names:, start_date:, end_date:, context: '')
events = events_for(Array(event_names).map(&:to_s)) events = events_for(Array(event_names).map(&:to_s))
...@@ -340,12 +251,6 @@ module Gitlab ...@@ -340,12 +251,6 @@ module Gitlab
end.flatten end.flatten
end end
def validate_aggregation_operator!(operator)
return true if ALLOWED_METRICS_AGGREGATIONS.include?(operator)
raise UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}")
end
def weekly_redis_keys(events:, start_date:, end_date:, context: '') def weekly_redis_keys(events:, start_date:, end_date:, context: '')
end_date = end_date.end_of_week - 1.week end_date = end_date.end_of_week - 1.week
(start_date.to_date..end_date.to_date).map do |date| (start_date.to_date..end_date.to_date).map do |date|
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redis_shared_state do
let(:entity1) { 'dfb9d2d2-f56c-4c77-8aeb-6cddc4a1f857' }
let(:entity2) { '1dd9afb2-a3ee-4de1-8ae3-a405579c8584' }
let(:entity3) { '34rfjuuy-ce56-sa35-ds34-dfer567dfrf2' }
let(:entity4) { '8b9a2671-2abf-4bec-a682-22f6a8f7bf31' }
around do |example|
# We need to freeze to a reference time
# because visits are grouped by the week number in the year
# Without freezing the time, the test may behave inconsistently
# depending on which day of the week test is run.
# Monday 6th of June
reference_time = Time.utc(2020, 6, 1)
travel_to(reference_time) { example.run }
end
context 'aggregated_metrics_data' do
let(:known_events) do
[
{ name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" },
{ name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" },
{ name: 'event3_slot', redis_slot: "slot", category: 'category3', aggregation: "weekly" },
{ name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "weekly" },
{ name: 'event4', category: 'category2', aggregation: "weekly" }
].map(&:with_indifferent_access)
end
before do
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:known_events).and_return(known_events)
end
shared_examples 'aggregated_metrics_data' do
context 'no aggregated metrics is defined' do
it 'returns empty hash' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics).and_return([])
end
expect(aggregated_metrics_data).to eq({})
end
end
context 'there are aggregated metrics defined' do
before do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics).and_return(aggregated_metrics)
end
end
context 'with AND operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event1_slot event2_slot], operator: "AND" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot], operator: "AND" },
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" },
{ name: 'gmau_4', events: %w[event4], operator: "AND" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 3,
'gmau_2' => 2,
'gmau_3' => 1,
'gmau_4' => 3
}
expect(aggregated_metrics_data).to eq(results)
end
end
context 'with OR operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_3', events: %w[event4], operator: "OR" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 2,
'gmau_2' => 3,
'gmau_3' => 3
}
expect(aggregated_metrics_data).to eq(results)
end
end
context 'hidden behind feature flag' do
let(:enabled_feature_flag) { 'test_ff_enabled' }
let(:disabled_feature_flag) { 'test_ff_disabled' }
let(:aggregated_metrics) do
[
# represents stable aggregated metrics that has been fully released
{ name: 'gmau_without_ff', events: %w[event3_slot event5_slot], operator: "OR" },
# represents new aggregated metric that is under performance testing on gitlab.com
{ name: 'gmau_enabled', events: %w[event4], operator: "AND", feature_flag: enabled_feature_flag },
# represents aggregated metric that is under development and shouldn't be yet collected even on gitlab.com
{ name: 'gmau_disabled', events: %w[event4], operator: "AND", feature_flag: disabled_feature_flag }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
skip_feature_flags_yaml_validation
stub_feature_flags(enabled_feature_flag => true, disabled_feature_flag => false)
expect(aggregated_metrics_data).to eq('gmau_without_ff' => 2, 'gmau_enabled' => 3)
end
end
end
context 'error handling' do
context 'development and test environment' do
it 'raises error when unknown aggregation operator is used' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "SUM" }])
end
expect { aggregated_metrics_data }.to raise_error Gitlab::Usage::Metrics::Aggregates::UnknownAggregationOperator
end
it 're raises Gitlab::UsageDataCounters::HLLRedisCounter::EventError' do
error = Gitlab::UsageDataCounters::HLLRedisCounter::EventError
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union).and_raise(error)
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "OR" }])
end
expect { aggregated_metrics_data }.to raise_error error
end
end
context 'production' do
before do
stub_rails_env('production')
end
it 'rescues unknown aggregation operator error' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "SUM" }])
end
expect(aggregated_metrics_data).to eq('gmau_1' => -1)
end
it 'rescues Gitlab::UsageDataCounters::HLLRedisCounter::EventError' do
error = Gitlab::UsageDataCounters::HLLRedisCounter::EventError
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union).and_raise(error)
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "OR" }])
end
expect(aggregated_metrics_data).to eq('gmau_1' => -1)
end
end
end
end
describe '.aggregated_metrics_weekly_data' do
subject(:aggregated_metrics_data) { described_class.new.weekly_data }
before do
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity3, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity1, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity2, time: 3.days.ago)
# events out of time scope
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 8.days.ago)
# events in different slots
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity4, time: 2.days.ago)
end
it_behaves_like 'aggregated_metrics_data'
end
describe '.aggregated_metrics_monthly_data' do
subject(:aggregated_metrics_data) { described_class.new.monthly_data }
it_behaves_like 'aggregated_metrics_data' do
before do
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity3, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity1, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity2, time: 10.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity2, time: 4.weeks.ago.advance(days: 1))
# events out of time scope
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity1, time: 4.weeks.ago.advance(days: -1))
# events in different slots
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity4, time: 2.days.ago)
end
end
context 'Redis calls' do
let(:aggregated_metrics) do
[
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" }
].map(&:with_indifferent_access)
end
it 'caches intermediate operations' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics).and_return(aggregated_metrics)
end
aggregated_metrics[0][:events].each do |event|
expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
.with(event_names: event, start_date: 4.weeks.ago.to_date, end_date: Date.current)
.once
.and_return(0)
end
2.upto(4) do |subset_size|
aggregated_metrics[0][:events].combination(subset_size).each do |events|
expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
.with(event_names: events, start_date: 4.weeks.ago.to_date, end_date: Date.current)
.once
.and_return(0)
end
end
aggregated_metrics_data
end
end
end
end
end
...@@ -17,7 +17,7 @@ RSpec.describe 'aggregated metrics' do ...@@ -17,7 +17,7 @@ RSpec.describe 'aggregated metrics' do
Gitlab::UsageDataCounters::HLLRedisCounter.known_events Gitlab::UsageDataCounters::HLLRedisCounter.known_events
end end
Gitlab::UsageDataCounters::HLLRedisCounter.aggregated_metrics.tap do |aggregated_metrics| Gitlab::Usage::Metrics::Aggregates::Aggregate.new.send(:aggregated_metrics).tap do |aggregated_metrics|
it 'all events has unique name' do it 'all events has unique name' do
event_names = aggregated_metrics&.map { |event| event[:name] } event_names = aggregated_metrics&.map { |event| event[:name] }
...@@ -37,7 +37,7 @@ RSpec.describe 'aggregated metrics' do ...@@ -37,7 +37,7 @@ RSpec.describe 'aggregated metrics' do
end end
it "uses allowed aggregation operators" do it "uses allowed aggregation operators" do
expect(Gitlab::UsageDataCounters::HLLRedisCounter::ALLOWED_METRICS_AGGREGATIONS).to include aggregate[:operator] expect(Gitlab::Usage::Metrics::Aggregates::ALLOWED_METRICS_AGGREGATIONS).to include aggregate[:operator]
end end
it "uses events from the same Redis slot" do it "uses events from the same Redis slot" do
......
...@@ -426,105 +426,21 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s ...@@ -426,105 +426,21 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s
end end
end end
context 'aggregated_metrics_data' do describe '.calculate_events_union' do
let(:time_range) { { start_date: 7.days.ago, end_date: DateTime.current } }
let(:known_events) do let(:known_events) do
[ [
{ name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" }, { name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" },
{ name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" }, { name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" },
{ name: 'event3_slot', redis_slot: "slot", category: 'category3', aggregation: "weekly" }, { name: 'event3_slot', redis_slot: "slot", category: 'category3', aggregation: "weekly" },
{ name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "weekly" }, { name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "daily" },
{ name: 'event4', category: 'category2', aggregation: "weekly" } { name: 'event4', category: 'category2', aggregation: "weekly" }
].map(&:with_indifferent_access) ].map(&:with_indifferent_access)
end end
before do before do
allow(described_class).to receive(:known_events).and_return(known_events) allow(described_class).to receive(:known_events).and_return(known_events)
end
shared_examples 'aggregated_metrics_data' do
context 'no aggregated metrics is defined' do
it 'returns empty hash' do
allow(described_class).to receive(:aggregated_metrics).and_return([])
expect(aggregated_metrics_data).to eq({})
end
end
context 'there are aggregated metrics defined' do
before do
allow(described_class).to receive(:aggregated_metrics).and_return(aggregated_metrics)
end
context 'with AND operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event1_slot event2_slot], operator: "AND" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot], operator: "AND" },
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" },
{ name: 'gmau_4', events: %w[event4], operator: "AND" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 3,
'gmau_2' => 2,
'gmau_3' => 1,
'gmau_4' => 3
}
expect(aggregated_metrics_data).to eq(results)
end
end
context 'with OR operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_3', events: %w[event4], operator: "OR" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 2,
'gmau_2' => 3,
'gmau_3' => 3
}
expect(aggregated_metrics_data).to eq(results)
end
end
context 'hidden behind feature flag' do
let(:enabled_feature_flag) { 'test_ff_enabled' }
let(:disabled_feature_flag) { 'test_ff_disabled' }
let(:aggregated_metrics) do
[
# represents stable aggregated metrics that has been fully released
{ name: 'gmau_without_ff', events: %w[event3_slot event5_slot], operator: "OR" },
# represents new aggregated metric that is under performance testing on gitlab.com
{ name: 'gmau_enabled', events: %w[event4], operator: "AND", feature_flag: enabled_feature_flag },
# represents aggregated metric that is under development and shouldn't be yet collected even on gitlab.com
{ name: 'gmau_disabled', events: %w[event4], operator: "AND", feature_flag: disabled_feature_flag }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
skip_feature_flags_yaml_validation
stub_feature_flags(enabled_feature_flag => true, disabled_feature_flag => false)
expect(aggregated_metrics_data).to eq('gmau_without_ff' => 2, 'gmau_enabled' => 3)
end
end
end
end
describe '.aggregated_metrics_weekly_data' do
subject(:aggregated_metrics_data) { described_class.aggregated_metrics_weekly_data }
before do
described_class.track_event('event1_slot', values: entity1, time: 2.days.ago) described_class.track_event('event1_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity2, time: 2.days.ago) described_class.track_event('event1_slot', values: entity2, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity3, time: 2.days.ago) described_class.track_event('event1_slot', values: entity3, time: 2.days.ago)
...@@ -536,72 +452,33 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s ...@@ -536,72 +452,33 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s
described_class.track_event('event5_slot', values: entity2, time: 3.days.ago) described_class.track_event('event5_slot', values: entity2, time: 3.days.ago)
# events out of time scope # events out of time scope
described_class.track_event('event2_slot', values: entity3, time: 8.days.ago) described_class.track_event('event2_slot', values: entity4, time: 8.days.ago)
# events in different slots # events in different slots
described_class.track_event('event4', values: entity1, time: 2.days.ago) described_class.track_event('event4', values: entity1, time: 2.days.ago)
described_class.track_event('event4', values: entity2, time: 2.days.ago) described_class.track_event('event4', values: entity2, time: 2.days.ago)
described_class.track_event('event4', values: entity4, time: 2.days.ago)
end end
it_behaves_like 'aggregated_metrics_data' it 'calculates union of given events', :aggregate_failure do
expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[event4]))).to eq 2
expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[event1_slot event2_slot event3_slot]))).to eq 3
end end
describe '.aggregated_metrics_monthly_data' do it 'validates and raise exception if events has mismatched slot or aggregation', :aggregate_failure do
subject(:aggregated_metrics_data) { described_class.aggregated_metrics_monthly_data } expect { described_class.calculate_events_union(**time_range.merge(event_names: %w[event1_slot event4])) }.to raise_error described_class::SlotMismatch
expect { described_class.calculate_events_union(**time_range.merge(event_names: %w[event5_slot event3_slot])) }.to raise_error described_class::AggregationMismatch
it_behaves_like 'aggregated_metrics_data' do
before do
described_class.track_event('event1_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity2, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity3, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity2, time: 3.days.ago)
described_class.track_event('event2_slot', values: entity3, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity1, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity2, time: 10.days.ago)
described_class.track_event('event5_slot', values: entity2, time: 4.weeks.ago.advance(days: 1))
# events out of time scope
described_class.track_event('event5_slot', values: entity1, time: 4.weeks.ago.advance(days: -1))
# events in different slots
described_class.track_event('event4', values: entity1, time: 2.days.ago)
described_class.track_event('event4', values: entity2, time: 2.days.ago)
described_class.track_event('event4', values: entity4, time: 2.days.ago)
end
end end
context 'Redis calls' do
let(:aggregated_metrics) do
[
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" }
].map(&:with_indifferent_access)
end
let(:known_events) do
[
{ name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" },
{ name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" },
{ name: 'event3_slot', redis_slot: "slot", category: 'category3', aggregation: "weekly" },
{ name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "weekly" }
].map(&:with_indifferent_access)
end end
it 'caches intermediate operations' do describe '.weekly_time_range' do
allow(described_class).to receive(:known_events).and_return(known_events) it 'return hash with weekly time range boundaries' do
allow(described_class).to receive(:aggregated_metrics).and_return(aggregated_metrics) expect(described_class.weekly_time_range).to eq(start_date: 7.days.ago.to_date, end_date: Date.current)
4.downto(1) do |subset_size|
known_events.combination(subset_size).each do |events|
keys = described_class.send(:weekly_redis_keys, events: events, start_date: 4.weeks.ago.to_date, end_date: Date.current)
expect(Gitlab::Redis::HLL).to receive(:count).with(keys: keys).once.and_return(0)
end end
end end
subject describe '.monthly_time_range' do
end it 'return hash with monthly time range boundaries' do
end expect(described_class.monthly_time_range).to eq(start_date: 4.weeks.ago.to_date, end_date: Date.current)
end end
end end
end end
...@@ -1286,8 +1286,10 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do ...@@ -1286,8 +1286,10 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
describe '.aggregated_metrics_weekly' do describe '.aggregated_metrics_weekly' do
subject(:aggregated_metrics_payload) { described_class.aggregated_metrics_weekly } subject(:aggregated_metrics_payload) { described_class.aggregated_metrics_weekly }
it 'uses ::Gitlab::UsageDataCounters::HLLRedisCounter#aggregated_metrics_data', :aggregate_failures do it 'uses ::Gitlab::Usage::Metrics::Aggregates::Aggregate#weekly_data', :aggregate_failures do
expect(::Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:aggregated_metrics_weekly_data).and_return(global_search_gmau: 123) expect_next_instance_of(::Gitlab::Usage::Metrics::Aggregates::Aggregate) do |instance|
expect(instance).to receive(:weekly_data).and_return(global_search_gmau: 123)
end
expect(aggregated_metrics_payload).to eq(aggregated_metrics: { global_search_gmau: 123 }) expect(aggregated_metrics_payload).to eq(aggregated_metrics: { global_search_gmau: 123 })
end end
end end
...@@ -1295,8 +1297,10 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do ...@@ -1295,8 +1297,10 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
describe '.aggregated_metrics_monthly' do describe '.aggregated_metrics_monthly' do
subject(:aggregated_metrics_payload) { described_class.aggregated_metrics_monthly } subject(:aggregated_metrics_payload) { described_class.aggregated_metrics_monthly }
it 'uses ::Gitlab::UsageDataCounters::HLLRedisCounter#aggregated_metrics_data', :aggregate_failures do it 'uses ::Gitlab::Usage::Metrics::Aggregates::Aggregate#monthly_data', :aggregate_failures do
expect(::Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:aggregated_metrics_monthly_data).and_return(global_search_gmau: 123) expect_next_instance_of(::Gitlab::Usage::Metrics::Aggregates::Aggregate) do |instance|
expect(instance).to receive(:monthly_data).and_return(global_search_gmau: 123)
end
expect(aggregated_metrics_payload).to eq(aggregated_metrics: { global_search_gmau: 123 }) expect(aggregated_metrics_payload).to eq(aggregated_metrics: { global_search_gmau: 123 })
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment