Commit 6eb80521 authored by Mikolaj Wawrzyniak's avatar Mikolaj Wawrzyniak

Calculate unions over metrics

In order to allow for accurate metrics
aggregations, we need to calculate those
aggregates before they are persited.
parent 4ba00f14
---
name: product_analytics_aggregated_metrics
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/44624
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/267550
type: development
group: group::product analytics
default_enabled: false
......@@ -689,6 +689,14 @@ module Gitlab
{ redis_hll_counters: ::Gitlab::UsageDataCounters::HLLRedisCounter.unique_events_data }
end
def aggregated_metrics
return {} unless Feature.enabled?(:product_analytics_aggregated_metrics)
{
aggregated_metrics: ::Gitlab::UsageDataCounters::HLLRedisCounter.aggregated_metrics_data
}
end
def analytics_unique_visits_data
results = ::Gitlab::Analytics::UniqueVisits.analytics_events.each_with_object({}) do |target, hash|
hash[target] = redis_usage_data { unique_visit_service.unique_visits_for(targets: target) }
......
#- name: unique name of aggregated metric
# operator: aggregation operator. Valid values are:
# - "ANY": counts unique elements that were observed triggering any of following events
# events: list of events names to aggregate into metric. All events in this list must have the same 'redis_slot' and 'aggregation' attributes
# see from lib/gitlab/usage_data_counters/known_events.yml for the list of valid events.
---
- name: product_analytics_test_aggregated_metrics
operator: ANY
events: ['i_search_total', 'i_search_advanced', 'i_search_paid']
......@@ -5,13 +5,20 @@ module Gitlab
module HLLRedisCounter
DEFAULT_WEEKLY_KEY_EXPIRY_LENGTH = 6.weeks
DEFAULT_DAILY_KEY_EXPIRY_LENGTH = 29.days
DEFAULT_REDIS_SLOT = ''.freeze
DEFAULT_REDIS_SLOT = ''
UnknownEvent = Class.new(StandardError)
UnknownAggregation = Class.new(StandardError)
EventError = Class.new(StandardError)
UnknownEvent = Class.new(EventError)
UnknownAggregation = Class.new(EventError)
AggregationMismatch = Class.new(EventError)
SlotMismatch = Class.new(EventError)
CategoryMismatch = Class.new(EventError)
UnknownAggregationOperator = Class.new(EventError)
KNOWN_EVENTS_PATH = 'lib/gitlab/usage_data_counters/known_events.yml'.freeze
KNOWN_EVENTS_PATH = 'lib/gitlab/usage_data_counters/known_events.yml'
ALLOWED_AGGREGATIONS = %i(daily weekly).freeze
AGGREGATED_METRICS_PATH = 'lib/gitlab/usage_data_counters/aggregated_metrics.yml'
ALLOWED_METRICS_AGGREGATIONS = %w[ANY].freeze
# Track event on entity_id
# Increment a Redis HLL counter for unique event_name and entity_id
......@@ -38,23 +45,17 @@ module Gitlab
event = event_for(event_name)
raise UnknownEvent.new("Unknown event #{event_name}") unless event.present?
raise UnknownEvent, "Unknown event #{event_name}" unless event.present?
Gitlab::Redis::HLL.add(key: redis_key(event, time), value: entity_id, expiry: expiry(event))
end
def unique_events(event_names:, start_date:, end_date:)
events = events_for(Array(event_names).map(&:to_s))
raise 'Events should be in same slot' unless events_in_same_slot?(events)
raise 'Events should be in same category' unless events_in_same_category?(events)
raise 'Events should have same aggregation level' unless events_same_aggregation?(events)
aggregation = events.first[:aggregation]
keys = keys_for_aggregation(aggregation, events: events, start_date: start_date, end_date: end_date)
redis_usage_data { Gitlab::Redis::HLL.count(keys: keys) }
count_unique_events(event_names: event_names, start_date: start_date, end_date: end_date) do |events|
raise SlotMismatch, events unless events_in_same_slot?(events)
raise CategoryMismatch, events unless events_in_same_category?(events)
raise AggregationMismatch, events unless events_same_aggregation?(events)
end
end
def categories
......@@ -72,8 +73,8 @@ module Gitlab
events_names = events_for_category(category)
event_results = events_names.each_with_object({}) do |event, hash|
hash["#{event}_weekly"] = unique_events(event_names: event, start_date: 7.days.ago.to_date, end_date: Date.current)
hash["#{event}_monthly"] = unique_events(event_names: event, start_date: 4.weeks.ago.to_date, end_date: Date.current)
hash["#{event}_weekly"] = unique_events(event_names: [event], start_date: 7.days.ago.to_date, end_date: Date.current)
hash["#{event}_monthly"] = unique_events(event_names: [event], start_date: 4.weeks.ago.to_date, end_date: Date.current)
end
if eligible_for_totals?(events_names)
......@@ -89,8 +90,34 @@ module Gitlab
event_for(event_name).present?
end
def aggregated_metrics_data
aggregated_metrics.to_h do |aggregation|
[aggregation[:name], calculate_count_for_aggregation(aggregation)]
end
end
private
def calculate_count_for_aggregation(aggregation)
validate_aggregation_operator!(aggregation[:operator])
count_unique_events(event_names: aggregation[:events], start_date: 4.weeks.ago.to_date, end_date: Date.current) do |events|
raise SlotMismatch, events unless events_in_same_slot?(events)
raise AggregationMismatch, events unless events_same_aggregation?(events)
end
end
def count_unique_events(event_names:, start_date:, end_date:)
events = events_for(Array(event_names).map(&:to_s))
yield events if block_given?
aggregation = events.first[:aggregation]
keys = keys_for_aggregation(aggregation, events: events, start_date: start_date, end_date: end_date)
redis_usage_data { Gitlab::Redis::HLL.count(keys: keys) }
end
# Allow to add totals for events that are in the same redis slot, category and have the same aggregation level
# and if there are more than 1 event
def eligible_for_totals?(events_names)
......@@ -109,7 +136,15 @@ module Gitlab
end
def known_events
@known_events ||= YAML.load_file(Rails.root.join(KNOWN_EVENTS_PATH)).map(&:with_indifferent_access)
@known_events ||= load_yaml_from_path(KNOWN_EVENTS_PATH)
end
def aggregated_metrics
@aggregated_metrics ||= (load_yaml_from_path(AGGREGATED_METRICS_PATH) || [])
end
def load_yaml_from_path(path)
YAML.safe_load(File.read(Rails.root.join(path)))&.map(&:with_indifferent_access)
end
def known_events_names
......@@ -179,6 +214,12 @@ module Gitlab
end.flatten
end
def validate_aggregation_operator!(operator)
return true if ALLOWED_METRICS_AGGREGATIONS.include?(operator)
raise UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}")
end
def weekly_redis_keys(events:, start_date:, end_date:)
weeks = end_date.to_date.cweek - start_date.to_date.cweek
weeks = 1 if weeks == 0
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe 'aggregated metrics' do
RSpec::Matchers.define :be_known_event do
match do |event|
Gitlab::UsageDataCounters::HLLRedisCounter.known_event?(event)
end
failure_message do
"Event with name: `#{event}` can not be found within `#{Gitlab::UsageDataCounters::HLLRedisCounter::KNOWN_EVENTS_PATH}`"
end
end
let_it_be(:known_events) do
YAML.load_file(
Rails.root.join(Gitlab::UsageDataCounters::HLLRedisCounter::KNOWN_EVENTS_PATH)
).map(&:with_indifferent_access)
end
YAML.load_file(Rails.root.join(Gitlab::UsageDataCounters::HLLRedisCounter::AGGREGATED_METRICS_PATH))&.map(&:with_indifferent_access).tap do |aggregated_metrics|
it 'all events has unique name' do
event_names = aggregated_metrics&.map { |event| event[:name] }
expect(event_names).to eq(event_names&.uniq)
end
aggregated_metrics&.each do |aggregate|
context "for #{aggregate[:name]} aggregate of #{aggregate[:events].join(' ')}" do
let_it_be(:events_records) { known_events.select { |event| aggregate[:events].include?(event[:name]) } }
it "only refers to known events" do
expect(aggregate[:events]).to all be_known_event
end
it "has expected structure" do
expect(aggregate.keys).to match_array(%w[name operator events])
end
it "uses allowed aggregation operators" do
expect(Gitlab::UsageDataCounters::HLLRedisCounter::ALLOWED_METRICS_AGGREGATIONS).to include aggregate[:operator]
end
it "uses events from the same Redis slot" do
event_slots = events_records.map { |event| event[:redis_slot] }.uniq
expect(event_slots).to contain_exactly(be_present)
end
it "uses events with the same aggregation period" do
event_slots = events_records.map { |event| event[:aggregation] }.uniq
expect(event_slots).to contain_exactly(be_present)
end
end
end
end
end
......@@ -36,8 +36,8 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s
let(:custom_daily_event) { 'g_analytics_custom' }
let(:global_category) { 'global' }
let(:compliance_category) {'compliance' }
let(:productivity_category) {'productivity' }
let(:compliance_category) { 'compliance' }
let(:productivity_category) { 'productivity' }
let(:analytics_category) { 'analytics' }
let(:known_events) do
......@@ -84,11 +84,11 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s
end
it "raise error if metrics don't have same aggregation" do
expect { described_class.track_event(entity1, different_aggregation, Date.current) } .to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownAggregation)
expect { described_class.track_event(entity1, different_aggregation, Date.current) }.to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownAggregation)
end
it 'raise error if metrics of unknown aggregation' do
expect { described_class.track_event(entity1, 'unknown', Date.current) } .to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownEvent)
expect { described_class.track_event(entity1, 'unknown', Date.current) }.to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownEvent)
end
context 'for weekly events' do
......@@ -184,41 +184,47 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s
end
it 'raise error if metrics are not in the same slot' do
expect { described_class.unique_events(event_names: [compliance_slot_event, analytics_slot_event], start_date: 4.weeks.ago, end_date: Date.current) }.to raise_error('Events should be in same slot')
expect do
described_class.unique_events(event_names: [compliance_slot_event, analytics_slot_event], start_date: 4.weeks.ago, end_date: Date.current)
end.to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::SlotMismatch)
end
it 'raise error if metrics are not in the same category' do
expect { described_class.unique_events(event_names: [category_analytics_event, category_productivity_event], start_date: 4.weeks.ago, end_date: Date.current) }.to raise_error('Events should be in same category')
expect do
described_class.unique_events(event_names: [category_analytics_event, category_productivity_event], start_date: 4.weeks.ago, end_date: Date.current)
end.to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::CategoryMismatch)
end
it "raise error if metrics don't have same aggregation" do
expect { described_class.unique_events(event_names: [daily_event, weekly_event], start_date: 4.weeks.ago, end_date: Date.current) }.to raise_error('Events should have same aggregation level')
expect do
described_class.unique_events(event_names: [daily_event, weekly_event], start_date: 4.weeks.ago, end_date: Date.current)
end.to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::AggregationMismatch)
end
context 'when data for the last complete week' do
it { expect(described_class.unique_events(event_names: weekly_event, start_date: 1.week.ago, end_date: Date.current)).to eq(1) }
it { expect(described_class.unique_events(event_names: [weekly_event], start_date: 1.week.ago, end_date: Date.current)).to eq(1) }
end
context 'when data for the last 4 complete weeks' do
it { expect(described_class.unique_events(event_names: weekly_event, start_date: 4.weeks.ago, end_date: Date.current)).to eq(2) }
it { expect(described_class.unique_events(event_names: [weekly_event], start_date: 4.weeks.ago, end_date: Date.current)).to eq(2) }
end
context 'when data for the week 4 weeks ago' do
it { expect(described_class.unique_events(event_names: weekly_event, start_date: 4.weeks.ago, end_date: 3.weeks.ago)).to eq(1) }
it { expect(described_class.unique_events(event_names: [weekly_event], start_date: 4.weeks.ago, end_date: 3.weeks.ago)).to eq(1) }
end
context 'when using symbol as parameter' do
it { expect(described_class.unique_events(event_names: weekly_event.to_sym, start_date: 4.weeks.ago, end_date: 3.weeks.ago)).to eq(1) }
it { expect(described_class.unique_events(event_names: [weekly_event.to_sym], start_date: 4.weeks.ago, end_date: 3.weeks.ago)).to eq(1) }
end
context 'when using daily aggregation' do
it { expect(described_class.unique_events(event_names: daily_event, start_date: 7.days.ago, end_date: Date.current)).to eq(2) }
it { expect(described_class.unique_events(event_names: daily_event, start_date: 28.days.ago, end_date: Date.current)).to eq(3) }
it { expect(described_class.unique_events(event_names: daily_event, start_date: 28.days.ago, end_date: 21.days.ago)).to eq(1) }
it { expect(described_class.unique_events(event_names: [daily_event], start_date: 7.days.ago, end_date: Date.current)).to eq(2) }
it { expect(described_class.unique_events(event_names: [daily_event], start_date: 28.days.ago, end_date: Date.current)).to eq(3) }
it { expect(described_class.unique_events(event_names: [daily_event], start_date: 28.days.ago, end_date: 21.days.ago)).to eq(1) }
end
context 'when no slot is set' do
it { expect(described_class.unique_events(event_names: no_slot, start_date: 7.days.ago, end_date: Date.current)).to eq(1) }
it { expect(described_class.unique_events(event_names: [no_slot], start_date: 7.days.ago, end_date: Date.current)).to eq(1) }
end
end
end
......@@ -267,4 +273,54 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s
expect(subject.unique_events_data).to eq(results)
end
end
describe '.aggregated_metrics_data' do
context 'no combination is tracked' do
it 'returns empty hash' do
allow(described_class).to receive(:aggregated_metrics).and_return([])
expect(subject.aggregated_metrics_data).to eq({})
end
end
context 'there are some combinations defined' do
let(:known_events) do
[
{ name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" },
{ name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" },
{ name: 'event3', category: 'category2', aggregation: "weekly" }
].map(&:with_indifferent_access)
end
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event1_slot event2_slot], operator: "ANY" },
{ name: 'gmau_2', events: %w[event3], operator: "ANY" }
].map(&:with_indifferent_access)
end
before do
allow(described_class).to receive(:known_events).and_return(known_events)
allow(described_class).to receive(:aggregated_metrics).and_return(aggregated_metrics)
described_class.track_event(entity1, 'event1_slot', 2.days.ago)
described_class.track_event(entity1, 'event2_slot', 2.days.ago)
described_class.track_event(entity3, 'event2_slot', 3.days.ago)
# events in different slots
described_class.track_event(entity1, 'event3', 2.days.ago)
described_class.track_event(entity2, 'event3', 2.days.ago)
described_class.track_event(entity4, 'event3', 2.days.ago)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 2,
'gmau_2' => 3
}
expect(subject.aggregated_metrics_data).to eq(results)
end
end
end
end
......@@ -1222,6 +1222,32 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
end
end
describe 'aggregated_metrics' do
subject(:aggregated_metrics) { described_class.aggregated_metrics }
context 'with product_analytics_aggregated_metrics feature flag on' do
before do
stub_feature_flags(product_analytics_aggregated_metrics: true)
end
it 'uses ::Gitlab::UsageDataCounters::HLLRedisCounter#aggregated_metrics_data', :aggregate_failures do
expect(::Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:aggregated_metrics_data).and_return(global_search_gmau: 123)
expect(aggregated_metrics).to eq(aggregated_metrics: { global_search_gmau: 123 })
end
end
context 'with product_analytics_aggregated_metrics feature flag off' do
before do
stub_feature_flags(product_analytics_aggregated_metrics: false)
end
it 'returns empty hash', :aggregate_failures do
expect(::Gitlab::UsageDataCounters::HLLRedisCounter).not_to receive(:aggregated_metrics_data)
expect(aggregated_metrics).to be {}
end
end
end
describe '.service_desk_counts' do
subject { described_class.send(:service_desk_counts) }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment