Commit 766745ca authored by Yannis Roussos

Merge branch '233853-persist-instance-statistics-object-counts' into 'master'

Store Instance Statistics measurements periodically

See merge request gitlab-org/gitlab!41300
parents e1ec396e 15d08ccd
...@@ -12,6 +12,15 @@ module Analytics ...@@ -12,6 +12,15 @@ module Analytics
pipelines: 6 pipelines: 6
} }
# Maps each measurement identifier value to a lambda that returns the
# model class whose records are counted for that identifier.
# The lambdas defer resolving the model constants until `.call` is invoked.
IDENTIFIER_QUERY_MAPPING = {
identifiers[:projects] => -> { Project },
identifiers[:users] => -> { User },
identifiers[:issues] => -> { Issue },
identifiers[:merge_requests] => -> { MergeRequest },
identifiers[:groups] => -> { Group },
identifiers[:pipelines] => -> { Ci::Pipeline }
}.freeze
validates :recorded_at, :identifier, :count, presence: true validates :recorded_at, :identifier, :count, presence: true
validates :recorded_at, uniqueness: { scope: :identifier } validates :recorded_at, uniqueness: { scope: :identifier }
......
...@@ -115,6 +115,14 @@ ...@@ -115,6 +115,14 @@
:weight: 1 :weight: 1
:idempotent: :idempotent:
:tags: [] :tags: []
- :name: cronjob:analytics_instance_statistics_count_job_trigger
:feature_category: :instance_statistics
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: cronjob:authorized_project_update_periodic_recalculate - :name: cronjob:authorized_project_update_periodic_recalculate
:feature_category: :source_code_management :feature_category: :source_code_management
:has_external_dependencies: :has_external_dependencies:
...@@ -1204,6 +1212,14 @@ ...@@ -1204,6 +1212,14 @@
:weight: 1 :weight: 1
:idempotent: true :idempotent: true
:tags: [] :tags: []
- :name: analytics_instance_statistics_counter_job
:feature_category: :instance_statistics
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: authorized_keys - :name: authorized_keys
:feature_category: :source_code_management :feature_category: :source_code_management
:has_external_dependencies: :has_external_dependencies:
......
# frozen_string_literal: true
module Analytics
  module InstanceStatistics
    # Cron worker that schedules one CounterJobWorker job per measurement
    # identifier. The id range and the `recorded_at` timestamp for every
    # measurement are determined here, up front, and passed to the counter
    # jobs as arguments.
    class CountJobTriggerWorker
      include ApplicationWorker
      include CronjobQueue # rubocop:disable Scalability/CronWorkerContext

      # Delay before the first counter job and the spacing between
      # consecutive counter jobs.
      DEFAULT_DELAY = 3.minutes.freeze

      feature_category :instance_statistics
      urgency :low

      idempotent!

      # Schedules the counter jobs. Does nothing while the
      # `store_instance_statistics_measurements` feature flag is disabled.
      def perform
        return if Feature.disabled?(:store_instance_statistics_measurements)

        recorded_at = Time.zone.now
        measurement_identifiers = Analytics::InstanceStatistics::Measurement.identifiers

        worker_arguments = Gitlab::Analytics::InstanceStatistics::WorkersArgumentBuilder.new(
          measurement_identifiers: measurement_identifiers.values,
          recorded_at: recorded_at
        ).execute

        # DEFAULT_DELAY is already a duration. Calling `.minutes` on it again
        # would reinterpret its 180 seconds as 180 minutes and postpone the
        # first job by three hours instead of three minutes.
        perform_in = DEFAULT_DELAY.from_now

        worker_arguments.each do |args|
          CounterJobWorker.perform_in(perform_in, *args)

          # Stagger the jobs so the counting queries do not all run at once.
          perform_in += DEFAULT_DELAY
        end
      end
    end
  end
end
# frozen_string_literal: true
module Analytics
  module InstanceStatistics
    # Counts the records of a single measurement scope within the given id
    # range and stores the result as a Measurement row.
    class CounterJobWorker
      include ApplicationWorker

      feature_category :instance_statistics
      urgency :low

      idempotent!

      # measurement_identifier - key into Measurement::IDENTIFIER_QUERY_MAPPING
      # min_id, max_id         - id range to count; nil when the table is empty
      # recorded_at            - timestamp stored with the measurement
      def perform(measurement_identifier, min_id, max_id, recorded_at)
        scope_builder = ::Analytics::InstanceStatistics::Measurement::IDENTIFIER_QUERY_MAPPING[measurement_identifier]
        query_scope = scope_builder.call

        count = count_in_range(query_scope, min_id, max_id)

        # The batch counter signals a failed count with FALLBACK; store nothing then.
        return if count == Gitlab::Database::BatchCounter::FALLBACK

        measurement_attributes = { recorded_at: recorded_at, count: count, identifier: measurement_identifier }
        InstanceStatistics::Measurement.insert_all([measurement_attributes])
      end

      private

      # Returns 0 without querying when either boundary is missing (empty
      # table); otherwise batch-counts the scope between the two ids.
      def count_in_range(query_scope, min_id, max_id)
        return 0 if [min_id, max_id].any?(&:nil?)

        Gitlab::Database::BatchCount.batch_count(query_scope, start: min_id, finish: max_id)
      end
    end
  end
end
...@@ -65,6 +65,7 @@ ...@@ -65,6 +65,7 @@
- integrations - integrations
- interactive_application_security_testing - interactive_application_security_testing
- internationalization - internationalization
- instance_statistics
- issue_tracking - issue_tracking
- jenkins_importer - jenkins_importer
- jira_importer - jira_importer
......
---
name: store_instance_statistics_measurements
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/41300
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/247871
group: group::analytics
type: development
default_enabled: false
...@@ -514,6 +514,9 @@ Settings.cron_jobs['postgres_dynamic_partitions_creator']['job_class'] ||= 'Part ...@@ -514,6 +514,9 @@ Settings.cron_jobs['postgres_dynamic_partitions_creator']['job_class'] ||= 'Part
Settings.cron_jobs['ci_platform_metrics_update_cron_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['ci_platform_metrics_update_cron_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['ci_platform_metrics_update_cron_worker']['cron'] ||= '47 9 * * *' Settings.cron_jobs['ci_platform_metrics_update_cron_worker']['cron'] ||= '47 9 * * *'
Settings.cron_jobs['ci_platform_metrics_update_cron_worker']['job_class'] = 'CiPlatformMetricsUpdateCronWorker' Settings.cron_jobs['ci_platform_metrics_update_cron_worker']['job_class'] = 'CiPlatformMetricsUpdateCronWorker'
# Cron entry for Analytics::InstanceStatistics::CountJobTriggerWorker.
# The default schedule fires at minute 50 of hour 23; `*/1` in the
# day-of-month field matches every day.
Settings.cron_jobs['analytics_instance_statistics_count_job_trigger_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['analytics_instance_statistics_count_job_trigger_worker']['cron'] ||= '50 23 */1 * *'
Settings.cron_jobs['analytics_instance_statistics_count_job_trigger_worker']['job_class'] ||= 'Analytics::InstanceStatistics::CountJobTriggerWorker'
Gitlab.ee do Gitlab.ee do
Settings.cron_jobs['adjourned_group_deletion_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['adjourned_group_deletion_worker'] ||= Settingslogic.new({})
......
...@@ -30,6 +30,8 @@ ...@@ -30,6 +30,8 @@
- 1 - 1
- - analytics_code_review_metrics - - analytics_code_review_metrics
- 1 - 1
- - analytics_instance_statistics_counter_job
- 1
- - authorized_keys - - authorized_keys
- 2 - 2
- - authorized_project_update - - authorized_project_update
......
# frozen_string_literal: true
module Gitlab
  module Analytics
    module InstanceStatistics
      # Builds the argument lists for the instance statistics counter jobs:
      # one `[identifier, min_id, max_id, recorded_at]` array per known
      # measurement identifier.
      class WorkersArgumentBuilder
        def initialize(measurement_identifiers: [], recorded_at: Time.zone.now)
          @measurement_identifiers = measurement_identifiers
          @recorded_at = recorded_at
        end

        # Returns an array of argument arrays. Identifiers that have no entry
        # in Measurement::IDENTIFIER_QUERY_MAPPING are skipped.
        def execute
          measurement_identifiers.each_with_object([]) do |identifier, arguments|
            scope_builder = ::Analytics::InstanceStatistics::Measurement::IDENTIFIER_QUERY_MAPPING[identifier]
            query_scope = scope_builder&.call
            next if query_scope.nil?

            # The id range is resolved here, as early as possible, so the
            # counts are more accurate.
            lowest_id = query_scope.minimum(:id)
            highest_id = query_scope.maximum(:id)

            arguments << [identifier, lowest_id, highest_id, recorded_at]
          end
        end

        private

        attr_reader :measurement_identifiers, :recorded_at
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Analytics::InstanceStatistics::WorkersArgumentBuilder do
  context 'when no measurement identifiers are given' do
    it 'returns empty array' do
      builder = described_class.new(measurement_identifiers: [])

      expect(builder.execute).to be_empty
    end
  end

  context 'when measurement identifiers are given' do
    let_it_be(:user_1) { create(:user) }
    let_it_be(:project_1) { create(:project, namespace: user_1.namespace, creator: user_1) }
    let_it_be(:project_2) { create(:project, namespace: user_1.namespace, creator: user_1) }
    let_it_be(:project_3) { create(:project, namespace: user_1.namespace, creator: user_1) }

    let(:recorded_at) { 2.days.ago }
    let(:projects_measurement_identifier) { ::Analytics::InstanceStatistics::Measurement.identifiers.fetch(:projects) }
    let(:users_measurement_identifier) { ::Analytics::InstanceStatistics::Measurement.identifiers.fetch(:users) }
    let(:measurement_identifiers) { [projects_measurement_identifier, users_measurement_identifier] }

    # Arguments expected for the two valid identifiers: the id range spans
    # the first and last created record of each model.
    let(:expected_arguments) do
      [
        [projects_measurement_identifier, project_1.id, project_3.id, recorded_at],
        [users_measurement_identifier, user_1.id, user_1.id, recorded_at]
      ]
    end

    subject { described_class.new(measurement_identifiers: measurement_identifiers, recorded_at: recorded_at).execute }

    it 'returns worker arguments' do
      expect(subject).to eq(expected_arguments)
    end

    context 'when bogus measurement identifiers are given' do
      before do
        measurement_identifiers.push('bogus1', 'bogus2')
      end

      it 'skips bogus measurement identifiers' do
        expect(subject).to eq(expected_arguments)
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Analytics::InstanceStatistics::CountJobTriggerWorker do
  # Number of CounterJobWorker jobs currently enqueued; evaluated lazily, so
  # reference it only after `perform` has run.
  let(:enqueued_counter_jobs) { Analytics::InstanceStatistics::CounterJobWorker.jobs.count }

  it_behaves_like 'an idempotent worker'

  context 'triggers a job for each measurement identifiers' do
    let(:expected_count) { Analytics::InstanceStatistics::Measurement.identifiers.size }

    it 'triggers CounterJobWorker jobs' do
      subject.perform

      expect(enqueued_counter_jobs).to eq(expected_count)
    end
  end

  context 'when the `store_instance_statistics_measurements` feature flag is off' do
    before do
      stub_feature_flags(store_instance_statistics_measurements: false)
    end

    it 'does not trigger any CounterJobWorker job' do
      subject.perform

      expect(enqueued_counter_jobs).to eq(0)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Analytics::InstanceStatistics::CounterJobWorker do
  let_it_be(:user_1) { create(:user) }
  let_it_be(:user_2) { create(:user) }

  let(:users_measurement_identifier) { ::Analytics::InstanceStatistics::Measurement.identifiers.fetch(:users) }
  let(:recorded_at) { Time.zone.now }
  let(:job_args) { [users_measurement_identifier, user_1.id, user_2.id, recorded_at] }

  before do
    allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false)
  end

  include_examples 'an idempotent worker' do
    it 'counts a scope and stores the result' do
      subject

      measurement = Analytics::InstanceStatistics::Measurement.first
      expect(measurement.recorded_at).to be_like_time(recorded_at)
      expect(measurement.identifier).to eq('users')
      expect(measurement.count).to eq(2)
    end
  end

  context 'when no records are in the database' do
    # Uses its own identifier name: this context measures groups, so it must
    # not masquerade as the users identifier defined above.
    let(:groups_measurement_identifier) { ::Analytics::InstanceStatistics::Measurement.identifiers.fetch(:groups) }

    # nil boundaries are what the worker receives for an empty table.
    subject { described_class.new.perform(groups_measurement_identifier, nil, nil, recorded_at) }

    it 'sets 0 as the count' do
      subject

      measurement = Analytics::InstanceStatistics::Measurement.first
      expect(measurement.recorded_at).to be_like_time(recorded_at)
      expect(measurement.identifier).to eq('groups')
      expect(measurement.count).to eq(0)
    end
  end

  it 'does not raise error when inserting duplicated measurement' do
    subject

    expect { subject }.not_to raise_error
  end

  it 'does not insert anything when BatchCount returns error' do
    allow(Gitlab::Database::BatchCount).to receive(:batch_count).and_return(Gitlab::Database::BatchCounter::FALLBACK)

    expect { subject }.not_to change { Analytics::InstanceStatistics::Measurement.count }
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment