Commit b4120f95 authored by Douglas Barbosa Alexandre

Merge branch 'pb-move-bbm-prometheus-metrics' into 'master'

Move BBM prometheus metrics into separate class

See merge request gitlab-org/gitlab!83644
parents ae5b9e31 a6f0994c
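
For context, here is a minimal, self-contained sketch of the refactor this MR performs (simplified, hypothetical class bodies, not the actual GitLab code): BatchedMigrationWrapper no longer registers and updates Prometheus metrics itself; it accepts a metrics tracker through its constructor and calls track on it in an ensure block, so metrics are reported even when the migration job raises.

# Simplified sketch of the dependency injection introduced by this MR.
# JobRecord is a stand-in for the real batched job tracking record.
JobRecord = Struct.new(:batch_size, keyword_init: true)

class PrometheusMetrics
  # The real class sets gauges and increments counters via Gitlab::Metrics;
  # this stand-in just prints, to keep the sketch runnable.
  def track(job_record)
    puts "tracked batch_size=#{job_record.batch_size}"
  end
end

class BatchedMigrationWrapper
  def initialize(metrics: PrometheusMetrics.new)
    @metrics = metrics
  end

  def perform(job_record)
    run_job(job_record)
  ensure
    @metrics.track(job_record) # runs even if run_job raises
  end

  private

  def run_job(job_record)
    # execute the batched background migration job here
  end
end

BatchedMigrationWrapper.new.perform(JobRecord.new(batch_size: 100))
# => tracked batch_size=100

Because the tracker is injected, the wrapper spec below can replace it with an instance_double and only assert that track is called, while PrometheusMetrics gets its own focused spec.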
@@ -4,10 +4,9 @@ module Gitlab
module Database
module BackgroundMigration
class BatchedMigrationWrapper
extend Gitlab::Utils::StrongMemoize
def initialize(connection: ApplicationRecord.connection)
def initialize(connection: ApplicationRecord.connection, metrics: PrometheusMetrics.new)
@connection = connection
@metrics = metrics
end
# Wraps the execution of a batched_background_migration.
@@ -28,12 +27,12 @@ module Gitlab
raise
ensure
track_prometheus_metrics(batch_tracking_record)
metrics.track(batch_tracking_record)
end
private
attr_reader :connection
attr_reader :connection, :metrics
def start_tracking_execution(tracking_record)
tracking_record.run!
@@ -63,80 +62,6 @@ module Gitlab
job_class.new
end
end
def track_prometheus_metrics(tracking_record)
migration = tracking_record.batched_migration
base_labels = migration.prometheus_labels
metric_for(:gauge_batch_size).set(base_labels, tracking_record.batch_size)
metric_for(:gauge_sub_batch_size).set(base_labels, tracking_record.sub_batch_size)
metric_for(:gauge_interval).set(base_labels, tracking_record.batched_migration.interval)
metric_for(:gauge_job_duration).set(base_labels, (tracking_record.finished_at - tracking_record.started_at).to_i)
metric_for(:counter_updated_tuples).increment(base_labels, tracking_record.batch_size)
metric_for(:gauge_migrated_tuples).set(base_labels, tracking_record.batched_migration.migrated_tuple_count)
metric_for(:gauge_total_tuple_count).set(base_labels, tracking_record.batched_migration.total_tuple_count)
metric_for(:gauge_last_update_time).set(base_labels, Time.current.to_i)
if metrics = tracking_record.metrics
metrics['timings']&.each do |key, timings|
summary = metric_for(:histogram_timings)
labels = base_labels.merge(operation: key)
timings.each do |timing|
summary.observe(labels, timing)
end
end
end
end
def metric_for(name)
self.class.metrics[name]
end
def self.metrics
strong_memoize(:metrics) do
{
gauge_batch_size: Gitlab::Metrics.gauge(
:batched_migration_job_batch_size,
'Batch size for a batched migration job'
),
gauge_sub_batch_size: Gitlab::Metrics.gauge(
:batched_migration_job_sub_batch_size,
'Sub-batch size for a batched migration job'
),
gauge_interval: Gitlab::Metrics.gauge(
:batched_migration_job_interval_seconds,
'Interval for a batched migration job'
),
gauge_job_duration: Gitlab::Metrics.gauge(
:batched_migration_job_duration_seconds,
'Duration for a batched migration job'
),
counter_updated_tuples: Gitlab::Metrics.counter(
:batched_migration_job_updated_tuples_total,
'Number of tuples updated by batched migration job'
),
gauge_migrated_tuples: Gitlab::Metrics.gauge(
:batched_migration_migrated_tuples_total,
'Total number of tuples migrated by a batched migration'
),
histogram_timings: Gitlab::Metrics.histogram(
:batched_migration_job_query_duration_seconds,
'Query timings for a batched migration job',
{},
[0.1, 0.25, 0.5, 1, 5].freeze
),
gauge_total_tuple_count: Gitlab::Metrics.gauge(
:batched_migration_total_tuple_count,
'Total tuple count the migration needs to touch'
),
gauge_last_update_time: Gitlab::Metrics.gauge(
:batched_migration_last_update_time_seconds,
'Unix epoch time in seconds'
)
}
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Database
module BackgroundMigration
class PrometheusMetrics
extend Gitlab::Utils::StrongMemoize
QUERY_TIMING_BUCKETS = [0.1, 0.25, 0.5, 1, 5].freeze
def track(job_record)
migration_record = job_record.batched_migration
base_labels = migration_record.prometheus_labels
metric_for(:gauge_batch_size).set(base_labels, job_record.batch_size)
metric_for(:gauge_sub_batch_size).set(base_labels, job_record.sub_batch_size)
metric_for(:gauge_interval).set(base_labels, job_record.batched_migration.interval)
metric_for(:gauge_job_duration).set(base_labels, (job_record.finished_at - job_record.started_at).to_i)
metric_for(:counter_updated_tuples).increment(base_labels, job_record.batch_size)
metric_for(:gauge_migrated_tuples).set(base_labels, migration_record.migrated_tuple_count)
metric_for(:gauge_total_tuple_count).set(base_labels, migration_record.total_tuple_count)
metric_for(:gauge_last_update_time).set(base_labels, Time.current.to_i)
track_timing_metrics(base_labels, job_record.metrics)
end
def self.metrics
strong_memoize(:metrics) do
{
gauge_batch_size: Gitlab::Metrics.gauge(
:batched_migration_job_batch_size,
'Batch size for a batched migration job'
),
gauge_sub_batch_size: Gitlab::Metrics.gauge(
:batched_migration_job_sub_batch_size,
'Sub-batch size for a batched migration job'
),
gauge_interval: Gitlab::Metrics.gauge(
:batched_migration_job_interval_seconds,
'Interval for a batched migration job'
),
gauge_job_duration: Gitlab::Metrics.gauge(
:batched_migration_job_duration_seconds,
'Duration for a batched migration job'
),
counter_updated_tuples: Gitlab::Metrics.counter(
:batched_migration_job_updated_tuples_total,
'Number of tuples updated by batched migration job'
),
gauge_migrated_tuples: Gitlab::Metrics.gauge(
:batched_migration_migrated_tuples_total,
'Total number of tuples migrated by a batched migration'
),
histogram_timings: Gitlab::Metrics.histogram(
:batched_migration_job_query_duration_seconds,
'Query timings for a batched migration job',
{},
QUERY_TIMING_BUCKETS
),
gauge_total_tuple_count: Gitlab::Metrics.gauge(
:batched_migration_total_tuple_count,
'Total tuple count the migration needs to touch'
),
gauge_last_update_time: Gitlab::Metrics.gauge(
:batched_migration_last_update_time_seconds,
'Unix epoch time in seconds'
)
}
end
end
private
def track_timing_metrics(base_labels, metrics)
return unless metrics && metrics['timings']
metrics['timings'].each do |key, timings|
summary = metric_for(:histogram_timings)
labels = base_labels.merge(operation: key)
timings.each do |timing|
summary.observe(labels, timing)
end
end
end
def metric_for(name)
self.class.metrics[name]
end
end
end
end
end
@@ -3,8 +3,9 @@
require 'spec_helper'
RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '#perform' do
subject { described_class.new.perform(job_record) }
subject { described_class.new(metrics: metrics_tracker).perform(job_record) }
let(:metrics_tracker) { instance_double('::Gitlab::Database::BackgroundMigration::PrometheusMetrics', track: nil) }
let(:job_class) { Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob }
let_it_be(:pause_ms) { 250 }
@@ -78,86 +79,6 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
end
end
context 'reporting prometheus metrics' do
let(:labels) { job_record.batched_migration.prometheus_labels }
before do
allow(job_instance).to receive(:perform)
end
it 'reports batch_size' do
expect(described_class.metrics[:gauge_batch_size]).to receive(:set).with(labels, job_record.batch_size)
subject
end
it 'reports sub_batch_size' do
expect(described_class.metrics[:gauge_sub_batch_size]).to receive(:set).with(labels, job_record.sub_batch_size)
subject
end
it 'reports interval' do
expect(described_class.metrics[:gauge_interval]).to receive(:set).with(labels, job_record.batched_migration.interval)
subject
end
it 'reports updated tuples (currently based on batch_size)' do
expect(described_class.metrics[:counter_updated_tuples]).to receive(:increment).with(labels, job_record.batch_size)
subject
end
it 'reports migrated tuples' do
count = double
expect(job_record.batched_migration).to receive(:migrated_tuple_count).and_return(count)
expect(described_class.metrics[:gauge_migrated_tuples]).to receive(:set).with(labels, count)
subject
end
it 'reports summary of query timings' do
metrics = { 'timings' => { 'update_all' => [1, 2, 3, 4, 5] } }
expect(job_instance).to receive(:batch_metrics).and_return(metrics)
metrics['timings'].each do |key, timings|
summary_labels = labels.merge(operation: key)
timings.each do |timing|
expect(described_class.metrics[:histogram_timings]).to receive(:observe).with(summary_labels, timing)
end
end
subject
end
it 'reports job duration' do
freeze_time do
expect(Time).to receive(:current).and_return(Time.zone.now - 5.seconds).ordered
allow(Time).to receive(:current).and_call_original
expect(described_class.metrics[:gauge_job_duration]).to receive(:set).with(labels, 5.seconds)
subject
end
end
it 'reports the total tuple count for the migration' do
expect(described_class.metrics[:gauge_total_tuple_count]).to receive(:set).with(labels, job_record.batched_migration.total_tuple_count)
subject
end
it 'reports last updated at timestamp' do
freeze_time do
expect(described_class.metrics[:gauge_last_update_time]).to receive(:set).with(labels, Time.current.to_i)
subject
end
end
end
context 'when the migration job does not raise an error' do
it 'marks the tracking record as succeeded' do
expect(job_instance).to receive(:perform).with(1, 10, 'events', 'id', 1, pause_ms, 'id', 'other_id')
@@ -171,6 +92,13 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
expect(reloaded_job_record.finished_at).to eq(Time.current)
end
end
it 'tracks metrics of the execution' do
expect(job_instance).to receive(:perform)
expect(metrics_tracker).to receive(:track).with(job_record)
subject
end
end
context 'when the migration job raises an error' do
@@ -189,6 +117,13 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
expect(reloaded_job_record.finished_at).to eq(Time.current)
end
end
it 'tracks metrics of the execution' do
expect(job_instance).to receive(:perform).and_raise(error_class)
expect(metrics_tracker).to receive(:track).with(job_record)
expect { subject }.to raise_error(error_class)
end
end
it_behaves_like 'an error is raised', RuntimeError.new('Something broke!')
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::BackgroundMigration::PrometheusMetrics, :prometheus do
describe '#track' do
let(:job_record) do
build(:batched_background_migration_job, :succeeded,
started_at: Time.current - 2.minutes,
finished_at: Time.current - 1.minute,
updated_at: Time.current,
metrics: { 'timings' => { 'update_all' => [0.05, 0.2, 0.4, 0.9, 4] } })
end
let(:labels) { job_record.batched_migration.prometheus_labels }
subject(:track_job_record_metrics) { described_class.new.track(job_record) }
it 'reports batch_size' do
track_job_record_metrics
expect(metric_for_job_by_name(:gauge_batch_size)).to eq(job_record.batch_size)
end
it 'reports sub_batch_size' do
track_job_record_metrics
expect(metric_for_job_by_name(:gauge_sub_batch_size)).to eq(job_record.sub_batch_size)
end
it 'reports interval' do
track_job_record_metrics
expect(metric_for_job_by_name(:gauge_interval)).to eq(job_record.batched_migration.interval)
end
it 'reports job duration' do
freeze_time do
track_job_record_metrics
expect(metric_for_job_by_name(:gauge_job_duration)).to eq(1.minute)
end
end
it 'increments updated tuples (currently based on batch_size)' do
expect(described_class.metrics[:counter_updated_tuples]).to receive(:increment)
.with(labels, job_record.batch_size)
.twice
.and_call_original
track_job_record_metrics
expect(metric_for_job_by_name(:counter_updated_tuples)).to eq(job_record.batch_size)
described_class.new.track(job_record)
expect(metric_for_job_by_name(:counter_updated_tuples)).to eq(job_record.batch_size * 2)
end
it 'reports migrated tuples' do
expect(job_record.batched_migration).to receive(:migrated_tuple_count).and_return(20)
track_job_record_metrics
expect(metric_for_job_by_name(:gauge_migrated_tuples)).to eq(20)
end
it 'reports the total tuple count for the migration' do
track_job_record_metrics
expect(metric_for_job_by_name(:gauge_total_tuple_count)).to eq(job_record.batched_migration.total_tuple_count)
end
it 'reports last updated at timestamp' do
freeze_time do
track_job_record_metrics
expect(metric_for_job_by_name(:gauge_last_update_time)).to eq(Time.current.to_i)
end
end
it 'reports summary of query timings' do
summary_labels = labels.merge(operation: 'update_all')
job_record.metrics['timings']['update_all'].each do |timing|
expect(described_class.metrics[:histogram_timings]).to receive(:observe)
.with(summary_labels, timing)
.and_call_original
end
track_job_record_metrics
expect(metric_for_job_by_name(:histogram_timings, job_labels: summary_labels))
.to eq({ 0.1 => 1.0, 0.25 => 2.0, 0.5 => 3.0, 1 => 4.0, 5 => 5.0 })
end
context 'when the tracking record does not have timing metrics' do
before do
job_record.metrics = {}
end
it 'does not attempt to report query timings' do
summary_labels = labels.merge(operation: 'update_all')
expect(described_class.metrics[:histogram_timings]).not_to receive(:observe)
track_job_record_metrics
expect(metric_for_job_by_name(:histogram_timings, job_labels: summary_labels))
.to eq({ 0.1 => 0.0, 0.25 => 0.0, 0.5 => 0.0, 1 => 0.0, 5 => 0.0 })
end
end
def metric_for_job_by_name(name, job_labels: labels)
described_class.metrics[name].values[job_labels].get
end
end
end