Commit 299b32fd authored by Dylan Griffith

Merge branch 'initialise-sidekiq-jobs-completion-metrics' into 'master'

Initialise sidekiq_jobs_completion_seconds before running jobs

See merge request gitlab-org/gitlab!64637
parents e1d4a322 bc1c330c
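A Prometheus time series is only exported once its label combination has been touched, so sidekiq_jobs_completion_seconds previously showed no data for a worker until that worker had completed (or failed) its first job. This change pre-registers the 'done' and 'fail' series for every worker the process will run, during Sidekiq boot. A minimal sketch of the pattern, using the same ::Gitlab::Metrics.histogram and .get calls that appear in the diff (the label set here is purely illustrative; the real labels come from Gitlab::SidekiqMiddleware::MetricsHelper#create_labels):

histogram = ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete Sidekiq job')

# Touching a label combination creates it with zero observations,
# so the series is exported before any job has run.
%w[done fail].each do |status|
  histogram.get(worker: 'MergeWorker', queue: 'merge', job_status: status)
end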
---
name: sidekiq_job_completion_metric_initialize
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/64637
rollout_issue_url: https://gitlab.com/gitlab-com/gl-infra/scalability/-/issues/1133
milestone: '14.1'
type: development
group: group::scalability
default_enabled: true
@@ -103,6 +103,21 @@ module Gitlab
         queues_for_sidekiq_queues_yml != config_queues
       end
 
+      # Returns a hash of worker class name => mapped queue name
+      def worker_queue_mappings
+        workers
+          .reject { |worker| worker.klass.is_a?(Gitlab::SidekiqConfig::DummyWorker) }
+          .to_h { |worker| [worker.klass.to_s, ::Gitlab::SidekiqConfig::WorkerRouter.global.route(worker.klass)] }
+      end
+
+      # Like worker_queue_mappings, but only for the queues running in
+      # the current Sidekiq process
+      def current_worker_queue_mappings
+        worker_queue_mappings
+          .select { |worker, queue| Sidekiq.options[:queues].include?(queue) }
+          .to_h
+      end
+
       private
 
       def find_workers(root, ee:)
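For illustration (not part of the diff), the new helpers return hashes of this shape; the exact values depend on the routing rules in effect and on the queues the current process listens to:

Gitlab::SidekiqConfig.worker_queue_mappings
# => { 'MergeWorker' => 'merge', 'AdminEmailWorker' => 'cronjob:admin_email', ... }

Gitlab::SidekiqConfig.current_worker_queue_mappings
# => { 'MergeWorker' => 'merge' }  # e.g. a process listening only on the 'merge' queue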
@@ -12,7 +12,13 @@ module Gitlab
         # Size limiter should be placed at the top
         chain.add ::Gitlab::SidekiqMiddleware::SizeLimiter::Server
         chain.add ::Gitlab::SidekiqMiddleware::Monitor
-        chain.add ::Gitlab::SidekiqMiddleware::ServerMetrics if metrics
+
+        if metrics
+          chain.add ::Gitlab::SidekiqMiddleware::ServerMetrics
+          ::Gitlab::SidekiqMiddleware::ServerMetrics.initialize_process_metrics
+        end
+
         chain.add ::Gitlab::SidekiqMiddleware::ArgumentsLogger if arguments_logger
         chain.add ::Gitlab::SidekiqMiddleware::MemoryKiller if memory_killer
         chain.add ::Gitlab::SidekiqMiddleware::RequestStoreMiddleware
@@ -9,10 +9,50 @@ module Gitlab
       # timeframes than the DEFAULT_BUCKET definition. Defined in seconds.
       SIDEKIQ_LATENCY_BUCKETS = [0.1, 0.25, 0.5, 1, 2.5, 5, 10, 60, 300, 600].freeze
 
-      def initialize
-        @metrics = init_metrics
-
-        @metrics[:sidekiq_concurrency].set({}, Sidekiq.options[:concurrency].to_i)
+      class << self
+        include ::Gitlab::SidekiqMiddleware::MetricsHelper
+
+        def metrics
+          {
+            sidekiq_jobs_cpu_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_cpu_seconds, 'Seconds of cpu time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
+            sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
+            sidekiq_jobs_db_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_db_seconds, 'Seconds of database time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
+            sidekiq_jobs_gitaly_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_gitaly_seconds, 'Seconds of Gitaly time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
+            sidekiq_jobs_queue_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_queue_duration_seconds, 'Duration in seconds that a Sidekiq job was queued before being executed', {}, SIDEKIQ_LATENCY_BUCKETS),
+            sidekiq_redis_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_redis_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent requests a Redis server', {}, Gitlab::Instrumentation::Redis::QUERY_TIME_BUCKETS),
+            sidekiq_elasticsearch_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_elasticsearch_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent in requests to an Elasticsearch server', {}, SIDEKIQ_LATENCY_BUCKETS),
+            sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
+            sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
+            sidekiq_redis_requests_total: ::Gitlab::Metrics.counter(:sidekiq_redis_requests_total, 'Redis requests during a Sidekiq job execution'),
+            sidekiq_elasticsearch_requests_total: ::Gitlab::Metrics.counter(:sidekiq_elasticsearch_requests_total, 'Elasticsearch requests during a Sidekiq job execution'),
+            sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :all),
+            sidekiq_concurrency: ::Gitlab::Metrics.gauge(:sidekiq_concurrency, 'Maximum number of Sidekiq jobs', {}, :all)
+          }
+        end
+
+        def initialize_process_metrics
+          metrics = self.metrics
+
+          metrics[:sidekiq_concurrency].set({}, Sidekiq.options[:concurrency].to_i)
+
+          return unless ::Feature.enabled?(:sidekiq_job_completion_metric_initialize, default_enabled: :yaml)
+
+          ::Gitlab::SidekiqConfig.current_worker_queue_mappings.each do |worker, queue|
+            worker_class = worker.safe_constantize
+            next unless worker_class
+
+            base_labels = create_labels(worker_class, queue, {})
+
+            %w[done fail].each do |status|
+              metrics[:sidekiq_jobs_completion_seconds].get(base_labels.merge(job_status: status))
+            end
+          end
+        end
+      end
+
+      def initialize
+        @metrics = self.class.metrics
 
         if ::Gitlab::Database::LoadBalancing.enable?
           @metrics[:sidekiq_load_balancing_count] = ::Gitlab::Metrics.counter(:sidekiq_load_balancing_count, 'Sidekiq jobs with load balancing')
@@ -85,24 +125,6 @@ module Gitlab
         end
       end
 
-      def init_metrics
-        {
-          sidekiq_jobs_cpu_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_cpu_seconds, 'Seconds of cpu time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
-          sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
-          sidekiq_jobs_db_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_db_seconds, 'Seconds of database time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
-          sidekiq_jobs_gitaly_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_gitaly_seconds, 'Seconds of Gitaly time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
-          sidekiq_jobs_queue_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_queue_duration_seconds, 'Duration in seconds that a Sidekiq job was queued before being executed', {}, SIDEKIQ_LATENCY_BUCKETS),
-          sidekiq_redis_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_redis_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent requests a Redis server', {}, Gitlab::Instrumentation::Redis::QUERY_TIME_BUCKETS),
-          sidekiq_elasticsearch_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_elasticsearch_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent in requests to an Elasticsearch server', {}, SIDEKIQ_LATENCY_BUCKETS),
-          sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
-          sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
-          sidekiq_redis_requests_total: ::Gitlab::Metrics.counter(:sidekiq_redis_requests_total, 'Redis requests during a Sidekiq job execution'),
-          sidekiq_elasticsearch_requests_total: ::Gitlab::Metrics.counter(:sidekiq_elasticsearch_requests_total, 'Elasticsearch requests during a Sidekiq job execution'),
-          sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :all),
-          sidekiq_concurrency: ::Gitlab::Metrics.gauge(:sidekiq_concurrency, 'Maximum number of Sidekiq jobs', {}, :all)
-        }
-      end
-
       private
 
       def with_load_balancing_settings(job)
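The net effect is that a Sidekiq process exports zero-valued completion series for its workers as soon as it boots. Roughly, the scraped /metrics output would then include lines such as the following (illustrative only; the label values are taken from the specs further down):

sidekiq_jobs_completion_seconds_count{worker="MergeWorker",queue="merge",urgency="high",external_dependencies="no",feature_category="source_code_management",boundary="",job_status="done"} 0
sidekiq_jobs_completion_seconds_count{worker="MergeWorker",queue="merge",urgency="high",external_dependencies="no",feature_category="source_code_management",boundary="",job_status="fail"} 0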
@@ -7,25 +7,18 @@ namespace :gitlab do
     end
 
     namespace :migrate_jobs do
-      def mappings
-        ::Gitlab::SidekiqConfig
-          .workers
-          .reject { |worker| worker.klass.is_a?(Gitlab::SidekiqConfig::DummyWorker) }
-          .to_h { |worker| [worker.klass.to_s, ::Gitlab::SidekiqConfig::WorkerRouter.global.route(worker.klass)] }
-      end
-
       desc 'GitLab | Sidekiq | Migrate jobs in the scheduled set to new queue names'
       task schedule: :environment do
         ::Gitlab::SidekiqMigrateJobs
           .new('schedule', logger: Logger.new($stdout))
-          .execute(mappings)
+          .execute(::Gitlab::SidekiqConfig.worker_queue_mappings)
       end
 
       desc 'GitLab | Sidekiq | Migrate jobs in the retry set to new queue names'
       task retry: :environment do
         ::Gitlab::SidekiqMigrateJobs
           .new('retry', logger: Logger.new($stdout))
-          .execute(mappings)
+          .execute(::Gitlab::SidekiqConfig.worker_queue_mappings)
       end
     end
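Usage of the migration tasks is unchanged; only the source of the worker => queue mapping moves into Gitlab::SidekiqConfig. Assuming the enclosing namespace not shown in this hunk is :sidekiq, the invocation would be:

bundle exec rake gitlab:sidekiq:migrate_jobs:schedule
bundle exec rake gitlab:sidekiq:migrate_jobs:retry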
@@ -122,4 +122,43 @@ RSpec.describe Gitlab::SidekiqConfig do
       expect(described_class.sidekiq_queues_yml_outdated?).to be(false)
     end
   end
+
+  describe '.worker_queue_mappings' do
+    it 'returns the worker class => queue mappings based on the current routing configuration' do
+      test_routes = [
+        ['urgency=high', 'default'],
+        ['*', nil]
+      ]
+
+      allow(::Gitlab::SidekiqConfig::WorkerRouter)
+        .to receive(:global).and_return(::Gitlab::SidekiqConfig::WorkerRouter.new(test_routes))
+
+      expect(described_class.worker_queue_mappings).to include('MergeWorker' => 'default',
+                                                               'BuildFinishedWorker' => 'default',
+                                                               'BackgroundMigrationWorker' => 'background_migration',
+                                                               'AdminEmailWorker' => 'cronjob:admin_email')
+    end
+  end
+
+  describe '.current_worker_queue_mappings' do
+    it 'returns worker queue mappings that have queues in the current Sidekiq options' do
+      test_routes = [
+        ['urgency=high', 'default'],
+        ['*', nil]
+      ]
+
+      allow(::Gitlab::SidekiqConfig::WorkerRouter)
+        .to receive(:global).and_return(::Gitlab::SidekiqConfig::WorkerRouter.new(test_routes))
+      allow(Sidekiq).to receive(:options).and_return(queues: %w[default background_migration])
+
+      mappings = described_class.current_worker_queue_mappings
+
+      expect(mappings).to include('MergeWorker' => 'default',
+                                  'BuildFinishedWorker' => 'default',
+                                  'BackgroundMigrationWorker' => 'background_migration')
+      expect(mappings).not_to include('AdminEmailWorker' => 'cronjob:admin_email')
+    end
+  end
 end
@@ -8,11 +8,77 @@ RSpec.describe Gitlab::SidekiqMiddleware::ServerMetrics do
   context "with mocked prometheus" do
     include_context 'server metrics with mocked prometheus'
 
-    describe '#initialize' do
+    describe '.initialize_process_metrics' do
       it 'sets concurrency metrics' do
         expect(concurrency_metric).to receive(:set).with({}, Sidekiq.options[:concurrency].to_i)
 
-        subject
+        described_class.initialize_process_metrics
       end
+
+      it 'initializes sidekiq_jobs_completion_seconds for the workers in the current Sidekiq process' do
+        allow(Gitlab::SidekiqConfig)
+          .to receive(:current_worker_queue_mappings)
+          .and_return('MergeWorker' => 'merge', 'BuildFinishedWorker' => 'default')
+
+        expect(completion_seconds_metric)
+          .to receive(:get).with(queue: 'merge',
+                                 worker: 'MergeWorker',
+                                 urgency: 'high',
+                                 external_dependencies: 'no',
+                                 feature_category: 'source_code_management',
+                                 boundary: '',
+                                 job_status: 'done')
+        expect(completion_seconds_metric)
+          .to receive(:get).with(queue: 'merge',
+                                 worker: 'MergeWorker',
+                                 urgency: 'high',
+                                 external_dependencies: 'no',
+                                 feature_category: 'source_code_management',
+                                 boundary: '',
+                                 job_status: 'fail')
+        expect(completion_seconds_metric)
+          .to receive(:get).with(queue: 'default',
+                                 worker: 'BuildFinishedWorker',
+                                 urgency: 'high',
+                                 external_dependencies: 'no',
+                                 feature_category: 'continuous_integration',
+                                 boundary: 'cpu',
+                                 job_status: 'done')
+        expect(completion_seconds_metric)
+          .to receive(:get).with(queue: 'default',
+                                 worker: 'BuildFinishedWorker',
+                                 urgency: 'high',
+                                 external_dependencies: 'no',
+                                 feature_category: 'continuous_integration',
+                                 boundary: 'cpu',
+                                 job_status: 'fail')
+
+        described_class.initialize_process_metrics
+      end
+
+      context 'when the sidekiq_job_completion_metric_initialize feature flag is disabled' do
+        before do
+          stub_feature_flags(sidekiq_job_completion_metric_initialize: false)
+        end
+
+        it 'sets the concurrency metric' do
+          expect(concurrency_metric).to receive(:set).with({}, Sidekiq.options[:concurrency].to_i)
+
+          described_class.initialize_process_metrics
+        end
+
+        it 'does not initialize sidekiq_jobs_completion_seconds' do
+          allow(Gitlab::SidekiqConfig)
+            .to receive(:current_worker_queue_mappings)
+            .and_return('MergeWorker' => 'merge', 'BuildFinishedWorker' => 'default')
+
+          expect(completion_seconds_metric).not_to receive(:get)
+
+          described_class.initialize_process_metrics
+        end
+      end
     end
@@ -47,6 +113,26 @@ RSpec.describe Gitlab::SidekiqMiddleware::ServerMetrics do
         subject.call(worker, job, :test) { nil }
       end
 
+      it 'sets sidekiq_jobs_completion_seconds values that are compatible with those from .initialize_process_metrics' do
+        label_validator = Prometheus::Client::LabelSetValidator.new([:le])
+
+        allow(Gitlab::SidekiqConfig)
+          .to receive(:current_worker_queue_mappings)
+          .and_return('MergeWorker' => 'merge', 'BuildFinishedWorker' => 'default')
+
+        allow(completion_seconds_metric).to receive(:get) do |labels|
+          expect { label_validator.validate(labels) }.not_to raise_error
+        end
+
+        allow(completion_seconds_metric).to receive(:observe) do |labels, _duration|
+          expect { label_validator.validate(labels) }.not_to raise_error
+        end
+
+        described_class.initialize_process_metrics
+
+        subject.call(worker, job, :test) { nil }
+      end
+
       it 'sets the thread name if it was nil' do
         allow(Thread.current).to receive(:name).and_return(nil)
         expect(Thread.current).to receive(:name=).with(Gitlab::Metrics::Samplers::ThreadsSampler::SIDEKIQ_WORKER_THREAD_NAME)