Commit d84fe983 authored by Sean McGivern's avatar Sean McGivern

Initialise sidekiq_jobs_completion_seconds before running jobs

When a worker runs for the first time, it will have
sidekiq_jobs_completion_seconds_count set to 1 (because it was just
completed). But this is wrong in Prometheus terms, because there was no
previous value of 0, and so the rate of change will also be 0.

To work around this, we initialise this bucket for all workers that can
be run by the current Sidekiq process before processing the first job.
This way we can calculate rates correctly for infrequent workers.
parent a9d16d2f
...@@ -103,6 +103,21 @@ module Gitlab ...@@ -103,6 +103,21 @@ module Gitlab
queues_for_sidekiq_queues_yml != config_queues queues_for_sidekiq_queues_yml != config_queues
end end
# Maps every routable worker class name (String) to the queue name the
# global WorkerRouter assigns it. Dummy placeholder workers are excluded
# because they have no real class to route.
def worker_queue_mappings
  workers.each_with_object({}) do |worker, mappings|
    next if worker.klass.is_a?(Gitlab::SidekiqConfig::DummyWorker)

    mappings[worker.klass.to_s] = ::Gitlab::SidekiqConfig::WorkerRouter.global.route(worker.klass)
  end
end
# Like worker_queue_mappings, but restricted to the queues this Sidekiq
# process is actually listening on (per Sidekiq.options[:queues]).
def current_worker_queue_mappings
  active_queues = Sidekiq.options[:queues]

  worker_queue_mappings.each_with_object({}) do |(worker, queue), mappings|
    mappings[worker] = queue if active_queues.include?(queue)
  end
end
private private
def find_workers(root, ee:) def find_workers(root, ee:)
......
...@@ -12,7 +12,13 @@ module Gitlab ...@@ -12,7 +12,13 @@ module Gitlab
# Size limiter should be placed at the top # Size limiter should be placed at the top
chain.add ::Gitlab::SidekiqMiddleware::SizeLimiter::Server chain.add ::Gitlab::SidekiqMiddleware::SizeLimiter::Server
chain.add ::Gitlab::SidekiqMiddleware::Monitor chain.add ::Gitlab::SidekiqMiddleware::Monitor
chain.add ::Gitlab::SidekiqMiddleware::ServerMetrics if metrics
if metrics
chain.add ::Gitlab::SidekiqMiddleware::ServerMetrics
::Gitlab::SidekiqMiddleware::ServerMetrics.initialize_process_metrics
end
chain.add ::Gitlab::SidekiqMiddleware::ArgumentsLogger if arguments_logger chain.add ::Gitlab::SidekiqMiddleware::ArgumentsLogger if arguments_logger
chain.add ::Gitlab::SidekiqMiddleware::MemoryKiller if memory_killer chain.add ::Gitlab::SidekiqMiddleware::MemoryKiller if memory_killer
chain.add ::Gitlab::SidekiqMiddleware::RequestStoreMiddleware chain.add ::Gitlab::SidekiqMiddleware::RequestStoreMiddleware
......
...@@ -9,10 +9,48 @@ module Gitlab ...@@ -9,10 +9,48 @@ module Gitlab
# timeframes than the DEFAULT_BUCKET definition. Defined in seconds. # timeframes than the DEFAULT_BUCKET definition. Defined in seconds.
SIDEKIQ_LATENCY_BUCKETS = [0.1, 0.25, 0.5, 1, 2.5, 5, 10, 60, 300, 600].freeze SIDEKIQ_LATENCY_BUCKETS = [0.1, 0.25, 0.5, 1, 2.5, 5, 10, 60, 300, 600].freeze
def initialize class << self
@metrics = init_metrics include ::Gitlab::SidekiqMiddleware::MetricsHelper
# Builds the hash of metric name => Prometheus metric object used by this
# middleware. Called both from .initialize_process_metrics (before the
# first job) and from #initialize on each middleware instance.
# NOTE(review): assumes ::Gitlab::Metrics.histogram/counter/gauge return
# the same underlying metric when called repeatedly with the same name —
# confirm against Gitlab::Metrics registration semantics.
def metrics
  {
    sidekiq_jobs_cpu_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_cpu_seconds, 'Seconds of cpu time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
    sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
    sidekiq_jobs_db_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_db_seconds, 'Seconds of database time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
    sidekiq_jobs_gitaly_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_gitaly_seconds, 'Seconds of Gitaly time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
    sidekiq_jobs_queue_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_queue_duration_seconds, 'Duration in seconds that a Sidekiq job was queued before being executed', {}, SIDEKIQ_LATENCY_BUCKETS),
    # Fixed help-text grammar: "spent requests a" -> "spent requesting a".
    sidekiq_redis_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_redis_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent requesting a Redis server', {}, Gitlab::Instrumentation::Redis::QUERY_TIME_BUCKETS),
    sidekiq_elasticsearch_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_elasticsearch_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent in requests to an Elasticsearch server', {}, SIDEKIQ_LATENCY_BUCKETS),
    sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
    sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
    sidekiq_redis_requests_total: ::Gitlab::Metrics.counter(:sidekiq_redis_requests_total, 'Redis requests during a Sidekiq job execution'),
    sidekiq_elasticsearch_requests_total: ::Gitlab::Metrics.counter(:sidekiq_elasticsearch_requests_total, 'Elasticsearch requests during a Sidekiq job execution'),
    sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :all),
    sidekiq_concurrency: ::Gitlab::Metrics.gauge(:sidekiq_concurrency, 'Maximum number of Sidekiq jobs', {}, :all)
  }
end
# Pre-registers per-worker metric series before the first job runs.
# Without this, a worker's first completion creates the series with
# count=1 and Prometheus rate() sees no prior 0 sample, so the computed
# rate of change is wrong for infrequent workers (see commit message).
def initialize_process_metrics
  metrics = self.metrics

  metrics[:sidekiq_concurrency].set({}, Sidekiq.options[:concurrency].to_i)

  ::Gitlab::SidekiqConfig.current_worker_queue_mappings.each do |worker, queue|
    worker_class = worker.safe_constantize
    next unless worker_class

    base_labels = create_labels(worker_class, queue, {})

    # Touch both terminal statuses so each histogram series exists at 0.
    %w[done fail].each do |status|
      metrics[:sidekiq_jobs_completion_seconds].get(base_labels.merge(job_status: status))
    end
  end
end
end
def initialize
@metrics = self.class.metrics
if ::Gitlab::Database::LoadBalancing.enable? if ::Gitlab::Database::LoadBalancing.enable?
@metrics[:sidekiq_load_balancing_count] = ::Gitlab::Metrics.counter(:sidekiq_load_balancing_count, 'Sidekiq jobs with load balancing') @metrics[:sidekiq_load_balancing_count] = ::Gitlab::Metrics.counter(:sidekiq_load_balancing_count, 'Sidekiq jobs with load balancing')
...@@ -85,24 +123,6 @@ module Gitlab ...@@ -85,24 +123,6 @@ module Gitlab
end end
end end
# Legacy instance-level metric registration (replaced in this commit by
# the class-level `metrics` method so series can be initialised before
# the first job is processed). Builds metric name => Prometheus metric.
# NOTE(review): 'spent requests a Redis server' below reads awkwardly —
# presumably 'spent requesting'; left byte-identical here.
def init_metrics
{
sidekiq_jobs_cpu_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_cpu_seconds, 'Seconds of cpu time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_db_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_db_seconds, 'Seconds of database time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_gitaly_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_gitaly_seconds, 'Seconds of Gitaly time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_queue_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_queue_duration_seconds, 'Duration in seconds that a Sidekiq job was queued before being executed', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_redis_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_redis_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent requests a Redis server', {}, Gitlab::Instrumentation::Redis::QUERY_TIME_BUCKETS),
sidekiq_elasticsearch_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_elasticsearch_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent in requests to an Elasticsearch server', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
sidekiq_redis_requests_total: ::Gitlab::Metrics.counter(:sidekiq_redis_requests_total, 'Redis requests during a Sidekiq job execution'),
sidekiq_elasticsearch_requests_total: ::Gitlab::Metrics.counter(:sidekiq_elasticsearch_requests_total, 'Elasticsearch requests during a Sidekiq job execution'),
sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :all),
sidekiq_concurrency: ::Gitlab::Metrics.gauge(:sidekiq_concurrency, 'Maximum number of Sidekiq jobs', {}, :all)
}
end
private private
def with_load_balancing_settings(job) def with_load_balancing_settings(job)
......
...@@ -7,25 +7,18 @@ namespace :gitlab do ...@@ -7,25 +7,18 @@ namespace :gitlab do
end end
namespace :migrate_jobs do namespace :migrate_jobs do
# Worker class name => routed queue name for every real (non-dummy)
# worker, per the global WorkerRouter.
def mappings
  ::Gitlab::SidekiqConfig.workers.each_with_object({}) do |worker, result|
    next if worker.klass.is_a?(Gitlab::SidekiqConfig::DummyWorker)

    result[worker.klass.to_s] = ::Gitlab::SidekiqConfig::WorkerRouter.global.route(worker.klass)
  end
end
desc 'GitLab | Sidekiq | Migrate jobs in the scheduled set to new queue names' desc 'GitLab | Sidekiq | Migrate jobs in the scheduled set to new queue names'
task schedule: :environment do task schedule: :environment do
::Gitlab::SidekiqMigrateJobs ::Gitlab::SidekiqMigrateJobs
.new('schedule', logger: Logger.new($stdout)) .new('schedule', logger: Logger.new($stdout))
.execute(mappings) .execute(::Gitlab::SidekiqConfig.worker_queue_mappings)
end end
desc 'GitLab | Sidekiq | Migrate jobs in the retry set to new queue names' desc 'GitLab | Sidekiq | Migrate jobs in the retry set to new queue names'
task retry: :environment do task retry: :environment do
::Gitlab::SidekiqMigrateJobs ::Gitlab::SidekiqMigrateJobs
.new('retry', logger: Logger.new($stdout)) .new('retry', logger: Logger.new($stdout))
.execute(mappings) .execute(::Gitlab::SidekiqConfig.worker_queue_mappings)
end end
end end
......
...@@ -122,4 +122,43 @@ RSpec.describe Gitlab::SidekiqConfig do ...@@ -122,4 +122,43 @@ RSpec.describe Gitlab::SidekiqConfig do
expect(described_class.sidekiq_queues_yml_outdated?).to be(false) expect(described_class.sidekiq_queues_yml_outdated?).to be(false)
end end
end end
# Verifies that the mapping follows the globally configured WorkerRouter
# rather than each worker's declared queue.
describe '.worker_queue_mappings' do
it 'returns the worker class => queue mappings based on the current routing configuration' do
# Route high-urgency workers to 'default'; a nil route falls back to the
# worker's own queue name — TODO confirm fallback against WorkerRouter.
test_routes = [
['urgency=high', 'default'],
['*', nil]
]
allow(::Gitlab::SidekiqConfig::WorkerRouter)
.to receive(:global).and_return(::Gitlab::SidekiqConfig::WorkerRouter.new(test_routes))
expect(described_class.worker_queue_mappings).to include('MergeWorker' => 'default',
'BuildFinishedWorker' => 'default',
'BackgroundMigrationWorker' => 'background_migration',
'AdminEmailWorker' => 'cronjob:admin_email')
end
end
# Verifies that mappings are filtered down to the queues the current
# Sidekiq process listens on (Sidekiq.options[:queues]).
describe '.current_worker_queue_mappings' do
it 'returns worker queue mappings that have queues in the current Sidekiq options' do
test_routes = [
['urgency=high', 'default'],
['*', nil]
]
allow(::Gitlab::SidekiqConfig::WorkerRouter)
.to receive(:global).and_return(::Gitlab::SidekiqConfig::WorkerRouter.new(test_routes))
# Process only listens on these two queues, so cronjob:* must be dropped.
allow(Sidekiq).to receive(:options).and_return(queues: %w[default background_migration])
mappings = described_class.current_worker_queue_mappings
expect(mappings).to include('MergeWorker' => 'default',
'BuildFinishedWorker' => 'default',
'BackgroundMigrationWorker' => 'background_migration')
expect(mappings).not_to include('AdminEmailWorker' => 'cronjob:admin_email')
end
end
end end
...@@ -8,11 +8,55 @@ RSpec.describe Gitlab::SidekiqMiddleware::ServerMetrics do ...@@ -8,11 +8,55 @@ RSpec.describe Gitlab::SidekiqMiddleware::ServerMetrics do
context "with mocked prometheus" do context "with mocked prometheus" do
include_context 'server metrics with mocked prometheus' include_context 'server metrics with mocked prometheus'
describe '#initialize' do describe '.initialize_process_metrics' do
it 'sets concurrency metrics' do it 'sets concurrency metrics' do
expect(concurrency_metric).to receive(:set).with({}, Sidekiq.options[:concurrency].to_i) expect(concurrency_metric).to receive(:set).with({}, Sidekiq.options[:concurrency].to_i)
subject described_class.initialize_process_metrics
end
# Each mapped worker must have its completion histogram series touched
# (via .get) for both terminal job statuses, so the series exists at 0
# before the first job completes.
it 'initializes sidekiq_jobs_completion_seconds for the workers in the current Sidekiq process' do
allow(Gitlab::SidekiqConfig)
.to receive(:current_worker_queue_mappings)
.and_return('MergeWorker' => 'merge', 'BuildFinishedWorker' => 'default')
# One expectation per (worker, job_status) pair; labels come from
# create_labels for the real worker classes.
expect(completion_seconds_metric)
.to receive(:get).with(queue: 'merge',
worker: 'MergeWorker',
urgency: 'high',
external_dependencies: 'no',
feature_category: 'source_code_management',
boundary: '',
job_status: 'done')
expect(completion_seconds_metric)
.to receive(:get).with(queue: 'merge',
worker: 'MergeWorker',
urgency: 'high',
external_dependencies: 'no',
feature_category: 'source_code_management',
boundary: '',
job_status: 'fail')
expect(completion_seconds_metric)
.to receive(:get).with(queue: 'default',
worker: 'BuildFinishedWorker',
urgency: 'high',
external_dependencies: 'no',
feature_category: 'continuous_integration',
boundary: 'cpu',
job_status: 'done')
expect(completion_seconds_metric)
.to receive(:get).with(queue: 'default',
worker: 'BuildFinishedWorker',
urgency: 'high',
external_dependencies: 'no',
feature_category: 'continuous_integration',
boundary: 'cpu',
job_status: 'fail')
described_class.initialize_process_metrics
end
end end
...@@ -47,6 +91,26 @@ RSpec.describe Gitlab::SidekiqMiddleware::ServerMetrics do ...@@ -47,6 +91,26 @@ RSpec.describe Gitlab::SidekiqMiddleware::ServerMetrics do
subject.call(worker, job, :test) { nil } subject.call(worker, job, :test) { nil }
end end
# Guards against label-set drift: the labels used when pre-initialising
# the histogram (.get) and when observing a real job (.observe) must
# both satisfy the Prometheus label validator, i.e. be the same shape.
it 'sets sidekiq_jobs_completion_seconds values that are compatible with those from .initialize_process_metrics' do
label_validator = Prometheus::Client::LabelSetValidator.new([:le])
allow(Gitlab::SidekiqConfig)
.to receive(:current_worker_queue_mappings)
.and_return('MergeWorker' => 'merge', 'BuildFinishedWorker' => 'default')
allow(completion_seconds_metric).to receive(:get) do |labels|
expect { label_validator.validate(labels) }.not_to raise_error
end
allow(completion_seconds_metric).to receive(:observe) do |labels, _duration|
expect { label_validator.validate(labels) }.not_to raise_error
end
described_class.initialize_process_metrics
subject.call(worker, job, :test) { nil }
end
it 'sets the thread name if it was nil' do it 'sets the thread name if it was nil' do
allow(Thread.current).to receive(:name).and_return(nil) allow(Thread.current).to receive(:name).and_return(nil)
expect(Thread.current).to receive(:name=).with(Gitlab::Metrics::Samplers::ThreadsSampler::SIDEKIQ_WORKER_THREAD_NAME) expect(Thread.current).to receive(:name=).with(Gitlab::Metrics::Samplers::ThreadsSampler::SIDEKIQ_WORKER_THREAD_NAME)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment