Commit d84fe983 authored by Sean McGivern

Initialise sidekiq_jobs_completion_seconds before running jobs

When a worker runs for the first time, its
sidekiq_jobs_completion_seconds_count is set to 1 (because a job has just
completed). In Prometheus terms this is a problem: the series had no
previous value of 0, so the calculated rate of change is also 0.

To work around this, we initialise this bucket for all workers that can
be run by the current Sidekiq process before processing the first job.
This way we can calculate rates correctly for infrequent workers.
parent a9d16d2f
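
To illustrate the problem the commit message describes (illustrative Ruby, not part of the change): Prometheus only reports an increase when a series moves from an earlier sample to a later one, so a counter whose first scraped sample is already 1 never shows that first completion as a rate change.

# Illustrative only: how increase() behaves over the scraped samples of
# sidekiq_jobs_completion_seconds_count for an infrequent worker.
def increase(samples)
  samples.last - samples.first
end

# Without pre-initialisation the series only exists once the first job has
# completed, so the first scraped sample is already 1.
samples_without_init = [1, 1, 1]
increase(samples_without_init) # => 0 - the first completion never shows up

# With the series initialised before any job runs, it starts at 0 and the
# first completion is visible as an increase.
samples_with_init = [0, 0, 1]
increase(samples_with_init) # => 1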
@@ -103,6 +103,21 @@ module Gitlab
         queues_for_sidekiq_queues_yml != config_queues
       end
 
+      # Returns a hash of worker class name => mapped queue name
+      def worker_queue_mappings
+        workers
+          .reject { |worker| worker.klass.is_a?(Gitlab::SidekiqConfig::DummyWorker) }
+          .to_h { |worker| [worker.klass.to_s, ::Gitlab::SidekiqConfig::WorkerRouter.global.route(worker.klass)] }
+      end
+
+      # Like worker_queue_mappings, but only for the queues running in
+      # the current Sidekiq process
+      def current_worker_queue_mappings
+        worker_queue_mappings
+          .select { |worker, queue| Sidekiq.options[:queues].include?(queue) }
+          .to_h
+      end
+
       private
 
       def find_workers(root, ee:)
......
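
For illustration, the shape of what the two helpers added above return (hypothetical worker and queue names; real values depend on the configured routing rules and on the process's queue list):

# Hypothetical output for a routing rule that keeps workers on their own
# queues, in a process that only listens on the 'merge' queue.
::Gitlab::SidekiqConfig.worker_queue_mappings
# => { "MergeWorker" => "merge", "AdminEmailWorker" => "cronjob:admin_email", ... }

# Restricted to queues present in Sidekiq.options[:queues] for this process:
::Gitlab::SidekiqConfig.current_worker_queue_mappings
# => { "MergeWorker" => "merge" }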
@@ -12,7 +12,13 @@ module Gitlab
         # Size limiter should be placed at the top
         chain.add ::Gitlab::SidekiqMiddleware::SizeLimiter::Server
         chain.add ::Gitlab::SidekiqMiddleware::Monitor
-        chain.add ::Gitlab::SidekiqMiddleware::ServerMetrics if metrics
+
+        if metrics
+          chain.add ::Gitlab::SidekiqMiddleware::ServerMetrics
+
+          ::Gitlab::SidekiqMiddleware::ServerMetrics.initialize_process_metrics
+        end
+
         chain.add ::Gitlab::SidekiqMiddleware::ArgumentsLogger if arguments_logger
         chain.add ::Gitlab::SidekiqMiddleware::MemoryKiller if memory_killer
         chain.add ::Gitlab::SidekiqMiddleware::RequestStoreMiddleware
......
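
The chain.add calls above run while the server middleware chain is being configured, i.e. once per Sidekiq process at boot, before any job is pulled from Redis. A rough sketch of that call site (assuming the surrounding server_configurator helper; flag values are simplified, the real initializer derives them from settings):

Sidekiq.configure_server do |config|
  config.server_middleware(&Gitlab::SidekiqMiddleware.server_configurator(
    metrics: true,            # adds ServerMetrics and pre-initialises its series
    arguments_logger: false,
    memory_killer: false
  ))
end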
@@ -9,10 +9,48 @@ module Gitlab
       # timeframes than the DEFAULT_BUCKET definition. Defined in seconds.
       SIDEKIQ_LATENCY_BUCKETS = [0.1, 0.25, 0.5, 1, 2.5, 5, 10, 60, 300, 600].freeze
 
-      def initialize
-        @metrics = init_metrics
-
-        @metrics[:sidekiq_concurrency].set({}, Sidekiq.options[:concurrency].to_i)
+      class << self
+        include ::Gitlab::SidekiqMiddleware::MetricsHelper
+
+        def metrics
+          {
+            sidekiq_jobs_cpu_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_cpu_seconds, 'Seconds of cpu time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
+            sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
+            sidekiq_jobs_db_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_db_seconds, 'Seconds of database time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
+            sidekiq_jobs_gitaly_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_gitaly_seconds, 'Seconds of Gitaly time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
+            sidekiq_jobs_queue_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_queue_duration_seconds, 'Duration in seconds that a Sidekiq job was queued before being executed', {}, SIDEKIQ_LATENCY_BUCKETS),
+            sidekiq_redis_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_redis_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent requests a Redis server', {}, Gitlab::Instrumentation::Redis::QUERY_TIME_BUCKETS),
+            sidekiq_elasticsearch_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_elasticsearch_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent in requests to an Elasticsearch server', {}, SIDEKIQ_LATENCY_BUCKETS),
+            sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
+            sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
+            sidekiq_redis_requests_total: ::Gitlab::Metrics.counter(:sidekiq_redis_requests_total, 'Redis requests during a Sidekiq job execution'),
+            sidekiq_elasticsearch_requests_total: ::Gitlab::Metrics.counter(:sidekiq_elasticsearch_requests_total, 'Elasticsearch requests during a Sidekiq job execution'),
+            sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :all),
+            sidekiq_concurrency: ::Gitlab::Metrics.gauge(:sidekiq_concurrency, 'Maximum number of Sidekiq jobs', {}, :all)
+          }
+        end
+
+        def initialize_process_metrics
+          metrics = self.metrics
+
+          metrics[:sidekiq_concurrency].set({}, Sidekiq.options[:concurrency].to_i)
+
+          ::Gitlab::SidekiqConfig.current_worker_queue_mappings.each do |worker, queue|
+            worker_class = worker.safe_constantize
+
+            next unless worker_class
+
+            base_labels = create_labels(worker_class, queue, {})
+
+            %w[done fail].each do |status|
+              metrics[:sidekiq_jobs_completion_seconds].get(base_labels.merge(job_status: status))
+            end
+          end
+        end
+      end
+
+      def initialize
+        @metrics = self.class.metrics
 
         if ::Gitlab::Database::LoadBalancing.enable?
           @metrics[:sidekiq_load_balancing_count] = ::Gitlab::Metrics.counter(:sidekiq_load_balancing_count, 'Sidekiq jobs with load balancing')
@@ -85,24 +123,6 @@ module Gitlab
         end
       end
 
-      def init_metrics
-        {
-          sidekiq_jobs_cpu_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_cpu_seconds, 'Seconds of cpu time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
-          sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
-          sidekiq_jobs_db_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_db_seconds, 'Seconds of database time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
-          sidekiq_jobs_gitaly_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_gitaly_seconds, 'Seconds of Gitaly time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
-          sidekiq_jobs_queue_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_queue_duration_seconds, 'Duration in seconds that a Sidekiq job was queued before being executed', {}, SIDEKIQ_LATENCY_BUCKETS),
-          sidekiq_redis_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_redis_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent requests a Redis server', {}, Gitlab::Instrumentation::Redis::QUERY_TIME_BUCKETS),
-          sidekiq_elasticsearch_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_elasticsearch_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent in requests to an Elasticsearch server', {}, SIDEKIQ_LATENCY_BUCKETS),
-          sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
-          sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
-          sidekiq_redis_requests_total: ::Gitlab::Metrics.counter(:sidekiq_redis_requests_total, 'Redis requests during a Sidekiq job execution'),
-          sidekiq_elasticsearch_requests_total: ::Gitlab::Metrics.counter(:sidekiq_elasticsearch_requests_total, 'Elasticsearch requests during a Sidekiq job execution'),
-          sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :all),
-          sidekiq_concurrency: ::Gitlab::Metrics.gauge(:sidekiq_concurrency, 'Maximum number of Sidekiq jobs', {}, :all)
-        }
-      end
-
       private
 
       def with_load_balancing_settings(job)
......
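
The important detail in initialize_process_metrics is that calling get with a complete label set is enough to make the histogram series exist at zero observations; a real duration observed later then shows up as a change from 0. A small sketch using the same interfaces (label values are illustrative, borrowed from the spec below):

# Illustrative sketch of the pre-initialisation idea.
completion = ::Gitlab::Metrics.histogram(
  :sidekiq_jobs_completion_seconds,
  'Seconds to complete Sidekiq job',
  {},
  [0.1, 0.25, 0.5, 1, 2.5, 5, 10, 60, 300, 600]
)

# Hypothetical base labels; in the middleware they come from create_labels.
base_labels = {
  worker: 'MergeWorker', queue: 'merge', urgency: 'high',
  external_dependencies: 'no', feature_category: 'source_code_management',
  boundary: ''
}

# get() registers each label set, so both series start at a count of 0.
%w[done fail].each do |status|
  completion.get(base_labels.merge(job_status: status))
end

# Later, when the middleware times a finished job, the first observation is
# visible as an increase from 0 to 1 in sidekiq_jobs_completion_seconds_count.
completion.observe(base_labels.merge(job_status: 'done'), 2.5)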
@@ -7,25 +7,18 @@ namespace :gitlab do
     end
 
     namespace :migrate_jobs do
-      def mappings
-        ::Gitlab::SidekiqConfig
-          .workers
-          .reject { |worker| worker.klass.is_a?(Gitlab::SidekiqConfig::DummyWorker) }
-          .to_h { |worker| [worker.klass.to_s, ::Gitlab::SidekiqConfig::WorkerRouter.global.route(worker.klass)] }
-      end
-
       desc 'GitLab | Sidekiq | Migrate jobs in the scheduled set to new queue names'
       task schedule: :environment do
         ::Gitlab::SidekiqMigrateJobs
           .new('schedule', logger: Logger.new($stdout))
-          .execute(mappings)
+          .execute(::Gitlab::SidekiqConfig.worker_queue_mappings)
       end
 
       desc 'GitLab | Sidekiq | Migrate jobs in the retry set to new queue names'
       task retry: :environment do
         ::Gitlab::SidekiqMigrateJobs
           .new('retry', logger: Logger.new($stdout))
-          .execute(mappings)
+          .execute(::Gitlab::SidekiqConfig.worker_queue_mappings)
       end
     end
......
@@ -122,4 +122,43 @@ RSpec.describe Gitlab::SidekiqConfig do
       expect(described_class.sidekiq_queues_yml_outdated?).to be(false)
     end
   end
+
+  describe '.worker_queue_mappings' do
+    it 'returns the worker class => queue mappings based on the current routing configuration' do
+      test_routes = [
+        ['urgency=high', 'default'],
+        ['*', nil]
+      ]
+
+      allow(::Gitlab::SidekiqConfig::WorkerRouter)
+        .to receive(:global).and_return(::Gitlab::SidekiqConfig::WorkerRouter.new(test_routes))
+
+      expect(described_class.worker_queue_mappings).to include('MergeWorker' => 'default',
+                                                               'BuildFinishedWorker' => 'default',
+                                                               'BackgroundMigrationWorker' => 'background_migration',
+                                                               'AdminEmailWorker' => 'cronjob:admin_email')
+    end
+  end
+
+  describe '.current_worker_queue_mappings' do
+    it 'returns worker queue mappings that have queues in the current Sidekiq options' do
+      test_routes = [
+        ['urgency=high', 'default'],
+        ['*', nil]
+      ]
+
+      allow(::Gitlab::SidekiqConfig::WorkerRouter)
+        .to receive(:global).and_return(::Gitlab::SidekiqConfig::WorkerRouter.new(test_routes))
+      allow(Sidekiq).to receive(:options).and_return(queues: %w[default background_migration])
+
+      mappings = described_class.current_worker_queue_mappings
+
+      expect(mappings).to include('MergeWorker' => 'default',
+                                  'BuildFinishedWorker' => 'default',
+                                  'BackgroundMigrationWorker' => 'background_migration')
+      expect(mappings).not_to include('AdminEmailWorker' => 'cronjob:admin_email')
+    end
+  end
 end
@@ -8,11 +8,55 @@ RSpec.describe Gitlab::SidekiqMiddleware::ServerMetrics do
   context "with mocked prometheus" do
     include_context 'server metrics with mocked prometheus'
 
-    describe '#initialize' do
+    describe '.initialize_process_metrics' do
       it 'sets concurrency metrics' do
         expect(concurrency_metric).to receive(:set).with({}, Sidekiq.options[:concurrency].to_i)
 
-        subject
+        described_class.initialize_process_metrics
+      end
+
+      it 'initializes sidekiq_jobs_completion_seconds for the workers in the current Sidekiq process' do
+        allow(Gitlab::SidekiqConfig)
+          .to receive(:current_worker_queue_mappings)
+          .and_return('MergeWorker' => 'merge', 'BuildFinishedWorker' => 'default')
+
+        expect(completion_seconds_metric)
+          .to receive(:get).with(queue: 'merge',
+                                 worker: 'MergeWorker',
+                                 urgency: 'high',
+                                 external_dependencies: 'no',
+                                 feature_category: 'source_code_management',
+                                 boundary: '',
+                                 job_status: 'done')
+
+        expect(completion_seconds_metric)
+          .to receive(:get).with(queue: 'merge',
+                                 worker: 'MergeWorker',
+                                 urgency: 'high',
+                                 external_dependencies: 'no',
+                                 feature_category: 'source_code_management',
+                                 boundary: '',
+                                 job_status: 'fail')
+
+        expect(completion_seconds_metric)
+          .to receive(:get).with(queue: 'default',
+                                 worker: 'BuildFinishedWorker',
+                                 urgency: 'high',
+                                 external_dependencies: 'no',
+                                 feature_category: 'continuous_integration',
+                                 boundary: 'cpu',
+                                 job_status: 'done')
+
+        expect(completion_seconds_metric)
+          .to receive(:get).with(queue: 'default',
+                                 worker: 'BuildFinishedWorker',
+                                 urgency: 'high',
+                                 external_dependencies: 'no',
+                                 feature_category: 'continuous_integration',
+                                 boundary: 'cpu',
+                                 job_status: 'fail')
+
+        described_class.initialize_process_metrics
       end
     end
@@ -47,6 +91,26 @@ RSpec.describe Gitlab::SidekiqMiddleware::ServerMetrics do
         subject.call(worker, job, :test) { nil }
       end
 
+      it 'sets sidekiq_jobs_completion_seconds values that are compatible with those from .initialize_process_metrics' do
+        label_validator = Prometheus::Client::LabelSetValidator.new([:le])
+
+        allow(Gitlab::SidekiqConfig)
+          .to receive(:current_worker_queue_mappings)
+          .and_return('MergeWorker' => 'merge', 'BuildFinishedWorker' => 'default')
+
+        allow(completion_seconds_metric).to receive(:get) do |labels|
+          expect { label_validator.validate(labels) }.not_to raise_error
+        end
+
+        allow(completion_seconds_metric).to receive(:observe) do |labels, _duration|
+          expect { label_validator.validate(labels) }.not_to raise_error
+        end
+
+        described_class.initialize_process_metrics
+
+        subject.call(worker, job, :test) { nil }
+      end
+
       it 'sets the thread name if it was nil' do
         allow(Thread.current).to receive(:name).and_return(nil)
         expect(Thread.current).to receive(:name=).with(Gitlab::Metrics::Samplers::ThreadsSampler::SIDEKIQ_WORKER_THREAD_NAME)
......