Commit a74396dc authored by Stan Hu's avatar Stan Hu

Add Gitaly and Rugged call timing in Sidekiq logs

This will help identify Sidekiq jobs that invoke excessive number of
filesystem access.

The timing data is stored in `RequestStore`, but this is only active
within the middleware and is not directly accessible to the Sidekiq
logger. However, it is possible for the middleware to modify the job
hash to pass this data along to the logger.
parent bbf639c4
---
title: Add Gitaly and Rugged call timing in Sidekiq logs
merge_request: 31651
author:
type: other
......@@ -27,19 +27,7 @@ unless Sidekiq.server?
queue_duration: event.payload[:queue_duration]
}
gitaly_calls = Gitlab::GitalyClient.get_request_count
if gitaly_calls > 0
payload[:gitaly_calls] = gitaly_calls
payload[:gitaly_duration] = Gitlab::GitalyClient.query_time_ms
end
rugged_calls = Gitlab::RuggedInstrumentation.query_count
if rugged_calls > 0
payload[:rugged_calls] = rugged_calls
payload[:rugged_duration_ms] = Gitlab::RuggedInstrumentation.query_time_ms
end
::Gitlab::InstrumentationHelper.add_instrumentation_data(payload)
payload[:response] = event.payload[:response] if event.payload[:response]
payload[Labkit::Correlation::CorrelationId::LOG_KEY] = Labkit::Correlation::CorrelationId.current_id
......
......@@ -39,6 +39,7 @@ Sidekiq.configure_server do |config|
chain.add Gitlab::SidekiqMiddleware::RequestStoreMiddleware unless ENV['SIDEKIQ_REQUEST_STORE'] == '0'
chain.add Gitlab::SidekiqMiddleware::BatchLoader
chain.add Gitlab::SidekiqMiddleware::CorrelationLogger
chain.add Gitlab::SidekiqMiddleware::InstrumentationLogger
chain.add Gitlab::SidekiqStatus::ServerMiddleware
end
......
......@@ -240,7 +240,7 @@ module Gitlab
# Ensures that Gitaly is not being abuse through n+1 misuse etc
def self.enforce_gitaly_request_limits(call_site)
# Only count limits in request-response environments (not sidekiq for example)
# Only count limits in request-response environments
return unless Gitlab::SafeRequestStore.active?
# This is this actual number of times this call was made. Used for information purposes only
......
# frozen_string_literal: true
module Gitlab
module InstrumentationHelper
extend self
KEYS = %i(gitaly_calls gitaly_duration rugged_calls rugged_duration_ms).freeze
def add_instrumentation_data(payload)
gitaly_calls = Gitlab::GitalyClient.get_request_count
if gitaly_calls > 0
payload[:gitaly_calls] = gitaly_calls
payload[:gitaly_duration] = Gitlab::GitalyClient.query_time_ms
end
rugged_calls = Gitlab::RuggedInstrumentation.query_count
if rugged_calls > 0
payload[:rugged_calls] = rugged_calls
payload[:rugged_duration_ms] = Gitlab::RuggedInstrumentation.query_time_ms
end
end
end
end
......@@ -15,9 +15,9 @@ module Gitlab
yield
Sidekiq.logger.info log_job_done(started_at, base_payload)
Sidekiq.logger.info log_job_done(job, started_at, base_payload)
rescue => job_exception
Sidekiq.logger.warn log_job_done(started_at, base_payload, job_exception)
Sidekiq.logger.warn log_job_done(job, started_at, base_payload, job_exception)
raise
end
......@@ -28,6 +28,10 @@ module Gitlab
"#{payload['class']} JID-#{payload['jid']}"
end
def add_instrumentation_keys!(job, output_payload)
output_payload.merge!(job.slice(*::Gitlab::InstrumentationHelper::KEYS))
end
def log_job_start(started_at, payload)
payload['message'] = "#{base_message(payload)}: start"
payload['job_status'] = 'start'
......@@ -41,8 +45,9 @@ module Gitlab
payload
end
def log_job_done(started_at, payload, job_exception = nil)
def log_job_done(job, started_at, payload, job_exception = nil)
payload = payload.dup
add_instrumentation_keys!(job, payload)
payload['duration'] = elapsed(started_at)
payload['completed_at'] = Time.now.utc
......
# frozen_string_literal: true
module Gitlab
module SidekiqMiddleware
class InstrumentationLogger
def call(worker, job, queue)
yield
# The Sidekiq logger is called outside the middleware block, so
# we need to modify the job hash to pass along this information
# since RequestStore is only active in the Sidekiq middleware.
#
# Modifying the job hash in a middleware is permitted by Sidekiq
# because Sidekiq keeps a pristine copy of the original hash
# before sending it to the middleware:
# https://github.com/mperham/sidekiq/blob/53bd529a0c3f901879925b8390353129c465b1f2/lib/sidekiq/processor.rb#L115-L118
::Gitlab::InstrumentationHelper.add_instrumentation_data(job)
end
end
end
end
......@@ -19,6 +19,7 @@ ALLOWED = [
'config/initializers/peek.rb',
'config/initializers/lograge.rb',
'lib/gitlab/grape_logging/loggers/perf_logger.rb',
'lib/gitlab/instrumentation_helper.rb',
'lib/gitlab/rugged_instrumentation.rb',
'lib/peek/views/rugged.rb'
].freeze
......
......@@ -153,5 +153,29 @@ describe Gitlab::SidekiqLogging::StructuredLogger do
end
end
end
context 'with Gitaly and Rugged calls' do
let(:timing_data) do
{
gitaly_calls: 10,
gitaly_duration: 10000,
rugged_calls: 1,
rugged_duration_ms: 5000
}
end
before do
job.merge!(timing_data)
end
it 'logs with Gitaly and Rugged timing data' do
Timecop.freeze(timestamp) do
expect(logger).to receive(:info).with(start_payload.except('args')).ordered
expect(logger).to receive(:info).with(end_payload.except('args')).ordered
subject.call(job, 'test_queue') { }
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment