Commit 7a66ca29 authored by Sean McGivern's avatar Sean McGivern

Merge branch 'jv-redis-latency' into 'master'

Measure Redis command latency per instance

See merge request gitlab-org/gitlab!36546
parents 1b87f3ca 407bdc95
......@@ -264,6 +264,7 @@ instance (`cache`, `shared_state` etc.).
|:--------------------------------- |:------- |:----- |:----------- |
| `gitlab_redis_client_exceptions_total` | Counter | 13.2 | Number of Redis client exceptions, broken down by exception class |
| `gitlab_redis_client_requests_total` | Counter | 13.2 | Number of Redis client requests |
| `gitlab_redis_client_requests_duration_seconds` | Histogram | 13.2 | Redis request latency, excluding blocking commands |
## Metrics shared directory
......
......@@ -81,12 +81,12 @@ module Gitlab
self
end
def count_request
def instance_count_request
@request_counter ||= Gitlab::Metrics.counter(:gitlab_redis_client_requests_total, 'Client side Redis request count, per Redis server')
@request_counter.increment({ storage: storage_key })
end
def count_exception(ex)
def instance_count_exception(ex)
# This metric is meant to give a client side view of how the Redis
# server is doing. Redis itself does not expose error counts. This
# metric can be used for Redis alerting and service health monitoring.
......@@ -94,6 +94,17 @@ module Gitlab
@exception_counter.increment({ storage: storage_key, exception: ex.class.to_s })
end
def instance_observe_duration(duration)
@request_latency_histogram ||= Gitlab::Metrics.histogram(
:gitlab_redis_client_requests_duration_seconds,
'Client side Redis request latency, per Redis server, excluding blocking commands',
{},
[0.001, 0.005, 0.01]
)
@request_latency_histogram.observe({ storage: storage_key }, duration)
end
private
def request_count_key
......
......@@ -5,19 +5,26 @@ require 'redis'
module Gitlab
module Instrumentation
module RedisInterceptor
APDEX_EXCLUDE = %w[brpop blpop brpoplpush bzpopmin bzpopmax xread xreadgroup].freeze
def call(*args, &block)
instrumentation_class.count_request
start = Time.now # must come first so that 'start' is always defined
instrumentation_class.instance_count_request
instrumentation_class.redis_cluster_validate!(args.first)
start = Time.now
super(*args, &block)
rescue ::Redis::BaseError => ex
instrumentation_class.count_exception(ex)
instrumentation_class.instance_count_exception(ex)
raise ex
ensure
duration = (Time.now - start)
duration = Time.now - start
unless APDEX_EXCLUDE.include?(command_from_args(args))
instrumentation_class.instance_observe_duration(duration)
end
if ::RequestStore.active?
# These metrics measure total Redis usage per Rails request / job.
instrumentation_class.increment_request_count
instrumentation_class.add_duration(duration)
instrumentation_class.add_call_details(duration, args)
......@@ -83,6 +90,12 @@ module Gitlab
def instrumentation_class
@options[:instrumentation_class] # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
def command_from_args(args)
command = args[0]
command = command[0] if command.is_a?(Array)
command.to_s.downcase
end
end
end
end
......
......@@ -47,15 +47,15 @@ RSpec.describe Gitlab::Instrumentation::RedisInterceptor, :clean_gitlab_redis_sh
let(:instrumentation_class) { Gitlab::Redis::SharedState.instrumentation_class }
it 'counts successful requests' do
expect(instrumentation_class).to receive(:count_request).and_call_original
expect(instrumentation_class).to receive(:instance_count_request).and_call_original
Gitlab::Redis::SharedState.with { |redis| redis.call(:get, 'foobar') }
end
it 'counts exceptions' do
expect(instrumentation_class).to receive(:count_exception)
expect(instrumentation_class).to receive(:instance_count_exception)
.with(instance_of(Redis::CommandError)).and_call_original
expect(instrumentation_class).to receive(:count_request).and_call_original
expect(instrumentation_class).to receive(:instance_count_request).and_call_original
expect do
Gitlab::Redis::SharedState.with do |redis|
......@@ -64,4 +64,51 @@ RSpec.describe Gitlab::Instrumentation::RedisInterceptor, :clean_gitlab_redis_sh
end.to raise_exception(Redis::CommandError)
end
end
describe 'latency' do
let(:instrumentation_class) { Gitlab::Redis::SharedState.instrumentation_class }
describe 'commands in the apdex' do
where(:command) do
[
[[:get, 'foobar']],
[%w[GET foobar]]
]
end
with_them do
it 'measures requests we want in the apdex' do
expect(instrumentation_class).to receive(:instance_observe_duration).with(a_value > 0)
.and_call_original
Gitlab::Redis::SharedState.with { |redis| redis.call(*command) }
end
end
end
describe 'commands not in the apdex' do
where(:command) do
[
[%w[brpop foobar 0.01]],
[%w[blpop foobar 0.01]],
[%w[brpoplpush foobar bazqux 0.01]],
[%w[bzpopmin foobar 0.01]],
[%w[bzpopmax foobar 0.01]],
[%w[xread block 1 streams mystream 0-0]],
[%w[xreadgroup group mygroup myconsumer block 1 streams foobar 0-0]]
]
end
with_them do
it 'skips requests we do not want in the apdex' do
expect(instrumentation_class).not_to receive(:instance_observe_duration)
begin
Gitlab::Redis::SharedState.with { |redis| redis.call(*command) }
rescue Gitlab::Instrumentation::RedisClusterValidator::CrossSlotError, ::Redis::CommandError
end
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment