Commit be08202b authored by Douwe Maan's avatar Douwe Maan

Merge branch 'bjk/metric_names' into 'master'

Update metric names to match Prometheus guidelines.

Closes #35031

See merge request !12812
parents 0f393724 2d0741e5
......@@ -48,7 +48,7 @@ class SessionsController < Devise::SessionsController
private
def login_counter
@login_counter ||= Gitlab::Metrics.counter(:user_session_logins, 'User sign in count')
@login_counter ||= Gitlab::Metrics.counter(:user_session_logins_total, 'User sign in count')
end
# Handle an "initial setup" state, where there's only one user, it's an admin,
......
......@@ -123,7 +123,7 @@ Gitlab::Metrics::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_
Gitlab::Application.configure do |config|
# 0 should be Sentry to catch errors in this middleware
config.middleware.insert(1, Gitlab::Metrics::ConnectionRackMiddleware)
config.middleware.insert(1, Gitlab::Metrics::RequestsRackMiddleware)
end
if Gitlab::Metrics.enabled?
......
......@@ -23,21 +23,24 @@ Currently the embedded Prometheus server is not automatically configured to coll
In this experimental phase, only a few metrics are available:
| Metric | Type | Description |
| ------ | ---- | ----------- |
| db_ping_timeout | Gauge | Whether or not the last database ping timed out |
| db_ping_success | Gauge | Whether or not the last database ping succeeded |
| db_ping_latency | Gauge | Round trip time of the database ping |
| redis_ping_timeout | Gauge | Whether or not the last redis ping timed out |
| redis_ping_success | Gauge | Whether or not the last redis ping succeeded |
| redis_ping_latency | Gauge | Round trip time of the redis ping |
| filesystem_access_latency | gauge | Latency in accessing a specific filesystem |
| filesystem_accessible | gauge | Whether or not a specific filesystem is accessible |
| filesystem_write_latency | gauge | Write latency of a specific filesystem |
| filesystem_writable | gauge | Whether or not the filesystem is writable |
| filesystem_read_latency | gauge | Read latency of a specific filesystem |
| filesystem_readable | gauge | Whether or not the filesystem is readable |
| user_sessions_logins | Counter | Counter of how many users have logged in |
| Metric | Type | Description |
| --------------------------------- | --------- | ----------- |
| db_ping_timeout | Gauge | Whether or not the last database ping timed out |
| db_ping_success | Gauge | Whether or not the last database ping succeeded |
| db_ping_latency_seconds | Gauge | Round trip time of the database ping |
| filesystem_access_latency_seconds | Gauge | Latency in accessing a specific filesystem |
| filesystem_accessible | Gauge | Whether or not a specific filesystem is accessible |
| filesystem_write_latency_seconds | Gauge | Write latency of a specific filesystem |
| filesystem_writable | Gauge | Whether or not the filesystem is writable |
| filesystem_read_latency_seconds | Gauge | Read latency of a specific filesystem |
| filesystem_readable | Gauge | Whether or not the filesystem is readable |
| http_requests_total | Counter | Rack request count |
| http_request_duration_seconds | Histogram | HTTP response time from rack middleware |
| rack_uncaught_errors_total | Counter | Rack connections handling uncaught errors count |
| redis_ping_timeout | Gauge | Whether or not the last redis ping timed out |
| redis_ping_success | Gauge | Whether or not the last redis ping succeeded |
| redis_ping_latency_seconds | Gauge | Round trip time of the redis ping |
| user_session_logins_total | Counter | Counter of how many users have logged in |
[← Back to the main Prometheus page](index.md)
......
......@@ -35,9 +35,9 @@ module Gitlab
repository_storages.flat_map do |storage_name|
tmp_file_path = tmp_file_path(storage_name)
[
operation_metrics(:filesystem_accessible, :filesystem_access_latency, -> { storage_stat_test(storage_name) }, shard: storage_name),
operation_metrics(:filesystem_writable, :filesystem_write_latency, -> { storage_write_test(tmp_file_path) }, shard: storage_name),
operation_metrics(:filesystem_readable, :filesystem_read_latency, -> { storage_read_test(tmp_file_path) }, shard: storage_name)
operation_metrics(:filesystem_accessible, :filesystem_access_latency_seconds, -> { storage_stat_test(storage_name) }, shard: storage_name),
operation_metrics(:filesystem_writable, :filesystem_write_latency_seconds, -> { storage_write_test(tmp_file_path) }, shard: storage_name),
operation_metrics(:filesystem_readable, :filesystem_read_latency_seconds, -> { storage_read_test(tmp_file_path) }, shard: storage_name)
].flatten
end
end
......
......@@ -20,7 +20,7 @@ module Gitlab
[
metric("#{metric_prefix}_timeout", result.is_a?(Timeout::Error) ? 1 : 0),
metric("#{metric_prefix}_success", is_successful?(result) ? 1 : 0),
metric("#{metric_prefix}_latency", elapsed)
metric("#{metric_prefix}_latency_seconds", elapsed)
]
end
end
......
module Gitlab
module Metrics
class ConnectionRackMiddleware
class RequestsRackMiddleware
def initialize(app)
@app = app
end
def self.rack_request_count
@rack_request_count ||= Gitlab::Metrics.counter(:rack_request, 'Rack request count')
end
def self.rack_response_count
@rack_response_count ||= Gitlab::Metrics.counter(:rack_response, 'Rack response count')
def self.http_request_total
@http_request_total ||= Gitlab::Metrics.counter(:http_requests_total, 'Request count')
end
def self.rack_uncaught_errors_count
@rack_uncaught_errors_count ||= Gitlab::Metrics.counter(:rack_uncaught_errors, 'Rack connections handling uncaught errors count')
@rack_uncaught_errors_count ||= Gitlab::Metrics.counter(:rack_uncaught_errors_total, 'Request handling uncaught errors count')
end
def self.rack_execution_time
@rack_execution_time ||= Gitlab::Metrics.histogram(:rack_execution_time, 'Rack connection handling execution time',
{}, [0.05, 0.1, 0.25, 0.5, 0.7, 1, 1.5, 2, 2.5, 3, 5, 7, 10])
def self.http_request_duration_seconds
@http_request_duration_seconds ||= Gitlab::Metrics.histogram(:http_request_duration_seconds, 'Request handling execution time',
{}, [0.05, 0.1, 0.25, 0.5, 0.7, 1, 2.5, 5, 10, 25])
end
def call(env)
method = env['REQUEST_METHOD'].downcase
started = Time.now.to_f
begin
ConnectionRackMiddleware.rack_request_count.increment(method: method)
RequestsRackMiddleware.http_request_total.increment(method: method)
status, headers, body = @app.call(env)
ConnectionRackMiddleware.rack_response_count.increment(method: method, status: status)
elapsed = Time.now.to_f - started
RequestsRackMiddleware.http_request_duration_seconds.observe({ method: method, status: status }, elapsed)
[status, headers, body]
rescue
ConnectionRackMiddleware.rack_uncaught_errors_count.increment
RequestsRackMiddleware.rack_uncaught_errors_count.increment
raise
ensure
elapsed = Time.now.to_f - started
ConnectionRackMiddleware.rack_execution_time.observe({}, elapsed)
end
end
end
......
......@@ -24,7 +24,7 @@ describe MetricsController do
expect(response.body).to match(/^db_ping_timeout 0$/)
expect(response.body).to match(/^db_ping_success 1$/)
expect(response.body).to match(/^db_ping_latency [0-9\.]+$/)
expect(response.body).to match(/^db_ping_latency_seconds [0-9\.]+$/)
end
it 'returns Redis ping metrics' do
......@@ -32,7 +32,7 @@ describe MetricsController do
expect(response.body).to match(/^redis_ping_timeout 0$/)
expect(response.body).to match(/^redis_ping_success 1$/)
expect(response.body).to match(/^redis_ping_latency [0-9\.]+$/)
expect(response.body).to match(/^redis_ping_latency_seconds [0-9\.]+$/)
end
it 'returns Caching ping metrics' do
......@@ -40,7 +40,7 @@ describe MetricsController do
expect(response.body).to match(/^redis_cache_ping_timeout 0$/)
expect(response.body).to match(/^redis_cache_ping_success 1$/)
expect(response.body).to match(/^redis_cache_ping_latency [0-9\.]+$/)
expect(response.body).to match(/^redis_cache_ping_latency_seconds [0-9\.]+$/)
end
it 'returns Queues ping metrics' do
......@@ -48,7 +48,7 @@ describe MetricsController do
expect(response.body).to match(/^redis_queues_ping_timeout 0$/)
expect(response.body).to match(/^redis_queues_ping_success 1$/)
expect(response.body).to match(/^redis_queues_ping_latency [0-9\.]+$/)
expect(response.body).to match(/^redis_queues_ping_latency_seconds [0-9\.]+$/)
end
it 'returns SharedState ping metrics' do
......@@ -56,17 +56,17 @@ describe MetricsController do
expect(response.body).to match(/^redis_shared_state_ping_timeout 0$/)
expect(response.body).to match(/^redis_shared_state_ping_success 1$/)
expect(response.body).to match(/^redis_shared_state_ping_latency [0-9\.]+$/)
expect(response.body).to match(/^redis_shared_state_ping_latency_seconds [0-9\.]+$/)
end
it 'returns file system check metrics' do
get :index
expect(response.body).to match(/^filesystem_access_latency{shard="default"} [0-9\.]+$/)
expect(response.body).to match(/^filesystem_access_latency_seconds{shard="default"} [0-9\.]+$/)
expect(response.body).to match(/^filesystem_accessible{shard="default"} 1$/)
expect(response.body).to match(/^filesystem_write_latency{shard="default"} [0-9\.]+$/)
expect(response.body).to match(/^filesystem_write_latency_seconds{shard="default"} [0-9\.]+$/)
expect(response.body).to match(/^filesystem_writable{shard="default"} 1$/)
expect(response.body).to match(/^filesystem_read_latency{shard="default"} [0-9\.]+$/)
expect(response.body).to match(/^filesystem_read_latency_seconds{shard="default"} [0-9\.]+$/)
expect(response.body).to match(/^filesystem_readable{shard="default"} 1$/)
end
......
......@@ -109,9 +109,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do
expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0))
end
end
......@@ -127,9 +127,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do
expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 1))
expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 1))
expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0))
end
end
end
......@@ -159,9 +159,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do
expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0))
end
end
end
......
......@@ -8,7 +8,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result|
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 1)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 0)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be >= 0)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency_seconds", value: be >= 0)) }
end
context 'Check is misbehaving' do
......@@ -18,7 +18,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result|
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 0)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 0)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be >= 0)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency_seconds", value: be >= 0)) }
end
context 'Check is timeouting' do
......@@ -28,7 +28,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result|
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 0)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 1)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be >= 0)) }
it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency_seconds", value: be >= 0)) }
end
end
......
require 'spec_helper'
describe Gitlab::Metrics::ConnectionRackMiddleware do
describe Gitlab::Metrics::RequestsRackMiddleware do
let(:app) { double('app') }
subject { described_class.new(app) }
......@@ -22,14 +22,8 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do
allow(app).to receive(:call).and_return([200, nil, nil])
end
it 'increments response count with status label' do
expect(described_class).to receive_message_chain(:rack_response_count, :increment).with(include(status: 200, method: 'get'))
subject.call(env)
end
it 'increments requests count' do
expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get')
expect(described_class).to receive_message_chain(:http_request_total, :increment).with(method: 'get')
subject.call(env)
end
......@@ -38,20 +32,21 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do
execution_time = 10
allow(app).to receive(:call) do |*args|
Timecop.freeze(execution_time.seconds)
[200, nil, nil]
end
expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time)
expect(described_class).to receive_message_chain(:http_request_duration_seconds, :observe).with({ status: 200, method: 'get' }, execution_time)
subject.call(env)
end
end
context '@app.call throws exception' do
let(:rack_response_count) { double('rack_response_count') }
let(:http_request_duration_seconds) { double('http_request_duration_seconds') }
before do
allow(app).to receive(:call).and_raise(StandardError)
allow(described_class).to receive(:rack_response_count).and_return(rack_response_count)
allow(described_class).to receive(:http_request_duration_seconds).and_return(http_request_duration_seconds)
end
it 'increments exceptions count' do
......@@ -61,25 +56,13 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do
end
it 'increments requests count' do
expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get')
expect { subject.call(env) }.to raise_error(StandardError)
end
it "does't increment response count" do
expect(described_class.rack_response_count).not_to receive(:increment)
expect(described_class).to receive_message_chain(:http_request_total, :increment).with(method: 'get')
expect { subject.call(env) }.to raise_error(StandardError)
end
it 'measures execution time' do
execution_time = 10
allow(app).to receive(:call) do |*args|
Timecop.freeze(execution_time.seconds)
raise StandardError
end
expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time)
it "does't measure request execution time" do
expect(described_class.http_request_duration_seconds).not_to receive(:increment)
expect { subject.call(env) }.to raise_error(StandardError)
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment