Commit 6aa1f23c authored by Ben Kochie's avatar Ben Kochie Committed by Mark Chao

Initialize http_requests_total label values

Add http method/status initilaization to the `http_requests_total`
counter. This is required to track error rates and error ratios.

For example, the ratio of 5xx requests to all requests.

    sum(rate(http_requests_total{status=~"5.."}[5m])
    /
    sum(rate(http_requests_total[5m])

Without the inialization, there would be no values for 5xx on small or
low traffic servers. This would lead to blank graphs.

https://gitlab.com/gitlab-com/gl-infra/scalability/-/issues/585Signed-off-by: default avatarBen Kochie <bjk@gitlab.com>
parent 66bb7420
---
title: Make sure the http_requests_total and http_request_duration_seconds metrics are not empty on application start
merge_request: 45755
author:
type: fixed
...@@ -70,7 +70,7 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled? ...@@ -70,7 +70,7 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled?
Gitlab::Metrics.gauge(:deployments, 'GitLab Version', {}, :max).set({ version: Gitlab::VERSION }, 1) Gitlab::Metrics.gauge(:deployments, 'GitLab Version', {}, :max).set({ version: Gitlab::VERSION }, 1)
unless Gitlab::Runtime.sidekiq? unless Gitlab::Runtime.sidekiq?
Gitlab::Metrics::RequestsRackMiddleware.initialize_http_request_duration_seconds Gitlab::Metrics::RequestsRackMiddleware.initialize_metrics
end end
rescue IOError => e rescue IOError => e
Gitlab::ErrorTracking.track_exception(e) Gitlab::ErrorTracking.track_exception(e)
......
...@@ -3,7 +3,15 @@ ...@@ -3,7 +3,15 @@
module Gitlab module Gitlab
module Metrics module Metrics
class RequestsRackMiddleware class RequestsRackMiddleware
HTTP_METHODS = %w(delete get head options patch post put).to_set.freeze HTTP_METHODS = {
"delete" => %w(200 202 204 303 400 401 403 404 500 503),
"get" => %w(200 204 301 302 303 304 307 400 401 403 404 410 422 429 500 503),
"head" => %w(200 204 301 302 303 401 403 404 410 500),
"options" => %w(200 404),
"patch" => %w(200 202 204 400 403 404 409 416 500),
"post" => %w(200 201 202 204 301 302 303 304 400 401 403 404 406 409 410 412 422 429 500 503),
"put" => %w(200 202 204 400 401 403 404 405 406 409 410 422 500)
}.freeze
HEALTH_ENDPOINT = /^\/-\/(liveness|readiness|health|metrics)\/?$/.freeze HEALTH_ENDPOINT = /^\/-\/(liveness|readiness|health|metrics)\/?$/.freeze
...@@ -14,8 +22,8 @@ module Gitlab ...@@ -14,8 +22,8 @@ module Gitlab
@app = app @app = app
end end
def self.http_request_total def self.http_requests_total
@http_request_total ||= ::Gitlab::Metrics.counter(:http_requests_total, 'Request count') @http_requests_total ||= ::Gitlab::Metrics.counter(:http_requests_total, 'Request count')
end end
def self.rack_uncaught_errors_count def self.rack_uncaught_errors_count
...@@ -31,15 +39,31 @@ module Gitlab ...@@ -31,15 +39,31 @@ module Gitlab
@http_health_requests_total ||= ::Gitlab::Metrics.counter(:http_health_requests_total, 'Health endpoint request count') @http_health_requests_total ||= ::Gitlab::Metrics.counter(:http_health_requests_total, 'Health endpoint request count')
end end
def self.initialize_http_request_duration_seconds def self.initialize_metrics
HTTP_METHODS.each do |method| # This initialization is done to avoid gaps in scraped metrics after
# restarts. It makes sure all counters/histograms are available at
# process start.
#
# For example `rate(http_requests_total{status="500"}[1m])` would return
# no data until the first 500 error would occur.
# The list of feature categories is currently not needed by the application
# anywhere else. So no need to keep these in memory forever.
# Doing it here, means we're only reading the file on boot.
feature_categories = YAML.load_file(Rails.root.join('config', 'feature_categories.yml')).map(&:strip).uniq << FEATURE_CATEGORY_DEFAULT
HTTP_METHODS.each do |method, statuses|
http_request_duration_seconds.get({ method: method }) http_request_duration_seconds.get({ method: method })
statuses.product(feature_categories) do |status, feature_category|
http_requests_total.get({ method: method, status: status, feature_category: feature_category })
end
end end
end end
def call(env) def call(env)
method = env['REQUEST_METHOD'].downcase method = env['REQUEST_METHOD'].downcase
method = 'INVALID' unless HTTP_METHODS.include?(method) method = 'INVALID' unless HTTP_METHODS.key?(method)
started = Time.now.to_f started = Time.now.to_f
health_endpoint = health_endpoint?(env['PATH_INFO']) health_endpoint = health_endpoint?(env['PATH_INFO'])
status = 'undefined' status = 'undefined'
...@@ -61,9 +85,13 @@ module Gitlab ...@@ -61,9 +85,13 @@ module Gitlab
raise raise
ensure ensure
if health_endpoint if health_endpoint
RequestsRackMiddleware.http_health_requests_total.increment(status: status, method: method) RequestsRackMiddleware.http_health_requests_total.increment(status: status.to_s, method: method)
else else
RequestsRackMiddleware.http_request_total.increment(status: status, method: method, feature_category: feature_category.presence || FEATURE_CATEGORY_DEFAULT) RequestsRackMiddleware.http_requests_total.increment(
status: status.to_s,
method: method,
feature_category: feature_category.presence || FEATURE_CATEGORY_DEFAULT
)
end end
end end
end end
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do RSpec.describe Gitlab::Metrics::RequestsRackMiddleware, :aggregate_failures do
let(:app) { double('app') } let(:app) { double('app') }
subject { described_class.new(app) } subject { described_class.new(app) }
...@@ -21,20 +21,15 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do ...@@ -21,20 +21,15 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do
allow(app).to receive(:call).and_return([200, nil, nil]) allow(app).to receive(:call).and_return([200, nil, nil])
end end
it 'increments requests count' do
expect(described_class).to receive_message_chain(:http_request_total, :increment).with(method: 'get', status: 200, feature_category: 'unknown')
subject.call(env)
end
RSpec::Matchers.define :a_positive_execution_time do RSpec::Matchers.define :a_positive_execution_time do
match { |actual| actual > 0 } match { |actual| actual > 0 }
end end
it 'measures execution time' do it 'tracks request count and duration' do
expect(described_class).to receive_message_chain(:http_requests_total, :increment).with(method: 'get', status: '200', feature_category: 'unknown')
expect(described_class).to receive_message_chain(:http_request_duration_seconds, :observe).with({ method: 'get' }, a_positive_execution_time) expect(described_class).to receive_message_chain(:http_request_duration_seconds, :observe).with({ method: 'get' }, a_positive_execution_time)
Timecop.scale(3600) { subject.call(env) } subject.call(env)
end end
context 'request is a health check endpoint' do context 'request is a health check endpoint' do
...@@ -44,15 +39,10 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do ...@@ -44,15 +39,10 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do
env['PATH_INFO'] = path env['PATH_INFO'] = path
end end
it 'increments health endpoint counter rather than overall counter' do it 'increments health endpoint counter rather than overall counter and does not record duration' do
expect(described_class).to receive_message_chain(:http_health_requests_total, :increment).with(method: 'get', status: 200)
expect(described_class).not_to receive(:http_request_total)
subject.call(env)
end
it 'does not record the request duration' do
expect(described_class).not_to receive(:http_request_duration_seconds) expect(described_class).not_to receive(:http_request_duration_seconds)
expect(described_class).not_to receive(:http_requests_total)
expect(described_class).to receive_message_chain(:http_health_requests_total, :increment).with(method: 'get', status: '200')
subject.call(env) subject.call(env)
end end
...@@ -67,14 +57,9 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do ...@@ -67,14 +57,9 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do
env['PATH_INFO'] = path env['PATH_INFO'] = path
end end
it 'increments overall counter rather than health endpoint counter' do it 'increments regular counters and tracks duration' do
expect(described_class).to receive_message_chain(:http_request_total, :increment).with(method: 'get', status: 200, feature_category: 'unknown') expect(described_class).to receive_message_chain(:http_requests_total, :increment).with(method: 'get', status: '200', feature_category: 'unknown')
expect(described_class).not_to receive(:http_health_requests_total) expect(described_class).not_to receive(:http_health_requests_total)
subject.call(env)
end
it 'records the request duration' do
expect(described_class) expect(described_class)
.to receive_message_chain(:http_request_duration_seconds, :observe) .to receive_message_chain(:http_request_duration_seconds, :observe)
.with({ method: 'get' }, a_positive_execution_time) .with({ method: 'get' }, a_positive_execution_time)
...@@ -88,26 +73,18 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do ...@@ -88,26 +73,18 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do
context '@app.call throws exception' do context '@app.call throws exception' do
let(:http_request_duration_seconds) { double('http_request_duration_seconds') } let(:http_request_duration_seconds) { double('http_request_duration_seconds') }
let(:http_requests_total) { double('http_requests_total') }
before do before do
allow(app).to receive(:call).and_raise(StandardError) allow(app).to receive(:call).and_raise(StandardError)
allow(described_class).to receive(:http_request_duration_seconds).and_return(http_request_duration_seconds) allow(described_class).to receive(:http_request_duration_seconds).and_return(http_request_duration_seconds)
allow(described_class).to receive(:http_requests_total).and_return(http_requests_total)
end end
it 'increments exceptions count' do it 'tracks the correct metrics' do
expect(described_class).to receive_message_chain(:rack_uncaught_errors_count, :increment) expect(described_class).to receive_message_chain(:rack_uncaught_errors_count, :increment)
expect(described_class).to receive_message_chain(:http_requests_total, :increment).with(method: 'get', status: 'undefined', feature_category: 'unknown')
expect { subject.call(env) }.to raise_error(StandardError) expect(described_class.http_request_duration_seconds).not_to receive(:observe)
end
it 'increments requests count' do
expect(described_class).to receive_message_chain(:http_request_total, :increment).with(method: 'get', status: 'undefined', feature_category: 'unknown')
expect { subject.call(env) }.to raise_error(StandardError)
end
it "does't measure request execution time" do
expect(described_class.http_request_duration_seconds).not_to receive(:increment)
expect { subject.call(env) }.to raise_error(StandardError) expect { subject.call(env) }.to raise_error(StandardError)
end end
...@@ -119,8 +96,9 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do ...@@ -119,8 +96,9 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do
allow(app).to receive(:call).and_return([200, { described_class::FEATURE_CATEGORY_HEADER => 'issue_tracking' }, nil]) allow(app).to receive(:call).and_return([200, { described_class::FEATURE_CATEGORY_HEADER => 'issue_tracking' }, nil])
end end
it 'adds the feature category to the labels for http_request_total' do it 'adds the feature category to the labels for http_requests_total' do
expect(described_class).to receive_message_chain(:http_request_total, :increment).with(method: 'get', status: 200, feature_category: 'issue_tracking') expect(described_class).to receive_message_chain(:http_requests_total, :increment).with(method: 'get', status: '200', feature_category: 'issue_tracking')
expect(described_class).not_to receive(:http_health_requests_total)
subject.call(env) subject.call(env)
end end
...@@ -128,8 +106,8 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do ...@@ -128,8 +106,8 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do
it 'does not record a feature category for health check endpoints' do it 'does not record a feature category for health check endpoints' do
env['PATH_INFO'] = '/-/liveness' env['PATH_INFO'] = '/-/liveness'
expect(described_class).to receive_message_chain(:http_health_requests_total, :increment).with(method: 'get', status: 200) expect(described_class).to receive_message_chain(:http_health_requests_total, :increment).with(method: 'get', status: '200')
expect(described_class).not_to receive(:http_request_total) expect(described_class).not_to receive(:http_requests_total)
subject.call(env) subject.call(env)
end end
...@@ -141,21 +119,37 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do ...@@ -141,21 +119,37 @@ RSpec.describe Gitlab::Metrics::RequestsRackMiddleware do
end end
it 'sets the feature category to unknown' do it 'sets the feature category to unknown' do
expect(described_class).to receive_message_chain(:http_request_total, :increment).with(method: 'get', status: 200, feature_category: 'unknown') expect(described_class).to receive_message_chain(:http_requests_total, :increment).with(method: 'get', status: '200', feature_category: 'unknown')
expect(described_class).not_to receive(:http_health_requests_total)
subject.call(env) subject.call(env)
end end
end end
end end
describe '.initialize_http_request_duration_seconds' do describe '.initialize_metrics', :prometheus do
it "sets labels" do it "sets labels for http_requests_total" do
feature_categories = YAML.load_file(Rails.root.join('config', 'feature_categories.yml')).map(&:strip) << described_class::FEATURE_CATEGORY_DEFAULT
expected_labels = [] expected_labels = []
described_class::HTTP_METHODS.each do |method|
expected_labels << { method: method } described_class::HTTP_METHODS.each do |method, statuses|
statuses.each do |status|
feature_categories.each do |feature_category|
expected_labels << { method: method.to_s, status: status.to_s, feature_category: feature_category.to_s }
end
end
end end
described_class.initialize_http_request_duration_seconds described_class.initialize_metrics
expect(described_class.http_requests_total.values.keys).to contain_exactly(*expected_labels)
end
it 'sets labels for http_request_duration_seconds' do
expected_labels = described_class::HTTP_METHODS.keys.map { |method| { method: method } }
described_class.initialize_metrics
expect(described_class.http_request_duration_seconds.values.keys).to include(*expected_labels) expect(described_class.http_request_duration_seconds.values.keys).to include(*expected_labels)
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment