Commit 92e15071 authored by Douwe Maan's avatar Douwe Maan

Merge branch 'pawel/reduce_cardinality_of_prometheus_metrics' into 'master'

Reduce cardinality of some of GitLab's Prometheus metrics and fix observed duration reporting.

Closes #41045

See merge request gitlab-org/gitlab-ce!15881
parents 866c97fb 10af36f0
---
title: Reduce the number of buckets in gitlab_cache_operation_duration_seconds metric
merge_request: 15881
author:
type: changed
...@@ -38,6 +38,7 @@ module Gitlab ...@@ -38,6 +38,7 @@ module Gitlab
# This is memoized since this method is called for every instrumented # This is memoized since this method is called for every instrumented
# method. Loading data from an external cache on every method call slows # method. Loading data from an external cache on every method call slows
# things down too much. # things down too much.
# in milliseconds
@method_call_threshold ||= settings[:method_call_threshold] @method_call_threshold ||= settings[:method_call_threshold]
end end
......
...@@ -35,8 +35,8 @@ module Gitlab ...@@ -35,8 +35,8 @@ module Gitlab
@transaction = transaction @transaction = transaction
@name = name @name = name
@labels = { module: @module_name, method: @method_name } @labels = { module: @module_name, method: @method_name }
@real_time = 0 @real_time = 0.0
@cpu_time = 0 @cpu_time = 0.0
@call_count = 0 @call_count = 0
end end
...@@ -54,7 +54,7 @@ module Gitlab ...@@ -54,7 +54,7 @@ module Gitlab
@call_count += 1 @call_count += 1
if call_measurement_enabled? && above_threshold? if call_measurement_enabled? && above_threshold?
self.class.call_duration_histogram.observe(@transaction.labels.merge(labels), real_time / 1000.0) self.class.call_duration_histogram.observe(@transaction.labels.merge(labels), real_time)
end end
retval retval
...@@ -65,8 +65,8 @@ module Gitlab ...@@ -65,8 +65,8 @@ module Gitlab
Metric.new( Metric.new(
Instrumentation.series, Instrumentation.series,
{ {
duration: real_time, duration: real_time.in_milliseconds.to_i,
cpu_duration: cpu_time, cpu_duration: cpu_time.in_milliseconds.to_i,
call_count: call_count call_count: call_count
}, },
method: @name method: @name
...@@ -76,7 +76,7 @@ module Gitlab ...@@ -76,7 +76,7 @@ module Gitlab
# Returns true if the total runtime of this method exceeds the method call # Returns true if the total runtime of this method exceeds the method call
# threshold. # threshold.
def above_threshold? def above_threshold?
real_time >= Metrics.method_call_threshold real_time.in_milliseconds >= Metrics.method_call_threshold
end end
def call_measurement_enabled? def call_measurement_enabled?
......
...@@ -52,7 +52,7 @@ module Gitlab ...@@ -52,7 +52,7 @@ module Gitlab
metrics[:memory_usage].set(labels, System.memory_usage) metrics[:memory_usage].set(labels, System.memory_usage)
metrics[:file_descriptors].set(labels, System.file_descriptor_count) metrics[:file_descriptors].set(labels, System.file_descriptor_count)
metrics[:sampler_duration].observe(labels.merge(worker_label), (System.monotonic_time - start_time) / 1000.0) metrics[:sampler_duration].observe(labels.merge(worker_label), System.monotonic_time - start_time)
ensure ensure
GC::Profiler.clear GC::Profiler.clear
end end
......
...@@ -66,7 +66,7 @@ module Gitlab ...@@ -66,7 +66,7 @@ module Gitlab
:gitlab_cache_operation_duration_seconds, :gitlab_cache_operation_duration_seconds,
'Cache access time', 'Cache access time',
Transaction::BASE_LABELS.merge({ action: nil }), Transaction::BASE_LABELS.merge({ action: nil }),
[0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.500, 2.0, 10.0] [0.001, 0.01, 0.1, 1, 10]
) )
end end
......
...@@ -35,27 +35,27 @@ module Gitlab ...@@ -35,27 +35,27 @@ module Gitlab
if Process.const_defined?(:CLOCK_THREAD_CPUTIME_ID) if Process.const_defined?(:CLOCK_THREAD_CPUTIME_ID)
def self.cpu_time def self.cpu_time
Process Process
.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :millisecond) .clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :float_second)
end end
else else
def self.cpu_time def self.cpu_time
Process Process
.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :millisecond) .clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :float_second)
end end
end end
# Returns the current real time in a given precision. # Returns the current real time in a given precision.
# #
# Returns the time as a Fixnum. # Returns the time as a Float for precision = :float_second.
def self.real_time(precision = :millisecond) def self.real_time(precision = :float_second)
Process.clock_gettime(Process::CLOCK_REALTIME, precision) Process.clock_gettime(Process::CLOCK_REALTIME, precision)
end end
# Returns the current monotonic clock time in a given precision. # Returns the current monotonic clock time as seconds with microseconds precision.
# #
# Returns the time as a Fixnum. # Returns the time as a Float.
def self.monotonic_time(precision = :millisecond) def self.monotonic_time
Process.clock_gettime(Process::CLOCK_MONOTONIC, precision) Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second)
end end
end end
end end
......
...@@ -35,6 +35,10 @@ module Gitlab ...@@ -35,6 +35,10 @@ module Gitlab
@finished_at ? (@finished_at - @started_at) : 0.0 @finished_at ? (@finished_at - @started_at) : 0.0
end end
def duration_milliseconds
duration.in_milliseconds.to_i
end
def allocated_memory def allocated_memory
@memory_after - @memory_before @memory_after - @memory_before
end end
...@@ -50,7 +54,7 @@ module Gitlab ...@@ -50,7 +54,7 @@ module Gitlab
@memory_after = System.memory_usage @memory_after = System.memory_usage
@finished_at = System.monotonic_time @finished_at = System.monotonic_time
self.class.metric_transaction_duration_seconds.observe(labels, duration * 1000) self.class.metric_transaction_duration_seconds.observe(labels, duration)
self.class.metric_transaction_allocated_memory_bytes.observe(labels, allocated_memory * 1024.0) self.class.metric_transaction_allocated_memory_bytes.observe(labels, allocated_memory * 1024.0)
Thread.current[THREAD_KEY] = nil Thread.current[THREAD_KEY] = nil
...@@ -97,7 +101,7 @@ module Gitlab ...@@ -97,7 +101,7 @@ module Gitlab
end end
def track_self def track_self
values = { duration: duration, allocated_memory: allocated_memory } values = { duration: duration_milliseconds, allocated_memory: allocated_memory }
@values.each do |name, value| @values.each do |name, value|
values[name] = value values[name] = value
......
...@@ -96,14 +96,17 @@ describe Gitlab::Metrics::MethodCall do ...@@ -96,14 +96,17 @@ describe Gitlab::Metrics::MethodCall do
describe '#to_metric' do describe '#to_metric' do
it 'returns a Metric instance' do it 'returns a Metric instance' do
expect(method_call).to receive(:real_time).and_return(4.0001)
expect(method_call).to receive(:cpu_time).and_return(3.0001)
method_call.measure { 'foo' } method_call.measure { 'foo' }
metric = method_call.to_metric metric = method_call.to_metric
expect(metric).to be_an_instance_of(Gitlab::Metrics::Metric) expect(metric).to be_an_instance_of(Gitlab::Metrics::Metric)
expect(metric.series).to eq('rails_method_calls') expect(metric.series).to eq('rails_method_calls')
expect(metric.values[:duration]).to be_a_kind_of(Numeric) expect(metric.values[:duration]).to eq(4000)
expect(metric.values[:cpu_duration]).to be_a_kind_of(Numeric) expect(metric.values[:cpu_duration]).to eq(3000)
expect(metric.values[:call_count]).to be_an(Integer) expect(metric.values[:call_count]).to be_an(Integer)
expect(metric.tags).to eq({ method: 'Foo#bar' }) expect(metric.tags).to eq({ method: 'Foo#bar' })
...@@ -116,13 +119,13 @@ describe Gitlab::Metrics::MethodCall do ...@@ -116,13 +119,13 @@ describe Gitlab::Metrics::MethodCall do
end end
it 'returns false when the total call time is not above the threshold' do it 'returns false when the total call time is not above the threshold' do
expect(method_call).to receive(:real_time).and_return(9) expect(method_call).to receive(:real_time).and_return(0.009)
expect(method_call.above_threshold?).to eq(false) expect(method_call.above_threshold?).to eq(false)
end end
it 'returns true when the total call time is above the threshold' do it 'returns true when the total call time is above the threshold' do
expect(method_call).to receive(:real_time).and_return(9000) expect(method_call).to receive(:real_time).and_return(9)
expect(method_call.above_threshold?).to eq(true) expect(method_call.above_threshold?).to eq(true)
end end
......
...@@ -29,19 +29,19 @@ describe Gitlab::Metrics::System do ...@@ -29,19 +29,19 @@ describe Gitlab::Metrics::System do
describe '.cpu_time' do describe '.cpu_time' do
it 'returns a Fixnum' do it 'returns a Fixnum' do
expect(described_class.cpu_time).to be_an(Integer) expect(described_class.cpu_time).to be_an(Float)
end end
end end
describe '.real_time' do describe '.real_time' do
it 'returns a Fixnum' do it 'returns a Fixnum' do
expect(described_class.real_time).to be_an(Integer) expect(described_class.real_time).to be_an(Float)
end end
end end
describe '.monotonic_time' do describe '.monotonic_time' do
it 'returns a Fixnum' do it 'returns a Float' do
expect(described_class.monotonic_time).to be_an(Integer) expect(described_class.monotonic_time).to be_an(Float)
end end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment