Commit 9417c0b1 authored by Yannis Roussos

Add Query Apdex Prometheus metric to usage ping

- Update lib/gitlab/usage_data/topology.rb to
  add the query_apdex_weekly_average metric, using
  the new topology_query_apdex_weekly_average method
  to calculate the rolling 7-day average of the
  gitlab_usage_ping:sql_duration_apdex:ratio_rate5m
  Prometheus metric
- Add query_apdex_weekly_average to the topology_spec
- Add query_apdex_weekly_average to the usage_ping doc
parent 4cb6bba6
---
title: Add the Query Apdex Prometheus metric to usage ping
merge_request: 39256
author:
type: other
...@@ -747,6 +747,7 @@ The following is example content of the Usage Ping payload. ...@@ -747,6 +747,7 @@ The following is example content of the Usage Ping payload.
"topology": { "topology": {
"duration_s": 0.013836685999194742, "duration_s": 0.013836685999194742,
"application_requests_per_hour": 4224, "application_requests_per_hour": 4224,
"query_apdex_weekly_average": 0.996,
"failures": [], "failures": [],
"nodes": [ "nodes": [
{ {
......
...@@ -43,6 +43,7 @@ module Gitlab ...@@ -43,6 +43,7 @@ module Gitlab
with_prometheus_client(fallback: {}) do |client| with_prometheus_client(fallback: {}) do |client|
{ {
application_requests_per_hour: topology_app_requests_per_hour(client), application_requests_per_hour: topology_app_requests_per_hour(client),
query_apdex_weekly_average: topology_query_apdex_weekly_average(client),
nodes: topology_node_data(client) nodes: topology_node_data(client)
}.compact }.compact
end end
...@@ -63,6 +64,16 @@ module Gitlab ...@@ -63,6 +64,16 @@ module Gitlab
(result['value'].last.to_f * 1.hour).to_i (result['value'].last.to_f * 1.hour).to_i
end end
# Reads the SQL-duration Apdex ratio from Prometheus, aggregated over one
# week, for inclusion in the topology section of the usage ping.
#
# client - object responding to `query`; the first element of what it
#          returns is used as the sample.
#
# Returns the last entry of the sample's 'value' pair converted with `to_f`,
# or nil when `query_safely` hands back nothing.
# NOTE(review): `query_safely` presumably records failures under the
# 'query_apdex' label and applies the fallback — confirm against its definition.
def topology_query_apdex_weekly_average(client)
  sample = query_safely(
    'gitlab_usage_ping:sql_duration_apdex:ratio_rate5m',
    'query_apdex',
    fallback: nil
  ) do |metric_name|
    weekly_expression = aggregate_one_week(metric_name)
    client.query(weekly_expression).first
  end

  # The sample's 'value' is a pair; its last element holds the reading.
  sample['value'].last.to_f if sample
end
def topology_node_data(client) def topology_node_data(client)
# node-level data # node-level data
by_instance_mem = topology_node_memory(client) by_instance_mem = topology_node_memory(client)
......
...@@ -23,6 +23,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -23,6 +23,7 @@ RSpec.describe Gitlab::UsageData::Topology do
it 'contains node level metrics for each instance' do it 'contains node level metrics for each instance' do
expect_prometheus_api_to( expect_prometheus_api_to(
receive_app_request_volume_query, receive_app_request_volume_query,
receive_query_apdex_ratio_query,
receive_node_memory_query, receive_node_memory_query,
receive_node_memory_utilization_query, receive_node_memory_utilization_query,
receive_node_cpu_count_query, receive_node_cpu_count_query,
...@@ -38,6 +39,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -38,6 +39,7 @@ RSpec.describe Gitlab::UsageData::Topology do
expect(subject[:topology]).to eq({ expect(subject[:topology]).to eq({
duration_s: 0, duration_s: 0,
application_requests_per_hour: 36, application_requests_per_hour: 36,
query_apdex_weekly_average: 0.996,
failures: [], failures: [],
nodes: [ nodes: [
{ {
...@@ -107,6 +109,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -107,6 +109,7 @@ RSpec.describe Gitlab::UsageData::Topology do
it 'removes the respective entries and includes the failures' do it 'removes the respective entries and includes the failures' do
expect_prometheus_api_to( expect_prometheus_api_to(
receive_app_request_volume_query(result: []), receive_app_request_volume_query(result: []),
receive_query_apdex_ratio_query(result: []),
receive_node_memory_query(result: []), receive_node_memory_query(result: []),
receive_node_memory_utilization_query(result: []), receive_node_memory_utilization_query(result: []),
receive_node_cpu_count_query, receive_node_cpu_count_query,
...@@ -123,6 +126,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -123,6 +126,7 @@ RSpec.describe Gitlab::UsageData::Topology do
duration_s: 0, duration_s: 0,
failures: [ failures: [
{ 'app_requests' => 'empty_result' }, { 'app_requests' => 'empty_result' },
{ 'query_apdex' => 'empty_result' },
{ 'node_memory' => 'empty_result' }, { 'node_memory' => 'empty_result' },
{ 'node_memory_utilization' => 'empty_result' }, { 'node_memory_utilization' => 'empty_result' },
{ 'service_rss' => 'empty_result' }, { 'service_rss' => 'empty_result' },
...@@ -245,6 +249,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -245,6 +249,7 @@ RSpec.describe Gitlab::UsageData::Topology do
it 'normalizes equivalent instance values and maps them to the same node' do it 'normalizes equivalent instance values and maps them to the same node' do
expect_prometheus_api_to( expect_prometheus_api_to(
receive_app_request_volume_query(result: []), receive_app_request_volume_query(result: []),
receive_query_apdex_ratio_query(result: []),
receive_node_memory_query(result: node_memory_response), receive_node_memory_query(result: node_memory_response),
receive_node_memory_utilization_query(result: node_memory_utilization_response), receive_node_memory_utilization_query(result: node_memory_utilization_response),
receive_node_cpu_count_query(result: []), receive_node_cpu_count_query(result: []),
...@@ -261,6 +266,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -261,6 +266,7 @@ RSpec.describe Gitlab::UsageData::Topology do
duration_s: 0, duration_s: 0,
failures: [ failures: [
{ 'app_requests' => 'empty_result' }, { 'app_requests' => 'empty_result' },
{ 'query_apdex' => 'empty_result' },
{ 'node_cpus' => 'empty_result' }, { 'node_cpus' => 'empty_result' },
{ 'node_cpu_utilization' => 'empty_result' }, { 'node_cpu_utilization' => 'empty_result' },
{ 'service_uss' => 'empty_result' }, { 'service_uss' => 'empty_result' },
...@@ -309,6 +315,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -309,6 +315,7 @@ RSpec.describe Gitlab::UsageData::Topology do
it 'still reports service metrics' do it 'still reports service metrics' do
expect_prometheus_api_to( expect_prometheus_api_to(
receive_app_request_volume_query(result: []), receive_app_request_volume_query(result: []),
receive_query_apdex_ratio_query(result: []),
receive_node_memory_query(result: []), receive_node_memory_query(result: []),
receive_node_memory_utilization_query(result: []), receive_node_memory_utilization_query(result: []),
receive_node_cpu_count_query(result: []), receive_node_cpu_count_query(result: []),
...@@ -325,6 +332,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -325,6 +332,7 @@ RSpec.describe Gitlab::UsageData::Topology do
duration_s: 0, duration_s: 0,
failures: [ failures: [
{ 'app_requests' => 'empty_result' }, { 'app_requests' => 'empty_result' },
{ 'query_apdex' => 'empty_result' },
{ 'node_memory' => 'empty_result' }, { 'node_memory' => 'empty_result' },
{ 'node_memory_utilization' => 'empty_result' }, { 'node_memory_utilization' => 'empty_result' },
{ 'node_cpus' => 'empty_result' }, { 'node_cpus' => 'empty_result' },
...@@ -382,6 +390,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -382,6 +390,7 @@ RSpec.describe Gitlab::UsageData::Topology do
it 'filters out unknown service data and reports the unknown services as a failure' do it 'filters out unknown service data and reports the unknown services as a failure' do
expect_prometheus_api_to( expect_prometheus_api_to(
receive_app_request_volume_query(result: []), receive_app_request_volume_query(result: []),
receive_query_apdex_ratio_query(result: []),
receive_node_memory_query(result: []), receive_node_memory_query(result: []),
receive_node_memory_utilization_query(result: []), receive_node_memory_utilization_query(result: []),
receive_node_cpu_count_query(result: []), receive_node_cpu_count_query(result: []),
...@@ -412,6 +421,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -412,6 +421,7 @@ RSpec.describe Gitlab::UsageData::Topology do
duration_s: 0, duration_s: 0,
failures: [ failures: [
{ 'app_requests' => 'Gitlab::PrometheusClient::ConnectionError' }, { 'app_requests' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'query_apdex' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'node_memory' => 'Gitlab::PrometheusClient::ConnectionError' }, { 'node_memory' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'node_memory_utilization' => 'Gitlab::PrometheusClient::ConnectionError' }, { 'node_memory_utilization' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'node_cpus' => 'Gitlab::PrometheusClient::ConnectionError' }, { 'node_cpus' => 'Gitlab::PrometheusClient::ConnectionError' },
...@@ -442,6 +452,7 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -442,6 +452,7 @@ RSpec.describe Gitlab::UsageData::Topology do
duration_s: 0, duration_s: 0,
failures: [ failures: [
{ 'app_requests' => exception.to_s }, { 'app_requests' => exception.to_s },
{ 'query_apdex' => 'timeout_cancellation' },
{ 'node_memory' => 'timeout_cancellation' }, { 'node_memory' => 'timeout_cancellation' },
{ 'node_memory_utilization' => 'timeout_cancellation' }, { 'node_memory_utilization' => 'timeout_cancellation' },
{ 'node_cpus' => 'timeout_cancellation' }, { 'node_cpus' => 'timeout_cancellation' },
...@@ -497,6 +508,17 @@ RSpec.describe Gitlab::UsageData::Topology do ...@@ -497,6 +508,17 @@ RSpec.describe Gitlab::UsageData::Topology do
]) ])
end end
# Builds the message expectation for the Prometheus `query` call that
# fetches the SQL-duration Apdex ratio metric.
#
# result - optional canned response; when nil (or false) a single sample
#          with the value '0.996' is returned instead.
def receive_query_apdex_ratio_query(result: nil)
  default_response = [
    { 'metric' => {}, 'value' => [1000, '0.996'] }
  ]

  expectation = receive(:query).with(/gitlab_usage_ping:sql_duration_apdex:ratio_rate5m/)
  expectation.and_return(result || default_response)
end
def receive_node_memory_query(result: nil) def receive_node_memory_query(result: nil)
receive(:query) receive(:query)
.with(/node_memory_total_bytes/, an_instance_of(Hash)) .with(/node_memory_total_bytes/, an_instance_of(Hash))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment