Commit 9417c0b1 authored by Yannis Roussos

Add Query Apdex Prometheus metric to usage ping

- Update lib/gitlab/usage_data/topology.rb to
  add the query_apdex_weekly_average metric by using
  the new topology_query_apdex_weekly_average method
  to calculate the rolling 7 day average over the
  gitlab_usage_ping:sql_duration_apdex:ratio_rate5m
  prometheus metric
- Add query_apdex_weekly_average to the topology_spec
- Add query_apdex_weekly_average to the usage_ping doc
parent 4cb6bba6
---
title: Add the Query Apdex Prometheus metric to usage ping
merge_request: 39256
author:
type: other
......@@ -747,6 +747,7 @@ The following is example content of the Usage Ping payload.
"topology": {
"duration_s": 0.013836685999194742,
"application_requests_per_hour": 4224,
"query_apdex_weekly_average": 0.996,
"failures": [],
"nodes": [
{
......
......@@ -43,6 +43,7 @@ module Gitlab
with_prometheus_client(fallback: {}) do |client|
{
application_requests_per_hour: topology_app_requests_per_hour(client),
query_apdex_weekly_average: topology_query_apdex_weekly_average(client),
nodes: topology_node_data(client)
}.compact
end
......@@ -63,6 +64,16 @@ module Gitlab
(result['value'].last.to_f * 1.hour).to_i
end
# Reports the query Apdex averaged over the past week, read from the
# `gitlab_usage_ping:sql_duration_apdex:ratio_rate5m` Prometheus metric.
#
# The lookup is wrapped in `query_safely` under the 'query_apdex' label,
# with `nil` as the fallback.
#
# @param client the Prometheus client; must respond to `query`
# @return [Float, nil] the sampled ratio, or nil when no sample is available
def topology_query_apdex_weekly_average(client)
sample = query_safely('gitlab_usage_ping:sql_duration_apdex:ratio_rate5m', 'query_apdex', fallback: nil) do |metric|
weekly_query = aggregate_one_week(metric)
client.query(weekly_query).first
end

# Each sample carries its reading as the last entry of 'value', encoded as a string.
sample['value'].last.to_f if sample
end
def topology_node_data(client)
# node-level data
by_instance_mem = topology_node_memory(client)
......
......@@ -23,6 +23,7 @@ RSpec.describe Gitlab::UsageData::Topology do
it 'contains node level metrics for each instance' do
expect_prometheus_api_to(
receive_app_request_volume_query,
receive_query_apdex_ratio_query,
receive_node_memory_query,
receive_node_memory_utilization_query,
receive_node_cpu_count_query,
......@@ -38,6 +39,7 @@ RSpec.describe Gitlab::UsageData::Topology do
expect(subject[:topology]).to eq({
duration_s: 0,
application_requests_per_hour: 36,
query_apdex_weekly_average: 0.996,
failures: [],
nodes: [
{
......@@ -107,6 +109,7 @@ RSpec.describe Gitlab::UsageData::Topology do
it 'removes the respective entries and includes the failures' do
expect_prometheus_api_to(
receive_app_request_volume_query(result: []),
receive_query_apdex_ratio_query(result: []),
receive_node_memory_query(result: []),
receive_node_memory_utilization_query(result: []),
receive_node_cpu_count_query,
......@@ -123,6 +126,7 @@ RSpec.describe Gitlab::UsageData::Topology do
duration_s: 0,
failures: [
{ 'app_requests' => 'empty_result' },
{ 'query_apdex' => 'empty_result' },
{ 'node_memory' => 'empty_result' },
{ 'node_memory_utilization' => 'empty_result' },
{ 'service_rss' => 'empty_result' },
......@@ -245,6 +249,7 @@ RSpec.describe Gitlab::UsageData::Topology do
it 'normalizes equivalent instance values and maps them to the same node' do
expect_prometheus_api_to(
receive_app_request_volume_query(result: []),
receive_query_apdex_ratio_query(result: []),
receive_node_memory_query(result: node_memory_response),
receive_node_memory_utilization_query(result: node_memory_utilization_response),
receive_node_cpu_count_query(result: []),
......@@ -261,6 +266,7 @@ RSpec.describe Gitlab::UsageData::Topology do
duration_s: 0,
failures: [
{ 'app_requests' => 'empty_result' },
{ 'query_apdex' => 'empty_result' },
{ 'node_cpus' => 'empty_result' },
{ 'node_cpu_utilization' => 'empty_result' },
{ 'service_uss' => 'empty_result' },
......@@ -309,6 +315,7 @@ RSpec.describe Gitlab::UsageData::Topology do
it 'still reports service metrics' do
expect_prometheus_api_to(
receive_app_request_volume_query(result: []),
receive_query_apdex_ratio_query(result: []),
receive_node_memory_query(result: []),
receive_node_memory_utilization_query(result: []),
receive_node_cpu_count_query(result: []),
......@@ -325,6 +332,7 @@ RSpec.describe Gitlab::UsageData::Topology do
duration_s: 0,
failures: [
{ 'app_requests' => 'empty_result' },
{ 'query_apdex' => 'empty_result' },
{ 'node_memory' => 'empty_result' },
{ 'node_memory_utilization' => 'empty_result' },
{ 'node_cpus' => 'empty_result' },
......@@ -382,6 +390,7 @@ RSpec.describe Gitlab::UsageData::Topology do
it 'filters out unknown service data and reports the unknown services as a failure' do
expect_prometheus_api_to(
receive_app_request_volume_query(result: []),
receive_query_apdex_ratio_query(result: []),
receive_node_memory_query(result: []),
receive_node_memory_utilization_query(result: []),
receive_node_cpu_count_query(result: []),
......@@ -412,6 +421,7 @@ RSpec.describe Gitlab::UsageData::Topology do
duration_s: 0,
failures: [
{ 'app_requests' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'query_apdex' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'node_memory' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'node_memory_utilization' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'node_cpus' => 'Gitlab::PrometheusClient::ConnectionError' },
......@@ -442,6 +452,7 @@ RSpec.describe Gitlab::UsageData::Topology do
duration_s: 0,
failures: [
{ 'app_requests' => exception.to_s },
{ 'query_apdex' => 'timeout_cancellation' },
{ 'node_memory' => 'timeout_cancellation' },
{ 'node_memory_utilization' => 'timeout_cancellation' },
{ 'node_cpus' => 'timeout_cancellation' },
......@@ -497,6 +508,17 @@ RSpec.describe Gitlab::UsageData::Topology do
])
end
# Builds the expectation for the client's `query` call that targets the
# query Apdex metric.
#
# @param result the canned response to return; when nil, a single sample
#   with the value '0.996' is returned instead. An empty array is truthy in
#   Ruby, so `result: []` is passed through unchanged.
def receive_query_apdex_ratio_query(result: nil)
default_response = [{ 'metric' => {}, 'value' => [1000, '0.996'] }]

receive(:query)
.with(/gitlab_usage_ping:sql_duration_apdex:ratio_rate5m/)
.and_return(result || default_response)
end
def receive_node_memory_query(result: nil)
receive(:query)
.with(/node_memory_total_bytes/, an_instance_of(Hash))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment