Add Query Apdex Prometheus metric to usage ping

- Update lib/gitlab/usage_data/topology.rb to add the query_apdex_weekly_average metric, using the new topology_query_apdex_weekly_average method to calculate the rolling 7-day average over the gitlab_usage_ping:sql_duration_apdex:ratio_rate5m Prometheus metric
- Add query_apdex_weekly_average to the topology_spec
- Add query_apdex_weekly_average to the usage_ping doc

Add Query Apdex Prometheus metric to usage ping
- Update lib/gitlab/usage_data/topology.rb to add the query_apdex_weekly_average metric, using the new topology_query_apdex_weekly_average method to calculate the rolling 7-day average over the gitlab_usage_ping:sql_duration_apdex:ratio_rate5m Prometheus metric
- Add query_apdex_weekly_average to the topology_spec
- Add query_apdex_weekly_average to the usage_ping doc
9417c0b1 · Yannis Roussos · 4cb6bba6 · 9417c0b1 · 9417c0b1 · 9417c0b1
Commit 9417c0b1 authored Aug 11, 2020 by Yannis Roussos
4 changed files
--- a/changelogs/unreleased/227305-add-query-apdex-prometheus-metric-to-usage-ping.yml
+++ b/changelogs/unreleased/227305-add-query-apdex-prometheus-metric-to-usage-ping.yml
+---
+title: Add the Query Apdex Prometheus metric to usage ping
+merge_request: 39256
+author:
+type: other
--- a/doc/development/telemetry/usage_ping.md
+++ b/doc/development/telemetry/usage_ping.md
@@ -747,6 +747,7 @@ The following is example content of the Usage Ping payload.
  "topology": {
    "duration_s": 0.013836685999194742,
    "application_requests_per_hour": 4224,
+    "query_apdex_weekly_average": 0.996,
    "failures": [],
    "nodes": [
      {

--- a/lib/gitlab/usage_data/topology.rb
+++ b/lib/gitlab/usage_data/topology.rb
@@ -43,6 +43,7 @@ module Gitlab
        with_prometheus_client(fallback: {}) do |client|
          {
            application_requests_per_hour: topology_app_requests_per_hour(client),
+            query_apdex_weekly_average: topology_query_apdex_weekly_average(client),
            nodes: topology_node_data(client)
          }.compact
        end
@@ -63,6 +64,16 @@ module Gitlab
        (result['value'].last.to_f * 1.hour).to_i
      end
+      # Reads the SQL query Apdex ratio from the
+      # gitlab_usage_ping:sql_duration_apdex:ratio_rate5m Prometheus metric,
+      # wrapped in aggregate_one_week (per the commit message, a rolling
+      # 7-day average).
+      #
+      # client - the Prometheus client yielded by with_prometheus_client.
+      #
+      # Returns the value as a Float, or nil when query_safely returns a falsy
+      # result (its fallback here is nil). The specs in this change expect such
+      # failures to be reported under the 'query_apdex' key.
+      def topology_query_apdex_weekly_average(client)
+        result = query_safely('gitlab_usage_ping:sql_duration_apdex:ratio_rate5m', 'query_apdex', fallback: nil) do |query|
+          # Only the first element of the query response is used.
+          client.query(aggregate_one_week(query)).first
+        end
+        return unless result
+        # The last element of 'value' is the reading; the spec fixture supplies
+        # it as a String ('0.996'), hence the to_f conversion.
+        result['value'].last.to_f
+      end
      def topology_node_data(client)
        # node-level data
        by_instance_mem = topology_node_memory(client)

--- a/spec/lib/gitlab/usage_data/topology_spec.rb
+++ b/spec/lib/gitlab/usage_data/topology_spec.rb
@@ -23,6 +23,7 @@ RSpec.describe Gitlab::UsageData::Topology do
        it 'contains node level metrics for each instance' do
          expect_prometheus_api_to(
            receive_app_request_volume_query,
+            receive_query_apdex_ratio_query,
            receive_node_memory_query,
            receive_node_memory_utilization_query,
            receive_node_cpu_count_query,
@@ -38,6 +39,7 @@ RSpec.describe Gitlab::UsageData::Topology do
          expect(subject[:topology]).to eq({
            duration_s: 0,
            application_requests_per_hour: 36,
+            query_apdex_weekly_average: 0.996,
            failures: [],
            nodes: [
              {
@@ -107,6 +109,7 @@ RSpec.describe Gitlab::UsageData::Topology do
        it 'removes the respective entries and includes the failures' do
          expect_prometheus_api_to(
            receive_app_request_volume_query(result: []),
+            receive_query_apdex_ratio_query(result: []),
            receive_node_memory_query(result: []),
            receive_node_memory_utilization_query(result: []),
            receive_node_cpu_count_query,
@@ -123,6 +126,7 @@ RSpec.describe Gitlab::UsageData::Topology do
            duration_s: 0,
            failures: [
              { 'app_requests' => 'empty_result' },
+              { 'query_apdex' => 'empty_result' },
              { 'node_memory' => 'empty_result' },
              { 'node_memory_utilization' => 'empty_result' },
              { 'service_rss' => 'empty_result' },
@@ -245,6 +249,7 @@ RSpec.describe Gitlab::UsageData::Topology do
        it 'normalizes equivalent instance values and maps them to the same node' do
          expect_prometheus_api_to(
            receive_app_request_volume_query(result: []),
+            receive_query_apdex_ratio_query(result: []),
            receive_node_memory_query(result: node_memory_response),
            receive_node_memory_utilization_query(result: node_memory_utilization_response),
            receive_node_cpu_count_query(result: []),
@@ -261,6 +266,7 @@ RSpec.describe Gitlab::UsageData::Topology do
            duration_s: 0,
            failures: [
              { 'app_requests' => 'empty_result' },
+              { 'query_apdex' => 'empty_result' },
              { 'node_cpus' => 'empty_result' },
              { 'node_cpu_utilization' => 'empty_result' },
              { 'service_uss' => 'empty_result' },
@@ -309,6 +315,7 @@ RSpec.describe Gitlab::UsageData::Topology do
        it 'still reports service metrics' do
          expect_prometheus_api_to(
            receive_app_request_volume_query(result: []),
+            receive_query_apdex_ratio_query(result: []),
            receive_node_memory_query(result: []),
            receive_node_memory_utilization_query(result: []),
            receive_node_cpu_count_query(result: []),
@@ -325,6 +332,7 @@ RSpec.describe Gitlab::UsageData::Topology do
            duration_s: 0,
            failures: [
              { 'app_requests' => 'empty_result' },
+              { 'query_apdex' => 'empty_result' },
              { 'node_memory' => 'empty_result' },
              { 'node_memory_utilization' => 'empty_result' },
              { 'node_cpus' => 'empty_result' },
@@ -382,6 +390,7 @@ RSpec.describe Gitlab::UsageData::Topology do
        it 'filters out unknown service data and reports the unknown services as a failure' do
          expect_prometheus_api_to(
            receive_app_request_volume_query(result: []),
+            receive_query_apdex_ratio_query(result: []),
            receive_node_memory_query(result: []),
            receive_node_memory_utilization_query(result: []),
            receive_node_cpu_count_query(result: []),
@@ -412,6 +421,7 @@ RSpec.describe Gitlab::UsageData::Topology do
              duration_s: 0,
              failures: [
                { 'app_requests' => 'Gitlab::PrometheusClient::ConnectionError' },
+                { 'query_apdex' => 'Gitlab::PrometheusClient::ConnectionError' },
                { 'node_memory' => 'Gitlab::PrometheusClient::ConnectionError' },
                { 'node_memory_utilization' => 'Gitlab::PrometheusClient::ConnectionError' },
                { 'node_cpus' => 'Gitlab::PrometheusClient::ConnectionError' },
@@ -442,6 +452,7 @@ RSpec.describe Gitlab::UsageData::Topology do
                duration_s: 0,
                failures: [
                  { 'app_requests' => exception.to_s },
+                  { 'query_apdex' => 'timeout_cancellation' },
                  { 'node_memory' => 'timeout_cancellation' },
                  { 'node_memory_utilization' => 'timeout_cancellation' },
                  { 'node_cpus' => 'timeout_cancellation' },
@@ -497,6 +508,17 @@ RSpec.describe Gitlab::UsageData::Topology do
      ])
  end
+  # Builds a `receive(:query)` expectation that matches any query mentioning
+  # the gitlab_usage_ping:sql_duration_apdex:ratio_rate5m metric.
+  #
+  # result - optional canned response. Because `[]` is truthy in Ruby, passing
+  #          `result: []` returns an empty response rather than the default.
+  #
+  # When result is nil, a single sample with value '0.996' is returned.
+  def receive_query_apdex_ratio_query(result: nil)
+    receive(:query)
+      .with(/gitlab_usage_ping:sql_duration_apdex:ratio_rate5m/)
+      .and_return(result || [
+        {
+          'metric' => {},
+          'value' => [1000, '0.996']
+        }
+      ])
+  end
  def receive_node_memory_query(result: nil)
    receive(:query)
      .with(/node_memory_total_bytes/, an_instance_of(Hash))