Commit 4daf2ff2 authored by Dylan Griffith

Add gauge metric global search queue size

This metric tracks the number of updates that are waiting to be
synchronized to Elasticsearch. Since this is a custom queue that does not
use Sidekiq, we need to implement our own custom monitoring
for it.

This MR just sets a gauge every minute from a cron worker; the gauge will
ultimately be available in Prometheus. Later we will build a chart for
it.

See https://gitlab.com/gitlab-org/gitlab/issues/207095
parent f3f4ee8e
...@@ -459,6 +459,11 @@ production: &base ...@@ -459,6 +459,11 @@ production: &base
elastic_index_bulk_cron_worker: elastic_index_bulk_cron_worker:
cron: "*/1 * * * *" cron: "*/1 * * * *"
# Elasticsearch metrics
# NOTE: This will only take effect if Elasticsearch is enabled.
elastic_metrics_update_worker:
cron: "*/1 * * * *"
registry: registry:
# enabled: true # enabled: true
# host: registry.example.com # host: registry.example.com
......
...@@ -546,6 +546,9 @@ Gitlab.ee do ...@@ -546,6 +546,9 @@ Gitlab.ee do
Settings.cron_jobs['elastic_index_bulk_cron_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['elastic_index_bulk_cron_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['elastic_index_bulk_cron_worker']['cron'] ||= '*/1 * * * *' Settings.cron_jobs['elastic_index_bulk_cron_worker']['cron'] ||= '*/1 * * * *'
Settings.cron_jobs['elastic_index_bulk_cron_worker']['job_class'] ||= 'ElasticIndexBulkCronWorker' Settings.cron_jobs['elastic_index_bulk_cron_worker']['job_class'] ||= 'ElasticIndexBulkCronWorker'
Settings.cron_jobs['elastic_metrics_update_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['elastic_metrics_update_worker']['cron'] ||= '*/1 * * * *'
Settings.cron_jobs['elastic_metrics_update_worker']['job_class'] ||= 'ElasticMetricsUpdateWorker'
Settings.cron_jobs['sync_seat_link_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['sync_seat_link_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['sync_seat_link_worker']['cron'] ||= "#{rand(60)} 0 * * *" Settings.cron_jobs['sync_seat_link_worker']['cron'] ||= "#{rand(60)} 0 * * *"
Settings.cron_jobs['sync_seat_link_worker']['job_class'] = 'SyncSeatLinkWorker' Settings.cron_jobs['sync_seat_link_worker']['job_class'] = 'SyncSeatLinkWorker'
......
...@@ -111,12 +111,12 @@ The following metrics can be controlled by feature flags: ...@@ -111,12 +111,12 @@ The following metrics can be controlled by feature flags:
| `gitlab_method_call_duration_seconds` | `prometheus_metrics_method_instrumentation` | | `gitlab_method_call_duration_seconds` | `prometheus_metrics_method_instrumentation` |
| `gitlab_view_rendering_duration_seconds` | `prometheus_metrics_view_instrumentation` | | `gitlab_view_rendering_duration_seconds` | `prometheus_metrics_view_instrumentation` |
## Sidekiq Metrics ## Sidekiq metrics
Sidekiq jobs may also gather metrics, and these metrics can be accessed if the Sidekiq jobs may also gather metrics, and these metrics can be accessed if the
Sidekiq exporter is enabled (e.g. via the `monitoring.sidekiq_exporter` Sidekiq exporter is enabled (for example, using the `monitoring.sidekiq_exporter`
configuration option in `gitlab.yml`. These metrics are served from the configuration option in `gitlab.yml`. These metrics are served from the
`/metrics` path on configured the port. `/metrics` path on the configured port.
| Metric | Type | Since | Description | Labels | | Metric | Type | Since | Description | Labels |
|:---------------------------------------------- |:------- |:----- |:----------- |:------ | |:---------------------------------------------- |:------- |:----- |:----------- |:------ |
...@@ -153,6 +153,7 @@ configuration option in `gitlab.yml`. These metrics are served from the ...@@ -153,6 +153,7 @@ configuration option in `gitlab.yml`. These metrics are served from the
| `geo_repositories_checked_failed_count` | Gauge | 11.1 | Number of repositories that have a failure from `git fsck` | url | | `geo_repositories_checked_failed_count` | Gauge | 11.1 | Number of repositories that have a failure from `git fsck` | url |
| `geo_repositories_retrying_verification_count` | Gauge | 11.2 | Number of repositories verification failures that Geo is actively trying to correct on secondary | url | | `geo_repositories_retrying_verification_count` | Gauge | 11.2 | Number of repositories verification failures that Geo is actively trying to correct on secondary | url |
| `geo_wikis_retrying_verification_count` | Gauge | 11.2 | Number of wikis verification failures that Geo is actively trying to correct on secondary | url | | `geo_wikis_retrying_verification_count` | Gauge | 11.2 | Number of wikis verification failures that Geo is actively trying to correct on secondary | url |
| `global_search_bulk_cron_queue_size` | Gauge | 12.10 | Number of database records waiting to be synchronized to Elasticsearch | |
## Database load balancing metrics **(PREMIUM ONLY)** ## Database load balancing metrics **(PREMIUM ONLY)**
......
...@@ -31,6 +31,13 @@ ...@@ -31,6 +31,13 @@
:resource_boundary: :unknown :resource_boundary: :unknown
:weight: 1 :weight: 1
:idempotent: true :idempotent: true
- :name: cronjob:elastic_metrics_update
:feature_category: :global_search
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
- :name: cronjob:geo_container_repository_sync_dispatch - :name: cronjob:geo_container_repository_sync_dispatch
:feature_category: :geo_replication :feature_category: :geo_replication
:has_external_dependencies: :has_external_dependencies:
......
# frozen_string_literal: true
# Cron worker that refreshes the Elasticsearch-related metrics.
#
# Every run hands off to Elastic::MetricsUpdateService from inside the block
# given to `try_obtain_lease`, with LEASE_TIMEOUT supplied as the lease
# timeout through #lease_timeout.
# NOTE(review): `try_obtain_lease` is presumably provided by
# ExclusiveLeaseGuard -- confirm against that module.
class ElasticMetricsUpdateWorker
  include ApplicationWorker
  include ExclusiveLeaseGuard
  # rubocop:disable Scalability/CronWorkerContext
  # This worker does not perform work scoped to a context
  include CronjobQueue
  # rubocop:enable Scalability/CronWorkerContext

  feature_category :global_search
  idempotent!

  LEASE_TIMEOUT = 5.minutes

  # Runs the metrics update service within the lease block.
  def perform
    try_obtain_lease do
      service = Elastic::MetricsUpdateService.new
      service.execute
    end
  end

  private

  # Timeout used for the lease taken in #perform.
  def lease_timeout
    LEASE_TIMEOUT
  end
end
---
title: Monitoring for Elasticsearch incremental updates buffer queue
merge_request: 27384
author:
type: other
# frozen_string_literal: true
module Elastic
  # Publishes the current Elasticsearch bookkeeping queue size as the
  # `global_search_bulk_cron_queue_size` gauge.
  class MetricsUpdateService
    # Sets the gauge to `Elastic::ProcessBookkeepingService.queue_size`.
    #
    # Returns nil without touching any metric unless Elasticsearch indexing
    # and Prometheus metrics are both enabled; otherwise returns whatever the
    # gauge's `set` call returns.
    def execute
      # Indexing is checked first; the Prometheus check only runs when
      # indexing is enabled.
      return unless elasticsearch_enabled? && prometheus_enabled?

      # The gauge is built before the queue size is read.
      queue_size_gauge.set({}, Elastic::ProcessBookkeepingService.queue_size)
    end

    private

    # Builds the gauge with no base labels and the :max mode argument.
    def queue_size_gauge
      Gitlab::Metrics.gauge(
        :global_search_bulk_cron_queue_size,
        'Number of database records waiting to be synchronized to Elasticsearch',
        {},
        :max
      )
    end

    def elasticsearch_enabled?
      ::Gitlab::CurrentSettings.elasticsearch_indexing?
    end

    def prometheus_enabled?
      ::Gitlab::Metrics.prometheus_metrics_enabled?
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Covers Elastic::MetricsUpdateService#execute: the gauge is set only when
# both Elasticsearch indexing and Prometheus metrics are enabled.
describe Elastic::MetricsUpdateService, :prometheus do
  subject { described_class.new }

  # Baseline for every example: both switches are on; the contexts below
  # turn one of them off.
  before do
    stub_ee_application_setting(elasticsearch_indexing: true)
    allow(Gitlab::Metrics).to receive(:prometheus_metrics_enabled?).and_return(true)
  end

  describe '#execute' do
    it 'sets a gauge for global_search_bulk_cron_queue_size' do
      queue_gauge = instance_double(Prometheus::Client::Gauge)

      expect(Elastic::ProcessBookkeepingService).to receive(:queue_size).and_return(4)
      expect(Gitlab::Metrics).to receive(:gauge)
        .with(:global_search_bulk_cron_queue_size, anything, {}, :max)
        .and_return(queue_gauge)
      # The value passed to the gauge must be the stubbed queue size.
      expect(queue_gauge).to receive(:set).with({}, 4)

      subject.execute
    end

    context 'when prometheus metrics is disabled' do
      before do
        allow(Gitlab::Metrics).to receive(:prometheus_metrics_enabled?).and_return(false)
      end

      it 'does not set a gauge' do
        expect(Gitlab::Metrics).not_to receive(:gauge)

        subject.execute
      end
    end

    context 'when elasticsearch indexing and search is disabled' do
      before do
        stub_ee_application_setting(elasticsearch_indexing: false)
      end

      it 'does not set a gauge' do
        expect(Gitlab::Metrics).not_to receive(:gauge)

        subject.execute
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Verifies that ElasticMetricsUpdateWorker#perform runs
# Elastic::MetricsUpdateService while holding the exclusive lease keyed
# 'elastic_metrics_update_worker'.
describe ElasticMetricsUpdateWorker do
  include ExclusiveLeaseHelpers

  # `perform` is called on an instance below, so it is described with the
  # `#` (instance method) prefix rather than `.` (class method).
  describe '#perform' do
    it 'executes the service under an exclusive lease' do
      expect_to_obtain_exclusive_lease('elastic_metrics_update_worker')

      expect_next_instance_of(::Elastic::MetricsUpdateService) do |service|
        expect(service).to receive(:execute)
      end

      described_class.new.perform
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment