Commit 9bb58839 authored by James Lopez

Merge branch '207095-monitoring-for-bulk-es-queue' into 'master'

Monitoring for Elasticsearch incremental updates buffer queue

See merge request gitlab-org/gitlab!27384
parents 61d48edf 4daf2ff2
@@ -459,6 +459,11 @@ production: &base
    elastic_index_bulk_cron_worker:
      cron: "*/1 * * * *"

    # Elasticsearch metrics
    # NOTE: This will only take effect if Elasticsearch is enabled.
    elastic_metrics_update_worker:
      cron: "*/1 * * * *"

  registry:
    # enabled: true
    # host: registry.example.com
@@ -546,6 +546,9 @@ Gitlab.ee do
  Settings.cron_jobs['elastic_index_bulk_cron_worker'] ||= Settingslogic.new({})
  Settings.cron_jobs['elastic_index_bulk_cron_worker']['cron'] ||= '*/1 * * * *'
  Settings.cron_jobs['elastic_index_bulk_cron_worker']['job_class'] ||= 'ElasticIndexBulkCronWorker'
  Settings.cron_jobs['elastic_metrics_update_worker'] ||= Settingslogic.new({})
  Settings.cron_jobs['elastic_metrics_update_worker']['cron'] ||= '*/1 * * * *'
  Settings.cron_jobs['elastic_metrics_update_worker']['job_class'] ||= 'ElasticMetricsUpdateWorker'
  Settings.cron_jobs['sync_seat_link_worker'] ||= Settingslogic.new({})
  Settings.cron_jobs['sync_seat_link_worker']['cron'] ||= "#{rand(60)} 0 * * *"
  Settings.cron_jobs['sync_seat_link_worker']['job_class'] = 'SyncSeatLinkWorker'
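The `||=` pattern above (together with the matching `gitlab.yml` entry in the previous hunk) only fills in a default when the operator has not configured the job themselves. A minimal sketch of that behaviour, not part of the diff, assuming the `settingslogic` gem these initializers use and a hypothetical operator override of the schedule:

require 'settingslogic'

# Hypothetical operator configuration: the cron schedule is set explicitly,
# the job class is not.
cron_jobs = Settingslogic.new(
  'elastic_metrics_update_worker' => { 'cron' => '*/5 * * * *' }
)

# Same defaulting pattern as in the initializer above.
cron_jobs['elastic_metrics_update_worker'] ||= Settingslogic.new({})
cron_jobs['elastic_metrics_update_worker']['cron'] ||= '*/1 * * * *'
cron_jobs['elastic_metrics_update_worker']['job_class'] ||= 'ElasticMetricsUpdateWorker'

cron_jobs['elastic_metrics_update_worker']['cron']      # => "*/5 * * * *" (operator value kept)
cron_jobs['elastic_metrics_update_worker']['job_class'] # => "ElasticMetricsUpdateWorker" (default applied)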
@@ -17,7 +17,13 @@ GitLab monitors its own internal service metrics, and makes them available at the
`/-/metrics` endpoint. Unlike other [Prometheus](https://prometheus.io) exporters, in order to access
it, the client IP needs to be [included in a whitelist](../ip_whitelist.md).
For Omnibus and Chart installations, these metrics are automatically enabled
and collected as of [GitLab
9.4](https://gitlab.com/gitlab-org/omnibus-gitlab/-/merge_requests/1702). For
source installations or earlier versions, these metrics will need to be enabled
manually and collected by a Prometheus server.
See also [Sidekiq metrics](#sidekiq-metrics) for how to enable and view metrics from Sidekiq nodes.
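For a quick end-to-end check that the endpoint responds for a whitelisted client, here is a small illustrative Ruby snippet (not part of this merge request; the host name is a placeholder):

require 'net/http'
require 'uri'

# Placeholder host; the requesting IP must be on the monitoring whitelist,
# otherwise GitLab will not serve the metrics.
uri = URI('https://gitlab.example.com/-/metrics')
response = Net::HTTP.get_response(uri)

puts response.code                 # expect "200" from a whitelisted client
puts response.body.lines.first(5)  # a small sample of the exported metrics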
## Metrics available
@@ -105,10 +111,12 @@ The following metrics can be controlled by feature flags:
| `gitlab_method_call_duration_seconds` | `prometheus_metrics_method_instrumentation` |
| `gitlab_view_rendering_duration_seconds` | `prometheus_metrics_view_instrumentation` |
## Sidekiq Metrics available for Geo **(PREMIUM)**
## Sidekiq metrics
Sidekiq jobs may also gather metrics, and these metrics can be accessed if the Sidekiq exporter is enabled (e.g. via
the `monitoring.sidekiq_exporter` configuration option in `gitlab.yml`).
Sidekiq jobs may also gather metrics, and these metrics can be accessed if the
Sidekiq exporter is enabled (for example, using the `monitoring.sidekiq_exporter`
configuration option in `gitlab.yml`). These metrics are served from the
`/metrics` path on the configured port.
| Metric | Type | Since | Description | Labels |
|:---------------------------------------------- |:------- |:----- |:----------- |:------ |
@@ -145,6 +153,7 @@ the `monitoring.sidekiq_exporter` configuration option in `gitlab.yml`.
| `geo_repositories_checked_failed_count` | Gauge | 11.1 | Number of repositories that have a failure from `git fsck` | url |
| `geo_repositories_retrying_verification_count` | Gauge | 11.2 | Number of repositories verification failures that Geo is actively trying to correct on secondary | url |
| `geo_wikis_retrying_verification_count` | Gauge | 11.2 | Number of wikis verification failures that Geo is actively trying to correct on secondary | url |
| `global_search_bulk_cron_queue_size` | Gauge | 12.10 | Number of database records waiting to be synchronized to Elasticsearch | |
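The new `global_search_bulk_cron_queue_size` gauge is exposed through the same Sidekiq exporter endpoint described above. A quick illustrative way to pull just that metric, not part of the diff, with a placeholder host and port (use whatever is configured under `monitoring.sidekiq_exporter` in `gitlab.yml`):

require 'net/http'
require 'uri'

# Placeholder address and port for the Sidekiq exporter.
uri = URI('http://sidekiq.example.com:8082/metrics')
body = Net::HTTP.get(uri)

# Print the gauge exported by Elastic::MetricsUpdateService, if present.
puts body.lines.grep(/^global_search_bulk_cron_queue_size/)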
## Database load balancing metrics **(PREMIUM ONLY)**
@@ -31,6 +31,13 @@
  :resource_boundary: :unknown
  :weight: 1
  :idempotent: true
- :name: cronjob:elastic_metrics_update
  :feature_category: :global_search
  :has_external_dependencies:
  :urgency: :low
  :resource_boundary: :unknown
  :weight: 1
  :idempotent: true
- :name: cronjob:geo_container_repository_sync_dispatch
  :feature_category: :geo_replication
  :has_external_dependencies:
# frozen_string_literal: true

class ElasticMetricsUpdateWorker
  include ApplicationWorker
  include ExclusiveLeaseGuard
  # rubocop:disable Scalability/CronWorkerContext
  # This worker does not perform work scoped to a context
  include CronjobQueue
  # rubocop:enable Scalability/CronWorkerContext

  feature_category :global_search
  idempotent!

  LEASE_TIMEOUT = 5.minutes

  def perform
    try_obtain_lease { Elastic::MetricsUpdateService.new.execute }
  end

  private

  def lease_timeout
    LEASE_TIMEOUT
  end
end
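`ExclusiveLeaseGuard` wraps the block in a Redis-backed lease so that overlapping cron runs across Sidekiq processes do not execute the service concurrently. A simplified sketch of what `try_obtain_lease` does for this worker (illustrative only, not the actual mixin):

# The lease key defaults to the underscored class name; the block runs only
# if no other process currently holds the lease.
lease = Gitlab::ExclusiveLease.new('elastic_metrics_update_worker', timeout: 5.minutes.to_i)

if (uuid = lease.try_obtain)
  begin
    Elastic::MetricsUpdateService.new.execute
  ensure
    # The guard releases the lease once the block finishes.
    Gitlab::ExclusiveLease.cancel('elastic_metrics_update_worker', uuid)
  end
end
# When try_obtain returns false, this run is skipped until the lease expires.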
---
title: Monitoring for Elasticsearch incremental updates buffer queue
merge_request: 27384
author:
type: other
# frozen_string_literal: true

module Elastic
  class MetricsUpdateService
    def execute
      return unless elasticsearch_enabled?
      return unless prometheus_enabled?

      # Export the size of the incremental-update bookkeeping queue as a
      # Prometheus gauge; :max keeps the largest value reported across processes.
      gauge = Gitlab::Metrics.gauge(:global_search_bulk_cron_queue_size, 'Number of database records waiting to be synchronized to Elasticsearch', {}, :max)
      gauge.set({}, Elastic::ProcessBookkeepingService.queue_size)
    end

    private

    def elasticsearch_enabled?
      ::Gitlab::CurrentSettings.elasticsearch_indexing?
    end

    def prometheus_enabled?
      ::Gitlab::Metrics.prometheus_metrics_enabled?
    end
  end
end
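To exercise the service by hand, for example from a Rails console on an instance with Elasticsearch indexing and Prometheus metrics enabled (illustrative, not part of the diff):

# Current number of queued incremental updates in the bookkeeping queue.
Elastic::ProcessBookkeepingService.queue_size

# Export that number as the global_search_bulk_cron_queue_size gauge,
# exactly as the cron worker does every minute.
Elastic::MetricsUpdateService.new.execute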
# frozen_string_literal: true

require 'spec_helper'

describe Elastic::MetricsUpdateService, :prometheus do
  subject { described_class.new }

  before do
    stub_ee_application_setting(elasticsearch_indexing: true)
    allow(Gitlab::Metrics).to receive(:prometheus_metrics_enabled?).and_return(true)
  end

  describe '#execute' do
    it 'sets a gauge for global_search_bulk_cron_queue_size' do
      expect(Elastic::ProcessBookkeepingService).to receive(:queue_size).and_return(4)

      gauge_double = instance_double(Prometheus::Client::Gauge)
      expect(Gitlab::Metrics).to receive(:gauge)
        .with(:global_search_bulk_cron_queue_size, anything, {}, :max)
        .and_return(gauge_double)

      expect(gauge_double).to receive(:set).with({}, 4)

      subject.execute
    end

    context 'when prometheus metrics is disabled' do
      before do
        allow(Gitlab::Metrics).to receive(:prometheus_metrics_enabled?).and_return(false)
      end

      it 'does not set a gauge' do
        expect(Gitlab::Metrics).not_to receive(:gauge)

        subject.execute
      end
    end

    context 'when elasticsearch indexing and search is disabled' do
      before do
        stub_ee_application_setting(elasticsearch_indexing: false)
      end

      it 'does not set a gauge' do
        expect(Gitlab::Metrics).not_to receive(:gauge)

        subject.execute
      end
    end
  end
end
# frozen_string_literal: true

require 'spec_helper'

describe ElasticMetricsUpdateWorker do
  include ExclusiveLeaseHelpers

  describe '.perform' do
    it 'executes the service under an exclusive lease' do
      expect_to_obtain_exclusive_lease('elastic_metrics_update_worker')

      expect_next_instance_of(::Elastic::MetricsUpdateService) do |service|
        expect(service).to receive(:execute)
      end

      described_class.new.perform
    end
  end
end