Commit f527b468 authored by Peter Leitzen's avatar Peter Leitzen

Merge branch '118788-automatic-metric-alerts' into 'master'

Automatic created Ngnix alert after Prometheus install (1/2)

See merge request gitlab-org/gitlab!28503
parents c4a7c7ec dbfc215f
...@@ -24,6 +24,8 @@ class PrometheusService < MonitoringService ...@@ -24,6 +24,8 @@ class PrometheusService < MonitoringService
after_commit :track_events after_commit :track_events
after_create_commit :create_default_alerts
def initialize_properties def initialize_properties
if properties.nil? if properties.nil?
self.properties = {} self.properties = {}
...@@ -147,4 +149,10 @@ class PrometheusService < MonitoringService ...@@ -147,4 +149,10 @@ class PrometheusService < MonitoringService
def disabled_manual_prometheus? def disabled_manual_prometheus?
manual_configuration_changed? && !manual_configuration? manual_configuration_changed? && !manual_configuration?
end end
def create_default_alerts
return unless project_id
Prometheus::CreateDefaultAlertsWorker.perform_async(project_id: project_id)
end
end end
# frozen_string_literal: true
module Prometheus
class CreateDefaultAlertsService < BaseService
include Gitlab::Utils::StrongMemoize
attr_reader :project
DEFAULT_ALERTS = [
{
identifier: 'response_metrics_nginx_ingress_16_http_error_rate',
operator: 'gt',
threshold: 0.1
},
{
identifier: 'response_metrics_nginx_ingress_http_error_rate',
operator: 'gt',
threshold: 0.1
}
].freeze
def initialize(project:)
@project = project
end
def execute
return ServiceResponse.error(message: 'Invalid project') unless project
return ServiceResponse.error(message: 'Invalid environment') unless environment
create_alerts
ServiceResponse.success
end
private
def create_alerts
DEFAULT_ALERTS.each do |alert_hash|
identifier = alert_hash[:identifier]
next if alerts_by_identifier(environment).key?(identifier)
metric = metrics_by_identifier[identifier]
next unless metric
create_alert(alert: alert_hash, metric: metric)
end
end
def metrics_by_identifier
strong_memoize(:metrics_by_identifier) do
metric_identifiers = DEFAULT_ALERTS.map { |alert| alert[:identifier] }
PrometheusMetricsFinder
.new(identifier: metric_identifiers, common: true)
.execute
.index_by(&:identifier)
end
end
def alerts_by_identifier(environment)
strong_memoize(:alerts_by_identifier) do
Projects::Prometheus::AlertsFinder
.new(project: project, metric: metrics_by_identifier.values, environment: environment)
.execute
.index_by { |alert| alert.prometheus_metric.identifier }
end
end
def environment
strong_memoize(:environment) do
EnvironmentsFinder.new(project, nil, name: 'production').find.first ||
project.environments.first
end
end
def create_alert(alert:, metric:)
PrometheusAlert.create!(
project: project,
prometheus_metric: metric,
environment: environment,
threshold: alert[:threshold],
operator: alert[:operator]
)
rescue ActiveRecord::RecordNotUnique
# Ignore duplicate creations although it unlikely to happen
end
end
end
...@@ -1256,6 +1256,13 @@ ...@@ -1256,6 +1256,13 @@
:resource_boundary: :unknown :resource_boundary: :unknown
:weight: 1 :weight: 1
:idempotent: :idempotent:
- :name: prometheus_create_default_alerts
:feature_category: :incident_management
:has_external_dependencies:
:urgency: :high
:resource_boundary: :unknown
:weight: 1
:idempotent: true
- :name: propagate_service_template - :name: propagate_service_template
:feature_category: :source_code_management :feature_category: :source_code_management
:has_external_dependencies: :has_external_dependencies:
......
# frozen_string_literal: true
module Prometheus
class CreateDefaultAlertsWorker
include ApplicationWorker
feature_category :incident_management
urgency :high
idempotent!
def perform(project_id)
project = Project.find_by_id(project_id)
return unless project
result = Prometheus::CreateDefaultAlertsService.new(project: project).execute
log_info(result.message) if result.error?
end
private
def log_info(message)
logger.info(structured_payload(message: message))
end
end
end
---
title: Add Prometheus alerts automatically after Prometheus Service was created
merge_request: 28503
author:
type: added
...@@ -200,6 +200,8 @@ ...@@ -200,6 +200,8 @@
- 1 - 1
- - project_update_repository_storage - - project_update_repository_storage
- 1 - 1
- - prometheus_create_default_alerts
- 1
- - propagate_service_template - - propagate_service_template
- 1 - 1
- - reactive_caching - - reactive_caching
......
...@@ -123,6 +123,34 @@ describe PrometheusService, :use_clean_rails_memory_store_caching do ...@@ -123,6 +123,34 @@ describe PrometheusService, :use_clean_rails_memory_store_caching do
end end
end end
describe 'callbacks' do
context 'after_create' do
let(:project) { create(:project) }
let(:service) { build(:prometheus_service, project: project) }
subject(:create_service) { service.save! }
it 'creates default alerts' do
expect(Prometheus::CreateDefaultAlertsWorker)
.to receive(:perform_async)
.with(project_id: project.id)
create_service
end
context 'no project exists' do
let(:service) { build(:prometheus_service, :instance) }
it 'does not create default alerts' do
expect(Prometheus::CreateDefaultAlertsWorker)
.not_to receive(:perform_async)
create_service
end
end
end
end
describe '#test' do describe '#test' do
before do before do
service.manual_configuration = true service.manual_configuration = true
......
# frozen_string_literal: true
require 'spec_helper'
describe Prometheus::CreateDefaultAlertsService do
let_it_be(:project) { create(:project) }
let(:instance) { described_class.new(project: project) }
let(:expected_alerts) { described_class::DEFAULT_ALERTS }
describe '#execute' do
subject(:execute) { instance.execute }
shared_examples 'no alerts created' do
it 'does not create alerts' do
expect { execute }.not_to change { project.reload.prometheus_alerts.count }
end
end
context 'no environment' do
it_behaves_like 'no alerts created'
end
context 'environment exists' do
let_it_be(:environment) { create(:environment, project: project) }
context 'no found metric' do
it_behaves_like 'no alerts created'
end
context 'metric exists' do
before do
create_expected_metrics!
end
context 'alert exists already' do
before do
create_pre_existing_alerts!(environment)
end
it_behaves_like 'no alerts created'
end
it 'creates alerts' do
expect { execute }.to change { project.reload.prometheus_alerts.count }
.by(expected_alerts.size)
end
context 'multiple environments' do
let!(:production) { create(:environment, project: project, name: 'production') }
it 'uses the production environment' do
expect { execute }.to change { production.reload.prometheus_alerts.count }
.by(expected_alerts.size)
end
end
end
end
end
private
def create_expected_metrics!
expected_alerts.each do |alert_hash|
create(:prometheus_metric, :common, identifier: alert_hash.fetch(:identifier))
end
end
def create_pre_existing_alerts!(environment)
expected_alerts.each do |alert_hash|
metric = PrometheusMetric.for_identifier(alert_hash[:identifier]).first!
create(:prometheus_alert, prometheus_metric: metric, project: project, environment: environment)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Prometheus::CreateDefaultAlertsWorker do
let_it_be(:project) { create(:project) }
let(:worker) { described_class.new }
let(:logger) { worker.send(:logger) }
let(:service) { instance_double(Prometheus::CreateDefaultAlertsService) }
let(:service_result) { ServiceResponse.success }
subject { described_class.new.perform(project.id) }
before do
allow(Prometheus::CreateDefaultAlertsService)
.to receive(:new).with(project: project)
.and_return(service)
allow(service).to receive(:execute)
.and_return(service_result)
end
it_behaves_like 'an idempotent worker' do
let(:job_args) { [project.id] }
it 'calls the service' do
expect(service).to receive(:execute)
subject
end
context 'project is nil' do
let(:job_args) { [nil] }
it 'does not call the service' do
expect(service).not_to receive(:execute)
subject
end
end
context 'when service returns an error' do
let(:error_message) { 'some message' }
let(:service_result) { ServiceResponse.error(message: error_message) }
it 'succeeds and logs the error' do
expect(logger)
.to receive(:info)
.with(a_hash_including('message' => error_message))
.exactly(worker_exec_times).times
subject
end
end
end
context 'when service raises an exception' do
let(:error_message) { 'some exception' }
let(:exception) { StandardError.new(error_message) }
it 're-raises exception' do
allow(service).to receive(:execute).and_raise(exception)
expect { subject }.to raise_error(exception)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment