Commit 7460ac5a authored by Grzegorz Bizon

Merge branch '7527-ci-variables-are-not-parameterized-for-alerting-rules' into 'master'

Resolve "CI variables are not parameterized for alerting rules"

Closes #7527

See merge request gitlab-org/gitlab-ee!8481
parents df6b6ccd bfea0095
# frozen_string_literal: true
module Clusters
  module Applications
    # Rewrites the alerting-related sections of a parsed Prometheus values
    # hash (string keys, e.g. the result of loading `values.yaml`).
    #
    # The environments considered are those returned by
    # `project.environments_for_scope(cluster.environment_scope)`. When at
    # least one of them has Prometheus alerts, Alertmanager is enabled and
    # configured and one rule group per environment is written; otherwise
    # Alertmanager is disabled and the alert rules are cleared.
    class PrometheusConfigService
      # @param project object responding to `environments_for_scope`,
      #   `namespace` and `path`
      # @param cluster object responding to `environment_scope`
      def initialize(project, cluster)
        @project = project
        @cluster = cluster
      end

      # Updates `config` in place and returns it.
      #
      # Sections that are missing (or nil) in `config` — `alertmanager`,
      # `serverFiles`, `serverFiles.alerts` — are created on demand instead
      # of raising NoMethodError on nil.
      #
      # @param config [Hash] parsed Prometheus values with string keys
      # @return [Hash] the same hash, mutated
      def execute(config)
        if has_alerts?
          generate_alert_manager(config)
        else
          reset_alert_manager(config)
        end
      end

      private

      attr_reader :project, :cluster

      # Disables Alertmanager, drops its config files and empties the
      # server-side alert rules.
      def reset_alert_manager(config)
        config = set_alert_manager_enabled(config, false)
        config.delete('alertmanagerFiles')
        server_files(config)['alerts'] = {}
        config
      end

      # Enables Alertmanager, writes its receiver/route configuration and
      # the per-environment rule groups.
      def generate_alert_manager(config)
        config = set_alert_manager_enabled(config, true)
        config = set_alert_manager_files(config)
        set_alert_manager_groups(config)
      end

      def set_alert_manager_enabled(config, enabled)
        # Create the section when the incoming values do not define it.
        alertmanager = (config['alertmanager'] ||= {})
        alertmanager['enabled'] = enabled
        config
      end

      # Replaces any existing `alertmanagerFiles` entry wholesale.
      def set_alert_manager_files(config)
        config['alertmanagerFiles'] = {
          'alertmanager.yml' => {
            'receivers' => alert_manager_receivers_params,
            'route' => alert_manager_route_params
          }
        }
        config
      end

      # Upserts one rule group per environment, matched by group name.
      # Groups with other names are left untouched.
      def set_alert_manager_groups(config)
        alerts_section = (server_files(config)['alerts'] ||= {})
        groups = (alerts_section['groups'] ||= [])

        environments_with_alerts.each do |env_name, alerts|
          existing = groups.find { |group| group['name'] == env_name }

          if existing
            existing['rules'] = alerts
          else
            groups << { 'name' => env_name, 'rules' => alerts }
          end
        end

        config
      end

      # Returns the `serverFiles` section, creating it when absent or nil.
      def server_files(config)
        config['serverFiles'] ||= {}
      end

      def alert_manager_receivers_params
        [
          {
            'name' => 'gitlab',
            'webhook_configs' => [
              {
                'url' => notify_url,
                'send_resolved' => true
              }
            ]
          }
        ]
      end

      def alert_manager_route_params
        {
          'receiver' => 'gitlab',
          'group_wait' => '30s',
          'group_interval' => '5m',
          'repeat_interval' => '4h'
        }
      end

      # Webhook URL Alertmanager posts notifications to.
      # NOTE(review): built from `namespace.path`, not a full path — confirm
      # this resolves correctly for projects inside nested groups.
      def notify_url
        ::Gitlab::Routing.url_helpers.notify_namespace_project_prometheus_alerts_url(
          namespace_id: project.namespace.path,
          project_id: project.path,
          format: :json
        )
      end

      def has_alerts?
        environments_with_alerts.values.flatten(1).any?
      end

      # Memoized map of rule-group name => array of alert rule hashes, with
      # an entry for every environment in scope (possibly an empty array).
      def environments_with_alerts
        @environments_with_alerts ||=
          environments.each_with_object({}) do |environment, hash|
            name = rule_name(environment)
            hash[name] = alerts(environment)
          end
      end

      def rule_name(environment)
        "#{environment.name}.rules"
      end

      # Query variables are looked up once per environment and applied to
      # each of its alerts.
      def alerts(environment)
        variables = Gitlab::Prometheus::QueryVariables.call(environment)

        environment.prometheus_alerts.map do |alert|
          substitute_query_variables(alert.to_param, variables)
        end
      end

      # Interpolates `variables` into the rule expression with String#%,
      # so `%{name}` placeholders are replaced. Per String#% semantics a
      # literal percent sign must be written `%%`, and a placeholder missing
      # from a variables hash raises KeyError.
      def substitute_query_variables(hash, variables)
        hash['expr'] %= variables
        hash
      end

      def environments
        project.environments_for_scope(cluster.environment_scope)
      end
    end
  end
end
...@@ -11,17 +11,13 @@ module Clusters ...@@ -11,17 +11,13 @@ module Clusters
def execute def execute
app.make_updating! app.make_updating!
response = helm_api.get_config_map(config_map_name) values = helm_api
config = extract_config(response) .get_config_map(config_map_name)
.yield_self { |response| extract_config(response) }
.yield_self { |config| update_config(config) }
.yield_self { |config| config.to_yaml }
data = helm_api.update(upgrade_command(values))
if has_alerts?
generate_alert_manager(config)
else
reset_alert_manager(config)
end
helm_api.update(upgrade_command(data.to_yaml))
::ClusterWaitForAppUpdateWorker.perform_in(::ClusterWaitForAppUpdateWorker::INTERVAL, app.name, app.id) ::ClusterWaitForAppUpdateWorker.perform_in(::ClusterWaitForAppUpdateWorker::INTERVAL, app.name, app.id)
rescue ::Kubeclient::HttpError => ke rescue ::Kubeclient::HttpError => ke
...@@ -36,112 +32,14 @@ module Clusters ...@@ -36,112 +32,14 @@ module Clusters
::Gitlab::Kubernetes::ConfigMap.new(app.name, app.files).config_map_name ::Gitlab::Kubernetes::ConfigMap.new(app.name, app.files).config_map_name
end end
def reset_alert_manager(config)
config = set_alert_manager_enabled(config, false)
config.delete("alertmanagerFiles")
config["serverFiles"]["alerts"] = {}
config
end
def generate_alert_manager(config)
config = set_alert_manager_enabled(config, true)
config = set_alert_manager_files(config)
set_alert_manager_groups(config)
end
def set_alert_manager_enabled(config, enabled)
config["alertmanager"]["enabled"] = enabled
config
end
def set_alert_manager_files(config)
config["alertmanagerFiles"] = {
"alertmanager.yml" => {
"receivers" => alert_manager_receivers_params,
"route" => alert_manager_route_params
}
}
config
end
def set_alert_manager_groups(config)
config["serverFiles"]["alerts"]["groups"] ||= []
environments_with_alerts.each do |env_name, alerts|
index = config["serverFiles"]["alerts"]["groups"].find_index do |group|
group["name"] == env_name
end
if index
config["serverFiles"]["alerts"]["groups"][index]["rules"] = alerts
else
config["serverFiles"]["alerts"]["groups"] << {
"name" => env_name,
"rules" => alerts
}
end
end
config
end
def alert_manager_receivers_params
[
{
"name" => "gitlab",
"webhook_configs" => [
{
"url" => notify_url,
"send_resolved" => true
}
]
}
]
end
def alert_manager_route_params
{
"receiver" => "gitlab",
"group_wait" => "30s",
"group_interval" => "5m",
"repeat_interval" => "4h"
}
end
def notify_url
::Gitlab::Routing.url_helpers.notify_namespace_project_prometheus_alerts_url(
namespace_id: project.namespace.path,
project_id: project.path,
format: :json
)
end
def extract_config(response) def extract_config(response)
YAML.safe_load(response.data[:'values.yaml']) YAML.safe_load(response.data[:'values.yaml'])
end end
def has_alerts? def update_config(config)
environments_with_alerts.values.flatten.any? PrometheusConfigService
end .new(project, cluster)
.execute(config)
def environments_with_alerts
@environments_with_alerts ||=
environments.each_with_object({}) do |environment, hsh|
name = rule_name(environment)
hsh[name] = environment.prometheus_alerts.map(&:to_param)
end
end
def rule_name(environment)
"#{environment.name}.rules"
end
def environments
project.environments_for_scope(cluster.environment_scope)
end end
end end
end end
......
---
title: Parameterize alerting rules with variables
merge_request: 8481
author:
type: fixed
# frozen_string_literal: true
require 'spec_helper'
# Specs for the service that rewrites the alerting sections of a Prometheus
# values hash depending on whether the project's environments have alerts.
describe Clusters::Applications::PrometheusConfigService do
set(:project) { create(:project) }
set(:production) { create(:environment, project: project) }
set(:cluster) { create(:cluster, :provided_by_user, projects: [project]) }
# Every example inspects the hash returned by #execute for `input`.
subject { described_class.new(project, cluster).execute(input) }
describe '#execute' do
# Start from the values file vendored with the application.
let(:input) do
YAML.load_file(Rails.root.join('vendor/prometheus/values.yaml'))
end
context 'with alerts' do
# Created eagerly (let!) so the alert exists before `subject` is evaluated.
let!(:alert) do
create(:prometheus_alert, project: project, environment: production)
end
it 'enables alertmanager' do
expect(subject.dig('alertmanager', 'enabled')).to eq(true)
end
describe 'alertmanagerFiles' do
let(:alertmanager) do
subject.dig('alertmanagerFiles', 'alertmanager.yml')
end
it 'contains receivers and route' do
expect(alertmanager.keys).to contain_exactly('receivers', 'route')
end
describe 'receivers' do
let(:receiver) { alertmanager.dig('receivers', 0) }
let(:webhook_config) { receiver.dig('webhook_configs', 0) }
# NOTE(review): the expected URL is built from the namespace/project
# `name`, while the service under test passes their `path` to the URL
# helper — presumably the factories make the two equal; confirm.
let(:notify_url) do
"http://localhost/#{project.namespace.name}/#{project.name}/prometheus/alerts/notify.json"
end
it 'sets receiver' do
expect(receiver['name']).to eq('gitlab')
end
it 'sets webhook_config' do
expect(webhook_config).to eq(
'url' => notify_url,
'send_resolved' => true
)
end
end
describe 'route' do
let(:route) { alertmanager.fetch('route') }
it 'sets route' do
expect(route).to eq(
'receiver' => 'gitlab',
'group_wait' => '30s',
'group_interval' => '5m',
'repeat_interval' => '4h'
)
end
end
end
describe 'serverFiles' do
let(:groups) { subject.dig('serverFiles', 'alerts', 'groups') }
it 'sets the alerts' do
rules = groups.dig(0, 'rules')
expect(rules.size).to eq(1)
expect(rules.first['alert']).to eq(alert.title)
end
context 'with parameterized queries' do
# Overrides the outer `alert` with one whose metric query is a single
# `%{...}` placeholder.
let!(:alert) do
create(:prometheus_alert,
project: project,
environment: production,
prometheus_metric: metric)
end
let(:metric) do
create(:prometheus_metric, query: query, project: project)
end
let(:query) { '%{ci_environment_slug}' }
it 'substitutes query variables' do
# and_call_original keeps the real variable lookup so the substituted
# value can be asserted on below.
expect(Gitlab::Prometheus::QueryVariables)
.to receive(:call)
.with(production)
.and_call_original
expr = groups.dig(0, 'rules', 0, 'expr')
# NOTE(review): the placeholder is the environment *slug* but the
# assertion checks for the environment *name* — presumably identical for
# factory-built environments; confirm.
expect(expr).to include(production.name)
end
end
context 'with multiple environments' do
let(:staging) { create(:environment, project: project) }
# Adds a second alert for production (on top of the outer `alert`) and
# one alert for staging.
before do
create(:prometheus_alert, project: project, environment: production)
create(:prometheus_alert, project: project, environment: staging)
end
it 'sets alerts for multiple environment' do
env_names = groups.map { |group| group['name'] }
expect(env_names).to contain_exactly(
"#{production.name}.rules",
"#{staging.name}.rules"
)
end
it 'substitutes query variables once per environment' do
# No and_call_original here: `call` is replaced and returns nil. Each
# expectation requires exactly one call for that environment, even though
# production has two alerts.
expect(Gitlab::Prometheus::QueryVariables)
.to receive(:call)
.with(production)
expect(Gitlab::Prometheus::QueryVariables)
.to receive(:call)
.with(staging)
subject
end
end
end
end
# No alerts are created in this context.
context 'without alerts' do
it 'disables alertmanager' do
expect(subject.dig('alertmanager', 'enabled')).to eq(false)
end
it 'removes alertmanagerFiles' do
expect(subject).not_to include('alertmanagerFiles')
end
it 'removes alerts' do
expect(subject.dig('serverFiles', 'alerts')).to eq({})
end
end
end
end
...@@ -4,10 +4,11 @@ describe Clusters::Applications::PrometheusUpdateService do ...@@ -4,10 +4,11 @@ describe Clusters::Applications::PrometheusUpdateService do
describe '#execute' do describe '#execute' do
let(:project) { create(:project) } let(:project) { create(:project) }
let(:environment) { create(:environment, project: project) } let(:environment) { create(:environment, project: project) }
let(:cluster) { create(:cluster, :with_installed_helm, projects: [project]) } let(:cluster) { create(:cluster, :provided_by_user, :with_installed_helm, projects: [project]) }
let(:application) { create(:clusters_applications_prometheus, :installed, cluster: cluster) } let(:application) { create(:clusters_applications_prometheus, :installed, cluster: cluster) }
let!(:get_command_values) { OpenStruct.new(data: OpenStruct.new('values.yaml': application.values)) } let(:values_yaml) { application.values }
let!(:upgrade_command) { application.upgrade_command("") } let!(:get_command_values) { OpenStruct.new(data: OpenStruct.new('values.yaml': values_yaml)) }
let!(:upgrade_command) { application.upgrade_command('') }
let(:helm_client) { instance_double(::Gitlab::Kubernetes::Helm::Api) } let(:helm_client) { instance_double(::Gitlab::Kubernetes::Helm::Api) }
subject(:service) { described_class.new(application, project) } subject(:service) { described_class.new(application, project) }
...@@ -19,35 +20,40 @@ describe Clusters::Applications::PrometheusUpdateService do ...@@ -19,35 +20,40 @@ describe Clusters::Applications::PrometheusUpdateService do
context 'when there are no errors' do context 'when there are no errors' do
before do before do
expect(helm_client).to receive(:get_config_map).with("values-content-configuration-prometheus").and_return(get_command_values) expect(helm_client)
.to receive(:get_config_map)
.with('values-content-configuration-prometheus')
.and_return(get_command_values)
expect(helm_client).to receive(:update).with(upgrade_command) expect(helm_client).to receive(:update).with(upgrade_command)
allow(::ClusterWaitForAppUpdateWorker).to receive(:perform_in).and_return(nil)
allow(::ClusterWaitForAppUpdateWorker)
.to receive(:perform_in)
.and_return(nil)
end end
context 'when prometheus alerts exist' do it 'make the application updating' do
it 'generates the alert manager values' do expect(application.cluster).not_to be_nil
create(:prometheus_alert, project: project, environment: environment)
expect(service).to receive(:generate_alert_manager).once service.execute
service.execute expect(application).to be_updating
end
end end
context 'when prometheus alerts do not exist' do it 'updates current config' do
it 'resets the alert manager values' do prometheus_config_service = spy(:prometheus_config_service)
expect(service).to receive(:reset_alert_manager).once values = YAML.safe_load(values_yaml)
service.execute expect(Clusters::Applications::PrometheusConfigService)
end .to receive(:new)
end .with(project, cluster)
.and_return(prometheus_config_service)
it 'make the application updating' do expect(prometheus_config_service)
expect(application.cluster).not_to be_nil .to receive(:execute)
.with(values)
service.execute service.execute
expect(application).to be_updating
end end
it 'schedules async update status check' do it 'schedules async update status check' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment