Commit 31214455 authored by Peter Leitzen's avatar Peter Leitzen Committed by John Jarvis

[master] Add a shared secret to prevent abuse of the alert endpoint

parent 8566a49d
......@@ -834,6 +834,8 @@ ActiveRecord::Schema.define(version: 20181220165848) do
t.datetime_with_timezone "created_at", null: false
t.datetime_with_timezone "updated_at", null: false
t.datetime_with_timezone "last_update_started_at"
t.string "encrypted_alert_manager_token"
t.string "encrypted_alert_manager_token_iv"
t.index ["cluster_id"], name: "index_clusters_applications_prometheus_on_cluster_id", unique: true, using: :btree
end
......
......@@ -24,9 +24,10 @@ module Projects
end
def notify
token = extract_alert_manager_token(request)
notify = Projects::Prometheus::Alerts::NotifyService.new(project, current_user, params)
if notify.execute
if notify.execute(token)
head :ok
else
head :unprocessable_entity
......@@ -96,6 +97,10 @@ module Projects
def application
@application ||= alert.environment.cluster_prometheus_adapter
end
def extract_alert_manager_token(request)
Doorkeeper::OAuth::Token.from_bearer_authorization(request)
end
end
end
end
# frozen_string_literal: true
require 'securerandom'
module EE
module Clusters
module Applications
......@@ -7,6 +9,11 @@ module EE
extend ActiveSupport::Concern
prepended do
attr_encrypted :alert_manager_token,
mode: :per_attribute_iv,
key: Settings.attr_encrypted_db_key_base_truncated,
algorithm: 'aes-256-gcm'
state_machine :status do
after_transition any => :updating do |application|
application.update(last_update_started_at: Time.now, version: application.class.const_get(:VERSION))
......@@ -49,6 +56,18 @@ module EE
def files_with_replaced_values(replaced_values)
files.merge('values.yaml': replaced_values)
end
def generate_alert_manager_token!
unless alert_manager_token.present?
update!(alert_manager_token: generate_token)
end
end
private
def generate_token
SecureRandom.hex
end
end
end
end
......
......@@ -13,9 +13,7 @@ module Clusters
def execute
app.make_updating!
values = helm_api
.get_config_map(config_map_name)
.yield_self { |response| extract_config(response) }
values = load_config(app)
.yield_self { |config| update_config(config) }
.yield_self { |config| config.to_yaml }
......@@ -30,12 +28,8 @@ module Clusters
private
def config_map_name
::Gitlab::Kubernetes::ConfigMap.new(app.name, app.files).config_map_name
end
def extract_config(response)
YAML.safe_load(response.data[:'values.yaml'])
def load_config(app)
YAML.safe_load(app.values)
end
def update_config(config)
......
......@@ -4,8 +4,9 @@ module Projects
module Prometheus
module Alerts
class NotifyService < BaseService
def execute
return false unless valid?
def execute(token)
return false unless valid_version?
return false unless valid_alert_manager_token?(token)
notification_service.async.prometheus_alerts_fired(project, firings) if firings.any?
......@@ -28,10 +29,49 @@ module Projects
params['alerts']
end
def valid?
def valid_version?
params['version'] == '4'
end
def valid_alert_manager_token?(token)
# We don't support token authorization for manual installations.
prometheus = project.find_or_initialize_service('prometheus')
return true if prometheus.manual_configuration?
prometheus_application = available_prometheus_application(project)
return false unless prometheus_application
if token
compare_token(token, prometheus_application.alert_manager_token)
else
prometheus_application.alert_manager_token.nil?
end
end
def available_prometheus_application(project)
alert_id = gitlab_alert_id
return unless alert_id
alert = project.prometheus_alerts.for_metric(alert_id).first
return unless alert
cluster = alert.environment.deployment_platform&.cluster
return unless cluster&.enabled?
return unless cluster.application_prometheus_available?
cluster.application_prometheus
end
def gitlab_alert_id
alerts.first.dig('labels', 'gitlab_alert_id')
end
def compare_token(expected, actual)
return unless expected && actual
ActiveSupport::SecurityUtils.variable_size_secure_compare(expected, actual)
end
def persist_events(project, current_user, params)
CreateEventsService.new(project, current_user, params).execute
end
......
---
title: Add a shared secret to prevent abuse of the alert endpoint
merge_request:
author:
type: security
# frozen_string_literal: true
class AddAlertManagerTokenToClustersApplicationPrometheus < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
add_column :clusters_applications_prometheus, :encrypted_alert_manager_token, :string
add_column :clusters_applications_prometheus, :encrypted_alert_manager_token_iv, :string
end
end
# frozen_string_literal: true
class EnqueuePrometheusUpdates < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
MIGRATION = 'UpdatePrometheusApplication'
BATCH_SIZE = 1000
DELAY_INTERVAL = 45.minutes.to_i
class Prometheus < ActiveRecord::Base
include EachBatch
self.table_name = 'clusters_applications_prometheus'
end
disable_ddl_transaction!
def up
Prometheus.each_batch(of: BATCH_SIZE) do |relation, index|
delay = DELAY_INTERVAL * index
min, max = relation.pluck('MIN(id)', 'MAX(id)').first
BackgroundMigrationWorker.perform_in(delay, MIGRATION, [min, max])
end
end
def down
# no-op
end
end
# frozen_string_literal: true
class Gitlab::BackgroundMigration::UpdatePrometheusApplication
class Project < ActiveRecord::Base
self.table_name = 'projects'
has_many :cluster_projects, class_name: 'ClustersProject'
has_many :clusters, through: :cluster_projects, class_name: 'Cluster'
end
class Cluster < ActiveRecord::Base
self.table_name = 'clusters'
has_one :application_prometheus, class_name: 'Prometheus'
end
class ClustersProject < ActiveRecord::Base
self.table_name = 'cluster_projects'
belongs_to :cluster, class_name: 'Cluster'
belongs_to :project, class_name: 'Project'
end
class Prometheus < ActiveRecord::Base
self.table_name = 'clusters_applications_prometheus'
end
def perform(from, to)
app_name = 'prometheus'
now = Time.now
project_prometheus(from, to).each do |project_id, app_id|
ClusterUpdateAppWorker.perform_async(app_name, app_id, project_id, now)
end
end
private
def project_prometheus(from, to, &block)
Project
.joins(clusters: :application_prometheus)
.where('clusters_applications_prometheus.id IN (?)', from..to)
.pluck('projects.id, clusters_applications_prometheus.id')
end
end
......@@ -107,6 +107,34 @@ describe Projects::Prometheus::AlertsController do
expect(response).to have_gitlab_http_status(422)
end
context 'bearer token' do
context 'when set' do
it 'extracts bearer token' do
request.headers['HTTP_AUTHORIZATION'] = 'Bearer some token'
expect(notify_service).to receive(:execute).with('some token')
post :notify, project_params, as: :json
end
it 'pass nil if cannot extract a non-bearer token' do
request.headers['HTTP_AUTHORIZATION'] = 'some token'
expect(notify_service).to receive(:execute).with(nil)
post :notify, project_params, as: :json
end
end
context 'when missing' do
it 'passes nil' do
expect(notify_service) .to receive(:execute).with(nil)
post :notify, project_params, as: :json
end
end
end
end
describe 'POST #create' do
......
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::BackgroundMigration::UpdatePrometheusApplication, :migration, schema: 20181115140251 do
let(:background_migration) { described_class.new }
let(:projects) { table(:projects) }
let(:clusters) { table(:clusters) }
let(:cluster_projects) { table(:cluster_projects) }
let(:prometheus) { table(:clusters_applications_prometheus) }
let(:namespaces) { table(:namespaces) }
let(:namespace) { namespaces.create!(id: 1, name: 'gitlab', path: 'gitlab') }
describe '#perform' do
around do |example|
Timecop.freeze { example.run }
end
let(:app_name) { 'prometheus' }
let(:now) { Time.now }
let!(:project1) { create_project }
let!(:project2) { create_project }
let!(:cluster1) { create_cluster(project: project1) }
let!(:cluster2) { create_cluster(project: project2) }
let!(:prometheus1) { create_prometheus(cluster: cluster1) }
let!(:prometheus2) { create_prometheus(cluster: cluster2) }
it 'schedules prometheus updates' do
expect(ClusterUpdateAppWorker)
.to receive(:perform_async)
.with(app_name, prometheus1.id, project1.id, now)
expect(ClusterUpdateAppWorker)
.to receive(:perform_async)
.with(app_name, prometheus2.id, project2.id, now)
background_migration.perform(prometheus1.id, prometheus2.id)
end
end
private
def create_project
projects.create!(namespace_id: namespace.id)
end
def create_cluster(project:)
cluster = clusters.create!(name: 'cluster')
cluster_projects.create!(cluster_id: cluster.id, project_id: project.id)
cluster
end
def create_prometheus(cluster:)
prometheus.create!(
cluster_id: cluster.id,
status: 3,
version: '1.2.3'
)
end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('ee', 'db', 'post_migrate', '20181115140251_enqueue_prometheus_updates.rb')
describe EnqueuePrometheusUpdates, :migration, :sidekiq do
let(:migration) { described_class.new }
let(:background_migration) { described_class::MIGRATION }
let(:batch_size_constant) { "#{described_class}::BATCH_SIZE" }
let(:delay) { described_class::DELAY_INTERVAL }
let(:clusters) { table(:clusters) }
let(:prometheus) { table(:clusters_applications_prometheus) }
describe '#up' do
around do |example|
Sidekiq::Testing.fake! do
Timecop.freeze do
example.run
end
end
end
before do
stub_const(batch_size_constant, 2)
end
context 'with prometheus applications' do
let!(:prometheus1) { create_prometheus }
let!(:prometheus2) { create_prometheus }
let!(:prometheus3) { create_prometheus }
it 'schedules update jobs' do
migration.up
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
expect(background_migration)
.to be_scheduled_delayed_migration(delay, prometheus1.id, prometheus2.id)
expect(background_migration)
.to be_scheduled_delayed_migration(delay * 2, prometheus3.id, prometheus3.id)
end
end
context 'without prometheus applications' do
it 'does not schedule update jobs' do
migration.up
expect(BackgroundMigrationWorker.jobs.size).to eq(0)
end
end
private
def create_prometheus
cluster = clusters.create!(name: 'cluster')
prometheus.create!(
cluster_id: cluster.id,
status: 3,
version: '1.2.3'
)
end
end
end
# frozen_string_literal: true
require 'rails_helper'
describe Clusters::Applications::Prometheus do
......@@ -157,4 +159,46 @@ describe Clusters::Applications::Prometheus do
end
end
end
describe 'alert manager token' do
subject { create(:clusters_applications_prometheus) }
context 'when not set' do
it 'is empty by default' do
expect(subject.alert_manager_token).to be_nil
expect(subject.encrypted_alert_manager_token).to be_nil
expect(subject.encrypted_alert_manager_token_iv).to be_nil
end
describe '#generate_alert_manager_token!' do
it 'generates a token' do
subject.generate_alert_manager_token!
expect(subject.alert_manager_token).to match(/\A\h{32}\z/)
end
end
end
context 'when set' do
let(:token) { SecureRandom.hex }
before do
subject.update!(alert_manager_token: token)
end
it 'reads the token' do
expect(subject.alert_manager_token).to eq(token)
expect(subject.encrypted_alert_manager_token).not_to be_nil
expect(subject.encrypted_alert_manager_token_iv).not_to be_nil
end
describe '#generate_alert_manager_token!' do
it 'does not re-generate the token' do
subject.generate_alert_manager_token!
expect(subject.alert_manager_token).to eq(token)
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Clusters::Applications::PrometheusUpdateService do
......@@ -7,7 +9,6 @@ describe Clusters::Applications::PrometheusUpdateService do
let(:cluster) { create(:cluster, :provided_by_user, :with_installed_helm, projects: [project]) }
let(:application) { create(:clusters_applications_prometheus, :installed, cluster: cluster) }
let(:values_yaml) { application.values }
let!(:get_command_values) { OpenStruct.new(data: OpenStruct.new('values.yaml': values_yaml)) }
let!(:upgrade_command) { application.upgrade_command('') }
let(:helm_client) { instance_double(::Gitlab::Kubernetes::Helm::Api) }
......@@ -20,11 +21,6 @@ describe Clusters::Applications::PrometheusUpdateService do
context 'when there are no errors' do
before do
expect(helm_client)
.to receive(:get_config_map)
.with('values-content-configuration-prometheus')
.and_return(get_command_values)
expect(helm_client).to receive(:update).with(upgrade_command)
allow(::ClusterWaitForAppUpdateWorker)
......@@ -64,10 +60,12 @@ describe Clusters::Applications::PrometheusUpdateService do
end
context 'when k8s cluster communication fails' do
it 'make the application update errored' do
before do
error = ::Kubeclient::HttpError.new(500, 'system failure', nil)
allow(helm_client).to receive(:get_config_map).and_raise(error)
allow(helm_client).to receive(:update).and_raise(error)
end
it 'make the application update errored' do
service.execute
expect(application).to be_update_errored
......@@ -78,10 +76,12 @@ describe Clusters::Applications::PrometheusUpdateService do
context 'when application cannot be persisted' do
let(:application) { build(:clusters_applications_prometheus, :installed) }
it 'make the application update errored' do
allow(application).to receive(:make_updating!).once.and_raise(ActiveRecord::RecordInvalid)
before do
allow(application).to receive(:make_updating!).once
.and_raise(ActiveRecord::RecordInvalid.new(application))
end
expect(helm_client).not_to receive(:get_config_map)
it 'make the application update errored' do
expect(helm_client).not_to receive(:update)
service.execute
......
......@@ -3,17 +3,16 @@
require 'spec_helper'
describe Projects::Prometheus::Alerts::NotifyService do
let(:user) { create(:user) }
let(:project) { create(:project) }
set(:user) { create(:user) }
set(:project) { create(:project) }
let(:service) { described_class.new(project, user, payload) }
let(:token_input) { 'token' }
let(:subject) { service.execute(token_input) }
context 'with valid payload' do
let(:alert_firing) { create(:prometheus_alert, project: project) }
let(:alert_resolved) { create(:prometheus_alert, project: project) }
shared_examples 'notifies alerts' do
let(:notification_service) { spy }
let(:create_events_service) { spy }
let(:payload) { payload_for(firing: [alert_firing], resolved: [alert_resolved]) }
let(:payload_alert_firing) { payload['alerts'].first }
before do
allow(NotificationService).to receive(:new).and_return(notification_service)
......@@ -26,27 +25,138 @@ describe Projects::Prometheus::Alerts::NotifyService do
.to receive_message_chain(:async, :prometheus_alerts_fired)
.with(project, [payload_alert_firing])
expect(service.execute).to eq(true)
expect(subject).to eq(true)
end
it 'persists events' do
expect(create_events_service).to receive(:execute)
expect(service.execute).to eq(true)
expect(subject).to eq(true)
end
end
shared_examples 'no notifications' do
let(:notification_service) { spy }
let(:create_events_service) { spy }
it 'does not notify' do
expect(notification_service).not_to receive(:async)
expect(create_events_service).not_to receive(:execute)
expect(subject).to eq(false)
end
end
context 'with valid payload' do
let(:alert_firing) { create(:prometheus_alert, project: project) }
let(:alert_resolved) { create(:prometheus_alert, project: project) }
let(:payload) { payload_for(firing: [alert_firing], resolved: [alert_resolved]) }
let(:payload_alert_firing) { payload['alerts'].first }
let(:token) { 'token' }
context 'with project specific cluster' do
using RSpec::Parameterized::TableSyntax
where(:cluster_enabled, :status, :configured_token, :token_input, :result) do
true | :installed | token | token | :success
true | :installed | nil | nil | :success
true | :updated | token | token | :success
true | :updating | token | token | :failure
true | :installed | token | 'x' | :failure
true | :installed | nil | token | :failure
true | :installed | token | nil | :failure
true | nil | token | token | :failure
false | :installed | token | token | :failure
end
with_them do
let(:alert_manager_token) { token_input }
before do
cluster = create(:cluster, :provided_by_user,
projects: [project],
enabled: cluster_enabled)
if status
create(:clusters_applications_prometheus, status,
cluster: cluster,
alert_manager_token: configured_token)
end
end
case result = params[:result]
when :success
it_behaves_like 'notifies alerts'
when :failure
it_behaves_like 'no notifications'
else
raise "invalid result: #{result.inspect}"
end
end
end
context 'with environment specific clusters' do
let(:prd_cluster) do
create(:cluster, :provided_by_user, projects: [project], enabled: true, environment_scope: '*')
end
let(:stg_cluster) do
create(:cluster, :provided_by_user, projects: [project], enabled: true, environment_scope: 'stg/*')
end
let(:stg_environment) do
create(:environment, project: project, name: 'stg/1')
end
let(:alert_firing) do
create(:prometheus_alert, project: project, environment: stg_environment)
end
before do
stub_licensed_features(multiple_clusters: true)
create(:clusters_applications_prometheus, :installed,
cluster: prd_cluster, alert_manager_token: token)
create(:clusters_applications_prometheus, :installed,
cluster: stg_cluster, alert_manager_token: nil)
end
context 'without token' do
let(:token_input) { nil }
it_behaves_like 'notifies alerts'
end
context 'with token' do
it_behaves_like 'no notifications'
end
end
context 'without project specific cluster' do
let!(:cluster) { create(:cluster, enabled: true) }
it_behaves_like 'no notifications'
end
context 'with manual prometheus installation' do
before do
create(:prometheus_service, project: project)
end
it_behaves_like 'notifies alerts'
end
end
context 'with invalid payload' do
let(:payload) { {} }
context 'without version' do
let(:payload) { {} }
it 'returns false without `version`' do
expect(service.execute).to eq(false)
it_behaves_like 'no notifications'
end
it 'returns false if `version` is not 4' do
payload['version'] = '5'
context 'when version is not "4"' do
let(:payload) { { 'version' => '5' } }
expect(service.execute).to eq(false)
it_behaves_like 'no notifications'
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment