Commit 67a4f2fc authored by Kerri Miller's avatar Kerri Miller

Merge branch 'ajk-disable-broken-webhooks' into 'master'

Add columns and indices on web hooks [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!60837
parents 5dc6b976 84761627
......@@ -29,11 +29,11 @@ module TriggerableHooks
callable_scopes = triggers.keys + [:all]
return none unless callable_scopes.include?(trigger)
public_send(trigger) # rubocop:disable GitlabSecurity/PublicSend
executable.public_send(trigger) # rubocop:disable GitlabSecurity/PublicSend
end
def select_active(hooks_scope, data)
select do |hook|
executable.select do |hook|
ActiveHookFilter.new(hook).matches?(hooks_scope, data)
end
end
......
......@@ -29,6 +29,10 @@ class ProjectHook < WebHook
def pluralized_name
_('Webhooks')
end
def web_hooks_disable_failed?
Feature.enabled?(:web_hooks_disable_failed, project)
end
end
ProjectHook.prepend_if_ee('EE::ProjectHook')
......@@ -6,9 +6,7 @@ class ServiceHook < WebHook
belongs_to :service
validates :service, presence: true
# rubocop: disable CodeReuse/ServiceClass
def execute(data, hook_name = 'service_hook')
WebHookService.new(self, data, hook_name).execute
super(data, hook_name)
end
# rubocop: enable CodeReuse/ServiceClass
end
......@@ -3,6 +3,11 @@
class WebHook < ApplicationRecord
include Sortable
FAILURE_THRESHOLD = 3 # three strikes
INITIAL_BACKOFF = 10.minutes
MAX_BACKOFF = 1.day
BACKOFF_GROWTH_FACTOR = 2.0
attr_encrypted :token,
mode: :per_attribute_iv,
algorithm: 'aes-256-gcm',
......@@ -21,15 +26,27 @@ class WebHook < ApplicationRecord
validates :token, format: { without: /\n/ }
validates :push_events_branch_filter, branch_filter: true
scope :executable, -> do
next all unless Feature.enabled?(:web_hooks_disable_failed)
where('recent_failures <= ? AND (disabled_until IS NULL OR disabled_until < ?)', FAILURE_THRESHOLD, Time.current)
end
def executable?
return true unless web_hooks_disable_failed?
recent_failures <= FAILURE_THRESHOLD && (disabled_until.nil? || disabled_until < Time.current)
end
# rubocop: disable CodeReuse/ServiceClass
def execute(data, hook_name)
WebHookService.new(self, data, hook_name).execute
WebHookService.new(self, data, hook_name).execute if executable?
end
# rubocop: enable CodeReuse/ServiceClass
# rubocop: disable CodeReuse/ServiceClass
def async_execute(data, hook_name)
WebHookService.new(self, data, hook_name).async_execute
WebHookService.new(self, data, hook_name).async_execute if executable?
end
# rubocop: enable CodeReuse/ServiceClass
......@@ -41,4 +58,26 @@ class WebHook < ApplicationRecord
def help_path
'user/project/integrations/webhooks'
end
def next_backoff
return MAX_BACKOFF if backoff_count >= 8 # optimization to prevent expensive exponentiation and possible overflows
(INITIAL_BACKOFF * (BACKOFF_GROWTH_FACTOR**backoff_count))
.clamp(INITIAL_BACKOFF, MAX_BACKOFF)
.seconds
end
def disable!
update!(recent_failures: FAILURE_THRESHOLD + 1)
end
def enable!
update!(recent_failures: 0, disabled_until: nil, backoff_count: 0)
end
private
def web_hooks_disable_failed?
Feature.enabled?(:web_hooks_disable_failed)
end
end
......@@ -10,7 +10,7 @@ class SystemHooksService
end
def execute_hooks(data, hooks_scope = :all)
SystemHook.hooks_for(hooks_scope).find_each do |hook|
SystemHook.executable.hooks_for(hooks_scope).find_each do |hook|
hook.async_execute(data, 'system_hooks')
end
......
......@@ -6,6 +6,18 @@ class WebHookService
attr_reader :body, :headers, :code
def success?
false
end
def redirection?
false
end
def internal_server_error?
true
end
def initialize
@headers = Gitlab::HTTP::Response::Headers.new({})
@body = ''
......@@ -33,6 +45,8 @@ class WebHookService
end
def execute
return { status: :error, message: 'Hook disabled' } unless hook.executable?
start_time = Gitlab::Metrics::System.monotonic_time
response = if parsed_url.userinfo.blank?
......@@ -104,6 +118,8 @@ class WebHookService
end
def log_execution(trigger:, url:, request_data:, response:, execution_duration:, error_message: nil)
handle_failure(response, hook)
WebHookLog.create(
web_hook: hook,
trigger: trigger,
......@@ -118,6 +134,17 @@ class WebHookService
)
end
def handle_failure(response, hook)
if response.success? || response.redirection?
hook.enable!
elsif response.internal_server_error?
next_backoff = hook.next_backoff
hook.update!(disabled_until: next_backoff.from_now, backoff_count: hook.backoff_count + 1)
else
hook.update!(recent_failures: hook.recent_failures + 1)
end
end
def build_headers(hook_name)
@headers ||= begin
{
......
---
title: Disable web-hooks that fail repeatedly
merge_request: 60837
author:
type: changed
---
name: web_hooks_disable_failed
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/60837
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/329849
milestone: '13.12'
type: development
group: group::ecosystem
default_enabled: false
# frozen_string_literal: true
class AddFailureTrackingToWebHooks < ActiveRecord::Migration[6.0]
def change
change_table(:web_hooks, bulk: true) do |t|
t.integer :recent_failures, null: false, limit: 2, default: 0
t.integer :backoff_count, null: false, limit: 2, default: 0
t.column :disabled_until, :timestamptz
end
end
end
# frozen_string_literal: true
class AddIndexOnWebHookProjectIdRecentFailures < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
INDEX_NAME = 'index_web_hooks_on_project_id_recent_failures'
disable_ddl_transaction!
def up
add_concurrent_index(:web_hooks, [:project_id, :recent_failures], name: INDEX_NAME)
end
def down
remove_concurrent_index_by_name(:web_hooks, INDEX_NAME)
end
end
9674f04640f897928925ff1e23ff6d3ff918627b7c2374713a31071678956614
\ No newline at end of file
3cdf8e93c4b80867a5d8e086f3f44eaeb479e875abf16187b94b3f6238faf062
\ No newline at end of file
......@@ -19111,7 +19111,10 @@ CREATE TABLE web_hooks (
releases_events boolean DEFAULT false NOT NULL,
feature_flag_events boolean DEFAULT false NOT NULL,
member_events boolean DEFAULT false NOT NULL,
subgroup_events boolean DEFAULT false NOT NULL
subgroup_events boolean DEFAULT false NOT NULL,
recent_failures smallint DEFAULT 0 NOT NULL,
backoff_count smallint DEFAULT 0 NOT NULL,
disabled_until timestamp with time zone
);
CREATE SEQUENCE web_hooks_id_seq
......@@ -24578,6 +24581,8 @@ CREATE INDEX index_web_hooks_on_group_id ON web_hooks USING btree (group_id) WHE
CREATE INDEX index_web_hooks_on_project_id ON web_hooks USING btree (project_id);
CREATE INDEX index_web_hooks_on_project_id_recent_failures ON web_hooks USING btree (project_id, recent_failures);
CREATE INDEX index_web_hooks_on_service_id ON web_hooks USING btree (service_id);
CREATE INDEX index_web_hooks_on_type ON web_hooks USING btree (type);
......@@ -32,4 +32,8 @@ class GroupHook < WebHook
def pluralized_name
_('Group Hooks')
end
def web_hooks_disable_failed?
Feature.enabled?(:web_hooks_disable_failed, group)
end
end
......@@ -986,6 +986,19 @@ RSpec.describe Group do
group.execute_hooks(data, :member_hooks)
end
end
context 'when a hook is not executable' do
before do
group_hook.update!(recent_failures: 4)
end
it 'does not execute the disabled hook' do
expect(WebHookService).to receive(:new).with(parent_group_hook, anything, anything).and_call_original
expect(WebHookService).not_to receive(:new).with(group_hook, anything, anything)
group.execute_hooks(data, :member_hooks)
end
end
end
context 'when group_webhooks feature is disabled' do
......
......@@ -3,7 +3,15 @@
require 'spec_helper'
RSpec.describe WebHook do
let(:hook) { build(:project_hook) }
include AfterNextHelpers
let_it_be(:project) { create(:project) }
let(:hook) { build(:project_hook, project: project) }
around do |example|
freeze_time { example.run }
end
describe 'associations' do
it { is_expected.to have_many(:web_hook_logs) }
......@@ -69,18 +77,30 @@ RSpec.describe WebHook do
let(:data) { { key: 'value' } }
let(:hook_name) { 'project hook' }
before do
expect(WebHookService).to receive(:new).with(hook, data, hook_name).and_call_original
it '#execute' do
expect_next(WebHookService).to receive(:execute)
hook.execute(data, hook_name)
end
it '#execute' do
expect_any_instance_of(WebHookService).to receive(:execute)
it 'does not execute non-executable hooks' do
hook.update!(disabled_until: 1.day.from_now)
expect(WebHookService).not_to receive(:new)
hook.execute(data, hook_name)
end
it '#async_execute' do
expect_any_instance_of(WebHookService).to receive(:async_execute)
expect_next(WebHookService).to receive(:async_execute)
hook.async_execute(data, hook_name)
end
it 'does not async execute non-executable hooks' do
hook.update!(disabled_until: 1.day.from_now)
expect(WebHookService).not_to receive(:new)
hook.async_execute(data, hook_name)
end
......@@ -94,4 +114,170 @@ RSpec.describe WebHook do
expect { web_hook.destroy! }.to change(web_hook.web_hook_logs, :count).by(-3)
end
end
describe '.executable' do
let(:not_executable) do
[
[0, Time.current],
[0, 1.minute.from_now],
[1, 1.minute.from_now],
[3, 1.minute.from_now],
[4, nil],
[4, 1.day.ago],
[4, 1.minute.from_now]
].map do |(recent_failures, disabled_until)|
create(:project_hook, project: project, recent_failures: recent_failures, disabled_until: disabled_until)
end
end
let(:executables) do
[
[0, nil],
[0, 1.day.ago],
[1, nil],
[1, 1.day.ago],
[3, nil],
[3, 1.day.ago]
].map do |(recent_failures, disabled_until)|
create(:project_hook, project: project, recent_failures: recent_failures, disabled_until: disabled_until)
end
end
it 'finds the correct set of project hooks' do
expect(described_class.where(project_id: project.id).executable).to match_array executables
end
context 'when the feature flag is not enabled' do
before do
stub_feature_flags(web_hooks_disable_failed: false)
end
it 'is the same as all' do
expect(described_class.where(project_id: project.id).executable).to match_array(executables + not_executable)
end
end
end
describe '#executable?' do
let(:web_hook) { create(:project_hook, project: project) }
where(:recent_failures, :not_until, :executable) do
[
[0, :not_set, true],
[0, :past, true],
[0, :future, false],
[0, :now, false],
[1, :not_set, true],
[1, :past, true],
[1, :future, false],
[3, :not_set, true],
[3, :past, true],
[3, :future, false],
[4, :not_set, false],
[4, :past, false],
[4, :future, false]
]
end
with_them do
# Phasing means we cannot put these values in the where block,
# which is not subject to the frozen time context.
let(:disabled_until) do
case not_until
when :not_set
nil
when :past
1.minute.ago
when :future
1.minute.from_now
when :now
Time.current
end
end
before do
web_hook.update!(recent_failures: recent_failures, disabled_until: disabled_until)
end
it 'has the correct state' do
expect(web_hook.executable?).to eq(executable)
end
context 'when the feature flag is enabled for a project' do
before do
stub_feature_flags(web_hooks_disable_failed: project)
end
it 'has the expected value' do
expect(web_hook.executable?).to eq(executable)
end
end
context 'when the feature flag is not enabled' do
before do
stub_feature_flags(web_hooks_disable_failed: false)
end
it 'is executable' do
expect(web_hook).to be_executable
end
end
end
end
describe '#next_backoff' do
context 'when there was no last backoff' do
before do
hook.backoff_count = 0
end
it 'is 10 minutes' do
expect(hook.next_backoff).to eq(described_class::INITIAL_BACKOFF)
end
end
context 'when we have backed off once' do
before do
hook.backoff_count = 1
end
it 'is twice the initial value' do
expect(hook.next_backoff).to eq(20.minutes)
end
end
context 'when we have backed off 3 times' do
before do
hook.backoff_count = 3
end
it 'grows exponentially' do
expect(hook.next_backoff).to eq(80.minutes)
end
end
context 'when the previous backoff was large' do
before do
hook.backoff_count = 8 # last value before MAX_BACKOFF
end
it 'does not exceed the max backoff value' do
expect(hook.next_backoff).to eq(described_class::MAX_BACKOFF)
end
end
end
describe '#enable!' do
it 'makes a hook executable' do
hook.recent_failures = 1000
expect { hook.enable! }.to change(hook, :executable?).from(false).to(true)
end
end
describe '#disable!' do
it 'disables a hook' do
expect { hook.disable! }.to change(hook, :executable?).from(true).to(false)
end
end
end
......@@ -21,6 +21,10 @@ RSpec.describe WebHookService do
let(:service_instance) { described_class.new(project_hook, data, :push_hooks) }
around do |example|
travel_to(Time.current) { example.run }
end
describe '#initialize' do
before do
stub_application_setting(setting_name => setting)
......@@ -120,10 +124,21 @@ RSpec.describe WebHookService do
expect { service_instance.execute }.to raise_error(StandardError)
end
it 'does not execute disabled hooks' do
project_hook.update!(recent_failures: 4)
expect(service_instance.execute).to eq({ status: :error, message: 'Hook disabled' })
end
it 'handles exceptions' do
exceptions = [SocketError, OpenSSL::SSL::SSLError, Errno::ECONNRESET, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Net::OpenTimeout, Net::ReadTimeout, Gitlab::HTTP::BlockedUrlError, Gitlab::HTTP::RedirectionTooDeep]
exceptions = [
SocketError, OpenSSL::SSL::SSLError, Errno::ECONNRESET, Errno::ECONNREFUSED,
Errno::EHOSTUNREACH, Net::OpenTimeout, Net::ReadTimeout,
Gitlab::HTTP::BlockedUrlError, Gitlab::HTTP::RedirectionTooDeep
]
exceptions.each do |exception_class|
exception = exception_class.new('Exception message')
project_hook.enable!
stub_full_request(project_hook.url, method: :post).to_raise(exception)
expect(service_instance.execute).to eq({ status: :error, message: exception.to_s })
......@@ -166,10 +181,11 @@ RSpec.describe WebHookService do
context 'with success' do
before do
stub_full_request(project_hook.url, method: :post).to_return(status: 200, body: 'Success')
service_instance.execute
end
it 'log successful execution' do
service_instance.execute
expect(hook_log.trigger).to eq('push_hooks')
expect(hook_log.url).to eq(project_hook.url)
expect(hook_log.request_headers).to eq(headers)
......@@ -178,15 +194,62 @@ RSpec.describe WebHookService do
expect(hook_log.execution_duration).to be > 0
expect(hook_log.internal_error_message).to be_nil
end
it 'does not increment the failure count' do
expect { service_instance.execute }.not_to change(project_hook, :recent_failures)
end
it 'does not change the disabled_until attribute' do
expect { service_instance.execute }.not_to change(project_hook, :disabled_until)
end
context 'when the hook had previously failed' do
before do
project_hook.update!(recent_failures: 2)
end
it 'resets the failure count' do
expect { service_instance.execute }.to change(project_hook, :recent_failures).to(0)
end
end
end
context 'with bad request' do
before do
stub_full_request(project_hook.url, method: :post).to_return(status: 400, body: 'Bad request')
end
it 'logs failed execution' do
service_instance.execute
expect(hook_log).to have_attributes(
trigger: eq('push_hooks'),
url: eq(project_hook.url),
request_headers: eq(headers),
response_body: eq('Bad request'),
response_status: eq('400'),
execution_duration: be > 0,
internal_error_message: be_nil
)
end
it 'increments the failure count' do
expect { service_instance.execute }.to change(project_hook, :recent_failures).by(1)
end
it 'does not change the disabled_until attribute' do
expect { service_instance.execute }.not_to change(project_hook, :disabled_until)
end
end
context 'with exception' do
before do
stub_full_request(project_hook.url, method: :post).to_raise(SocketError.new('Some HTTP Post error'))
service_instance.execute
end
it 'log failed execution' do
service_instance.execute
expect(hook_log.trigger).to eq('push_hooks')
expect(hook_log.url).to eq(project_hook.url)
expect(hook_log.request_headers).to eq(headers)
......@@ -195,6 +258,47 @@ RSpec.describe WebHookService do
expect(hook_log.execution_duration).to be > 0
expect(hook_log.internal_error_message).to eq('Some HTTP Post error')
end
it 'does not increment the failure count' do
expect { service_instance.execute }.not_to change(project_hook, :recent_failures)
end
it 'sets the disabled_until attribute' do
expect { service_instance.execute }
.to change(project_hook, :disabled_until).to(project_hook.next_backoff.from_now)
end
it 'increases the backoff count' do
expect { service_instance.execute }.to change(project_hook, :backoff_count).by(1)
end
context 'when the previous cool-off was near the maximum' do
before do
project_hook.update!(disabled_until: 5.minutes.ago, backoff_count: 8)
end
it 'sets the disabled_until attribute' do
expect { service_instance.execute }.to change(project_hook, :disabled_until).to(1.day.from_now)
end
it 'sets the last_backoff attribute' do
expect { service_instance.execute }.to change(project_hook, :backoff_count).by(1)
end
end
context 'when we have backed-off many many times' do
before do
project_hook.update!(disabled_until: 5.minutes.ago, backoff_count: 365)
end
it 'sets the disabled_until attribute' do
expect { service_instance.execute }.to change(project_hook, :disabled_until).to(1.day.from_now)
end
it 'sets the last_backoff attribute' do
expect { service_instance.execute }.to change(project_hook, :backoff_count).by(1)
end
end
end
context 'with unsafe response body' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment