Commit 31b725e7 authored by Fabio Pitino's avatar Fabio Pitino Committed by Bob Van Landuyt

Report BatchNotResetError to Sentry only after retries

Only one report will be sent to Sentry, containing data
about failing batches and not each time an error is rescued.
This removes noise from Sentry when a worker succeeds on
retry.
parent b936b2d5
...@@ -3,12 +3,26 @@ ...@@ -3,12 +3,26 @@
module Ci module Ci
module Minutes module Minutes
class BatchResetService class BatchResetService
BatchNotResetError = Class.new(StandardError) class BatchNotResetError < StandardError
def initialize(failed_batches)
@failed_batches = failed_batches
end
def message
'Some namespace shared runner minutes were not reset'
end
def sentry_extra_data
{
failed_batches: @failed_batches
}
end
end
BATCH_SIZE = 1000.freeze BATCH_SIZE = 1000.freeze
def initialize def initialize
@errors = [] @failed_batches = []
end end
def execute!(ids_range: nil, batch_size: BATCH_SIZE) def execute!(ids_range: nil, batch_size: BATCH_SIZE)
...@@ -18,10 +32,7 @@ module Ci ...@@ -18,10 +32,7 @@ module Ci
reset_ci_minutes!(namespaces) reset_ci_minutes!(namespaces)
end end
if @errors.any? raise BatchNotResetError.new(@failed_batches) if @failed_batches.any?
exception = BatchNotResetError.new('Some namespace shared runner minutes were not reset.')
Gitlab::ErrorTracking.track_and_raise_exception(exception, namespace_ranges: @errors)
end
end end
private private
...@@ -36,7 +47,15 @@ module Ci ...@@ -36,7 +47,15 @@ module Ci
reset_ci_minutes_notifications!(namespaces) reset_ci_minutes_notifications!(namespaces)
end end
rescue ActiveRecord::ActiveRecordError => e rescue ActiveRecord::ActiveRecordError => e
@errors << { count: namespaces.size, first_id: namespaces.first.id, last_id: namespaces.last.id, error: e.message } # We cleanup the backtrace for intermediate errors so they remain compact and
# relevant due to the possibility of having many failed batches.
@failed_batches << {
count: namespaces.size,
first_namespace_id: namespaces.first.id,
last_namespace_id: namespaces.last.id,
error_message: e.message,
error_backtrace: ::Gitlab::BacktraceCleaner.clean_backtrace(e.backtrace)
}
end end
# This service is responsible for the logic that recalculates the extra shared runners # This service is responsible for the logic that recalculates the extra shared runners
......
...@@ -142,25 +142,23 @@ RSpec.describe Ci::Minutes::BatchResetService do ...@@ -142,25 +142,23 @@ RSpec.describe Ci::Minutes::BatchResetService do
expect(Namespace).to receive(:transaction).once.ordered.and_call_original expect(Namespace).to receive(:transaction).once.ordered.and_call_original
end end
it 'continues its progress' do it 'continues its progress and raises exception at the end' do
expect(service).to receive(:reset_ci_minutes!).with([namespace_1, namespace_2, namespace_3]).and_call_original expect(service).to receive(:reset_ci_minutes!).with([namespace_1, namespace_2, namespace_3]).and_call_original
expect(service).to receive(:reset_ci_minutes!).with([namespace_4, namespace_5, namespace_6]).and_call_original expect(service).to receive(:reset_ci_minutes!).with([namespace_4, namespace_5, namespace_6]).and_call_original
expect(Gitlab::ErrorTracking).to receive(:track_and_raise_exception) expect { subject }
subject .to raise_error(described_class::BatchNotResetError) do |error|
end expect(error.message).to eq('Some namespace shared runner minutes were not reset')
expect(error.sentry_extra_data[:failed_batches]).to contain_exactly(
it 'raises exception with namespace details' do {
expect(Gitlab::ErrorTracking).to receive( count: 3,
:track_and_raise_exception first_namespace_id: 1,
).with( last_namespace_id: 3,
Ci::Minutes::BatchResetService::BatchNotResetError.new( error_message: 'something went wrong',
'Some namespace shared runner minutes were not reset.' error_backtrace: kind_of(Array)
), }
{ namespace_ranges: [{ count: 3, first_id: 1, last_id: 3, error: 'something went wrong' }] } )
).once.and_call_original end
expect { subject }.to raise_error(Ci::Minutes::BatchResetService::BatchNotResetError)
end end
end end
end end
......
...@@ -50,7 +50,7 @@ RSpec.describe ClearSharedRunnersMinutesWorker do ...@@ -50,7 +50,7 @@ RSpec.describe ClearSharedRunnersMinutesWorker do
it 'raises an exception' do it 'raises an exception' do
expect { worker.perform }.to raise_error( expect { worker.perform }.to raise_error(
Ci::Minutes::BatchResetService::BatchNotResetError, Ci::Minutes::BatchResetService::BatchNotResetError,
'Some namespace shared runner minutes were not reset.' 'Some namespace shared runner minutes were not reset'
) )
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment