Commit ab5fab5e authored by Stan Hu's avatar Stan Hu

Merge branch '119198-chat_notification-sidekiq-job-latency-has-increased' into 'master'

Limit the amount of time ChatNotificationWorker waits for the build trace

Closes #119198

See merge request gitlab-org/gitlab!22132
parents 8315b6ed fd1dccce
...@@ -3,6 +3,9 @@ ...@@ -3,6 +3,9 @@
class ChatNotificationWorker class ChatNotificationWorker
include ApplicationWorker include ApplicationWorker
TimeoutExceeded = Class.new(StandardError)
sidekiq_options retry: false
feature_category :chatops feature_category :chatops
latency_sensitive_worker! latency_sensitive_worker!
# TODO: break this into multiple jobs # TODO: break this into multiple jobs
...@@ -11,18 +14,21 @@ class ChatNotificationWorker ...@@ -11,18 +14,21 @@ class ChatNotificationWorker
# worker_has_external_dependencies! # worker_has_external_dependencies!
RESCHEDULE_INTERVAL = 2.seconds RESCHEDULE_INTERVAL = 2.seconds
RESCHEDULE_TIMEOUT = 5.minutes
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def perform(build_id) def perform(build_id, reschedule_count = 0)
Ci::Build.find_by(id: build_id).try do |build| Ci::Build.find_by(id: build_id).try do |build|
send_response(build) send_response(build)
end end
rescue Gitlab::Chat::Output::MissingBuildSectionError rescue Gitlab::Chat::Output::MissingBuildSectionError
raise TimeoutExceeded if timeout_exceeded?(reschedule_count)
# The creation of traces and sections appears to be eventually consistent. # The creation of traces and sections appears to be eventually consistent.
# As a result it's possible for us to run the above code before the trace # As a result it's possible for us to run the above code before the trace
# sections are present. To better handle such cases we'll just reschedule # sections are present. To better handle such cases we'll just reschedule
# the job instead of producing an error. # the job instead of producing an error.
self.class.perform_in(RESCHEDULE_INTERVAL, build_id) self.class.perform_in(RESCHEDULE_INTERVAL, build_id, reschedule_count + 1)
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
...@@ -37,4 +43,10 @@ class ChatNotificationWorker ...@@ -37,4 +43,10 @@ class ChatNotificationWorker
end end
end end
end end
private
def timeout_exceeded?(reschedule_count)
(reschedule_count * RESCHEDULE_INTERVAL) >= RESCHEDULE_TIMEOUT
end
end end
---
title: Limit the amount of time ChatNotificationWorker waits for the build trace
merge_request: 22132
author:
type: fixed
...@@ -8,6 +8,10 @@ describe ChatNotificationWorker do ...@@ -8,6 +8,10 @@ describe ChatNotificationWorker do
create(:ci_build, pipeline: create(:ci_pipeline, source: :chat)) create(:ci_build, pipeline: create(:ci_pipeline, source: :chat))
end end
it 'instructs sidekiq not to retry on failure' do
expect(described_class.get_sidekiq_options['retry']).to eq(false)
end
describe '#perform' do describe '#perform' do
it 'does nothing when the build no longer exists' do it 'does nothing when the build no longer exists' do
expect(worker).not_to receive(:send_response) expect(worker).not_to receive(:send_response)
...@@ -23,16 +27,31 @@ describe ChatNotificationWorker do ...@@ -23,16 +27,31 @@ describe ChatNotificationWorker do
worker.perform(chat_build.id) worker.perform(chat_build.id)
end end
it 'reschedules the job if the trace sections could not be found' do context 'when the trace sections could not be found' do
it 'reschedules the job' do
expect(worker) expect(worker)
.to receive(:send_response) .to receive(:send_response)
.and_raise(Gitlab::Chat::Output::MissingBuildSectionError) .and_raise(Gitlab::Chat::Output::MissingBuildSectionError)
expect(described_class) expect(described_class)
.to receive(:perform_in) .to receive(:perform_in)
.with(described_class::RESCHEDULE_INTERVAL, chat_build.id) .with(described_class::RESCHEDULE_INTERVAL, chat_build.id, 1)
worker.perform(chat_build.id)
end
it "raises an error after #{described_class::RESCHEDULE_TIMEOUT} seconds of retrying" do
allow(described_class).to receive(:new).and_return(worker)
allow(worker).to receive(:send_response).and_raise(Gitlab::Chat::Output::MissingBuildSectionError)
worker.perform(chat_build.id) worker.perform(chat_build.id)
expect { described_class.drain }.to raise_error(described_class::TimeoutExceeded)
max_reschedules = described_class::RESCHEDULE_TIMEOUT / described_class::RESCHEDULE_INTERVAL
expect(worker).to have_received(:send_response).exactly(max_reschedules + 1).times
end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment