Commit ab5fab5e authored by Stan Hu's avatar Stan Hu

Merge branch '119198-chat_notification-sidekiq-job-latency-has-increased' into 'master'

Limit the amount of time ChatNotificationWorker waits for the build trace

Closes #119198

See merge request gitlab-org/gitlab!22132
parents 8315b6ed fd1dccce
......@@ -3,6 +3,9 @@
class ChatNotificationWorker
include ApplicationWorker
TimeoutExceeded = Class.new(StandardError)
sidekiq_options retry: false
feature_category :chatops
latency_sensitive_worker!
# TODO: break this into multiple jobs
......@@ -11,18 +14,21 @@ class ChatNotificationWorker
# worker_has_external_dependencies!
RESCHEDULE_INTERVAL = 2.seconds
RESCHEDULE_TIMEOUT = 5.minutes
# rubocop: disable CodeReuse/ActiveRecord
def perform(build_id)
def perform(build_id, reschedule_count = 0)
Ci::Build.find_by(id: build_id).try do |build|
send_response(build)
end
rescue Gitlab::Chat::Output::MissingBuildSectionError
raise TimeoutExceeded if timeout_exceeded?(reschedule_count)
# The creation of traces and sections appears to be eventually consistent.
# As a result it's possible for us to run the above code before the trace
# sections are present. To better handle such cases we'll just reschedule
# the job instead of producing an error.
self.class.perform_in(RESCHEDULE_INTERVAL, build_id)
self.class.perform_in(RESCHEDULE_INTERVAL, build_id, reschedule_count + 1)
end
# rubocop: enable CodeReuse/ActiveRecord
......@@ -37,4 +43,10 @@ class ChatNotificationWorker
end
end
end
private
def timeout_exceeded?(reschedule_count)
(reschedule_count * RESCHEDULE_INTERVAL) >= RESCHEDULE_TIMEOUT
end
end
---
title: Limit the amount of time ChatNotificationWorker waits for the build trace
merge_request: 22132
author:
type: fixed
......@@ -8,6 +8,10 @@ describe ChatNotificationWorker do
create(:ci_build, pipeline: create(:ci_pipeline, source: :chat))
end
it 'instructs sidekiq not to retry on failure' do
expect(described_class.get_sidekiq_options['retry']).to eq(false)
end
describe '#perform' do
it 'does nothing when the build no longer exists' do
expect(worker).not_to receive(:send_response)
......@@ -23,16 +27,31 @@ describe ChatNotificationWorker do
worker.perform(chat_build.id)
end
it 'reschedules the job if the trace sections could not be found' do
context 'when the trace sections could not be found' do
it 'reschedules the job' do
expect(worker)
.to receive(:send_response)
.and_raise(Gitlab::Chat::Output::MissingBuildSectionError)
expect(described_class)
.to receive(:perform_in)
.with(described_class::RESCHEDULE_INTERVAL, chat_build.id)
.with(described_class::RESCHEDULE_INTERVAL, chat_build.id, 1)
worker.perform(chat_build.id)
end
it "raises an error after #{described_class::RESCHEDULE_TIMEOUT} seconds of retrying" do
allow(described_class).to receive(:new).and_return(worker)
allow(worker).to receive(:send_response).and_raise(Gitlab::Chat::Output::MissingBuildSectionError)
worker.perform(chat_build.id)
expect { described_class.drain }.to raise_error(described_class::TimeoutExceeded)
max_reschedules = described_class::RESCHEDULE_TIMEOUT / described_class::RESCHEDULE_INTERVAL
expect(worker).to have_received(:send_response).exactly(max_reschedules + 1).times
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment