Commit 2050e382 authored by David Kim's avatar David Kim

Merge branch 'qmnguyen0711/825-limit-the-size-of-sidekiq-jobs' into 'master'

Limit the size of a Sidekiq job

See merge request gitlab-org/gitlab!53829
parents 41b03ea7 c9440403
...@@ -133,5 +133,13 @@ module WorkerAttributes ...@@ -133,5 +133,13 @@ module WorkerAttributes
def get_deduplication_options def get_deduplication_options
class_attributes[:deduplication_options] || {} class_attributes[:deduplication_options] || {}
end end
def big_payload!
class_attributes[:big_payload] = true
end
def big_payload?
class_attributes[:big_payload]
end
end end
end end
---
title: Limit the payload size of Sidekiq jobs before scheduling
merge_request: 53829
author:
type: added
...@@ -36,6 +36,8 @@ module Gitlab ...@@ -36,6 +36,8 @@ module Gitlab
chain.add ::Gitlab::SidekiqMiddleware::DuplicateJobs::Client chain.add ::Gitlab::SidekiqMiddleware::DuplicateJobs::Client
chain.add ::Gitlab::SidekiqStatus::ClientMiddleware chain.add ::Gitlab::SidekiqStatus::ClientMiddleware
chain.add ::Gitlab::SidekiqMiddleware::AdminMode::Client chain.add ::Gitlab::SidekiqMiddleware::AdminMode::Client
# Size limiter should be placed at the bottom, but before the metrics midleware
chain.add ::Gitlab::SidekiqMiddleware::SizeLimiter::Client
chain.add ::Gitlab::SidekiqMiddleware::ClientMetrics chain.add ::Gitlab::SidekiqMiddleware::ClientMetrics
end end
end end
......
# frozen_string_literal: true
module Gitlab
module SidekiqMiddleware
module SizeLimiter
# This midleware is inserted into Sidekiq **client** middleware chain. It
# prevents the caller from dispatching a too-large job payload. The job
# payload should be small and simple. Read more at:
# https://github.com/mperham/sidekiq/wiki/Best-Practices#1-make-your-job-parameters-small-and-simple
class Client
def call(worker_class, job, queue, _redis_pool)
Validator.validate!(worker_class, job)
yield
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module SidekiqMiddleware
module SizeLimiter
# A custom exception for size limiter. It contains worker class and its
# size to easier track later
class ExceedLimitError < StandardError
attr_reader :worker_class, :size, :size_limit
def initialize(worker_class, size, size_limit)
@worker_class = worker_class
@size = size
@size_limit = size_limit
super "#{@worker_class} job exceeds payload size limit (#{size}/#{size_limit})"
end
def sentry_extra_data
{
worker_class: @worker_class.to_s,
size: @size.to_i,
size_limit: @size_limit.to_i
}
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module SidekiqMiddleware
module SizeLimiter
# Validate a Sidekiq job payload limit based on current configuration.
# This validator pulls the configuration from the environment variables:
#
# - GITLAB_SIDEKIQ_SIZE_LIMITER_MODE: the current mode of the size
# limiter. This must be either `track` or `raise`.
#
# - GITLAB_SIDEKIQ_SIZE_LIMITER_LIMIT_BYTES: the size limit in bytes.
#
# If the size of job payload after serialization exceeds the limit, an
# error is tracked raised adhering to the mode.
class Validator
def self.validate!(worker_class, job)
new(worker_class, job).validate!
end
DEFAULT_SIZE_LIMIT = 0
MODES = [
TRACK_MODE = 'track',
RAISE_MODE = 'raise'
].freeze
attr_reader :mode, :size_limit
def initialize(
worker_class, job,
mode: ENV['GITLAB_SIDEKIQ_SIZE_LIMITER_MODE'],
size_limit: ENV['GITLAB_SIDEKIQ_SIZE_LIMITER_LIMIT_BYTES']
)
@worker_class = worker_class
@job = job
@mode = (mode || TRACK_MODE).to_s.strip
unless MODES.include?(@mode)
::Sidekiq.logger.warn "Invalid Sidekiq size limiter mode: #{@mode}. Fallback to #{TRACK_MODE} mode."
@mode = TRACK_MODE
end
@size_limit = (size_limit || DEFAULT_SIZE_LIMIT).to_i
if @size_limit < 0
::Sidekiq.logger.warn "Invalid Sidekiq size limiter limit: #{@size_limit}"
end
end
def validate!
return unless @size_limit > 0
return if allow_big_payload?
return if job_size <= @size_limit
exception = ExceedLimitError.new(@worker_class, job_size, @size_limit)
# This should belong to Gitlab::ErrorTracking. We'll remove this
# after this epic is done:
# https://gitlab.com/groups/gitlab-com/gl-infra/-/epics/396
exception.set_backtrace(backtrace)
if raise_mode?
raise exception
else
track(exception)
end
end
private
def job_size
# This maynot be the optimal solution, but can be acceptable solution
# for now. Internally, Sidekiq calls Sidekiq.dump_json everywhere.
# There is no clean way to intefere to prevent double serialization.
@job_size ||= ::Sidekiq.dump_json(@job).bytesize
end
def allow_big_payload?
worker_class = @worker_class.to_s.safe_constantize
worker_class.respond_to?(:big_payload?) && worker_class.big_payload?
end
def raise_mode?
@mode == RAISE_MODE
end
def track(exception)
Gitlab::ErrorTracking.track_exception(exception)
end
def backtrace
Gitlab::BacktraceCleaner.clean_backtrace(caller)
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::SidekiqMiddleware::SizeLimiter::Client, :clean_gitlab_redis_queues do
let(:worker_class) do
Class.new do
def self.name
"TestSizeLimiterWorker"
end
include ApplicationWorker
def perform(*args); end
end
end
before do
stub_const("TestSizeLimiterWorker", worker_class)
end
describe '#call' do
context 'when the validator rejects the job' do
before do
allow(Gitlab::SidekiqMiddleware::SizeLimiter::Validator).to receive(:validate!).and_raise(
Gitlab::SidekiqMiddleware::SizeLimiter::ExceedLimitError.new(
TestSizeLimiterWorker, 500, 300
)
)
end
it 'raises an exception when scheduling job with #perform_at' do
expect do
TestSizeLimiterWorker.perform_at(30.seconds.from_now, 1, 2, 3)
end.to raise_error Gitlab::SidekiqMiddleware::SizeLimiter::ExceedLimitError
end
it 'raises an exception when scheduling job with #perform_async' do
expect do
TestSizeLimiterWorker.perform_async(1, 2, 3)
end.to raise_error Gitlab::SidekiqMiddleware::SizeLimiter::ExceedLimitError
end
it 'raises an exception when scheduling job with #perform_in' do
expect do
TestSizeLimiterWorker.perform_in(3.seconds, 1, 2, 3)
end.to raise_error Gitlab::SidekiqMiddleware::SizeLimiter::ExceedLimitError
end
end
context 'when the validator validates the job suscessfully' do
before do
# Do nothing
allow(Gitlab::SidekiqMiddleware::SizeLimiter::Client).to receive(:validate!)
end
it 'raises an exception when scheduling job with #perform_at' do
expect do
TestSizeLimiterWorker.perform_at(30.seconds.from_now, 1, 2, 3)
end.not_to raise_error
expect(TestSizeLimiterWorker.jobs).to contain_exactly(
a_hash_including(
"class" => "TestSizeLimiterWorker",
"args" => [1, 2, 3],
"at" => be_a(Float)
)
)
end
it 'raises an exception when scheduling job with #perform_async' do
expect do
TestSizeLimiterWorker.perform_async(1, 2, 3)
end.not_to raise_error
expect(TestSizeLimiterWorker.jobs).to contain_exactly(
a_hash_including(
"class" => "TestSizeLimiterWorker",
"args" => [1, 2, 3]
)
)
end
it 'raises an exception when scheduling job with #perform_in' do
expect do
TestSizeLimiterWorker.perform_in(3.seconds, 1, 2, 3)
end.not_to raise_error
expect(TestSizeLimiterWorker.jobs).to contain_exactly(
a_hash_including(
"class" => "TestSizeLimiterWorker",
"args" => [1, 2, 3],
"at" => be_a(Float)
)
)
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::SidekiqMiddleware::SizeLimiter::ExceedLimitError do
let(:worker_class) do
Class.new do
def self.name
"TestSizeLimiterWorker"
end
include ApplicationWorker
def perform(*args); end
end
end
before do
stub_const("TestSizeLimiterWorker", worker_class)
end
it 'encapsulates worker info' do
exception = described_class.new(TestSizeLimiterWorker, 500, 300)
expect(exception.message).to eql("TestSizeLimiterWorker job exceeds payload size limit (500/300)")
expect(exception.worker_class).to eql(TestSizeLimiterWorker)
expect(exception.size).to be(500)
expect(exception.size_limit).to be(300)
expect(exception.sentry_extra_data).to eql(
worker_class: 'TestSizeLimiterWorker',
size: 500,
size_limit: 300
)
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::SidekiqMiddleware::SizeLimiter::Validator do
let(:worker_class) do
Class.new do
def self.name
"TestSizeLimiterWorker"
end
include ApplicationWorker
def perform(*args); end
end
end
before do
stub_const("TestSizeLimiterWorker", worker_class)
end
describe '#initialize' do
context 'when the input mode is valid' do
it 'does not log a warning message' do
expect(::Sidekiq.logger).not_to receive(:warn)
described_class.new(TestSizeLimiterWorker, {}, mode: 'track')
described_class.new(TestSizeLimiterWorker, {}, mode: 'raise')
end
end
context 'when the input mode is invalid' do
it 'defaults to track mode and logs a warning message' do
expect(::Sidekiq.logger).to receive(:warn).with('Invalid Sidekiq size limiter mode: invalid. Fallback to track mode.')
validator = described_class.new(TestSizeLimiterWorker, {}, mode: 'invalid')
expect(validator.mode).to eql('track')
end
end
context 'when the input mode is empty' do
it 'defaults to track mode' do
expect(::Sidekiq.logger).not_to receive(:warn)
validator = described_class.new(TestSizeLimiterWorker, {})
expect(validator.mode).to eql('track')
end
end
context 'when the size input is valid' do
it 'does not log a warning message' do
expect(::Sidekiq.logger).not_to receive(:warn)
described_class.new(TestSizeLimiterWorker, {}, size_limit: 300)
described_class.new(TestSizeLimiterWorker, {}, size_limit: 0)
end
end
context 'when the size input is invalid' do
it 'defaults to 0 and logs a warning message' do
expect(::Sidekiq.logger).to receive(:warn).with('Invalid Sidekiq size limiter limit: -1')
described_class.new(TestSizeLimiterWorker, {}, size_limit: -1)
end
end
context 'when the size input is empty' do
it 'defaults to 0' do
expect(::Sidekiq.logger).not_to receive(:warn)
validator = described_class.new(TestSizeLimiterWorker, {})
expect(validator.size_limit).to be(0)
end
end
end
shared_examples 'validate limit job payload size' do
context 'in track mode' do
let(:mode) { 'track' }
context 'when size limit negative' do
let(:size_limit) { -1 }
it 'does not track jobs' do
expect(Gitlab::ErrorTracking).not_to receive(:track_exception)
validate.call(TestSizeLimiterWorker, { a: 'a' * 300 })
end
it 'does not raise exception' do
expect { validate.call(TestSizeLimiterWorker, { a: 'a' * 300 }) }.not_to raise_error
end
end
context 'when size limit is 0' do
let(:size_limit) { 0 }
it 'does not track jobs' do
expect(Gitlab::ErrorTracking).not_to receive(:track_exception)
validate.call(TestSizeLimiterWorker, { a: 'a' * 300 })
end
it 'does not raise exception' do
expect { validate.call(TestSizeLimiterWorker, { a: 'a' * 300 }) }.not_to raise_error
end
end
context 'when job size is bigger than size limit' do
let(:size_limit) { 50 }
it 'tracks job' do
expect(Gitlab::ErrorTracking).to receive(:track_exception).with(
be_a(Gitlab::SidekiqMiddleware::SizeLimiter::ExceedLimitError)
)
validate.call(TestSizeLimiterWorker, { a: 'a' * 100 })
end
it 'does not raise an exception' do
expect { validate.call(TestSizeLimiterWorker, { a: 'a' * 300 }) }.not_to raise_error
end
context 'when the worker has big_payload attribute' do
before do
worker_class.big_payload!
end
it 'does not track jobs' do
expect(Gitlab::ErrorTracking).not_to receive(:track_exception)
validate.call(TestSizeLimiterWorker, { a: 'a' * 300 })
validate.call('TestSizeLimiterWorker', { a: 'a' * 300 })
end
it 'does not raise an exception' do
expect { validate.call(TestSizeLimiterWorker, { a: 'a' * 300 }) }.not_to raise_error
expect { validate.call('TestSizeLimiterWorker', { a: 'a' * 300 }) }.not_to raise_error
end
end
end
context 'when job size is less than size limit' do
let(:size_limit) { 50 }
it 'does not track job' do
expect(Gitlab::ErrorTracking).not_to receive(:track_exception)
validate.call(TestSizeLimiterWorker, { a: 'a' })
end
it 'does not raise an exception' do
expect { validate.call(TestSizeLimiterWorker, { a: 'a' }) }.not_to raise_error
end
end
end
context 'in raise mode' do
let(:mode) { 'raise' }
context 'when size limit is negative' do
let(:size_limit) { -1 }
it 'does not raise exception' do
expect { validate.call(TestSizeLimiterWorker, { a: 'a' * 300 }) }.not_to raise_error
end
end
context 'when size limit is 0' do
let(:size_limit) { 0 }
it 'does not raise exception' do
expect { validate.call(TestSizeLimiterWorker, { a: 'a' * 300 }) }.not_to raise_error
end
end
context 'when job size is bigger than size limit' do
let(:size_limit) { 50 }
it 'raises an exception' do
expect do
validate.call(TestSizeLimiterWorker, { a: 'a' * 300 })
end.to raise_error(
Gitlab::SidekiqMiddleware::SizeLimiter::ExceedLimitError,
/TestSizeLimiterWorker job exceeds payload size limit/i
)
end
context 'when the worker has big_payload attribute' do
before do
worker_class.big_payload!
end
it 'does not raise an exception' do
expect { validate.call(TestSizeLimiterWorker, { a: 'a' * 300 }) }.not_to raise_error
expect { validate.call('TestSizeLimiterWorker', { a: 'a' * 300 }) }.not_to raise_error
end
end
end
context 'when job size is less than size limit' do
let(:size_limit) { 50 }
it 'does not raise an exception' do
expect { validate.call(TestSizeLimiterWorker, { a: 'a' }) }.not_to raise_error
end
end
end
end
describe '#validate!' do
context 'when calling SizeLimiter.validate!' do
let(:validate) { ->(worker_clas, job) { described_class.validate!(worker_class, job) } }
before do
stub_env('GITLAB_SIDEKIQ_SIZE_LIMITER_MODE', mode)
stub_env('GITLAB_SIDEKIQ_SIZE_LIMITER_LIMIT_BYTES', size_limit)
end
it_behaves_like 'validate limit job payload size'
end
context 'when creating an instance with the related ENV variables' do
let(:validate) do
->(worker_clas, job) do
validator = described_class.new(worker_class, job, mode: mode, size_limit: size_limit)
validator.validate!
end
end
before do
stub_env('GITLAB_SIDEKIQ_SIZE_LIMITER_MODE', mode)
stub_env('GITLAB_SIDEKIQ_SIZE_LIMITER_LIMIT_BYTES', size_limit)
end
it_behaves_like 'validate limit job payload size'
end
context 'when creating an instance with mode and size limit' do
let(:validate) do
->(worker_clas, job) do
validator = described_class.new(worker_class, job, mode: mode, size_limit: size_limit)
validator.validate!
end
end
it_behaves_like 'validate limit job payload size'
end
end
end
...@@ -177,6 +177,7 @@ RSpec.describe Gitlab::SidekiqMiddleware do ...@@ -177,6 +177,7 @@ RSpec.describe Gitlab::SidekiqMiddleware do
::Gitlab::SidekiqMiddleware::DuplicateJobs::Client, ::Gitlab::SidekiqMiddleware::DuplicateJobs::Client,
::Gitlab::SidekiqStatus::ClientMiddleware, ::Gitlab::SidekiqStatus::ClientMiddleware,
::Gitlab::SidekiqMiddleware::AdminMode::Client, ::Gitlab::SidekiqMiddleware::AdminMode::Client,
::Gitlab::SidekiqMiddleware::SizeLimiter::Client,
::Gitlab::SidekiqMiddleware::ClientMetrics ::Gitlab::SidekiqMiddleware::ClientMetrics
] ]
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment