Commit 97a0e2fd authored by Mark Chao

Merge branch 'make-deduplication-ttl-configurable' into 'master'

Allow configuration of deduplication TTL

See merge request gitlab-org/gitlab!73221
parents 22376a8e 09e3236e
......@@ -284,6 +284,36 @@ module AuthorizedProjectUpdate
end
```
### Setting the deduplication time-to-live (TTL)
Deduplication depends on an idempotency key that is stored in Redis. This is normally
cleared by the configured deduplication strategy.
However, the key can remain until its TTL in certain cases like:
1. `until_executing` is used but the job was never enqueued or executed after the Sidekiq
client middleware was run.
1. `until_executed` is used but the job fails to finish due to retry exhaustion, gets
interrupted the maximum number of times, or gets lost.
The default value is 6 hours. During this time, jobs won't be enqueued even if the first
job never executed or finished.
The TTL can be configured with:
```ruby
class ProjectImportScheduleWorker
include ApplicationWorker
idempotent!
# Keep the idempotency key for at most 5 minutes instead of the default 6 hours.
deduplicate :until_executing, ttl: 5.minutes
end
```
Duplicate jobs can happen when the TTL is reached, so make sure you lower this only for jobs
that can tolerate some duplication.
### Deduplication with load balancing
> [Introduced](https://gitlab.com/groups/gitlab-org/-/epics/6763) in GitLab 14.4.
......
......@@ -9,6 +9,8 @@ class ProjectImportScheduleWorker
prepend WaitableWorker
idempotent!
deduplicate :until_executing, ttl: 5.minutes
feature_category :source_code_management
sidekiq_options retry: false
loggable_arguments 1 # For the job waiter key
......
......@@ -19,7 +19,7 @@ module Gitlab
class DuplicateJob
include Gitlab::Utils::StrongMemoize
DUPLICATE_KEY_TTL = 6.hours
DEFAULT_DUPLICATE_KEY_TTL = 6.hours
WAL_LOCATION_TTL = 60.seconds
MAX_REDIS_RETRIES = 5
DEFAULT_STRATEGY = :until_executing
......@@ -59,7 +59,7 @@ module Gitlab
end
# This method will return the jid that was set in redis
def check!(expiry = DUPLICATE_KEY_TTL)
def check!(expiry = duplicate_key_ttl)
read_jid = nil
read_wal_locations = {}
......@@ -133,7 +133,7 @@ module Gitlab
jid != existing_jid
end
def set_deduplicated_flag!(expiry = DUPLICATE_KEY_TTL)
def set_deduplicated_flag!(expiry = duplicate_key_ttl)
return unless reschedulable?
Sidekiq.redis do |redis|
......@@ -168,6 +168,10 @@ module Gitlab
worker_klass.idempotent?
end
# TTL to use for this job's deduplication keys.
#
# Returns the `:ttl` entry of `options` when it is set (truthy), and
# DEFAULT_DUPLICATE_KEY_TTL otherwise.
def duplicate_key_ttl
  configured_ttl = options[:ttl]
  return DEFAULT_DUPLICATE_KEY_TTL unless configured_ttl

  configured_ttl
end
private
attr_writer :existing_wal_locations
......
......@@ -26,8 +26,8 @@ module Gitlab
end
def check!
# The default expiry time is the DuplicateJob::DUPLICATE_KEY_TTL already
# Only the strategies de-duplicating when scheduling
# The default expiry time is the worker class'
# configured deduplication TTL or DuplicateJob::DEFAULT_DUPLICATE_KEY_TTL.
duplicate_job.check!
end
end
......
......@@ -52,11 +52,11 @@ module Gitlab
def expiry
strong_memoize(:expiry) do
next DuplicateJob::DUPLICATE_KEY_TTL unless duplicate_job.scheduled?
next duplicate_job.duplicate_key_ttl unless duplicate_job.scheduled?
time_diff = duplicate_job.scheduled_at.to_i - Time.now.to_i
time_diff > 0 ? time_diff : DuplicateJob::DUPLICATE_KEY_TTL
time_diff > 0 ? time_diff : duplicate_job.duplicate_key_ttl
end
end
end
......
......@@ -85,23 +85,41 @@ RSpec.describe Gitlab::SidekiqMiddleware::DuplicateJobs::DuplicateJob, :clean_gi
context 'when there was no job in the queue yet' do
it { expect(duplicate_job.check!).to eq('123') }
it "adds a idempotency key with ttl set to #{described_class::DUPLICATE_KEY_TTL}" do
expect { duplicate_job.check! }
.to change { read_idempotency_key_with_ttl(idempotency_key) }
.from([nil, -2])
.to(['123', be_within(1).of(described_class::DUPLICATE_KEY_TTL)])
end
context 'when wal locations is not empty' do
it "adds a existing wal locations key with ttl set to #{described_class::DUPLICATE_KEY_TTL}" do
shared_examples 'sets Redis keys with correct TTL' do
it "adds an idempotency key with correct ttl" do
expect { duplicate_job.check! }
.to change { read_idempotency_key_with_ttl(existing_wal_location_key(idempotency_key, :main)) }
.from([nil, -2])
.to([wal_locations[:main], be_within(1).of(described_class::DUPLICATE_KEY_TTL)])
.and change { read_idempotency_key_with_ttl(existing_wal_location_key(idempotency_key, :ci)) }
.to change { read_idempotency_key_with_ttl(idempotency_key) }
.from([nil, -2])
.to([wal_locations[:ci], be_within(1).of(described_class::DUPLICATE_KEY_TTL)])
.to(['123', be_within(1).of(expected_ttl)])
end
context 'when wal locations is not empty' do
it "adds an existing wal locations key with correct ttl" do
expect { duplicate_job.check! }
.to change { read_idempotency_key_with_ttl(existing_wal_location_key(idempotency_key, :main)) }
.from([nil, -2])
.to([wal_locations[:main], be_within(1).of(expected_ttl)])
.and change { read_idempotency_key_with_ttl(existing_wal_location_key(idempotency_key, :ci)) }
.from([nil, -2])
.to([wal_locations[:ci], be_within(1).of(expected_ttl)])
end
end
end
# Without a :ttl option the keys are expected to use the default TTL.
context 'when TTL option is not set' do
  let(:expected_ttl) { described_class::DEFAULT_DUPLICATE_KEY_TTL }

  it_behaves_like 'sets Redis keys with correct TTL'
end
# With a :ttl option present, the keys are expected to use that value.
context 'when TTL option is set' do
  let(:expected_ttl) { 5.minutes }

  # Stub the job's options so that they carry only the custom TTL.
  before { allow(duplicate_job).to receive(:options).and_return({ ttl: expected_ttl }) }

  it_behaves_like 'sets Redis keys with correct TTL'
end
context 'when preserve_latest_wal_locations_for_idempotent_jobs feature flag is disabled' do
......
......@@ -2,7 +2,7 @@
RSpec.shared_examples 'deduplicating jobs when scheduling' do |strategy_name|
let(:fake_duplicate_job) do
instance_double(Gitlab::SidekiqMiddleware::DuplicateJobs::DuplicateJob)
instance_double(Gitlab::SidekiqMiddleware::DuplicateJobs::DuplicateJob, duplicate_key_ttl: Gitlab::SidekiqMiddleware::DuplicateJobs::DuplicateJob::DEFAULT_DUPLICATE_KEY_TTL)
end
let(:expected_message) { "dropped #{strategy_name.to_s.humanize.downcase}" }
......@@ -18,7 +18,7 @@ RSpec.shared_examples 'deduplicating jobs when scheduling' do |strategy_name|
expect(fake_duplicate_job).to receive(:scheduled?).twice.ordered.and_return(false)
expect(fake_duplicate_job).to(
receive(:check!)
.with(Gitlab::SidekiqMiddleware::DuplicateJobs::DuplicateJob::DUPLICATE_KEY_TTL)
.with(fake_duplicate_job.duplicate_key_ttl)
.ordered
.and_return('a jid'))
expect(fake_duplicate_job).to receive(:duplicate?).ordered.and_return(false)
......@@ -62,7 +62,7 @@ RSpec.shared_examples 'deduplicating jobs when scheduling' do |strategy_name|
allow(fake_duplicate_job).to receive(:options).and_return({ including_scheduled: true })
allow(fake_duplicate_job).to(
receive(:check!)
.with(Gitlab::SidekiqMiddleware::DuplicateJobs::DuplicateJob::DUPLICATE_KEY_TTL)
.with(fake_duplicate_job.duplicate_key_ttl)
.and_return('the jid'))
allow(fake_duplicate_job).to receive(:idempotent?).and_return(true)
allow(fake_duplicate_job).to receive(:update_latest_wal_location!)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment