Commit 2937425b authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch 'mk/fix-retry-time-of-verification-failures' into 'master'

Geo framework: Fix retry time of verification failures

See merge request gitlab-org/gitlab!53115
parents 7daba09e 789c7c6e
......@@ -35,7 +35,8 @@ module Gitlab
scope :checksummed, -> { where.not(verification_checksum: nil) }
scope :not_checksummed, -> { where(verification_checksum: nil) }
scope :verification_timed_out, -> { verification_started.where("verification_started_at < ?", VERIFICATION_TIMEOUT.ago) }
scope :needs_verification, -> { with_verification_state(:verification_pending, :verification_failed) }
scope :retry_due, -> { where(arel_table[:verification_retry_at].eq(nil).or(arel_table[:verification_retry_at].lt(Time.current))) }
scope :needs_verification, -> { with_verification_state(:verification_pending).or(with_verification_state(:verification_failed).retry_due) }
# rubocop:enable CodeReuse/ActiveRecord
state_machine :verification_state, initial: :verification_pending do
......@@ -128,7 +129,7 @@ module Gitlab
# Overridden by Geo::VerifiableRegistry
def verification_failed_batch_relation(batch_size:)
verification_failed.order(Gitlab::Database.nulls_first_order(:verification_retry_at)).limit(batch_size) # rubocop:disable CodeReuse/ActiveRecord
verification_failed.retry_due.order(Gitlab::Database.nulls_first_order(:verification_retry_at)).limit(batch_size) # rubocop:disable CodeReuse/ActiveRecord
end
# @return [Integer] number of records that need verification
......@@ -237,6 +238,10 @@ module Gitlab
track_checksum_result!(checksum, calculation_started_at)
rescue => e
# Reset any potential changes from track_checksum_result, i.e.
# verification_retry_count may have been cleared.
reset
verification_failed_with_message!('Error during verification', e)
end
......
......@@ -90,44 +90,82 @@ RSpec.describe Gitlab::Geo::VerificationState do
let(:other_failed_ids) { other_failed_records.map { |result| result['id'] } }
before do
subject.verification_started!
subject.verification_started
subject.verification_failed_with_message!('foo')
end
it 'returns IDs of rows pending verification' do
expect(subject.class.verification_failed_batch(batch_size: 3)).to include(subject.id)
end
context 'with a failed record with retry due' do
before do
subject.update!(verification_retry_at: 1.minute.ago)
end
it 'marks verification as started' do
subject.class.verification_failed_batch(batch_size: 3)
it 'returns IDs of rows pending verification' do
expect(subject.class.verification_failed_batch(batch_size: 3)).to include(subject.id)
end
expect(subject.reload.verification_started?).to be_truthy
expect(subject.verification_started_at).to be_present
it 'marks verification as started' do
subject.class.verification_failed_batch(batch_size: 3)
expect(subject.reload.verification_started?).to be_truthy
expect(subject.verification_started_at).to be_present
end
it 'limits with batch_size and orders records by verification_retry_at with NULLs first' do
expected = other_failed_ids
# `match_array` instead of `eq` because the UPDATE query does not
# guarantee that results are returned in the same order as the subquery
# used to SELECT the correct batch.
expect(subject.class.verification_failed_batch(batch_size: 2)).to match_array(expected)
end
context 'other verification states' do
it 'does not include them' do
subject.verification_started!
expect(subject.class.verification_failed_batch(batch_size: 5)).not_to include(subject.id)
subject.verification_succeeded_with_checksum!('foo', Time.current)
expect(subject.class.verification_failed_batch(batch_size: 5)).not_to include(subject.id)
subject.verification_pending!
expect(subject.class.verification_failed_batch(batch_size: 5)).not_to include(subject.id)
end
end
end
it 'limits with batch_size and orders records by verification_retry_at with NULLs first' do
expected = other_failed_ids
context 'when verification_retry_at is in the future' do
it 'does not return the row' do
subject.update!(verification_retry_at: 1.minute.from_now)
# `match_array` instead of `eq` because the UPDATE query does not
# guarantee that results are returned in the same order as the subquery
# used to SELECT the correct batch.
expect(subject.class.verification_failed_batch(batch_size: 2)).to match_array(expected)
expect(subject.class.verification_failed_batch(batch_size: 3)).not_to include(subject.id)
end
end
end
context 'other verification states' do
it 'does not include them' do
subject.verification_started!
describe '.needs_verification' do
it 'includes verification_pending' do
subject.save!
expect(subject.class.verification_failed_batch(batch_size: 5)).not_to include(subject.id)
expect(subject.class.needs_verification).to include(subject)
end
subject.verification_succeeded_with_checksum!('foo', Time.current)
it 'includes verification_failed and retry_due' do
subject.verification_started
subject.verification_failed_with_message!('foo')
subject.update!(verification_retry_at: 1.minute.ago)
expect(subject.class.verification_failed_batch(batch_size: 5)).not_to include(subject.id)
expect(subject.class.needs_verification).to include(subject)
end
subject.verification_pending!
it 'excludes verification_failed with future verification_retry_at' do
subject.verification_started
subject.verification_failed_with_message!('foo')
subject.update!(verification_retry_at: 1.minute.from_now)
expect(subject.class.verification_failed_batch(batch_size: 5)).not_to include(subject.id)
end
expect(subject.class.needs_verification).not_to include(subject)
end
end
......@@ -157,7 +195,7 @@ RSpec.describe Gitlab::Geo::VerificationState do
end
end
describe '#track_checksum_attempt!' do
describe '#track_checksum_attempt!', :aggregate_failures do
context 'when verification was not yet started' do
it 'starts verification' do
expect do
......@@ -196,12 +234,39 @@ RSpec.describe Gitlab::Geo::VerificationState do
end
context 'when an error occurs while yielding' do
it 'sets verification_failed' do
subject.track_checksum_attempt! do
raise 'an error'
context 'when the record was failed' do
it 'sets verification_failed and increments verification_retry_count' do
subject.verification_failed_with_message!('foo')
subject.track_checksum_attempt! do
raise 'an error'
end
expect(subject.reload.verification_failed?).to be_truthy
expect(subject.verification_retry_count).to eq(2)
end
end
end
expect(subject.reload.verification_failed?).to be_truthy
context 'when the yielded block returns nil' do
context 'when the record was pending' do
it 'sets verification_failed and sets verification_retry_count to 1' do
subject.track_checksum_attempt! { nil }
expect(subject.reload.verification_failed?).to be_truthy
expect(subject.verification_retry_count).to eq(1)
end
end
context 'when the record was failed' do
it 'sets verification_failed and increments verification_retry_count' do
subject.verification_failed_with_message!('foo')
subject.track_checksum_attempt! { nil }
expect(subject.reload.verification_failed?).to be_truthy
expect(subject.verification_retry_count).to eq(2)
end
end
end
end
......
......@@ -42,28 +42,42 @@ RSpec.shared_examples 'a Geo verifiable registry' do
subject.verification_failed_with_message!('foo')
end
it 'returns IDs of rows which are synced and failed verification' do
expect(described_class.verification_failed_batch(batch_size: 4)).to match_array([subject.model_record_id])
end
context 'with a failed record with retry due' do
before do
subject.update!(verification_retry_at: 1.minute.ago)
end
it 'excludes rows which are not synced or have not failed verification' do
# rubocop:disable Rails/SaveBang
create(registry_class_factory, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
create(registry_class_factory, :started, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
create(registry_class_factory, :failed, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_pending))
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_started))
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_succeeded), verification_checksum: 'abc123')
# rubocop:enable Rails/SaveBang
it 'returns IDs of rows which are synced and have failed verification' do
expect(described_class.verification_failed_batch(batch_size: 4)).to match_array([subject.model_record_id])
end
it 'excludes rows which are not synced or have not failed verification' do
# rubocop:disable Rails/SaveBang
create(registry_class_factory, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
create(registry_class_factory, :started, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
create(registry_class_factory, :failed, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_pending))
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_started))
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_succeeded), verification_checksum: 'abc123')
# rubocop:enable Rails/SaveBang
expect(described_class.verification_failed_batch(batch_size: 4)).to match_array([subject.model_record_id])
end
expect(described_class.verification_failed_batch(batch_size: 4)).to match_array([subject.model_record_id])
it 'marks verification as started' do
described_class.verification_failed_batch(batch_size: 4)
expect(subject.reload.verification_started?).to be_truthy
expect(subject.verification_started_at).to be_present
end
end
it 'marks verification as started' do
described_class.verification_failed_batch(batch_size: 4)
context 'when verification_retry_at is in the future' do
it 'does not return the row which failed verification' do
subject.update!(verification_retry_at: 1.minute.from_now)
expect(subject.reload.verification_started?).to be_truthy
expect(subject.verification_started_at).to be_present
expect(subject.class.verification_failed_batch(batch_size: 4)).not_to include(subject.model_record_id)
end
end
end
......@@ -72,12 +86,22 @@ RSpec.shared_examples 'a Geo verifiable registry' do
subject.save!
end
it 'returns the number of rows which are synced and (pending or failed) verification' do
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo') # rubocop:disable Rails/SaveBang
it 'returns the number of rows which are synced and pending verification' do
expect(described_class.needs_verification_count(limit: 3)).to eq(1)
end
it 'includes rows which are synced and failed verification and are due for retry' do
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo', verification_retry_at: 1.minute.ago) # rubocop:disable Rails/SaveBang
expect(described_class.needs_verification_count(limit: 3)).to eq(2)
end
it 'excludes rows which are synced and failed verification and have a future retry time' do
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo', verification_retry_at: 1.minute.from_now) # rubocop:disable Rails/SaveBang
expect(described_class.needs_verification_count(limit: 3)).to eq(1)
end
it 'excludes rows which are not synced or are not (pending or failed) verification' do
# rubocop:disable Rails/SaveBang
create(registry_class_factory, verification_state: verification_state_value(:verification_pending))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment