Commit 2937425b authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch 'mk/fix-retry-time-of-verification-failures' into 'master'

Geo framework: Fix retry time of verification failures

See merge request gitlab-org/gitlab!53115
parents 7daba09e 789c7c6e
...@@ -35,7 +35,8 @@ module Gitlab ...@@ -35,7 +35,8 @@ module Gitlab
scope :checksummed, -> { where.not(verification_checksum: nil) } scope :checksummed, -> { where.not(verification_checksum: nil) }
scope :not_checksummed, -> { where(verification_checksum: nil) } scope :not_checksummed, -> { where(verification_checksum: nil) }
scope :verification_timed_out, -> { verification_started.where("verification_started_at < ?", VERIFICATION_TIMEOUT.ago) } scope :verification_timed_out, -> { verification_started.where("verification_started_at < ?", VERIFICATION_TIMEOUT.ago) }
scope :needs_verification, -> { with_verification_state(:verification_pending, :verification_failed) } scope :retry_due, -> { where(arel_table[:verification_retry_at].eq(nil).or(arel_table[:verification_retry_at].lt(Time.current))) }
scope :needs_verification, -> { with_verification_state(:verification_pending).or(with_verification_state(:verification_failed).retry_due) }
# rubocop:enable CodeReuse/ActiveRecord # rubocop:enable CodeReuse/ActiveRecord
state_machine :verification_state, initial: :verification_pending do state_machine :verification_state, initial: :verification_pending do
...@@ -128,7 +129,7 @@ module Gitlab ...@@ -128,7 +129,7 @@ module Gitlab
# Overridden by Geo::VerifiableRegistry # Overridden by Geo::VerifiableRegistry
def verification_failed_batch_relation(batch_size:) def verification_failed_batch_relation(batch_size:)
verification_failed.order(Gitlab::Database.nulls_first_order(:verification_retry_at)).limit(batch_size) # rubocop:disable CodeReuse/ActiveRecord verification_failed.retry_due.order(Gitlab::Database.nulls_first_order(:verification_retry_at)).limit(batch_size) # rubocop:disable CodeReuse/ActiveRecord
end end
# @return [Integer] number of records that need verification # @return [Integer] number of records that need verification
...@@ -237,6 +238,10 @@ module Gitlab ...@@ -237,6 +238,10 @@ module Gitlab
track_checksum_result!(checksum, calculation_started_at) track_checksum_result!(checksum, calculation_started_at)
rescue => e rescue => e
# Reset any potential changes from track_checksum_result, i.e.
# verification_retry_count may have been cleared.
reset
verification_failed_with_message!('Error during verification', e) verification_failed_with_message!('Error during verification', e)
end end
......
...@@ -90,44 +90,82 @@ RSpec.describe Gitlab::Geo::VerificationState do ...@@ -90,44 +90,82 @@ RSpec.describe Gitlab::Geo::VerificationState do
let(:other_failed_ids) { other_failed_records.map { |result| result['id'] } } let(:other_failed_ids) { other_failed_records.map { |result| result['id'] } }
before do before do
subject.verification_started! subject.verification_started
subject.verification_failed_with_message!('foo') subject.verification_failed_with_message!('foo')
end end
it 'returns IDs of rows pending verification' do context 'with a failed record with retry due' do
expect(subject.class.verification_failed_batch(batch_size: 3)).to include(subject.id) before do
end subject.update!(verification_retry_at: 1.minute.ago)
end
it 'marks verification as started' do it 'returns IDs of rows pending verification' do
subject.class.verification_failed_batch(batch_size: 3) expect(subject.class.verification_failed_batch(batch_size: 3)).to include(subject.id)
end
expect(subject.reload.verification_started?).to be_truthy it 'marks verification as started' do
expect(subject.verification_started_at).to be_present subject.class.verification_failed_batch(batch_size: 3)
expect(subject.reload.verification_started?).to be_truthy
expect(subject.verification_started_at).to be_present
end
it 'limits with batch_size and orders records by verification_retry_at with NULLs first' do
expected = other_failed_ids
# `match_array` instead of `eq` because the UPDATE query does not
# guarantee that results are returned in the same order as the subquery
# used to SELECT the correct batch.
expect(subject.class.verification_failed_batch(batch_size: 2)).to match_array(expected)
end
context 'other verification states' do
it 'does not include them' do
subject.verification_started!
expect(subject.class.verification_failed_batch(batch_size: 5)).not_to include(subject.id)
subject.verification_succeeded_with_checksum!('foo', Time.current)
expect(subject.class.verification_failed_batch(batch_size: 5)).not_to include(subject.id)
subject.verification_pending!
expect(subject.class.verification_failed_batch(batch_size: 5)).not_to include(subject.id)
end
end
end end
it 'limits with batch_size and orders records by verification_retry_at with NULLs first' do context 'when verification_retry_at is in the future' do
expected = other_failed_ids it 'does not return the row' do
subject.update!(verification_retry_at: 1.minute.from_now)
# `match_array` instead of `eq` because the UPDATE query does not expect(subject.class.verification_failed_batch(batch_size: 3)).not_to include(subject.id)
# guarantee that results are returned in the same order as the subquery end
# used to SELECT the correct batch.
expect(subject.class.verification_failed_batch(batch_size: 2)).to match_array(expected)
end end
end
context 'other verification states' do describe '.needs_verification' do
it 'does not include them' do it 'includes verification_pending' do
subject.verification_started! subject.save!
expect(subject.class.verification_failed_batch(batch_size: 5)).not_to include(subject.id) expect(subject.class.needs_verification).to include(subject)
end
subject.verification_succeeded_with_checksum!('foo', Time.current) it 'includes verification_failed and retry_due' do
subject.verification_started
subject.verification_failed_with_message!('foo')
subject.update!(verification_retry_at: 1.minute.ago)
expect(subject.class.verification_failed_batch(batch_size: 5)).not_to include(subject.id) expect(subject.class.needs_verification).to include(subject)
end
subject.verification_pending! it 'excludes verification_failed with future verification_retry_at' do
subject.verification_started
subject.verification_failed_with_message!('foo')
subject.update!(verification_retry_at: 1.minute.from_now)
expect(subject.class.verification_failed_batch(batch_size: 5)).not_to include(subject.id) expect(subject.class.needs_verification).not_to include(subject)
end
end end
end end
...@@ -157,7 +195,7 @@ RSpec.describe Gitlab::Geo::VerificationState do ...@@ -157,7 +195,7 @@ RSpec.describe Gitlab::Geo::VerificationState do
end end
end end
describe '#track_checksum_attempt!' do describe '#track_checksum_attempt!', :aggregate_failures do
context 'when verification was not yet started' do context 'when verification was not yet started' do
it 'starts verification' do it 'starts verification' do
expect do expect do
...@@ -196,12 +234,39 @@ RSpec.describe Gitlab::Geo::VerificationState do ...@@ -196,12 +234,39 @@ RSpec.describe Gitlab::Geo::VerificationState do
end end
context 'when an error occurs while yielding' do context 'when an error occurs while yielding' do
it 'sets verification_failed' do context 'when the record was failed' do
subject.track_checksum_attempt! do it 'sets verification_failed and increments verification_retry_count' do
raise 'an error' subject.verification_failed_with_message!('foo')
subject.track_checksum_attempt! do
raise 'an error'
end
expect(subject.reload.verification_failed?).to be_truthy
expect(subject.verification_retry_count).to eq(2)
end end
end
end
expect(subject.reload.verification_failed?).to be_truthy context 'when the yielded block returns nil' do
context 'when the record was pending' do
it 'sets verification_failed and sets verification_retry_count to 1' do
subject.track_checksum_attempt! { nil }
expect(subject.reload.verification_failed?).to be_truthy
expect(subject.verification_retry_count).to eq(1)
end
end
context 'when the record was failed' do
it 'sets verification_failed and increments verification_retry_count' do
subject.verification_failed_with_message!('foo')
subject.track_checksum_attempt! { nil }
expect(subject.reload.verification_failed?).to be_truthy
expect(subject.verification_retry_count).to eq(2)
end
end end
end end
end end
......
...@@ -42,28 +42,42 @@ RSpec.shared_examples 'a Geo verifiable registry' do ...@@ -42,28 +42,42 @@ RSpec.shared_examples 'a Geo verifiable registry' do
subject.verification_failed_with_message!('foo') subject.verification_failed_with_message!('foo')
end end
it 'returns IDs of rows which are synced and failed verification' do context 'with a failed record with retry due' do
expect(described_class.verification_failed_batch(batch_size: 4)).to match_array([subject.model_record_id]) before do
end subject.update!(verification_retry_at: 1.minute.ago)
end
it 'excludes rows which are not synced or have not failed verification' do it 'returns IDs of rows which are synced and have failed verification' do
# rubocop:disable Rails/SaveBang expect(described_class.verification_failed_batch(batch_size: 4)).to match_array([subject.model_record_id])
create(registry_class_factory, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo') end
create(registry_class_factory, :started, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
create(registry_class_factory, :failed, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo') it 'excludes rows which are not synced or have not failed verification' do
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_pending)) # rubocop:disable Rails/SaveBang
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_started)) create(registry_class_factory, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_succeeded), verification_checksum: 'abc123') create(registry_class_factory, :started, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
# rubocop:enable Rails/SaveBang create(registry_class_factory, :failed, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_pending))
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_started))
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_succeeded), verification_checksum: 'abc123')
# rubocop:enable Rails/SaveBang
expect(described_class.verification_failed_batch(batch_size: 4)).to match_array([subject.model_record_id])
end
expect(described_class.verification_failed_batch(batch_size: 4)).to match_array([subject.model_record_id]) it 'marks verification as started' do
described_class.verification_failed_batch(batch_size: 4)
expect(subject.reload.verification_started?).to be_truthy
expect(subject.verification_started_at).to be_present
end
end end
it 'marks verification as started' do context 'when verification_retry_at is in the future' do
described_class.verification_failed_batch(batch_size: 4) it 'does not return the row which failed verification' do
subject.update!(verification_retry_at: 1.minute.from_now)
expect(subject.reload.verification_started?).to be_truthy expect(subject.class.verification_failed_batch(batch_size: 4)).not_to include(subject.model_record_id)
expect(subject.verification_started_at).to be_present end
end end
end end
...@@ -72,12 +86,22 @@ RSpec.shared_examples 'a Geo verifiable registry' do ...@@ -72,12 +86,22 @@ RSpec.shared_examples 'a Geo verifiable registry' do
subject.save! subject.save!
end end
it 'returns the number of rows which are synced and (pending or failed) verification' do it 'returns the number of rows which are synced and pending verification' do
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo') # rubocop:disable Rails/SaveBang expect(described_class.needs_verification_count(limit: 3)).to eq(1)
end
it 'includes rows which are synced and failed verification and are due for retry' do
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo', verification_retry_at: 1.minute.ago) # rubocop:disable Rails/SaveBang
expect(described_class.needs_verification_count(limit: 3)).to eq(2) expect(described_class.needs_verification_count(limit: 3)).to eq(2)
end end
it 'excludes rows which are synced and failed verification and have a future retry time' do
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo', verification_retry_at: 1.minute.from_now) # rubocop:disable Rails/SaveBang
expect(described_class.needs_verification_count(limit: 3)).to eq(1)
end
it 'excludes rows which are not synced or are not (pending or failed) verification' do it 'excludes rows which are not synced or are not (pending or failed) verification' do
# rubocop:disable Rails/SaveBang # rubocop:disable Rails/SaveBang
create(registry_class_factory, verification_state: verification_state_value(:verification_pending)) create(registry_class_factory, verification_state: verification_state_value(:verification_pending))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment