Commit 2e69cdb1 authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch 'mk/recover-stuck-started-rows' into 'master'

Geo: Recover stuck verification started rows

See merge request gitlab-org/gitlab!48006
parents 84b86a3b c0ed0a67
...@@ -11,7 +11,7 @@ module Geo ...@@ -11,7 +11,7 @@ module Geo
class_methods do class_methods do
extend Gitlab::Utils::Override extend Gitlab::Utils::Override
delegate :verification_pending_batch, :verification_failed_batch, :needs_verification_count, to: :verification_query_class delegate :verification_pending_batch, :verification_failed_batch, :needs_verification_count, :fail_verification_timeouts, to: :verification_query_class
# If replication is disabled, then so is verification. # If replication is disabled, then so is verification.
override :verification_enabled? override :verification_enabled?
...@@ -37,6 +37,8 @@ module Geo ...@@ -37,6 +37,8 @@ module Geo
return false unless verification_enabled? return false unless verification_enabled?
::Geo::VerificationBatchWorker.perform_with_capacity(replicable_name) ::Geo::VerificationBatchWorker.perform_with_capacity(replicable_name)
::Geo::VerificationTimeoutWorker.perform_async(replicable_name)
end end
# Called by VerificationBatchWorker. # Called by VerificationBatchWorker.
......
...@@ -531,6 +531,14 @@ ...@@ -531,6 +531,14 @@
:weight: 1 :weight: 1
:idempotent: true :idempotent: true
:tags: [] :tags: []
- :name: geo:geo_verification_timeout
:feature_category: :geo_replication
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: personal_access_tokens:personal_access_tokens_groups_policy - :name: personal_access_tokens:personal_access_tokens_groups_policy
:feature_category: :authentication_and_authorization :feature_category: :authentication_and_authorization
:has_external_dependencies: :has_external_dependencies:
......
# frozen_string_literal: true
module Geo
# Fail verification for records which started verification a long time ago
class VerificationTimeoutWorker
include ApplicationWorker
include GeoQueue
include ::Gitlab::Geo::LogHelpers
idempotent!
sidekiq_options retry: false, dead: false
loggable_arguments 0
def perform(replicable_name)
replicator_class_for(replicable_name).fail_verification_timeouts
end
def replicator_class_for(replicable_name)
@replicator_class ||= ::Gitlab::Geo::Replicator.for_replicable_name(replicable_name)
end
end
end
...@@ -13,6 +13,7 @@ module Gitlab ...@@ -13,6 +13,7 @@ module Gitlab
extend ActiveSupport::Concern extend ActiveSupport::Concern
include ::ShaAttribute include ::ShaAttribute
include Delay include Delay
include EachBatch
include Gitlab::Geo::LogHelpers include Gitlab::Geo::LogHelpers
VERIFICATION_STATE_VALUES = { VERIFICATION_STATE_VALUES = {
...@@ -91,6 +92,8 @@ module Gitlab ...@@ -91,6 +92,8 @@ module Gitlab
end end
class_methods do class_methods do
include Delay
def verification_state_value(state_string) def verification_state_value(state_string)
VERIFICATION_STATE_VALUES[state_string] VERIFICATION_STATE_VALUES[state_string]
end end
...@@ -156,6 +159,22 @@ module Gitlab ...@@ -156,6 +159,22 @@ module Gitlab
RETURNING #{self.primary_key} RETURNING #{self.primary_key}
SQL SQL
end end
# Fail verification for records which started verification a long time ago
def fail_verification_timeouts
attrs = {
verification_state: verification_state_value(:verification_failed),
verification_failure: "Verification timed out after #{VERIFICATION_TIMEOUT}",
verification_checksum: nil,
verification_retry_count: 1,
verification_retry_at: next_retry_time(1),
verified_at: Time.current
}
verification_timed_out.each_batch do |relation|
relation.update_all(attrs)
end
end
end end
# Convenience method to update checksum and transition to success state. # Convenience method to update checksum and transition to success state.
......
...@@ -131,6 +131,32 @@ RSpec.describe Gitlab::Geo::VerificationState do ...@@ -131,6 +131,32 @@ RSpec.describe Gitlab::Geo::VerificationState do
end end
end end
describe '.fail_verification_timeouts' do
before do
subject.verification_started!
end
context 'when verification has not timed out for a record' do
it 'does not update verification state' do
subject.update!(verification_started_at: (described_class::VERIFICATION_TIMEOUT - 1.minute).ago)
DummyModel.fail_verification_timeouts
expect(subject.reload.verification_started?).to be_truthy
end
end
context 'when verification has timed out for a record' do
it 'sets verification state to failed' do
subject.update!(verification_started_at: (described_class::VERIFICATION_TIMEOUT + 1.minute).ago)
DummyModel.fail_verification_timeouts
expect(subject.reload.verification_failed?).to be_truthy
end
end
end
describe '#verification_succeeded_with_checksum!' do describe '#verification_succeeded_with_checksum!' do
before do before do
subject.verification_started! subject.verification_started!
......
...@@ -131,6 +131,12 @@ RSpec.shared_examples 'a verifiable replicator' do ...@@ -131,6 +131,12 @@ RSpec.shared_examples 'a verifiable replicator' do
described_class.trigger_background_verification described_class.trigger_background_verification
end end
it 'enqueues VerificationTimeoutWorker' do
expect(::Geo::VerificationTimeoutWorker).to receive(:perform_async).with(described_class.replicable_name)
described_class.trigger_background_verification
end
end end
context 'when verification is disabled' do context 'when verification is disabled' do
...@@ -143,6 +149,12 @@ RSpec.shared_examples 'a verifiable replicator' do ...@@ -143,6 +149,12 @@ RSpec.shared_examples 'a verifiable replicator' do
described_class.trigger_background_verification described_class.trigger_background_verification
end end
it 'does not enqueue VerificationTimeoutWorker' do
expect(::Geo::VerificationTimeoutWorker).not_to receive(:perform_async)
described_class.trigger_background_verification
end
end end
end end
...@@ -265,6 +277,26 @@ RSpec.shared_examples 'a verifiable replicator' do ...@@ -265,6 +277,26 @@ RSpec.shared_examples 'a verifiable replicator' do
end end
end end
describe '.fail_verification_timeouts' do
context 'when current node is a primary' do
it 'delegates to the model class of the replicator' do
expect(described_class.model).to receive(:fail_verification_timeouts)
described_class.fail_verification_timeouts
end
end
context 'when current node is a secondary' do
it 'delegates to the registry class of the replicator' do
stub_current_geo_node(secondary)
expect(described_class.registry_class).to receive(:fail_verification_timeouts)
described_class.fail_verification_timeouts
end
end
end
describe '#after_verifiable_update' do describe '#after_verifiable_update' do
it 'calls verify_async if needed' do it 'calls verify_async if needed' do
expect(replicator).to receive(:verify_async) expect(replicator).to receive(:verify_async)
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Geo::VerificationTimeoutWorker, :geo do
let(:replicable_name) { 'widget' }
let(:replicator_class) { double('widget_replicator_class') }
it 'uses a Geo queue' do
expect(described_class.new.sidekiq_options_hash).to include(
'queue' => 'geo:geo_verification_timeout',
'queue_namespace' => :geo
)
end
describe '#perform' do
before do
allow(::Gitlab::Geo::Replicator).to receive(:for_replicable_name).with(replicable_name).and_return(replicator_class)
# This stub is not relevant to the test defined below. This stub is needed
# for another example defined in `include_examples 'an idempotent
# worker'`.
allow(replicator_class).to receive(:fail_verification_timeouts)
end
include_examples 'an idempotent worker' do
let(:job_args) { replicable_name }
it 'calls fail_verification_timeouts' do
expect(replicator_class).to receive(:fail_verification_timeouts).exactly(IdempotentWorkerHelper::WORKER_EXEC_TIMES).times
subject
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment