Commit 8d2faa1b authored by David Fernandez's avatar David Fernandez

Improve observability of the Enqueuer worker

All guards have now logs so that when it is triggered we get the reason
in the logs.
Update the error handling to avoid aborting container repository that
has nothing to do with the error
parent 2948fd16
......@@ -14,48 +14,52 @@ module ContainerRegistry
idempotent!
def perform
return unless migration.enabled?
return unless below_capacity?
return unless waiting_time_passed?
return unless runnable?
re_enqueue_if_capacity if handle_aborted_migration || handle_next_migration
rescue StandardError => e
Gitlab::ErrorTracking.log_exception(
e,
next_repository_id: next_repository&.id,
next_aborted_repository_id: next_aborted_repository&.id
)
next_repository&.abort_import
end
private
def handle_aborted_migration
return unless next_aborted_repository&.retry_aborted_migration
return unless next_aborted_repository
log_extra_metadata_on_done(:container_repository_id, next_aborted_repository.id)
log_extra_metadata_on_done(:import_type, 'retry')
log_repository(next_aborted_repository)
next_aborted_repository.retry_aborted_migration
true
rescue StandardError => e
Gitlab::ErrorTracking.log_exception(e, next_aborted_repository_id: next_aborted_repository&.id)
true
end
def handle_next_migration
return unless next_repository
log_extra_metadata_on_done(:import_type, 'next')
log_repository(next_repository)
# We return true because the repository was successfully processed (migration_state is changed)
return true if tag_count_too_high?
return unless next_repository.start_pre_import
log_extra_metadata_on_done(:container_repository_id, next_repository.id)
log_extra_metadata_on_done(:import_type, 'next')
true
rescue StandardError => e
Gitlab::ErrorTracking.log_exception(e, next_repository_id: next_repository&.id)
next_repository&.abort_import
false
end
def tag_count_too_high?
return false unless next_repository.tags_count > migration.max_tags_count
next_repository.skip_import(reason: :too_many_tags)
log_extra_metadata_on_done(:tags_count_too_high, true)
log_extra_metadata_on_done(:max_tags_count_setting, migration.max_tags_count)
true
end
......@@ -72,6 +76,29 @@ module ContainerRegistry
last_step_completed_repository.last_import_step_done_at < Time.zone.now - delay
end
def runnable?
unless migration.enabled?
log_extra_metadata_on_done(:migration_enabled, false)
return false
end
unless below_capacity?
log_extra_metadata_on_done(:max_capacity_setting, maximum_capacity)
log_extra_metadata_on_done(:below_capacity, false)
return false
end
unless waiting_time_passed?
log_extra_metadata_on_done(:waiting_time_passed, false)
log_extra_metadata_on_done(:current_waiting_time_setting, migration.enqueue_waiting_time)
return false
end
true
end
def current_capacity
strong_memoize(:current_capacity) do
ContainerRepository.with_migration_states(
......@@ -111,6 +138,11 @@ module ContainerRegistry
self.class.perform_async
end
def log_repository(repository)
log_extra_metadata_on_done(:container_repository_id, repository&.id)
log_extra_metadata_on_done(:container_repository_path, repository&.path)
end
end
end
end
......@@ -31,7 +31,7 @@ RSpec.describe ContainerRegistry::Migration::EnqueuerWorker, :aggregate_failures
end
it 're-enqueues the worker' do
expect(ContainerRegistry::Migration::EnqueuerWorker).to receive(:perform_async)
expect(described_class).to receive(:perform_async)
subject
end
......@@ -43,7 +43,7 @@ RSpec.describe ContainerRegistry::Migration::EnqueuerWorker, :aggregate_failures
end
it 'does not re-enqueue the worker' do
expect(ContainerRegistry::Migration::EnqueuerWorker).not_to receive(:perform_async)
expect(described_class).not_to receive(:perform_async)
subject
end
......@@ -59,10 +59,11 @@ RSpec.describe ContainerRegistry::Migration::EnqueuerWorker, :aggregate_failures
next_qualified_repository
end
expect(worker).to receive(:log_extra_metadata_on_done)
.with(:container_repository_id, container_repository.id)
expect(worker).to receive(:log_extra_metadata_on_done)
.with(:import_type, 'next')
expect_log_extra_metadata(
import_type: 'next',
container_repository_id: container_repository.id,
container_repository_path: container_repository.path
)
subject
......@@ -77,7 +78,11 @@ RSpec.describe ContainerRegistry::Migration::EnqueuerWorker, :aggregate_failures
allow(ContainerRegistry::Migration).to receive(:enabled?).and_return(false)
end
it_behaves_like 'no action'
it_behaves_like 'no action' do
before do
expect_log_extra_metadata(migration_enabled: false)
end
end
end
context 'above capacity' do
......@@ -87,7 +92,11 @@ RSpec.describe ContainerRegistry::Migration::EnqueuerWorker, :aggregate_failures
allow(ContainerRegistry::Migration).to receive(:capacity).and_return(1)
end
it_behaves_like 'no action'
it_behaves_like 'no action' do
before do
expect_log_extra_metadata(below_capacity: false, max_capacity_setting: 1)
end
end
it 'does not re-enqueue the worker' do
expect(ContainerRegistry::Migration::EnqueuerWorker).not_to receive(:perform_async)
......@@ -102,7 +111,11 @@ RSpec.describe ContainerRegistry::Migration::EnqueuerWorker, :aggregate_failures
allow(ContainerRegistry::Migration).to receive(:enqueue_waiting_time).and_return(1.hour)
end
it_behaves_like 'no action'
it_behaves_like 'no action' do
before do
expect_log_extra_metadata(waiting_time_passed: false, current_waiting_time_setting: 1.hour)
end
end
end
context 'when an aborted import is available' do
......@@ -117,10 +130,11 @@ RSpec.describe ContainerRegistry::Migration::EnqueuerWorker, :aggregate_failures
next_aborted_repository
end
expect(worker).to receive(:log_extra_metadata_on_done)
.with(:container_repository_id, aborted_repository.id)
expect(worker).to receive(:log_extra_metadata_on_done)
.with(:import_type, 'retry')
expect_log_extra_metadata(
import_type: 'retry',
container_repository_id: aborted_repository.id,
container_repository_path: aborted_repository.path
)
subject
......@@ -129,6 +143,28 @@ RSpec.describe ContainerRegistry::Migration::EnqueuerWorker, :aggregate_failures
end
it_behaves_like 're-enqueuing based on capacity'
context 'when an error occurs' do
it 'does not abort that migration' do
method = worker.method(:next_aborted_repository)
allow(worker).to receive(:next_aborted_repository) do
next_aborted_repository = method.call
allow(next_aborted_repository).to receive(:retry_aborted_migration).and_raise(StandardError)
next_aborted_repository
end
expect_log_extra_metadata(
import_type: 'retry',
container_repository_id: aborted_repository.id,
container_repository_path: aborted_repository.path
)
subject
expect(aborted_repository.reload).to be_import_aborted
expect(container_repository.reload).to be_default
end
end
end
context 'when no repository qualifies' do
......@@ -147,6 +183,14 @@ RSpec.describe ContainerRegistry::Migration::EnqueuerWorker, :aggregate_failures
end
it 'skips the repository' do
expect_log_extra_metadata(
import_type: 'next',
container_repository_id: container_repository.id,
container_repository_path: container_repository.path,
tags_count_too_high: true,
max_tags_count_setting: 2
)
subject
expect(container_repository.reload).to be_import_skipped
......@@ -163,10 +207,15 @@ RSpec.describe ContainerRegistry::Migration::EnqueuerWorker, :aggregate_failures
end
it 'aborts the import' do
expect_log_extra_metadata(
import_type: 'next',
container_repository_id: container_repository.id,
container_repository_path: container_repository.path
)
expect(Gitlab::ErrorTracking).to receive(:log_exception).with(
instance_of(StandardError),
next_repository_id: container_repository.id,
next_aborted_repository_id: nil
next_repository_id: container_repository.id
)
subject
......@@ -174,5 +223,11 @@ RSpec.describe ContainerRegistry::Migration::EnqueuerWorker, :aggregate_failures
expect(container_repository.reload).to be_import_aborted
end
end
def expect_log_extra_metadata(metadata)
metadata.each do |key, value|
expect(worker).to receive(:log_extra_metadata_on_done).with(key, value)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment