Commit c6e31a8f authored by Marius Bobin's avatar Marius Bobin Committed by Michael Kozono

Resolve Geo: Secondaries are orphaning artifact files

parent 2bc97e0a
......@@ -10,6 +10,9 @@ module EE
extend ActiveSupport::Concern
prepended do
# After destroy callbacks are often skipped because of FastDestroyAll.
# All destroy callbacks should be implemented in `Ci::JobArtifacts::DestroyBatchService`
# See https://gitlab.com/gitlab-org/gitlab/-/issues/297472
after_destroy :log_geo_deleted_event
SECURITY_REPORT_FILE_TYPES = %w[sast secret_detection dependency_scanning container_scanning dast coverage_fuzzing api_fuzzing].freeze
......
......@@ -4,6 +4,7 @@ module Geo
class JobArtifactDeletedEvent < ApplicationRecord
include Geo::Model
include Geo::Eventable
include BulkInsertSafe
belongs_to :job_artifact, class_name: 'Ci::JobArtifact'
......
......@@ -11,6 +11,7 @@ module EE
override :destroy_related_records
def destroy_related_records(artifacts)
destroy_security_findings(artifacts)
insert_geo_event_records(artifacts)
end
def destroy_security_findings(artifacts)
......@@ -18,6 +19,10 @@ module EE
::Security::Finding.by_build_ids(job_ids).delete_all
end
def insert_geo_event_records(artifacts)
::Geo::JobArtifactDeletedEventStore.bulk_create(artifacts)
end
end
end
end
......
......@@ -23,6 +23,13 @@ module Geo
class << self
attr_accessor :event_type
def can_create_event?
return false unless Gitlab::Geo.primary?
return false unless Gitlab::Geo.secondary_nodes.any? # no need to create an event if no one is listening
true
end
end
attr_reader :project, :params
......@@ -33,8 +40,7 @@ module Geo
end
def create!
return unless Gitlab::Geo.primary?
return unless Gitlab::Geo.secondary_nodes.any? # no need to create an event if no one is listening
return unless self.class.can_create_event?
event = build_event
event.validate!
......
......@@ -8,10 +8,36 @@ module Geo
attr_reader :job_artifact
def self.bulk_create(artifacts)
return unless can_create_event?
events = artifacts
.map { |artifact| new(artifact).build_valid_event }
.compact
return if events.empty?
Geo::EventLog.transaction do
ids = JobArtifactDeletedEvent.bulk_insert!(events, validate: false, returns: :ids)
ids.map! { |id| { "#{event_type}_id" => id, created_at: Time.current } }
Geo::EventLog.insert_all!(ids)
end
end
def initialize(job_artifact)
@job_artifact = job_artifact
end
def build_valid_event
event = build_event
event.validate!
event
rescue ActiveRecord::RecordInvalid, NoMethodError => e
log_error("#{self.class.event_type.to_s.humanize} could not be created", e)
# This return value is used in the bulk_insert method call
nil
end
private
def build_event
......
---
title: 'Resolve Geo: Secondaries are orphaning artifact files'
merge_request: 60644
author:
type: fixed
......@@ -3,6 +3,8 @@
require 'spec_helper'
RSpec.describe Ci::JobArtifacts::DestroyBatchService do
include EE::GeoHelpers
describe '.execute' do
subject { service.execute }
......@@ -16,5 +18,17 @@ RSpec.describe Ci::JobArtifacts::DestroyBatchService do
expect { subject }.to change { Ci::JobArtifact.count }.by(-1)
.and change { Security::Finding.count }.from(1).to(0)
end
context 'with Geo replication' do
let_it_be(:primary) { create(:geo_node, :primary) }
let_it_be(:secondary) { create(:geo_node) }
it 'creates a JobArtifactDeletedEvent' do
stub_current_geo_node(primary)
create(:ee_ci_job_artifact, :archive)
expect { subject }.to change { Geo::JobArtifactDeletedEvent.count }.by(1)
end
end
end
end
......@@ -6,12 +6,25 @@ RSpec.describe Geo::JobArtifactDeletedEventStore do
include EE::GeoHelpers
let_it_be(:secondary_node) { create(:geo_node) }
let(:job_artifact) { create(:ci_job_artifact, :archive) }
subject { described_class.new(job_artifact) }
let_it_be(:job_artifact) { create(:ci_job_artifact, :archive) }
let_it_be(:invalid_job_artifact) { create(:ci_job_artifact) }
let_it_be(:project) { invalid_job_artifact.project }
let_it_be(:expected_error_message) do
{
class: "Geo::JobArtifactDeletedEventStore",
host: "localhost",
job_artifact_id: invalid_job_artifact.id,
project_id: project.id,
project_path: project.full_path,
storage_version: project.storage_version,
message: "Job artifact deleted event could not be created",
error: "Validation failed: File path can't be blank"
}
end
describe '#create!' do
subject { described_class.new(job_artifact) }
it_behaves_like 'a Geo event store', Geo::JobArtifactDeletedEvent do
let(:file_subject) { job_artifact }
end
......@@ -31,26 +44,86 @@ RSpec.describe Geo::JobArtifactDeletedEventStore do
end
it 'logs an error message when event creation fail' do
invalid_job_artifact = create(:ci_job_artifact)
project = invalid_job_artifact.project
subject = described_class.new(invalid_job_artifact)
expected_message = {
class: "Geo::JobArtifactDeletedEventStore",
host: "localhost",
job_artifact_id: invalid_job_artifact.id,
project_id: project.id,
project_path: project.full_path,
storage_version: project.storage_version,
message: "Job artifact deleted event could not be created",
error: "Validation failed: File path can't be blank"
}
expect(Gitlab::Geo::Logger).to receive(:error)
.with(expected_message).and_call_original
.with(expected_error_message).and_call_original
subject.create!
end
end
end
describe '.bulk_create' do
subject(:bulk_create) { described_class.bulk_create([job_artifact]) }
context 'when running on a secondary node' do
before do
stub_secondary_node
end
it 'does not create an event' do
expect { bulk_create }.not_to change(Geo::JobArtifactDeletedEvent, :count)
end
end
context 'when running on a primary node' do
before do
stub_primary_node
end
it 'does not create an event if there are no secondary nodes' do
allow(Gitlab::Geo).to receive(:secondary_nodes) { [] }
expect { bulk_create }.not_to change(Geo::JobArtifactDeletedEvent, :count)
end
it 'creates an event' do
expect { bulk_create }.to change(Geo::JobArtifactDeletedEvent, :count).by(1)
end
context 'when file subject is not on local store' do
before do
allow(job_artifact).to receive(:local?).and_return(false)
end
it 'creates an event' do
expect { bulk_create }.to change(Geo::JobArtifactDeletedEvent, :count).by(1)
end
end
it 'tracks artifact attributes' do
bulk_create
event = Geo::JobArtifactDeletedEvent.last
expect(event).to have_attributes(
job_artifact_id: job_artifact.id,
file_path: match(%r{\A\h+/\h+/\h+/[\d_]+/\d+/\d+/ci_build_artifacts.zip\z})
)
expect(event.geo_event_log).to be_present
end
it 'logs an error message when event creation fail' do
expect(Gitlab::Geo::Logger).to receive(:error)
.with(expected_error_message).and_call_original
described_class.bulk_create([invalid_job_artifact])
end
it 'inserts valid artifacts and logs errors for invalid ones' do
expect(Gitlab::Geo::Logger).to receive(:error)
.with(expected_error_message).and_call_original
expect { described_class.bulk_create([invalid_job_artifact, job_artifact]) }
.to change { Geo::JobArtifactDeletedEvent.count }.by(1)
expect(Geo::JobArtifactDeletedEvent.last).to have_attributes(
job_artifact_id: job_artifact.id,
file_path: match(%r{\A\h+/\h+/\h+/[\d_]+/\d+/\d+/ci_build_artifacts.zip\z})
)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment