Commit bdd1f589 authored by Marius Bobin's avatar Marius Bobin

Validate remote checksum for archived job traces

Validate remote checksum for archived job traces
parent 10329c86
...@@ -51,6 +51,11 @@ module Ci ...@@ -51,6 +51,11 @@ module Ci
end end
end end
def remote_checksum_valid?
checksum.present? &&
checksum == remote_checksum
end
private private
def backoff def backoff
......
---
name: ci_archived_build_trace_checksum
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/70072
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/340737
milestone: '14.4'
type: development
group: group::pipeline execution
default_enabled: false
...@@ -7,9 +7,10 @@ module Gitlab ...@@ -7,9 +7,10 @@ module Gitlab
include ::Gitlab::Utils::StrongMemoize include ::Gitlab::Utils::StrongMemoize
include Checksummable include Checksummable
def initialize(job, trace_metadata) def initialize(job, trace_metadata, metrics = ::Gitlab::Ci::Trace::Metrics.new)
@job = job @job = job
@trace_metadata = trace_metadata @trace_metadata = trace_metadata
@metrics = metrics
end end
def execute!(stream) def execute!(stream)
...@@ -18,15 +19,18 @@ module Gitlab ...@@ -18,15 +19,18 @@ module Gitlab
sha256_checksum = self.class.sha256_hexdigest(clone_path) sha256_checksum = self.class.sha256_hexdigest(clone_path)
job.transaction do job.transaction do
trace_artifact = create_build_trace!(clone_path, sha256_checksum) self.trace_artifact = create_build_trace!(clone_path, sha256_checksum)
trace_metadata.track_archival!(trace_artifact.id, md5_checksum) trace_metadata.track_archival!(trace_artifact.id, md5_checksum)
end end
end end
validate_archived_trace
end end
private private
attr_reader :job, :trace_metadata attr_reader :job, :trace_metadata, :metrics
attr_accessor :trace_artifact
def clone_file!(src_stream, temp_dir) def clone_file!(src_stream, temp_dir)
FileUtils.mkdir_p(temp_dir) FileUtils.mkdir_p(temp_dir)
...@@ -51,6 +55,22 @@ module Gitlab ...@@ -51,6 +55,22 @@ module Gitlab
file_sha256: file_sha256) file_sha256: file_sha256)
end end
end end
def validate_archived_trace
return unless remote_checksum
trace_metadata.update!(remote_checksum: remote_checksum)
unless trace_metadata.remote_checksum_valid?
metrics.increment_error_counter(type: :archive_invalid_checksum)
end
end
def remote_checksum
strong_memoize(:remote_checksum) do
::Gitlab::Ci::Trace::RemoteChecksum.new(trace_artifact).md5_checksum
end
end
end end
end end
end end
......
# frozen_string_literal: true
module Gitlab
module Ci
class Trace
##
# RemoteChecksum class is responsible for fetching the MD5 checksum of
# an uploaded build trace.
#
class RemoteChecksum
include Gitlab::Utils::StrongMemoize
def initialize(trace_artifact)
@trace_artifact = trace_artifact
end
def md5_checksum
strong_memoize(:md5_checksum) do
fetch_md5_checksum
end
end
private
attr_reader :trace_artifact
delegate :aws?, :google?, to: :object_store_config, prefix: :provider
def fetch_md5_checksum
return unless Feature.enabled?(:ci_archived_build_trace_checksum, trace_artifact.project, default_enabled: :yaml)
return unless object_store_config.enabled?
return if trace_artifact.local_store?
remote_checksum_value
end
def remote_checksum_value
strong_memoize(:remote_checksum_value) do
if provider_google?
checksum_from_google
elsif provider_aws?
checksum_from_aws
end
end
end
def object_store_config
strong_memoize(:object_store_config) do
trace_artifact.file.class.object_store_config
end
end
def checksum_from_google
Base64.decode64(upload_attributes[:content_md5].to_s).unpack1('H*')
end
def checksum_from_aws
upload_attributes[:etag]
end
def upload_attributes
strong_memoize(:upload_attributes) do
trace_artifact.file.file.attributes
end
end
end
end
end
end
...@@ -4,13 +4,15 @@ require 'spec_helper' ...@@ -4,13 +4,15 @@ require 'spec_helper'
RSpec.describe Gitlab::Ci::Trace::Archive do RSpec.describe Gitlab::Ci::Trace::Archive do
let_it_be(:job) { create(:ci_build, :success, :trace_live) } let_it_be(:job) { create(:ci_build, :success, :trace_live) }
let_it_be(:trace_metadata) { create(:ci_build_trace_metadata, build: job) } let_it_be_with_reload(:trace_metadata) { create(:ci_build_trace_metadata, build: job) }
let_it_be(:src_checksum) do let_it_be(:src_checksum) do
job.trace.read { |stream| Digest::MD5.hexdigest(stream.raw) } job.trace.read { |stream| Digest::MD5.hexdigest(stream.raw) }
end end
let(:metrics) { spy('metrics') }
describe '#execute' do describe '#execute' do
subject { described_class.new(job, trace_metadata) } subject { described_class.new(job, trace_metadata, metrics) }
it 'computes and assigns checksum' do it 'computes and assigns checksum' do
Gitlab::Ci::Trace::ChunkedIO.new(job) do |stream| Gitlab::Ci::Trace::ChunkedIO.new(job) do |stream|
...@@ -20,5 +22,80 @@ RSpec.describe Gitlab::Ci::Trace::Archive do ...@@ -20,5 +22,80 @@ RSpec.describe Gitlab::Ci::Trace::Archive do
expect(trace_metadata.checksum).to eq(src_checksum) expect(trace_metadata.checksum).to eq(src_checksum)
expect(trace_metadata.trace_artifact).to eq(job.job_artifacts_trace) expect(trace_metadata.trace_artifact).to eq(job.job_artifacts_trace)
end end
context 'validating artifact checksum' do
let(:trace) { 'abc' }
let(:stream) { StringIO.new(trace, 'rb') }
let(:src_checksum) { Digest::MD5.hexdigest(trace) }
context 'when the object store is disabled' do
before do
stub_artifacts_object_storage(enabled: false)
end
it 'skips validation' do
subject.execute!(stream)
expect(trace_metadata.checksum).to eq(src_checksum)
expect(trace_metadata.remote_checksum).to be_nil
expect(metrics)
.not_to have_received(:increment_error_counter)
.with(type: :archive_invalid_checksum)
end
end
context 'with background_upload enabled' do
before do
stub_artifacts_object_storage(background_upload: true)
end
it 'skips validation' do
subject.execute!(stream)
expect(trace_metadata.checksum).to eq(src_checksum)
expect(trace_metadata.remote_checksum).to be_nil
expect(metrics)
.not_to have_received(:increment_error_counter)
.with(type: :archive_invalid_checksum)
end
end
context 'with direct_upload enabled' do
before do
stub_artifacts_object_storage(direct_upload: true)
end
it 'validates the archived trace' do
subject.execute!(stream)
expect(trace_metadata.checksum).to eq(src_checksum)
expect(trace_metadata.remote_checksum).to eq(src_checksum)
expect(metrics)
.not_to have_received(:increment_error_counter)
.with(type: :archive_invalid_checksum)
end
context 'when the checksum does not match' do
let(:invalid_remote_checksum) { SecureRandom.hex }
before do
expect(::Gitlab::Ci::Trace::RemoteChecksum)
.to receive(:new)
.with(an_instance_of(Ci::JobArtifact))
.and_return(double(md5_checksum: invalid_remote_checksum))
end
it 'validates the archived trace' do
subject.execute!(stream)
expect(trace_metadata.checksum).to eq(src_checksum)
expect(trace_metadata.remote_checksum).to eq(invalid_remote_checksum)
expect(metrics)
.to have_received(:increment_error_counter)
.with(type: :archive_invalid_checksum)
end
end
end
end
end end
end end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Ci::Trace::RemoteChecksum do
let_it_be(:job) { create(:ci_build, :success) }
let(:file_store) { JobArtifactUploader::Store::LOCAL }
let(:trace_artifact) { create(:ci_job_artifact, :trace, job: job, file_store: file_store) }
let(:checksum) { Digest::MD5.hexdigest(trace_artifact.file.read) }
let(:base64checksum) { Digest::MD5.base64digest(trace_artifact.file.read) }
let(:fetcher) { described_class.new(trace_artifact) }
describe '#md5_checksum' do
subject { fetcher.md5_checksum }
context 'when the file is stored locally' do
it { is_expected.to be_nil }
end
context 'when object store is enabled' do
before do
stub_artifacts_object_storage
end
context 'with local files' do
it { is_expected.to be_nil }
end
context 'with remote files' do
let(:file_store) { JobArtifactUploader::Store::REMOTE }
context 'when the feature flag is disabled' do
before do
stub_feature_flags(ci_archived_build_trace_checksum: false)
end
it { is_expected.to be_nil }
end
context 'with AWS as provider' do
it { is_expected.to eq(checksum) }
end
context 'with Google as provider' do
let(:metadata) {{ content_md5: base64checksum }}
before do
expect(fetcher).to receive(:provider_google?) { true }
expect(fetcher).not_to receive(:provider_aws?) { false }
allow(trace_artifact.file.file)
.to receive(:attributes)
.and_return(metadata)
end
it { is_expected.to eq(checksum) }
end
context 'with unsupported providers' do
let(:file_store) { JobArtifactUploader::Store::REMOTE }
before do
expect(fetcher).to receive(:provider_aws?) { false }
expect(fetcher).to receive(:provider_google?) { false }
end
it { is_expected.to be_nil }
end
end
end
end
end
...@@ -133,4 +133,29 @@ RSpec.describe Ci::BuildTraceMetadata do ...@@ -133,4 +133,29 @@ RSpec.describe Ci::BuildTraceMetadata do
end end
end end
end end
describe '#remote_checksum_valid?' do
using RSpec::Parameterized::TableSyntax
let(:metadata) do
build(:ci_build_trace_metadata,
checksum: checksum,
remote_checksum: remote_checksum)
end
subject { metadata.remote_checksum_valid? }
where(:checksum, :remote_checksum, :result) do
nil | nil | false
nil | 'a' | false
'a' | nil | false
'a' | 'b' | false
'b' | 'a' | false
'a' | 'a' | true
end
with_them do
it { is_expected.to eq(result) }
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment