Commit df1a8846 authored by Mehmet Emin INAC's avatar Mehmet Emin INAC

Implement new service classes to save scan & findings

These new service classes will save the entries of Security::Scan and
Security::Finding models grouped by same report kind to optimize memory
usage while marking the duplicated findings withing the same group.
parent 9f500a1a
......@@ -6,6 +6,7 @@ module EE
# This module is intended to encapsulate EE-specific model logic
# and be prepended in the `Ci::JobArtifact` model
module Ci::JobArtifact
include ::Gitlab::Utils::StrongMemoize
extend ActiveSupport::Concern
prepended do
......@@ -97,5 +98,29 @@ module EE
def log_geo_deleted_event
::Geo::JobArtifactDeletedEventStore.new(self).create!
end
# Ideally we would have a method to return an instance of
# parsed report regardless of the `file_type` but this will
# require more effort so we can have this security reports
# specific method here for now.
def security_report
strong_memoize(:security_report) do
next unless file_type.in?(SECURITY_REPORT_FILE_TYPES)
::Gitlab::Ci::Reports::Security::Report.new(file_type, nil, nil).tap do |report|
each_blob do |blob|
::Gitlab::Ci::Parsers.fabricate!(file_type).parse!(blob, report)
end
end
end
end
# This method is necessary to remove the reference to the
# security report object which allows GC to free the memory
# slots in vm_heap occupied for the report object and it's
# dependents.
def clear_security_report
clear_memoization(:security_report)
end
end
end
......@@ -20,5 +20,7 @@ module Security
enum severity: Vulnerabilities::Finding::SEVERITY_LEVELS, _prefix: :severity
validates :project_fingerprint, presence: true, length: { maximum: 40 }
scope :by_project_fingerprint, -> (fingerprints) { where(project_fingerprint: fingerprints) }
end
end
# frozen_string_literal: true
module Security
class StoreGroupedScansService < ::BaseService
include ::Gitlab::ExclusiveLeaseHelpers
LEASE_TTL = 30.minutes
LEASE_NAMESPACE = "store_grouped_scans"
def self.execute(artifacts)
new(artifacts).execute
end
def initialize(artifacts)
@artifacts = artifacts
@known_keys = Set.new
end
def execute
in_lock(lease_key, ttl: LEASE_TTL) do
sorted_artifacts.reduce(false) do |deduplicate, artifact|
store_scan_for(artifact, deduplicate)
end
end
end
private
attr_reader :artifacts, :known_keys
def lease_key
"#{LEASE_NAMESPACE}:#{pipeline_id}:#{report_type}"
end
def pipeline_id
artifacts.first&.job&.pipeline_id
end
def report_type
artifacts.first&.file_type
end
def sorted_artifacts
@sorted_artifacts ||= artifacts.sort_by { |artifact| [scanner_order_for(artifact), artifact.job.name] }
end
# This method returns the priority of scanners for dependency_scanning
# and `INFINITY` for all the other scan types. There is no problem with
# calling this method for all the scan types to get rid of branching.
def scanner_order_for(artifact)
MergeReportsService::ANALYZER_ORDER.fetch(artifact.security_report.primary_scanner&.external_id, Float::INFINITY)
end
def store_scan_for(artifact, deduplicate)
StoreScanService.execute(artifact, known_keys, deduplicate)
ensure
artifact.clear_security_report
end
end
end
# frozen_string_literal: true
# This service stores the `Security::Scan` and
# `Security::Finding` records for the given job artifact.
#
# @param artifact [Ci::JobArtifact] the artifact to create scan and finding records from.
# @param known_keys [Set] the set of known finding keys stored by previous invocations of this service class.
# @param deduplicate [Boolean] attribute to force running deduplication logic.
module Security
class StoreScanService < ::BaseService
def self.execute(artifact, known_keys, deduplicate)
new(artifact, known_keys, deduplicate).execute
end
def initialize(artifact, known_keys, deduplicate)
@artifact = artifact
@known_keys = known_keys
@deduplicate = deduplicate
end
def execute
StoreFindingsMetadataService.execute(security_scan, security_report)
deduplicate_findings? ? update_deduplicated_findings : register_finding_keys
deduplicate_findings?
end
private
attr_reader :artifact, :known_keys, :deduplicate
delegate :security_report, to: :artifact, private: true
def security_scan
@security_scan ||= Security::Scan.safe_find_or_create_by!(build: artifact.job, scan_type: artifact.file_type)
end
def deduplicate_findings?
deduplicate || security_scan.saved_changes?
end
def update_deduplicated_findings
ActiveRecord::Base.transaction do
security_scan.findings.update_all(deduplicated: false)
security_scan.findings
.by_project_fingerprint(deduplicated_project_fingerprints)
.update_all(deduplicated: true)
end
end
def deduplicated_project_fingerprints
register_finding_keys.map(&:project_fingerprint)
end
def register_finding_keys
@register_finding_keys ||= security_report.findings.select { |finding| register_keys(finding.keys) }
end
def register_keys(keys)
keys.map { |key| known_keys.add?(key) }.all?
end
end
end
......@@ -2,36 +2,34 @@
module Security
class StoreScansService
def initialize(build)
@build = build
def self.execute(pipeline)
new(pipeline).execute
end
def execute
return if canceled_or_skipped?
def initialize(pipeline)
@pipeline = pipeline
end
security_reports.each { |_, report| store_scan_for(report) }
def execute
grouped_report_artifacts.each { |artifacts| StoreGroupedScansService.execute(artifacts) }
end
private
attr_reader :build
attr_reader :pipeline
def canceled_or_skipped?
build.canceled? || build.skipped?
end
delegate :project, to: :pipeline, private: true
def security_reports
::Gitlab::Ci::Reports::Security::Reports.new(self).tap do |security_reports|
build.collect_security_reports!(security_reports)
end
def grouped_report_artifacts
pipeline.job_artifacts
.security_reports
.group_by(&:file_type)
.select { |file_type, _| parse_report_file?(file_type) }
.values
end
def store_scan_for(report)
ActiveRecord::Base.transaction do
security_scan = Security::Scan.safe_find_or_create_by!(build: build, scan_type: report.type)
StoreFindingsMetadataService.execute(security_scan, report)
end
def parse_report_file?(file_type)
project.feature_available?(Ci::Build::LICENSED_PARSER_FEATURES.fetch(file_type))
end
end
end
......@@ -87,6 +87,12 @@ module Gitlab
scanner.present? && primary_identifier.present? && location.present?
end
def keys
@keys ||= identifiers.map do |identifier|
FindingKey.new(location_fingerprint: location&.fingerprint, identifier_fingerprint: identifier.fingerprint)
end
end
protected
def primary_fingerprint
......
# frozen_string_literal: true
module Gitlab
module Ci
module Reports
module Security
class FindingKey
def initialize(location_fingerprint:, identifier_fingerprint:)
@location_fingerprint = location_fingerprint
@identifier_fingerprint = identifier_fingerprint
end
def ==(other)
location_fingerprint == other.location_fingerprint &&
identifier_fingerprint == other.identifier_fingerprint
end
def hash
location_fingerprint.hash ^ identifier_fingerprint.hash
end
alias_method :eql?, :==
protected
attr_reader :location_fingerprint, :identifier_fingerprint
end
end
end
end
end
......@@ -59,6 +59,10 @@ module Gitlab
def merge!(other)
replace_with!(::Security::MergeReportsService.new(self, other).execute)
end
def primary_scanner
scanners.first&.second
end
end
end
end
......
# frozen_string_literal: true
FactoryBot.define do
factory :ci_reports_security_finding_key, class: '::Gitlab::Ci::Reports::Security::FindingKey' do
sequence :location_fingerprint do |a|
Digest::SHA1.hexdigest(a.to_s)
end
sequence :identifier_fingerprint do |a|
Digest::SHA1.hexdigest(a.to_s)
end
skip_create
initialize_with do
::Gitlab::Ci::Reports::Security::FindingKey.new(attributes)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Ci::Reports::Security::FindingKey do
using RSpec::Parameterized::TableSyntax
describe '#==' do
where(:location_fp_1, :location_fp_2, :identifier_fp_1, :identifier_fp_2, :equals?) do
'location fp' | 'different location fp' | 'identifier fp' | 'different identifier fp' | false
'location fp' | 'different location fp' | 'identifier fp' | 'identifier fp' | false
'location fp' | 'location fp' | 'identifier fp' | 'different identifier fp' | false
'location fp' | 'location fp' | 'identifier fp' | 'identifier fp' | true
end
with_them do
let(:finding_key_1) do
build(:ci_reports_security_finding_key,
location_fingerprint: location_fp_1,
identifier_fingerprint: identifier_fp_1)
end
let(:finding_key_2) do
build(:ci_reports_security_finding_key,
location_fingerprint: location_fp_2,
identifier_fingerprint: identifier_fp_2)
end
subject { finding_key_1 == finding_key_2 }
it { is_expected.to be(equals?) }
end
end
end
......@@ -275,4 +275,21 @@ RSpec.describe Gitlab::Ci::Reports::Security::Finding do
it { is_expected.to be_truthy }
end
end
describe '#keys' do
let(:identifier_1) { build(:ci_reports_security_identifier) }
let(:identifier_2) { build(:ci_reports_security_identifier) }
let(:location) { build(:ci_reports_security_locations_sast) }
let(:finding) { build(:ci_reports_security_finding, identifiers: [identifier_1, identifier_2], location: location) }
let(:expected_keys) do
[
build(:ci_reports_security_finding_key, location_fingerprint: location.fingerprint, identifier_fingerprint: identifier_1.fingerprint),
build(:ci_reports_security_finding_key, location_fingerprint: location.fingerprint, identifier_fingerprint: identifier_2.fingerprint)
]
end
subject { finding.keys }
it { is_expected.to match_array(expected_keys) }
end
end
......@@ -123,4 +123,18 @@ RSpec.describe Gitlab::Ci::Reports::Security::Report do
expect(report).to have_received(:replace_with!).with(merged_report)
end
end
describe '#primary_scanner' do
let(:scanner_1) { create(:ci_reports_security_scanner, external_id: 'external_id_1') }
let(:scanner_2) { create(:ci_reports_security_scanner, external_id: 'external_id_2') }
subject { report.primary_scanner }
before do
report.add_scanner(scanner_1)
report.add_scanner(scanner_2)
end
it { is_expected.to eq(scanner_1) }
end
end
......@@ -212,4 +212,51 @@ RSpec.describe Ci::JobArtifact do
end
end
end
describe '#security_report' do
let(:job_artifact) { create(:ee_ci_job_artifact, :sast) }
let(:security_report) { job_artifact.security_report }
subject(:findings_count) { security_report.findings.length }
it { is_expected.to be(33) }
context 'for different types' do
where(:file_type, :security_report?) do
:performance | false
:sast | true
:secret_detection | true
:dependency_scanning | true
:container_scanning | true
:dast | true
:coverage_fuzzing | true
end
with_them do
let(:job_artifact) { create(:ee_ci_job_artifact, file_type) }
subject { security_report.is_a?(::Gitlab::Ci::Reports::Security::Report) }
it { is_expected.to be(security_report?) }
end
end
end
describe '#clear_security_report' do
let(:job_artifact) { create(:ee_ci_job_artifact, :sast) }
subject(:clear_security_report) { job_artifact.clear_security_report }
before do
job_artifact.security_report # Memoize first
allow(::Gitlab::Ci::Reports::Security::Report).to receive(:new).and_call_original
end
it 'clears the security_report' do
clear_security_report
job_artifact.security_report
expect(::Gitlab::Ci::Reports::Security::Report).to have_received(:new).once
end
end
end
......@@ -12,4 +12,14 @@ RSpec.describe Security::Finding do
it { is_expected.to validate_presence_of(:project_fingerprint) }
it { is_expected.to validate_length_of(:project_fingerprint).is_at_most(40) }
end
describe '.by_project_fingerprint' do
let!(:finding_1) { create(:security_finding) }
let!(:finding_2) { create(:security_finding) }
let(:expected_findings) { [finding_1] }
subject { described_class.by_project_fingerprint(finding_1.project_fingerprint) }
it { is_expected.to match_array(expected_findings) }
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Security::StoreGroupedScansService do
let_it_be(:report_type) { :dast }
let_it_be(:build_1) { create(:ee_ci_build, name: 'Report 1') }
let_it_be(:build_2) { create(:ee_ci_build, name: 'Report 3') }
let_it_be(:build_3) { create(:ee_ci_build, name: 'Report 2') }
let_it_be(:artifact_1) { create(:ee_ci_job_artifact, report_type, job: build_1) }
let_it_be(:artifact_2) { create(:ee_ci_job_artifact, report_type, job: build_2) }
let_it_be(:artifact_3) { create(:ee_ci_job_artifact, report_type, job: build_3) }
let(:artifacts) { [artifact_1, artifact_2, artifact_3] }
describe '.execute' do
let(:mock_service_object) { instance_double(described_class, execute: true) }
subject(:execute) { described_class.execute(artifacts) }
before do
allow(described_class).to receive(:new).with(artifacts).and_return(mock_service_object)
end
it 'delegates the call to an instance of `Security::StoreGroupedScansService`' do
execute
expect(described_class).to have_received(:new).with(artifacts)
expect(mock_service_object).to have_received(:execute)
end
end
describe '#execute' do
let(:service_object) { described_class.new(artifacts) }
let(:empty_set) { Set.new }
subject(:store_scan_group) { service_object.execute }
before do
allow(Security::StoreScanService).to receive(:execute).and_return(true)
end
context 'when the artifacts are not dependency_scanning' do
it 'calls the Security::StoreScanService with ordered artifacts' do
store_scan_group
expect(Security::StoreScanService).to have_received(:execute).with(artifact_1, empty_set, false).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_3, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_2, empty_set, true).ordered
end
end
context 'when the artifacts are dependency_scanning' do
let(:report_type) { :dependency_scanning }
let(:build_4) { create(:ee_ci_build, name: 'Report 0') }
let(:artifact_4) { create(:ee_ci_job_artifact, report_type, job: build_4) }
let(:scanner_1) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'this is an unknown id') }
let(:scanner_2) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'bundler_audit') }
let(:scanner_3) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'retire.js') }
let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_1) }
let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_2) }
let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_3) }
let(:artifacts) { [artifact_1, artifact_2, artifact_3, artifact_4] }
before do
allow(artifact_1).to receive(:security_report).and_return(mock_report_1)
allow(artifact_2).to receive(:security_report).and_return(mock_report_2)
allow(artifact_3).to receive(:security_report).and_return(mock_report_3)
allow(artifact_4).to receive(:security_report).and_return(mock_report_2)
end
it 'calls the Security::StoreScanService with ordered artifacts' do
store_scan_group
expect(Security::StoreScanService).to have_received(:execute).with(artifact_4, empty_set, false).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_2, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_3, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_1, empty_set, true).ordered
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Security::StoreScanService do
let_it_be(:artifact) { create(:ee_ci_job_artifact, :sast) }
let(:known_keys) { Set.new }
describe '.execute' do
let(:mock_service_object) { instance_double(described_class, execute: true) }
subject(:execute) { described_class.execute(artifact, known_keys, false) }
before do
allow(described_class).to receive(:new).with(artifact, known_keys, false).and_return(mock_service_object)
end
it 'delegates the call to an instance of `Security::StoreScanService`' do
execute
expect(described_class).to have_received(:new).with(artifact, known_keys, false)
expect(mock_service_object).to have_received(:execute)
end
end
describe '#execute' do
let(:deduplicate) { false }
let(:service_object) { described_class.new(artifact, known_keys, deduplicate) }
let(:finding_key) do
build(:ci_reports_security_finding_key,
location_fingerprint: 'd869ba3f0b3347eb2749135a437dc07c8ae0f420',
identifier_fingerprint: 'e6dd15eda2137be0034977a85b300a94a4f243a3')
end
subject(:store_scan) { service_object.execute }
before do
allow(Security::StoreFindingsMetadataService).to receive(:execute)
known_keys.add(finding_key)
end
it 'calls the `Security::StoreFindingsMetadataService` to store findings' do
store_scan
expect(Security::StoreFindingsMetadataService).to have_received(:execute)
end
context 'when the security scan already exists for the artifact' do
let_it_be(:security_scan) { create(:security_scan, build: artifact.job, scan_type: :sast) }
let_it_be(:duplicated_security_finding) do
create(:security_finding,
scan: security_scan,
project_fingerprint: 'd533c3a12403b6c6033a50b53f9c73f894a40fc6')
end
let_it_be(:unique_security_finding) do
create(:security_finding,
scan: security_scan,
project_fingerprint: 'b9c0d1cdc7cb9c180ebb6981abbddc2df0172509')
end
it 'does not create a new security scan' do
expect { store_scan }.not_to change { artifact.job.security_scans.count }
end
context 'when the `deduplicate` param is set as false' do
it 'does not change the deduplicated flag of duplicated finding' do
expect { store_scan }.not_to change { duplicated_security_finding.reload.deduplicated }.from(false)
end
it 'does not change the deduplicated flag of unique finding' do
expect { store_scan }.not_to change { unique_security_finding.reload.deduplicated }.from(false)
end
end
context 'when the `deduplicate` param is set as true' do
let(:deduplicate) { true }
it 'does not change the deduplicated flag of duplicated finding false' do
expect { store_scan }.not_to change { duplicated_security_finding.reload.deduplicated }.from(false)
end
it 'sets the deduplicated flag of unique finding as true' do
expect { store_scan }.to change { unique_security_finding.reload.deduplicated }.to(true)
end
end
end
context 'when the security scan does not exist for the artifact' do
let(:duplicated_finding_attribute) do
-> { Security::Finding.by_project_fingerprint('d533c3a12403b6c6033a50b53f9c73f894a40fc6').first&.deduplicated }
end
let(:unique_finding_attribute) do
-> { Security::Finding.by_project_fingerprint('b9c0d1cdc7cb9c180ebb6981abbddc2df0172509').first&.deduplicated }
end
before do
allow(Security::StoreFindingsMetadataService).to receive(:execute).and_call_original
end
it 'creates a new security scan' do
expect { store_scan }.to change { artifact.job.security_scans.sast.count }.by(1)
end
context 'when the `deduplicate` param is set as false' do
it 'sets the deduplicated flag of duplicated finding as false' do
expect { store_scan }.to change { duplicated_finding_attribute.call }.to(false)
end
it 'sets the deduplicated flag of unique finding as true' do
expect { store_scan }.to change { unique_finding_attribute.call }.to(true)
end
end
context 'when the `deduplicate` param is set as true' do
let(:deduplicate) { true }
it 'sets the deduplicated flag of duplicated finding false' do
expect { store_scan }.to change { duplicated_finding_attribute.call }.to(false)
end
it 'sets the deduplicated flag of unique finding as true' do
expect { store_scan }.to change { unique_finding_attribute.call }.to(true)
end
end
end
end
end
......@@ -3,53 +3,45 @@
require 'spec_helper'
RSpec.describe Security::StoreScansService do
let(:build) { create(:ci_build) }
let_it_be(:pipeline) { create(:ci_pipeline) }
subject { Security::StoreScansService.new(build).execute }
describe '.execute' do
let(:mock_service_object) { instance_double(described_class, execute: true) }
before do
allow(Security::StoreFindingsMetadataService).to receive(:execute)
end
subject(:execute) { described_class.execute(pipeline) }
context 'build has security reports' do
before do
create(:ee_ci_job_artifact, :dast, job: build)
create(:ee_ci_job_artifact, :sast, job: build)
create(:ee_ci_job_artifact, :codequality, job: build)
end
it 'saves security scans' do
subject
scans = Security::Scan.where(build: build)
expect(scans.count).to be(2)
expect(scans.sast.count).to be(1)
expect(scans.dast.count).to be(1)
allow(described_class).to receive(:new).with(pipeline).and_return(mock_service_object)
end
it 'calls the StoreFindingsMetadataService' do
subject
it 'delegates the call to an instance of `Security::StoreScansService`' do
execute
expect(Security::StoreFindingsMetadataService).to have_received(:execute).twice
expect(described_class).to have_received(:new).with(pipeline)
expect(mock_service_object).to have_received(:execute)
end
end
context 'scan already exists' do
before do
create(:ee_ci_job_artifact, :dast, job: build)
create(:security_scan, build: build, scan_type: 'dast')
end
describe '#execute' do
let(:service_object) { described_class.new(pipeline) }
it 'does not save' do
subject
let_it_be(:sast_build) { create(:ee_ci_build, pipeline: pipeline) }
let_it_be(:dast_build) { create(:ee_ci_build, pipeline: pipeline) }
let_it_be(:sast_artifact) { create(:ee_ci_job_artifact, :sast, job: sast_build) }
let_it_be(:dast_artifact) { create(:ee_ci_job_artifact, :dast, job: dast_build) }
expect(Security::Scan.where(build: build).count).to be(1)
subject(:store_group_of_artifacts) { service_object.execute }
before do
allow(Security::StoreGroupedScansService).to receive(:execute)
stub_licensed_features(sast: true, dast: false)
end
it 'calls the StoreFindingsMetadataService' do
subject
it 'executes Security::StoreGroupedScansService for each group of artifacts if the feature is available' do
store_group_of_artifacts
expect(Security::StoreFindingsMetadataService).to have_received(:execute).once
expect(Security::StoreGroupedScansService).to have_received(:execute).with([sast_artifact])
expect(Security::StoreGroupedScansService).not_to have_received(:execute).with([dast_artifact])
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment