Commit 87508599 authored by Mehmet Emin INAC's avatar Mehmet Emin INAC Committed by Markus Koller

Make MergeReportsService reliable

We should sort each finding individually to make the result of merge
result reliable.
parent fb7acc47
...@@ -2,112 +2,69 @@ ...@@ -2,112 +2,69 @@
module Security module Security
class MergeReportsService class MergeReportsService
ANALYZER_ORDER = { attr_reader :source_reports
"bundler_audit" => 1,
"retire.js" => 2,
"gemnasium" => 3,
"gemnasium-maven" => 3,
"gemnasium-python" => 3,
"bandit" => 1,
"semgrep" => 2,
"unknown" => 999
}.freeze
def initialize(*source_reports) def initialize(*source_reports)
@source_reports = source_reports @source_reports = source_reports
end
def execute
copy_resources_to_target_report
copy_findings_to_target
target_report
end
sort_by_analyzer_order! private
@target_report = ::Gitlab::Ci::Reports::Security::Report.new( def target_report
@source_reports.first.type, @target_report ||= ::Gitlab::Ci::Reports::Security::Report.new(
@source_reports.first.pipeline, source_reports.first.type,
@source_reports.first.created_at source_reports.first.pipeline,
source_reports.first.created_at
).tap { |report| report.errors = source_reports.flat_map(&:errors) } ).tap { |report| report.errors = source_reports.flat_map(&:errors) }
@findings = []
end end
def execute def copy_resources_to_target_report
@source_reports.each do |source| sorted_source_reports.each do |source_report|
copy_scanners_to_target(source) copy_scanners_to_target(source_report)
copy_identifiers_to_target(source) copy_identifiers_to_target(source_report)
copy_findings_to_buffer(source) copy_scanned_resources_to_target(source_report)
copy_scanned_resources_to_target(source)
end end
copy_findings_to_target
@target_report
end end
private def sorted_source_reports
source_reports.sort { |a, b| a.primary_scanner_order_to(b) }
end
def copy_scanners_to_target(source_report) def copy_scanners_to_target(source_report)
# no need for de-duping: it's done by Report internally # no need for de-duping: it's done by Report internally
source_report.scanners.values.each { |scanner| @target_report.add_scanner(scanner) } source_report.scanners.values.each { |scanner| target_report.add_scanner(scanner) }
end end
def copy_identifiers_to_target(source_report) def copy_identifiers_to_target(source_report)
# no need for de-duping: it's done by Report internally # no need for de-duping: it's done by Report internally
source_report.identifiers.values.each { |identifier| @target_report.add_identifier(identifier) } source_report.identifiers.values.each { |identifier| target_report.add_identifier(identifier) }
end end
def copy_findings_to_buffer(source) def copy_scanned_resources_to_target(source_report)
@findings.concat(source.findings) target_report.scanned_resources.concat(source_report.scanned_resources).uniq!
end end
def copy_scanned_resources_to_target(source_report) def copy_findings_to_target
@target_report.scanned_resources.concat(source_report.scanned_resources).uniq! deduplicated_findings.sort.each { |finding| target_report.add_finding(finding) }
end end
def deduplicate_findings! def deduplicated_findings
@findings, * = @findings.each_with_object([[], Set.new]) do |finding, (deduplicated, seen_identifiers)| prioritized_findings.each_with_object([[], Set.new]) do |finding, (deduplicated, seen_identifiers)|
next if seen_identifiers.intersect?(finding.keys.to_set) next if seen_identifiers.intersect?(finding.keys.to_set)
seen_identifiers.merge(finding.keys) seen_identifiers.merge(finding.keys)
deduplicated << finding deduplicated << finding
end end.first
end end
def sort_findings! def prioritized_findings
@findings.sort! do |a, b| source_reports.flat_map(&:findings).sort { |a, b| a.scanner_order_to(b) }
a_severity = a.severity
b_severity = b.severity
if a_severity == b_severity
a.compare_key <=> b.compare_key
else
::Enums::Vulnerability.severity_levels[b_severity] <=>
::Enums::Vulnerability.severity_levels[a_severity]
end
end
end
def copy_findings_to_target
deduplicate_findings!
sort_findings!
@findings.each { |finding| @target_report.add_finding(finding) }
end
def reports_sortable?
return true if @source_reports.all? { |x| x.type == :dependency_scanning }
return true if @source_reports.all? { |x| x.type == :sast }
false
end
def sort_by_analyzer_order!
return unless reports_sortable?
@source_reports.sort! do |a, b|
a_scanner_id = a.scanners.values[0].external_id
b_scanner_id = b.scanners.values[0].external_id
a_scanner_id = "unknown" if ANALYZER_ORDER[a_scanner_id].nil?
b_scanner_id = "unknown" if ANALYZER_ORDER[b_scanner_id].nil?
ANALYZER_ORDER[a_scanner_id] <=> ANALYZER_ORDER[b_scanner_id]
end
end end
end end
end end
...@@ -43,14 +43,12 @@ module Security ...@@ -43,14 +43,12 @@ module Security
end end
def sorted_artifacts def sorted_artifacts
@sorted_artifacts ||= artifacts.sort_by { |artifact| [scanner_order_for(artifact), artifact.job.name] } @sorted_artifacts ||= artifacts.sort do |a, b|
end report_a = a.security_report(validate: true)
report_b = b.security_report(validate: true)
# This method returns the priority of scanners for dependency_scanning and sast report_a.primary_scanner_order_to(report_b)
# and `INFINITY` for all the other scan types. There is no problem with end
# calling this method for all the scan types to get rid of branching.
def scanner_order_for(artifact)
MergeReportsService::ANALYZER_ORDER.fetch(artifact.security_report(validate: true).primary_scanner&.external_id, Float::INFINITY)
end end
def store_scan_for(artifact, deduplicate) def store_scan_for(artifact, deduplicate)
......
---
title: Make the MergeReportsService more reliable
merge_request: 59682
author:
type: fixed
...@@ -123,6 +123,22 @@ module Gitlab ...@@ -123,6 +123,22 @@ module Gitlab
primary_identifier&.fingerprint primary_identifier&.fingerprint
end end
def <=>(other)
if severity == other.severity
compare_key <=> other.compare_key
else
::Enums::Vulnerability.severity_levels[other.severity] <=>
::Enums::Vulnerability.severity_levels[severity]
end
end
def scanner_order_to(other)
return 1 unless scanner
return -1 unless other&.scanner
scanner <=> other.scanner
end
private private
def generate_project_fingerprint def generate_project_fingerprint
......
...@@ -62,6 +62,13 @@ module Gitlab ...@@ -62,6 +62,13 @@ module Gitlab
def primary_scanner def primary_scanner
scanners.first&.second scanners.first&.second
end end
def primary_scanner_order_to(other)
return 1 unless primary_scanner
return -1 unless other.primary_scanner
primary_scanner <=> other.primary_scanner
end
end end
end end
end end
......
...@@ -5,18 +5,26 @@ module Gitlab ...@@ -5,18 +5,26 @@ module Gitlab
module Reports module Reports
module Security module Security
class Scanner class Scanner
ANALYZER_ORDER = {
"bundler_audit" => 1,
"retire.js" => 2,
"gemnasium" => 3,
"gemnasium-maven" => 3,
"gemnasium-python" => 3,
"bandit" => 1,
"semgrep" => 2
}.freeze
attr_accessor :external_id, :name, :vendor attr_accessor :external_id, :name, :vendor
alias_method :key, :external_id
def initialize(external_id:, name:, vendor:) def initialize(external_id:, name:, vendor:)
@external_id = external_id @external_id = external_id
@name = name @name = name
@vendor = vendor @vendor = vendor
end end
def key
external_id
end
def to_hash def to_hash
{ {
external_id: external_id.to_s, external_id: external_id.to_s,
...@@ -28,6 +36,22 @@ module Gitlab ...@@ -28,6 +36,22 @@ module Gitlab
def ==(other) def ==(other)
other.external_id == external_id other.external_id == external_id
end end
def <=>(other)
sort_keys <=> other.sort_keys
end
protected
def sort_keys
@sort_keys ||= [order, external_id, name, vendor]
end
private
def order
ANALYZER_ORDER.fetch(external_id, Float::INFINITY)
end
end end
end end
end end
......
...@@ -408,4 +408,56 @@ RSpec.describe Gitlab::Ci::Reports::Security::Finding do ...@@ -408,4 +408,56 @@ RSpec.describe Gitlab::Ci::Reports::Security::Finding do
end end
end end
end end
describe '#scanner_order_to' do
let(:scanner_1) { build(:ci_reports_security_scanner) }
let(:scanner_2) { build(:ci_reports_security_scanner) }
let(:finding_1) { build(:ci_reports_security_finding, scanner: scanner_1) }
let(:finding_2) { build(:ci_reports_security_finding, scanner: scanner_2) }
subject(:compare_based_on_scanners) { finding_1.scanner_order_to(finding_2) }
context 'when the scanner of the receiver is nil' do
let(:scanner_1) { nil }
context 'when the scanner of the other is nil' do
let(:scanner_2) { nil }
it { is_expected.to be(1) }
end
context 'when the scanner of the other is not nil' do
it { is_expected.to be(1) }
end
end
context 'when the scanner of the receiver is not nil' do
context 'when the scanner of the other is nil' do
let(:scanner_2) { nil }
it { is_expected.to be(-1) }
end
context 'when the scanner of the other is not nil' do
before do
allow(scanner_1).to receive(:<=>).and_return(0)
end
it 'compares two scanners' do
expect(compare_based_on_scanners).to be(0)
expect(scanner_1).to have_received(:<=>).with(scanner_2)
end
end
end
end
describe '#<=>' do
let(:finding_1) { build(:ci_reports_security_finding, severity: :critical, compare_key: 'b') }
let(:finding_2) { build(:ci_reports_security_finding, severity: :critical, compare_key: 'a') }
let(:finding_3) { build(:ci_reports_security_finding, severity: :high) }
subject { [finding_1, finding_2, finding_3].sort }
it { is_expected.to eq([finding_2, finding_1, finding_3]) }
end
end end
...@@ -173,4 +173,52 @@ RSpec.describe Gitlab::Ci::Reports::Security::Report do ...@@ -173,4 +173,52 @@ RSpec.describe Gitlab::Ci::Reports::Security::Report do
it { is_expected.to be_truthy } it { is_expected.to be_truthy }
end end
end end
describe '#primary_scanner_order_to' do
let(:scanner_1) { build(:ci_reports_security_scanner) }
let(:scanner_2) { build(:ci_reports_security_scanner) }
let(:report_1) { described_class.new('sast', pipeline, created_at) }
let(:report_2) { described_class.new('sast', pipeline, created_at) }
subject(:compare_based_on_primary_scanners) { report_1.primary_scanner_order_to(report_2) }
context 'when the primary scanner of the receiver is nil' do
context 'when the primary scanner of the other is nil' do
it { is_expected.to be(1) }
end
context 'when the primary scanner of the other is not nil' do
before do
report_2.add_scanner(scanner_2)
end
it { is_expected.to be(1) }
end
end
context 'when the primary scanner of the receiver is not nil' do
before do
report_1.add_scanner(scanner_1)
end
context 'when the primary scanner of the other is nil' do
let(:scanner_2) { nil }
it { is_expected.to be(-1) }
end
context 'when the primary scanner of the other is not nil' do
before do
report_2.add_scanner(scanner_2)
allow(scanner_1).to receive(:<=>).and_return(0)
end
it 'compares two scanners' do
expect(compare_based_on_primary_scanners).to be(0)
expect(scanner_1).to have_received(:<=>).with(scanner_2)
end
end
end
end
end end
...@@ -91,4 +91,54 @@ RSpec.describe Gitlab::Ci::Reports::Security::Scanner do ...@@ -91,4 +91,54 @@ RSpec.describe Gitlab::Ci::Reports::Security::Scanner do
end end
end end
end end
describe '#<=>' do
using RSpec::Parameterized::TableSyntax
let(:scanner_1) { create(:ci_reports_security_scanner, **scanner_1_attributes) }
let(:scanner_2) { create(:ci_reports_security_scanner, **scanner_2_attributes) }
subject { scanner_1 <=> scanner_2 }
context 'when the `external_id` of the scanners are different' do
where(:scanner_1_attributes, :scanner_2_attributes, :expected_comparison_result) do
{ external_id: 'bundler_audit', name: 'foo', vendor: 'bar' } | { external_id: 'retire.js', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'retire.js', name: 'foo', vendor: 'bar' } | { external_id: 'gemnasium', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'gemnasium', name: 'foo', vendor: 'bar' } | { external_id: 'gemnasium-maven', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'gemnasium-maven', name: 'foo', vendor: 'bar' } | { external_id: 'gemnasium-python', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'gemnasium-python', name: 'foo', vendor: 'bar' } | { external_id: 'bandit', name: 'foo', vendor: 'bar' } | 1
{ external_id: 'bandit', name: 'foo', vendor: 'bar' } | { external_id: 'semgrep', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'semgrep', name: 'foo', vendor: 'bar' } | { external_id: 'unknown', name: 'foo', vendor: 'bar' } | -1
end
with_them do
it { is_expected.to eq(expected_comparison_result) }
end
end
context 'when the `external_id` of the scanners are equal' do
context 'when the `name` of the scanners are different' do
where(:scanner_1_attributes, :scanner_2_attributes, :expected_comparison_result) do
{ external_id: 'gemnasium', name: 'a', vendor: 'bar' } | { external_id: 'gemnasium', name: 'b', vendor: 'bar' } | -1
{ external_id: 'gemnasium', name: 'd', vendor: 'bar' } | { external_id: 'gemnasium', name: 'c', vendor: 'bar' } | 1
end
with_them do
it { is_expected.to eq(expected_comparison_result) }
end
end
context 'when the `name` of the scanners are equal' do
where(:scanner_1_attributes, :scanner_2_attributes, :expected_comparison_result) do
{ external_id: 'gemnasium', name: 'foo', vendor: 'a' } | { external_id: 'gemnasium', name: 'foo', vendor: 'a' } | 0 # rubocop:disable Lint/BinaryOperatorWithIdenticalOperands
{ external_id: 'gemnasium', name: 'foo', vendor: 'a' } | { external_id: 'gemnasium', name: 'foo', vendor: 'b' } | -1
{ external_id: 'gemnasium', name: 'foo', vendor: 'b' } | { external_id: 'gemnasium', name: 'foo', vendor: 'a' } | 1
end
with_them do
it { is_expected.to eq(expected_comparison_result) }
end
end
end
end
end end
...@@ -294,7 +294,7 @@ RSpec.describe Security::MergeReportsService, '#execute' do ...@@ -294,7 +294,7 @@ RSpec.describe Security::MergeReportsService, '#execute' do
subject(:merged_report) { described_class.new(pre_merged_report, retirejs_report).execute } subject(:merged_report) { described_class.new(pre_merged_report, retirejs_report).execute }
it 'keeps the finding from `retirejs` as it has higher priority', pending: 'https://gitlab.com/gitlab-org/gitlab/-/issues/296520' do it 'keeps the finding from `retirejs` as it has higher priority' do
expect(merged_report.findings).to include(finding_id_5) expect(merged_report.findings).to include(finding_id_5)
end end
end end
......
...@@ -4,9 +4,9 @@ require 'spec_helper' ...@@ -4,9 +4,9 @@ require 'spec_helper'
RSpec.describe Security::StoreGroupedScansService do RSpec.describe Security::StoreGroupedScansService do
let_it_be(:report_type) { :dast } let_it_be(:report_type) { :dast }
let_it_be(:build_1) { create(:ee_ci_build, name: 'Report 1') } let_it_be(:build_1) { create(:ee_ci_build) }
let_it_be(:build_2) { create(:ee_ci_build, name: 'Report 3') } let_it_be(:build_2) { create(:ee_ci_build) }
let_it_be(:build_3) { create(:ee_ci_build, name: 'Report 2') } let_it_be(:build_3) { create(:ee_ci_build) }
let_it_be(:artifact_1) { create(:ee_ci_job_artifact, report_type, job: build_1) } let_it_be(:artifact_1) { create(:ee_ci_job_artifact, report_type, job: build_1) }
let_it_be(:artifact_2) { create(:ee_ci_job_artifact, report_type, job: build_2) } let_it_be(:artifact_2) { create(:ee_ci_job_artifact, report_type, job: build_2) }
let_it_be(:artifact_3) { create(:ee_ci_job_artifact, report_type, job: build_3) } let_it_be(:artifact_3) { create(:ee_ci_job_artifact, report_type, job: build_3) }
...@@ -61,8 +61,7 @@ RSpec.describe Security::StoreGroupedScansService do ...@@ -61,8 +61,7 @@ RSpec.describe Security::StoreGroupedScansService do
end end
context 'schema validation' do context 'schema validation' do
let(:mock_scanner) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'unknown') } let(:mock_report) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: -1) }
let(:mock_report) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: mock_scanner) }
before do before do
allow(artifact_1).to receive(:security_report).and_return(mock_report) allow(artifact_1).to receive(:security_report).and_return(mock_report)
...@@ -73,9 +72,9 @@ RSpec.describe Security::StoreGroupedScansService do ...@@ -73,9 +72,9 @@ RSpec.describe Security::StoreGroupedScansService do
it 'accesses the validated security reports' do it 'accesses the validated security reports' do
store_scan_group store_scan_group
expect(artifact_1).to have_received(:security_report).with(validate: true) expect(artifact_1).to have_received(:security_report).with(validate: true).once
expect(artifact_2).to have_received(:security_report).with(validate: true) expect(artifact_2).to have_received(:security_report).with(validate: true).twice
expect(artifact_3).to have_received(:security_report).with(validate: true) expect(artifact_3).to have_received(:security_report).with(validate: true).once
end end
end end
...@@ -84,8 +83,8 @@ RSpec.describe Security::StoreGroupedScansService do ...@@ -84,8 +83,8 @@ RSpec.describe Security::StoreGroupedScansService do
store_scan_group store_scan_group
expect(Security::StoreScanService).to have_received(:execute).with(artifact_1, empty_set, false).ordered expect(Security::StoreScanService).to have_received(:execute).with(artifact_1, empty_set, false).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_3, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_2, empty_set, true).ordered expect(Security::StoreScanService).to have_received(:execute).with(artifact_2, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_3, empty_set, true).ordered
end end
end end
...@@ -94,12 +93,9 @@ RSpec.describe Security::StoreGroupedScansService do ...@@ -94,12 +93,9 @@ RSpec.describe Security::StoreGroupedScansService do
let_it_be(:sast_artifact_2) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) } let_it_be(:sast_artifact_2) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) }
let_it_be(:sast_artifact_3) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) } let_it_be(:sast_artifact_3) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) }
let(:scanner_1) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'unknown') } let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: 1) }
let(:scanner_2) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'bandit') } let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: -1) }
let(:scanner_3) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'semgrep') } let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report) }
let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_1) }
let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_2) }
let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_3) }
let(:artifacts) { [sast_artifact_1, sast_artifact_2, sast_artifact_3] } let(:artifacts) { [sast_artifact_1, sast_artifact_2, sast_artifact_3] }
before do before do
...@@ -119,28 +115,21 @@ RSpec.describe Security::StoreGroupedScansService do ...@@ -119,28 +115,21 @@ RSpec.describe Security::StoreGroupedScansService do
context 'when the artifacts are dependency_scanning' do context 'when the artifacts are dependency_scanning' do
let(:report_type) { :dependency_scanning } let(:report_type) { :dependency_scanning }
let(:build_4) { create(:ee_ci_build, name: 'Report 0') } let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: 1) }
let(:artifact_4) { create(:ee_ci_job_artifact, report_type, job: build_4) } let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: -1) }
let(:scanner_1) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'this is an unknown id') } let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report) }
let(:scanner_2) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'bundler_audit') } let(:artifacts) { [artifact_1, artifact_2, artifact_3] }
let(:scanner_3) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'retire.js') }
let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_1) }
let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_2) }
let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_3) }
let(:artifacts) { [artifact_1, artifact_2, artifact_3, artifact_4] }
before do before do
allow(artifact_1).to receive(:security_report).and_return(mock_report_1) allow(artifact_1).to receive(:security_report).and_return(mock_report_1)
allow(artifact_2).to receive(:security_report).and_return(mock_report_2) allow(artifact_2).to receive(:security_report).and_return(mock_report_2)
allow(artifact_3).to receive(:security_report).and_return(mock_report_3) allow(artifact_3).to receive(:security_report).and_return(mock_report_3)
allow(artifact_4).to receive(:security_report).and_return(mock_report_2)
end end
it 'calls the Security::StoreScanService with ordered artifacts' do it 'calls the Security::StoreScanService with ordered artifacts' do
store_scan_group store_scan_group
expect(Security::StoreScanService).to have_received(:execute).with(artifact_4, empty_set, false).ordered expect(Security::StoreScanService).to have_received(:execute).with(artifact_2, empty_set, false).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_2, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_3, empty_set, true).ordered expect(Security::StoreScanService).to have_received(:execute).with(artifact_3, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_1, empty_set, true).ordered expect(Security::StoreScanService).to have_received(:execute).with(artifact_1, empty_set, true).ordered
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment