Commit b2271f23 authored by Saikat Sarkar's avatar Saikat Sarkar

Dedupe vulnerability_findings for SAST analyzers

parent a8c1ef2c
......@@ -8,13 +8,16 @@ module Security
"gemnasium" => 3,
"gemnasium-maven" => 3,
"gemnasium-python" => 3,
"bandit" => 1,
"semgrep" => 2,
"unknown" => 999
}.freeze
def initialize(*source_reports)
@source_reports = source_reports
# temporary sort https://gitlab.com/gitlab-org/gitlab/-/issues/213839
sort_by_ds_analyzers!
sort_by_analyzer_order!
@target_report = ::Gitlab::Ci::Reports::Security::Report.new(
@source_reports.first.type,
@source_reports.first.pipeline,
......@@ -85,13 +88,19 @@ module Security
@findings.each { |finding| @target_report.add_finding(finding) }
end
def sort_by_ds_analyzers!
return if @source_reports.any? { |x| x.type != :dependency_scanning }
def reports_sortable?
return true if @source_reports.all? { |x| x.type == :dependency_scanning }
return true if @source_reports.all? { |x| x.type == :sast }
false
end
def sort_by_analyzer_order!
return unless reports_sortable?
@source_reports.sort! do |a, b|
a_scanner_id, b_scanner_id = a.scanners.values[0].external_id, b.scanners.values[0].external_id
# for custom analyzers
a_scanner_id = "unknown" if ANALYZER_ORDER[a_scanner_id].nil?
b_scanner_id = "unknown" if ANALYZER_ORDER[b_scanner_id].nil?
......
......@@ -46,7 +46,7 @@ module Security
@sorted_artifacts ||= artifacts.sort_by { |artifact| [scanner_order_for(artifact), artifact.job.name] }
end
# This method returns the priority of scanners for dependency_scanning
# This method returns the priority of scanners for dependency_scanning and sast
# and `INFINITY` for all the other scan types. There is no problem with
# calling this method for all the scan types to get rid of branching.
def scanner_order_for(artifact)
......
---
title: Dedupe vulnerability_findings for bandit and semgrep
merge_request: 54181
author:
type: added
......@@ -133,14 +133,14 @@ RSpec.describe Security::MergeReportsService, '#execute' do
let(:merge_service) { described_class.new(report_1, report_2, report_3) }
subject { merge_service.execute }
subject(:merged_report) { merge_service.execute }
it 'copies scanners into target report and eliminates duplicates' do
expect(subject.scanners.values).to contain_exactly(scanner_1, scanner_2, scanner_3)
expect(merged_report.scanners.values).to contain_exactly(scanner_1, scanner_2, scanner_3)
end
it 'copies identifiers into target report and eliminates duplicates' do
expect(subject.identifiers.values).to(
expect(merged_report.identifiers.values).to(
contain_exactly(
identifier_1_primary,
identifier_1_cve,
......@@ -153,7 +153,7 @@ RSpec.describe Security::MergeReportsService, '#execute' do
end
it 'deduplicates (except cwe and wasc) and sorts the vulnerabilities by severity (desc) then by compare key' do
expect(subject.findings).to(
expect(merged_report.findings).to(
eq([
finding_cwe_2,
finding_wasc_2,
......@@ -167,7 +167,7 @@ RSpec.describe Security::MergeReportsService, '#execute' do
end
it 'deduplicates scanned resources' do
expect(subject.scanned_resources).to(
expect(merged_report.scanned_resources).to(
eq([
scanned_resource,
scanned_resource_1,
......@@ -230,20 +230,79 @@ RSpec.describe Security::MergeReportsService, '#execute' do
end
context 'when reports are gathered in an unprioritized order' do
subject { described_class.new(gemnasium_report, retirejs_report, bundler_audit_report).execute }
subject(:ds_merged_report) { described_class.new(gemnasium_report, retirejs_report, bundler_audit_report).execute }
specify { expect(subject.scanners.values).to eql([bundler_audit_scanner, retire_js_scaner, gemnasium_scanner]) }
specify { expect(subject.findings.count).to eq(2) }
specify { expect(subject.findings.first.identifiers).to contain_exactly(identifier_cve) }
specify { expect(subject.findings.last.identifiers).to contain_exactly(identifier_npm) }
specify { expect(ds_merged_report.scanners.values).to eql([bundler_audit_scanner, retire_js_scaner, gemnasium_scanner]) }
specify { expect(ds_merged_report.findings.count).to eq(2) }
specify { expect(ds_merged_report.findings.first.identifiers).to contain_exactly(identifier_cve) }
specify { expect(ds_merged_report.findings.last.identifiers).to contain_exactly(identifier_npm) }
end
context 'when a custom analyzer is completed before the known analyzers' do
subject { described_class.new(custom_analyzer_report, retirejs_report, bundler_audit_report).execute }
subject(:ds_merged_report) { described_class.new(custom_analyzer_report, retirejs_report, bundler_audit_report).execute }
specify { expect(subject.scanners.values).to eql([bundler_audit_scanner, retire_js_scaner, scanner_2]) }
specify { expect(subject.findings.count).to eq(3) }
specify { expect(subject.findings.last.identifiers).to match_array(finding_id_2_loc_1.identifiers) }
specify { expect(ds_merged_report.scanners.values).to eql([bundler_audit_scanner, retire_js_scaner, scanner_2]) }
specify { expect(ds_merged_report.findings.count).to eq(3) }
specify { expect(ds_merged_report.findings.last.identifiers).to match_array(finding_id_2_loc_1.identifiers) }
end
end
context 'ordering reports for sast analyzers' do
let(:bandit_scanner) { build(:ci_reports_security_scanner, external_id: 'bandit', name: 'Bandit') }
let(:semgrep_scanner) { build(:ci_reports_security_scanner, external_id: 'semgrep', name: 'Semgrep') }
let(:identifier_bandit) { build(:ci_reports_security_identifier, external_id: 'B403', external_type: 'bandit_test_id') }
let(:identifier_cve) { build(:ci_reports_security_identifier, external_id: 'CVE-2019-123', external_type: 'cve') }
let(:identifier_semgrep) { build(:ci_reports_security_identifier, external_id: 'rules.bandit.B105', external_type: 'semgrep_id') }
let(:finding_id_1) { build(:ci_reports_security_finding, identifiers: [identifier_bandit, identifier_cve], scanner: bandit_scanner, report_type: :sast) }
let(:finding_id_2) { build(:ci_reports_security_finding, identifiers: [identifier_cve], scanner: semgrep_scanner, report_type: :sast) }
let(:finding_id_3) { build(:ci_reports_security_finding, identifiers: [identifier_semgrep], scanner: semgrep_scanner, report_type: :sast ) }
let(:bandit_report) do
build( :ci_reports_security_report,
type: :sast,
scanners: [bandit_scanner],
findings: [finding_id_1],
identifiers: finding_id_1.identifiers
)
end
let(:semgrep_report) do
build(
:ci_reports_security_report,
type: :sast,
scanners: [semgrep_scanner],
findings: [finding_id_2, finding_id_3],
identifiers: finding_id_2.identifiers + finding_id_3.identifiers
)
end
let(:custom_analyzer_report) do
build(
:ci_reports_security_report,
type: :sast,
scanners: [scanner_2],
findings: [finding_id_2_loc_1],
identifiers: finding_id_2_loc_1.identifiers
)
end
context 'when reports are gathered in an unprioritized order' do
subject(:sast_merged_report) { described_class.new(semgrep_report, bandit_report).execute }
specify { expect(sast_merged_report.scanners.values).to eql([bandit_scanner, semgrep_scanner]) }
specify { expect(sast_merged_report.findings.count).to eq(2) }
specify { expect(sast_merged_report.findings.first.identifiers).to eql([identifier_bandit, identifier_cve]) }
specify { expect(sast_merged_report.findings.last.identifiers).to contain_exactly(identifier_semgrep) }
end
context 'when a custom analyzer is completed before the known analyzers' do
subject(:sast_merged_report) { described_class.new(custom_analyzer_report, semgrep_report, bandit_report).execute }
specify { expect(sast_merged_report.scanners.values).to eql([bandit_scanner, semgrep_scanner, scanner_2]) }
specify { expect(sast_merged_report.findings.count).to eq(3) }
specify { expect(sast_merged_report.findings.last.identifiers).to match_array(finding_id_2_loc_1.identifiers) }
end
end
end
......@@ -70,6 +70,33 @@ RSpec.describe Security::StoreGroupedScansService do
end
end
context 'when the artifacts are sast' do
let_it_be(:sast_artifact_1) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) }
let_it_be(:sast_artifact_2) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) }
let_it_be(:sast_artifact_3) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) }
let(:scanner_1) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'unknown') }
let(:scanner_2) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'bandit') }
let(:scanner_3) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'semgrep') }
let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_1) }
let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_2) }
let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_3) }
let(:artifacts) { [sast_artifact_1, sast_artifact_2, sast_artifact_3] }
before do
allow(sast_artifact_1).to receive(:security_report).and_return(mock_report_1)
allow(sast_artifact_2).to receive(:security_report).and_return(mock_report_2)
allow(sast_artifact_3).to receive(:security_report).and_return(mock_report_3)
end
it 'calls the Security::StoreScanService with ordered artifacts' do
store_scan_group
expect(Security::StoreScanService).to have_received(:execute).with(sast_artifact_2, empty_set, false).ordered
expect(Security::StoreScanService).to have_received(:execute).with(sast_artifact_3, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(sast_artifact_1, empty_set, true).ordered
end
end
context 'when the artifacts are dependency_scanning' do
let(:report_type) { :dependency_scanning }
let(:build_4) { create(:ee_ci_build, name: 'Report 0') }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment