Commit b2271f23 authored by Saikat Sarkar's avatar Saikat Sarkar

Dedupe vulnerability_findings for SAST analyzers

parent a8c1ef2c
...@@ -8,13 +8,16 @@ module Security ...@@ -8,13 +8,16 @@ module Security
"gemnasium" => 3, "gemnasium" => 3,
"gemnasium-maven" => 3, "gemnasium-maven" => 3,
"gemnasium-python" => 3, "gemnasium-python" => 3,
"bandit" => 1,
"semgrep" => 2,
"unknown" => 999 "unknown" => 999
}.freeze }.freeze
def initialize(*source_reports) def initialize(*source_reports)
@source_reports = source_reports @source_reports = source_reports
# temporary sort https://gitlab.com/gitlab-org/gitlab/-/issues/213839
sort_by_ds_analyzers! sort_by_analyzer_order!
@target_report = ::Gitlab::Ci::Reports::Security::Report.new( @target_report = ::Gitlab::Ci::Reports::Security::Report.new(
@source_reports.first.type, @source_reports.first.type,
@source_reports.first.pipeline, @source_reports.first.pipeline,
...@@ -85,13 +88,19 @@ module Security ...@@ -85,13 +88,19 @@ module Security
@findings.each { |finding| @target_report.add_finding(finding) } @findings.each { |finding| @target_report.add_finding(finding) }
end end
def sort_by_ds_analyzers! def reports_sortable?
return if @source_reports.any? { |x| x.type != :dependency_scanning } return true if @source_reports.all? { |x| x.type == :dependency_scanning }
return true if @source_reports.all? { |x| x.type == :sast }
false
end
def sort_by_analyzer_order!
return unless reports_sortable?
@source_reports.sort! do |a, b| @source_reports.sort! do |a, b|
a_scanner_id, b_scanner_id = a.scanners.values[0].external_id, b.scanners.values[0].external_id a_scanner_id, b_scanner_id = a.scanners.values[0].external_id, b.scanners.values[0].external_id
# for custom analyzers
a_scanner_id = "unknown" if ANALYZER_ORDER[a_scanner_id].nil? a_scanner_id = "unknown" if ANALYZER_ORDER[a_scanner_id].nil?
b_scanner_id = "unknown" if ANALYZER_ORDER[b_scanner_id].nil? b_scanner_id = "unknown" if ANALYZER_ORDER[b_scanner_id].nil?
......
...@@ -46,7 +46,7 @@ module Security ...@@ -46,7 +46,7 @@ module Security
@sorted_artifacts ||= artifacts.sort_by { |artifact| [scanner_order_for(artifact), artifact.job.name] } @sorted_artifacts ||= artifacts.sort_by { |artifact| [scanner_order_for(artifact), artifact.job.name] }
end end
# This method returns the priority of scanners for dependency_scanning # This method returns the priority of scanners for dependency_scanning and sast
# and `INFINITY` for all the other scan types. There is no problem with # and `INFINITY` for all the other scan types. There is no problem with
# calling this method for all the scan types to get rid of branching. # calling this method for all the scan types to get rid of branching.
def scanner_order_for(artifact) def scanner_order_for(artifact)
......
---
title: Dedupe vulnerability_findings for bandit and semgrep
merge_request: 54181
author:
type: added
...@@ -133,14 +133,14 @@ RSpec.describe Security::MergeReportsService, '#execute' do ...@@ -133,14 +133,14 @@ RSpec.describe Security::MergeReportsService, '#execute' do
let(:merge_service) { described_class.new(report_1, report_2, report_3) } let(:merge_service) { described_class.new(report_1, report_2, report_3) }
subject { merge_service.execute } subject(:merged_report) { merge_service.execute }
it 'copies scanners into target report and eliminates duplicates' do it 'copies scanners into target report and eliminates duplicates' do
expect(subject.scanners.values).to contain_exactly(scanner_1, scanner_2, scanner_3) expect(merged_report.scanners.values).to contain_exactly(scanner_1, scanner_2, scanner_3)
end end
it 'copies identifiers into target report and eliminates duplicates' do it 'copies identifiers into target report and eliminates duplicates' do
expect(subject.identifiers.values).to( expect(merged_report.identifiers.values).to(
contain_exactly( contain_exactly(
identifier_1_primary, identifier_1_primary,
identifier_1_cve, identifier_1_cve,
...@@ -153,7 +153,7 @@ RSpec.describe Security::MergeReportsService, '#execute' do ...@@ -153,7 +153,7 @@ RSpec.describe Security::MergeReportsService, '#execute' do
end end
it 'deduplicates (except cwe and wasc) and sorts the vulnerabilities by severity (desc) then by compare key' do it 'deduplicates (except cwe and wasc) and sorts the vulnerabilities by severity (desc) then by compare key' do
expect(subject.findings).to( expect(merged_report.findings).to(
eq([ eq([
finding_cwe_2, finding_cwe_2,
finding_wasc_2, finding_wasc_2,
...@@ -167,7 +167,7 @@ RSpec.describe Security::MergeReportsService, '#execute' do ...@@ -167,7 +167,7 @@ RSpec.describe Security::MergeReportsService, '#execute' do
end end
it 'deduplicates scanned resources' do it 'deduplicates scanned resources' do
expect(subject.scanned_resources).to( expect(merged_report.scanned_resources).to(
eq([ eq([
scanned_resource, scanned_resource,
scanned_resource_1, scanned_resource_1,
...@@ -230,20 +230,79 @@ RSpec.describe Security::MergeReportsService, '#execute' do ...@@ -230,20 +230,79 @@ RSpec.describe Security::MergeReportsService, '#execute' do
end end
context 'when reports are gathered in an unprioritized order' do context 'when reports are gathered in an unprioritized order' do
subject { described_class.new(gemnasium_report, retirejs_report, bundler_audit_report).execute } subject(:ds_merged_report) { described_class.new(gemnasium_report, retirejs_report, bundler_audit_report).execute }
specify { expect(subject.scanners.values).to eql([bundler_audit_scanner, retire_js_scaner, gemnasium_scanner]) } specify { expect(ds_merged_report.scanners.values).to eql([bundler_audit_scanner, retire_js_scaner, gemnasium_scanner]) }
specify { expect(subject.findings.count).to eq(2) } specify { expect(ds_merged_report.findings.count).to eq(2) }
specify { expect(subject.findings.first.identifiers).to contain_exactly(identifier_cve) } specify { expect(ds_merged_report.findings.first.identifiers).to contain_exactly(identifier_cve) }
specify { expect(subject.findings.last.identifiers).to contain_exactly(identifier_npm) } specify { expect(ds_merged_report.findings.last.identifiers).to contain_exactly(identifier_npm) }
end end
context 'when a custom analyzer is completed before the known analyzers' do context 'when a custom analyzer is completed before the known analyzers' do
subject { described_class.new(custom_analyzer_report, retirejs_report, bundler_audit_report).execute } subject(:ds_merged_report) { described_class.new(custom_analyzer_report, retirejs_report, bundler_audit_report).execute }
specify { expect(subject.scanners.values).to eql([bundler_audit_scanner, retire_js_scaner, scanner_2]) } specify { expect(ds_merged_report.scanners.values).to eql([bundler_audit_scanner, retire_js_scaner, scanner_2]) }
specify { expect(subject.findings.count).to eq(3) } specify { expect(ds_merged_report.findings.count).to eq(3) }
specify { expect(subject.findings.last.identifiers).to match_array(finding_id_2_loc_1.identifiers) } specify { expect(ds_merged_report.findings.last.identifiers).to match_array(finding_id_2_loc_1.identifiers) }
end
end
context 'ordering reports for sast analyzers' do
let(:bandit_scanner) { build(:ci_reports_security_scanner, external_id: 'bandit', name: 'Bandit') }
let(:semgrep_scanner) { build(:ci_reports_security_scanner, external_id: 'semgrep', name: 'Semgrep') }
let(:identifier_bandit) { build(:ci_reports_security_identifier, external_id: 'B403', external_type: 'bandit_test_id') }
let(:identifier_cve) { build(:ci_reports_security_identifier, external_id: 'CVE-2019-123', external_type: 'cve') }
let(:identifier_semgrep) { build(:ci_reports_security_identifier, external_id: 'rules.bandit.B105', external_type: 'semgrep_id') }
let(:finding_id_1) { build(:ci_reports_security_finding, identifiers: [identifier_bandit, identifier_cve], scanner: bandit_scanner, report_type: :sast) }
let(:finding_id_2) { build(:ci_reports_security_finding, identifiers: [identifier_cve], scanner: semgrep_scanner, report_type: :sast) }
let(:finding_id_3) { build(:ci_reports_security_finding, identifiers: [identifier_semgrep], scanner: semgrep_scanner, report_type: :sast ) }
let(:bandit_report) do
build( :ci_reports_security_report,
type: :sast,
scanners: [bandit_scanner],
findings: [finding_id_1],
identifiers: finding_id_1.identifiers
)
end
let(:semgrep_report) do
build(
:ci_reports_security_report,
type: :sast,
scanners: [semgrep_scanner],
findings: [finding_id_2, finding_id_3],
identifiers: finding_id_2.identifiers + finding_id_3.identifiers
)
end
let(:custom_analyzer_report) do
build(
:ci_reports_security_report,
type: :sast,
scanners: [scanner_2],
findings: [finding_id_2_loc_1],
identifiers: finding_id_2_loc_1.identifiers
)
end
context 'when reports are gathered in an unprioritized order' do
subject(:sast_merged_report) { described_class.new(semgrep_report, bandit_report).execute }
specify { expect(sast_merged_report.scanners.values).to eql([bandit_scanner, semgrep_scanner]) }
specify { expect(sast_merged_report.findings.count).to eq(2) }
specify { expect(sast_merged_report.findings.first.identifiers).to eql([identifier_bandit, identifier_cve]) }
specify { expect(sast_merged_report.findings.last.identifiers).to contain_exactly(identifier_semgrep) }
end
context 'when a custom analyzer is completed before the known analyzers' do
subject(:sast_merged_report) { described_class.new(custom_analyzer_report, semgrep_report, bandit_report).execute }
specify { expect(sast_merged_report.scanners.values).to eql([bandit_scanner, semgrep_scanner, scanner_2]) }
specify { expect(sast_merged_report.findings.count).to eq(3) }
specify { expect(sast_merged_report.findings.last.identifiers).to match_array(finding_id_2_loc_1.identifiers) }
end end
end end
end end
...@@ -70,6 +70,33 @@ RSpec.describe Security::StoreGroupedScansService do ...@@ -70,6 +70,33 @@ RSpec.describe Security::StoreGroupedScansService do
end end
end end
context 'when the artifacts are sast' do
let_it_be(:sast_artifact_1) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) }
let_it_be(:sast_artifact_2) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) }
let_it_be(:sast_artifact_3) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) }
let(:scanner_1) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'unknown') }
let(:scanner_2) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'bandit') }
let(:scanner_3) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'semgrep') }
let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_1) }
let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_2) }
let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_3) }
let(:artifacts) { [sast_artifact_1, sast_artifact_2, sast_artifact_3] }
before do
allow(sast_artifact_1).to receive(:security_report).and_return(mock_report_1)
allow(sast_artifact_2).to receive(:security_report).and_return(mock_report_2)
allow(sast_artifact_3).to receive(:security_report).and_return(mock_report_3)
end
it 'calls the Security::StoreScanService with ordered artifacts' do
store_scan_group
expect(Security::StoreScanService).to have_received(:execute).with(sast_artifact_2, empty_set, false).ordered
expect(Security::StoreScanService).to have_received(:execute).with(sast_artifact_3, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(sast_artifact_1, empty_set, true).ordered
end
end
context 'when the artifacts are dependency_scanning' do context 'when the artifacts are dependency_scanning' do
let(:report_type) { :dependency_scanning } let(:report_type) { :dependency_scanning }
let(:build_4) { create(:ee_ci_build, name: 'Report 0') } let(:build_4) { create(:ee_ci_build, name: 'Report 0') }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment