Commit d68a1592 authored by Kerri Miller's avatar Kerri Miller

Merge branch 'rf-improve_vuln_tracking-backend_store_only' into 'master'

Improve Vulnerability Tracking: Store Fingerprints

See merge request gitlab-org/gitlab!55173
parents 0b61d6a7 2187f459
---
title: 'Improve Vulnerability Tracking: Store Fingerprints'
merge_request: 55173
author:
type: added
# frozen_string_literal: true
class ChangeFindingFingerprintEnum < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def up
change_column :vulnerability_finding_fingerprints, :algorithm_type, :integer, limit: 2
end
def down
change_column :vulnerability_finding_fingerprints, :algorithm_type, :integer
end
end
063c97800eec5bfc7f6879dbe559fa39952295d5ba947cf44ad03ac23212e924
\ No newline at end of file
......@@ -18258,7 +18258,7 @@ CREATE TABLE vulnerability_finding_fingerprints (
finding_id bigint NOT NULL,
created_at timestamp with time zone NOT NULL,
updated_at timestamp with time zone NOT NULL,
algorithm_type integer NOT NULL,
algorithm_type smallint NOT NULL,
fingerprint_sha256 bytea NOT NULL
);
......@@ -34,6 +34,8 @@ module Vulnerabilities
has_many :finding_pipelines, class_name: 'Vulnerabilities::FindingPipeline', inverse_of: :finding, foreign_key: 'occurrence_id'
has_many :pipelines, through: :finding_pipelines, class_name: 'Ci::Pipeline'
has_many :fingerprints, class_name: 'Vulnerabilities::FindingFingerprint', inverse_of: :finding
serialize :config_options, Serializers::JSON # rubocop:disable Cop/ActiveRecordSerialize
attr_writer :sha
......
# frozen_string_literal: true
module Vulnerabilities
class FindingFingerprint < ApplicationRecord
self.table_name = 'vulnerability_finding_fingerprints'
include BulkInsertSafe
belongs_to :finding, foreign_key: 'finding_id', inverse_of: :fingerprints, class_name: 'Vulnerabilities::Finding'
enum algorithm_type: { hash: 1, location: 2, scope_offset: 3 }, _prefix: :algorithm
validates :finding, presence: true
end
end
......@@ -57,6 +57,7 @@ module Security
update_vulnerability_finding(vulnerability_finding, vulnerability_params)
reset_remediations_for(vulnerability_finding, finding)
update_finding_fingerprints(finding, vulnerability_finding)
# The maximum number of identifiers is not used in validation
# we just want to ignore the rest if a finding has more than that.
......@@ -170,6 +171,43 @@ module Security
end
# rubocop: enable CodeReuse/ActiveRecord
def update_finding_fingerprints(finding, vulnerability_finding)
to_update = {}
to_create = []
poro_fingerprints = finding.fingerprints.index_by(&:algorithm_type)
vulnerability_finding.fingerprints.each do |fingerprint|
# NOTE: index_by takes the last entry if there are duplicates of the same algorithm, which should never occur.
poro_fingerprint = poro_fingerprints[fingerprint.algorithm_type]
# We're no longer generating these types of fingerprints. Since
# we're updating the persisted vulnerability, no need to do anything
# with these fingerprints now. We will track growth with
# https://gitlab.com/gitlab-org/gitlab/-/issues/322186
next if poro_fingerprint.nil?
poro_fingerprints.delete(fingerprint.algorithm_type)
to_update[fingerprint.id] = poro_fingerprint.to_h
end
# any remaining poro fingerprints left are new
poro_fingerprints.values.each do |poro_fingerprint|
attributes = poro_fingerprint.to_h.merge(finding_id: vulnerability_finding.id)
to_create << ::Vulnerabilities::FindingFingerprint.new(attributes: attributes, created_at: Time.zone.now, updated_at: Time.zone.now)
end
::Vulnerabilities::FindingFingerprint.transaction do
if to_update.count > 0
::Vulnerabilities::FindingFingerprint.update(to_update.keys, to_update.values)
end
if to_create.count > 0
::Vulnerabilities::FindingFingerprint.bulk_insert!(to_create)
end
end
end
def create_vulnerability(vulnerability_finding, pipeline)
if vulnerability_finding.vulnerability_id
Vulnerabilities::UpdateService.new(vulnerability_finding.project, pipeline.user, finding: vulnerability_finding, resolved_on_default_branch: false).execute
......
......@@ -71,11 +71,16 @@ module Gitlab
end
end
def tracking_data(data)
data['tracking']
end
def create_vulnerability(data)
identifiers = create_identifiers(data['identifiers'])
links = create_links(data['links'])
location = create_location(data['location'] || {})
remediations = create_remediations(data['remediations'])
fingerprints = create_fingerprints(tracking_data(data))
report.add_finding(
::Gitlab::Ci::Reports::Security::Finding.new(
......@@ -93,7 +98,36 @@ module Gitlab
remediations: remediations,
raw_metadata: data.to_json,
metadata_version: report_version,
details: data['details'] || {}))
details: data['details'] || {},
fingerprints: fingerprints))
end
def create_fingerprints(tracking)
return [] if tracking.nil? || tracking['items'].nil?
fingerprint_algorithms = Hash.new { |hash, key| hash[key] = [] }
tracking['items'].each do |item|
next unless item.key?('fingerprints')
item['fingerprints'].each do |fingerprint|
alg = fingerprint['algorithm']
fingerprint_algorithms[alg] << fingerprint['value']
end
end
fingerprint_algorithms.map do |algorithm, values|
value = values.join('|')
begin
fingerprint = ::Gitlab::Ci::Reports::Security::FindingFingerprint.new(
algorithm_type: algorithm,
fingerprint_value: value
)
fingerprint.valid? ? fingerprint : nil
rescue ArgumentError => e
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
nil
end
end.compact
end
def create_scan
......
......@@ -24,10 +24,11 @@ module Gitlab
attr_reader :uuid
attr_reader :remediations
attr_reader :details
attr_reader :fingerprints
delegate :file_path, :start_line, :end_line, to: :location
def initialize(compare_key:, identifiers:, links: [], remediations: [], location:, metadata_version:, name:, raw_metadata:, report_type:, scanner:, scan:, uuid:, confidence: nil, severity: nil, details: {}) # rubocop:disable Metrics/ParameterLists
def initialize(compare_key:, identifiers:, links: [], remediations: [], location:, metadata_version:, name:, raw_metadata:, report_type:, scanner:, scan:, uuid:, confidence: nil, severity: nil, details: {}, fingerprints: []) # rubocop:disable Metrics/ParameterLists
@compare_key = compare_key
@confidence = confidence
@identifiers = identifiers
......@@ -43,6 +44,7 @@ module Gitlab
@uuid = uuid
@remediations = remediations
@details = details
@fingerprints = fingerprints
@project_fingerprint = generate_project_fingerprint
end
......
# frozen_string_literal: true
module Gitlab
module Ci
module Reports
module Security
class FindingFingerprint
attr_accessor :algorithm_type, :fingerprint_value
def initialize(params = {})
@algorithm_type = params.dig(:algorithm_type)
@fingerprint_value = params.dig(:fingerprint_value)
end
def fingerprint_sha256
Digest::SHA1.digest(fingerprint_value)
end
def to_h
{
algorithm_type: algorithm_type,
fingerprint_sha256: fingerprint_sha256
}
end
def valid?
::Vulnerabilities::FindingFingerprint.algorithm_types.key?(algorithm_type)
end
end
end
end
end
end
# frozen_string_literal: true
FactoryBot.define do
factory :vulnerabilities_finding_fingerprint, class: 'Vulnerabilities::FindingFingerprint' do
finding factory: :vulnerabilities_finding
algorithm_type { ::Vulnerabilities::FindingFingerprint.algorithm_types[:hash] }
fingerprint_sha256 { ::Digest::SHA1.digest(SecureRandom.hex(50)) }
end
end
......@@ -9,12 +9,24 @@ RSpec.describe Gitlab::Ci::Parsers::Security::Common do
let(:artifact) { build(:ee_ci_job_artifact, :common_security_report) }
let(:report) { Gitlab::Ci::Reports::Security::Report.new(artifact.file_type, pipeline, 2.weeks.ago) }
let(:location) { ::Gitlab::Ci::Reports::Security::Locations::DependencyScanning.new(file_path: 'yarn/yarn.lock', package_version: 'v2', package_name: 'saml2') }
let(:tracking_data) do
{
'type' => 'source',
'items' => [
'fingerprints' => [
{ 'algorithm' => 'hash', 'value' => 'hash_value' },
{ 'algorithm' => 'location', 'value' => 'location_value' },
{ 'algorithm' => 'scope_offset', 'value' => 'scope_offset_value' }
]
]
}
end
before do
allow_next_instance_of(described_class) do |parser|
allow(parser).to receive(:create_location).and_return(location)
allow(parser).to receive(:tracking_data).and_return(tracking_data)
end
artifact.each_blob { |blob| described_class.parse!(blob, report) }
end
......@@ -187,5 +199,36 @@ RSpec.describe Gitlab::Ci::Parsers::Security::Common do
expect(finding_uuids).to match_array(expected_uuids)
end
end
describe 'parsing tracking' do
context 'with valid tracking information' do
it 'creates fingerprints for each algorithm' do
finding = report.findings.first
expect(finding.fingerprints.size).to eq(3)
expect(finding.fingerprints.map(&:algorithm_type).to_set).to eq(Set['hash', 'location', 'scope_offset'])
end
end
context 'with invalid tracking information' do
let(:tracking_data) do
{
'type' => 'source',
'items' => [
'fingerprints' => [
{ 'algorithm' => 'hash', 'value' => 'hash_value' },
{ 'algorithm' => 'location', 'value' => 'location_value' },
{ 'algorithm' => 'INVALID', 'value' => 'scope_offset_value' }
]
]
}
end
it 'ignores invalid algorithm types' do
finding = report.findings.first
expect(finding.fingerprints.size).to eq(2)
expect(finding.fingerprints.map(&:algorithm_type).to_set).to eq(Set['hash', 'location'])
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Ci::Reports::Security::FindingFingerprint do
subject { described_class.new(params.with_indifferent_access) }
let(:params) do
{
algorithm_type: 'hash',
fingerprint_value: 'FINGERPRINT'
}
end
describe '#initialize' do
context 'when a supported algorithm type is given' do
it 'allows itself to be created' do
expect(subject.algorithm_type).to eq(params[:algorithm_type])
expect(subject.fingerprint_value).to eq(params[:fingerprint_value])
end
end
end
describe '#to_h' do
it 'returns a hash representation of the fingerprint' do
expect(subject.to_h).to eq(
algorithm_type: params[:algorithm_type],
fingerprint_sha256: Digest::SHA1.digest(params[:fingerprint_value])
)
end
end
describe '#valid?' do
context 'when supported algorithm_type is given' do
it 'is valid' do
expect(subject.valid?).to eq(true)
end
end
context 'when an unsupported algorithm_type is given' do
let(:params) do
{
algorithm_type: 'INVALID',
fingerprint_value: 'FINGERPRINT'
}
end
it 'is not valid' do
expect(subject.valid?).to eq(false)
end
end
end
end
......@@ -30,11 +30,11 @@ RSpec.describe Security::StoreReportService, '#execute' do
using RSpec::Parameterized::TableSyntax
where(:case_name, :trait, :scanners, :identifiers, :findings, :finding_identifiers, :finding_pipelines, :remediations) do
'with SAST report' | :sast | 3 | 17 | 33 | 39 | 33 | 0
'with exceeding identifiers' | :with_exceeding_identifiers | 1 | 20 | 1 | 20 | 1 | 0
'with Dependency Scanning report' | :dependency_scanning_remediation | 1 | 3 | 2 | 3 | 2 | 1
'with Container Scanning report' | :container_scanning | 1 | 8 | 8 | 8 | 8 | 0
where(:case_name, :trait, :scanners, :identifiers, :findings, :finding_identifiers, :finding_pipelines, :remediations, :fingerprints) do
'with SAST report' | :sast | 3 | 17 | 33 | 39 | 33 | 0 | 2
'with exceeding identifiers' | :with_exceeding_identifiers | 1 | 20 | 1 | 20 | 1 | 0 | 0
'with Dependency Scanning report' | :dependency_scanning_remediation | 1 | 3 | 2 | 3 | 2 | 1 | 0
'with Container Scanning report' | :container_scanning | 1 | 8 | 8 | 8 | 8 | 0 | 0
end
with_them do
......@@ -65,6 +65,10 @@ RSpec.describe Security::StoreReportService, '#execute' do
it 'inserts all vulnerabilities' do
expect { subject }.to change { Vulnerability.count }.by(findings)
end
it 'inserts all fingerprints' do
expect { subject }.to change { Vulnerabilities::FindingFingerprint.count }.by(fingerprints)
end
end
context 'when report data includes all raw_metadata' do
......@@ -116,6 +120,13 @@ RSpec.describe Security::StoreReportService, '#execute' do
let(:new_build) { create(:ci_build, pipeline: new_pipeline) }
let(:new_pipeline) { create(:ci_pipeline, project: project) }
let(:new_report) { new_pipeline.security_reports.get_report(report_type.to_s, artifact) }
let(:existing_fingerprint) { create(:vulnerabilities_finding_fingerprint, finding: finding) }
let(:unsupported_fingerprint) do
create(:vulnerabilities_finding_fingerprint,
finding: finding,
algorithm_type: ::Vulnerabilities::FindingFingerprint.algorithm_types[:location])
end
let(:trait) { :sast }
let!(:finding) do
......@@ -195,6 +206,31 @@ RSpec.describe Security::StoreReportService, '#execute' do
expect(finding.reload).to have_attributes(severity: 'medium', name: 'Probable insecure usage of temp file/directory.')
end
it 'updates fingerprints to match new values' do
existing_fingerprint
unsupported_fingerprint
expect(finding.fingerprints.count).to eq(2)
fingerprint_algs = finding.fingerprints.map(&:algorithm_type).sort
expect(fingerprint_algs).to eq(%w[hash location])
subject
finding.reload
existing_fingerprint.reload
# check that unsupported algorithm is not deleted
expect(finding.fingerprints.count).to eq(3)
fingerprint_algs = finding.fingerprints.sort.map(&:algorithm_type)
expect(fingerprint_algs).to eq(%w[hash location scope_offset])
# check that the existing hash fingerprint was updated/reused
expect(existing_fingerprint.id).to eq(finding.fingerprints.min.id)
# check that the unsupported fingerprint was not deleted
expect(::Vulnerabilities::FindingFingerprint.exists?(unsupported_fingerprint.id)).to eq(true)
end
it 'updates existing vulnerability with new data' do
subject
......
......@@ -28,7 +28,21 @@
"file": "python/hardcoded/hardcoded-tmp.py",
"line": 1,
"url": "https://docs.openstack.org/bandit/latest/plugins/b108_hardcoded_tmp_directory.html",
"tool": "bandit"
"tool": "bandit",
"tracking": {
"type": "source",
"items": [
{
"file": "python/hardcoded/hardcoded-tmp.py",
"start_line": 1,
"end_line": 1,
"fingerprints": [
{ "algorithm": "hash", "value": "HASHVALUE" },
{ "algorithm": "scope_offset", "value": "python/hardcoded/hardcoded-tmp.py:ClassA:method_b:2" }
]
}
]
}
},
{
"category": "sast",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment