Commit 086cb496 authored by Mikołaj Wawrzyniak's avatar Mikołaj Wawrzyniak

Merge branch '321918_validate_report_artifacts' into 'master'

Validate security report artifacts

See merge request gitlab-org/gitlab!58667
parents 9eeaf517 4836e251
......@@ -412,6 +412,46 @@ You can do it quickly by following the hyperlink given to run a new pipeline.
![Run a new pipeline](img/outdated_report_pipeline_v12_9.png)
## Security report validation
> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/321918) in GitLab 13.11.
As of GitLab 13.11, we've introduced the **optional** validation of the security report artifacts based on the
[report schemas](https://gitlab.com/gitlab-org/security-products/security-report-schemas/-/tree/master/dist).
If you enable validation, GitLab validates the report artifacts before ingesting the vulnerabilities.
This prevents ingesting broken vulnerability data into the database.
### Enable security report validation
To enable report artifacts validation, set the `VALIDATE_SCHEMA` environment variable to `"true"` for the jobs in the `.gitlab-ci.yml` file.
For example, the configuration below enables validation for only the `sast` job:
```yaml
include:
- template: Security/Dependency-Scanning.gitlab-ci.yml
- template: Security/License-Scanning.gitlab-ci.yml
- template: Security/SAST.gitlab-ci.yml
- template: Security/Secret-Detection.gitlab-ci.yml
stages:
- security-scan
dependency_scanning:
stage: security-scan
license_scanning:
stage: security-scan
sast:
stage: security-scan
variables:
VALIDATE_SCHEMA: "true"
.secret-analyzer:
stage: security-scan
```
## Troubleshooting
### Getting error message `sast job: stage parameter should be [some stage name here]`
......
......@@ -25,7 +25,7 @@ module Security
def execute
findings = requested_reports.each_with_object([]) do |(type, report), findings|
raise ParseError, 'JSON parsing failed' if report.error.is_a?(Gitlab::Ci::Parsers::Security::Common::SecurityReportParserError)
raise ParseError, 'JSON parsing failed' if report.errored?
normalized_findings = normalize_report_findings(
report.findings,
......
......@@ -10,6 +10,7 @@ module EE
extend ActiveSupport::Concern
extend ::Gitlab::Utils::Override
VALIDATE_SCHEMA_VARIABLE_NAME = 'VALIDATE_SCHEMA'
LICENSED_PARSER_FEATURES = {
sast: :sast,
secret_detection: :secret_detection,
......@@ -82,8 +83,8 @@ module EE
next unless project.feature_available?(LICENSED_PARSER_FEATURES.fetch(file_type))
parse_security_artifact_blob(security_report, blob)
rescue => e
security_report.error = e
rescue
security_report.add_error('ParsingError')
end
end
end
......@@ -161,6 +162,10 @@ module EE
variables_hash.fetch(key, default)
end
def validate_schema?
variables[VALIDATE_SCHEMA_VARIABLE_NAME]&.value&.casecmp?('true')
end
private
def variables_hash
......
......@@ -56,6 +56,8 @@ module EE
scope :api_fuzzing_reports, -> do
with_file_types(API_FUZZING_REPORT_TYPES)
end
delegate :validate_schema?, to: :job
end
class_methods do
......@@ -78,14 +80,16 @@ module EE
# parsed report regardless of the `file_type` but this will
# require more effort so we can have this security reports
# specific method here for now.
def security_report
def security_report(validate: false)
strong_memoize(:security_report) do
next unless file_type.in?(SECURITY_REPORT_FILE_TYPES)
report = ::Gitlab::Ci::Reports::Security::Report.new(file_type, job.pipeline, nil).tap do |report|
each_blob do |blob|
::Gitlab::Ci::Parsers.fabricate!(file_type, blob, report).parse!
::Gitlab::Ci::Parsers.fabricate!(file_type, blob, report, validate: (validate && validate_schema?)).parse!
end
rescue
report.add_error('ParsingError')
end
# This will remove the duplicated findings within the artifact itself
......
......@@ -40,5 +40,9 @@ module Security
scope :latest_successful_by_build, -> { joins(:build).where(ci_builds: { status: 'success', retried: [nil, false] }) }
delegate :project, :name, to: :build
def has_errors?
info&.fetch('errors', []).present?
end
end
end
......@@ -22,7 +22,7 @@ module Security
@source_reports.first.type,
@source_reports.first.pipeline,
@source_reports.first.created_at
)
).tap { |report| report.errors = source_reports.flat_map(&:errors) }
@findings = []
end
......
......@@ -50,7 +50,7 @@ module Security
# and `INFINITY` for all the other scan types. There is no problem with
# calling this method for all the scan types to get rid of branching.
def scanner_order_for(artifact)
MergeReportsService::ANALYZER_ORDER.fetch(artifact.security_report.primary_scanner&.external_id, Float::INFINITY)
MergeReportsService::ANALYZER_ORDER.fetch(artifact.security_report(validate: true).primary_scanner&.external_id, Float::INFINITY)
end
def store_scan_for(artifact, deduplicate)
......
......@@ -19,6 +19,8 @@ module Security
end
def execute
return deduplicate if security_scan.has_errors?
StoreFindingsMetadataService.execute(security_scan, security_report)
deduplicate_findings? ? update_deduplicated_findings : register_finding_keys
......@@ -31,7 +33,9 @@ module Security
delegate :security_report, :project, to: :artifact, private: true
def security_scan
@security_scan ||= Security::Scan.safe_find_or_create_by!(build: artifact.job, scan_type: artifact.file_type)
@security_scan ||= Security::Scan.safe_find_or_create_by!(build: artifact.job, scan_type: artifact.file_type) do |scan|
scan.info['errors'] = security_report.errors.map(&:stringify_keys) if security_report.errored?
end
end
def deduplicate_findings?
......
---
title: Introduce security report schema validation
merge_request: 58667
author:
type: added
......@@ -7,17 +7,20 @@ module Gitlab
class Common
SecurityReportParserError = Class.new(Gitlab::Ci::Parsers::ParserError)
def self.parse!(json_data, report, vulnerability_finding_signatures_enabled = false)
new(json_data, report, vulnerability_finding_signatures_enabled).parse!
def self.parse!(json_data, report, vulnerability_finding_signatures_enabled = false, validate: false)
new(json_data, report, vulnerability_finding_signatures_enabled, validate: validate).parse!
end
def initialize(json_data, report, vulnerability_finding_signatures_enabled = false)
def initialize(json_data, report, vulnerability_finding_signatures_enabled = false, validate: false)
@json_data = json_data
@report = report
@validate = validate
@vulnerability_finding_signatures_enabled = vulnerability_finding_signatures_enabled
end
def parse!
return report_data unless valid?
raise SecurityReportParserError, "Invalid report format" unless report_data.is_a?(Hash)
create_scanner
......@@ -34,7 +37,19 @@ module Gitlab
private
attr_reader :json_data, :report
attr_reader :json_data, :report, :validate
def valid?
return true if !validate || schema_validator.valid?
schema_validator.errors.each { |error| report.add_error('Schema', error) }
false
end
def schema_validator
@schema_validator ||= ::Gitlab::Ci::Parsers::Security::Validators::SchemaValidator.new(report.type, report_data)
end
def report_data
@report_data ||= Gitlab::Json.parse!(json_data)
......
# frozen_string_literal: true
module Gitlab
module Ci
module Parsers
module Security
module Validators
class SchemaValidator
class Schema
ROOT_PATH = File.join(__dir__, 'schemas')
def initialize(report_type)
@report_type = report_type
end
delegate :validate, to: :schemer
private
attr_reader :report_type
def schemer
JSONSchemer.schema(pathname)
end
def pathname
Pathname.new(schema_path)
end
def schema_path
File.join(ROOT_PATH, file_name)
end
def file_name
"#{report_type}.json"
end
end
def initialize(report_type, report_data)
@report_type = report_type
@report_data = report_data
end
def valid?
errors.empty?
end
def errors
@errors ||= schema.validate(report_data).map { |error| JSONSchemer::Errors.pretty(error) }
end
private
attr_reader :report_type, :report_data
def schema
Schema.new(report_type)
end
end
end
end
end
end
end
This diff is collapsed.
This diff is collapsed.
......@@ -6,7 +6,7 @@ module Gitlab
module Security
class Report
attr_reader :created_at, :type, :pipeline, :findings, :scanners, :identifiers
attr_accessor :scan, :scanned_resources, :error
attr_accessor :scan, :scanned_resources, :errors
delegate :project_id, to: :pipeline
......@@ -18,14 +18,19 @@ module Gitlab
@scanners = {}
@identifiers = {}
@scanned_resources = []
@errors = []
end
def commit_sha
pipeline.sha
end
def add_error(type, message = 'An unexpected error happened!')
errors << { type: type, message: message }
end
def errored?
error.present?
errors.present?
end
def add_scanner(scanner)
......
......@@ -22,6 +22,77 @@ RSpec.describe Gitlab::Ci::Parsers::Security::Common do
artifact.each_blob { |blob| described_class.parse!(blob, report, vulnerability_finding_signatures_enabled) }
end
describe 'schema validation' do
let(:validator_class) { Gitlab::Ci::Parsers::Security::Validators::SchemaValidator }
let(:parser) { described_class.new('{}', report, vulnerability_finding_signatures_enabled, validate: validate) }
subject(:parse_report) { parser.parse! }
before do
allow(validator_class).to receive(:new).and_call_original
end
context 'when the validate flag is set as `false`' do
let(:validate) { false }
it 'does not run the validation logic' do
parse_report
expect(validator_class).not_to have_received(:new)
end
end
context 'when the validate flag is set as `true`' do
let(:validate) { true }
let(:valid?) { false }
before do
allow_next_instance_of(validator_class) do |instance|
allow(instance).to receive(:valid?).and_return(valid?)
allow(instance).to receive(:errors).and_return(['foo'])
end
allow(parser).to receive_messages(create_scanner: true, create_scan: true, collate_remediations: [])
end
it 'instantiates the validator with correct params' do
parse_report
expect(validator_class).to have_received(:new).with(report.type, {})
end
context 'when the report data is not valid according to the schema' do
it 'adds errors to the report' do
expect { parse_report }.to change { report.errors }.from([]).to([{ message: 'foo', type: 'Schema' }])
end
it 'does not try to create report entities' do
parse_report
expect(parser).not_to have_received(:create_scanner)
expect(parser).not_to have_received(:create_scan)
expect(parser).not_to have_received(:collate_remediations)
end
end
context 'when the report data is valid according to the schema' do
let(:valid?) { true }
it 'does not add errors to the report' do
expect { parse_report }.not_to change { report.errors }.from([])
end
it 'keeps the execution flow as normal' do
parse_report
expect(parser).to have_received(:create_scanner)
expect(parser).to have_received(:create_scan)
expect(parser).to have_received(:collate_remediations)
end
end
end
end
describe 'parsing finding.name' do
let(:artifact) { build(:ee_ci_job_artifact, :common_security_report_with_blank_names) }
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Ci::Parsers::Security::Validators::SchemaValidator do
using RSpec::Parameterized::TableSyntax
where(:report_type, :expected_errors, :valid_data) do
:container_scanning | ['root is missing required keys: vulnerabilities'] | { 'version' => '10.0.0', 'vulnerabilities' => [] }
:coverage_fuzzing | ['root is missing required keys: vulnerabilities'] | { 'version' => '10.0.0', 'vulnerabilities' => [] }
:dast | ['root is missing required keys: vulnerabilities'] | { 'version' => '10.0.0', 'vulnerabilities' => [] }
:dependency_scanning | ['root is missing required keys: dependency_files, vulnerabilities'] | { 'version' => '10.0.0', 'vulnerabilities' => [], 'dependency_files' => [] }
:sast | ['root is missing required keys: vulnerabilities'] | { 'version' => '10.0.0', 'vulnerabilities' => [] }
:secret_detection | ['root is missing required keys: vulnerabilities'] | { 'version' => '10.0.0', 'vulnerabilities' => [] }
end
with_them do
let(:validator) { described_class.new(report_type, report_data) }
describe '#valid?' do
subject { validator.valid? }
context 'when given data is invalid according to the schema' do
let(:report_data) { {} }
it { is_expected.to be_falsey }
end
context 'when given data is valid according to the schema' do
let(:report_data) { valid_data }
it { is_expected.to be_truthy }
end
end
describe '#errors' do
let(:report_data) { { 'version' => '10.0.0' } }
subject { validator.errors }
it { is_expected.to eq(expected_errors) }
end
end
end
......@@ -139,4 +139,38 @@ RSpec.describe Gitlab::Ci::Reports::Security::Report do
it { is_expected.to eq(scanner_1) }
end
describe '#add_error' do
context 'when the message is not given' do
it 'adds a new error to report with the generic error message' do
expect { report.add_error('foo') }.to change { report.errors }
.from([])
.to([{ type: 'foo', message: 'An unexpected error happened!' }])
end
end
context 'when the message is given' do
it 'adds a new error to report' do
expect { report.add_error('foo', 'bar') }.to change { report.errors }
.from([])
.to([{ type: 'foo', message: 'bar' }])
end
end
end
describe 'errored?' do
subject { report.errored? }
context 'when the report does not have any errors' do
it { is_expected.to be_falsey }
end
context 'when the report has errors' do
before do
report.add_error('foo', 'bar')
end
it { is_expected.to be_truthy }
end
end
end
......@@ -639,4 +639,34 @@ RSpec.describe Ci::Build do
end
end
end
describe '#validate_schema?' do
let(:ci_build) { build(:ci_build) }
subject { ci_build.validate_schema? }
before do
ci_build.yaml_variables = variables
end
context 'when the yaml variables does not have the configuration' do
let(:variables) { [] }
it { is_expected.to be_falsey }
end
context 'when the yaml variables has the configuration' do
context 'when the configuration is set as `false`' do
let(:variables) { [{ key: 'VALIDATE_SCHEMA', value: 'false' }] }
it { is_expected.to be_falsey }
end
context 'when the configuration is set as `true`' do
let(:variables) { [{ key: 'VALIDATE_SCHEMA', value: 'true' }] }
it { is_expected.to be_truthy }
end
end
end
end
......@@ -6,6 +6,8 @@ RSpec.describe Ci::JobArtifact do
using RSpec::Parameterized::TableSyntax
include EE::GeoHelpers
it { is_expected.to delegate_method(:validate_schema?).to(:job) }
describe '#destroy' do
let_it_be(:primary) { create(:geo_node, :primary) }
let_it_be(:secondary) { create(:geo_node) }
......@@ -223,7 +225,8 @@ RSpec.describe Ci::JobArtifact do
describe '#security_report' do
let(:job_artifact) { create(:ee_ci_job_artifact, :sast) }
let(:security_report) { job_artifact.security_report }
let(:validate) { false }
let(:security_report) { job_artifact.security_report(validate: validate) }
subject(:findings_count) { security_report.findings.length }
......@@ -248,6 +251,44 @@ RSpec.describe Ci::JobArtifact do
it { is_expected.to be(security_report?) }
end
end
context 'when the parsing fails' do
let(:job_artifact) { create(:ee_ci_job_artifact, :sast) }
let(:errors) { security_report.errors }
before do
allow(::Gitlab::Ci::Parsers).to receive(:fabricate!).and_raise(:foo)
end
it 'returns an errored report instance' do
expect(errors).to eql([{ type: 'ParsingError', message: 'An unexpected error happened!' }])
end
end
describe 'schema validation' do
where(:validate, :build_is_subject_to_validation?, :expected_validate_flag) do
false | false | false
false | true | false
true | false | false
true | true | true
end
with_them do
let(:mock_parser) { double(:parser, parse!: true) }
let(:expected_parser_args) { ['sast', instance_of(String), instance_of(::Gitlab::Ci::Reports::Security::Report), validate: expected_validate_flag] }
before do
allow(job_artifact.job).to receive(:validate_schema?).and_return(build_is_subject_to_validation?)
allow(::Gitlab::Ci::Parsers).to receive(:fabricate!).and_return(mock_parser)
end
it 'calls the parser with the correct arguments' do
security_report
expect(::Gitlab::Ci::Parsers).to have_received(:fabricate!).with(*expected_parser_args)
end
end
end
end
describe '#clear_security_report' do
......
......@@ -44,6 +44,34 @@ RSpec.describe Security::Scan do
it { is_expected.to delegate_method(:name).to(:build) }
end
describe '#has_errors?' do
let(:scan) { build(:security_scan, info: info) }
subject { scan.has_errors? }
context 'when the info attribute is nil' do
let(:info) { nil }
it { is_expected.to be_falsey }
end
context 'when the info attribute presents' do
let(:info) { { errors: errors } }
context 'when there is no error' do
let(:errors) { [] }
it { is_expected.to be_falsey }
end
context 'when there are errors' do
let(:errors) { [{ type: 'Foo', message: 'Bar' }] }
it { is_expected.to be_truthy }
end
end
end
describe '.by_scan_types' do
let!(:sast_scan) { create(:security_scan, scan_type: :sast) }
let!(:dast_scan) { create(:security_scan, scan_type: :dast) }
......
......@@ -136,6 +136,17 @@ RSpec.describe Security::MergeReportsService, '#execute' do
subject(:merged_report) { merge_service.execute }
describe 'errors on target report' do
subject { merged_report.errors }
before do
report_1.add_error('foo', 'bar')
report_2.add_error('zoo', 'baz')
end
it { is_expected.to eq([{ type: 'foo', message: 'bar' }, { type: 'zoo', message: 'baz' }]) }
end
it 'copies scanners into target report and eliminates duplicates' do
expect(merged_report.scanners.values).to contain_exactly(scanner_1, scanner_2, scanner_3)
end
......
......@@ -60,6 +60,25 @@ RSpec.describe Security::StoreGroupedScansService do
allow(Security::StoreScanService).to receive(:execute).and_return(true)
end
context 'schema validation' do
let(:mock_scanner) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'unknown') }
let(:mock_report) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: mock_scanner) }
before do
allow(artifact_1).to receive(:security_report).and_return(mock_report)
allow(artifact_2).to receive(:security_report).and_return(mock_report)
allow(artifact_3).to receive(:security_report).and_return(mock_report)
end
it 'accesses the validated security reports' do
store_scan_group
expect(artifact_1).to have_received(:security_report).with(validate: true)
expect(artifact_2).to have_received(:security_report).with(validate: true)
expect(artifact_3).to have_received(:security_report).with(validate: true)
end
end
context 'when the artifacts are not dependency_scanning' do
it 'calls the Security::StoreScanService with ordered artifacts' do
store_scan_group
......
......@@ -57,6 +57,22 @@ RSpec.describe Security::StoreScanService do
known_keys.add(finding_key)
end
context 'when the report has some errors' do
before do
artifact.security_report.errors << { 'type' => 'foo', 'message' => 'bar' }
end
it 'does not call the `Security::StoreFindingsMetadataService` and returns false' do
expect(store_scan).to be(false)
expect(Security::StoreFindingsMetadataService).not_to have_received(:execute)
end
end
context 'when the report does not have any errors' do
before do
artifact.security_report.errors.clear
end
it 'calls the `Security::StoreFindingsMetadataService` to store findings' do
store_scan
......@@ -144,4 +160,5 @@ RSpec.describe Security::StoreScanService do
end
end
end
end
end
......@@ -15,8 +15,8 @@ module Gitlab
}
end
def self.fabricate!(file_type, *args)
parsers.fetch(file_type.to_sym).new(*args)
def self.fabricate!(file_type, *args, **kwargs)
parsers.fetch(file_type.to_sym).new(*args, **kwargs)
rescue KeyError
raise ParserNotFoundError, "Cannot find any parser matching file type '#{file_type}'"
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment