Commit 981859f8 authored by Adam Hegyi

Merge branch 'remove-duplicated-cs-findings-without-vid' into 'master'

Remove duplicate broken container scanning findings

See merge request gitlab-org/gitlab!42609
parents 6366318b 54952404
# frozen_string_literal: true
# Schedules background migration jobs over container scanning findings that
# have a broken location fingerprint and no vulnerability_id.
#
# Jobs are only queued when Gitlab.com? is true; otherwise `up` returns early.
class RemoveDuplicatedCsFindingsWithoutVulnerabilityId < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  BATCH_SIZE = 1_000
  INTERVAL = 2.minutes

  disable_ddl_transaction!

  # 1_500 records will be deleted
  def up
    return unless Gitlab.com?

    job_class = Gitlab::BackgroundMigration::RemoveDuplicatedCsFindingsWithoutVulnerabilityId

    # Only findings matching all three conditions are used to build the ID ranges.
    broken_findings = job_class::Finding
      .container_scanning
      .with_broken_fingerprint
      .where(vulnerability_id: nil)

    queue_background_migration_jobs_by_range_at_intervals(
      broken_findings,
      job_class.to_s.demodulize,
      INTERVAL,
      batch_size: BATCH_SIZE
    )
  end

  def down
    # no-op
  end
end
8d9e75f7c6344b03cb740fa691fcbb5bea1751802741229158701bc1af975897
\ No newline at end of file
---
title: Remove duplicate broken container scanning findings
merge_request: 42609
author:
type: other
# frozen_string_literal: true
module EE
  module Gitlab
    module BackgroundMigration
      # EE override of the background migration job. For every container
      # scanning finding in the given ID range that has a broken fingerprint
      # and no vulnerability_id, it looks for a sibling finding carrying the
      # re-serialized fingerprint; when one exists, the sibling is deleted and
      # the broken finding's fingerprint is rewritten.
      module RemoveDuplicatedCsFindingsWithoutVulnerabilityId
        extend ::Gitlab::Utils::Override

        # Lightweight model over the `vulnerability_occurrences` table, used
        # only by this migration.
        class Finding < ActiveRecord::Base
          include ::ShaAttribute
          include ::EachBatch

          self.table_name = 'vulnerability_occurrences'

          # A fingerprint is treated as broken when its stored length is 40.
          BROKEN_FINGERPRINT_LENGTH = 40

          REPORT_TYPES = {
            container_scanning: 2
          }.with_indifferent_access.freeze

          enum report_type: REPORT_TYPES
          sha_attribute :location_fingerprint

          scope :with_broken_fingerprint, -> { where("length(location_fingerprint) = ?", BROKEN_FINGERPRINT_LENGTH) }
        end

        # @param start_id [Integer] first finding ID of the batch (inclusive)
        # @param stop_id [Integer] last finding ID of the batch (inclusive)
        override :perform
        def perform(start_id, stop_id)
          broken_findings = Finding
            .select(:id, :project_id, :primary_identifier_id, :location_fingerprint, :scanner_id)
            .container_scanning
            .where(id: start_id..stop_id, vulnerability_id: nil)
            .with_broken_fingerprint

          broken_findings.each do |broken|
            # NOTE(review): relies on ShaAttribute's serialize round trip to
            # turn the broken value into one usable for lookup and update —
            # confirm against Gitlab::Database::ShaAttribute.
            repaired = ::Gitlab::Database::ShaAttribute.new.serialize(broken.location_fingerprint).to_s

            twin = Finding
              .container_scanning
              .where(
                project_id: broken.project_id,
                primary_identifier_id: broken.primary_identifier_id,
                scanner_id: broken.scanner_id,
                location_fingerprint: repaired,
                vulnerability_id: nil
              )
              .where.not(id: broken.id)
              .first

            # Findings without a matching sibling are left untouched.
            next unless twin.present?

            # Delete the sibling and rewrite the fingerprint atomically.
            Finding.transaction do
              twin.delete
              broken.update(location_fingerprint: repaired)
            end
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Exercises #perform directly over an ID range containing three findings whose
# fingerprints are stored as raw 40-character hex text, each paired with a
# sibling finding that stores the same fingerprint in serialized form.
RSpec.describe Gitlab::BackgroundMigration::RemoveDuplicatedCsFindingsWithoutVulnerabilityId, :migration, schema: 20200917135802 do
  let(:migration) { 'RemoveDuplicatedCsFindingsWithoutVulnerabilityId'}
  let(:namespaces) { table(:namespaces) }
  let(:notes) { table(:notes) }
  let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
  let(:projects) { table(:projects) }
  let(:findings) { table(:vulnerability_occurrences) }
  let(:scanners) { table(:vulnerability_scanners) }
  let(:identifiers) { table(:vulnerability_identifiers) }
  let(:finding_identifiers) { table(:vulnerability_occurrence_identifiers) }

  let!(:project) { projects.create!(id: 12058473, namespace_id: group.id, name: 'gitlab', path: 'gitlab') }

  let!(:scanner) do
    scanners.create!(id: 6, project_id: project.id, external_id: 'clair', name: 'Security Scanner')
  end

  it 'removes duplicate findings and vulnerabilities' do
    allow(::Gitlab).to receive(:com?).and_return(true)

    sha_type = Gitlab::Database::ShaAttribute.new
    ids = [231411, 231412, 231413, 231500, 231600, 231700, 231800]
    fingerprints = %w(
      6c871440eb9f7618b9aef25e5246acddff6ed7a1
      9d1a47927875f1aee1e2b9f16c25a8ff7586f1a6
      d7da2cc109c18d890ab239e833524d451cc45246
      6c871440eb9f7618b9aef25e5246acddff6ed7a1
      9d1a47927875f1aee1e2b9f16c25a8ff7586f1a6
      d7da2cc109c18d890ab239e833524d451cc45246
      d7da2cc109c18d890ab239e833524d453cd45246
    )
    expected_fingerprints = %w(
      6c871440eb9f7618b9aef25e5246acddff6ed7a1
      9d1a47927875f1aee1e2b9f16c25a8ff7586f1a6
      d7da2cc109c18d890ab239e833524d451cc45246
      d7da2cc109c18d890ab239e833524d453cd45246
    )

    7.times { |n| identifiers.create!(vulnerability_identifer_params(n, project.id)) }

    # First three findings: fingerprint written as raw hex text.
    ids.first(3).each_with_index do |finding_id, n|
      overrides = { id: finding_id, location_fingerprint: fingerprints[n], vulnerability_id: nil }
      findings.create!(finding_params(n, project.id).merge(overrides))
    end

    # Remaining four findings: fingerprint written in serialized form.
    ids.drop(3).each_with_index do |finding_id, n|
      overrides = {
        id: finding_id,
        location_fingerprint: sha_type.serialize(fingerprints[n + 3]).to_s,
        vulnerability_id: nil
      }
      findings.create!(finding_params(n, project.id).merge(overrides))
    end

    ids.each_with_index do |finding_id, n|
      finding_identifiers.create!(occurrence_id: finding_id, identifier_id: n)
    end

    expect(finding_identifiers.all.count).to eq(7)

    described_class.new.perform(231411, 231413)

    expect(findings.ids).to match_array([231411, 231412, 231413, 231800])
    expect(findings.where(report_type: 2).count).to eq(4)
    # NOTE(review): the join rows of deleted findings are presumably removed by
    # a database-level cascade — confirm against the schema.
    expect(finding_identifiers.all.count).to eq(4)

    location_fingerprints = findings.pluck(:location_fingerprint).map { |raw| sha_type.deserialize(raw) }

    expect(location_fingerprints).to match_array(expected_fingerprints)
  end

  # Attributes for a vulnerability identifier row with the given primary key.
  def vulnerability_identifer_params(id, project_id)
    {
      id: id,
      project_id: project_id,
      fingerprint: "d432c2ad2953e8bd587a3a43b3ce309b5b0154c#{id}",
      external_type: 'SECURITY_ID',
      external_id: 'SECURITY_0',
      name: 'SECURITY_IDENTIFIER 0'
    }
  end

  # Attributes for a container scanning finding (report_type 2) using scanner 6;
  # callers override id, location_fingerprint and vulnerability_id.
  def finding_params(primary_identifier_id, project_id)
    factory_attrs = attributes_for(:vulnerabilities_finding)

    {
      severity: 0,
      confidence: 5,
      report_type: 2,
      project_id: project_id,
      scanner_id: 6,
      primary_identifier_id: primary_identifier_id,
      project_fingerprint: factory_attrs[:project_fingerprint],
      location_fingerprint: Digest::SHA1.hexdigest(SecureRandom.hex(10)),
      uuid: SecureRandom.uuid,
      name: factory_attrs[:name],
      metadata_version: '1.3',
      raw_metadata: factory_attrs[:raw_metadata]
    }
  end

  # Creates identifiers with IDs 1..number_of under project 123.
  def create_identifier(number_of)
    1.upto(number_of) do |identifier_id|
      identifiers.create!(
        id: identifier_id,
        project_id: 123,
        fingerprint: "d432c2ad2953e8bd587a3a43b3ce309b5b0154c#{identifier_id}",
        external_type: 'SECURITY_ID',
        external_id: 'SECURITY_0',
        name: 'SECURITY_IDENTIFIER 0'
      )
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
require_migration!
# Verifies that the post-deployment migration schedules background jobs in
# batches when Gitlab.com? is true, and schedules nothing otherwise.
RSpec.describe RemoveDuplicatedCsFindingsWithoutVulnerabilityId, :migration do
  let(:migration) { 'RemoveDuplicatedCsFindingsWithoutVulnerabilityId'}
  let(:namespaces) { table(:namespaces) }
  let(:notes) { table(:notes) }
  let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
  let(:projects) { table(:projects) }
  let(:findings) { table(:vulnerability_occurrences) }
  let(:scanners) { table(:vulnerability_scanners) }
  let(:identifiers) { table(:vulnerability_identifiers) }
  let(:finding_identifiers) { table(:vulnerability_occurrence_identifiers) }

  let!(:project) { projects.create!(id: 12058473, namespace_id: group.id, name: 'gitlab', path: 'gitlab') }

  let!(:scanner) do
    scanners.create!(id: 6, project_id: project.id, external_id: 'clair', name: 'Security Scanner')
  end

  before do
    # Shrink the batch size so three broken findings span two jobs.
    stub_const("#{described_class}::BATCH_SIZE", 2)
  end

  around do |example|
    Timecop.freeze { Sidekiq::Testing.fake! { example.run } }
  end

  it 'removes duplicate findings and vulnerabilities' do
    allow(::Gitlab).to receive(:com?).and_return(true)

    create_findings_with_duplicates

    expect(finding_identifiers.all.count).to eq(7)

    migrate!

    expect(migration)
      .to be_scheduled_delayed_migration(2.minutes, 231411, 231412)
    expect(migration)
      .to be_scheduled_delayed_migration(4.minutes, 231413, 231413)
    expect(BackgroundMigrationWorker.jobs.size).to eq(2)
  end

  it 'skips migration for on premise' do
    # On-premise instances are not GitLab.com, so the stub must return false.
    # The same findings are seeded so this example fails if the guard is removed.
    allow(::Gitlab).to receive(:com?).and_return(false)

    create_findings_with_duplicates

    migrate!

    expect(BackgroundMigrationWorker.jobs.size).to eq(0)
  end

  # Seeds seven identifiers, seven findings and their join rows:
  # three findings with the fingerprint stored as raw hex text, and four with
  # the fingerprint stored in serialized form.
  def create_findings_with_duplicates
    sha_type = Gitlab::Database::ShaAttribute.new
    ids = [231411, 231412, 231413, 231500, 231600, 231700, 231800]
    fingerprints = %w(
      6c871440eb9f7618b9aef25e5246acddff6ed7a1
      9d1a47927875f1aee1e2b9f16c25a8ff7586f1a6
      d7da2cc109c18d890ab239e833524d451cc45246
      6c871440eb9f7618b9aef25e5246acddff6ed7a1
      9d1a47927875f1aee1e2b9f16c25a8ff7586f1a6
      d7da2cc109c18d890ab239e833524d451cc45246
      d7da2cc109c18d890ab239e833524d453cd45246
    )

    7.times { |x| identifiers.create!(vulnerability_identifer_params(x, project.id)) }

    3.times do |x|
      findings.create!(finding_params(x, project.id).merge(id: ids[x], location_fingerprint: fingerprints[x], vulnerability_id: nil))
    end

    (3..6).each do |x|
      findings.create!(
        finding_params(x - 3, project.id).merge(
          id: ids[x],
          location_fingerprint: sha_type.serialize(fingerprints[x]).to_s,
          vulnerability_id: nil
        )
      )
    end

    7.times { |x| finding_identifiers.create!(occurrence_id: ids[x], identifier_id: x) }
  end

  # Attributes for a vulnerability identifier row with the given primary key.
  def vulnerability_identifer_params(id, project_id)
    {
      id: id,
      project_id: project_id,
      fingerprint: 'd432c2ad2953e8bd587a3a43b3ce309b5b0154c' + id.to_s,
      external_type: 'SECURITY_ID',
      external_id: 'SECURITY_0',
      name: 'SECURITY_IDENTIFIER 0'
    }
  end

  # Attributes for a vulnerability row owned by the given project and author.
  def vulnerability_params(project_id, user_id)
    {
      title: 'title',
      state: 1,
      confidence: 5,
      severity: 6,
      report_type: 2,
      project_id: project_id,
      author_id: user_id
    }
  end

  # Attributes for a container scanning finding (report_type 2) using scanner 6;
  # callers override id, location_fingerprint and vulnerability_id.
  def finding_params(primary_identifier_id, project_id)
    attrs = attributes_for(:vulnerabilities_finding)

    {
      severity: 0,
      confidence: 5,
      report_type: 2,
      project_id: project_id,
      scanner_id: 6,
      primary_identifier_id: primary_identifier_id,
      project_fingerprint: attrs[:project_fingerprint],
      location_fingerprint: Digest::SHA1.hexdigest(SecureRandom.hex(10)),
      uuid: SecureRandom.uuid,
      name: attrs[:name],
      metadata_version: '1.3',
      raw_metadata: attrs[:raw_metadata]
    }
  end

  # Creates identifiers with IDs 1..number_of under project 123.
  def create_identifier(number_of)
    (1..number_of).each do |identifier_id|
      identifiers.create!(id: identifier_id,
                          project_id: 123,
                          fingerprint: 'd432c2ad2953e8bd587a3a43b3ce309b5b0154c' + identifier_id.to_s,
                          external_type: 'SECURITY_ID',
                          external_id: 'SECURITY_0',
                          name: 'SECURITY_IDENTIFIER 0')
    end
  end
end
# frozen_string_literal: true
# rubocop:disable Style/Documentation
module Gitlab
  module BackgroundMigration
    # Job class for the background migration. In this file #perform is
    # intentionally empty; the statement following this definition mixes in an
    # EE module by name.
    class RemoveDuplicatedCsFindingsWithoutVulnerabilityId
      # No-op: accepts the batch boundaries and does nothing.
      #
      # @param start_id [Integer] first ID of the batch
      # @param stop_id [Integer] last ID of the batch
      # @return [nil]
      def perform(start_id, stop_id); end
    end
  end
end
# Mixes the EE module named below into the job class via `prepend_if_ee`.
# NOTE(review): presumably a no-op when EE code is not available — confirm
# against the helper's definition.
Gitlab::BackgroundMigration::RemoveDuplicatedCsFindingsWithoutVulnerabilityId.prepend_if_ee('EE::Gitlab::BackgroundMigration::RemoveDuplicatedCsFindingsWithoutVulnerabilityId')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment