Commit 4ffd00cf authored by can eldem's avatar can eldem Committed by Adam Hegyi

Remove duplicated container scanning findings

This migration will only run in Gitlab.com
parent 51c7e790
---
title: Remove duplicated container scanning findings
merge_request: 42041
author:
type: other
# frozen_string_literal: true
class TmpIndexForFixingInconsistentVulnerabilityOccurrences < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
INDEX_NAME = 'tmp_index_for_fixing_inconsistent_vulnerability_occurrences'
disable_ddl_transaction!
def up
# report_type: 2 container scanning
add_concurrent_index(:vulnerability_occurrences, :id,
where: "LENGTH(location_fingerprint) = 40 AND report_type = 2",
name: INDEX_NAME)
end
def down
remove_concurrent_index_by_name(:vulnerability_occurrences, INDEX_NAME)
end
end
# frozen_string_literal: true
class RemoveDuplicatedCsFindings < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
BATCH_SIZE = 1_000
INTERVAL = 2.minutes
# 23_893 records will be updated
# 23_893 records will be deleted
def up
return unless Gitlab.com?
migration = Gitlab::BackgroundMigration::RemoveDuplicateCsFindings
migration_name = migration.to_s.demodulize
relation = migration::Finding.container_scanning.where("LENGTH(location_fingerprint) = 40")
queue_background_migration_jobs_by_range_at_intervals(relation,
migration_name,
INTERVAL,
batch_size: BATCH_SIZE)
end
def down
# no-op
# intentionally blank
end
end
205580c1ba38fd03ce025dfd1d9be67756ade4fd28ba957bb71cd9e5e89ef190
\ No newline at end of file
ba431f19818b93da91c4ed2c3f25dc8e2f62c6d9ac07b15f6d01f21f085c1730
\ No newline at end of file
......@@ -21400,6 +21400,8 @@ CREATE INDEX tmp_build_stage_position_index ON public.ci_builds USING btree (sta
CREATE INDEX tmp_index_for_email_unconfirmation_migration ON public.emails USING btree (id) WHERE (confirmed_at IS NOT NULL);
CREATE INDEX tmp_index_for_fixing_inconsistent_vulnerability_occurrences ON public.vulnerability_occurrences USING btree (id) WHERE ((length(location_fingerprint) = 40) AND (report_type = 2));
CREATE UNIQUE INDEX unique_merge_request_metrics_by_merge_request_id ON public.merge_request_metrics USING btree (merge_request_id);
CREATE UNIQUE INDEX users_security_dashboard_projects_unique_index ON public.users_security_dashboard_projects USING btree (project_id, user_id);
......
# frozen_string_literal: true
module EE
module Gitlab
module BackgroundMigration
module RemoveDuplicateCsFindings
extend ::Gitlab::Utils::Override
class Finding < ActiveRecord::Base
include ::ShaAttribute
include ::EachBatch
BROKEN_FINGERPRINT_LENGTH = 40
belongs_to :vulnerability, class_name: 'Vulnerability'
self.table_name = 'vulnerability_occurrences'
REPORT_TYPES = {
container_scanning: 2
}.with_indifferent_access.freeze
enum report_type: REPORT_TYPES
sha_attribute :location_fingerprint
end
class Note < ActiveRecord::Base; end
class Vulnerability < ActiveRecord::Base
has_many :findings, class_name: 'Finding', inverse_of: :vulnerability
has_many :notes, class_name: 'Note', foreign_key: 'noteable_id'
def delete_notes
Note.where(project_id: project_id, noteable_type: 'Vulnerability', noteable_id: id).delete_all
end
end
override :perform
def perform(start_id, stop_id)
Finding.select(:id, :project_id, :primary_identifier_id, :location_fingerprint, :scanner_id)
.container_scanning
.where(id: start_id..stop_id)
.where("length(location_fingerprint) = ?", Finding::BROKEN_FINGERPRINT_LENGTH)
.each do |finding|
colliding_fingerprint = ::Gitlab::Database::ShaAttribute.new.serialize(finding.location_fingerprint).to_s
duplicated_finding = Finding.container_scanning.where(project_id: finding.project_id,
primary_identifier_id: finding.primary_identifier_id,
scanner_id: finding.scanner_id,
location_fingerprint: colliding_fingerprint).first
next if duplicated_finding.blank?
ActiveRecord::Base.transaction do
duplicated_finding.vulnerability.delete_notes
duplicated_finding.vulnerability.delete
duplicated_finding.delete
# update can be done without violating unique constraint
# index_vulnerability_occurrences_on_unique_keys
# since we included sha_attribute :location_fingerprint it will be updated in correct format
finding.update(location_fingerprint: colliding_fingerprint)
end
end
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::RemoveDuplicateCsFindings, :migration, schema: 20200910131218 do
let(:namespaces) { table(:namespaces) }
let(:notes) { table(:notes) }
let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
let(:projects) { table(:projects) }
let(:findings) { table(:vulnerability_occurrences) }
let(:scanners) { table(:vulnerability_scanners) }
let(:issues) { table(:issues) }
let(:epics) { table(:epics) }
let(:identifiers) { table(:vulnerability_identifiers) }
let(:vulnerabilities) { table(:vulnerabilities) }
let(:issue_links) { table(:vulnerability_issue_links) }
let(:finding_identifiers) { table(:vulnerability_occurrence_identifiers) }
let(:users) { table(:users) }
let!(:epic_1) { epics.create!(iid: 14532, title: 'from issue 1', group_id: group.id, author_id: user.id, title_html: 'any') }
let!(:project) { projects.create!(id: 12058473, namespace_id: group.id, name: 'gitlab', path: 'gitlab') }
let!(:user) { users.create!(id: 13, email: 'author@example.com', username: 'author', projects_limit: 10) }
let!(:scanner) do
scanners.create!(id: 6, project_id: project.id, external_id: 'clair', name: 'Security Scanner')
end
it 'removes duplicate findings and vulnerabilities' do
ids = [231411, 231412, 231413, 231500, 231600, 231700, 231800]
fingerprints = %w(
6c871440eb9f7618b9aef25e5246acddff6ed7a1
9d1a47927875f1aee1e2b9f16c25a8ff7586f1a6
d7da2cc109c18d890ab239e833524d451cc45246
6c871440eb9f7618b9aef25e5246acddff6ed7a1
9d1a47927875f1aee1e2b9f16c25a8ff7586f1a6
d7da2cc109c18d890ab239e833524d451cc45246
d7da2cc109c18d890ab239e833524d453cd45246
)
expected_fingerprints = %w(
6c871440eb9f7618b9aef25e5246acddff6ed7a1
9d1a47927875f1aee1e2b9f16c25a8ff7586f1a6
d7da2cc109c18d890ab239e833524d451cc45246
d7da2cc109c18d890ab239e833524d453cd45246
)
7.times.each { |x| identifiers.create!(vulnerability_identifer_params(x, project.id)) }
7.times.each { vulnerabilities.create!(vulnerability_params(project.id, user.id)) }
vulnerability_ids = vulnerabilities.all.ids
3.times.each { |x| findings.create!(finding_params(x, project.id).merge({ id: ids[x], location_fingerprint: fingerprints[x], vulnerability_id: vulnerability_ids[x] })) }
findings.create!(finding_params(0, project.id).merge({ id: ids[3], location_fingerprint: Gitlab::Database::ShaAttribute.new.serialize(fingerprints[3]).to_s, vulnerability_id: vulnerability_ids[3] }))
findings.create!(finding_params(1, project.id).merge({ id: ids[4], location_fingerprint: Gitlab::Database::ShaAttribute.new.serialize(fingerprints[4]).to_s, vulnerability_id: vulnerability_ids[4] }))
findings.create!(finding_params(2, project.id).merge({ id: ids[5], location_fingerprint: Gitlab::Database::ShaAttribute.new.serialize(fingerprints[5]).to_s, vulnerability_id: vulnerability_ids[5] }))
findings.create!(finding_params(3, project.id).merge({ id: ids[6], location_fingerprint: Gitlab::Database::ShaAttribute.new.serialize(fingerprints[6]).to_s, vulnerability_id: vulnerability_ids[6] }))
7.times.each { |x| finding_identifiers.create!(occurrence_id: ids[x], identifier_id: x ) }
1.upto(5).each { |x| issues.create!(description: '1234', state_id: 1, project_id: project.id, id: x) }
notes.create!(project_id: project.id, noteable_id: vulnerability_ids[4], noteable_type: "Vulnerability", note: "test note", system: true)
1.upto(5).each { |x| issue_links.create!(vulnerability_id: vulnerability_ids[x], issue_id: x ) }
expect(finding_identifiers.all.count). to eq(7)
expect(issue_links.all.count). to eq(5)
described_class.new.perform(231411, 231413)
expect(findings.ids).to match_array([231800, 231412, 231413, 231411])
expect(findings.where(report_type: 2).count). to eq(4)
expect(vulnerabilities.all.count). to eq(4)
expect(notes.all.count). to eq(0)
expect(finding_identifiers.all.count). to eq(4)
expect(issue_links.all.count). to eq(2)
location_fingerprints = findings.pluck(:location_fingerprint).flat_map { |x| Gitlab::Database::ShaAttribute.new.deserialize(x) }
expect(location_fingerprints).to match_array(expected_fingerprints)
end
def vulnerability_identifer_params(id, project_id)
{
id: id,
project_id: project_id,
fingerprint: 'd432c2ad2953e8bd587a3a43b3ce309b5b0154c' + id.to_s,
external_type: 'SECURITY_ID',
external_id: 'SECURITY_0',
name: 'SECURITY_IDENTIFIER 0'
}
end
def vulnerability_params(project_id, user_id)
{
title: 'title',
state: 1,
confidence: 5,
severity: 6,
report_type: 2,
project_id: project.id,
author_id: user.id
}
end
def finding_params(primary_identifier_id, project_id)
attrs = attributes_for(:vulnerabilities_finding)
{
severity: 0,
confidence: 5,
report_type: 2,
project_id: project_id,
scanner_id: 6,
primary_identifier_id: primary_identifier_id,
project_fingerprint: attrs[:project_fingerprint],
location_fingerprint: Digest::SHA1.hexdigest(SecureRandom.hex(10)),
uuid: attrs[:uuid],
name: attrs[:name],
metadata_version: '1.3',
raw_metadata: attrs[:raw_metadata]
}
end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20200910131218_remove_duplicated_cs_findings.rb')
RSpec.describe RemoveDuplicatedCsFindings, :migration do
let(:migration) { 'RemoveDuplicateCsFindings'}
let(:namespaces) { table(:namespaces) }
let(:notes) { table(:notes) }
let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
let(:projects) { table(:projects) }
let(:findings) { table(:vulnerability_occurrences) }
let(:scanners) { table(:vulnerability_scanners) }
let(:identifiers) { table(:vulnerability_identifiers) }
let!(:project) { projects.create!(id: 12058473, namespace_id: group.id, name: 'gitlab', path: 'gitlab') }
let!(:scanner) do
scanners.create!(id: 6, project_id: project.id, external_id: 'clair', name: 'Security Scanner')
end
before do
stub_const("#{described_class}::BATCH_SIZE", 2)
end
around do |example|
Timecop.freeze { Sidekiq::Testing.fake! { example.run } }
end
it 'updates location fingerprint for containter scanning findings', :sidekiq_might_not_need_inline do
allow(::Gitlab).to receive(:com?).and_return(true)
ids = [231411, 231412, 231413, 231500, 231600, 231700]
fingerprints = %w(
6c871440eb9f7618b9aef25e5246acddff6ed7a1
9d1a47927875f1aee1e2b9f16c25a8ff7586f1a6
d7da2cc109c18d890ab239e833524d451cc45246
6c871440eb9f7618b9aef25e5246acddff6ed7a1
9d1a47927875f1aee1e2b9f16c25a8ff7586f1a6
d7da2cc109c18d890ab239e833524d451cc45246
)
7.times.each { |x| identifiers.create!(vulnerability_identifer_params(x, project.id)) }
3.times.each { |x| findings.create!(finding_params(x, project.id).merge({ id: ids[x], location_fingerprint: fingerprints[x] })) }
findings.create!(finding_params(0, project.id).merge({ id: ids[3], location_fingerprint: Gitlab::Database::ShaAttribute.new.serialize(fingerprints[3]).to_s }))
findings.create!(finding_params(1, project.id).merge({ id: ids[4], location_fingerprint: Gitlab::Database::ShaAttribute.new.serialize(fingerprints[4]).to_s }))
findings.create!(finding_params(2, project.id).merge({ id: ids[5], location_fingerprint: Gitlab::Database::ShaAttribute.new.serialize(fingerprints[5]).to_s }))
migrate!
expect(migration)
.to be_scheduled_delayed_migration(2.minutes, 231411, 231412)
expect(migration)
.to be_scheduled_delayed_migration(4.minutes, 231413, 231413)
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
end
it 'skips migration for on premise' do
allow(::Gitlab).to receive(:com?).and_return(true)
migrate!
expect(BackgroundMigrationWorker.jobs.size).to eq(0)
end
def vulnerability_identifer_params(id, project_id)
{
id: id,
project_id: project_id,
fingerprint: 'd432c2ad2953e8bd587a3a43b3ce309b5b0154c' + id.to_s,
external_type: 'SECURITY_ID',
external_id: 'SECURITY_0',
name: 'SECURITY_IDENTIFIER 0'
}
end
def vulnerability_params(project_id, user_id)
{
title: 'title',
state: 1,
confidence: 5,
severity: 6,
report_type: 2,
project_id: project.id,
author_id: user.id
}
end
def finding_params(primary_identifier_id, project_id)
attrs = attributes_for(:vulnerabilities_finding)
{
severity: 0,
confidence: 5,
report_type: 2,
project_id: project_id,
scanner_id: 6,
primary_identifier_id: primary_identifier_id,
project_fingerprint: attrs[:project_fingerprint],
location_fingerprint: Digest::SHA1.hexdigest(SecureRandom.hex(10)),
uuid: attrs[:uuid],
name: attrs[:name],
metadata_version: '1.3',
raw_metadata: attrs[:raw_metadata]
}
end
def create_identifier(number_of)
(1..number_of).each do |identifier_id|
identifiers.create!(id: identifier_id,
project_id: 123,
fingerprint: 'd432c2ad2953e8bd587a3a43b3ce309b5b0154c' + identifier_id.to_s,
external_type: 'SECURITY_ID',
external_id: 'SECURITY_0',
name: 'SECURITY_IDENTIFIER 0')
end
end
end
# frozen_string_literal: true
# rubocop:disable Style/Documentation
module Gitlab
module BackgroundMigration
class RemoveDuplicateCsFindings
def perform(start_id, stop_id)
end
end
end
end
Gitlab::BackgroundMigration::RemoveDuplicateCsFindings.prepend_if_ee('EE::Gitlab::BackgroundMigration::RemoveDuplicateCsFindings')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment