From 632a2bc9ed1e48d12cbf2bb79fbd717350557ba4 Mon Sep 17 00:00:00 2001
From: Mehmet Emin INAC <minac@gitlab.com>
Date: Fri, 11 Dec 2020 10:17:24 +0100
Subject: [PATCH] Populate `finding_uuid` attribute for vulnerability_feedback

We can only populate these values if the feedback has an associated
finding with it. Which means, we can't populate the values for the
records created through the "pipeline security tab" for security
findings.
---
 ...finding_uuid_for_vulnerability_feedback.rb |  25 ++++
 db/schema_migrations/20201211090634           |   1 +
 ...finding_uuid_for_vulnerability_feedback.rb | 125 ++++++++++++++++++
 ...ng_uuid_for_vulnerability_feedback_spec.rb |  90 +++++++++++++
 ...ng_uuid_for_vulnerability_feedback_spec.rb |  34 +++++
 5 files changed, 275 insertions(+)
 create mode 100644 db/post_migrate/20201211090634_schedule_populate_finding_uuid_for_vulnerability_feedback.rb
 create mode 100644 db/schema_migrations/20201211090634
 create mode 100644 lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback.rb
 create mode 100644 spec/lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback_spec.rb
 create mode 100644 spec/migrations/schedule_populate_finding_uuid_for_vulnerability_feedback_spec.rb

diff --git a/db/post_migrate/20201211090634_schedule_populate_finding_uuid_for_vulnerability_feedback.rb b/db/post_migrate/20201211090634_schedule_populate_finding_uuid_for_vulnerability_feedback.rb
new file mode 100644
index 00000000000..3ecb48dab0f
--- /dev/null
+++ b/db/post_migrate/20201211090634_schedule_populate_finding_uuid_for_vulnerability_feedback.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+class SchedulePopulateFindingUuidForVulnerabilityFeedback < ActiveRecord::Migration[6.0]
+  include Gitlab::Database::MigrationHelpers
+
+  DOWNTIME = false
+  MIGRATION_CLASS = 'PopulateFindingUuidForVulnerabilityFeedback'
+  DELAY_INTERVAL = 2.minutes
+  BATCH_SIZE = 1000
+
+  disable_ddl_transaction!
+
+  def up
+    queue_background_migration_jobs_by_range_at_intervals(
+      Gitlab::BackgroundMigration::PopulateFindingUuidForVulnerabilityFeedback::VulnerabilityFeedback,
+      MIGRATION_CLASS,
+      DELAY_INTERVAL,
+      batch_size: BATCH_SIZE
+    )
+  end
+
+  def down
+    # no-op
+  end
+end
diff --git a/db/schema_migrations/20201211090634 b/db/schema_migrations/20201211090634
new file mode 100644
index 00000000000..0c11b8f85fb
--- /dev/null
+++ b/db/schema_migrations/20201211090634
@@ -0,0 +1 @@
+5117b71950bec3c6c746eaf4851c111a335bf2280075ddc2c73b60a30e23d907
\ No newline at end of file
diff --git a/lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback.rb b/lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback.rb
new file mode 100644
index 00000000000..43a176898b7
--- /dev/null
+++ b/lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback.rb
@@ -0,0 +1,125 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # This class populates the `finding_uuid` attribute for
+    # the existing `vulnerability_feedback` records.
+    class PopulateFindingUuidForVulnerabilityFeedback
+      REPORT_TYPES = {
+        sast: 0,
+        dependency_scanning: 1,
+        container_scanning: 2,
+        dast: 3,
+        secret_detection: 4,
+        coverage_fuzzing: 5,
+        api_fuzzing: 6
+      }.freeze
+
+      class VulnerabilityFeedback < ActiveRecord::Base # rubocop:disable Style/Documentation
+        include EachBatch
+
+        self.table_name = 'vulnerability_feedback'
+
+        enum category: REPORT_TYPES
+
+        scope :in_range, -> (start, stop) { where('id BETWEEN ? AND ?', start, stop) }
+
+        def self.load_vulnerability_findings
+          all.to_a.tap { |collection| collection.each(&:vulnerability_finding) }
+        end
+
+        def set_finding_uuid
+          return unless vulnerability_finding.present?
+
+          update_column(:finding_uuid, calculated_uuid)
+        end
+
+        def vulnerability_finding
+          BatchLoader.for(finding_key).batch(replace_methods: false) do |finding_keys, loader|
+            project_ids = finding_keys.map { |key| key[:project_id] }
+            categories = finding_keys.map { |key| key[:category] }
+            fingerprints = finding_keys.map { |key| key[:project_fingerprint] }
+
+            findings = Finding.with_primary_identifier.where(
+              project_id: project_ids.uniq,
+              report_type: categories.uniq,
+              project_fingerprint: fingerprints.uniq
+            ).to_a
+
+            finding_keys.each do |finding_key|
+              loader.call(
+                finding_key,
+                findings.find { |f| finding_key == f.finding_key }
+              )
+            end
+          end
+        end
+
+        private
+
+        def calculated_uuid
+          Gitlab::UUID.v5(uuid_components)
+        end
+
+        def uuid_components
+          [
+            category,
+            vulnerability_finding.primary_identifier&.fingerprint,
+            vulnerability_finding.location_fingerprint,
+            project_id
+          ].join('-')
+        end
+
+        def finding_key
+          {
+            project_id: project_id,
+            category: category,
+            project_fingerprint: project_fingerprint
+          }
+        end
+      end
+
+      class Finding < ActiveRecord::Base # rubocop:disable Style/Documentation
+        include ShaAttribute
+
+        self.table_name = 'vulnerability_occurrences'
+
+        sha_attribute :project_fingerprint
+        sha_attribute :location_fingerprint
+
+        belongs_to :primary_identifier, class_name: 'Gitlab::BackgroundMigration::PopulateFindingUuidForVulnerabilityFeedback::Identifier'
+
+        enum report_type: REPORT_TYPES
+
+        scope :with_primary_identifier, -> { includes(:primary_identifier) }
+
+        def finding_key
+          {
+            project_id: project_id,
+            category: report_type,
+            project_fingerprint: project_fingerprint
+          }
+        end
+      end
+
+      class Identifier < ActiveRecord::Base # rubocop:disable Style/Documentation
+        self.table_name = 'vulnerability_identifiers'
+      end
+
+      def perform(*range)
+        feedback = VulnerabilityFeedback.in_range(*range).load_vulnerability_findings
+        feedback.each(&:set_finding_uuid)
+      ensure
+        log_info(feedback.count)
+      end
+
+      def log_info(feedback_count)
+        ::Gitlab::BackgroundMigration::Logger.info(
+          migrator: self.class.name,
+          message: '`finding_uuid` attributes has been set',
+          count: feedback_count
+        )
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback_spec.rb b/spec/lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback_spec.rb
new file mode 100644
index 00000000000..14464c338e1
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/populate_finding_uuid_for_vulnerability_feedback_spec.rb
@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::BackgroundMigration::PopulateFindingUuidForVulnerabilityFeedback, schema: 20201211090634 do
+  let(:namespaces) { table(:namespaces) }
+  let(:projects) { table(:projects) }
+  let(:users) { table(:users) }
+  let(:scanners) { table(:vulnerability_scanners) }
+  let(:identifiers) { table(:vulnerability_identifiers) }
+  let(:findings) { table(:vulnerability_occurrences) }
+  let(:vulnerability_feedback) { table(:vulnerability_feedback) }
+
+  let(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') }
+  let(:project) { projects.create!(namespace_id: namespace.id, name: 'foo') }
+  let(:user) { users.create!(username: 'john.doe', projects_limit: 5) }
+  let(:scanner) { scanners.create!(project_id: project.id, external_id: 'foo', name: 'bar') }
+  let(:identifier) { identifiers.create!(project_id: project.id, fingerprint: 'foo', external_type: 'bar', external_id: 'zoo', name: 'baz') }
+  let(:sast_report) { 0 }
+  let(:dependency_scanning_report) { 1 }
+  let(:dast_report) { 3 }
+  let(:secret_detection_report) { 4 }
+  let(:project_fingerprint) { Digest::SHA1.hexdigest(SecureRandom.uuid) }
+  let(:location_fingerprint_1) { Digest::SHA1.hexdigest(SecureRandom.uuid) }
+  let(:location_fingerprint_2) { Digest::SHA1.hexdigest(SecureRandom.uuid) }
+  let(:location_fingerprint_3) { Digest::SHA1.hexdigest(SecureRandom.uuid) }
+  let(:finding_1) { finding_creator.call(sast_report, location_fingerprint_1) }
+  let(:finding_2) { finding_creator.call(dast_report, location_fingerprint_2) }
+  let(:finding_3) { finding_creator.call(secret_detection_report, location_fingerprint_3) }
+  let(:uuid_1_components) { ['sast', identifier.fingerprint, location_fingerprint_1, project.id].join('-') }
+  let(:uuid_2_components) { ['dast', identifier.fingerprint, location_fingerprint_2, project.id].join('-') }
+  let(:uuid_3_components) { ['secret_detection', identifier.fingerprint, location_fingerprint_3, project.id].join('-') }
+  let(:expected_uuid_1) { Gitlab::UUID.v5(uuid_1_components) }
+  let(:expected_uuid_2) { Gitlab::UUID.v5(uuid_2_components) }
+  let(:expected_uuid_3) { Gitlab::UUID.v5(uuid_3_components) }
+  let(:finding_creator) do
+    -> (report_type, location_fingerprint) do
+      findings.create!(
+        project_id: project.id,
+        primary_identifier_id: identifier.id,
+        scanner_id: scanner.id,
+        report_type: report_type,
+        uuid: SecureRandom.uuid,
+        name: 'Foo',
+        location_fingerprint: Gitlab::Database::ShaAttribute.serialize(location_fingerprint),
+        project_fingerprint: Gitlab::Database::ShaAttribute.serialize(project_fingerprint),
+        metadata_version: '1',
+        severity: 0,
+        confidence: 5,
+        raw_metadata: '{}'
+      )
+    end
+  end
+
+  let(:feedback_creator) do
+    -> (category, project_fingerprint) do
+      vulnerability_feedback.create!(
+        project_id: project.id,
+        author_id: user.id,
+        feedback_type: 0,
+        category: category,
+        project_fingerprint: project_fingerprint
+      )
+    end
+  end
+
+  let!(:feedback_1) { feedback_creator.call(finding_1.report_type, project_fingerprint) }
+  let!(:feedback_2) { feedback_creator.call(finding_2.report_type, project_fingerprint) }
+  let!(:feedback_3) { feedback_creator.call(finding_3.report_type, project_fingerprint) }
+  let!(:feedback_4) { feedback_creator.call(finding_1.report_type, 'foo') }
+  let!(:feedback_5) { feedback_creator.call(dependency_scanning_report, project_fingerprint) }
+
+  subject(:populate_finding_uuids) { described_class.new.perform(feedback_1.id, feedback_5.id) }
+
+  before do
+    allow(Gitlab::BackgroundMigration::Logger).to receive(:info)
+  end
+
+  describe '#perform' do
+    it 'updates the `finding_uuid` attributes of the feedback records' do
+      expect { populate_finding_uuids }.to change { feedback_1.reload.finding_uuid }.from(nil).to(expected_uuid_1)
+                                       .and change { feedback_2.reload.finding_uuid }.from(nil).to(expected_uuid_2)
+                                       .and change { feedback_3.reload.finding_uuid }.from(nil).to(expected_uuid_3)
+                                       .and not_change { feedback_4.reload.finding_uuid }
+                                       .and not_change { feedback_5.reload.finding_uuid }
+
+      expect(Gitlab::BackgroundMigration::Logger).to have_received(:info).once
+    end
+  end
+end
diff --git a/spec/migrations/schedule_populate_finding_uuid_for_vulnerability_feedback_spec.rb b/spec/migrations/schedule_populate_finding_uuid_for_vulnerability_feedback_spec.rb
new file mode 100644
index 00000000000..99c3e6072d3
--- /dev/null
+++ b/spec/migrations/schedule_populate_finding_uuid_for_vulnerability_feedback_spec.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require_migration!
+
+RSpec.describe SchedulePopulateFindingUuidForVulnerabilityFeedback do
+  let(:namespaces) { table(:namespaces) }
+  let(:projects) { table(:projects) }
+  let(:users) { table(:users) }
+  let(:vulnerability_feedback) { table(:vulnerability_feedback) }
+
+  let(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') }
+  let(:project) { projects.create!(namespace_id: namespace.id, name: 'foo') }
+  let(:user) { users.create!(username: 'john.doe', projects_limit: 1) }
+
+  let!(:feedback_1) { vulnerability_feedback.create!(feedback_type: 0, category: 0, project_id: project.id, author_id: user.id, project_fingerprint: 'foo') }
+  let!(:feedback_2) { vulnerability_feedback.create!(feedback_type: 0, category: 0, project_id: project.id, author_id: user.id, project_fingerprint: 'bar') }
+
+  around do |example|
+    freeze_time { Sidekiq::Testing.fake! { example.run } }
+  end
+
+  before do
+    stub_const("#{described_class.name}::BATCH_SIZE", 1)
+  end
+
+  it 'schedules the background jobs', :aggregate_failures do
+    migrate!
+
+    expect(BackgroundMigrationWorker.jobs.size).to be(2)
+    expect(described_class::MIGRATION_CLASS).to be_scheduled_delayed_migration(2.minutes, feedback_1.id, feedback_1.id)
+    expect(described_class::MIGRATION_CLASS).to be_scheduled_delayed_migration(4.minutes, feedback_2.id, feedback_2.id)
+  end
+end
-- 
2.30.9