Commit f645af99 authored by Mayra Cabrera

Merge branch '254228-background-migration-for-pipeline-id' into 'master'

Add migration to populate pipeline_id in Vulnerability Feedback

See merge request gitlab-org/gitlab!46266
parents 294be1a1 72e88375
---
title: Add migration to populate pipeline_id in Vulnerability Feedback
merge_request: 46266
author:
type: added
# frozen_string_literal: true
# Schedules the `PopulateVulnerabilityFeedbackPipelineId` background migration
# for every project that still owns vulnerability feedback rows without a
# `pipeline_id`. Does nothing unless `Gitlab.ee?` is true.
class SchedulePopulateVulnerabilityFeedbackPipelineId < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  # Delay, in seconds, added between two consecutive scheduled batches.
  INTERVAL = 2.minutes.to_i
  # Number of project ids handed to a single background job.
  BATCH_SIZE = 100
  MIGRATION = 'PopulateVulnerabilityFeedbackPipelineId'

  disable_ddl_transaction!

  # Collects the distinct project ids with feedback lacking a pipeline id and
  # enqueues one job per slice of BATCH_SIZE ids, each slice delayed by a
  # further INTERVAL seconds.
  def up
    return unless Gitlab.ee?

    project_id_rows = exec_query(<<~SQL).rows
      SELECT DISTINCT "vulnerability_feedback"."project_id"
      FROM "vulnerability_feedback"
      WHERE "vulnerability_feedback"."pipeline_id" IS NULL
      ORDER BY "vulnerability_feedback"."project_id" ASC
    SQL

    return if project_id_rows.blank?

    # Every result row is a single-element array, so flattening yields plain ids.
    project_id_rows.flatten.each_slice(BATCH_SIZE).with_index do |batch, position|
      migrate_in(position * INTERVAL, MIGRATION, [batch])
    end
  end

  # Intentionally empty: `down` performs no work.
  def down
    # no-op
  end
end
ab2b8af98a8a29658f92c302d45668c7b9f8f5234ef35f4311a0f0ebbd954ec8
\ No newline at end of file
# frozen_string_literal: true
module EE
  module Gitlab
    module BackgroundMigration
      # This class updates vulnerability feedback entities with no pipeline id assigned.
      #
      # For each processed project, feedback rows whose `pipeline_id` is NULL are
      # pointed at the highest-id pipeline of that project that has status
      # 'success' and carries security report artifacts. Pipelines matched by the
      # regular report query are tried first; the legacy query only fills rows
      # that are still NULL afterwards.
      module PopulateVulnerabilityFeedbackPipelineId
        extend ::Gitlab::Utils::Override

        # Numeric values compared against "ci_job_artifacts"."file_type" in the
        # queries below.
        SECURITY_REPORT_FILE_TYPES = {
          sast: 5,
          dependency_scanning: 6,
          container_scanning: 7,
          dast: 8,
          license_management: 10,
          license_scanning: 101,
          secret_detection: 21,
          coverage_fuzzing: 23,
          api_fuzzing: 26
        }.freeze

        # Backfills `pipeline_id` on vulnerability feedback of the given projects.
        #
        # @param project_ids [Array<Integer>] candidate project ids; they are
        #   narrowed down through the `non_archived.without_deleted` Project scopes.
        override :perform
        def perform(project_ids)
          filtered_project_ids = ::Project.non_archived.without_deleted.where(id: project_ids).pluck(:id)

          # Both subqueries end with `LIMIT 1`, so each one yields at most a single
          # pipeline. Running them once per project gives every project in the
          # batch its own pipeline instead of updating only the project that owns
          # the overall newest one. Iterating also means an empty list issues no
          # SQL at all (an empty `in ()` list is a syntax error in PostgreSQL).
          filtered_project_ids.each do |project_id|
            update_vulnerability_feedback_with_pipeline_id(pipelines_with_security_reports_subquery([project_id]))
            update_vulnerability_feedback_with_pipeline_id(legacy_pipelines_with_security_reports_subquery([project_id]))
          end
        end

        private

        # Sets `pipeline_id` on feedback rows that have none, joining on
        # `project_id` against the rows produced by `subquery`.
        #
        # @param subquery [String] SQL selecting pipeline `id` and `project_id`
        def update_vulnerability_feedback_with_pipeline_id(subquery)
          update_feedback_pipeline_id_sql = <<~SQL
            UPDATE "vulnerability_feedback"
            SET pipeline_id = "pipelines_with_reports"."id"
            FROM (#{subquery}) AS pipelines_with_reports
            WHERE "vulnerability_feedback"."pipeline_id" IS NULL
            AND "vulnerability_feedback"."project_id" = "pipelines_with_reports"."project_id";
          SQL

          connection.execute(update_feedback_pipeline_id_sql)
        end

        # SQL for the highest-id 'success' pipeline among `project_ids` that has a
        # non-retried `Ci::Build` with a security report artifact. The license
        # file types are excluded here.
        #
        # @param project_ids [Array<Integer>] must not be empty, otherwise the
        #   generated `in ()` clause is invalid SQL
        # @return [String]
        def pipelines_with_security_reports_subquery(project_ids)
          <<~SQL
            SELECT "ci_pipelines"."id", "ci_pipelines"."project_id"
            FROM "ci_pipelines"
            WHERE ("ci_pipelines"."project_id" in (#{project_ids.join(', ')}))
            AND ("ci_pipelines"."status" IN ('success'))
            AND (
            EXISTS (
            SELECT 1
            FROM "ci_builds"
            WHERE "ci_builds"."type" = 'Ci::Build'
            AND (
            "ci_builds"."retried" = FALSE
            OR "ci_builds"."retried" IS NULL
            )
            AND (
            EXISTS (
            SELECT 1
            FROM "ci_job_artifacts"
            WHERE ("ci_builds"."id" = "ci_job_artifacts"."job_id")
            AND "ci_job_artifacts"."file_type" IN (#{SECURITY_REPORT_FILE_TYPES.except(:license_management, :license_scanning).values.join(", ")})
            )
            )
            AND ("ci_pipelines"."id" = "ci_builds"."commit_id")
            )
            )
            ORDER BY "ci_pipelines"."id" DESC
            LIMIT 1
          SQL
        end

        # SQL for the highest-id 'success' pipeline among `project_ids` that has a
        # non-retried `Ci::Build` named after one of the listed scanners and
        # owning an artifact of any type in SECURITY_REPORT_FILE_TYPES.
        #
        # @param project_ids [Array<Integer>] must not be empty, otherwise the
        #   generated `in ()` clause is invalid SQL
        # @return [String]
        def legacy_pipelines_with_security_reports_subquery(project_ids)
          <<~SQL
            SELECT "ci_pipelines"."id", "ci_pipelines"."project_id"
            FROM "ci_pipelines"
            INNER JOIN "ci_builds" ON "ci_builds"."commit_id" = "ci_pipelines"."id"
            AND "ci_builds"."type" = 'Ci::Build'
            AND ("ci_builds"."retried" = FALSE OR "ci_builds"."retried" IS NULL)
            INNER JOIN "ci_job_artifacts" ON "ci_job_artifacts"."file_type" IN (#{SECURITY_REPORT_FILE_TYPES.values.join(", ")})
            AND "ci_job_artifacts"."job_id" = "ci_builds"."id"
            WHERE ("ci_pipelines"."project_id" in (#{project_ids.join(', ')}))
            AND ("ci_pipelines"."status" IN ('success'))
            AND "ci_builds"."name" IN ('sast', 'secret_detection', 'dependency_scanning', 'container_scanning', 'dast')
            ORDER BY "ci_pipelines"."id" DESC
            LIMIT 1
          SQL
        end

        # Memoized database connection used to run the raw UPDATE statements.
        def connection
          @connection ||= ActiveRecord::Base.connection
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::PopulateVulnerabilityFeedbackPipelineId, schema: 2020_10_26_182253 do
  let(:users) { table(:users) }
  let(:namespaces) { table(:namespaces) }
  let(:vulnerability_feedback) { table(:vulnerability_feedback) }
  let(:pipelines) { table(:ci_pipelines) }
  let(:builds) { table(:ci_builds) }
  let(:job_artifacts) { table(:ci_job_artifacts) }
  let(:projects) { table(:projects) }
  let(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') }
  let(:user) { users.create!(name: 'test', email: 'test@example.com', projects_limit: 5) }
  let(:timestamp) { Date.current }

  # Creates a pipeline for the project. Optionally attaches one build with a
  # single artifact: a 'brakeman' build with file_type 5 for regular security
  # reports, or a 'sast' build with file_type 101 for the legacy variant.
  # `with_security_reports` takes precedence when both flags are set.
  def create_pipeline(project_id, finished_at:, status: 'success', with_security_reports: false, with_legacy_security_report: false)
    pipeline_params = { project_id: project_id, ref: '1', sha: '1', source: 6, status: status, finished_at: finished_at }
    pipeline = pipelines.create!(pipeline_params)

    if with_security_reports
      build = builds.create!(project_id: project_id, name: 'brakeman', retried: false, commit_id: pipeline.id, type: 'Ci::Build')
      job_artifacts.create!(project_id: project_id, file_format: 'raw', file_type: 5, job_id: build.id)
    elsif with_legacy_security_report
      build = builds.create!(project_id: project_id, name: 'sast', retried: false, commit_id: pipeline.id, type: 'Ci::Build')
      job_artifacts.create!(project_id: project_id, file_format: 'raw', file_type: 101, job_id: build.id)
    end

    pipeline
  end

  # Creates a project with one report-less pipeline and three feedback rows:
  # the oldest already references that pipeline, the two newer ones have no
  # pipeline_id.
  def create_project_with_pipeline
    project_params = { namespace_id: namespace.id, name: 'foo' }
    project = projects.create!(project_params)
    pipeline = create_pipeline(project.id, finished_at: timestamp - 31.days)
    feedback_params = { project_id: project.id, author_id: user.id, feedback_type: 'dismissal', category: 'sast' }

    vulnerability_feedback.create!(feedback_params.merge(project_fingerprint: SecureRandom.hex, created_at: timestamp - 30.days, pipeline_id: pipeline.id))
    vulnerability_feedback.create!(feedback_params.merge(project_fingerprint: SecureRandom.hex, created_at: timestamp - 15.days))
    vulnerability_feedback.create!(feedback_params.merge(project_fingerprint: SecureRandom.hex, created_at: timestamp - 5.days))

    { project: project, pipeline: pipeline }
  end

  # Current feedback rows reduced to the attributes the examples compare.
  def feedback_rows
    vulnerability_feedback.order(:project_id, :created_at).map do |row|
      row.attributes.slice(*%w(project_id pipeline_id created_at))
    end
  end

  # Expected feedback rows for both projects. The oldest row of each project
  # keeps its original pipeline; the two newer rows carry the given backfilled
  # pipeline id, or nil when the migration is expected to leave them untouched.
  def expected_feedback_rows(project_1_backfill: nil, project_2_backfill: nil)
    [
      [project_1, pipeline_1, project_1_backfill],
      [project_2, pipeline_2, project_2_backfill]
    ].flat_map do |project, pipeline, backfilled_pipeline_id|
      [
        { 'project_id' => project.id, 'pipeline_id' => pipeline.id, 'created_at' => timestamp - 30.days },
        { 'project_id' => project.id, 'pipeline_id' => backfilled_pipeline_id, 'created_at' => timestamp - 15.days },
        { 'project_id' => project.id, 'pipeline_id' => backfilled_pipeline_id, 'created_at' => timestamp - 5.days }
      ]
    end
  end

  let(:project_with_pipeline_1) { create_project_with_pipeline }
  let(:project_with_pipeline_2) { create_project_with_pipeline }
  let(:project_1) { project_with_pipeline_1[:project] }
  let(:project_2) { project_with_pipeline_2[:project] }
  let(:pipeline_1) { project_with_pipeline_1[:pipeline] }
  let(:pipeline_2) { project_with_pipeline_2[:pipeline] }

  describe '#perform' do
    context 'when there is a successful pipeline' do
      context 'but the pipeline has no security reports' do
        it 'does not update pipeline_id' do
          subject.perform([project_1.id, project_2.id].sort)

          expect(feedback_rows).to match_array(expected_feedback_rows)
        end
      end

      context 'and the pipeline has security reports' do
        context 'and is not successful' do
          let!(:pipeline_3) do
            create_pipeline(project_1.id, status: 'failed', finished_at: timestamp - 10.days, with_security_reports: true, with_legacy_security_report: false)
          end

          it 'does not update pipeline_id' do
            subject.perform([project_1.id, project_2.id].sort)

            expect(feedback_rows).to match_array(expected_feedback_rows)
          end
        end

        context 'and is successful' do
          let!(:pipeline_3) do
            create_pipeline(project_1.id, finished_at: timestamp - 10.days, with_security_reports: true, with_legacy_security_report: false)
          end

          it 'does update pipeline_id for feedback' do
            subject.perform([project_1.id, project_2.id].sort)

            expect(feedback_rows).to match_array(expected_feedback_rows(project_1_backfill: pipeline_3.id))
          end

          context 'and the pipeline has also legacy security reports' do
            # Eager on purpose: the example never references pipeline_4, so a
            # lazy `let` would never create the legacy pipeline.
            let!(:pipeline_4) do
              create_pipeline(project_1.id, finished_at: timestamp - 5.days, with_security_reports: false, with_legacy_security_report: true)
            end

            it 'does update pipeline_id for feedback using non-legacy pipeline_id' do
              subject.perform([project_1.id, project_2.id].sort)

              expect(feedback_rows).to match_array(expected_feedback_rows(project_1_backfill: pipeline_3.id))
            end
          end
        end
      end

      context 'and the pipeline has legacy security reports' do
        context 'and is not successful' do
          let!(:pipeline_4) do
            create_pipeline(project_1.id, status: 'failed', finished_at: timestamp - 10.days, with_security_reports: false, with_legacy_security_report: true)
          end

          it 'does not update pipeline_id' do
            subject.perform([project_1.id, project_2.id].sort)

            expect(feedback_rows).to match_array(expected_feedback_rows)
          end
        end

        context 'and is successful' do
          let!(:pipeline_4) do
            create_pipeline(project_2.id, finished_at: timestamp - 10.days, with_security_reports: false, with_legacy_security_report: true)
          end

          it 'does update pipeline_id for feedback' do
            subject.perform([project_1.id, project_2.id].sort)

            expect(feedback_rows).to match_array(expected_feedback_rows(project_2_backfill: pipeline_4.id))
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # This class updates vulnerability feedback entities with no pipeline id assigned.
    #
    # The implementation in this class is deliberately empty.
    class PopulateVulnerabilityFeedbackPipelineId
      # Accepts the batch of project ids and does nothing with it.
      #
      # @param project_ids [Array<Integer>] ignored here
      # @return [nil]
      def perform(project_ids)
        nil
      end
    end
  end
end
# Hooks in the module named by the string below via `prepend_if_ee`.
# NOTE(review): presumably this prepends the EE module only on EE installations,
# so its `perform` override replaces the empty one above — confirm against the
# `prepend_if_ee` helper, which is defined outside this file.
Gitlab::BackgroundMigration::PopulateVulnerabilityFeedbackPipelineId.prepend_if_ee('EE::Gitlab::BackgroundMigration::PopulateVulnerabilityFeedbackPipelineId')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment