Commit 3027ba6f authored by Markus Koller

Merge branch '241809-db-migration-for-vulnerabilities-count' into 'master'

Change Vulnerabilities Count Data Retention to 1 year

Closes #241809

See merge request gitlab-org/gitlab!40766
parents 03e0b6ff ca2436b3
---
title: Change Vulnerabilities Count Data Retention to 1 year
merge_request: 40766
author:
type: other
# frozen_string_literal: true
# Post-deployment migration that schedules the
# 'PopulateVulnerabilityHistoricalStatistics' background migration for every
# project that owns at least one vulnerability, asking it to cover 365 days.
#
# Jobs are only scheduled on EE installations; on FOSS `up` returns early.
class PopulateVulnerabilityHistoricalStatisticsForYear < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  # Spacing between consecutive background jobs, in seconds.
  DELAY_INTERVAL = 5.minutes.to_i
  # Number of distinct project ids handed to a single background job.
  BATCH_SIZE = 50
  # Name of the background migration class that does the actual work.
  MIGRATION = 'PopulateVulnerabilityHistoricalStatistics'

  disable_ddl_transaction!

  # Migration-local model for the `vulnerabilities` table, so the migration
  # does not depend on the application's Vulnerability class.
  class Vulnerability < ActiveRecord::Base
    self.table_name = 'vulnerabilities'

    include ::EachBatch
  end

  # Walks the distinct project ids found in `vulnerabilities` in batches and
  # enqueues one delayed background job per batch.
  def up
    return unless Gitlab.ee?

    projects_with_vulnerabilities = Vulnerability.select('project_id').distinct

    projects_with_vulnerabilities.each_batch(of: BATCH_SIZE, column: 'project_id') do |batch, batch_number|
      project_ids = batch.pluck(:project_id)
      # Each batch is pushed further into the future than the previous one.
      delay = batch_number * DELAY_INTERVAL

      # Job arguments: the project ids and the retention period in days.
      migrate_in(delay, MIGRATION, [project_ids, 365])
    end
  end

  # Nothing to undo.
  def down
    # no-op
  end
end
ce72274a7cc38c3708a03b8a301c6dafa6543acb03c8638a972c197657182ccf
\ No newline at end of file
...@@ -6,8 +6,6 @@ module Vulnerabilities ...@@ -6,8 +6,6 @@ module Vulnerabilities
self.table_name = 'vulnerability_historical_statistics' self.table_name = 'vulnerability_historical_statistics'
RETENTION_PERIOD = 90.days
belongs_to :project, optional: false belongs_to :project, optional: false
validates :date, presence: true validates :date, presence: true
......
...@@ -9,9 +9,9 @@ module EE ...@@ -9,9 +9,9 @@ module EE
extend ::Gitlab::Utils::Override extend ::Gitlab::Utils::Override
override :perform override :perform
def perform(project_ids) def perform(project_ids, retention_period = 90)
project_ids.each do |project_id| project_ids.each do |project_id|
upsert_vulnerability_historical_statistics(project_id) upsert_vulnerability_historical_statistics(project_id, retention_period)
rescue => e rescue => e
error_message("Error updating statistics for project #{project_id}: #{e.message}") error_message("Error updating statistics for project #{project_id}: #{e.message}")
end end
...@@ -19,7 +19,6 @@ module EE ...@@ -19,7 +19,6 @@ module EE
private private
RETENTION_PERIOD = 90.days
MAX_DAYS_IN_SINGLE_QUERY = 10 MAX_DAYS_IN_SINGLE_QUERY = 10
EMPTY_STATISTIC = { EMPTY_STATISTIC = {
...@@ -44,9 +43,9 @@ module EE ...@@ -44,9 +43,9 @@ module EE
enum letter_grade: { a: 0, b: 1, c: 2, d: 3, f: 4 } enum letter_grade: { a: 0, b: 1, c: 2, d: 3, f: 4 }
end end
def upsert_vulnerability_historical_statistics(project_id) def upsert_vulnerability_historical_statistics(project_id, retention_period)
end_date = Date.today end_date = Date.today
start_date = end_date - RETENTION_PERIOD start_date = end_date - retention_period.days
time_now = Time.current time_now = Time.current
counts_by_day_and_severity_in_batches(project_id, start_date, end_date, of: MAX_DAYS_IN_SINGLE_QUERY) counts_by_day_and_severity_in_batches(project_id, start_date, end_date, of: MAX_DAYS_IN_SINGLE_QUERY)
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::PopulateVulnerabilityHistoricalStatistics, schema: 2020_08_21_224343 do RSpec.describe Gitlab::BackgroundMigration::PopulateVulnerabilityHistoricalStatistics, schema: 2020_08_31_224343 do
let(:users) { table(:users) } let(:users) { table(:users) }
let(:namespaces) { table(:namespaces) } let(:namespaces) { table(:namespaces) }
let(:vulnerabilities) { table(:vulnerabilities) } let(:vulnerabilities) { table(:vulnerabilities) }
...@@ -22,6 +22,9 @@ RSpec.describe Gitlab::BackgroundMigration::PopulateVulnerabilityHistoricalStati ...@@ -22,6 +22,9 @@ RSpec.describe Gitlab::BackgroundMigration::PopulateVulnerabilityHistoricalStati
vulnerability_params = { title: 'title', state: 1, confidence: 5, report_type: 2, project_id: project.id, author_id: user.id } vulnerability_params = { title: 'title', state: 1, confidence: 5, report_type: 2, project_id: project.id, author_id: user.id }
vulnerabilities.create!(vulnerability_params.merge(created_at: 400.days.ago, resolved_at: 380.days.ago, severity: 7))
vulnerabilities.create!(vulnerability_params.merge(created_at: 350.days.ago, resolved_at: 300.days.ago, severity: 6))
vulnerabilities.create!(vulnerability_params.merge(created_at: 80.days.ago, resolved_at: 50.days.ago, severity: 5))
vulnerabilities.create!(vulnerability_params.merge(created_at: 5.days.ago, dismissed_at: Date.current, severity: 7)) vulnerabilities.create!(vulnerability_params.merge(created_at: 5.days.ago, dismissed_at: Date.current, severity: 7))
vulnerabilities.create!(vulnerability_params.merge(created_at: 5.days.ago, dismissed_at: 1.day.ago, severity: 6)) vulnerabilities.create!(vulnerability_params.merge(created_at: 5.days.ago, dismissed_at: 1.day.ago, severity: 6))
vulnerabilities.create!(vulnerability_params.merge(created_at: 4.days.ago, resolved_at: 2.days.ago, severity: 7)) vulnerabilities.create!(vulnerability_params.merge(created_at: 4.days.ago, resolved_at: 2.days.ago, severity: 7))
...@@ -37,25 +40,56 @@ RSpec.describe Gitlab::BackgroundMigration::PopulateVulnerabilityHistoricalStati ...@@ -37,25 +40,56 @@ RSpec.describe Gitlab::BackgroundMigration::PopulateVulnerabilityHistoricalStati
end end
describe '#perform' do describe '#perform' do
it 'creates historical statistic rows according to projects', :aggregate_failures do context 'when using default retention period' do
expect { subject.perform([1, 2]) }.to change(Vulnerabilities::HistoricalStatistic, :count).by(8) it 'creates historical statistic rows according to projects for 90 days', :aggregate_failures do
expect { subject.perform([1, 2]) }.to change(Vulnerabilities::HistoricalStatistic, :count).by(10)
created_rows = [
{ 'letter_grade' => 4, 'project_id' => 1, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-23') }, created_rows = [
{ 'letter_grade' => 4, 'project_id' => 1, 'total' => 3, 'critical' => 2, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-24') }, { 'letter_grade' => 2, 'project_id' => 1, 'total' => 1, 'critical' => 0, 'high' => 0, 'medium' => 1, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 80.days },
{ 'letter_grade' => 4, 'project_id' => 1, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-26') }, { 'letter_grade' => 4, 'project_id' => 1, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 5.days },
{ 'letter_grade' => 4, 'project_id' => 1, 'total' => 1, 'critical' => 1, 'high' => 0, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-27') }, { 'letter_grade' => 4, 'project_id' => 1, 'total' => 3, 'critical' => 2, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 4.days },
{ 'letter_grade' => 4, 'project_id' => 2, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-23') }, { 'letter_grade' => 4, 'project_id' => 1, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 2.days },
{ 'letter_grade' => 4, 'project_id' => 2, 'total' => 3, 'critical' => 2, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-24') }, { 'letter_grade' => 4, 'project_id' => 1, 'total' => 1, 'critical' => 1, 'high' => 0, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 1.day },
{ 'letter_grade' => 4, 'project_id' => 2, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-26') }, { 'letter_grade' => 2, 'project_id' => 2, 'total' => 1, 'critical' => 0, 'high' => 0, 'medium' => 1, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 80.days },
{ 'letter_grade' => 4, 'project_id' => 2, 'total' => 1, 'critical' => 1, 'high' => 0, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.parse('2020-07-27') } { 'letter_grade' => 4, 'project_id' => 2, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 5.days },
] { 'letter_grade' => 4, 'project_id' => 2, 'total' => 3, 'critical' => 2, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 4.days },
{ 'letter_grade' => 4, 'project_id' => 2, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 2.days },
rows = historical_statistics.order(:project_id, :date).map do |row| { 'letter_grade' => 4, 'project_id' => 2, 'total' => 1, 'critical' => 1, 'high' => 0, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 1.day }
row.attributes.slice(*%w(letter_grade project_id total critical high medium low unknown info date)) ]
rows = historical_statistics.order(:project_id, :date).map do |row|
row.attributes.slice(*%w(letter_grade project_id total critical high medium low unknown info date))
end
expect(rows).to match_array(created_rows)
end end
end
expect(rows).to match_array(created_rows) context 'when using a provided retention period' do
it 'creates historical statistic rows according to projects for requested period', :aggregate_failures do
expect { subject.perform([1, 2], 365) }.to change(Vulnerabilities::HistoricalStatistic, :count).by(12)
created_rows = [
{ 'letter_grade' => 3, 'project_id' => 1, 'total' => 1, 'critical' => 0, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 350.days },
{ 'letter_grade' => 2, 'project_id' => 1, 'total' => 1, 'critical' => 0, 'high' => 0, 'medium' => 1, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 80.days },
{ 'letter_grade' => 4, 'project_id' => 1, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 5.days },
{ 'letter_grade' => 4, 'project_id' => 1, 'total' => 3, 'critical' => 2, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 4.days },
{ 'letter_grade' => 4, 'project_id' => 1, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 2.days },
{ 'letter_grade' => 4, 'project_id' => 1, 'total' => 1, 'critical' => 1, 'high' => 0, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 1.day },
{ 'letter_grade' => 3, 'project_id' => 2, 'total' => 1, 'critical' => 0, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 350.days },
{ 'letter_grade' => 2, 'project_id' => 2, 'total' => 1, 'critical' => 0, 'high' => 0, 'medium' => 1, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 80.days },
{ 'letter_grade' => 4, 'project_id' => 2, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 5.days },
{ 'letter_grade' => 4, 'project_id' => 2, 'total' => 3, 'critical' => 2, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 4.days },
{ 'letter_grade' => 4, 'project_id' => 2, 'total' => 2, 'critical' => 1, 'high' => 1, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 2.days },
{ 'letter_grade' => 4, 'project_id' => 2, 'total' => 1, 'critical' => 1, 'high' => 0, 'medium' => 0, 'low' => 0, 'unknown' => 0, 'info' => 0, 'date' => Date.current - 1.day }
]
rows = historical_statistics.order(:project_id, :date).map do |row|
row.attributes.slice(*%w(letter_grade project_id total critical high medium low unknown info date))
end
expect(rows).to match_array(created_rows)
end
end end
end end
end end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20200831224343_populate_vulnerability_historical_statistics_for_year.rb')
# Verifies that the migration schedules the background migration only for
# projects that have vulnerabilities, and schedules nothing on FOSS.
RSpec.describe PopulateVulnerabilityHistoricalStatisticsForYear do
  let(:users) { table(:users) }
  let(:namespaces) { table(:namespaces) }
  let(:vulnerabilities) { table(:vulnerabilities) }
  let(:projects) { table(:projects) }
  let(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') }
  let(:user) { users.create!(name: 'test', email: 'test@example.com', projects_limit: 5) }

  # Creates a project with the given id and, when requested, a single
  # vulnerability that belongs to it.
  def create_project(id, with_vulnerabilities: false)
    project = projects.create!(id: id, namespace_id: namespace.id, name: 'foo')

    return unless with_vulnerabilities

    vulnerabilities.create!(
      title: 'title',
      state: 1,
      severity: 0,
      confidence: 5,
      report_type: 2,
      project_id: project.id,
      author_id: user.id
    )
  end

  it 'correctly schedules background migrations with projects with vulnerabilities only', :aggregate_failures do
    create_project(1, with_vulnerabilities: true)
    create_project(2, with_vulnerabilities: true)
    create_project(5, with_vulnerabilities: false)
    create_project(6, with_vulnerabilities: false)

    # A batch size of one puts every project into its own job.
    stub_const("#{described_class.name}::BATCH_SIZE", 1)

    Sidekiq::Testing.fake! do
      Timecop.freeze do
        migrate!

        expect(described_class::MIGRATION).to be_scheduled_delayed_migration(5.minutes, [1], 365)
        expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, [2], 365)

        # Projects 5 and 6 have no vulnerabilities, so only two jobs exist.
        expect(BackgroundMigrationWorker.jobs.size).to eq(2)
      end
    end
  end

  context 'for FOSS version' do
    before do
      allow(Gitlab).to receive(:ee?).and_return(false)
    end

    it 'does not schedule any jobs' do
      create_project(2)

      Sidekiq::Testing.fake! do
        Timecop.freeze do
          migrate!

          expect(BackgroundMigrationWorker.jobs.size).to eq(0)
        end
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment