Commit 73a36252 authored by Mehmet Emin INAC's avatar Mehmet Emin INAC Committed by Mayra Cabrera

Implement cronjob to adjust vulnerability statistics every day

This cronjob will schedule background jobs for every 1000 projects to
adjust their vulnerability statistics. The reason for this approach is
that if we tried to adjust the vulnerability statistics of all the projects
in one background job and the cluster crashed or was forcibly shut down,
the statistics would not be updated.
parent 27fe6f83
......@@ -586,6 +586,9 @@ Gitlab.ee do
Settings.cron_jobs['iterations_update_status_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['iterations_update_status_worker']['cron'] ||= '5 0 * * *'
Settings.cron_jobs['iterations_update_status_worker']['job_class'] = 'IterationsUpdateStatusWorker'
# Cron entry for Vulnerabilities::Statistics::ScheduleWorker.
# The default schedule '15 1 * * *' fires once a day at 01:15; `||=` keeps any
# schedule that was already configured, while the job class is always set here.
Settings.cron_jobs['vulnerability_statistics_schedule_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['vulnerability_statistics_schedule_worker']['cron'] ||= '15 1 * * *'
Settings.cron_jobs['vulnerability_statistics_schedule_worker']['job_class'] = 'Vulnerabilities::Statistics::ScheduleWorker'
end
#
......
......@@ -272,6 +272,8 @@
- 1
- - upload_checksum
- 1
- - vulnerabilities_statistics_adjustment
- 1
- - vulnerability_exports_export
- 1
- - vulnerability_exports_export_deletion
......
......@@ -110,6 +110,10 @@ class Vulnerability < ApplicationRecord
# Returns the PASSIVE_STATES constant.
def passive_states
PASSIVE_STATES
end
# Looks up every name listed in ACTIVE_STATES in the `states` mapping and
# returns the corresponding values, in the order of ACTIVE_STATES.
def active_state_values
  ACTIVE_STATES.map { |state_name| states[state_name] }
end
end
# There will only be one finding associated with a vulnerability for the foreseeable future
......
# frozen_string_literal: true
module Vulnerabilities
  module Statistics
    # Recalculates the `vulnerability_statistics` rows of a set of projects
    # with a single `INSERT ... ON CONFLICT (project_id) DO UPDATE` statement.
    #
    # For every given project the vulnerabilities in an active state (see
    # `Vulnerability.active_state_values`) are counted per severity and the
    # letter grade is derived from those counts:
    #
    #   critical > 0              => f
    #   high > 0 or unknown > 0   => d
    #   medium > 0                => c
    #   low > 0                   => b
    #   otherwise                 => a
    #
    # NOTE(review): the aggregate only yields rows for projects that have at
    # least one vulnerability in an active state, so an existing statistics row
    # of a project without any active vulnerability is left untouched by this
    # service. Confirm whether such rows should be reset to zero instead.
    class AdjustmentService
      # Raised by the constructor when more than MAX_PROJECTS ids are given.
      TooManyProjectsError = Class.new(StandardError)

      # Upsert statement; `%{stats_sql}` is filled with the rendered STATS_SQL.
      UPSERT_SQL = <<~SQL
        INSERT INTO vulnerability_statistics
          (project_id, total, info, unknown, low, medium, high, critical, letter_grade, created_at, updated_at)
          (%{stats_sql})
        ON CONFLICT (project_id)
        DO UPDATE SET
          total = EXCLUDED.total,
          info = EXCLUDED.info,
          unknown = EXCLUDED.unknown,
          low = EXCLUDED.low,
          medium = EXCLUDED.medium,
          high = EXCLUDED.high,
          critical = EXCLUDED.critical,
          letter_grade = EXCLUDED.letter_grade,
          updated_at = EXCLUDED.updated_at
      SQL

      # Per-project severity counts plus the derived letter grade.
      # `%{project_ids}` and `%{active_states}` are filled in by #stats_sql.
      STATS_SQL = <<~SQL
        SELECT
          severity_counts.*,
          (
            CASE
            WHEN severity_counts.critical > 0 THEN
              #{Statistic.letter_grades['f']}
            WHEN severity_counts.high > 0 OR severity_counts.unknown > 0 THEN
              #{Statistic.letter_grades['d']}
            WHEN severity_counts.medium > 0 THEN
              #{Statistic.letter_grades['c']}
            WHEN severity_counts.low > 0 THEN
              #{Statistic.letter_grades['b']}
            ELSE
              #{Statistic.letter_grades['a']}
            END
          ) AS letter_grade,
          now() AS created_at,
          now() AS updated_at
        FROM (
          SELECT
            vulnerabilities.project_id AS project_id,
            COUNT(*) AS total,
            COUNT(*) FILTER (WHERE severity = #{Vulnerability.severities['info']}) as info,
            COUNT(*) FILTER (WHERE severity = #{Vulnerability.severities['unknown']}) as unknown,
            COUNT(*) FILTER (WHERE severity = #{Vulnerability.severities['low']}) as low,
            COUNT(*) FILTER (WHERE severity = #{Vulnerability.severities['medium']}) as medium,
            COUNT(*) FILTER (WHERE severity = #{Vulnerability.severities['high']}) as high,
            COUNT(*) FILTER (WHERE severity = #{Vulnerability.severities['critical']}) as critical
          FROM vulnerabilities
          WHERE vulnerabilities.project_id IN (%{project_ids}) AND state IN (%{active_states})
          GROUP BY vulnerabilities.project_id
        ) AS severity_counts
      SQL

      # Upper bound on the number of projects handled by one statement.
      MAX_PROJECTS = 1_000

      # Convenience entry point: builds the service and runs it.
      #
      # @param project_ids [Array<Integer>] ids of the projects to adjust
      # @return the result of #execute
      def self.execute(project_ids)
        new(project_ids).execute
      end

      # @param project_ids [Array<Integer>] ids of the projects to adjust
      # @raise [TooManyProjectsError] when more than MAX_PROJECTS ids are given
      # @raise [ArgumentError, TypeError] when an id cannot be converted to an Integer
      def initialize(project_ids)
        raise TooManyProjectsError, "Cannot adjust statistics for more than #{MAX_PROJECTS} projects" if project_ids.size > MAX_PROJECTS

        # The ids are interpolated into raw SQL, so make sure that nothing but
        # integers can ever reach the statement.
        self.project_ids = project_ids.map { |id| Integer(id) }.join(', ')
      end

      # Runs the upsert statement.
      #
      # @return the result of `connection.execute`, or nil when no project ids
      #   were given (an empty `IN ()` list is not valid SQL)
      def execute
        return if project_ids.empty?

        connection.execute(upsert_sql)
      end

      private

      # Comma separated list of integer ids, ready for interpolation.
      attr_accessor :project_ids

      delegate :connection, to: ApplicationRecord, private: true

      def upsert_sql
        UPSERT_SQL % { stats_sql: stats_sql }
      end

      def stats_sql
        STATS_SQL % { project_ids: project_ids, active_states: active_states }
      end

      def active_states
        Vulnerability.active_state_values.join(', ')
      end
    end
  end
end
......@@ -251,6 +251,14 @@
:weight: 1
:idempotent:
:tags: []
- :name: cronjob:vulnerabilities_statistics_schedule
:feature_category: :vulnerability_management
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent:
:tags: []
- :name: dependency_proxy:purge_dependency_proxy_cache
:feature_category: :dependency_proxy
:has_external_dependencies:
......@@ -707,6 +715,14 @@
:weight: 1
:idempotent: true
:tags: []
- :name: vulnerabilities_statistics_adjustment
:feature_category: :vulnerability_management
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent:
:tags: []
- :name: vulnerability_exports_export
:feature_category: :vulnerability_management
:has_external_dependencies:
......
# frozen_string_literal: true
module Vulnerabilities
  module Statistics
    # Background job that hands a list of project ids over to
    # AdjustmentService so their vulnerability statistics get recalculated.
    class AdjustmentWorker # rubocop:disable Scalability/IdempotentWorker
      include ApplicationWorker

      feature_category :vulnerability_management

      # @param project_ids [Array] ids of the projects to adjust; passed to
      #   the service unchanged
      def perform(project_ids)
        ::Vulnerabilities::Statistics::AdjustmentService.execute(project_ids)
      end
    end
  end
end
# frozen_string_literal: true
module Vulnerabilities
  module Statistics
    # Cron worker that walks over the projects returned by
    # `Project.without_deleted.has_vulnerabilities` in batches of BATCH_SIZE
    # and enqueues one AdjustmentWorker job per batch.
    class ScheduleWorker # rubocop:disable Scalability/IdempotentWorker
      include ApplicationWorker
      # rubocop:disable Scalability/CronWorkerContext
      # This worker does not perform work scoped to a context
      include CronjobQueue
      # rubocop:enable Scalability/CronWorkerContext

      feature_category :vulnerability_management

      # Number of projects handed to a single AdjustmentWorker job.
      BATCH_SIZE = 500
      # Seconds added to the job delay for every batch index.
      DELAY_INTERVAL = 30.seconds.to_i

      # Enqueues the adjustment jobs; the delay of each job is the batch index
      # multiplied by DELAY_INTERVAL, which spreads the jobs out over time.
      def perform
        projects_to_adjust.each_batch(of: BATCH_SIZE) do |batch, batch_index|
          delay = batch_index * DELAY_INTERVAL
          batch_project_ids = batch.pluck(:id) # rubocop: disable CodeReuse/ActiveRecord

          AdjustmentWorker.perform_in(delay, batch_project_ids)
        end
      end

      private

      # Relation of the projects whose statistics are going to be adjusted.
      def projects_to_adjust
        Project.without_deleted.has_vulnerabilities
      end
    end
  end
end
......@@ -29,6 +29,22 @@ FactoryBot.define do
confirmed_at { Time.current }
end
# Defines the `critical_severity`, `high_severity`, `medium_severity` and
# `low_severity` traits; each one sets `severity` to the matching level.
%i[critical high medium low].each do |level|
  trait :"#{level}_severity" do
    severity { level }
  end
end
::Vulnerabilities::Occurrence::SEVERITY_LEVELS.keys.each do |severity_level|
trait severity_level do
severity { severity_level }
......
......@@ -3,5 +3,6 @@
# Factory for Vulnerabilities::Statistic records.
FactoryBot.define do
factory :vulnerability_statistic, class: 'Vulnerabilities::Statistic' do
project
# Records built by this factory get the :a letter grade unless overridden.
letter_grade { :a }
end
end
......@@ -234,6 +234,14 @@ RSpec.describe Vulnerability do
end
end
# The class method must return the enum values of the `detected` and
# `confirmed` states (order is not asserted).
describe '.active_state_values' do
  subject { described_class.active_state_values }

  let(:expected_values) { described_class.states.values_at('detected', 'confirmed') }

  it { is_expected.to match_array(expected_values) }
end
describe '#finding' do
let_it_be(:project) { create(:project, :with_vulnerability) }
let_it_be(:vulnerability) { project.vulnerabilities.first }
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the service that upserts the vulnerability statistics of projects.
RSpec.describe Vulnerabilities::Statistics::AdjustmentService do
  # `.execute` is only a shortcut for `new(project_ids).execute`, so the
  # instance is replaced by a double and no database record is needed.
  describe '.execute' do
    let(:project_ids) { [1, 2, 3] }
    let(:mock_service_object) { instance_double(described_class, execute: true) }

    subject(:execute_for_project_ids) { described_class.execute(project_ids) }

    before do
      allow(described_class).to receive(:new).with([1, 2, 3]).and_return(mock_service_object)
    end

    it 'instantiates the service object for given project ids and calls `execute` on them' do
      execute_for_project_ids

      expect(mock_service_object).to have_received(:execute)
    end
  end

  describe '#execute' do
    let(:project) { create(:project) }
    # Only the counters and the letter grade are compared; ids and timestamps are ignored.
    let(:statistics) { project.vulnerability_statistic.reload.as_json(except: [:id, :project_id, :created_at, :updated_at]) }
    let(:project_ids) { [project.id] }

    # One critical and one high vulnerability => total of 2 and letter grade 'f'.
    let(:expected_statistics) do
      {
        'total' => 2,
        'critical' => 1,
        'high' => 1,
        'medium' => 0,
        'low' => 0,
        'info' => 0,
        'unknown' => 0,
        'letter_grade' => 'f'
      }
    end

    subject(:adjust_statistics) { described_class.new(project_ids).execute }

    before do
      create(:vulnerability, :critical_severity, project: project)
      create(:vulnerability, :high_severity, project: project)
    end

    context 'when more than 1000 projects is provided' do
      let(:project_ids) { (1..1001).to_a }

      it 'raises error' do
        expect { adjust_statistics }.to raise_error(described_class::TooManyProjectsError, 'Cannot adjust statistics for more than 1000 projects')
      end
    end

    context 'when there is no vulnerability_statistic record for project' do
      before do
        Vulnerabilities::Statistic.where(project: project).delete_all
      end

      it 'creates a new record' do
        expect { adjust_statistics }.to change { Vulnerabilities::Statistic.count }.by(1)
      end

      it 'sets the correct values for the record' do
        adjust_statistics

        expect(statistics).to eq(expected_statistics)
      end
    end

    context 'when there is already a vulnerability_statistic record for project' do
      before do
        project.vulnerability_statistic ||= create(:vulnerability_statistic, project: project)
        # Make the stored numbers stale so the adjustment has something to correct.
        Vulnerabilities::Statistic.where(project: project).update_all(critical: 0, total: 0)
      end

      it 'does not create a new record in database' do
        expect { adjust_statistics }.not_to change { Vulnerabilities::Statistic.count }
      end

      it 'sets the correct values for the record' do
        adjust_statistics

        expect(statistics).to eq(expected_statistics)
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Verifies that the worker forwards its argument to the adjustment service.
RSpec.describe Vulnerabilities::Statistics::AdjustmentWorker do
  let(:worker) { described_class.new }

  describe "#perform" do
    let(:project_ids) { [1, 2, 3] }
    let(:service_class) { Vulnerabilities::Statistics::AdjustmentService }

    before do
      # The service itself is stubbed; only the delegation is checked here.
      allow(service_class).to receive(:execute)
    end

    it 'calls `Vulnerabilities::Statistics::AdjustmentService` with given project_ids' do
      worker.perform(project_ids)

      expect(service_class).to have_received(:execute).with(project_ids)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Verifies that the cron worker schedules an adjustment job for the project
# that is not pending deletion, with a 30 second delay.
RSpec.describe Vulnerabilities::Statistics::ScheduleWorker do
  let(:worker) { described_class.new }

  describe "#perform" do
    let(:project) { create(:project) }
    let(:deleted_project) { create(:project, pending_delete: true) }
    let(:adjustment_worker) { Vulnerabilities::Statistics::AdjustmentWorker }

    before do
      # Both projects get a vulnerability; only `project` is expected to be scheduled.
      create(:vulnerability, project: project)
      create(:vulnerability, project: deleted_project)

      allow(adjustment_worker).to receive(:perform_in)
    end

    it 'schedules the AdjustmentWorker with project_ids' do
      worker.perform

      expect(adjustment_worker).to have_received(:perform_in).with(30, [project.id])
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment