Commit edf1fdbf authored by Erick Bajao's avatar Erick Bajao Committed by Bob Van Landuyt

Recalculate project build artifacts size

This adds a new service and worker that refreshes the project
statistics build artifacts size and recalculates by batches.
parent 73349120
...@@ -102,9 +102,7 @@ module CounterAttribute ...@@ -102,9 +102,7 @@ module CounterAttribute
run_after_commit_or_now do run_after_commit_or_now do
if counter_attribute_enabled?(attribute) if counter_attribute_enabled?(attribute)
redis_state do |redis| increment_counter(attribute, increment)
redis.incrby(counter_key(attribute), increment)
end
FlushCounterIncrementsWorker.perform_in(WORKER_DELAY, self.class.name, self.id, attribute) FlushCounterIncrementsWorker.perform_in(WORKER_DELAY, self.class.name, self.id, attribute)
else else
...@@ -115,6 +113,28 @@ module CounterAttribute ...@@ -115,6 +113,28 @@ module CounterAttribute
true true
end end
def increment_counter(attribute, increment)
if counter_attribute_enabled?(attribute)
redis_state do |redis|
redis.incrby(counter_key(attribute), increment)
end
end
end
def clear_counter!(attribute)
if counter_attribute_enabled?(attribute)
redis_state { |redis| redis.del(counter_key(attribute)) }
end
end
def get_counter_value(attribute)
if counter_attribute_enabled?(attribute)
redis_state do |redis|
redis.get(counter_key(attribute)).to_i
end
end
end
def counter_key(attribute) def counter_key(attribute)
"project:{#{project_id}}:counters:#{self.class}:#{id}:#{attribute}" "project:{#{project_id}}:counters:#{self.class}:#{id}:#{attribute}"
end end
......
# frozen_string_literal: true
module Projects
class BuildArtifactsSizeRefresh < ApplicationRecord
include BulkInsertSafe
STALE_WINDOW = 3.days
self.table_name = 'project_build_artifacts_size_refreshes'
belongs_to :project
validates :project, presence: true
STATES = {
created: 1,
running: 2,
pending: 3
}.freeze
state_machine :state, initial: :created do
# created -> running <-> pending
state :created, value: STATES[:created]
state :running, value: STATES[:running]
state :pending, value: STATES[:pending]
event :process do
transition [:created, :pending, :running] => :running
end
event :requeue do
transition running: :pending
end
# set it only the first time we execute the refresh
before_transition created: :running do |refresh|
refresh.reset_project_statistics!
refresh.refresh_started_at = Time.zone.now
end
before_transition running: any do |refresh, transition|
refresh.updated_at = Time.zone.now
end
before_transition running: :pending do |refresh, transition|
refresh.last_job_artifact_id = transition.args.first
end
end
scope :stale, -> { with_state(:running).where('updated_at < ?', STALE_WINDOW.ago) }
scope :remaining, -> { with_state(:created, :pending).or(stale) }
def self.enqueue_refresh(projects)
now = Time.zone.now
records = Array(projects).map do |project|
new(project: project, state: STATES[:created], created_at: now, updated_at: now)
end
bulk_insert!(records, skip_duplicates: true)
end
def self.process_next_refresh!
next_refresh = nil
transaction do
next_refresh = remaining
.order(:state, :updated_at)
.lock('FOR UPDATE SKIP LOCKED')
.take
next_refresh&.process!
end
next_refresh
end
def reset_project_statistics!
statistics = project.statistics
statistics.update!(build_artifacts_size: 0)
statistics.clear_counter!(:build_artifacts_size)
end
def next_batch(limit:)
project.job_artifacts.select(:id, :size)
.where('created_at <= ? AND id > ?', refresh_started_at, last_job_artifact_id.to_i)
.order(:created_at)
.limit(limit)
end
end
end
# frozen_string_literal: true
module Projects
class RefreshBuildArtifactsSizeStatisticsService
BATCH_SIZE = 1000
def execute
refresh = Projects::BuildArtifactsSizeRefresh.process_next_refresh!
return unless refresh
batch = refresh.next_batch(limit: BATCH_SIZE).to_a
if batch.any?
# We are doing the sum in ruby because the query takes too long when done in SQL
total_artifacts_size = batch.sum(&:size)
Projects::BuildArtifactsSizeRefresh.transaction do
# Mark the refresh ready for another worker to pick up and process the next batch
refresh.requeue!(batch.last.id)
refresh.project.statistics.delayed_increment_counter(:build_artifacts_size, total_artifacts_size)
end
else
# Remove the refresh job from the table if there are no more
# remaining job artifacts to calculate for the given project.
refresh.destroy!
end
refresh
end
end
end
...@@ -561,6 +561,15 @@ ...@@ -561,6 +561,15 @@
:weight: 1 :weight: 1
:idempotent: :idempotent:
:tags: [] :tags: []
- :name: cronjob:projects_schedule_refresh_build_artifacts_size_statistics
:worker_name: Projects::ScheduleRefreshBuildArtifactsSizeStatisticsWorker
:feature_category: :build_artifacts
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: cronjob:prune_old_events - :name: cronjob:prune_old_events
:worker_name: PruneOldEventsWorker :worker_name: PruneOldEventsWorker
:feature_category: :users :feature_category: :users
...@@ -2803,6 +2812,15 @@ ...@@ -2803,6 +2812,15 @@
:weight: 1 :weight: 1
:idempotent: true :idempotent: true
:tags: [] :tags: []
- :name: projects_refresh_build_artifacts_size_statistics
:worker_name: Projects::RefreshBuildArtifactsSizeStatisticsWorker
:feature_category: :build_artifacts
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: projects_schedule_bulk_repository_shard_moves - :name: projects_schedule_bulk_repository_shard_moves
:worker_name: Projects::ScheduleBulkRepositoryShardMovesWorker :worker_name: Projects::ScheduleBulkRepositoryShardMovesWorker
:feature_category: :gitaly :feature_category: :gitaly
......
# frozen_string_literal: true
module Projects
class RefreshBuildArtifactsSizeStatisticsWorker
include ApplicationWorker
include LimitedCapacity::Worker
MAX_RUNNING_LOW = 2
MAX_RUNNING_MEDIUM = 20
MAX_RUNNING_HIGH = 50
data_consistency :always
feature_category :build_artifacts
idempotent!
def perform_work(*args)
refresh = Projects::RefreshBuildArtifactsSizeStatisticsService.new.execute
return unless refresh
log_extra_metadata_on_done(:project_id, refresh.project_id)
log_extra_metadata_on_done(:last_job_artifact_id, refresh.last_job_artifact_id)
log_extra_metadata_on_done(:last_batch, refresh.destroyed?)
log_extra_metadata_on_done(:refresh_started_at, refresh.refresh_started_at)
end
def remaining_work_count(*args)
# LimitedCapacity::Worker only needs to know if there is work left to do
# so we can get by with an EXISTS query rather than a count.
# https://gitlab.com/gitlab-org/gitlab/-/issues/356167
if Projects::BuildArtifactsSizeRefresh.remaining.any?
1
else
0
end
end
def max_running_jobs
if ::Feature.enabled?(:projects_build_artifacts_size_refresh_high)
MAX_RUNNING_HIGH
elsif ::Feature.enabled?(:projects_build_artifacts_size_refresh_medium)
MAX_RUNNING_MEDIUM
elsif ::Feature.enabled?(:projects_build_artifacts_size_refresh_low)
MAX_RUNNING_LOW
else
0
end
end
end
end
# frozen_string_literal: true
module Projects
class ScheduleRefreshBuildArtifactsSizeStatisticsWorker
include ApplicationWorker
include CronjobQueue # rubocop:disable Scalability/CronWorkerContext
data_consistency :always
feature_category :build_artifacts
idempotent!
def perform
Projects::RefreshBuildArtifactsSizeStatisticsWorker.perform_with_capacity
end
end
end
---
name: projects_build_artifacts_size_refresh_high
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/81306
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/356018
milestone: '14.9'
type: development
group: group::pipeline insights
default_enabled: false
---
name: projects_build_artifacts_size_refresh_low
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/81306
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/356018
milestone: '14.9'
type: development
group: group::pipeline insights
default_enabled: false
---
name: projects_build_artifacts_size_refresh_medium
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/81306
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/356018
milestone: '14.9'
type: development
group: group::pipeline insights
default_enabled: false
...@@ -620,6 +620,9 @@ Settings.cron_jobs['issues_reschedule_stuck_issue_rebalances']['job_class'] = 'I ...@@ -620,6 +620,9 @@ Settings.cron_jobs['issues_reschedule_stuck_issue_rebalances']['job_class'] = 'I
Settings.cron_jobs['clusters_integrations_check_prometheus_health_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['clusters_integrations_check_prometheus_health_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['clusters_integrations_check_prometheus_health_worker']['cron'] ||= '0 * * * *' Settings.cron_jobs['clusters_integrations_check_prometheus_health_worker']['cron'] ||= '0 * * * *'
Settings.cron_jobs['clusters_integrations_check_prometheus_health_worker']['job_class'] = 'Clusters::Integrations::CheckPrometheusHealthWorker' Settings.cron_jobs['clusters_integrations_check_prometheus_health_worker']['job_class'] = 'Clusters::Integrations::CheckPrometheusHealthWorker'
Settings.cron_jobs['projects_schedule_refresh_build_artifacts_size_statistics_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['projects_schedule_refresh_build_artifacts_size_statistics_worker']['cron'] ||= '2/17 * * * *'
Settings.cron_jobs['projects_schedule_refresh_build_artifacts_size_statistics_worker']['job_class'] = 'Projects::ScheduleRefreshBuildArtifactsSizeStatisticsWorker'
Gitlab.ee do Gitlab.ee do
Settings.cron_jobs['analytics_devops_adoption_create_all_snapshots_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['analytics_devops_adoption_create_all_snapshots_worker'] ||= Settingslogic.new({})
......
...@@ -363,6 +363,8 @@ ...@@ -363,6 +363,8 @@
- 1 - 1
- - projects_process_sync_events - - projects_process_sync_events
- 1 - 1
- - projects_refresh_build_artifacts_size_statistics
- 1
- - projects_schedule_bulk_repository_shard_moves - - projects_schedule_bulk_repository_shard_moves
- 1 - 1
- - projects_update_repository_storage - - projects_update_repository_storage
......
# frozen_string_literal: true
class CreateProjectBuildArtifactsSizeRefresh < Gitlab::Database::Migration[1.0]
enable_lock_retries!
CREATED_STATE = 1
def change
create_table :project_build_artifacts_size_refreshes do |t|
t.references :project, index: { unique: true }, foreign_key: { on_delete: :cascade }, null: false
t.bigint :last_job_artifact_id, null: true
t.integer :state, null: false, default: CREATED_STATE, limit: 1
t.datetime_with_timezone :refresh_started_at, null: true
t.timestamps_with_timezone null: false
# We will use this index for 2 purposes:
# - for finding rows with state = :waiting
# - for finding rows with state = :running and updated_at < x.days.ago
# which we can use to find jobs that were not able to complete and considered
# stale so we can retry
t.index [:state, :updated_at], name: 'idx_build_artifacts_size_refreshes_state_updated_at'
end
end
end
# frozen_string_literal: true
class AddIndexCiJobArtifactsProjectIdCreatedAt < Gitlab::Database::Migration[1.0]
INDEX_NAME = 'index_ci_job_artifacts_on_id_project_id_and_created_at'
disable_ddl_transaction!
def up
add_concurrent_index :ci_job_artifacts, [:project_id, :created_at, :id], name: INDEX_NAME
end
def down
remove_concurrent_index_by_name :ci_job_artifacts, INDEX_NAME
end
end
d0a8daf9fb9892fc92b03f13de4d7e470e5c54f03b09f887cdd45bc5eb9a7e37
\ No newline at end of file
7992448797888fd69d1e5cd4f2602e5a2b49a57052c50b19522f37d711c9f2f2
\ No newline at end of file
...@@ -18974,6 +18974,25 @@ CREATE SEQUENCE project_auto_devops_id_seq ...@@ -18974,6 +18974,25 @@ CREATE SEQUENCE project_auto_devops_id_seq
ALTER SEQUENCE project_auto_devops_id_seq OWNED BY project_auto_devops.id; ALTER SEQUENCE project_auto_devops_id_seq OWNED BY project_auto_devops.id;
CREATE TABLE project_build_artifacts_size_refreshes (
id bigint NOT NULL,
project_id bigint NOT NULL,
last_job_artifact_id bigint,
state smallint DEFAULT 1 NOT NULL,
refresh_started_at timestamp with time zone,
created_at timestamp with time zone NOT NULL,
updated_at timestamp with time zone NOT NULL
);
CREATE SEQUENCE project_build_artifacts_size_refreshes_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE project_build_artifacts_size_refreshes_id_seq OWNED BY project_build_artifacts_size_refreshes.id;
CREATE TABLE project_ci_cd_settings ( CREATE TABLE project_ci_cd_settings (
id integer NOT NULL, id integer NOT NULL,
project_id integer NOT NULL, project_id integer NOT NULL,
...@@ -22870,6 +22889,8 @@ ALTER TABLE ONLY project_aliases ALTER COLUMN id SET DEFAULT nextval('project_al ...@@ -22870,6 +22889,8 @@ ALTER TABLE ONLY project_aliases ALTER COLUMN id SET DEFAULT nextval('project_al
ALTER TABLE ONLY project_auto_devops ALTER COLUMN id SET DEFAULT nextval('project_auto_devops_id_seq'::regclass); ALTER TABLE ONLY project_auto_devops ALTER COLUMN id SET DEFAULT nextval('project_auto_devops_id_seq'::regclass);
ALTER TABLE ONLY project_build_artifacts_size_refreshes ALTER COLUMN id SET DEFAULT nextval('project_build_artifacts_size_refreshes_id_seq'::regclass);
ALTER TABLE ONLY project_ci_cd_settings ALTER COLUMN id SET DEFAULT nextval('project_ci_cd_settings_id_seq'::regclass); ALTER TABLE ONLY project_ci_cd_settings ALTER COLUMN id SET DEFAULT nextval('project_ci_cd_settings_id_seq'::regclass);
ALTER TABLE ONLY project_ci_feature_usages ALTER COLUMN id SET DEFAULT nextval('project_ci_feature_usages_id_seq'::regclass); ALTER TABLE ONLY project_ci_feature_usages ALTER COLUMN id SET DEFAULT nextval('project_ci_feature_usages_id_seq'::regclass);
...@@ -24940,6 +24961,9 @@ ALTER TABLE ONLY project_authorizations ...@@ -24940,6 +24961,9 @@ ALTER TABLE ONLY project_authorizations
ALTER TABLE ONLY project_auto_devops ALTER TABLE ONLY project_auto_devops
ADD CONSTRAINT project_auto_devops_pkey PRIMARY KEY (id); ADD CONSTRAINT project_auto_devops_pkey PRIMARY KEY (id);
ALTER TABLE ONLY project_build_artifacts_size_refreshes
ADD CONSTRAINT project_build_artifacts_size_refreshes_pkey PRIMARY KEY (id);
ALTER TABLE ONLY project_ci_cd_settings ALTER TABLE ONLY project_ci_cd_settings
ADD CONSTRAINT project_ci_cd_settings_pkey PRIMARY KEY (id); ADD CONSTRAINT project_ci_cd_settings_pkey PRIMARY KEY (id);
...@@ -26398,6 +26422,8 @@ CREATE INDEX idx_audit_events_part_on_entity_id_desc_author_id_created_at ON ONL ...@@ -26398,6 +26422,8 @@ CREATE INDEX idx_audit_events_part_on_entity_id_desc_author_id_created_at ON ONL
CREATE INDEX idx_award_emoji_on_user_emoji_name_awardable_type_awardable_id ON award_emoji USING btree (user_id, name, awardable_type, awardable_id); CREATE INDEX idx_award_emoji_on_user_emoji_name_awardable_type_awardable_id ON award_emoji USING btree (user_id, name, awardable_type, awardable_id);
CREATE INDEX idx_build_artifacts_size_refreshes_state_updated_at ON project_build_artifacts_size_refreshes USING btree (state, updated_at);
CREATE INDEX idx_ci_pipelines_artifacts_locked ON ci_pipelines USING btree (ci_ref_id, id) WHERE (locked = 1); CREATE INDEX idx_ci_pipelines_artifacts_locked ON ci_pipelines USING btree (ci_ref_id, id) WHERE (locked = 1);
CREATE INDEX idx_container_exp_policies_on_project_id_next_run_at ON container_expiration_policies USING btree (project_id, next_run_at) WHERE (enabled = true); CREATE INDEX idx_container_exp_policies_on_project_id_next_run_at ON container_expiration_policies USING btree (project_id, next_run_at) WHERE (enabled = true);
...@@ -26920,6 +26946,8 @@ CREATE INDEX index_ci_job_artifacts_on_file_store ON ci_job_artifacts USING btre ...@@ -26920,6 +26946,8 @@ CREATE INDEX index_ci_job_artifacts_on_file_store ON ci_job_artifacts USING btre
CREATE INDEX index_ci_job_artifacts_on_file_type_for_devops_adoption ON ci_job_artifacts USING btree (file_type, project_id, created_at) WHERE (file_type = ANY (ARRAY[5, 6, 8, 23])); CREATE INDEX index_ci_job_artifacts_on_file_type_for_devops_adoption ON ci_job_artifacts USING btree (file_type, project_id, created_at) WHERE (file_type = ANY (ARRAY[5, 6, 8, 23]));
CREATE INDEX index_ci_job_artifacts_on_id_project_id_and_created_at ON ci_job_artifacts USING btree (project_id, created_at, id);
CREATE INDEX index_ci_job_artifacts_on_id_project_id_and_file_type ON ci_job_artifacts USING btree (project_id, file_type, id); CREATE INDEX index_ci_job_artifacts_on_id_project_id_and_file_type ON ci_job_artifacts USING btree (project_id, file_type, id);
CREATE UNIQUE INDEX index_ci_job_artifacts_on_job_id_and_file_type ON ci_job_artifacts USING btree (job_id, file_type); CREATE UNIQUE INDEX index_ci_job_artifacts_on_job_id_and_file_type ON ci_job_artifacts USING btree (job_id, file_type);
...@@ -28528,6 +28556,8 @@ CREATE INDEX index_project_aliases_on_project_id ON project_aliases USING btree ...@@ -28528,6 +28556,8 @@ CREATE INDEX index_project_aliases_on_project_id ON project_aliases USING btree
CREATE UNIQUE INDEX index_project_auto_devops_on_project_id ON project_auto_devops USING btree (project_id); CREATE UNIQUE INDEX index_project_auto_devops_on_project_id ON project_auto_devops USING btree (project_id);
CREATE UNIQUE INDEX index_project_build_artifacts_size_refreshes_on_project_id ON project_build_artifacts_size_refreshes USING btree (project_id);
CREATE UNIQUE INDEX index_project_ci_cd_settings_on_project_id ON project_ci_cd_settings USING btree (project_id); CREATE UNIQUE INDEX index_project_ci_cd_settings_on_project_id ON project_ci_cd_settings USING btree (project_id);
CREATE UNIQUE INDEX index_project_ci_feature_usages_unique_columns ON project_ci_feature_usages USING btree (project_id, feature, default_branch); CREATE UNIQUE INDEX index_project_ci_feature_usages_unique_columns ON project_ci_feature_usages USING btree (project_id, feature, default_branch);
...@@ -32667,6 +32697,9 @@ ALTER TABLE ONLY list_user_preferences ...@@ -32667,6 +32697,9 @@ ALTER TABLE ONLY list_user_preferences
ALTER TABLE ONLY merge_request_cleanup_schedules ALTER TABLE ONLY merge_request_cleanup_schedules
ADD CONSTRAINT fk_rails_92dd0e705c FOREIGN KEY (merge_request_id) REFERENCES merge_requests(id) ON DELETE CASCADE; ADD CONSTRAINT fk_rails_92dd0e705c FOREIGN KEY (merge_request_id) REFERENCES merge_requests(id) ON DELETE CASCADE;
ALTER TABLE ONLY project_build_artifacts_size_refreshes
ADD CONSTRAINT fk_rails_936db5fc44 FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE;
ALTER TABLE ONLY board_labels ALTER TABLE ONLY board_labels
ADD CONSTRAINT fk_rails_9374a16edd FOREIGN KEY (board_id) REFERENCES boards(id) ON DELETE CASCADE; ADD CONSTRAINT fk_rails_9374a16edd FOREIGN KEY (board_id) REFERENCES boards(id) ON DELETE CASCADE;
...@@ -401,6 +401,7 @@ project_alerting_settings: :gitlab_main ...@@ -401,6 +401,7 @@ project_alerting_settings: :gitlab_main
project_aliases: :gitlab_main project_aliases: :gitlab_main
project_authorizations: :gitlab_main project_authorizations: :gitlab_main
project_auto_devops: :gitlab_main project_auto_devops: :gitlab_main
project_build_artifacts_size_refreshes: :gitlab_main
project_ci_cd_settings: :gitlab_main project_ci_cd_settings: :gitlab_main
project_ci_feature_usages: :gitlab_main project_ci_feature_usages: :gitlab_main
project_compliance_framework_settings: :gitlab_main project_compliance_framework_settings: :gitlab_main
......
# frozen_string_literal: true
namespace :gitlab do
desc "GitLab | Refresh build artifacts size project statistics for given project IDs"
BUILD_ARTIFACTS_SIZE_REFRESH_ENQUEUE_BATCH_SIZE = 500
task :refresh_project_statistics_build_artifacts_size, [:project_ids] => :environment do |_t, args|
project_ids = []
project_ids = $stdin.read.split unless $stdin.tty?
project_ids = args.project_ids.to_s.split unless project_ids.any?
if project_ids.any?
project_ids.in_groups_of(BUILD_ARTIFACTS_SIZE_REFRESH_ENQUEUE_BATCH_SIZE) do |ids|
projects = Project.where(id: ids)
Projects::BuildArtifactsSizeRefresh.enqueue_refresh(projects)
end
puts 'Done.'.green
else
puts 'Please provide a string of space-separated project IDs as the argument or through the STDIN'.red
end
end
end
...@@ -67,6 +67,7 @@ RSpec.describe 'Database schema' do ...@@ -67,6 +67,7 @@ RSpec.describe 'Database schema' do
oauth_access_tokens: %w[resource_owner_id application_id], oauth_access_tokens: %w[resource_owner_id application_id],
oauth_applications: %w[owner_id], oauth_applications: %w[owner_id],
product_analytics_events_experimental: %w[event_id txn_id user_id], product_analytics_events_experimental: %w[event_id txn_id user_id],
project_build_artifacts_size_refreshes: %w[last_job_artifact_id],
project_group_links: %w[group_id], project_group_links: %w[group_id],
project_statistics: %w[namespace_id], project_statistics: %w[namespace_id],
projects: %w[creator_id ci_id mirror_user_id], projects: %w[creator_id ci_id mirror_user_id],
......
# frozen_string_literal: true
FactoryBot.define do
factory :project_build_artifacts_size_refresh, class: 'Projects::BuildArtifactsSizeRefresh' do
project factory: :project
trait :created do
state { Projects::BuildArtifactsSizeRefresh::STATES[:created] }
end
trait :pending do
state { Projects::BuildArtifactsSizeRefresh::STATES[:pending] }
refresh_started_at { Time.zone.now }
end
trait :running do
state { Projects::BuildArtifactsSizeRefresh::STATES[:running] }
refresh_started_at { Time.zone.now }
end
trait :stale do
running
refresh_started_at { 30.days.ago }
updated_at { 30.days.ago }
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Projects::BuildArtifactsSizeRefresh, type: :model do
describe 'associations' do
it { is_expected.to belong_to(:project) }
end
it_behaves_like 'having unique enum values'
describe 'validations' do
it { is_expected.to validate_presence_of(:project) }
end
describe 'scopes' do
let_it_be(:refresh_1) { create(:project_build_artifacts_size_refresh, :running, updated_at: 4.days.ago) }
let_it_be(:refresh_2) { create(:project_build_artifacts_size_refresh, :running, updated_at: 2.days.ago) }
let_it_be(:refresh_3) { create(:project_build_artifacts_size_refresh, :pending) }
let_it_be(:refresh_4) { create(:project_build_artifacts_size_refresh, :created) }
describe 'stale' do
it 'returns records in running state and has not been updated for more than 3 days' do
expect(described_class.stale).to eq([refresh_1])
end
end
describe 'remaining' do
it 'returns stale, created, and pending records' do
expect(described_class.remaining).to match_array([refresh_1, refresh_3, refresh_4])
end
end
end
describe 'state machine', :clean_gitlab_redis_shared_state do
around do |example|
freeze_time { example.run }
end
let(:now) { Time.zone.now }
describe 'initial state' do
let(:refresh) { create(:project_build_artifacts_size_refresh) }
it 'defaults to created' do
expect(refresh).to be_created
end
end
describe '#process!' do
context 'when refresh state is created' do
let!(:refresh) do
create(
:project_build_artifacts_size_refresh,
:created,
updated_at: 2.days.ago,
refresh_started_at: nil,
last_job_artifact_id: nil
)
end
before do
stats = create(:project_statistics, project: refresh.project, build_artifacts_size: 120)
stats.increment_counter(:build_artifacts_size, 30)
end
it 'transitions the state to running' do
expect { refresh.process! }.to change { refresh.reload.state }.to(described_class::STATES[:running])
end
it 'sets the refresh_started_at' do
expect { refresh.process! }.to change { refresh.reload.refresh_started_at.to_i }.to(now.to_i)
end
it 'bumps the updated_at' do
expect { refresh.process! }.to change { refresh.reload.updated_at.to_i }.to(now.to_i)
end
it 'resets the build artifacts size stats' do
expect { refresh.process! }.to change { refresh.project.statistics.reload.build_artifacts_size }.to(0)
end
it 'resets the counter attribute to zero' do
expect { refresh.process! }.to change { refresh.project.statistics.get_counter_value(:build_artifacts_size) }.to(0)
end
end
context 'when refresh state is pending' do
let!(:refresh) do
create(
:project_build_artifacts_size_refresh,
:pending,
updated_at: 2.days.ago
)
end
before do
create(:project_statistics, project: refresh.project)
end
it 'transitions the state to running' do
expect { refresh.process! }.to change { refresh.reload.state }.to(described_class::STATES[:running])
end
it 'bumps the updated_at' do
expect { refresh.process! }.to change { refresh.reload.updated_at.to_i }.to(now.to_i)
end
end
context 'when refresh state is running' do
let!(:refresh) do
create(
:project_build_artifacts_size_refresh,
:running,
updated_at: 2.days.ago
)
end
before do
create(:project_statistics, project: refresh.project)
end
it 'keeps the state at running' do
expect { refresh.process! }.not_to change { refresh.reload.state }
end
it 'bumps the updated_at' do
# If this was a stale job, we want to bump the updated at now so that
# it won't be picked up by another worker while we're recalculating
expect { refresh.process! }.to change { refresh.reload.updated_at.to_i }.to(now.to_i)
end
end
end
describe '#requeue!' do
let!(:refresh) do
create(
:project_build_artifacts_size_refresh,
:running,
updated_at: 2.days.ago,
last_job_artifact_id: 111
)
end
let(:last_job_artifact_id) { 123 }
it 'transitions refresh state from running to pending' do
expect { refresh.requeue!(last_job_artifact_id) }.to change { refresh.reload.state }.to(described_class::STATES[:pending])
end
it 'bumps updated_at' do
expect { refresh.requeue!(last_job_artifact_id) }.to change { refresh.reload.updated_at.to_i }.to(now.to_i)
end
it 'updates last_job_artifact_id' do
expect { refresh.requeue!(last_job_artifact_id) }.to change { refresh.reload.last_job_artifact_id.to_i }.to(last_job_artifact_id)
end
end
end
describe '.process_next_refresh!' do
let!(:refresh_running) { create(:project_build_artifacts_size_refresh, :running) }
let!(:refresh_created) { create(:project_build_artifacts_size_refresh, :created) }
let!(:refresh_stale) { create(:project_build_artifacts_size_refresh, :stale) }
let!(:refresh_pending) { create(:project_build_artifacts_size_refresh, :pending) }
subject(:processed_refresh) { described_class.process_next_refresh! }
it 'picks the first record from the remaining work' do
expect(processed_refresh).to eq(refresh_created)
expect(processed_refresh.reload).to be_running
end
end
describe '.enqueue_refresh' do
let_it_be(:project_1) { create(:project) }
let_it_be(:project_2) { create(:project) }
let(:projects) { [project_1, project_1, project_2] }
it 'creates refresh records for each given project, skipping duplicates' do
expect { described_class.enqueue_refresh(projects) }
.to change { described_class.count }.from(0).to(2)
expect(described_class.first).to have_attributes(
project_id: project_1.id,
last_job_artifact_id: nil,
refresh_started_at: nil,
state: described_class::STATES[:created]
)
expect(described_class.last).to have_attributes(
project_id: project_2.id,
last_job_artifact_id: nil,
refresh_started_at: nil,
state: described_class::STATES[:created]
)
end
end
describe '#next_batch' do
let!(:project) { create(:project) }
let!(:artifact_1) { create(:ci_job_artifact, project: project, created_at: 14.days.ago) }
let!(:artifact_2) { create(:ci_job_artifact, project: project, created_at: 13.days.ago) }
let!(:artifact_3) { create(:ci_job_artifact, project: project, created_at: 12.days.ago) }
# This should not be included in the recalculation as it is created later than the refresh start time
let!(:future_artifact) { create(:ci_job_artifact, project: project, size: 8, created_at: refresh.refresh_started_at + 1.second) }
let!(:refresh) do
create(
:project_build_artifacts_size_refresh,
:pending,
project: project,
updated_at: 2.days.ago,
refresh_started_at: 10.days.ago,
last_job_artifact_id: artifact_1.id
)
end
subject(:batch) { refresh.next_batch(limit: 3) }
it 'returns the job artifact records that were created not later than the refresh_started_at and IDs greater than the last_job_artifact_id' do
expect(batch).to eq([artifact_2, artifact_3])
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Projects::RefreshBuildArtifactsSizeStatisticsService, :clean_gitlab_redis_shared_state do
let(:service) { described_class.new }
describe '#execute' do
let_it_be(:project) { create(:project) }
let_it_be(:artifact_1) { create(:ci_job_artifact, project: project, size: 1, created_at: 14.days.ago) }
let_it_be(:artifact_2) { create(:ci_job_artifact, project: project, size: 2, created_at: 13.days.ago) }
let_it_be(:artifact_3) { create(:ci_job_artifact, project: project, size: 5, created_at: 12.days.ago) }
# This should not be included in the recalculation as it is created later than the refresh start time
let_it_be(:future_artifact) { create(:ci_job_artifact, project: project, size: 8, created_at: 2.days.from_now) }
let!(:refresh) do
create(
:project_build_artifacts_size_refresh,
:created,
project: project,
updated_at: 2.days.ago,
refresh_started_at: nil,
last_job_artifact_id: nil
)
end
let(:now) { Time.zone.now }
around do |example|
freeze_time { example.run }
end
before do
stub_const("#{described_class}::BATCH_SIZE", 2)
stats = create(:project_statistics, project: project, build_artifacts_size: 120)
stats.increment_counter(:build_artifacts_size, 30)
end
it 'resets the build artifacts size stats' do
expect { service.execute }.to change { project.statistics.reload.build_artifacts_size }.to(0)
end
it 'increments the counter attribute by the total size of the current batch of artifacts' do
expect { service.execute }.to change { project.statistics.get_counter_value(:build_artifacts_size) }.to(3)
end
it 'updates the last_job_artifact_id to the ID of the last artifact from the batch' do
expect { service.execute }.to change { refresh.reload.last_job_artifact_id.to_i }.to(artifact_2.id)
end
it 'requeues the refresh job' do
service.execute
expect(refresh.reload).to be_pending
end
context 'when an error happens after the recalculation has started' do
let!(:refresh) do
create(
:project_build_artifacts_size_refresh,
:pending,
project: project,
last_job_artifact_id: artifact_2.id
)
end
before do
allow(Gitlab::Redis::SharedState).to receive(:with).and_raise(StandardError, 'error')
expect { service.execute }.to raise_error(StandardError)
end
it 'keeps the last_job_artifact_id unchanged' do
expect(refresh.reload.last_job_artifact_id).to eq(artifact_2.id)
end
it 'keeps the state of the refresh record at running' do
expect(refresh.reload).to be_running
end
end
context 'when there are no more artifacts to recalculate for the next refresh job' do
let!(:refresh) do
create(
:project_build_artifacts_size_refresh,
:pending,
project: project,
updated_at: 2.days.ago,
refresh_started_at: now,
last_job_artifact_id: artifact_3.id
)
end
it 'deletes the refresh record' do
service.execute
expect(Projects::BuildArtifactsSizeRefresh.where(id: refresh.id)).not_to exist
end
end
end
end
# frozen_string_literal: true
require 'rake_helper'
RSpec.describe 'gitlab:refresh_project_statistics_build_artifacts_size rake task', :silence_stdout do
let(:rake_task) { 'gitlab:refresh_project_statistics_build_artifacts_size' }
describe 'enqueuing build artifacts size statistics refresh for given list of project IDs' do
let_it_be(:project_1) { create(:project) }
let_it_be(:project_2) { create(:project) }
let_it_be(:project_3) { create(:project) }
let(:string_of_ids) { "#{project_1.id} #{project_2.id} #{project_3.id} 999999" }
before do
Rake.application.rake_require('tasks/gitlab/refresh_project_statistics_build_artifacts_size')
stub_const("BUILD_ARTIFACTS_SIZE_REFRESH_ENQUEUE_BATCH_SIZE", 2)
end
context 'when given a list of space-separated IDs through STDIN' do
before do
allow($stdin).to receive(:tty?).and_return(false)
allow($stdin).to receive(:read).and_return(string_of_ids)
end
it 'enqueues the projects for refresh' do
expect { run_rake_task(rake_task) }.to output(/Done/).to_stdout
expect(Projects::BuildArtifactsSizeRefresh.all.map(&:project)).to match_array([project_1, project_2, project_3])
end
end
context 'when given a list of space-separated IDs through rake argument' do
it 'enqueues the projects for refresh' do
expect { run_rake_task(rake_task, string_of_ids) }.to output(/Done/).to_stdout
expect(Projects::BuildArtifactsSizeRefresh.all.map(&:project)).to match_array([project_1, project_2, project_3])
end
end
context 'when not given any IDs' do
it 'returns an error message' do
expect { run_rake_task(rake_task) }.to output(/Please provide a string of space-separated project IDs/).to_stdout
end
end
end
end
...@@ -395,6 +395,7 @@ RSpec.describe 'Every Sidekiq worker' do ...@@ -395,6 +395,7 @@ RSpec.describe 'Every Sidekiq worker' do
'Projects::PostCreationWorker' => 3, 'Projects::PostCreationWorker' => 3,
'Projects::ScheduleBulkRepositoryShardMovesWorker' => 3, 'Projects::ScheduleBulkRepositoryShardMovesWorker' => 3,
'Projects::UpdateRepositoryStorageWorker' => 3, 'Projects::UpdateRepositoryStorageWorker' => 3,
'Projects::RefreshBuildArtifactsSizeStatisticsWorker' => 0,
'Prometheus::CreateDefaultAlertsWorker' => 3, 'Prometheus::CreateDefaultAlertsWorker' => 3,
'PropagateIntegrationGroupWorker' => 3, 'PropagateIntegrationGroupWorker' => 3,
'PropagateIntegrationInheritDescendantWorker' => 3, 'PropagateIntegrationInheritDescendantWorker' => 3,
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Projects::RefreshBuildArtifactsSizeStatisticsWorker do
let(:worker) { described_class.new }
describe '#perform_work' do
before do
expect_next_instance_of(Projects::RefreshBuildArtifactsSizeStatisticsService) do |instance|
expect(instance).to receive(:execute).and_return(refresh)
end
end
context 'when refresh job is present' do
let(:refresh) do
build(
:project_build_artifacts_size_refresh,
:running,
project_id: 77,
last_job_artifact_id: 123
)
end
it 'logs refresh information' do
expect(worker).to receive(:log_extra_metadata_on_done).with(:project_id, refresh.project_id)
expect(worker).to receive(:log_extra_metadata_on_done).with(:last_job_artifact_id, refresh.last_job_artifact_id)
expect(worker).to receive(:log_extra_metadata_on_done).with(:last_batch, refresh.destroyed?)
expect(worker).to receive(:log_extra_metadata_on_done).with(:refresh_started_at, refresh.refresh_started_at)
worker.perform_work
end
end
context 'when refresh job is not present' do
let(:refresh) { nil }
it 'logs refresh information' do
expect(worker).not_to receive(:log_extra_metadata_on_done)
worker.perform_work
end
end
end
describe '#remaining_work_count' do
subject { worker.remaining_work_count }
context 'and there are remaining refresh jobs' do
before do
create_list(:project_build_artifacts_size_refresh, 2, :pending)
end
it { is_expected.to eq(1) }
end
context 'and there are no remaining refresh jobs' do
it { is_expected.to eq(0) }
end
end
describe '#max_running_jobs' do
subject { worker.max_running_jobs }
context 'when all projects_build_artifacts_size_refresh flags are enabled' do
it { is_expected.to eq(described_class::MAX_RUNNING_HIGH) }
end
context 'when projects_build_artifacts_size_refresh_high flags is disabled' do
before do
stub_feature_flags(projects_build_artifacts_size_refresh_high: false)
end
it { is_expected.to eq(described_class::MAX_RUNNING_MEDIUM) }
end
context 'when projects_build_artifacts_size_refresh_high and projects_build_artifacts_size_refresh_medium flags are disabled' do
before do
stub_feature_flags(projects_build_artifacts_size_refresh_high: false)
stub_feature_flags(projects_build_artifacts_size_refresh_medium: false)
end
it { is_expected.to eq(described_class::MAX_RUNNING_LOW) }
end
context 'when all projects_build_artifacts_size_refresh flags are disabled' do
before do
stub_feature_flags(projects_build_artifacts_size_refresh_low: false)
stub_feature_flags(projects_build_artifacts_size_refresh_medium: false)
stub_feature_flags(projects_build_artifacts_size_refresh_high: false)
end
it { is_expected.to eq(0) }
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Projects::ScheduleRefreshBuildArtifactsSizeStatisticsWorker do
subject(:worker) { described_class.new }
describe '#perform' do
include_examples 'an idempotent worker' do
it 'schedules Projects::RefreshBuildArtifactsSizeStatisticsWorker to be performed with capacity' do
expect(Projects::RefreshBuildArtifactsSizeStatisticsWorker).to receive(:perform_with_capacity).twice
subject
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment