Commit 48a0acb9 authored by Bob Van Landuyt's avatar Bob Van Landuyt

Merge branch 'enable-ci-deleted-objects-ff' into 'master'

Rollout ci_deleted_objects feature

See merge request gitlab-org/gitlab!46971
parents 2cd3043f a10774a1
...@@ -4,24 +4,23 @@ module Ci ...@@ -4,24 +4,23 @@ module Ci
class DestroyExpiredJobArtifactsService class DestroyExpiredJobArtifactsService
include ::Gitlab::ExclusiveLeaseHelpers include ::Gitlab::ExclusiveLeaseHelpers
include ::Gitlab::LoopHelpers include ::Gitlab::LoopHelpers
include ::Gitlab::Utils::StrongMemoize
BATCH_SIZE = 100 BATCH_SIZE = 100
LOOP_TIMEOUT = 5.minutes LOOP_TIMEOUT = 5.minutes
LEGACY_LOOP_TIMEOUT = 45.minutes
LOOP_LIMIT = 1000 LOOP_LIMIT = 1000
EXCLUSIVE_LOCK_KEY = 'expired_job_artifacts:destroy:lock' EXCLUSIVE_LOCK_KEY = 'expired_job_artifacts:destroy:lock'
LOCK_TIMEOUT = 10.minutes LOCK_TIMEOUT = 6.minutes
LEGACY_LOCK_TIMEOUT = 50.minutes
## ##
# Destroy expired job artifacts on GitLab instance # Destroy expired job artifacts on GitLab instance
# #
# This destroy process cannot run for more than 10 minutes. This is for # This destroy process cannot run for more than 6 minutes. This is for
# preventing multiple `ExpireBuildArtifactsWorker` CRON jobs run concurrently, # preventing multiple `ExpireBuildArtifactsWorker` CRON jobs run concurrently,
# which is scheduled at every hour. # which is scheduled every 7 minutes.
def execute def execute
in_lock(EXCLUSIVE_LOCK_KEY, ttl: lock_timeout, retries: 1) do in_lock(EXCLUSIVE_LOCK_KEY, ttl: LOCK_TIMEOUT, retries: 1) do
loop_until(timeout: loop_timeout, limit: LOOP_LIMIT) do loop_until(timeout: LOOP_TIMEOUT, limit: LOOP_LIMIT) do
destroy_artifacts_batch destroy_artifacts_batch
end end
end end
...@@ -42,30 +41,19 @@ module Ci ...@@ -42,30 +41,19 @@ module Ci
return false if artifacts.empty? return false if artifacts.empty?
if parallel_destroy? parallel_destroy_batch(artifacts)
parallel_destroy_batch(artifacts)
else
legacy_destroy_batch(artifacts)
destroy_related_records_for(artifacts)
end
true true
end end
# TODO: Make sure this can also be parallelized
# https://gitlab.com/gitlab-org/gitlab/-/issues/270973
def destroy_pipeline_artifacts_batch def destroy_pipeline_artifacts_batch
artifacts = Ci::PipelineArtifact.expired(BATCH_SIZE).to_a artifacts = Ci::PipelineArtifact.expired(BATCH_SIZE).to_a
return false if artifacts.empty? return false if artifacts.empty?
legacy_destroy_batch(artifacts)
true
end
def parallel_destroy?
::Feature.enabled?(:ci_delete_objects)
end
def legacy_destroy_batch(artifacts)
artifacts.each(&:destroy!) artifacts.each(&:destroy!)
true
end end
def parallel_destroy_batch(job_artifacts) def parallel_destroy_batch(job_artifacts)
...@@ -77,6 +65,7 @@ module Ci ...@@ -77,6 +65,7 @@ module Ci
# This is executed outside of the transaction because it depends on Redis # This is executed outside of the transaction because it depends on Redis
update_statistics_for(job_artifacts) update_statistics_for(job_artifacts)
destroyed_artifacts_counter.increment({}, job_artifacts.size)
end end
# This method is implemented in EE and it must do only database work # This method is implemented in EE and it must do only database work
...@@ -91,19 +80,12 @@ module Ci ...@@ -91,19 +80,12 @@ module Ci
end end
end end
def loop_timeout def destroyed_artifacts_counter
if parallel_destroy? strong_memoize(:destroyed_artifacts_counter) do
LOOP_TIMEOUT name = :destroyed_job_artifacts_count_total
else comment = 'Counter of destroyed expired job artifacts'
LEGACY_LOOP_TIMEOUT
end
end
def lock_timeout ::Gitlab::Metrics.counter(name, comment)
if parallel_destroy?
LOCK_TIMEOUT
else
LEGACY_LOCK_TIMEOUT
end end
end end
end end
......
...@@ -18,14 +18,12 @@ module Ci ...@@ -18,14 +18,12 @@ module Ci
end end
def max_running_jobs def max_running_jobs
if ::Feature.enabled?(:ci_delete_objects_low_concurrency) if ::Feature.enabled?(:ci_delete_objects_medium_concurrency)
2
elsif ::Feature.enabled?(:ci_delete_objects_medium_concurrency)
20 20
elsif ::Feature.enabled?(:ci_delete_objects_high_concurrency) elsif ::Feature.enabled?(:ci_delete_objects_high_concurrency)
50 50
else else
0 2
end end
end end
......
---
title: Parallelize the removal of expired job artifacts
merge_request: 46971
author:
type: performance
---
name: ci_delete_objects
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/42242
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/247103
group: group::continuous integration
type: development
default_enabled: false
\ No newline at end of file
---
name: ci_delete_objects_low_concurrency
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/39464
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/247103
group: group::continuous integration
type: development
default_enabled: false
\ No newline at end of file
...@@ -435,7 +435,7 @@ production: &base ...@@ -435,7 +435,7 @@ production: &base
cron: "19 * * * *" cron: "19 * * * *"
# Remove expired build artifacts # Remove expired build artifacts
expire_build_artifacts_worker: expire_build_artifacts_worker:
cron: "50 * * * *" cron: "*/7 * * * *"
# Remove files from object storage # Remove files from object storage
ci_schedule_delete_objects_worker: ci_schedule_delete_objects_worker:
cron: "*/16 * * * *" cron: "*/16 * * * *"
......
...@@ -415,7 +415,7 @@ Settings.cron_jobs['pipeline_schedule_worker'] ||= Settingslogic.new({}) ...@@ -415,7 +415,7 @@ Settings.cron_jobs['pipeline_schedule_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['pipeline_schedule_worker']['cron'] ||= '19 * * * *' Settings.cron_jobs['pipeline_schedule_worker']['cron'] ||= '19 * * * *'
Settings.cron_jobs['pipeline_schedule_worker']['job_class'] = 'PipelineScheduleWorker' Settings.cron_jobs['pipeline_schedule_worker']['job_class'] = 'PipelineScheduleWorker'
Settings.cron_jobs['expire_build_artifacts_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['expire_build_artifacts_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['expire_build_artifacts_worker']['cron'] ||= '50 * * * *' Settings.cron_jobs['expire_build_artifacts_worker']['cron'] ||= '*/7 * * * *'
Settings.cron_jobs['expire_build_artifacts_worker']['job_class'] = 'ExpireBuildArtifactsWorker' Settings.cron_jobs['expire_build_artifacts_worker']['job_class'] = 'ExpireBuildArtifactsWorker'
Settings.cron_jobs['ci_schedule_delete_objects_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['ci_schedule_delete_objects_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['ci_schedule_delete_objects_worker']['cron'] ||= '*/16 * * * *' Settings.cron_jobs['ci_schedule_delete_objects_worker']['cron'] ||= '*/16 * * * *'
......
...@@ -208,6 +208,7 @@ configuration option in `gitlab.yml`. These metrics are served from the ...@@ -208,6 +208,7 @@ configuration option in `gitlab.yml`. These metrics are served from the
| `limited_capacity_worker_running_jobs` | Gauge | 13.5 | Number of running jobs | `worker` | | `limited_capacity_worker_running_jobs` | Gauge | 13.5 | Number of running jobs | `worker` |
| `limited_capacity_worker_max_running_jobs` | Gauge | 13.5 | Maximum number of running jobs | `worker` | | `limited_capacity_worker_max_running_jobs` | Gauge | 13.5 | Maximum number of running jobs | `worker` |
| `limited_capacity_worker_remaining_work_count` | Gauge | 13.5 | Number of jobs waiting to be enqueued | `worker` | | `limited_capacity_worker_remaining_work_count` | Gauge | 13.5 | Number of jobs waiting to be enqueued | `worker` |
| `destroyed_job_artifacts_count_total` | Counter | 13.6 | Number of destroyed expired job artifacts | |
## Database load balancing metrics **(PREMIUM ONLY)** ## Database load balancing metrics **(PREMIUM ONLY)**
......
...@@ -61,35 +61,17 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared ...@@ -61,35 +61,17 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
end end
context 'when failed to destroy artifact' do context 'when failed to destroy artifact' do
context 'with ci_delete_objects disabled' do before do
before do stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10)
stub_feature_flags(ci_delete_objects: false) expect(Ci::DeletedObject)
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10) .to receive(:bulk_import)
.once
allow_next_found_instance_of(Ci::JobArtifact) do |artifact| .and_raise(ActiveRecord::RecordNotDestroyed)
allow(artifact).to receive(:destroy!).and_raise(ActiveRecord::RecordNotDestroyed)
end
end
it 'raises an exception and stop destroying' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
.and not_change { Security::Finding.count }.from(1)
end
end end
context 'with ci_delete_objects enabled' do it 'raises an exception and stop destroying' do
before do expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10) .and not_change { Security::Finding.count }.from(1)
expect(Ci::DeletedObject)
.to receive(:bulk_import)
.once
.and_raise(ActiveRecord::RecordNotDestroyed)
end
it 'raises an exception and stop destroying' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
.and not_change { Security::Finding.count }.from(1)
end
end end
end end
......
...@@ -75,6 +75,14 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared ...@@ -75,6 +75,14 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
it 'does not remove the files' do it 'does not remove the files' do
expect { subject }.not_to change { artifact.file.exists? } expect { subject }.not_to change { artifact.file.exists? }
end end
it 'reports metrics for destroyed artifacts' do
counter = service.send(:destroyed_artifacts_counter)
expect(counter).to receive(:increment).with({}, 1).and_call_original
subject
end
end end
end end
...@@ -114,49 +122,31 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared ...@@ -114,49 +122,31 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10) stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10)
end end
context 'with ci_delete_objects disabled' do context 'when the import fails' do
before do before do
stub_feature_flags(ci_delete_objects: false) expect(Ci::DeletedObject)
.to receive(:bulk_import)
allow_any_instance_of(Ci::JobArtifact) .once
.to receive(:destroy!)
.and_raise(ActiveRecord::RecordNotDestroyed) .and_raise(ActiveRecord::RecordNotDestroyed)
end end
it 'raises an exception and stop destroying' do it 'raises an exception and stop destroying' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed) expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
.and not_change { Ci::JobArtifact.count }.from(1)
end end
end end
context 'with ci_delete_objects enabled' do context 'when the delete fails' do
context 'when the import fails' do before do
before do expect(Ci::JobArtifact)
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10) .to receive(:id_in)
expect(Ci::DeletedObject) .once
.to receive(:bulk_import) .and_raise(ActiveRecord::RecordNotDestroyed)
.once
.and_raise(ActiveRecord::RecordNotDestroyed)
end
it 'raises an exception and stop destroying' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
.and not_change { Ci::JobArtifact.count }.from(1)
end
end end
context 'when the delete fails' do it 'raises an exception rolls back the insert' do
before do expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10) .and not_change { Ci::DeletedObject.count }.from(0)
expect(Ci::JobArtifact)
.to receive(:id_in)
.once
.and_raise(ActiveRecord::RecordNotDestroyed)
end
it 'raises an exception rolls back the insert' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
.and not_change { Ci::DeletedObject.count }.from(0)
end
end end
end end
end end
......
...@@ -28,7 +28,6 @@ RSpec.describe Ci::DeleteObjectsWorker do ...@@ -28,7 +28,6 @@ RSpec.describe Ci::DeleteObjectsWorker do
before do before do
stub_feature_flags( stub_feature_flags(
ci_delete_objects_low_concurrency: low,
ci_delete_objects_medium_concurrency: medium, ci_delete_objects_medium_concurrency: medium,
ci_delete_objects_high_concurrency: high ci_delete_objects_high_concurrency: high
) )
...@@ -36,13 +35,11 @@ RSpec.describe Ci::DeleteObjectsWorker do ...@@ -36,13 +35,11 @@ RSpec.describe Ci::DeleteObjectsWorker do
subject(:max_running_jobs) { worker.max_running_jobs } subject(:max_running_jobs) { worker.max_running_jobs }
where(:low, :medium, :high, :expected) do where(:medium, :high, :expected) do
false | false | false | 0 false | false | 2
true | true | true | 2 true | false | 20
true | false | false | 2 true | true | 20
false | true | false | 20 false | true | 50
false | true | true | 20
false | false | true | 50
end end
with_them do with_them do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment