Commit dcca1b42 authored by Patrick Bajao

Add a cron worker to perform scheduled cleanup jobs

Query the `merge_request_cleanup_schedules` table for records
that are scheduled to be cleaned up. Limited to 300 records per
minute.
parent a93907ad
......@@ -4,4 +4,11 @@ class MergeRequest::CleanupSchedule < ApplicationRecord
belongs_to :merge_request, inverse_of: :cleanup_schedule
validates :scheduled_at, presence: true
# Lists the merge requests whose cleanup is due but has not been completed.
#
# A schedule qualifies when its `completed_at` is NULL and its `scheduled_at`
# is not later than the database's current time (`NOW()`). Matches are ordered
# by `scheduled_at`, most recent first, and capped at `limit` rows.
#
# @param limit [Integer] maximum number of IDs to return
# @return [Array] the `merge_request_id` values of the matching schedules
def self.scheduled_merge_request_ids(limit)
  due_and_incomplete = where('completed_at IS NULL AND scheduled_at <= NOW()')
  most_recent_first = due_and_incomplete.order('scheduled_at DESC')

  most_recent_first.limit(limit).pluck(:merge_request_id)
end
end
......@@ -379,6 +379,14 @@
:weight: 1
:idempotent:
:tags: []
# Queue entry for the cron job that schedules merge request ref cleanup.
# Presumably backs ScheduleMergeRequestCleanupRefsWorker — confirm against the worker.
# :has_external_dependencies: and :idempotent: carry no value, as in the
# neighbouring entries.
- :name: cronjob:schedule_merge_request_cleanup_refs
:feature_category: :source_code_management
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent:
:tags: []
- :name: cronjob:schedule_migrate_external_diffs
:feature_category: :source_code_management
:has_external_dependencies:
......
# frozen_string_literal: true
# Cron worker that looks up merge requests with a due cleanup schedule and
# enqueues one MergeRequestCleanupRefsWorker job for each of them.
class ScheduleMergeRequestCleanupRefsWorker # rubocop:disable Scalability/IdempotentWorker
  include ApplicationWorker
  include CronjobQueue # rubocop:disable Scalability/CronWorkerContext

  feature_category :source_code_management

  # Existing data shows that MergeRequestCleanupRefsWorker can take ~190ms per
  # job, and this worker is scheduled every minute, so 300 jobs can be
  # performed in that window. Spikes do happen from time to time, though, so
  # it is better to leave some allowance.
  LIMIT = 300

  # First argument handed to `bulk_perform_in` (the delay before the jobs run).
  DELAY = 10.seconds

  # Value handed to `bulk_perform_in` as `batch_size:`.
  BATCH_SIZE = 50

  # Enqueues cleanup jobs for up to LIMIT scheduled merge requests.
  # Does nothing when the database is read-only.
  def perform
    return if Gitlab::Database.read_only?

    MergeRequestCleanupRefsWorker.bulk_perform_in(DELAY, cleanup_job_arguments, batch_size: BATCH_SIZE) # rubocop:disable Scalability/BulkPerformWithContext
  end

  private

  # Builds one single-element argument list per scheduled merge request ID,
  # e.g. [1, 2] becomes [[1], [2]].
  def cleanup_job_arguments
    scheduled_ids = MergeRequest::CleanupSchedule.scheduled_merge_request_ids(LIMIT)

    scheduled_ids.map { |merge_request_id| [merge_request_id] }
  end
end
......@@ -531,6 +531,9 @@ Settings.cron_jobs['analytics_instance_statistics_count_job_trigger_worker']['jo
# Each cron job follows the same three steps: create the settings entry if it
# is missing, default the cron expression only when none is already set
# (`||=`), then always assign the worker class (`=`).

# Member invitation reminder emails: defaults to once a day at midnight.
Settings.cron_jobs['member_invitation_reminder_emails_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['member_invitation_reminder_emails_worker']['cron'] ||= '0 0 * * *'
Settings.cron_jobs['member_invitation_reminder_emails_worker']['job_class'] = 'MemberInvitationReminderEmailsWorker'
# Merge request ref cleanup scheduling: defaults to every minute.
Settings.cron_jobs['schedule_merge_request_cleanup_refs_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['schedule_merge_request_cleanup_refs_worker']['cron'] ||= '* * * * *'
Settings.cron_jobs['schedule_merge_request_cleanup_refs_worker']['job_class'] = 'ScheduleMergeRequestCleanupRefsWorker'
Gitlab.ee do
Settings.cron_jobs['active_user_count_threshold_worker'] ||= Settingslogic.new({})
......
# frozen_string_literal: true
# Factory for MergeRequest::CleanupSchedule records.
FactoryBot.define do
factory :merge_request_cleanup_schedule, class: 'MergeRequest::CleanupSchedule' do
# Implicit association; presumably resolved through the :merge_request
# factory — confirm that factory exists.
merge_request
# Evaluated lazily per record, so each schedule defaults to the time it is built.
scheduled_at { Time.current }
end
end
......@@ -10,4 +10,23 @@ RSpec.describe MergeRequest::CleanupSchedule do
describe 'validations' do
it { is_expected.to validate_presence_of(:scheduled_at) }
end
# Covers the filtering, ordering and limiting of scheduled_merge_request_ids.
describe '.scheduled_merge_request_ids' do
# Due (scheduled in the past) and not completed: expected in the result.
let_it_be(:mr_cleanup_schedule_1) { create(:merge_request_cleanup_schedule, scheduled_at: 2.days.ago) }
let_it_be(:mr_cleanup_schedule_2) { create(:merge_request_cleanup_schedule, scheduled_at: 1.day.ago) }
# Already completed: must be left out even though it is due.
let_it_be(:mr_cleanup_schedule_3) { create(:merge_request_cleanup_schedule, scheduled_at: 1.day.ago, completed_at: Time.current) }
# Due and not completed: expected in the result.
let_it_be(:mr_cleanup_schedule_4) { create(:merge_request_cleanup_schedule, scheduled_at: 4.days.ago) }
let_it_be(:mr_cleanup_schedule_5) { create(:merge_request_cleanup_schedule, scheduled_at: 3.days.ago) }
# Scheduled in the future: not due yet, so it must be left out.
let_it_be(:mr_cleanup_schedule_6) { create(:merge_request_cleanup_schedule, scheduled_at: 1.day.from_now) }
# Due, but the oldest of the five eligible schedules: cut off by the limit of 4.
let_it_be(:mr_cleanup_schedule_7) { create(:merge_request_cleanup_schedule, scheduled_at: 5.days.ago) }
it 'only includes incomplete schedule within the specified limit' do
# `eq` also pins the order: most recently scheduled first.
expect(described_class.scheduled_merge_request_ids(4)).to eq([
mr_cleanup_schedule_2.merge_request_id,
mr_cleanup_schedule_1.merge_request_id,
mr_cleanup_schedule_5.merge_request_id,
mr_cleanup_schedule_4.merge_request_id
])
end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the cron worker that enqueues MergeRequestCleanupRefsWorker jobs.
RSpec.describe ScheduleMergeRequestCleanupRefsWorker do
  subject(:worker) { described_class.new }

  describe '#perform' do
    # IDs the stubbed model query hands back to the worker.
    let(:scheduled_ids) { [1, 2, 3, 4] }

    # Each ID wrapped in its own array: one argument list per enqueued job.
    let(:job_arguments) { [[1], [2], [3], [4]] }

    before do
      # Stub the model lookup so no cleanup schedule records are needed.
      allow(MergeRequest::CleanupSchedule)
        .to receive(:scheduled_merge_request_ids)
        .with(described_class::LIMIT)
        .and_return(scheduled_ids)
    end

    it 'does nothing if the database is read-only' do
      allow(Gitlab::Database).to receive_messages(read_only?: true)

      expect(MergeRequestCleanupRefsWorker).not_to receive(:bulk_perform_in)

      worker.perform
    end

    it 'schedules MergeRequestCleanupRefsWorker to be performed by batch' do
      expect(MergeRequestCleanupRefsWorker)
        .to receive(:bulk_perform_in)
        .with(described_class::DELAY, job_arguments, batch_size: described_class::BATCH_SIZE)

      worker.perform
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment