Commit f7f3b3c3 authored by Grzegorz Bizon's avatar Grzegorz Bizon

Merge branch 'optimise-build-queue-service' into 'master'

Optimise UpdateBuildQueueService

Closes #66438

See merge request gitlab-org/gitlab-ce!32095
parents e724b323 a5f61827
......@@ -23,13 +23,17 @@ module Ci
project_type: 3
}
RUNNER_QUEUE_EXPIRY_TIME = 60.minutes
ONLINE_CONTACT_TIMEOUT = 1.hour
UPDATE_DB_RUNNER_INFO_EVERY = 40.minutes
RUNNER_QUEUE_EXPIRY_TIME = 1.hour
# This needs to be less than `ONLINE_CONTACT_TIMEOUT`
UPDATE_CONTACT_COLUMN_EVERY = (40.minutes..55.minutes).freeze
AVAILABLE_TYPES_LEGACY = %w[specific shared].freeze
AVAILABLE_TYPES = runner_types.keys.freeze
AVAILABLE_STATUSES = %w[active paused online offline].freeze
AVAILABLE_SCOPES = (AVAILABLE_TYPES_LEGACY + AVAILABLE_TYPES + AVAILABLE_STATUSES).freeze
FORM_EDITABLE = %i[description tag_list active run_untagged locked access_level maximum_timeout_human_readable].freeze
ignore_column :is_shared
......@@ -46,7 +50,7 @@ module Ci
scope :active, -> { where(active: true) }
scope :paused, -> { where(active: false) }
scope :online, -> { where('contacted_at > ?', contact_time_deadline) }
scope :online, -> { where('contacted_at > ?', online_contact_time_deadline) }
# The following query using negation is cheaper than using `contacted_at <= ?`
# because there are less runners online than have been created. The
# resulting query is quickly finding online ones and then uses the regular
......@@ -56,6 +60,8 @@ module Ci
scope :offline, -> { where.not(id: online) }
scope :ordered, -> { order(id: :desc) }
scope :with_recent_runner_queue, -> { where('contacted_at > ?', recent_queue_deadline) }
# BACKWARD COMPATIBILITY: There are needed to maintain compatibility with `AVAILABLE_SCOPES` used by `lib/api/runners.rb`
scope :deprecated_shared, -> { instance_type }
scope :deprecated_specific, -> { project_type.or(group_type) }
......@@ -137,10 +143,18 @@ module Ci
fuzzy_search(query, [:token, :description])
end
def self.contact_time_deadline
def self.online_contact_time_deadline
ONLINE_CONTACT_TIMEOUT.ago
end
def self.recent_queue_deadline
# we add queue expiry + online
# - contacted_at can be updated at any time within this interval
# we have always accurate `contacted_at` but it is stored in Redis
# and not persisted in database
(ONLINE_CONTACT_TIMEOUT + RUNNER_QUEUE_EXPIRY_TIME).ago
end
def self.order_by(order)
if order == 'contacted_asc'
order_contacted_at_asc
......@@ -174,7 +188,7 @@ module Ci
end
def online?
contacted_at && contacted_at > self.class.contact_time_deadline
contacted_at && contacted_at > self.class.online_contact_time_deadline
end
def status
......@@ -275,9 +289,7 @@ module Ci
def persist_cached_data?
# Use a random threshold to prevent beating DB updates.
# It generates a distribution between [40m, 80m].
contacted_at_max_age = UPDATE_DB_RUNNER_INFO_EVERY + Random.rand(UPDATE_DB_RUNNER_INFO_EVERY)
contacted_at_max_age = Random.rand(UPDATE_CONTACT_COLUMN_EVERY)
real_contacted_at = read_attribute(:contacted_at)
real_contacted_at.nil? ||
......
......@@ -9,6 +9,10 @@ module Ci
private
def tick_for(build, runners)
if Feature.enabled?(:ci_update_queues_for_online_runners, build.project, default_enabled: true)
runners = runners.with_recent_runner_queue
end
runners.each do |runner|
runner.pick_build!(build)
end
......
---
title: Optimise UpdateBuildQueueService
merge_request: 32095
author:
type: performance
......@@ -80,6 +80,13 @@ describe Ci::Runner do
end
end
describe 'constraints' do
it '.UPDATE_CONTACT_COLUMN_EVERY' do
expect(described_class::UPDATE_CONTACT_COLUMN_EVERY.max)
.to be <= described_class::ONLINE_CONTACT_TIMEOUT
end
end
describe '#access_level' do
context 'when creating new runner and access_level is nil' do
let(:runner) do
......
......@@ -7,84 +7,108 @@ describe Ci::UpdateBuildQueueService do
let(:build) { create(:ci_build, pipeline: pipeline) }
let(:pipeline) { create(:ci_pipeline, project: project) }
context 'when updating specific runners' do
let(:runner) { create(:ci_runner, :project, projects: [project]) }
context 'when there is a runner that can pick build' do
shared_examples 'refreshes runner' do
it 'ticks runner queue value' do
expect { subject.execute(build) }.to change { runner.ensure_runner_queue_value }
end
end
context 'when there is no runner that can pick build' do
let(:another_project) { create(:project) }
let(:runner) { create(:ci_runner, :project, projects: [another_project]) }
it 'does not tick runner queue value' do
shared_examples 'does not refresh runner' do
it 'ticks runner queue value' do
expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value }
end
end
end
context 'when updating shared runners' do
let(:runner) { create(:ci_runner, :instance) }
shared_examples 'matching build' do
context 'when there is a online runner that can pick build' do
before do
runner.update!(contacted_at: 30.minutes.ago)
end
context 'when there is no runner that can pick build' do
it 'ticks runner queue value' do
expect { subject.execute(build) }.to change { runner.ensure_runner_queue_value }
it_behaves_like 'refreshes runner'
end
end
shared_examples 'mismatching tags' do
context 'when there is no runner that can pick build due to tag mismatch' do
before do
build.tag_list = [:docker]
end
it 'does not tick runner queue value' do
expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value }
it_behaves_like 'does not refresh runner'
end
end
context 'when there is no runner that can pick build due to being disabled on project' do
shared_examples 'recent runner queue' do
context 'when there is runner with expired cache' do
before do
build.project.shared_runners_enabled = false
runner.update!(contacted_at: Ci::Runner.recent_queue_deadline)
end
it 'does not tick runner queue value' do
expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value }
context 'when ci_update_queues_for_online_runners is enabled' do
before do
stub_feature_flags(ci_update_queues_for_online_runners: true)
end
it_behaves_like 'does not refresh runner'
end
context 'when ci_update_queues_for_online_runners is disabled' do
before do
stub_feature_flags(ci_update_queues_for_online_runners: false)
end
context 'when updating group runners' do
let(:group) { create(:group) }
let(:project) { create(:project, group: group) }
let(:runner) { create(:ci_runner, :group, groups: [group]) }
it_behaves_like 'refreshes runner'
end
end
end
context 'when there is a runner that can pick build' do
it 'ticks runner queue value' do
expect { subject.execute(build) }.to change { runner.ensure_runner_queue_value }
context 'when updating specific runners' do
let(:runner) { create(:ci_runner, :project, projects: [project]) }
it_behaves_like 'matching build'
it_behaves_like 'mismatching tags'
it_behaves_like 'recent runner queue'
context 'when the runner is assigned to another project' do
let(:another_project) { create(:project) }
let(:runner) { create(:ci_runner, :project, projects: [another_project]) }
it_behaves_like 'does not refresh runner'
end
end
context 'when there is no runner that can pick build due to tag mismatch' do
context 'when updating shared runners' do
let(:runner) { create(:ci_runner, :instance) }
it_behaves_like 'matching build'
it_behaves_like 'mismatching tags'
it_behaves_like 'recent runner queue'
context 'when there is no runner that can pick build due to being disabled on project' do
before do
build.tag_list = [:docker]
build.project.shared_runners_enabled = false
end
it 'does not tick runner queue value' do
expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value }
it_behaves_like 'does not refresh runner'
end
end
context 'when updating group runners' do
let(:group) { create(:group) }
let(:project) { create(:project, group: group) }
let(:runner) { create(:ci_runner, :group, groups: [group]) }
it_behaves_like 'matching build'
it_behaves_like 'mismatching tags'
it_behaves_like 'recent runner queue'
context 'when there is no runner that can pick build due to being disabled on project' do
before do
build.project.group_runners_enabled = false
end
it 'does not tick runner queue value' do
expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value }
end
it_behaves_like 'does not refresh runner'
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment